From fe8ab488e9161c46dd9885d58fc52996dc0249ff Mon Sep 17 00:00:00 2001
From: Apple
Date: Fri, 24 Oct 2014 19:21:07 +0000
Subject: [PATCH] xnu-2782.1.97.tar.gz

---
 EXTERNAL_HEADERS/Availability.h | 59 +-
 EXTERNAL_HEADERS/AvailabilityInternal.h | 1478 ++++-
 EXTERNAL_HEADERS/AvailabilityMacros.h | 354 +-
 EXTERNAL_HEADERS/Makefile | 12 +-
 EXTERNAL_HEADERS/architecture/Makefile | 3 +
 EXTERNAL_HEADERS/corecrypto/cc.h | 19 +-
 EXTERNAL_HEADERS/corecrypto/cc_config.h | 149 +-
 EXTERNAL_HEADERS/corecrypto/cc_priv.h | 102 +-
 EXTERNAL_HEADERS/corecrypto/ccaes.h | 10 +
 EXTERNAL_HEADERS/corecrypto/ccasn1.h | 95 +
 EXTERNAL_HEADERS/corecrypto/ccder.h | 112 +-
 EXTERNAL_HEADERS/corecrypto/ccdigest.h | 68 +-
 EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h | 22 +
 EXTERNAL_HEADERS/corecrypto/ccdrbg.h | 105 +
 EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h | 68 +
 EXTERNAL_HEADERS/corecrypto/cchmac.h | 12 +-
 EXTERNAL_HEADERS/corecrypto/ccmode.h | 253 +-
 EXTERNAL_HEADERS/corecrypto/ccmode_factory.h | 203 +-
 EXTERNAL_HEADERS/corecrypto/ccmode_impl.h | 69 +-
 EXTERNAL_HEADERS/corecrypto/ccn.h | 107 +-
 EXTERNAL_HEADERS/corecrypto/ccpad.h | 58 +-
 EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h | 1 -
 EXTERNAL_HEADERS/corecrypto/ccrc4.h | 8 +-
 EXTERNAL_HEADERS/corecrypto/ccrng.h | 8 +-
 EXTERNAL_HEADERS/corecrypto/ccrng_system.h | 2 +
 EXTERNAL_HEADERS/corecrypto/ccsha1.h | 5 +-
 EXTERNAL_HEADERS/corecrypto/ccsha2.h | 16 +-
 Makefile | 59 +-
 README | 22 +-
 SETUP/Makefile | 3 +-
 SETUP/config/Makefile | 7 +-
 SETUP/config/config.h | 123 +-
 SETUP/config/doconf | 97 +-
 SETUP/config/externs.c | 18 -
 SETUP/config/lexer.l | 46 -
 SETUP/config/main.c | 82 +-
 SETUP/config/mkglue.c | 331 -
 SETUP/config/mkheaders.c | 58 +-
 SETUP/config/mkioconf.c | 1994 +-----
 SETUP/config/mkmakefile.c | 417 +-
 SETUP/config/mkswapconf.c | 247 -
 SETUP/config/parser.y | 996 +--
 SETUP/kextsymboltool/Makefile | 4 +-
 SETUP/replacecontents/Makefile | 30 +
 SETUP/replacecontents/replacecontents.c | 137 +
 bsd/Makefile | 16 +-
 bsd/bsm/audit_kevents.h | 9 +
 bsd/conf/MASTER.x86_64 | 125 -
 bsd/conf/Makefile | 32 +-
 bsd/conf/Makefile.template | 32 +-
 bsd/conf/compat_hdrs | 82 -
 bsd/conf/files | 62 +-
 bsd/conf/files.x86_64 | 8 -
 bsd/conf/machine.awk | 18 -
 bsd/conf/param.c | 11 +-
 bsd/crypto/Makefile | 2 -
 bsd/crypto/blowfish/Makefile | 20 -
 bsd/crypto/blowfish/bf_enc.c | 154 -
 bsd/crypto/blowfish/bf_locl.h | 226 -
 bsd/crypto/blowfish/bf_pi.h | 328 -
 bsd/crypto/blowfish/bf_skey.c | 120 -
 bsd/crypto/blowfish/blowfish.h | 93 -
 bsd/crypto/cast128/cast128.c | 887 ---
 bsd/crypto/cast128/cast128.h | 55 -
 bsd/crypto/cast128/cast128_subkey.h | 92 -
 bsd/dev/Makefile | 2 +-
 bsd/dev/dtrace/Makefile | 2 +-
 bsd/dev/dtrace/dtrace.c | 3810 +++--------
 bsd/dev/dtrace/dtrace_glue.c | 164 +-
 bsd/dev/dtrace/dtrace_subr.c | 142 +-
 bsd/dev/dtrace/fasttrap.c | 234 +-
 bsd/dev/dtrace/fbt.c | 23 +-
 bsd/dev/dtrace/lockstat.c | 20 +-
 bsd/dev/dtrace/profile_prvd.c | 243 +-
 bsd/dev/dtrace/scripts/Makefile | 4 +-
 bsd/dev/dtrace/scripts/darwin.d | 20 +
 bsd/dev/dtrace/scripts/mptcp.d | 8 +-
 bsd/dev/dtrace/sdt.c | 114 +-
 bsd/dev/dtrace/sdt_subr.c | 4 +-
 bsd/dev/dtrace/systrace.c | 183 +-
 bsd/dev/dtrace/systrace.h | 20 +-
 bsd/dev/i386/conf.c | 12 +-
 bsd/dev/i386/dis_tables.c | 31 +-
 bsd/dev/i386/dtrace_isa.c | 4 +-
 bsd/dev/i386/fasttrap_isa.c | 53 +-
 bsd/dev/i386/fbt_x86.c | 2 +
 bsd/dev/i386/instr_size.c | 16 -
 bsd/dev/i386/kern_machdep.c | 38 +-
 bsd/dev/i386/stubs.c | 22 -
 bsd/dev/i386/sysctl.c | 105 -
 bsd/dev/i386/systemcalls.c | 111 +-
 bsd/dev/{i386 => }/mem.c | 143 +-
 bsd/dev/munge.c | 94 +-
 bsd/dev/random/randomdev.c | 453 +-
 bsd/dev/unix_startup.c | 13 +-
 bsd/dev/vn/vn.c | 18 +-
 bsd/hfs/hfs.h | 132 +-
 bsd/hfs/hfs_attrlist.c | 834 ++-
 bsd/hfs/hfs_btreeio.c | 4 +-
 bsd/hfs/hfs_catalog.c | 153 +-
 bsd/hfs/hfs_catalog.h | 19 +-
 bsd/hfs/hfs_chash.c | 8 +-
 bsd/hfs/hfs_cnode.c | 370 +-
 bsd/hfs/hfs_cnode.h | 195 +-
 bsd/hfs/hfs_cprotect.c | 391 +-
 bsd/hfs/hfs_encodings.c | 2 +-
 bsd/hfs/hfs_endian.c | 4 +-
 bsd/hfs/hfs_fsctl.h | 38 +-
 bsd/hfs/hfs_hotfiles.c | 2 +-
 bsd/hfs/hfs_kdebug.h | 81 +-
 bsd/hfs/hfs_link.c | 79 +-
 bsd/hfs/hfs_lookup.c | 5 +-
 bsd/hfs/hfs_macos_defs.h | 4 +-
 bsd/hfs/hfs_notification.c | 21 +-
 bsd/hfs/hfs_readwrite.c | 932 +--
 bsd/hfs/hfs_resize.c | 3497 ++++++++++
 bsd/hfs/hfs_vfsops.c | 3983 ++----
 bsd/hfs/hfs_vfsutils.c | 580 +-
 bsd/hfs/hfs_vnops.c | 1599 +++--
 bsd/hfs/hfs_xattr.c | 106 +-
 bsd/hfs/hfscommon/BTree/BTree.c | 8 +-
 bsd/hfs/hfscommon/Catalog/CatalogUtilities.c | 2 +-
 bsd/hfs/hfscommon/Misc/FileExtentMapping.c | 10 +-
 bsd/hfs/hfscommon/Misc/VolumeAllocation.c | 52 +-
 bsd/hfs/rangelist.c | 35 +-
 bsd/hfs/rangelist.h | 3 +-
 bsd/i386/Makefile | 8 +-
 bsd/i386/_mcontext.h | 7 -
 bsd/i386/dis_tables.h | 6 -
 bsd/i386/fasttrap_isa.h | 4 -
 bsd/i386/setjmp.h | 83 -
 bsd/i386/types.h | 2 -
 bsd/i386/vmparam.h | 2 -
 bsd/kern/Makefile | 2 +-
 bsd/kern/bsd_init.c | 77 +-
 bsd/kern/bsd_stubs.c | 11 -
 bsd/kern/decmpfs.c | 44 +-
 bsd/kern/kdebug.c | 442 +-
 bsd/kern/kern_acct.c | 54 +-
 bsd/kern/kern_aio.c | 9 +-
 bsd/kern/kern_control.c | 1503 +++--
 bsd/kern/kern_core.c | 4 +
 bsd/kern/kern_credential.c | 28 +-
 bsd/kern/kern_cs.c | 272 +-
 bsd/kern/kern_csr.c | 190 +
 bsd/kern/kern_descrip.c | 258 +-
 bsd/kern/kern_ecc.c | 67 +
 bsd/kern/kern_event.c | 226 +-
 bsd/kern/kern_exec.c | 673 +-
 bsd/kern/kern_exit.c | 138 +-
 bsd/kern/kern_fork.c | 84 +-
 bsd/kern/kern_guarded.c | 282 +-
 bsd/kern/kern_kpc.c | 49 +-
 bsd/kern/kern_lockf.c | 13 +-
 bsd/kern/kern_malloc.c | 25 +
 bsd/kern/kern_memorystatus.c | 1576 ++++-
 bsd/kern/kern_mib.c | 42 +-
 bsd/kern/kern_mman.c | 328 +-
 bsd/kern/kern_newsysctl.c | 462 +-
 bsd/kern/kern_overrides.c | 47 +-
 bsd/kern/kern_proc.c | 382 +-
 bsd/kern/kern_prot.c | 17 +-
 bsd/kern/kern_resource.c | 554 +-
 bsd/kern/kern_sfi.c | 258 +
 bsd/kern/kern_sig.c | 7 +-
 bsd/kern/kern_symfile.c | 97 +-
 bsd/kern/kern_synch.c | 2 +-
 bsd/kern/kern_sysctl.c | 902 +--
 bsd/kern/kern_xxx.c | 2 -
 bsd/kern/kpi_mbuf.c | 56 +-
 bsd/kern/kpi_socket.c | 13 +-
 bsd/kern/kpi_socketfilter.c | 3 +-
 bsd/kern/mach_loader.c | 145 +-
 bsd/kern/mach_loader.h | 3 +-
 bsd/kern/mach_process.c | 10 +-
 bsd/kern/makesyscalls.sh | 105 +-
 bsd/kern/mcache.c | 131 +-
 bsd/kern/policy_check.c | 68 +-
 bsd/kern/posix_sem.c | 18 -
 bsd/kern/posix_shm.c | 4 -
 bsd/kern/proc_info.c | 496 +-
 bsd/kern/proc_uuid_policy.c | 187 +-
 bsd/kern/process_policy.c | 65 +-
 bsd/kern/pthread_shims.c | 94 +-
 bsd/kern/socket_info.c | 10 +-
 bsd/kern/subr_xxx.c | 20 +-
 bsd/kern/sys_coalition.c | 281 +
 bsd/kern/sys_domain.c | 7 +-
 bsd/kern/sys_generic.c | 161 +-
 bsd/kern/sys_pipe.c | 6 +-
 bsd/kern/syscalls.master | 88 +-
 bsd/kern/sysv_sem.c | 5 +-
 bsd/kern/trace.codes | 387 +-
 bsd/kern/tty.c | 36 +-
 bsd/kern/tty_conf.c | 2 +-
 bsd/kern/tty_dev.c | 1205 ++++
 bsd/kern/tty_dev.h | 80 +
 bsd/kern/tty_ptmx.c | 1190 +---
 bsd/kern/tty_pty.c | 916 +--
 bsd/kern/ubc_subr.c | 552 +-
 bsd/kern/uipc_domain.c | 49 -
 bsd/kern/uipc_mbuf.c | 494 +-
 bsd/kern/uipc_mbuf2.c | 1 +
 bsd/kern/uipc_socket.c | 1388 +++-
 bsd/kern/uipc_socket2.c | 321 +-
 bsd/kern/uipc_syscalls.c | 553 +-
 bsd/kern/uipc_usrreq.c | 28 +-
 bsd/kern/vm_pressure.c | 130 +-
 bsd/kern/vm_pressure.h | 4 +-
 bsd/machine/Makefile | 6 +-
 bsd/man/Makefile | 2 +-
 bsd/man/man2/Makefile | 13 +
 bsd/man/man2/access.2 | 69 +-
 bsd/man/man2/bind.2 | 5 +
 bsd/man/man2/chmod.2 | 78 +-
 bsd/man/man2/chown.2 | 85 +-
 bsd/man/man2/faccessat.2 | 1 +
 bsd/man/man2/fchmodat.2 | 1 +
 bsd/man/man2/fchownat.2 | 1 +
 bsd/man/man2/fstatat.2 | 1 +
 bsd/man/man2/getattrlist.2 | 241 +-
 bsd/man/man2/getattrlistat.2 | 1 +
 bsd/man/man2/getattrlistbulk.2 | 376 ++
 bsd/man/man2/getdirentriesattr.2 | 13 +-
 bsd/man/man2/link.2 | 113 +-
 bsd/man/man2/linkat.2 | 1 +
 bsd/man/man2/mkdir.2 | 57 +-
 bsd/man/man2/mkdirat.2 | 1 +
 bsd/man/man2/open.2 | 61 +-
 bsd/man/man2/openat.2 | 1 +
 bsd/man/man2/readlink.2 | 58 +-
 bsd/man/man2/readlinkat.2 | 1 +
 bsd/man/man2/rename.2 | 74 +-
 bsd/man/man2/renameat.2 | 1 +
 bsd/man/man2/sem_close.2 | 1 -
 bsd/man/man2/stat.2 | 82 +-
 bsd/man/man2/symlink.2 | 57 +-
 bsd/man/man2/symlinkat.2 | 1 +
 bsd/man/man2/unlink.2 | 101 +-
 bsd/man/man2/unlinkat.2 | 1 +
 bsd/man/man4/tcp.4 | 26 +-
 bsd/man/man5/dir.5 | 2 +-
 bsd/miscfs/devfs/devfs_tree.c | 23 +-
 bsd/miscfs/devfs/devfs_vfsops.c | 156 +-
 bsd/miscfs/devfs/devfsdefs.h | 2 +
 bsd/miscfs/specfs/spec_vnops.c | 169 +-
 bsd/net/Makefile | 2 +-
 bsd/net/bpf.c | 209 +-
 bsd/net/bpf.h | 7 +-
 bsd/net/bpfdesc.h | 9 +-
 bsd/net/classq/classq.h | 2 +-
 bsd/net/classq/classq_sfb.c | 332 +-
 bsd/net/classq/classq_sfb.h | 22 +-
 bsd/net/classq/classq_subr.c | 8 +
 bsd/net/classq/classq_util.c | 15 +-
 bsd/net/classq/if_classq.h | 2 +
 bsd/net/content_filter.c | 3942 +++++++++
 bsd/net/content_filter.h | 375 ++
 bsd/net/dlil.c | 73 +-
 bsd/net/ether_inet6_pr_module.c | 1 -
 bsd/net/ether_inet_pr_module.c | 1 -
 bsd/net/if.c | 85 +-
 bsd/net/if.h | 66 +-
 bsd/net/if_bond.c | 40 +-
 bsd/net/if_bridge.c | 473 +-
 bsd/net/if_bridgevar.h | 70 +-
 bsd/net/if_gif.c | 41 -
 bsd/net/if_ipsec.c | 190 +-
 bsd/net/if_ipsec.h | 18 +-
 bsd/net/if_llreach.h | 2 +-
 bsd/net/if_utun.c | 112 +-
 bsd/net/if_utun.h | 4 +
 bsd/net/if_var.h | 39 +-
 bsd/net/if_vlan.c | 111 +-
 bsd/net/kpi_interface.c | 98 +-
 bsd/net/kpi_interface.h | 79 +-
 bsd/net/ndrv.c | 7 +-
 bsd/net/necp.c | 5788 +++++++++++++++++
 bsd/net/necp.h | 376 ++
 bsd/net/net_stubs.c | 3119 ++-------
 bsd/net/ntstat.c | 795 ++-
 bsd/net/ntstat.h | 104 +-
 bsd/net/packet_mangler.c | 1037 +++
 bsd/net/packet_mangler.h | 109 +
 bsd/net/pf.c | 3 +-
 bsd/net/pf_ioctl.c | 76 +-
 bsd/net/pfkeyv2.h | 19 +-
 bsd/net/pfvar.h | 8 +-
 bsd/net/pktap.c | 440 +-
 bsd/net/pktap.h | 52 +-
 bsd/net/pktsched/pktsched.h | 1 +
 bsd/net/pktsched/pktsched_qfq.c | 6 +
 bsd/net/pktsched/pktsched_qfq.h | 1 +
 bsd/net/pktsched/pktsched_tcq.c | 6 +
 bsd/net/pktsched/pktsched_tcq.h | 1 +
 bsd/net/route.c | 38 +-
 bsd/net/route.h | 5 -
 bsd/net/rtsock.c | 2 +-
 bsd/netinet/Makefile | 10 +-
 bsd/netinet/cbrtf.c | 419 ++
 bsd/netinet/flow_divert.c | 249 +-
 bsd/netinet/flow_divert.h | 3 -
 bsd/netinet/flow_divert_proto.h | 3 +
 bsd/netinet/igmp.c | 8 +-
 bsd/netinet/in.c | 239 +-
 bsd/netinet/in.h | 12 +
 bsd/netinet/in_arp.c | 127 +-
 bsd/netinet/in_dhcp.c | 4 +-
 bsd/netinet/in_gif.c | 4 -
 bsd/netinet/in_mcast.c | 45 -
 bsd/netinet/in_pcb.c | 424 +-
 bsd/netinet/in_pcb.h | 55 +-
 bsd/netinet/in_pcblist.c | 44 +-
 bsd/netinet/in_proto.c | 11 -
 bsd/netinet/in_rmx.c | 2 +-
 bsd/netinet/in_tclass.c | 17 +-
 bsd/netinet/in_var.h | 12 +
 bsd/netinet/ip_divert.c | 2 +-
 bsd/netinet/ip_dummynet.c | 2 +-
 bsd/netinet/ip_encap.c | 15 -
 bsd/netinet/ip_fw2.c | 2 +-
 bsd/netinet/ip_icmp.c | 10 +-
 bsd/netinet/ip_input.c | 90 +-
 bsd/netinet/ip_mroute.c | 2170 ------
 bsd/netinet/ip_mroute.h | 313 -
 bsd/netinet/ip_output.c | 226 +-
 bsd/netinet/ip_var.h | 23 +-
 bsd/netinet/kpi_ipfilter.c | 34 +-
 bsd/netinet/kpi_ipfilter.h | 3 +-
 bsd/netinet/mptcp.c | 175 +-
 bsd/netinet/mptcp_opt.c | 198 +-
 bsd/netinet/mptcp_opt.h | 2 +-
 bsd/netinet/mptcp_subr.c | 762 ++-
 bsd/netinet/mptcp_usrreq.c | 85 +-
 bsd/netinet/mptcp_var.h | 39 +-
 bsd/netinet/raw_ip.c | 168 +-
 bsd/netinet/tcp.h | 26 +-
 bsd/netinet/tcp_cc.c | 405 ++
 bsd/netinet/tcp_cc.h | 42 +-
 bsd/netinet/tcp_cubic.c | 495 ++
 bsd/netinet/tcp_input.c | 930 +--
 bsd/netinet/tcp_ledbat.c | 15 +-
 bsd/netinet/tcp_newreno.c | 104 +-
 bsd/netinet/tcp_output.c | 582 +-
 bsd/netinet/tcp_sack.c | 135 +-
 bsd/netinet/tcp_subr.c | 310 +-
 bsd/netinet/tcp_timer.c | 879 ++-
 bsd/netinet/tcp_timer.h | 236 +-
 bsd/netinet/tcp_usrreq.c | 180 +-
 bsd/netinet/tcp_var.h | 164 +-
 bsd/netinet/udp_usrreq.c | 137 +-
 bsd/netinet6/Makefile | 6 +-
 bsd/netinet6/ah_core.c | 16 +-
 bsd/netinet6/ah_input.c | 154 +-
 bsd/netinet6/dest6.c | 12 -
 bsd/netinet6/esp_core.c | 6 +-
 bsd/netinet6/esp_input.c | 54 +-
 bsd/netinet6/frag6.c | 6 -
 bsd/netinet6/icmp6.c | 46 +-
 bsd/netinet6/in6.c | 457 +-
 bsd/netinet6/in6.h | 7 +-
 bsd/netinet6/in6_ifattach.c | 21 +-
 bsd/netinet6/in6_pcb.c | 97 +-
 bsd/netinet6/in6_proto.c | 25 +-
 bsd/netinet6/in6_rmx.c | 2 +-
 bsd/netinet6/in6_src.c | 248 +-
 bsd/netinet6/in6_var.h | 35 +-
 bsd/netinet6/ip6_forward.c | 5 +-
 bsd/netinet6/ip6_fw.c | 6 +-
 bsd/netinet6/ip6_input.c | 171 +-
 bsd/netinet6/ip6_mroute.c | 1919 ------
 bsd/netinet6/ip6_mroute.h | 333 -
 bsd/netinet6/ip6_output.c | 175 +-
 bsd/netinet6/ip6_var.h | 16 +-
 bsd/netinet6/ipcomp_core.c | 22 +-
 bsd/netinet6/ipsec.c | 2196 ++---
 bsd/netinet6/ipsec.h | 10 +-
 bsd/netinet6/ipsec6.h | 3 +-
 bsd/netinet6/mld6.c | 11 +-
 bsd/netinet6/nd6.c | 30 +-
 bsd/netinet6/nd6.h | 12 +-
 bsd/netinet6/nd6_nbr.c | 117 +-
 bsd/netinet6/nd6_prproxy.c | 31 +-
 bsd/netinet6/nd6_rtr.c | 154 +-
 bsd/netinet6/nd6_send.c | 18 +
 bsd/netinet6/pim6.h | 70 -
 bsd/netinet6/pim6_var.h | 72 -
 bsd/netinet6/raw_ip6.c | 135 +-
 bsd/netinet6/route6.c | 12 -
 bsd/netinet6/udp6_output.c | 49 +-
 bsd/netinet6/udp6_usrreq.c | 92 +-
 bsd/netkey/key.c | 432 +-
 bsd/netkey/key.h | 2 +-
 bsd/netkey/keydb.h | 9 +-
 bsd/nfs/nfs.h | 40 +-
 bsd/nfs/nfs4_subs.c | 4 +-
 bsd/nfs/nfs4_vnops.c | 116 +-
 bsd/nfs/nfs_bio.c | 87 +-
 bsd/nfs/nfs_gss.c | 713 +-
 bsd/nfs/nfs_gss.h | 17 +-
 bsd/nfs/nfs_lock.c | 15 +-
 bsd/nfs/nfs_node.c | 8 +-
 bsd/nfs/nfs_socket.c | 372 +-
 bsd/nfs/nfs_subs.c | 73 +-
 bsd/nfs/nfs_syscalls.c | 87 +-
 bsd/nfs/nfs_vfsops.c | 250 +-
 bsd/nfs/nfs_vnops.c | 507 +-
 bsd/nfs/nfsmount.h | 10 +-
 bsd/nfs/nfsnode.h | 7 +-
 bsd/security/audit/audit.c | 21 +-
 bsd/security/audit/audit_arg.c | 1 -
 bsd/security/audit/audit_bsm.c | 122 +-
 bsd/security/audit/audit_bsm_klib.c | 247 +-
 bsd/security/audit/audit_mac.c | 1 -
 bsd/security/audit/audit_private.h | 4 +
 bsd/security/audit/audit_syscalls.c | 1 -
 bsd/security/audit/audit_worker.c | 1 -
 bsd/sys/Makefile | 26 +-
 bsd/sys/_endian.h | 12 +
 bsd/sys/_structs.h | 130 +-
 bsd/sys/_types.h | 73 +-
 bsd/sys/_types/Makefile | 13 +-
 bsd/sys/_types/{___offsetof.h => _fsid_t.h} | 17 +-
 bsd/sys/_types/{_pthread_t.h => _offsetof.h} | 7 +-
 bsd/sys/_types/_pthread_cond_t.h | 31 -
 bsd/sys/_types/_pthread_condattr_t.h | 31 -
 bsd/sys/_types/_pthread_mutex_t.h | 31 -
 bsd/sys/_types/_pthread_mutexattr_t.h | 31 -
 bsd/sys/_types/_pthread_once_t.h | 31 -
 bsd/sys/_types/_pthread_rwlock_t.h | 31 -
 bsd/sys/_types/_pthread_rwlockattr_t.h | 31 -
 bsd/sys/aio.h | 2 +
 bsd/sys/attr.h | 38 +-
 bsd/sys/bsdtask_info.h | 5 +
 bsd/sys/buf.h | 53 +-
 bsd/sys/buf_internal.h | 21 +-
 bsd/sys/cdefs.h | 42 +-
 bsd/sys/coalition.h | 86 +
 .../ucontext.h => sys/codedir_internal.h} | 28 +-
 bsd/sys/codesign.h | 29 +-
 bsd/sys/conf.h | 2 +-
 bsd/sys/cprotect.h | 95 +-
 bsd/sys/csr.h | 88 +
 bsd/sys/disk.h | 63 +-
 bsd/sys/dtrace.h | 37 +-
 bsd/sys/dtrace_glue.h | 38 +-
 bsd/sys/dtrace_impl.h | 55 +-
 bsd/sys/dtrace_ptss.h | 7 +-
 bsd/sys/event.h | 5 +-
 bsd/sys/eventvar.h | 1 +
 bsd/sys/fasttrap_impl.h | 9 +-
 bsd/sys/fbt.h | 3 -
 bsd/sys/fcntl.h | 86 +-
 bsd/sys/file_internal.h | 7 +-
 bsd/sys/fsctl.h | 11 +-
 bsd/sys/fsgetpath.h | 18 +
 bsd/sys/fslog.h | 94 +-
 bsd/sys/guarded.h | 15 +-
 bsd/sys/imgact.h | 2 +
 bsd/sys/kasl.h | 4 +-
 bsd/sys/kauth.h | 2 +-
 bsd/sys/kdebug.h | 133 +-
 bsd/sys/kern_control.h | 180 +-
 bsd/sys/kern_event.h | 24 +-
 bsd/sys/kern_memorystatus.h | 29 +-
 bsd/sys/kern_overrides.h | 4 +-
 bsd/sys/kern_tests.h | 1 +
 bsd/sys/kpi_mbuf.h | 41 +-
 bsd/sys/kpi_socket.h | 8 +
 bsd/sys/loadable_fs.h | 2 +-
 bsd/sys/malloc.h | 10 +-
 bsd/sys/mbuf.h | 34 +-
 bsd/sys/mcache.h | 24 +-
 bsd/sys/mman.h | 5 +
 bsd/sys/mount.h | 22 +-
 bsd/sys/mount_internal.h | 29 +-
 bsd/sys/munge.h | 135 +-
 bsd/sys/namei.h | 15 +-
 bsd/sys/param.h | 2 +-
 bsd/sys/priv.h | 13 +-
 bsd/sys/proc.h | 14 +-
 bsd/sys/proc_info.h | 45 +-
 bsd/sys/proc_internal.h | 22 +-
 bsd/sys/proc_uuid_policy.h | 15 +-
 bsd/sys/process_policy.h | 22 +-
 bsd/sys/protosw.h | 17 +-
 bsd/sys/pthread_shims.h | 34 +-
 bsd/sys/random.h | 2 +
 bsd/sys/resource.h | 62 +-
 bsd/sys/resourcevar.h | 3 +-
 bsd/sys/sdt_impl.h | 6 +-
 bsd/sys/semaphore.h | 6 +-
 bsd/sys/sfi.h | 114 +
 bsd/sys/signal.h | 8 +-
 bsd/sys/signalvar.h | 6 +-
 bsd/sys/socket.h | 182 +-
 bsd/sys/socketvar.h | 138 +-
 bsd/sys/sockio.h | 14 +-
 bsd/sys/spawn.h | 10 +-
 bsd/sys/spawn_internal.h | 27 +-
 bsd/sys/stat.h | 15 +-
 bsd/{machine/setjmp.h => sys/stdio.h} | 27 +-
 bsd/sys/sys_domain.h | 67 +-
 bsd/sys/sysctl.h | 103 +-
 bsd/sys/sysent.h | 11 +-
 bsd/sys/systm.h | 9 +-
 bsd/sys/tty.h | 3 +
 bsd/sys/types.h | 24 +-
 bsd/sys/ubc.h | 20 +-
 bsd/sys/ubc_internal.h | 13 +-
 bsd/sys/unistd.h | 43 +-
 bsd/sys/user.h | 19 +-
 bsd/sys/ux_exception.h | 2 +-
 bsd/sys/vnode.h | 150 +-
 bsd/sys/vnode_if.h | 42 +-
 bsd/sys/vnode_internal.h | 52 +-
 bsd/uxkern/ux_exception.c | 2 +-
 bsd/vfs/kpi_vfs.c | 116 +-
 bsd/vfs/vfs_attrlist.c | 2847 +++++---
 bsd/vfs/vfs_bio.c | 509 +-
 bsd/vfs/vfs_cache.c | 16 +-
 bsd/vfs/vfs_cluster.c | 175 +-
 bsd/vfs/vfs_conf.c | 62 +-
 bsd/vfs/vfs_fsevents.c | 23 +-
 bsd/vfs/vfs_fslog.c | 230 +-
 bsd/vfs/vfs_init.c | 80 +-
 bsd/vfs/vfs_journal.c | 212 +-
 bsd/vfs/vfs_journal.h | 6 +-
 bsd/vfs/vfs_lookup.c | 58 +-
 bsd/vfs/vfs_quota.c | 1 +
 bsd/vfs/vfs_subr.c | 872 ++-
 bsd/vfs/vfs_syscalls.c | 2015 ++++--
 bsd/vfs/vfs_vnops.c | 98 +-
 bsd/vfs/vfs_xattr.c | 25 +-
 bsd/vfs/vnode_if.c | 20 +-
 bsd/vm/dp_backing_file.c | 73 +-
 bsd/vm/vm_compressor_backing_file.c | 82 +-
 bsd/vm/vm_unix.c | 191 +-
 bsd/vm/vnode_pager.c | 77 +-
 config/BSDKernel.exports | 17 +-
 config/DtraceIgnored.symbols | 1 -
 config/IOKit.exports | 165 +
 config/IOKit.x86_64.exports | 3 +
 config/Libkern.exports | 2 +-
 config/MACFramework.exports | 2 +-
 config/MACFramework.x86_64.exports | 1 -
 {bsd/conf => config}/MASTER | 403 +-
 config/MASTER.x86_64 | 65 +
 config/Mach.exports | 2 +
 config/Makefile | 58 +-
 config/MasterVersion | 2 +-
 config/Private.exports | 102 +-
 config/Private.x86_64.exports | 11 +-
 config/Unsupported.exports | 4 +-
 config/Unused.exports | 5 +
 config/newvers.pl | 37 +-
 iokit/IOKit/IOCPU.h | 1 +
 iokit/IOKit/IODMACommand.h | 8 +-
 iokit/IOKit/IODataQueue.h | 9 +
 iokit/IOKit/IOHibernatePrivate.h | 11 +-
 iokit/IOKit/IOInterruptAccounting.h | 148 +
 iokit/IOKit/IOInterruptAccountingPrivate.h | 223 +
 iokit/IOKit/IOInterruptEventSource.h | 9 +-
 iokit/IOKit/IOKernelReportStructs.h | 282 +
 iokit/IOKit/IOKernelReporters.h | 1663 +++++
 iokit/IOKit/IOKitDebug.h | 7 +-
 iokit/IOKit/IOKitKeys.h | 1 +
 iokit/IOKit/IOKitKeysPrivate.h | 18 +-
 iokit/IOKit/IOLocks.h | 9 +-
 iokit/IOKit/IOMemoryDescriptor.h | 66 +-
 iokit/IOKit/IOMessage.h | 2 +-
 iokit/IOKit/IONVRAM.h | 14 +-
 iokit/IOKit/IOPlatformExpert.h | 18 +-
 iokit/IOKit/IOReportMacros.h | 322 +-
 iokit/IOKit/IOReportTypes.h | 46 +-
 iokit/IOKit/IOReturn.h | 3 +
 iokit/IOKit/IOService.h | 54 +-
 iokit/IOKit/IOSharedDataQueue.h | 18 +
 iokit/IOKit/IOTimeStamp.h | 12 +-
 iokit/IOKit/IOTypes.h | 5 +-
 iokit/IOKit/IOUserClient.h | 6 +
 iokit/IOKit/Makefile | 25 +-
 iokit/IOKit/OSMessageNotification.h | 7 +-
 iokit/IOKit/pwr_mgt/IOPM.h | 3 +-
 iokit/IOKit/pwr_mgt/IOPMPrivate.h | 255 +-
 iokit/IOKit/pwr_mgt/RootDomain.h | 228 +-
 iokit/Kernel/IOBufferMemoryDescriptor.cpp | 70 +-
 iokit/Kernel/IOCPU.cpp | 53 +-
 iokit/Kernel/IOCatalogue.cpp | 162 +-
 iokit/Kernel/IODMAController.cpp | 9 +-
 iokit/Kernel/IODataQueue.cpp | 57 +-
 iokit/Kernel/IODeviceTreeSupport.cpp | 17 +-
 iokit/Kernel/IOFilterInterruptEventSource.cpp | 41 +-
 iokit/Kernel/IOHibernateIO.cpp | 353 +-
 iokit/Kernel/IOHistogramReporter.cpp | 362 ++
 iokit/Kernel/IOInterruptAccounting.cpp | 90 +
 iokit/Kernel/IOInterruptEventSource.cpp | 171 +-
 iokit/Kernel/IOKitDebug.cpp | 8 +-
 iokit/Kernel/IOKitKernelInternal.h | 4 +-
 iokit/Kernel/IOLib.cpp | 33 +-
 iokit/Kernel/IOLocks.cpp | 51 +-
 iokit/Kernel/IOMemoryDescriptor.cpp | 2073 +++---
 iokit/Kernel/IONVRAM.cpp | 733 +--
 iokit/Kernel/IOPMrootDomain.cpp | 3390 +++++-----
 iokit/Kernel/IOPlatformExpert.cpp | 81 +-
 iokit/Kernel/IOReportLegend.cpp | 217 +
 iokit/Kernel/IOReporter.cpp | 1070 +++
 iokit/Kernel/IOReporterDefs.h | 114 +
 iokit/Kernel/IOService.cpp | 775 ++-
 iokit/Kernel/IOServicePM.cpp | 3626 +++++------
 iokit/Kernel/IOServicePMPrivate.h | 87 +-
 iokit/Kernel/IOServicePrivate.h | 2 +
 iokit/Kernel/IOSharedDataQueue.cpp | 36 +-
 iokit/Kernel/IOSimpleReporter.cpp | 161 +
 iokit/Kernel/IOStartIOKit.cpp | 22 +
 iokit/Kernel/IOStateReporter.cpp | 888 +++
 iokit/Kernel/IOUserClient.cpp | 557 +-
 iokit/Kernel/IOWorkLoop.cpp | 5 -
 iokit/Kernel/RootDomainUserClient.cpp | 145 +-
 iokit/Kernel/RootDomainUserClient.h | 28 +-
 iokit/bsddev/IOKitBSDInit.cpp | 6 +
 iokit/conf/MASTER | 104 -
 iokit/conf/MASTER.x86_64 | 26 -
 iokit/conf/Makefile | 32 +-
 iokit/conf/Makefile.template | 29 +-
 iokit/conf/files | 10 +-
 libkern/Makefile | 2 +
 libkern/OSKextLib.cpp | 9 +-
 libkern/c++/OSArray.cpp | 24 +-
 libkern/c++/OSBoolean.cpp | 2 +
 libkern/c++/OSData.cpp | 23 +-
 libkern/c++/OSDictionary.cpp | 17 +-
 libkern/c++/OSKext.cpp | 821 +--
 libkern/c++/OSMetaClass.cpp | 1 -
 libkern/c++/OSObject.cpp | 18 +-
 libkern/c++/OSOrderedSet.cpp | 17 +-
 libkern/c++/OSRuntimeSupport.c | 10 +-
 libkern/c++/OSSerialize.cpp | 27 +-
 libkern/c++/OSSerializeBinary.cpp | 469 ++
 libkern/c++/OSSet.cpp | 4 +-
 libkern/c++/OSString.cpp | 32 +
 libkern/c++/OSSymbol.cpp | 6 +-
 libkern/c++/OSUnserializeXML.cpp | 14 +-
 libkern/conf/MASTER | 89 -
 libkern/conf/MASTER.x86_64 | 17 -
 libkern/conf/Makefile | 32 +-
 libkern/conf/Makefile.template | 35 +-
 libkern/conf/files | 8 +-
 libkern/crypto/corecrypto_aes.c | 4 +-
 libkern/gen/OSDebug.cpp | 4 +-
 libkern/kxld/Makefile | 25 +-
 libkern/kxld/kxld_object.c | 19 +-
 libkern/kxld/kxld_reloc.c | 140 +
 libkern/kxld/kxld_seg.c | 4 +
 libkern/libkern/Makefile | 7 +-
 libkern/libkern/OSKextLib.h | 18 +-
 libkern/libkern/OSSerializeBinary.h | 51 +
 libkern/libkern/OSTypes.h | 4 +-
 libkern/libkern/c++/OSArray.h | 1 +
 libkern/libkern/c++/OSBoolean.h | 1 +
 libkern/libkern/c++/OSData.h | 20 +-
 libkern/libkern/c++/OSDictionary.h | 1 +
 libkern/libkern/c++/OSKext.h | 26 +-
 libkern/libkern/c++/OSMetaClass.h | 4 -
 libkern/libkern/c++/OSNumber.h | 1 +
 libkern/libkern/c++/OSSerialize.h | 28 +
 libkern/libkern/c++/OSSet.h | 1 +
 libkern/libkern/c++/OSString.h | 2 +
 libkern/libkern/c++/OSUnserialize.h | 3 +
 libkern/libkern/crypto/aes.h | 4 +-
 libkern/libkern/kext_request_keys.h | 10 -
 libkern/libkern/kxld_types.h | 5 +
 libkern/libkern/tree.h | 2 +-
 libsa/bootstrap.cpp | 86 +-
 libsa/conf/MASTER | 77 -
 libsa/conf/MASTER.x86_64 | 15 -
 libsa/conf/Makefile | 32 +-
 libsa/conf/Makefile.template | 31 +-
 libsa/printPlist | 80 -
 libsyscall/Libsyscall.xcconfig | 31 +-
 .../Libsyscall.xcodeproj/project.pbxproj | 262 +-
 libsyscall/Platforms/MacOSX/i386/syscall.map | 3 +
 .../Platforms/MacOSX/x86_64/syscall.map | 3 +
 libsyscall/custom/__thread_selfid.s | 4 +-
 .../custom/__thread_selfusage.s | 17 +-
 libsyscall/custom/errno.c | 19 -
 libsyscall/mach/.gitignore | 3 +
 libsyscall/mach/err_mach_ipc.sub | 12 +-
 libsyscall/mach/mach/mach.h | 100 +-
 libsyscall/mach/mach/mach_init.h | 19 +-
 libsyscall/mach/mach/vm_page_size.h | 24 +-
 libsyscall/mach/mach_init.c | 33 +-
 libsyscall/mach/mach_msg.c | 172 +-
 libsyscall/mach/mach_port.c | 12 +
 libsyscall/mach/mach_vm.c | 2 +-
 .../mach/mach_voucher.defs | 5 +-
 libsyscall/mach/string.c | 2 +-
 libsyscall/mach/thread_act.c | 77 +
 libsyscall/wrappers/_libc_funcptr.c | 64 +-
 libsyscall/wrappers/_libkernel_init.h | 45 +-
 libsyscall/wrappers/cancelable/fcntl-base.c | 2 +
 libsyscall/wrappers/coalition.c | 52 +
 .../wrappers/csr.c | 42 +-
 libsyscall/wrappers/getiopolicy_np.c | 13 +-
 .../wrappers/guarded_open_dprotected_np.c | 45 +
 libsyscall/wrappers/libproc/libproc.c | 304 +-
 libsyscall/wrappers/libproc/libproc.h | 1 +
 .../wrappers/libproc/libproc_internal.h | 54 +-
 .../wrappers/libproc/proc_listpidspath.c | 131 +-
 libsyscall/wrappers/mach_approximate_time.c | 39 +
 libsyscall/wrappers/mach_approximate_time.s | 62 +
 libsyscall/wrappers/posix_sem_obsolete.c | 48 +
 libsyscall/wrappers/rename_ext.c | 45 +
 libsyscall/wrappers/renameat.c | 33 +
 .../wrappers/setpriority.c | 50 +-
 libsyscall/wrappers/sfi.c | 57 +
 libsyscall/wrappers/spawn/posix_spawn.c | 104 +-
 libsyscall/wrappers/spawn/spawn_private.h | 9 +-
 libsyscall/wrappers/unlinkat.c | 33 +
 libsyscall/wrappers/varargs_wrappers.s | 23 +
 libsyscall/xcodescripts/compat-symlinks.sh | 32 -
 libsyscall/xcodescripts/create-syscalls.pl | 19 +-
 libsyscall/xcodescripts/mach_install_mig.sh | 37 +-
 makedefs/MakeInc.cmd | 34 +-
 makedefs/MakeInc.def | 120 +-
 makedefs/MakeInc.dir | 5 +-
 makedefs/MakeInc.kernel | 269 +-
 makedefs/MakeInc.rule | 23 +-
 makedefs/MakeInc.top | 230 +-
 osfmk/Makefile | 18 +-
 .../UserNotification/KUNCUserNotifications.c | 2 +-
 osfmk/atm/Makefile | 123 +
 osfmk/atm/atm.c | 1429 ++++
 osfmk/atm/atm_internal.h | 117 +
 osfmk/atm/atm_notification.defs | 63 +
 osfmk/atm/atm_types.defs | 48 +
 osfmk/atm/atm_types.h | 64 +
 osfmk/bank/Makefile | 119 +
 osfmk/bank/bank.c | 985 +++
 osfmk/bank/bank_internal.h | 162 +
 osfmk/bank/bank_types.h | 43 +
 osfmk/conf/MASTER | 322 -
 osfmk/conf/MASTER.x86_64 | 81 -
 osfmk/conf/Makefile | 32 +-
 osfmk/conf/Makefile.template | 61 +-
 osfmk/conf/files | 55 +-
 osfmk/conf/files.x86_64 | 25 +-
 .../art/ProgressBarEmptyLeftEndcap.png | Bin 0 -> 3764 bytes
 .../art/ProgressBarEmptyLeftEndcap@2x.png | Bin 0 -> 3837 bytes
 osfmk/console/art/ProgressBarEmptyMiddle.png | Bin 0 -> 3718 bytes
 .../console/art/ProgressBarEmptyMiddle@2x.png | Bin 0 -> 3723 bytes
 .../art/ProgressBarEmptyRightEndcap.png | Bin 0 -> 3758 bytes
 .../art/ProgressBarEmptyRightEndcap@2x.png | Bin 0 -> 3838 bytes
 .../console/art/ProgressBarFullLeftEndcap.png | Bin 0 -> 3772 bytes
 .../art/ProgressBarFullLeftEndcap@2x.png | Bin 0 -> 3875 bytes
 osfmk/console/art/ProgressBarFullMiddle.png | Bin 0 -> 3723 bytes
 .../console/art/ProgressBarFullMiddle@2x.png | Bin 0 -> 3735 bytes
 .../art/ProgressBarFullRightEndcap.png | Bin 0 -> 3771 bytes
 .../art/ProgressBarFullRightEndcap@2x.png | Bin 0 -> 3881 bytes
 osfmk/console/art/progress.m | 177 +
 osfmk/console/art/scalegear.c | 53 +
 osfmk/console/i386/serial_console.c | 1 +
 osfmk/console/panic_ui/README | 74 -
 osfmk/console/panic_ui/appleclut8.h | 51 -
 .../panic_ui/generated_files/panic_image.c | 1953 ------
 .../generated_files/rendered_numbers.c | 376 --
 osfmk/console/panic_ui/genimage.c | 1621 -----
 .../console/panic_ui/images/panic_dialog.tiff | Bin 156664 -> 0 bytes
 .../panic_ui/images/panic_dialogWHD.raw | Bin 120366 -> 0 bytes
 .../panic_ui/images/rendered_numbers.tiff | Bin 3218 -> 0 bytes
 .../panic_ui/images/rendered_numbersWHD.raw | Bin 1425 -> 0 bytes
 osfmk/console/panic_ui/qtif2kraw.c | 892 ---
 osfmk/console/panic_ui/setupdialog.c | 359 -
 osfmk/console/panic_ui/systemCLUT.act | Bin 768 -> 0 bytes
 osfmk/console/progress_meter_data.c | 658 +-
 osfmk/console/serial_general.c | 1 -
 osfmk/console/video_console.c | 519 +-
 osfmk/console/video_console.h | 6 +-
 osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c | 416 ++
 osfmk/corecrypto/ccdigest/src/ccdigest_init.c | 17 +
 .../corecrypto/ccdigest/src/ccdigest_update.c | 40 +
 osfmk/corecrypto/cchmac/src/cchmac.c | 20 +
 osfmk/corecrypto/cchmac/src/cchmac_final.c | 21 +
 osfmk/corecrypto/cchmac/src/cchmac_init.c | 59 +
 osfmk/corecrypto/cchmac/src/cchmac_update.c | 15 +
 osfmk/corecrypto/ccn/src/ccn_set.c | 16 +
 .../ccsha1/src/ccdigest_final_64be.c | 41 +
 osfmk/corecrypto/ccsha1/src/ccsha1_eay.c | 294 +
 .../ccsha1/src/ccsha1_initial_state.c | 19 +
 osfmk/default_pager/default_pager_internal.h | 2 +-
 osfmk/default_pager/dp_backing_store.c | 5 +-
 osfmk/default_pager/dp_memory_object.c | 4 +
 osfmk/device/device.defs | 79 +-
 osfmk/device/device_types.h | 7 +-
 osfmk/device/subrs.c | 6 +-
 osfmk/i386/AT386/conf.c | 1 -
 osfmk/i386/AT386/model_dep.c | 7 +-
 osfmk/i386/Diagnostics.c | 18 +-
 osfmk/i386/Makefile | 4 +-
 osfmk/i386/acpi.c | 6 +
 osfmk/i386/bit_routines.h | 211 +
 osfmk/i386/bsd_i386.c | 32 +-
 osfmk/i386/bsd_i386_native.c | 40 +-
 osfmk/i386/commpage/commpage.c | 71 +-
 osfmk/i386/commpage/commpage.h | 1 +
 osfmk/i386/cpu.c | 4 +-
 osfmk/i386/cpu_capabilities.h | 4 +
 osfmk/i386/cpu_data.h | 3 +
 osfmk/i386/cpu_threads.c | 2 +-
 osfmk/i386/cpu_topology.c | 4 +-
 osfmk/i386/cpuid.c | 30 +-
 osfmk/i386/cpuid.h | 42 +-
 osfmk/i386/fpu.c | 188 +-
 osfmk/i386/genassym.c | 302 +-
 osfmk/i386/i386_init.c | 62 +-
 osfmk/i386/i386_lock.s | 745 +--
 osfmk/i386/i386_timer.c | 169 +-
 osfmk/i386/i386_vm_init.c | 3 +-
 osfmk/i386/lapic.h | 1 +
 osfmk/i386/lapic_native.c | 4 +
 osfmk/i386/lock.h | 211 +-
 osfmk/i386/locks.h | 6 +
 osfmk/i386/locks_i386.c | 188 +-
 osfmk/i386/machdep_call.c | 5 +
 osfmk/i386/machdep_call.h | 6 +
 osfmk/i386/machine_routines.c | 79 +-
 osfmk/i386/machine_routines.h | 27 +-
 osfmk/i386/machine_task.c | 11 +
 osfmk/i386/misc_protos.h | 4 +-
 osfmk/i386/mp.c | 265 +-
 osfmk/i386/mp.h | 12 +-
 osfmk/i386/mp_desc.c | 12 +-
 osfmk/i386/mp_native.c | 1 +
 osfmk/i386/panic_hooks.c | 203 +
 osfmk/i386/{gdb_defs.h => panic_hooks.h} | 62 +-
 osfmk/i386/pcb.c | 55 +-
 osfmk/i386/pcb_native.c | 77 +-
 osfmk/i386/pmCPU.c | 12 -
 osfmk/i386/pmap.h | 26 +-
 osfmk/i386/pmap_common.c | 29 +
 osfmk/i386/pmap_internal.h | 15 +-
 osfmk/i386/pmap_x86_common.c | 285 +-
 osfmk/i386/proc_reg.h | 35 +-
 osfmk/i386/rtclock.c | 55 +-
 osfmk/i386/rtclock_native.c | 8 +-
 osfmk/i386/rtclock_protos.h | 6 +-
 osfmk/i386/simple_lock.h | 3 +
 osfmk/i386/startup64.c | 4 -
 osfmk/i386/thread.h | 4 +-
 osfmk/i386/trap.c | 51 +-
 osfmk/i386/trap.h | 2 -
 osfmk/i386/tsc.c | 10 +-
 osfmk/i386/vmx/vmx_cpu.c | 195 +-
 osfmk/i386/vmx/vmx_cpu.h | 45 +-
 osfmk/i386/xpr.h | 3 -
 osfmk/ipc/Makefile | 3 +-
 osfmk/ipc/ipc_entry.c | 132 +-
 osfmk/ipc/ipc_entry.h | 11 +
 osfmk/ipc/ipc_hash.c | 1 -
 osfmk/ipc/ipc_importance.c | 3266 ++++++++++
 osfmk/ipc/ipc_importance.h | 267 +
 osfmk/ipc/ipc_init.c | 36 +-
 osfmk/ipc/ipc_init.h | 5 +-
 osfmk/ipc/ipc_kmsg.c | 1134 ++--
 osfmk/ipc/ipc_kmsg.h | 29 +-
 osfmk/ipc/ipc_labelh.c | 248 -
 osfmk/ipc/ipc_labelh.h | 114 -
 osfmk/ipc/ipc_mqueue.c | 39 -
 osfmk/ipc/ipc_object.c | 62 +-
 osfmk/ipc/ipc_object.h | 3 +-
 osfmk/ipc/ipc_port.c | 329 +-
 osfmk/ipc/ipc_port.h | 42 +-
 osfmk/ipc/ipc_pset.c | 3 +-
 osfmk/ipc/ipc_right.c | 393 +-
 osfmk/ipc/ipc_right.h | 4 +-
 osfmk/ipc/ipc_space.c | 2 +
 osfmk/ipc/ipc_space.h | 3 +-
 osfmk/ipc/ipc_table.c | 2 +-
 osfmk/ipc/ipc_types.h | 10 +
 osfmk/ipc/ipc_voucher.c | 2977 +++++++++
 osfmk/ipc/ipc_voucher.h | 379 ++
 osfmk/ipc/mach_debug.c | 52 +-
 osfmk/ipc/mach_msg.c | 76 +-
 osfmk/ipc/mach_port.c | 258 +-
 osfmk/kdp/kdp.c | 514 +-
 osfmk/kdp/kdp_internal.h | 1 +
 osfmk/kdp/kdp_udp.c | 190 +-
 osfmk/kdp/kdp_udp.h | 12 +
 osfmk/kdp/ml/i386/kdp_x86_common.c | 6 +-
 osfmk/kdp/ml/x86_64/kdp_machdep.c | 35 +-
 osfmk/kern/Makefile | 21 +-
 osfmk/kern/affinity.c | 4 +
 osfmk/kern/affinity.h | 1 -
 osfmk/kern/assert.h | 4 +-
 osfmk/kern/ast.c | 35 +-
 osfmk/kern/ast.h | 36 +-
 osfmk/kern/bsd_kern.c | 125 +-
 osfmk/kern/btlog.c | 13 +-
 osfmk/kern/clock.c | 14 +-
 osfmk/kern/clock.h | 5 -
 osfmk/kern/clock_oldops.c | 5 +-
 osfmk/kern/coalition.c | 743 +++
 osfmk/kern/coalition.h | 121 +
 osfmk/kern/debug.c | 73 +-
 osfmk/kern/debug.h | 124 +-
 bsd/dev/i386/memmove.c => osfmk/kern/ecc.h | 55 +-
 osfmk/kern/energy_perf.c | 61 +
 osfmk/kern/energy_perf.h | 100 +
 osfmk/kern/exc_resource.h | 5 +-
 osfmk/kern/exception.c | 70 +-
 osfmk/kern/extmod_statistics.h | 2 +-
 osfmk/kern/hibernate.c | 3 +-
 osfmk/kern/host.c | 2 -
 osfmk/kern/host_notify.c | 7 +-
 osfmk/kern/hv_support.c | 271 +
 osfmk/kern/hv_support.h | 90 +
 osfmk/kern/ipc_host.c | 17 +-
 osfmk/kern/ipc_kobject.c | 63 +-
 osfmk/kern/ipc_kobject.h | 4 +-
 osfmk/kern/ipc_mig.c | 2 +-
 osfmk/kern/ipc_tt.c | 51 +-
 osfmk/kern/kalloc.c | 1 -
 osfmk/kern/kalloc.h | 2 -
 osfmk/kern/kern_ecc.c | 134 +
 osfmk/kern/kern_stackshot.c | 837 +++
 osfmk/kern/kern_types.h | 7 +-
 osfmk/kern/kpc.h | 47 +-
 osfmk/kern/kpc_common.c | 119 +-
 osfmk/kern/ledger.c | 87 +-
 osfmk/kern/ledger.h | 6 +
 osfmk/kern/lock.h | 70 +-
 osfmk/kern/locks.c | 265 +-
 osfmk/kern/locks.h | 9 +-
 osfmk/kern/machine.c | 28 +-
 osfmk/kern/mk_sp.c | 99 +-
 osfmk/kern/mk_timer.c | 6 +-
 osfmk/kern/printf.c | 2 -
 osfmk/kern/priority.c | 432 +-
 osfmk/kern/processor.c | 41 +-
 osfmk/kern/processor.h | 112 +-
 osfmk/kern/queue.c | 169 -
 osfmk/kern/queue.h | 156 +-
 osfmk/kern/sched.h | 87 +-
 osfmk/kern/sched_dualq.c | 474 ++
 osfmk/kern/sched_fixedpriority.c | 734 ---
 osfmk/kern/sched_grrr.c | 107 +-
 osfmk/kern/sched_multiq.c | 1424 ++++
 osfmk/kern/sched_prim.c | 1575 +++--
 osfmk/kern/sched_prim.h | 81 +-
 osfmk/kern/sched_proto.c | 104 +-
 osfmk/kern/security.c | 425 --
 osfmk/kern/sfi.c | 1006 +++
 osfmk/kern/sfi.h | 69 +
 osfmk/kern/simple_lock.h | 2 -
 osfmk/kern/spl.c | 1 -
 osfmk/kern/stack.c | 13 +-
 osfmk/kern/startup.c | 193 +-
 osfmk/kern/sync_sema.h | 1 -
 osfmk/kern/syscall_subr.c | 10 +-
 osfmk/kern/syscall_sw.c | 9 +-
 osfmk/kern/syscall_sw.h | 20 +-
 osfmk/kern/task.c | 821 ++-
 osfmk/kern/task.h | 217 +-
 osfmk/kern/task_policy.c | 1850 ++--
 osfmk/kern/task_swap.c | 1 -
 osfmk/kern/telemetry.c | 411 +-
 osfmk/kern/telemetry.h | 17 +-
 osfmk/kern/thread.c | 661 +-
 osfmk/kern/thread.h | 164 +-
 osfmk/kern/thread_act.c | 47 +-
 osfmk/kern/thread_call.c | 68 +-
 osfmk/kern/thread_call.h | 1 +
 osfmk/kern/thread_policy.c | 839 ++-
 osfmk/kern/timer.c | 12 -
 osfmk/kern/timer_call.c | 497 +-
 osfmk/kern/timer_call.h | 32 +-
 osfmk/kern/timer_queue.h | 39 +
 osfmk/kern/wait_queue.c | 6 +-
 osfmk/kern/wait_queue.h | 3 +-
 osfmk/kern/xpr.c | 1 -
 osfmk/kern/zalloc.c | 300 +-
 osfmk/kern/zalloc.h | 13 +-
 osfmk/kperf/kperfbsd.c | 5 +
 osfmk/kperf/pet.c | 7 +-
 osfmk/kperf/threadinfo.c | 4 -
 osfmk/mach/Makefile | 30 +-
 osfmk/mach/coalition_notification.defs | 22 +
 osfmk/mach/host_special_ports.h | 17 +-
 osfmk/mach/i386/Makefile | 2 +-
 osfmk/mach/i386/machine_types.defs | 129 -
 osfmk/mach/i386/sdt_isa.h | 4 +-
 osfmk/mach/i386/thread_status.h | 18 +-
 osfmk/mach/i386/vm_param.h | 19 +-
 osfmk/mach/kern_return.h | 4 +
 osfmk/mach/mach_host.defs | 33 +-
 osfmk/mach/mach_interface.h | 1 -
 osfmk/mach/mach_port.defs | 9 +
 osfmk/mach/mach_time.h | 1 +
 osfmk/mach/mach_traps.h | 55 +-
 osfmk/mach/mach_types.defs | 48 +
 osfmk/mach/mach_types.h | 5 +
 osfmk/mach/mach_voucher.defs | 69 +
 osfmk/mach/mach_voucher_attr_control.defs | 48 +
 osfmk/mach/mach_voucher_types.h | 247 +
 osfmk/mach/machine.h | 14 +-
 osfmk/mach/machine/machine_types.defs | 106 +-
 osfmk/mach/memory_object.defs | 4 -
 osfmk/mach/memory_object_control.defs | 4 -
 osfmk/mach/memory_object_name.defs | 82 -
 osfmk/mach/memory_object_types.h | 29 +-
 osfmk/mach/message.h | 130 +-
 osfmk/mach/mig_voucher_support.h | 8 +
 osfmk/mach/port.h | 3 +-
 osfmk/mach/security.defs | 208 -
 osfmk/mach/sfi_class.h | 135 +
 osfmk/mach/shared_region.h | 18 +-
 osfmk/mach/syscall_sw.h | 3 -
 osfmk/mach/task.defs | 19 +-
 osfmk/mach/task_info.h | 43 +
 osfmk/mach/task_policy.h | 80 +-
 osfmk/mach/task_special_ports.h | 7 +
 osfmk/mach/thread_act.defs | 36 +-
 osfmk/mach/thread_info.h | 17 +
 osfmk/mach/thread_policy.h | 60 +-
 osfmk/mach/upl.defs | 4 -
 osfmk/mach/vm_param.h | 53 +-
 osfmk/mach/vm_statistics.h | 46 +-
 osfmk/mach_debug/ipc_info.h | 16 +-
 osfmk/mach_debug/mach_debug_types.defs | 1 +
 {bsd/crypto/cast128 => osfmk/prng}/Makefile | 16 +-
 .../include/WindowsTypesForMac.h | 4 +-
 .../prng}/YarrowCoreLib/include/yarrow.h | 2 +-
 .../prng}/YarrowCoreLib/include/yarrowUtils.h | 4 +-
 .../prng}/YarrowCoreLib/port/smf.c | 39 +-
 .../prng}/YarrowCoreLib/src/assertverify.h | 2 +-
 .../prng}/YarrowCoreLib/src/comp.c | 4 +-
 .../prng}/YarrowCoreLib/src/comp.h | 2 +-
 .../prng}/YarrowCoreLib/src/entropysources.h | 2 +-
 .../prng}/YarrowCoreLib/src/macOnly.h | 4 +-
 .../prng}/YarrowCoreLib/src/prng.c | 18 +-
 .../prng}/YarrowCoreLib/src/prng.h | 4 +-
 .../prng}/YarrowCoreLib/src/prngpriv.h | 4 +-
 .../YarrowCoreLib/src/readme-prnguser.txt | 0
 .../prng}/YarrowCoreLib/src/sha1mod.c | 2 +-
 .../prng}/YarrowCoreLib/src/sha1mod.h | 4 +-
 .../prng}/YarrowCoreLib/src/smf.h | 2 +-
 .../prng}/YarrowCoreLib/src/userdefines.h | 2 +-
 .../prng}/YarrowCoreLib/src/yarrowUtils.c | 4 +-
 {bsd/dev/random => osfmk/prng}/fips_sha1.c | 4 +-
 {bsd/dev/random => osfmk/prng}/fips_sha1.h | 4 +-
 osfmk/prng/prng_yarrow.c | 411 ++
 osfmk/prng/random.c | 481 ++
 osfmk/prng/random.h | 113 +
 osfmk/profiling/Makefile | 6 +
 osfmk/profiling/machine/profile-md.h | 2 +-
 osfmk/sys/syslog.h | 203 -
 osfmk/sys/types.h | 172 -
 osfmk/vm/WKdm_new.h | 190 +-
 osfmk/vm/bsd_vm.c | 111 +-
 osfmk/vm/default_freezer.c | 16 +-
 osfmk/vm/memory_object.c | 30 +-
 osfmk/vm/memory_object.h | 2 +
 osfmk/vm/pmap.h | 23 +-
 osfmk/vm/vm_apple_protect.c | 22 +-
 osfmk/vm/vm_compressor.c | 426 +-
 osfmk/vm/vm_compressor.h | 46 +-
 osfmk/vm/vm_compressor_backing_store.c | 632 +-
 osfmk/vm/vm_compressor_backing_store.h | 25 +-
 osfmk/vm/vm_compressor_pager.c | 365 +-
 osfmk/vm/vm_compressor_pager.h | 70 +-
 osfmk/vm/vm_fault.c | 543 +-
 osfmk/vm/vm_fault.h | 17 +-
 osfmk/vm/vm_init.c | 40 +-
 osfmk/vm/vm_kern.c | 3 +-
 osfmk/vm/vm_map.c | 1321 +++-
 osfmk/vm/vm_map.h | 75 +-
 osfmk/vm/vm_object.c | 1007 ++-
 osfmk/vm/vm_object.h | 89 +-
 osfmk/vm/vm_options.h | 3 +
 osfmk/vm/vm_page.h | 102 +-
 osfmk/vm/vm_pageout.c | 1680 ++++-
 osfmk/vm/vm_pageout.h | 53 +-
 osfmk/vm/vm_phantom_cache.c | 465 ++
 .../ucontext.h => osfmk/vm/vm_phantom_cache.h | 50 +-
 osfmk/vm/vm_protos.h | 44 +-
 osfmk/vm/vm_purgeable.c | 761 ++-
 osfmk/vm/vm_purgeable_internal.h | 13 +-
 osfmk/vm/vm_resident.c | 516 +-
 osfmk/vm/vm_shared_region.c | 186 +-
 osfmk/vm/vm_shared_region.h | 2 +-
 osfmk/vm/vm_user.c | 62 +-
 osfmk/x86_64/copyio.c | 3 +-
 osfmk/x86_64/cswitch.s | 1 -
 osfmk/x86_64/idt64.s | 39 +-
 osfmk/x86_64/kpc_x86.c | 60 +-
 osfmk/x86_64/locore.s | 1 -
 osfmk/x86_64/loose_ends.c | 25 +-
 osfmk/x86_64/lowmem_vectors.c | 1 -
 osfmk/x86_64/machine_routines_asm.s | 44 +-
 osfmk/x86_64/pmap.c | 131 +-
 osfmk/x86_64/start.s | 3 +-
 pexpert/Makefile | 2 +
 pexpert/conf/MASTER | 94 -
 pexpert/conf/MASTER.x86_64 | 18 -
 pexpert/conf/Makefile | 32 +-
 pexpert/conf/Makefile.template | 29 +-
 pexpert/gen/pe_gen.c | 37 +
 pexpert/i386/pe_identify_machine.c | 2 +-
 pexpert/i386/pe_init.c | 9 +-
 pexpert/pexpert/GearImage.h | 2 +
 pexpert/pexpert/Makefile | 3 +
 pexpert/pexpert/i386/boot.h | 9 +-
 pexpert/pexpert/pe_images.h | 1 +
 pexpert/pexpert/pexpert.h | 10 +
 security/conf/MASTER | 78 -
 security/conf/MASTER.x86_64 | 31 -
 security/conf/Makefile | 32 +-
 security/conf/Makefile.template | 45 +-
 security/conf/files | 2 -
 security/conf/kernelversion.major | 1 -
 security/conf/kernelversion.minor | 1 -
 security/conf/kernelversion.variant | 1 -
 security/conf/version.major | 1 -
 security/conf/version.minor | 1 -
 security/conf/version.variant | 1 -
 security/mac_base.c | 160 +-
 security/mac_framework.h | 38 +-
 security/mac_internal.h | 35 -
 security/mac_iokit.c | 18 +
 security/mac_mach_internal.h | 49 -
 security/mac_policy.h | 748 +--
 security/mac_port.c | 282 -
 security/mac_process.c | 10 +
 security/mac_system.c | 17 +-
 security/mac_task.c | 136 -
 security/mac_vfs.c | 91 +-
 tools/lldbmacros/Makefile | 17 +-
 tools/lldbmacros/README | 14 +-
 tools/lldbmacros/atm.py | 96 +
 tools/lldbmacros/bank.py | 146 +
 tools/lldbmacros/core/cvalue.py | 27 +-
 tools/lldbmacros/core/kernelcore.py | 35 +-
 tools/lldbmacros/core/operating_system.py | 1210 ++--
 tools/lldbmacros/core/standard.py | 1 +
 tools/lldbmacros/core/syntax_checker.py | 10 +-
 tools/lldbmacros/ioreg.py | 97 +
 tools/lldbmacros/ipc.py | 505 +-
 tools/lldbmacros/ipcimportancedetail.py | 135 +
 tools/lldbmacros/mbufs.py | 180 +-
 tools/lldbmacros/memory.py | 525 +-
 tools/lldbmacros/net.py | 4 +-
 tools/lldbmacros/netdefines.py | 2 +-
 tools/lldbmacros/plugins/iosspeedtracer.py | 33 +
 tools/lldbmacros/plugins/iosspeedtracer.sh | 20 +
 tools/lldbmacros/pmap.py | 254 +-
 tools/lldbmacros/process.py | 585 +-
 tools/lldbmacros/scheduler.py | 6 +
 tools/lldbmacros/structanalyze.py | 73 +
 tools/lldbmacros/userspace.py | 352 +-
 tools/lldbmacros/utils.py | 8 +-
 tools/lldbmacros/xnu.py | 144 +-
 tools/lldbmacros/xnudefines.py | 6 +-
 tools/remote_build.sh | 28 +-
 tools/tests/MPMMTest/KQMPMMtest.c | 6 +-
 tools/tests/MPMMTest/MPMMtest.c | 6 +-
 tools/tests/MPMMTest/Makefile | 6 +-
 tools/tests/Makefile | 15 +-
 tools/tests/affinity/Makefile | 6 +-
 tools/tests/affinity/pool.c | 16 +-
 tools/tests/affinity/sets.c | 14 +-
 tools/tests/affinity/tags.c | 4 +-
 tools/tests/execperf/Makefile | 104 +-
 tools/tests/execperf/exit-asm.S | 5 +-
 tools/tests/execperf/exit.c | 4 +-
 tools/tests/execperf/printexecinfo.c | 7 +
 tools/tests/execperf/test.sh | 7 +-
 tools/tests/jitter/Makefile | 4 +-
 tools/tests/kqueue_tests/Makefile | 4 +-
 tools/tests/libMicro/Makefile.Darwin | 2 +-
 tools/tests/libMicro/apple/Makefile.Darwin | 4 +-
 tools/tests/memorystatus/Makefile | 19 +-
 tools/tests/memorystatus/memorystatus.c | 1278 +---
 .../tests/memorystatus/memorystatus_groups.c | 653 ++
 tools/tests/perf_index/Makefile | 89 +-
 .../PerfIndex_COPS_Module/Info.plist | 59 +
 .../perf_index/PerfIndex_COPS_Module/PITest.h | 36 +
 .../perf_index/PerfIndex_COPS_Module/PITest.m | 164 +
 .../PerfIndex_COPS_Module/PerfIndex.h | 13 +
 .../PerfIndex.xcodeproj/project.pbxproj | 402 ++
 .../PerfIndex_COPS_Module/Prefix.pch | 9 +
 tools/tests/perf_index/README | 81 +
 tools/tests/perf_index/compile.c | 34 -
 tools/tests/perf_index/fail.h | 21 +
 tools/tests/perf_index/iperf.c | 16 -
 tools/tests/perf_index/main.c | 259 -
 tools/tests/perf_index/md5.c | 6 +-
 tools/tests/perf_index/md5.h | 8 +
 tools/tests/perf_index/perf_index.c | 214 +
 tools/tests/perf_index/perf_index.h | 94 +-
 tools/tests/perf_index/perfindex-compile.c | 54 +
 tools/tests/perf_index/perfindex-cpu.c | 14 +
 tools/tests/perf_index/perfindex-fault.c | 10 +
 .../tests/perf_index/perfindex-file_create.c | 33 +
 tools/tests/perf_index/perfindex-file_read.c | 36 +
 tools/tests/perf_index/perfindex-file_write.c | 37 +
 tools/tests/perf_index/perfindex-iperf.c | 23 +
 .../{stress_memory.c => perfindex-memory.c} | 36 +-
 .../perf_index/perfindex-ram_file_create.c | 33 +
 .../perf_index/perfindex-ram_file_read.c | 36 +
 .../perf_index/perfindex-ram_file_write.c | 36 +
 tools/tests/perf_index/perfindex-syscall.c | 11 +
 tools/tests/perf_index/perfindex-zfod.c | 10 +
 tools/tests/perf_index/ramdisk.c | 38 +
 tools/tests/perf_index/ramdisk.h | 7 +
 tools/tests/perf_index/stress_cpu.c | 11 -
 tools/tests/perf_index/stress_fault.c | 85 -
 tools/tests/perf_index/stress_file_create.c | 19 -
 tools/tests/perf_index/stress_file_local.c | 50 -
 tools/tests/perf_index/stress_file_ram.c | 65 -
 tools/tests/perf_index/stress_file_read.c | 66 -
 tools/tests/perf_index/stress_file_write.c | 46 -
 tools/tests/perf_index/stress_general.c | 10 -
 tools/tests/perf_index/stress_syscall.c | 13 -
 tools/tests/perf_index/test_fault_helper.c | 86 +
 tools/tests/perf_index/test_fault_helper.h | 12 +
 tools/tests/perf_index/test_file_helper.c | 189 +
 tools/tests/perf_index/test_file_helper.h | 16 +
 tools/tests/superpages/Makefile | 4 +-
 tools/tests/superpages/testsp.c | 2 +-
 tools/tests/testkext/testthreadcall.cpp | 53 +-
 tools/tests/testkext/testthreadcall.h | 4 +-
 tools/tests/unit_tests/Makefile | 237 -
 tools/tests/unit_tests/build_tests.sh | 134 -
 tools/tests/unit_tests/clock_types_6368156.c | 20 -
 .../codesigntests-entitlements.plist | 8 -
 tools/tests/unit_tests/codesigntests.c | 130 -
 .../cpu_hog/cpu_hog.m | 467 -
 .../cpu_hog/cpu_hog.xcodeproj/project.pbxproj | 356 -
 .../contents.xcworkspacedata | 15 -
 .../cpumon_test_framework.c | 529 -
 .../mach_exc.defs | 57 -
 .../mem_hog/mem_hog.c | 221 -
 .../fcntlrangecheck_tests_11202484.c | 210 -
 .../guarded_test.c | 532 -
 .../guarded_test_common.h | 11 -
 .../guarded_test_framework.c | 276 -
 .../mach_exc.defs | 57 -
 .../guarded_test.c | 536 -
 .../guarded_test_framework.c | 255 -
 .../mach_exc.defs | 57 -
 .../libproc_privilege_test_13203438.c | 127 -
 .../monitor_stress-Entitlements.plist | 8 -
 .../monitor_stress.xcodeproj/project.pbxproj | 324 -
 .../contents.xcworkspacedata | 7 -
 .../UserInterfaceState.xcuserstate | Bin 87338 -> 0 bytes
 .../xcschemes/monitor_stress 2.xcscheme | 59 -
 .../xcschemes/monitor_stress copy.xcscheme | 86 -
 .../xcschemes/monitor_stress.xcscheme | 86 -
 .../xcschemes/xcschememanagement.plist | 42 -
 .../monitor_stress/monitor_stress.m | 178 -
 .../unit_tests/pipe_test_10807398_src/child.c | 27 -
 .../pipe_test_10807398_src/parent.c | 50 -
 .../unit_tests/pipes_fill_procinfo_11179336.c | 38 -
 .../ptcwd_test_11269991.c | 219 -
 .../ptrace_test_12507045_src/ptrace_test.c | 749 ---
 .../ptrace_tests_10767133.c | 281 -
 tools/tests/unit_tests/sampletest.c | 28 -
 .../semctl_test_8534495.c | 38 -
 .../sprace_test_11891562.c | 265 -
 .../unit_tests/test_waitqlocktry_12053360.c | 79 -
 .../test_wq_exit_race_panic_10970548.c | 32 -
 .../excserver.defs | 1 -
 .../thread_get_state.c | 190 -
 tools/tests/unit_tests/xnu_raft_tests.py | 169 -
 .../tests/xnu_quick_test/32bit_inode_tests.c | 4 +-
 tools/tests/xnu_quick_test/README | 26 +-
 tools/tests/xnu_quick_test/commpage_tests.c | 2 -
 tools/tests/xnu_quick_test/kqueue_tests.c | 16 +-
 tools/tests/xnu_quick_test/main.c | 21 +-
 tools/tests/xnu_quick_test/makefile | 71 +-
 tools/tests/xnu_quick_test/misc.c | 4 +
 tools/tests/xnu_quick_test/pipes_tests.c | 3 +-
 tools/tests/xnu_quick_test/sema_tests.c | 5 -
 .../xnu_quick_test/shared_memory_tests.c | 5 -
 tools/tests/xnu_quick_test/socket_tests.c | 4 -
 tools/tests/xnu_quick_test/tests.c | 1709 ++++-
 tools/tests/xnu_quick_test/tests.h | 8 +
 .../xnu_quick_test.entitlements} | 2 +-
 tools/tests/zero-to-n/Makefile | 6 +-
 tools/tests/zero-to-n/zero-to-n.c | 90 +-
 1290 files changed, 122214 insertions(+), 80013 deletions(-)

 create mode 100644 EXTERNAL_HEADERS/corecrypto/ccasn1.h
 create mode 100644 EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h
 create mode 100644 EXTERNAL_HEADERS/corecrypto/ccdrbg.h
 create mode 100644 EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h
 delete mode 100644 SETUP/config/mkglue.c
 delete mode 100644 SETUP/config/mkswapconf.c
 create mode 100644 SETUP/replacecontents/Makefile
 create mode 100644 SETUP/replacecontents/replacecontents.c
 delete mode 100644 bsd/conf/MASTER.x86_64
 delete mode 100644 bsd/conf/compat_hdrs
 delete mode 100644 bsd/conf/machine.awk
 delete mode 100644 bsd/crypto/blowfish/Makefile
 delete mode 100644 bsd/crypto/blowfish/bf_enc.c
 delete mode 100644 bsd/crypto/blowfish/bf_locl.h
 delete mode 100644 bsd/crypto/blowfish/bf_pi.h
 delete mode 100644 bsd/crypto/blowfish/bf_skey.c
 delete mode 100644 bsd/crypto/blowfish/blowfish.h
 delete mode 100644 bsd/crypto/cast128/cast128.c
 delete mode 100644 bsd/crypto/cast128/cast128.h
 delete mode 100644 bsd/crypto/cast128/cast128_subkey.h
 rename bsd/dev/{i386 => }/mem.c (74%)
 create mode 100644 bsd/hfs/hfs_resize.c
 delete mode 100644 bsd/i386/setjmp.h
 create mode 100644 bsd/kern/kern_csr.c
 create mode 100644 bsd/kern/kern_ecc.c
 create mode 100644 bsd/kern/kern_sfi.c
 create mode 100644 bsd/kern/sys_coalition.c
 create mode 100644 bsd/kern/tty_dev.c
 create mode 100644 bsd/kern/tty_dev.h
 create mode 100644 bsd/man/man2/faccessat.2
 create mode 100644 bsd/man/man2/fchmodat.2
 create mode 100644 bsd/man/man2/fchownat.2
 create mode 100644 bsd/man/man2/fstatat.2
 create mode 100644 bsd/man/man2/getattrlistat.2
 create mode 100644 bsd/man/man2/getattrlistbulk.2
 create mode 100644 bsd/man/man2/linkat.2
 create mode 100644 bsd/man/man2/mkdirat.2
 create mode 100644 bsd/man/man2/openat.2
 create mode 100644 bsd/man/man2/readlinkat.2
 create mode 100644 bsd/man/man2/renameat.2
 create mode 100644 bsd/man/man2/symlinkat.2
 create mode 100644 bsd/man/man2/unlinkat.2
 create mode 100644 bsd/net/content_filter.c
 create mode 100644 bsd/net/content_filter.h
 create mode 100644 bsd/net/necp.c
 create mode 100644 bsd/net/necp.h
 create mode 100644 bsd/net/packet_mangler.c
 create mode 100644 bsd/net/packet_mangler.h
 create mode 100644 bsd/netinet/cbrtf.c
 delete mode 100644 bsd/netinet/ip_mroute.c
 delete mode 100644 bsd/netinet/ip_mroute.h
 create mode 100644 bsd/netinet/tcp_cc.c
 create mode 100644 bsd/netinet/tcp_cubic.c
 delete mode 100644 bsd/netinet6/ip6_mroute.c
 delete mode 100644 bsd/netinet6/ip6_mroute.h
 delete mode 100644 bsd/netinet6/pim6.h
 delete mode 100644 bsd/netinet6/pim6_var.h
 rename bsd/sys/_types/{___offsetof.h => _fsid_t.h} (86%)
 rename bsd/sys/_types/{_pthread_t.h => _offsetof.h} (92%)
 delete mode 100644 bsd/sys/_types/_pthread_cond_t.h
 delete mode 100644 bsd/sys/_types/_pthread_condattr_t.h
 delete mode 100644 bsd/sys/_types/_pthread_mutex_t.h
 delete mode 100644 bsd/sys/_types/_pthread_mutexattr_t.h
 delete mode 100644 bsd/sys/_types/_pthread_once_t.h
 delete mode 100644 bsd/sys/_types/_pthread_rwlock_t.h
 delete mode 100644 bsd/sys/_types/_pthread_rwlockattr_t.h
 create mode 100644 bsd/sys/coalition.h
 rename bsd/{machine/ucontext.h => sys/codedir_internal.h} (76%)
 create mode 100644 bsd/sys/csr.h
 create mode 100644 bsd/sys/sfi.h
 rename bsd/{machine/setjmp.h => sys/stdio.h} (75%)
 delete mode 100644 config/DtraceIgnored.symbols
 rename {bsd/conf => config}/MASTER (61%)
 create mode 100644 config/MASTER.x86_64
 create mode 100644 iokit/IOKit/IOInterruptAccounting.h
 create mode 100644 iokit/IOKit/IOInterruptAccountingPrivate.h
 create mode 100644 iokit/IOKit/IOKernelReportStructs.h
 create mode 100644 iokit/IOKit/IOKernelReporters.h
 create mode 100644 iokit/Kernel/IOHistogramReporter.cpp
 create mode 100644 iokit/Kernel/IOInterruptAccounting.cpp
 create mode 100644 iokit/Kernel/IOReportLegend.cpp
 create mode 100644 iokit/Kernel/IOReporter.cpp
 create mode 100644 iokit/Kernel/IOReporterDefs.h
 create mode 100644 iokit/Kernel/IOSimpleReporter.cpp
 create mode 100644 iokit/Kernel/IOStateReporter.cpp
 delete mode 100644 iokit/conf/MASTER
 delete mode 100644 iokit/conf/MASTER.x86_64
 create mode 100644 libkern/c++/OSSerializeBinary.cpp
 delete mode 100644 libkern/conf/MASTER
 delete mode 100644 libkern/conf/MASTER.x86_64
 create mode 100644 libkern/libkern/OSSerializeBinary.h
 delete mode 100644 libsa/conf/MASTER
 delete mode 100644 libsa/conf/MASTER.x86_64
 delete mode 100644 libsa/printPlist
 rename bsd/machine/_structs.h => libsyscall/custom/__thread_selfusage.s (84%)
 create mode 100644 libsyscall/mach/.gitignore
 rename bsd/i386/_structs.h => libsyscall/mach/mach_voucher.defs (92%)
 create mode 100644 libsyscall/mach/thread_act.c
 create mode 100644 libsyscall/wrappers/coalition.c
 rename bsd/sys/_types/_pthread_key_t.h => libsyscall/wrappers/csr.c (52%)
 create mode 100644 libsyscall/wrappers/guarded_open_dprotected_np.c
 create mode 100644 libsyscall/wrappers/mach_approximate_time.c
 create mode 100644 libsyscall/wrappers/mach_approximate_time.s
 create mode 100644 libsyscall/wrappers/posix_sem_obsolete.c
 create mode 100644 libsyscall/wrappers/rename_ext.c
 create mode 100644 libsyscall/wrappers/renameat.c
 rename bsd/sys/_types/_pthread_attr_t.h => libsyscall/wrappers/setpriority.c (50%)
 create mode 100644 libsyscall/wrappers/sfi.c
 create mode 100644 libsyscall/wrappers/unlinkat.c
 create mode 100644 libsyscall/wrappers/varargs_wrappers.s
 delete mode 100755 libsyscall/xcodescripts/compat-symlinks.sh
 create mode 100644 osfmk/atm/Makefile
 create mode 100644 osfmk/atm/atm.c
 create mode 100644 osfmk/atm/atm_internal.h
 create mode 100644 osfmk/atm/atm_notification.defs
 create mode 100644 osfmk/atm/atm_types.defs
 create mode 100644 osfmk/atm/atm_types.h
 create mode 100644 osfmk/bank/Makefile
 create mode 100644 osfmk/bank/bank.c
 create mode 100644 osfmk/bank/bank_internal.h
 create mode 100644 osfmk/bank/bank_types.h
 delete mode 100644 osfmk/conf/MASTER
 delete mode 100644 osfmk/conf/MASTER.x86_64
 create mode 100644 osfmk/console/art/ProgressBarEmptyLeftEndcap.png
 create mode 100644 osfmk/console/art/ProgressBarEmptyLeftEndcap@2x.png
 create mode 100644 osfmk/console/art/ProgressBarEmptyMiddle.png
 create mode 100644 osfmk/console/art/ProgressBarEmptyMiddle@2x.png
 create mode 100644 osfmk/console/art/ProgressBarEmptyRightEndcap.png
 create mode 100644 osfmk/console/art/ProgressBarEmptyRightEndcap@2x.png
 create mode 100644 osfmk/console/art/ProgressBarFullLeftEndcap.png
 create mode 100644 osfmk/console/art/ProgressBarFullLeftEndcap@2x.png
 create mode 100644 osfmk/console/art/ProgressBarFullMiddle.png
 create mode 100644 osfmk/console/art/ProgressBarFullMiddle@2x.png
 create mode 100644 osfmk/console/art/ProgressBarFullRightEndcap.png
 create mode 100644 osfmk/console/art/ProgressBarFullRightEndcap@2x.png
 create mode 100644 osfmk/console/art/progress.m
 create mode 100644 osfmk/console/art/scalegear.c
 delete mode 100644 osfmk/console/panic_ui/README
 delete mode 100644 osfmk/console/panic_ui/appleclut8.h
 delete mode 100644 osfmk/console/panic_ui/generated_files/panic_image.c
 delete mode 100644 osfmk/console/panic_ui/generated_files/rendered_numbers.c
 delete mode 100644 osfmk/console/panic_ui/genimage.c
 delete mode 100644 osfmk/console/panic_ui/images/panic_dialog.tiff
 delete mode 100644 osfmk/console/panic_ui/images/panic_dialogWHD.raw
 delete mode 100644 osfmk/console/panic_ui/images/rendered_numbers.tiff
 delete mode 100644 osfmk/console/panic_ui/images/rendered_numbersWHD.raw
 delete mode 100644 osfmk/console/panic_ui/qtif2kraw.c
 delete mode 100644 osfmk/console/panic_ui/setupdialog.c
 delete mode 100644 osfmk/console/panic_ui/systemCLUT.act
 create mode 100644 osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c
 create mode 100644 osfmk/corecrypto/ccdigest/src/ccdigest_init.c
 create mode 100644 osfmk/corecrypto/ccdigest/src/ccdigest_update.c
 create mode 100644 osfmk/corecrypto/cchmac/src/cchmac.c
 create mode 100644 osfmk/corecrypto/cchmac/src/cchmac_final.c
 create mode 100644 osfmk/corecrypto/cchmac/src/cchmac_init.c
 create mode 100644 osfmk/corecrypto/cchmac/src/cchmac_update.c
 create mode 100644 osfmk/corecrypto/ccn/src/ccn_set.c
 create mode 100644 osfmk/corecrypto/ccsha1/src/ccdigest_final_64be.c
 create mode 100644 osfmk/corecrypto/ccsha1/src/ccsha1_eay.c
 create mode 100644 osfmk/corecrypto/ccsha1/src/ccsha1_initial_state.c
 create mode 100644 osfmk/i386/bit_routines.h
 create mode 100644 osfmk/i386/panic_hooks.c
 rename osfmk/i386/{gdb_defs.h => panic_hooks.h} (63%)
 create mode 100644 osfmk/ipc/ipc_importance.c
 create mode 100644 osfmk/ipc/ipc_importance.h
 delete mode 100644 osfmk/ipc/ipc_labelh.c
 delete mode 100644 osfmk/ipc/ipc_labelh.h
 create mode 100644 osfmk/ipc/ipc_voucher.c
 create mode 100644 osfmk/ipc/ipc_voucher.h
 create mode 100644 osfmk/kern/coalition.c
 create mode 100644 osfmk/kern/coalition.h
 rename bsd/dev/i386/memmove.c => osfmk/kern/ecc.h (61%)
 create mode 100644 osfmk/kern/energy_perf.c
 create mode 100644 osfmk/kern/energy_perf.h
 create mode 100644 osfmk/kern/hv_support.c
 create mode 100644 osfmk/kern/hv_support.h
 create mode 100644 osfmk/kern/kern_ecc.c
 create mode 100644 osfmk/kern/kern_stackshot.c
 delete mode 100644 osfmk/kern/queue.c
 create mode 100644 osfmk/kern/sched_dualq.c
 delete mode 100644 osfmk/kern/sched_fixedpriority.c
 create mode 100644 osfmk/kern/sched_multiq.c
 delete mode 100644 osfmk/kern/security.c
 create mode 100644 osfmk/kern/sfi.c
 create mode 100644 osfmk/kern/sfi.h
 create mode 100644 osfmk/mach/coalition_notification.defs
 delete mode 100644 osfmk/mach/i386/machine_types.defs
 create mode 100644 osfmk/mach/mach_voucher.defs
 create mode 100644 osfmk/mach/mach_voucher_attr_control.defs
 create mode 100644 osfmk/mach/mach_voucher_types.h
 delete mode 100644 osfmk/mach/memory_object_name.defs
 create mode 100644 osfmk/mach/mig_voucher_support.h
 delete mode 100644 osfmk/mach/security.defs
 create mode 100644 osfmk/mach/sfi_class.h
 rename {bsd/crypto/cast128 => osfmk/prng}/Makefile (56%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/include/WindowsTypesForMac.h (95%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/include/yarrow.h (98%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/include/yarrowUtils.h (93%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/port/smf.c (74%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/assertverify.h (97%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/comp.c (97%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/comp.h (97%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/entropysources.h (96%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/macOnly.h (93%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/prng.c (96%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/prng.h (95%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/prngpriv.h (96%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/readme-prnguser.txt (100%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/sha1mod.c (99%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/sha1mod.h (96%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/smf.h (97%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/userdefines.h (96%)
 rename {bsd/dev/random => osfmk/prng}/YarrowCoreLib/src/yarrowUtils.c (93%)
 rename {bsd/dev/random => osfmk/prng}/fips_sha1.c (99%)
 rename {bsd/dev/random => osfmk/prng}/fips_sha1.h (96%)
 create mode 100644 osfmk/prng/prng_yarrow.c
 create mode 100644 osfmk/prng/random.c
 create mode 100644 osfmk/prng/random.h
 delete mode 100644 osfmk/sys/syslog.h
 delete mode 100644 osfmk/sys/types.h
 create mode 100644 osfmk/vm/vm_phantom_cache.c
 rename bsd/i386/ucontext.h => osfmk/vm/vm_phantom_cache.h (58%)
 delete mode 100644 pexpert/conf/MASTER
 delete mode 100644 pexpert/conf/MASTER.x86_64
 delete mode 100644 security/conf/MASTER
 delete mode 100644 security/conf/MASTER.x86_64
 delete mode 100644 security/conf/kernelversion.major
 delete mode 100644 security/conf/kernelversion.minor
 delete mode 100644 security/conf/kernelversion.variant
 delete mode 100644 security/conf/version.major
 delete mode 100644 security/conf/version.minor
 delete mode 100644 security/conf/version.variant
 delete mode 100644 security/mac_port.c
 delete mode 100644 security/mac_task.c
 create mode 100644 tools/lldbmacros/atm.py
 create mode 100644 tools/lldbmacros/bank.py
 create mode 100644 tools/lldbmacros/ipcimportancedetail.py
 create mode 100644 tools/lldbmacros/plugins/iosspeedtracer.py
 create mode 100755 tools/lldbmacros/plugins/iosspeedtracer.sh
 create mode 100644 tools/lldbmacros/structanalyze.py
 create mode 100644 tools/tests/memorystatus/memorystatus_groups.c
 create mode 100644 tools/tests/perf_index/PerfIndex_COPS_Module/Info.plist
 create mode 100644 tools/tests/perf_index/PerfIndex_COPS_Module/PITest.h
 create mode 100644 tools/tests/perf_index/PerfIndex_COPS_Module/PITest.m
 create mode 100644 tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.h
 create mode 100644 tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.xcodeproj/project.pbxproj
 create mode 100644 tools/tests/perf_index/PerfIndex_COPS_Module/Prefix.pch
 create mode 100644 tools/tests/perf_index/README
 delete mode 100644 tools/tests/perf_index/compile.c
 create mode 100644 tools/tests/perf_index/fail.h
 delete mode 100644 tools/tests/perf_index/iperf.c
 delete mode 100644 tools/tests/perf_index/main.c
 create mode 100644 tools/tests/perf_index/md5.h
 create mode 100644 tools/tests/perf_index/perf_index.c
 create mode 100644 tools/tests/perf_index/perfindex-compile.c
 create mode 100644 tools/tests/perf_index/perfindex-cpu.c
 create mode 100644 tools/tests/perf_index/perfindex-fault.c
 create mode 100644 tools/tests/perf_index/perfindex-file_create.c
 create mode 100644 tools/tests/perf_index/perfindex-file_read.c
 create mode 100644 tools/tests/perf_index/perfindex-file_write.c
 create mode 100644 tools/tests/perf_index/perfindex-iperf.c
 rename tools/tests/perf_index/{stress_memory.c => perfindex-memory.c} (74%)
 create mode 100644 tools/tests/perf_index/perfindex-ram_file_create.c
 create mode 100644 tools/tests/perf_index/perfindex-ram_file_read.c
 create mode 100644 tools/tests/perf_index/perfindex-ram_file_write.c
 create mode 100644 tools/tests/perf_index/perfindex-syscall.c
 create mode 100644 tools/tests/perf_index/perfindex-zfod.c
 create mode 100644 tools/tests/perf_index/ramdisk.c
 create mode 100644 tools/tests/perf_index/ramdisk.h
 delete mode 100644 tools/tests/perf_index/stress_cpu.c
 delete mode 100644 tools/tests/perf_index/stress_fault.c
 delete mode 100644 tools/tests/perf_index/stress_file_create.c
 delete mode 100644 tools/tests/perf_index/stress_file_local.c
 delete mode 100644 tools/tests/perf_index/stress_file_ram.c
 delete mode 100644 tools/tests/perf_index/stress_file_read.c
 delete mode 100644 tools/tests/perf_index/stress_file_write.c
 delete mode 100644 tools/tests/perf_index/stress_general.c
 delete mode 100644 tools/tests/perf_index/stress_syscall.c
 create mode 100644 tools/tests/perf_index/test_fault_helper.c
 create mode 100644 tools/tests/perf_index/test_fault_helper.h
 create mode 100644 tools/tests/perf_index/test_file_helper.c
 create mode 100644 tools/tests/perf_index/test_file_helper.h
 delete mode 100644 tools/tests/unit_tests/Makefile
 delete mode 100755 tools/tests/unit_tests/build_tests.sh
 delete mode 100644 tools/tests/unit_tests/clock_types_6368156.c
 delete mode 100644 tools/tests/unit_tests/codesigntests-entitlements.plist
 delete mode 100644 tools/tests/unit_tests/codesigntests.c
 delete mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m
 delete mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj
 delete mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata
 delete mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c
 delete mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs
 delete mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c
 delete mode 100644 tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c
 delete mode 100644 tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c
 delete mode 100644 tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h
 delete mode 100644 tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c
 delete mode 100644 tools/tests/unit_tests/guarded_fd_tests_11746236_src/mach_exc.defs
 delete mode 100644 tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c
 delete mode 100644 tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c
 delete mode 100644 tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs
 delete mode 100644 tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/xcuserdata/rab.xcuserdatad/UserInterfaceState.xcuserstate
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist
 delete mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m
 delete mode 100644 tools/tests/unit_tests/pipe_test_10807398_src/child.c
 delete mode 100644 tools/tests/unit_tests/pipe_test_10807398_src/parent.c
 delete mode 100644 tools/tests/unit_tests/pipes_fill_procinfo_11179336.c
 delete mode 100644 tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c
 delete mode 100644 tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c
 delete mode 100644 tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c
 delete mode 100644 tools/tests/unit_tests/sampletest.c
 delete mode 100644 tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c
 delete mode 100644 tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c
 delete mode 100644 tools/tests/unit_tests/test_waitqlocktry_12053360.c
tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c delete mode 100644 tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs delete mode 100644 tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c delete mode 100755 tools/tests/unit_tests/xnu_raft_tests.py rename tools/tests/{unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist => xnu_quick_test/xnu_quick_test.entitlements} (77%) diff --git a/EXTERNAL_HEADERS/Availability.h b/EXTERNAL_HEADERS/Availability.h index 8fb99382d..ec18d512c 100644 --- a/EXTERNAL_HEADERS/Availability.h +++ b/EXTERNAL_HEADERS/Availability.h @@ -116,34 +116,37 @@ */ -#define __MAC_10_0 1000 -#define __MAC_10_1 1010 -#define __MAC_10_2 1020 -#define __MAC_10_3 1030 -#define __MAC_10_4 1040 -#define __MAC_10_5 1050 -#define __MAC_10_6 1060 -#define __MAC_10_7 1070 -#define __MAC_10_8 1080 -#define __MAC_10_9 1090 -#define __MAC_NA 9999 /* not available */ - -#define __IPHONE_2_0 20000 -#define __IPHONE_2_1 20100 -#define __IPHONE_2_2 20200 -#define __IPHONE_3_0 30000 -#define __IPHONE_3_1 30100 -#define __IPHONE_3_2 30200 -#define __IPHONE_4_0 40000 -#define __IPHONE_4_1 40100 -#define __IPHONE_4_2 40200 -#define __IPHONE_4_3 40300 -#define __IPHONE_5_0 50000 -#define __IPHONE_5_1 50100 -#define __IPHONE_6_0 60000 -#define __IPHONE_6_1 60100 -#define __IPHONE_7_0 70000 -#define __IPHONE_NA 99999 /* not available */ +#define __MAC_10_0 1000 +#define __MAC_10_1 1010 +#define __MAC_10_2 1020 +#define __MAC_10_3 1030 +#define __MAC_10_4 1040 +#define __MAC_10_5 1050 +#define __MAC_10_6 1060 +#define __MAC_10_7 1070 +#define __MAC_10_8 1080 +#define __MAC_10_9 1090 +#define __MAC_10_10 101000 +/* __MAC_NA is not defined to a value but is used as a token by macros to indicate that the API is unavailable */ + +#define __IPHONE_2_0 20000 +#define __IPHONE_2_1 20100 +#define __IPHONE_2_2 20200 +#define __IPHONE_3_0 30000 +#define __IPHONE_3_1 30100 +#define __IPHONE_3_2 30200 +#define __IPHONE_4_0 40000 +#define __IPHONE_4_1 40100 +#define __IPHONE_4_2 40200 +#define __IPHONE_4_3 40300 +#define __IPHONE_5_0 50000 +#define __IPHONE_5_1 50100 +#define __IPHONE_6_0 60000 +#define __IPHONE_6_1 60100 +#define __IPHONE_7_0 70000 +#define __IPHONE_7_1 70100 +#define __IPHONE_8_0 80000 +/* __IPHONE_NA is not defined to a value but is used as a token by macros to indicate that the API is unavailable */ #include <AvailabilityInternal.h> diff --git a/EXTERNAL_HEADERS/AvailabilityInternal.h b/EXTERNAL_HEADERS/AvailabilityInternal.h index dc8a30747..e8b7b3de7 100644 --- a/EXTERNAL_HEADERS/AvailabilityInternal.h +++ b/EXTERNAL_HEADERS/AvailabilityInternal.h @@ -58,7 +58,7 @@ #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED /* make sure a default max version is set */ #ifndef __IPHONE_OS_VERSION_MAX_ALLOWED - #define __IPHONE_OS_VERSION_MAX_ALLOWED __IPHONE_7_0 + #define __IPHONE_OS_VERSION_MAX_ALLOWED __IPHONE_8_0 #endif /* make sure a valid min is set */ #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0 @@ -160,6 +160,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=2.0,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=7.1,message=_msg))) + #else + #define
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=2.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.0))) #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.0))) #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __attribute__((availability(ios,introduced=2.1))) @@ -247,6 +259,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=2.1,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=2.1,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.1))) #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.1))) #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __attribute__((availability(ios,introduced=2.2))) @@ -328,6 +352,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=2.2,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=2.2,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=8.0))) + 
#endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.2))) #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.2))) #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __attribute__((availability(ios,introduced=3.0))) @@ -403,6 +439,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=3.0,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=3.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.0))) #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.0))) #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __attribute__((availability(ios,introduced=3.1))) @@ -472,6 +520,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=3.1,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=3.1,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.1))) #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.1))) #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __attribute__((availability(ios,introduced=3.2))) @@ -535,6 +595,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 
__attribute__((availability(ios,introduced=3.2,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=3.2,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.2))) #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.2))) #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __attribute__((availability(ios,introduced=4.0))) @@ -592,6 +664,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=4.0,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=4.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.0))) #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.0))) #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __attribute__((availability(ios,introduced=4.1))) @@ -643,6 +727,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=4.1,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=4.1,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.1))) #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.1))) #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __attribute__((availability(ios,introduced=4.2))) @@ -688,6 +784,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=4.2,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=4.2,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.2))) #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.2))) #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __attribute__((availability(ios,introduced=4.3))) @@ -727,6 +835,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=4.3,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=4.3,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.3))) #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.3))) #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __attribute__((availability(ios,introduced=5.0))) @@ 
-760,6 +880,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=5.0,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=5.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.0))) #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=5.0))) #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __attribute__((availability(ios,introduced=5.1))) @@ -787,6 +919,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=5.1,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=5.1,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.1))) #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=5.1))) #define __AVAILABILITY_INTERNAL__IPHONE_6_0 __attribute__((availability(ios,introduced=6.0))) @@ -808,6 +952,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=6.0,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) 
__attribute__((availability(ios,introduced=6.0,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=6.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=6.0))) #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=6.0))) #define __AVAILABILITY_INTERNAL__IPHONE_6_1 __attribute__((availability(ios,introduced=6.1))) @@ -823,6 +979,18 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=6.1,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=6.1,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=8.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=6.1))) #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=6.1))) #define __AVAILABILITY_INTERNAL__IPHONE_7_0 __attribute__((availability(ios,introduced=7.0))) @@ -832,8 +1000,44 @@ #else #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.0))) #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=7.0,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=7.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=8.0))) + #endif #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=7.0))) #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=7.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1 __attribute__((availability(ios,introduced=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __attribute__((availability(ios,introduced=7.1,deprecated=7.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=7.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=7.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=7.1,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=7.1,deprecated=8.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=7.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0 __attribute__((availability(ios,introduced=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __attribute__((availability(ios,introduced=8.0,deprecated=8.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __attribute__((availability(ios,introduced=8.0,deprecated=8.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=8.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=8.0))) #define __AVAILABILITY_INTERNAL__IPHONE_NA __attribute__((availability(ios,unavailable))) #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __attribute__((availability(ios,unavailable))) #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,unavailable))) @@ -3604,6 +3808,1151 @@ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #endif + /* set up old style internal macros (up to 7.1) */ + #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #if 
__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_7_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_7_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL__IPHONE_7_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #endif
+ /* set up old style internal macros (up to 8.0) */
+ #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_8_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0
+ #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define
__AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + 
#define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_7_1 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_1 + #elif 
__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL__IPHONE_8_0 + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_0 + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #endif /* set up 
internal macros (n/a) */ #define __AVAILABILITY_INTERNAL__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE @@ -3615,7 +4964,7 @@ #define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ /* make sure a default max version is set */ #ifndef __MAC_OS_X_VERSION_MAX_ALLOWED - #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_9 + #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_10 #endif #if defined(__has_attribute) && defined(__has_feature) @@ -3682,6 +5031,12 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.9))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.0,deprecated=10.10))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.10,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.10))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.0))) #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.0))) #define __AVAILABILITY_INTERNAL__MAC_10_1 __attribute__((availability(macosx,introduced=10.1))) @@ -3739,6 +5094,12 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.9))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.1,deprecated=10.10))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.10,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.10))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.1))) #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.1))) #define __AVAILABILITY_INTERNAL__MAC_10_2 __attribute__((availability(macosx,introduced=10.2))) @@ -3790,6 +5151,12 @@ #else #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.9))) #endif + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.2,deprecated=10.10))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.10,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.10))) + #endif #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.2))) #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.2))) #define __AVAILABILITY_INTERNAL__MAC_10_3 __attribute__((availability(macosx,introduced=10.3))) @@ -3835,6 +5202,12 @@ 
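/*
 * A usage sketch, not part of the patch: Availability.h maps the public
 * __OSX_AVAILABLE_BUT_DEPRECATED() macro onto these
 * __AVAILABILITY_INTERNAL__MAC_<intro>_DEP__MAC_<dep> names, so the
 * additions above let a declaration name 10.10 as its deprecation release.
 * On attribute-aware compilers that expands to
 * __attribute__((availability(macosx,introduced=10.0,deprecated=10.10))),
 * and clang then warns at call sites once the deployment target reaches
 * 10.10. The function below is hypothetical:
 */
#include <Availability.h>

void old_interface(void)
    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10, __IPHONE_NA, __IPHONE_NA);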
@@ -3835,6 +5202,12 @@
#else
#define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.9)))
#endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.3,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.10)))
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.3)))
#define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.3)))
#define __AVAILABILITY_INTERNAL__MAC_10_4 __attribute__((availability(macosx,introduced=10.4)))
@@ -3874,6 +5247,12 @@
#else
#define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.9)))
#endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.4,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.10)))
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.4)))
#define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.4)))
#define __AVAILABILITY_INTERNAL__MAC_10_5 __attribute__((availability(macosx,introduced=10.5)))
@@ -3907,6 +5286,12 @@
#else
#define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.9)))
#endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.5,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.10)))
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.5)))
#define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.5)))
#define __AVAILABILITY_INTERNAL__MAC_10_6 __attribute__((availability(macosx,introduced=10.6)))
@@ -3934,6 +5319,12 @@
#else
#define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.9)))
#endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.6,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.10)))
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.6)))
#define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.6)))
#define __AVAILABILITY_INTERNAL__MAC_10_7 __attribute__((availability(macosx,introduced=10.7)))
@@ -3955,6 +5346,12 @@
#else
#define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.9)))
#endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.7,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.10)))
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.7)))
#define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.7)))
#define __AVAILABILITY_INTERNAL__MAC_10_8 __attribute__((availability(macosx,introduced=10.8)))
@@ -3970,6 +5367,12 @@
#else
#define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.9)))
#endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.8,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.10)))
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.8)))
#define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.8)))
#define __AVAILABILITY_INTERNAL__MAC_10_9 __attribute__((availability(macosx,introduced=10.9)))
@@ -3979,8 +5382,23 @@
#else
#define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.9)))
#endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.9,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.10)))
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.9)))
#define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.9)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_10 __attribute__((availability(macosx,introduced=10.10)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10 __attribute__((availability(macosx,introduced=10.10,deprecated=10.10)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.10,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg) __attribute__((availability(macosx,introduced=10.10,deprecated=10.10)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.10)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.10)))
#define __AVAILABILITY_INTERNAL__MAC_NA __attribute__((availability(macosx,unavailable)))
#define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA __attribute__((availability(macosx,unavailable)))
#define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,unavailable)))
@@ -3989,6 +5407,13 @@
#ifndef __AVAILABILITY_INTERNAL__MAC_10_0
/* use old style attributes */
+ #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_10
+ #define __AVAILABILITY_INTERNAL__MAC_10_10 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_10
+ #define __AVAILABILITY_INTERNAL__MAC_10_10 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_10 __AVAILABILITY_INTERNAL_REGULAR
+ #endif
#if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_9
#define __AVAILABILITY_INTERNAL__MAC_10_9 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_9
@@ -4303,6 +5728,53 @@
#define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_9
#define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9
#endif
+ #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_10
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_5
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_6
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_7
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_8
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_9
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10 __AVAILABILITY_INTERNAL__MAC_10_10
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10
+ #endif
#define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_0
#define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
#define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_1
@@ -4323,6 +5795,8 @@
#define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8
#define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_9
#define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_10
+ #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_10
#define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA __AVAILABILITY_INTERNAL_UNAVAILABLE
#define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL_UNAVAILABLE
#endif
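/*
 * A sketch of what the old-style three-way fallback above buys (not part of
 * the patch; the function name is hypothetical). When the deployment target
 * is below 10.10 but the SDK allows 10.10, __AVAILABILITY_INTERNAL__MAC_10_10
 * becomes __AVAILABILITY_INTERNAL_WEAK_IMPORT: a 10.10-only symbol then links
 * but resolves to NULL when run on an older system, so callers test it first.
 */
extern void new_in_yosemite(void) __AVAILABILITY_INTERNAL__MAC_10_10;

static void call_if_present(void)
{
    if (new_in_yosemite != NULL)    /* weak import: NULL on pre-10.10 systems */
        new_in_yosemite();
}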
diff --git a/EXTERNAL_HEADERS/AvailabilityMacros.h b/EXTERNAL_HEADERS/AvailabilityMacros.h
index d569c3cd8..629697908 100644
--- a/EXTERNAL_HEADERS/AvailabilityMacros.h
+++ b/EXTERNAL_HEADERS/AvailabilityMacros.h
@@ -89,16 +89,17 @@
/*
 * Set up standard Mac OS X versions
 */
-#define MAC_OS_X_VERSION_10_0 1000
-#define MAC_OS_X_VERSION_10_1 1010
-#define MAC_OS_X_VERSION_10_2 1020
-#define MAC_OS_X_VERSION_10_3 1030
-#define MAC_OS_X_VERSION_10_4 1040
-#define MAC_OS_X_VERSION_10_5 1050
-#define MAC_OS_X_VERSION_10_6 1060
-#define MAC_OS_X_VERSION_10_7 1070
-#define MAC_OS_X_VERSION_10_8 1080
-#define MAC_OS_X_VERSION_10_9 1090
+#define MAC_OS_X_VERSION_10_0 1000
+#define MAC_OS_X_VERSION_10_1 1010
+#define MAC_OS_X_VERSION_10_2 1020
+#define MAC_OS_X_VERSION_10_3 1030
+#define MAC_OS_X_VERSION_10_4 1040
+#define MAC_OS_X_VERSION_10_5 1050
+#define MAC_OS_X_VERSION_10_6 1060
+#define MAC_OS_X_VERSION_10_7 1070
+#define MAC_OS_X_VERSION_10_8 1080
+#define MAC_OS_X_VERSION_10_9 1090
+#define MAC_OS_X_VERSION_10_10 101000
/*
 * If min OS not specified, assume 10.1 for ppc and 10.4 for all others
@@ -122,13 +123,13 @@
#endif
/*
- * if max OS not specified, assume larger of (10.9, min)
+ * if max OS not specified, assume larger of (10.10, min)
 */
#ifndef MAC_OS_X_VERSION_MAX_ALLOWED
- #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_9
+ #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_10
#define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_MIN_REQUIRED
#else
- #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_9
+ #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_10
#endif
#endif
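/*
 * Note the encoding change above: MAC_OS_X_VERSION_10_10 is 101000, not
 * 10100. From 10.10 on the constant carries two digits per minor and patch
 * component, while the legacy values (10.9 -> 1090) stay numerically smaller,
 * so ordered version checks such as this one keep working:
 */
#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
/* building against a 10.10-or-newer SDK */
#endif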
@@ -227,7 +228,7 @@
 * Used on declarations introduced in Mac OS X 10.1
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_1, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_1, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_1
#define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER UNAVAILABLE_ATTRIBUTE
#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_1
@@ -243,7 +244,7 @@
 * and deprecated in Mac OS X 10.1
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_1, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_1, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
#define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE
#else
@@ -257,7 +258,7 @@
 * but later deprecated in Mac OS X 10.1
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_1, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_1, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1 DEPRECATED_ATTRIBUTE
#else
@@ -270,7 +271,7 @@
 * Used on types deprecated in Mac OS X 10.1
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_1, __IPHONE_4_0, __IPHONE_4_0)
+ #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_1, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
#define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER DEPRECATED_ATTRIBUTE
#else
@@ -284,7 +285,7 @@
 * Used on declarations introduced in Mac OS X 10.2
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_2, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_2, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_2
#define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER UNAVAILABLE_ATTRIBUTE
#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_2
@@ -300,7 +301,7 @@
 * and deprecated in Mac OS X 10.2
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_2, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
#define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE
#else
@@ -314,7 +315,7 @@
 * but later deprecated in Mac OS X 10.2
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_2, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 DEPRECATED_ATTRIBUTE
#else
@@ -328,7 +329,7 @@
 * but later deprecated in Mac OS X 10.2
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_2, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
#define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 DEPRECATED_ATTRIBUTE
#else
@@ -341,7 +342,7 @@
 * Used on types deprecated in Mac OS X 10.2
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0)
+ #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_2, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
#define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER DEPRECATED_ATTRIBUTE
#else
@@ -355,7 +356,7 @@
 * Used on declarations introduced in Mac OS X 10.3
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_3, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_3, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_3
#define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER UNAVAILABLE_ATTRIBUTE
#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_3
@@ -371,7 +372,7 @@
 * and deprecated in Mac OS X 10.3
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_3, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
#define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE
#else
@@ -385,7 +386,7 @@
 * but later deprecated in Mac OS X 10.3
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_3, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 DEPRECATED_ATTRIBUTE
#else
@@ -399,7 +400,7 @@
 * but later deprecated in Mac OS X 10.3
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_3, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
#define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 DEPRECATED_ATTRIBUTE
#else
@@ -413,7 +414,7 @@
 * but later deprecated in Mac OS X 10.3
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_3, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
#define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 DEPRECATED_ATTRIBUTE
#else
@@ -426,7 +427,7 @@
 * Used on types deprecated in Mac OS X 10.3
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0)
+ #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_3, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
#define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER DEPRECATED_ATTRIBUTE
#else
@@ -440,7 +441,7 @@
 * Used on declarations introduced in Mac OS X 10.4
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_4, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_4, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_4
#define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER UNAVAILABLE_ATTRIBUTE
#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_4
@@ -456,7 +457,7 @@
 * and deprecated in Mac OS X 10.4
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_4, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
#define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE
#else
@@ -470,7 +471,7 @@
 * but later deprecated in Mac OS X 10.4
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_4, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE
#else
@@ -484,7 +485,7 @@
 * but later deprecated in Mac OS X 10.4
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_4, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
#define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE
#else
@@ -498,7 +499,7 @@
 * but later deprecated in Mac OS X 10.4
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_4, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
#define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE
#else
@@ -512,7 +513,7 @@
 * but later deprecated in Mac OS X 10.4
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_4, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
#define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE
#else
@@ -525,7 +526,7 @@
 * Used on types deprecated in Mac OS X 10.4
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0)
+ #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_4, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
#define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER DEPRECATED_ATTRIBUTE
#else
@@ -539,7 +540,7 @@
 * Used on declarations introduced in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_5
#define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER UNAVAILABLE_ATTRIBUTE
#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_5
@@ -555,7 +556,7 @@
 * and deprecated in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_5, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
#define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE
#else
@@ -569,7 +570,7 @@
 * but later deprecated in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_5, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE
#else
@@ -583,7 +584,7 @@
 * but later deprecated in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_5, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
#define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE
#else
@@ -597,7 +598,7 @@
 * but later deprecated in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_5, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
#define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE
#else
@@ -611,7 +612,7 @@
 * but later deprecated in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_5, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
#define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE
#else
@@ -625,7 +626,7 @@
 * but later deprecated in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_5, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
#define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE
#else
@@ -638,7 +639,7 @@
 * Used on types deprecated in Mac OS X 10.5
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0)
+ #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_5, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
#define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER DEPRECATED_ATTRIBUTE
#else
@@ -652,7 +653,7 @@
 * Used on declarations introduced in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER UNAVAILABLE_ATTRIBUTE
#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_6
@@ -668,7 +669,7 @@
 * and deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE
#else
@@ -682,7 +683,7 @@
 * but later deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE
#else
@@ -696,7 +697,7 @@
 * but later deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE
#else
@@ -710,7 +711,7 @@
 * but later deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE
#else
@@ -724,7 +725,7 @@
 * but later deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE
#else
@@ -738,7 +739,7 @@
 * but later deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE
#else
@@ -752,7 +753,7 @@
 * but later deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE
#else
@@ -765,7 +766,7 @@
 * Used on types deprecated in Mac OS X 10.6
 */
#if __AVAILABILITY_MACROS_USES_AVAILABILITY
- #define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0)
+ #define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_6, __IPHONE_NA, __IPHONE_NA)
#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
#define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER DEPRECATED_ATTRIBUTE
#else
@@ -779,7 +780,7 @@
 * Used on declarations introduced in Mac OS X 10.7
 */
#if
__AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA) #elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_7 @@ -795,7 +796,7 @@ * and deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else @@ -809,7 +810,7 @@ * but later deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else @@ -823,7 +824,7 @@ * but later deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else @@ -837,7 +838,7 @@ * but later deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else @@ -851,7 +852,7 @@ * but later deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else @@ -865,7 +866,7 @@ * but later 
deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else @@ -879,7 +880,7 @@ * but later deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else @@ -893,7 +894,7 @@ * but later deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else @@ -906,7 +907,7 @@ * Used on types deprecated in Mac OS X 10.7 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_7, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER DEPRECATED_ATTRIBUTE #else @@ -920,7 +921,7 @@ * Used on declarations introduced in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_NA) #elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_8 @@ -936,7 +937,7 @@ * and deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else @@ -950,7 +951,7 @@ * but later deprecated in Mac OS X 10.8 */ #if 
__AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -964,7 +965,7 @@ * but later deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -978,7 +979,7 @@ * but later deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -992,7 +993,7 @@ * but later deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -1006,7 +1007,7 @@ * but later deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -1020,7 +1021,7 @@ * but later deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_8, __IPHONE_NA, 
__IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -1034,7 +1035,7 @@ * but later deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -1048,7 +1049,7 @@ * but later deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else @@ -1061,7 +1062,7 @@ * Used on types deprecated in Mac OS X 10.8 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER DEPRECATED_ATTRIBUTE #else @@ -1075,7 +1076,7 @@ * Used on declarations introduced in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_NA) #elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_9 @@ -1091,7 +1092,7 @@ * and deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else @@ -1105,7 +1106,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED 
>= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1119,7 +1120,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1133,7 +1134,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1147,7 +1148,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1161,7 +1162,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1175,7 +1176,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1189,7 +1190,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 
__OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1203,7 +1204,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1217,7 +1218,7 @@ * but later deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE #else @@ -1230,7 +1231,7 @@ * Used on types deprecated in Mac OS X 10.9 */ #if __AVAILABILITY_MACROS_USES_AVAILABILITY - #define DEPRECATED_IN_MAC_OS_X_VERSION_10_9_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_9_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_9, __IPHONE_NA, __IPHONE_NA) #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_9_AND_LATER DEPRECATED_ATTRIBUTE #else @@ -1238,6 +1239,189 @@ #endif +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER + * + * Used on declarations introduced in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER UNAVAILABLE_ATTRIBUTE +#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER WEAK_IMPORT_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED + * + * Used on declarations introduced in Mac OS X 10.10, + * and deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER +#endif + +/* + * 
AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.0, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.1, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.2, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.3, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.4, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 
DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.5, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.6, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.7, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.8, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 + * + * Used on declarations introduced in Mac OS X 10.9, + * but later deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_10, 
__IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10 AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER +#endif + +/* + * DEPRECATED_IN_MAC_OS_X_VERSION_10_10_AND_LATER + * + * Used on types deprecated in Mac OS X 10.10 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10, __IPHONE_NA, __IPHONE_NA) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10 + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_AND_LATER DEPRECATED_ATTRIBUTE +#else + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_AND_LATER +#endif + + #endif /* __AVAILABILITYMACROS__ */ diff --git a/EXTERNAL_HEADERS/Makefile b/EXTERNAL_HEADERS/Makefile index 0eef296a7..39cb58f90 100644 --- a/EXTERNAL_HEADERS/Makefile +++ b/EXTERNAL_HEADERS/Makefile @@ -14,6 +14,9 @@ INSTINC_SUBDIRS = \ INSTINC_SUBDIRS_X86_64 = \ architecture +INSTINC_SUBDIRS_X86_64H = \ + architecture + INSTINC_SUBDIRS_ARM = \ architecture @@ -22,6 +25,9 @@ EXPORT_FILES = \ Availability.h \ AvailabilityInternal.h \ AvailabilityMacros.h \ + stddef.h + +KERNEL_FILES = \ stdarg.h \ stdbool.h \ stdint.h @@ -30,11 +36,11 @@ INSTALL_MI_LIST = INSTALL_MI_DIR = . -INSTALL_KF_MI_LIST = ${EXPORT_FILES} +INSTALL_KF_MI_LIST = ${KERNEL_FILES} -INSTALL_KF_MI_LCL_LIST = ${EXPORT_FILES} +INSTALL_KF_MI_LCL_LIST = ${KERNEL_FILES} -EXPORT_MI_LIST = ${EXPORT_FILES} stddef.h +EXPORT_MI_LIST = ${EXPORT_FILES} ${KERNEL_FILES} EXPORT_MI_DIR = . diff --git a/EXTERNAL_HEADERS/architecture/Makefile b/EXTERNAL_HEADERS/architecture/Makefile index 034d13692..1054ba8f1 100644 --- a/EXTERNAL_HEADERS/architecture/Makefile +++ b/EXTERNAL_HEADERS/architecture/Makefile @@ -12,6 +12,9 @@ INSTINC_SUBDIRS = INSTINC_SUBDIRS_X86_64 = \ i386 +INSTINC_SUBDIRS_X86_64H = \ + i386 + INSTINC_SUBDIRS_ARM = \ arm diff --git a/EXTERNAL_HEADERS/corecrypto/cc.h b/EXTERNAL_HEADERS/corecrypto/cc.h index ecf053182..f44384513 100644 --- a/EXTERNAL_HEADERS/corecrypto/cc.h +++ b/EXTERNAL_HEADERS/corecrypto/cc.h @@ -14,10 +14,11 @@ #include #include -#if KERNEL +#if CC_KERNEL #include #else #include +#include #endif /* Declare a struct element with a guarenteed alignment of _alignment_. @@ -37,12 +38,22 @@ #define cc_ctx_decl(_type_, _size_, _name_) \ _type_ _name_[cc_ctx_n(_type_, _size_)] +#if CC_HAS_BZERO #define cc_zero(_size_,_data_) bzero((_data_), (_size_)) +#else +/* Alternate version if you don't have bzero. */ +#define cc_zero(_size_,_data_) memset((_data_),0 ,(_size_)) +#endif -#define cc_copy(_size_, _dst_, _src_) memcpy(_dst_, _src_, _size_) +#if CC_KERNEL +#define cc_printf(x...) printf(x) +#else +#define cc_printf(x...) fprintf(stderr, x) +#endif + +#define cc_assert(x) assert(x) -#define cc_ctx_clear(_type_, _size_, _name_) \ - cc_zero((_size_ + sizeof(_type_) - 1) / sizeof(_type_), _name_) +#define cc_copy(_size_, _dst_, _src_) memcpy(_dst_, _src_, _size_) CC_INLINE CC_NONNULL2 CC_NONNULL3 CC_NONNULL4 void cc_xor(size_t size, void *r, const void *s, const void *t) { diff --git a/EXTERNAL_HEADERS/corecrypto/cc_config.h b/EXTERNAL_HEADERS/corecrypto/cc_config.h index 7b0f2ed78..9149edb00 100644 --- a/EXTERNAL_HEADERS/corecrypto/cc_config.h +++ b/EXTERNAL_HEADERS/corecrypto/cc_config.h @@ -6,11 +6,108 @@ * Copyright 2010,2011 Apple Inc. All rights reserved. 
* */ + #ifndef _CORECRYPTO_CC_CONFIG_H_ #define _CORECRYPTO_CC_CONFIG_H_ +/* A word about configuration macros: + + Conditional configuration macros specific to corecrypto should be named CORECRYPTO_xxx + or CCxx_yyy and be defined to be either 0 or 1 in this file. You can add an + #ifndef #error construct at the end of this file to make sure it's always defined. + + They should always be tested using the #if directive, never the #ifdef directive. + + No other conditional macros shall ever be used (except in this file) + + Configuration Macros that are defined outside of corecrypto (eg: KERNEL, DEBUG, ...) + shall only be used in this file to define CCxxx macros. + + External macros should be assumed to be either undefined, defined with no value, + or defined as true or false. We shall strive to build with -Wundef whenever possible, + so the following construct should be used to test external macros in this file: + + #if defined(DEBUG) && (DEBUG) + #define CORECRYPTO_DEBUG 1 + #else + #define CORECRYPTO_DEBUG 0 + #endif + + + It is acceptable to define a conditional CC_xxxx macro in an implementation file, + to be used only in this file. + + The current code is not guaranteed to follow those rules, but should be fixed to follow them. + + Corecrypto requires GNU and C99 compatibility. + Typically enabled by passing --gnu --c99 to the compiler (eg. armcc) + +*/ + +#if defined(DEBUG) && (DEBUG) +/* CC_DEBUG is already used in CommonCrypto */ +#define CORECRYPTO_DEBUG 1 +#else +#define CORECRYPTO_DEBUG 0 +#endif + +#if defined(KERNEL) && (KERNEL) +#define CC_KERNEL 1 +#else +#define CC_KERNEL 0 +#endif + +#if defined(USE_L4) && (USE_L4) +#define CC_USE_L4 1 +#else +#define CC_USE_L4 0 +#endif + +#if defined(MAVERICK) && (MAVERICK) +#define CC_MAVERICK 1 +#else +#define CC_MAVERICK 0 +#endif + +#if defined(IBOOT) && (IBOOT) +#define CC_IBOOT 1 +#else +#define CC_IBOOT 0 +#endif + +// BB configuration +#if CC_MAVERICK + +// -- ENDIANNESS +#if defined(ENDIAN_LITTLE) || (defined(__arm__) && !defined(__BIG_ENDIAN)) +#define __LITTLE_ENDIAN__ +#elif !defined(ENDIAN_BIG) && !defined(__BIG_ENDIAN) +#error Baseband endianness not defined. +#endif +#define AESOPT_ENDIAN_NO_FILE + +// -- Architecture +#define CCN_UNIT_SIZE 4 // 32 bits +#define aligned(x) aligned((x)>8?8:(x)) // Alignment on 8 bytes max +#define SAFE_IO // AES support for unaligned Input/Output + +// -- External function +#define assert ASSERT // sanity + +// -- Warnings +// Ignore irrelevant warnings after verification +// #1254-D: arithmetic on pointer to void or function type +// #186-D: pointless comparison of unsigned integer with zero +// #546-D: transfer of control bypasses initialization of +#if defined(__GNUC__) +// warning: pointer of type 'void *' used in arithmetic +#pragma GCC diagnostic ignored "-Wpointer-arith" +#endif // arm or gnuc + +#endif // MAVERICK + #if !defined(CCN_UNIT_SIZE) -#if defined(__x86_64__) +#if defined(__arm64__) || defined(__x86_64__) #define CCN_UNIT_SIZE 8 #elif defined(__arm__) || defined(__i386__) #define CCN_UNIT_SIZE 4 @@ -19,10 +116,44 @@ #endif #endif /* !defined(CCN_UNIT_SIZE) */ +#if defined(__x86_64__) || defined(__i386__) +#define CCN_IOS 0 +#define CCN_OSX 1 +#endif + /* No dynamic linking allowed in L4, e.g.
avoid nonlazy symbols */ /* For corecrypto kext, CC_STATIC should be 0 */ +#if CC_USE_L4 +#define CC_STATIC 1 +#endif -#if defined(__x86_64__) || defined(__i386__) +/* L4 does not have bzero, and neither does hexagon or ARMCC, even in gnu compatibility mode */ +#if CC_USE_L4 || defined(__CC_ARM) || defined(__hexagon__) +#define CC_HAS_BZERO 0 +#else +#define CC_HAS_BZERO 1 +#endif + +#if defined(__CC_ARM) || defined(__hexagon__) +// ARMASM.exe does not like the file syntax of the asm implementation + +#define CCN_ADD_ASM 0 +#define CCN_SUB_ASM 0 +#define CCN_MUL_ASM 0 +#define CCN_ADDMUL1_ASM 0 +#define CCN_MUL1_ASM 0 +#define CCN_CMP_ASM 0 +#define CCN_ADD1_ASM 0 +#define CCN_SUB1_ASM 0 +#define CCN_N_ASM 0 +#define CCN_SET_ASM 0 +#define CCAES_ARM 0 +#define CCAES_INTEL 0 +#define CCN_USE_BUILTIN_CLZ 0 +#define CCSHA1_VNG_INTEL 0 +#define CCSHA2_VNG_INTEL 0 + +#elif defined(__x86_64__) || defined(__i386__) /* These assembly routines only work for a single CCN_UNIT_SIZE. */ #if (defined(__x86_64__) && CCN_UNIT_SIZE == 8) || (defined(__i386__) && CCN_UNIT_SIZE == 4) @@ -44,6 +175,7 @@ #define CCN_SET_ASM 0 #define CCAES_ARM 0 #define CCAES_INTEL 1 +#define CCAES_MUX 0 #define CCN_USE_BUILTIN_CLZ 0 #define CCSHA1_VNG_INTEL 1 #define CCSHA2_VNG_INTEL 1 @@ -64,6 +196,7 @@ #define CCN_SET_ASM 0 #define CCAES_ARM 0 #define CCAES_INTEL 0 +#define CCAES_MUX 0 #define CCN_USE_BUILTIN_CLZ 0 #define CCSHA1_VNG_INTEL 0 #define CCSHA2_VNG_INTEL 0 @@ -80,6 +213,12 @@ #ifdef __GNUC__ #define CC_NORETURN __attribute__((__noreturn__)) #define CC_NOTHROW __attribute__((__nothrow__)) +// Transparent Union +#if defined(__CC_ARM) || defined(__hexagon__) +#define CC_NONNULL_TU(N) +#else +#define CC_NONNULL_TU(N) __attribute__((__nonnull__ N)) +#endif #define CC_NONNULL(N) __attribute__((__nonnull__ N)) #define CC_NONNULL1 __attribute__((__nonnull__(1))) #define CC_NONNULL2 __attribute__((__nonnull__(2))) @@ -97,6 +236,12 @@ #define CC_UNUSED __attribute__((unused)) #else /* !__GNUC__ */ /*! @parseOnly */ +#define CC_UNUSED +/*! @parseOnly */ +#define CC_NONNULL_TU(N) +/*! @parseOnly */ +#define CC_NONNULL(N) +/*! @parseOnly */ #define CC_NORETURN /*! @parseOnly */ #define CC_NOTHROW diff --git a/EXTERNAL_HEADERS/corecrypto/cc_priv.h index db962d461..fbfadddcc 100644 --- a/EXTERNAL_HEADERS/corecrypto/cc_priv.h +++ b/EXTERNAL_HEADERS/corecrypto/cc_priv.h @@ -64,7 +64,7 @@ The following are not defined yet... define them if needed. CC_H2BE64 : convert a 64 bits value between host and big endian order CC_H2LE64 : convert a 64 bits value between host and little endian order - + */ /* TODO: optimized versions */ @@ -74,11 +74,11 @@
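CC_HAS_BZERO ties the cc_config.h convention above to the cc_zero() change in the cc.h hunk earlier: the external environment is normalized once into a 0/1 macro, and cc.h selects an implementation off the normalized flag. A self-contained sketch of the whole pattern (the macro value here is picked for illustration, not taken from any particular build):

#include <stdint.h>
#include <string.h>

/* Normalized the way the comment block prescribes: always 0 or 1, tested
   with #if, never #ifdef. Defined locally only to keep the sketch
   self-contained. */
#define CC_HAS_BZERO 0

/* The "#ifndef #error" guard the comment block recommends: */
#ifndef CC_HAS_BZERO
#error CC_HAS_BZERO must be defined to 0 or 1
#endif

/* cc.h then picks an implementation off the normalized flag: */
#if CC_HAS_BZERO
#define cc_zero(_size_, _data_) bzero((_data_), (_size_))
#else
#define cc_zero(_size_, _data_) memset((_data_), 0, (_size_))
#endif
#define cc_copy(_size_, _dst_, _src_) memcpy((_dst_), (_src_), (_size_))

/* Typical use: install fresh key material, then scrub the buffer. */
static void install_and_wipe(uint8_t *key, size_t len, const uint8_t *fresh)
{
    cc_copy(len, key, fresh);
    /* ... use key ... */
    cc_zero(len, key);
}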
#define CC_BZERO(D,L) memset((D),0,(L)) -#pragma mark - Loads and Store +// MARK: - Loads and Store -#pragma mark -- 32 bits - little endian +// MARK: -- 32 bits - little endian -#pragma mark --- Default version +// MARK: --- Default version #define CC_STORE32_LE(x, y) do { \ ((unsigned char *)(y))[3] = (unsigned char)(((x)>>24)&255); \ @@ -94,7 +94,7 @@ x = ((uint32_t)(((unsigned char *)(y))[3] & 255)<<24) | \ ((uint32_t)(((unsigned char *)(y))[0] & 255)); \ } while(0) -#pragma mark -- 64 bits - little endian +// MARK: -- 64 bits - little endian #define CC_STORE64_LE(x, y) do { \ ((unsigned char *)(y))[7] = (unsigned char)(((x)>>56)&255); \ @@ -118,8 +118,8 @@ x = (((uint64_t)(((unsigned char *)(y))[7] & 255))<<56) | \ (((uint64_t)(((unsigned char *)(y))[0] & 255))); \ } while(0) -#pragma mark -- 32 bits - big endian -#pragma mark --- intel version +// MARK: -- 32 bits - big endian +// MARK: --- intel version #if (defined(__i386__) || defined(__x86_64__)) @@ -137,7 +137,7 @@ x = (((uint64_t)(((unsigned char *)(y))[7] & 255))<<56) | \ :"=r"(x): "r"(y)) #else -#pragma mark --- default version +// MARK: --- default version #define CC_STORE32_BE(x, y) do { \ ((unsigned char *)(y))[0] = (unsigned char)(((x)>>24)&255); \ ((unsigned char *)(y))[1] = (unsigned char)(((x)>>16)&255); \ @@ -154,9 +154,9 @@ x = ((uint32_t)(((unsigned char *)(y))[0] & 255)<<24) | \ #endif -#pragma mark -- 64 bits - big endian +// MARK: -- 64 bits - big endian -#pragma mark --- intel 64 bits version +// MARK: --- intel 64 bits version #if defined(__x86_64__) @@ -175,7 +175,7 @@ __asm__ __volatile__ ( \ #else -#pragma mark --- default version +// MARK: --- default version #define CC_STORE64_BE(x, y) do { \ ((unsigned char *)(y))[0] = (unsigned char)(((x)>>56)&255); \ @@ -201,10 +201,10 @@ x = (((uint64_t)(((unsigned char *)(y))[0] & 255))<<56) | \ #endif -#pragma mark - 32-bit Rotates +// MARK: - 32-bit Rotates #if defined(_MSC_VER) -#pragma mark -- MSVC version +// MARK: -- MSVC version #include #pragma intrinsic(_lrotr,_lrotl) @@ -214,7 +214,7 @@ x = (((uint64_t)(((unsigned char *)(y))[0] & 255))<<56) | \ #define CC_ROLc(x,n) _lrotl(x,n) #elif (defined(__i386__) || defined(__x86_64__)) -#pragma mark -- intel asm version +// MARK: -- intel asm version static inline uint32_t CC_ROL(uint32_t word, int i) { @@ -252,7 +252,7 @@ static inline uint32_t CC_ROR(uint32_t word, int i) #else -#pragma mark -- default version +// MARK: -- default version static inline uint32_t CC_ROL(uint32_t word, int i) { @@ -269,10 +269,10 @@ static inline uint32_t CC_ROR(uint32_t word, int i) #endif -#pragma mark - 64 bits rotates +// MARK: - 64 bits rotates #if defined(__x86_64__) -#pragma mark -- intel 64 asm version +// MARK: -- intel 64 asm version static inline uint64_t CC_ROL64(uint64_t word, int i) { @@ -312,7 +312,7 @@ static inline uint64_t CC_ROR64(uint64_t word, int i) #else /* Not x86_64 */ -#pragma mark -- default C version +// MARK: -- default C version static inline uint64_t CC_ROL64(uint64_t word, int i) { @@ -330,7 +330,7 @@ static inline uint64_t CC_ROR64(uint64_t word, int i) #endif -#pragma mark - Byte Swaps +// MARK: - Byte Swaps static inline uint32_t CC_BSWAP(uint32_t x) { @@ -342,11 +342,20 @@ static inline uint32_t CC_BSWAP(uint32_t x) ); } +#define CC_BSWAP64(x) \ +((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \ +(((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \ +(((uint64_t)(x) & 0x0000ff0000000000ULL) >> 24) | \ +(((uint64_t)(x) & 0x000000ff00000000ULL) >> 8) | \ +(((uint64_t)(x) & 
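The CC_LOAD/CC_STORE and CC_ROL/CC_ROR helpers above are the standard building blocks of hash and cipher round functions. A portable sketch of how they combine, with MY_LOAD32_BE and my_rol32 as local stand-ins for the default C forms (the header swaps in the Intel asm versions when available):

#include <stdint.h>

#define MY_LOAD32_BE(x, y) do { \
    (x) = ((uint32_t)(((const unsigned char *)(y))[0] & 255) << 24) | \
          ((uint32_t)(((const unsigned char *)(y))[1] & 255) << 16) | \
          ((uint32_t)(((const unsigned char *)(y))[2] & 255) <<  8) | \
          ((uint32_t)(((const unsigned char *)(y))[3] & 255)); \
} while (0)

static inline uint32_t my_rol32(uint32_t w, int i)
{
    i &= 31;  /* avoid undefined behavior when i is 0 or 32 */
    return i ? ((w << i) | (w >> (32 - i))) : w;
}

/* One toy mixing step: load a big-endian word and fold it into the state. */
static uint32_t toy_mix(const unsigned char block[4], uint32_t state)
{
    uint32_t w;
    MY_LOAD32_BE(w, block);
    return state ^ my_rol32(w, 5);
}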
0x00000000ff000000ULL) << 8) | \ +(((uint64_t)(x) & 0x0000000000ff0000ULL) << 24) | \ +(((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \ +(((uint64_t)(x) & 0x00000000000000ffULL) << 56))) + #ifdef __LITTLE_ENDIAN__ #define CC_H2BE32(x) CC_BSWAP(x) #define CC_H2LE32(x) (x) #else -#error not good. #define CC_H2BE32(x) (x) #define CC_H2LE32(x) CC_BSWAP(x) #endif @@ -359,4 +368,57 @@ static inline uint32_t CC_BSWAP(uint32_t x) #define cc_byte(x, n) (((x) >> (8 * (n))) & 255) #endif +/* HEAVISIDE_STEP (shifted by one) + function f(x): x->0, when x=0 + x->1, when x>0 + Can also be seen as a bitwise operation: + f(x): x -> y + y[0]=(OR x[i]) for all i (all bits) + y[i]=0 for all i>0 + Run in constant time (log2()) + Useful to run constant time checks +*/ +#define HEAVISIDE_STEP_UINT64(x) {unsigned long t; \ + t=(((uint64_t)x>>32) | (unsigned long)x); \ + t=((t>>16) | t); \ + t=((t>>8) | t); \ + t=((t>>4) | t); \ + t=((t>>2) | t); \ + t=((t>>1) | t); \ + x=t & 0x1;} + +#define HEAVISIDE_STEP_UINT32(x) {uint16_t t; \ + t=(((unsigned long)x>>16) | (uint16_t)x); \ + t=((t>>8) | t); \ + t=((t>>4) | t); \ + t=((t>>2) | t); \ + t=((t>>1) | t); \ + x=t & 0x1;} + +#define HEAVISIDE_STEP_UINT16(x) {uint8_t t; \ + t=(((uint16_t)x>>8) | (uint8_t)x); \ + t=((t>>4) | t); \ + t=((t>>2) | t); \ + t=((t>>1) | t); \ + x=t & 0x1;} + +#define HEAVISIDE_STEP_UINT8(x) {uint8_t t; \ + t=(((uint8_t)x>>4) | (uint8_t)x); \ + t=((t>>2) | t); \ + t=((t>>1) | t); \ + x=t & 0x1;} + +#define CC_HEAVISIDE_STEP(x) { \ + if (sizeof(x) == 1) {HEAVISIDE_STEP_UINT8(x);} \ + else if (sizeof(x) == 2) {HEAVISIDE_STEP_UINT16(x);} \ + else if (sizeof(x) == 4) {HEAVISIDE_STEP_UINT32(x);} \ + else if (sizeof(x) == 8) {HEAVISIDE_STEP_UINT64(x);} \ + else {x=((x==0)?0:1);} \ + } + + +/* Set a variable to the biggest power of 2 which can be represented */ +#define MAX_POWER_OF_2(x) ((__typeof__(x))1<<(8*sizeof(x)-1)) + + #endif /* _CORECRYPTO_CC_PRIV_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccaes.h b/EXTERNAL_HEADERS/corecrypto/ccaes.h index 9dca39bd6..67c4404ca 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccaes.h +++ b/EXTERNAL_HEADERS/corecrypto/ccaes.h @@ -32,6 +32,14 @@ extern const struct ccmode_cbc ccaes_arm_cbc_encrypt_mode; extern const struct ccmode_cbc ccaes_arm_cbc_decrypt_mode; #endif +#if CCAES_MUX +extern const struct ccmode_cbc ccaes_ios_hardware_cbc_encrypt_mode; +extern const struct ccmode_cbc ccaes_ios_hardware_cbc_decrypt_mode; + +extern const struct ccmode_cbc *ccaes_ios_mux_cbc_encrypt_mode(void); +extern const struct ccmode_cbc *ccaes_ios_mux_cbc_decrypt_mode(void); +#endif + #if CCAES_INTEL //extern const struct ccmode_ecb ccaes_intel_ecb_encrypt_mode; //extern const struct ccmode_ecb ccaes_intel_ecb_decrypt_mode; @@ -69,6 +77,7 @@ const struct ccmode_cfb *ccaes_cfb_encrypt_mode(void); const struct ccmode_cfb8 *ccaes_cfb8_encrypt_mode(void); const struct ccmode_xts *ccaes_xts_encrypt_mode(void); const struct ccmode_gcm *ccaes_gcm_encrypt_mode(void); +const struct ccmode_ccm *ccaes_ccm_encrypt_mode(void); const struct ccmode_ecb *ccaes_ecb_decrypt_mode(void); const struct ccmode_cbc *ccaes_cbc_decrypt_mode(void); @@ -76,6 +85,7 @@ const struct ccmode_cfb *ccaes_cfb_decrypt_mode(void); const struct ccmode_cfb8 *ccaes_cfb8_decrypt_mode(void); const struct ccmode_xts *ccaes_xts_decrypt_mode(void); const struct ccmode_gcm *ccaes_gcm_decrypt_mode(void); +const struct ccmode_ccm *ccaes_ccm_decrypt_mode(void); const struct ccmode_ctr *ccaes_ctr_crypt_mode(void); const struct ccmode_ofb *ccaes_ofb_crypt_mode(void); diff 
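The HEAVISIDE_STEP family added to cc_priv.h above exists so that "is this value nonzero?" can be answered in constant time: the bits are OR-folded down to bit 0 in a fixed number of steps, with no data-dependent branch. A sketch of the classic use, with the 8-bit reduction written out the same way:

#include <stddef.h>
#include <stdint.h>

/* Same bit-folding idea as HEAVISIDE_STEP_UINT8: maps 0 -> 0, nonzero -> 1. */
static uint8_t heaviside_u8(uint8_t x)
{
    x = (uint8_t)((x >> 4) | x);
    x = (uint8_t)((x >> 2) | x);
    x = (uint8_t)((x >> 1) | x);
    return x & 1;
}

/* Returns 1 when the buffers differ, 0 when equal, with no early exit, so
   the running time does not leak where the first mismatch occurred. */
static int ct_differ(const uint8_t *a, const uint8_t *b, size_t n)
{
    uint8_t acc = 0;
    for (size_t i = 0; i < n; i++)
        acc |= (uint8_t)(a[i] ^ b[i]);
    return heaviside_u8(acc);
}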
--git a/EXTERNAL_HEADERS/corecrypto/ccasn1.h b/EXTERNAL_HEADERS/corecrypto/ccasn1.h new file mode 100644 index 000000000..3f67e2e6f --- /dev/null +++ b/EXTERNAL_HEADERS/corecrypto/ccasn1.h @@ -0,0 +1,95 @@ +/* + * ccasn1.h + * corecrypto + * + * Created by Michael Brouwer on 8/6/10. + * Copyright 2010-2012 Apple Inc. All rights reserved. + * + */ + +#ifndef _CORECRYPTO_CCASN1_H_ +#define _CORECRYPTO_CCASN1_H_ + +#include +#include +#include + +/* ASN.1 types for on the fly ASN.1 BER/DER encoding/decoding. Don't use + these with the ccder interface, use the CCDER_ types instead. */ +enum { + CCASN1_EOL = 0x00, + CCASN1_BOOLEAN = 0x01, + CCASN1_INTEGER = 0x02, + CCASN1_BIT_STRING = 0x03, + CCASN1_OCTET_STRING = 0x04, + CCASN1_NULL = 0x05, + CCASN1_OBJECT_IDENTIFIER = 0x06, + CCASN1_OBJECT_DESCRIPTOR = 0x07, + /* External or instance-of 0x08 */ + CCASN1_REAL = 0x09, + CCASN1_ENUMERATED = 0x0a, + CCASN1_EMBEDDED_PDV = 0x0b, + CCASN1_UTF8_STRING = 0x0c, + /* 0x0d */ + /* 0x0e */ + /* 0x0f */ + CCASN1_SEQUENCE = 0x10, + CCASN1_SET = 0x11, + CCASN1_NUMERIC_STRING = 0x12, + CCASN1_PRINTABLE_STRING = 0x13, + CCASN1_T61_STRING = 0x14, + CCASN1_VIDEOTEX_STRING = 0x15, + CCASN1_IA5_STRING = 0x16, + CCASN1_UTC_TIME = 0x17, + CCASN1_GENERALIZED_TIME = 0x18, + CCASN1_GRAPHIC_STRING = 0x19, + CCASN1_VISIBLE_STRING = 0x1a, + CCASN1_GENERAL_STRING = 0x1b, + CCASN1_UNIVERSAL_STRING = 0x1c, + /* 0x1d */ + CCASN1_BMP_STRING = 0x1e, + CCASN1_HIGH_TAG_NUMBER = 0x1f, + CCASN1_TELETEX_STRING = CCASN1_T61_STRING, + + CCASN1_TAG_MASK = 0xff, + CCASN1_TAGNUM_MASK = 0x1f, + + CCASN1_METHOD_MASK = 0x20, + CCASN1_PRIMITIVE = 0x00, + CCASN1_CONSTRUCTED = 0x20, + + CCASN1_CLASS_MASK = 0xc0, + CCASN1_UNIVERSAL = 0x00, + CCASN1_APPLICATION = 0x40, + CCASN1_CONTEXT_SPECIFIC = 0x80, + CCASN1_PRIVATE = 0xc0, + + CCASN1_CONSTRUCTED_SET = CCASN1_SET | CCASN1_CONSTRUCTED, + CCASN1_CONSTRUCTED_SEQUENCE = CCASN1_SEQUENCE | CCASN1_CONSTRUCTED, + + // TODO: Remove these 2: */ + // ASN1_INTEGER = 0x02, + ASN1_CONSTRUCTED_SEQUENCE = 0x30 +}; + +typedef union { + const unsigned char *oid; +} ccoid_t __attribute__((transparent_union)); + +/* Returns *der iff *der points to a DER encoded oid that fits within *der_len. */ +ccoid_t ccoid_for_der(size_t *der_len, const uint8_t **der); + +/* Returns the size of an oid including it's tag and length. */ +CC_INLINE CC_PURE CC_NONNULL_TU((1)) +size_t ccoid_size(ccoid_t oid) { + return 2 + oid.oid[1]; +} + +CC_INLINE CC_PURE CC_NONNULL_TU((1)) CC_NONNULL_TU((2)) +bool ccoid_equal(ccoid_t oid1, ccoid_t oid2) { + return(ccoid_size(oid1) == ccoid_size(oid2) && memcmp(oid1.oid, oid2.oid, ccoid_size(oid1))== 0); +} + +extern const unsigned char *ccsha1_oid; + +#endif /* _CORECRYPTO_CCASN1_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccder.h b/EXTERNAL_HEADERS/corecrypto/ccder.h index 756afd295..7c7f08be6 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccder.h +++ b/EXTERNAL_HEADERS/corecrypto/ccder.h @@ -89,42 +89,69 @@ enum { }; -#pragma mark ccder_sizeof_ functions +#define CC_NO_INLINE +// MARK: ccder_sizeof_ functions -inline CC_CONST -size_t ccder_sizeof_tag(ccder_tag tag); +/* Returns the size of an asn1 encoded item of length l in bytes. 
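In the new ccasn1.h, a ccoid_t wraps a pointer to a complete DER-encoded OBJECT IDENTIFIER: tag byte, single length byte, then the body. The same logic as ccoid_size()/ccoid_equal(), re-expressed with plain pointers to sidestep the transparent-union plumbing; the bytes below are the standard DER encoding of 1.3.14.3.2.26 (SHA-1):

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* 0x06 = OBJECT IDENTIFIER tag, 0x05 = body length, then the arcs. */
static const unsigned char my_sha1_oid[] =
    { 0x06, 0x05, 0x2b, 0x0e, 0x03, 0x02, 0x1a };

/* ccoid_size(): two header bytes plus the value of the length byte. */
static size_t oid_size(const unsigned char *oid) { return 2 + oid[1]; }

static bool oid_equal(const unsigned char *a, const unsigned char *b)
{
    return oid_size(a) == oid_size(b) && memcmp(a, b, oid_size(a)) == 0;
}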
*/ +CC_NO_INLINE CC_CONST +size_t ccder_sizeof(ccder_tag tag, size_t len); + +CC_NO_INLINE CC_PURE +size_t ccder_sizeof_implicit_integer(ccder_tag implicit_tag, + cc_size n, const cc_unit *s); + +CC_NO_INLINE CC_PURE +size_t ccder_sizeof_implicit_octet_string(ccder_tag implicit_tag, + cc_size n, const cc_unit *s); -inline CC_CONST +CC_NO_INLINE CC_CONST +size_t ccder_sizeof_implicit_raw_octet_string(ccder_tag implicit_tag, + size_t s_size); +CC_NO_INLINE CC_CONST +size_t ccder_sizeof_implicit_uint64(ccder_tag implicit_tag, uint64_t value); + +CC_NO_INLINE CC_PURE +size_t ccder_sizeof_integer(cc_size n, const cc_unit *s); + +CC_NO_INLINE CC_CONST size_t ccder_sizeof_len(size_t len); -/* Returns the size of an asn1 encoded item of length l in bytes, - assuming a 1 byte tag. */ -inline CC_CONST -size_t ccder_sizeof(ccder_tag tag, size_t len); +CC_NO_INLINE CC_PURE +size_t ccder_sizeof_octet_string(cc_size n, const cc_unit *s); -inline CC_CONST +CC_NO_INLINE CC_PURE size_t ccder_sizeof_oid(ccoid_t oid); -#pragma mark ccder_encode_ functions. +CC_NO_INLINE CC_CONST +size_t ccder_sizeof_raw_octet_string(size_t s_size); + +CC_NO_INLINE CC_CONST +size_t ccder_sizeof_tag(ccder_tag tag); + +CC_NO_INLINE CC_CONST +size_t ccder_sizeof_uint64(uint64_t value); + + +// MARK: ccder_encode_ functions. /* Encode a tag backwards, der_end should point to one byte past the end of destination for the tag, returns a pointer to the first byte of the tag. Returns NULL if there is an encoding error. */ -inline CC_NONNULL2 +CC_NO_INLINE CC_NONNULL2 uint8_t *ccder_encode_tag(ccder_tag tag, const uint8_t *der, uint8_t *der_end); /* Returns a pointer to the start of the len field. returns NULL if there is an encoding error. */ -inline CC_NONNULL2 +CC_NO_INLINE CC_NONNULL2 uint8_t * ccder_encode_len(size_t len, const uint8_t *der, uint8_t *der_end); /* der_end should point to the first byte of the content of this der item. */ -inline CC_NONNULL3 +CC_NO_INLINE CC_NONNULL3 uint8_t * ccder_encode_tl(ccder_tag tag, size_t len, const uint8_t *der, uint8_t *der_end); -inline CC_PURE CC_NONNULL2 +CC_NO_INLINE CC_PURE CC_NONNULL2 uint8_t * ccder_encode_body_nocopy(size_t size, const uint8_t *der, uint8_t *der_end); @@ -132,51 +159,51 @@ ccder_encode_body_nocopy(size_t size, const uint8_t *der, uint8_t *der_end); bound, der_end is one byte paste where we want to write the length and body_end is one byte past the end of the body of the der object we are encoding the tag and length of. */ -inline CC_NONNULL((2,3)) +CC_NO_INLINE CC_NONNULL((2, 3)) uint8_t * ccder_encode_constructed_tl(ccder_tag tag, const uint8_t *body_end, const uint8_t *der, uint8_t *der_end); /* Encodes oid into der and returns der + ccder_sizeof_oid(oid). 
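Everything in this header encodes backwards: der_end points one byte past the end of the output, each encoder writes in front of what has already been emitted, and returns the new start (or NULL on error). A toy short-form length encoder showing that calling convention; toy_encode_len is illustrative, not a corecrypto function:

#include <stddef.h>
#include <stdint.h>

/* Toy short-form DER length encoder in the ccder style: write backwards
   from der_end, never below der, return the new "end" for the caller. */
static uint8_t *toy_encode_len(size_t len, const uint8_t *der, uint8_t *der_end)
{
    if (len > 0x7f || der_end == der)
        return NULL;                /* long form needed, or no room */
    *--der_end = (uint8_t)len;      /* lands in front of the encoded body */
    return der_end;
}

This shape is what makes nesting cheap: the body of a SEQUENCE is encoded first, its return value becomes the der_end for the enclosing tag-and-length, and no bytes are ever copied. Presumably that is why the ccder_sizeof_ functions exist in matched pairs with the encoders: size the buffer first, then fill it from the back.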
*/ -inline CC_NONNULL1 CC_NONNULL2 +CC_NO_INLINE CC_NONNULL_TU((1)) CC_NONNULL2 uint8_t *ccder_encode_oid(ccoid_t oid, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL((3,4)) +CC_NO_INLINE CC_NONNULL((3, 4)) uint8_t *ccder_encode_implicit_integer(ccder_tag implicit_tag, cc_size n, const cc_unit *s, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL((2,3)) +CC_NO_INLINE CC_NONNULL((2, 3)) uint8_t *ccder_encode_integer(cc_size n, const cc_unit *s, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL3 +CC_NO_INLINE CC_NONNULL3 uint8_t *ccder_encode_implicit_uint64(ccder_tag implicit_tag, uint64_t value, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL3 +CC_NO_INLINE CC_NONNULL2 uint8_t *ccder_encode_uint64(uint64_t value, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL((3,4)) +CC_NO_INLINE CC_NONNULL((3, 4)) uint8_t *ccder_encode_implicit_octet_string(ccder_tag implicit_tag, cc_size n, const cc_unit *s, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL((2,3)) +CC_NO_INLINE CC_NONNULL((2, 3)) uint8_t *ccder_encode_octet_string(cc_size n, const cc_unit *s, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL((3,4)) +CC_NO_INLINE CC_NONNULL((3, 4)) uint8_t *ccder_encode_implicit_raw_octet_string(ccder_tag implicit_tag, size_t s_size, const uint8_t *s, const uint8_t *der, uint8_t *der_end); -inline CC_NONNULL((2,3)) +CC_NO_INLINE CC_NONNULL((2, 3)) uint8_t *ccder_encode_raw_octet_string(size_t s_size, const uint8_t *s, const uint8_t *der, uint8_t *der_end); @@ -184,55 +211,67 @@ uint8_t *ccder_encode_raw_octet_string(size_t s_size, const uint8_t *s, It's inefficient – especially when you already have to convert to get to the form for the body. see encode integer for the right way to unify conversion and insertion */ -inline CC_NONNULL3 +CC_NO_INLINE CC_NONNULL3 uint8_t * ccder_encode_body(size_t size, const uint8_t* body, const uint8_t *der, uint8_t *der_end); -#pragma mark ccder_decode_ functions. +// MARK: ccder_decode_ functions. /* Returns a pointer to the start of the length field, and returns the decoded tag in tag. returns NULL if there is a decoding error. */ -inline CC_NONNULL((1,3)) +CC_NO_INLINE CC_NONNULL((1, 3)) const uint8_t *ccder_decode_tag(ccder_tag *tagp, const uint8_t *der, const uint8_t *der_end); -inline CC_NONNULL((1,3)) +CC_NO_INLINE CC_NONNULL((1, 3)) const uint8_t *ccder_decode_len(size_t *lenp, const uint8_t *der, const uint8_t *der_end); /* Returns a pointer to the start of the der object, and returns the length in len. returns NULL if there is a decoding error. */ -inline CC_NONNULL((2,4)) +CC_NO_INLINE CC_NONNULL((2, 4)) const uint8_t *ccder_decode_tl(ccder_tag expected_tag, size_t *lenp, const uint8_t *der, const uint8_t *der_end); -inline CC_NONNULL((2,3)) +CC_NO_INLINE CC_NONNULL((2, 4)) const uint8_t * ccder_decode_constructed_tl(ccder_tag expected_tag, const uint8_t **body_end, const uint8_t *der, const uint8_t *der_end); -inline CC_NONNULL((1,3)) +CC_NO_INLINE CC_NONNULL((1, 3)) const uint8_t * ccder_decode_sequence_tl(const uint8_t **body_end, const uint8_t *der, const uint8_t *der_end); -inline CC_NONNULL((2,4)) +CC_NO_INLINE CC_NONNULL((2, 4)) const uint8_t *ccder_decode_uint(cc_size n, cc_unit *r, const uint8_t *der, const uint8_t *der_end); -inline CC_NONNULL((1,3)) +CC_NO_INLINE CC_NONNULL((1, 3)) const uint8_t *ccder_decode_uint64(uint64_t* r, const uint8_t *der, const uint8_t *der_end); /* Decode SEQUENCE { r, s -- (unsigned)integer } in der into r and s. 
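   (A raw ECDSA signature has exactly this shape; a hedged sketch
   assuming ccn_nof() from ccn.h, other names hypothetical:

     cc_unit r[ccn_nof(256)], s[ccn_nof(256)];
     der = ccder_decode_seqii(ccn_nof(256), r, s, der, der_end);
   )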
Returns NULL on decode errors, returns pointer just past the end of the sequence of integers otherwise. */ -inline CC_NONNULL((2,3,5)) +CC_NO_INLINE CC_NONNULL((2, 3, 5)) const uint8_t *ccder_decode_seqii(cc_size n, cc_unit *r, cc_unit *s, const uint8_t *der, const uint8_t *der_end); -inline CC_NONNULL_ALL +CC_NO_INLINE CC_NONNULL_TU((1)) CC_NONNULL((3)) const uint8_t *ccder_decode_oid(ccoid_t *oidp, const uint8_t *der, const uint8_t *der_end); +CC_NO_INLINE CC_NONNULL_ALL +const uint8_t *ccder_decode_bitstring(const uint8_t **bit_string, + size_t *bit_length, + const uint8_t *der, const uint8_t *der_end); + +CC_NO_INLINE CC_NONNULL_ALL +const uint8_t *ccder_decode_eckey(uint64_t *version, + size_t *priv_size, const uint8_t **priv_key, + ccoid_t *oid, + size_t *pub_size, const uint8_t **pub_key, + const uint8_t *der, const uint8_t *der_end); + #ifndef CCDER_MULTIBYTE_TAGS #include #include @@ -242,6 +281,8 @@ const uint8_t *ccder_decode_oid(ccoid_t *oidp, #include #include #include +#include +#include #include #include #include @@ -255,6 +296,7 @@ const uint8_t *ccder_decode_oid(ccoid_t *oidp, #include #include #include +#include #include #include #include diff --git a/EXTERNAL_HEADERS/corecrypto/ccdigest.h b/EXTERNAL_HEADERS/corecrypto/ccdigest.h index 7aa8ada33..9079c4a18 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccdigest.h +++ b/EXTERNAL_HEADERS/corecrypto/ccdigest.h @@ -12,7 +12,10 @@ #include #include - +#ifdef USE_SUPER_COOL_NEW_CCOID_T +#include +#endif /* USE_SUPER_COOL_NEW_CCOID_T */ + /* To malloc a digest context for a given di, use malloc(ccdigest_di_size(di)) and assign the result to a pointer to a struct ccdigest_ctx. */ struct ccdigest_ctx { @@ -48,7 +51,11 @@ struct ccdigest_info { unsigned long state_size; unsigned long block_size; unsigned long oid_size; +#ifdef USE_SUPER_COOL_NEW_CCOID_T + ccoid_t oid; +#else unsigned char *oid; +#endif const void *initial_state; void(*compress)(ccdigest_state_t state, unsigned long nblocks, const void *data); @@ -66,19 +73,21 @@ struct ccdigest_info { size_t _block_size_, named _name_. Can be used in structs or on the stack. */ #define ccdigest_ctx_decl(_state_size_, _block_size_, _name_) cc_ctx_decl(struct ccdigest_ctx, ccdigest_ctx_size(_state_size_, _block_size_), _name_) -#define ccdigest_ctx_clear(_state_size_, _block_size_, _name_) cc_ctx_clear(struct ccdigest_ctx, ccdigest_ctx_size(_state_size_, _block_size_), _name_) +#define ccdigest_ctx_clear(_state_size_, _block_size_, _name_) cc_zero(ccdigest_ctx_size(_state_size_, _block_size_), _name_) /* Declare a ccdigest_ctx for a given size_t _state_size_ and size_t _block_size_, named _name_. Can be used on the stack. */ #define ccdigest_di_decl(_di_, _name_) cc_ctx_decl(struct ccdigest_ctx, ccdigest_di_size(_di_), _name_) -#define ccdigest_di_clear(_di_, _name_) cc_ctx_clear(struct ccdigest_ctx, ccdigest_di_size(_di_), _name_) +#define ccdigest_di_clear(_di_, _name_) cc_zero(ccdigest_di_size(_di_), _name_) /* Digest context field accessors. Consider the implementation private. 
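   Ordinary callers should stay with the high-level entry points
   instead; a hedged sketch, assuming the usual ccdigest_init/_update/
   _final declarations elsewhere in this header and ccsha1_di() from
   ccsha1.h:

     const struct ccdigest_info *di = ccsha1_di();
     ccdigest_di_decl(di, ctx);
     ccdigest_init(di, ctx);
     ccdigest_update(di, ctx, msg_len, msg);   /* may be called repeatedly */
     ccdigest_final(di, ctx, md);              /* md: di->output_size bytes */
     ccdigest_di_clear(di, ctx);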
*/ -#define ccdigest_state(_di_, _ctx_) ((ccdigest_state_t)(_ctx_)) -#define ccdigest_state_u8(_di_, _ctx_) (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8) -#define ccdigest_state_u32(_di_, _ctx_) (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u32) -#define ccdigest_state_u64(_di_, _ctx_) (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u64) -#define ccdigest_state_ccn(_di_, _ctx_) (&((ccdigest_ctx_t)(_ctx_)).hdr->state.ccn) -#define ccdigest_nbits(_di_, _ctx_) (((uint64_t *)(&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8 + (_di_)->state_size))[0]) + +#define ccdigest_state(_di_, _ctx_) ((struct ccdigest_state *)(&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8 + sizeof(uint64_t))) +#define ccdigest_state_u8(_di_, _ctx_) ccdigest_u8(ccdigest_state((_di_), (_ctx_))) +#define ccdigest_state_u32(_di_, _ctx_) ccdigest_u32(ccdigest_state((_di_), (_ctx_))) +#define ccdigest_state_u64(_di_, _ctx_) ccdigest_u64(ccdigest_state((_di_), (_ctx_))) +#define ccdigest_state_ccn(_di_, _ctx_) ccdigest_ccn(ccdigest_state((_di_), (_ctx_))) +#define ccdigest_nbits(_di_, _ctx_) (((uint64_t *)(&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8))[0]) + #define ccdigest_data(_di_, _ctx_) (&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8 + (_di_)->state_size + sizeof(uint64_t)) #define ccdigest_num(_di_, _ctx_) (((unsigned int *)(&((ccdigest_ctx_t)(_ctx_)).hdr->state.u8 + (_di_)->state_size + sizeof(uint64_t) + (_di_)->block_size))[0]) @@ -126,4 +135,45 @@ struct ccdigest_vector { int ccdigest_test_vector(const struct ccdigest_info *di, const struct ccdigest_vector *v); int ccdigest_test_chunk_vector(const struct ccdigest_info *di, const struct ccdigest_vector *v, unsigned long chunk); +#ifdef USE_SUPER_COOL_NEW_CCOID_T +#define OID_DEF(_NAME_, _VALUE_) _NAME_ {((unsigned char *) _VALUE_)} +#define CC_DIGEST_OID_MD2 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02")} +#define CC_DIGEST_OID_MD4 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04")} +#define CC_DIGEST_OID_MD5 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05")} +#define CC_DIGEST_OID_SHA1 {((unsigned char *)"\x06\x05\x2b\x0e\x03\x02\x1a")} +#define CC_DIGEST_OID_SHA224 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04")} +#define CC_DIGEST_OID_SHA256 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01")} +#define CC_DIGEST_OID_SHA384 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02")} +#define CC_DIGEST_OID_SHA512 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03")} +#define CC_DIGEST_OID_RMD128 {((unsigned char *)"\x06\x06\x28\xCF\x06\x03\x00\x32")} +#define CC_DIGEST_OID_RMD160 {((unsigned char *)"\x06\x05\x2B\x24\x03\x02\x01")} +#define CC_DIGEST_OID_RMD256 {((unsigned char *)"\x06\x05\x2B\x24\x03\x02\x03")} +#define CC_DIGEST_OID_RMD320 {((unsigned char *)NULL)} +#else +#define CC_DIGEST_OID_MD2 "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02" +#define CC_DIGEST_OID_MD4 "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04" +#define CC_DIGEST_OID_MD5 "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05" +#define CC_DIGEST_OID_SHA1 "\x06\x05\x2b\x0e\x03\x02\x1a" +#define CC_DIGEST_OID_SHA224 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04" +#define CC_DIGEST_OID_SHA256 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01" +#define CC_DIGEST_OID_SHA384 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02" +#define CC_DIGEST_OID_SHA512 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03" +#define CC_DIGEST_OID_RMD128 "\x06\x06\x28\xCF\x06\x03\x00\x32" +#define CC_DIGEST_OID_RMD160 "\x06\x05\x2B\x24\x03\x02\x01" +#define CC_DIGEST_OID_RMD256 
"\x06\x05\x2B\x24\x03\x02\x03" +#define CC_DIGEST_OID_RMD320 NULL +#endif + +#ifdef USE_SUPER_COOL_NEW_CCOID_T +CC_INLINE CC_NONNULL_TU((1)) CC_NONNULL_TU((2)) +bool ccdigest_oid_equal(const struct ccdigest_info *di, ccoid_t oid) { + if(di->oid.oid == NULL && oid.oid == NULL) return true; + return ccoid_equal(di->oid, oid); +} + +typedef const struct ccdigest_info *(ccdigest_lookup)(ccoid_t oid); + +#include +const struct ccdigest_info *ccdigest_oid_lookup(ccoid_t oid, ...); +#endif /* USE_SUPER_COOL_NEW_CCOID_T*/ #endif /* _CORECRYPTO_CCDIGEST_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h b/EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h new file mode 100644 index 000000000..407a9b19b --- /dev/null +++ b/EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h @@ -0,0 +1,22 @@ +/* + * ccdigest_priv.h + * corecrypto + * + * Created by Fabrice Gautier on 12/7/10. + * Copyright 2010,2011 Apple, Inc. All rights reserved. + * + */ + +#ifndef _CORECRYPTO_CCDIGEST_PRIV_H_ +#define _CORECRYPTO_CCDIGEST_PRIV_H_ + +#include + +void ccdigest_final_common(const struct ccdigest_info *di, + ccdigest_ctx_t ctx, void *digest); +void ccdigest_final_64be(const struct ccdigest_info *di, ccdigest_ctx_t, + unsigned char *digest); +void ccdigest_final_64le(const struct ccdigest_info *di, ccdigest_ctx_t, + unsigned char *digest); + +#endif /* _CORECRYPTO_CCDIGEST_PRIV_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccdrbg.h b/EXTERNAL_HEADERS/corecrypto/ccdrbg.h new file mode 100644 index 000000000..152e0801f --- /dev/null +++ b/EXTERNAL_HEADERS/corecrypto/ccdrbg.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2007-2010 Apple Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/*! + @header corecrypto/ccdrbg.h + @abstract The functions provided in ccdrbg.h implement high-level accessors + to cryptographically secure random numbers. + + */ + +#ifndef _CORECRYPTO_CCDRBG_H_ +#define _CORECRYPTO_CCDRBG_H_ + +#include +#include + +/* TODO: Error codes ? 
*/ +#define CCDRBG_STATUS_OK 0 +#define CCDRBG_STATUS_ERROR (-1) +#define CCDRBG_STATUS_NEED_RESEED (-2) +#define CCDRBG_STATUS_PARAM_ERROR (-3) + +CC_INLINE size_t ccdrbg_context_size(const struct ccdrbg_info *drbg) +{ + return drbg->size; +} + +CC_INLINE int ccdrbg_init(const struct ccdrbg_info *info, + struct ccdrbg_state *drbg, + unsigned long entropyLength, const void* entropy, + unsigned long nonceLength, const void* nonce, + unsigned long psLength, const void* ps) +{ + return info->init(info, drbg, entropyLength, entropy, nonceLength, nonce, psLength, ps); +} + +CC_INLINE int ccdrbg_reseed(const struct ccdrbg_info *info, + struct ccdrbg_state *prng, + unsigned long entropylen, const void *entropy, + unsigned long inlen, const void *in) +{ + return info->reseed(prng, entropylen, entropy, inlen, in); +} + + +CC_INLINE int ccdrbg_generate(const struct ccdrbg_info *info, + struct ccdrbg_state *prng, + unsigned long outlen, void *out, + unsigned long inlen, const void *in) +{ + return info->generate(prng, outlen, out, inlen, in); +} + +CC_INLINE void ccdrbg_done(const struct ccdrbg_info *info, + struct ccdrbg_state *prng) +{ + info->done(prng); +} + + +extern struct ccdrbg_info ccdrbg_dummy_info; +extern struct ccdrbg_info ccdrbg_fipssha1_info; + +struct ccdrbg_nistctr_custom { + const struct ccmode_ecb *ecb; + unsigned long keylen; + int strictFIPS; + int use_df; +}; + +void ccdrbg_factory_nistctr(struct ccdrbg_info *info, const struct ccdrbg_nistctr_custom *custom); + +extern struct ccdrbg_info ccdrbg_nistdigest_info; + +struct ccdrbg_nisthmac_custom { + const struct ccdigest_info *di; + int strictFIPS; +}; + +// "class" method on nisthmac dbrg's to ask about their security_strength for a given di +int ccdbrg_nisthmac_security_strength(const struct ccdrbg_nisthmac_custom *custom); + +void ccdrbg_factory_nisthmac(struct ccdrbg_info *info, const struct ccdrbg_nisthmac_custom *custom); + +#endif /* _CORECRYPTO_CCDRBG_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h b/EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h new file mode 100644 index 000000000..efa1ef9ba --- /dev/null +++ b/EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h @@ -0,0 +1,68 @@ +/* + * ccdrbg_impl.h + * corecrypto + * + * Created by James Murphy on 12/9/11. + * Copyright (c) 2011 Apple Inc. All rights reserved. 
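 *
 * The struct ccdrbg_info table below is what the ccdrbg_init/
 * ccdrbg_reseed/ccdrbg_generate/ccdrbg_done wrappers in ccdrbg.h
 * dispatch through, in the order init -> (generate | reseed)* -> done.
 * A hedged sketch of wiring up a custom implementation (all my_*
 * names hypothetical):
 *
 *   const struct ccdrbg_info my_drbg_info = {
 *       .size     = sizeof(struct my_drbg_state),
 *       .init     = my_init,
 *       .reseed   = my_reseed,
 *       .generate = my_generate,
 *       .done     = my_done,
 *       .custom   = NULL,
 *   };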
+ * + */ + +#ifndef _CORECRYPTO_CCDRBG_IMPL_H_ +#define _CORECRYPTO_CCDRBG_IMPL_H_ + +/* opaque drbg structure */ +struct ccdrbg_state; + +struct ccdrbg_info { + /** Size of the DRBG state in bytes **/ + size_t size; + + /** Instantiate the PRNG + @param prng The PRNG state + @param entropylen Length of entropy + @param entropy Entropy bytes + @param inlen Length of additional input + @param in Additional input bytes + @return 0 if successful + */ + int (*init)(const struct ccdrbg_info *info, struct ccdrbg_state *drbg, + unsigned long entropyLength, const void* entropy, + unsigned long nonceLength, const void* nonce, + unsigned long psLength, const void* ps); + + /** Add entropy to the PRNG + @param prng The PRNG state + @param entropylen Length of entropy + @param entropy Entropy bytes + @param inlen Length of additional input + @param in Additional input bytes + @return 0 if successful + */ + int (*reseed)(struct ccdrbg_state *prng, + unsigned long entropylen, const void *entropy, + unsigned long inlen, const void *in); + + /** Read from the PRNG in a FIPS Testing compliant manor + @param prng The PRNG state to read from + @param out [out] Where to store the data + @param outlen Length of data desired (octets) + @param inlen Length of additional input + @param in Additional input bytes + @return 0 if successfull + */ + int (*generate)(struct ccdrbg_state *prng, + unsigned long outlen, void *out, + unsigned long inlen, const void *in); + + /** Terminate a PRNG state + @param prng The PRNG state to terminate + */ + void (*done)(struct ccdrbg_state *prng); + + /** private parameters */ + const void *custom; +}; + + + +#endif // _CORECRYPTO_CCDRBG_IMPL_H_ diff --git a/EXTERNAL_HEADERS/corecrypto/cchmac.h b/EXTERNAL_HEADERS/corecrypto/cchmac.h index b6fd0dcae..17e295fb6 100644 --- a/EXTERNAL_HEADERS/corecrypto/cchmac.h +++ b/EXTERNAL_HEADERS/corecrypto/cchmac.h @@ -29,7 +29,7 @@ typedef union { #define cchmac_ctx_n(STATE_SIZE, BLOCK_SIZE) ccn_nof_size(cchmac_ctx_size((STATE_SIZE), (BLOCK_SIZE))) #define cchmac_ctx_decl(STATE_SIZE, BLOCK_SIZE, _name_) cc_ctx_decl(struct cchmac_ctx, cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_) -#define cchmac_ctx_clear(STATE_SIZE, BLOCK_SIZE, _name_) cc_ctx_clear(struct cchmac_ctx, cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_) +#define cchmac_ctx_clear(STATE_SIZE, BLOCK_SIZE, _name_) cc_zero(cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_) #define cchmac_di_decl(_di_, _name_) cchmac_ctx_decl((_di_)->state_size, (_di_)->block_size, _name_) #define cchmac_di_clear(_di_, _name_) cchmac_ctx_clear((_di_)->state_size, (_di_)->block_size, _name_) @@ -44,11 +44,11 @@ typedef union { #define cchmac_ostateccn(_di_, HC) (ccdigest_ccn(cchmac_ostate(_di_, HC))) /* Convenience accessors for ccdigest_ctx_t fields. 
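   Most callers only need the one-shot interface; a hedged sketch,
   assuming the cchmac() entry point declared elsewhere in this header
   and the SHA-1 definitions from ccsha1.h:

     unsigned char mac[CCSHA1_OUTPUT_SIZE];
     cchmac(ccsha1_di(), key_len, key, msg_len, msg, mac);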
*/ -#define cchmac_istate(_di_, HC) ((ccdigest_state_t)(((cchmac_ctx_t)(HC)).digest)) -#define cchmac_istate8(_di_, HC) (ccdigest_u8(cchmac_istate(_di_, HC))) -#define cchmac_istate32(_di_, HC) (ccdigest_u32(cchmac_istate(_di_, HC))) -#define cchmac_istate64(_di_, HC) (ccdigest_u64(cchmac_istate(_di_, HC))) -#define cchmac_istateccn(_di_, HC) (ccdigest_ccn(cchmac_istate(_di_, HC))) +#define cchmac_istate(_di_, HC) ccdigest_state(_di_, ((cchmac_ctx_t)(HC)).digest) +#define cchmac_istate8(_di_, HC) ccdigest_u8(cchmac_istate(_di_, HC)) +#define cchmac_istate32(_di_, HC) ccdigest_u32(cchmac_istate(_di_, HC)) +#define cchmac_istate64(_di_, HC) ccdigest_u64(cchmac_istate(_di_, HC)) +#define cchmac_istateccn(_di_, HC) ccdigest_ccn(cchmac_istate(_di_, HC)) #define cchmac_data(_di_, HC) ccdigest_data(_di_, ((cchmac_ctx_t)(HC)).digest) #define cchmac_num(_di_, HC) ccdigest_num(_di_, ((cchmac_ctx_t)(HC)).digest) #define cchmac_nbits(_di_, HC) ccdigest_nbits(_di_, ((cchmac_ctx_t)(HC)).digest) diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode.h b/EXTERNAL_HEADERS/corecrypto/ccmode.h index 3224069e7..0c7a19479 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccmode.h +++ b/EXTERNAL_HEADERS/corecrypto/ccmode.h @@ -18,7 +18,7 @@ /* Declare a ecb key named _name_. Pass the size field of a struct ccmode_ecb for _size_. */ #define ccecb_ctx_decl(_size_, _name_) cc_ctx_decl(ccecb_ctx, _size_, _name_) -#define ccecb_ctx_clear(_size_, _name_) cc_ctx_clear(ccecb_ctx, _size_, _name_) +#define ccecb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) CC_INLINE size_t ccecb_context_size(const struct ccmode_ecb *mode) { @@ -31,26 +31,24 @@ CC_INLINE unsigned long ccecb_block_size(const struct ccmode_ecb *mode) } CC_INLINE void ccecb_init(const struct ccmode_ecb *mode, ccecb_ctx *ctx, - unsigned long key_len, const void *key) + size_t key_len, const void *key) { mode->init(mode, ctx, key_len, key); } CC_INLINE void ccecb_update(const struct ccmode_ecb *mode, const ccecb_ctx *ctx, - unsigned long in_len, const void *in, void *out) + unsigned long nblocks, const void *in, void *out) { - unsigned long numBlocks = (in_len / mode->block_size); - mode->ecb(ctx, numBlocks, in, out); + mode->ecb(ctx, nblocks, in, out); } CC_INLINE void ccecb_one_shot(const struct ccmode_ecb *mode, - unsigned long key_len, const void *key, unsigned long in_len, - const void *in, void *out) + size_t key_len, const void *key, + unsigned long nblocks, const void *in, void *out) { - unsigned long numBlocks = (in_len / mode->block_size); ccecb_ctx_decl(mode->size, ctx); mode->init(mode, ctx, key_len, key); - mode->ecb(ctx, numBlocks, in, out); + mode->ecb(ctx, nblocks, in, out); ccecb_ctx_clear(mode->size, ctx); } @@ -65,10 +63,10 @@ CC_INLINE void ccecb_one_shot(const struct ccmode_ecb *mode, /* Declare a cbc key named _name_. Pass the size field of a struct ccmode_cbc for _size_. */ #define cccbc_ctx_decl(_size_, _name_) cc_ctx_decl(cccbc_ctx, _size_, _name_) -#define cccbc_ctx_clear(_size_, _name_) cc_ctx_clear(cccbc_ctx, _size_, _name_) +#define cccbc_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) -/* Declare a cbc iv tweak named _name_. Pass the blocksize field of a struct ccmode_cbc - for _size_. */ +/* Declare a cbc iv tweak named _name_. Pass the blocksize field of a + struct ccmode_cbc for _size_. 
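   Keeping the chaining value outside the key schedule lets one
   expanded key drive several independent streams. A hedged sketch
   (ccaes_cbc_encrypt_mode() is assumed from ccaes.h, the rest is
   hypothetical):

     const struct ccmode_cbc *m = ccaes_cbc_encrypt_mode();
     cccbc_ctx_decl(m->size, ctx);
     cccbc_iv_decl(m->block_size, chain);
     cccbc_init(m, ctx, 16, key);                    /* AES-128 key */
     cccbc_set_iv(m, chain, iv_bytes);
     cccbc_update(m, ctx, chain, nblocks, in, out);  /* chain advances */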
*/ #define cccbc_iv_decl(_size_, _name_) cc_ctx_decl(cccbc_iv, _size_, _name_) #define cccbc_iv_clear(_size_, _name_) cc_ctx_clear(cccbc_iv, _size_, _name_) @@ -76,9 +74,9 @@ CC_INLINE void ccecb_one_shot(const struct ccmode_ecb *mode, Alternatively you can create a ccmode_cbc instance from any ccmode_ecb cipher. To do so, statically initialize a struct ccmode_cbc using the - CCMODE_FACTORY_CBC_DECRYPT or CCMODE_FACTORY_CBC_ENCRYPT macros. Alternatively - you can dynamically initialize a struct ccmode_cbc ccmode_factory_cbc_decrypt() - or ccmode_factory_cbc_encrypt(). */ + CCMODE_FACTORY_CBC_DECRYPT or CCMODE_FACTORY_CBC_ENCRYPT macros. + Alternatively you can dynamically initialize a struct ccmode_cbc + ccmode_factory_cbc_decrypt() or ccmode_factory_cbc_encrypt(). */ CC_INLINE size_t cccbc_context_size(const struct ccmode_cbc *mode) { @@ -91,34 +89,37 @@ CC_INLINE unsigned long cccbc_block_size(const struct ccmode_cbc *mode) } CC_INLINE void cccbc_init(const struct ccmode_cbc *mode, cccbc_ctx *ctx, - unsigned long key_len, const void *key) + size_t key_len, const void *key) { mode->init(mode, ctx, key_len, key); } -CC_INLINE void cccbc_set_iv(const struct ccmode_cbc *mode, cccbc_iv *iv_ctx, const void *iv) +CC_INLINE void cccbc_set_iv(const struct ccmode_cbc *mode, cccbc_iv *iv_ctx, + const void *iv) { - if(iv) + if (iv) cc_copy(mode->block_size, iv_ctx, iv); else cc_zero(mode->block_size, iv_ctx); } -CC_INLINE void cccbc_update(const struct ccmode_cbc *mode, cccbc_ctx *ctx, cccbc_iv *iv, - unsigned long nblocks, const void *in, void *out) +CC_INLINE void cccbc_update(const struct ccmode_cbc *mode, cccbc_ctx *ctx, + cccbc_iv *iv, unsigned long nblocks, + const void *in, void *out) { mode->cbc(ctx, iv, nblocks, in, out); } CC_INLINE void cccbc_one_shot(const struct ccmode_cbc *mode, - unsigned long key_len, const void *key, const void *iv, unsigned long nblocks, - const void *in, void *out) + unsigned long key_len, const void *key, + const void *iv, unsigned long nblocks, + const void *in, void *out) { cccbc_ctx_decl(mode->size, ctx); cccbc_iv_decl(mode->block_size, iv_ctx); mode->init(mode, ctx, key_len, key); - if(iv) - cccbc_set_iv (mode, iv_ctx, iv); + if (iv) + cccbc_set_iv(mode, iv_ctx, iv); else cc_zero(mode->block_size, iv_ctx); mode->cbc(ctx, iv_ctx, nblocks, in, out); @@ -128,9 +129,9 @@ CC_INLINE void cccbc_one_shot(const struct ccmode_cbc *mode, /* CFB mode. */ /* Declare a cfb key named _name_. Pass the size field of a struct ccmode_cfb - for _size_. */ + for _size_. 
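   Note that where the ECB and CBC updates above count whole blocks
   (nblocks), the CFB, CFB8, CTR and OFB updates in this header now
   take a byte count; a minimal hedged sketch (names hypothetical):

     cccfb_init(m, ctx, 16, key, iv);
     cccfb_update(m, ctx, nbytes, in, out);   /* nbytes, not nblocks */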
*/ #define cccfb_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb_ctx, _size_, _name_) -#define cccfb_ctx_clear(_size_, _name_) cc_ctx_clear(cccfb_ctx, _size_, _name_) +#define cccfb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) CC_INLINE size_t cccfb_context_size(const struct ccmode_cfb *mode) { @@ -143,24 +144,25 @@ CC_INLINE unsigned long cccfb_block_size(const struct ccmode_cfb *mode) } CC_INLINE void cccfb_init(const struct ccmode_cfb *mode, cccfb_ctx *ctx, - unsigned long key_len, const void *key, const void *iv) + size_t key_len, const void *key, + const void *iv) { mode->init(mode, ctx, key_len, key, iv); } CC_INLINE void cccfb_update(const struct ccmode_cfb *mode, cccfb_ctx *ctx, - unsigned long in_len, const void *in, void *out) + size_t nbytes, const void *in, void *out) { - mode->cfb(ctx, in_len, in, out); + mode->cfb(ctx, nbytes, in, out); } CC_INLINE void cccfb_one_shot(const struct ccmode_cfb *mode, - unsigned long key_len, const void *key, const void *iv, - unsigned long in_len, const void *in, void *out) + size_t key_len, const void *key, const void *iv, + size_t nbytes, const void *in, void *out) { cccfb_ctx_decl(mode->size, ctx); mode->init(mode, ctx, key_len, key, iv); - mode->cfb(ctx, in_len, in, out); + mode->cfb(ctx, nbytes, in, out); cccfb_ctx_clear(mode->size, ctx); } @@ -169,7 +171,7 @@ CC_INLINE void cccfb_one_shot(const struct ccmode_cfb *mode, /* Declare a cfb8 key named _name_. Pass the size field of a struct ccmode_cfb8 for _size_. */ #define cccfb8_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb8_ctx, _size_, _name_) -#define cccfb8_ctx_clear(_size_, _name_) cc_ctx_clear(cccfb8_ctx, _size_, _name_) +#define cccfb8_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) CC_INLINE size_t cccfb8_context_size(const struct ccmode_cfb8 *mode) { @@ -182,24 +184,24 @@ CC_INLINE unsigned long cccfb8_block_size(const struct ccmode_cfb8 *mode) } CC_INLINE void cccfb8_init(const struct ccmode_cfb8 *mode, cccfb8_ctx *ctx, - unsigned long key_len, const void *key, const void *iv) + size_t key_len, const void *key, const void *iv) { mode->init(mode, ctx, key_len, key, iv); } CC_INLINE void cccfb8_update(const struct ccmode_cfb8 *mode, cccfb8_ctx *ctx, - unsigned long in_len, const void *in, void *out) + size_t nbytes, const void *in, void *out) { - mode->cfb8(ctx, in_len, in, out); + mode->cfb8(ctx, nbytes, in, out); } CC_INLINE void cccfb8_one_shot(const struct ccmode_cfb8 *mode, - unsigned long key_len, const void *key, const void *iv, - unsigned long in_len, const void *in, void *out) + size_t key_len, const void *key, const void *iv, + size_t nbytes, const void *in, void *out) { cccfb8_ctx_decl(mode->size, ctx); mode->init(mode, ctx, key_len, key, iv); - mode->cfb8(ctx, in_len, in, out); + mode->cfb8(ctx, nbytes, in, out); cccfb8_ctx_clear(mode->size, ctx); } @@ -208,7 +210,7 @@ CC_INLINE void cccfb8_one_shot(const struct ccmode_cfb8 *mode, /* Declare a ctr key named _name_. Pass the size field of a struct ccmode_ctr for _size_. */ #define ccctr_ctx_decl(_size_, _name_) cc_ctx_decl(ccctr_ctx, _size_, _name_) -#define ccctr_ctx_clear(_size_, _name_) cc_ctx_clear(ccctr_ctx, _size_, _name_) +#define ccctr_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) /* This is Integer Counter Mode: The IV is the initial value of the counter that is incremented by 1 for each new block. 
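   (So with an all-zero IV the keystream is E_K(0), E_K(1), E_K(2), ...,
   each encrypted counter block XORed against the corresponding input
   block.)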
Use the mode flags to select @@ -225,26 +227,24 @@ CC_INLINE unsigned long ccctr_block_size(const struct ccmode_ctr *mode) } CC_INLINE void ccctr_init(const struct ccmode_ctr *mode, ccctr_ctx *ctx, - unsigned long key_len, const void *key, const void *iv) + size_t key_len, const void *key, const void *iv) { mode->init(mode, ctx, key_len, key, iv); } CC_INLINE void ccctr_update(const struct ccmode_ctr *mode, ccctr_ctx *ctx, - unsigned long in_len, const void *in, void *out) + size_t nbytes, const void *in, void *out) { - unsigned long numBlocks = (in_len / mode->block_size); - mode->ctr(ctx, numBlocks, in, out); + mode->ctr(ctx, nbytes, in, out); } CC_INLINE void ccctr_one_shot(const struct ccmode_ctr *mode, - unsigned long key_len, const void *key, const void *iv, - unsigned long in_len, const void *in, void *out) + size_t key_len, const void *key, const void *iv, + size_t nbytes, const void *in, void *out) { - unsigned long numBlocks = (in_len / mode->block_size); ccctr_ctx_decl(mode->size, ctx); mode->init(mode, ctx, key_len, key, iv); - mode->ctr(ctx, numBlocks, in, out); + mode->ctr(ctx, nbytes, in, out); ccctr_ctx_clear(mode->size, ctx); } @@ -254,7 +254,7 @@ CC_INLINE void ccctr_one_shot(const struct ccmode_ctr *mode, /* Declare a ofb key named _name_. Pass the size field of a struct ccmode_ofb for _size_. */ #define ccofb_ctx_decl(_size_, _name_) cc_ctx_decl(ccofb_ctx, _size_, _name_) -#define ccofb_ctx_clear(_size_, _name_) cc_ctx_clear(ccofb_ctx, _size_, _name_) +#define ccofb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) CC_INLINE size_t ccofb_context_size(const struct ccmode_ofb *mode) { @@ -267,24 +267,24 @@ CC_INLINE unsigned long ccofb_block_size(const struct ccmode_ofb *mode) } CC_INLINE void ccofb_init(const struct ccmode_ofb *mode, ccofb_ctx *ctx, - unsigned long key_len, const void *key, const void *iv) + size_t key_len, const void *key, const void *iv) { mode->init(mode, ctx, key_len, key, iv); } CC_INLINE void ccofb_update(const struct ccmode_ofb *mode, ccofb_ctx *ctx, - unsigned long in_len, const void *in, void *out) + size_t nbytes, const void *in, void *out) { - mode->ofb(ctx, in_len, in, out); + mode->ofb(ctx, nbytes, in, out); } CC_INLINE void ccofb_one_shot(const struct ccmode_ofb *mode, - unsigned long key_len, const void *key, const void *iv, - unsigned long in_len, const void *in, void *out) + size_t key_len, const void *key, const void *iv, + size_t nbytes, const void *in, void *out) { ccofb_ctx_decl(mode->size, ctx); mode->init(mode, ctx, key_len, key, iv); - mode->ofb(ctx, in_len, in, out); + mode->ofb(ctx, nbytes, in, out); ccofb_ctx_clear(mode->size, ctx); } @@ -295,20 +295,20 @@ CC_INLINE void ccofb_one_shot(const struct ccmode_ofb *mode, /* Declare a xts key named _name_. Pass the size field of a struct ccmode_xts for _size_. */ #define ccxts_ctx_decl(_size_, _name_) cc_ctx_decl(ccxts_ctx, _size_, _name_) -#define ccxts_ctx_clear(_size_, _name_) cc_ctx_clear(ccxts_ctx, _size_, _name_) +#define ccxts_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) -/* Declare a xts tweak named _name_. Pass the tweak_size field of a struct ccmode_xts - for _size_. */ +/* Declare a xts tweak named _name_. Pass the tweak_size field of a + struct ccmode_xts for _size_. 
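   The tweak context carries the per-sector state, including the FIPS
   block counter held in struct _ccmode_xts_tweak (see
   ccmode_factory.h). A hedged per-sector sketch (names hypothetical):

     ccxts_ctx_decl(m->size, ctx);
     ccxts_tweak_decl(m->tweak_size, twk);
     ccxts_init(m, ctx, 32, data_key, tweak_key);
     ccxts_set_tweak(m, ctx, twk, sector_iv);  /* once per sector; resets the counter */
     ccxts_update(m, ctx, twk, nblocks, in, out);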
*/ #define ccxts_tweak_decl(_size_, _name_) cc_ctx_decl(ccxts_tweak, _size_, _name_) -#define ccxts_tweak_clear(_size_, _name_) cc_ctx_clear(ccxts_tweak, _size_, _name_) +#define ccxts_tweak_clear(_size_, _name_) cc_zero(_size_, _name_) /* Actual symmetric algorithm implementation can provide you one of these. Alternatively you can create a ccmode_xts instance from any ccmode_ecb cipher. To do so, statically initialize a struct ccmode_xts using the CCMODE_FACTORY_XTS_DECRYPT or CCMODE_FACTORY_XTS_ENCRYPT macros. Alternatively - you can dynamically initialize a struct ccmode_xts ccmode_factory_xts_decrypt() - or ccmode_factory_xts_encrypt(). */ + you can dynamically initialize a struct ccmode_xts + ccmode_factory_xts_decrypt() or ccmode_factory_xts_encrypt(). */ /* NOTE that xts mode does not do cts padding. It's really an xex mode. If you need cts padding use the ccpad_xts_encrypt and ccpad_xts_decrypt @@ -326,32 +326,34 @@ CC_INLINE unsigned long ccxts_block_size(const struct ccmode_xts *mode) } CC_INLINE void ccxts_init(const struct ccmode_xts *mode, ccxts_ctx *ctx, - unsigned long key_len, const void *key, const void *tweak_key) + size_t key_len, const void *key, + const void *tweak_key) { mode->init(mode, ctx, key_len, key, tweak_key); } -CC_INLINE void ccxts_set_tweak(const struct ccmode_xts *mode, ccxts_ctx *ctx, ccxts_tweak *tweak, const void *iv) +CC_INLINE void ccxts_set_tweak(const struct ccmode_xts *mode, ccxts_ctx *ctx, + ccxts_tweak *tweak, const void *iv) { mode->set_tweak(ctx, tweak, iv); } CC_INLINE void *ccxts_update(const struct ccmode_xts *mode, ccxts_ctx *ctx, - ccxts_tweak *tweak, unsigned long in_len, const void *in, void *out) + ccxts_tweak *tweak, unsigned long nblocks, const void *in, void *out) { - return mode->xts(ctx, tweak, in_len, in, out); + return mode->xts(ctx, tweak, nblocks, in, out); } CC_INLINE void ccxts_one_shot(const struct ccmode_xts *mode, - unsigned long key_len, const void *key, const void *tweak_key, - const void* iv, - unsigned long in_len, const void *in, void *out) + size_t key_len, const void *key, + const void *tweak_key, const void *iv, + unsigned long nblocks, const void *in, void *out) { ccxts_ctx_decl(mode->size, ctx); ccxts_tweak_decl(mode->tweak_size, tweak); mode->init(mode, ctx, key_len, key, tweak_key); mode->set_tweak(ctx, tweak, iv); - mode->xts(ctx, tweak, in_len, in, out); + mode->xts(ctx, tweak, nblocks, in, out); ccxts_ctx_clear(mode->size, ctx); ccxts_tweak_clear(mode->tweak_size, tweak); } @@ -361,7 +363,7 @@ CC_INLINE void ccxts_one_shot(const struct ccmode_xts *mode, /* Declare a gcm key named _name_. Pass the size field of a struct ccmode_gcm for _size_. 
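   The natural streaming order is the one the one-shot below follows:
   init, set_iv, gmac over the additional authenticated data, gcm over
   the payload, then finalize for the tag. A hedged sketch (names
   hypothetical):

     ccgcm_init(m, ctx, 16, key);
     ccgcm_set_iv(m, ctx, 12, iv);             /* 12-byte IVs are typical */
     ccgcm_gmac(m, ctx, aad_len, aad);
     ccgcm_update(m, ctx, nbytes, in, out);
     ccgcm_finalize(m, ctx, 16, tag);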
*/ #define ccgcm_ctx_decl(_size_, _name_) cc_ctx_decl(ccgcm_ctx, _size_, _name_) -#define ccgcm_ctx_clear(_size_, _name_) cc_ctx_clear(ccgcm_ctx, _size_, _name_) +#define ccgcm_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) CC_INLINE size_t ccgcm_context_size(const struct ccmode_gcm *mode) { @@ -374,30 +376,31 @@ CC_INLINE unsigned long ccgcm_block_size(const struct ccmode_gcm *mode) } CC_INLINE void ccgcm_init(const struct ccmode_gcm *mode, ccgcm_ctx *ctx, - unsigned long key_len, const void *key) + size_t key_len, const void *key) { mode->init(mode, ctx, key_len, key); } -CC_INLINE void ccgcm_set_iv(const struct ccmode_gcm *mode, ccgcm_ctx *ctx, size_t iv_size, const void *iv) +CC_INLINE void ccgcm_set_iv(const struct ccmode_gcm *mode, ccgcm_ctx *ctx, + size_t iv_size, const void *iv) { mode->set_iv(ctx, iv_size, iv); } CC_INLINE void ccgcm_gmac(const struct ccmode_gcm *mode, ccgcm_ctx *ctx, - unsigned long nbytes, const void *in) + size_t nbytes, const void *in) { mode->gmac(ctx, nbytes, in); } CC_INLINE void ccgcm_update(const struct ccmode_gcm *mode, ccgcm_ctx *ctx, - unsigned long nbytes, const void *in, void *out) + size_t nbytes, const void *in, void *out) { mode->gcm(ctx, nbytes, in, out); } CC_INLINE void ccgcm_finalize(const struct ccmode_gcm *mode, ccgcm_ctx *ctx, - size_t tag_size, void *tag) + size_t tag_size, void *tag) { mode->finalize(ctx, tag_size, tag); } @@ -409,11 +412,11 @@ CC_INLINE void ccgcm_reset(const struct ccmode_gcm *mode, ccgcm_ctx *ctx) CC_INLINE void ccgcm_one_shot(const struct ccmode_gcm *mode, - unsigned long key_len, const void *key, - unsigned long iv_len, const void *iv, - unsigned long nbytes, const void *in, void *out, - unsigned long adata_len, const void* adata, - size_t tag_len, void *tag) + size_t key_len, const void *key, + size_t iv_len, const void *iv, + size_t adata_len, const void *adata, + size_t nbytes, const void *in, void *out, + size_t tag_len, void *tag) { ccgcm_ctx_decl(mode->size, ctx); mode->init(mode, ctx, key_len, key); @@ -424,13 +427,89 @@ CC_INLINE void ccgcm_one_shot(const struct ccmode_gcm *mode, ccgcm_ctx_clear(mode->size, ctx); } +/* CCM */ + +#define ccccm_ctx_decl(_size_, _name_) cc_ctx_decl(ccccm_ctx, _size_, _name_) +#define ccccm_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) + +/* Declare a ccm nonce named _name_. Pass the mode->nonce_ctx_size for _size_. 
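   (That is, the nonce_size field of struct ccmode_ccm, which the
   factory macros in ccmode_factory.h populate.) Unlike GCM, CCM needs
   every length up front, which is why set_iv takes mac_size, auth_len
   and data_len; a hedged sketch mirroring ccccm_one_shot() below
   (names hypothetical):

     ccccm_init(m, ctx, 16, key);
     ccccm_set_iv(m, ctx, nonce_ctx, 13, nonce, 16, aad_len, nbytes);
     ccccm_cbcmac(m, ctx, nonce_ctx, aad_len, aad);
     ccccm_update(m, ctx, nonce_ctx, nbytes, in, out);
     ccccm_finalize(m, ctx, nonce_ctx, mac);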
*/ +#define ccccm_nonce_decl(_size_, _name_) cc_ctx_decl(ccccm_nonce, _size_, _name_) +#define ccccm_nonce_clear(_size_, _name_) cc_zero(_size_, _name_) + + +CC_INLINE size_t ccccm_context_size(const struct ccmode_ccm *mode) +{ + return mode->size; +} + +CC_INLINE unsigned long ccccm_block_size(const struct ccmode_ccm *mode) +{ + return mode->block_size; +} + +CC_INLINE void ccccm_init(const struct ccmode_ccm *mode, ccccm_ctx *ctx, + size_t key_len, const void *key) +{ + mode->init(mode, ctx, key_len, key); +} + +CC_INLINE void ccccm_set_iv(const struct ccmode_ccm *mode, ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, + size_t nonce_len, const void *nonce, + size_t mac_size, size_t auth_len, size_t data_len) +{ + mode->set_iv(ctx, nonce_ctx, nonce_len, nonce, mac_size, auth_len, data_len); +} + +CC_INLINE void ccccm_cbcmac(const struct ccmode_ccm *mode, ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, + size_t nbytes, const void *in) +{ + mode->cbcmac(ctx, nonce_ctx, nbytes, in); +} + +CC_INLINE void ccccm_update(const struct ccmode_ccm *mode, ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, + size_t nbytes, const void *in, void *out) +{ + mode->ccm(ctx, nonce_ctx, nbytes, in, out); +} + +CC_INLINE void ccccm_finalize(const struct ccmode_ccm *mode, ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, + void *mac) +{ + mode->finalize(ctx, nonce_ctx, mac); +} + +CC_INLINE void ccccm_reset(const struct ccmode_ccm *mode, ccccm_ctx *ctx, ccccm_nonce *nonce_ctx) +{ + mode->reset(ctx, nonce_ctx); +} + + +CC_INLINE void ccccm_one_shot(const struct ccmode_ccm *mode, + unsigned long key_len, const void *key, + unsigned nonce_len, const void *nonce, + unsigned long nbytes, const void *in, void *out, + unsigned adata_len, const void* adata, + unsigned mac_size, void *mac) +{ + ccccm_ctx_decl(mode->size, ctx); + ccccm_nonce_decl(mode->nonce_size, nonce_ctx); + mode->init(mode, ctx, key_len, key); + mode->set_iv(ctx, nonce_ctx, nonce_len, nonce, mac_size, adata_len, nbytes); + mode->cbcmac(ctx, nonce_ctx, adata_len, adata); + mode->ccm(ctx, nonce_ctx, nbytes, in, out); + mode->finalize(ctx, nonce_ctx, mac); + ccccm_ctx_clear(mode->size, ctx); + ccccm_nonce_clear(mode->size, nonce_ctx); +} + + /* OMAC mode. */ /* Declare a omac key named _name_. Pass the size field of a struct ccmode_omac for _size_. 
*/ #define ccomac_ctx_decl(_size_, _name_) cc_ctx_decl(ccomac_ctx, _size_, _name_) -#define ccomac_ctx_clear(_size_, _name_) cc_ctx_clear(ccomac_ctx, _size_, _name_) +#define ccomac_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) CC_INLINE size_t ccomac_context_size(const struct ccmode_omac *mode) { @@ -443,24 +522,24 @@ CC_INLINE unsigned long ccomac_block_size(const struct ccmode_omac *mode) } CC_INLINE void ccomac_init(const struct ccmode_omac *mode, ccomac_ctx *ctx, - unsigned long tweak_len, unsigned long key_len, const void *key) + size_t tweak_len, size_t key_len, const void *key) { return mode->init(mode, ctx, tweak_len, key_len, key); } CC_INLINE int ccomac_update(const struct ccmode_omac *mode, ccomac_ctx *ctx, - unsigned long in_len, const void *tweak, const void *in, void *out) + unsigned long nblocks, const void *tweak, const void *in, void *out) { - return mode->omac(ctx, in_len, tweak, in, out); + return mode->omac(ctx, nblocks, tweak, in, out); } CC_INLINE int ccomac_one_shot(const struct ccmode_omac *mode, - unsigned long tweak_len, unsigned long key_len, const void *key, - const void *tweak, unsigned long in_len, const void *in, void *out) + size_t tweak_len, size_t key_len, const void *key, + const void *tweak, unsigned long nblocks, const void *in, void *out) { ccomac_ctx_decl(mode->size, ctx); mode->init(mode, ctx, tweak_len, key_len, key); - int result = mode->omac(ctx, in_len, tweak, in, out); + int result = mode->omac(ctx, nblocks, tweak, in, out); ccomac_ctx_clear(mode->size, ctx); return result; } diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h b/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h index 3233c9916..8ffe1fbd6 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h +++ b/EXTERNAL_HEADERS/corecrypto/ccmode_factory.h @@ -10,7 +10,7 @@ #ifndef _CORECRYPTO_CCMODE_FACTORY_H_ #define _CORECRYPTO_CCMODE_FACTORY_H_ -#include /* TODO: Remove dependancy on this header. */ +#include /* TODO: Remove dependency on this header. 
*/ #include /* For CBC, direction of underlying ecb is the same as the cbc direction */ @@ -61,6 +61,9 @@ const struct ccmode_##_mode_ *cc##_cipher_##_##_mode_##_##_dir_##_mode(void) /* For GCM, same as CFB */ #define CCMODE_GCM_FACTORY(_cipher_, _dir_) CCMODE_CFB_FACTORY(_cipher_, gcm, _dir_) +/* For CCM, same as CFB */ +#define CCMODE_CCM_FACTORY(_cipher_, _dir_) CCMODE_CFB_FACTORY(_cipher_, ccm, _dir_) + /* Fot XTS, you always need an ecb encrypt */ #define CCMODE_XTS_FACTORY(_cipher_ , _dir_) \ @@ -107,13 +110,12 @@ const struct ccmode_cbc *cc3des_cbc_encrypt_mode(void) { -void *ccmode_cbc_init(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long rawkey_len, const void *rawkey, - const void *iv); -void *ccmode_cbc_decrypt(cccbc_ctx *ctx, unsigned long nblocks, - const void *in, void *out); -void *ccmode_cbc_encrypt(cccbc_ctx *ctx, unsigned long nblocks, - const void *in, void *out); +void ccmode_cbc_init(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, + size_t rawkey_len, const void *rawkey); +void ccmode_cbc_decrypt(const cccbc_ctx *ctx, cccbc_iv *iv, unsigned long nblocks, + const void *in, void *out); +void ccmode_cbc_encrypt(const cccbc_ctx *ctx, cccbc_iv *iv, unsigned long nblocks, + const void *in, void *out); struct _ccmode_cbc_key { const struct ccmode_ecb *ecb; @@ -162,11 +164,11 @@ void ccmode_factory_cbc_encrypt(struct ccmode_cbc *cbc, void ccmode_cfb_init(const struct ccmode_cfb *cfb, cccfb_ctx *ctx, - unsigned long rawkey_len, const void *rawkey, + size_t rawkey_len, const void *rawkey, const void *iv); -void ccmode_cfb_decrypt(cccfb_ctx *ctx, unsigned long nblocks, +void ccmode_cfb_decrypt(cccfb_ctx *ctx, size_t nbytes, const void *in, void *out); -void ccmode_cfb_encrypt(cccfb_ctx *ctx, unsigned long nblocks, +void ccmode_cfb_encrypt(cccfb_ctx *ctx, size_t nbytes, const void *in, void *out); struct _ccmode_cfb_key { @@ -210,18 +212,17 @@ void ccmode_factory_cfb_decrypt(struct ccmode_cfb *cfb, parameter. */ CC_INLINE void ccmode_factory_cfb_encrypt(struct ccmode_cfb *cfb, - const struct ccmode_ecb *ecb) { + const struct ccmode_ecb *ecb) { struct ccmode_cfb cfb_encrypt = CCMODE_FACTORY_CFB_ENCRYPT(ecb); *cfb = cfb_encrypt; } void ccmode_cfb8_init(const struct ccmode_cfb8 *cfb8, cccfb8_ctx *ctx, - unsigned long rawkey_len, const void *rawkey, - const void *iv); -void ccmode_cfb8_decrypt(cccfb8_ctx *ctx, unsigned long nbytes, + size_t rawkey_len, const void *rawkey, const void *iv); +void ccmode_cfb8_decrypt(cccfb8_ctx *ctx, size_t nbytes, const void *in, void *out); -void ccmode_cfb8_encrypt(cccfb8_ctx *ctx, unsigned long nbytes, +void ccmode_cfb8_encrypt(cccfb8_ctx *ctx, size_t nbytes, const void *in, void *out); struct _ccmode_cfb8_key { @@ -253,7 +254,7 @@ struct _ccmode_cfb8_key { parameter. */ CC_INLINE void ccmode_factory_cfb8_decrypt(struct ccmode_cfb8 *cfb8, - const struct ccmode_ecb *ecb) { + const struct ccmode_ecb *ecb) { struct ccmode_cfb8 cfb8_decrypt = CCMODE_FACTORY_CFB8_DECRYPT(ecb); *cfb8 = cfb8_decrypt; } @@ -264,15 +265,14 @@ void ccmode_factory_cfb8_decrypt(struct ccmode_cfb8 *cfb8, parameter. 
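   A hedged example of the factory pattern, building a CFB8 encryptor
   over AES at runtime (ccaes_ecb_encrypt_mode() is assumed from
   ccaes.h):

     struct ccmode_cfb8 cfb8;
     ccmode_factory_cfb8_encrypt(&cfb8, ccaes_ecb_encrypt_mode());
     /* cfb8 is now usable with the cccfb8_* wrappers in ccmode.h */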
*/ CC_INLINE void ccmode_factory_cfb8_encrypt(struct ccmode_cfb8 *cfb8, - const struct ccmode_ecb *ecb) { + const struct ccmode_ecb *ecb) { struct ccmode_cfb8 cfb8_encrypt = CCMODE_FACTORY_CFB8_ENCRYPT(ecb); *cfb8 = cfb8_encrypt; } void ccmode_ctr_init(const struct ccmode_ctr *ctr, ccctr_ctx *ctx, - unsigned long rawkey_len, const void *rawkey, - const void *iv); -void ccmode_ctr_crypt(ccctr_ctx *ctx, unsigned long nblocks, + size_t rawkey_len, const void *rawkey, const void *iv); +void ccmode_ctr_crypt(ccctr_ctx *ctx, size_t nbytes, const void *in, void *out); struct _ccmode_ctr_key { @@ -296,7 +296,7 @@ struct _ccmode_ctr_key { parameter. */ CC_INLINE void ccmode_factory_ctr_crypt(struct ccmode_ctr *ctr, - const struct ccmode_ecb *ecb) { + const struct ccmode_ecb *ecb) { struct ccmode_ctr ctr_crypt = CCMODE_FACTORY_CTR_CRYPT(ecb); *ctr = ctr_crypt; } @@ -315,17 +315,20 @@ void ccmode_factory_ctr_crypt(struct ccmode_ctr *ctr, extern const unsigned char gcm_shift_table[256*2]; #endif +#if defined(__x86_64__) || defined(__arm64__) +#define VNG_SPEEDUP 1 +#endif /* Create a gcm key from a gcm mode object. key must point to at least sizeof(CCMODE_GCM_KEY(ecb)) bytes of free storage. */ void ccmode_gcm_init(const struct ccmode_gcm *gcm, ccgcm_ctx *ctx, - unsigned long rawkey_len, const void *rawkey); + size_t rawkey_len, const void *rawkey); void ccmode_gcm_set_iv(ccgcm_ctx *ctx, size_t iv_size, const void *iv); -void ccmode_gcm_gmac(ccgcm_ctx *ctx, unsigned long nbytes, const void *in); -void ccmode_gcm_decrypt(ccgcm_ctx *ctx, unsigned long nbytes, const void *in, +void ccmode_gcm_gmac(ccgcm_ctx *ctx, size_t nbytes, const void *in); +void ccmode_gcm_decrypt(ccgcm_ctx *ctx, size_t nbytes, const void *in, void *out); -void ccmode_gcm_encrypt(ccgcm_ctx *ctx, unsigned long nbytes, const void *in, +void ccmode_gcm_encrypt(ccgcm_ctx *ctx, size_t nbytes, const void *in, void *out); void ccmode_gcm_finalize(ccgcm_ctx *key, size_t tag_size, void *tag); void ccmode_gcm_reset(ccgcm_ctx *key); @@ -355,7 +358,12 @@ struct _ccmode_gcm_key { ; #endif /* CCMODE_GCM_TABLES */ +#ifdef VNG_SPEEDUP + unsigned char Htable[16*8*2] __attribute__((aligned(16))); +#endif + cc_unit u[]; + }; /* Use this to statically initialize a ccmode_gcm object for decryption. */ @@ -390,7 +398,7 @@ struct _ccmode_gcm_key { parameter. */ CC_INLINE void ccmode_factory_gcm_decrypt(struct ccmode_gcm *gcm, - const struct ccmode_ecb *ecb_encrypt) { + const struct ccmode_ecb *ecb_encrypt) { struct ccmode_gcm gcm_decrypt = CCMODE_FACTORY_GCM_DECRYPT(ecb_encrypt); *gcm = gcm_decrypt; } @@ -401,16 +409,104 @@ void ccmode_factory_gcm_decrypt(struct ccmode_gcm *gcm, parameter. 
*/ CC_INLINE void ccmode_factory_gcm_encrypt(struct ccmode_gcm *gcm, - const struct ccmode_ecb *ecb_encrypt) { + const struct ccmode_ecb *ecb_encrypt) { struct ccmode_gcm gcm_encrypt = CCMODE_FACTORY_GCM_ENCRYPT(ecb_encrypt); *gcm = gcm_encrypt; } +/* CCM (only NIST approved with AES) */ +void ccmode_ccm_init(const struct ccmode_ccm *ccm, ccccm_ctx *ctx, + size_t rawkey_len, const void *rawkey); +void ccmode_ccm_set_iv(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nonce_len, const void *nonce, + size_t mac_size, size_t auth_len, size_t data_len); +/* internal function */ +void ccmode_ccm_macdata(ccccm_ctx *key, ccccm_nonce *nonce_ctx, unsigned new_block, size_t nbytes, const void *in); +/* api function - disallows only mac'd data after data to encrypt was sent */ +void ccmode_ccm_cbcmac(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in); +/* internal function */ +void ccmode_ccm_crypt(ccccm_ctx *key, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in, void *out); +void ccmode_ccm_decrypt(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in, + void *out); +void ccmode_ccm_encrypt(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in, + void *out); +void ccmode_ccm_finalize(ccccm_ctx *key, ccccm_nonce *nonce_ctx, void *mac); +void ccmode_ccm_reset(ccccm_ctx *key, ccccm_nonce *nonce_ctx); + +struct _ccmode_ccm_key { + const struct ccmode_ecb *ecb; + cc_unit u[]; +}; + +struct _ccmode_ccm_nonce { + unsigned char A_i[16]; /* crypto block iv */ + unsigned char B_i[16]; /* mac block iv */ + unsigned char MAC[16]; /* crypted mac */ + unsigned char buf[16]; /* crypt buffer */ + + uint32_t mode; /* mode: IV -> AD -> DATA */ + uint32_t buflen; /* length of data in buf */ + uint32_t b_i_len; /* length of cbcmac data in B_i */ + + size_t nonce_size; + size_t mac_size; +}; + +/* Use this to statically initialize a ccmode_ccm object for decryption. */ +#define CCMODE_FACTORY_CCM_DECRYPT(ECB_ENCRYPT) { \ +.size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_key)) + ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \ +.nonce_size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_nonce)), \ +.block_size = 1, \ +.init = ccmode_ccm_init, \ +.set_iv = ccmode_ccm_set_iv, \ +.cbcmac = ccmode_ccm_cbcmac, \ +.ccm = ccmode_ccm_decrypt, \ +.finalize = ccmode_ccm_finalize, \ +.reset = ccmode_ccm_reset, \ +.custom = (ECB_ENCRYPT) \ +} + +/* Use this to statically initialize a ccmode_ccm object for encryption. */ +#define CCMODE_FACTORY_CCM_ENCRYPT(ECB_ENCRYPT) { \ +.size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_key)) + ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \ +.nonce_size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_nonce)), \ +.block_size = 1, \ +.init = ccmode_ccm_init, \ +.set_iv = ccmode_ccm_set_iv, \ +.cbcmac = ccmode_ccm_cbcmac, \ +.ccm = ccmode_ccm_encrypt, \ +.finalize = ccmode_ccm_finalize, \ +.reset = ccmode_ccm_reset, \ +.custom = (ECB_ENCRYPT) \ +} + +/* Use these function to runtime initialize a ccmode_ccm decrypt object (for + example if it's part of a larger structure). For CCM you always pass a + ecb encrypt mode implementation of some underlying algorithm as the ecb + parameter. 
*/ +CC_INLINE +void ccmode_factory_ccm_decrypt(struct ccmode_ccm *ccm, + const struct ccmode_ecb *ecb_encrypt) { + struct ccmode_ccm ccm_decrypt = CCMODE_FACTORY_CCM_DECRYPT(ecb_encrypt); + *ccm = ccm_decrypt; +} + +/* Use these function to runtime initialize a ccmode_ccm encrypt object (for + example if it's part of a larger structure). For CCM you always pass a + ecb encrypt mode implementation of some underlying algorithm as the ecb + parameter. */ +CC_INLINE +void ccmode_factory_ccm_encrypt(struct ccmode_ccm *ccm, + const struct ccmode_ecb *ecb_encrypt) { + struct ccmode_ccm ccm_encrypt = CCMODE_FACTORY_CCM_ENCRYPT(ecb_encrypt); + *ccm = ccm_encrypt; +} + + void ccmode_ofb_init(const struct ccmode_ofb *ofb, ccofb_ctx *ctx, - unsigned long rawkey_len, const void *rawkey, + size_t rawkey_len, const void *rawkey, const void *iv); -void ccmode_ofb_crypt(ccofb_ctx *ctx, unsigned long nblocks, +void ccmode_ofb_crypt(ccofb_ctx *ctx, size_t nbytes, const void *in, void *out); struct _ccmode_ofb_key { @@ -434,7 +530,7 @@ struct _ccmode_ofb_key { parameter. */ CC_INLINE void ccmode_factory_ofb_crypt(struct ccmode_ofb *ofb, - const struct ccmode_ecb *ecb) { + const struct ccmode_ecb *ecb) { struct ccmode_ofb ofb_crypt = CCMODE_FACTORY_OFB_CRYPT(ecb); *ofb = ofb_crypt; } @@ -451,7 +547,7 @@ int ccmode_omac_encrypt(ccomac_ctx *ctx, unsigned long nblocks, key must point to at least sizeof(CCMODE_OMAC_KEY(ecb)) bytes of free storage. */ void ccmode_omac_init(const struct ccmode_omac *omac, ccomac_ctx *ctx, - cc_size tweak_len, unsigned long rawkey_len, + cc_size tweak_len, size_t rawkey_len, const void *rawkey); struct _ccmode_omac_key { @@ -484,7 +580,7 @@ struct _ccmode_omac_key { parameter. */ CC_INLINE void ccmode_factory_omac_decrypt(struct ccmode_omac *omac, - const struct ccmode_ecb *ecb) { + const struct ccmode_ecb *ecb) { struct ccmode_omac omac_decrypt = CCMODE_FACTORY_OMAC_DECRYPT(ecb); *omac = omac_decrypt; } @@ -495,7 +591,7 @@ void ccmode_factory_omac_decrypt(struct ccmode_omac *omac, parameter. */ CC_INLINE void ccmode_factory_omac_encrypt(struct ccmode_omac *omac, - const struct ccmode_ecb *ecb) { + const struct ccmode_ecb *ecb) { struct ccmode_omac omac_encrypt = CCMODE_FACTORY_OMAC_ENCRYPT(ecb); *omac = omac_encrypt; } @@ -503,28 +599,34 @@ void ccmode_factory_omac_encrypt(struct ccmode_omac *omac, /* Function prototypes used by the macros below, do not call directly. */ void ccmode_xts_init(const struct ccmode_xts *xts, ccxts_ctx *ctx, - unsigned long key_len, const void *data_key, + size_t key_len, const void *data_key, const void *tweak_key); -void *ccmode_xts_crypt(ccxts_ctx *ctx, unsigned long nblocks, - const void *in, void *out); -void ccmode_xts_set_tweak(ccxts_ctx *ctx, const void *tweak); +void *ccmode_xts_crypt(const ccxts_ctx *ctx, ccxts_tweak *tweak, + unsigned long nblocks, const void *in, void *out); +void ccmode_xts_set_tweak(const ccxts_ctx *ctx, ccxts_tweak *tweak, + const void *iv); struct _ccmode_xts_key { const struct ccmode_ecb *ecb; const struct ccmode_ecb *ecb_encrypt; - // FIPS requires that for XTS that no more that 2^20 AES blocks may be processed for any given - // Key, Tweak Key, and tweak combination - // the bytes_processed field in the context will accumuate the number of blocks processed and - // will fail the encrypt/decrypt if the size is violated. This counter will be reset to 0 - // when set_tweak is called. 
- unsigned long blocks_processed; + cc_unit u[]; +}; + +struct _ccmode_xts_tweak { + // FIPS requires that for XTS that no more that 2^20 AES blocks may be processed for any given + // Key, Tweak Key, and tweak combination + // the bytes_processed field in the context will accumuate the number of blocks processed and + // will fail the encrypt/decrypt if the size is violated. This counter will be reset to 0 + // when set_tweak is called. + unsigned long blocks_processed; cc_unit u[]; }; /* Use this to statically initialize a ccmode_xts object for decryption. */ #define CCMODE_FACTORY_XTS_DECRYPT(ECB, ECB_ENCRYPT) { \ -.size = ccn_sizeof_size(sizeof(struct _ccmode_xts_key)) + 2 * ccn_sizeof_size((ECB)->size) + ccn_sizeof_size(16), \ +.size = ccn_sizeof_size(sizeof(struct _ccmode_xts_key)) + 2 * ccn_sizeof_size((ECB)->size), \ +.tweak_size = ccn_sizeof_size(sizeof(struct _ccmode_xts_tweak)) + ccn_sizeof_size(16), \ .block_size = 16, \ .init = ccmode_xts_init, \ .set_tweak = ccmode_xts_set_tweak, \ @@ -535,7 +637,8 @@ struct _ccmode_xts_key { /* Use this to statically initialize a ccmode_xts object for encryption. */ #define CCMODE_FACTORY_XTS_ENCRYPT(ECB, ECB_ENCRYPT) { \ -.size = ccn_sizeof_size(sizeof(struct _ccmode_xts_key)) + 2 * ccn_sizeof_size((ECB)->size) + ccn_sizeof_size(16), \ +.size = ccn_sizeof_size(sizeof(struct _ccmode_xts_key)) + 2 * ccn_sizeof_size((ECB)->size), \ +.tweak_size = ccn_sizeof_size(sizeof(struct _ccmode_xts_tweak)) + ccn_sizeof_size(16), \ .block_size = 16, \ .init = ccmode_xts_init, \ .set_tweak = ccmode_xts_set_tweak, \ @@ -550,8 +653,8 @@ struct _ccmode_xts_key { parameter. */ CC_INLINE void ccmode_factory_xts_decrypt(struct ccmode_xts *xts, - const struct ccmode_ecb *ecb, - const struct ccmode_ecb *ecb_encrypt) { + const struct ccmode_ecb *ecb, + const struct ccmode_ecb *ecb_encrypt) { struct ccmode_xts xts_decrypt = CCMODE_FACTORY_XTS_DECRYPT(ecb, ecb_encrypt); *xts = xts_decrypt; } @@ -562,8 +665,8 @@ void ccmode_factory_xts_decrypt(struct ccmode_xts *xts, parameter. */ CC_INLINE void ccmode_factory_xts_encrypt(struct ccmode_xts *xts, - const struct ccmode_ecb *ecb, - const struct ccmode_ecb *ecb_encrypt) { + const struct ccmode_ecb *ecb, + const struct ccmode_ecb *ecb_encrypt) { struct ccmode_xts xts_encrypt = CCMODE_FACTORY_XTS_ENCRYPT(ecb, ecb_encrypt); *xts = xts_encrypt; } diff --git a/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h b/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h index 3e35f548e..ce1d1e114 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h +++ b/EXTERNAL_HEADERS/corecrypto/ccmode_impl.h @@ -21,7 +21,7 @@ struct ccmode_ecb { size_t size; /* first argument to ccecb_ctx_decl(). */ unsigned long block_size; void (*init)(const struct ccmode_ecb *ecb, ccecb_ctx *ctx, - unsigned long key_len, const void *key); + size_t key_len, const void *key); void (*ecb)(const ccecb_ctx *ctx, unsigned long nblocks, const void *in, void *out); }; @@ -34,10 +34,10 @@ struct ccmode_cbc { size_t size; /* first argument to cccbc_ctx_decl(). */ unsigned long block_size; void (*init)(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long key_len, const void *key); + size_t key_len, const void *key); /* cbc encrypt or decrypt nblocks from in to out, iv will be used and updated. 
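   Because the iv is advanced in place, splitting a message across
   several cbc calls that share one iv buffer gives the same result as
   a single call over all the blocks.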
*/ - void (*cbc)(const cccbc_ctx *ctx, cccbc_iv *iv, unsigned long nblocks, - const void *in, void *out); + void (*cbc)(const cccbc_ctx *ctx, cccbc_iv *iv, + unsigned long nblocks, const void *in, void *out); const void *custom; }; @@ -48,10 +48,8 @@ struct ccmode_cfb { size_t size; /* first argument to cccfb_ctx_decl(). */ unsigned long block_size; void (*init)(const struct ccmode_cfb *cfb, cccfb_ctx *ctx, - unsigned long key_len, const void *key, - const void *iv); - void (*cfb)(cccfb_ctx *ctx, unsigned long nblocks, - const void *in, void *out); + size_t key_len, const void *key, const void *iv); + void (*cfb)(cccfb_ctx *ctx, size_t nbytes, const void *in, void *out); const void *custom; }; @@ -63,10 +61,8 @@ struct ccmode_cfb8 { size_t size; /* first argument to cccfb8_ctx_decl(). */ unsigned long block_size; void (*init)(const struct ccmode_cfb8 *cfb8, cccfb8_ctx *ctx, - unsigned long key_len, const void *key, - const void *iv); - void (*cfb8)(cccfb8_ctx *ctx, unsigned long nbytes, - const void *in, void *out); + size_t key_len, const void *key, const void *iv); + void (*cfb8)(cccfb8_ctx *ctx, size_t nbytes, const void *in, void *out); const void *custom; }; @@ -78,10 +74,8 @@ struct ccmode_ctr { size_t size; /* first argument to ccctr_ctx_decl(). */ unsigned long block_size; void (*init)(const struct ccmode_ctr *ctr, ccctr_ctx *ctx, - unsigned long key_len, const void *key, - const void *iv); - void (*ctr)(ccctr_ctx *ctx, unsigned long nblocks, - const void *in, void *out); + size_t key_len, const void *key, const void *iv); + void (*ctr)(ccctr_ctx *ctx, size_t nbytes, const void *in, void *out); const void *custom; }; @@ -93,10 +87,8 @@ struct ccmode_ofb { size_t size; /* first argument to ccofb_ctx_decl(). */ unsigned long block_size; void (*init)(const struct ccmode_ofb *ofb, ccofb_ctx *ctx, - unsigned long key_len, const void *key, - const void *iv); - void (*ofb)(ccofb_ctx *ctx, unsigned long nblocks, - const void *in, void *out); + size_t key_len, const void *key, const void *iv); + void (*ofb)(ccofb_ctx *ctx, size_t nbytes, const void *in, void *out); const void *custom; }; @@ -116,16 +108,15 @@ struct ccmode_xts { key must point to at least 'size' cc_units of free storage. tweak_key must point to at least 'tweak_size' cc_units of free storage. */ void (*init)(const struct ccmode_xts *xts, ccxts_ctx *ctx, - unsigned long key_len, const void *key, - const void *tweak_key); + size_t key_len, const void *key, const void *tweak_key); /* Set the tweak (sector number), the block within the sector zero. */ void (*set_tweak)(const ccxts_ctx *ctx, ccxts_tweak *tweak, const void *iv); /* Encrypt blocks for a sector, clients must call set_tweak before calling this function. Return a pointer to the tweak buffer */ - void *(*xts)(const ccxts_ctx *ctx, ccxts_tweak *tweak, unsigned long nblocks, - const void *in, void *out); + void *(*xts)(const ccxts_ctx *ctx, ccxts_tweak *tweak, + unsigned long nblocks, const void *in, void *out); const void *custom; const void *custom1; @@ -139,15 +130,36 @@ struct ccmode_gcm { size_t size; /* first argument to ccgcm_ctx_decl(). 
*/ unsigned long block_size; void (*init)(const struct ccmode_gcm *gcm, ccgcm_ctx *ctx, - unsigned long key_len, const void *key); + size_t key_len, const void *key); void (*set_iv)(ccgcm_ctx *ctx, size_t iv_size, const void *iv); - void (*gmac)(ccgcm_ctx *ctx, unsigned long nbytes, const void *in); // could just be gcm with NULL out - void (*gcm)(ccgcm_ctx *ctx, unsigned long nbytes, const void *in, void *out); + void (*gmac)(ccgcm_ctx *ctx, size_t nbytes, const void *in); // could just be gcm with NULL out + void (*gcm)(ccgcm_ctx *ctx, size_t nbytes, const void *in, void *out); void (*finalize)(ccgcm_ctx *key, size_t tag_size, void *tag); void (*reset)(ccgcm_ctx *ctx); const void *custom; }; +/* CCM mode. */ + +cc_aligned_struct(16) ccccm_ctx; +cc_aligned_struct(16) ccccm_nonce; + +struct ccmode_ccm { + size_t size; /* first argument to ccccm_ctx_decl(). */ + size_t nonce_size; /* first argument to ccccm_nonce_decl(). */ + unsigned long block_size; + void (*init)(const struct ccmode_ccm *ccm, ccccm_ctx *ctx, + size_t key_len, const void *key); + void (*set_iv)(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nonce_len, const void *nonce, + size_t mac_size, size_t auth_len, size_t data_len); + void (*cbcmac)(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in); // could just be ccm with NULL out + void (*ccm)(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in, void *out); + void (*finalize)(ccccm_ctx *key, ccccm_nonce *nonce_ctx, void *mac); + void (*reset)(ccccm_ctx *key, ccccm_nonce *nonce_ctx); + const void *custom; +}; + + /* OMAC mode. */ cc_aligned_struct(16) ccomac_ctx; @@ -156,8 +168,7 @@ struct ccmode_omac { size_t size; /* first argument to ccomac_ctx_decl(). */ unsigned long block_size; void (*init)(const struct ccmode_omac *omac, ccomac_ctx *ctx, - unsigned long tweak_len, unsigned long key_len, - const void *key); + size_t tweak_len, size_t key_len, const void *key); int (*omac)(ccomac_ctx *ctx, unsigned long nblocks, const void *tweak, const void *in, void *out); const void *custom; diff --git a/EXTERNAL_HEADERS/corecrypto/ccn.h b/EXTERNAL_HEADERS/corecrypto/ccn.h index dd10e97de..3aa1bd8c5 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccn.h +++ b/EXTERNAL_HEADERS/corecrypto/ccn.h @@ -13,6 +13,8 @@ #include #include /* TODO: Get rid of this include in this header. */ #include +#include + typedef uint8_t cc_byte; typedef size_t cc_size; @@ -128,10 +130,11 @@ typedef const cc_unit *cc2np2_in_t; // 2 * n + 2 unit long mp /* Macros for reading uint32_t and uint64_t from ccns, the index is in 32 or 64 bit units respectively. */ #if CCN_UNIT_SIZE == 8 -//#define ccn_uint16(a,i) ((i & 3) == 3 ? ((uint16_t)(a[i >> 2] >> 48)) : \ -// (i & 3) == 2 ? ((uint16_t)(a[i >> 2] >> 32) & UINT16_C(0xffff)) : \ -// (i & 3) == 1 ? ((uint16_t)(a[i >> 2] >> 16) & UINT16_C(0xffff)) : \ -// ((uint16_t)(a[i >> 1] & UINT16_C(0xffff)))) +/* #define ccn_uint16(a,i) ((i & 3) == 3 ? ((uint16_t)(a[i >> 2] >> 48)) : \ + (i & 3) == 2 ? ((uint16_t)(a[i >> 2] >> 32) & UINT16_C(0xffff)) : \ + (i & 3) == 1 ? ((uint16_t)(a[i >> 2] >> 16) & UINT16_C(0xffff)) : \ + ((uint16_t)(a[i >> 1] & UINT16_C(0xffff)))) +*/ //#define ccn_uint32(a,i) (i & 1 ? ((uint32_t)(a[i >> 1] >> 32)) : ((uint32_t)(a[i >> 1] & UINT32_C(0xffffffff)))) #elif CCN_UNIT_SIZE == 4 //#define ccn16_v(a0) (a0) @@ -323,18 +326,18 @@ cc_size ccn_n(cc_size n, const cc_unit *s); { N bit, scalar -> N bit } N = n * sizeof(cc_unit) * 8 the _multi version doesn't return the shifted bits, but does support multiple word shifts. */
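/*
 * To make the shift contract concrete: a ccn is stored as a little-endian
 * array of cc_unit words. A minimal sketch, assuming a 64-bit cc_unit
 * (CCN_UNIT_SIZE == 8); the concrete values are illustrative only.
 */
#include <corecrypto/ccn.h>

static void shift_demo(void)
{
    cc_unit x[2] = { 1, 0x8000000000000000ULL };  /* least significant unit first */
    cc_unit r[2];

    /* A single-word shift: the bit shifted out of x[0] is reported through
       the return value; ccn_shift_right_multi covers shifts that span
       whole units. */
    cc_unit shifted_out = ccn_shift_right(2, r, x, 1);
    /* r is now { 0, 0x4000000000000000 }. */
    (void)shifted_out;
}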
-CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) cc_unit ccn_shift_right(cc_size n, cc_unit *r, const cc_unit *s, size_t k); -CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) void ccn_shift_right_multi(cc_size n, cc_unit *r, const cc_unit *s, size_t k); /* s << k -> r return bits shifted out of most significant word in bits [0, n> { N bit, scalar -> N bit } N = n * sizeof(cc_unit) * 8 the _multi version doesn't return the shifted bits, but does support multiple word shifts. */ -CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) cc_unit ccn_shift_left(cc_size n, cc_unit *r, const cc_unit *s, size_t k); -CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) void ccn_shift_left_multi(cc_size n, cc_unit *r, const cc_unit *s, size_t k); /* s == 0 -> return 0 | s > 0 -> return index (starting at 1) of most @@ -345,6 +348,7 @@ size_t ccn_bitlen(cc_size n, const cc_unit *s); /* Returns the number of bits which are zero before the first one bit counting from least to most significant bit. */ +CC_NONNULL2 size_t ccn_trailing_zeros(cc_size n, const cc_unit *s); /* s == 0 -> return true | s != 0 -> return false @@ -355,8 +359,10 @@ size_t ccn_trailing_zeros(cc_size n, const cc_unit *s); { N bit } N = n * sizeof(cc_unit) * 8 */ #define ccn_is_one(_n_, _s_) (ccn_n(_n_, _s_) == 1 && _s_[0] == 1) +#define ccn_is_zero_or_one(_n_, _s_) (((_n_)==0) || ((ccn_n(_n_, _s_) <= 1) && (_s_[0] <= 1))) + #if CCN_CMP_INLINE -CC_INLINE CC_PURE CC_NONNULL((2,3)) +CC_INLINE CC_PURE CC_NONNULL((2, 3)) int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t) { while (n) { n--; @@ -370,13 +376,13 @@ int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t) { #else /* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1 { N bit, N bit -> int } N = n * sizeof(cc_unit) * 8 */ -CC_PURE CC_NONNULL((2,3)) +CC_PURE CC_NONNULL((2, 3)) int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t); #endif /* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1 { N bit, M bit -> int } N = ns * sizeof(cc_unit) * 8 M = nt * sizeof(cc_unit) * 8 */ -CC_INLINE +CC_INLINE CC_NONNULL((2, 4)) int ccn_cmpn(cc_size ns, const cc_unit *s, cc_size nt, const cc_unit *t) { if (ns > nt) { @@ -389,81 +395,89 @@ int ccn_cmpn(cc_size ns, const cc_unit *s, /* s - t -> r return 1 iff t > s { N bit, N bit -> N bit } N = n * sizeof(cc_unit) * 8 */ -CC_NONNULL((2,3,4)) +CC_NONNULL((2, 3, 4)) cc_unit ccn_sub(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t); /* s - v -> r return 1 iff v > s return 0 otherwise. { N bit, sizeof(cc_unit) * 8 bit -> N bit } N = n * sizeof(cc_unit) * 8 */ -CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) cc_unit ccn_sub1(cc_size n, cc_unit *r, const cc_unit *s, cc_unit v); /* s - t -> r return 1 iff t > s { N bit, NT bit -> N bit NT <= N} N = n * sizeof(cc_unit) * 8 */ CC_INLINE -CC_NONNULL((2,3,5)) -cc_unit ccn_subn(cc_size n, cc_unit *r,const cc_unit *s, +CC_NONNULL((2, 3, 5)) +cc_unit ccn_subn(cc_size n, cc_unit *r, const cc_unit *s, cc_size nt, const cc_unit *t) { + assert(n >= nt); return ccn_sub1(n - nt, r + nt, s + nt, ccn_sub(nt, r, s, t)); } /* s + t -> r return carry if result doesn't fit in n bits. { N bit, N bit -> N bit } N = n * sizeof(cc_unit) * 8 */ -CC_NONNULL((2,3,4)) +CC_NONNULL((2, 3, 4)) cc_unit ccn_add(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t);
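/*
 * The assert(n >= nt) added to ccn_subn above (and ccn_addn below) makes a
 * previously implicit precondition explicit: the wide operand must be at
 * least as long as the narrow one. A minimal sketch of the borrow
 * propagation, with illustrative values.
 */
#include <corecrypto/ccn.h>

static void subn_demo(void)
{
    cc_unit s[4] = { 5, 0, 0, 1 };  /* little-endian units */
    cc_unit t[1] = { 7 };
    cc_unit r[4];

    /* ccn_sub handles the low nt units; the resulting borrow is then
       propagated through the remaining n - nt units by ccn_sub1. */
    cc_unit borrow = ccn_subn(4, r, s, 1, t);
    /* borrow == 0: s exceeds t overall even though s[0] < t[0]. */
    (void)borrow;
}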
/* s + v -> r return carry if result doesn't fit in n bits. { N bit, sizeof(cc_unit) * 8 bit -> N bit } N = n * sizeof(cc_unit) * 8 */ -CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) cc_unit ccn_add1(cc_size n, cc_unit *r, const cc_unit *s, cc_unit v); /* s + t -> r return carry if result doesn't fit in n bits { N bit, NT bit -> N bit NT <= N} N = n * sizeof(cc_unit) * 8 */ CC_INLINE -CC_NONNULL((2,3,5)) +CC_NONNULL((2, 3, 5)) cc_unit ccn_addn(cc_size n, cc_unit *r, const cc_unit *s, cc_size nt, const cc_unit *t) { + assert(n >= nt); return ccn_add1(n - nt, r + nt, s + nt, ccn_add(nt, r, s, t)); } -CC_NONNULL((4,5)) +CC_NONNULL((4, 5)) void ccn_divmod(cc_size n, cc_unit *q, cc_unit *r, const cc_unit *s, const cc_unit *t); -CC_NONNULL((2,3,4)) +CC_NONNULL((2, 3, 4)) void ccn_lcm(cc_size n, cc_unit *r2n, const cc_unit *s, const cc_unit *t); -/* s * t -> r - { n bit, n bit -> 2 * n bit } n = count * sizeof(cc_unit) * 8 */ -CC_NONNULL((2,3,4)) +/* s * t -> r_2n r_2n must not overlap with s nor t + { n bit, n bit -> 2 * n bit } n = count * sizeof(cc_unit) * 8 + { N bit, N bit -> 2N bit } N = ccn_bitsof(n) */ +CC_NONNULL((2, 3, 4)) void ccn_mul(cc_size n, cc_unit *r_2n, const cc_unit *s, const cc_unit *t); -CC_NONNULL((2,3)) +/* s[0..n) * v -> r[0..n)+return value + { N bit, sizeof(cc_unit) * 8 bit -> N + sizeof(cc_unit) * 8 bit } N = n * sizeof(cc_unit) * 8 */ +CC_NONNULL((2, 3)) cc_unit ccn_mul1(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit v); -CC_NONNULL((2,3)) + +/* s[0..n) * v + r[0..n) -> r[0..n)+return value + { N bit, sizeof(cc_unit) * 8 bit -> N + sizeof(cc_unit) * 8 bit } N = n * sizeof(cc_unit) * 8 */ +CC_NONNULL((2, 3)) cc_unit ccn_addmul1(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit v); #if 0 /* a % d -> n {2 * n bit, n bit -> n bit } n = count * sizeof(cc_unit) * 8 */ -CC_NONNULL((2,3,4)) +CC_NONNULL((2, 3, 4)) void ccn_mod(cc_size n, cc_unit *r, const cc_unit *a_2n, const cc_unit *d); #endif /* r = gcd(s, t). N bit, N bit -> N bit */ -CC_NONNULL((2,3,4)) +CC_NONNULL((2, 3, 4)) void ccn_gcd(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t); /* r = gcd(s, t). N bit, N bit -> O bit */ -CC_NONNULL((2,4,6)) +CC_NONNULL((2, 4, 6)) void ccn_gcdn(cc_size rn, cc_unit *r, cc_size sn, const cc_unit *s, cc_size tn, const cc_unit *t); /* r = (data, len) treated as a big endian byte array, return -1 if data doesn't fit in r, return 0 otherwise. */ -CC_NONNULL((2,4)) +CC_NONNULL((2, 4)) int ccn_read_uint(cc_size n, cc_unit *r, size_t data_size, const uint8_t *data); /* r = (data, len) treated as a big endian byte array, return -1 if data @@ -484,12 +498,12 @@ size_t ccn_write_uint_size(cc_size n, const cc_unit *s); The out_size argument should be the value returned from ccn_write_uint_size, and is also the exact number of bytes this function will write to out. If out_size is less than the value returned by ccn_write_uint_size, only the - first out_size non-zero most significant octects of s will be written. */ -CC_NONNULL((2,4)) + first out_size non-zero most significant octets of s will be written. */ +CC_NONNULL((2, 4)) void ccn_write_uint(cc_size n, const cc_unit *s, size_t out_size, void *out); -CC_INLINE CC_NONNULL((2,4)) +CC_INLINE CC_NONNULL((2, 4)) cc_size ccn_write_uint_padded(cc_size n, const cc_unit* s, size_t out_size, uint8_t* to) { size_t bytesInKey = ccn_write_uint_size(n, s); @@ -516,21 +530,21 @@ size_t ccn_write_int_size(cc_size n, const cc_unit *s);
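/*
 * A short sketch of the export pattern implied by ccn_write_uint_size and
 * ccn_write_uint above: query the size first, then write exactly that many
 * octets. The helper name and buffer handling are illustrative.
 */
#include <stdint.h>
#include <corecrypto/ccn.h>

static size_t export_uint(cc_size n, const cc_unit *s, uint8_t *out_buf)
{
    /* ccn_write_uint_size counts only significant octets, so the
       big-endian output carries no leading zero bytes. */
    size_t len = ccn_write_uint_size(n, s);
    ccn_write_uint(n, s, len, out_buf);
    return len;
}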
The out_size argument should be the value returned from ccn_write_int_size, and is also the exact number of bytes this function will write to out. If out_size is less than the value returned by ccn_write_int_size, only the - first out_size non-zero most significant octects of s will be written. */ -CC_NONNULL((2,4)) + first out_size non-zero most significant octets of s will be written. */ +CC_NONNULL((2, 4)) void ccn_write_int(cc_size n, const cc_unit *s, size_t out_size, void *out); /* s^2 -> r { n bit -> 2 * n bit } */ -CC_INLINE CC_NONNULL((2,3)) +CC_INLINE CC_NONNULL((2, 3)) void ccn_sqr(cc_size n, cc_unit *r, const cc_unit *s) { ccn_mul(n, r, s, s); } /* s -> r { n bit -> n bit } */ -CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) void ccn_set(cc_size n, cc_unit *r, const cc_unit *s); CC_INLINE CC_NONNULL2 @@ -538,6 +552,9 @@ void ccn_zero(cc_size n, cc_unit *r) { CC_BZERO(r, ccn_sizeof_n(n)); } +CC_NONNULL2 +void ccn_zero_multi(cc_size n, cc_unit *r, ...); + /* Burn (zero fill or otherwise overwrite) n cc_units of stack space. */ void ccn_burn_stack(cc_size n); @@ -548,8 +565,8 @@ void ccn_seti(cc_size n, cc_unit *r, cc_unit v) { ccn_zero(n - 1, r + 1); } -CC_INLINE CC_NONNULL((2,4)) -void ccn_setn(cc_size n, cc_unit *r, CC_UNUSED const cc_size s_size, const cc_unit *s) { +CC_INLINE CC_NONNULL((2, 4)) +void ccn_setn(cc_size n, cc_unit *r, const cc_size s_size, const cc_unit *s) { /* FIXME: assert not available in kernel. assert(n > 0); assert(s_size > 0); @@ -572,10 +589,10 @@ void ccn_setn(cc_size n, cc_unit *r, CC_UNUSED const cc_size s_size, const cc_un ((((x) & 0xff000000) >> 24) | \ (((x) & 0x00ff0000) >> 8) | \ (((x) & 0x0000ff00) << 8) | \ - (((x) & 0x000000ff) << 24) ) + (((x) & 0x000000ff) << 24)) #define CC_SWAP_HOST_BIG_16(x) \ - (((x) & 0xff00) >> 8) | \ - (((x) & 0x00ff) << 8) | \ + ((((x) & 0xff00) >> 8) | \ + (((x) & 0x00ff) << 8)) /* This should probably move if we move ccn_swap out of line. */ #if CCN_UNIT_SIZE == 8 @@ -603,7 +620,7 @@ void ccn_swap(cc_size n, cc_unit *r) { *r = CC_UNIT_TO_BIG(*r); } -CC_INLINE CC_NONNULL2 CC_NONNULL3 CC_NONNULL4 +CC_INLINE CC_NONNULL((2, 3, 4)) void ccn_xor(cc_size n, cc_unit *r, const cc_unit *s, const cc_unit *t) { while (n--) { r[n] = s[n] ^ t[n]; @@ -620,17 +637,17 @@ void ccn_lprint(cc_size n, const char *label, const cc_unit *s); struct ccrng_state; #if 0 -CC_INLINE CC_NONNULL((2,3)) +CC_INLINE CC_NONNULL((2, 3)) int ccn_random(cc_size n, cc_unit *r, struct ccrng_state *rng) { return (RNG)->generate((RNG), ccn_sizeof_n(n), (unsigned char *)r); } #else #define ccn_random(_n_,_r_,_ccrng_ctx_) \ - ccrng_generate(_ccrng_ctx_, ccn_sizeof_n(_n_), (unsigned char *)_r_); + ccrng_generate(_ccrng_ctx_, ccn_sizeof_n(_n_), (unsigned char *)_r_) #endif /* Make a ccn of size ccn_nof(nbits) units with up to nbits sized random value. */ -CC_NONNULL((2,3)) +CC_NONNULL((2, 3)) int ccn_random_bits(cc_size nbits, cc_unit *r, struct ccrng_state *rng); #endif /* _CORECRYPTO_CCN_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccpad.h b/EXTERNAL_HEADERS/corecrypto/ccpad.h index 71789e0db..86001c2e6 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccpad.h +++ b/EXTERNAL_HEADERS/corecrypto/ccpad.h @@ -13,53 +13,61 @@ #include /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out);
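/*
 * A minimal sketch of the reworked CTS entry points, which now take the
 * chaining IV as an explicit cccbc_iv argument. The cccbc_set_iv helper is
 * assumed to exist in ccmode.h, and the 100-byte buffer size (at least one
 * block plus one byte for a 16-byte cipher) is an illustrative choice.
 */
#include <stdint.h>
#include <corecrypto/ccmode.h>
#include <corecrypto/ccpad.h>

static void cts_demo(const struct ccmode_cbc *cbc,
                     size_t key_len, const void *key,
                     const void *iv_bytes,
                     const uint8_t in[100], uint8_t out[100])
{
    cccbc_ctx_decl(cbc->size, ctx);
    cccbc_iv_decl(cbc->block_size, iv);

    cbc->init(cbc, ctx, key_len, key);
    cccbc_set_iv(cbc, iv, iv_bytes);  /* assumed ccmode.h helper */
    ccpad_cts_encrypt(cbc, ctx, iv, 100, in, out);
}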
/* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts1_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts1_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts1_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts1_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts2_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts2_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts2_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts2_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts3_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts3_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_cts3_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_cts3_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is non-zero and a multiple of block_size. Furthermore in is nbytes long and out is nbytes long. Returns the number of bytes written to out (technically we always write nbytes to out, but the returned value is the number of bytes decrypted after removal of padding). To be safe we remove the entire offending block if the pkcs7 padding checks failed. However we purposely don't report the failure to decode the padding since any use of this error leads to potential security exploits. So currently there is no way to distinguish between a full block of padding and bad padding. */ -unsigned long ccpad_pkcs7_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, - void *out); +size_t ccpad_pkcs7_decrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); /* Contract is in is nbytes long. Writes (nbytes / block_size) + 1 times block_size to out. In other words, out must be nbytes rounded down to the closest multiple of block_size plus block_size bytes. */
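/*
 * The sizing rule above is easy to misread, so here it is as arithmetic;
 * the helper name is an illustrative assumption. Note that an input that is
 * already a multiple of block_size still gains a full padding block.
 */
#include <stddef.h>

static size_t pkcs7_padded_size(size_t nbytes, size_t block_size)
{
    /* "(nbytes / block_size) + 1 times block_size", per the contract:
       e.g. 0 -> 16 and 16 -> 32 for a 16-byte block cipher. */
    return (nbytes / block_size + 1) * block_size;
}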
-void ccpad_pkcs7_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_pkcs7_encrypt(const struct ccmode_cbc *cbc, cccbc_ctx *ctx, cccbc_iv *iv, + size_t nbytes, const void *in, void *out); + +/* Contract is 'don't break CommonCrypto functionality that allows PKCS7 padding with ECB mode'. These are basically the same routines as above, without an IV, because calling + crypt with an IV makes ecb cry (and crash) */ + +size_t ccpad_pkcs7_ecb_decrypt(const struct ccmode_ecb *ecb, ccecb_ctx *ecb_key, + size_t nbytes, const void *in, void *out); + +void ccpad_pkcs7_ecb_encrypt(const struct ccmode_ecb *ecb, ccecb_ctx *ctx, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_xts_decrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_xts_decrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx, ccxts_tweak *tweak, + size_t nbytes, const void *in, void *out); /* Contract is nbytes is at least 1 block + 1 byte. Also in is nbytes long out is nbytes long. */ -void ccpad_xts_encrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx, - unsigned long nbytes, const void *in, void *out); +void ccpad_xts_encrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx, ccxts_tweak *tweak, + size_t nbytes, const void *in, void *out); #endif /* _CORECRYPTO_CCPAD_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h b/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h index 15b94da15..ee980159f 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h +++ b/EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h @@ -11,7 +11,6 @@ #ifndef _CORECRYPTO_CCPBKDF2_H_ #define _CORECRYPTO_CCPBKDF2_H_ - #include /*! @function ccpbkdf2_hmac diff --git a/EXTERNAL_HEADERS/corecrypto/ccrc4.h b/EXTERNAL_HEADERS/corecrypto/ccrc4.h index a177f86e3..84204bb35 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccrc4.h +++ b/EXTERNAL_HEADERS/corecrypto/ccrc4.h @@ -14,14 +14,14 @@ cc_aligned_struct(16) ccrc4_ctx; -/* Declare a gcm key named _name_. Pass the size field of a struct ccmode_gcm +/* Declare an rc4 key named _name_. Pass the size field of a struct ccmode_ecb for _size_. */ #define ccrc4_ctx_decl(_size_, _name_) cc_ctx_decl(ccrc4_ctx, _size_, _name_) -#define ccrc4_ctx_clear(_size_, _name_) cc_ctx_clear(ccrc4_ctx, _size_, _name_) +#define ccrc4_ctx_clear(_size_, _name_) cc_zero(_size_, _name_) struct ccrc4_info { size_t size; /* first argument to ccrc4_ctx_decl().
*/ - void (*init)(ccrc4_ctx *ctx, unsigned long key_len, const void *key); + void (*init)(ccrc4_ctx *ctx, size_t key_len, const void *key); void (*crypt)(ccrc4_ctx *ctx, unsigned long nbytes, const void *in, void *out); }; @@ -31,7 +31,7 @@ const struct ccrc4_info *ccrc4(void); extern const struct ccrc4_info ccrc4_eay; struct ccrc4_vector { - unsigned long keylen; + size_t keylen; const void *key; unsigned long datalen; const void *pt; diff --git a/EXTERNAL_HEADERS/corecrypto/ccrng.h b/EXTERNAL_HEADERS/corecrypto/ccrng.h index 8a31d5ac5..c748bc6e6 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccrng.h +++ b/EXTERNAL_HEADERS/corecrypto/ccrng.h @@ -7,12 +7,18 @@ * */ - #ifndef _CORECRYPTO_CCRNG_H_ #define _CORECRYPTO_CCRNG_H_ #include +#define CC_ERR_DEVICE -100 +#define CC_ERR_INTERUPTS -101 +#define CC_ERR_CRYPTO_CONFIG -102 +#define CC_ERR_PERMS -103 +#define CC_ERR_PARAMETER -104 +#define CC_ERR_MEMORY -105 + #define CCRNG_STATE_COMMON \ int (*generate)(struct ccrng_state *rng, unsigned long outlen, void *out); diff --git a/EXTERNAL_HEADERS/corecrypto/ccrng_system.h b/EXTERNAL_HEADERS/corecrypto/ccrng_system.h index 049970d19..3ecc428f7 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccrng_system.h +++ b/EXTERNAL_HEADERS/corecrypto/ccrng_system.h @@ -19,4 +19,6 @@ struct ccrng_system_state { int ccrng_system_init(struct ccrng_system_state *rng); +void ccrng_system_done(struct ccrng_system_state *rng); + #endif /* _CORECRYPTO_CCRNG_SYSTEM_H_ */ diff --git a/EXTERNAL_HEADERS/corecrypto/ccsha1.h b/EXTERNAL_HEADERS/corecrypto/ccsha1.h index fbb258f39..8e4480168 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccsha1.h +++ b/EXTERNAL_HEADERS/corecrypto/ccsha1.h @@ -32,8 +32,9 @@ extern const struct ccdigest_info ccsha1_ltc_di; extern const struct ccdigest_info ccsha1_eay_di; #if CCSHA1_VNG_INTEL -extern const struct ccdigest_info ccsha1_vng_intel_SSE3_di; -extern const struct ccdigest_info ccsha1_vng_intel_NOSSE3_di; +//extern const struct ccdigest_info ccsha1_vng_intel_di; +extern const struct ccdigest_info ccsha1_vng_intel_SupplementalSSE3_di; +extern const struct ccdigest_info ccsha1_vng_intel_NOSupplementalSSE3_di; #endif #if CCSHA1_VNG_ARMV7NEON diff --git a/EXTERNAL_HEADERS/corecrypto/ccsha2.h b/EXTERNAL_HEADERS/corecrypto/ccsha2.h index 4385b895e..5f55b9f40 100644 --- a/EXTERNAL_HEADERS/corecrypto/ccsha2.h +++ b/EXTERNAL_HEADERS/corecrypto/ccsha2.h @@ -37,16 +37,24 @@ const struct ccdigest_info *ccsha512_di(void); #define CCSHA256_OUTPUT_SIZE 32 #define CCSHA256_STATE_SIZE 32 extern const struct ccdigest_info ccsha256_ltc_di; -extern const struct ccdigest_info ccsha256_vng_intel_SSE3_di; -extern const struct ccdigest_info ccsha256_vng_intel_NOSSE3_di; +#if CCSHA2_VNG_INTEL +#if defined __x86_64__ +extern const struct ccdigest_info ccsha256_vng_intel_AVX2_di; +extern const struct ccdigest_info ccsha256_vng_intel_AVX1_di; +#endif +extern const struct ccdigest_info ccsha256_vng_intel_SupplementalSSE3_di; +extern const struct ccdigest_info ccsha256_vng_intel_NOSupplementalSSE3_di; +#endif +#if CCSHA2_VNG_ARMV7NEON extern const struct ccdigest_info ccsha256_vng_armv7neon_di; +#endif extern const uint32_t ccsha256_K[64]; /* SHA224 */ #define CCSHA224_OUTPUT_SIZE 28 extern const struct ccdigest_info ccsha224_ltc_di; -extern const struct ccdigest_info ccsha224_vng_intel_SSE3_di; -extern const struct ccdigest_info ccsha224_vng_intel_NOSSE3_di; +extern const struct ccdigest_info ccsha224_vng_intel_SupplementalSSE3_di; +extern const struct ccdigest_info ccsha224_vng_intel_NOSupplementalSSE3_di; extern const struct 
ccdigest_info ccsha224_vng_armv7neon_di; /* SHA512 */ diff --git a/Makefile b/Makefile index 2f4d164e6..c4036c896 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 1999-2010 Apple Inc. All rights reserved. +# Copyright (C) 1999-2013 Apple Inc. All rights reserved. # ifndef VERSDIR @@ -55,23 +55,31 @@ installsrc: else ifeq ($(RC_ProjectName),libkxld) +include $(MakeInc_cmd) + default: install installhdrs install clean: $(MAKE) -C libkern/kxld $@ USE_APPLE_PB_SUPPORT=all installsrc: - pax -rw . $(SRCROOT) + $(_v)$(MKDIR) $(SRCROOT) + $(_v)$(FIND) -x . \! \( \( -name BUILD -o -name .svn -o -name .git -o -name cscope.\* -o -name \*~ \) -prune \) -print0 | $(PAX) -rw -p a -d0 $(SRCROOT) + $(_v)$(CHMOD) -R go+rX $(SRCROOT) else ifeq ($(RC_ProjectName),libkxld_host) +include $(MakeInc_cmd) + default: install installhdrs install clean: $(MAKE) -C libkern/kxld $@ USE_APPLE_PB_SUPPORT=all PRODUCT_TYPE=ARCHIVE installsrc: - pax -rw . $(SRCROOT) + $(_v)$(MKDIR) $(SRCROOT) + $(_v)$(FIND) -x . \! \( \( -name BUILD -o -name .svn -o -name .git -o -name cscope.\* -o -name \*~ \) -prune \) -print0 | $(PAX) -rw -p a -d0 $(SRCROOT) + $(_v)$(CHMOD) -R go+rX $(SRCROOT) else ifeq ($(RC_ProjectName),libkmod) @@ -118,12 +126,40 @@ _v = @ endif # -# Setup for parallel sub-makes based on 2 times number of logical CPUs. +# Setup for parallel sub-makes, taking into account physical and logical +# CPUs. If the system does not support SMT, use N+1. # If MAKEJOBS or -jN is passed on the make line, that takes precedence. # -MAKEJOBS := --jobs=$(shell expr `/usr/sbin/sysctl -n hw.physicalcpu` \* 2) +export SYSCTL_HW_PHYSICALCPU := $(shell /usr/sbin/sysctl -n hw.physicalcpu) +export SYSCTL_HW_LOGICALCPU := $(shell /usr/sbin/sysctl -n hw.logicalcpu) +ifeq ($(SYSCTL_HW_PHYSICALCPU),$(SYSCTL_HW_LOGICALCPU)) +MAKEJOBS := --jobs=$(shell expr $(SYSCTL_HW_PHYSICALCPU) + 1) +else +MAKEJOBS := --jobs=$(SYSCTL_HW_LOGICALCPU) +endif -TOP_TARGETS = clean installsrc installhdrs installhdrs_embedded installman exporthdrs setup build all all_embedded install install_embedded installopensource cscope tags help print_exports print_exports_first_build_config +TOP_TARGETS = \ + clean \ + installsrc \ + exporthdrs \ + all all_desktop all_embedded \ + all_release_embedded all_development_embedded \ + installhdrs installhdrs_desktop installhdrs_embedded \ + installhdrs_release_embedded installhdrs_development_embedded \ + install install_desktop install_embedded \ + install_release_embedded install_development_embedded \ + installopensource \ + cscope tags \ + help + +# Targets for internal build system debugging +TOP_TARGETS += \ + print_exports print_exports_first_build_config \ + setup \ + build \ + config \ + install_textfiles \ + install_config .PHONY: $(TOP_TARGETS) @@ -131,7 +167,7 @@ default: all ifneq ($(REMOTEBUILD),) $(TOP_TARGETS): - $(_v)$(VERSDIR)/tools/remote_build.sh _REMOTEBUILD_TARGET=$@ _REMOTEBUILD_MAKE=$(MAKE) $(MAKEFLAGS) + $(_v)$(VERSDIR)/tools/remote_build.sh _REMOTEBUILD_TARGET=$@ _REMOTEBUILD_MAKE=$(MAKE) $(if $(filter --,$(MAKEFLAGS)),-,)$(MAKEFLAGS) else $(TOP_TARGETS): $(_v)$(MAKE) -r $(if $(filter -j,$(MAKEFLAGS)),,$(MAKEJOBS)) -f $(MakeInc_top) $@ @@ -156,22 +192,21 @@ CONFIG_SUBDIRS = config tools INSTINC_SUBDIRS = $(ALL_SUBDIRS) EXTERNAL_HEADERS INSTINC_SUBDIRS_X86_64 = $(INSTINC_SUBDIRS) +INSTINC_SUBDIRS_X86_64H = $(INSTINC_SUBDIRS) INSTINC_SUBDIRS_ARM = $(INSTINC_SUBDIRS) EXPINC_SUBDIRS = $(ALL_SUBDIRS) EXPINC_SUBDIRS_X86_64 = $(EXPINC_SUBDIRS) +EXPINC_SUBDIRS_X86_64H = $(EXPINC_SUBDIRS) 
EXPINC_SUBDIRS_ARM = $(EXPINC_SUBDIRS) SETUP_SUBDIRS = SETUP COMP_SUBDIRS_X86_64 = $(ALL_SUBDIRS) +COMP_SUBDIRS_X86_64H = $(ALL_SUBDIRS) COMP_SUBDIRS_ARM = $(ALL_SUBDIRS) -INST_SUBDIRS = \ - bsd \ - config - -INSTMAN_SUBDIRS = \ +INSTTEXTFILES_SUBDIRS = \ bsd include $(MakeInc_kernel) diff --git a/README b/README index 0ac97d202..f6b632335 100644 --- a/README +++ b/README @@ -86,22 +86,22 @@ A. How to build XNU: produce different results. Each build alias supports the standard "clean", "install", "installsrc", "installhdrs" targets, but conditionalize their behavior on the RC_ProjectName make variable - which is passed as the -project argument to ~rc/bin/buildit, which + which is passed as the -buildAlias argument to ~rc/bin/buildit, which can be one of: - -project xnu # the default, builds /mach_kernel, kernel-space - # headers, user-space headers, man pages, - # symbol-set kexts + -buildAlias xnu # the default, builds /mach_kernel, kernel-space + # headers, user-space headers, man pages, + # symbol-set kexts - -project xnu_debug # a DEBUG kernel in /AppleInternal with dSYM + -buildAlias xnu_debug # a DEBUG kernel in /AppleInternal with dSYM - -project libkxld # user-space version of kernel linker + -buildAlias libkxld # user-space version of kernel linker - -project libkmod # static library automatically linked into kexts + -buildAlias libkmod # static library automatically linked into kexts - -project Libsyscall # automatically generate BSD syscall stubs + -buildAlias Libsyscall # automatically generate BSD syscall stubs - -project xnu_quick_test # install xnu unit tests + -buildAlias xnu_quick_test # install xnu unit tests @@ -125,14 +125,14 @@ A. How to build XNU: $ make -w # trace recursive make invocations. Useful in combination with VERBOSE=YES - $ make BUILD_LTO=1 # build with LLVM Link Time Optimization (experimental) + $ make BUILD_LTO=0 # build without LLVM Link Time Optimization $ make REMOTEBUILD=user@remotehost # perform build on remote host ============================================= B. How to install a new header file from XNU -[Note: This does not cover installing header files in IOKit framework] +[To install IOKit headers, see additional comments in iokit/IOKit/Makefile.] 1) XNU installs header files at the following locations - a. $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers diff --git a/SETUP/Makefile b/SETUP/Makefile index 74291170c..4ef2047d4 100644 --- a/SETUP/Makefile +++ b/SETUP/Makefile @@ -11,7 +11,8 @@ SETUP_SUBDIRS = \ kextsymboltool \ setsegname \ decomment \ - installfile + installfile \ + replacecontents include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/config/Makefile b/SETUP/config/Makefile index 567f2966d..eb25f4571 100644 --- a/SETUP/config/Makefile +++ b/SETUP/config/Makefile @@ -7,10 +7,11 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -OBJS = externs.o main.o mkglue.o mkheaders.o mkioconf.o mkmakefile.o \ - mkswapconf.o openp.o searchp.o lexer.yy.o parser.o +OBJS = externs.o main.o mkheaders.o mkioconf.o mkmakefile.o \ + openp.o searchp.o lexer.yy.o parser.o CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I. 
+CFLAGS += -DYY_NO_INPUT WARNFLAGS = -Wall @@ -34,7 +35,7 @@ lexer.yy.c: lexer.l @echo HOST_FLEX $@ $(_v)env M4=$(HOST_GM4) $(HOST_FLEX) --header-file=lexer.yy.h -o $@ $< -main.o mkglue.o mkheaders.o mkioconf.o mkmakefile.o lexer.yy.c: parser.c +main.o mkheaders.o mkioconf.o mkmakefile.o lexer.yy.c: parser.c do_build_setup:: config diff --git a/SETUP/config/config.h b/SETUP/config/config.h index bcb0d3eeb..3692c798a 100644 --- a/SETUP/config/config.h +++ b/SETUP/config/config.h @@ -68,27 +68,6 @@ struct file_list { short f_special; /* requires special make rule */ char *f_needs; char *f_extra; /* stuff to add to make line */ - /* - * Random values: - * swap space parameters for swap areas - * root device, etc. for system specifications - */ - union { - struct { /* when swap specification */ - dev_t fuw_swapdev; - int fuw_swapsize; - } fuw; - struct { /* when system specification */ - dev_t fus_rootdev; - dev_t fus_argdev; - dev_t fus_dumpdev; - } fus; - } fun; -#define f_swapdev fun.fuw.fuw_swapdev -#define f_swapsize fun.fuw.fuw_swapsize -#define f_rootdev fun.fus.fus_rootdev -#define f_argdev fun.fus.fus_argdev -#define f_dumpdev fun.fus.fus_dumpdev }; /* @@ -98,57 +77,23 @@ struct file_list { #define NORMAL 2 #define INVISIBLE 3 #define PROFILING 4 -#define SYSTEMSPEC 5 -#define SWAPSPEC 6 /* * Attributes (flags). */ #define CONFIGDEP 0x01 /* obsolete? */ #define OPTIONSDEF 0x02 /* options definition entry */ -#define ORDERED 0x04 /* don't list in OBJ's, keep "files" order */ -#define SEDIT 0x08 /* run sed filter (SQT) */ - -/* - * Maximum number of fields for variable device fields (SQT). - */ -#define NFIELDS 10 - -struct idlst { - char *id; - struct idlst *id_next; - int id_vec; /* Sun interrupt vector number */ -}; struct device { int d_type; /* CONTROLLER, DEVICE, bus adaptor */ - struct device *d_conn; /* what it is connected to */ const char *d_name; /* name of device (e.g. rk11) */ - struct idlst *d_vec; /* interrupt vectors */ - int d_pri; /* interrupt priority */ - int d_addr; /* address of csr */ - int d_unit; /* unit number */ - int d_drive; /* drive number */ int d_slave; /* slave number */ #define QUES -1 /* -1 means '?' */ #define UNKNOWN -2 /* -2 means not set yet */ - int d_dk; /* if init 1 set to number for iostat */ int d_flags; /* flags for device init */ struct device *d_next; /* Next one in list */ - u_short d_mach; /* Sun - machine type (0 = all)*/ - u_short d_bus; /* Sun - bus type (0 = unknown) */ - u_long d_fields[NFIELDS]; /* fields values (SQT) */ - int d_bin; /* interrupt bin (SQT) */ - int d_addrmod; /* address modifier (MIPS) */ char *d_init; /* pseudo device init routine name */ }; -#define TO_NEXUS (struct device *)-1 -#define TO_SLOT (struct device *)-1 - -struct config { - char *c_dev; - char *s_sysname; -}; /* * Config has a global notion of which machine type is @@ -157,40 +102,7 @@ struct config { * it will build from ``Makefile.vax'' and use ``../vax/inline'' * in the makerules, etc.
*/ -extern int machine; extern const char *machinename; -#define MACHINE_VAX 1 -#define MACHINE_SUN 2 -#define MACHINE_ROMP 3 -#define MACHINE_SUN2 4 -#define MACHINE_SUN3 5 -#define MACHINE_MMAX 6 -#define MACHINE_SQT 7 -#define MACHINE_SUN4 8 -#define MACHINE_I386 9 -#define MACHINE_IX 10 -#define MACHINE_MIPSY 11 -#define MACHINE_MIPS 12 -#define MACHINE_I860 13 -#define MACHINE_M68K 14 -#define MACHINE_M88K 15 -#define MACHINE_M98K 16 -#define MACHINE_HPPA 17 -#define MACHINE_SPARC 18 -#define MACHINE_PPC 19 -#define MACHINE_ARM 20 -#define MACHINE_X86_64 21 - -/* - * For each machine, a set of CPU's may be specified as supported. - * These and the options (below) are put in the C flags in the makefile. - */ -struct cputype { - char *cpu_name; - struct cputype *cpu_next; -}; - -extern struct cputype *cputype; /* * In order to configure and build outside the kernel source tree, @@ -217,7 +129,6 @@ struct opt { extern struct opt *opt, *mkopt, *opt_tail, *mkopt_tail; -extern char *ident; const char *get_word(FILE *fp); char *ns(const char *str); char *qu(int num); @@ -225,12 +136,6 @@ char *path(const char *file); extern int do_trace; -#if MACHINE_VAX -extern int seen_mba, seen_uba; -#endif - -extern int seen_vme, seen_mbii; - extern struct device *dtab; dev_t nametodev(char *name, int defunit, char defpartition); char *devtoname(dev_t dev); @@ -243,17 +148,10 @@ extern char *build_directory; extern int profiling; -extern int maxusers; - #define eq(a,b) (!strcmp(a,b)) -#ifdef mips -#define DEV_MASK 0xf -#define DEV_SHIFT 4 -#else /* mips */ #define DEV_MASK 0x7 #define DEV_SHIFT 3 -#endif /* mips */ /* External function references */ char *get_rest(FILE *fp); @@ -261,26 +159,7 @@ char *get_rest(FILE *fp); int yyparse(void); void yyerror(const char *s); -void vax_ioconf(void); -void sun_ioconf(void); -void romp_ioconf(void); -void mmax_ioconf(void); -void sqt_ioconf(void); -void i386_ioconf(void); -void mips_ioconf(void); -void m68k_ioconf(void); -void m88k_ioconf(void); -void m98k_ioconf(void); -void hppa_ioconf(void); -void sparc_ioconf(void); -void ppc_ioconf(void); -void arm_ioconf(void); -void x86_64_ioconf(void); - -void swapconf(void); - -void ubglue(void); -void mbglue(void); +void mkioconf(void); void makefile(void); void headers(void); diff --git a/SETUP/config/doconf b/SETUP/config/doconf index 33612c023..8e2a5a1ed 100755 --- a/SETUP/config/doconf +++ b/SETUP/config/doconf @@ -69,9 +69,10 @@ set prog=$0 set prog=$prog:t set nonomatch set OBJDIR=../BUILD +set SOURCEDIR=. set CONFIG_DIR=$OBJROOT/SETUP/config +set MASTER_CONF_DIR=. -unset domake unset doconfig unset beverbose unset MACHINE @@ -85,10 +86,6 @@ while ($#argv >= 1) case "-config": set doconfig breaksw - case "-m": - case "-make": - set domake - breaksw case "-cpu": if ($#argv < 2) then echo "${prog}: missing argument to ${argv[1]}" @@ -113,6 +110,22 @@ while ($#argv >= 1) set OBJDIR="$argv[2]" shift breaksw + case "-m": + if ($#argv < 2) then + echo "${prog}: missing argument to ${argv[1]}" + exit 1 + endif + set MASTER_CONF_DIR="$argv[2]" + shift + breaksw + case "-s": + if ($#argv < 2) then + echo "${prog}: missing argument to ${argv[1]}" + exit 1 + endif + set SOURCEDIR="$argv[2]" + shift + breaksw case "-verbose": set beverbose breaksw @@ -134,62 +147,18 @@ end if ($#argv == 0) set argv=(GENERIC) if (! $?MACHINE) then - if (-d /NextApps) then - set MACHINE=`hostinfo | awk '/MC680x0/ { printf("m68k") } /MC880x0/ { printf("m88k") }'` - endif -endif - -if (! 
$?MACHINE) then - if (-f /etc/machine) then - set MACHINE="`/etc/machine`" - else - echo "${prog}: no /etc/machine, specify machine type with -cpu" - echo "${prog}: e.g. ${prog} -cpu VAX CONFIGURATION" - exit 1 - endif + echo "${prog}: MACHINE not set" + exit 1 endif -set FEATURES_EXTRA= - -switch ("$MACHINE") - case IBMRT: - set cpu=ca - set ID=RT - set FEATURES_EXTRA="romp_dualcall.h romp_fpa.h" - breaksw - case SUN: - set cpu=sun3 - set ID=SUN3 - breaksw - default: - set cpu=`echo $MACHINE | tr A-Z a-z` - set ID=`echo $MACHINE | tr a-z A-Z` - breaksw -endsw -set FEATURES=../h/features.h -set FEATURES_H=(cs_*.h mach_*.h net_*.h\ - cputypes.h cpus.h vice.h\ - $FEATURES_EXTRA) -set MASTER_DIR=../conf +set cpu=`echo $MACHINE | tr A-Z a-z` +set ID=`echo $MACHINE | tr a-z A-Z` +set MASTER_DIR=${MASTER_CONF_DIR} set MASTER = ${MASTER_DIR}/MASTER set MASTER_CPU=${MASTER}.${cpu} set MASTER_CPU_PER_SOC=${MASTER}.${cpu}.${SOC_CONFIG} if (-f $MASTER_CPU_PER_SOC) set MASTER_CPU = ${MASTER_CPU_PER_SOC} -set MASTER_LOCAL = ${MASTER}.local -set MASTER_CPU_LOCAL = ${MASTER_CPU}.local -set MASTER_CPU_PER_SOC_LOCAL = ${MASTER_CPU_PER_SOC}.local -if (! -f $MASTER_LOCAL) set MASTER_LOCAL = "" -if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = "" -if (-f $MASTER_CPU_PER_SOC_LOCAL) set MASTER_CPU_LOCAL = ${MASTER_CPU_PER_SOC_LOCAL} - -if (! -d $OBJDIR) then - if ($?beverbose) then - echo "[ creating $OBJDIR ]" - endif - mkdir -p $OBJDIR -endif - foreach SYS ($argv) set SYSID=${SYS}_${ID} set SYSCONF=$OBJDIR/config.$SYSID @@ -199,8 +168,8 @@ foreach SYS ($argv) endif echo +$SYS \ | \ - cat $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL - \ - $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL \ + cat $MASTER $MASTER_CPU - \ + $MASTER $MASTER_CPU \ | \ sed -n \ -e "/^+/{" \ @@ -274,12 +243,6 @@ part != 0 {\ echo "${prog}: ${$SYSID}: no such configuration in $MASTER_DIR/MASTER{,.$cpu}" rm -f $SYSCONF.new endif - if (! -d $BLDDIR) then - if ($?beverbose) then - echo "[ creating $BLDDIR ]" - endif - mkdir -p $BLDDIR - endif # # These paths are used by config. # @@ -315,15 +278,9 @@ part != 0 {\ echo "[ configuring $SYSID ]" endif if ($?profile) then - $CONFIG_DIR/config -c $MASTER_DIR -p $SYSCONF + $CONFIG_DIR/config -c $SOURCEDIR -p $SYSCONF else - $CONFIG_DIR/config -c $MASTER_DIR $SYSCONF + $CONFIG_DIR/config -c $SOURCEDIR $SYSCONF endif endif - if ($?domake) then - if ($?beverbose) then - echo "[ making $SYSID ]" - endif - (cd $BLDDIR; make) - endif end diff --git a/SETUP/config/externs.c b/SETUP/config/externs.c index d1bdd8942..6f69a3340 100644 --- a/SETUP/config/externs.c +++ b/SETUP/config/externs.c @@ -33,16 +33,8 @@ * it will build from ``Makefile.vax'' and use ``../vax/inline'' * in the makerules, etc. */ -int machine; const char *machinename; -/* - * For each machine, a set of CPU's may be specified as supported. - * These and the options (below) are put in the C flags in the makefile. - */ - -struct cputype *cputype; - /* * In order to configure and build outside the kernel source tree, * we may wish to specify where the source tree lives. 
@@ -58,16 +50,8 @@ char *config_directory; */ struct opt *opt, *mkopt, *opt_tail, *mkopt_tail; -char *ident; - int do_trace; -#if MACHINE_VAX -int seen_mba, seen_uba; -#endif - -int seen_vme, seen_mbii; - struct device *dtab; char errbuf[80]; @@ -78,5 +62,3 @@ char *build_directory; int profiling = 0; -int maxusers; - diff --git a/SETUP/config/lexer.l b/SETUP/config/lexer.l index c5502b4ba..639330569 100644 --- a/SETUP/config/lexer.l +++ b/SETUP/config/lexer.l @@ -47,54 +47,16 @@ struct kt { const char *kt_name; int kt_val; } key_words[] = { - { "and", AND }, - { "args", ARGS }, - { "at", AT }, { "builddir", BUILDDIR }, - { "config", CONFIG }, - { "configdir", CONFIGDIR }, - { "controller", CONTROLLER }, - { "cpu", CPU }, - { "csr", CSR }, - { "device", DEVICE }, - { "disk", DISK }, - { "drive", DRIVE }, - { "dumps", DUMPS }, - { "flags", FLAGS }, - { "hz", HZ }, - { "ident", IDENT }, { "init", INIT }, { "machine", MACHINE }, - { "major", MAJOR }, { "makeoptions", MAKEOPTIONS }, { "makevariables", MAKEOPTIONS }, - { "master", MASTER }, - { "maxusers", MAXUSERS }, - { "mba", MBA }, - { "minor", MINOR }, - { "nexus", NEXUS }, { "objectdir", OBJECTDIR }, - { "on", ON }, { "options", OPTIONS }, - { "priority", PRIORITY }, - { "profile", PROFILE }, { "pseudo-device",PSEUDO_DEVICE }, - { "root", ROOT }, - { "size", SIZE }, - { "slave", SLAVE }, { "sourcedir", SOURCEDIR }, - { "swap", SWAP }, - { "tape", DEVICE }, { "trace", TRACE }, - { "uba", UBA }, - { "vector", VECTOR }, - { "lun", LUN }, /* MMAX only */ - { "slot", SLOT }, /* MMAX only */ - { "tape", TAPE }, /* MMAX only */ - { "bin", BIN }, /* SQT ONLY */ - { "am", ADDRMOD }, /* MIPS */ - { "mbii", MBII }, /* MIPS */ - { "vme", VME }, /* MIPS */ { 0, 0 }, }; %} @@ -137,13 +99,6 @@ WORD1 ([A-Za-z_][-A-Za-z_0-9]*) tprintf("#D:%d ", yylval.val); return NUMBER; } -[0-9]"."[0-9]* { - yylval.val = (int) (60 * atof(yytext) + 0.5); - return FPNUMBER; - } -"-" { - return MINUS; - } "?" { yylval.val = -1; tprintf("? "); @@ -163,7 +118,6 @@ WORD1 ([A-Za-z_][-A-Za-z_0-9]*) ";" { return SEMICOLON; } "," { return COMMA; } "=" { return EQUALS; } -"@" { return AT; } . { return yytext[0]; } diff --git a/SETUP/config/main.c b/SETUP/config/main.c index 024b17be8..5dfcf79d6 100644 --- a/SETUP/config/main.c +++ b/SETUP/config/main.c @@ -130,90 +130,10 @@ main(int argc, char *argv[]) opt = 0; if (yyparse()) exit(3); - switch (machine) { - - case MACHINE_VAX: - vax_ioconf(); /* Print ioconf.c */ - ubglue(); /* Create ubglue.s */ - break; - - case MACHINE_SUN: - sun_ioconf(); - break; - - case MACHINE_SUN2: - case MACHINE_SUN3: - case MACHINE_SUN4: - sun_ioconf(); /* Print ioconf.c */ - mbglue(); /* Create mbglue.s */ - break; - - case MACHINE_ROMP: - romp_ioconf(); - break; - - case MACHINE_MMAX: - mmax_ioconf(); - break; - - case MACHINE_SQT: - sqt_ioconf(); - break; - - case MACHINE_I386: - case MACHINE_IX: - i386_ioconf(); - break; - - case MACHINE_MIPSY: - case MACHINE_MIPS: - mips_ioconf(); - break; - - case MACHINE_I860: - /* i860_ioconf(); */ - break; - - case MACHINE_M68K: - m68k_ioconf(); - break; - - case MACHINE_M88K: - m88k_ioconf(); - break; - - case MACHINE_M98K: - m98k_ioconf(); - break; - - case MACHINE_HPPA: - hppa_ioconf(); - break; - - case MACHINE_SPARC: - sparc_ioconf(); - break; - - case MACHINE_PPC: - ppc_ioconf(); - break; - - case MACHINE_ARM: - arm_ioconf(); - break; - - case MACHINE_X86_64: - x86_64_ioconf(); - break; - - default: - printf("Specify machine type, e.g. 
``machine vax''\n"); - exit(1); - } + mkioconf(); /* ioconf.c */ makefile(); /* build Makefile */ headers(); /* make a lot of .h files */ - swapconf(); /* swap config files */ return 0; } diff --git a/SETUP/config/mkglue.c b/SETUP/config/mkglue.c deleted file mode 100644 index 9d4b5ac6f..000000000 --- a/SETUP/config/mkglue.c +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights - * Reserved. This file contains Original Code and/or Modifications of - * Original Code as defined in and that are subject to the Apple Public - * Source License Version 1.0 (the 'License'). You may not use this file - * except in compliance with the License. Please obtain a copy of the - * License at http://www.apple.com/publicsource and read it before using - * this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License." - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Mach Operating System - * Copyright (c) 1990 Carnegie-Mellon University - * Copyright (c) 1989 Carnegie-Mellon University - * Copyright (c) 1988 Carnegie-Mellon University - * Copyright (c) 1987 Carnegie-Mellon University - * All rights reserved. The CMU software License Agreement specifies - * the terms and conditions for use and redistribution. - */ - -/* - * Copyright (c) 1980 Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. - */ -#ifndef lint -static char sccsid[] __attribute__((used)) = "@(#)mkglue.c 5.6 (Berkeley) 6/18/88"; -#endif /* not lint */ - -/* - * Make the bus adaptor interrupt glue files. - */ -#include -#include -#include "config.h" -#include "parser.h" -#include - -void dump_mb_handler(FILE *fp, struct idlst *vec, int number); -void dump_ubavec(FILE *fp, char *vector, int number); -void dump_std(FILE *fp, FILE *gp); -void dump_intname(FILE *fp, char *vector, int number); -void dump_ctrs(FILE *fp); -void glue(FILE *fp, void (*dump_handler)(FILE *, struct idlst *, int)); - -/* - * Create the UNIBUS interrupt vector glue file. 
- */ -void -ubglue(void) -{ - register FILE *fp, *gp; - register struct device *dp, *mp; - - fp = fopen(path("ubglue.s"), "w"); - if (fp == 0) { - perror(path("ubglue.s")); - exit(1); - } - gp = fopen(path("ubvec.s"), "w"); - if (gp == 0) { - perror(path("ubvec.s")); - exit(1); - } - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp != 0 && mp != (struct device *)-1 && - !eq(mp->d_name, "mba")) { - struct idlst *id, *id2; - - for (id = dp->d_vec; id; id = id->id_next) { - for (id2 = dp->d_vec; id2; id2 = id2->id_next) { - if (id2 == id) { - dump_ubavec(fp, id->id, - dp->d_unit); - break; - } - if (!strcmp(id->id, id2->id)) - break; - } - } - } - } - dump_std(fp, gp); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp != 0 && mp != (struct device *)-1 && - !eq(mp->d_name, "mba")) { - struct idlst *id, *id2; - - for (id = dp->d_vec; id; id = id->id_next) { - for (id2 = dp->d_vec; id2; id2 = id2->id_next) { - if (id2 == id) { - dump_intname(fp, id->id, - dp->d_unit); - break; - } - if (!strcmp(id->id, id2->id)) - break; - } - } - } - } - dump_ctrs(fp); - (void) fclose(fp); - (void) fclose(gp); -} - -static int cntcnt = 0; /* number of interrupt counters allocated */ - -/* - * Print a UNIBUS interrupt vector. - */ -void -dump_ubavec(FILE *fp, char *vector, int number) -{ - char nbuf[80]; - register char *v = nbuf; - - switch (machine) { - - case MACHINE_VAX: - (void) sprintf(v, "%s%d", vector, number); - fprintf(fp, "\t.globl\t_X%s\n\t.align\t2\n_X%s:\n", - v, v); - fprintf(fp,"\tTIM_PUSHR(0)\n"); - fprintf(fp, "\tincl\t_fltintrcnt+(4*%d)\n", cntcnt++); - if (strncmp(vector, "dzx", 3) == 0) - fprintf(fp, "\tmovl\t$%d,r0\n\tjmp\tdzdma\n\n", number); - else { - if (strncmp(vector, "uur", 3) == 0) { - fprintf(fp, "#ifdef UUDMA\n"); - fprintf(fp, "\tmovl\t$%d,r0\n\tjsb\tuudma\n", - number); - fprintf(fp, "#endif\n"); - } - fprintf(fp, "\tpushl\t$%d\n", number); - fprintf(fp, "\tcalls\t$1,_%s\n",vector); - fprintf(fp, "\tCOUNT(V_INTR)\n"); - fprintf(fp, "\tTSREI_POPR\n"); - } - break; - - case MACHINE_MIPSY: - case MACHINE_MIPS: - /* - * Actually, we should never get here! - * Main does not even call ubglue. - */ - if (strncmp(vector, "dzx", 3) == 0) - fprintf(fp, "\tDZINTR(%s,%d)\n", vector, number); - else - fprintf(fp, "\tDEVINTR(%s,%d)\n", vector, number); - break; - } - -} - -static const char *vaxinames[] = { - "clock", "cnr", "cnx", "tur", "tux", - "mba0", "mba1", "mba2", "mba3", - "uba0", "uba1", "uba2", "uba3" -}; -static struct stdintrs { - const char **si_names; /* list of standard interrupt names */ - int si_n; /* number of such names */ -} stdintrs[] = { - { vaxinames, sizeof (vaxinames) / sizeof (vaxinames[0]) }, -}; -/* - * Start the interrupt name table with the names - * of the standard vectors not directly associated - * with a bus. Also, dump the defines needed to - * reference the associated counters into a separate - * file which is prepended to locore.s. 
- */ -void -dump_std(FILE *fp, FILE *gp) -{ - register struct stdintrs *si = &stdintrs[machine-1]; - register const char **cpp; - register int i; - - fprintf(fp, "\n\t.globl\t_intrnames\n"); - fprintf(fp, "\n\t.globl\t_eintrnames\n"); - fprintf(fp, "\t.data\n"); - fprintf(fp, "_intrnames:\n"); - cpp = si->si_names; - for (i = 0; i < si->si_n; i++) { - const char *cp; - char *tp; - char buf[80]; - - cp = *cpp; - if (cp[0] == 'i' && cp[1] == 'n' && cp[2] == 't') { - cp += 3; - if (*cp == 'r') - cp++; - } - for (tp = buf; *cp; cp++) - if (islower(*cp)) - *tp++ = toupper(*cp); - else - *tp++ = *cp; - *tp = '\0'; - fprintf(gp, "#define\tI_%s\t%lu\n", buf, i*sizeof (long)); - fprintf(fp, "\t.asciz\t\"%s\"\n", *cpp); - cpp++; - } -} - -void -dump_intname(FILE *fp, char *vector, int number) -{ - register char *cp = vector; - - fprintf(fp, "\t.asciz\t\""); - /* - * Skip any "int" or "intr" in the name. - */ - while (*cp) - if (cp[0] == 'i' && cp[1] == 'n' && cp[2] == 't') { - cp += 3; - if (*cp == 'r') - cp++; - } else { - putc(*cp, fp); - cp++; - } - fprintf(fp, "%d\"\n", number); -} - -/* - * Reserve space for the interrupt counters. - */ -void -dump_ctrs(FILE *fp) -{ - struct stdintrs *si = &stdintrs[machine-1]; - - fprintf(fp, "_eintrnames:\n"); - fprintf(fp, "\n\t.globl\t_intrcnt\n"); - fprintf(fp, "\n\t.globl\t_eintrcnt\n"); - fprintf(fp, "\t.align 2\n"); - fprintf(fp, "_intrcnt:\n"); - fprintf(fp, "\t.space\t4 * %d\n", si->si_n); - fprintf(fp, "_fltintrcnt:\n"); - fprintf(fp, "\t.space\t4 * %d\n", cntcnt); - fprintf(fp, "_eintrcnt:\n\n"); - fprintf(fp, "\t.text\n"); -} - -/* - * Routines for making Sun mb interrupt file mbglue.s - */ - -/* - * print an interrupt handler for mainbus - */ -void -dump_mb_handler(FILE *fp, struct idlst *vec, int number) -{ - fprintf(fp, "\tVECINTR(_X%s%d, _%s, _V%s%d)\n", - vec->id, number, vec->id, vec->id, number); -} - -void -mbglue(void) -{ - register FILE *fp; - const char *name = "mbglue.s"; - - fp = fopen(path(name), "w"); - if (fp == 0) { - perror(path(name)); - exit(1); - } - fprintf(fp, "#include \n\n"); - glue(fp, dump_mb_handler); - (void) fclose(fp); -} - -void -glue(FILE *fp, void (*dump_handler)(FILE *, struct idlst *, int)) -{ - register struct device *dp, *mp; - - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp != 0 && mp != (struct device *)-1 && - !eq(mp->d_name, "mba")) { - struct idlst *vd, *vd2; - - for (vd = dp->d_vec; vd; vd = vd->id_next) { - for (vd2 = dp->d_vec; vd2; vd2 = vd2->id_next) { - if (vd2 == vd) { - (void)(*dump_handler) - (fp, vd, dp->d_unit); - break; - } - if (!strcmp(vd->id, vd2->id)) - break; - } - } - } - } -} diff --git a/SETUP/config/mkheaders.c b/SETUP/config/mkheaders.c index a0e3fdc38..2c345b7c9 100644 --- a/SETUP/config/mkheaders.c +++ b/SETUP/config/mkheaders.c @@ -64,7 +64,6 @@ static char sccsid[] __attribute__((used)) = "@(#)mkheaders.c 5.5 (Berkeley) 6/1 static void do_count(const char *dev, const char *hname, int search); static void do_header(const char *dev, const char *hname, int count); -static int file_needed(const char *name); static char *toheader(const char *dev); static char *tomacro(const char *dev); @@ -85,20 +84,11 @@ headers(void) void do_count(const char *dev, const char *hname, int search) { - struct device *dp, *mp; + struct device *dp; int count; for (count = 0,dp = dtab; dp != 0; dp = dp->d_next) - if (dp->d_unit != -1 && eq(dp->d_name, dev)) { - /* - * Avoid making .h files for bus types on sun machines - */ - if ((machine == MACHINE_SUN2 || - machine == 
MACHINE_SUN3 || - machine == MACHINE_SUN4) - && dp->d_conn == TO_NEXUS){ - return; - } + if (eq(dp->d_name, dev)) { if (dp->d_type == PSEUDO_DEVICE) { count = dp->d_slave != UNKNOWN ? dp->d_slave : 1; @@ -106,54 +96,10 @@ do_count(const char *dev, const char *hname, int search) dev = NULL; break; } - if (machine != MACHINE_SUN2 && machine != MACHINE_SUN3 - && machine != MACHINE_SUN4) - /* avoid ie0,ie0,ie1 setting NIE to 3 */ - count++; - /* - * Allow holes in unit numbering, - * assumption is unit numbering starts - * at zero. - */ - if (dp->d_unit + 1 > count) - count = dp->d_unit + 1; - if (search) { - mp = dp->d_conn; - if (mp != 0 && mp != TO_NEXUS && - mp->d_conn != TO_NEXUS) { - /* - * Check for the case of the - * controller that the device - * is attached to is in a separate - * file (e.g. "sd" and "sc"). - * In this case, do NOT define - * the number of controllers - * in the hname .h file. - */ - if (!file_needed(mp->d_name)) - do_count(mp->d_name, hname, 0); - search = 0; - } - } } do_header(dev, hname, count); } -/* - * Scan the file list to see if name is needed to bring in a file. - */ -static int -file_needed(const char *name) -{ - struct file_list *fl; - - for (fl = ftab; fl != 0; fl = fl->f_next) { - if (fl->f_needs && strcmp(fl->f_needs, name) == 0) - return (1); - } - return (0); -} - static void do_header(const char *dev, const char *hname, int count) { diff --git a/SETUP/config/mkioconf.c b/SETUP/config/mkioconf.c index 2b4ff4a65..662166da6 100644 --- a/SETUP/config/mkioconf.c +++ b/SETUP/config/mkioconf.c @@ -56,1967 +56,10 @@ /* * build the ioconf.c file */ -char *intv(struct device *dev); -char *intv2(struct device *dev); -void i386_pseudo_inits(FILE *fp); /* XXX function in wrong block */ -void check_vector(struct idlst *vec); -void nrw_ioconf(void); -void m88k_pseudo_inits(FILE *fp); -void m98k_pseudo_inits(FILE *fp); -char *m88k_dn(char *name); -char *m98k_dn(char *name); -char *concat3(char *buf, const char *p1, const char *p2, const char *p3); - -#if MACHINE_VAX - -void -vax_ioconf(void) -{ - register struct device *dp, *mp, *np; - register int uba_n, slave; - FILE *fp; - - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } -/*MACH_KERNEL*/ - fprintf(fp, "#ifndef MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); -/*MACH_KERNEL*/ - fprintf(fp, "#endif MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "\n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n\n"); - fprintf(fp, "\n"); - fprintf(fp, "#define C (caddr_t)\n\n"); - /* - * First print the mba initialization structures - */ - if (seen_mba) { - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp == 0 || mp == TO_NEXUS || - !eq(mp->d_name, "mba")) - continue; - fprintf(fp, "extern struct mba_driver %sdriver;\n", - dp->d_name); - } - fprintf(fp, "\nstruct mba_device mbdinit[] = {\n"); - fprintf(fp, "\t/* Device, Unit, Mba, Drive, Dk */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_unit == QUES || mp == 0 || - mp == TO_NEXUS || !eq(mp->d_name, "mba")) - continue; - if (dp->d_addr) { - printf("can't specify csr address on mba for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_vec != 0) { - printf("can't specify vector for %s%d on mba\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive == UNKNOWN) { - printf("drive not specified for %s%d\n", - 
dp->d_name, dp->d_unit); - continue; - } - if (dp->d_slave != UNKNOWN) { - printf("can't specify slave number for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - fprintf(fp, "\t{ &%sdriver, %d, %s,", - dp->d_name, dp->d_unit, qu(mp->d_unit)); - fprintf(fp, " %s, %d },\n", - qu(dp->d_drive), dp->d_dk); - } - fprintf(fp, "\t0\n};\n\n"); - /* - * Print the mbsinit structure - * Driver Controller Unit Slave - */ - fprintf(fp, "struct mba_slave mbsinit [] = {\n"); - fprintf(fp, "\t/* Driver, Ctlr, Unit, Slave */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - /* - * All slaves are connected to something which - * is connected to the massbus. - */ - if ((mp = dp->d_conn) == 0 || mp == TO_NEXUS) - continue; - np = mp->d_conn; - if (np == 0 || np == TO_NEXUS || - !eq(np->d_name, "mba")) - continue; - fprintf(fp, "\t{ &%sdriver, %s", - mp->d_name, qu(mp->d_unit)); - fprintf(fp, ", %2d, %s },\n", - dp->d_unit, qu(dp->d_slave)); - } - fprintf(fp, "\t0\n};\n\n"); - } - /* - * Now generate interrupt vectors for the unibus - */ - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_vec != 0) { - struct idlst *ip; - mp = dp->d_conn; - if (mp == 0 || mp == TO_NEXUS || - !eq(mp->d_name, "uba")) - continue; - fprintf(fp, - "extern struct uba_driver %sdriver;\n", - dp->d_name); - fprintf(fp, "extern "); - ip = dp->d_vec; - for (;;) { - fprintf(fp, "X%s%d()", ip->id, dp->d_unit); - ip = ip->id_next; - if (ip == 0) - break; - fprintf(fp, ", "); - } - fprintf(fp, ";\n"); - fprintf(fp, "int\t (*%sint%d[])() = { ", dp->d_name, - dp->d_unit); - ip = dp->d_vec; - for (;;) { - fprintf(fp, "X%s%d", ip->id, dp->d_unit); - ip = ip->id_next; - if (ip == 0) - break; - fprintf(fp, ", "); - } - fprintf(fp, ", 0 } ;\n"); - } - } - fprintf(fp, "\nstruct uba_ctlr ubminit[] = {\n"); - fprintf(fp, "/*\t driver,\tctlr,\tubanum,\talive,\tintr,\taddr */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 || - !eq(mp->d_name, "uba")) - continue; - if (dp->d_vec == 0) { - printf("must specify vector for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr == 0) { - printf("must specify csr address for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives need their own entries; dont "); - printf("specify drive or slave for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_flags) { - printf("controllers (e.g. 
%s%d) ", - dp->d_name, dp->d_unit); - printf("don't have flags, only devices do\n"); - continue; - } - fprintf(fp, - "\t{ &%sdriver,\t%d,\t%s,\t0,\t%sint%d, C 0%o },\n", - dp->d_name, dp->d_unit, qu(mp->d_unit), - dp->d_name, dp->d_unit, dp->d_addr); - } - fprintf(fp, "\t0\n};\n"); -/* unibus devices */ - fprintf(fp, "\nstruct uba_device ubdinit[] = {\n"); - fprintf(fp, -"\t/* driver, unit, ctlr, ubanum, slave, intr, addr, dk, flags*/\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 || - mp == TO_NEXUS || mp->d_type == MASTER || - eq(mp->d_name, "mba")) - continue; - np = mp->d_conn; - if (np != 0 && np != TO_NEXUS && eq(np->d_name, "mba")) - continue; - np = 0; - if (eq(mp->d_name, "uba")) { - if (dp->d_vec == 0) { - printf("must specify vector for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr == 0) { - printf("must specify csr for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives/slaves can be specified "); - printf("only for controllers, "); - printf("not for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - uba_n = mp->d_unit; - slave = QUES; - } else { - if ((np = mp->d_conn) == 0) { - printf("%s%d isn't connected to anything ", - mp->d_name, mp->d_unit); - printf(", so %s%d is unattached\n", - dp->d_name, dp->d_unit); - continue; - } - uba_n = np->d_unit; - if (dp->d_drive == UNKNOWN) { - printf("must specify ``drive number'' "); - printf("for %s%d\n", dp->d_name, dp->d_unit); - continue; - } - /* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */ - /* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */ - if (dp->d_slave != UNKNOWN) { - printf("slave numbers should be given only "); - printf("for massbus tapes, not for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_vec != 0) { - printf("interrupt vectors should not be "); - printf("given for drive %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr != 0) { - printf("csr addresses should be given only "); - printf("on controllers, not on %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = dp->d_drive; - } - fprintf(fp, "\t{ &%sdriver, %2d, %s,", - eq(mp->d_name, "uba") ? dp->d_name : mp->d_name, dp->d_unit, - eq(mp->d_name, "uba") ? 
" -1" : qu(mp->d_unit)); - fprintf(fp, " %s, %2d, %s, C 0%-6o, %d, 0x%x },\n", - qu(uba_n), slave, intv(dp), dp->d_addr, dp->d_dk, - dp->d_flags); - } - fprintf(fp, "\t0\n};\n"); - (void) fclose(fp); -} -#endif - -#if MACHINE_SUN -#define SP_OBIO 0x0004 /* on board i/o (for sun/autoconf.h) */ - -#define VEC_LO 64 -#define VEC_HI 255 - -void pseudo_inits(FILE *fp); - -void -check_vector(struct idlst *vec) -{ - - if (vec->id_vec == 0) - fprintf(stderr, "vector number for %s not given\n", vec->id); - else if (vec->id_vec < VEC_LO || vec->id_vec > VEC_HI) - fprintf(stderr, - "vector number %d for %s is not between %d and %d\n", - vec->id_vec, vec->id, VEC_LO, VEC_HI); -} - -void -sun_ioconf(void) -{ - register struct device *dp, *mp; - register int slave; - register struct idlst *vp; - FILE *fp; - - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } -/*MACH_KERNEL*/ - fprintf(fp, "#ifndef MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); -/*MACH_KERNEL*/ - fprintf(fp, "#endif MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "\n"); - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - fprintf(fp, "#define C (caddr_t)\n\n"); - fprintf(fp, "\n"); - - /* - * Now generate interrupt vectors for the Mainbus - */ - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp == TO_NEXUS || mp == 0 || mp->d_conn != TO_NEXUS) - continue; - fprintf(fp, "extern struct mb_driver %sdriver;\n", - dp->d_name); - if (dp->d_vec != 0) { - if (dp->d_pri == 0) - fprintf(stderr, - "no priority specified for %s%d\n", - dp->d_name, dp->d_unit); - fprintf(fp, "extern "); - for (vp = dp->d_vec;;) { - if (machine == MACHINE_SUN4) - fprintf(fp, "%s()", vp->id); - else - fprintf(fp, "X%s%d()", - vp->id, dp->d_unit); - vp = vp->id_next; - if (vp == 0) - break; - fprintf(fp, ", "); - } - fprintf(fp, ";\n"); - - for (vp = dp->d_vec; vp; vp = vp->id_next) { - fprintf(fp, "int V%s%d = %d;\n", - vp->id, dp->d_unit, dp->d_unit); - } - - fprintf(fp, "struct vec %s[] = { ", intv(dp)); - for (vp = dp->d_vec; vp != 0; vp = vp->id_next) { - if (machine == MACHINE_SUN4) - fprintf(fp, "{ %s, %d, &V%s%d }, ", - vp->id, vp->id_vec, - vp->id, dp->d_unit); - else - fprintf(fp, "{ X%s%d, %d, &V%s%d }, ", - vp->id, dp->d_unit, vp->id_vec, - vp->id, dp->d_unit); - check_vector(vp); - } - fprintf(fp, "0 };\n"); - } - } - - /* - * Now spew forth the mb_ctlr structures - */ - fprintf(fp, "\nstruct mb_ctlr mbcinit[] = {\n"); - fprintf(fp, -"/* driver,\tctlr,\talive,\taddress,\tintpri,\t intr,\tspace */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 || - mp->d_conn != TO_NEXUS) - continue; - if (dp->d_addr == UNKNOWN) { - printf("must specify csr address for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives need their own entries; "); - printf("don't specify drive or slave for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_flags) { - printf("controllers (e.g. %s%d) don't have flags, ", - dp->d_name, dp->d_unit); - printf("only devices do\n"); - continue; - } - if (machine == MACHINE_SUN4) - fprintf(fp, - "{ &%sdriver,\t%d,\t0,\tC 0x%08x,\t%d,\t%s, 0x%x },\n", - dp->d_name, dp->d_unit, dp->d_addr, - (dp->d_bus==SP_OBIO) ? 
(dp->d_pri << 1) : (dp->d_pri<<1)-1, - intv(dp), ((dp->d_mach << 16) | dp->d_bus)); - else - fprintf(fp, - "{ &%sdriver,\t%d,\t0,\tC 0x%08x,\t%d,\t%s, 0x%x },\n", - dp->d_name, dp->d_unit, dp->d_addr, - dp->d_pri, intv(dp), ((dp->d_mach << 16) | dp->d_bus)); - } - fprintf(fp, "\t0\n};\n"); - - /* - * Now we go for the mb_device stuff - */ - fprintf(fp, "\nstruct mb_device mbdinit[] = {\n"); - fprintf(fp, -"/* driver,\tunit, ctlr, slave, address, pri, dk, flags, intr, space */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 || - mp == TO_NEXUS || mp->d_type == MASTER) - continue; - if (mp->d_conn == TO_NEXUS) { - if (dp->d_addr == UNKNOWN) { - printf("must specify csr for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives/slaves can be specified only "); - printf("for controllers, not for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = QUES; - } else { - if (mp->d_conn == 0) { - printf("%s%d isn't connected to anything, ", - mp->d_name, mp->d_unit); - printf("so %s%d is unattached\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive == UNKNOWN) { - printf("must specify ``drive number'' for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - /* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */ - /* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */ - if (dp->d_slave != UNKNOWN) { - printf("slave numbers should be given only "); - printf("for massbus tapes, not for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_pri != 0) { - printf("interrupt priority should not be "); - printf("given for drive %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr != UNKNOWN) { - printf("csr addresses should be given only"); - printf(" on controllers, not on %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = dp->d_drive; - } - if (machine == MACHINE_SUN4) - fprintf(fp, -"{ &%sdriver,\t%d, %s, %2d, C 0x%08x, %d, %d, 0x%x, %s, 0x%x },\n", - mp->d_conn == TO_NEXUS? dp->d_name : mp->d_name, dp->d_unit, - mp->d_conn == TO_NEXUS? " -1" : qu(mp->d_unit), - slave, - dp->d_addr == UNKNOWN? 0 : dp->d_addr, - dp->d_pri * 2, dp->d_dk, dp->d_flags, intv(dp), - ((dp->d_mach << 16) | dp->d_bus)); - else - fprintf(fp, -"{ &%sdriver,\t%d, %s, %2d, C 0x%08x, %d, %d, 0x%x, %s, 0x%x },\n", - mp->d_conn == TO_NEXUS? dp->d_name : mp->d_name, dp->d_unit, - mp->d_conn == TO_NEXUS? " -1" : qu(mp->d_unit), - slave, - dp->d_addr == UNKNOWN? 
0 : dp->d_addr, - dp->d_pri, dp->d_dk, dp->d_flags, intv(dp), - ((dp->d_mach << 16) | dp->d_bus)); - } - fprintf(fp, "\t0\n};\n"); - pseudo_inits(fp); - (void) fclose(fp); -} - -void -pseudo_inits(FILE *fp) -{ -#ifdef notdef - register struct device *dp; - int count; - - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } -#endif /* notdef */ - fprintf(fp, "struct pseudo_init {\n"); - fprintf(fp, "\tint\tps_count;\n\tint\t(*ps_func)();\n"); - fprintf(fp, "} pseudo_inits[] = {\n"); -#ifdef notdef - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } -#endif /* notdef */ - fprintf(fp, "\t{0,\t0},\n};\n"); -} -#endif - -#if MACHINE_ROMP -void -romp_ioconf(void) -{ - register struct device *dp, *mp; - register int slave; - FILE *fp; - - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } -/*MACH_KERNEL*/ - fprintf(fp, "#ifndef MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); -/*MACH_KERNEL*/ - fprintf(fp, "#endif MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "\n"); - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - fprintf(fp, "#define C (caddr_t)\n\n"); - fprintf(fp, "\n"); - - fprintf (fp, "struct iocc_hd iocc_hd[] = {{C 0xF0000000,}};\n"); - /* - * Now generate interrupt vectors for the Winnerbus - */ - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_pri != 0) { - mp = dp->d_conn; - if (mp == 0 || mp == TO_NEXUS || - !eq(mp->d_name, "iocc")) - continue; - fprintf(fp, "extern struct iocc_driver %sdriver;\n", - dp->d_name); - } - } - /* - * Now spew forth the iocc_cinfo structure - */ - fprintf(fp, "\nstruct iocc_ctlr iocccinit[] = {\n"); - fprintf(fp, "/*\t driver,\tctlr,\talive,\taddr,\tintpri */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_type != CONTROLLER) - continue; - if (mp == TO_NEXUS || mp == 0 || !eq(mp->d_name, "iocc")) - continue; - if (dp->d_unit == QUES && eq(dp->d_name,"hdc")) - continue; - if (dp->d_unit == QUES && eq(dp->d_name,"fdc")) - continue; - if (dp->d_pri == 0) { - printf("must specify priority for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr == 0) { - printf("must specify csr address for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives need their own entries; "); - printf("dont specify drive or slave for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_flags) { - printf("controllers (e.g. 
%s%d) don't have flags, ", - dp->d_name, dp->d_unit); - printf("only devices do\n"); - continue; - } - fprintf(fp, "\t{ &%sdriver,\t%d,\t0,\tC 0x%x,\t%d },\n", - dp->d_name, dp->d_unit, dp->d_addr, dp->d_pri); - } - fprintf(fp, "\t0\n};\n"); - /* - * Now we go for the iocc_device stuff - */ - fprintf(fp, "\nstruct iocc_device ioccdinit[] = {\n"); - fprintf(fp, -"\t/* driver, unit, ctlr, slave, addr, pri, dk, flags*/\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 || - mp == TO_NEXUS || mp->d_type == MASTER || - eq(mp->d_name, "iocca")) - continue; - if (eq(mp->d_name, "iocc")) { - if (dp->d_pri == 0) { - printf("must specify vector for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr == 0) { - printf("must specify csr for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives/slaves can be specified only "); - printf("for controllers, not for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = QUES; - } else { - if (mp->d_conn == 0) { - printf("%s%d isn't connected to anything, ", - mp->d_name, mp->d_unit); - printf("so %s%d is unattached\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive == UNKNOWN) { - printf("must specify ``drive number'' for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - /* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */ - /* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */ - if (dp->d_slave != UNKNOWN) { - printf("slave numbers should be given only "); - printf("for massbus tapes, not for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_pri != 0) { - printf("interrupt priority should not be "); - printf("given for drive %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr != 0) { - printf("csr addresses should be given only"); - printf("on controllers, not on %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = dp->d_drive; - } - fprintf(fp, -"\t{ &%sdriver, %2d, %s, %2d, C 0x%x, %d, %d, 0x%x },\n", - eq(mp->d_name, "iocc") ? dp->d_name : mp->d_name, dp->d_unit, - eq(mp->d_name, "iocc") ? " -1" : qu(mp->d_unit), - slave, dp->d_addr, dp->d_pri, dp->d_dk, dp->d_flags); - } - fprintf(fp, "\t0\n};\n"); - (void) fclose(fp); -} - -#endif /* MACHINE_ROMP */ - -#if MACHINE_MMAX -void -mmax_ioconf(void) -{ - register struct device *dp, *dp1, *mp; - FILE *fp; - int unit; - - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n\n"); - - /* - * Multimax code is a little messy because we have to - * scan the entire list for each device to generate the - * structures correctly. We cheat and use the d->d_pri - * field to avoid doing anything twice. -1000 is an obvious - * bogus value for this field. - */ - - for (dp1 = dtab; dp1 != 0; dp1 = dp1->d_next) { - /* - * If pri is not -1000, then haven't seen device yet. - */ - if (dp1->d_pri != -1000) switch (dp1->d_type) { - - case CONTROLLER: - fprintf(fp,"struct devaddr %s_devaddr[] = {\n", - dp1->d_name); - /* - * Now scan entire list and get all of them. Use - * unit to make sure unit numbers are right. 
- */ - unit = 0; - for (dp = dp1; dp != 0; dp = dp->d_next) { - if (!strcmp(dp->d_name, dp1->d_name)) { - mp = dp->d_conn; - if (mp != TO_SLOT) { - printf("%s%d: controller must be connected to slot.\n", - dp->d_name, dp->d_unit); - exit(1); - } - if (dp->d_vec != 0) { - printf("%s%d: cannot configure multimax interrupt vectors.\n", - dp->d_name, dp->d_unit); - } - if (dp->d_pri != 0) { - printf("%s%d: interrupt priority is nonsense on multimax.\n", - dp->d_name, dp->d_unit); - } - if ((dp->d_drive != UNKNOWN) || - (dp->d_slave !=UNKNOWN)) { - printf("%s%d: don't specify drive or slave for controller.\n", - dp->d_name, dp->d_unit); - } - /* - * Fix unit number if bogus - */ - if(dp->d_unit != unit) { - printf("Warning: %s%d configured as %s%d -- fix config file.\n", - dp->d_name,dp->d_unit,dp->d_name,unit); - dp->d_unit = unit; - } - unit++; - fprintf(fp,"\t{ %d, 0, 0},\n",dp->d_addr); - dp->d_pri = -1000; /* done this one */ - } - } - fprintf(fp,"} ;\n\n"); - break; - - case DEVICE: - fprintf(fp,"struct subdevaddr %s_subdevaddr[] = {\n", - dp1->d_name); - /* - * Now scan entire list and get all of them. Use - * unit to make sure unit numbers are right. - */ - unit = 0; - for (dp = dp1; dp != 0; dp = dp->d_next) { - if (!strcmp(dp->d_name, dp1->d_name)) { - mp = dp->d_conn; - if ( (mp == 0) || (mp == TO_SLOT) || - (mp->d_type != CONTROLLER)) { - printf("%s%d: device has no controller.\n", - dp->d_name, dp->d_unit); - exit(1); - } - if (dp->d_vec != 0) { - printf("%s%d: cannot configure multimax interrupt vectors.\n", - dp->d_name, dp->d_unit); - } - if (dp->d_pri != 0) { - printf("%s%d: interrupt priority is nonsense on multimax.\n", - dp->d_name, dp->d_unit); - } - if ((dp->d_drive != UNKNOWN) || - (dp->d_slave !=UNKNOWN)) { - printf("%s%d: use 'unit' instead of 'drive' or 'slave'.\n", - dp->d_name, dp->d_unit); - } - /* - * Fix unit number if bogus - */ - if(dp->d_unit != unit) { - printf("Warning: %s%d configured as %s%d -- fix config file.\n", - dp->d_name,dp->d_unit,dp->d_name,unit); - dp->d_unit = unit; - } - unit++; - if((dp->d_addr == 0) || (dp->d_addr == QUES)){ - printf("%s%d: must specify logical unit number.\n", - dp->d_name,dp->d_unit); - exit(1); - } - fprintf(fp,"\t{ %d, %d, 0},\n",mp->d_unit, - dp->d_addr); - dp->d_pri = -1000; /* don't do this again */ - } - } - fprintf(fp,"} ;\n\n"); - break; - - case PSEUDO_DEVICE: - /* - * Doesn't exist as far as ioconf.c is concerned. - */ - break; - - default: - printf("Bogus device type for %s\n", dp1->d_name); - exit(1); - break; - } - } - - (void) fclose(fp); -} - -#endif /* MACHINE_MMAX */ - -#if MACHINE_SQT - -/* - * Define prototype device spec lines. - * - * For now, have static set of controller prototypes. This should be - * upgraded to using (eg) controllers.balance (ala Sequent /etc/config) - * to support custom boards without need to edit this file. - */ - -/* - * flags for indicating presence of upper and lower bound values - */ - -#define P_LB 1 -#define P_UB 2 - -struct p_entry { - const char *p_name; /* name of field */ - long p_def; /* default value */ - long p_lb; /* lower bound for field */ - long p_ub; /* upper bound of field */ - char p_flags; /* bound valid flags */ -}; - -struct proto { - const char *p_name; /* name of controller type */ - struct p_entry p_fields[NFIELDS]; /* ordered list of fields */ - int p_seen; /* any seen? 
*/
-};
-
-/*
- * MULTIBUS Adapter:
- * type mbad index csr flags maps[0,256] bin[0,7] intr[0,7]
- */
-
-static struct proto mbad_proto = {
-    "mbad",
-    {{ "index", 0, 0, 0, 0 },
-     { "csr", 0, 0, 0, 0 },
-     { "flags", 0, 0, 0, 0 },
-     { "maps", 0, 0, 256, P_LB|P_UB },
-     { "bin", 0, 0, 7, P_LB|P_UB },
-     { "intr", 0, 0, 7, P_LB|P_UB },},
-    0
-};
-
-/*
- * SCSI/Ether Controller:
- * type sec flags bin[0,7] req doneq index target[0,7]=-1 unit
- */
-
-static struct proto sec_proto = {
-    "sec",
-    {{ "flags", 0, 0, 0, 0 },
-     { "bin", 0, 0, 7, P_LB|P_UB } ,
-     { "req", 0, 0, 0, 0 },
-     { "doneq", 0, 0, 0, 0 },
-     { "index", 0, 0, 0, 0 },
-     { "target", -1, 0, 7, P_LB|P_UB },
-     { "unit", 0, 0, 0, 0 },},
-    0
-};
-
-/*
- * "Zeke" (FAST) Disk Controller (Dual-Channel Disk Controller):
- * type zdc index[0,31] drive[-1,7] drive_type[-1,1]
- *
- * Legal values for drive_type:
- * M2333K = 0 (swallow)
- * M2351A = 1 (eagle)
- * wildcard = -1 (run-time determined)
- */
-
-static struct proto zdc_proto = {
-    "zdc",
-    {{ "index", 0, 0, 31, P_LB|P_UB },
-     { "drive", 0, -1, 7, P_LB|P_UB },
-     { "drive_type", 0, -1, 1, P_LB|P_UB },},
-    0
-};
-
-static struct proto *ptab[] = {
-    &mbad_proto,
-    &sec_proto,
-    &zdc_proto,
-    (struct proto *) 0
-};
-
-/*
- * locate a prototype structure in the queue of such structures.
- * return NULL if not found.
- */
-
-static struct proto *
-find_proto(const char *str)
-{
-    register struct proto *ptp;
-    register int ptbx;
-
-    for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) {
-        if (eq(str, ptp->p_name))
-            return(ptp);
-    }
-    return(NULL);
-}
-
-void
-dev_param(struct device *dp, const char *str, long num)
-{
-    register struct p_entry *entry;
-    register struct proto *ptp;
-
-    ptp = find_proto(dp->d_conn->d_name);
-    if (ptp == NULL) {
-        fprintf(stderr,"dev %s cont %s", dp->d_name, dp->d_conn->d_name);
-        yyerror("invalid controller");
-        return;
-    }
-
-    for (entry = ptp->p_fields; entry->p_name != NULL; entry++) {
-        if (eq(entry->p_name, str)) {
-            if ((entry->p_flags & P_LB) && (num < entry->p_lb)) {
-                yyerror("parameter below range");
-                return;
-            }
-            if ((entry->p_flags & P_UB) && (num > entry->p_ub)) {
-                yyerror("parameter above range");
-                return;
-            }
-            dp->d_fields[entry-ptp->p_fields] = num;
-            return;
-        }
-    }
-
-    yyerror("invalid parameter");
-}
-
-void
-sqt_ioconf(void)
-{
-    register struct device *dp, *mp;
-    register int count;
-    const char *namep;
-    register struct proto *ptp;
-    register struct p_entry *entry;
-    FILE *fp;
-    int bin_table[8];
-    int ptbx;
-    int found;
-
-    for (count = 0; count < 8; count++)
-        bin_table[count] = 0;
-    fp = fopen(path("ioconf.c"), "w");
-    if (fp == NULL) {
-        perror(path("ioconf.c"));
-        exit(1);
-    }
-/*MACH_KERNEL*/
-    fprintf(fp, "#ifndef MACH_KERNEL\n");
-/*MACH_KERNEL*/
-    fprintf(fp, "#include \n");
-    fprintf(fp, "#include \n");
-/*MACH_KERNEL*/
-    fprintf(fp, "#endif MACH_KERNEL\n");
-/*MACH_KERNEL*/
-    fprintf(fp, "\n");
-    fprintf(fp, "#include \n");
-    fprintf(fp, "\nu_long\tMBAd_IOwindow =\t\t3*256*1024;\t/* top 1/4 Meg */\n\n");
-
-    for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) {
-
-        fprintf(fp, "/*\n");
-        fprintf(fp, " * %s device configuration.\n", ptp->p_name);
-        fprintf(fp, " */\n\n");
-        fprintf(fp, "\n");
-        fprintf(fp, "#include \n", ptp->p_name);
-        fprintf(fp, "\n");
-
-        /*
-         * Generate dev structures for this controller
-         */
-        for (dp = dtab, namep = NULL; dp != 0; dp = dp->d_next) {
-            mp = dp->d_conn;
-            if (mp == 0 || mp == TO_NEXUS ||
-                !eq(mp->d_name, ptp->p_name) ||
-                (namep != NULL && eq(dp->d_name, namep)) )
-                continue;
-
fprintf(fp, "extern\tstruct\t%s_driver\t%s_driver;\n", - ptp->p_name, namep = dp->d_name); - ptp->p_seen = 1; - } - - found = 0; - for (dp = dtab, namep = NULL; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp == 0 || mp == TO_NEXUS || - !eq(mp->d_name, ptp->p_name)) - continue; - if (namep == NULL || !eq(namep, dp->d_name)) { - count = 0; - if (namep != NULL) - fprintf(fp, "};\n"); - found = 1; - fprintf(fp, "\nstruct\t%s_dev %s_%s[] = {\n", - ptp->p_name, - ptp->p_name, - namep = dp->d_name); - fprintf(fp, "/*"); - entry = ptp->p_fields; - for (; entry->p_name != NULL; entry++) - fprintf(fp, "\t%s",entry->p_name); - fprintf(fp, " */\n"); - } - if (dp->d_bin != UNKNOWN) - bin_table[dp->d_bin]++; - fprintf(fp, "{"); - for (entry = ptp->p_fields; entry->p_name != NULL; entry++) { - if (eq(entry->p_name,"index")) - fprintf(fp, "\t%d,", mp->d_unit); - else - fprintf(fp, "\t%lu,", - dp->d_fields[entry-ptp->p_fields]); - } - fprintf(fp, "\t},\t/* %s%d */\n", dp->d_name, count++); - } - if (found) - fprintf(fp, "};\n\n"); - - /* - * Generate conf array - */ - fprintf(fp, "/*\n"); - fprintf(fp, " * %s_conf array collects all %s devices\n", - ptp->p_name, ptp->p_name); - fprintf(fp, " */\n\n"); - fprintf(fp, "struct\t%s_conf %s_conf[] = {\n", - ptp->p_name, ptp->p_name); - fprintf(fp, "/*\tDriver\t\t#Entries\tDevices\t\t*/\n"); - for (dp = dtab, namep = NULL; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp == 0 || mp == TO_NEXUS || - !eq(mp->d_name, ptp->p_name)) - continue; - if (namep == NULL || !eq(namep, dp->d_name)) { - if (namep != NULL) - fprintf(fp, - "{\t&%s_driver,\t%d,\t\t%s_%s,\t},\t/* %s */\n", - namep, count, ptp->p_name, namep, namep); - count = 0; - namep = dp->d_name; - } - ++count; - } - if (namep != NULL) { - fprintf(fp, - "{\t&%s_driver,\t%d,\t\t%s_%s,\t},\t/* %s */\n", - namep, count, ptp->p_name, namep, namep); - } - fprintf(fp, "\t{ 0 },\n"); - fprintf(fp, "};\n\n"); - - } - - /* - * Pseudo's - */ - - fprintf(fp, "/*\n"); - fprintf(fp, " * Pseudo-device configuration\n"); - fprintf(fp, " */\n\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type == PSEUDO_DEVICE) { - fprintf(fp, "extern\tint\t%sboot();\n", dp->d_name); - } - } - fprintf(fp, "\nstruct\tpseudo_dev pseudo_dev[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type == PSEUDO_DEVICE) { - fprintf(fp, "\t{ \"%s\",\t%d,\t%sboot,\t},\n", - dp->d_name, - dp->d_slave == UNKNOWN ? 
32 : dp->d_slave, - dp->d_name); - } - } - fprintf(fp, "\t{ 0 },\n"); - fprintf(fp, "};\n\n"); - - /* - * Bin interrupt table and misc - */ - - fprintf(fp, "/*\n"); - fprintf(fp, " * Interrupt table\n"); - fprintf(fp, " */\n\n"); - fprintf(fp, "int\tbin_intr[8] = {\n"); - fprintf(fp, "\t\t0,\t\t\t\t/* bin 0, always zero */\n"); - for (count=1; count < 8; count++) { - fprintf(fp, "\t\t%d,\t\t\t\t/* bin %d */\n", - bin_table[count], count); - } - fprintf(fp, "};\n"); - - /* - * b8k_cntlrs[] - */ - - fprintf(fp, "/*\n"); - fprintf(fp, " * b8k_cntlrs array collects all controller entries\n"); - fprintf(fp, " */\n\n"); - for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) { - if (ptp->p_seen) - fprintf(fp, "extern int conf_%s(),\tprobe_%s_devices(),\t%s_map();\n", - ptp->p_name, ptp->p_name, ptp->p_name); - } - fprintf(fp, "\n\nstruct\tcntlrs b8k_cntlrs[] = {\n"); - fprintf(fp, "/*\tconf\t\tprobe_devs\t\tmap\t*/\n"); - - for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) { - if (ptp->p_seen) - fprintf(fp, "{\tconf_%s,\tprobe_%s_devices,\t%s_map\t}, \n", - ptp->p_name, ptp->p_name, ptp->p_name); - } - fprintf(fp, "{\t0,\t},\n"); - fprintf(fp, "};\n"); - - (void) fclose(fp); -} - -#endif /* MACHINE_SQT */ -#if MACHINE_I386 -void -i386_ioconf(void) -{ - FILE *fp; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - fprintf(fp, "#define C (void *)\n"); - fprintf(fp, "\n"); - - i386_pseudo_inits (fp); - (void) fclose(fp); -} -#endif /* MACHINE_I386 */ - -#if MACHINE_MIPSY || MACHINE_MIPS - -void declare(const char *cp); -int is_declared(const char *cp); - -void -mips_ioconf(void) -{ - register struct device *dp, *mp, *np; - register int slave; - FILE *fp; - char buf1[64], buf2[64]; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } -/*MACH_KERNEL*/ - fprintf(fp, "#ifndef MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); -/*MACH_KERNEL*/ - fprintf(fp, "#endif MACH_KERNEL\n"); -/*MACH_KERNEL*/ - fprintf(fp, "\n"); - if (seen_mbii && seen_vme) { - printf("can't have both vme and mbii devices\n"); - exit(1); - } - if (seen_mbii) - fprintf(fp, "#include \n"); - if (seen_vme) - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - fprintf(fp, "#define C (caddr_t)\n"); - fprintf(fp, "#define NULL 0\n\n"); - if (!seen_mbii) - goto checkvme; - /* - * MBII stuff should go here - */ - -checkvme: - if (!seen_vme) - goto closefile; - /* - * Now generate interrupt vectors for the vme bus - */ - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_vec != 0) { - struct idlst *ip; - mp = dp->d_conn; - if (mp == 0 || mp == TO_NEXUS || !eq(mp->d_name, "vme")) - continue; - if (is_declared(dp->d_name)) - continue; - declare(dp->d_name); - fprintf(fp, "extern struct vme_driver %sdriver;\n", - dp->d_name); - fprintf(fp, "extern "); - ip = dp->d_vec; - for (;;) { - fprintf(fp, "%s()", ip->id); - ip = ip->id_next; - if (ip == 0) - break; - fprintf(fp, ", "); - } - fprintf(fp, ";\n"); - fprintf(fp, "int (*_%sint%d[])() = { ", dp->d_name, - dp->d_unit); - ip = dp->d_vec; - for (;;) { - fprintf(fp, "%s", ip->id); - ip = ip->id_next; - if (ip == 0) - break; - fprintf(fp, ", "); - } - fprintf(fp, ", 0 } ;\n\n"); - } - } - fprintf(fp, "\nstruct vme_ctlr vmminit[] = {\n"); - fprintf(fp, -" /* driver ctlr alive intr addr am 
*/\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 || - !eq(mp->d_name, "vme")) - continue; - if (dp->d_vec == 0) { - printf("must specify vector for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr == 0) { - printf("must specify csr address for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addrmod == 0) { - printf("must specify address modifier for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives need their own entries; dont "); - printf("specify drive or slave for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_flags) { - printf("controllers (e.g. %s%d) ", - dp->d_name, dp->d_unit); - printf("don't have flags, only devices do\n"); - continue; - } - fprintf(fp, -" { %14s, %3d, 0, %11s, C 0x%08x, 0x%02x },\n", - concat3(buf1, "&", dp->d_name, "driver"), - dp->d_unit, - concat3(buf2, "_", dp->d_name, "int"), - dp->d_addr, - dp->d_addrmod); - } - fprintf(fp, " { NULL }\n};\n"); - /* - * vme devices - */ - fprintf(fp, "\nstruct vme_device vmdinit[] = {\n"); - fprintf(fp, -"/* driver unit ctlr slave intr addr am dk flags */\n" - ); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 || - mp == TO_NEXUS || mp->d_type == MASTER) - continue; - for (np = mp; np && np != TO_NEXUS; np = np->d_conn) - if (eq(np->d_name, "vme")) - break; - if (np != 0 && np != TO_NEXUS && !eq(np->d_name, "vme")) - continue; - np = 0; - if (eq(mp->d_name, "vme")) { - if (dp->d_vec == 0) { - printf("must specify vector for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr == 0) { - printf("must specify csr for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addrmod == 0) { - printf( - "must specify address modifier for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives/slaves can be specified "); - printf("only for controllers, "); - printf("not for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = QUES; - } else { - if ((np = mp->d_conn) == 0) { - printf("%s%d isn't connected to anything ", - mp->d_name, mp->d_unit); - printf(", so %s%d is unattached\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive == UNKNOWN) { - printf("must specify ``drive number'' "); - printf("for %s%d\n", dp->d_name, dp->d_unit); - continue; - } - if (dp->d_slave != UNKNOWN) { - printf("slave numbers should be given only "); - printf("for massbus tapes, not for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_vec != 0) { - printf("interrupt vectors should not be "); - printf("given for drive %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr != 0) { - printf("csr addresses should be given only "); - printf("on controllers, not on %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addrmod != 0) { - printf("address modifiers should be given only "); - printf("on controllers, not on %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = dp->d_drive; - } - fprintf(fp, -"{%14s, %3d, %3s, %4d,%10s, C 0x%08x, 0x%02x, %1d, 0x%08x },\n", - concat3(buf1, "&", - eq(mp->d_name, "vme") ? dp->d_name : mp->d_name, - "driver"), - dp->d_unit, - eq(mp->d_name, "vme") ? 
"-1" : qu(mp->d_unit), - slave, - intv2(dp), - dp->d_addr, - dp->d_addrmod, - dp->d_dk, - dp->d_flags); - } - fprintf(fp, "{ NULL }\n};\n"); -closefile: - (void) fclose(fp); -} - -char * -intv2(struct device *dev) -{ - static char buf[20]; - - if (dev->d_vec == 0) { - strcpy(buf, "NULL"); - } else { - (void) sprintf(buf, "_%sint", dev->d_name); - } - return (buf); -} - -char * -concat3(char *buf, const char *p1, const char *p2, const char *p3) -{ - (void) sprintf(buf, "%s%s%s", p1, p2, p3); - return (buf); -} - -#define MAXDEVS 100 -#define DEVLEN 10 -char decl_devices[MAXDEVS][DEVLEN]; - -void -declare(const char *cp) -{ - register int i; - - for (i = 0; i < MAXDEVS; i++) - if (decl_devices[i][0] == 0) { - strncpy(decl_devices[i], cp, DEVLEN); - return; - } - printf("device table full, fix mkioconf.c\n"); - exit(1); -} - -int -is_declared(const char *cp) -{ - register int i; - - for (i = 0; i < MAXDEVS; i++) { - if (decl_devices[i][0] == 0) - return(0); - if (strncmp(decl_devices[i], cp, DEVLEN) == 0) - return(1); - } - return(0); -} -#endif /* MACHINE_MIPSY || MACHINE_MIPS */ - -#if MACHINE_M68K -char *m68k_dn(const char *name); -void m68k_pseudo_inits(FILE *fp); - -void -m68k_ioconf(void) -{ - register struct device *dp, *mp; - register int slave; - FILE *fp; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - fprintf(fp, "#define C (void *)\n"); - fprintf(fp, "\n"); - - /* - * Now generate interrupt vectors for the bus - */ - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (mp == TO_NEXUS || mp == 0 || mp->d_conn != TO_NEXUS) - continue; - fprintf(fp, "extern struct bus_driver %sdriver;\n", - dp->d_name); - } - - /* - * Now spew forth the bus_ctrl structures - */ - fprintf(fp, "\nstruct bus_ctrl bus_cinit[] = {\n"); - fprintf(fp, -" /* driver ctrl ipl address */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 || - mp->d_conn != TO_NEXUS || dp->d_unit == QUES) - continue; - if (dp->d_addr == UNKNOWN) { - printf("must specify csr address for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives need their own entries; "); - printf("don't specify drive or slave for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_flags) { - printf("controllers (e.g. 
%s%d) don't have flags, ", - dp->d_name, dp->d_unit); - printf("only devices do\n"); - continue; - } - fprintf(fp, -" { %-12s, %5d, %4d, C 0x%08x },\n", - m68k_dn(dp->d_name), dp->d_unit, dp->d_pri, dp->d_addr); - } - fprintf(fp, " 0\n};\n"); - - /* - * Now we go for the bus_device stuff - */ - fprintf(fp, "\nstruct bus_device bus_dinit[] = {\n"); - fprintf(fp, -" /* driver unit ctrl slave ipl dk flags address name */\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - mp = dp->d_conn; - if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 || - mp == TO_NEXUS || mp->d_type == MASTER) - continue; - if (mp->d_conn == TO_NEXUS) { - if (dp->d_addr == UNKNOWN) { - printf("must specify csr for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) { - printf("drives/slaves can be specified only "); - printf("for controllers, not for device %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = UNKNOWN; - } else { - if (mp->d_conn == 0) { - printf("%s%d isn't connected to anything, ", - mp->d_name, mp->d_unit); - printf("so %s%d is unattached\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_drive == UNKNOWN) { - printf("must specify ``drive number'' for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - /* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */ - /* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */ - if (dp->d_slave != UNKNOWN) { - printf("slave numbers should be given only "); - printf("for massbus tapes, not for %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_pri != 0) { - printf("interrupt priority should not be "); - printf("given for drive %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - if (dp->d_addr != 0) { - printf("csr addresses should be given only"); - printf(" on controllers, not on %s%d\n", - dp->d_name, dp->d_unit); - continue; - } - slave = dp->d_drive; - } - fprintf(fp, -" { %-12s, %3d, %s, %s,%3d,%3d, %#10x, C 0x%08x, \"%s\" },\n", - m68k_dn(mp->d_conn == TO_NEXUS? dp->d_name : mp->d_name), - dp->d_unit, - mp->d_conn == TO_NEXUS? " -1" : qu(mp->d_unit), - qu(slave), - dp->d_pri, -dp->d_dk, dp->d_flags, - dp->d_addr == UNKNOWN? 
0 : dp->d_addr, - dp->d_name); - } - fprintf(fp, " 0\n};\n"); - m68k_pseudo_inits (fp); - (void) fclose(fp); -} - -void -m68k_pseudo_inits(FILE *fp) -{ - register struct device *dp; - int count; - - fprintf(fp, "\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } - fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } - fprintf(fp, "\t{0,\t0},\n};\n"); -} - -void -i386_pseudo_inits(FILE *fp) -{ - register struct device *dp; - int count; - - fprintf(fp, "\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } - fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } - fprintf(fp, "\t{0,\t0},\n};\n"); -} - -char * -m68k_dn(const char *name) -{ - sprintf(errbuf, "&%sdriver", name); return ns(errbuf); -} -#endif /* MACHINE_M68K */ - -#if MACHINE_M88K || MACHINE_M98K -char *nrw_dn(char *name); -void nrw_pseudo_inits(FILE *fp); - -void -nrw_ioconf(void) -{ - FILE *fp; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - nrw_pseudo_inits (fp); - (void) fclose(fp); -} - -void -nrw_pseudo_inits(FILE *fp) -{ - register struct device *dp; - int count; - - fprintf(fp, "\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } - fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } - fprintf(fp, "\t{0,\t0},\n};\n"); -} - -char * -nrw_dn(char *name) -{ - sprintf(errbuf, "&%sdriver,", name); - return(errbuf); -} - -void -m88k_ioconf(void) -{ - nrw_ioconf(); -} - -void -m98k_ioconf(void) -{ - nrw_ioconf(); -} - -void -m88k_pseudo_inits(FILE *fp) -{ - nrw_pseudo_inits(fp); -} - -void -m98k_pseudo_inits(FILE *fp) -{ - nrw_pseudo_inits(fp); -} - -char * -m88k_dn(char *name) -{ - return(nrw_dn(name)); -} - -char * -m98k_dn(char *name) -{ - return(nrw_dn(name)); -} - - -#endif /* MACHINE_M88K || MACHINE_M98K */ - -#ifdef MACHINE_HPPA -char *hppa_dn(char *name); -void hppa_pseudo_inits(FILE *fp); - -void -hppa_ioconf(void) -{ - FILE *fp; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - hppa_pseudo_inits (fp); - (void) fclose(fp); -} - -void -hppa_pseudo_inits(FILE *fp) -{ - register struct device *dp; - int count; - - fprintf(fp, "\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } - fprintf(fp, "\nstruct pseudo_init 
pseudo_inits[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } - fprintf(fp, "\t{0,\t0},\n};\n"); -} - -char * -hppa_dn(char *name) -{ - sprintf(errbuf, "&%sdriver,", name); - - return (errbuf); -} - -#endif /* MACHINE_HPPA */ - -#ifdef MACHINE_SPARC -char *sparc_dn(char *name); -void sparc_pseudo_inits(FILE *fp); - -void -sparc_ioconf(void) -{ - FILE *fp; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - sparc_pseudo_inits (fp); - (void) fclose(fp); -} - -void -sparc_pseudo_inits(FILE *fp) -{ - register struct device *dp; - int count; - - fprintf(fp, "\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } - fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } - fprintf(fp, "\t{0,\t0},\n};\n"); -} - -char * -sparc_dn(char *name) -{ - sprintf(errbuf, "&%sdriver,", name); - return (errbuf); -} - -#endif /* MACHINE_SPARC */ - -#ifdef MACHINE_PPC -char *ppc_dn(char *name); -void ppc_pseudo_inits(FILE *fp); - -void -ppc_ioconf(void) -{ - FILE *fp; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - ppc_pseudo_inits (fp); - (void) fclose(fp); -} - -void -ppc_pseudo_inits(FILE *fp) -{ - register struct device *dp; - int count; - - fprintf(fp, "\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } - fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } - fprintf(fp, "\t{0,\t0},\n};\n"); -} - -char * -ppc_dn(name) - char *name; -{ - sprintf(errbuf, "&%sdriver,", name); - return (errbuf); -} - -#endif /* MACHINE_PPC */ - -#ifdef MACHINE_ARM -void arm_pseudo_inits(FILE *fp); - -void -arm_ioconf(void) -{ - FILE *fp; - - unlink(path("ioconf.c")); - fp = fopen(path("ioconf.c"), "w"); - if (fp == 0) { - perror(path("ioconf.c")); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - arm_pseudo_inits (fp); - (void) fclose(fp); -} - -void -arm_pseudo_inits(FILE *fp) -{ - register struct device *dp; - int count; - - fprintf(fp, "\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - fprintf(fp, "extern int %s(int);\n", dp->d_init); - } - fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n"); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0) - continue; - count = dp->d_slave; - if (count <= 0) - count = 1; - fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init); - } - fprintf(fp, "\t{0,\t0},\n};\n"); -} - -#endif /* MACHINE_ARM */ - -#ifdef MACHINE_X86_64 -void 
x86_64_pseudo_inits(FILE *fp);
+void pseudo_inits(FILE *fp);
 void
-x86_64_ioconf(void)
+mkioconf(void)
 {
     FILE *fp;
@@ -2028,12 +71,12 @@ x86_64_ioconf(void)
     }
     fprintf(fp, "#include \n");
     fprintf(fp, "\n");
-    x86_64_pseudo_inits (fp);
+    pseudo_inits (fp);
     (void) fclose(fp);
 }
 void
-x86_64_pseudo_inits(FILE *fp)
+pseudo_inits(FILE *fp)
 {
     register struct device *dp;
     int count;
@@ -2055,32 +98,3 @@ x86_64_pseudo_inits(FILE *fp)
     }
     fprintf(fp, "\t{0,\t0},\n};\n");
 }
-
-#endif /* MACHINE_X86_64 */
-
-char *
-intv(struct device *dev)
-{
-    static char buf[20];
-
-    if (dev->d_vec == 0) {
-        strcpy(buf, " 0");
-    } else {
-        (void) sprintf(buf, "%sint%d", dev->d_name, dev->d_unit);
-    }
-    return ns(buf);
-}
-
-char *
-qu(int num)
-{
-
-    if (num == QUES) {
-        strcpy(errbuf, "'?'");
-    } else if (num == UNKNOWN) {
-        strcpy(errbuf, " -1");
-    } else {
-        (void) sprintf(errbuf, "%3d", num);
-    }
-    return ns(errbuf);
-}
diff --git a/SETUP/config/mkmakefile.c b/SETUP/config/mkmakefile.c
index f52ff8e18..cbb7d2bd8 100644
--- a/SETUP/config/mkmakefile.c
+++ b/SETUP/config/mkmakefile.c
@@ -66,19 +66,11 @@ static char sccsid[] __attribute__((used)) = "@(#)mkmakefile.c 5.21 (Berkeley) 6
 void read_files(void);
 void do_objs(FILE *fp, const char *msg, int ext);
-void do_ordered(FILE *fp);
 void do_files(FILE *fp, const char *msg, char ext);
 void do_machdep(FILE *ofp);
-void do_build(const char *name, void (*format)(FILE *));
 void do_rules(FILE *f);
-void do_load(FILE *f);
-struct file_list *do_systemspec(FILE *f, struct file_list *fl, int first);
-void do_swapspec(FILE *f, const char *name, char *sysname);
 void copy_dependencies(FILE *makin, FILE *makout);
-void build_cputypes(FILE *fp);
-void build_confdep(FILE *fp);
-
 struct file_list *fl_lookup(char *file);
 struct file_list *fltail_lookup(char *file);
 struct file_list *new_fent(void);
@@ -86,8 +78,6 @@ struct file_list *new_fent(void);
 void put_source_file_name(FILE *fp, struct file_list *tp);
-#define DO_SWAPFILE 0
-
 #define next_word(fp, wd) \
     { register const char *word = get_word(fp); \
       if (word == (char *)EOF) \
@@ -153,34 +143,6 @@ new_fent(void)
 }
 char *COPTS;
-static struct users {
-    int u_default;
-    int u_min;
-    int u_max;
-} users[] = {
-    { 24, 2, 1024 },    /* MACHINE_VAX */
-    { 8, 2, 32 },       /* MACHINE_SUN */
-    { 16, 4, 32 },      /* MACHINE_ROMP */
-    { 8, 2, 32 },       /* MACHINE_SUN2 */
-    { 8, 2, 32 },       /* MACHINE_SUN3 */
-    { 24, 8, 1024},     /* MACHINE_MMAX */
-    { 32, 8, 1024},     /* MACHINE_SQT */
-    { 8, 2, 32 },       /* MACHINE_SUN4 */
-    { 2, 2, 1024 },     /* MACHINE_I386 */
-    { 32, 8, 1024 },    /* MACHINE_IX */
-    { 32, 8, 1024 },    /* MACHINE_MIPSY */
-    { 32, 8, 1024 },    /* MACHINE_MIPS*/
-    { 32, 8, 1024 },    /* MACHINE_I860*/
-    { 8, 2, 32 },       /* MACHINE_M68K */
-    { 8, 2, 32 },       /* MACHINE_M88K */
-    { 8, 2, 32 },       /* MACHINE_M98K */
-    { 8, 2, 32 },       /* MACHINE_HPPA */
-    { 8, 2, 32 },       /* MACHINE_SPARC */
-    { 8, 2, 32 },       /* MACHINE_PPC */
-    { 8, 2, 32 },       /* MACHINE_ARM */
-    { 8, 2, 32 },       /* MACHINE_X86_64 */
-};
-#define NUSERS (sizeof (users) / sizeof (users[0]))
 const char *
 get_VPATH(void)
@@ -210,7 +172,6 @@ makefile(void)
     char pname[BUFSIZ];
     char line[BUFSIZ];
     struct opt *op;
-    struct users *up;
     read_files();
     (void) sprintf(line, "%s/Makefile.template", config_directory);
@@ -222,13 +183,6 @@ makefile(void)
     dfp = fopen(path("Makefile"), "r");
     rename(path("Makefile"), path("Makefile.old"));
     unlink(path("Makefile.old"));
-    unlink(path("M.d"));
-    if ((ofp = fopen(path("M.d"), "w")) == NULL) {
-        perror(path("M.d"));
-        /* We'll let this error go */
-    }
-    else
-        fclose(ofp);
     ofp =
fopen(path("Makefile"), "w"); if (ofp == 0) { perror(path("Makefile")); @@ -236,19 +190,9 @@ makefile(void) } fprintf(ofp, "SOURCE_DIR=%s\n", source_directory); - if (machine == MACHINE_SUN || machine == MACHINE_SUN2 - || machine == MACHINE_SUN3 || machine == MACHINE_SUN4) - fprintf(ofp, "export IDENT=-D%s -D%s", machinename, allCaps(ident)); - else - fprintf(ofp, "export IDENT=-D%s", allCaps(ident)); + fprintf(ofp, "export CONFIG_DEFINES ="); if (profiling) fprintf(ofp, " -DGPROF"); - if (cputype == 0) { - printf("cpu type must be specified\n"); - exit(1); - } - do_build("cputypes.h", build_cputypes); - do_build("platforms.h", build_cputypes); for (op = opt; op; op = op->op_next) if (op->op_value) @@ -256,18 +200,6 @@ makefile(void) else fprintf(ofp, " -D%s", op->op_name); fprintf(ofp, "\n"); - if ((unsigned)machine > NUSERS) { - printf("maxusers config info isn't present, using vax\n"); - up = &users[MACHINE_VAX-1]; - } else - up = &users[machine-1]; - if (maxusers < up->u_min) { - maxusers = up->u_min; - } else if (maxusers > up->u_max) - printf("warning: maxusers > %d (%d)\n", up->u_max, maxusers); - if (maxusers) { - do_build("confdep.h", build_confdep); - } for (op = mkopt; op; op = op->op_next) if (op->op_value) fprintf(ofp, "%s=%s\n", op->op_name, op->op_value); @@ -279,8 +211,7 @@ makefile(void) goto percent; if (profiling && strncmp(line, "COPTS=", 6) == 0) { register char *cp; - if (machine != MACHINE_MMAX) - fprintf(ofp, + fprintf(ofp, "GPROF.EX=$(SOURCE_DIR)/machdep/%s/gmon.ex\n", machinename); cp = index(line, '\n'); if (cp) @@ -294,13 +225,7 @@ makefile(void) exit(1); } strcpy(COPTS, cp); - if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS) { - fprintf(ofp, "%s ${CCPROFOPT}\n", line); - fprintf(ofp, "PCOPTS=%s\n", cp); - } else if (machine == MACHINE_MMAX) - fprintf(ofp, "%s -p\n",line); - else - fprintf(ofp, "%s -pg\n", line); + fprintf(ofp, "%s -pg\n", line); continue; } fprintf(ofp, "%s", line); @@ -311,20 +236,16 @@ makefile(void) } else if (eq(line, "%CFILES\n")) { do_files(ofp, "CFILES=", 'c'); do_objs(ofp, "COBJS=", 'c'); + } else if (eq(line, "%CXXFILES\n")) { + do_files(ofp, "CXXFILES=", 'p'); + do_objs(ofp, "CXXOBJS=", 'p'); } else if (eq(line, "%SFILES\n")) { do_files(ofp, "SFILES=", 's'); do_objs(ofp, "SOBJS=", 's'); } else if (eq(line, "%MACHDEP\n")) { - /* - * Move do_machdep() after the mkopt stuff. 
- */ - for (op = mkopt; op; op = op->op_next) - fprintf(ofp, "%s=%s\n", op->op_name, op->op_value); do_machdep(ofp); } else if (eq(line, "%RULES\n")) do_rules(ofp); - else if (eq(line, "%LOAD\n")) - do_load(ofp); else fprintf(stderr, "Unknown %% construct in generic makefile: %s", @@ -355,12 +276,9 @@ read_files(void) const char *devorprof; int options; int not_option; - int ordered; - int sedit; /* SQT */ char pname[BUFSIZ]; char fname[1024]; char *rest = (char *) 0; - struct cputype *cp; int nreqs, first = 1, isdup; ftab = 0; @@ -378,13 +296,6 @@ next: * filename [ standard | optional ] * [ dev* | profiling-routine ] [ device-driver] */ - /* - * MACHINE_SQT ONLY: - * - * filename [ standard | optional ] - * [ ordered | sedit ] - * [ dev* | profiling-routine ] [ device-driver] - */ wd = get_word(fp); if (wd == (char *)EOF) { (void) fclose(fp); @@ -393,13 +304,6 @@ next: first++; goto openit; } - if (first == 2) { - (void) sprintf(fname, "files.%s", allCaps(ident)); - first++; - fp = fopenp(VPATH, fname, pname, "r"); - if (fp != 0) - goto next; - } return; } if (wd == 0) @@ -426,13 +330,8 @@ next: else isdup = 0; tp = 0; - if (first == 3 && (tp = fltail_lookup(this)) != 0) - printf("%s: Local file %s overrides %s.\n", - fname, this, tp->f_fn); nreqs = 0; devorprof = ""; - ordered = 0; - sedit = 1; /* SQT: assume sedit for now */ needs = 0; if (eq(wd, "standard")) goto checkdev; @@ -447,14 +346,6 @@ nextopt: next_word(fp, wd); if (wd == 0) goto doneopt; - if (eq(wd, "ordered")) { - ordered++; - goto nextopt; - } - if (machine == MACHINE_SQT && eq(wd, "sedit")) { - sedit++; - goto nextopt; - } if (eq(wd, "not")) { not_option = !not_option; goto nextopt; @@ -533,15 +424,6 @@ nextopt: goto nextopt; } - for (cp = cputype; cp; cp = cp->cpu_next) - if (opteq(cp->cpu_name, wd)) { - if (nreqs == 1) { - free(needs); - needs = 0; - } - goto nextopt; - } - invis: while ((wd = get_word(fp)) != 0) ; @@ -566,14 +448,6 @@ checkdev: goto getrest; next_word(fp, wd); if (wd) { - if (eq(wd, "ordered")) { - ordered++; - goto checkdev; - } - if (machine == MACHINE_SQT && eq(wd, "sedit")) { - sedit++; - goto checkdev; - } devorprof = wd; next_word(fp, wd); } @@ -606,10 +480,6 @@ getrest: else tp->f_type = NORMAL; tp->f_flags = 0; - if (ordered) - tp->f_flags |= ORDERED; - if (sedit) /* SQT */ - tp->f_flags |= SEDIT; tp->f_needs = needs; if (pf && pf->f_type == INVISIBLE) pf->f_flags = 1; /* mark as duplicate */ @@ -650,10 +520,6 @@ do_objs(FILE *fp, const char *msg, int ext) char *cp; char och; const char *sp; -#if DO_SWAPFILE - register struct file_list *fl; - char swapname[32]; -#endif /* DO_SWAPFILE */ fprintf(fp, "%s", msg); lpos = strlen(msg); @@ -665,8 +531,7 @@ do_objs(FILE *fp, const char *msg, int ext) * Check for '.o' file in list */ cp = tp->f_fn + (len = strlen(tp->f_fn)) - 1; - if ((ext == -1 && tp->f_flags & ORDERED) || /* not in objs */ - (ext != -1 && *cp != ext)) + if (ext != -1 && *cp != ext) continue; else if (*cp == 'o') { if (len + lpos > 72) { @@ -679,15 +544,6 @@ do_objs(FILE *fp, const char *msg, int ext) continue; } sp = tail(tp->f_fn); -#if DO_SWAPFILE - for (fl = conf_list; fl; fl = fl->f_next) { - if (fl->f_type != SWAPSPEC) - continue; - (void) sprintf(swapname, "swap%s.c", fl->f_fn); - if (eq(sp, swapname)) - goto cont; - } -#endif /* DO_SWAPFILE */ cp = (char *)sp + (len = strlen(sp)) - 1; och = *cp; *cp = 'o'; @@ -698,44 +554,8 @@ do_objs(FILE *fp, const char *msg, int ext) fprintf(fp, "%s ", sp); lpos += len + 1; *cp = och; -#if DO_SWAPFILE -cont: - ; -#endif /* DO_SWAPFILE */ } 
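The retained half of do_objs() above relies on a column-tracking emission idiom: lpos holds the current output column, a backslash-newline-tab continuation is emitted before a name would pass column 72, and lpos resets to 8 (the width of the leading tab) on each continuation line. A minimal standalone sketch of just that idiom follows; emit_list and names are illustrative names for this note, not identifiers from config:

#include <stdio.h>
#include <string.h>

/* Emit "MSG=a.o b.o ..." with backslash continuations, mirroring the
 * lpos bookkeeping in do_objs(): lpos starts at strlen(msg) and is
 * reset to 8, one tab stop, after every wrap. */
static void
emit_list(FILE *fp, const char *msg, const char *names[], int n)
{
    int lpos = (int)strlen(msg);
    int i;

    fprintf(fp, "%s", msg);
    for (i = 0; i < n; i++) {
        int len = (int)strlen(names[i]);
        if (len + lpos > 72) {      /* would run past column 72 */
            lpos = 8;               /* continuation starts after a tab */
            fprintf(fp, "\\\n\t");
        }
        fprintf(fp, "%s ", names[i]);
        lpos += len + 1;
    }
    putc('\n', fp);                 /* unconditional, as in the new code */
}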
- if (lpos != 8) - putc('\n', fp); -} - -/* not presently used and probably broken, use ORDERED instead */ -void -do_ordered(FILE *fp) -{ - register struct file_list *tp; - register int lpos, len; - char *cp; - char och; - const char *sp; - - fprintf(fp, "ORDERED="); - lpos = 10; - for (tp = ftab; tp != 0; tp = tp->f_next) { - if ((tp->f_flags & ORDERED) != ORDERED) - continue; - sp = tail(tp->f_fn); - cp = (char *)sp + (len = strlen(sp)) - 1; - och = *cp; - *cp = 'o'; - if (len + lpos > 72) { - lpos = 8; - fprintf(fp, "\\\n\t"); - } - fprintf(fp, "%s ", sp); - lpos += len + 1; - *cp = och; - } - if (lpos != 8) - putc('\n', fp); + putc('\n', fp); } void @@ -761,8 +581,7 @@ do_files(FILE *fp, const char *msg, char ext) put_source_file_name(fp, tp); lpos += len + 1; } - if (lpos != 8) - putc('\n', fp); + putc('\n', fp); } /* @@ -791,82 +610,6 @@ do_machdep(FILE *ofp) fclose(ifp); } - -/* - * Format configuration dependent parameter file. - */ - -void -build_confdep(FILE *fp) -{ - fprintf(fp, "#define MAXUSERS %d\n", maxusers); -} - -/* - * Format cpu types file. - */ - -void -build_cputypes(FILE *fp) -{ - struct cputype *cp; - - for (cp = cputype; cp; cp = cp->cpu_next) - fprintf(fp, "#define\t%s\t1\n", cp->cpu_name); -} - - - -/* - * Build a define parameter file. Create it first in a temporary location and - * determine if this new contents differs from the old before actually - * replacing the original (so as not to introduce avoidable extraneous - * compilations). - */ - -void -do_build(const char *name, void (*format)(FILE *)) -{ - static char temp[]="#config.tmp"; - FILE *tfp, *ofp; - int c; - - unlink(path(temp)); - tfp = fopen(path(temp), "w+"); - if (tfp == 0) { - perror(path(temp)); - exit(1); - } - unlink(path(temp)); - (*format)(tfp); - ofp = fopen(path(name), "r"); - if (ofp != 0) - { - fseek(tfp, 0, 0); - while ((c = fgetc(tfp)) != EOF) - if (fgetc(ofp) != c) - goto copy; - if (fgetc(ofp) == EOF) - goto same; - - } -copy: - if (ofp) - fclose(ofp); - unlink(path(name)); - ofp = fopen(path(name), "w"); - if (ofp == 0) { - perror(path(name)); - exit(1); - } - fseek(tfp, 0, 0); - while ((c = fgetc(tfp)) != EOF) - fputc(c, ofp); -same: - fclose(ofp); - fclose(tfp); -} - const char * tail(const char *fn) { @@ -920,46 +663,22 @@ do_rules(FILE *f) fprintf(f, "-include %sd\n", tp); fprintf(f, "%so: %s%s%c\n", tp, source_dir, np, och); if (och == 's') { - switch (machine) { - case MACHINE_MIPSY: - case MACHINE_MIPS: - break; - default: - fprintf(f, "\t${S_RULE_0}\n"); - fprintf(f, "\t${S_RULE_1A}%s%.*s${S_RULE_1B}%s\n", - source_dir, (int)(tp-np), np, nl); - fprintf(f, "\t${S_RULE_2}%s\n", nl); - break; - } + fprintf(f, "\t${S_RULE_0}\n"); + fprintf(f, "\t${S_RULE_1A}%s%.*s${S_RULE_1B}%s\n", + source_dir, (int)(tp-np), np, nl); + fprintf(f, "\t${S_RULE_2}%s\n", nl); continue; } extras = ""; switch (ftp->f_type) { case NORMAL: - switch (machine) { - - case MACHINE_MIPSY: - case MACHINE_MIPS: - break; - default: - goto common; - } + goto common; break; case DRIVER: - switch (machine) { - - case MACHINE_MIPSY: - case MACHINE_MIPS: - fprintf(f, "\t@${RM} %so\n", tp); - fprintf(f, "\t${CC} ${CCDFLAGS}%s %s%s%sc\n\n", - (ftp->f_extra?ftp->f_extra:""), extras, source_dir, np); - continue; - default: - extras = "_D"; - goto common; - } + extras = "_D"; + goto common; break; case PROFILING: @@ -970,33 +689,8 @@ do_rules(FILE *f) "config: COPTS undefined in generic makefile"); COPTS = ""; } - switch (machine) { - case MACHINE_MIPSY: - case MACHINE_MIPS: - fprintf(f, "\t@${RM} %so\n", tp); - 
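The do_build() routine removed above describes a write-if-changed guard: generate the new contents into a temporary file, compare byte-by-byte with the existing one, and rewrite only on a difference, so unchanged generated headers keep their timestamps and do not trigger rebuilds. A compact sketch of the same pattern, under the assumption that the source stream is seekable; write_if_changed and dst_path are illustrative names, not config's:

#include <stdio.h>

/* Returns 0 if dst already held exactly the bytes of src (no rewrite),
 * 1 if dst was (re)written, -1 on I/O error.  Same idea as do_build():
 * never touch the file, and thus its timestamp, when nothing changed. */
static int
write_if_changed(FILE *src, const char *dst_path)
{
    FILE *old = fopen(dst_path, "r");
    int c, same = (old != NULL);

    rewind(src);
    if (old) {
        while (same && (c = fgetc(src)) != EOF)
            same = (fgetc(old) == c);
        if (same && fgetc(old) != EOF)
            same = 0;               /* old file is longer */
        fclose(old);
    }
    if (same)
        return 0;
    old = fopen(dst_path, "w");
    if (old == NULL)
        return -1;
    rewind(src);
    while ((c = fgetc(src)) != EOF)
        fputc(c, old);
    fclose(old);
    return 1;
}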
fprintf(f, "\t${CC} ${CCPFLAGS}%s %s../%sc\n\n", - (ftp->f_extra?ftp->f_extra:""), extras, np); - continue; - case MACHINE_VAX: - case MACHINE_ROMP: - case MACHINE_SQT: - case MACHINE_MMAX: - case MACHINE_SUN3: - case MACHINE_SUN4: - case MACHINE_I386: - case MACHINE_I860: - case MACHINE_HPPA: - case MACHINE_SPARC: - case MACHINE_PPC: - case MACHINE_ARM: - case MACHINE_X86_64: - extras = "_P"; - goto common; - default: - fprintf(stderr, - "config: don't know how to profile kernel on this cpu\n"); - break; - } + extras = "_P"; + goto common; common: och_upper = och + 'A' - 'a'; @@ -1029,81 +723,6 @@ do_rules(FILE *f) } } -/* - * Create the load strings - */ -void -do_load(FILE *f) -{ - register struct file_list *fl; - int first = 1; - - fl = conf_list; - while (fl) { - if (fl->f_type != SYSTEMSPEC) { - fl = fl->f_next; - continue; - } - fl = do_systemspec(f, fl, first); - if (first) - first = 0; - } - fprintf(f, "LOAD ="); - for (fl = conf_list; fl != 0; fl = fl->f_next) - if (fl->f_type == SYSTEMSPEC) - fprintf(f, " %s", fl->f_needs); -#ifdef multimax - fprintf(f, "\n\nall .ORDER: includelinks ${LOAD}\n"); -#else /* multimax */ - fprintf(f, "\n\nall: includelinks ${LOAD}\n"); -#endif /* multimax */ - fprintf(f, "\n"); -} - -struct file_list * -do_systemspec(FILE *f, struct file_list *fl, __unused int first) -{ - /* - * Variable for kernel name. - */ - fprintf(f, "KERNEL_NAME=%s\n", fl->f_needs); - - fprintf(f, "%s .ORDER: %s.sys ${SYSDEPS}\n", - fl->f_needs, fl->f_needs); - fprintf(f, "\t${SYS_RULE_1}\n"); - fprintf(f, "\t${SYS_RULE_2}\n"); - fprintf(f, "\t${SYS_RULE_3}\n"); - fprintf(f, "\t${SYS_RULE_4}\n\n"); - do_swapspec(f, fl->f_fn, fl->f_needs); - for (fl = fl->f_next; fl != NULL && fl->f_type == SWAPSPEC; fl = fl->f_next) - continue; - return (fl); -} - -void -do_swapspec(__unused FILE *f, __unused const char *name, __unused char *sysname) -{ - -#if DO_SWAPFILE - char *gdir = eq(name, "generic")?"$(MACHINEDIR)/":""; - - fprintf(f, "%s.sys:${P} ${PRELDDEPS} ${LDOBJS} ${LDDEPS}\n\n", sysname); - fprintf(f, "%s.swap: swap%s.o\n", sysname, name); - fprintf(f, "\t@rm -f $@\n"); - fprintf(f, "\t@cp swap%s.o $@\n\n", name); - fprintf(f, "swap%s.o: %sswap%s.c ${SWAPDEPS}\n", name, gdir, name); - if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS) { - fprintf(f, "\t@${RM} swap%s.o\n", name); - fprintf(f, "\t${CC} ${CCNFLAGS} %sswap%s.c\n\n", gdir, name); - } else { - fprintf(f, "\t${C_RULE_1A}%s${C_RULE_1B}\n", gdir); - fprintf(f, "\t${C_RULE_2}\n"); - fprintf(f, "\t${C_RULE_3}\n"); - fprintf(f, "\t${C_RULE_4}\n\n"); - } -#endif /* DO_SWAPFILE */ -} - char * allCaps(str) register char *str; diff --git a/SETUP/config/mkswapconf.c b/SETUP/config/mkswapconf.c deleted file mode 100644 index fdd14d722..000000000 --- a/SETUP/config/mkswapconf.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights - * Reserved. This file contains Original Code and/or Modifications of - * Original Code as defined in and that are subject to the Apple Public - * Source License Version 1.0 (the 'License'). You may not use this file - * except in compliance with the License. Please obtain a copy of the - * License at http://www.apple.com/publicsource and read it before using - * this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License." - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Mach Operating System - * Copyright (c) 1990 Carnegie-Mellon University - * Copyright (c) 1989 Carnegie-Mellon University - * All rights reserved. The CMU software License Agreement specifies - * the terms and conditions for use and redistribution. - */ - -/* - * Copyright (c) 1980 Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. - */ - -#ifndef lint -static char sccsid[] __attribute__((used)) = "@(#)mkswapconf.c 5.6 (Berkeley) 6/18/88"; -#endif /* not lint */ - -/* - * Build a swap configuration file. - */ -#include "config.h" - -#include -#include /* for unlink */ -#include - -struct file_list *do_swap(struct file_list *fl); -void initdevtable(void); - -void -swapconf(void) -{ - register struct file_list *fl; - - fl = conf_list; - while (fl) { - if (fl->f_type != SYSTEMSPEC) { - fl = fl->f_next; - continue; - } - fl = do_swap(fl); - } -} - -struct file_list * -do_swap(struct file_list *fl) -{ - FILE *fp; - char swapname[80]; - register struct file_list *swap; - dev_t dev; - - if (eq(fl->f_fn, "generic")) { - fl = fl->f_next; - return (fl->f_next); - } - if (machine == MACHINE_MMAX) { - printf("Error: Multimax must specify swap generic only.\n"); - exit(1); - } - (void) sprintf(swapname, "swap%s.c", fl->f_fn); - fp = fopen(path(swapname), "w"); - if (fp == 0) { - perror(path(swapname)); - exit(1); - } - fprintf(fp, "#include \n"); - fprintf(fp, "#include \n"); - fprintf(fp, "\n"); - /* - * If there aren't any swap devices - * specified, just return, the error - * has already been noted. 
- */ - swap = fl->f_next; - if (swap == 0 || swap->f_type != SWAPSPEC) { - (void) unlink(path(swapname)); - fclose(fp); - return (swap); - } - fprintf(fp, "dev_t\trootdev = makedev(%d, %d);\n", - major(fl->f_rootdev), minor(fl->f_rootdev)); - fprintf(fp, "dev_t\targdev = makedev(%d, %d);\n", - major(fl->f_argdev), minor(fl->f_argdev)); - fprintf(fp, "dev_t\tdumpdev = makedev(%d, %d);\n", - major(fl->f_dumpdev), minor(fl->f_dumpdev)); - fprintf(fp, "\n"); - fprintf(fp, "struct\tswdevt swdevt[] = {\n"); - do { - dev = swap->f_swapdev; - fprintf(fp, "\t{ makedev(%d, %d),\t0,\t%d },\t/* %s */\n", - major(dev), minor(dev), swap->f_swapsize, swap->f_fn); - swap = swap->f_next; - } while (swap && swap->f_type == SWAPSPEC); - fprintf(fp, "\t{ 0, 0, 0 }\n"); - fprintf(fp, "};\n"); - if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS) { - fprintf(fp, "\nsetconf()\n"); - fprintf(fp, "{\n"); - fprintf(fp, "\t/* resolve reference for non-generic kernels */\n"); - fprintf(fp, "}\n"); - } - fclose(fp); - return (swap); -} - -static int devtablenotread = 1; -static struct devdescription { - char *dev_name; - int dev_major; - struct devdescription *dev_next; -} *devtable; - -/* - * Given a device name specification figure out: - * major device number - * partition - * device name - * unit number - * This is a hack, but the system still thinks in - * terms of major/minor instead of string names. - */ -dev_t -nametodev(char *name, int defunit, char defpartition) -{ - char *cp, partition; - int unit; - register struct devdescription *dp; - - cp = name; - if (cp == 0) { - fprintf(stderr, "config: internal error, nametodev\n"); - exit(1); - } - while (*cp && !isdigit(*cp)) - cp++; - unit = *cp ? atoi(cp) : defunit; - if (unit < 0 || unit > 31) { - fprintf(stderr, -"config: %s: invalid device specification, unit out of range\n", name); - unit = defunit; /* carry on more checking */ - } - if (*cp) { - *cp++ = '\0'; - while (*cp && isdigit(*cp)) - cp++; - } - partition = *cp ? 
*cp : defpartition; - if (partition < 'a' || partition > 'h') { - fprintf(stderr, -"config: %c: invalid device specification, bad partition\n", *cp); - partition = defpartition; /* carry on */ - } - if (devtablenotread) - initdevtable(); - for (dp = devtable; dp->dev_next; dp = dp->dev_next) - if (eq(name, dp->dev_name)) - break; - if (dp == 0) { - fprintf(stderr, "config: %s: unknown device\n", name); - return (NODEV); - } - return (makedev(dp->dev_major, (unit << DEV_SHIFT) + (partition - 'a'))); -} - -char * -devtoname(dev_t dev) -{ - char buf[80]; - register struct devdescription *dp; - - if (devtablenotread) - initdevtable(); - for (dp = devtable; dp->dev_next; dp = dp->dev_next) - if (major(dev) == dp->dev_major) - break; - if (dp == 0) - dp = devtable; - (void) sprintf(buf, "%s%d%c", dp->dev_name, - minor(dev) >> DEV_SHIFT, (minor(dev) & DEV_MASK) + 'a'); - return (ns(buf)); -} - -void -initdevtable(void) -{ - char buf[BUFSIZ]; - char line[BUFSIZ]; - int maj; - register struct devdescription **dp = &devtable; - FILE *fp; - - (void) sprintf(buf, "%s/devices.%s", config_directory, machinename); - fp = fopenp(VPATH, buf, line, "r"); - if (fp == NULL) { - fprintf(stderr, "config: can't open %s\n", buf); - exit(1); - } - while (fgets(line, BUFSIZ, fp) != 0) { - if (*line == '#' || *line == '\n') - continue; - if (sscanf(line, "%s\t%d\n", buf, &maj) != 2) - break; - *dp = (struct devdescription *)malloc(sizeof (**dp)); - (*dp)->dev_name = ns(buf); - (*dp)->dev_major = maj; - dp = &(*dp)->dev_next; - } - *dp = 0; - fclose(fp); - devtablenotread = 0; -} diff --git a/SETUP/config/parser.y b/SETUP/config/parser.y index 4f77b93e4..8c1345af3 100644 --- a/SETUP/config/parser.y +++ b/SETUP/config/parser.y @@ -34,85 +34,25 @@ struct idlst *lst; } -%token ADDRMOD -%token AND -%token ANY -%token ARGS -%token AT -%token BIN %token BUILDDIR %token COMMA -%token CONFIG -%token CONFIGDIR -%token CONTROLLER -%token CPU -%token CSR -%token DEVICE -%token DISK -%token DRIVE -%token DST -%token DUMPS %token EQUALS -%token FLAGS -%token HZ -%token IDENT %token INIT %token MACHINE -%token MAJOR -%token MASTER -%token MAXUSERS -%token MAXDSIZ -%token MBA -%token MBII -%token MINOR -%token MINUS -%token NEXUS %token OBJECTDIR -%token ON %token OPTIONS %token MAKEOPTIONS -%token PRIORITY -%token PROFILE %token PSEUDO_DEVICE -%token ROOT %token SEMICOLON -%token SIZE -%token SLAVE %token SOURCEDIR -%token SWAP -%token TIMEZONE %token TRACE -%token UBA -%token VECTOR -%token VME -%token VME16D16 -%token VME24D16 -%token VME32D16 -%token VME16D32 -%token VME24D32 -%token VME32D32 - -/* following 3 are unique to CMU */ -%token LUN -%token SLOT -%token TAPE %token ID %token NUMBER -%token FPNUMBER %type Save_id %type Opt_value %type Dev -%type Id_list -%type optional_size -%type device_name -%type major_minor -%type arg_device_spec -%type root_device_spec -%type dump_device_spec -%type swap_device_spec -%type Value %{ @@ -128,25 +68,12 @@ char *val_id; int yylex(void); -int finddev(dev_t dev); -int alreadychecked(dev_t dev, dev_t list[], dev_t *last); void deverror(const char *systemname, const char *devtype); -void mkconf(char *sysname); -struct file_list *newswap(void); -void mkswap(struct file_list *syslist, struct file_list *fl, int size); -struct device *huhcon(const char *dev); -void check_nexus(struct device *dev, int num); -void check_slot(struct device *dev, int num); -void checksystemspec(struct file_list *fl); -void verifysystemspecs(void); -dev_t *verifyswap(struct file_list *fl, dev_t checked[], dev_t 
*pchecked); -struct device *dconnect(const char *dev, int num); %} %% Configuration: Many_specs - { verifysystemspecs(); } ; Many_specs: @@ -169,268 +96,22 @@ Spec: Config_spec: MACHINE Save_id - { - if (!strcmp($2, "vax")) { - machine = MACHINE_VAX; - machinename = "vax"; - } else if (!strcmp($2, "sun")) { - /* default to Sun 3 */ - machine = MACHINE_SUN3; - machinename = "sun3"; - } else if (!strcmp($2, "sun2")) { - machine = MACHINE_SUN2; - machinename = "sun2"; - } else if (!strcmp($2, "sun3")) { - machine = MACHINE_SUN3; - machinename = "sun3"; - } else if (!strcmp($2, "sun4")) { - machine = MACHINE_SUN4; - machinename = "sun4"; - } else if (!strcmp($2, "romp")) { - machine = MACHINE_ROMP; - machinename = "romp"; - } else if (!strcmp($2, "ca")) { - machine = MACHINE_ROMP; - machinename = "ca"; - } else if (!strcmp($2, "mmax")) { - machine = MACHINE_MMAX; - machinename = "mmax"; - } else if (!strcmp($2, "sqt")) { - machine = MACHINE_SQT; - machinename = "sqt"; - } else if (!strcmp($2, "i")) { - machine = MACHINE_I386; - machinename = "i386"; - } else if (!strcmp($2, "i386")) { - machine = MACHINE_I386; - machinename = "i386"; - } else if (!strcmp($2, "ix")) { - machine = MACHINE_IX; - machinename = "ix"; - } else if (!strcmp($2, "mipsy")) { - machine = MACHINE_MIPSY; - machinename = "mipsy"; - } else if (!strcmp($2, "mips")) { - machine = MACHINE_MIPS; - machinename = "mips"; - } else if (!strcmp($2, "i860")) { - machine = MACHINE_I860; - machinename = "i860"; - } else if (!strcmp($2, "m68k")) { - machine = MACHINE_M68K; - machinename = "m68k"; - } else if (!strcmp($2, "m88k")) { - machine = MACHINE_M88K; - machinename = "m88k"; - } else if (!strcmp($2, "m98k")) { - machine = MACHINE_M98K; - machinename = "m98k"; - } else if (!strcmp($2, "hppa")) { - machine = MACHINE_HPPA; - machinename = "hppa"; - } else if (!strcmp($2, "sparc")) { - machine = MACHINE_SPARC; - machinename = "sparc"; - } else if (!strcmp($2, "ppc")) { - machine = MACHINE_PPC; - machinename = "ppc"; - } else if (!strcmp($2, "arm")) { - machine = MACHINE_ARM; - machinename = "arm"; - } else if (!strcmp($2, "x86_64")) { - machine = MACHINE_X86_64; - machinename = "x86_64"; - } else - yyerror("Unknown machine type"); - } | - CPU Save_id - { - struct cputype *cp = - (struct cputype *)malloc(sizeof (struct cputype)); - cp->cpu_name = ns($2); - cp->cpu_next = cputype; - cputype = cp; - free(temp_id); - } | + { machinename = ns($2); } + | OPTIONS Opt_list | MAKEOPTIONS Mkopt_list | - IDENT ID - { ident = ns($2); } - | - System_spec - | - MAXUSERS NUMBER - { maxusers = $2; } - | BUILDDIR Save_id { build_directory = ns($2); } | - CONFIGDIR Save_id - { config_directory = ns($2); } - | OBJECTDIR Save_id { object_directory = ns($2); } | SOURCEDIR Save_id { source_directory = ns($2); } - | - PROFILE - { profiling++; } ; -System_spec: - System_id - { checksystemspec(*confp); } - | System_id System_parameter_list - { checksystemspec(*confp); } - ; - -System_id: - CONFIG Save_id - { mkconf($2); } - ; - -System_parameter_list: - System_parameter_list System_parameter - | System_parameter - ; - -System_parameter: - swap_spec - | root_spec - | dump_spec - | arg_spec - ; - -swap_spec: - SWAP optional_on swap_device_list - ; - -swap_device_list: - swap_device_list AND swap_device - | swap_device - ; - -swap_device: - swap_device_spec optional_size - { mkswap(*confp, $1, $2); } - ; - -swap_device_spec: - device_name - { - struct file_list *fl = newswap(); - - if (eq($1, "generic")) - fl->f_fn = $1; - else { - fl->f_swapdev = nametodev($1, 
0, 'b'); - fl->f_fn = devtoname(fl->f_swapdev); - } - $$ = fl; - } - | major_minor - { - struct file_list *fl = newswap(); - - fl->f_swapdev = $1; - fl->f_fn = devtoname($1); - $$ = fl; - } - ; - -root_spec: - ROOT optional_on root_device_spec - { - struct file_list *fl = *confp; - - if (fl && fl->f_rootdev != NODEV) - yyerror("extraneous root device specification"); - else - fl->f_rootdev = $3; - } - ; - -root_device_spec: - device_name - { $$ = nametodev($1, 0, 'a'); } - | major_minor - ; - -dump_spec: - DUMPS optional_on dump_device_spec - { - struct file_list *fl = *confp; - - if (fl && fl->f_dumpdev != NODEV) - yyerror("extraneous dump device specification"); - else - fl->f_dumpdev = $3; - } - - ; - -dump_device_spec: - device_name - { $$ = nametodev($1, 0, 'b'); } - | major_minor - ; - -arg_spec: - ARGS optional_on arg_device_spec - { - struct file_list *fl = *confp; - - if (fl && fl->f_argdev != NODEV) - yyerror("extraneous arg device specification"); - else - fl->f_argdev = $3; - } - ; - -arg_device_spec: - device_name - { $$ = nametodev($1, 0, 'b'); } - | major_minor - ; - -major_minor: - MAJOR NUMBER MINOR NUMBER - { $$ = makedev($2, $4); } - ; - -optional_on: - ON - | /* empty */ - ; - -optional_size: - SIZE NUMBER - { $$ = $2; } - | /* empty */ - { $$ = 0; } - ; - -device_name: - Save_id - { $$ = $1; } - | Save_id NUMBER - { - char buf[80]; - - (void) sprintf(buf, "%s%d", $1, $2); - $$ = ns(buf); free($1); - } - | Save_id NUMBER ID - { - char buf[80]; - - (void) sprintf(buf, "%s%d%s", $1, $2, $3); - $$ = ns(buf); free($1); - } - ; Opt_list: Opt_list COMMA Option @@ -518,77 +199,11 @@ Mkoption: } ; Dev: - UBA - { $$ = ns("uba"); } | - MBA - { $$ = ns("mba"); } | - VME16D16 - { - if (machine != MACHINE_SUN2 && machine != MACHINE_SUN3 - && machine != MACHINE_SUN4) - yyerror("wrong machine type for vme16d16"); - $$ = ns("vme16d16"); - } | - VME24D16 - { - if (machine != MACHINE_SUN2 && machine != MACHINE_SUN3 - && machine != MACHINE_SUN4) - yyerror("wrong machine type for vme24d16"); - $$ = ns("vme24d16"); - } | - VME32D16 - { - if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4) - - yyerror("wrong machine type for vme32d16"); - $$ = ns("vme32d16"); - } | - VME16D32 - { - if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4) - yyerror("wrong machine type for vme16d32"); - $$ = ns("vme16d32"); - } | - VME24D32 - { - if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4) - yyerror("wrong machine type for vme24d32"); - $$ = ns("vme24d32"); - } | - VME32D32 - { - if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4) - yyerror("wrong machine type for vme32d32"); - $$ = ns("vme32d32"); - } | - VME - { - if (machine != MACHINE_MIPSY && machine != MACHINE_MIPS) - yyerror("wrong machine type for vme"); - $$ = ns("vme"); - } | - MBII - { - if (machine != MACHINE_MIPSY && machine != MACHINE_MIPS) - yyerror("wrong machine type for mbii"); - $$ = ns("mbii"); - } | ID { $$ = ns($1); } ; Device_spec: - DEVICE Dev_name Dev_info Int_spec - { cur.d_type = DEVICE; } | - MASTER Dev_name Dev_info Int_spec - { cur.d_type = MASTER; } | - DISK Dev_name Dev_info Int_spec - { cur.d_dk = 1; cur.d_type = DEVICE; } | -/* TAPE rule is unique to CMU */ - TAPE Dev_name Dev_info Int_spec - { cur.d_type = DEVICE; } | - CONTROLLER Dev_name Dev_info Int_spec - { cur.d_type = CONTROLLER; } | PSEUDO_DEVICE Init_dev Dev { cur.d_name = $3; @@ -614,172 +229,10 @@ Device_spec: cur.d_init = ns($6); }; -Dev_name: - Init_dev Dev NUMBER - { - cur.d_name = $2; - if (eq($2, "mba")) - seen_mba = 1; - else if (eq($2, 
"uba")) - seen_uba = 1; - else if (eq($2, "mbii")) - seen_mbii = 1; - else if (eq($2, "vme")) - seen_vme = 1; - cur.d_unit = $3; - }; - Init_dev: /* lambda */ { init_dev(&cur); }; -Dev_info: - Con_info Info_list - | - /* lambda */ - ; - -Con_info: - AT Dev NUMBER - { - if (eq(cur.d_name, "mba") || eq(cur.d_name, "uba") - || eq(cur.d_name, "mbii") || eq(cur.d_name, "vme")) { - (void) sprintf(errbuf, - "%s must be connected to a nexus", cur.d_name); - yyerror(errbuf); - } - cur.d_conn = dconnect($2, $3); - if (machine == MACHINE_SQT) - dev_param(&cur, "index", cur.d_unit); - } | -/* AT SLOT NUMBER rule is unique to CMU */ - AT SLOT NUMBER - { - check_slot(&cur, $3); - cur.d_addr = $3; - cur.d_conn = TO_SLOT; - } | - AT NEXUS NUMBER - { check_nexus(&cur, $3); cur.d_conn = TO_NEXUS; }; - -Info_list: - Info_list Info - | - /* lambda */ - ; - -Info: - CSR NUMBER - { - cur.d_addr = $2; - if (machine == MACHINE_SQT) { - dev_param(&cur, "csr", $2); - } - } | - DRIVE NUMBER - { - cur.d_drive = $2; - if (machine == MACHINE_SQT) { - dev_param(&cur, "drive", $2); - } - } | - SLAVE NUMBER - { - if (cur.d_conn != 0 && cur.d_conn != TO_NEXUS && - cur.d_conn->d_type == MASTER) - cur.d_slave = $2; - else - yyerror("can't specify slave--not to master"); - } | -/* MIPS */ - ADDRMOD NUMBER - { cur.d_addrmod = $2; } | -/* LUN NUMBER rule is unique to CMU */ - LUN NUMBER - { - if ((cur.d_conn != 0) && (cur.d_conn != TO_SLOT) && - (cur.d_conn->d_type == CONTROLLER)) { - cur.d_addr = $2; - } - else { - yyerror("device requires controller card"); - } - } | - FLAGS NUMBER - { - cur.d_flags = $2; - if (machine == MACHINE_SQT) { - dev_param(&cur, "flags", $2); - } - } | - BIN NUMBER - { - if (machine != MACHINE_SQT) - yyerror("bin specification only valid on Sequent Balance"); - if ($2 < 1 || $2 > 7) - yyerror("bogus bin number"); - else { - cur.d_bin = $2; - dev_param(&cur, "bin", $2); - } - } | - Dev Value - { - if (machine != MACHINE_SQT) - yyerror("bad device spec"); - dev_param(&cur, $1, $2); - }; - -Value: - NUMBER - | - MINUS NUMBER - { $$ = -($2); } - ; - -Int_spec: - Vec_spec - { cur.d_pri = 0; } | - PRIORITY NUMBER - { cur.d_pri = $2; } | - PRIORITY NUMBER Vec_spec - { cur.d_pri = $2; } | - Vec_spec PRIORITY NUMBER - { cur.d_pri = $3; } | - /* lambda */ - ; - -Vec_spec: - VECTOR Id_list - { cur.d_vec = $2; }; - - -Id_list: - Save_id - { - struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst)); - a->id = $1; a->id_next = 0; $$ = a; - a->id_vec = 0; - } | - Save_id Id_list - { - struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst)); - a->id = $1; a->id_next = $2; $$ = a; - a->id_vec = 0; - } | - Save_id NUMBER - { - struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst)); - a->id_next = 0; a->id = $1; $$ = a; - a->id_vec = $2; - } | - Save_id NUMBER Id_list - { - struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst)); - a->id_next = $3; a->id = $1; $$ = a; - a->id_vec = $2; - }; - %% void @@ -819,442 +272,17 @@ newdev(struct device *dp) curp->d_next = 0; } -/* - * note that a configuration should be made - */ -void -mkconf(char *sysname) -{ - register struct file_list *fl, **flp; - - fl = (struct file_list *) malloc(sizeof *fl); - fl->f_type = SYSTEMSPEC; - fl->f_needs = sysname; - fl->f_rootdev = NODEV; - fl->f_argdev = NODEV; - fl->f_dumpdev = NODEV; - fl->f_fn = 0; - fl->f_next = 0; - for (flp = confp; *flp; flp = &(*flp)->f_next) - ; - *flp = fl; - confp = flp; -} - -struct file_list * -newswap(void) -{ - struct file_list *fl = (struct file_list *)malloc(sizeof (*fl)); - - 
fl->f_type = SWAPSPEC; - fl->f_next = 0; - fl->f_swapdev = NODEV; - fl->f_swapsize = 0; - fl->f_needs = 0; - fl->f_fn = 0; - return (fl); -} - -/* - * Add a swap device to the system's configuration - */ -void -mkswap(struct file_list *syslist, struct file_list *fl, int size) -{ - register struct file_list **flp; - - if (syslist == 0 || syslist->f_type != SYSTEMSPEC) { - yyerror("\"swap\" spec precedes \"config\" specification"); - return; - } - if (size < 0) { - yyerror("illegal swap partition size"); - return; - } - /* - * Append swap description to the end of the list. - */ - flp = &syslist->f_next; - for (; *flp && (*flp)->f_type == SWAPSPEC; flp = &(*flp)->f_next) - ; - fl->f_next = *flp; - *flp = fl; - fl->f_swapsize = size; - /* - * If first swap device for this system, - * set up f_fn field to insure swap - * files are created with unique names. - */ - if (syslist->f_fn) - return; - if (eq(fl->f_fn, "generic")) - syslist->f_fn = ns(fl->f_fn); - else - syslist->f_fn = ns(syslist->f_needs); -} - -/* - * find the pointer to connect to the given device and number. - * returns 0 if no such device and prints an error message - */ -struct device * -dconnect(const char *dev, int num) -{ - register struct device *dp; - - if (num == QUES) - return (huhcon(dev)); - for (dp = dtab; dp != 0; dp = dp->d_next) { - if ((num != dp->d_unit) || !eq(dev, dp->d_name)) - continue; - if (dp->d_type != CONTROLLER && dp->d_type != MASTER) { - (void) sprintf(errbuf, - "%s connected to non-controller", dev); - yyerror(errbuf); - return (0); - } - return (dp); - } - (void) sprintf(errbuf, "%s %d not defined", dev, num); - yyerror(errbuf); - return (0); -} - -/* - * connect to an unspecific thing - */ -struct device * -huhcon(const char *dev) -{ - register struct device *dp, *dcp; - struct device rdev; /* only used if dp is NULL */ - int oldtype; - - memset(&rdev, 0, sizeof rdev); - - /* - * First make certain that there are some of these to wildcard on - */ - for (dp = dtab; dp != 0; dp = dp->d_next) - if (eq(dp->d_name, dev)) - break; - if (dp == 0) { - (void) sprintf(errbuf, "no %s's to wildcard", dev); - yyerror(errbuf); - return (0); - } - oldtype = dp->d_type; - dcp = dp->d_conn; - /* - * Now see if there is already a wildcard entry for this device - * (e.g. Search for a "uba ?") - */ - for (; dp != 0; dp = dp->d_next) - if (eq(dev, dp->d_name) && dp->d_unit == -1) - break; - /* - * If there isn't, make one because everything needs to be connected - * to something. - */ - if (dp == 0) { - dp = &rdev; - init_dev(dp); - dp->d_unit = QUES; - dp->d_name = ns(dev); - dp->d_type = oldtype; - newdev(dp); - dp = curp; - /* - * Connect it to the same thing that other similar things are - * connected to, but make sure it is a wildcard unit - * (e.g. up connected to sc ?, here we make connect sc? to a - * uba?). If other things like this are on the NEXUS or - * if they aren't connected to anything, then make the same - * connection, else call ourself to connect to another - * unspecific device. 
- */ - if (dcp == TO_NEXUS || dcp == 0) - dp->d_conn = dcp; - else - dp->d_conn = dconnect(dcp->d_name, QUES); - } - return (dp); -} - void init_dev(struct device *dp) { dp->d_name = "OHNO!!!"; - dp->d_type = DEVICE; - dp->d_conn = 0; - dp->d_vec = 0; - dp->d_addr = dp->d_pri = dp->d_flags = dp->d_dk = 0; - dp->d_slave = dp->d_drive = dp->d_unit = UNKNOWN; - if (machine == MACHINE_SUN2 || machine == MACHINE_SUN3 - || machine == MACHINE_SUN4){ - dp->d_addr = UNKNOWN; - dp->d_mach = dp->d_bus = 0; - } - if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS){ - dp->d_addrmod = 0; - } + dp->d_type = PSEUDO_DEVICE; + dp->d_flags = 0; + dp->d_slave = UNKNOWN; dp->d_init = 0; } -/* - * make certain that this is a reasonable type of thing to connect to a nexus - */ -void -check_nexus(struct device *dev, int num) -{ - - switch (machine) { - - case MACHINE_VAX: - if (!eq(dev->d_name, "uba") && !eq(dev->d_name, "mba")) - yyerror("only uba's and mba's should be connected to the nexus"); - if (num != QUES) - yyerror("can't give specific nexus numbers"); - break; - - case MACHINE_SUN: - if (!eq(dev->d_name, "mb")) - yyerror("only mb's should be connected to the nexus"); - break; - - case MACHINE_ROMP: - if (!eq(dev->d_name, "iocc")) - yyerror("only iocc's should be connected to the nexus"); - break; - case MACHINE_SUN2: - if (!eq(dev->d_name, "virtual") && - !eq(dev->d_name, "obmem") && - !eq(dev->d_name, "obio") && - !eq(dev->d_name, "mbmem") && - !eq(dev->d_name, "mbio") && - !eq(dev->d_name, "vme16d16") && - !eq(dev->d_name, "vme24d16")) { - (void)sprintf(errbuf, - "unknown bus type `%s' for nexus connection on %s", - dev->d_name, machinename); - yyerror(errbuf); - } - - case MACHINE_MMAX: - yyerror("don't grok 'nexus' on mmax -- try 'slot'."); - break; - case MACHINE_SUN3: - case MACHINE_SUN4: - if (!eq(dev->d_name, "virtual") && - !eq(dev->d_name, "obmem") && - !eq(dev->d_name, "obio") && - !eq(dev->d_name, "mbmem") && - !eq(dev->d_name, "mbio") && - !eq(dev->d_name, "vme16d16") && - !eq(dev->d_name, "vme24d16") && - !eq(dev->d_name, "vme32d16") && - !eq(dev->d_name, "vme16d32") && - !eq(dev->d_name, "vme24d32") && - !eq(dev->d_name, "vme32d32")) { - (void)sprintf(errbuf, - "unknown bus type `%s' for nexus connection on %s", - dev->d_name, machinename); - yyerror(errbuf); - } - break; - case MACHINE_MIPSY: - case MACHINE_MIPS: - if (!eq(dev->d_name, "vme") && !eq(dev->d_name, "mbii")) - yyerror("only vme's and mbii's should be connected to the nexus"); - if (num != QUES) - yyerror("can't give specific nexus numbers"); - break; - } -} - -/* - * make certain that this is a reasonable type of thing to connect to a slot - */ - -void -check_slot(struct device *dev, int num) -{ - - switch (machine) { - - case MACHINE_MMAX: - if (!eq(dev->d_name, "emc")) - yyerror("only emc's plug into backplane slots."); - if (num == QUES) - yyerror("specific slot numbers must be given"); - break; - - case MACHINE_SQT: - if (!eq(dev->d_name, "mbad") && - !eq(dev->d_name, "zdc") && - !eq(dev->d_name, "sec")) { - (void)sprintf(errbuf, - "unknown bus type `%s' for slot on %s", - dev->d_name, machinename); - yyerror(errbuf); - } - break; - - default: - yyerror("don't grok 'slot' for this machine -- try 'nexus'."); - break; - } -} - -/* - * Check system specification and apply defaulting - * rules on root, argument, dump, and swap devices. 
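[review note] To make the defaulting in checksystemspec() below concrete, assuming DEV_MASK covers the partition bits of a minor number: a system configured with root on sd0a has zero partition bits, so the synthesized swap device becomes makedev(major(rootdev), (minor(rootdev) &~ DEV_MASK) | ('b' - 'a')), that is, the 'b' partition of the same unit (sd0b); had root been placed on sd0c instead, minor(rootdev) & DEV_MASK would be nonzero and the "swap defaulted to 'b' partition" warning would fire.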
- */ -void -checksystemspec(struct file_list *fl) -{ - char buf[BUFSIZ]; - register struct file_list *swap; - int generic; - - if (fl == 0 || fl->f_type != SYSTEMSPEC) { - yyerror("internal error, bad system specification"); - exit(1); - } - swap = fl->f_next; - generic = swap && swap->f_type == SWAPSPEC && eq(swap->f_fn, "generic"); - if (fl->f_rootdev == NODEV && !generic) { - yyerror("no root device specified"); - exit(1); - } - /* - * Default swap area to be in 'b' partition of root's - * device. If root specified to be other than on 'a' - * partition, give warning, something probably amiss. - */ - if (swap == 0 || swap->f_type != SWAPSPEC) { - dev_t dev; - - swap = newswap(); - dev = fl->f_rootdev; - if (minor(dev) & DEV_MASK) { - (void) sprintf(buf, -"Warning, swap defaulted to 'b' partition with root on '%c' partition", - (minor(dev) & DEV_MASK) + 'a'); - yyerror(buf); - } - swap->f_swapdev = - makedev(major(dev), (minor(dev) &~ DEV_MASK) | ('b' - 'a')); - swap->f_fn = devtoname(swap->f_swapdev); - mkswap(fl, swap, 0); - } - /* - * Make sure a generic swap isn't specified, along with - * other stuff (user must really be confused). - */ - if (generic) { - if (fl->f_rootdev != NODEV) - yyerror("root device specified with generic swap"); - if (fl->f_argdev != NODEV) - yyerror("arg device specified with generic swap"); - if (fl->f_dumpdev != NODEV) - yyerror("dump device specified with generic swap"); - return; - } - /* - * Default argument device and check for oddball arrangements. - */ - if (fl->f_argdev == NODEV) - fl->f_argdev = swap->f_swapdev; - if (fl->f_argdev != swap->f_swapdev) - yyerror("Warning, arg device different than primary swap"); - /* - * Default dump device and warn if place is not a - * swap area or the argument device partition. - */ - if (fl->f_dumpdev == NODEV) - fl->f_dumpdev = swap->f_swapdev; - if (fl->f_dumpdev != swap->f_swapdev && fl->f_dumpdev != fl->f_argdev) { - struct file_list *p = swap->f_next; - - for (; p && p->f_type == SWAPSPEC; p = p->f_next) - if (fl->f_dumpdev == p->f_swapdev) - return; - (void) sprintf(buf, "Warning, orphaned dump device, %s", - "do you know what you're doing"); - yyerror(buf); - } -} - -/* - * Verify all devices specified in the system specification - * are present in the device specifications. - */ -void -verifysystemspecs(void) -{ - register struct file_list *fl; - dev_t checked[50]; - register dev_t *pchecked = checked; - - for (fl = conf_list; fl; fl = fl->f_next) { - if (fl->f_type != SYSTEMSPEC) - continue; - if (!finddev(fl->f_rootdev)) - deverror(fl->f_needs, "root"); - *pchecked++ = fl->f_rootdev; - pchecked = verifyswap(fl->f_next, checked, pchecked); -#define samedev(dev1, dev2) \ - ((minor(dev1) &~ DEV_MASK) != (minor(dev2) &~ DEV_MASK)) - if (!alreadychecked(fl->f_dumpdev, checked, pchecked)) { - if (!finddev(fl->f_dumpdev)) - deverror(fl->f_needs, "dump"); - *pchecked++ = fl->f_dumpdev; - } - if (!alreadychecked(fl->f_argdev, checked, pchecked)) { - if (!finddev(fl->f_argdev)) - deverror(fl->f_needs, "arg"); - *pchecked++ = fl->f_argdev; - } - } -} - -/* - * Do as above, but for swap devices. 
- */ -dev_t * -verifyswap(struct file_list *fl, dev_t checked[], dev_t *pchecked) -{ - - for (;fl && fl->f_type == SWAPSPEC; fl = fl->f_next) { - if (eq(fl->f_fn, "generic")) - continue; - if (alreadychecked(fl->f_swapdev, checked, pchecked)) - continue; - if (!finddev(fl->f_swapdev)) - fprintf(stderr, - "config: swap device %s not configured", fl->f_fn); - *pchecked++ = fl->f_swapdev; - } - return (pchecked); -} - -/* - * Has a device already been checked - * for it's existence in the configuration? - */ -int -alreadychecked(dev_t dev, dev_t list[], dev_t *last) -{ - register dev_t *p; - - for (p = list; p < last; p++) - if (samedev(*p, dev)) - return (1); - return (0); -} - void deverror(const char *systemname, const char *devtype) { @@ -1262,17 +290,3 @@ deverror(const char *systemname, const char *devtype) fprintf(stderr, "config: %s: %s device not configured\n", systemname, devtype); } - -/* - * Look for the device in the list of - * configured hardware devices. Must - * take into account stuff wildcarded. - */ -/*ARGSUSED*/ -int -finddev(__unused dev_t dev) -{ - - /* punt on this right now */ - return (1); -} diff --git a/SETUP/kextsymboltool/Makefile b/SETUP/kextsymboltool/Makefile index 17cf1108f..3ae439d36 100644 --- a/SETUP/kextsymboltool/Makefile +++ b/SETUP/kextsymboltool/Makefile @@ -18,9 +18,7 @@ ifneq ($(HOST_SPARSE_SDKROOT),/) LDFLAGS += -lmacho endif -.PHONY: force - -.SparseSDK: force +.SparseSDK: ALWAYS $(_v)echo '$(HOST_SPARSE_SDKROOT)' | cmp -s - $@ || echo '$(HOST_SPARSE_SDKROOT)' > $@ kextsymboltool: $(OBJS) .SparseSDK diff --git a/SETUP/replacecontents/Makefile b/SETUP/replacecontents/Makefile new file mode 100644 index 000000000..aa12e725f --- /dev/null +++ b/SETUP/replacecontents/Makefile @@ -0,0 +1,30 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + +include $(MakeInc_cmd) +include $(MakeInc_def) + +OBJS = replacecontents.o + +CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I. + +WARNFLAGS = -Wall + +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) + +replacecontents: $(OBJS) + @echo HOST_LD $@ + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ + @echo HOST_CODESIGN $@ + $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ + +%.o: %.c + @echo HOST_CC $@ + $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< + +do_build_setup:: replacecontents + +include $(MakeInc_rule) +include $(MakeInc_dir) diff --git a/SETUP/replacecontents/replacecontents.c b/SETUP/replacecontents/replacecontents.c new file mode 100644 index 000000000..3e72b2d9d --- /dev/null +++ b/SETUP/replacecontents/replacecontents.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
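[review note] The new replacecontents tool generalizes the compare-before-replace idiom of the deleted do_build() in mkmakefile.c (and of the cmp-based .SparseSDK rule just above): as the C source below shows, it joins its trailing arguments with spaces, appends a newline, and rewrites the destination file only if the result differs from the file's current contents, so the timestamp, and therefore anything that depends on the file, is left alone when nothing changed. A hypothetical invocation, with the file name and flags purely illustrative:

    replacecontents .CFLAGS cc -O0 -g

leaves .CFLAGS untouched when a previous run already wrote the same argument string, and rewrites it otherwise. The Makefile.template hunk later in this patch uses exactly this to make objects depend on the actual compiler invocation: `.CFLAGS: ALWAYS` regenerates the file on every build via `$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS)`, but its contents, and hence its mtime, change only when the flags do.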
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +void usage(void); + +int main(int argc, char * argv[]) +{ + struct stat sb; + char *newcontent = NULL; + size_t newcontentlength = 0; + char *oldcontent = NULL; + int ret; + int dstfd; + const char *dst = NULL; + ssize_t readsize, writesize; + int i; + + if (argc < 2) { + usage(); + } + + dst = argv[1]; + + for (i=2; i < argc; i++) { + newcontentlength += strlen(argv[i]) + 1 /* space or newline */; + } + newcontentlength += 1; /* NUL */ + + newcontent = malloc(newcontentlength); + if (newcontent == NULL) + err(EX_UNAVAILABLE, "malloc() failed"); + + newcontent[0] = '\0'; + + for (i=2; i < argc; i++) { + strlcat(newcontent, argv[i], newcontentlength); + if (i < argc - 1) { + strlcat(newcontent, " ", newcontentlength); + } else { + strlcat(newcontent, "\n", newcontentlength); + } + } + + dstfd = open(dst, O_RDWR | O_CREAT | O_APPEND, DEFFILEMODE); + if (dstfd < 0) + err(EX_NOINPUT, "open(%s)", dst); + + ret = fstat(dstfd, &sb); + if (ret < 0) + err(EX_NOINPUT, "fstat(%s)", dst); + + if (!S_ISREG(sb.st_mode)) + err(EX_USAGE, "%s is not a regular file", dst); + + if (sb.st_size != newcontentlength) { + /* obvious new content must be different than old */ + goto replace; + } + + oldcontent = malloc(newcontentlength); + if (oldcontent == NULL) + err(EX_UNAVAILABLE, "malloc(%lu) failed", newcontentlength); + + readsize = read(dstfd, oldcontent, newcontentlength); + if (readsize == -1) + err(EX_UNAVAILABLE, "read() failed"); + else if (readsize != newcontentlength) + errx(EX_UNAVAILABLE, "short read of file"); + + if (0 == memcmp(oldcontent, newcontent, newcontentlength)) { + /* binary comparison succeeded, just exit */ + free(oldcontent); + ret = close(dstfd); + if (ret < 0) + err(EX_UNAVAILABLE, "close() failed"); + + exit(0); + } + +replace: + ret = ftruncate(dstfd, 0); + if (ret < 0) + err(EX_UNAVAILABLE, "ftruncate() failed"); + + writesize = write(dstfd, newcontent, newcontentlength); + if (writesize == -1) + err(EX_UNAVAILABLE, "write() failed"); + else if (writesize != newcontentlength) + errx(EX_UNAVAILABLE, "short write of file"); + + ret = close(dstfd); + if (ret < 0) + err(EX_NOINPUT, "close(dst)"); + + return 0; +} + +void usage(void) +{ + fprintf(stderr, "Usage: %s <...>\n", + getprogname()); + exit(EX_USAGE); +} diff --git a/bsd/Makefile b/bsd/Makefile index 03281456d..e03ce2014 100644 --- a/bsd/Makefile +++ b/bsd/Makefile @@ -29,9 +29,14 @@ INSTINC_SUBDIRS_X86_64 = \ i386 \ crypto +INSTINC_SUBDIRS_X86_64H = \ + i386 \ + crypto + INSTINC_SUBDIRS_ARM = \ arm + EXPINC_SUBDIRS = \ bsm \ dev \ @@ -52,19 +57,20 @@ EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_X86_64 = \ i386 +EXPINC_SUBDIRS_X86_64H = \ + i386 + EXPINC_SUBDIRS_ARM = \ arm + COMP_SUBDIRS = \ conf -INST_SUBDIRS = \ +INSTTEXTFILES_SUBDIRS = \ dev \ - kern - -INSTMAN_SUBDIRS = \ + kern \ man - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/bsm/audit_kevents.h b/bsd/bsm/audit_kevents.h index 
d5e4bac01..e6ea2a05b 100644 --- a/bsd/bsm/audit_kevents.h +++ b/bsd/bsm/audit_kevents.h @@ -601,6 +601,15 @@ #define AUE_PDKILL 43198 /* FreeBSD. */ #define AUE_PDGETPID 43199 /* FreeBSD. */ #define AUE_PDWAIT 43200 /* FreeBSD. */ +#define AUE_GETATTRLISTBULK 43201 /* Darwin. */ +#define AUE_GETATTRLISTAT 43202 /* Darwin. */ +#define AUE_OPENBYID 43203 /* Darwin. */ +#define AUE_OPENBYID_R 43204 /* Darwin. */ +#define AUE_OPENBYID_RT 43205 /* Darwin. */ +#define AUE_OPENBYID_W 43206 /* Darwin. */ +#define AUE_OPENBYID_WT 43207 /* Darwin. */ +#define AUE_OPENBYID_RW 43208 /* Darwin. */ +#define AUE_OPENBYID_RWT 43209 /* Darwin. */ #define AUE_SESSION_START 44901 /* Darwin. */ #define AUE_SESSION_UPDATE 44902 /* Darwin. */ diff --git a/bsd/conf/MASTER.x86_64 b/bsd/conf/MASTER.x86_64 deleted file mode 100644 index ff85c8806..000000000 --- a/bsd/conf/MASTER.x86_64 +++ /dev/null @@ -1,125 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -###################################################################### -# -# Master Apple configuration file (see the master machine independent -# configuration file for a description of the file format). -# -###################################################################### -# -# Apple (PSEUDO-)DEVICES (select any combination) -# ex = Excelan EXOS 202 Ethernet interface -# ip = Interphase V/SMD 3200 disk controller -# od = Canon OMD-1 Optical Disk -# rd = RAM disk -# sd = SCSI disk -# sg = Generic SCSI Device -# st = SCSI tape -# fd = Floppy Disk -# en = Integrated Ethernet controller -# dsp = DSP560001 digital signal processor -# iplmeas = ipl time measurement -# nextp = NeXT Laser Printer -# sound = sound I/O -# vol = removable volume support device -# venip = virtual Ethernet/IP network interface -# zs = Serial device -# -# MULTIPROCESSOR SUPPORT (select exactly one) -# multi = support 4 processors -# uni = supports single processor -# -# SPECIAL CHARACTERISTICS (select any combination) -# gdb = GNU kernel debugger -# posix_kern = POSIX support -# -# CPU TYPE (select exactly one) -# NeXT = FIXME -# -###################################################################### -# -# Standard Apple Research Configurations: -# -------- ----- -------- --------------- -# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit config_imageboot config_workqueue psynch zleaks memorystatus vm_pressure_events kperf kpc importance_inheritance dynamic_codesigning config_telemetry config_proc_uuid_policy ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_ext_resolver config_searchfs config_hfs_dirlink config_appledouble ] -# NETWORKING = [ inet inet6 ipv6send compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib ifnet_input_chk config_mbuf_jumbo if_bridge PF ] -# NFS = [ nfsclient nfsserver ] -# VPN = [ ipsec flow_divert ] -# PF = [ pf pflog ] -# PKTSCHED = [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ] -# CLASSQ = [ classq_blue classq_red classq_rio ] -# MULTIPATH = [ multipath mptcp ] -# RELEASE = [ BASE NETWORKING NFS VPN FILESYS libdriver ] 
-# PROFILE = [ RELEASE profile ] -# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ] -# -# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit config_imageboot config_workqueue psynch ] -# EMBEDDED_FILESYS = [ devfs hfs journaling fdesc fifo ] -# EMBEDDED_NET = [ inet compat_oldsock tcpdrop_synfin bpfilter ] -# EMBEDDED = [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ] -# DEVELOPMENT = [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver development mach_assert ] -# -###################################################################### -# -machine "x86_64" # -cpu "x86_64" # - -makeoptions CCONFIGFLAGS = "-g -O3 -fno-omit-frame-pointer" # -makeoptions CCONFIGFLAGS = "-O3" # -makeoptions RELOC = "00100000" # -makeoptions SYMADDR = "00780000" # - -options GDB # GNU kernel debugger # -options DEBUG # general debugging code # -options SHOW_SPACE # print size of structures # -options EVENTMETER # event meter support # -options FP_EMUL # floating point emulation # -options UXPR # user-level XPR package # -config mach_kernel swap generic # -options EVENT # - -options NO_NESTED_PMAP # - -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. -# -options CONFIG_MACF # Mandatory Access Control Framework -options CONFIG_MACF_SOCKET_SUBSET # MAC socket subest (no labels) -#options CONFIG_MACF_SOCKET # MAC socket labels -#options CONFIG_MACF_NET # mbuf -#options CONFIG_MACF_DEBUG -#options CONFIG_MACF_MACH -options CONFIG_AUDIT # Kernel auditing - -# app-profiling i.e. pre-heating - off? -options CONFIG_APP_PROFILE=0 - -# kernel performance tracing -options KPERF # -options KPC # - -# -# code decryption... used on i386 for DSMOS -# must be set in all the bsd/conf and osfmk/conf MASTER files -# -options CONFIG_CODE_DECRYPTION - -# -# Ipl measurement system -# -pseudo-device iplmeas # - -# -# NFS measurement system -# -pseudo-device nfsmeas # - -# -# Removable Volume support -# -pseudo-device vol # diff --git a/bsd/conf/Makefile b/bsd/conf/Makefile index 25a42ef5e..76db9a7d8 100644 --- a/bsd/conf/Makefile +++ b/bsd/conf/Makefile @@ -6,20 +6,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) - -$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) - $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ - cd $(addsuffix /conf, $(TARGET)); \ - rm -f $(notdir $?); \ - cp $? 
.; \ - if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); +# Special handling for x86_64h which shares a MASTER config file with x86_64: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +DOCONF_ARCH_CONFIG_LC = x86_64 +else +DOCONF_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + +MASTERCONFDIR = $(SRCROOT)/config +DOCONFDEPS = $(addprefix $(MASTERCONFDIR)/, MASTER MASTER.$(DOCONF_ARCH_CONFIG_LC)) \ + $(addprefix $(SOURCE)/, Makefile.template Makefile.$(DOCONF_ARCH_CONFIG_LC) files files.$(DOCONF_ARCH_CONFIG_LC)) + +ifneq (,$(wildcard $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC))) +DOCONFDEPS += $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +endif + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile: $(SRCROOT)/SETUP/config/doconf $(OBJROOT)/SETUP/config $(DOCONFDEPS) + $(_v)$(MKDIR) $(TARGET)/$(CURRENT_KERNEL_CONFIG) + $(_v)$(SRCROOT)/SETUP/config/doconf -c -cpu $(DOCONF_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) -s $(SOURCE) -m $(MASTERCONFDIR) $(CURRENT_KERNEL_CONFIG); do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile $(_v)${MAKE} \ diff --git a/bsd/conf/Makefile.template b/bsd/conf/Makefile.template index 5c10a5657..ece1262df 100644 --- a/bsd/conf/Makefile.template +++ b/bsd/conf/Makefile.template @@ -65,6 +65,8 @@ COMP_SUBDIRS = %CFILES +%CXXFILES + %SFILES %MACHDEP @@ -95,7 +97,6 @@ OBJS_NO_SIGN_COMPARE = \ ip_fw2_compat.o \ ip_icmp.o \ ip_input.o \ - ip_mroute.o \ ip_output.o \ raw_ip.o \ tcp_input.o \ @@ -122,7 +123,6 @@ OBJS_NO_SIGN_COMPARE = \ ip6_forward.o \ in6_ifattach.o \ ip6_input.o \ - ip6_mroute.o \ ip6_output.o \ ipcomp_input.o \ ipcomp_output.o \ @@ -166,7 +166,6 @@ OBJS_NO_CAST_ALIGN = \ fasttrap_isa.o \ fbt_arm.o \ fbt_x86.o \ - fips_sha1.o \ hfs_attrlist.o \ hfs_btreeio.o \ hfs_catalog.o \ @@ -208,9 +207,7 @@ OBJS_NO_CAST_ALIGN = \ proc_info.o \ pthread_synch.o \ qsort.o \ - randomdev.o \ sdt.o \ - sha1mod.o \ shadow.o \ spec_vnops.o \ subr_log.o \ @@ -222,6 +219,7 @@ OBJS_NO_CAST_ALIGN = \ tcp_lro.o \ tty.o \ tty_compat.o \ + tty_dev.o \ tty_ptmx.o \ tty_pty.o \ ubc_subr.o \ @@ -230,7 +228,6 @@ OBJS_NO_CAST_ALIGN = \ vfs_fsevents.o \ vfs_journal.o \ vfs_lookup.o \ - vfs_subr.o \ vfs_syscalls.o \ vfs_utfconv.o \ vfs_vnops.o \ @@ -250,18 +247,23 @@ $(foreach file,$(OBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-W subr_prof.o_CFLAGS_RM = -pg -# -# OBJSDEPS is the set of files (defined in the machine dependent -# template if necessary) which all objects depend on (such as an -# in-line assembler expansion filter) -# -${OBJS}: ${OBJSDEPS} +# Rebuild if per-file overrides change +${OBJS}: $(firstword $(MAKEFILE_LIST)) -LDOBJS = $(OBJS) +# Rebuild if global compile flags change +$(COBJS): .CFLAGS +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) +$(CXXOBJS): .CXXFLAGS +.CXXFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KC++) $(CXXFLAGS) $(INCFLAGS) +$(SOBJS): .SFLAGS +.SFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) -$(COMPONENT).filelist: $(LDOBJS) +$(COMPONENT).filelist: $(OBJS) @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${LDOBJS}; do \ + $(_v)( for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist diff --git a/bsd/conf/compat_hdrs b/bsd/conf/compat_hdrs deleted 
file mode 100644 index 017a7848d..000000000 --- a/bsd/conf/compat_hdrs +++ /dev/null @@ -1,82 +0,0 @@ -DELETED sys linedesc.h -DELETED sys mach_extra.h -kernserv kern kalloc.h -kernserv kern lock.h -kernserv kern queue.h -kernserv kern sched_prim.h -kernserv sys printf.h -kernserv next loadable_fs.h -mach kern exc.defs -mach kern exc.h -mach kern mach.defs -mach kern mach_exc.defs -mach kern mach_exc.h -mach kern mach_host.defs -mach kern mach_host.h -mach kern mach_interface.h -mach kern mach_param.h -mach kern mach_traps.h -mach kern mach_types.defs -mach kern mach_types.h -mach kern std_types.defs -mach kern std_types.h -mach kern syscall_sw.h -mach sys boolean.h -mach sys exception.h -mach sys features.h -mach sys host_info.h -mach sys kern_return.h -mach sys machine.h -mach sys message.h -mach sys mig_errors.h -mach sys msg_type.h -mach sys notify.h -mach sys policy.h -mach sys port.h -mach sys processor_info.h -mach sys task_info.h -mach sys task_special_ports.h -mach sys thread_info.h -mach sys thread_special_ports.h -mach sys thread_status.h COMPATMACHINE -mach sys thread_switch.h -mach sys time_stamp.h DELETED -mach sys time_value.h -mach vm memory_object.h -mach vm vm_inherit.h -mach vm vm_param.h COMPATMACHINE -mach vm vm_prot.h -mach vm vm_statistics.h -mach_debug kern ipc_statistics.h DELETED -mach_debug kern mach_debug.defs DELETED -mach_debug kern mach_debug.h DELETED -mach_debug kern mach_debug_types.defs DELETED -mach_debug kern mach_debug_types.h DELETED -mach_debug kern zone_info.h DELETED -bsd/dev nextdev ldd.h -bsd/dev nextdev npio.h -bsd/dev nextdev scsireg.h -bsd/dev nextdev fd_extern.h -bsd/dev nextdev disk.h -bsd/dev nextdev zsreg.h zsio.h -bsd/machine next cpu.h -bsd/machine next machparam.h -bsd/machine next param.h -bsd/machine next psl.h -bsd/machine next reg.h -bsd/machine next signal.h -bsd/machine next spl.h -bsd/machine next table.h -bsd/machine next user.h -bsd/machine next vmparam.h -bsd/sys kern mach_swapon.h -kernserv next printf.h -kernserv next us_timer.h -mach/machine next boolean.h -mach/machine next exception.h -mach/machine next kern_return.h -mach/machine next syscall_sw.h -mach/machine next thread_status.h COMPATMACHINE -mach/machine next time_stamp.h DELETED -mach/machine next vm_param.h COMPATMACHINE -mach/machine next vm_types.h diff --git a/bsd/conf/files b/bsd/conf/files index 8fa8a831a..54b4ef14d 100644 --- a/bsd/conf/files +++ b/bsd/conf/files @@ -5,13 +5,8 @@ # the terms and conditions for use and redistribution. # -# -# N.B. "kern/lock.c" is listed as "optional cpus" so that config will -# create an "cpus.h" file. 
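[review note] Entries in this database pair a source path with a build condition: `standard` files are always compiled into the configuration, while `optional <name>` files are compiled only when the named option is enabled; each option referenced this way also gets a generated `<name>.h` header recording its setting, which is the trick the deleted comment above relied on to force creation of "cpus.h". For example, the `config_ecc_logging` entry added in this hunk means a configuration enabling that option compiles the new bsd/kern/kern_ecc.c and sees a generated header of roughly this shape (a sketch; the exact contents are config's, not spelled out in this patch):

    /* config_ecc_logging.h, generated by config(8) */
    #define CONFIG_ECC_LOGGING 1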
-# OPTIONS/hw_ast optional hw_ast OPTIONS/hw_footprint optional hw_footprint -OPTIONS/kernserv optional kernserv OPTIONS/config_macf optional config_macf OPTIONS/config_macf_socket_subset optional config_macf_socket_subset OPTIONS/config_macf_socket optional config_macf_socket @@ -21,37 +16,23 @@ OPTIONS/mach_compat optional mach_compat OPTIONS/mach_counters optional mach_counters OPTIONS/mach_debug optional mach_debug OPTIONS/mach_fastlink optional mach_fastlink -OPTIONS/mach_fixpri optional mach_fixpri OPTIONS/mach_host optional mach_host -OPTIONS/mach_ipc_compat optional mach_ipc_compat OPTIONS/mach_ipc_debug optional mach_ipc_debug -OPTIONS/mach_ipc_test optional mach_ipc_test OPTIONS/mach_ldebug optional mach_ldebug OPTIONS/mach_load optional mach_load -OPTIONS/mach_machine_routines optional mach_machine_routines -OPTIONS/rev_endian_fs optional rev_endian_fs OPTIONS/mach_net optional mach_net OPTIONS/mach_np optional mach_np -OPTIONS/mach_old_vm_copy optional mach_old_vm_copy OPTIONS/mach_pagemap optional mach_pagemap OPTIONS/mach_sctimes optional mach_sctimes OPTIONS/mach_vm_debug optional mach_vm_debug OPTIONS/mach_xp optional mach_xp OPTIONS/mach_xp_fpd optional mach_xp_fpd OPTIONS/quota optional quota -OPTIONS/simple_clock optional simple_clock OPTIONS/xpr_debug optional xpr_debug OPTIONS/kdebug optional kdebug OPTIONS/nfsclient optional nfsclient OPTIONS/nfsserver optional nfsserver -OPTIONS/driverkit optional driverkit -OPTIONS/mallocdebug optional mallocdebug -OPTIONS/kernobjc optional kernobjc OPTIONS/kernremote optional kernremote -OPTIONS/uxpr optional uxpr -OPTIONS/kernel_stack optional kernel_stack -OPTIONS/new_vm_code optional new_vm_code -OPTIONS/old_vm_code optional old_vm_code OPTIONS/compat_43 optional compat_43 OPTIONS/diagnostic optional diagnostic OPTIONS/config_dtrace optional config_dtrace @@ -64,10 +45,10 @@ OPTIONS/development optional development OPTIONS/sysv_sem optional sysv_sem OPTIONS/sysv_msg optional sysv_msg OPTIONS/sysv_shm optional sysv_shm -OPTIONS/no_bsd_inlines optional no_bsd_inlines OPTIONS/importance_inheritance optional importance_inheritance OPTIONS/importance_debug optional importance_debug OPTIONS/in_kernel_tests optional config_in_kernel_tests +OPTIONS/config_ecc_logging optional config_ecc_logging # # Network options @@ -77,7 +58,6 @@ OPTIONS/inet optional inet OPTIONS/inet6 optional inet6 OPTIONS/ipv6send optional ipv6send OPTIONS/ether optional ether -OPTIONS/mrouting optional mrouting OPTIONS/vlan optional vlan OPTIONS/bond optional bond OPTIONS/bpfilter optional bpfilter @@ -108,17 +88,11 @@ OPTIONS/zlib optional zlib # # Filesystem options # -OPTIONS/ffs optional ffs OPTIONS/hfs optional hfs -OPTIONS/mfs optional mfs OPTIONS/fdesc optional fdesc OPTIONS/fifo optional fifo OPTIONS/devfs optional devfs OPTIONS/crypto optional crypto -OPTIONS/allcrypto optional allcrypto -OPTIONS/journaling optional journaling -OPTIONS/crypto optional crypto -OPTIONS/allcrypto optional allcrypto OPTIONS/journaling optional journaling OPTIONS/hfs_compression optional hfs_compression @@ -163,16 +137,11 @@ bsd/dev/dtrace/systrace.c optional config_dtrace bsd/dev/dtrace/profile_prvd.c optional config_dtrace bsd/dev/dtrace/fasttrap.c optional config_dtrace -bsd/dev/random/randomdev.c standard -bsd/dev/random/fips_sha1.c standard -bsd/dev/random/YarrowCoreLib/port/smf.c standard -bsd/dev/random/YarrowCoreLib/src/comp.c standard -bsd/dev/random/YarrowCoreLib/src/prng.c standard -bsd/dev/random/YarrowCoreLib/src/sha1mod.c standard 
-bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c standard +bsd/dev/random/randomdev.c standard bsd/dev/memdev.c standard -bsd/dev/munge.c standard +bsd/dev/mem.c standard +bsd/dev/munge.c optional config_requires_u32_munging bsd/dev/unix_startup.c standard bsd/dev/vn/vn.c optional vndevice @@ -257,6 +226,7 @@ bsd/net/if_utun_crypto.c optional networking bsd/net/if_utun_crypto_dtls.c optional networking bsd/net/if_utun_crypto_ipsec.c optional networking bsd/net/if_ipsec.c optional ipsec +bsd/net/necp.c optional necp bsd/net/if_pflog.c optional pflog bsd/net/pf.c optional pf bsd/net/pf_if.c optional pf @@ -270,6 +240,8 @@ bsd/net/pktap.c optional networking bsd/net/if_llreach.c optional networking bsd/net/flowhash.c optional networking bsd/net/flowadv.c optional networking +bsd/net/content_filter.c optional content_filter +bsd/net/packet_mangler.c optional packet_mangler bsd/net/classq/classq.c optional networking bsd/net/classq/classq_blue.c optional classq_blue @@ -313,7 +285,6 @@ bsd/netinet/ip_fw2_compat.c optional ipfw2 bsd/netinet/ip_icmp.c optional inet bsd/netinet/ip_id.c optional inet bsd/netinet/ip_input.c optional inet -bsd/netinet/ip_mroute.c optional mrouting bsd/netinet/ip_output.c optional inet bsd/netinet/raw_ip.c optional inet bsd/netinet/tcp_debug.c optional tcpdebug @@ -323,8 +294,11 @@ bsd/netinet/tcp_sack.c optional inet bsd/netinet/tcp_subr.c optional inet bsd/netinet/tcp_timer.c optional inet bsd/netinet/tcp_usrreq.c optional inet +bsd/netinet/tcp_cc.c optional inet bsd/netinet/tcp_newreno.c optional inet -bsd/netinet/tcp_lro.c optional inet +bsd/netinet/tcp_cubic.c optional inet +bsd/netinet/cbrtf.c optional inet +bsd/netinet/tcp_lro.c optional inet bsd/netinet/tcp_ledbat.c optional inet bsd/netinet/udp_usrreq.c optional inet bsd/netinet/in_gif.c optional gif inet @@ -358,7 +332,6 @@ bsd/netinet6/ip6_fw.c optional inet6 ipfw2 bsd/netinet6/ip6_forward.c optional inet6 bsd/netinet6/in6_ifattach.c optional inet6 bsd/netinet6/ip6_input.c optional inet6 -bsd/netinet6/ip6_mroute.c optional mrouting inet6 bsd/netinet6/ip6_output.c optional inet6 bsd/netinet6/in6_src.c optional inet6 bsd/netinet6/ipcomp_core.c optional ipsec @@ -386,9 +359,6 @@ bsd/netkey/key_debug.c optional ipsec bsd/netkey/keysock.c optional ipsec bsd/netkey/keydb.c optional ipsec -bsd/crypto/blowfish/bf_enc.c optional crypto allcrypto -bsd/crypto/blowfish/bf_skey.c optional crypto allcrypto -bsd/crypto/cast128/cast128.c optional crypto allcrypto bsd/crypto/rc4/rc4.c optional crypto #bsd/netpm/pm_aTT.c optional pm @@ -417,6 +387,7 @@ bsd/hfs/hfs_lookup.c optional hfs bsd/hfs/hfs_notification.c optional hfs bsd/hfs/hfs_quota.c optional quota bsd/hfs/hfs_readwrite.c optional hfs +bsd/hfs/hfs_resize.c optional hfs bsd/hfs/hfs_search.c optional hfs bsd/hfs/hfs_vfsops.c optional hfs bsd/hfs/hfs_vfsutils.c optional hfs @@ -466,6 +437,7 @@ bsd/kern/kern_clock.c standard bsd/kern/kern_core.c standard bsd/kern/kern_credential.c standard bsd/kern/kern_cs.c standard +bsd/kern/kern_csr.c optional config_csr bsd/kern/kern_symfile.c standard bsd/kern/kern_descrip.c standard bsd/kern/kern_guarded.c standard @@ -492,6 +464,7 @@ bsd/kern/kern_newsysctl.c standard bsd/kern/kern_memorystatus.c optional config_memorystatus bsd/kern/kern_mib.c standard bsd/kern/kpi_mbuf.c optional sockets +bsd/kern/kern_sfi.c standard bsd/kern/kern_time.c standard bsd/kern/kern_xxx.c standard bsd/kern/mach_process.c standard @@ -506,12 +479,14 @@ bsd/kern/sys_generic.c standard bsd/kern/sys_pipe.c standard bsd/kern/sys_socket.c optional sockets 
bsd/kern/sys_domain.c optional sockets +bsd/kern/sys_coalition.c optional config_coalitions ./syscalls.c standard bsd/kern/tty.c standard bsd/kern/tty_compat.c standard bsd/kern/tty_conf.c standard -bsd/kern/tty_pty.c optional pty +bsd/kern/tty_dev.c optional ptmx pty bsd/kern/tty_ptmx.c optional ptmx +bsd/kern/tty_pty.c optional pty bsd/kern/tty_subr.c standard bsd/kern/tty_tty.c standard bsd/kern/ubc_subr.c standard @@ -547,6 +522,8 @@ bsd/vm/vm_unix.c standard bsd/vm/dp_backing_file.c standard bsd/vm/vm_compressor_backing_file.c standard +bsd/kern/kern_ecc.c optional config_ecc_logging + bsd/uxkern/ux_exception.c standard bsd/conf/param.c standard @@ -563,4 +540,3 @@ bsd/kern/kern_kpc.c optional kpc bsd/kern/kern_tests.c optional config_in_kernel_tests bsd/kern/proc_uuid_policy.c optional config_proc_uuid_policy - diff --git a/bsd/conf/files.x86_64 b/bsd/conf/files.x86_64 index d9bbd2736..2fba68035 100644 --- a/bsd/conf/files.x86_64 +++ b/bsd/conf/files.x86_64 @@ -1,14 +1,7 @@ -OPTIONS/show_space optional show_space -OPTIONS/gdb optional gdb -OPTIONS/iplmeas optional iplmeas - - bsd/dev/i386/conf.c standard bsd/dev/i386/cons.c standard -bsd/dev/i386/mem.c standard bsd/dev/i386/km.c standard bsd/dev/i386/kern_machdep.c standard -bsd/dev/i386/memmove.c standard bsd/dev/i386/stubs.c standard bsd/dev/i386/systemcalls.c standard bsd/dev/i386/sysctl.c standard @@ -30,4 +23,3 @@ bsd/kern/policy_check.c optional config_macf bsd/kern/bsd_stubs.c standard bsd/netinet/cpu_in_cksum.c standard bsd/netinet/in_cksum.c optional inet - diff --git a/bsd/conf/machine.awk b/bsd/conf/machine.awk deleted file mode 100644 index 3b487628d..000000000 --- a/bsd/conf/machine.awk +++ /dev/null @@ -1,18 +0,0 @@ -BEGIN { - hdr = "#if\tm68k\n" \ - "#import \n" \ - "#endif\tm68k\n" \ - "#if\tm88k\n" \ - "#import \n" \ - "#endif\tm88k\n" - hdr = "#import \n" -} -/\.h$/ { - ofile = sprintf("%s/%s", loc, $1); - printf(hdr, dir, $1, dir, $1) > ofile; - continue; -} - -{ - dir = $1; loc = $2; -} diff --git a/bsd/conf/param.c b/bsd/conf/param.c index 351fc8cc0..49d0f006b 100644 --- a/bsd/conf/param.c +++ b/bsd/conf/param.c @@ -65,7 +65,6 @@ * @(#)param.c 8.3 (Berkeley) 8/20/94 */ -#include #include #include #include @@ -81,12 +80,13 @@ #include #include -struct timezone tz = { TIMEZONE, PST }; +struct timezone tz = { 0, 0 }; -#define NPROC (20 + 16 * MAXUSERS) +#define NPROC (20 + 16 * 32) +#define NPROC_PER_UID (NPROC/2) #define HNPROC 2500 /* based on thread_max */ int maxproc = NPROC; -int maxprocperuid = NPROC/2; +int maxprocperuid = NPROC_PER_UID; /*__private_extern__*/ int hard_maxproc = HNPROC; /* hardcoded limit */ int nprocs = 0; /* XXX */ @@ -101,9 +101,6 @@ unsigned int ncallout = 16 + 2*NPROC; unsigned int nmbclusters = NMBCLUSTERS; int nport = NPROC / 2; -#define MAXSOCKETS NMBCLUSTERS -int maxsockets = MAXSOCKETS; - /* * async IO (aio) configurable limits */ diff --git a/bsd/crypto/Makefile b/bsd/crypto/Makefile index 01b1da8b2..8a0a0f3bb 100644 --- a/bsd/crypto/Makefile +++ b/bsd/crypto/Makefile @@ -8,8 +8,6 @@ include $(MakeInc_cmd) include $(MakeInc_def) INSTINC_SUBDIRS = \ - blowfish \ - cast128 \ rc4 \ PRIVATE_DATAFILES = \ diff --git a/bsd/crypto/blowfish/Makefile b/bsd/crypto/blowfish/Makefile deleted file mode 100644 index 6a7a74803..000000000 --- a/bsd/crypto/blowfish/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export 
MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -PRIVATE_DATAFILES = \ - blowfish.h - -INSTALL_MI_DIR = crypto - -EXPORT_MI_DIR = ${INSTALL_MI_DIR} - -INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} - -include $(MakeInc_rule) -include $(MakeInc_dir) diff --git a/bsd/crypto/blowfish/bf_enc.c b/bsd/crypto/blowfish/bf_enc.c deleted file mode 100644 index f17980d9b..000000000 --- a/bsd/crypto/blowfish/bf_enc.c +++ /dev/null @@ -1,154 +0,0 @@ -/* $FreeBSD: src/sys/crypto/blowfish/bf_enc.c,v 1.1.2.3 2002/03/26 10:12:23 ume Exp $ */ -/* $KAME: bf_enc.c,v 1.7 2002/02/27 01:33:59 itojun Exp $ */ - -/* crypto/bf/bf_enc.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -/* Blowfish as implemented from 'Blowfish: Springer-Verlag paper' - * (From LECTURE NOTES IN COIMPUTER SCIENCE 809, FAST SOFTWARE ENCRYPTION, - * CAMBRIDGE SECURITY WORKSHOP, CAMBRIDGE, U.K., DECEMBER 9-11, 1993) - */ - -#if (BF_ROUNDS != 16) && (BF_ROUNDS != 20) -If you set BF_ROUNDS to some value other than 16 or 20, you will have -to modify the code. -#endif - -/* XXX "data" is host endian */ -void -BF_encrypt(BF_LONG *data, BF_KEY *key) -{ - BF_LONG l, r, *p, *s; - - p = key->P; - s= &key->S[0]; - l = data[0]; - r = data[1]; - - l^=p[0]; - BF_ENC(r, l, s, p[ 1]); - BF_ENC(l, r, s, p[ 2]); - BF_ENC(r, l, s, p[ 3]); - BF_ENC(l, r, s, p[ 4]); - BF_ENC(r, l, s, p[ 5]); - BF_ENC(l, r, s, p[ 6]); - BF_ENC(r, l, s, p[ 7]); - BF_ENC(l, r, s, p[ 8]); - BF_ENC(r, l, s, p[ 9]); - BF_ENC(l, r, s, p[10]); - BF_ENC(r, l, s, p[11]); - BF_ENC(l, r, s, p[12]); - BF_ENC(r, l, s, p[13]); - BF_ENC(l, r, s, p[14]); - BF_ENC(r, l, s, p[15]); - BF_ENC(l, r, s, p[16]); -#if BF_ROUNDS == 20 - BF_ENC(r, l, s, p[17]); - BF_ENC(l, r, s, p[18]); - BF_ENC(r, l, s, p[19]); - BF_ENC(l, r, s, p[20]); -#endif - r ^= p[BF_ROUNDS + 1]; - - data[1] = l & 0xffffffff; - data[0] = r & 0xffffffff; -} - -/* XXX "data" is host endian */ -void -BF_decrypt(BF_LONG *data, BF_KEY *key) -{ - BF_LONG l, r, *p, *s; - - p = key->P; - s= &key->S[0]; - l = data[0]; - r = data[1]; - - l ^= p[BF_ROUNDS + 1]; -#if BF_ROUNDS == 20 - BF_ENC(r, l, s, p[20]); - BF_ENC(l, r, s, p[19]); - BF_ENC(r, l, s, p[18]); - BF_ENC(l, r, s, p[17]); -#endif - BF_ENC(r, l, s, p[16]); - BF_ENC(l, r, s, p[15]); - BF_ENC(r, l, s, p[14]); - BF_ENC(l, r, s, p[13]); - BF_ENC(r, l, s, p[12]); - BF_ENC(l, r, s, p[11]); - BF_ENC(r, l, s, p[10]); - BF_ENC(l, r, s, p[ 9]); - BF_ENC(r, l, s, p[ 8]); - BF_ENC(l, r, s, p[ 7]); - BF_ENC(r, l, s, p[ 6]); - BF_ENC(l, r, s, p[ 5]); - BF_ENC(r, l, s, p[ 4]); - BF_ENC(l, r, s, p[ 3]); - BF_ENC(r, l, s, p[ 2]); - BF_ENC(l, r, s, p[ 1]); - r ^= p[0]; - - data[1] = l & 0xffffffff; - data[0] = r & 0xffffffff; -} diff --git a/bsd/crypto/blowfish/bf_locl.h b/bsd/crypto/blowfish/bf_locl.h deleted file mode 100644 index c3fa929e0..000000000 --- a/bsd/crypto/blowfish/bf_locl.h +++ /dev/null @@ -1,226 +0,0 @@ -/* $FreeBSD: src/sys/crypto/blowfish/bf_locl.h,v 1.1.2.2 2001/07/03 11:01:28 ume Exp $ */ -/* $KAME: bf_locl.h,v 1.5 2000/08/31 06:03:48 itojun Exp $ */ - -/* crypto/bf/bf_local.h */ -/* Copyright (C) 1995-1997 Eric Young (eay@mincom.oz.au) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@mincom.oz.au). - * The implementation was written so as to conform with Netscapes SSL. 
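For reference, the BF_encrypt/BF_decrypt pair deleted above is the standard 16-round Blowfish Feistel network: each BF_ENC step XORs one P-array word into a half-block and mixes in the F function of the other half, and BF_decrypt is the identical network with the P-array applied in reverse order. A plain-C rendering of the F function those rounds evaluate (Schneier's definition, with the flat 1024-entry S array split into its four logical 256-entry boxes):

    #include <stdint.h>

    /* Blowfish F: four S-box lookups combined add/xor/add, mod 2^32. */
    static uint32_t blowfish_F(const uint32_t S[4][256], uint32_t x)
    {
        uint32_t a = (x >> 24) & 0xff;
        uint32_t b = (x >> 16) & 0xff;
        uint32_t c = (x >>  8) & 0xff;
        uint32_t d =  x        & 0xff;
        return ((S[0][a] + S[1][b]) ^ S[2][c]) + S[3][d];
    }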
- * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@mincom.oz.au). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@mincom.oz.au)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@mincom.oz.au)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - * - * Always modify bf_locl.org since bf_locl.h is automatically generated from - * it during SSLeay configuration. 
- * - * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - */ - -#undef c2l -#define c2l(c,l) (l =((BF_LONG)(*((c)++))) , \ - l|=((BF_LONG)(*((c)++)))<< 8L, \ - l|=((BF_LONG)(*((c)++)))<<16L, \ - l|=((BF_LONG)(*((c)++)))<<24L) - -/* NOTE - c is not incremented as per c2l */ -#undef c2ln -#define c2ln(c,l1,l2,n) { \ - c+=n; \ - l1=l2=0; \ - switch (n) { \ - case 8: l2 =((BF_LONG)(*(--(c))))<<24L; \ - case 7: l2|=((BF_LONG)(*(--(c))))<<16L; \ - case 6: l2|=((BF_LONG)(*(--(c))))<< 8L; \ - case 5: l2|=((BF_LONG)(*(--(c)))); \ - case 4: l1 =((BF_LONG)(*(--(c))))<<24L; \ - case 3: l1|=((BF_LONG)(*(--(c))))<<16L; \ - case 2: l1|=((BF_LONG)(*(--(c))))<< 8L; \ - case 1: l1|=((BF_LONG)(*(--(c)))); \ - } \ - } - -#undef l2c -#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \ - *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ - *((c)++)=(unsigned char)(((l)>>16L)&0xff), \ - *((c)++)=(unsigned char)(((l)>>24L)&0xff)) - -/* NOTE - c is not incremented as per l2c */ -#undef l2cn -#define l2cn(l1,l2,c,n) { \ - c+=n; \ - switch (n) { \ - case 8: *(--(c))=(unsigned char)(((l2)>>24L)&0xff); \ - case 7: *(--(c))=(unsigned char)(((l2)>>16L)&0xff); \ - case 6: *(--(c))=(unsigned char)(((l2)>> 8L)&0xff); \ - case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \ - case 4: *(--(c))=(unsigned char)(((l1)>>24L)&0xff); \ - case 3: *(--(c))=(unsigned char)(((l1)>>16L)&0xff); \ - case 2: *(--(c))=(unsigned char)(((l1)>> 8L)&0xff); \ - case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \ - } \ - } - -/* NOTE - c is not incremented as per n2l */ -#define n2ln(c,l1,l2,n) { \ - c+=n; \ - l1=l2=0; \ - switch (n) { \ - case 8: l2 =((BF_LONG)(*(--(c)))) ; \ - case 7: l2|=((BF_LONG)(*(--(c))))<< 8; \ - case 6: l2|=((BF_LONG)(*(--(c))))<<16; \ - case 5: l2|=((BF_LONG)(*(--(c))))<<24; \ - case 4: l1 =((BF_LONG)(*(--(c)))) ; \ - case 3: l1|=((BF_LONG)(*(--(c))))<< 8; \ - case 2: l1|=((BF_LONG)(*(--(c))))<<16; \ - case 1: l1|=((BF_LONG)(*(--(c))))<<24; \ - } \ - } - -/* NOTE - c is not incremented as per l2n */ -#define l2nn(l1,l2,c,n) { \ - c+=n; \ - switch (n) { \ - case 8: *(--(c))=(unsigned char)(((l2) )&0xff); \ - case 7: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \ - case 6: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \ - case 5: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \ - case 4: *(--(c))=(unsigned char)(((l1) )&0xff); \ - case 3: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \ - case 2: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \ - case 1: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \ - } \ - } - -#undef n2l -#define n2l(c,l) (l =((BF_LONG)(*((c)++)))<<24L, \ - l|=((BF_LONG)(*((c)++)))<<16L, \ - l|=((BF_LONG)(*((c)++)))<< 8L, \ - l|=((BF_LONG)(*((c)++)))) - -#undef l2n -#define l2n(l,c) (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \ - *((c)++)=(unsigned char)(((l)>>16L)&0xff), \ - *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ - *((c)++)=(unsigned char)(((l) )&0xff)) - -/* This is actually a big endian algorithm, the most significate byte - * is used to lookup array 0 */ - -/* use BF_PTR2 for intel boxes, - * BF_PTR for sparc and MIPS/SGI - * use nothing for Alpha and HP. 
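The c2l/l2c and n2l/l2n macros above are nothing more than byte-order loads and stores: the c2l family assembles a 32-bit word least-significant byte first (little-endian), the n2l family most-significant byte first (network order), and the c2ln/l2cn/n2ln/l2nn variants handle partial blocks of n bytes. Equivalent plain functions, for clarity:

    #include <stdint.h>

    static uint32_t c2l_load(const unsigned char *c)   /* little-endian */
    {
        return  (uint32_t)c[0]        | ((uint32_t)c[1] << 8) |
               ((uint32_t)c[2] << 16) | ((uint32_t)c[3] << 24);
    }

    static uint32_t n2l_load(const unsigned char *c)   /* big-endian */
    {
        return ((uint32_t)c[0] << 24) | ((uint32_t)c[1] << 16) |
               ((uint32_t)c[2] <<  8) |  (uint32_t)c[3];
    }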
- */ -#undef BF_PTR -#undef BF_PTR2 -#ifdef __NetBSD__ -#ifdef __i386__ -#define BF_PTR2 -#else -#ifdef __mips__ -#define BF_PTR -#endif -#endif -#endif /*NetBSD*/ - -#define BF_M 0x3fc -#define BF_0 22L -#define BF_1 14L -#define BF_2 6L -#define BF_3 2L /* left shift */ - -#if defined(BF_PTR2) - -/* This is basically a special pentium verson */ -#define BF_ENC(LL,R,S,P) \ - { \ - BF_LONG t,u,v; \ - u=R>>BF_0; \ - v=R>>BF_1; \ - u&=BF_M; \ - v&=BF_M; \ - t= *(BF_LONG *)((unsigned char *)&(S[ 0])+u); \ - u=R>>BF_2; \ - t+= *(BF_LONG *)((unsigned char *)&(S[256])+v); \ - v=R<>BF_0)&BF_M))+ \ - *(BF_LONG *)((unsigned char *)&(S[256])+((R>>BF_1)&BF_M)))^ \ - *(BF_LONG *)((unsigned char *)&(S[512])+((R>>BF_2)&BF_M)))+ \ - *(BF_LONG *)((unsigned char *)&(S[768])+((R<>24L) ] + \ - S[0x0100+((R>>16L)&0xff)])^ \ - S[0x0200+((R>> 8L)&0xff)])+ \ - S[0x0300+((R )&0xff)])&0xffffffff; -#endif diff --git a/bsd/crypto/blowfish/bf_pi.h b/bsd/crypto/blowfish/bf_pi.h deleted file mode 100644 index d2f80f0b4..000000000 --- a/bsd/crypto/blowfish/bf_pi.h +++ /dev/null @@ -1,328 +0,0 @@ -/* $FreeBSD: src/sys/crypto/blowfish/bf_pi.h,v 1.1.2.1 2000/07/15 07:14:18 kris Exp $ */ -/* $KAME: bf_pi.h,v 1.3 2000/03/27 04:36:26 sumikawa Exp $ */ - -/* crypto/bf/bf_pi.h */ -/* Copyright (C) 1995-1997 Eric Young (eay@mincom.oz.au) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@mincom.oz.au). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@mincom.oz.au). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@mincom.oz.au)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. 
If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@mincom.oz.au)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -static const BF_KEY bf_init= { - { - 0x243f6a88L, 0x85a308d3L, 0x13198a2eL, 0x03707344L, - 0xa4093822L, 0x299f31d0L, 0x082efa98L, 0xec4e6c89L, - 0x452821e6L, 0x38d01377L, 0xbe5466cfL, 0x34e90c6cL, - 0xc0ac29b7L, 0xc97c50ddL, 0x3f84d5b5L, 0xb5470917L, - 0x9216d5d9L, 0x8979fb1b - },{ - 0xd1310ba6L, 0x98dfb5acL, 0x2ffd72dbL, 0xd01adfb7L, - 0xb8e1afedL, 0x6a267e96L, 0xba7c9045L, 0xf12c7f99L, - 0x24a19947L, 0xb3916cf7L, 0x0801f2e2L, 0x858efc16L, - 0x636920d8L, 0x71574e69L, 0xa458fea3L, 0xf4933d7eL, - 0x0d95748fL, 0x728eb658L, 0x718bcd58L, 0x82154aeeL, - 0x7b54a41dL, 0xc25a59b5L, 0x9c30d539L, 0x2af26013L, - 0xc5d1b023L, 0x286085f0L, 0xca417918L, 0xb8db38efL, - 0x8e79dcb0L, 0x603a180eL, 0x6c9e0e8bL, 0xb01e8a3eL, - 0xd71577c1L, 0xbd314b27L, 0x78af2fdaL, 0x55605c60L, - 0xe65525f3L, 0xaa55ab94L, 0x57489862L, 0x63e81440L, - 0x55ca396aL, 0x2aab10b6L, 0xb4cc5c34L, 0x1141e8ceL, - 0xa15486afL, 0x7c72e993L, 0xb3ee1411L, 0x636fbc2aL, - 0x2ba9c55dL, 0x741831f6L, 0xce5c3e16L, 0x9b87931eL, - 0xafd6ba33L, 0x6c24cf5cL, 0x7a325381L, 0x28958677L, - 0x3b8f4898L, 0x6b4bb9afL, 0xc4bfe81bL, 0x66282193L, - 0x61d809ccL, 0xfb21a991L, 0x487cac60L, 0x5dec8032L, - 0xef845d5dL, 0xe98575b1L, 0xdc262302L, 0xeb651b88L, - 0x23893e81L, 0xd396acc5L, 0x0f6d6ff3L, 0x83f44239L, - 0x2e0b4482L, 0xa4842004L, 0x69c8f04aL, 0x9e1f9b5eL, - 0x21c66842L, 0xf6e96c9aL, 0x670c9c61L, 0xabd388f0L, - 0x6a51a0d2L, 0xd8542f68L, 0x960fa728L, 0xab5133a3L, - 0x6eef0b6cL, 0x137a3be4L, 0xba3bf050L, 0x7efb2a98L, - 0xa1f1651dL, 0x39af0176L, 0x66ca593eL, 0x82430e88L, - 0x8cee8619L, 0x456f9fb4L, 0x7d84a5c3L, 0x3b8b5ebeL, - 0xe06f75d8L, 0x85c12073L, 0x401a449fL, 0x56c16aa6L, - 0x4ed3aa62L, 0x363f7706L, 0x1bfedf72L, 0x429b023dL, - 0x37d0d724L, 0xd00a1248L, 0xdb0fead3L, 0x49f1c09bL, - 0x075372c9L, 0x80991b7bL, 0x25d479d8L, 0xf6e8def7L, - 0xe3fe501aL, 0xb6794c3bL, 0x976ce0bdL, 0x04c006baL, - 0xc1a94fb6L, 0x409f60c4L, 0x5e5c9ec2L, 0x196a2463L, - 0x68fb6fafL, 0x3e6c53b5L, 0x1339b2ebL, 0x3b52ec6fL, - 0x6dfc511fL, 0x9b30952cL, 0xcc814544L, 0xaf5ebd09L, - 0xbee3d004L, 0xde334afdL, 0x660f2807L, 0x192e4bb3L, - 0xc0cba857L, 0x45c8740fL, 0xd20b5f39L, 0xb9d3fbdbL, - 0x5579c0bdL, 0x1a60320aL, 0xd6a100c6L, 0x402c7279L, - 0x679f25feL, 0xfb1fa3ccL, 0x8ea5e9f8L, 0xdb3222f8L, - 0x3c7516dfL, 0xfd616b15L, 0x2f501ec8L, 0xad0552abL, - 
0x323db5faL, 0xfd238760L, 0x53317b48L, 0x3e00df82L, - 0x9e5c57bbL, 0xca6f8ca0L, 0x1a87562eL, 0xdf1769dbL, - 0xd542a8f6L, 0x287effc3L, 0xac6732c6L, 0x8c4f5573L, - 0x695b27b0L, 0xbbca58c8L, 0xe1ffa35dL, 0xb8f011a0L, - 0x10fa3d98L, 0xfd2183b8L, 0x4afcb56cL, 0x2dd1d35bL, - 0x9a53e479L, 0xb6f84565L, 0xd28e49bcL, 0x4bfb9790L, - 0xe1ddf2daL, 0xa4cb7e33L, 0x62fb1341L, 0xcee4c6e8L, - 0xef20cadaL, 0x36774c01L, 0xd07e9efeL, 0x2bf11fb4L, - 0x95dbda4dL, 0xae909198L, 0xeaad8e71L, 0x6b93d5a0L, - 0xd08ed1d0L, 0xafc725e0L, 0x8e3c5b2fL, 0x8e7594b7L, - 0x8ff6e2fbL, 0xf2122b64L, 0x8888b812L, 0x900df01cL, - 0x4fad5ea0L, 0x688fc31cL, 0xd1cff191L, 0xb3a8c1adL, - 0x2f2f2218L, 0xbe0e1777L, 0xea752dfeL, 0x8b021fa1L, - 0xe5a0cc0fL, 0xb56f74e8L, 0x18acf3d6L, 0xce89e299L, - 0xb4a84fe0L, 0xfd13e0b7L, 0x7cc43b81L, 0xd2ada8d9L, - 0x165fa266L, 0x80957705L, 0x93cc7314L, 0x211a1477L, - 0xe6ad2065L, 0x77b5fa86L, 0xc75442f5L, 0xfb9d35cfL, - 0xebcdaf0cL, 0x7b3e89a0L, 0xd6411bd3L, 0xae1e7e49L, - 0x00250e2dL, 0x2071b35eL, 0x226800bbL, 0x57b8e0afL, - 0x2464369bL, 0xf009b91eL, 0x5563911dL, 0x59dfa6aaL, - 0x78c14389L, 0xd95a537fL, 0x207d5ba2L, 0x02e5b9c5L, - 0x83260376L, 0x6295cfa9L, 0x11c81968L, 0x4e734a41L, - 0xb3472dcaL, 0x7b14a94aL, 0x1b510052L, 0x9a532915L, - 0xd60f573fL, 0xbc9bc6e4L, 0x2b60a476L, 0x81e67400L, - 0x08ba6fb5L, 0x571be91fL, 0xf296ec6bL, 0x2a0dd915L, - 0xb6636521L, 0xe7b9f9b6L, 0xff34052eL, 0xc5855664L, - 0x53b02d5dL, 0xa99f8fa1L, 0x08ba4799L, 0x6e85076aL, - 0x4b7a70e9L, 0xb5b32944L, 0xdb75092eL, 0xc4192623L, - 0xad6ea6b0L, 0x49a7df7dL, 0x9cee60b8L, 0x8fedb266L, - 0xecaa8c71L, 0x699a17ffL, 0x5664526cL, 0xc2b19ee1L, - 0x193602a5L, 0x75094c29L, 0xa0591340L, 0xe4183a3eL, - 0x3f54989aL, 0x5b429d65L, 0x6b8fe4d6L, 0x99f73fd6L, - 0xa1d29c07L, 0xefe830f5L, 0x4d2d38e6L, 0xf0255dc1L, - 0x4cdd2086L, 0x8470eb26L, 0x6382e9c6L, 0x021ecc5eL, - 0x09686b3fL, 0x3ebaefc9L, 0x3c971814L, 0x6b6a70a1L, - 0x687f3584L, 0x52a0e286L, 0xb79c5305L, 0xaa500737L, - 0x3e07841cL, 0x7fdeae5cL, 0x8e7d44ecL, 0x5716f2b8L, - 0xb03ada37L, 0xf0500c0dL, 0xf01c1f04L, 0x0200b3ffL, - 0xae0cf51aL, 0x3cb574b2L, 0x25837a58L, 0xdc0921bdL, - 0xd19113f9L, 0x7ca92ff6L, 0x94324773L, 0x22f54701L, - 0x3ae5e581L, 0x37c2dadcL, 0xc8b57634L, 0x9af3dda7L, - 0xa9446146L, 0x0fd0030eL, 0xecc8c73eL, 0xa4751e41L, - 0xe238cd99L, 0x3bea0e2fL, 0x3280bba1L, 0x183eb331L, - 0x4e548b38L, 0x4f6db908L, 0x6f420d03L, 0xf60a04bfL, - 0x2cb81290L, 0x24977c79L, 0x5679b072L, 0xbcaf89afL, - 0xde9a771fL, 0xd9930810L, 0xb38bae12L, 0xdccf3f2eL, - 0x5512721fL, 0x2e6b7124L, 0x501adde6L, 0x9f84cd87L, - 0x7a584718L, 0x7408da17L, 0xbc9f9abcL, 0xe94b7d8cL, - 0xec7aec3aL, 0xdb851dfaL, 0x63094366L, 0xc464c3d2L, - 0xef1c1847L, 0x3215d908L, 0xdd433b37L, 0x24c2ba16L, - 0x12a14d43L, 0x2a65c451L, 0x50940002L, 0x133ae4ddL, - 0x71dff89eL, 0x10314e55L, 0x81ac77d6L, 0x5f11199bL, - 0x043556f1L, 0xd7a3c76bL, 0x3c11183bL, 0x5924a509L, - 0xf28fe6edL, 0x97f1fbfaL, 0x9ebabf2cL, 0x1e153c6eL, - 0x86e34570L, 0xeae96fb1L, 0x860e5e0aL, 0x5a3e2ab3L, - 0x771fe71cL, 0x4e3d06faL, 0x2965dcb9L, 0x99e71d0fL, - 0x803e89d6L, 0x5266c825L, 0x2e4cc978L, 0x9c10b36aL, - 0xc6150ebaL, 0x94e2ea78L, 0xa5fc3c53L, 0x1e0a2df4L, - 0xf2f74ea7L, 0x361d2b3dL, 0x1939260fL, 0x19c27960L, - 0x5223a708L, 0xf71312b6L, 0xebadfe6eL, 0xeac31f66L, - 0xe3bc4595L, 0xa67bc883L, 0xb17f37d1L, 0x018cff28L, - 0xc332ddefL, 0xbe6c5aa5L, 0x65582185L, 0x68ab9802L, - 0xeecea50fL, 0xdb2f953bL, 0x2aef7dadL, 0x5b6e2f84L, - 0x1521b628L, 0x29076170L, 0xecdd4775L, 0x619f1510L, - 0x13cca830L, 0xeb61bd96L, 0x0334fe1eL, 0xaa0363cfL, - 0xb5735c90L, 0x4c70a239L, 0xd59e9e0bL, 
0xcbaade14L, - 0xeecc86bcL, 0x60622ca7L, 0x9cab5cabL, 0xb2f3846eL, - 0x648b1eafL, 0x19bdf0caL, 0xa02369b9L, 0x655abb50L, - 0x40685a32L, 0x3c2ab4b3L, 0x319ee9d5L, 0xc021b8f7L, - 0x9b540b19L, 0x875fa099L, 0x95f7997eL, 0x623d7da8L, - 0xf837889aL, 0x97e32d77L, 0x11ed935fL, 0x16681281L, - 0x0e358829L, 0xc7e61fd6L, 0x96dedfa1L, 0x7858ba99L, - 0x57f584a5L, 0x1b227263L, 0x9b83c3ffL, 0x1ac24696L, - 0xcdb30aebL, 0x532e3054L, 0x8fd948e4L, 0x6dbc3128L, - 0x58ebf2efL, 0x34c6ffeaL, 0xfe28ed61L, 0xee7c3c73L, - 0x5d4a14d9L, 0xe864b7e3L, 0x42105d14L, 0x203e13e0L, - 0x45eee2b6L, 0xa3aaabeaL, 0xdb6c4f15L, 0xfacb4fd0L, - 0xc742f442L, 0xef6abbb5L, 0x654f3b1dL, 0x41cd2105L, - 0xd81e799eL, 0x86854dc7L, 0xe44b476aL, 0x3d816250L, - 0xcf62a1f2L, 0x5b8d2646L, 0xfc8883a0L, 0xc1c7b6a3L, - 0x7f1524c3L, 0x69cb7492L, 0x47848a0bL, 0x5692b285L, - 0x095bbf00L, 0xad19489dL, 0x1462b174L, 0x23820e00L, - 0x58428d2aL, 0x0c55f5eaL, 0x1dadf43eL, 0x233f7061L, - 0x3372f092L, 0x8d937e41L, 0xd65fecf1L, 0x6c223bdbL, - 0x7cde3759L, 0xcbee7460L, 0x4085f2a7L, 0xce77326eL, - 0xa6078084L, 0x19f8509eL, 0xe8efd855L, 0x61d99735L, - 0xa969a7aaL, 0xc50c06c2L, 0x5a04abfcL, 0x800bcadcL, - 0x9e447a2eL, 0xc3453484L, 0xfdd56705L, 0x0e1e9ec9L, - 0xdb73dbd3L, 0x105588cdL, 0x675fda79L, 0xe3674340L, - 0xc5c43465L, 0x713e38d8L, 0x3d28f89eL, 0xf16dff20L, - 0x153e21e7L, 0x8fb03d4aL, 0xe6e39f2bL, 0xdb83adf7L, - 0xe93d5a68L, 0x948140f7L, 0xf64c261cL, 0x94692934L, - 0x411520f7L, 0x7602d4f7L, 0xbcf46b2eL, 0xd4a20068L, - 0xd4082471L, 0x3320f46aL, 0x43b7d4b7L, 0x500061afL, - 0x1e39f62eL, 0x97244546L, 0x14214f74L, 0xbf8b8840L, - 0x4d95fc1dL, 0x96b591afL, 0x70f4ddd3L, 0x66a02f45L, - 0xbfbc09ecL, 0x03bd9785L, 0x7fac6dd0L, 0x31cb8504L, - 0x96eb27b3L, 0x55fd3941L, 0xda2547e6L, 0xabca0a9aL, - 0x28507825L, 0x530429f4L, 0x0a2c86daL, 0xe9b66dfbL, - 0x68dc1462L, 0xd7486900L, 0x680ec0a4L, 0x27a18deeL, - 0x4f3ffea2L, 0xe887ad8cL, 0xb58ce006L, 0x7af4d6b6L, - 0xaace1e7cL, 0xd3375fecL, 0xce78a399L, 0x406b2a42L, - 0x20fe9e35L, 0xd9f385b9L, 0xee39d7abL, 0x3b124e8bL, - 0x1dc9faf7L, 0x4b6d1856L, 0x26a36631L, 0xeae397b2L, - 0x3a6efa74L, 0xdd5b4332L, 0x6841e7f7L, 0xca7820fbL, - 0xfb0af54eL, 0xd8feb397L, 0x454056acL, 0xba489527L, - 0x55533a3aL, 0x20838d87L, 0xfe6ba9b7L, 0xd096954bL, - 0x55a867bcL, 0xa1159a58L, 0xcca92963L, 0x99e1db33L, - 0xa62a4a56L, 0x3f3125f9L, 0x5ef47e1cL, 0x9029317cL, - 0xfdf8e802L, 0x04272f70L, 0x80bb155cL, 0x05282ce3L, - 0x95c11548L, 0xe4c66d22L, 0x48c1133fL, 0xc70f86dcL, - 0x07f9c9eeL, 0x41041f0fL, 0x404779a4L, 0x5d886e17L, - 0x325f51ebL, 0xd59bc0d1L, 0xf2bcc18fL, 0x41113564L, - 0x257b7834L, 0x602a9c60L, 0xdff8e8a3L, 0x1f636c1bL, - 0x0e12b4c2L, 0x02e1329eL, 0xaf664fd1L, 0xcad18115L, - 0x6b2395e0L, 0x333e92e1L, 0x3b240b62L, 0xeebeb922L, - 0x85b2a20eL, 0xe6ba0d99L, 0xde720c8cL, 0x2da2f728L, - 0xd0127845L, 0x95b794fdL, 0x647d0862L, 0xe7ccf5f0L, - 0x5449a36fL, 0x877d48faL, 0xc39dfd27L, 0xf33e8d1eL, - 0x0a476341L, 0x992eff74L, 0x3a6f6eabL, 0xf4f8fd37L, - 0xa812dc60L, 0xa1ebddf8L, 0x991be14cL, 0xdb6e6b0dL, - 0xc67b5510L, 0x6d672c37L, 0x2765d43bL, 0xdcd0e804L, - 0xf1290dc7L, 0xcc00ffa3L, 0xb5390f92L, 0x690fed0bL, - 0x667b9ffbL, 0xcedb7d9cL, 0xa091cf0bL, 0xd9155ea3L, - 0xbb132f88L, 0x515bad24L, 0x7b9479bfL, 0x763bd6ebL, - 0x37392eb3L, 0xcc115979L, 0x8026e297L, 0xf42e312dL, - 0x6842ada7L, 0xc66a2b3bL, 0x12754cccL, 0x782ef11cL, - 0x6a124237L, 0xb79251e7L, 0x06a1bbe6L, 0x4bfb6350L, - 0x1a6b1018L, 0x11caedfaL, 0x3d25bdd8L, 0xe2e1c3c9L, - 0x44421659L, 0x0a121386L, 0xd90cec6eL, 0xd5abea2aL, - 0x64af674eL, 0xda86a85fL, 0xbebfe988L, 0x64e4c3feL, - 0x9dbc8057L, 0xf0f7c086L, 
0x60787bf8L, 0x6003604dL, - 0xd1fd8346L, 0xf6381fb0L, 0x7745ae04L, 0xd736fcccL, - 0x83426b33L, 0xf01eab71L, 0xb0804187L, 0x3c005e5fL, - 0x77a057beL, 0xbde8ae24L, 0x55464299L, 0xbf582e61L, - 0x4e58f48fL, 0xf2ddfda2L, 0xf474ef38L, 0x8789bdc2L, - 0x5366f9c3L, 0xc8b38e74L, 0xb475f255L, 0x46fcd9b9L, - 0x7aeb2661L, 0x8b1ddf84L, 0x846a0e79L, 0x915f95e2L, - 0x466e598eL, 0x20b45770L, 0x8cd55591L, 0xc902de4cL, - 0xb90bace1L, 0xbb8205d0L, 0x11a86248L, 0x7574a99eL, - 0xb77f19b6L, 0xe0a9dc09L, 0x662d09a1L, 0xc4324633L, - 0xe85a1f02L, 0x09f0be8cL, 0x4a99a025L, 0x1d6efe10L, - 0x1ab93d1dL, 0x0ba5a4dfL, 0xa186f20fL, 0x2868f169L, - 0xdcb7da83L, 0x573906feL, 0xa1e2ce9bL, 0x4fcd7f52L, - 0x50115e01L, 0xa70683faL, 0xa002b5c4L, 0x0de6d027L, - 0x9af88c27L, 0x773f8641L, 0xc3604c06L, 0x61a806b5L, - 0xf0177a28L, 0xc0f586e0L, 0x006058aaL, 0x30dc7d62L, - 0x11e69ed7L, 0x2338ea63L, 0x53c2dd94L, 0xc2c21634L, - 0xbbcbee56L, 0x90bcb6deL, 0xebfc7da1L, 0xce591d76L, - 0x6f05e409L, 0x4b7c0188L, 0x39720a3dL, 0x7c927c24L, - 0x86e3725fL, 0x724d9db9L, 0x1ac15bb4L, 0xd39eb8fcL, - 0xed545578L, 0x08fca5b5L, 0xd83d7cd3L, 0x4dad0fc4L, - 0x1e50ef5eL, 0xb161e6f8L, 0xa28514d9L, 0x6c51133cL, - 0x6fd5c7e7L, 0x56e14ec4L, 0x362abfceL, 0xddc6c837L, - 0xd79a3234L, 0x92638212L, 0x670efa8eL, 0x406000e0L, - 0x3a39ce37L, 0xd3faf5cfL, 0xabc27737L, 0x5ac52d1bL, - 0x5cb0679eL, 0x4fa33742L, 0xd3822740L, 0x99bc9bbeL, - 0xd5118e9dL, 0xbf0f7315L, 0xd62d1c7eL, 0xc700c47bL, - 0xb78c1b6bL, 0x21a19045L, 0xb26eb1beL, 0x6a366eb4L, - 0x5748ab2fL, 0xbc946e79L, 0xc6a376d2L, 0x6549c2c8L, - 0x530ff8eeL, 0x468dde7dL, 0xd5730a1dL, 0x4cd04dc6L, - 0x2939bbdbL, 0xa9ba4650L, 0xac9526e8L, 0xbe5ee304L, - 0xa1fad5f0L, 0x6a2d519aL, 0x63ef8ce2L, 0x9a86ee22L, - 0xc089c2b8L, 0x43242ef6L, 0xa51e03aaL, 0x9cf2d0a4L, - 0x83c061baL, 0x9be96a4dL, 0x8fe51550L, 0xba645bd6L, - 0x2826a2f9L, 0xa73a3ae1L, 0x4ba99586L, 0xef5562e9L, - 0xc72fefd3L, 0xf752f7daL, 0x3f046f69L, 0x77fa0a59L, - 0x80e4a915L, 0x87b08601L, 0x9b09e6adL, 0x3b3ee593L, - 0xe990fd5aL, 0x9e34d797L, 0x2cf0b7d9L, 0x022b8b51L, - 0x96d5ac3aL, 0x017da67dL, 0xd1cf3ed6L, 0x7c7d2d28L, - 0x1f9f25cfL, 0xadf2b89bL, 0x5ad6b472L, 0x5a88f54cL, - 0xe029ac71L, 0xe019a5e6L, 0x47b0acfdL, 0xed93fa9bL, - 0xe8d3c48dL, 0x283b57ccL, 0xf8d56629L, 0x79132e28L, - 0x785f0191L, 0xed756055L, 0xf7960e44L, 0xe3d35e8cL, - 0x15056dd4L, 0x88f46dbaL, 0x03a16125L, 0x0564f0bdL, - 0xc3eb9e15L, 0x3c9057a2L, 0x97271aecL, 0xa93a072aL, - 0x1b3f6d9bL, 0x1e6321f5L, 0xf59c66fbL, 0x26dcf319L, - 0x7533d928L, 0xb155fdf5L, 0x03563482L, 0x8aba3cbbL, - 0x28517711L, 0xc20ad9f8L, 0xabcc5167L, 0xccad925fL, - 0x4de81751L, 0x3830dc8eL, 0x379d5862L, 0x9320f991L, - 0xea7a90c2L, 0xfb3e7bceL, 0x5121ce64L, 0x774fbe32L, - 0xa8b6e37eL, 0xc3293d46L, 0x48de5369L, 0x6413e680L, - 0xa2ae0810L, 0xdd6db224L, 0x69852dfdL, 0x09072166L, - 0xb39a460aL, 0x6445c0ddL, 0x586cdecfL, 0x1c20c8aeL, - 0x5bbef7ddL, 0x1b588d40L, 0xccd2017fL, 0x6bb4e3bbL, - 0xdda26a7eL, 0x3a59ff45L, 0x3e350a44L, 0xbcb4cdd5L, - 0x72eacea8L, 0xfa6484bbL, 0x8d6612aeL, 0xbf3c6f47L, - 0xd29be463L, 0x542f5d9eL, 0xaec2771bL, 0xf64e6370L, - 0x740e0d8dL, 0xe75b1357L, 0xf8721671L, 0xaf537d5dL, - 0x4040cb08L, 0x4eb4e2ccL, 0x34d2466aL, 0x0115af84L, - 0xe1b00428L, 0x95983a1dL, 0x06b89fb4L, 0xce6ea048L, - 0x6f3f3b82L, 0x3520ab82L, 0x011a1d4bL, 0x277227f8L, - 0x611560b1L, 0xe7933fdcL, 0xbb3a792bL, 0x344525bdL, - 0xa08839e1L, 0x51ce794bL, 0x2f32c9b7L, 0xa01fbac9L, - 0xe01cc87eL, 0xbcc7d1f6L, 0xcf0111c3L, 0xa1e8aac7L, - 0x1a908749L, 0xd44fbd9aL, 0xd0dadecbL, 0xd50ada38L, - 0x0339c32aL, 0xc6913667L, 0x8df9317cL, 0xe0b12b4fL, - 0xf79e59b7L, 
0x43f5bb3aL, 0xf2d519ffL, 0x27d9459cL, - 0xbf97222cL, 0x15e6fc2aL, 0x0f91fc71L, 0x9b941525L, - 0xfae59361L, 0xceb69cebL, 0xc2a86459L, 0x12baa8d1L, - 0xb6c1075eL, 0xe3056a0cL, 0x10d25065L, 0xcb03a442L, - 0xe0ec6e0eL, 0x1698db3bL, 0x4c98a0beL, 0x3278e964L, - 0x9f1f9532L, 0xe0d392dfL, 0xd3a0342bL, 0x8971f21eL, - 0x1b0a7441L, 0x4ba3348cL, 0xc5be7120L, 0xc37632d8L, - 0xdf359f8dL, 0x9b992f2eL, 0xe60b6f47L, 0x0fe3f11dL, - 0xe54cda54L, 0x1edad891L, 0xce6279cfL, 0xcd3e7e6fL, - 0x1618b166L, 0xfd2c1d05L, 0x848fd2c5L, 0xf6fb2299L, - 0xf523f357L, 0xa6327623L, 0x93a83531L, 0x56cccd02L, - 0xacf08162L, 0x5a75ebb5L, 0x6e163697L, 0x88d273ccL, - 0xde966292L, 0x81b949d0L, 0x4c50901bL, 0x71c65614L, - 0xe6c6c7bdL, 0x327a140aL, 0x45e1d006L, 0xc3f27b9aL, - 0xc9aa53fdL, 0x62a80f00L, 0xbb25bfe2L, 0x35bdd2f6L, - 0x71126905L, 0xb2040222L, 0xb6cbcf7cL, 0xcd769c2bL, - 0x53113ec0L, 0x1640e3d3L, 0x38abbd60L, 0x2547adf0L, - 0xba38209cL, 0xf746ce76L, 0x77afa1c5L, 0x20756060L, - 0x85cbfe4eL, 0x8ae88dd8L, 0x7aaaf9b0L, 0x4cf9aa7eL, - 0x1948c25cL, 0x02fb8a8cL, 0x01c36ae4L, 0xd6ebe1f9L, - 0x90d4f869L, 0xa65cdea0L, 0x3f09252dL, 0xc208e69fL, - 0xb74e6132L, 0xce77e25bL, 0x578fdfe3L, 0x3ac372e6L, - } - }; - diff --git a/bsd/crypto/blowfish/bf_skey.c b/bsd/crypto/blowfish/bf_skey.c deleted file mode 100644 index 3f64cf95e..000000000 --- a/bsd/crypto/blowfish/bf_skey.c +++ /dev/null @@ -1,120 +0,0 @@ -/* $FreeBSD: src/sys/crypto/blowfish/bf_skey.c,v 1.1.2.3 2002/03/26 10:12:23 ume Exp $ */ -/* $KAME: bf_skey.c,v 1.7 2002/02/27 01:33:59 itojun Exp $ */ - -/* crypto/bf/bf_skey.c */ -/* Copyright (C) 1995-1997 Eric Young (eay@mincom.oz.au) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@mincom.oz.au). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@mincom.oz.au). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@mincom.oz.au)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. 
If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@mincom.oz.au)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include -#include -#include -#include - -void -BF_set_key(key, len, data) - BF_KEY *key; - int len; - unsigned char *data; -{ - int i; - BF_LONG *p, ri, in[2]; - unsigned char *d, *end; - - memcpy((char *)key, (const char *)&bf_init, sizeof(BF_KEY)); - p = key->P; - - if (len > ((BF_ROUNDS + 2) * 4)) - len = (BF_ROUNDS + 2) * 4; - - d = data; - end= &(data[len]); - for (i = 0; i < BF_ROUNDS + 2; i++) { - ri = *(d++); - if (d >= end) d = data; - - ri <<= 8; - ri |= *(d++); - if (d >= end) d = data; - - ri <<= 8; - ri |= *(d++); - if (d >= end) d = data; - - ri <<= 8; - ri |= *(d++); - if (d >= end) d = data; - - p[i] ^= ri; - } - - in[0] = 0L; - in[1] = 0L; - for (i = 0; i < BF_ROUNDS + 2; i += 2) { - BF_encrypt(in, key); - p[i ] = in[0]; - p[i+1] = in[1]; - } - - p = key->S; - for (i = 0; i < 4 * 256; i += 2) { - BF_encrypt(in, key); - p[i ] = in[0]; - p[i+1] = in[1]; - } -} diff --git a/bsd/crypto/blowfish/blowfish.h b/bsd/crypto/blowfish/blowfish.h deleted file mode 100644 index 121e9c394..000000000 --- a/bsd/crypto/blowfish/blowfish.h +++ /dev/null @@ -1,93 +0,0 @@ -/* $FreeBSD: src/sys/crypto/blowfish/blowfish.h,v 1.1.2.3 2002/03/26 10:12:23 ume Exp $ */ -/* $KAME: blowfish.h,v 1.12 2002/02/27 01:33:59 itojun Exp $ */ - -/* crypto/bf/blowfish.h */ -/* Copyright (C) 1995-1997 Eric Young (eay@mincom.oz.au) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@mincom.oz.au). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@mincom.oz.au). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. 
- * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@mincom.oz.au)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@mincom.oz.au)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#ifndef HEADER_BLOWFISH_H -#define HEADER_BLOWFISH_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define BF_ENCRYPT 1 -#define BF_DECRYPT 0 - -/* must be 32bit quantity */ -#define BF_LONG u_int32_t - -#define BF_ROUNDS 16 -#define BF_BLOCK 8 - -typedef struct bf_key_st { - BF_LONG P[BF_ROUNDS+2]; - BF_LONG S[4*256]; -} BF_KEY; - -void BF_set_key(BF_KEY *, int, unsigned char *); -void BF_encrypt(BF_LONG *, BF_KEY *); -void BF_decrypt(BF_LONG *, BF_KEY *); -void BF_cbc_encrypt(const unsigned char *, unsigned char *, long, - const BF_KEY *, unsigned char *, int); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/bsd/crypto/cast128/cast128.c b/bsd/crypto/cast128/cast128.c deleted file mode 100644 index 84d40ba0b..000000000 --- a/bsd/crypto/cast128/cast128.c +++ /dev/null @@ -1,887 +0,0 @@ -/* $FreeBSD: src/sys/crypto/cast128/cast128.c,v 1.1.2.3 2001/12/05 05:54:57 ume Exp $ */ -/* $KAME: cast128.c,v 1.5 2001/11/27 09:47:32 sakane Exp $ */ - -/* - * heavily modified by Tomomi Suzuki - */ -/* - * The CAST-128 Encryption Algorithm (RFC 2144) - * - * original implementation - * 1997/08/21 - */ -/* - * Copyright (C) 1997 Hideo "Sir MANMOS" Morishita - * All rights reserved. 
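The BF_set_key routine declared in blowfish.h above (and deleted in bf_skey.c) expands a variable-length user key: it XORs the key, repeated cyclically, into the 18-entry P-array 32 bits at a time, then repeatedly encrypts an all-zero block to regenerate P and the S-boxes; keys longer than (BF_ROUNDS + 2) * 4 = 72 bytes are silently truncated. A self-contained rendering of just the key-mixing step:

    #include <stdint.h>

    #define BF_ROUNDS 16

    static void mix_key_into_P(uint32_t P[BF_ROUNDS + 2],
                               const unsigned char *key, int len)
    {
        if (len > (BF_ROUNDS + 2) * 4)
            len = (BF_ROUNDS + 2) * 4;     /* truncate long keys */
        const unsigned char *d = key, *end = key + len;
        for (int i = 0; i < BF_ROUNDS + 2; i++) {
            uint32_t ri = 0;
            for (int j = 0; j < 4; j++) {  /* big-endian 32-bit chunks */
                ri = (ri << 8) | *d++;
                if (d >= end)
                    d = key;               /* short keys wrap around */
            }
            P[i] ^= ri;
        }
    }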
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY Hideo "Sir MaNMOS" Morishita ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL Hideo "Sir MaNMOS" Morishita BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include - - -static u_int32_t S1[]; -static u_int32_t S2[]; -static u_int32_t S3[]; -static u_int32_t S4[]; -static u_int32_t S5[]; -static u_int32_t S6[]; -static u_int32_t S7[]; -static u_int32_t S8[]; - - -/* - * Step 1 - */ -void set_cast128_subkey(u_int32_t *subkey, u_int8_t *key0, int keylen) -{ - u_int32_t buf[8]; /* for x0x1x2x3, x4x5x6x7 ..., z0z1z2z3, ... */ - u_int32_t key[16]; - int i; - - /* - * the key has to be initilized. should it be logged when the key - * length is more than 16 bytes ? anyway, ignore it at this moment. 
- */ - if (keylen > 16) - keylen = 16; - for (i = 0; i < keylen; i++) - key[i] = key0[i]; - while (i < 16) - key[i++] = 0; - - buf[0] = (key[ 0] << 24) | (key[ 1] << 16) | (key[ 2] << 8) - | key[ 3]; - buf[1] = (key[ 4] << 24) | (key[ 5] << 16) | (key[ 6] << 8) - | key[ 7]; - buf[2] = (key[ 8] << 24) | (key[ 9] << 16) | (key[10] << 8) - | key[11]; - buf[3] = (key[12] << 24) | (key[13] << 16) | (key[14] << 8) - | key[15]; - - /* masking subkey */ - z0z1z2z3 = x0x1x2x3 ^ S5[xD] ^ S6[xF] ^ S7[xC] ^ S8[xE] ^ S7[x8]; - z4z5z6z7 = x8x9xAxB ^ S5[z0] ^ S6[z2] ^ S7[z1] ^ S8[z3] ^ S8[xA]; - z8z9zAzB = xCxDxExF ^ S5[z7] ^ S6[z6] ^ S7[z5] ^ S8[z4] ^ S5[x9]; - zCzDzEzF = x4x5x6x7 ^ S5[zA] ^ S6[z9] ^ S7[zB] ^ S8[z8] ^ S6[xB]; - subkey[0] = S5[z8] ^ S6[z9] ^ S7[z7] ^ S8[z6] ^ S5[z2]; - subkey[1] = S5[zA] ^ S6[zB] ^ S7[z5] ^ S8[z4] ^ S6[z6]; - subkey[2] = S5[zC] ^ S6[zD] ^ S7[z3] ^ S8[z2] ^ S7[z9]; - subkey[3] = S5[zE] ^ S6[zF] ^ S7[z1] ^ S8[z0] ^ S8[zC]; - - x0x1x2x3 = z8z9zAzB ^ S5[z5] ^ S6[z7] ^ S7[z4] ^ S8[z6] ^ S7[z0]; - x4x5x6x7 = z0z1z2z3 ^ S5[x0] ^ S6[x2] ^ S7[x1] ^ S8[x3] ^ S8[z2]; - x8x9xAxB = z4z5z6z7 ^ S5[x7] ^ S6[x6] ^ S7[x5] ^ S8[x4] ^ S5[z1]; - xCxDxExF = zCzDzEzF ^ S5[xA] ^ S6[x9] ^ S7[xB] ^ S8[x8] ^ S6[z3]; - subkey[4] = S5[x3] ^ S6[x2] ^ S7[xC] ^ S8[xD] ^ S5[x8]; - subkey[5] = S5[x1] ^ S6[x0] ^ S7[xE] ^ S8[xF] ^ S6[xD]; - subkey[6] = S5[x7] ^ S6[x6] ^ S7[x8] ^ S8[x9] ^ S7[x3]; - subkey[7] = S5[x5] ^ S6[x4] ^ S7[xA] ^ S8[xB] ^ S8[x7]; - - z0z1z2z3 = x0x1x2x3 ^ S5[xD] ^ S6[xF] ^ S7[xC] ^ S8[xE] ^ S7[x8]; - z4z5z6z7 = x8x9xAxB ^ S5[z0] ^ S6[z2] ^ S7[z1] ^ S8[z3] ^ S8[xA]; - z8z9zAzB = xCxDxExF ^ S5[z7] ^ S6[z6] ^ S7[z5] ^ S8[z4] ^ S5[x9]; - zCzDzEzF = x4x5x6x7 ^ S5[zA] ^ S6[z9] ^ S7[zB] ^ S8[z8] ^ S6[xB]; - subkey[8] = S5[z3] ^ S6[z2] ^ S7[zC] ^ S8[zD] ^ S5[z9]; - subkey[9] = S5[z1] ^ S6[z0] ^ S7[zE] ^ S8[zF] ^ S6[zC]; - subkey[10] = S5[z7] ^ S6[z6] ^ S7[z8] ^ S8[z9] ^ S7[z2]; - subkey[11] = S5[z5] ^ S6[z4] ^ S7[zA] ^ S8[zB] ^ S8[z6]; - - x0x1x2x3 = z8z9zAzB ^ S5[z5] ^ S6[z7] ^ S7[z4] ^ S8[z6] ^ S7[z0]; - x4x5x6x7 = z0z1z2z3 ^ S5[x0] ^ S6[x2] ^ S7[x1] ^ S8[x3] ^ S8[z2]; - x8x9xAxB = z4z5z6z7 ^ S5[x7] ^ S6[x6] ^ S7[x5] ^ S8[x4] ^ S5[z1]; - xCxDxExF = zCzDzEzF ^ S5[xA] ^ S6[x9] ^ S7[xB] ^ S8[x8] ^ S6[z3]; - subkey[12] = S5[x8] ^ S6[x9] ^ S7[x7] ^ S8[x6] ^ S5[x3]; - subkey[13] = S5[xA] ^ S6[xB] ^ S7[x5] ^ S8[x4] ^ S6[x7]; - subkey[14] = S5[xC] ^ S6[xD] ^ S7[x3] ^ S8[x2] ^ S7[x8]; - subkey[15] = S5[xE] ^ S6[xF] ^ S7[x1] ^ S8[x0] ^ S8[xD]; - - /* rotate subkey (least significast 5 bits) */ - z0z1z2z3 = x0x1x2x3 ^ S5[xD] ^ S6[xF] ^ S7[xC] ^ S8[xE] ^ S7[x8]; - z4z5z6z7 = x8x9xAxB ^ S5[z0] ^ S6[z2] ^ S7[z1] ^ S8[z3] ^ S8[xA]; - z8z9zAzB = xCxDxExF ^ S5[z7] ^ S6[z6] ^ S7[z5] ^ S8[z4] ^ S5[x9]; - zCzDzEzF = x4x5x6x7 ^ S5[zA] ^ S6[z9] ^ S7[zB] ^ S8[z8] ^ S6[xB]; - subkey[16] = (S5[z8] ^ S6[z9] ^ S7[z7] ^ S8[z6] ^ S5[z2]) & 0x1f; - subkey[17] = (S5[zA] ^ S6[zB] ^ S7[z5] ^ S8[z4] ^ S6[z6]) & 0x1f; - subkey[18] = (S5[zC] ^ S6[zD] ^ S7[z3] ^ S8[z2] ^ S7[z9]) & 0x1f; - subkey[19] = (S5[zE] ^ S6[zF] ^ S7[z1] ^ S8[z0] ^ S8[zC]) & 0x1f; - - x0x1x2x3 = z8z9zAzB ^ S5[z5] ^ S6[z7] ^ S7[z4] ^ S8[z6] ^ S7[z0]; - x4x5x6x7 = z0z1z2z3 ^ S5[x0] ^ S6[x2] ^ S7[x1] ^ S8[x3] ^ S8[z2]; - x8x9xAxB = z4z5z6z7 ^ S5[x7] ^ S6[x6] ^ S7[x5] ^ S8[x4] ^ S5[z1]; - xCxDxExF = zCzDzEzF ^ S5[xA] ^ S6[x9] ^ S7[xB] ^ S8[x8] ^ S6[z3]; - subkey[20] = (S5[x3] ^ S6[x2] ^ S7[xC] ^ S8[xD] ^ S5[x8]) & 0x1f; - subkey[21] = (S5[x1] ^ S6[x0] ^ S7[xE] ^ S8[xF] ^ S6[xD]) & 0x1f; - subkey[22] = (S5[x7] ^ S6[x6] ^ S7[x8] ^ S8[x9] ^ S7[x3]) & 0x1f; - 
subkey[23] = (S5[x5] ^ S6[x4] ^ S7[xA] ^ S8[xB] ^ S8[x7]) & 0x1f; - - z0z1z2z3 = x0x1x2x3 ^ S5[xD] ^ S6[xF] ^ S7[xC] ^ S8[xE] ^ S7[x8]; - z4z5z6z7 = x8x9xAxB ^ S5[z0] ^ S6[z2] ^ S7[z1] ^ S8[z3] ^ S8[xA]; - z8z9zAzB = xCxDxExF ^ S5[z7] ^ S6[z6] ^ S7[z5] ^ S8[z4] ^ S5[x9]; - zCzDzEzF = x4x5x6x7 ^ S5[zA] ^ S6[z9] ^ S7[zB] ^ S8[z8] ^ S6[xB]; - subkey[24] = (S5[z3] ^ S6[z2] ^ S7[zC] ^ S8[zD] ^ S5[z9]) & 0x1f; - subkey[25] = (S5[z1] ^ S6[z0] ^ S7[zE] ^ S8[zF] ^ S6[zC]) & 0x1f; - subkey[26] = (S5[z7] ^ S6[z6] ^ S7[z8] ^ S8[z9] ^ S7[z2]) & 0x1f; - subkey[27] = (S5[z5] ^ S6[z4] ^ S7[zA] ^ S8[zB] ^ S8[z6]) & 0x1f; - - x0x1x2x3 = z8z9zAzB ^ S5[z5] ^ S6[z7] ^ S7[z4] ^ S8[z6] ^ S7[z0]; - x4x5x6x7 = z0z1z2z3 ^ S5[x0] ^ S6[x2] ^ S7[x1] ^ S8[x3] ^ S8[z2]; - x8x9xAxB = z4z5z6z7 ^ S5[x7] ^ S6[x6] ^ S7[x5] ^ S8[x4] ^ S5[z1]; - xCxDxExF = zCzDzEzF ^ S5[xA] ^ S6[x9] ^ S7[xB] ^ S8[x8] ^ S6[z3]; - subkey[28] = (S5[x8] ^ S6[x9] ^ S7[x7] ^ S8[x6] ^ S5[x3]) & 0x1f; - subkey[29] = (S5[xA] ^ S6[xB] ^ S7[x5] ^ S8[x4] ^ S6[x7]) & 0x1f; - subkey[30] = (S5[xC] ^ S6[xD] ^ S7[x3] ^ S8[x2] ^ S7[x8]) & 0x1f; - subkey[31] = (S5[xE] ^ S6[xF] ^ S7[x1] ^ S8[x0] ^ S8[xD]) & 0x1f; -} - - -#define CAST128_TYPE1(rc, d, km, kr) { \ - u_int32_t x = circular_leftshift(((km)+(d)), (kr)); \ - (rc) = ((S1[byte0(x)] ^ S2[byte1(x)]) - S3[byte2(x)]) + S4[byte3(x)]; \ -} - -#define CAST128_TYPE2(rc, d, km, kr) { \ - u_int32_t x = circular_leftshift(((km)^(d)), (kr)); \ - (rc) = ((S1[byte0(x)] - S2[byte1(x)]) + S3[byte2(x)]) ^ S4[byte3(x)]; \ -} - -#define CAST128_TYPE3(rc, d, km, kr) { \ - u_int32_t x = circular_leftshift(((km)-(d)), (kr)); \ - (rc) = ((S1[byte0(x)] + S2[byte1(x)]) ^ S3[byte2(x)]) - S4[byte3(x)]; \ -} - - -void cast128_encrypt_round16(u_int8_t *c, const u_int8_t *m, - u_int32_t *subkey) -{ - u_int32_t l; /* left 32bit */ - u_int32_t r; /* right 32bit */ - u_int32_t br; /* backup right 32bit */ - u_int32_t rc; /* result code of CAST128_TYPE?() */ - u_int32_t *km, *kr; - - /* Step 2 */ - l = (m[0] << 24) | (m[1] << 16) | (m[2] << 8) | m[3]; - r = (m[4] << 24) | (m[5] << 16) | (m[6] << 8) | m[7]; - - /* Step 3 */ - km = subkey; - kr = subkey + 16; - - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; - - /* Step 4 */ - c[0] = (r >> 24) & 0xff; - c[1] = (r >> 16) & 0xff; - c[2] = (r >> 8) & 0xff; - c[3] = r & 0xff; - c[4] = (l >> 24) & 0xff; - c[5] = (l >> 16) & 0xff; - c[6] = (l >> 8) & 0xff; - c[7] = l & 0xff; -} - - -void 
cast128_decrypt_round16(u_int8_t *m, const u_int8_t *c, - u_int32_t *subkey) -{ - u_int32_t l; /* left 32bit */ - u_int32_t r; /* right 32bit */ - u_int32_t bl; /* backup left 32bit */ - u_int32_t rc; /* result code of CAST128_TYPE?() */ - u_int32_t *km, *kr; - - /* Step 2 */ - r = (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3]; - l = (c[4] << 24) | (c[5] << 16) | (c[6] << 8) | c[7]; - - /* Step 3 */ - km = subkey + 15; - kr = subkey + 31; - - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; - - /* Step 4 */ - m[0] = (l >> 24) & 0xff; - m[1] = (l >> 16) & 0xff; - m[2] = (l >> 8) & 0xff; - m[3] = l & 0xff; - m[4] = (r >> 24) & 0xff; - m[5] = (r >> 16) & 0xff; - m[6] = (r >> 8) & 0xff; - m[7] = r & 0xff; -} - - -void cast128_encrypt_round12(u_int8_t *c, const u_int8_t *m, - u_int32_t *subkey) -{ - u_int32_t l; /* left 32bit */ - u_int32_t r; /* right 32bit */ - u_int32_t br; /* backup right 32bit */ - u_int32_t rc; /* result code of CAST128_TYPE?() */ - u_int32_t *km, *kr; - - /* Step 2 */ - l = (m[0] << 24) | (m[1] << 16) | (m[2] << 8) | m[3]; - r = (m[4] << 24) | (m[5] << 16) | (m[6] << 8) | m[7]; - - /* Step 3 */ - km = subkey; - kr = subkey + 16; - - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE1(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE2(rc, r, *km, *kr); r = l ^ rc; l = br; km++; kr++; - br = r; CAST128_TYPE3(rc, r, *km, *kr); r = l ^ rc; l = br; - - /* Step 4 */ - c[0] = (r >> 24) & 0xff; - c[1] = (r >> 16) & 0xff; - c[2] = (r >> 8) & 0xff; - c[3] = r & 0xff; - c[4] = (l >> 24) & 0xff; - c[5] = (l >> 16) & 0xff; - c[6] = (l >> 8) & 0xff; - c[7] = l & 0xff; -} - - -void cast128_decrypt_round12(u_int8_t *m, const u_int8_t *c, - u_int32_t *subkey) -{ - u_int32_t l; /* left 32bit */ - u_int32_t r; /* right 32bit */ - u_int32_t 
bl; /* backup left 32bit */ - u_int32_t rc; /* result code of CAST128_TYPE?() */ - u_int32_t *km, *kr; - - /* Step 2 */ - r = (c[0] << 24) | (c[1] << 16) | (c[2] << 8) | c[3]; - l = (c[4] << 24) | (c[5] << 16) | (c[6] << 8) | c[7]; - - /* Step 3 */ - km = subkey + 11; - kr = subkey + 27; - - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE3(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE2(rc, l, *km, *kr); l = r ^ rc; r = bl; km--; kr--; - bl = l; CAST128_TYPE1(rc, l, *km, *kr); l = r ^ rc; r = bl; - - /* Step 4 */ - m[0] = (l >> 24) & 0xff; - m[1] = (l >> 16) & 0xff; - m[2] = (l >> 8) & 0xff; - m[3] = l & 0xff; - m[4] = (r >> 24) & 0xff; - m[5] = (r >> 16) & 0xff; - m[6] = (r >> 8) & 0xff; - m[7] = r & 0xff; -} - - -static u_int32_t S1[] = { - 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, - 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949, - 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, - 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e, - 0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, - 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d, - 0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, - 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0, - 0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, - 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7, - 0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, - 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935, - 0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, - 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d, - 0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, - 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50, - 0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, - 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe, - 0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, - 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3, - 0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, - 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167, - 0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, - 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291, - 0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, - 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779, - 0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, - 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2, - 0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, - 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511, - 0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, - 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d, - 0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, - 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5, - 0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, - 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324, - 0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, - 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c, - 0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, - 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc, - 0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, - 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d, - 0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 
0xd4df39de, - 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96, - 0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, - 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a, - 0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, - 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d, - 0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, - 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd, - 0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, - 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6, - 0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, - 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9, - 0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, - 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872, - 0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, - 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c, - 0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, - 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e, - 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, - 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9, - 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, - 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf, -}; - -static u_int32_t S2[] = { - 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, - 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651, - 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, - 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3, - 0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, - 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb, - 0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, - 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806, - 0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, - 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b, - 0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, - 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359, - 0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, - 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b, - 0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, - 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c, - 0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, - 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34, - 0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, - 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb, - 0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, - 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd, - 0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, - 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860, - 0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, - 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b, - 0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, - 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304, - 0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, - 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b, - 0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, - 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf, - 0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, - 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c, - 0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, - 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13, - 0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, - 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f, - 0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, - 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6, - 0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, - 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6, - 0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, - 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58, - 0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, - 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906, - 0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, - 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d, - 0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, - 
0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6, - 0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, - 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4, - 0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, - 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6, - 0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, - 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f, - 0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, - 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249, - 0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, - 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa, - 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, - 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9, - 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, - 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1, -}; - -static u_int32_t S3[] = { - 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, - 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90, - 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, - 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5, - 0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, - 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e, - 0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, - 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240, - 0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, - 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5, - 0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, - 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b, - 0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, - 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71, - 0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, - 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04, - 0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, - 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82, - 0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, - 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15, - 0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, - 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2, - 0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, - 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176, - 0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, - 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148, - 0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, - 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc, - 0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, - 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341, - 0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, - 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e, - 0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, - 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51, - 0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, - 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f, - 0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, - 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a, - 0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, - 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b, - 0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, - 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b, - 0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, - 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5, - 0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, - 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45, - 0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, - 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536, - 0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, - 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc, - 0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, - 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0, - 0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, - 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69, - 0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, - 0xcf1febd2, 
0x61efc8c2, 0xf1ac2571, 0xcc8239c2, - 0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, - 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49, - 0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, - 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d, - 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, - 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a, - 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, - 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783, -}; - -static u_int32_t S4[] = { - 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, - 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1, - 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, - 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf, - 0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, - 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15, - 0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, - 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121, - 0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, - 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25, - 0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, - 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5, - 0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, - 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb, - 0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, - 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5, - 0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, - 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d, - 0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, - 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6, - 0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, - 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23, - 0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, - 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003, - 0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, - 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6, - 0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, - 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119, - 0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, - 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24, - 0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, - 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a, - 0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, - 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79, - 0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, - 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df, - 0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, - 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26, - 0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, - 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab, - 0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, - 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7, - 0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, - 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417, - 0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, - 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2, - 0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, - 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2, - 0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, - 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a, - 0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, - 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919, - 0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, - 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef, - 0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, - 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876, - 0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, - 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab, - 0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, - 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04, - 0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, - 0x41823979, 0x932bcdf6, 
0xb657c34d, 0x4edfd282, - 0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, - 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2, -}; - -static u_int32_t S5[] = { - 0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, - 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f, - 0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, - 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a, - 0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, - 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff, - 0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, - 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02, - 0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, - 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a, - 0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, - 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7, - 0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, - 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9, - 0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, - 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981, - 0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, - 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774, - 0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, - 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655, - 0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, - 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2, - 0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, - 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910, - 0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, - 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1, - 0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, - 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da, - 0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, - 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049, - 0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, - 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f, - 0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, - 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba, - 0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, - 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be, - 0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, - 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3, - 0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, - 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840, - 0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, - 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4, - 0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, - 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2, - 0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, - 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7, - 0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, - 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5, - 0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, - 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e, - 0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, - 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e, - 0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, - 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801, - 0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, - 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad, - 0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, - 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0, - 0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, - 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20, - 0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, - 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8, - 0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, - 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4, -}; - -static u_int32_t S6[] = { - 0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, - 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac, - 0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, - 0x1ab6a6b8, 
0xde5ebe39, 0xf38ff732, 0x8989b138, - 0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, - 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367, - 0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, - 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98, - 0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, - 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072, - 0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, - 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3, - 0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, - 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd, - 0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, - 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8, - 0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, - 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9, - 0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, - 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54, - 0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, - 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387, - 0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, - 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc, - 0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, - 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf, - 0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, - 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf, - 0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, - 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f, - 0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, - 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289, - 0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, - 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950, - 0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, - 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f, - 0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, - 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b, - 0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, - 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be, - 0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, - 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13, - 0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, - 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976, - 0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, - 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0, - 0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, - 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891, - 0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, - 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da, - 0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, - 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc, - 0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, - 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084, - 0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, - 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25, - 0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, - 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121, - 0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, - 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5, - 0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, - 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd, - 0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, - 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f, -}; - -static u_int32_t S7[] = { - 0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, - 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f, - 0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, - 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de, - 0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, - 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43, - 0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, - 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19, - 0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, - 0xc3b3ad66, 0x28136086, 
0x0bd8dfa8, 0x356d1cf2, - 0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, - 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516, - 0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, - 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88, - 0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, - 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816, - 0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, - 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756, - 0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, - 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a, - 0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, - 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264, - 0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, - 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688, - 0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, - 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28, - 0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, - 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3, - 0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, - 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7, - 0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, - 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06, - 0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, - 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033, - 0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, - 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a, - 0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, - 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566, - 0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, - 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509, - 0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, - 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962, - 0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, - 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e, - 0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, - 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c, - 0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, - 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c, - 0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, - 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285, - 0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, - 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301, - 0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, - 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be, - 0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, - 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767, - 0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, - 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647, - 0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, - 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914, - 0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, - 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c, - 0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, - 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3, -}; - -static u_int32_t S8[] = { - 0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, - 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5, - 0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, - 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc, - 0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, - 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd, - 0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, - 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d, - 0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, - 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2, - 0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, - 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862, - 0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, - 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc, - 0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, - 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 
0x70108c0c, - 0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, - 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e, - 0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, - 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039, - 0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, - 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8, - 0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, - 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42, - 0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, - 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5, - 0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, - 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472, - 0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, - 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225, - 0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, - 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c, - 0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, - 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb, - 0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, - 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054, - 0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, - 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70, - 0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, - 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc, - 0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, - 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c, - 0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, - 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3, - 0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, - 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4, - 0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, - 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101, - 0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, - 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f, - 0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, - 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e, - 0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, - 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a, - 0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, - 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c, - 0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, - 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384, - 0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, - 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c, - 0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, - 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82, - 0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, - 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e, -}; - diff --git a/bsd/crypto/cast128/cast128.h b/bsd/crypto/cast128/cast128.h deleted file mode 100644 index d79eea55c..000000000 --- a/bsd/crypto/cast128/cast128.h +++ /dev/null @@ -1,55 +0,0 @@ -/* $FreeBSD: src/sys/crypto/cast128/cast128.h,v 1.1.2.3 2001/12/05 05:54:57 ume Exp $ */ -/* $KAME: cast128.h,v 1.7 2001/11/27 09:47:32 sakane Exp $ */ - -/* - * heavily modified by Tomomi Suzuki - */ -/* - * The CAST-128 Encryption Algorithm (RFC 2144) - * - * original implementation - * 1997/08/21 - */ -/* - * Copyright (C) 1997 Hideo "Sir MANMOS" Morishita - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY Hideo "Sir MaNMOS" Morishita ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL Hideo "Sir MaNMOS" Morishita BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef RFC2144_CAST_128_H -#define RFC2144_CAST_128_H - -#include - - -#define CAST128_ENCRYPT 1 -#define CAST128_DECRYPT 0 - - -extern void set_cast128_subkey(u_int32_t *, u_int8_t *, int); -extern void cast128_encrypt_round16(u_int8_t *, const u_int8_t *, u_int32_t *); -extern void cast128_decrypt_round16(u_int8_t *, const u_int8_t *, u_int32_t *); -extern void cast128_encrypt_round12(u_int8_t *, const u_int8_t *, u_int32_t *); -extern void cast128_decrypt_round12(u_int8_t *, const u_int8_t *, u_int32_t *); -#endif - diff --git a/bsd/crypto/cast128/cast128_subkey.h b/bsd/crypto/cast128/cast128_subkey.h deleted file mode 100644 index d30980385..000000000 --- a/bsd/crypto/cast128/cast128_subkey.h +++ /dev/null @@ -1,92 +0,0 @@ -/* $FreeBSD: src/sys/crypto/cast128/cast128_subkey.h,v 1.1.2.1 2000/07/15 07:14:21 kris Exp $ */ -/* $KAME: cast128_subkey.h,v 1.3 2000/03/27 04:36:30 sumikawa Exp $ */ - -/* - * heavily modified by Tomomi Suzuki - */ -/* - * The CAST-128 Encryption Algorithm (RFC 2144) - * - * original implementation - * 1997/08/21 - */ -/* - * Copyright (C) 1997 Hideo "Sir MANMOS" Morishita - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY Hideo "Sir MaNMOS" Morishita ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL Hideo "Sir MaNMOS" Morishita BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#ifndef RFC2144_CAST_128_SUBKEY_H -#define RFC2144_CAST_128_SUBKEY_H - -#define x0x1x2x3 buf[0] -#define x4x5x6x7 buf[1] -#define x8x9xAxB buf[2] -#define xCxDxExF buf[3] -#define z0z1z2z3 buf[4] -#define z4z5z6z7 buf[5] -#define z8z9zAzB buf[6] -#define zCzDzEzF buf[7] - -#define byte0(x) (((x) >> 24)) -#define byte1(x) (((x) >> 16) & 0xff) -#define byte2(x) (((x) >> 8) & 0xff) -#define byte3(x) (((x)) & 0xff) - -#define x0 byte0(buf[0]) -#define x1 byte1(buf[0]) -#define x2 byte2(buf[0]) -#define x3 byte3(buf[0]) -#define x4 byte0(buf[1]) -#define x5 byte1(buf[1]) -#define x6 byte2(buf[1]) -#define x7 byte3(buf[1]) -#define x8 byte0(buf[2]) -#define x9 byte1(buf[2]) -#define xA byte2(buf[2]) -#define xB byte3(buf[2]) -#define xC byte0(buf[3]) -#define xD byte1(buf[3]) -#define xE byte2(buf[3]) -#define xF byte3(buf[3]) -#define z0 byte0(buf[4]) -#define z1 byte1(buf[4]) -#define z2 byte2(buf[4]) -#define z3 byte3(buf[4]) -#define z4 byte0(buf[5]) -#define z5 byte1(buf[5]) -#define z6 byte2(buf[5]) -#define z7 byte3(buf[5]) -#define z8 byte0(buf[6]) -#define z9 byte1(buf[6]) -#define zA byte2(buf[6]) -#define zB byte3(buf[6]) -#define zC byte0(buf[7]) -#define zD byte1(buf[7]) -#define zE byte2(buf[7]) -#define zF byte3(buf[7]) - -#define circular_leftshift(x, y) ( ((x) << (y)) | ((x) >> (32-(y))) ) - -#endif - diff --git a/bsd/dev/Makefile b/bsd/dev/Makefile index 2f15fce2a..0dc6f85b8 100644 --- a/bsd/dev/Makefile +++ b/bsd/dev/Makefile @@ -9,7 +9,7 @@ include $(MakeInc_def) EXPINC_SUBDIRS = random -INST_SUBDIRS = dtrace +INSTTEXTFILES_SUBDIRS = dtrace include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/dev/dtrace/Makefile b/bsd/dev/dtrace/Makefile index f0d9c4f10..b16c0ed6c 100644 --- a/bsd/dev/dtrace/Makefile +++ b/bsd/dev/dtrace/Makefile @@ -7,7 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INST_SUBDIRS = scripts +INSTTEXTFILES_SUBDIRS = scripts include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index 314ed0b57..25f6d7c8e 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -69,34 +69,6 @@ * [Group] Functions", allowing one to find each block by searching forward * on capital-f functions. */ -#if !defined(__APPLE__) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#else #include #include #include @@ -120,16 +92,14 @@ #include #include #include +#include #include -#if defined(__APPLE__) #include extern uint32_t pmap_find_phys(void *, uint64_t); extern boolean_t pmap_valid_page(uint32_t); extern void OSKextRegisterKextsWithDTrace(void); extern kmod_info_t g_kernel_kmod_info; -#endif /* __APPLE__ */ - /* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ #define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ @@ -150,16 +120,16 @@ extern void dtrace_postinit(void); extern kern_return_t chudxnu_dtrace_callback (uint64_t selector, uint64_t *args, uint32_t count); -#endif /* __APPLE__ */ +/* Import this function to retrieve the physical memory. 
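+ *
+ * Editorial sketch (not part of the patch): assuming the standard
+ * "hw.memsize" sysctl reports physical memory as a uint64_t, the
+ * PHYS_MEM / 3 default documented for dtrace_buffer_memory_maxsize
+ * below could be derived in dtrace_init() roughly as:
+ *
+ *	uint64_t physmem = 0;
+ *	size_t len = sizeof(physmem);
+ *	if (kernel_sysctlbyname("hw.memsize", &physmem, &len, NULL, 0) == 0)
+ *		dtrace_buffer_memory_maxsize = physmem / 3;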
*/ +extern int kernel_sysctlbyname(const char *name, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); /* * DTrace Tunable Variables * - * The following variables may be tuned by adding a line to /etc/system that - * includes both the name of the DTrace module ("dtrace") and the name of the - * variable. For example: - * - * set dtrace:dtrace_destructive_disallow = 1 + * The following variables may be dynamically tuned by using sysctl(8), the + * variables being stored in the kern.dtrace namespace. For example: + * sysctl kern.dtrace.dof_maxsize = 1048575 # 1M * * In general, the only variables that one should be tuning this way are those * that affect system-wide DTrace behavior, and for which the default behavior @@ -168,10 +138,10 @@ extern kern_return_t chudxnu_dtrace_callback * When tuning these variables, avoid pathological values; while some attempt * is made to verify the integrity of these variables, they are not considered * part of the supported interface to DTrace, and they are therefore not - * checked comprehensively. Further, these variables should not be tuned - * dynamically via "mdb -kw" or other means; they should only be tuned via - * /etc/system. + * checked comprehensively. */ +uint64_t dtrace_buffer_memory_maxsize = 0; /* initialized in dtrace_init */ +uint64_t dtrace_buffer_memory_inuse = 0; int dtrace_destructive_disallow = 0; dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); @@ -201,6 +171,7 @@ hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ int dtrace_devdepth_max = 32; int dtrace_err_verbose; +int dtrace_provide_private_probes = 0; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; @@ -246,7 +217,7 @@ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ -#if defined(__APPLE__) + static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's dof modes. */ /* @@ -254,9 +225,8 @@ static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's * fbt_provide and sdt_provide. Its clearly not a dtrace tunable variable either... */ int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */ -#endif -#if defined(__APPLE__) + /* * To save memory, some common memory allocations are given a * unique zone. For example, dtrace_probe_t is 72 bytes in size, @@ -267,7 +237,6 @@ int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwi struct zone *dtrace_probe_t_zone; static int dtrace_module_unloaded(struct kmod_info *kmod); -#endif /* __APPLE__ */ /* * DTrace Locking @@ -300,17 +269,14 @@ static int dtrace_module_unloaded(struct kmod_info *kmod); * acquired _between_ dtrace_provider_lock and dtrace_lock. */ -#if !defined(__APPLE__) -static kmutex_t dtrace_lock; /* probe state lock */ -static kmutex_t dtrace_provider_lock; /* provider state lock */ -static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ -#else + /* * APPLE NOTE: * - * All kmutex_t vars have been changed to lck_mtx_t. - * Note that lck_mtx_t's require explicit initialization. 
+ * For porting purposes, all kmutex_t vars have been changed + * to lck_mtx_t, which require explicit initialization. * + * kmutex_t becomes lck_mtx_t * mutex_enter() becomes lck_mtx_lock() * mutex_exit() becomes lck_mtx_unlock() * @@ -320,13 +286,11 @@ static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ * becomes: * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); * - * Due to the number of these changes, they are not called out explicitly. */ static lck_mtx_t dtrace_lock; /* probe state lock */ static lck_mtx_t dtrace_provider_lock; /* provider state lock */ static lck_mtx_t dtrace_meta_lock; /* meta-provider state lock */ static lck_rw_t dtrace_dof_mode_lock; /* dof mode lock */ -#endif /* __APPLE__ */ /* * DTrace Provider Variables @@ -375,11 +339,7 @@ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ uint32_t dtrace_helptrace_next = 0; uint32_t dtrace_helptrace_nlocals; char *dtrace_helptrace_buffer; -#if !defined(__APPLE__) /* Quiet compiler warning */ -int dtrace_helptrace_bufsize = 512 * 1024; -#else size_t dtrace_helptrace_bufsize = 512 * 1024; -#endif /* __APPLE__ */ #if DEBUG int dtrace_helptrace_enabled = 1; @@ -387,6 +347,7 @@ int dtrace_helptrace_enabled = 1; int dtrace_helptrace_enabled = 0; #endif + /* * DTrace Error Hashing * @@ -430,7 +391,7 @@ static lck_mtx_t dtrace_errlock; /* * The key for a thread-local variable consists of the lower 61 bits of the - * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. + * current_thread(), plus the 3 bits of the highest active interrupt above LOCK_LEVEL. * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never * equal to a variable identifier. This is necessary (but not sufficient) to * assure that global associative arrays never collide with thread-local @@ -443,17 +404,6 @@ static lck_mtx_t dtrace_errlock; * no way for a global variable key signature to match a thread-local key * signature. */ -#if !defined(__APPLE__) -#define DTRACE_TLS_THRKEY(where) { \ - uint_t intr = 0; \ - uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ - for (; actv; actv >>= 1) \ - intr++; \ - ASSERT(intr < (1 << 3)); \ - (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ - (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ -} -#else #if defined (__x86_64__) /* FIXME: two function calls!! */ #define DTRACE_TLS_THRKEY(where) { \ @@ -466,7 +416,6 @@ static lck_mtx_t dtrace_errlock; #else #error Unknown architecture #endif -#endif /* __APPLE__ */ #define DT_BSWAP_8(x) ((x) & 0xff) #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) @@ -508,44 +457,6 @@ static lck_mtx_t dtrace_errlock; ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ (mstate)->dtms_scratch_ptr >= (alloc_sz)) -#if !defined(__APPLE__) -#define DTRACE_LOADFUNC(bits) \ -/*CSTYLED*/ \ -uint##bits##_t \ -dtrace_load##bits(uintptr_t addr) \ -{ \ - size_t size = bits / NBBY; \ - /*CSTYLED*/ \ - uint##bits##_t rval; \ - int i; \ - volatile uint16_t *flags = (volatile uint16_t *) \ - &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ - \ - DTRACE_ALIGNCHECK(addr, size, flags); \ - \ - for (i = 0; i < dtrace_toxranges; i++) { \ - if (addr >= dtrace_toxrange[i].dtt_limit) \ - continue; \ - \ - if (addr + size <= dtrace_toxrange[i].dtt_base) \ - continue; \ - \ - /* \ - * This address falls within a toxic region; return 0. 
\ */ \ *flags |= CPU_DTRACE_BADADDR; \ cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ return (0); \ } \ \ *flags |= CPU_DTRACE_NOFAULT; \ /*CSTYLED*/ \ rval = *((volatile uint##bits##_t *)addr); \ *flags &= ~CPU_DTRACE_NOFAULT; \ \ return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \ -} -#else /* __APPLE__ */ #define RECOVER_LABEL(bits) dtraceLoadRecover##bits: #if defined (__x86_64__) @@ -601,7 +512,6 @@ dtrace_load##bits(uintptr_t addr) \ #else /* all other architectures */ #error Unknown Architecture #endif -#endif /* __APPLE__ */ #ifdef __LP64__ #define dtrace_loadptr dtrace_load64 @@ -636,14 +546,6 @@ dtrace_load##bits(uintptr_t addr) \ (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) -#if defined (__APPLE__) -/* Avoid compiler warnings when assigning regs[rd] = NULL */ -#ifdef NULL -#undef NULL -#define NULL (uintptr_t)0 -#endif -#endif /* __APPLE__ */ - static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); @@ -661,6 +563,225 @@ static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); + +/* + * DTrace sysctl handlers + * + * These declarations and functions are used for deeper DTrace configuration. + * Most of them are not on a per-consumer basis and may impact other DTrace + * consumers. Correctness is not verified for all of these variables, so you + * should be careful about what values you are using. + */ + +SYSCTL_DECL(_kern_dtrace); +SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "dtrace"); + +static int +sysctl_dtrace_err_verbose SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + int changed, error; + int value = *(int *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value != 0 && value != 1) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_err_verbose = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.err_verbose + * + * Set DTrace verbosity when an error occurs (0 = disabled, 1 = enabled). + * Errors are reported when a DIFO or a DOF has been rejected by the kernel. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, err_verbose, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_err_verbose, 0, + sysctl_dtrace_err_verbose, "I", "dtrace error verbose"); + +static int +sysctl_dtrace_buffer_memory_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + uint64_t value = *(uint64_t *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= dtrace_buffer_memory_inuse) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_buffer_memory_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.buffer_memory_maxsize + * + * Set the maximal size in bytes that DTrace may use for all the consumers' state buffers. By default + * the limit is PHYS_MEM / 3 for *all* consumers. Attempting to set a null, a negative value + * or a value <= dtrace_buffer_memory_inuse will result in a failure.
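+ *
+ * Editorial illustration (not part of the patch): following the sysctl(8)
+ * syntax shown at the top of this file, a 512 MB cap would be requested
+ * with:
+ *
+ *	sysctl kern.dtrace.buffer_memory_maxsize=536870912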
+ */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, buffer_memory_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_buffer_memory_maxsize, 0, + sysctl_dtrace_buffer_memory_maxsize, "Q", "dtrace state buffer memory maxsize"); + +/* + * kern.dtrace.buffer_memory_inuse + * + * Current state buffer memory used, in bytes, by all the DTrace consumers. + * This value is read-only. + */ +SYSCTL_QUAD(_kern_dtrace, OID_AUTO, buffer_memory_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, + &dtrace_buffer_memory_inuse, "dtrace state buffer memory in-use"); + +static int +sysctl_dtrace_difo_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + size_t value = *(size_t*) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= 0) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_difo_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.difo_maxsize + * + * Set the DIFO max size in bytes, check the definition of dtrace_difo_maxsize + * to get the default value. Attempting to set a null or negative size will + * result in a failure. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, difo_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_difo_maxsize, 0, + sysctl_dtrace_difo_maxsize, "Q", "dtrace difo maxsize"); + +static int +sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + dtrace_optval_t value = *(dtrace_optval_t *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= 0) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_dof_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.dof_maxsize + * + * Set the DOF max size in bytes, check the definition of dtrace_dof_maxsize to + * get the default value. Attempting to set a null or negative size will result + * in a failure. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_dof_maxsize, 0, + sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize"); + +static int +sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + dtrace_optval_t value = *(dtrace_optval_t*) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= 0) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_global_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.global_maxsize + * + * Set the global variable max size in bytes, check the definition of + * dtrace_global_maxsize to get the default value. Attempting to set a null or + * negative size will result in a failure. 
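+ *
+ * Editorial sketch: dtrace_optval_t is a 64-bit quantity and the OID below
+ * is registered as CTLTYPE_QUAD, so a hypothetical user-space reader via
+ * sysctlbyname(3) would look like:
+ *
+ *	int64_t cur = 0;
+ *	size_t len = sizeof(cur);
+ *	if (sysctlbyname("kern.dtrace.global_maxsize", &cur, &len, NULL, 0) == 0)
+ *		printf("global_maxsize = %lld\n", (long long)cur);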
+ */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_global_maxsize, 0, + sysctl_dtrace_global_maxsize, "Q", "dtrace global maxsize"); + +static int +sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + int error; + int value = *(int *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, NULL); + if (error) + return (error); + + if (value != 0 && value != 1) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_provide_private_probes = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.provide_private_probes + * + * Set whether the providers must provide the private probes. This is + * mainly used by the FBT provider to request probes for the private/static + * symbols. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, provide_private_probes, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_provide_private_probes, 0, + sysctl_dtrace_provide_private_probes, "I", "provider must provide the private probes"); + /* * DTrace Probe Context Functions * @@ -861,11 +982,7 @@ static int dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { -#if !defined(__APPLE__) /* Quiet compiler warning - matches dtrace_dif_emulate */ - volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#endif /* __APPLE */ /* * If we hold the privilege to read from kernel memory, then @@ -1237,11 +1354,7 @@ dtrace_priv_proc_common_user(dtrace_state_t *state) */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) != NULL && -#else if ((cr = dtrace_CRED()) != NULL && -#endif /* __APPLE__ */ posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid && posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid && posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid && @@ -1262,7 +1375,7 @@ static int dtrace_priv_proc_common_zone(dtrace_state_t *state) { cred_t *cr, *s_cr = state->dts_cred.dcr_cred; -#pragma unused(cr, s_cr) /* __APPLE__ */ +#pragma unused(cr, s_cr, state) /* __APPLE__ */ /* * We should always have a non-NULL state cred here, since if cred @@ -1270,52 +1383,29 @@ dtrace_priv_proc_common_zone(dtrace_state_t *state) */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) != NULL && - s_cr->cr_zone == cr->cr_zone) - return (1); - - return (0); -#else -#pragma unused(state) - - return 1; /* Darwin doesn't do zones. */ -#endif /* __APPLE__ */ + return 1; /* APPLE NOTE: Darwin doesn't do zones. */ } /* * This privilege check should be used by actions and subroutines to * verify that the process has not setuid or changed credentials. */ -#if !defined(__APPLE__) -static int -dtrace_priv_proc_common_nocd() -{ - proc_t *proc; - - if ((proc = ttoproc(curthread)) != NULL && - !(proc->p_flag & SNOCD)) - return (1); - - return (0); -} -#else static int dtrace_priv_proc_common_nocd(void) { return 1; /* Darwin omits "No Core Dump" flag. 
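 * The Solaris version, deleted above, checked proc->p_flag & SNOCD;
 * Darwin has no equivalent flag, so the check is vacuously satisfied.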
*/ } -#endif /* __APPLE__ */ static int dtrace_priv_proc_destructive(dtrace_state_t *state) { int action = state->dts_cred.dcr_action; -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) @@ -1340,10 +1430,11 @@ bad: static int dtrace_priv_proc_control(dtrace_state_t *state) { -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) return (1); @@ -1353,9 +1444,7 @@ dtrace_priv_proc_control(dtrace_state_t *state) dtrace_priv_proc_common_nocd()) return (1); -#if defined(__APPLE__) bad: -#endif /* __APPLE__ */ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); @@ -1364,24 +1453,26 @@ bad: static int dtrace_priv_proc(dtrace_state_t *state) { -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) return (1); -#if defined(__APPLE__) bad: -#endif /* __APPLE__ */ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } -#if defined(__APPLE__) -/* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */ +/* + * The P_LNOATTACH check is an Apple specific check. + * We need a version of dtrace_priv_proc() that omits + * that check for PID and EXECNAME accesses + */ static int dtrace_priv_proc_relaxed(dtrace_state_t *state) { @@ -1393,14 +1484,17 @@ dtrace_priv_proc_relaxed(dtrace_state_t *state) return (0); } -#endif /* __APPLE__ */ static int dtrace_priv_kernel(dtrace_state_t *state) { + if (dtrace_is_restricted()) + goto bad; + if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) return (1); +bad: cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); @@ -1409,9 +1503,13 @@ dtrace_priv_kernel(dtrace_state_t *state) static int dtrace_priv_kernel_destructive(dtrace_state_t *state) { + if (dtrace_is_restricted()) + goto bad; + if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) return (1); +bad: cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); @@ -1423,10 +1521,7 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state) * clean the dirty dynamic variable lists on all CPUs. Dynamic variable * cleaning is explained in detail in . */ -#if defined(__APPLE__) /* Quiet compiler warning. */ -static -#endif /* __APPLE__ */ -void +static void dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; @@ -1518,10 +1613,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) * variable can be allocated. If NULL is returned, the appropriate counter * will be incremented. */ -#if defined(__APPLE__) /* Quiet compiler warning. 
*/ -static -#endif /* __APPLE__ */ -dtrace_dynvar_t * +static dtrace_dynvar_t * dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) @@ -1620,15 +1712,9 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, while ((lock = *lockp) & 1) continue; -#if !defined(__APPLE__) /* Quiet compiler warning */ - if (dtrace_casptr((void *)lockp, - (void *)lock, (void *)(lock + 1)) == (void *)lock) - break; -#else if (dtrace_casptr((void *)(uintptr_t)lockp, (void *)lock, (void *)(lock + 1)) == (void *)lock) break; -#endif /* __APPLE__ */ } dtrace_membar_producer(); @@ -2498,27 +2584,16 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, dtrace_speculation_t *spec; dtrace_buffer_t *src, *dest; uintptr_t daddr, saddr, dlimit; -#if !defined(__APPLE__) /* Quiet compiler warning */ - dtrace_speculation_state_t current, new; -#else dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; -#endif /* __APPLE__ */ intptr_t offs; if (which == 0) return; -#if !defined(__APPLE__) /* Quiet compiler warning */ - if (which > state->dts_nspeculations) { - cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; - return; - } -#else if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } -#endif /* __APPLE__ */ spec = &state->dts_speculations[which - 1]; src = &spec->dtsp_buffer[cpu]; @@ -2646,27 +2721,16 @@ dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; -#if !defined(__APPLE__) /* Quiet compiler warning */ - dtrace_speculation_state_t current, new; -#else dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; -#endif /* __APPLE__ */ dtrace_buffer_t *buf; if (which == 0) return; -#if !defined(__APPLE__) /* Quiet compiler warning */ - if (which > state->dts_nspeculations) { - cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; - return; - } -#else if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } -#endif /* __APPLE__ */ spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpu]; @@ -2726,11 +2790,7 @@ dtrace_speculation_clean_here(dtrace_state_t *state) return; } -#if !defined(__APPLE__) /* Quiet compiler warning */ - for (i = 0; i < state->dts_nspeculations; i++) { -#else for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { -#endif /* __APPLE__ */ dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; @@ -2765,19 +2825,11 @@ dtrace_speculation_clean_here(dtrace_state_t *state) static void dtrace_speculation_clean(dtrace_state_t *state) { -#if !defined(__APPLE__) /* Quiet compiler warning */ - int work = 0, rv; -#else int work = 0; uint32_t rv; -#endif /* __APPLE__ */ dtrace_specid_t i; -#if !defined(__APPLE__) /* Quiet compiler warning */ - for (i = 0; i < state->dts_nspeculations; i++) { -#else for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { -#endif /* __APPLE__ */ dtrace_speculation_t *spec = &state->dts_speculations[i]; ASSERT(!spec->dtsp_cleaning); @@ -2801,11 +2853,7 @@ dtrace_speculation_clean(dtrace_state_t *state) * speculation buffers, as appropriate. We can now set the state * to inactive. 
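 *
 * Editorial sketch: as elsewhere in this file, that transition is made
 * with a lock-free compare-and-swap rather than a mutex, roughly:
 *
 *	current = spec->dtsp_state;
 *	new = DTRACESPEC_INACTIVE;
 *	rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
 *	ASSERT(rv == current);
 *
 * so the cleaner never blocks against probe context.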
*/ -#if !defined(__APPLE__) /* Quiet compiler warning */ - for (i = 0; i < state->dts_nspeculations; i++) { -#else for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { -#endif /* __APPLE__ */ dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_speculation_state_t current, new; @@ -2836,21 +2884,13 @@ dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) { dtrace_speculation_t *spec; -#if !defined(__APPLE__) /* Quiet compiler warning */ - dtrace_speculation_state_t current, new; -#else dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; -#endif /* __APPLE__ */ dtrace_buffer_t *buf; if (which == 0) return (NULL); -#if !defined(__APPLE__) /* Quiet compiler warning */ - if (which > state->dts_nspeculations) { -#else if (which > (dtrace_specid_t)state->dts_nspeculations) { -#endif /* __APPLE__ */ cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return (NULL); } @@ -2911,9 +2951,7 @@ dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, * dtrace_dif_variable() uses this routine as a helper for various * builtin values such as 'execname' and 'probefunc.' */ -#if defined(__APPLE__) /* Quiet compiler warning. */ static -#endif /* __APPLE__ */ uintptr_t dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, dtrace_mstate_t *mstate) @@ -2940,7 +2978,7 @@ dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, if (mstate->dtms_scratch_ptr + strsz > mstate->dtms_scratch_base + mstate->dtms_scratch_size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - return (NULL); + return (0); } dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, @@ -2972,12 +3010,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { -#if !defined(__APPLE__) - int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. */ + /* + * APPLE NOTE: Account for introduction of __dtrace_probe() + */ int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ dtrace_provider_t *pv; uint64_t val; @@ -2986,12 +3022,11 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, mstate->dtms_probe->dtpr_id, mstate->dtms_probe->dtpr_arg, ndx, aframes); -#if defined(__APPLE__) /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) 
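 * dtrace_probe_error() stashes the faulting value in the state's
 * dts_arg_error_illval field; the branch below recovers it by treating
 * arg0 (dtms_arg[0]) as the dtrace_state_t pointer.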
*/ else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) { return ((dtrace_state_t *)(uintptr_t)(mstate->dtms_arg[0]))->dts_arg_error_illval; } -#endif /* __APPLE__ */ + else val = dtrace_getarg(ndx, aframes); @@ -3012,22 +3047,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_arg[ndx]); -#if !defined(__APPLE__) - case DIF_VAR_UREGS: { - klwp_t *lwp; - - if (!dtrace_priv_proc(state)) - return (0); - - if ((lwp = curthread->t_lwp) == NULL) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL; - return (0); - } - - return (dtrace_getreg(lwp->lwp_regs, ndx)); - } -#else case DIF_VAR_UREGS: { thread_t thread; @@ -3042,20 +3061,13 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(find_user_regs(thread), ndx)); } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_VAR_CURTHREAD: - if (!dtrace_priv_kernel(state)) - return (0); - return ((uint64_t)(uintptr_t)curthread); -#else + case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) return (0); return ((uint64_t)(uintptr_t)current_thread()); -#endif /* __APPLE__ */ case DIF_VAR_TIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { @@ -3064,15 +3076,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_timestamp); -#if !defined(__APPLE__) - case DIF_VAR_VTIMESTAMP: - ASSERT(dtrace_vtime_references != 0); - return (curthread->t_dtrace_vtime); -#else case DIF_VAR_VTIMESTAMP: ASSERT(dtrace_vtime_references != 0); return (dtrace_get_thread_vtime(current_thread())); -#endif /* __APPLE__ */ case DIF_VAR_WALLTIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { @@ -3081,6 +3087,13 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_walltimestamp); + case DIF_VAR_MACHTIMESTAMP: + if (!(mstate->dtms_present & DTRACE_MSTATE_MACHTIMESTAMP)) { + mstate->dtms_machtimestamp = mach_absolute_time(); + mstate->dtms_present |= DTRACE_MSTATE_MACHTIMESTAMP; + } + return (mstate->dtms_machtimestamp); + case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); @@ -3102,12 +3115,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { -#if !defined(__APPLE__) - int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. */ + /* + * APPLE NOTE: Account for introduction of __dtrace_probe() + */ int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; @@ -3138,12 +3149,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { -#if !defined(__APPLE__) - int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. 
*/ + /* + * APPLE NOTE: Account for introduction of __dtrace_probe() + */ int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ if (!DTRACE_ANCHORED(mstate->dtms_probe)) { /* @@ -3158,11 +3167,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); mstate->dtms_caller = caller[1]; } else if ((mstate->dtms_caller = -#if !defined(__APPLE__) /* Quiet compiler warnings */ - dtrace_caller(aframes)) == -1) { -#else - dtrace_caller(aframes)) == (uintptr_t)-1) { -#endif /* __APPLE__ */ + dtrace_caller(aframes)) == (uintptr_t)-1) { /* * We have failed to do this the quick way; * we must resort to the slower approach of @@ -3192,7 +3197,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * uint64_t will contain the caller, which is what * we're after. */ - ustack[2] = NULL; + ustack[2] = 0; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack(ustack, 3); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); @@ -3226,30 +3231,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, (uintptr_t)mstate->dtms_probe->dtpr_name, state, mstate)); -#if !defined(__APPLE__) - case DIF_VAR_PID: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * Note that we are assuming that an unanchored probe is - * always due to a high-level interrupt. (And we're assuming - * that there is only a single high level interrupt.) - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (pid0.pid_id); - - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * Further, it is always safe to dereference the p_pidp member - * of one's own proc structure. (These are truisms becuase - * threads and processes don't clean up their own state -- - * they leave that task to whomever reaps them.) - */ - return ((uint64_t)curthread->t_procp->p_pidp->pid_id); - -#else case DIF_VAR_PID: if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -3264,27 +3245,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return 0; return ((uint64_t)dtrace_proc_selfpid()); -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_PPID: - if (!dtrace_priv_proc(state)) - return (0); - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (pid0.pid_id); - - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - */ - return ((uint64_t)curthread->t_procp->p_ppid); -#else case DIF_VAR_PPID: if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -3296,18 +3257,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); return ((uint64_t)dtrace_proc_selfppid()); -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_TID: - /* - * See comment in DIF_VAR_PID. 
- */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); - return ((uint64_t)curthread->t_tid); -#else case DIF_VAR_TID: /* We do not need to check for null current_thread() */ return thread_tid(current_thread()); /* globally unique */ @@ -3325,29 +3275,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, /* We do not need to check for null current_thread() */ return thread_dispatchqaddr(current_thread()); -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_EXECNAME: - if (!dtrace_priv_proc(state)) - return (0); - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)(uintptr_t)p0.p_user.u_comm); - - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - */ - return (dtrace_dif_varstr( - (uintptr_t)curthread->t_procp->p_user.u_comm, - state, mstate)); -#else case DIF_VAR_EXECNAME: { char *xname = (char *)mstate->dtms_scratch_ptr; @@ -3367,29 +3295,8 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)(uintptr_t)xname); } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_VAR_ZONENAME: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - */ - return (dtrace_dif_varstr( - (uintptr_t)curthread->t_procp->p_zone->zone_name, - state, mstate)); -#else case DIF_VAR_ZONENAME: { /* scratch_size is equal to length('global') + 1 for the null-terminator. */ @@ -3412,30 +3319,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)(uintptr_t)zname); } -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_UID: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)p0.p_cred->cr_uid); - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - * - * Additionally, it is safe to dereference one's own process - * credential, since this is never NULL after process birth. - */ - return ((uint64_t)curthread->t_procp->p_cred->cr_uid); -#else case DIF_VAR_UID: if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -3447,30 +3331,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); return ((uint64_t) dtrace_proc_selfruid()); -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_GID: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)p0.p_cred->cr_gid); - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. 
- * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - * - * Additionally, it is safe to dereference one's own process - * credential, since this is never NULL after process birth. - */ - return ((uint64_t)curthread->t_procp->p_cred->cr_gid); -#else case DIF_VAR_GID: if (!dtrace_priv_proc(state)) return (0); @@ -3489,32 +3350,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return -1ULL; } -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_ERRNO: { - klwp_t *lwp; - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); - - /* - * It is always safe to dereference one's own t_lwp pointer in - * the event that this pointer is non-NULL. (This is true - * because threads and lwps don't clean up their own state -- - * they leave that task to whomever reaps them.) - */ - if ((lwp = curthread->t_lwp) == NULL) - return (0); - return ((uint64_t)lwp->lwp_errno); - } -#else case DIF_VAR_ERRNO: { uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (!dtrace_priv_proc(state)) @@ -3533,7 +3369,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return -1ULL; } } -#endif /* __APPLE__ */ default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); @@ -3554,11 +3389,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_mstate_t *mstate, dtrace_state_t *state) { volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; -#if !defined(__APPLE__) - volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#endif /* __APPLE__ */ dtrace_vstate_t *vstate = &state->dts_vstate; #if !defined(__APPLE__) @@ -3584,7 +3415,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_MUTEX_OWNED: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3598,7 +3429,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_MUTEX_OWNER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3613,7 +3444,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3624,7 +3455,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_MUTEX_TYPE_SPIN: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3637,7 +3468,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3649,7 +3480,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_RW_WRITE_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3660,7 +3491,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_RW_ISWRITER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3687,7 +3518,7 @@ 
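/*
 * The repeated change from regs[rd] = NULL to regs[rd] = 0 in the
 * subroutine hunks that follow reflects that the DIF register file is
 * an array of uint64_t, not of pointers; assigning the pointer
 * constant NULL to an integer draws warnings under stricter
 * compilers.  A sketch of the recurring load-guard idiom (the helper
 * name checked_load64 is hypothetical):
 */
static void
checked_load64(uint64_t *regs, uint_t rd, uintptr_t addr,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	if (!dtrace_canload(addr, sizeof (uint64_t), mstate, vstate)) {
		regs[rd] = 0;	/* fail closed: integer zero, not NULL */
		return;
	}
	regs[rd] = dtrace_load64(addr);
}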
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } if (!dtrace_canload(src, size, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3715,18 +3546,14 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, if (scratch_size < size || !DTRACE_INSCRATCH(mstate, scratch_size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } if (subr == DIF_SUBR_COPYIN) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); -#else if (dtrace_priv_proc(state)) dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); -#endif /* __APPLE__ */ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } @@ -3751,12 +3578,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); -#else if (dtrace_priv_proc(state)) dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); -#endif /* __APPLE__ */ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; } @@ -3775,17 +3598,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, */ if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); -#else if (dtrace_priv_proc(state)) dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); -#endif /* __APPLE__ */ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); ((char *)dest)[size - 1] = '\0'; @@ -3794,65 +3613,6 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } -#if !defined(__APPLE__) - case DIF_SUBR_MSGSIZE: - case DIF_SUBR_MSGDSIZE: { - uintptr_t baddr = tupregs[0].dttk_value, daddr; - uintptr_t wptr, rptr; - size_t count = 0; - int cont = 0; - - while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - - if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, - vstate)) { - regs[rd] = NULL; - break; - } - - wptr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_wptr)); - - rptr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_rptr)); - - if (wptr < rptr) { - *flags |= CPU_DTRACE_BADADDR; - *illval = tupregs[0].dttk_value; - break; - } - - daddr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_datap)); - - baddr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_cont)); - - /* - * We want to prevent against denial-of-service here, - * so we're only going to search the list for - * dtrace_msgdsize_max mblks. 
- */ - if (cont++ > dtrace_msgdsize_max) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - - if (subr == DIF_SUBR_MSGDSIZE) { - if (dtrace_load8(daddr + - offsetof(dblk_t, db_type)) != M_DATA) - continue; - } - - count += wptr - rptr; - } - - if (!(*flags & CPU_DTRACE_FAULT)) - regs[rd] = count; - - break; - } -#else case DIF_SUBR_MSGSIZE: case DIF_SUBR_MSGDSIZE: { /* Darwin does not implement SysV streams messages */ @@ -3860,29 +3620,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = 0; break; } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_SUBR_PROGENYOF: { - pid_t pid = tupregs[0].dttk_value; - proc_t *p; - int rval = 0; - - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - - for (p = curthread->t_procp; p != NULL; p = p->p_parent) { - if (p->p_pidp->pid_id == pid) { - rval = 1; - break; - } - } - - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - - regs[rd] = rval; - break; - } -#else case DIF_SUBR_PROGENYOF: { pid_t pid = tupregs[0].dttk_value; struct proc *p = current_proc(); @@ -3911,16 +3649,15 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = rval; break; } -#endif /* __APPLE__ */ case DIF_SUBR_SPECULATION: regs[rd] = dtrace_speculation(state); break; -#if !defined(__APPLE__) + case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; - uintptr_t uaddr = tupregs[1].dttk_value; + user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && @@ -3935,7 +3672,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_COPYOUTSTR: { uintptr_t kaddr = tupregs[0].dttk_value; - uintptr_t uaddr = tupregs[1].dttk_value; + user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && @@ -3947,37 +3684,6 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } break; } -#else - case DIF_SUBR_COPYOUT: { - uintptr_t kaddr = tupregs[0].dttk_value; - user_addr_t uaddr = tupregs[1].dttk_value; - uint64_t size = tupregs[2].dttk_value; - - if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyout(kaddr, uaddr, size, flags); - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - } - break; - } - - case DIF_SUBR_COPYOUTSTR: { - uintptr_t kaddr = tupregs[0].dttk_value; - user_addr_t uaddr = tupregs[1].dttk_value; - uint64_t size = tupregs[2].dttk_value; - - if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size, flags); - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - } - break; - } -#endif /* __APPLE__ */ case DIF_SUBR_STRLEN: { size_t sz; @@ -3986,7 +3692,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, state->dts_options[DTRACEOPT_STRSIZE]); if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4009,7 +3715,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; char c, target = (char)tupregs[1].dttk_value; - for (regs[rd] = NULL; addr < limit; addr++) { + for (regs[rd] = 0; addr < limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; @@ -4022,7 +3728,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4053,13 +3759,13 @@ 
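/*
 * The (size_t) casts added around the pos/len comparisons below quiet
 * -Wsign-compare: pos is a signed 64-bit position that has already
 * been clamped non-negative, while len is a size_t, and comparing the
 * two directly mixes signedness.  A sketch of the clamping order,
 * illustrative only:
 */
static int64_t
clamp_pos(int64_t pos, size_t len)
{
	if (pos < 0)
		pos = 0;		/* clamp before the unsigned compare */
	if ((size_t)pos > len)		/* safe: pos is now non-negative */
		pos = (int64_t)len;
	return (pos);
}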
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = notfound; if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4137,21 +3843,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (pos > len) -#else if ((size_t)pos > len) -#endif /* __APPLE__ */ pos = len; } else { if (pos < 0) pos = 0; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (pos >= len) { -#else if ((size_t)pos >= len) { -#endif /* __APPLE__ */ if (sublen == 0) regs[rd] = len; break; @@ -4193,30 +3891,25 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t limit, toklimit = tokaddr + size; char *dest = (char *)mstate->dtms_scratch_ptr; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - uint8_t c, tokmap[32]; /* 256 / 8 */ - int i; -#else uint8_t c='\0', tokmap[32]; /* 256 / 8 */ uint64_t i = 0; -#endif /* __APPLE__ */ /* * Check both the token buffer and (later) the input buffer, * since both could be non-scratch addresses. */ if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - if (addr == NULL) { + if (addr == 0) { /* * If the address specified is NULL, we use our saved * strtok pointer from the mstate. Note that this @@ -4235,9 +3928,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * would fail this access check. */ if (!dtrace_strcanload(addr, size, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; - } + } } /* @@ -4275,8 +3968,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * We return NULL in this case, and we set the saved * address to NULL as well. 
*/ - regs[rd] = NULL; - mstate->dtms_strtok = NULL; + regs[rd] = 0; + mstate->dtms_strtok = 0; break; } @@ -4312,13 +4005,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int64_t i = 0; if (!dtrace_canload(s, len + 1, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4334,15 +4027,6 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (index >= len || index < 0) { - remaining = 0; - } else if (remaining < 0) { - remaining += len - index; - } else if (index + remaining > size) { - remaining = size - index; - } -#else if ((size_t)index >= len || index < 0) { remaining = 0; } else if (remaining < 0) { @@ -4350,7 +4034,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } else if ((uint64_t)index + (uint64_t)remaining > size) { remaining = size - index; } -#endif /* __APPLE__ */ + for (i = 0; i < remaining; i++) { if ((d[i] = dtrace_load8(s + index + i)) == '\0') break; @@ -4363,265 +4047,44 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } -#if !defined(__APPLE__) - case DIF_SUBR_GETMAJOR: -#ifdef _LP64 - regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; -#else - regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; -#endif - break; - -#else /* __APPLE__ */ case DIF_SUBR_GETMAJOR: regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value ); break; -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_SUBR_GETMINOR: -#ifdef _LP64 - regs[rd] = tupregs[0].dttk_value & MAXMIN64; -#else - regs[rd] = tupregs[0].dttk_value & MAXMIN; -#endif - break; -#else /* __APPLE__ */ case DIF_SUBR_GETMINOR: regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value ); break; -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_SUBR_DDI_PATHNAME: { - /* - * This one is a galactic mess. We are going to roughly - * emulate ddi_pathname(), but it's made more complicated - * by the fact that we (a) want to include the minor name and - * (b) must proceed iteratively instead of recursively. - */ - uintptr_t dest = mstate->dtms_scratch_ptr; - uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - char *start = (char *)dest, *end = start + size - 1; - uintptr_t daddr = tupregs[0].dttk_value; - int64_t minor = (int64_t)tupregs[1].dttk_value; - char *s; - int i, len, depth = 0; - - /* - * Due to all the pointer jumping we do and context we must - * rely upon, we just mandate that the user must have kernel - * read privileges to use this routine. - */ - if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { - *flags |= CPU_DTRACE_KPRIV; - *illval = daddr; - regs[rd] = NULL; - } - - if (!DTRACE_INSCRATCH(mstate, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; - break; - } - - *end = '\0'; - - /* - * We want to have a name for the minor. In order to do this, - * we need to walk the minor list from the devinfo. We want - * to be sure that we don't infinitely walk a circular list, - * so we check for circularity by sending a scout pointer - * ahead two elements for every element that we iterate over; - * if the list is circular, these will ultimately point to the - * same element. 
You may recognize this little trick as the - * answer to a stupid interview question -- one that always - * seems to be asked by those who had to have it laboriously - * explained to them, and who can't even concisely describe - * the conditions under which one would be forced to resort to - * this technique. Needless to say, those conditions are - * found here -- and probably only here. Is this the only use - * of this infamous trick in shipping, production code? If it - * isn't, it probably should be... - */ - if (minor != -1) { - uintptr_t maddr = dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_minor)); - - uintptr_t next = offsetof(struct ddi_minor_data, next); - uintptr_t name = offsetof(struct ddi_minor_data, - d_minor) + offsetof(struct ddi_minor, name); - uintptr_t dev = offsetof(struct ddi_minor_data, - d_minor) + offsetof(struct ddi_minor, dev); - uintptr_t scout; - - if (maddr != NULL) - scout = dtrace_loadptr(maddr + next); - - while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - uint64_t m; -#ifdef _LP64 - m = dtrace_load64(maddr + dev) & MAXMIN64; -#else - m = dtrace_load32(maddr + dev) & MAXMIN; -#endif - if (m != minor) { - maddr = dtrace_loadptr(maddr + next); - - if (scout == NULL) - continue; - - scout = dtrace_loadptr(scout + next); - - if (scout == NULL) - continue; - - scout = dtrace_loadptr(scout + next); - - if (scout == NULL) - continue; - - if (scout == maddr) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - - continue; - } - - /* - * We have the minor data. Now we need to - * copy the minor's name into the end of the - * pathname. - */ - s = (char *)dtrace_loadptr(maddr + name); - len = dtrace_strlen(s, size); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (len != 0) { - if ((end -= (len + 1)) < start) - break; - - *end = ':'; - } - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - break; - } - } - - while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - ddi_node_state_t devi_state; - - devi_state = dtrace_load32(daddr + - offsetof(struct dev_info, devi_node_state)); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (devi_state >= DS_INITIALIZED) { - s = (char *)dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_addr)); - len = dtrace_strlen(s, size); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (len != 0) { - if ((end -= (len + 1)) < start) - break; - - *end = '@'; - } - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - } - - /* - * Now for the node name... - */ - s = (char *)dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_node_name)); - daddr = dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_parent)); - - /* - * If our parent is NULL (that is, if we're the root - * node), we're going to use the special path - * "devices". 
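/*
 * The Solaris-only ddi_pathname() emulation being deleted here guards
 * its minor-list walk with a "scout" pointer advanced two links for
 * every one the walker takes -- the classic tortoise-and-hare cycle
 * check the comment alludes to.  A sketch of the idea over a
 * hypothetical node type:
 */
struct node { struct node *next; };

static int
list_has_cycle(struct node *head)
{
	struct node *walk = head;
	struct node *scout = head;

	while (scout != NULL && scout->next != NULL) {
		walk = walk->next;		/* one step */
		scout = scout->next->next;	/* two steps */
		if (walk == scout)
			return (1);		/* pointers met: circular */
	}
	return (0);				/* reached NULL: list terminates */
}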
- */ - if (daddr == NULL) - s = "devices"; - - len = dtrace_strlen(s, size); - if (*flags & CPU_DTRACE_FAULT) - break; - - if ((end -= (len + 1)) < start) - break; - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - *end = '/'; - - if (depth++ > dtrace_devdepth_max) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - } - - if (end < start) - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - - if (daddr == NULL) { - regs[rd] = (uintptr_t)end; - mstate->dtms_scratch_ptr += size; - } - - break; - } -#else case DIF_SUBR_DDI_PATHNAME: { - /* FIXME: awaits galactic disentanglement ;-} */ + /* APPLE NOTE: currently unsupported on Darwin */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - regs[rd] = NULL; + regs[rd] = 0; break; } -#endif /* __APPLE__ */ case DIF_SUBR_STRJOIN: { char *d = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i = 0; -#else uint64_t i = 0; -#endif /* __APPLE__ */ if (!dtrace_strcanload(s1, size, mstate, vstate) || !dtrace_strcanload(s2, size, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4634,7 +4097,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4658,7 +4121,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4716,13 +4179,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int start, end; if (!dtrace_canload(src, len + 1, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4828,13 +4291,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, end = lastbase; } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - for (i = start, j = 0; i <= end && j < size - 1; i++, j++) - dest[j] = dtrace_load8(src + i); -#else for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++) dest[j] = dtrace_load8(src + i); -#endif /* __APPLE__ */ dest[j] = '\0'; regs[rd] = (uintptr_t)dest; @@ -4849,13 +4307,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int i = 0, j = 0; if (!dtrace_strcanload(src, size, mstate, vstate)) { - regs[rd] = NULL; + regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4865,13 +4323,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, do { c = dtrace_load8(src + i++); next: -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (j + 5 >= size) /* 5 = strlen("/..c\0") */ - break; -#else if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */ break; -#endif /* __APPLE__ */ if (c != '/') { dest[j++] = c; @@ -4998,7 +4451,7 @@ next: #endif /* __APPLE__ */ if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; @@ -5057,7 +4510,7 @@ next: size = INET6_ADDRSTRLEN; if 
(!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; @@ -5071,11 +4524,7 @@ next: firstzero = -1; tryzero = -1; numzero = 1; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - for (i = 0; i < sizeof (struct in6_addr); i++) { -#else for (i = 0; i < (int)sizeof (struct in6_addr); i++) { -#endif /* __APPLE__ */ if (ip6._S6_un._S6_u8[i] == 0 && tryzero == -1 && i % 2 == 0) { tryzero = i; @@ -5100,11 +4549,7 @@ next: numzero += 2; } } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - ASSERT(firstzero + numzero <= sizeof (struct in6_addr)); -#else ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr)); -#endif /* __APPLE__ */ /* * Check for an IPv4 embedded address. @@ -5112,13 +4557,8 @@ next: v6end = sizeof (struct in6_addr) - 2; if (IN6_IS_ADDR_V4MAPPED(&ip6) || IN6_IS_ADDR_V4COMPAT(&ip6)) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - for (i = sizeof (struct in6_addr) - 1; - i >= DTRACE_V4MAPPED_OFFSET; i--) { -#else for (i = sizeof (struct in6_addr) - 1; i >= (int)DTRACE_V4MAPPED_OFFSET; i--) { -#endif /* __APPLE__ */ ASSERT(end >= base); val = ip6._S6_un._S6_u8[i]; @@ -5131,13 +4571,8 @@ next: } } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (i > DTRACE_V4MAPPED_OFFSET) - *end-- = '.'; -#else if (i > (int)DTRACE_V4MAPPED_OFFSET) *end-- = '.'; -#endif /* __APPLE__ */ } if (subr == DIF_SUBR_INET_NTOA6) @@ -5189,7 +4624,7 @@ next: * The user didn't use AH_INET or AH_INET6. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -5197,10 +4632,52 @@ inetout: regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } - -#ifdef __APPLE__ - /* CoreProfile callback ('core_profile(uint64_t, [uint64_t], [uint64_t] ...)') */ + case DIF_SUBR_TOUPPER: + case DIF_SUBR_TOLOWER: { + uintptr_t src = tupregs[0].dttk_value; + char *dest = (char *)mstate->dtms_scratch_ptr; + char lower, upper, base, c; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t len = dtrace_strlen((char*) src, size); + size_t i = 0; + + lower = (subr == DIF_SUBR_TOUPPER) ? 'a' : 'A'; + upper = (subr == DIF_SUBR_TOUPPER) ? 'z' : 'Z'; + base = (subr == DIF_SUBR_TOUPPER) ? 
'A' : 'a'; + + if (!dtrace_canload(src, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + + for (i = 0; i < size - 1; ++i) { + if ((c = dtrace_load8(src + i)) == '\0') + break; + if (c >= lower && c <= upper) + c = base + (c - lower); + dest[i] = c; + } + + ASSERT(i < size); + + dest[i] = '\0'; + regs[rd] = (uintptr_t) dest; + mstate->dtms_scratch_ptr += size; + + break; + } + +/* + * APPLE NOTE: + * CoreProfile callback ('core_profile (uint64_t, [uint64_t], [uint64_t] ...)') + */ case DIF_SUBR_COREPROFILE: { uint64_t selector = tupregs[0].dttk_value; uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; @@ -5229,9 +4706,6 @@ inetout: regs[rd] = (uintptr_t)end + 1; regs[rd] = ret; break; } - -#endif /* __APPLE__ */ - } } @@ -5254,11 +4728,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; dtrace_difv_t *v; volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; -#if !defined(__APPLE__) - volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#endif /* __APPLE__ */ dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ uint64_t regs[DIF_DIR_NREGS]; @@ -5266,11 +4736,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; int64_t cc_r; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - uint_t pc = 0, id, opc; -#else uint_t pc = 0, id, opc = 0; -#endif /* __APPLE__ */ uint8_t ttop = 0; dif_instr_t instr; uint_t r1, r2, rd; @@ -5484,36 +4950,12 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; -#if !defined(__APPLE__) - case DIF_OP_ULDSB: - regs[rd] = (int8_t) - dtrace_fuword8((void *)(uintptr_t)regs[r1]); - break; - case DIF_OP_ULDSH: - regs[rd] = (int16_t) - dtrace_fuword16((void *)(uintptr_t)regs[r1]); - break; - case DIF_OP_ULDSW: - regs[rd] = (int32_t) - dtrace_fuword32((void *)(uintptr_t)regs[r1]); - break; - case DIF_OP_ULDUB: - regs[rd] = - dtrace_fuword8((void *)(uintptr_t)regs[r1]); - break; - case DIF_OP_ULDUH: - regs[rd] = - dtrace_fuword16((void *)(uintptr_t)regs[r1]); - break; - case DIF_OP_ULDUW: - regs[rd] = - dtrace_fuword32((void *)(uintptr_t)regs[r1]); - break; - case DIF_OP_ULDX: - regs[rd] = - dtrace_fuword64((void *)(uintptr_t)regs[r1]); - break; -#else /* Darwin 32-bit kernel may fetch from 64-bit user. Don't want uintptr_t cast. */ +/* + * Darwin 32-bit kernel may fetch from 64-bit user. 
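/*
 * The toupper()/tolower() subroutines added above share a single
 * body: the subroutine id only selects which ASCII range gets
 * remapped ('a'-'z' shifted up, or 'A'-'Z' shifted down).  Reduced to
 * its per-character core (the helper name case_map is hypothetical):
 */
static char
case_map(char c, int to_upper)
{
	char lo = to_upper ? 'a' : 'A';
	char hi = to_upper ? 'z' : 'Z';
	char base = to_upper ? 'A' : 'a';

	return ((c >= lo && c <= hi) ? (char)(base + (c - lo)) : c);
}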
+ * Do not cast regs to uintptr_t + * DIF_OP_ULDSB,DIF_OP_ULDSH, DIF_OP_ULDSW, DIF_OP_ULDUB + * DIF_OP_ULDUH, DIF_OP_ULDUW, DIF_OP_ULDX + */ case DIF_OP_ULDSB: regs[rd] = (int8_t) dtrace_fuword8(regs[r1]); @@ -5541,7 +4983,6 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, case DIF_OP_ULDX: regs[rd] = dtrace_fuword64(regs[r1]); -#endif /* __APPLE__ */ break; case DIF_OP_RET: rval = regs[rd]; @@ -5561,10 +5002,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uintptr_t s1 = regs[r1]; uintptr_t s2 = regs[r2]; - if (s1 != NULL && + if (s1 != 0 && !dtrace_strcanload(s1, sz, mstate, vstate)) break; - if (s2 != NULL && + if (s2 != 0 && !dtrace_strcanload(s2, sz, mstate, vstate)) break; @@ -5603,7 +5044,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, * then this is to be treated as a * reference to a NULL variable. */ - regs[rd] = NULL; + regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } @@ -5627,10 +5068,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; - ASSERT(a != NULL); + ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); - if (regs[rd] == NULL) { + if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { @@ -5672,13 +5113,8 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id -= DIF_VAR_OTHER_UBASE; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - ASSERT(id < vstate->dtvs_nlocals); -#else ASSERT(id < (uint_t)vstate->dtvs_nlocals); -#endif /* __APPLE__ */ ASSERT(vstate->dtvs_locals != NULL); - svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; @@ -5697,7 +5133,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, * then this is to be treated as a * reference to a NULL variable. */ - regs[rd] = NULL; + regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } @@ -5715,12 +5151,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - ASSERT(id < vstate->dtvs_nlocals); -#else ASSERT(id < (uint_t)vstate->dtvs_nlocals); -#endif /* __APPLE__ */ - ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); @@ -5734,7 +5165,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(svar->dtsv_size == (int)NCPU * sz); a += CPU->cpu_id * sz; - if (regs[rd] == NULL) { + if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { @@ -5815,11 +5246,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, * Given that we're storing to thread-local data, * we need to flush our predicate cache. */ -#if !defined(__APPLE__) - curthread->t_predcache = NULL; -#else dtrace_set_thread_predcache(current_thread(), 0); -#endif /* __APPLE__ */ if (dvar == NULL) break; @@ -5990,7 +5417,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (size < regs[r1] || !DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -6058,11 +5485,12 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[rd]; break; } -#if !defined(__APPLE__) - if (regs[rd] & 7) { -#else - if (regs[rd] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */ -#endif /* __APPLE__ */ + + /* + * Darwin kmem_zalloc() called from + * dtrace_difo_init() is 4-byte aligned. 
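/*
 * The alignment tests in this area are simple mask checks: an address
 * is N-byte aligned (N a power of two) iff (addr & (N - 1)) == 0.
 * Solaris demanded 8-byte alignment (mask 7); Darwin's 4-byte-aligned
 * kmem_zalloc() allocations relax this to mask 3.  Illustration:
 */
#define	IS_ALIGNED(addr, n)	(((addr) & ((uint64_t)(n) - 1)) == 0)

/* IS_ALIGNED(0x1004, 4) is true; IS_ALIGNED(0x1004, 8) is false. */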
+ */ + if (regs[rd] & 3) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; @@ -6087,13 +5515,8 @@ dtrace_action_breakpoint(dtrace_ecb_t *ecb) dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; char c[DTRACE_FULLNAMELEN + 80], *str; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - char *msg = "dtrace: breakpoint action at probe "; - char *ecbmsg = " (ecb "; -#else const char *msg = "dtrace: breakpoint action at probe "; const char *ecbmsg = " (ecb "; -#endif /* __APPLE__ */ uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); uintptr_t val = (uintptr_t)ecb; int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; @@ -6164,13 +5587,8 @@ dtrace_action_panic(dtrace_ecb_t *ecb) if (dtrace_panicked != NULL) return; -#if !defined(__APPLE__) - if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) - return; -#else if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL) return; -#endif /* __APPLE__ */ /* * We won the right to panic. (We want to be sure that only one @@ -6181,10 +5599,11 @@ dtrace_action_panic(dtrace_ecb_t *ecb) probe->dtpr_provider->dtpv_name, probe->dtpr_mod, probe->dtpr_func, probe->dtpr_name, (void *)ecb); -#if defined(__APPLE__) - /* Mac OS X debug feature -- can return from panic() */ + /* + * APPLE NOTE: this was for an old Mac OS X debug feature + * allowing a return from panic(). Revisit someday. + */ dtrace_panicked = NULL; -#endif /* __APPLE__ */ } static void @@ -6198,24 +5617,17 @@ dtrace_action_raise(uint64_t sig) return; } -#if !defined(__APPLE__) /* * raise() has a queue depth of 1 -- we ignore all subsequent * invocations of the raise() action. */ - if (curthread->t_dtrace_sig == 0) - curthread->t_dtrace_sig = (uint8_t)sig; - curthread->t_sig_check = 1; - aston(curthread); -#else uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread && uthread->t_dtrace_sig == 0) { uthread->t_dtrace_sig = sig; act_set_astbsd(current_thread()); } -#endif /* __APPLE__ */ } static void @@ -6224,13 +5636,6 @@ dtrace_action_stop(void) if (dtrace_destructive_disallow) return; -#if !defined(__APPLE__) - if (!curthread->t_dtrace_stop) { - curthread->t_dtrace_stop = 1; - curthread->t_sig_check = 1; - aston(curthread); - } -#else uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread) { /* @@ -6240,10 +5645,14 @@ dtrace_action_stop(void) uthread->t_dtrace_stop = 1; act_set_astbsd(current_thread()); } -#endif /* __APPLE__ */ } -#if defined(__APPLE__) + +/* + * APPLE NOTE: pidresume works in conjunction with the dtrace stop action. + * Both activate only when the currently running process next leaves the + * kernel. + */ static void dtrace_action_pidresume(uint64_t pid) { @@ -6269,8 +5678,6 @@ dtrace_action_pidresume(uint64_t pid) act_set_astbsd(current_thread()); } } -#endif /* __APPLE__ */ - static void dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) @@ -6343,11 +5750,7 @@ dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, size = (uintptr_t)fps - mstate->dtms_scratch_ptr + (nframes * sizeof (uint64_t)); -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (!DTRACE_INSCRATCH(mstate, size)) { -#else if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) { -#endif /* __APPLE__ */ /* * Not enough room for our frame pointers -- need to indicate * that we ran out of scratch space. @@ -6439,15 +5842,9 @@ out: * is the function called by the provider to fire a probe -- from which all * subsequent probe-context DTrace activity emanates. 
*/ -#if !defined(__APPLE__) -void -dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) -#else static void __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) -#endif /* __APPLE__ */ { processorid_t cpuid; dtrace_icookie_t cookie; @@ -6461,30 +5858,13 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, volatile uint16_t *flags; hrtime_t now; -#if !defined(__APPLE__) - /* - * Kick out immediately if this CPU is still being born (in which case - * curthread will be set to -1) or the current thread can't allow - * probes in its current context. - */ - if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) - return; -#else - /* Not a concern for Darwin */ -#endif /* __APPLE__ */ - cookie = dtrace_interrupt_disable(); probe = dtrace_probes[id - 1]; cpuid = CPU->cpu_id; onintr = CPU_ON_INTR(CPU); -#if !defined(__APPLE__) - if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && - probe->dtpr_predcache == curthread->t_predcache) { -#else if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) { -#endif /* __APPLE__ */ /* * We have hit in the predicate cache; we know that * this predicate would evaluate to be false. @@ -6508,8 +5888,11 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (vtime && curthread->t_dtrace_start) curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; #else - /* FIXME: the time spent entering DTrace and arriving to this point is attributed - to the current thread. Instead it should accrue to DTrace. */ + /* + * APPLE NOTE: The time spent entering DTrace and arriving + * to this point, is attributed to the current thread. + * Instead it should accrue to DTrace. FIXME + */ vtime = dtrace_vtime_references != 0; if (vtime) @@ -6531,12 +5914,12 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */ #endif /* __APPLE__ */ -#if defined(__APPLE__) /* - * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances. - * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes - * 0 through "arg0" and the probe_id of the overridden probe as arg1. Detect that here - * and cons up a viable state (from the probe_id). + * APPLE NOTE: A provider may call dtrace_probe_error() in lieu of + * dtrace_probe() in some circumstances. See, e.g. fasttrap_isa.c. + * However the provider has no access to ECB context, so passes + * 0 through "arg0" and the probe_id of the overridden probe as arg1. + * Detect that here and cons up a viable state (from the probe_id). 
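/*
 * The predicate-cache test in __dtrace_probe() lets a firing bail out
 * before any DIF evaluation: when a cacheable predicate evaluates
 * false for a thread, its cache id is stored per-thread, and a
 * matching id on the next firing means the predicate would again be
 * false.  A sketch of the check (the wrapper name probe_would_fire is
 * hypothetical; the accessors are the ones used in the code below):
 */
static int
probe_would_fire(dtrace_probe_t *probe)
{
	dtrace_cacheid_t cached =
	    dtrace_get_thread_predcache(current_thread());

	if (probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == cached)
		return (0);	/* known-false predicate: skip the ECBs */
	return (1);
}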
*/ if (dtrace_probeid_error == id && 0 == arg0) { dtrace_id_t ftp_id = (dtrace_id_t)arg1; @@ -6554,11 +5937,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, ftp_state->dts_arg_error_illval = -1; /* arg5 */ } } -#endif /* __APPLE__ */ mstate.dtms_difo = NULL; mstate.dtms_probe = probe; - mstate.dtms_strtok = NULL; + mstate.dtms_strtok = 0; mstate.dtms_arg[0] = arg0; mstate.dtms_arg[1] = arg1; mstate.dtms_arg[2] = arg2; @@ -6574,6 +5956,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; + uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; @@ -6670,11 +6053,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * XXX this is hackish, but so is setting a variable * XXX in a McCarthy OR... */ -#if !defined(__APPLE__) - if ((cr = CRED()) == NULL || -#else if ((cr = dtrace_CRED()) == NULL || -#endif /* __APPLE__ */ posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid || posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid || posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid || @@ -6685,7 +6064,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) #else - 1) /* Darwin omits "No Core Dump" flag. */ + 1) /* APPLE NOTE: Darwin omits "No Core Dump" flag */ #endif /* __APPLE__ */ continue; } @@ -6704,7 +6083,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, cr->cr_zone->zone_id) continue; #else - /* Darwin doesn't do zones. */ + /* APPLE NOTE: Darwin doesn't do zones. */ #endif /* __APPLE__ */ } } @@ -6765,11 +6144,8 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * Update the predicate cache... */ ASSERT(cid == pred->dtp_cacheid); -#if !defined(__APPLE__) - curthread->t_predcache = cid; -#else + dtrace_set_thread_predcache(current_thread(), cid); -#endif /* __APPLE__ */ } continue; @@ -6831,18 +6207,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (!dtrace_priv_kernel(state)) continue; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - dtrace_getpcstack((pc_t *)(tomax + valoffs), - size / sizeof (pc_t), probe->dtpr_aframes, - DTRACE_ANCHORED(probe) ? NULL : - (uint32_t *)arg0); -#else dtrace_getpcstack((pc_t *)(tomax + valoffs), size / sizeof (pc_t), probe->dtpr_aframes, DTRACE_ANCHORED(probe) ? 
NULL : (uint32_t *)(uintptr_t)arg0); -#endif /* __APPLE__ */ - continue; case DTRACEACT_JSTACK: @@ -6937,12 +6305,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_action_raise(val); continue; -#if defined(__APPLE__) - case DTRACEACT_PIDRESUME: + case DTRACEACT_PIDRESUME: /* __APPLE__ */ if (dtrace_priv_proc_destructive(state)) dtrace_action_pidresume(val); continue; -#endif /* __APPLE__ */ case DTRACEACT_COMMIT: ASSERT(!committed); @@ -6967,9 +6333,12 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: -#if defined(__APPLE__) - case DTRACEACT_APPLEBINARY: -#endif /* __APPLE__ */ + case DTRACEACT_APPLEBINARY: /* __APPLE__ */ + case DTRACEACT_TRACEMEM: + break; + + case DTRACEACT_TRACEMEM_DYNSIZE: + tracememsize = val; break; case DTRACEACT_SYM: @@ -6978,23 +6347,6 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; break; -#if !defined(__APPLE__) - case DTRACEACT_USYM: - case DTRACEACT_UMOD: - case DTRACEACT_UADDR: { - struct pid *pid = curthread->t_procp->p_pidp; - - if (!dtrace_priv_proc(state)) - continue; - - DTRACE_STORE(uint64_t, tomax, - valoffs, (uint64_t)pid->pid_id); - DTRACE_STORE(uint64_t, tomax, - valoffs + sizeof (uint64_t), val); - - continue; - } -#else case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: { @@ -7008,7 +6360,6 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; } -#endif /* __APPLE__ */ case DTRACEACT_EXIT: { /* @@ -7051,6 +6402,13 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { uintptr_t end = valoffs + size; + if (tracememsize != 0 && + valoffs + tracememsize < end) + { + end = valoffs + tracememsize; + tracememsize = 0; + } + if (!dtrace_vcanload((void *)(uintptr_t)val, &dp->dtdo_rtype, &mstate, vstate)) continue; @@ -7141,13 +6499,14 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * time to prevent it from being accumulated * into t_dtrace_vtime. */ -#if !defined(__APPLE__) - curthread->t_dtrace_start = 0; -#else - /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */ + + /* + * Darwin sets the sign bit on t_dtrace_tracing + * to suspend accumulation to it. + */ dtrace_set_thread_tracing(current_thread(), - (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); -#endif /* __APPLE__ */ + (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); + } /* @@ -7174,11 +6533,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, buf->dtb_offset = offs + ecb->dte_size; } -#if !defined(__APPLE__) - if (vtime) - curthread->t_dtrace_start = dtrace_gethrtime(); -#else - /* FIXME: the time spent leaving DTrace from this point to the rti is attributed + /* FIXME: On Darwin the time spent leaving DTrace from this point to the rti is attributed to the current thread. Instead it should accrue to DTrace. */ if (vtime) { thread_t thread = current_thread(); @@ -7192,16 +6547,18 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t); } } -#endif /* __APPLE__ */ dtrace_interrupt_enable(cookie); } -#if defined(__APPLE__) -/* Don't allow a thread to re-enter dtrace_probe(). This could occur if a probe is encountered - on some function in the transitive closure of the call to dtrace_probe(). Solaris has some - strong guarantees that this won't happen, the Darwin implementation is not so mature as to - make those guarantees. 
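/*
 * The dtrace_probe() wrapper introduced below exists because Darwin
 * cannot guarantee that no probe fires inside the transitive closure
 * of dtrace_probe() itself; a per-thread flag turns re-entry into a
 * no-op rather than unbounded recursion.  A sketch of the guard shape,
 * with the flag accessors standing in for xnu's per-thread
 * dtrace_{get,set}_thread_reentering() helpers:
 */
void
guarded_probe(dtrace_id_t id, uint64_t arg0)
{
	thread_t thread = current_thread();

	disable_preemption();
	if (!dtrace_get_thread_reentering(thread)) {
		dtrace_set_thread_reentering(thread, TRUE);
		__dtrace_probe(id, arg0, 0, 0, 0, 0);
		dtrace_set_thread_reentering(thread, FALSE);
	}
	enable_preemption();
}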
*/ +/* + * APPLE NOTE: Don't allow a thread to re-enter dtrace_probe(). + * This could occur if a probe is encountered on some function in the + * transitive closure of the call to dtrace_probe(). + * Solaris has some strong guarantees that this won't happen. + * The Darwin implementation is not so mature as to make those guarantees. + * Hence, the introduction of __dtrace_probe() on xnu. + */ void dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, @@ -7222,7 +6579,6 @@ dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, #endif enable_preemption(); } -#endif /* __APPLE__ */ /* * DTrace Probe Hashing Functions @@ -7236,11 +6592,7 @@ dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * specified.) */ static uint_t -#if !defined(__APPLE__) /* Quiet compiler warnings */ -dtrace_hash_str(char *p) -#else dtrace_hash_str(const char *p) -#endif /* __APPLE__ */ { unsigned int g; uint_t hval = 0; @@ -7272,7 +6624,12 @@ dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) return (hash); } -#if !defined(__APPLE__) /* Unused. Quiet compiler warning. */ +/* + * APPLE NOTE: dtrace_hash_destroy is not used. + * It is called by dtrace_detach which is not + * currently implemented. Revisit someday. + */ +#if !defined(__APPLE__) static void dtrace_hash_destroy(dtrace_hash_t *hash) { @@ -7386,7 +6743,7 @@ dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) return (bucket->dthb_len); } - return (NULL); + return (0); } static void @@ -7458,19 +6815,8 @@ dtrace_badattr(const dtrace_attribute_t *a) /* * Return a duplicate copy of a string. If the specified string is NULL, * this function returns a zero-length string. + * APPLE NOTE: Darwin employs size bounded string operation. */ -#if !defined(__APPLE__) -static char * -dtrace_strdup(const char *str) -{ - char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); - - if (str != NULL) - (void) strcpy(new, str); - - return (new); -} -#else /* Employ size bounded string operation. */ static char * dtrace_strdup(const char *str) { @@ -7482,7 +6828,6 @@ dtrace_strdup(const char *str) return (new); } -#endif /* __APPLE__ */ #define DTRACE_ISALPHA(c) \ (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) @@ -7541,20 +6886,12 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) static void dtrace_errdebug(const char *str) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ; -#else int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; -#endif /* __APPLE__ */ int occupied = 0; lck_mtx_lock(&dtrace_errlock); dtrace_errlast = str; -#if !defined(__APPLE__) - dtrace_errthread = curthread; -#else dtrace_errthread = (kthread_t *)current_thread(); -#endif /* __APPLE__ */ while (occupied++ < DTRACE_ERRHASHSZ) { if (dtrace_errhash[hval].dter_msg == str) { @@ -7782,11 +7119,9 @@ static int dtrace_match_string(const char *s, const char *p, int depth) { #pragma unused(depth) /* __APPLE__ */ -#if !defined(__APPLE__) - return (s != NULL && strcmp(s, p) == 0); -#else /* Employ size bounded string operation. */ + + /* APPLE NOTE: Darwin employs size bounded string operation. 
*/ return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0); -#endif /* __APPLE__ */ } /*ARGSUSED*/ @@ -7830,15 +7165,9 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, return (nmatched); } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - template.dtpr_mod = (char *)pkp->dtpk_mod; - template.dtpr_func = (char *)pkp->dtpk_func; - template.dtpr_name = (char *)pkp->dtpk_name; -#else template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod; template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func; template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name; -#endif /* __APPLE__ */ /* * We want to find the most distinct of the module name, function @@ -7869,11 +7198,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, * invoke our callback for each one that matches our input probe key. */ if (hash == NULL) { -#if !defined(__APPLE__) /* Quiet compiler warning */ - for (i = 0; i < dtrace_nprobes; i++) { -#else for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) { -#endif /* __APPLE__ */ if ((probe = dtrace_probes[i]) == NULL || dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) @@ -8032,16 +7357,13 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, } provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); -#if !defined(__APPLE__) - provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); - (void) strcpy(provider->dtpv_name, name); -#else /* Employ size bounded string operation. */ + + /* APPLE NOTE: Darwin employs size bounded string operation. */ { size_t bufsize = strlen(name) + 1; provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP); (void) strlcpy(provider->dtpv_name, name, bufsize); } -#endif /* __APPLE__ */ provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; @@ -8178,7 +7500,7 @@ dtrace_unregister(dtrace_provider_id_t id) /* * Attempt to destroy the probes associated with this provider. */ - if (old->ecb_count!=0) { + if (old->dtpv_ecb_count!=0) { /* * We have at least one ECB; we can't remove this provider. */ @@ -8194,7 +7516,7 @@ dtrace_unregister(dtrace_provider_id_t id) * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. 
*/ - for (i = 0; i < dtrace_nprobes && old->probe_count!=0; i++) { + for (i = 0; i < dtrace_nprobes && old->dtpv_probe_count!=0; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; @@ -8202,7 +7524,7 @@ dtrace_unregister(dtrace_provider_id_t id) continue; dtrace_probes[i] = NULL; - old->probe_count--; + old->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); @@ -8233,11 +7555,7 @@ dtrace_unregister(dtrace_provider_id_t id) kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else zfree(dtrace_probe_t_zone, probe); -#endif } if ((prev = dtrace_provider) == old) { @@ -8338,7 +7656,7 @@ dtrace_condense(dtrace_provider_id_t id) continue; dtrace_probes[i] = NULL; - prov->probe_count--; + prov->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); @@ -8349,11 +7667,7 @@ dtrace_condense(dtrace_provider_id_t id) kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else zfree(dtrace_probe_t_zone, probe); -#endif vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); } @@ -8392,12 +7706,9 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, VM_BESTFIT | VM_SLEEP); -#if !defined(__APPLE__) - probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); -#else + probe = zalloc(dtrace_probe_t_zone); bzero(probe, sizeof (dtrace_probe_t)); -#endif probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; @@ -8412,11 +7723,7 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); -#if !defined(__APPLE__) /* Quiet compiler warning */ - if (id - 1 >= dtrace_nprobes) { -#else if (id - 1 >= (dtrace_id_t)dtrace_nprobes) { -#endif /* __APPLE__ */ size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); size_t nsize = osize << 1; @@ -8449,16 +7756,12 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_nprobes <<= 1; } -#if !defined(__APPLE__) /* Quiet compiler warning */ - ASSERT(id - 1 < dtrace_nprobes); -#else ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes); -#endif /* __APPLE__ */ } ASSERT(dtrace_probes[id - 1] == NULL); dtrace_probes[id - 1] = probe; - provider->probe_count++; + provider->dtpv_probe_count++; if (provider != dtrace_provider) lck_mtx_unlock(&dtrace_lock); @@ -8471,13 +7774,8 @@ dtrace_probe_lookup_id(dtrace_id_t id) { lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) /* Quiet compiler warning */ - if (id == 0 || id > dtrace_nprobes) - return (NULL); -#else if (id == 0 || id > (dtrace_id_t)dtrace_nprobes) return (NULL); -#endif /* __APPLE__ */ return (dtrace_probes[id - 1]); } @@ -8550,21 +7848,13 @@ dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) bzero(pdp, sizeof (dtrace_probedesc_t)); pdp->dtpd_id = prp->dtpr_id; -#if !defined(__APPLE__) - (void) strncpy(pdp->dtpd_provider, - prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1); - - (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); - (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 
1); - (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); -#else /* Employ size bounded string operation. */ + /* APPLE NOTE: Darwin employs size bounded string operation. */ (void) strlcpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN); (void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN); (void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN); (void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN); -#endif /* __APPLE__ */ } /* @@ -8609,22 +7899,11 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) */ lck_mtx_lock(&mod_lock); -#if !defined(__APPLE__) - ctl = &modules; - do { - if (ctl->mod_busy || ctl->mod_mp == NULL) - continue; - - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); - - } while ((ctl = ctl->mod_next) != &modules); -#else ctl = dtrace_modctl_list; while (ctl) { prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); ctl = ctl->mod_next; } -#endif lck_mtx_unlock(&mod_lock); } while (all && (prv = prv->dtpv_next) != NULL); @@ -8798,18 +8077,10 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) #else dhpb.dthpb_base = dhp->dofhp_addr; /* FIXME: James, why? */ #endif -#if !defined(__APPLE__) /* Quiet compiler warning */ - dhpb.dthpb_offs = off + probe->dofpr_offidx; -#else dhpb.dthpb_offs = (int32_t *)(off + probe->dofpr_offidx); -#endif /* __APPLE__ */ dhpb.dthpb_noffs = probe->dofpr_noffs; if (enoff != NULL) { -#if !defined(__APPLE__) /* Quiet compiler warning */ - dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; -#else dhpb.dthpb_enoffs = (int32_t *)(enoff + probe->dofpr_enoffidx); -#endif /* __APPLE__ */ dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; } else { dhpb.dthpb_enoffs = NULL; @@ -8830,11 +8101,7 @@ dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; -#if !defined(__APPLE__) /* Quiet compiler warning */ - int i; -#else uint32_t i; -#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); @@ -8891,11 +8158,7 @@ dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; -#if !defined(__APPLE__) /* Quiet compiler warning */ - int i; -#else uint32_t i; -#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); @@ -8922,11 +8185,7 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, { dtrace_meta_t *meta; dtrace_helpers_t *help, *next; -#if !defined(__APPLE__) /* Quiet compiler warning */ - int i; -#else uint_t i; -#endif /* __APPLE__ */ *idp = DTRACE_METAPROVNONE; @@ -8951,16 +8210,14 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; -#if !defined(__APPLE__) - meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); - (void) strcpy(meta->dtm_name, name); -#else /* Employ size bounded string operation. */ + + /* APPLE NOTE: Darwin employs size bounded string operation. 
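/*
 * This registration path and dtrace_strdup() follow the same Darwin
 * convention flagged by the APPLE NOTEs: compute the buffer size
 * once, allocate exactly that, and copy with strlcpy() so the result
 * is always terminated.  A sketch mirroring the dtrace_strdup() shape
 * (the name bounded_strdup is hypothetical):
 */
static char *
bounded_strdup(const char *str)
{
	size_t bufsize = (str != NULL ? strlen(str) : 0) + 1;
	char *new = kmem_zalloc(bufsize, KM_SLEEP);

	if (str != NULL)
		(void) strlcpy(new, str, bufsize);

	return (new);
}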
*/ { size_t bufsize = strlen(name) + 1; meta->dtm_name = kmem_alloc(bufsize, KM_SLEEP); (void) strlcpy(meta->dtm_name, name, bufsize); } -#endif /* __APPLE__ */ + meta->dtm_arg = arg; lck_mtx_lock(&dtrace_meta_lock); @@ -9076,12 +8333,9 @@ static int dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, cred_t *cr) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int err = 0, i; -#else int err = 0; uint_t i; -#endif /* __APPLE__ */ + int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; int kcheckload; uint_t pc; @@ -9366,12 +8620,8 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; dtrace_diftype_t *vt, *et; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - uint_t id, ndx; -#else uint_t id; int ndx; -#endif /* __APPLE__ */ if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && v->dtdv_scope != DIFV_SCOPE_THREAD && @@ -9605,9 +8855,7 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) subr == DIF_SUBR_STRJOIN || subr == DIF_SUBR_STRRCHR || subr == DIF_SUBR_STRSTR || -#if defined(__APPLE__) subr == DIF_SUBR_COREPROFILE || -#endif /* __APPLE__ */ subr == DIF_SUBR_HTONS || subr == DIF_SUBR_HTONL || subr == DIF_SUBR_HTONLL || @@ -9635,11 +8883,7 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) static int dtrace_difo_cacheable(dtrace_difo_t *dp) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i; -#else uint_t i; -#endif /* __APPLE__ */ if (dp == NULL) return (0); @@ -9684,11 +8928,7 @@ dtrace_difo_cacheable(dtrace_difo_t *dp) static void dtrace_difo_hold(dtrace_difo_t *dp) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i; -#else uint_t i; -#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -9720,11 +8960,7 @@ dtrace_difo_hold(dtrace_difo_t *dp) static void dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - uint64_t sval; -#else uint64_t sval = 0; -#endif /* __APPLE__ */ dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ const dif_instr_t *text = dp->dtdo_buf; uint_t pc, srd = 0; @@ -9859,25 +9095,16 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i, oldsvars, osz, nsz, otlocals, ntlocals; - uint_t id; -#else int oldsvars, osz, nsz, otlocals, ntlocals; uint_t i, id; -#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - dtrace_statvar_t *svar, ***svarp; -#else dtrace_statvar_t *svar; dtrace_statvar_t ***svarp = NULL; -#endif /* __APPLE__ */ size_t dsize = 0; uint8_t scope = v->dtdv_scope; int *np = (int *)NULL; @@ -9889,11 +9116,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) switch (scope) { case DIFV_SCOPE_THREAD: -#if !defined(__APPLE__) /* Quiet compiler warnings */ - while (id >= (otlocals = vstate->dtvs_ntlocals)) { -#else while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) { -#endif /* __APPLE__ */ dtrace_difv_t *tlocals; if ((ntlocals = (otlocals << 1)) == 0) @@ -9943,11 +9166,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) ASSERT(0); } -#if !defined(__APPLE__) /* 
Quiet compiler warnings */ - while (id >= (oldsvars = *np)) { -#else while (id >= (uint_t)(oldsvars = *np)) { -#endif /* __APPLE__ */ dtrace_statvar_t **statics; int newsvars, oldsize, newsize; @@ -10034,28 +9253,17 @@ dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i; -#else uint_t i; -#endif /* __APPLE__ */ ASSERT(dp->dtdo_refcnt == 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - dtrace_statvar_t *svar, **svarp; - uint_t id; - uint8_t scope = v->dtdv_scope; - int *np; -#else dtrace_statvar_t *svar; dtrace_statvar_t **svarp = NULL; uint_t id; uint8_t scope = v->dtdv_scope; int *np = NULL; -#endif /* __APPLE__ */ switch (scope) { case DIFV_SCOPE_THREAD: @@ -10080,11 +9288,7 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) id -= DIF_VAR_OTHER_UBASE; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - ASSERT(id < *np); -#else ASSERT(id < (uint_t)*np); -#endif /* __APPLE__ */ svar = svarp[id]; ASSERT(svar != NULL); @@ -10094,7 +9298,7 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) continue; if (svar->dtsv_size != 0) { - ASSERT(svar->dtsv_data != NULL); + ASSERT(svar->dtsv_data != 0); kmem_free((void *)(uintptr_t)svar->dtsv_data, svar->dtsv_size); } @@ -10114,11 +9318,7 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i; -#else uint_t i; -#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); @@ -10295,8 +9495,8 @@ dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, { dtrace_actdesc_t *act; - ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && - arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); + ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != 0 && + arg >= KERNELBASE) || (arg == 0 && kind == DTRACEACT_PRINTA)); act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); act->dtad_kind = kind; @@ -10366,19 +9566,11 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) epid = state->dts_epid++; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (epid - 1 >= state->dts_necbs) { -#else if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) { -#endif /* __APPLE__ */ dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; int necbs = state->dts_necbs << 1; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - ASSERT(epid == state->dts_necbs + 1); -#else ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1); -#endif /* __APPLE__ */ if (necbs == 0) { ASSERT(oecbs == NULL); @@ -10436,7 +9628,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) return(0); } - probe->dtpr_provider->ecb_count++; + probe->dtpr_provider->dtpv_ecb_count++; if (probe->dtpr_ecb == NULL) { dtrace_provider_t *prov = probe->dtpr_provider; @@ -10734,21 +9926,13 @@ success: aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, VM_BESTFIT | VM_SLEEP); -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (aggid - 1 >= state->dts_naggregations) { -#else if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) { -#endif /* __APPLE__ */ dtrace_aggregation_t **oaggs = state->dts_aggregations; dtrace_aggregation_t **aggs; int naggs = state->dts_naggregations << 1; int onaggs = 
state->dts_naggregations; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - ASSERT(aggid == state->dts_naggregations + 1); -#else ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1); -#endif /* __APPLE */ if (naggs == 0) { ASSERT(oaggs == NULL); @@ -10806,12 +9990,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) uint16_t format = 0; dtrace_recdesc_t *rec; dtrace_state_t *state = ecb->dte_state; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - dtrace_optval_t *opt = state->dts_options, nframes, strsize; -#else dtrace_optval_t *opt = state->dts_options; dtrace_optval_t nframes=0, strsize; -#endif /* __APPLE__ */ uint64_t arg = desc->dtad_arg; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -10852,11 +10032,11 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) * We know that our arg is a string -- turn it into a * format. */ - if (arg == NULL) { + if (arg == 0) { ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); format = 0; } else { - ASSERT(arg != NULL); + ASSERT(arg != 0); ASSERT(arg > KERNELBASE); format = dtrace_format_add(state, (char *)(uintptr_t)arg); @@ -10865,9 +10045,9 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) /*FALLTHROUGH*/ case DTRACEACT_LIBACT: case DTRACEACT_DIFEXPR: -#if defined(__APPLE__) - case DTRACEACT_APPLEBINARY: -#endif /* __APPLE__ */ + case DTRACEACT_TRACEMEM: + case DTRACEACT_TRACEMEM_DYNSIZE: + case DTRACEACT_APPLEBINARY: /* __APPLE__ */ if (dp == NULL) return (EINVAL); @@ -10954,9 +10134,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_CHILL: case DTRACEACT_DISCARD: case DTRACEACT_RAISE: -#if defined(__APPLE__) - case DTRACEACT_PIDRESUME: -#endif /* __APPLE__ */ + case DTRACEACT_PIDRESUME: /* __APPLE__ */ if (dp == NULL) return (EINVAL); break; @@ -11122,7 +10300,7 @@ dtrace_ecb_disable(dtrace_ecb_t *ecb) probe->dtpr_ecb_last = prev; } - probe->dtpr_provider->ecb_count--; + probe->dtpr_provider->dtpv_ecb_count--; /* * The ECB has been disconnected from the probe; now sync to assure * that all CPUs have seen the change before returning. 
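[Editorial aside] The hunks above keep re-applying one grow-by-doubling idiom (dtrace_probes, dts_ecbs, dts_aggregations): when a freshly allocated id outruns its backing array, the array is doubled, the old entries are copied across, the new array is published, and only then is the old one freed — the kernel interposes dtrace_membar_producer() and dtrace_sync() so that no CPU can still be walking the stale pointer. Below is a minimal user-space sketch of that idiom; the names (grow_table, tablep, sizep) are hypothetical, not the kernel code itself, and the sync step is stubbed out as a comment.

    #include <stdlib.h>
    #include <string.h>

    /*
     * Hypothetical sketch of the grow-by-doubling idiom: allocate a
     * zeroed array of twice the size, copy, publish, then free the
     * old array.  Caller guarantees id >= 1 (DTrace ids start at 1).
     */
    static int
    grow_table(void ***tablep, size_t *sizep, size_t id)
    {
            while (id - 1 >= *sizep) {
                    size_t osize = *sizep;
                    size_t nsize = (osize == 0) ? 1 : (osize << 1);
                    void **otab = *tablep;
                    void **ntab = calloc(nsize, sizeof (void *));

                    if (ntab == NULL)
                            return (-1);

                    if (osize != 0)
                            memcpy(ntab, otab, osize * sizeof (void *));

                    /*
                     * The kernel publishes the new array behind
                     * dtrace_membar_producer() and issues dtrace_sync()
                     * before freeing, so no reader still sees otab.
                     */
                    *tablep = ntab;
                    *sizep = nsize;
                    free(otab);     /* free(NULL) is a no-op on first grow */
            }
            return (0);
    }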
@@ -11308,11 +10486,7 @@ dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (id == 0 || id > state->dts_necbs) -#else - if (id == 0 || id > (dtrace_epid_t)state->dts_necbs) -#endif /* __APPLE__ */ + if (id == 0 || id > (dtrace_epid_t)state->dts_necbs) return (NULL); ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); @@ -11329,11 +10503,7 @@ dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (id == 0 || id > state->dts_naggregations) -#else if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations) -#endif /* __APPLE__ */ return (NULL); ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); @@ -11409,31 +10579,31 @@ dtrace_buffer_activate(dtrace_state_t *state) dtrace_interrupt_enable(cookie); } +static int +dtrace_buffer_canalloc(size_t size) +{ + if (size > (UINT64_MAX - dtrace_buffer_memory_inuse)) + return (B_FALSE); + if ((size + dtrace_buffer_memory_inuse) > dtrace_buffer_memory_maxsize) + return (B_FALSE); + + return (B_TRUE); +} + static int dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, processorid_t cpu) { dtrace_cpu_t *cp; dtrace_buffer_t *buf; + size_t size_before_alloc = dtrace_buffer_memory_inuse; lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (size > dtrace_nonroot_maxsize && - !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) - return (EFBIG); -#else if (size > (size_t)dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); -#endif /* __APPLE__ */ - - -#if defined(__APPLE__) - if (size > (sane_size / 8) / (int)NCPU) /* As in kdbg_set_nkdbufs(), roughly. */ - return (ENOMEM); -#endif /* __APPLE__ */ cp = cpu_list; @@ -11455,8 +10625,12 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, ASSERT(buf->dtb_xamot == NULL); + /* DTrace, please do not eat all the memory. */ + if (dtrace_buffer_canalloc(size) == B_FALSE) + goto err; if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL) goto err; + dtrace_buffer_memory_inuse += size; buf->dtb_size = size; buf->dtb_flags = flags; @@ -11466,10 +10640,16 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, if (flags & DTRACEBUF_NOSWITCH) continue; + /* DTrace, please do not eat all the memory. */ + if (dtrace_buffer_canalloc(size) == B_FALSE) + goto err; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL) goto err; + dtrace_buffer_memory_inuse += size; } while ((cp = cp->cpu_next) != cpu_list); + ASSERT(dtrace_buffer_memory_inuse <= dtrace_buffer_memory_maxsize); + return (0); err: @@ -11497,6 +10677,9 @@ err: buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); + /* Restore the size saved before allocating memory */ + dtrace_buffer_memory_inuse = size_before_alloc; + return (ENOMEM); } @@ -11548,11 +10731,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, offs += sizeof (uint32_t); } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if ((soffs = offs + needed) > buf->dtb_size) { -#else if ((uint64_t)(soffs = offs + needed) > buf->dtb_size) { -#endif /* __APPLE__ */ dtrace_buffer_drop(buf); return (-1); } @@ -11623,11 +10802,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, * there. 
We need to clear the buffer from the current * offset to the end (there may be old gunk there). */ -#if !defined(__APPLE__) /* Quiet compiler warnings */ - while (offs < buf->dtb_size) -#else while ((uint64_t)offs < buf->dtb_size) -#endif /* __APPLE__ */ tomax[offs++] = 0; /* @@ -11664,22 +10839,14 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, } } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - while (offs + total_off > woffs) { -#else while (offs + total_off > (size_t)woffs) { -#endif /* __APPLE__ */ dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); size_t size; if (epid == DTRACE_EPIDNONE) { size = sizeof (uint32_t); } else { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - ASSERT(epid <= state->dts_necbs); -#else ASSERT(epid <= (dtrace_epid_t)state->dts_necbs); -#endif /* __APPLE__ */ ASSERT(state->dts_ecbs[epid - 1] != NULL); size = state->dts_ecbs[epid - 1]->dte_size; @@ -11714,12 +10881,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, buf->dtb_offset = 0; woffs = total_off; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - while (woffs < buf->dtb_size) -#else while ((uint64_t)woffs < buf->dtb_size) -#endif /* __APPLE__ */ - tomax[woffs++] = 0; } @@ -11836,9 +10998,15 @@ dtrace_buffer_free(dtrace_buffer_t *bufs) if (buf->dtb_xamot != NULL) { ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); kmem_free(buf->dtb_xamot, buf->dtb_size); + + ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size); + dtrace_buffer_memory_inuse -= buf->dtb_size; } kmem_free(buf->dtb_tomax, buf->dtb_size); + ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size); + dtrace_buffer_memory_inuse -= buf->dtb_size; + buf->dtb_size = 0; buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; @@ -11872,9 +11040,8 @@ dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) ASSERT(enab->dten_probegen == 0); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); -#if defined(__APPLE__) - if (ecb == NULL) return; /* Note: protection against gcc 4.0 botch on x86 */ -#endif /* __APPLE__ */ + /* APPLE NOTE: this protects against gcc 4.0 botch on x86 */ + if (ecb == NULL) return; if (enab->dten_ndesc < enab->dten_maxdesc) { enab->dten_desc[enab->dten_ndesc++] = ecb; @@ -12076,19 +11243,7 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *pd = &ep->dted_probe; -#if !defined(__APPLE__) - if (strcmp(pd->dtpd_provider, match->dtpd_provider)) - continue; - - if (strcmp(pd->dtpd_mod, match->dtpd_mod)) - continue; - - if (strcmp(pd->dtpd_func, match->dtpd_func)) - continue; - - if (strcmp(pd->dtpd_name, match->dtpd_name)) - continue; -#else /* Employ size bounded string operation. */ + /* APPLE NOTE: Darwin employs size bounded string operation. */ if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN)) continue; @@ -12100,7 +11255,6 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN)) continue; -#endif /* __APPLE__ */ /* * We have a winning probe! Add it to our growing @@ -12221,16 +11375,13 @@ dtrace_enabling_matchall(void) * however: the taskq_destroy() at the end of dtrace_detach() will * block pending our completion. 
*/ - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { -#if !defined(__APPLE__) - cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; - if (INGLOBALZONE(curproc) || - cr != NULL && getzoneid() == crgetzoneid(cr)) - (void) dtrace_enabling_match(enab, NULL); -#else - (void) dtrace_enabling_match(enab, NULL); /* As if always in "global" zone." */ -#endif /* __APPLE__ */ + /* + * Darwin doesn't do zones. + * Behave as if always in the "global" zone. + */ + for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { + (void) dtrace_enabling_match(enab, NULL); } lck_mtx_unlock(&dtrace_lock); @@ -12361,11 +11512,7 @@ dtrace_dof_create(dtrace_state_t *state) lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) - dof = kmem_zalloc(len, KM_SLEEP); -#else dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP); -#endif /* __APPLE__ */ dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; @@ -12412,11 +11559,7 @@ dtrace_dof_create(dtrace_state_t *state) } static dof_hdr_t * -#if !defined(__APPLE__) -dtrace_dof_copyin(uintptr_t uarg, int *errp) -#else dtrace_dof_copyin(user_addr_t uarg, int *errp) -#endif { dof_hdr_t hdr, *dof; @@ -12425,11 +11568,7 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ -#if !defined(__APPLE__) - if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { -#else if (copyin(uarg, &hdr, sizeof (hdr)) != 0) { -#endif dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); @@ -12439,11 +11578,7 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { -#else if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { -#endif /* __APPLE__ */ dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); @@ -12455,16 +11590,6 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) return (NULL); } -#if !defined(__APPLE__) - dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); - - if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || - dof->dofh_loadsz != hdr.dofh_loadsz) { - kmem_free(dof, hdr.dofh_loadsz); - *errp = EFAULT; - return (NULL); - } -#else dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 || @@ -12473,13 +11598,10 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) *errp = EFAULT; return (NULL); } -#endif return (dof); } -#if defined(__APPLE__) - static dof_hdr_t * dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) { @@ -12523,8 +11645,6 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (dof); } -#endif /* __APPLE__ */ - static dof_hdr_t * dtrace_dof_property(const char *name) { @@ -12538,15 +11658,9 @@ dtrace_dof_property(const char *name) * only) interpreted to be integer arrays. We must read our DOF * as an integer array, and then squeeze it into a byte array. 
*/ -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, - (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) - return (NULL); -#else if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, name, (int **)&buf, &len) != DDI_PROP_SUCCESS) return (NULL); -#endif /* __APPLE__ */ for (i = 0; i < len; i++) buf[i] = (uchar_t)(((int *)buf)[i]); @@ -12563,21 +11677,13 @@ dtrace_dof_property(const char *name) return (NULL); } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (loadsz >= dtrace_dof_maxsize) { -#else if (loadsz >= (uint64_t)dtrace_dof_maxsize) { -#endif /* __APPLE__ */ ddi_prop_free(buf); dtrace_dof_error(NULL, "oversized DOF"); return (NULL); } -#if !defined(__APPLE__) - dof = kmem_alloc(loadsz, KM_SLEEP); -#else dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP); -#endif /* __APPLE__ */ bcopy(buf, dof, loadsz); ddi_prop_free(buf); @@ -12587,11 +11693,7 @@ dtrace_dof_property(const char *name) static void dtrace_dof_destroy(dof_hdr_t *dof) { -#if !defined(__APPLE__) - kmem_free(dof, dof->dofh_loadsz); -#else dt_kmem_free_aligned(dof, dof->dofh_loadsz); -#endif /* __APPLE__ */ } /* @@ -12665,9 +11767,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_provider, (char *)(str + probe->dofp_provider), MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); -#if defined(__APPLE__) /* Employ size bounded string operation. */ + + /* APPLE NOTE: Darwin employs size bounded string operation. */ desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; -#endif /* __APPLE__ */ if (probe->dofp_mod >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe module"); @@ -12676,9 +11778,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); -#if defined(__APPLE__) /* Employ size bounded string operation. */ + + /* APPLE NOTE: Darwin employs size bounded string operation. */ desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; -#endif /* __APPLE__ */ if (probe->dofp_func >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe function"); @@ -12687,9 +11789,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); -#if defined(__APPLE__) /* Employ size bounded string operation. */ + + /* APPLE NOTE: Darwin employs size bounded string operation. */ desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; -#endif /* __APPLE__ */ if (probe->dofp_name >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe name"); @@ -12698,9 +11800,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); -#if defined(__APPLE__) /* Employ size bounded string operation. */ + + /* APPLE NOTE: Darwin employs size bounded string operation. 
*/ desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; -#endif /* __APPLE__ */ return (desc); } @@ -12714,12 +11816,8 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, dof_difohdr_t *dofd; uintptr_t daddr = (uintptr_t)dof; size_t max_size = dtrace_difo_maxsize; -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i, l, n; -#else uint_t i; int l, n; -#endif /* __APPLE__ */ static const struct { @@ -12746,11 +11844,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), sizeof (uint_t), "multiple variable tables" }, -#if !defined(__APPLE__) - { DOF_SECT_NONE, 0, 0, 0, NULL } -#else { DOF_SECT_NONE, 0, 0, 0, 0, NULL } -#endif /* __APPLE__ */ }; if (sec->dofs_type != DOF_SECT_DIFOHDR) { @@ -12793,30 +11887,18 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (subsec->dofs_type != difo[i].section) - continue; -#else if (subsec->dofs_type != (uint32_t)difo[i].section) continue; -#endif /* __APPLE __ */ if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "section not loaded"); goto err; } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (subsec->dofs_align != difo[i].align) { - dtrace_dof_error(dof, "bad alignment"); - goto err; - } -#else if (subsec->dofs_align != (uint32_t)difo[i].align) { dtrace_dof_error(dof, "bad alignment"); goto err; } -#endif /* __APPLE__ */ bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); @@ -12826,17 +11908,10 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (difo[i].entsize != subsec->dofs_entsize) { - dtrace_dof_error(dof, "entry size mismatch"); - goto err; - } -#else if ((uint32_t)difo[i].entsize != subsec->dofs_entsize) { dtrace_dof_error(dof, "entry size mismatch"); goto err; } -#endif /* __APPLE__ */ if (subsec->dofs_entsize != 0 && (subsec->dofs_size % subsec->dofs_entsize) != 0) { @@ -13112,76 +12187,10 @@ err: return (NULL); } -#if !defined(__APPLE__) /* APPLE dyld has already done this for us */ /* - * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the - * specified DOF. At present, this amounts to simply adding 'ubase' to the - * site of any user SETX relocations to account for load object base address. - * In the future, if we need other relocations, this function can be extended. + * APPLE NOTE: dyld handles dof relocation. 
+ * Darwin does not need dtrace_dof_relocate() */ -static int -dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase) -{ - uintptr_t daddr = (uintptr_t)dof; - dof_relohdr_t *dofr = - (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); - dof_sec_t *ss, *rs, *ts; - dof_relodesc_t *r; - uint_t i, n; - - if (sec->dofs_size < sizeof (dof_relohdr_t) || - sec->dofs_align != sizeof (dof_secidx_t)) { - dtrace_dof_error(dof, "invalid relocation header"); - return (-1); - } - - ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); - rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); - ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); - - if (ss == NULL || rs == NULL || ts == NULL) - return (-1); /* dtrace_dof_error() has been called already */ - - if (rs->dofs_entsize < sizeof (dof_relodesc_t) || - rs->dofs_align != sizeof (uint64_t)) { - dtrace_dof_error(dof, "invalid relocation section"); - return (-1); - } - - r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); - n = rs->dofs_size / rs->dofs_entsize; - - for (i = 0; i < n; i++) { - uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; - - switch (r->dofr_type) { - case DOF_RELO_NONE: - break; - case DOF_RELO_SETX: - if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + - sizeof (uint64_t) > ts->dofs_size) { - dtrace_dof_error(dof, "bad relocation offset"); - return (-1); - } - - if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { - dtrace_dof_error(dof, "misaligned setx relo"); - return (-1); - } - - *(uint64_t *)taddr += ubase; - break; - default: - dtrace_dof_error(dof, "invalid relocation type"); - return (-1); - } - - r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); - } - - return (0); -} -#endif /* __APPLE__ */ /* * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated @@ -13225,21 +12234,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, return (-1); } -#if !defined(__APPLE__) - if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && - dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { - dtrace_dof_error(dof, "DOF version mismatch"); - return (-1); - } -#else /* - * We only support DOF_VERSION_3 for now. + * APPLE NOTE: Darwin only supports DOF_VERSION_3 for now. */ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) { dtrace_dof_error(dof, "DOF version mismatch"); return (-1); } -#endif if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { dtrace_dof_error(dof, "DOF uses unsupported instruction set"); @@ -13343,32 +12344,10 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, } } -#if !defined(__APPLE__) - /* - * Take a second pass through the sections and locate and perform any - * relocations that are present. We do this after the first pass to - * be sure that all sections have had their headers validated. - */ - for (i = 0; i < dof->dofh_secnum; i++) { - dof_sec_t *sec = (dof_sec_t *)(daddr + - (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); - - if (!(sec->dofs_flags & DOF_SECF_LOAD)) - continue; /* skip sections that are not loadable */ - - switch (sec->dofs_type) { - case DOF_SECT_URELHDR: - if (dtrace_dof_relocate(dof, sec, ubase) != 0) - return (-1); - break; - } - } -#else /* - * APPLE NOTE: We have no relocation to perform. All dof values are - * relative offsets. + * APPLE NOTE: We have no further relocation to perform. + * All dof values are relative offsets. 
*/ -#endif /* __APPLE__ */ if ((enab = *enabp) == NULL) enab = *enabp = dtrace_enabling_create(vstate); @@ -13380,22 +12359,18 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, if (sec->dofs_type != DOF_SECT_ECBDESC) continue; -#if !defined(__APPLE__) - if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { - dtrace_enabling_destroy(enab); - *enabp = NULL; - return (-1); - } -#else - /* Note: Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc - are checked for the NULL return value.) */ + /* + * APPLE NOTE: Defend against gcc 4.0 botch on x86. + * not all paths out of inlined dtrace_dof_ecbdesc + * are checked for the NULL return value. + * Check for NULL explicitly here. + */ ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr); if (ep == NULL) { dtrace_enabling_destroy(enab); *enabp = NULL; return (-1); } -#endif /* __APPLE__ */ dtrace_enabling_add(enab, ep); } @@ -13410,12 +12385,8 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, static int dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) { -#if !defined(__APPLE__) /* Quiet compiler warnings */ - int i, rval; -#else uint_t i; int rval; -#endif /* __APPLE__ */ uint32_t entsize; size_t offs; dof_optdesc_t *desc; @@ -13452,11 +12423,7 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) return (EINVAL); } -#if !defined(__APPLE__) /* Quiet compiler warnings */ - if (desc->dofo_value == DTRACEOPT_UNSET) { -#else if (desc->dofo_value == (uint64_t)DTRACEOPT_UNSET) { -#endif /* __APPLE __ */ dtrace_dof_error(dof, "unset option"); return (EINVAL); } @@ -13475,21 +12442,14 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) /* * DTrace Consumer State Functions */ -#if defined(__APPLE__) /* Quiet compiler warning. */ -static -#endif /* __APPLE__ */ -int +static int dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) { size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize; void *base; uintptr_t limit; dtrace_dynvar_t *dvar, *next, *start; -#if !defined(__APPLE__) /* Quiet compiler warning */ - int i; -#else size_t i; -#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); @@ -13580,10 +12540,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) return (0); } -#if defined(__APPLE__) /* Quiet compiler warning. 
*/ -static -#endif /* __APPLE__ */ -void +static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); @@ -13658,13 +12615,8 @@ dtrace_state_deadman(dtrace_state_t *state) state->dts_alive = now; } -#if !defined(__APPLE__) -dtrace_state_t * -dtrace_state_create(dev_t *devp, cred_t *cr) -#else static int dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) -#endif /* __APPLE__ */ { minor_t minor; major_t major; @@ -13676,15 +12628,6 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) - minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, - VM_BESTFIT | VM_SLEEP); - - if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (NULL); - } -#else /* Cause restart */ *new_state = NULL; @@ -13717,8 +12660,6 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) return (EAGAIN); /* temporary resource shortage */ } -#endif /* __APPLE__ */ - state = ddi_get_soft_state(dtrace_softstate, minor); state->dts_epid = DTRACE_EPIDNONE + 1; @@ -13829,18 +12770,13 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. + * + * APPLE NOTE: Darwin doesn't do zones. + * Behave as if zone always has destructive privs. */ -#if !defined(__APPLE__) - if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), - cr->cr_zone->zone_privset)) { - state->dts_cred.dcr_action |= - DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; - } -#else - /* Darwin doesn't do zones. */ + state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; -#endif /* __APPLE__ */ } /* @@ -13880,18 +12816,12 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. - */ -#if !defined(__APPLE__) - if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), - cr->cr_zone->zone_privset)) { - state->dts_cred.dcr_action |= - DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; - } -#else - /* Darwin doesn't do zones. */ + * + * APPLE NOTE: Darwin doesn't do zones. + * Behave as if zone always has destructive privs. + */ state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; -#endif /* __APPLE__ */ } /* @@ -13911,12 +12841,8 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) } } -#if !defined(__APPLE__) - return (state); -#else *new_state = state; return(0); /* Success */ -#endif /* __APPLE__ */ } static int @@ -13954,11 +12880,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) flags |= DTRACEBUF_INACTIVE; } -#if !defined(__APPLE__) /* Quiet compiler warning */ - for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { -#else for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) { -#endif /* __APPLE__ */ /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. @@ -14152,17 +13074,10 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) * If we have an aggregation buffer, we must also have * a buffer to use as scratch. 
*/ -#if !defined(__APPLE__) /* Quiet compiler warning */ - if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || - opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { - opt[DTRACEOPT_BUFSIZE] = state->dts_needed; - } -#else if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || (size_t)opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { opt[DTRACEOPT_BUFSIZE] = state->dts_needed; } -#endif /* __APPLE__ */ } } @@ -14388,7 +13303,12 @@ dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, switch (option) { case DTRACEOPT_DESTRUCTIVE: - if (dtrace_destructive_disallow) + /* + * Prevent consumers from enabling destructive actions if DTrace + * is running in a restricted environment, or if actions are + * disallowed. + */ + if (dtrace_is_restricted() || dtrace_destructive_disallow) return (EACCES); state->dts_cred.dcr_destructive = 1; @@ -14605,15 +13525,9 @@ dtrace_anon_property(void) * If we haven't allocated an anonymous state, we'll do so now. */ if ((state = dtrace_anon.dta_state) == NULL) { -#if !defined(__APPLE__) - state = dtrace_state_create(NULL, NULL); - dtrace_anon.dta_state = state; - if (state == NULL) { -#else rv = dtrace_state_create(NULL, NULL, &state); dtrace_anon.dta_state = state; if (rv != 0 || state == NULL) { -#endif /* __APPLE__ */ /* * This basically shouldn't happen: the only * failure mode from dtrace_state_create() is a @@ -14678,23 +13592,15 @@ static void dtrace_helper_trace(dtrace_helper_action_t *helper, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) { -#if !defined(__APPLE__) /* Quiet compiler warning */ - uint32_t size, next, nnext, i; -#else uint32_t size, next, nnext; int i; -#endif /* __APPLE__ */ dtrace_helptrace_t *ent; uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags; if (!dtrace_helptrace_enabled) return; -#if !defined(__APPLE__) /* Quiet compiler warning */ - ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); -#else ASSERT((uint32_t)vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); -#endif /* __APPLE__ */ /* * What would a tracing framework be without its own tracing @@ -14832,7 +13738,7 @@ err: mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; - return (NULL); + return (0); } static void @@ -14854,23 +13760,12 @@ dtrace_helper_action_destroy(dtrace_helper_action_t *helper, kmem_free(helper, sizeof (dtrace_helper_action_t)); } -#if !defined(__APPLE__) -static int -dtrace_helper_destroygen(int gen) -{ - proc_t *p = curproc; -#else static int dtrace_helper_destroygen(proc_t* p, int gen) { -#endif dtrace_helpers_t *help = p->p_dtrace_helpers; dtrace_vstate_t *vstate; -#if !defined(__APPLE__) /* Quiet compiler warning */ - int i; -#else uint_t i; -#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -14968,13 +13863,8 @@ dtrace_helper_validate(dtrace_helper_action_t *helper) return (err == 0); } -#if !defined(__APPLE__) -static int -dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep) -#else static int dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) -#endif { dtrace_helpers_t *help; dtrace_helper_action_t *helper, *last; @@ -14986,11 +13876,7 @@ dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) return (EINVAL); -#if !defined(__APPLE__) - help = curproc->p_dtrace_helpers; -#else help = p->p_dtrace_helpers; -#endif last = help->dthps_actions[which]; vstate = &help->dthps_vstate; @@ -15043,11 +13929,7 @@ dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) last->dtha_next = helper; } -#if 
!defined(__APPLE__) /* Quiet compiler warning */ - if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { -#else if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { -#endif /* __APPLE__ */ dtrace_helptrace_nlocals = vstate->dtvs_nlocals; dtrace_helptrace_next = 0; } @@ -15105,11 +13987,7 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, * off to the meta provider. */ -#if !defined(__APPLE__) /* Quiet compiler warning */ - int i; -#else uint_t i; -#endif /* __APPLE__ */ lck_mtx_unlock(&dtrace_lock); for (i = 0; i < help->dthps_nprovs; i++) { @@ -15121,25 +13999,15 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, lck_mtx_unlock(&dtrace_meta_lock); } -#if !defined(__APPLE__) -static int -dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) -#else static int dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen) -#endif { dtrace_helpers_t *help; dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - -#if !defined(__APPLE__) - help = curproc->p_dtrace_helpers; -#else help = p->p_dtrace_helpers; -#endif ASSERT(help != NULL); /* @@ -15425,13 +14293,8 @@ dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) return (0); } -#if !defined(__APPLE__) -static int -dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) -#else static int dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) -#endif { dtrace_helpers_t *help; dtrace_vstate_t *vstate; @@ -15441,13 +14304,8 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) - if ((help = curproc->p_dtrace_helpers) == NULL) - help = dtrace_helpers_create(curproc); -#else if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); -#endif vstate = &help->dthps_vstate; @@ -15461,11 +14319,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) * Look for helper providers and validate their descriptions. */ if (dhp != NULL) { -#if !defined(__APPLE__) /* Quiet compiler warning */ - for (i = 0; i < dof->dofh_secnum; i++) { -#else for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) { -#endif /* __APPLE__ */ dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); @@ -15489,16 +14343,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *desc = &ep->dted_probe; -#if !defined(__APPLE__) - if (strcmp(desc->dtpd_provider, "dtrace") != 0) - continue; - - if (strcmp(desc->dtpd_mod, "helper") != 0) - continue; - - if (strcmp(desc->dtpd_func, "ustack") != 0) - continue; -#else /* Employ size bounded string operation. */ + /* APPLE NOTE: Darwin employs size bounded string operation. */ if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace")) continue; @@ -15507,24 +14352,14 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) if (!LIT_STRNEQL(desc->dtpd_func, "ustack")) continue; -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, - ep)) != 0) { -#else if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, ep)) != 0) { -#endif /* * Adding this helper action failed -- we are now going * to rip out the entire generation and return failure. 
*/ -#if !defined(__APPLE__) - (void) dtrace_helper_destroygen(help->dthps_generation); -#else (void) dtrace_helper_destroygen(p, help->dthps_generation); -#endif dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); @@ -15541,17 +14376,9 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) if (dhp != NULL && nprovs > 0) { dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; -#if !defined(__APPLE__) - if (dtrace_helper_provider_add(dhp, gen) == 0) { -#else if (dtrace_helper_provider_add(p, dhp, gen) == 0) { -#endif lck_mtx_unlock(&dtrace_lock); -#if !defined(__APPLE__) - dtrace_helper_provider_register(curproc, help, dhp); -#else dtrace_helper_provider_register(p, help, dhp); -#endif lck_mtx_lock(&dtrace_lock); destroy = 0; @@ -15564,10 +14391,8 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) return (gen); } -#if defined(__APPLE__) - /* - * DTrace lazy dof + * APPLE NOTE: DTrace lazy dof implementation * * DTrace user static probes (USDT probes) and helper actions are loaded * in a process by processing dof sections. The dof sections are passed @@ -15971,8 +14796,6 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) return PROC_RETURNED; } -#endif /* __APPLE__ */ - static dtrace_helpers_t * dtrace_helpers_create(proc_t *p) { @@ -15991,22 +14814,12 @@ dtrace_helpers_create(proc_t *p) return (help); } -#if !defined(__APPLE__) -static void -dtrace_helpers_destroy(void) -{ - dtrace_helpers_t *help; - dtrace_vstate_t *vstate; - proc_t *p = curproc; - int i; -#else static void dtrace_helpers_destroy(proc_t* p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; uint_t i; -#endif lck_mtx_lock(&dtrace_lock); @@ -16099,12 +14912,8 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) dtrace_helper_action_t *helper, *new, *last; dtrace_difo_t *dp; dtrace_vstate_t *vstate; -#if !defined(__APPLE__) /* Quiet compiler warning */ - int i, j, sz, hasprovs = 0; -#else uint_t i; int j, sz, hasprovs = 0; -#endif /* __APPLE__ */ lck_mtx_lock(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); @@ -16138,15 +14947,6 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; new->dtha_actions = kmem_alloc(sz, KM_SLEEP); -#if !defined(__APPLE__) /* Quiet compiler warning */ - for (j = 0; j < new->dtha_nactions; j++) { - dtrace_difo_t *dp = helper->dtha_actions[j]; - - ASSERT(dp != NULL); - dp = dtrace_difo_duplicate(dp, vstate); - new->dtha_actions[j] = dp; - } -#else for (j = 0; j < new->dtha_nactions; j++) { dtrace_difo_t *dpj = helper->dtha_actions[j]; @@ -16154,7 +14954,6 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) dpj = dtrace_difo_duplicate(dpj, vstate); new->dtha_actions[j] = dpj; } -#endif /* __APPLE__ */ if (last != NULL) { last->dtha_next = new; @@ -16193,9 +14992,9 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) * DTrace Hook Functions */ -#if defined(__APPLE__) /* - * Routines to manipulate the modctl list within dtrace + * APPLE NOTE: dtrace_modctl_* routines for kext support. + * Used to manipulate the modctl list within dtrace xnu. 
*/ modctl_t *dtrace_modctl_list; @@ -16304,8 +15103,6 @@ dtrace_modctl_remove(struct modctl * ctl) kmem_free (ctl, sizeof(modctl_t)); } -#endif /* __APPLE__ */ - /* * APPLE NOTE: The kext loader will call dtrace_module_loaded * when the kext is loaded in memory, but before calling the @@ -16315,23 +15112,11 @@ dtrace_modctl_remove(struct modctl * ctl) * Return -1 on failure */ -#if !defined (__APPLE__) -static void -dtrace_module_loaded(struct modctl *ctl) -#else static int dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) -#endif /* __APPLE__ */ { dtrace_provider_t *prv; -#if !defined(__APPLE__) - mutex_enter(&dtrace_provider_lock); - mutex_enter(&mod_lock); - - ASSERT(ctl->mod_busy); -#else - /* * If kernel symbols have been disabled, return immediately * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks @@ -16442,7 +15227,6 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS; lck_mtx_unlock(&dtrace_lock); -#endif /* __APPLE__ */ /* * We're going to call each providers per-module provide operation @@ -16451,14 +15235,12 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); -#if defined(__APPLE__) /* - * The contract with the kext loader is that once this function has completed, - * it may delete kernel symbols at will. We must set this while still holding - * the mod_lock. + * APPLE NOTE: The contract with the kext loader is that once this function + * has completed, it may delete kernel symbols at will. + * We must set this while still holding the mod_lock. */ ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS; -#endif lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); @@ -16475,31 +15257,9 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) if (dtrace_retained == NULL) { lck_mtx_unlock(&dtrace_lock); -#if !defined(__APPLE__) - return; -#else return 0; -#endif } -#if !defined(__APPLE__) - (void) taskq_dispatch(dtrace_taskq, - (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); - - mutex_exit(&dtrace_lock); - - /* - * And now, for a little heuristic sleaze: in general, we want to - * match modules as soon as they load. However, we cannot guarantee - * this, because it would lead us to the lock ordering violation - * outlined above. The common case, of course, is that cpu_lock is - * _not_ held -- so we delay here for a clock tick, hoping that that's - * long enough for the task queue to do its work. If it's not, it's - * not a serious problem -- it just means that the module that we - * just loaded may not be immediately instrumentable. - */ - delay(1); -#else /* APPLE NOTE! * * The cpu_lock mentioned above is only held by dtrace code, Apple's xnu never actually @@ -16512,105 +15272,7 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) dtrace_enabling_matchall(); return 0; -#endif /* __APPLE__ */ -} - -#if !defined(__APPLE__) -static void -dtrace_module_unloaded(struct modctl *ctl) -{ - dtrace_probe_t template, *probe, *first, *next; - dtrace_provider_t *prov; - - template.dtpr_mod = ctl->mod_modname; - - mutex_enter(&dtrace_provider_lock); - mutex_enter(&mod_lock); - mutex_enter(&dtrace_lock); - - if (dtrace_bymod == NULL) { - /* - * The DTrace module is loaded (obviously) but not attached; - * we don't have any work to do. 
- */ - mutex_exit(&dtrace_provider_lock); - mutex_exit(&mod_lock); - mutex_exit(&dtrace_lock); - return; - } - - for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); - probe != NULL; probe = probe->dtpr_nextmod) { - if (probe->dtpr_ecb != NULL) { - mutex_exit(&dtrace_provider_lock); - mutex_exit(&mod_lock); - mutex_exit(&dtrace_lock); - - /* - * This shouldn't _actually_ be possible -- we're - * unloading a module that has an enabled probe in it. - * (It's normally up to the provider to make sure that - * this can't happen.) However, because dtps_enable() - * doesn't have a failure mode, there can be an - * enable/unload race. Upshot: we don't want to - * assert, but we're not going to disable the - * probe, either. - */ - if (dtrace_err_verbose) { - cmn_err(CE_WARN, "unloaded module '%s' had " - "enabled probes", ctl->mod_modname); - } - - return; - } - } - - probe = first; - - for (first = NULL; probe != NULL; probe = next) { - ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); - - dtrace_probes[probe->dtpr_id - 1] = NULL; - probe->dtpr_provider->probe_count--; - - next = probe->dtpr_nextmod; - dtrace_hash_remove(dtrace_bymod, probe); - dtrace_hash_remove(dtrace_byfunc, probe); - dtrace_hash_remove(dtrace_byname, probe); - - if (first == NULL) { - first = probe; - probe->dtpr_nextmod = NULL; - } else { - probe->dtpr_nextmod = first; - first = probe; - } - } - - /* - * We've removed all of the module's probes from the hash chains and - * from the probe array. Now issue a dtrace_sync() to be sure that - * everyone has cleared out from any probe array processing. - */ - dtrace_sync(); - - for (probe = first; probe != NULL; probe = first) { - first = probe->dtpr_nextmod; - prov = probe->dtpr_provider; - prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, - probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); - vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); - kmem_free(probe, sizeof (dtrace_probe_t)); - } - - mutex_exit(&dtrace_lock); - mutex_exit(&mod_lock); - mutex_exit(&dtrace_provider_lock); } -#else /* __APPLE__ */ /* * Return 0 on success @@ -16654,9 +15316,9 @@ dtrace_module_unloaded(struct kmod_info *kmod) */ if (ctl != NULL) (void)dtrace_modctl_remove(ctl); - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); return(0); } @@ -16675,9 +15337,9 @@ syncloop: if (syncctl==NULL) { /* We have no more work to do */ - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); return(0); } else { @@ -16709,9 +15371,9 @@ syncloop: goto syncloop; } - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); if (dtrace_err_verbose) { cmn_err(CE_WARN, "unloaded module '%s' had " @@ -16727,7 +15389,7 @@ syncloop: ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; - probe->dtpr_provider->probe_count--; + probe->dtpr_provider->dtpv_probe_count--; next = probe->dtpr_nextmod; dtrace_hash_remove(dtrace_bymod, probe); @@ -16774,7 +15436,6 @@ syncloop: return(0); } -#endif /* __APPLE__ */ void dtrace_suspend(void) @@ -16882,8 +15543,8 @@ 
dtrace_toxrange_add(uintptr_t base, uintptr_t limit) dtrace_toxrange = range; } - ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL); - ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL); + ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0); + ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0); dtrace_toxrange[dtrace_toxranges].dtt_base = base; dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; @@ -16909,28 +15570,13 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) if (ddi_soft_state_init(&dtrace_softstate, sizeof (dtrace_state_t), 0) != 0) { cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); - lck_mtx_unlock(&cpu_lock); - lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&dtrace_lock); - return (DDI_FAILURE); - } - -#if !defined(__APPLE__) - if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, - DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || - ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, - DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { - cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); - ddi_remove_minor_node(devi, NULL); - ddi_soft_state_fini(&dtrace_softstate); - lck_mtx_unlock(&cpu_lock); lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); return (DDI_FAILURE); } -#else + /* Darwin uses BSD cloning device driver to automagically obtain minor device number. */ -#endif /* __APPLE__ */ ddi_report_dev(devi); dtrace_devi = devi; @@ -16998,14 +15644,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ASSERT(dtrace_provider != NULL); ASSERT((dtrace_provider_id_t)dtrace_provider == id); -#if !defined(__APPLE__) - dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); - dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "END", 0, NULL); - dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "ERROR", 1, NULL); -#elif defined (__x86_64__) +#if defined (__x86_64__) dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 1, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) @@ -17014,7 +15653,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_provider, NULL, NULL, "ERROR", 3, NULL); #else #error Unknown Architecture -#endif /* __APPLE__ */ +#endif dtrace_anon_property(); lck_mtx_unlock(&cpu_lock); @@ -17040,14 +15679,12 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) if (dtrace_anon.dta_enabling != NULL) { ASSERT(dtrace_retained == dtrace_anon.dta_enabling); -#if defined(__APPLE__) /* - * If there is anonymous dof, we should switch symbol modes. + * APPLE NOTE: if handling anonymous dof, switch symbol modes. */ if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) { dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL; } -#endif dtrace_enabling_provide(NULL); state = dtrace_anon.dta_state; @@ -17094,23 +15731,9 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) uint32_t priv; uid_t uid; zoneid_t zoneid; -#if defined (__APPLE__) int rv; -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - if (getminor(*devp) == DTRACEMNRN_HELPER) - return (0); - - /* - * If this wasn't an open with the "helper" minor, then it must be - * the "dtrace" minor. - */ - if (getminor(*devp) != DTRACEMNRN_DTRACE) - return (ENXIO); -#else - /* Darwin puts Helper on its own major device. 
*/ -#endif /* __APPLE__ */ + /* APPLE: Darwin puts Helper on its own major device. */ /* * If no DTRACE_PRIV_* bits are set in the credential, then the @@ -17120,13 +15743,11 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) if (priv == DTRACE_PRIV_NONE) return (EACCES); -#if defined(__APPLE__) /* - * We delay the initialization of fasttrap as late as possible. + * APPLE NOTE: We delay the initialization of fasttrap as late as possible. * It certainly can't be later than now! */ fasttrap_init(); -#endif /* __APPLE__ */ /* * Ask all providers to provide all their probes. @@ -17146,981 +15767,146 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) */ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { dtrace_opens--; - lck_mtx_unlock(&cpu_lock); - lck_mtx_unlock(&dtrace_lock); - return (EBUSY); - } - -#if !defined(__APPLE__) - state = dtrace_state_create(devp, cred_p); - lck_mtx_unlock(&cpu_lock); - - if (state == NULL) { - if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) - (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); - lck_mtx_unlock(&dtrace_lock); - return (EAGAIN); - } - - lck_mtx_unlock(&dtrace_lock); -#else - rv = dtrace_state_create(devp, cred_p, &state); - lck_mtx_unlock(&cpu_lock); - - if (rv != 0 || state == NULL) { - if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) - (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); lck_mtx_unlock(&dtrace_lock); - /* propagate EAGAIN or ERESTART */ - return (rv); - } - - lck_mtx_unlock(&dtrace_lock); - - lck_rw_lock_exclusive(&dtrace_dof_mode_lock); - - /* - * If we are currently lazy, transition states. - * - * Unlike dtrace_close, we do not need to check the - * value of dtrace_opens, as any positive value (and - * we count as 1) means we transition states. - */ - if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) { - dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF; - - /* - * Iterate all existing processes and load lazy dofs. - */ - proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, - dtrace_lazy_dofs_proc_iterate_doit, - NULL, - dtrace_lazy_dofs_proc_iterate_filter, - NULL); - } - - lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); - - /* - * Update kernel symbol state. - * - * We must own the provider and dtrace locks. - * - * NOTE! It may appear there is a race by setting this value so late - * after dtrace_probe_provide. However, any kext loaded after the - * call to probe provide and before we set LAZY_OFF will be marked as - * eligible for symbols from userspace. The same dtrace that is currently - * calling dtrace_open() (this call!) will get a list of kexts needing - * symbols and fill them in, thus closing the race window. - * - * We want to set this value only after it certain it will succeed, as - * this significantly reduces the complexity of error exits. - */ - lck_mtx_lock(&dtrace_lock); - if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) { - dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL; - } - lck_mtx_unlock(&dtrace_lock); -#endif /* __APPLE__ */ - - return (0); -} - -/*ARGSUSED*/ -static int -dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) -{ -#pragma unused(flag, otyp, cred_p) /* __APPLE__ */ - minor_t minor = getminor(dev); - dtrace_state_t *state; - -#if !defined(__APPLE__) - if (minor == DTRACEMNRN_HELPER) - return (0); -#else - /* Darwin puts Helper on its own major device. 
*/ -#endif /* __APPLE__ */ - - state = ddi_get_soft_state(dtrace_softstate, minor); - - lck_mtx_lock(&cpu_lock); - lck_mtx_lock(&dtrace_lock); - - if (state->dts_anon) { - /* - * There is anonymous state. Destroy that first. - */ - ASSERT(dtrace_anon.dta_state == NULL); - dtrace_state_destroy(state->dts_anon); - } - - dtrace_state_destroy(state); - ASSERT(dtrace_opens > 0); - - /* - * Only relinquish control of the kernel debugger interface when there - * are no consumers and no anonymous enablings. - */ - if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) - (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); - - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); - -#if defined(__APPLE__) - /* - * Lock ordering requires the dof mode lock be taken before - * the dtrace_lock. - */ - lck_rw_lock_exclusive(&dtrace_dof_mode_lock); - lck_mtx_lock(&dtrace_lock); - - if (dtrace_opens == 0) { - /* - * If we are currently lazy-off, and this is the last close, transition to - * lazy state. - */ - if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) { - dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON; - } - - /* - * If we are the last dtrace client, switch back to lazy (from userspace) symbols - */ - if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) { - dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; - } - } - - lck_mtx_unlock(&dtrace_lock); - lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); - - /* - * Kext probes may be retained past the end of the kext's lifespan. The - * probes are kept until the last reference to them has been removed. - * Since closing an active dtrace context is likely to drop that last reference, - * lets take a shot at cleaning out the orphaned probes now. - */ - dtrace_module_unloaded(NULL); -#endif /* __APPLE__ */ - - return (0); -} - -#if !defined(__APPLE__) -/*ARGSUSED*/ -static int -dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) -{ - int rval; - dof_helper_t help, *dhp = NULL; - - switch (cmd) { - case DTRACEHIOC_ADDDOF: - if (copyin((void *)arg, &help, sizeof (help)) != 0) { - dtrace_dof_error(NULL, "failed to copyin DOF helper"); - return (EFAULT); - } - - dhp = &help; - arg = (intptr_t)help.dofhp_dof; - /*FALLTHROUGH*/ - - case DTRACEHIOC_ADD: { - dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); - - if (dof == NULL) - return (rval); - - mutex_enter(&dtrace_lock); - - /* - * dtrace_helper_slurp() takes responsibility for the dof -- - * it may free it now or it may save it and free it later. 
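
The ownership convention in the comment above is worth spelling out: once the buffer is handed to the slurp-style routine, the callee owns it and may free it immediately or stash it and free it later, and the caller must not touch it again on any path. A tiny sketch of that contract, with consume_buf() and stash as illustrative names only:

#include <stdlib.h>

static void *stash;     /* buffer the callee chose to keep for later */

static int
consume_buf(void *buf, int keep)
{
        if (keep) {
                stash = buf;    /* freed later, by the new owner */
                return (0);
        }
        free(buf);              /* or freed right now */
        return (0);
}

/*
 * Caller side: once consume_buf(buf, ...) returns, buf must not be
 * used or freed here, regardless of the return value.
 */
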
- */ - if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { - *rv = rval; - rval = 0; - } else { - rval = EINVAL; - } - - mutex_exit(&dtrace_lock); - return (rval); - } - - case DTRACEHIOC_REMOVE: { - mutex_enter(&dtrace_lock); - rval = dtrace_helper_destroygen(arg); - mutex_exit(&dtrace_lock); - - return (rval); - } - - default: - break; - } - - return (ENOTTY); -} - -/*ARGSUSED*/ -static int -dtrace_ioctl(dev_t dev, u_long cmd, intptr_t arg, int md, cred_t *cr, int *rv) -{ - minor_t minor = getminor(dev); - dtrace_state_t *state; - int rval; - - if (minor == DTRACEMNRN_HELPER) - return (dtrace_ioctl_helper(cmd, arg, rv)); - - state = ddi_get_soft_state(dtrace_softstate, minor); - - if (state->dts_anon) { - ASSERT(dtrace_anon.dta_state == NULL); - state = state->dts_anon; - } - - switch (cmd) { - case DTRACEIOC_PROVIDER: { - dtrace_providerdesc_t pvd; - dtrace_provider_t *pvp; - - if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) - return (EFAULT); - - pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; - lck_mtx_lock(&dtrace_provider_lock); - - for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { - if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) - break; - } - - lck_mtx_unlock(&dtrace_provider_lock); - - if (pvp == NULL) - return (ESRCH); - - bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); - bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); - if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) - return (EFAULT); - - return (0); - } - - case DTRACEIOC_EPROBE: { - dtrace_eprobedesc_t epdesc; - dtrace_ecb_t *ecb; - dtrace_action_t *act; - void *buf; - size_t size; - uintptr_t dest; - int nrecs; - - if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) - return (EFAULT); - - lck_mtx_lock(&dtrace_lock); - - if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { - lck_mtx_unlock(&dtrace_lock); - return (EINVAL); - } - - if (ecb->dte_probe == NULL) { - lck_mtx_unlock(&dtrace_lock); - return (EINVAL); - } - - epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; - epdesc.dtepd_uarg = ecb->dte_uarg; - epdesc.dtepd_size = ecb->dte_size; - - nrecs = epdesc.dtepd_nrecs; - epdesc.dtepd_nrecs = 0; - for (act = ecb->dte_action; act != NULL; act = act->dta_next) { - if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) - continue; - - epdesc.dtepd_nrecs++; - } - - /* - * Now that we have the size, we need to allocate a temporary - * buffer in which to store the complete description. We need - * the temporary buffer to be able to drop dtrace_lock() - * across the copyout(), below. 
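
The "temporary buffer" comment above describes a standard kernel move: assemble the snapshot into private memory while the lock is held, drop the lock, and only then perform the copy to user space, which may fault and sleep. A userland sketch of the same shape, with memcpy() standing in for copyout():

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t state_mtx = PTHREAD_MUTEX_INITIALIZER;
static char state_desc[256];    /* stand-in for the probe description */

static int
snapshot_to_user(void *uaddr, size_t len)
{
        char *tmp;

        if (len > sizeof (state_desc))
                return (-1);
        if ((tmp = malloc(len)) == NULL)
                return (-1);

        pthread_mutex_lock(&state_mtx);
        memcpy(tmp, state_desc, len);   /* build the snapshot under the lock */
        pthread_mutex_unlock(&state_mtx);

        memcpy(uaddr, tmp, len);        /* "copyout" with no lock held */
        free(tmp);
        return (0);
}
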
- */ - size = sizeof (dtrace_eprobedesc_t) + - (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); - - buf = kmem_alloc(size, KM_SLEEP); - dest = (uintptr_t)buf; - - bcopy(&epdesc, (void *)dest, sizeof (epdesc)); - dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); - - for (act = ecb->dte_action; act != NULL; act = act->dta_next) { - if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) - continue; - - if (nrecs-- == 0) - break; - - bcopy(&act->dta_rec, (void *)dest, - sizeof (dtrace_recdesc_t)); - dest += sizeof (dtrace_recdesc_t); - } - - lck_mtx_unlock(&dtrace_lock); - - if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { - kmem_free(buf, size); - return (EFAULT); - } - - kmem_free(buf, size); - return (0); - } - - case DTRACEIOC_AGGDESC: { - dtrace_aggdesc_t aggdesc; - dtrace_action_t *act; - dtrace_aggregation_t *agg; - int nrecs; - uint32_t offs; - dtrace_recdesc_t *lrec; - void *buf; - size_t size; - uintptr_t dest; - - if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) - return (EFAULT); - - lck_mtx_lock(&dtrace_lock); - - if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { - lck_mtx_unlock(&dtrace_lock); - return (EINVAL); - } - - aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; - - nrecs = aggdesc.dtagd_nrecs; - aggdesc.dtagd_nrecs = 0; - - offs = agg->dtag_base; - lrec = &agg->dtag_action.dta_rec; - aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; - - for (act = agg->dtag_first; ; act = act->dta_next) { - ASSERT(act->dta_intuple || - DTRACEACT_ISAGG(act->dta_kind)); - - /* - * If this action has a record size of zero, it - * denotes an argument to the aggregating action. - * Because the presence of this record doesn't (or - * shouldn't) affect the way the data is interpreted, - * we don't copy it out to save user-level the - * confusion of dealing with a zero-length record. - */ - if (act->dta_rec.dtrd_size == 0) { - ASSERT(agg->dtag_hasarg); - continue; - } - - aggdesc.dtagd_nrecs++; - - if (act == &agg->dtag_action) - break; - } - - /* - * Now that we have the size, we need to allocate a temporary - * buffer in which to store the complete description. We need - * the temporary buffer to be able to drop dtrace_lock() - * across the copyout(), below. - */ - size = sizeof (dtrace_aggdesc_t) + - (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); - - buf = kmem_alloc(size, KM_SLEEP); - dest = (uintptr_t)buf; - - bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); - dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); - - for (act = agg->dtag_first; ; act = act->dta_next) { - dtrace_recdesc_t rec = act->dta_rec; - - /* - * See the comment in the above loop for why we pass - * over zero-length records. - */ - if (rec.dtrd_size == 0) { - ASSERT(agg->dtag_hasarg); - continue; - } - - if (nrecs-- == 0) - break; - - rec.dtrd_offset -= offs; - bcopy(&rec, (void *)dest, sizeof (rec)); - dest += sizeof (dtrace_recdesc_t); - - if (act == &agg->dtag_action) - break; - } - - lck_mtx_unlock(&dtrace_lock); - - if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { - kmem_free(buf, size); - return (EFAULT); - } - - kmem_free(buf, size); - return (0); - } - - case DTRACEIOC_ENABLE: { - dof_hdr_t *dof; - dtrace_enabling_t *enab = NULL; - dtrace_vstate_t *vstate; - int err = 0; - - *rv = 0; - - /* - * If a NULL argument has been passed, we take this as our - * cue to reevaluate our enablings. 
- */ - if (arg == NULL) { - dtrace_enabling_matchall(); - - return (0); - } - - if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) - return (rval); - - lck_mtx_lock(&cpu_lock); - lck_mtx_lock(&dtrace_lock); - vstate = &state->dts_vstate; - - if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); - dtrace_dof_destroy(dof); - return (EBUSY); - } - - if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); - dtrace_dof_destroy(dof); - return (EINVAL); - } - - if ((rval = dtrace_dof_options(dof, state)) != 0) { - dtrace_enabling_destroy(enab); - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); - dtrace_dof_destroy(dof); - return (rval); - } - - if ((err = dtrace_enabling_match(enab, rv)) == 0) { - err = dtrace_enabling_retain(enab); - } else { - dtrace_enabling_destroy(enab); - } - - lck_mtx_unlock(&cpu_lock); - lck_mtx_unlock(&dtrace_lock); - dtrace_dof_destroy(dof); - - return (err); - } - - case DTRACEIOC_REPLICATE: { - dtrace_repldesc_t desc; - dtrace_probedesc_t *match = &desc.dtrpd_match; - dtrace_probedesc_t *create = &desc.dtrpd_create; - int err; - - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) - return (EFAULT); - - match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; - match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; - match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; - match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; - - create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; - create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; - create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; - create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; - - lck_mtx_lock(&dtrace_lock); - err = dtrace_enabling_replicate(state, match, create); - lck_mtx_unlock(&dtrace_lock); - - return (err); - } - - case DTRACEIOC_PROBEMATCH: - case DTRACEIOC_PROBES: { - dtrace_probe_t *probe = NULL; - dtrace_probedesc_t desc; - dtrace_probekey_t pkey; - dtrace_id_t i; - int m = 0; - uint32_t priv; - uid_t uid; - zoneid_t zoneid; - - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) - return (EFAULT); - - desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; - desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; - desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; - desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; - - /* - * Before we attempt to match this probe, we want to give - * all providers the opportunity to provide it. 
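
The DTRACEIOC_ENABLE case above repeats its unlock-plus-dtrace_dof_destroy() cleanup on every error exit. A goto-based unwind is the usual C idiom for keeping such paths in one place; a sketch under that assumption, with stand-in locks and a null check in place of the real slurp/options/match failures:

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t cpu_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dtrace_mtx = PTHREAD_MUTEX_INITIALIZER;

static int
enable_from_dof(void *dof)
{
        int err = 0;

        pthread_mutex_lock(&cpu_mtx);
        pthread_mutex_lock(&dtrace_mtx);

        if (dof == NULL) {      /* stand-in for slurp/options/match failing */
                err = EINVAL;
                goto out;
        }
        /* ... match the enabling and retain it on success ... */
out:
        pthread_mutex_unlock(&dtrace_mtx);
        pthread_mutex_unlock(&cpu_mtx);
        free(dof);              /* the DOF is destroyed on every path */
        return (err);
}
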
- */ - if (desc.dtpd_id == DTRACE_IDNONE) { - lck_mtx_lock(&dtrace_provider_lock); - dtrace_probe_provide(&desc, NULL); - lck_mtx_unlock(&dtrace_provider_lock); - desc.dtpd_id++; - } - - if (cmd == DTRACEIOC_PROBEMATCH) { - dtrace_probekey(&desc, &pkey); - pkey.dtpk_id = DTRACE_IDNONE; - } - - dtrace_cred2priv(cr, &priv, &uid, &zoneid); - - lck_mtx_lock(&dtrace_lock); - - if (cmd == DTRACEIOC_PROBEMATCH) { - for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i - 1]) != NULL && - (m = dtrace_match_probe(probe, &pkey, - priv, uid, zoneid)) != 0) - break; - } - - if (m < 0) { - lck_mtx_unlock(&dtrace_lock); - return (EINVAL); - } - - } else { - for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i - 1]) != NULL && - dtrace_match_priv(probe, priv, uid, zoneid)) - break; - } - } - - if (probe == NULL) { - lck_mtx_unlock(&dtrace_lock); - return (ESRCH); - } - - dtrace_probe_description(probe, &desc); - lck_mtx_unlock(&dtrace_lock); - - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) - return (EFAULT); - - return (0); - } - - case DTRACEIOC_PROBEARG: { - dtrace_argdesc_t desc; - dtrace_probe_t *probe; - dtrace_provider_t *prov; - - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) - return (EFAULT); - - if (desc.dtargd_id == DTRACE_IDNONE) - return (EINVAL); - - if (desc.dtargd_ndx == DTRACE_ARGNONE) - return (EINVAL); - - lck_mtx_lock(&dtrace_provider_lock); - lck_mtx_lock(&mod_lock); - lck_mtx_lock(&dtrace_lock); - - if (desc.dtargd_id > dtrace_nprobes) { - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&mod_lock); - lck_mtx_unlock(&dtrace_provider_lock); - return (EINVAL); - } - - if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) { - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&mod_lock); - lck_mtx_unlock(&dtrace_provider_lock); - return (EINVAL); - } - - lck_mtx_unlock(&dtrace_lock); - - prov = probe->dtpr_provider; - - if (prov->dtpv_pops.dtps_getargdesc == NULL) { - /* - * There isn't any typed information for this probe. - * Set the argument number to DTRACE_ARGNONE. - */ - desc.dtargd_ndx = DTRACE_ARGNONE; - } else { - desc.dtargd_native[0] = '\0'; - desc.dtargd_xlate[0] = '\0'; - desc.dtargd_mapping = desc.dtargd_ndx; - - prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg, &desc); - } - - lck_mtx_unlock(&mod_lock); - lck_mtx_unlock(&dtrace_provider_lock); - - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) - return (EFAULT); - - return (0); - } - - case DTRACEIOC_GO: { - processorid_t cpuid; - rval = dtrace_state_go(state, &cpuid); - - if (rval != 0) - return (rval); - - if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) - return (EFAULT); - - return (0); - } - - case DTRACEIOC_STOP: { - processorid_t cpuid; - - lck_mtx_lock(&dtrace_lock); - rval = dtrace_state_stop(state, &cpuid); - lck_mtx_unlock(&dtrace_lock); - - if (rval != 0) - return (rval); - - if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) - return (EFAULT); - - return (0); - } - - case DTRACEIOC_DOFGET: { - dof_hdr_t hdr, *dof; - uint64_t len; - - if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) - return (EFAULT); - - lck_mtx_lock(&dtrace_lock); - dof = dtrace_dof_create(state); - lck_mtx_unlock(&dtrace_lock); - - len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); - rval = copyout(dof, (void *)arg, len); - dtrace_dof_destroy(dof); - - return (rval == 0 ? 
0 : EFAULT); - } - - case DTRACEIOC_AGGSNAP: - case DTRACEIOC_BUFSNAP: { - dtrace_bufdesc_t desc; - caddr_t cached; - dtrace_buffer_t *buf; - - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) - return (EFAULT); - - if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) - return (EINVAL); - - lck_mtx_lock(&dtrace_lock); - - if (cmd == DTRACEIOC_BUFSNAP) { - buf = &state->dts_buffer[desc.dtbd_cpu]; - } else { - buf = &state->dts_aggbuffer[desc.dtbd_cpu]; - } - - if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { - size_t sz = buf->dtb_offset; - - if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { - lck_mtx_unlock(&dtrace_lock); - return (EBUSY); - } - - /* - * If this buffer has already been consumed, we're - * going to indicate that there's nothing left here - * to consume. - */ - if (buf->dtb_flags & DTRACEBUF_CONSUMED) { - lck_mtx_unlock(&dtrace_lock); - - desc.dtbd_size = 0; - desc.dtbd_drops = 0; - desc.dtbd_errors = 0; - desc.dtbd_oldest = 0; - sz = sizeof (desc); - - if (copyout(&desc, (void *)arg, sz) != 0) - return (EFAULT); - - return (0); - } - - /* - * If this is a ring buffer that has wrapped, we want - * to copy the whole thing out. - */ - if (buf->dtb_flags & DTRACEBUF_WRAPPED) { - dtrace_buffer_polish(buf); - sz = buf->dtb_size; - } - - if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { - lck_mtx_unlock(&dtrace_lock); - return (EFAULT); - } - - desc.dtbd_size = sz; - desc.dtbd_drops = buf->dtb_drops; - desc.dtbd_errors = buf->dtb_errors; - desc.dtbd_oldest = buf->dtb_xamot_offset; - - lck_mtx_unlock(&dtrace_lock); - - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) - return (EFAULT); - - buf->dtb_flags |= DTRACEBUF_CONSUMED; - - return (0); - } - - if (buf->dtb_tomax == NULL) { - ASSERT(buf->dtb_xamot == NULL); - lck_mtx_unlock(&dtrace_lock); - return (ENOENT); - } - - cached = buf->dtb_tomax; - ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); - - dtrace_xcall(desc.dtbd_cpu, - (dtrace_xcall_t)dtrace_buffer_switch, buf); - - state->dts_errors += buf->dtb_xamot_errors; - - /* - * If the buffers did not actually switch, then the cross call - * did not take place -- presumably because the given CPU is - * not in the ready set. If this is the case, we'll return - * ENOENT. - */ - if (buf->dtb_tomax == cached) { - ASSERT(buf->dtb_xamot != cached); - lck_mtx_unlock(&dtrace_lock); - return (ENOENT); - } - - ASSERT(cached == buf->dtb_xamot); - - /* - * We have our snapshot; now copy it out. - */ - if (copyout(buf->dtb_xamot, desc.dtbd_data, - buf->dtb_xamot_offset) != 0) { - lck_mtx_unlock(&dtrace_lock); - return (EFAULT); - } - - desc.dtbd_size = buf->dtb_xamot_offset; - desc.dtbd_drops = buf->dtb_xamot_drops; - desc.dtbd_errors = buf->dtb_xamot_errors; - desc.dtbd_oldest = 0; - - lck_mtx_unlock(&dtrace_lock); - - /* - * Finally, copy out the buffer description. - */ - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) - return (EFAULT); - - return (0); - } - - case DTRACEIOC_CONF: { - dtrace_conf_t conf; - - bzero(&conf, sizeof (conf)); - conf.dtc_difversion = DIF_VERSION; - conf.dtc_difintregs = DIF_DIR_NREGS; - conf.dtc_diftupregs = DIF_DTR_NREGS; - conf.dtc_ctfmodel = CTF_MODEL_NATIVE; - - if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) - return (EFAULT); - - return (0); - } - - case DTRACEIOC_STATUS: { - dtrace_status_t stat; - dtrace_dstate_t *dstate; - int i, j; - uint64_t nerrs; - - /* - * See the comment in dtrace_state_deadman() for the reason - * for setting dts_laststatus to INT64_MAX before setting - * it to the correct value. 
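
The INT64_MAX dance described above is a small lock-free handshake: write a sentinel, publish it with a producer barrier, then write the real timestamp, so the deadman observer can tell "mid-update" from "stale". A sketch of the same handshake using C11 release stores in place of dtrace_membar_producer():

#include <stdatomic.h>
#include <stdint.h>
#include <time.h>

static _Atomic int64_t laststatus;

static int64_t
now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ((int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}

void
status_heartbeat(void)
{
        /*
         * Publish the sentinel first; the release store plays the role
         * of dtrace_membar_producer() in the kernel code.
         */
        atomic_store_explicit(&laststatus, INT64_MAX, memory_order_release);
        atomic_store_explicit(&laststatus, now_ns(), memory_order_release);
}
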
- */ - state->dts_laststatus = INT64_MAX; - dtrace_membar_producer(); - state->dts_laststatus = dtrace_gethrtime(); - - bzero(&stat, sizeof (stat)); - - lck_mtx_lock(&dtrace_lock); - - if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { - lck_mtx_unlock(&dtrace_lock); - return (ENOENT); - } - - if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) - stat.dtst_exiting = 1; - - nerrs = state->dts_errors; - dstate = &state->dts_vstate.dtvs_dynvars; - - for (i = 0; i < NCPU; i++) { - dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; - - stat.dtst_dyndrops += dcpu->dtdsc_drops; - stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; - stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; - - if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) - stat.dtst_filled++; - - nerrs += state->dts_buffer[i].dtb_errors; + lck_mtx_unlock(&cpu_lock); + return (EBUSY); + } - for (j = 0; j < state->dts_nspeculations; j++) { - dtrace_speculation_t *spec; - dtrace_buffer_t *buf; + rv = dtrace_state_create(devp, cred_p, &state); + lck_mtx_unlock(&cpu_lock); - spec = &state->dts_speculations[j]; - buf = &spec->dtsp_buffer[i]; - stat.dtst_specdrops += buf->dtb_xamot_drops; - } - } + if (rv != 0 || state == NULL) { + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) + (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); + lck_mtx_unlock(&dtrace_lock); + /* propagate EAGAIN or ERESTART */ + return (rv); + } + + lck_mtx_unlock(&dtrace_lock); - stat.dtst_specdrops_busy = state->dts_speculations_busy; - stat.dtst_specdrops_unavail = state->dts_speculations_unavail; - stat.dtst_stkstroverflows = state->dts_stkstroverflows; - stat.dtst_dblerrors = state->dts_dblerrors; - stat.dtst_killed = - (state->dts_activity == DTRACE_ACTIVITY_KILLED); - stat.dtst_errors = nerrs; + lck_rw_lock_exclusive(&dtrace_dof_mode_lock); - lck_mtx_unlock(&dtrace_lock); + /* + * If we are currently lazy, transition states. + * + * Unlike dtrace_close, we do not need to check the + * value of dtrace_opens, as any positive value (and + * we count as 1) means we transition states. + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) { + dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF; + + /* + * Iterate all existing processes and load lazy dofs. + */ + proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, + dtrace_lazy_dofs_proc_iterate_doit, + NULL, + dtrace_lazy_dofs_proc_iterate_filter, + NULL); + } - if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) - return (EFAULT); + lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); - return (0); + /* + * Update kernel symbol state. + * + * We must own the provider and dtrace locks. + * + * NOTE! It may appear there is a race by setting this value so late + * after dtrace_probe_provide. However, any kext loaded after the + * call to probe provide and before we set LAZY_OFF will be marked as + * eligible for symbols from userspace. The same dtrace that is currently + * calling dtrace_open() (this call!) will get a list of kexts needing + * symbols and fill them in, thus closing the race window. + * + * We want to set this value only after it certain it will succeed, as + * this significantly reduces the complexity of error exits. 
+ */ + lck_mtx_lock(&dtrace_lock); + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL; } + lck_mtx_unlock(&dtrace_lock); - case DTRACEIOC_FORMAT: { - dtrace_fmtdesc_t fmt; - char *str; - int len; + return (0); +} - if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) - return (EFAULT); +/*ARGSUSED*/ +static int +dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) +{ +#pragma unused(flag, otyp, cred_p) /* __APPLE__ */ + minor_t minor = getminor(dev); + dtrace_state_t *state; - lck_mtx_lock(&dtrace_lock); + /* APPLE NOTE: Darwin puts Helper on its own major device. */ - if (fmt.dtfd_format == 0 || - fmt.dtfd_format > state->dts_nformats) { - lck_mtx_unlock(&dtrace_lock); - return (EINVAL); - } + state = ddi_get_soft_state(dtrace_softstate, minor); + + lck_mtx_lock(&cpu_lock); + lck_mtx_lock(&dtrace_lock); + if (state->dts_anon) { /* - * Format strings are allocated contiguously and they are - * never freed; if a format index is less than the number - * of formats, we can assert that the format map is non-NULL - * and that the format for the specified index is non-NULL. + * There is anonymous state. Destroy that first. */ - ASSERT(state->dts_formats != NULL); - str = state->dts_formats[fmt.dtfd_format - 1]; - ASSERT(str != NULL); + ASSERT(dtrace_anon.dta_state == NULL); + dtrace_state_destroy(state->dts_anon); + } - len = strlen(str) + 1; + dtrace_state_destroy(state); + ASSERT(dtrace_opens > 0); - if (len > fmt.dtfd_length) { - fmt.dtfd_length = len; + /* + * Only relinquish control of the kernel debugger interface when there + * are no consumers and no anonymous enablings. + */ + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) + (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); + + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); - if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { - lck_mtx_unlock(&dtrace_lock); - return (EINVAL); - } - } else { - if (copyout(str, fmt.dtfd_string, len) != 0) { - lck_mtx_unlock(&dtrace_lock); - return (EINVAL); - } + /* + * Lock ordering requires the dof mode lock be taken before + * the dtrace_lock. + */ + lck_rw_lock_exclusive(&dtrace_dof_mode_lock); + lck_mtx_lock(&dtrace_lock); + + if (dtrace_opens == 0) { + /* + * If we are currently lazy-off, and this is the last close, transition to + * lazy state. + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) { + dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON; } - lck_mtx_unlock(&dtrace_lock); - return (0); - } - - default: - break; + /* + * If we are the last dtrace client, switch back to lazy (from userspace) symbols + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; + } } + + lck_mtx_unlock(&dtrace_lock); + lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); + + /* + * Kext probes may be retained past the end of the kext's lifespan. The + * probes are kept until the last reference to them has been removed. + * Since closing an active dtrace context is likely to drop that last reference, + * lets take a shot at cleaning out the orphaned probes now. + */ + dtrace_module_unloaded(NULL); - return (ENOTTY); + return (0); } -#else + /*ARGSUSED*/ static int dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) @@ -18507,7 +16293,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. 
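
Note the shape of dtrace_close above: dtrace_lock is dropped entirely and then reacquired under dtrace_dof_mode_lock, rather than taking the rw lock while the mutex is held. A single global lock order is what rules out A/B-B/A deadlock; a sketch of the discipline with hypothetical outer/inner locks:

#include <pthread.h>

static pthread_rwlock_t mode_lock = PTHREAD_RWLOCK_INITIALIZER; /* outer */
static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;  /* inner */

void
update_mode_and_state(void)
{
        /*
         * Outer before inner on every path; a thread already holding
         * the inner lock must drop it before taking the outer one.
         */
        pthread_rwlock_wrlock(&mode_lock);
        pthread_mutex_lock(&state_lock);
        /* ... inspect and update mode and state together ... */
        pthread_mutex_unlock(&state_lock);
        pthread_rwlock_unlock(&mode_lock);
}
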
*/ - if (arg == NULL) { + if (arg == 0) { dtrace_enabling_matchall(); return (0); @@ -18548,8 +16334,8 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv dtrace_enabling_destroy(enab); } - lck_mtx_unlock(&cpu_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); dtrace_dof_destroy(dof); return (err); @@ -19028,7 +16814,16 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv size_t module_uuids_list_size; dtrace_module_uuids_list_t* uuids_list; uint64_t dtmul_count; - + + /* + * Security restrictions make this operation illegal, if this is enabled DTrace + * must refuse to provide any fbt probes. + */ + if (dtrace_is_restricted()) { + cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); + return (EPERM); + } + /* * Fail if the kernel symbol mode makes this operation illegal. * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check @@ -19074,6 +16869,10 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv lck_mtx_lock(&mod_lock); struct modctl* ctl = dtrace_modctl_list; while (ctl) { + /* Update the private probes bit */ + if (dtrace_provide_private_probes) + ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); if (!MOD_SYMBOLS_DONE(ctl)) { dtmul_count++; @@ -19121,6 +16920,10 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv struct modctl* ctl = dtrace_modctl_list; while (ctl) { + /* Update the private probes bit */ + if (dtrace_provide_private_probes) + ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; + /* * We assume that userspace symbols will be "better" than kernel level symbols, * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms @@ -19166,7 +16969,16 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv size_t module_symbols_size; dtrace_module_symbols_t* module_symbols; uint64_t dtmodsyms_count; - + + /* + * Security restrictions make this operation illegal, if this is enabled DTrace + * must refuse to provide any fbt probes. + */ + if (dtrace_is_restricted()) { + cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); + return (EPERM); + } + /* * Fail if the kernel symbol mode makes this operation illegal. * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check @@ -19229,6 +17041,10 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv struct modctl* ctl = dtrace_modctl_list; while (ctl) { + /* Update the private probes bit */ + if (dtrace_provide_private_probes) + ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) { if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) { @@ -19269,15 +17085,47 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv return rval; } - - default: - break; + + case DTRACEIOC_PROCWAITFOR: { + dtrace_procdesc_t pdesc = { + .p_comm = {0}, + .p_pid = -1 + }; + + if ((rval = copyin(arg, &pdesc, sizeof(pdesc))) != 0) + goto proc_waitfor_error; + + if ((rval = dtrace_proc_waitfor(&pdesc)) != 0) + goto proc_waitfor_error; + + if ((rval = copyout(&pdesc, arg, sizeof(pdesc))) != 0) + goto proc_waitfor_error; + + return 0; + + proc_waitfor_error: + /* The process was suspended, revert this since the client will not do it. 
*/ + if (pdesc.p_pid != -1) { + proc_t *proc = proc_find(pdesc.p_pid); + if (proc != PROC_NULL) { + task_pidresume(proc->task); + proc_rele(proc); + } + } + + return rval; + } + + default: + break; } return (ENOTTY); } -#endif /* __APPLE__ */ +/* + * APPLE NOTE: dtrace_detach not implemented + */ #if !defined(__APPLE__) /*ARGSUSED*/ static int @@ -19303,15 +17151,15 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) ASSERT(dtrace_opens == 0); if (dtrace_helpers > 0) { - lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&cpu_lock); return (DDI_FAILURE); } if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { - lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&cpu_lock); return (DDI_FAILURE); } @@ -19403,90 +17251,7 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) return (DDI_SUCCESS); } - -/*ARGSUSED*/ -static int -dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = (void *)dtrace_devi; - error = DDI_SUCCESS; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - default: - error = DDI_FAILURE; - } - return (error); -} - -static struct cb_ops dtrace_cb_ops = { - dtrace_open, /* open */ - dtrace_close, /* close */ - nulldev, /* strategy */ - nulldev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - dtrace_ioctl, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_NEW | D_MP /* Driver compatibility flag */ -}; - -static struct dev_ops dtrace_ops = { - DEVO_REV, /* devo_rev */ - 0, /* refcnt */ - dtrace_info, /* get_dev_info */ - nulldev, /* identify */ - nulldev, /* probe */ - dtrace_attach, /* attach */ - dtrace_detach, /* detach */ - nodev, /* reset */ - &dtrace_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev /* dev power */ -}; - -static struct modldrv modldrv = { - &mod_driverops, /* module type (this is a pseudo driver) */ - "Dynamic Tracing", /* name of module */ - &dtrace_ops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} -#else /* Darwin BSD driver model. */ +#endif /* __APPLE__ */ d_open_t _dtrace_open, helper_open; d_close_t _dtrace_close, helper_close; @@ -19686,6 +17451,7 @@ dtrace_init( void ) { if (0 == gDTraceInited) { int i, ncpu; + size_t size = sizeof(dtrace_buffer_memory_maxsize); /* * DTrace allocates buffers based on the maximum number @@ -19694,7 +17460,28 @@ dtrace_init( void ) */ ASSERT(dtrace_max_cpus == 0); ncpu = dtrace_max_cpus = ml_get_max_cpus(); - + + /* + * Retrieve the size of the physical memory in order to define + * the state buffer memory maximal size. If we cannot retrieve + * this value, we'll consider that we have 1Gb of memory per CPU, that's + * still better than raising a kernel panic. 
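
The proc_waitfor_error path above undoes a suspension the client will never see: look the process up (which takes a reference), resume it, drop the reference. The same lookup/act/release shape, sketched with hypothetical handle_* stand-ins for proc_find()/task_pidresume()/proc_rele():

#include <stddef.h>

struct handle;
extern struct handle *handle_find(int id);      /* takes a ref; NULL if gone */
extern void handle_resume(struct handle *);
extern void handle_release(struct handle *);

static void
undo_suspend(int id)
{
        struct handle *h;

        if (id == -1)
                return;         /* nothing was ever suspended */
        if ((h = handle_find(id)) != NULL) {
                handle_resume(h);       /* revert the side effect */
                handle_release(h);      /* drop the lookup reference */
        }
}
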
+ */ + if (0 != kernel_sysctlbyname("hw.memsize", &dtrace_buffer_memory_maxsize, + &size, NULL, 0)) + { + dtrace_buffer_memory_maxsize = ncpu * 1024 * 1024 * 1024; + printf("dtrace_init: failed to retrieve the hw.memsize, defaulted to %lld bytes\n", + dtrace_buffer_memory_maxsize); + } + + /* + * Finally, divide by three to prevent DTrace from eating too + * much memory. + */ + dtrace_buffer_memory_maxsize /= 3; + ASSERT(dtrace_buffer_memory_maxsize > 0); + gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw); if (gMajDevNo < 0) { @@ -19738,6 +17525,7 @@ dtrace_init( void ) lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr); lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr); lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr); + lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr); #if DEBUG lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr); #endif @@ -19751,8 +17539,14 @@ dtrace_init( void ) * the structure is sized to avoid false sharing. */ lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr); + lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr); lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr); + /* + * Initialize the CPU offline/online hooks. + */ + dtrace_install_cpu_hooks(); + dtrace_modctl_list = NULL; cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP ); @@ -19764,6 +17558,7 @@ dtrace_init( void ) for (i = 0; i < ncpu; ++i) { cpu_list[i].cpu_id = (processorid_t)i; cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]); + LIST_INIT(&cpu_list[i].cpu_cyc_list); lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr); } @@ -19868,4 +17663,3 @@ unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2) { #pragma unused(ignore1,ignore2) } -#endif /* __APPLE__ */ diff --git a/bsd/dev/dtrace/dtrace_glue.c b/bsd/dev/dtrace/dtrace_glue.c index 3e34344a7..d7588c0ca 100644 --- a/bsd/dev/dtrace/dtrace_glue.c +++ b/bsd/dev/dtrace/dtrace_glue.c @@ -66,6 +66,9 @@ #include #include /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */ +/* missing prototypes, not exported by Mach */ +extern kern_return_t task_suspend_internal(task_t); +extern kern_return_t task_resume_internal(task_t); /* * pid/proc @@ -83,7 +86,7 @@ sprlock(pid_t pid) return PROC_NULL; } - task_suspend(p->task); + task_suspend_internal(p->task); proc_lock(p); @@ -101,7 +104,7 @@ sprunlock(proc_t *p) proc_unlock(p); - task_resume(p->task); + task_resume_internal(p->task); proc_rele(p); } @@ -228,6 +231,7 @@ done: * cpuvar */ lck_mtx_t cpu_lock; +lck_mtx_t cyc_lock; lck_mtx_t mod_lock; dtrace_cpu_t *cpu_list; @@ -282,14 +286,64 @@ crgetuid(const cred_t *cr) { cred_t copy_cr = *cr; return kauth_cred_getuid(&cop */ typedef struct wrap_timer_call { - cyc_handler_t hdlr; - cyc_time_t when; - uint64_t deadline; - struct timer_call call; + /* node attributes */ + cyc_handler_t hdlr; + cyc_time_t when; + uint64_t deadline; + int cpuid; + boolean_t suspended; + struct timer_call call; + + /* next item in the linked list */ + LIST_ENTRY(wrap_timer_call) entries; } wrap_timer_call_t; -#define WAKEUP_REAPER 0x7FFFFFFFFFFFFFFFLL -#define NEARLY_FOREVER 0x7FFFFFFFFFFFFFFELL +#define WAKEUP_REAPER 0x7FFFFFFFFFFFFFFFLL +#define NEARLY_FOREVER 0x7FFFFFFFFFFFFFFELL + +/* CPU going online/offline notifications */ +void (*dtrace_cpu_state_changed_hook)(int, boolean_t) = NULL; +void dtrace_cpu_state_changed(int, boolean_t); + +void +dtrace_install_cpu_hooks(void) { + dtrace_cpu_state_changed_hook = 
dtrace_cpu_state_changed; +} + +void +dtrace_cpu_state_changed(int cpuid, boolean_t is_running) { +#pragma unused(cpuid) + wrap_timer_call_t *wrapTC = NULL; + boolean_t suspend = (is_running ? FALSE : TRUE); + dtrace_icookie_t s; + + /* Ensure that we're not going to leave the CPU */ + s = dtrace_interrupt_disable(); + assert(cpuid == cpu_number()); + + LIST_FOREACH(wrapTC, &(cpu_list[cpu_number()].cpu_cyc_list), entries) { + assert(wrapTC->cpuid == cpu_number()); + if (suspend) { + assert(!wrapTC->suspended); + /* If this fails, we'll panic anyway, so let's do this now. */ + if (!timer_call_cancel(&wrapTC->call)) + panic("timer_call_set_suspend() failed to cancel a timer call"); + wrapTC->suspended = TRUE; + } else { + /* Rearm the timer, but ensure it was suspended first. */ + assert(wrapTC->suspended); + clock_deadline_for_periodic_event(wrapTC->when.cyt_interval, mach_absolute_time(), + &wrapTC->deadline); + timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline, + TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); + wrapTC->suspended = FALSE; + } + + } + + /* Restore the previous interrupt state. */ + dtrace_interrupt_enable(s); +} static void _timer_call_apply_cyclic( void *ignore, void *vTChdl ) @@ -301,16 +355,13 @@ _timer_call_apply_cyclic( void *ignore, void *vTChdl ) clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline) ); timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL ); - - /* Did timer_call_remove_cyclic request a wakeup call when this timer call was re-armed? */ - if (wrapTC->when.cyt_interval == WAKEUP_REAPER) - thread_wakeup((event_t)wrapTC); } static cyclic_id_t timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_time_t *when) { uint64_t now; + dtrace_icookie_t s; timer_call_setup( &(wrapTC->call), _timer_call_apply_cyclic, NULL ); wrapTC->hdlr = *handler; @@ -322,25 +373,34 @@ timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_tim wrapTC->deadline = now; clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline) ); - timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL ); + + /* Insert the timer to the list of the running timers on this CPU, and start it. */ + s = dtrace_interrupt_disable(); + wrapTC->cpuid = cpu_number(); + LIST_INSERT_HEAD(&cpu_list[wrapTC->cpuid].cpu_cyc_list, wrapTC, entries); + timer_call_enter1(&wrapTC->call, (void*) wrapTC, wrapTC->deadline, + TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); + wrapTC->suspended = FALSE; + dtrace_interrupt_enable(s); return (cyclic_id_t)wrapTC; } +/* + * Executed on the CPU the timer is running on. 
+ */ static void timer_call_remove_cyclic(cyclic_id_t cyclic) { wrap_timer_call_t *wrapTC = (wrap_timer_call_t *)cyclic; - while (!timer_call_cancel(&(wrapTC->call))) { - int ret = assert_wait(wrapTC, THREAD_UNINT); - ASSERT(ret == THREAD_WAITING); + assert(wrapTC); + assert(cpu_number() == wrapTC->cpuid); - wrapTC->when.cyt_interval = WAKEUP_REAPER; + if (!timer_call_cancel(&wrapTC->call)) + panic("timer_call_remove_cyclic() failed to cancel a timer call"); - ret = thread_block(THREAD_CONTINUE_NULL); - ASSERT(ret == THREAD_AWAKENED); - } + LIST_REMOVE(wrapTC, entries); } static void * @@ -366,7 +426,10 @@ cyclic_timer_remove(cyclic_id_t cyclic) { ASSERT( cyclic != CYCLIC_NONE ); - timer_call_remove_cyclic( cyclic ); + /* Removing a timer call must be done on the CPU the timer is running on. */ + wrap_timer_call_t *wrapTC = (wrap_timer_call_t *) cyclic; + dtrace_xcall(wrapTC->cpuid, (dtrace_xcall_t) timer_call_remove_cyclic, (void*) cyclic); + _FREE((void *)cyclic, M_TEMP); } @@ -420,11 +483,15 @@ _cyclic_remove_omni(cyclic_id_list_t cyc_list) t += sizeof(cyc_omni_handler_t); cyc_list = (cyclic_id_list_t)(uintptr_t)t; - cid = cyc_list[cpu_number()]; - oarg = timer_call_get_cyclic_arg(cid); - - timer_call_remove_cyclic( cid ); - (omni->cyo_offline)(omni->cyo_arg, CPU, oarg); + /* + * If the processor was offline when dtrace started, we did not allocate + * a cyclic timer for this CPU. + */ + if ((cid = cyc_list[cpu_number()]) != CYCLIC_NONE) { + oarg = timer_call_get_cyclic_arg(cid); + timer_call_remove_cyclic(cid); + (omni->cyo_offline)(omni->cyo_arg, CPU, oarg); + } } void @@ -795,23 +862,34 @@ dt_kmem_free(void *buf, size_t size) void* dt_kmem_alloc_aligned(size_t size, size_t align, int kmflag) { - void* buf; - intptr_t p; - void** buf_backup; + void *mem, **addr_to_free; + intptr_t mem_aligned; + size_t *size_to_free, hdr_size; - buf = dt_kmem_alloc(align + sizeof(void*) + size, kmflag); + /* Must be a power of two. */ + assert(align != 0); + assert((align & (align - 1)) == 0); - if(!buf) + /* + * We are going to add a header to the allocation. It contains + * the address to free and the total size of the buffer. + */ + hdr_size = sizeof(size_t) + sizeof(void*); + mem = dt_kmem_alloc(size + align + hdr_size, kmflag); + if (mem == NULL) return NULL; - p = (intptr_t)buf; - p += sizeof(void*); /* now we have enough room to store the backup */ - p = P2ROUNDUP(p, align); /* and now we're aligned */ + mem_aligned = (intptr_t) (((intptr_t) mem + align + hdr_size) & ~(align - 1)); + + /* Write the address to free in the header. */ + addr_to_free = (void**) (mem_aligned - sizeof(void*)); + *addr_to_free = mem; - buf_backup = (void**)(p - sizeof(void*)); - *buf_backup = buf; /* back up the address we need to free */ + /* Write the size to free in the header. 
*/ + size_to_free = (size_t*) (mem_aligned - hdr_size); + *size_to_free = size + align + hdr_size; - return (void*)p; + return (void*) mem_aligned; } void* dt_kmem_zalloc_aligned(size_t size, size_t align, int kmflag) @@ -831,14 +909,14 @@ void* dt_kmem_zalloc_aligned(size_t size, size_t align, int kmflag) void dt_kmem_free_aligned(void* buf, size_t size) { #pragma unused(size) - intptr_t p; - void** buf_backup; + intptr_t ptr = (intptr_t) buf; + void **addr_to_free = (void**) (ptr - sizeof(void*)); + size_t *size_to_free = (size_t*) (ptr - (sizeof(size_t) + sizeof(void*))); - p = (intptr_t)buf; - p -= sizeof(void*); - buf_backup = (void**)(p); + if (buf == NULL) + return; - dt_kmem_free(*buf_backup, size + ((char*)buf - (char*)*buf_backup)); + dt_kmem_free(*addr_to_free, *size_to_free); } /* diff --git a/bsd/dev/dtrace/dtrace_subr.c b/bsd/dev/dtrace/dtrace_subr.c index c609fddec..aa8fb6c1a 100644 --- a/bsd/dev/dtrace/dtrace_subr.c +++ b/bsd/dev/dtrace/dtrace_subr.c @@ -34,13 +34,23 @@ #include #include #include +#include #include +#include +#include -#if defined(__APPLE__) -/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ -#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ +#if CONFIG_CSR +#include +#include #endif +/* + * APPLE NOTE: Solaris proc_t is the struct. + * Darwin's proc_t is a pointer to it. + */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ + + /* Copied from an arch specific dtrace_subr.c. */ int (*dtrace_fasttrap_probe_ptr)(struct regs *); @@ -49,14 +59,9 @@ int (*dtrace_fasttrap_probe_ptr)(struct regs *); * They're assigned in dtrace.c but Darwin never calls them. */ void (*dtrace_cpu_init)(processorid_t); -#if !defined(__APPLE__) -void (*dtrace_modload)(struct modctl *); -void (*dtrace_modunload)(struct modctl *); -#else int (*dtrace_modload)(struct kmod_info *, uint32_t); int (*dtrace_modunload)(struct kmod_info *); void (*dtrace_helpers_cleanup)(proc_t *); -#endif /*__APPLE__*/ void (*dtrace_helpers_fork)(proc_t *, proc_t *); void (*dtrace_cpustart_init)(void); void (*dtrace_cpustart_fini)(void); @@ -83,16 +88,96 @@ void (*dtrace_fasttrap_exit_ptr)(proc_t *); void dtrace_fasttrap_fork(proc_t *p, proc_t *cp) { -#if !defined(__APPLE__) - ASSERT(p->p_proc_flag & P_PR_LOCK); - ASSERT(p->p_dtrace_count > 0); -#endif /* __APPLE__ */ - if (dtrace_fasttrap_fork_ptr) { (*dtrace_fasttrap_fork_ptr)(p, cp); } } + +/* + * DTrace wait for process execution + * + * This feature is using a list of entries, each entry containing a pointer + * on a process description. The description is provided by a client, and it + * contains the command we want to wait for along with a reserved space for + * the caught process id. + * + * Once an awaited process has been spawned, it will be suspended before + * notifying the client. Once the client has been back to userland, it's its + * duty to resume the task. 
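
The dt_kmem_alloc_aligned()/dt_kmem_free_aligned() pair above over-allocates, rounds up to the requested power-of-two alignment, and hides its bookkeeping just below the returned address; the kernel version must also record the total size because kmem_free() requires it. A userland analog that stores only the raw pointer, since malloc()/free() need no size:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static void *
alloc_aligned(size_t size, size_t align)
{
        const size_t hdr = sizeof (void *);
        void *raw;
        uintptr_t aligned;

        /* Alignment must be a nonzero power of two. */
        assert(align != 0 && (align & (align - 1)) == 0);

        if ((raw = malloc(size + align + hdr)) == NULL)
                return (NULL);

        /* Round up, leaving room for the header just below the result. */
        aligned = ((uintptr_t)raw + align + hdr) & ~(uintptr_t)(align - 1);
        ((void **)aligned)[-1] = raw;   /* header: the address to free */
        return ((void *)aligned);
}

static void
free_aligned(void *buf)
{
        if (buf != NULL)
                free(((void **)buf)[-1]);
}
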
+ */ + +lck_mtx_t dtrace_procwaitfor_lock; + +typedef struct dtrace_proc_awaited_entry { + struct dtrace_procdesc *pdesc; + LIST_ENTRY(dtrace_proc_awaited_entry) entries; +} dtrace_proc_awaited_entry_t; + +LIST_HEAD(listhead, dtrace_proc_awaited_entry) dtrace_proc_awaited_head + = LIST_HEAD_INITIALIZER(dtrace_proc_awaited_head); + +void (*dtrace_proc_waitfor_exec_ptr)(proc_t*) = NULL; + +static void +dtrace_proc_exec_notification(proc_t *p) { + dtrace_proc_awaited_entry_t *entry, *tmp; + + ASSERT(p); + ASSERT(p->p_pid != -1); + ASSERT(current_task() != p->task); + + lck_mtx_lock(&dtrace_procwaitfor_lock); + + /* + * For each entry, if it has not been matched with a process yet we + * try to match it with the newly created process. If they match, the + * entry is initialized with the process id and the process task is + * suspended. Finally, we wake up the client's waiting thread. + */ + LIST_FOREACH_SAFE(entry, &dtrace_proc_awaited_head, entries, tmp) { + if ((entry->pdesc->p_pid == -1) + && !strncmp(entry->pdesc->p_comm, &p->p_comm[0], sizeof(p->p_comm))) + { + entry->pdesc->p_pid = p->p_pid; + task_pidsuspend(p->task); + wakeup(entry); + } + } + + lck_mtx_unlock(&dtrace_procwaitfor_lock); +} + +int +dtrace_proc_waitfor(dtrace_procdesc_t* pdesc) { + dtrace_proc_awaited_entry_t entry; + int res; + + ASSERT(pdesc); + ASSERT(pdesc->p_comm); + + lck_mtx_lock(&dtrace_procwaitfor_lock); + + /* Initialize and insert the entry, then install the hook. */ + pdesc->p_pid = -1; + entry.pdesc = pdesc; + LIST_INSERT_HEAD(&dtrace_proc_awaited_head, &entry, entries); + dtrace_proc_waitfor_exec_ptr = &dtrace_proc_exec_notification; + + /* Sleep until the process has been executed */ + res = msleep(&entry, &dtrace_procwaitfor_lock, PCATCH, "dtrace_proc_waitfor", NULL); + + /* Remove the entry and the hook if it is not needed anymore. */ + LIST_REMOVE(&entry, entries); + if (LIST_EMPTY(&dtrace_proc_awaited_head)) + dtrace_proc_waitfor_exec_ptr = NULL; + + lck_mtx_unlock(&dtrace_procwaitfor_lock); + + return res; +} + + typedef struct dtrace_invop_hdlr { int (*dtih_func)(uintptr_t, uintptr_t *, uintptr_t); struct dtrace_invop_hdlr *dtih_next; @@ -155,3 +240,34 @@ dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t)) kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t)); } +/* + * Check if DTrace has been restricted by the current security policy. + */ +boolean_t +dtrace_is_restricted(void) +{ +#if CONFIG_CSR + if (csr_check(CSR_ALLOW_UNRESTRICTED_DTRACE) != 0) + return TRUE; +#endif + + return FALSE; +} + +/* + * Check if the process can be attached. + */ +boolean_t +dtrace_can_attach_to_proc(proc_t *proc) +{ +#pragma unused(proc) + ASSERT(proc != NULL); + +#if CONFIG_CSR + if ((cs_entitlement_flags(proc) & CS_GET_TASK_ALLOW) == 0) + return FALSE; +#endif + + return TRUE; +} + diff --git a/bsd/dev/dtrace/fasttrap.c b/bsd/dev/dtrace/fasttrap.c index bbf6db52b..f8cbeb4c1 100644 --- a/bsd/dev/dtrace/fasttrap.c +++ b/bsd/dev/dtrace/fasttrap.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -147,16 +148,12 @@ static uint_t fasttrap_cleanup_work; */ static volatile uint64_t fasttrap_mod_gen; -#if !defined(__APPLE__) /* - * When the fasttrap provider is loaded, fasttrap_max is set to either - * FASTTRAP_MAX_DEFAULT or the value for fasttrap-max-probes in the - * fasttrap.conf file. Each time a probe is created, fasttrap_total is + * APPLE NOTE: When the fasttrap provider is loaded, fasttrap_max is computed + * base on system memory. 
Each time a probe is created, fasttrap_total is * incremented by the number of tracepoints that may be associated with that * probe; fasttrap_total is capped at fasttrap_max. */ -#define FASTTRAP_MAX_DEFAULT 2500000 -#endif static uint32_t fasttrap_max; static uint32_t fasttrap_total; @@ -179,10 +176,8 @@ static lck_mtx_t fasttrap_count_mtx; /* lock on ref count */ static int fasttrap_tracepoint_enable(proc_t *, fasttrap_probe_t *, uint_t); static void fasttrap_tracepoint_disable(proc_t *, fasttrap_probe_t *, uint_t); -#if defined(__APPLE__) static fasttrap_provider_t *fasttrap_provider_lookup(pid_t, fasttrap_provider_type_t, const char *, const dtrace_pattr_t *); -#endif static void fasttrap_provider_retire(pid_t, const char *, int); static void fasttrap_provider_free(fasttrap_provider_t *); @@ -194,11 +189,9 @@ static void fasttrap_proc_release(fasttrap_proc_t *); #define FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask) -#if defined(__APPLE__) - /* - * To save memory, some common memory allocations are given a - * unique zone. In example, dtrace_probe_t is 72 bytes in size, + * APPLE NOTE: To save memory, some common memory allocations are given + * a unique zone. For example, dtrace_probe_t is 72 bytes in size, * which means it would fall into the kalloc.128 bucket. With * 20k elements allocated, the space saved is substantial. */ @@ -206,7 +199,7 @@ static void fasttrap_proc_release(fasttrap_proc_t *); struct zone *fasttrap_tracepoint_t_zone; /* - * fasttrap_probe_t's are variable in size. Some quick profiling has shown + * APPLE NOTE: fasttrap_probe_t's are variable in size. Some quick profiling has shown * that the sweet spot for reducing memory footprint is covering the first * three sizes. Everything larger goes into the common pool. */ @@ -222,12 +215,11 @@ static const char *fasttrap_probe_t_zone_names[FASTTRAP_PROBE_T_ZONE_MAX_TRACEPO }; /* - * We have to manage locks explicitly + * APPLE NOTE: We have to manage locks explicitly */ lck_grp_t* fasttrap_lck_grp; lck_grp_attr_t* fasttrap_lck_grp_attr; lck_attr_t* fasttrap_lck_attr; -#endif static int fasttrap_highbit(ulong_t i) @@ -275,14 +267,14 @@ fasttrap_hash_str(const char *p) } /* - * FIXME - needs implementation + * APPLE NOTE: fasttrap_sigtrap not implemented */ void fasttrap_sigtrap(proc_t *p, uthread_t t, user_addr_t pc) { #pragma unused(p, t, pc) -#if 0 +#if !defined(__APPLE__) sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGTRAP; @@ -295,7 +287,7 @@ fasttrap_sigtrap(proc_t *p, uthread_t t, user_addr_t pc) if (t != NULL) aston(t); -#endif +#endif /* __APPLE__ */ printf("fasttrap_sigtrap called with no implementation.\n"); } @@ -556,15 +548,14 @@ fasttrap_exec_exit(proc_t *p) * static probes are handled by the meta-provider remove entry point. */ fasttrap_provider_retire(p->p_pid, FASTTRAP_PID_NAME, 0); -#if defined(__APPLE__) + /* - * We also need to remove any aliased providers. + * APPLE NOTE: We also need to remove any aliased providers. * XXX optimization: track which provider types are instantiated * and only retire as needed. */ fasttrap_provider_retire(p->p_pid, FASTTRAP_OBJC_NAME, 0); fasttrap_provider_retire(p->p_pid, FASTTRAP_ONESHOT_NAME, 0); -#endif /* __APPLE__ */ /* * This should be called after it is no longer possible for a user @@ -1024,34 +1015,12 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) * USDT probes. Otherwise, the process is gone so bail. 
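
The zone notes above amount to: give hot, fixed-size structures an exact-size allocator so a 72-byte object does not burn a 128-byte kalloc bucket, and fall back to the general pool for the rare large cases. A sketch of that size-class split; zone_alloc() is a stand-in for zalloc() on a per-count zone, and the memset() mirrors the bzero() the patch adds because zones return dirty memory:

#include <stdlib.h>
#include <string.h>

#define ZONE_MAX        4       /* like FASTTRAP_PROBE_T_ZONE_MAX_TRACEPOINTS */

extern void *zone_alloc(size_t ntps);   /* exact-size zone, stand-in */

static void *
probe_alloc(size_t ntps, size_t size)
{
        void *p;

        if (ntps < ZONE_MAX) {
                p = zone_alloc(ntps);   /* exact-size bucket for this count */
                if (p != NULL)
                        memset(p, 0, size);     /* size <= zone element size */
        } else {
                p = calloc(1, size);    /* rare large case: generic pool */
        }
        return (p);
}
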
*/ if ((p = sprlock(probe->ftp_pid)) == PROC_NULL) { -#if defined(__APPLE__) /* * APPLE NOTE: We should never end up here. The Solaris sprlock() * does not return process's with SIDL set, but we always return * the child process. */ return(0); -#else - - if ((curproc->p_flag & SFORKING) == 0) - return(0); - - lck_mtx_lock(&pidlock); - p = prfind(probe->ftp_pid); - - /* - * Confirm that curproc is indeed forking the process in which - * we're trying to enable probes. - */ - ASSERT(p != NULL); - //ASSERT(p->p_parent == curproc); - ASSERT(p->p_stat == SIDL); - - lck_mtx_lock(&p->p_lock); - lck_mtx_unlock(&pidlock); - - sprlock_proc(p); -#endif } /* @@ -1244,31 +1213,20 @@ fasttrap_pid_destroy(void *arg, dtrace_id_t id, void *parg) ASSERT(fasttrap_total >= probe->ftp_ntps); atomic_add_32(&fasttrap_total, -probe->ftp_ntps); -#if !defined(__APPLE__) - size_t size = offsetof(fasttrap_probe_t, ftp_tps[probe->ftp_ntps]); -#endif if (probe->ftp_gen + 1 >= fasttrap_mod_gen) fasttrap_mod_barrier(probe->ftp_gen); for (i = 0; i < probe->ftp_ntps; i++) { -#if !defined(__APPLE__) - kmem_free(probe->ftp_tps[i].fit_tp, sizeof (fasttrap_tracepoint_t)); -#else zfree(fasttrap_tracepoint_t_zone, probe->ftp_tps[i].fit_tp); -#endif } -#if !defined(__APPLE__) - kmem_free(probe, size); -#else if (probe->ftp_ntps < FASTTRAP_PROBE_T_ZONE_MAX_TRACEPOINTS) { zfree(fasttrap_probe_t_zones[probe->ftp_ntps], probe); } else { size_t size = offsetof(fasttrap_probe_t, ftp_tps[probe->ftp_ntps]); kmem_free(probe, size); } -#endif } @@ -1361,12 +1319,10 @@ fasttrap_proc_lookup(pid_t pid) } } -#if defined(__APPLE__) /* - * We have to initialize all locks explicitly + * APPLE NOTE: We have to initialize all locks explicitly */ lck_mtx_init(&new_fprc->ftpc_mtx, fasttrap_lck_grp, fasttrap_lck_attr); -#endif new_fprc->ftpc_next = bucket->ftb_data; bucket->ftb_data = new_fprc; @@ -1421,13 +1377,11 @@ fasttrap_proc_release(fasttrap_proc_t *proc) lck_mtx_unlock(&bucket->ftb_mtx); -#if defined(__APPLE__) /* - * Apple explicit lock management. Not 100% certain we need this, the + * APPLE NOTE: explicit lock management. Not 100% certain we need this, the * memory is freed even without the destroy. Maybe accounting cleanup? */ lck_mtx_destroy(&fprc->ftpc_mtx, fasttrap_lck_grp); -#endif kmem_free(fprc, sizeof (fasttrap_proc_t)); } @@ -1438,11 +1392,9 @@ fasttrap_proc_release(fasttrap_proc_t *proc) * if it doesn't exist otherwise it returns NULL. The provider is returned * with its lock held. */ -#if defined(__APPLE__) static fasttrap_provider_t * fasttrap_provider_lookup(pid_t pid, fasttrap_provider_type_t provider_type, const char *name, const dtrace_pattr_t *pattr) -#endif /* __APPLE__ */ { fasttrap_provider_t *fp, *new_fp = NULL; fasttrap_bucket_t *bucket; @@ -1461,9 +1413,7 @@ fasttrap_provider_lookup(pid_t pid, fasttrap_provider_type_t provider_type, cons */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && -#if defined(__APPLE__) fp->ftp_provider_type == provider_type && -#endif /* __APPLE__ */ strncmp(fp->ftp_name, name, sizeof(fp->ftp_name)) == 0 && !fp->ftp_retired) { lck_mtx_lock(&fp->ftp_mtx); @@ -1502,36 +1452,27 @@ fasttrap_provider_lookup(pid_t pid, fasttrap_provider_type_t provider_type, cons /* * Grab the credentials for this process so we have * something to pass to dtrace_register(). + * APPLE NOTE: We have no equivalent to crhold, + * even though there is a cr_ref filed in ucred. 
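
fasttrap_provider_lookup() above follows the usual lookup-or-create discipline for a hash bucket: search under the bucket lock, and only when nothing matches allocate a fresh entry outside the lock (while sleeping is still legal), then reacquire and insert. A single-bucket sketch; a full version would re-check for a racing insert after reacquiring, as the kernel code does:

#include <pthread.h>
#include <stdlib.h>

struct entry {
        int key;
        struct entry *next;
};
static struct entry *bucket_head;
static pthread_mutex_t bucket_mtx = PTHREAD_MUTEX_INITIALIZER;

static struct entry *
lookup_or_create(int key)
{
        struct entry *e;

        pthread_mutex_lock(&bucket_mtx);
        for (e = bucket_head; e != NULL; e = e->next) {
                if (e->key == key) {
                        pthread_mutex_unlock(&bucket_mtx);
                        return (e);     /* existing entry wins */
                }
        }
        pthread_mutex_unlock(&bucket_mtx);

        /* Allocate outside the lock, where sleeping is harmless. */
        if ((e = calloc(1, sizeof (*e))) == NULL)
                return (NULL);
        e->key = key;

        pthread_mutex_lock(&bucket_mtx);
        /* (A full version re-searches here to catch a racing insert.) */
        e->next = bucket_head;
        bucket_head = e;
        pthread_mutex_unlock(&bucket_mtx);
        return (e);
}
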
*/ -#if !defined(__APPLE__) - mutex_enter(&p->p_crlock); - crhold(p->p_cred); - cred = p->p_cred; - mutex_exit(&p->p_crlock); - mutex_exit(&p->p_lock); -#else - // lck_mtx_lock(&p->p_crlock); - // Seems like OS X has no equivalent to crhold, even though it has a cr_ref field in ucred + // lck_mtx_lock(&p->p_crlock; crhold(p->p_ucred); cred = p->p_ucred; // lck_mtx_unlock(&p->p_crlock); proc_unlock(p); proc_rele(p); -#endif /* __APPLE__ */ new_fp = kmem_zalloc(sizeof (fasttrap_provider_t), KM_SLEEP); ASSERT(new_fp != NULL); new_fp->ftp_pid = pid; new_fp->ftp_proc = fasttrap_proc_lookup(pid); -#if defined(__APPLE__) new_fp->ftp_provider_type = provider_type; /* - * Apple locks require explicit init. + * APPLE NOTE: locks require explicit init */ lck_mtx_init(&new_fp->ftp_mtx, fasttrap_lck_grp, fasttrap_lck_attr); lck_mtx_init(&new_fp->ftp_cmtx, fasttrap_lck_grp, fasttrap_lck_attr); -#endif /* __APPLE__ */ ASSERT(new_fp->ftp_proc != NULL); @@ -1609,14 +1550,12 @@ fasttrap_provider_free(fasttrap_provider_t *provider) fasttrap_proc_release(provider->ftp_proc); -#if defined(__APPLE__) /* - * Apple explicit lock management. Not 100% certain we need this, the + * APPLE NOTE: explicit lock management. Not 100% certain we need this, the * memory is freed even without the destroy. Maybe accounting cleanup? */ lck_mtx_destroy(&provider->ftp_mtx, fasttrap_lck_grp); lck_mtx_destroy(&provider->ftp_cmtx, fasttrap_lck_grp); -#endif kmem_free(provider, sizeof (fasttrap_provider_t)); @@ -1731,9 +1670,7 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) if (pdata->ftps_noffs == 0) return (EINVAL); -#if defined(__APPLE__) switch (pdata->ftps_probe_type) { -#endif case DTFTP_ENTRY: name = "entry"; aframes = FASTTRAP_ENTRY_AFRAMES; @@ -1750,7 +1687,6 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) return (EINVAL); } -#if defined(__APPLE__) const char* provider_name; switch (pdata->ftps_provider_type) { case DTFTP_PROVIDER_PID: @@ -1769,7 +1705,6 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) if ((provider = fasttrap_provider_lookup(pdata->ftps_pid, pdata->ftps_provider_type, provider_name, &pid_attr)) == NULL) return (ESRCH); -#endif /* __APPLE__ */ /* * Increment this reference count to indicate that a consumer is @@ -1807,13 +1742,8 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) goto no_mem; } -#if !defined(__APPLE__) - pp = kmem_zalloc(sizeof (fasttrap_probe_t), KM_SLEEP); - ASSERT(pp != NULL); -#else pp = zalloc(fasttrap_probe_t_zones[1]); bzero(pp, sizeof (fasttrap_probe_t)); -#endif pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; @@ -1821,12 +1751,8 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = 1; -#if !defined(__APPLE__) - tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); -#else tp = zalloc(fasttrap_tracepoint_t_zone); bzero(tp, sizeof (fasttrap_tracepoint_t)); -#endif tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; @@ -1835,9 +1761,7 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) pp->ftp_tps[0].fit_tp = tp; pp->ftp_tps[0].fit_id.fti_probe = pp; -#if defined(__APPLE__) pp->ftp_tps[0].fit_id.fti_ptype = pdata->ftps_probe_type; -#endif pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str, FASTTRAP_OFFSET_AFRAMES, pp); @@ -1868,18 +1792,12 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) } ASSERT(pdata->ftps_noffs > 0); -#if !defined(__APPLE__) - pp = kmem_zalloc(offsetof(fasttrap_probe_t, - ftp_tps[pdata->ftps_noffs]), KM_SLEEP); - 
ASSERT(pp != NULL); -#else if (pdata->ftps_noffs < FASTTRAP_PROBE_T_ZONE_MAX_TRACEPOINTS) { pp = zalloc(fasttrap_probe_t_zones[pdata->ftps_noffs]); bzero(pp, offsetof(fasttrap_probe_t, ftp_tps[pdata->ftps_noffs])); } else { pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[pdata->ftps_noffs]), KM_SLEEP); } -#endif pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; @@ -1888,22 +1806,15 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) pp->ftp_ntps = pdata->ftps_noffs; for (i = 0; i < pdata->ftps_noffs; i++) { -#if !defined(__APPLE__) - tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); -#else tp = zalloc(fasttrap_tracepoint_t_zone); bzero(tp, sizeof (fasttrap_tracepoint_t)); -#endif - tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; -#if defined(__APPLE__) pp->ftp_tps[i].fit_id.fti_ptype = pdata->ftps_probe_type; -#endif } pp->ftp_id = dtrace_probe_create(provider->ftp_provid, @@ -1974,9 +1885,9 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) FASTTRAP_PID_NAME); return (NULL); } -#if defined(__APPLE__) + /* - * We also need to check the other pid provider types + * APPLE NOTE: We also need to check the objc and oneshot pid provider types */ if (strncmp(dhpv->dthpv_provname, FASTTRAP_OBJC_NAME, sizeof(FASTTRAP_OBJC_NAME)) == 0) { cmn_err(CE_WARN, "failed to instantiate provider %s: " @@ -1990,7 +1901,6 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) FASTTRAP_ONESHOT_NAME); return (NULL); } -#endif /* __APPLE__ */ /* * The highest stability class that fasttrap supports is ISA; cap @@ -2007,7 +1917,6 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA; -#if defined(__APPLE__) if ((provider = fasttrap_provider_lookup(pid, DTFTP_PROVIDER_USDT, dhpv->dthpv_provname, &dhpv->dthpv_pattr)) == NULL) { cmn_err(CE_WARN, "failed to instantiate provider %s for " @@ -2031,9 +1940,9 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) * for that case. * * UPDATE: It turns out there are several use cases that require adding - * probes to existing providers. Disabling this optimization for now... + * probes to existing providers. Disabling the dtrace_probe_lookup() + * optimization for now. See APPLE NOTE in fasttrap_meta_create_probe. */ -#endif /* __APPLE__ */ /* * Up the meta provider count so this provider isn't removed until @@ -2090,7 +1999,7 @@ fasttrap_meta_create_probe(void *arg, void *parg, */ lck_mtx_lock(&provider->ftp_cmtx); -#if !defined(__APPLE__) +#if 0 /* * APPLE NOTE: This is hideously expensive. See note in * fasttrap_meta_provide() for why we can get away without @@ -2114,17 +2023,12 @@ fasttrap_meta_create_probe(void *arg, void *parg, return; } -#if !defined(__APPLE__) - pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[ntps]), KM_SLEEP); - ASSERT(pp != NULL); -#else if (ntps < FASTTRAP_PROBE_T_ZONE_MAX_TRACEPOINTS) { pp = zalloc(fasttrap_probe_t_zones[ntps]); bzero(pp, offsetof(fasttrap_probe_t, ftp_tps[ntps])); } else { pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[ntps]), KM_SLEEP); } -#endif pp->ftp_prov = provider; pp->ftp_pid = provider->ftp_pid; @@ -2137,15 +2041,11 @@ fasttrap_meta_create_probe(void *arg, void *parg, * First create a tracepoint for each actual point of interest. 
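For reference, the duplicate-probe check that the new #if 0 above compiles out has this shape (reconstructed from the surrounding context, not quoted from the block itself): it returns early when the probe already exists, and that dtrace_probe_lookup() walk is precisely the cost the APPLE NOTEs call hideously expensive:

    if (dtrace_probe_lookup(provider->ftp_provid, dhpb->dthpb_mod,
        dhpb->dthpb_func, dhpb->dthpb_name) != 0) {
            /* Probe already exists; skip re-creation. */
            lck_mtx_unlock(&provider->ftp_cmtx);
            return;
    }

The #if 0 keeps the check out of the build while preserving it in the source next to the note that explains the trade-off.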
*/ for (i = 0; i < dhpb->dthpb_noffs; i++) { -#if !defined(__APPLE__) - tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); -#else tp = zalloc(fasttrap_tracepoint_t_zone); bzero(tp, sizeof (fasttrap_tracepoint_t)); -#endif tp->ftt_proc = provider->ftp_proc; -#if defined(__APPLE__) + /* * APPLE NOTE: We have linker support when creating DOF to handle all relocations for us. * Unfortunately, a side effect of this is that the relocations do not point at exactly @@ -2160,33 +2060,22 @@ fasttrap_meta_create_probe(void *arg, void *parg, #error "Architecture not supported" #endif -#else - tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_offs[i]; -#endif tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; -#ifdef __sparc - pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_POST_OFFSETS; -#else pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_OFFSETS; -#endif } /* * Then create a tracepoint for each is-enabled point. */ for (j = 0; i < ntps; i++, j++) { -#if !defined(__APPLE__) - tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); -#else tp = zalloc(fasttrap_tracepoint_t_zone); bzero(tp, sizeof (fasttrap_tracepoint_t)); -#endif tp->ftt_proc = provider->ftp_proc; -#if defined(__APPLE__) + /* * APPLE NOTE: We have linker support when creating DOF to handle all relocations for us. * Unfortunately, a side effect of this is that the relocations do not point at exactly @@ -2201,9 +2090,6 @@ fasttrap_meta_create_probe(void *arg, void *parg, #error "Architecture not supported" #endif -#else - tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_enoffs[j]; -#endif tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; @@ -2252,6 +2138,30 @@ static dtrace_mops_t fasttrap_mops = { fasttrap_meta_remove }; +/* + * Validate a null-terminated string. If str is not null-terminated, + * or not a UTF8 valid string, the function returns -1. Otherwise, 0 is + * returned. + * + * str: string to validate. + * maxlen: maximal length of the string, null-terminated byte included. + */ +static int +fasttrap_validatestr(char const* str, size_t maxlen) { + size_t len; + + assert(str); + assert(maxlen != 0); + + /* Check if the string is null-terminated. */ + len = strnlen(str, maxlen); + if (len >= maxlen) + return -1; + + /* Finally, check for UTF8 validity. */ + return utf8_validatestr((unsigned const char*) str, len); +} + /*ARGSUSED*/ static int fasttrap_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv) @@ -2263,9 +2173,8 @@ fasttrap_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int * if (cmd == FASTTRAPIOC_MAKEPROBE) { fasttrap_probe_spec_t *probe; uint64_t noffs; - size_t size, i; + size_t size; int ret; - char *c; if (copyin(arg + __offsetof(fasttrap_probe_spec_t, ftps_noffs), &noffs, sizeof (probe->ftps_noffs))) @@ -2299,24 +2208,13 @@ fasttrap_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int * * Verify that the function and module strings contain no * funny characters. 
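The new fasttrap_validatestr() above folds both of the checks the old open-coded loops performed (deleted just below) into two calls: a bounded NUL search via strnlen() and UTF-8 validation via utf8_validatestr(). A short usage sketch against the function as defined:

    char buf[4] = { 'p', 'i', 'd', '!' };          /* no terminating NUL */

    /* strnlen(buf, 4) == 4 >= maxlen, so validation fails ... */
    assert(fasttrap_validatestr(buf, sizeof(buf)) == -1);

    /* ... while a NUL-terminated, well-formed UTF-8 string passes. */
    assert(fasttrap_validatestr("entry", 6) == 0);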
*/ - for (i = 0, c = &probe->ftps_func[0]; i < sizeof(probe->ftps_func) && *c != '\0'; i++, c++) { - if (*c < 0x20 || 0x7f <= *c) { - ret = EINVAL; - goto err; - } - } - if (*c != '\0') { + + if (fasttrap_validatestr(probe->ftps_func, sizeof(probe->ftps_func)) != 0) { ret = EINVAL; goto err; } - for (i = 0, c = &probe->ftps_mod[0]; i < sizeof(probe->ftps_mod) && *c != '\0'; i++, c++) { - if (*c < 0x20 || 0x7f <= *c) { - ret = EINVAL; - goto err; - } - } - if (*c != '\0') { + if (fasttrap_validatestr(probe->ftps_mod, sizeof(probe->ftps_mod)) != 0) { ret = EINVAL; goto err; } @@ -2440,18 +2338,15 @@ fasttrap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_fasttrap_exit_ptr = &fasttrap_exec_exit; dtrace_fasttrap_exec_ptr = &fasttrap_exec_exit; -#if !defined(__APPLE__) - fasttrap_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, - "fasttrap-max-probes", FASTTRAP_MAX_DEFAULT); -#else /* - * We're sizing based on system memory. 100k probes per 256M of system memory. + * APPLE NOTE: We size the maximum number of fasttrap probes + * based on system memory. 100k probes per 256M of system memory. * Yes, this is a WAG. */ fasttrap_max = (sane_size >> 28) * 100000; if (fasttrap_max == 0) fasttrap_max = 50000; -#endif + fasttrap_total = 0; /* @@ -2472,15 +2367,14 @@ fasttrap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) fasttrap_tpoints.fth_table = kmem_zalloc(fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); ASSERT(fasttrap_tpoints.fth_table != NULL); -#if defined(__APPLE__) + /* - * We have to explicitly initialize all locks... + * APPLE NOTE: explicitly initialize all locks... */ unsigned int i; for (i=0; i */ struct savearea_t; /* Used anonymously */ +#if defined(__x86_64__) typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, __unused int); extern perfCallback tempDTraceTrapHook; extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *, __unused int); +#else +#error Unknown architecture +#endif #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) #define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */ @@ -250,6 +254,9 @@ fbt_resume(void *arg, dtrace_id_t id, void *parg) dtrace_membar_consumer(); } +/* + * APPLE NOTE: fbt_getargdesc not implemented + */ #if !defined(__APPLE__) /*ARGSUSED*/ static void @@ -362,11 +369,7 @@ static dtrace_pops_t fbt_pops = { fbt_disable, fbt_suspend, fbt_resume, -#if !defined(__APPLE__) - fbt_getargdesc, -#else - NULL, /* FIXME: where to look for xnu? 
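Back in fasttrap_attach() above, the probes-per-memory WAG is easy to sanity-check: sane_size is the system memory size in bytes, so sane_size >> 28 counts whole 256 MiB chunks. Worked through (the RAM sizes are illustrative):

    /*   8 GiB RAM:  (1ULL << 33) >> 28 == 32  ->  32 * 100000 = 3,200,000 probes
     * 512 MiB RAM:  (1ULL << 29) >> 28 ==  2  ->   2 * 100000 =   200,000 probes
     * 128 MiB RAM:  (1ULL << 27) >> 28 ==  0  ->  floor applies:   50,000 probes
     */
    fasttrap_max = (sane_size >> 28) * 100000;
    if (fasttrap_max == 0)
            fasttrap_max = 50000;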
*/ -#endif /* __APPLE__ */ + NULL, /* APPLE NOTE: fbt_getargdesc not implemented */ NULL, NULL, fbt_destroy @@ -403,15 +406,6 @@ fbt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_invop_add(fbt_invop); -#if !defined(__APPLE__) - if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0, - DDI_PSEUDO, NULL) == DDI_FAILURE || - dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL, - &fbt_pops, NULL, &fbt_id) != 0) { - fbt_cleanup(devi); - return (DDI_FAILURE); - } -#else if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0, DDI_PSEUDO, 0) == DDI_FAILURE || dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL, @@ -419,7 +413,6 @@ fbt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) fbt_cleanup(devi); return (DDI_FAILURE); } -#endif /* __APPLE__ */ ddi_report_dev(devi); fbt_devi = devi; diff --git a/bsd/dev/dtrace/lockstat.c b/bsd/dev/dtrace/lockstat.c index 508f2ea19..4417a812a 100644 --- a/bsd/dev/dtrace/lockstat.c +++ b/bsd/dev/dtrace/lockstat.c @@ -171,13 +171,14 @@ typedef struct lockstat_assembly_probe { /* - * Hot patch switches back and forth the probe points between NOP and RET. - * The active argument indicates whether the probe point will turn on or off. + * APPLE NOTE: + * Hot patch is used to manipulate probe points by swapping between + * no-op and return instructions. + * The active flag indicates whether the probe point will turn on or off. * on == plant a NOP and thus fall through to the probe call * off == plant a RET and thus avoid the probe call completely - * The lsap_probe identifies which probe we will patch. + * The ls_probe identifies which probe we will patch. */ -#if defined(__APPLE__) static void lockstat_hot_patch(boolean_t active, int ls_probe) { @@ -200,14 +201,15 @@ void lockstat_hot_patch(boolean_t active, int ls_probe) #endif } /* for */ } -#endif /* __APPLE__*/ - void (*lockstat_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); -#if defined(__APPLE__) -/* This wrapper is used by arm assembler hot patched probes */ + +/* + * APPLE NOTE: + * This wrapper is used only by assembler hot patched probes. + */ void lockstat_probe_wrapper(int probe, uintptr_t lp, int rwflag) { @@ -218,8 +220,6 @@ lockstat_probe_wrapper(int probe, uintptr_t lp, int rwflag) (*lockstat_probe)(id, (uintptr_t)lp, (uint64_t)rwflag, 0,0,0); } } -#endif /* __APPLE__ */ - static dev_info_t *lockstat_devi; /* saved in xxattach() for xxinfo() */ static dtrace_provider_id_t lockstat_id; diff --git a/bsd/dev/dtrace/profile_prvd.c b/bsd/dev/dtrace/profile_prvd.c index 60ebf9bd7..eb5ada1bb 100644 --- a/bsd/dev/dtrace/profile_prvd.c +++ b/bsd/dev/dtrace/profile_prvd.c @@ -25,21 +25,6 @@ /* #pragma ident "@(#)profile.c 1.7 07/01/10 SMI" */ -#if !defined(__APPLE__) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#else #ifdef KERNEL #ifndef _KERNEL #define _KERNEL /* Solaris vs. 
Darwin */ @@ -77,7 +62,6 @@ extern x86_saved_state_t *find_kern_regs(thread_t); #define ASSERT(x) do {} while(0) extern void profile_init(void); -#endif /* __APPLE__ */ static dev_info_t *profile_devi; static dtrace_provider_id_t profile_id; @@ -176,16 +160,12 @@ profile_fire(void *arg) late = dtrace_gethrtime() - pcpu->profc_expected; pcpu->profc_expected += pcpu->profc_interval; -#if !defined(__APPLE__) - dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, - CPU->cpu_profile_upc, late, 0, 0); -#else #if defined(__x86_64__) x86_saved_state_t *kern_regs = find_kern_regs(current_thread()); if (NULL != kern_regs) { /* Kernel was interrupted. */ - dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0); + dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, late, 0, 0); } else { pal_register_cache_state(current_thread(), VALID); @@ -195,21 +175,20 @@ profile_fire(void *arg) if (NULL == tagged_regs) { /* Too bad, so sad, no useful interrupt state. */ dtrace_probe(prof->prof_id, 0xcafebabe, - 0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */ + 0x0, late, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */ } else if (is_saved_state64(tagged_regs)) { x86_saved_state64_t *regs = saved_state64(tagged_regs); - dtrace_probe(prof->prof_id, 0x0, regs->isf.rip, 0, 0, 0); + dtrace_probe(prof->prof_id, 0x0, regs->isf.rip, late, 0, 0); } else { x86_saved_state32_t *regs = saved_state32(tagged_regs); - dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0); + dtrace_probe(prof->prof_id, 0x0, regs->eip, late, 0, 0); } } #else #error Unknown architecture #endif -#endif /* __APPLE__ */ } static void @@ -217,10 +196,6 @@ profile_tick(void *arg) { profile_probe_t *prof = arg; -#if !defined(__APPLE__) - dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, - CPU->cpu_profile_upc, 0, 0, 0); -#else #if defined(__x86_64__) x86_saved_state_t *kern_regs = find_kern_regs(current_thread()); @@ -249,7 +224,6 @@ profile_tick(void *arg) #else #error Unknown architecture #endif -#endif /* __APPLE__ */ } static void @@ -269,14 +243,11 @@ profile_create(hrtime_t interval, const char *name, int kind) return; } -#if !defined(__APPLE__) - prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); -#else if (PROF_TICK == kind) prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); else prof = kmem_zalloc(sizeof (profile_probe_t) + NCPU*sizeof(profile_probe_percpu_t), KM_SLEEP); -#endif /* __APPLE__ */ + (void) strlcpy(prof->prof_name, name, sizeof(prof->prof_name)); prof->prof_interval = interval; prof->prof_cyclic = CYCLIC_NONE; @@ -295,38 +266,6 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) hrtime_t val = 0, mult = 1, len; const char *name, *suffix = NULL; -#if !defined(__APPLE__) - const struct { - char *prefix; - int kind; - } types[] = { - { PROF_PREFIX_PROFILE, PROF_PROFILE }, - { PROF_PREFIX_TICK, PROF_TICK }, - { NULL, NULL } - }; - - const struct { - char *name; - hrtime_t mult; - } suffixes[] = { - { "ns", NANOSEC / NANOSEC }, - { "nsec", NANOSEC / NANOSEC }, - { "us", NANOSEC / MICROSEC }, - { "usec", NANOSEC / MICROSEC }, - { "ms", NANOSEC / MILLISEC }, - { "msec", NANOSEC / MILLISEC }, - { "s", NANOSEC / SEC }, - { "sec", NANOSEC / SEC }, - { "m", NANOSEC * (hrtime_t)60 }, - { "min", NANOSEC * (hrtime_t)60 }, - { "h", NANOSEC * (hrtime_t)(60 * 60) }, - { "hour", NANOSEC * (hrtime_t)(60 * 60) }, - { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, - { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, - { "hz", 0 }, - { NULL } - }; -#else const struct { const char 
*prefix; int kind; @@ -357,8 +296,6 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) { "hz", 0 }, { NULL, 0 } }; -#endif /* __APPLE__ */ - if (desc == NULL) { char n[PROF_NAMELEN]; @@ -366,11 +303,7 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) /* * If no description was provided, provide all of our probes. */ -#if !defined(__APPLE__) - for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { -#else for (i = 0; i < (int)(sizeof (profile_rates) / sizeof (int)); i++) { -#endif /* __APPLE__ */ if ((rate = profile_rates[i]) == 0) continue; @@ -379,11 +312,7 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) profile_create(NANOSEC / rate, n, PROF_PROFILE); } -#if !defined(__APPLE__) - for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { -#else for (i = 0; i < (int)(sizeof (profile_ticks) / sizeof (int)); i++) { -#endif /* __APPLE__ */ if ((rate = profile_ticks[i]) == 0) continue; @@ -440,17 +369,11 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) * Look-up the suffix to determine the multiplier. */ for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { -#if !defined(__APPLE__) - if (strcasecmp(suffixes[i].name, suffix) == 0) { - mult = suffixes[i].mult; - break; - } -#else + /* APPLE NOTE: Darwin employs size bounded string operations */ if (strncasecmp(suffixes[i].name, suffix, strlen(suffixes[i].name) + 1) == 0) { mult = suffixes[i].mult; break; } -#endif /* __APPLE__ */ } if (suffixes[i].name == NULL && *suffix != '\0') @@ -476,14 +399,11 @@ profile_destroy(void *arg, dtrace_id_t id, void *parg) profile_probe_t *prof = parg; ASSERT(prof->prof_cyclic == CYCLIC_NONE); -#if !defined(__APPLE__) - kmem_free(prof, sizeof (profile_probe_t)); -#else + if (prof->prof_kind == PROF_TICK) kmem_free(prof, sizeof (profile_probe_t)); else kmem_free(prof, sizeof (profile_probe_t) + NCPU*sizeof(profile_probe_percpu_t)); -#endif /* __APPLE__ */ ASSERT(profile_total >= 1); atomic_add_32(&profile_total, -1); @@ -497,11 +417,7 @@ profile_online(void *arg, dtrace_cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *wh profile_probe_t *prof = arg; profile_probe_percpu_t *pcpu; -#if !defined(__APPLE__) - pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); -#else pcpu = ((profile_probe_percpu_t *)(&(prof[1]))) + cpu_number(); -#endif /* __APPLE__ */ pcpu->profc_probe = prof; hdlr->cyh_func = profile_fire; @@ -509,11 +425,7 @@ profile_online(void *arg, dtrace_cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *wh hdlr->cyh_level = CY_HIGH_LEVEL; when->cyt_interval = prof->prof_interval; -#if !defined(__APPLE__) when->cyt_when = dtrace_gethrtime() + when->cyt_interval; -#else - when->cyt_when = 0; -#endif /* __APPLE__ */ pcpu->profc_expected = when->cyt_when; pcpu->profc_interval = when->cyt_interval; @@ -526,11 +438,7 @@ profile_offline(void *arg, dtrace_cpu_t *cpu, void *oarg) profile_probe_percpu_t *pcpu = oarg; ASSERT(pcpu->profc_probe == arg); -#if !defined(__APPLE__) - kmem_free(pcpu, sizeof (profile_probe_percpu_t)); -#else #pragma unused(pcpu,arg,cpu) /* __APPLE__ */ -#endif /* __APPLE__ */ } /*ARGSUSED*/ @@ -564,19 +472,12 @@ profile_enable(void *arg, dtrace_id_t id, void *parg) omni.cyo_arg = prof; } -#if !defined(__APPLE__) - if (prof->prof_kind == PROF_TICK) { - prof->prof_cyclic = cyclic_add(&hdlr, &when); - } else { - prof->prof_cyclic = cyclic_add_omni(&omni); - } -#else if (prof->prof_kind == PROF_TICK) { prof->prof_cyclic = cyclic_timer_add(&hdlr, &when); } else { prof->prof_cyclic = (cyclic_id_t)cyclic_add_omni(&omni); /* cast puns 
cyclic_id_list_t with cyclic_id_t */ } -#endif /* __APPLE__ */ + return(0); } @@ -589,34 +490,24 @@ profile_disable(void *arg, dtrace_id_t id, void *parg) ASSERT(prof->prof_cyclic != CYCLIC_NONE); ASSERT(MUTEX_HELD(&cpu_lock)); -#if !defined(__APPLE__) - cyclic_remove(prof->prof_cyclic); -#else #pragma unused(arg,id) if (prof->prof_kind == PROF_TICK) { cyclic_timer_remove(prof->prof_cyclic); } else { cyclic_remove_omni((cyclic_id_list_t)prof->prof_cyclic); /* cast puns cyclic_id_list_t with cyclic_id_t */ } -#endif /* __APPLE__ */ prof->prof_cyclic = CYCLIC_NONE; } -#if !defined(__APPLE__) -/*ARGSUSED*/ -static int -profile_usermode(void *arg, dtrace_id_t id, void *parg) -{ - return (CPU->cpu_profile_pc == 0); -} -#else +/* + * APPLE NOTE: profile_usermode call not supported. + */ static int profile_usermode(void *arg, dtrace_id_t id, void *parg) { #pragma unused(arg,id,parg) return 1; /* XXX_BOGUS */ } -#endif /* __APPLE__ */ static dtrace_pattr_t profile_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, @@ -651,19 +542,6 @@ profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } -#if !defined(__APPLE__) - if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0, - DDI_PSEUDO, NULL) == DDI_FAILURE || - dtrace_register("profile", &profile_attr, - DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL, - &profile_pops, NULL, &profile_id) != 0) { - ddi_remove_minor_node(devi, NULL); - return (DDI_FAILURE); - } - - profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, - "profile-max-probes", PROFILE_MAX_DEFAULT); -#else if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0, DDI_PSEUDO, 0) == DDI_FAILURE || dtrace_register("profile", &profile_attr, @@ -674,13 +552,15 @@ profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) } profile_max = PROFILE_MAX_DEFAULT; -#endif /* __APPLE__ */ ddi_report_dev(devi); profile_devi = devi; return (DDI_SUCCESS); } +/* + * APPLE NOTE: profile_detach not implemented + */ #if !defined(__APPLE__) static int profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) @@ -700,100 +580,8 @@ profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) ddi_remove_minor_node(devi, NULL); return (DDI_SUCCESS); } +#endif /* __APPLE__ */ -/*ARGSUSED*/ -static int -profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = (void *)profile_devi; - error = DDI_SUCCESS; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - default: - error = DDI_FAILURE; - } - return (error); -} - -/*ARGSUSED*/ -static int -profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) -{ - return (0); -} - -static struct cb_ops profile_cb_ops = { - profile_open, /* open */ - nodev, /* close */ - nulldev, /* strategy */ - nulldev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - nodev, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_NEW | D_MP /* Driver compatibility flag */ -}; - -static struct dev_ops profile_ops = { - DEVO_REV, /* devo_rev, */ - 0, /* refcnt */ - profile_info, /* get_dev_info */ - nulldev, /* identify */ - nulldev, /* probe */ - profile_attach, /* attach */ - profile_detach, /* detach */ - nodev, /* reset */ - &profile_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev /* dev power */ -}; - -/* - * Module linkage information for the 
kernel. - */ -static struct modldrv modldrv = { - &mod_driverops, /* module type (this is a pseudo driver) */ - "Profile Interrupt Tracing", /* name of module */ - &profile_ops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} -#else d_open_t _profile_open; int _profile_open(dev_t dev, int flags, int devtype, struct proc *p) @@ -847,4 +635,3 @@ void profile_init( void ) panic("profile_init: called twice!\n"); } #undef PROFILE_MAJOR -#endif /* __APPLE__ */ diff --git a/bsd/dev/dtrace/scripts/Makefile b/bsd/dev/dtrace/scripts/Makefile index 532a8699f..79b907e00 100644 --- a/bsd/dev/dtrace/scripts/Makefile +++ b/bsd/dev/dtrace/scripts/Makefile @@ -18,7 +18,7 @@ INSTALL_DTRACE_SCRIPTS_LIST = \ tcp.d \ unistd.d -ifeq ($(PLATFORM),iPhoneOS) +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) INSTALL_DTRACE_SCRIPTS_LIST += mptcp.d endif @@ -30,7 +30,7 @@ $(INSTALL_DTRACE_SCRIPTS_FILES): $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR)/% : % @echo INSTALL $(@F) $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ -do_build_install_primary:: $(INSTALL_DTRACE_SCRIPTS_FILES) +do_textfiles_install:: $(INSTALL_DTRACE_SCRIPTS_FILES) include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/dev/dtrace/scripts/darwin.d b/bsd/dev/dtrace/scripts/darwin.d index 2a2cf933e..138334605 100644 --- a/bsd/dev/dtrace/scripts/darwin.d +++ b/bsd/dev/dtrace/scripts/darwin.d @@ -39,6 +39,24 @@ inline uint32_t THREAD_TAG_MAINTHREAD = 0x1; inline uint32_t THREAD_TAG_CALLOUT = 0x2; inline uint32_t THREAD_TAG_IOWORKLOOP = 0x4; +/* + * mach thread scheduler state + */ +inline int TH_WAIT = 0x01; +#pragma D binding "1.0" TH_WAIT +inline int TH_SUSP = 0x02; +#pragma D binding "1.0" TH_SUSP +inline int TH_RUN = 0x04; +#pragma D binding "1.0" TH_RUN +inline int TH_UNINT = 0x08; +#pragma D binding "1.0" TH_UNINT +inline int TH_TERMINATE = 0x10; +#pragma D binding "1.0" TH_TERMINATE +inline int TH_TERMINATE2 = 0x20; +#pragma D binding "1.0" TH_TERMINATE2 +inline int TH_IDLE = 0x80; +#pragma D binding "1.0" TH_IDLE + /* * The following miscellaneous constants are used by the proc(4) translators * defined below. @@ -277,6 +295,7 @@ typedef struct lwpsinfo { short pr_syscall; /* system call number (if in syscall) */ int pr_pri; /* priority, high value is high priority */ char pr_clname[8]; /* scheduling class name */ + int pr_thstate; /* mach thread scheduler state */ processorid_t pr_onpro; /* processor which last ran this lwp */ processorid_t pr_bindpro; /* processor to which lwp is bound */ psetid_t pr_bindpset; /* processor set to which lwp is bound */ @@ -306,6 +325,7 @@ translator lwpsinfo_t < thread_t T > { pr_onpro = (T->last_processor == PROCESSOR_NULL) ? -1 : T->last_processor->cpu_id; pr_bindpro = -1; /* Darwin does not bind threads to processors. */ pr_bindpset = -1; /* Darwin does not partition processors. */ + pr_thstate = T->state; }; inline psinfo_t *curpsinfo = xlate (curproc); diff --git a/bsd/dev/dtrace/scripts/mptcp.d b/bsd/dev/dtrace/scripts/mptcp.d index cc7b9d365..5e5c60db5 100644 --- a/bsd/dev/dtrace/scripts/mptcp.d +++ b/bsd/dev/dtrace/scripts/mptcp.d @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Apple Computer, Inc. All Rights Reserved. + * Copyright (c) 2013-2014 Apple Computer, Inc. All Rights Reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -50,6 +50,8 @@ inline int MPTCPS_TIME_WAIT = 8; #pragma D binding "1.0" MPTCPS_TIME_WAIT inline int MPTCPS_FASTCLOSE_WAIT = 9; #pragma D binding "1.0" MPTCPS_FASTCLOSE_WAIT +inline int MPTCPS_TERMINATE = 10; +#pragma D binding "1.0" MPTCPS_TERMINATE typedef uint64_t mptcp_key_t; typedef uint32_t mptcp_token_t; @@ -94,6 +96,8 @@ translator mptsinfo_t < struct mptcb *T > { T->mpt_state == MPTCPS_TIME_WAIT ? "state-time-wait" : T->mpt_state == MPTCPS_FASTCLOSE_WAIT ? "state-fastclose-wait" : + T->mpt_state == MPTCPS_TERMINATE ? + "state-terminate" : ""; flags = T->mpt_flags; vers = T->mpt_version; @@ -206,6 +210,8 @@ inline int MPTSF_ACTIVE = 0x40000; #pragma D binding "1.0" MPTSF_ACTIVE inline int MPTSF_MPCAP_CTRSET = 0x80000; #pragma D binding "1.0" MPTSF_MPCAP_CTRSET +inline int MPTSF_FASTJ_SEND = 0x100000; +#pragma D binding "1.0" MPTSF_FASTJ_SEND typedef struct mptsubinfo { uint32_t flags; diff --git a/bsd/dev/dtrace/sdt.c b/bsd/dev/dtrace/sdt.c index ad03df995..610de106b 100644 --- a/bsd/dev/dtrace/sdt.c +++ b/bsd/dev/dtrace/sdt.c @@ -49,12 +49,13 @@ #include extern int dtrace_kernel_symbol_mode; +/* #include sdp_ctl; if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) { @@ -343,6 +348,9 @@ sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_SUCCESS); } +/* + * APPLE NOTE: sdt_detach not implemented + */ #if !defined(__APPLE__) /*ARGSUSED*/ static int @@ -375,100 +383,8 @@ sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) return (DDI_SUCCESS); } +#endif /* __APPLE__ */ -/*ARGSUSED*/ -static int -sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = (void *)sdt_devi; - error = DDI_SUCCESS; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - default: - error = DDI_FAILURE; - } - return (error); -} - -/*ARGSUSED*/ -static int -sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) -{ - return (0); -} - -static struct cb_ops sdt_cb_ops = { - sdt_open, /* open */ - nodev, /* close */ - nulldev, /* strategy */ - nulldev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - nodev, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_NEW | D_MP /* Driver compatibility flag */ -}; - -static struct dev_ops sdt_ops = { - DEVO_REV, /* devo_rev, */ - 0, /* refcnt */ - sdt_info, /* get_dev_info */ - nulldev, /* identify */ - nulldev, /* probe */ - sdt_attach, /* attach */ - sdt_detach, /* detach */ - nodev, /* reset */ - &sdt_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev /* dev power */ -}; - -/* - * Module linkage information for the kernel. 
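Here, as in the parallel hunks for profile_prvd.c and systrace.c, the Solaris modldrv/modlinkage boilerplate is deleted outright: on Darwin these providers are not loadable modules, so each file instead keeps a small cdevsw plus a one-shot init routine that claims a major number and calls the provider's attach function. A sketch of that replacement pattern, assuming the eno_* stub handlers that conf.c uses elsewhere in this patch; the SDT_MAJOR value, the cdevsw contents, and the error handling are illustrative:

    #define SDT_MAJOR -24        /* illustrative: let cdevsw_add() pick a slot */

    static struct cdevsw sdt_cdevsw =
    {
            _sdt_open,      /* open */
            eno_opcl,       /* close */
            eno_rdwrt,      /* read */
            eno_rdwrt,      /* write */
            eno_ioctl,      /* ioctl */
            eno_stop,       /* stop */
            eno_reset,      /* reset */
            NULL,           /* tty's */
            eno_select,     /* select */
            eno_mmap,       /* mmap */
            eno_strat,      /* strategy */
            eno_getc,       /* getc */
            eno_putc,       /* putc */
            0               /* type */
    };

    void
    sdt_init(void)
    {
            int majdevno = cdevsw_add(SDT_MAJOR, &sdt_cdevsw);
            if (majdevno < 0) {
                    printf("sdt_init: failed to allocate a major number!\n");
                    return;
            }
            sdt_attach((dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH);
    }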
- */ -static struct modldrv modldrv = { - &mod_driverops, /* module type (this is a pseudo driver) */ - "Statically Defined Tracing", /* name of module */ - &sdt_ops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} -#else d_open_t _sdt_open; int _sdt_open(dev_t dev, int flags, int devtype, struct proc *p) @@ -678,11 +594,11 @@ sdt_provide_module(void *arg, struct modctl *ctl) } g_sdt_mach_module.sdt_probes = NULL; } else { - /* FIXME -- sdt in kext not yet supported */ + /* + * APPLE NOTE: sdt probes for kexts not yet implemented + */ } /* Need to mark this module as completed */ ctl->mod_flags |= MODCTL_SDT_PROBES_PROVIDED; } - -#endif /* __APPLE__ */ diff --git a/bsd/dev/dtrace/sdt_subr.c b/bsd/dev/dtrace/sdt_subr.c index 82ab01989..cde9701e4 100644 --- a/bsd/dev/dtrace/sdt_subr.c +++ b/bsd/dev/dtrace/sdt_subr.c @@ -172,14 +172,14 @@ sdt_argdesc_t sdt_args[] = { { "io", "wait-done", 0, 0, "struct buf *", "bufinfo_t *" }, { "io", "wait-done", 1, 0, "struct buf *", "devinfo_t *" }, { "io", "wait-done", 2, 0, "struct buf *", "fileinfo_t *" }, -#if defined(__APPLE__) + + /* APPLE NOTE: add vfs journaling support */ { "io", "journal-start", 0, 0, "struct buf *", "bufinfo_t *" }, { "io", "journal-start", 1, 0, "struct buf *", "devinfo_t *" }, { "io", "journal-start", 2, 0, "struct buf *", "fileinfo_t *" }, { "io", "journal-done", 0, 0, "struct buf *", "bufinfo_t *" }, { "io", "journal-done", 1, 0, "struct buf *", "devinfo_t *" }, { "io", "journal-done", 2, 0, "struct buf *", "fileinfo_t *" }, -#endif /* __APPLE__ */ { "mib", NULL, 0, 0, "int", NULL }, diff --git a/bsd/dev/dtrace/systrace.c b/bsd/dev/dtrace/systrace.c index cdd5a3040..c8a6305b1 100644 --- a/bsd/dev/dtrace/systrace.c +++ b/bsd/dev/dtrace/systrace.c @@ -25,18 +25,6 @@ /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */ -#if !defined(__APPLE__) -#include -#include -#include -#include -#include -#include -#include -#include -#define SYSTRACE_ARTIFICIAL_FRAMES 1 -#else - #ifdef KERNEL #ifndef _KERNEL #define _KERNEL /* Solaris vs. Darwin */ @@ -169,6 +157,7 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) #if 0 /* XXX */ /* + * APPLE NOTE: Not implemented. * We want to explicitly allow DTrace consumers to stop a process * before it actually executes the meat of the syscall. 
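The six sdt_args rows added in the sdt_subr.c hunk above declare the argument translations for two new io provider probes; each row maps the native struct buf * argument onto one of the bufinfo_t/devinfo_t/fileinfo_t views. At a firing site, the journaling code would use the standard SDT io macro along these lines (a sketch assuming xnu's DTRACE_IO1 macro; journal_io_trace is a hypothetical wrapper, and SDT turns the double underscore into a dash, so these surface as io:::journal-start and io:::journal-done):

    static void
    journal_io_trace(buf_t bp)
    {
            DTRACE_IO1(journal__start, buf_t, bp);
            /* ... submit the journal buffer and wait for completion ... */
            DTRACE_IO1(journal__done, buf_t, bp);
    }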
*/ @@ -323,7 +312,6 @@ dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); } } -#endif /* __APPLE__ */ #define SYSTRACE_SHIFT 16 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) @@ -338,38 +326,12 @@ dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) static dev_info_t *systrace_devi; static dtrace_provider_id_t systrace_id; -#if !defined (__APPLE__) -static void -systrace_init(struct sysent *actual, systrace_sysent_t **interposed) -{ - systrace_sysent_t *sysent = *interposed; - int i; - - if (sysent == NULL) { - *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) * - NSYSCALL, KM_SLEEP); - } - - for (i = 0; i < NSYSCALL; i++) { - struct sysent *a = &actual[i]; - systrace_sysent_t *s = &sysent[i]; - - if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) - continue; - - if (a->sy_callc == dtrace_systrace_syscall) - continue; - -#ifdef _SYSCALL32_IMPL - if (a->sy_callc == dtrace_systrace_syscall32) - continue; -#endif +/* + * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol. + * See balanced undef below. + */ +#define systrace_init _systrace_init - s->stsy_underlying = a->sy_callc; - } -} -#else -#define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */ static void systrace_init(struct sysent *actual, systrace_sysent_t **interposed) { @@ -404,7 +366,6 @@ systrace_init(struct sysent *actual, systrace_sysent_t **interposed) lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr); } -#endif /* __APPLE__ */ /*ARGSUSED*/ static void @@ -444,9 +405,7 @@ systrace_provide(void *arg, const dtrace_probedesc_t *desc) #endif } } -#if defined(__APPLE__) #undef systrace_init -#endif /*ARGSUSED*/ static void @@ -584,19 +543,6 @@ systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } -#if !defined(__APPLE__) - systrace_probe = (void (*)())dtrace_probe; - membar_enter(); - - if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, - DDI_PSEUDO, NULL) == DDI_FAILURE || - dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, - &systrace_pops, NULL, &systrace_id) != 0) { - systrace_probe = systrace_stub; - ddi_remove_minor_node(devi, NULL); - return (DDI_FAILURE); - } -#else systrace_probe = (void(*))&dtrace_probe; membar_enter(); @@ -608,7 +554,6 @@ systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ddi_remove_minor_node(devi, NULL); return (DDI_FAILURE); } -#endif /* __APPLE__ */ ddi_report_dev(devi); systrace_devi = devi; @@ -616,6 +561,10 @@ systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_SUCCESS); } + +/* + * APPLE NOTE: systrace_detach not implemented + */ #if !defined(__APPLE__) static int systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) @@ -636,109 +585,18 @@ systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) systrace_probe = systrace_stub; return (DDI_SUCCESS); } +#endif /* __APPLE__ */ -/*ARGSUSED*/ -static int -systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = (void *)systrace_devi; - error = DDI_SUCCESS; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - default: - error = DDI_FAILURE; - } - return (error); -} - -/*ARGSUSED*/ -static int -systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) -{ - return (0); -} - -static struct cb_ops systrace_cb_ops = { - systrace_open, /* 
open */ - nodev, /* close */ - nulldev, /* strategy */ - nulldev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - nodev, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_NEW | D_MP /* Driver compatibility flag */ -}; - -static struct dev_ops systrace_ops = { - DEVO_REV, /* devo_rev, */ - 0, /* refcnt */ - systrace_info, /* get_dev_info */ - nulldev, /* identify */ - nulldev, /* probe */ - systrace_attach, /* attach */ - systrace_detach, /* detach */ - nodev, /* reset */ - &systrace_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev /* dev power */ -}; - -/* - * Module linkage information for the kernel. - */ -static struct modldrv modldrv = { - &mod_driverops, /* module type (this is a pseudo driver) */ - "System Call Tracing", /* name of module */ - &systrace_ops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} -#else typedef kern_return_t (*mach_call_t)(void *); -/* XXX From #include which may be changed for 64 bit! */ -typedef void mach_munge_t(const void *, void *); +/* APPLE NOTE: From #include which may be changed for 64 bit! */ +typedef void mach_munge_t(void *); typedef struct { int mach_trap_arg_count; kern_return_t (*mach_trap_function)(void *); -#if defined(__x86_64__) +#if defined(__arm64__) || defined(__x86_64__) mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */ #endif int mach_trap_u32_words; @@ -834,6 +692,7 @@ dtrace_machtrace_syscall(struct mach_call_args *args) #if 0 /* XXX */ /* + * APPLE NOTE: Not implemented. * We want to explicitly allow DTrace consumers to stop a process * before it actually executes the meat of the syscall. */ @@ -1027,16 +886,6 @@ machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } -#if !defined(__APPLE__) - machtrace_probe = (void (*)())dtrace_probe; - membar_enter(); - - if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, - DDI_PSEUDO, NULL) == DDI_FAILURE || - dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, - &machtrace_pops, NULL, &machtrace_id) != 0) { - machtrace_probe = systrace_stub; -#else machtrace_probe = dtrace_probe; membar_enter(); @@ -1045,7 +894,6 @@ machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, &machtrace_pops, NULL, &machtrace_id) != 0) { machtrace_probe = (void (*))&systrace_stub; -#endif /* __APPLE__ */ ddi_remove_minor_node(devi, NULL); return (DDI_FAILURE); } @@ -1111,7 +959,6 @@ void systrace_init( void ) panic("systrace_init: called twice!\n"); } #undef SYSTRACE_MAJOR -#endif /* __APPLE__ */ static uint64_t systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) diff --git a/bsd/dev/dtrace/systrace.h b/bsd/dev/dtrace/systrace.h index aeab1de4e..e9af96363 100644 --- a/bsd/dev/dtrace/systrace.h +++ b/bsd/dev/dtrace/systrace.h @@ -29,7 +29,6 @@ /* #pragma ident "@(#)systrace.h 1.3 06/09/19 SMI" */ -#if defined(__APPLE__) #ifdef KERNEL #ifndef _KERNEL #define _KERNEL /* Solaris vs. 
Darwin */ @@ -38,9 +37,6 @@ #include -#endif /* __APPLE__ */ -#include - #ifdef __cplusplus extern "C" { #endif @@ -50,26 +46,13 @@ extern "C" { typedef struct systrace_sysent { dtrace_id_t stsy_entry; dtrace_id_t stsy_return; -#if !defined(__APPLE__) - int64_t (*stsy_underlying)(); -#else int32_t (*stsy_underlying)(struct proc *, void *, int *); int32_t stsy_return_type; -#endif /* __APPLE__ */ } systrace_sysent_t; extern systrace_sysent_t *systrace_sysent; extern systrace_sysent_t *systrace_sysent32; -#if !defined(__APPLE__) -extern void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t, uintptr_t); -extern void systrace_stub(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t, uintptr_t); - -extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); -#else extern void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); extern void systrace_stub(dtrace_id_t, uint64_t, uint64_t, @@ -78,14 +61,13 @@ extern void systrace_stub(dtrace_id_t, uint64_t, uint64_t, extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *); extern void dtrace_systrace_syscall_return(unsigned short, int, int *); -#endif /* __APPLE__ */ #ifdef _SYSCALL32_IMPL extern int64_t dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); #endif -#endif +#endif /* _KERNEL */ #ifdef __cplusplus } diff --git a/bsd/dev/i386/conf.c b/bsd/dev/i386/conf.c index cfaae450b..62d62601e 100644 --- a/bsd/dev/i386/conf.c +++ b/bsd/dev/i386/conf.c @@ -47,7 +47,6 @@ /* Prototypes that should be elsewhere: */ extern dev_t chrtoblk(dev_t dev); extern int chrtoblk_set(int cdev, int bdev); -extern int iskmemdev(dev_t dev); struct bdevsw bdevsw[] = { @@ -134,12 +133,12 @@ extern d_ioctl_t mmioctl; #include #if NPTY > 0 -extern struct tty *pt_tty[]; extern d_open_t ptsopen; extern d_close_t ptsclose; extern d_read_t ptsread; extern d_write_t ptswrite; extern d_stop_t ptsstop; +extern d_select_t ptsselect; extern d_open_t ptcopen; extern d_close_t ptcclose; extern d_read_t ptcread; @@ -212,7 +211,7 @@ struct cdevsw cdevsw[] = }, { ptsopen, ptsclose, ptsread, ptswrite, /* 4*/ - ptyioctl, ptsstop, nullreset, pt_tty, ttselect, + ptyioctl, ptsstop, nullreset, 0, ptsselect, eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY }, { @@ -369,10 +368,3 @@ chrtoblk_set(int cdev, int bdev) return 0; } -/* - * Returns true if dev is /dev/mem or /dev/kmem. 
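The systrace_sysent table declared in the header above is the heart of syscall interposition: when a probe is enabled, the real sysent handler is saved in stsy_underlying and the slot is pointed at dtrace_systrace_syscall(), which fires the entry probe and then calls through. Paraphrased control flow using the Darwin signatures from this header (systrace_call_through is a hypothetical condensation; the real function also munges arguments and fires the return probe):

    static int32_t
    systrace_call_through(unsigned short code, struct proc *p, void *uap, int *rv)
    {
            systrace_sysent_t *sy = &systrace_sysent[code];

            if (sy->stsy_entry != DTRACE_IDNONE)
                    (*systrace_probe)(sy->stsy_entry, 0, 0, 0, 0, 0);  /* args elided */

            /* Hand off to the saved original handler. */
            return (*sy->stsy_underlying)(p, uap, rv);
    }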
- */ -int iskmemdev(dev_t dev) -{ - return (major(dev) == 3 && minor(dev) < 2); -} diff --git a/bsd/dev/i386/dis_tables.c b/bsd/dev/i386/dis_tables.c index bfaa4bc79..e0031d618 100644 --- a/bsd/dev/i386/dis_tables.c +++ b/bsd/dev/i386/dis_tables.c @@ -31,16 +31,10 @@ /* * #pragma ident "@(#)dis_tables.c 1.18 08/05/24 SMI" */ -#if !defined(__APPLE__) -#include "dis_tables.h" -#else #include #include - #include -#endif /* __APPLE__ */ - /* BEGIN CSTYLED */ /* @@ -66,9 +60,6 @@ #ifdef DIS_TEXT extern char *strncpy(char *, const char *, size_t); extern size_t strlen(const char *); -#if !defined(__APPLE__) -extern int strcmp(const char *, const char *); -#endif /* __APPLE__ */ extern int strncmp(const char *, const char *, size_t); extern size_t strlcat(char *, const char *, size_t); #endif @@ -1000,12 +991,8 @@ const instable_t dis_op0F[16][16] = { /* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), /* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), /* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, -#if !defined(__APPLE__) -/* [1C] */ INVALID, INVALID, INVALID, INVALID, -#else -/* Need to handle multi-byte NOP */ +/* APPLE NOTE: Need to handle multi-byte NOP */ /* [1C] */ INVALID, INVALID, INVALID, TS("nop",Mw), -#endif /* __APPLE __ */ }, { /* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), /* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, @@ -1336,30 +1323,14 @@ const instable_t dis_distable[16][16] = { /* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), }, { /* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), -#if !defined(__APPLE__) -/* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), -#else /* [2,4] */ TNS("andb",IA), TS("and",IA), TNS("%es:",OVERRIDE), TNSx("daa",NORM), -#endif /* __APPLE__ */ /* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), -#if !defined(__APPLE__) -/* [2,C] */ TNS("subb",IA), TS("sub",IA), TNSx("%cs:",OVERRIDE), TNSx("das",NORM), -#else /* [2,C] */ TNS("subb",IA), TS("sub",IA), TNS("%cs:",OVERRIDE), TNSx("das",NORM), -#endif /* __APPLE__ */ }, { /* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), -#if !defined(__APPLE__) -/* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), -#else /* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNS("%ss:",OVERRIDE), TNSx("aaa",NORM), -#endif /* __APPLE__ */ /* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), -#if !defined(__APPLE__) -/* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), -#else /* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNS("%ds:",OVERRIDE), TNSx("aas",NORM), -#endif /* __APPLE__ */ }, { /* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), /* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), diff --git a/bsd/dev/i386/dtrace_isa.c b/bsd/dev/i386/dtrace_isa.c index 05f366291..dfdeaad82 100644 --- a/bsd/dev/i386/dtrace_isa.c +++ b/bsd/dev/i386/dtrace_isa.c @@ -158,10 +158,10 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg) xcArg.arg = arg; if (cpu == DTRACE_CPUALL) { - mp_cpus_call (CPUMASK_ALL, SYNC, xcRemote, (void*)&xcArg); + mp_cpus_call (CPUMASK_ALL, ASYNC, xcRemote, (void*)&xcArg); } else { - mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), SYNC, xcRemote, (void*)&xcArg); + mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), ASYNC, 
xcRemote, (void*)&xcArg); } } diff --git a/bsd/dev/i386/fasttrap_isa.c b/bsd/dev/i386/fasttrap_isa.c index e3bfb9402..4eeb6d140 100644 --- a/bsd/dev/i386/fasttrap_isa.c +++ b/bsd/dev/i386/fasttrap_isa.c @@ -197,21 +197,19 @@ extern dtrace_id_t dtrace_probeid_error; * * REG_RAX -> EAX * REG_RCX -> ECX - * ... + * REG_RDX -> EDX + * REG_RBX -> EBX + * REG_RSP -> UESP + * REG_RBP -> EBP + * REG_RSI -> ESI * REG_RDI -> EDI * * The fasttrap_getreg function knows how to make the correct transformation. */ -#if __sol64 || defined(__APPLE__) static const uint8_t regmap[16] = { REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, }; -#else -static const uint8_t regmap[8] = { - EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI -}; -#endif static user_addr_t fasttrap_getreg(x86_saved_state_t *, uint_t); @@ -363,13 +361,11 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, break; } -#if __sol64 || defined(__APPLE__) /* * Identify the REX prefix on 64-bit processes. */ if (p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) rex = instr[start++]; -#endif /* * Now that we're pretty sure that the instruction is okay, copy the @@ -452,11 +448,9 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, * modes, there is a 32-bit operand. */ if (mod == 0 && rm == 5) { -#if __sol64 || defined(__APPLE__) if (p_model == DATAMODEL_LP64) tp->ftt_base = REG_RIP; else -#endif tp->ftt_base = FASTTRAP_NOREG; sz = 4; } else { @@ -554,7 +548,6 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, break; case FASTTRAP_NOP: -#if __sol64 || defined(__APPLE__) ASSERT(p_model == DATAMODEL_LP64 || rex == 0); /* @@ -564,7 +557,6 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). */ if (FASTTRAP_REX_B(rex) == 0) -#endif tp->ftt_type = FASTTRAP_T_NOP; break; @@ -591,7 +583,6 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, } } -#if __sol64 || defined(__APPLE__) if (p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { /* * If the process is 64-bit and the instruction type is still @@ -637,7 +628,6 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, } } } -#endif return (0); } @@ -993,12 +983,12 @@ fasttrap_pid_probe32(x86_saved_state_t *regs) * parent. We know that there's only one thread of control in such a * process: this one. */ - /* - * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal" - * FIXME: How do we assert this? - */ - while (p->p_lflag & P_LINVFORK) - p = p->p_pptr; + if (p->p_lflag & P_LINVFORK) { + proc_list_lock(); + while (p->p_lflag & P_LINVFORK) + p = p->p_pptr; + proc_list_unlock(); + } pid = p->p_pid; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; @@ -1552,12 +1542,12 @@ fasttrap_pid_probe64(x86_saved_state_t *regs) * parent. We know that there's only one thread of control in such a * process: this one. */ - /* - * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal" - * FIXME: How do we assert this? 
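Note the shape of the fix applied here and repeated in fasttrap_pid_probe64() and fasttrap_return_probe() just below: the vfork-parent walk now runs only under proc_list_lock(), which is what keeps the p_pptr chain stable and answers the old FIXME about how to assert the locking. As a shared helper the pattern would read (hypothetical refactor, not part of the patch):

    static proc_t *
    fasttrap_vfork_parent(proc_t *p)
    {
            if (p->p_lflag & P_LINVFORK) {
                    proc_list_lock();       /* p_pptr is stable under this lock */
                    while (p->p_lflag & P_LINVFORK)
                            p = p->p_pptr;
                    proc_list_unlock();
            }
            return p;
    }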
- */ - while (p->p_lflag & P_LINVFORK) - p = p->p_pptr; + if (p->p_lflag & P_LINVFORK) { + proc_list_lock(); + while (p->p_lflag & P_LINVFORK) + p = p->p_pptr; + proc_list_unlock(); + } pid = p->p_pid; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; @@ -2184,13 +2174,10 @@ fasttrap_return_probe(x86_saved_state_t *regs) * parent. We know that there's only one thread of control in such a * process: this one. */ - /* - * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal" - * How do we assert this? - */ - while (p->p_lflag & P_LINVFORK) { + proc_list_lock(); + while (p->p_lflag & P_LINVFORK) p = p->p_pptr; - } + proc_list_unlock(); /* * We set rp->r_pc to the address of the traced instruction so diff --git a/bsd/dev/i386/fbt_x86.c b/bsd/dev/i386/fbt_x86.c index 750a024cf..93c19c561 100644 --- a/bsd/dev/i386/fbt_x86.c +++ b/bsd/dev/i386/fbt_x86.c @@ -1046,6 +1046,8 @@ fbt_provide_module(void *arg, struct modctl *ctl) if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) { __user_syms_provide_module(arg, ctl); ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED; + if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl)) + ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED; return; } } diff --git a/bsd/dev/i386/instr_size.c b/bsd/dev/i386/instr_size.c index 2982b8d66..4d8c11dd2 100644 --- a/bsd/dev/i386/instr_size.c +++ b/bsd/dev/i386/instr_size.c @@ -120,19 +120,3 @@ dtrace_instr_size(uchar_t *instr) return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, DATAMODEL_NATIVE, NULL)); } - -#if !defined(__APPLE__) -/*ARGSUSED*/ -int -instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rw) -{ - uchar_t instr[16]; /* maximum size instruction */ - caddr_t pc = (caddr_t)rp->r_pc; - - (void) copyin_nowatch(pc, (caddr_t)instr, sizeof (instr)); - - return (dtrace_dis_isize(instr, - rw == S_EXEC ? DIS_ISIZE_INSTR : DIS_ISIZE_OPERAND, - curproc->p_model, NULL)); -} -#endif /* __APPLE__ */ diff --git a/bsd/dev/i386/kern_machdep.c b/bsd/dev/i386/kern_machdep.c index 79bd890e2..cd7dbb1d7 100644 --- a/bsd/dev/i386/kern_machdep.c +++ b/bsd/dev/i386/kern_machdep.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,23 +43,39 @@ /********************************************************************** * Routine: grade_binary() * - * Function: Keep the API the same between PPC and X86; always say - * any CPU subtype is OK with us, but only OK CPU types - * for which we are actually capable of executing the - * binary, either directly or via an imputed interpreter. + * Function: Say OK to CPU types that we can actually execute on the given + * system. 64-bit binaries have the highest preference, followed + * by 32-bit binaries. 0 means unsupported. 
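 *
 * Worked examples of the grades assigned by the switch below (higher
 * grade wins when a fat binary offers several slices):
 *
 *   x86_64h host:        x86_64h slice -> 3, x86_64 (ALL) slice -> 2,
 *                        i386 slice -> 1
 *   generic x86_64 host: x86_64h slice -> 0 (unsupported),
 *                        x86_64 (ALL) slice -> 2, i386 slice -> 1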
**********************************************************************/ int -grade_binary(cpu_type_t exectype, __unused cpu_subtype_t execsubtype) +grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype) { + cpu_subtype_t hostsubtype = cpu_subtype(); + switch(exectype) { + case CPU_TYPE_X86_64: /* native 64-bit */ + switch(hostsubtype) { + case CPU_SUBTYPE_X86_64_H: /* x86_64h can execute anything */ + switch (execsubtype) { + case CPU_SUBTYPE_X86_64_H: + return 3; + case CPU_SUBTYPE_X86_64_ALL: + return 2; + } + break; + case CPU_SUBTYPE_X86_ARCH1: /* generic systems can only execute ALL subtype */ + switch (execsubtype) { + case CPU_SUBTYPE_X86_64_ALL: + return 2; + } + break; + } + break; case CPU_TYPE_X86: /* native */ - case CPU_TYPE_POWERPC: /* via translator */ return 1; - case CPU_TYPE_X86_64: /* native 64-bit */ - return (ml_is64bit() ? 2 : 0); - default: /* all other binary types */ - return 0; } + + return 0; } extern void md_prepare_for_shutdown(int, int, char *); diff --git a/bsd/dev/i386/stubs.c b/bsd/dev/i386/stubs.c index bf69ac8ac..4dd2830f5 100644 --- a/bsd/dev/i386/stubs.c +++ b/bsd/dev/i386/stubs.c @@ -45,11 +45,6 @@ #include #include -/* XXX should be elsewhere (cpeak) */ -extern void *get_bsduthreadarg(thread_t); -extern int *get_bsduthreadrval(thread_t); -extern void *find_user_regs(thread_t); - /* * copy a null terminated string from the kernel address space into * the user address space. @@ -116,20 +111,3 @@ copywithin(void *src, void *dst, size_t count) bcopy(src,dst,count); return 0; } - -void * -get_bsduthreadarg(thread_t th) -{ - struct uthread *ut; - ut = get_bsdthread_info(th); - return ut->uu_ap; -} - -int * -get_bsduthreadrval(thread_t th) -{ -struct uthread *ut; - - ut = get_bsdthread_info(th); - return(&ut->uu_rval[0]); -} diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index d96ceb1c0..a314e46b5 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -833,86 +833,6 @@ SYSCTL_INT(_kern, OID_AUTO, interrupt_timer_coalescing_enabled, SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_idle_entry_hard_deadline_max, CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, &idle_entry_timer_processing_hdeadline_threshold, 0, ""); -/* Coalescing tuning parameters for various thread/task attributes */ -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_bg_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_bg_shift, 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_bg_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_bg_ns_max, ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_kt_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_kt_shift, 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_kt_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_kt_ns_max, ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_fp_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_fp_shift, 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_fp_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_fp_ns_max, ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_ts_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_ts_shift, 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_ts_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.timer_coalesce_ts_ns_max, ""); - -SYSCTL_INT(_kern, OID_AUTO, 
timer_coalesce_tier0_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_scale[0], 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier0_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_ns_max[0], ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier1_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_scale[1], 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier1_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_ns_max[1], ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier2_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_scale[2], 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier2_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_ns_max[2], ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier3_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_scale[3], 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier3_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_ns_max[3], ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier4_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_scale[4], 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier4_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_ns_max[4], ""); - -SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier5_scale, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_scale[5], 0, ""); - -SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier5_ns_max, - CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcoal_prio_params.latency_qos_ns_max[5], ""); /* Track potentially expensive eager timer evaluations on QoS tier * switches. @@ -928,28 +848,3 @@ extern uint64_t ml_timer_eager_evaluation_max; SYSCTL_QUAD(_machdep, OID_AUTO, eager_timer_evaluation_max, CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, &ml_timer_eager_evaluation_max, ""); - -/* Communicate the "user idle level" heuristic to the timer layer, and - * potentially other layers in the future. - */ - -static int -timer_set_user_idle_level(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { - int new_value = 0, old_value = 0, changed = 0, error; - - old_value = ml_timer_get_user_idle_level(); - - error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed); - - if (error == 0 && changed) { - if (ml_timer_set_user_idle_level(new_value) != KERN_SUCCESS) - error = ERANGE; - } - - return error; -} - -SYSCTL_PROC(_machdep, OID_AUTO, user_idle_level, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, - 0, 0, - timer_set_user_idle_level, "I", "User idle level heuristic, 0-128"); diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c index 77ecfba3a..8b2c8ab98 100644 --- a/bsd/dev/i386/systemcalls.c +++ b/bsd/dev/i386/systemcalls.c @@ -69,15 +69,6 @@ extern void *find_user_regs(thread_t); /* dynamically generated at build time based on syscalls.master */ extern const char *syscallnames[]; -/* - * This needs to be a single switch so that it's "all on" or "all off", - * rather than being turned on for some code paths and not others, as this - * has a tendency to introduce "blame the next guy" bugs. 
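The user_idle_level handler removed above is a textbook instance of the xnu read/write sysctl idiom, which the surviving SYSCTL_PROC entries still follow: sysctl_io_number() both reports the old value and copies in a candidate new one, and the changed flag gates applying it. Skeleton of the idiom (my_tunable_handler and the getter/setter names are hypothetical):

    static int
    my_tunable_handler SYSCTL_HANDLER_ARGS
    {
    #pragma unused(oidp, arg1, arg2)
            int new_value = 0, changed = 0;
            int old_value = my_tunable_get();          /* hypothetical getter */
            int error = sysctl_io_number(req, old_value, sizeof(int),
                &new_value, &changed);

            if (error == 0 && changed)
                    error = my_tunable_set(new_value); /* hypothetical setter */
            return error;
    }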
- */ -#if DEBUG -#define FUNNEL_DEBUG 1 /* Check for funnel held on exit */ -#endif - /* * Function: unix_syscall * @@ -141,10 +132,13 @@ unix_syscall(x86_saved_state_t *state) } vt = (void *)uthread->uu_arg; - uthread->uu_ap = vt; if (callp->sy_arg_bytes != 0) { +#if CONFIG_REQUIRES_U32_MUNGING sy_munge_t *mungerp; +#else +#error U32 syscalls on x86_64 kernel requires munging +#endif uint32_t nargs; assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg)); @@ -164,17 +158,13 @@ unix_syscall(x86_saved_state_t *state) BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, *ip, *(ip+1), *(ip+2), *(ip+3), 0); } + +#if CONFIG_REQUIRES_U32_MUNGING mungerp = callp->sy_arg_munge32; - /* - * If non-NULL, then call the syscall argument munger to - * copy in arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s); the - * first argument is NULL because we are munging in place - * after a copyin because the ABI currently doesn't use - * registers to pass system call arguments. - */ if (mungerp != NULL) - (*mungerp)(NULL, vt); + (*mungerp)(vt); +#endif } else KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, @@ -187,9 +177,9 @@ unix_syscall(x86_saved_state_t *state) kauth_cred_uthread_update(uthread, p); uthread->uu_rval[0] = 0; - uthread->uu_rval[1] = regs->edx; + uthread->uu_rval[1] = 0; uthread->uu_flag |= UT_NOTCANCELPT; - + uthread->syscall_code = code; #ifdef JOE_DEBUG uthread->uu_iocount = 0; @@ -198,7 +188,7 @@ unix_syscall(x86_saved_state_t *state) AUDIT_SYSCALL_ENTER(code, p, uthread); error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0])); - AUDIT_SYSCALL_EXIT(code, p, uthread, error); + AUDIT_SYSCALL_EXIT(code, p, uthread, error); #ifdef JOE_DEBUG if (uthread->uu_iocount) @@ -223,6 +213,11 @@ unix_syscall(x86_saved_state_t *state) regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ + /* + * We split retval across two registers, in case the + * syscall had a 64-bit return value, in which case + * eax/edx matches the function call ABI. + */ regs->eax = uthread->uu_rval[0]; regs->edx = uthread->uu_rval[1]; } @@ -233,12 +228,6 @@ unix_syscall(x86_saved_state_t *state) error, regs->eax, regs->edx); uthread->uu_flag &= ~UT_NOTCANCELPT; -#if FUNNEL_DEBUG - /* - * if we're holding the funnel panic - */ - syscall_exit_funnelcheck(); -#endif /* FUNNEL_DEBUG */ if (__improbable(uthread->uu_lowpri_window)) { /* @@ -268,10 +257,11 @@ void unix_syscall64(x86_saved_state_t *state) { thread_t thread; + void *vt; unsigned int code; struct sysent *callp; - void *uargp; int args_in_regs; + boolean_t args_start_at_rdi; int error; struct proc *p; struct uthread *uthread; @@ -300,43 +290,50 @@ unix_syscall64(x86_saved_state_t *state) thread_exception_return(); /* NOTREACHED */ } - args_in_regs = 6; code = regs->rax & SYSCALL_NUMBER_MASK; DEBUG_KPRINT_SYSCALL_UNIX( "unix_syscall64: code=%d(%s) rip=%llx\n", code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip); callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; - uargp = (void *)(®s->rdi); + + vt = (void *)uthread->uu_arg; if (__improbable(callp == sysent)) { /* * indirect system call... system call number * passed as 'arg0' */ - code = regs->rdi; + code = regs->rdi; callp = (code >= NUM_SYSENT) ? 
&sysent[63] : &sysent[code]; - uargp = (void *)(®s->rsi); + args_start_at_rdi = FALSE; args_in_regs = 5; + } else { + args_start_at_rdi = TRUE; + args_in_regs = 6; } - uthread->uu_ap = uargp; if (callp->sy_narg != 0) { + assert(callp->sy_narg <= 8); /* size of uu_arg */ + + args_in_regs = MIN(args_in_regs, callp->sy_narg); + memcpy(vt, args_start_at_rdi ? ®s->rdi : ®s->rsi, args_in_regs * sizeof(syscall_arg_t)); + + if (code != 180) { - uint64_t *ip = (uint64_t *)uargp; + uint64_t *ip = (uint64_t *)vt; KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0); } - assert(callp->sy_narg <= 8); if (__improbable(callp->sy_narg > args_in_regs)) { int copyin_count; - copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t); + copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t); - error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count); + error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count); if (error) { regs->rax = error; regs->isf.rflags |= EFL_CF; @@ -357,9 +354,8 @@ unix_syscall64(x86_saved_state_t *state) uthread->uu_rval[0] = 0; uthread->uu_rval[1] = 0; - - uthread->uu_flag |= UT_NOTCANCELPT; + uthread->syscall_code = code; #ifdef JOE_DEBUG uthread->uu_iocount = 0; @@ -367,8 +363,8 @@ unix_syscall64(x86_saved_state_t *state) #endif AUDIT_SYSCALL_ENTER(code, p, uthread); - error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0])); - AUDIT_SYSCALL_EXIT(code, p, uthread, error); + error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0])); + AUDIT_SYSCALL_EXIT(code, p, uthread, error); #ifdef JOE_DEBUG if (uthread->uu_iocount) @@ -426,13 +422,6 @@ unix_syscall64(x86_saved_state_t *state) uthread->uu_flag &= ~UT_NOTCANCELPT; -#if FUNNEL_DEBUG - /* - * if we're holding the funnel panic - */ - syscall_exit_funnelcheck(); -#endif /* FUNNEL_DEBUG */ - if (__improbable(uthread->uu_lowpri_window)) { /* * task is marked as a low priority I/O type @@ -460,7 +449,6 @@ unix_syscall_return(int error) struct uthread *uthread; struct proc *p; unsigned int code; - vm_offset_t params; struct sysent *callp; thread = current_thread(); @@ -475,17 +463,9 @@ unix_syscall_return(int error) regs = saved_state64(find_user_regs(thread)); - /* reconstruct code for tracing before blasting rax */ - code = regs->rax & SYSCALL_NUMBER_MASK; + code = uthread->syscall_code; callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; - if (callp == sysent) - /* - * indirect system call... system call number - * passed as 'arg0' - */ - code = regs->rdi; - #if CONFIG_DTRACE if (callp->sy_call == dtrace_systrace_syscall) dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); @@ -539,8 +519,8 @@ unix_syscall_return(int error) regs = saved_state32(find_user_regs(thread)); regs->efl &= ~(EFL_CF); - /* reconstruct code for tracing before blasting eax */ - code = regs->eax & I386_SYSCALL_NUMBER_MASK; + + code = uthread->syscall_code; callp = (code >= NUM_SYSENT) ? 
&sysent[63] : &sysent[code]; #if CONFIG_DTRACE @@ -549,10 +529,6 @@ unix_syscall_return(int error) #endif /* CONFIG_DTRACE */ AUDIT_SYSCALL_EXIT(code, p, uthread, error); - if (callp == sysent) { - params = (vm_offset_t) (regs->uesp + sizeof (int)); - code = fuword(params); - } if (error == ERESTART) { pal_syscall_restart( thread, find_user_regs(thread) ); } @@ -573,13 +549,6 @@ unix_syscall_return(int error) uthread->uu_flag &= ~UT_NOTCANCELPT; -#if FUNNEL_DEBUG - /* - * if we're holding the funnel panic - */ - syscall_exit_funnelcheck(); -#endif /* FUNNEL_DEBUG */ - if (uthread->uu_lowpri_window) { /* * task is marked as a low priority I/O type diff --git a/bsd/dev/i386/mem.c b/bsd/dev/mem.c similarity index 74% rename from bsd/dev/i386/mem.c rename to bsd/dev/mem.c index 61f0d6929..25e1a601e 100644 --- a/bsd/dev/i386/mem.c +++ b/bsd/dev/mem.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,8 +67,6 @@ * @(#)mem.c 8.1 (Berkeley) 6/11/93 */ -#include - /* * Memory special file */ @@ -86,10 +84,23 @@ #include #include /* for kernel_map */ +#include /* for PE_parse_boot_argn */ + +boolean_t iskmemdev(dev_t dev); + +#if CONFIG_DEV_KMEM +boolean_t dev_kmem_enabled; +boolean_t dev_kmem_mask_top_bit; + +void dev_kmem_init(void); + +#if defined(__x86_64__) extern addr64_t kvtophys(vm_offset_t va); +#else +#error need kvtophys prototype +#endif extern boolean_t kernacc(off_t, size_t ); -#if !defined(SECURE_KERNEL) -extern int setup_kmem; + #endif static caddr_t devzerobuf; @@ -102,14 +113,12 @@ int mmrw(dev_t dev, struct uio *uio, enum uio_rw rw); int mmread(dev_t dev, struct uio *uio) { - return (mmrw(dev, uio, UIO_READ)); } int mmwrite(dev_t dev, struct uio *uio) { - return (mmrw(dev, uio, UIO_WRITE)); } @@ -121,11 +130,11 @@ mmioctl(dev_t dev, u_long cmd, __unused caddr_t data, if (0 == minnum || 1 == minnum) { /* /dev/mem and /dev/kmem */ -#if defined(SECURE_KERNEL) - return (ENODEV); +#if CONFIG_DEV_KMEM + if (!dev_kmem_enabled) + return (ENODEV); #else - if (0 == setup_kmem) - return (EINVAL); + return (ENODEV); #endif } @@ -144,66 +153,65 @@ mmioctl(dev_t dev, u_long cmd, __unused caddr_t data, int mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) { - register int o; - register u_int c, v; + unsigned int c; int error = 0; - vm_offset_t where; - vm_size_t size; - - while (uio_resid(uio) > 0 && error == 0) { + while (uio_resid(uio) > 0) { uio_update(uio, 0); switch (minor(dev)) { /* minor device 0 is physical memory */ case 0: -#if defined(SECURE_KERNEL) - return(ENODEV); -#else - if (setup_kmem == 0) - return(ENODEV); -#endif + return (ENODEV); - v = trunc_page(uio->uio_offset); - if (uio->uio_offset >= (off_t)mem_size) - goto fault; + /* minor device 1 is kernel memory */ + case 1: +#if !CONFIG_DEV_KMEM + return (ENODEV); +#else /* CONFIG_DEV_KMEM */ + if (!dev_kmem_enabled) + return (ENODEV); - size= PAGE_SIZE; - if (kmem_alloc(kernel_map, &where, size) - != KERN_SUCCESS) { - goto fault; + vm_address_t kaddr = (vm_address_t)uio->uio_offset; + if (dev_kmem_mask_top_bit) { + /* + * KVA addresses of the form 0xFFFFFF80AABBCCDD can't be + * represented as a signed off_t correctly. In these cases, + * 0x7FFFFF80AABBCCDD is passed in, and the top bit OR-ed + * on. 
+ */ + const vm_address_t top_bit = (~((vm_address_t)0)) ^ (~((vm_address_t)0) >> 1UL); + if (kaddr & top_bit) { + /* top bit should not be set already */ + return (EFAULT); + } + kaddr |= top_bit; } - o = uio->uio_offset - v; - c = min(PAGE_SIZE - o, uio_curriovlen(uio)); - error = uiomove((caddr_t) (where + o), c, uio); - kmem_free(kernel_map, where, PAGE_SIZE); - continue; - /* minor device 1 is kernel memory */ - case 1: -#if defined(SECURE_KERNEL) - return(ENODEV); -#else - if (setup_kmem == 0) - return(ENODEV); -#endif + c = uio_curriovlen(uio); + /* Do some sanity checking */ - if (((vm_address_t)uio->uio_offset >= VM_MAX_KERNEL_ADDRESS) || - ((vm_address_t)uio->uio_offset <= VM_MIN_KERNEL_AND_KEXT_ADDRESS)) + if ((kaddr > (VM_MAX_KERNEL_ADDRESS - c)) || + (kaddr <= VM_MIN_KERNEL_AND_KEXT_ADDRESS)) goto fault; - c = uio_curriovlen(uio); - if (!kernacc(uio->uio_offset, c)) + if (!kernacc(kaddr, c)) goto fault; - error = uiomove((caddr_t)(uintptr_t)uio->uio_offset, + error = uiomove((const char *)(uintptr_t)kaddr, (int)c, uio); - continue; + if (error) + break; + + continue; /* Keep going until UIO is done */ +#endif /* CONFIG_DEV_KMEM */ /* minor device 2 is EOF/RATHOLE */ case 2: if (rw == UIO_READ) return (0); c = uio_curriovlen(uio); + + error = 0; /* Always succeeds, always consumes all input */ break; case 3: if(devzerobuf == NULL) { @@ -212,25 +220,48 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) } if(uio->uio_rw == UIO_WRITE) { c = uio_curriovlen(uio); + + error = 0; /* Always succeeds, always consumes all input */ break; } + c = min(uio_curriovlen(uio), PAGE_SIZE); error = uiomove(devzerobuf, (int)c, uio); - continue; + if (error) + break; + + continue; /* Keep going until UIO is done */ default: - goto fault; + return (ENODEV); break; } if (error) break; + uio_update(uio, c); } return (error); +#if CONFIG_DEV_KMEM fault: return (EFAULT); +#endif } +#if CONFIG_DEV_KMEM +void dev_kmem_init(void) +{ + uint32_t kmem; + + if (PE_parse_boot_argn("kmem", &kmem, sizeof (kmem))) { + if (kmem & 0x1) { + dev_kmem_enabled = TRUE; + } + if (kmem & 0x2) { + dev_kmem_mask_top_bit = TRUE; + } + } +} boolean_t kernacc( @@ -252,3 +283,13 @@ kernacc( return (TRUE); } + +#endif /* CONFIG_DEV_KMEM */ + +/* + * Returns true if dev is /dev/mem or /dev/kmem. 
+ */ +boolean_t iskmemdev(dev_t dev) +{ + return (major(dev) == 3 && minor(dev) < 2); +} diff --git a/bsd/dev/munge.c b/bsd/dev/munge.c index 168fad929..36da48eb4 100644 --- a/bsd/dev/munge.c +++ b/bsd/dev/munge.c @@ -31,62 +31,63 @@ #include #include -static inline __attribute__((always_inline)) void -munge_32_to_64_unsigned(volatile uint64_t *dest, volatile uint32_t *src, int count); - /* * Refer to comments in bsd/sys/munge.h */ + +static inline __attribute__((always_inline)) void +munge_32_to_64_unsigned(volatile uint64_t *dest, volatile uint32_t *src, int count); + void -munge_w(const void *arg0 __unused, void *args) +munge_w(void *args) { munge_32_to_64_unsigned(args, args, 1); } void -munge_ww(const void *arg0 __unused, void *args) +munge_ww(void *args) { munge_32_to_64_unsigned(args, args, 2); } void -munge_www(const void *arg0 __unused, void *args) +munge_www(void *args) { munge_32_to_64_unsigned(args, args, 3); } void -munge_wwww(const void *arg0 __unused, void *args) +munge_wwww(void *args) { munge_32_to_64_unsigned(args, args, 4); } void -munge_wwwww(const void *arg0 __unused, void *args) +munge_wwwww(void *args) { munge_32_to_64_unsigned(args, args, 5); } void -munge_wwwwww(const void *arg0 __unused, void *args) +munge_wwwwww(void *args) { munge_32_to_64_unsigned(args, args, 6); } void -munge_wwwwwww(const void *arg0 __unused, void *args) +munge_wwwwwww(void *args) { munge_32_to_64_unsigned(args, args, 7); } void -munge_wwwwwwww(const void *arg0 __unused, void *args) +munge_wwwwwwww(void *args) { munge_32_to_64_unsigned(args, args, 8); } void -munge_wl(const void *arg0 __unused, void *args) +munge_wl(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -96,7 +97,7 @@ munge_wl(const void *arg0 __unused, void *args) } void -munge_wwl(const void *arg0 __unused, void *args) +munge_wwl(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -107,7 +108,7 @@ munge_wwl(const void *arg0 __unused, void *args) } void -munge_wwlw(const void *arg0 __unused, void *args) +munge_wwlw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -118,7 +119,7 @@ munge_wwlw(const void *arg0 __unused, void *args) out_args[0] = in_args[0]; } void -munge_wwlll(const void *arg0 __unused, void *args) +munge_wwlll(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -131,7 +132,7 @@ munge_wwlll(const void *arg0 __unused, void *args) } void -munge_wwllww(const void *arg0 __unused, void *args) +munge_wwllww(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -145,7 +146,7 @@ munge_wwllww(const void *arg0 __unused, void *args) } void -munge_wlw(const void *arg0 __unused, void *args) +munge_wlw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -156,7 +157,7 @@ munge_wlw(const void *arg0 __unused, void *args) } void -munge_wlwwwll(const void *arg0 __unused, void *args) +munge_wlwwwll(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -171,17 +172,17 @@ munge_wlwwwll(const void *arg0 __unused, void *args) } void -munge_wlwwwllw(const void *arg0 __unused, void *args) +munge_wlwwwllw(void *args) { 
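	/*
	 * Added commentary (not part of the original patch): each 32-bit
	 * munger widens the packed 32-bit words copied in from a 32-bit user
	 * process into 64-bit argument slots in place.  Output slots are
	 * written from the highest index down (out_args[7] below is filled
	 * before munge_wlwwwll() rewrites slots 6..0), so a 64-bit store
	 * never clobbers a 32-bit input word that has not yet been read.
	 * Per the naming convention in bsd/sys/munge.h, 'w' is a
	 * zero-extended 32-bit word, 's' a sign-extended word, and 'l' a
	 * 64-bit value occupying two input words.
	 */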
volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; out_args[7] = in_args[10]; - munge_wlwwwll(args, args); + munge_wlwwwll(args); } void -munge_wlwwlwlw(const void *arg0 __unused, void *args) +munge_wlwwlwlw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -197,7 +198,7 @@ munge_wlwwlwlw(const void *arg0 __unused, void *args) } void -munge_wll(const void *arg0 __unused, void *args) +munge_wll(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -208,7 +209,7 @@ munge_wll(const void *arg0 __unused, void *args) } void -munge_wlll(const void *arg0 __unused, void *args) +munge_wlll(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -220,7 +221,7 @@ munge_wlll(const void *arg0 __unused, void *args) } void -munge_wllww(const void *arg0 __unused, void *args) +munge_wllww(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -233,7 +234,7 @@ munge_wllww(const void *arg0 __unused, void *args) } void -munge_wllwwll(const void *arg0 __unused, void *args) +munge_wllwwll(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -248,7 +249,7 @@ munge_wllwwll(const void *arg0 __unused, void *args) } void -munge_wwwlw(const void *arg0 __unused, void *args) +munge_wwwlw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -261,7 +262,7 @@ munge_wwwlw(const void *arg0 __unused, void *args) } void -munge_wwwlww(const void *arg0 __unused, void *args) +munge_wwwlww(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -275,7 +276,7 @@ munge_wwwlww(const void *arg0 __unused, void *args) } void -munge_wwwl(const void *arg0 __unused, void *args) +munge_wwwl(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -287,7 +288,7 @@ munge_wwwl(const void *arg0 __unused, void *args) } void -munge_wwwwlw(const void *arg0 __unused, void *args) +munge_wwwwlw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -301,7 +302,7 @@ munge_wwwwlw(const void *arg0 __unused, void *args) } void -munge_wwwwl(const void *arg0 __unused, void *args) +munge_wwwwl(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -314,7 +315,7 @@ munge_wwwwl(const void *arg0 __unused, void *args) } void -munge_wwwwwl(const void *arg0 __unused, void *args) +munge_wwwwwl(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -328,7 +329,7 @@ munge_wwwwwl(const void *arg0 __unused, void *args) } void -munge_wwwwwlww(const void *arg0 __unused, void *args) +munge_wwwwwlww(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -344,7 +345,7 @@ munge_wwwwwlww(const void *arg0 __unused, void *args) } void -munge_wwwwwllw(const void *arg0 __unused, void *args) +munge_wwwwwllw(void *args) { volatile uint64_t *out_args = 
(volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -360,7 +361,7 @@ munge_wwwwwllw(const void *arg0 __unused, void *args) } void -munge_wwwwwlll(const void *arg0 __unused, void *args) +munge_wwwwwlll(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -376,7 +377,7 @@ munge_wwwwwlll(const void *arg0 __unused, void *args) } void -munge_wwwwwwl(const void *arg0 __unused, void *args) +munge_wwwwwwl(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -391,7 +392,7 @@ munge_wwwwwwl(const void *arg0 __unused, void *args) } void -munge_wwwwwwlw(const void *arg0 __unused, void *args) +munge_wwwwwwlw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -407,7 +408,7 @@ munge_wwwwwwlw(const void *arg0 __unused, void *args) } void -munge_wwwwwwll(const void *arg0 __unused, void *args) +munge_wwwwwwll(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -423,7 +424,7 @@ munge_wwwwwwll(const void *arg0 __unused, void *args) } void -munge_wsw(const void *arg0 __unused, void *args) +munge_wsw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -434,7 +435,7 @@ munge_wsw(const void *arg0 __unused, void *args) } void -munge_wws(const void *arg0 __unused, void *args) +munge_wws(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -445,7 +446,7 @@ munge_wws(const void *arg0 __unused, void *args) } void -munge_wwwsw(const void *arg0 __unused, void *args) +munge_wwwsw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -458,25 +459,25 @@ munge_wwwsw(const void *arg0 __unused, void *args) } void -munge_llllll(const void *arg0 __unused, void *args __unused) +munge_llllll(void *args __unused) { /* Nothing to do, already all 64-bit */ } void -munge_ll(const void *arg0 __unused, void *args __unused) +munge_ll(void *args __unused) { /* Nothing to do, already all 64-bit */ } void -munge_l(const void *arg0 __unused, void *args __unused) +munge_l(void *args __unused) { /* Nothing to do, already all 64-bit */ } void -munge_lw(const void *arg0 __unused, void *args) +munge_lw(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -486,7 +487,7 @@ munge_lw(const void *arg0 __unused, void *args) } void -munge_lwww(const void *arg0 __unused, void *args) +munge_lwww(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -498,7 +499,7 @@ munge_lwww(const void *arg0 __unused, void *args) } void -munge_wwlwww(const void *arg0 __unused, void *args) +munge_wwlwww(void *args) { volatile uint64_t *out_args = (volatile uint64_t*)args; volatile uint32_t *in_args = (volatile uint32_t*)args; @@ -525,4 +526,3 @@ munge_32_to_64_unsigned(volatile uint64_t *dest, volatile uint32_t *src, int cou dest[i] = src[i]; } } - diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c index c73553994..1b96f774c 100644 --- a/bsd/dev/random/randomdev.c +++ b/bsd/dev/random/randomdev.c @@ -26,16 +26,6 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ 
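The hunks that follow strip the embedded Yarrow PRNG and its FIPS-186-2 output stage from this driver: /dev/random and /dev/urandom become thin shims over the kernel entropy layer, with reads served by read_random() and entropy writes fed through write_random(). As an added annotation (not part of the patch), here is a minimal sketch of the post-patch read path; it assumes only those two kernel interfaces, and the function name and buffer size are illustrative, mirroring the new random_read() further down in this diff:

    /* Sketch of the post-patch read loop (illustrative only). */
    static int
    random_read_sketch(struct uio *uio)
    {
            char buffer[512];
            int error = 0;

            while (uio_resid(uio) > 0 && error == 0) {
                    int n = MIN(uio_resid(uio),
                        (user_ssize_t)sizeof(buffer));
                    read_random(buffer, n);          /* fill from the kernel RNG */
                    error = uiomove(buffer, n, uio); /* copy out to the caller */
            }
            return (error);
    }
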
-/* - WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! - - THIS FILE IS NEEDED TO PASS FIPS ACCEPTANCE FOR THE RANDOM NUMBER GENERATOR. - IF YOU ALTER IT IN ANY WAY, WE WILL NEED TO GO THOUGH FIPS ACCEPTANCE AGAIN, - AN OPERATION THAT IS VERY EXPENSIVE AND TIME CONSUMING. IN OTHER WORDS, - DON'T MESS WITH THIS FILE. - - WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! -*/ #include #include @@ -46,30 +36,25 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include #include -#include - -#include "fips_sha1.h" #define RANDOM_MAJOR -1 /* let the kernel pick the device number */ +#define RANDOM_MINOR 0 +#define URANDOM_MINOR 1 d_ioctl_t random_ioctl; -/* To generate the seed for the RNG */ -extern uint64_t early_random(); - /* * A struct describing which functions will get invoked for certain * actions. @@ -93,313 +78,6 @@ static struct cdevsw random_cdevsw = }; -/* - WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! - - ANY CODE PROTECTED UNDER "#ifdef __arm__" IS SERIOUSLY SUPPOSED TO BE THERE! - IF YOU REMOVE ARM CODE, RANDOM WILL NOT MEAN ANYTHING FOR iPHONES ALL OVER. - PLEASE DON'T TOUCH __arm__ CODE IN THIS FILE! - - WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! -*/ - - -/* Used to detect whether we've already been initialized */ -static UInt8 gRandomInstalled = 0; -static PrngRef gPrngRef; -static int gRandomError = 1; -static lck_grp_t *gYarrowGrp; -static lck_attr_t *gYarrowAttr; -static lck_grp_attr_t *gYarrowGrpAttr; -static lck_mtx_t *gYarrowMutex = 0; -static UInt8 gYarrowInitializationLock = 0; - -#define RESEED_TICKS 50 /* how long a reseed operation can take */ - - -typedef u_int8_t BlockWord; -enum {kBSize = 20}; -typedef BlockWord Block[kBSize]; -enum {kBlockSize = sizeof(Block)}; - -/* define prototypes to keep the compiler happy... 
*/ - -void add_blocks(Block a, Block b, BlockWord carry); -void fips_initialize(void); -void random_block(Block b, int addOptional); -u_int32_t CalculateCRC(u_int8_t* buffer, size_t length); - -/* - * Get 120 bits from yarrow - */ - -/* - * add block b to block a - */ -void -add_blocks(Block a, Block b, BlockWord carry) -{ - int i = kBlockSize - 1; - while (i >= 0) - { - u_int32_t c = (u_int32_t)carry + - (u_int32_t)a[i] + - (u_int32_t)b[i]; - a[i] = c & 0xff; - carry = c >> 8; - i -= 1; - } -} - - - -static char zeros[(512 - kBSize * 8) / 8]; -static Block g_xkey; -static Block g_random_data; -static int g_bytes_used; -static unsigned char g_SelfTestInitialized = 0; -static u_int32_t gLastBlockChecksum; - -static const u_int32_t g_crc_table[] = -{ - 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, - 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, - 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, - 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, - 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, - 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, - 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, - 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, - 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, - 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, - 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, - 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, - 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, - 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, - 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, - 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, - 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, - 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, - 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, - 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, - 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, - 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, - 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, - 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, - 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, - 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, - 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, - 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 
0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, - 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, - 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, - 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, -}; - -/* - * Setup for fips compliance - */ - -/* - * calculate a crc-32 checksum - */ -u_int32_t CalculateCRC(u_int8_t* buffer, size_t length) -{ - u_int32_t crc = 0; - - size_t i; - for (i = 0; i < length; ++i) - { - u_int32_t temp = (crc ^ ((u_int32_t) buffer[i])) & 0xFF; - crc = (crc >> 8) ^ g_crc_table[temp]; - } - - return crc; -} - -/* - * get a random block of data per fips 186-2 - */ -void -random_block(Block b, int addOptional) -{ - SHA1_CTX sha1_ctx; - - int repeatCount = 0; - do - { - // do one iteration - - if (addOptional) - { - // create an xSeed to add. - Block xSeed; - prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed)); - - // add the seed to the previous value of g_xkey - add_blocks (g_xkey, xSeed, 0); - } - - // initialize the value of H - FIPS_SHA1Init(&sha1_ctx); - - // to stay compatible with the FIPS specification, we need to flip the bytes in - // g_xkey to little endian byte order. In our case, this makes exactly no difference - // (random is random), but we need to do it anyway to keep FIPS happy - - // compute "G" - FIPS_SHA1Update(&sha1_ctx, g_xkey, kBlockSize); - - // add zeros to fill the internal SHA-1 buffer - FIPS_SHA1Update (&sha1_ctx, (const u_int8_t *)zeros, sizeof (zeros)); - - // we have to do a byte order correction here because the sha1 math is being done internally - // as u_int32_t, not a stream of bytes. Since we maintain our data as a byte stream, we need - // to convert - - u_int32_t* finger = (u_int32_t*) b; - - unsigned j; - for (j = 0; j < kBlockSize / sizeof (u_int32_t); ++j) - { - *finger++ = OSSwapHostToBigInt32(sha1_ctx.h.b32[j]); - } - - // calculate the CRC-32 of the block - u_int32_t new_crc = CalculateCRC(sha1_ctx.h.b8, sizeof (Block)); - - // make sure we don't repeat - int cmp = new_crc == gLastBlockChecksum; - gLastBlockChecksum = new_crc; - if (!g_SelfTestInitialized) - { - g_SelfTestInitialized = 1; - return; - } - else if (!cmp) - { - return; - } - - repeatCount += 1; - - // fix up the next value of g_xkey - add_blocks (g_xkey, b, 1); - } while (repeatCount < 2); - - /* - * If we got here, three sucessive checksums of the random number - * generator have been the same. Since the odds of this happening are - * 1 in 18,446,744,073,709,551,616, (1 in 18 quintillion) one of the following has - * most likely happened: - * - * 1: There is a significant bug in this code. - * 2: There has been a massive system failure. - * 3: The universe has ceased to exist. - * - * There is no good way to recover from any of these cases. We - * therefore panic. - */ - - panic("FIPS random self-test failed."); -} - -/* - *Initialize ONLY the Yarrow generator. - */ -void -PreliminarySetup(void) -{ - prng_error_status perr; - - /* Multiple threads can enter this as a result of an earlier - * check of gYarrowMutex. We make sure that only one of them - * can enter at a time. If one of them enters and discovers - * that gYarrowMutex is no longer NULL, we know that another - * thread has initialized the Yarrow state and we can exit. - */ - - /* The first thread that enters this function will find - * gYarrowInitializationLock set to 0. 
It will atomically - * set the value to 1 and, seeing that it was zero, drop - * out of the loop. Other threads will see that the value is - * 1 and continue to loop until we are initialized. - */ - - while (OSTestAndSet(0, &gYarrowInitializationLock)); /* serialize access to this function */ - - if (gYarrowMutex) { - /* we've already been initialized, clear and get out */ - goto function_exit; - } - - /* create a Yarrow object */ - perr = prngInitialize(&gPrngRef); - if (perr != 0) { - printf ("Couldn't initialize Yarrow, /dev/random will not work.\n"); - return; - } - - /* clear the error flag, reads and write should then work */ - gRandomError = 0; - - uint64_t tt; - char buffer [16]; - - /* get a little non-deterministic data as an initial seed. */ - /* On OSX, securityd will add much more entropy as soon as it */ - /* comes up. On iOS, entropy is added with each system interrupt. */ - tt = early_random(); - - perr = prngInput(gPrngRef, (BYTE*) &tt, sizeof (tt), SYSTEM_SOURCE, 8); - if (perr != 0) { - /* an error, complain */ - printf ("Couldn't seed Yarrow.\n"); - goto function_exit; - } - - /* turn the data around */ - perr = prngOutput(gPrngRef, (BYTE*) buffer, sizeof (buffer)); - - /* and scramble it some more */ - perr = prngForceReseed(gPrngRef, RESEED_TICKS); - - /* make a mutex to control access */ - gYarrowGrpAttr = lck_grp_attr_alloc_init(); - gYarrowGrp = lck_grp_alloc_init("random", gYarrowGrpAttr); - gYarrowAttr = lck_attr_alloc_init(); - gYarrowMutex = lck_mtx_alloc_init(gYarrowGrp, gYarrowAttr); - - fips_initialize (); - -function_exit: - /* allow other threads to figure out whether or not we have been initialized. */ - gYarrowInitializationLock = 0; -} - -const Block kKnownAnswer = {0x92, 0xb4, 0x04, 0xe5, 0x56, 0x58, 0x8c, 0xed, 0x6c, 0x1a, 0xcd, 0x4e, 0xbf, 0x05, 0x3f, 0x68, 0x09, 0xf7, 0x3a, 0x93}; - -void -fips_initialize(void) -{ - /* So that we can do the self test, set the seed to zero */ - memset(&g_xkey, 0, sizeof(g_xkey)); - - /* other initializations */ - memset (zeros, 0, sizeof (zeros)); - g_bytes_used = 0; - random_block(g_random_data, FALSE); - - // check here to see if we got the initial data we were expecting - if (memcmp(kKnownAnswer, g_random_data, kBlockSize) != 0) - { - panic("FIPS random self test failed"); - } - - // now do the random block again to make sure that userland doesn't get predicatable data - random_block(g_random_data, TRUE); -} - /* * Called to initialize our device, * and to register ourselves with devfs @@ -409,31 +87,21 @@ random_init(void) { int ret; - if (OSTestAndSet(0, &gRandomInstalled)) { - /* do this atomically so that it works correctly with - multiple threads */ - return; - } - ret = cdevsw_add(RANDOM_MAJOR, &random_cdevsw); if (ret < 0) { - printf("random_init: failed to allocate a major number!\n"); - gRandomInstalled = 0; - return; + panic("random_init: failed to allocate a major number!"); } - devfs_make_node(makedev (ret, 0), DEVFS_CHAR, + devfs_make_node(makedev (ret, RANDOM_MINOR), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, "random", 0); /* * also make urandom * (which is exactly the same thing in our context) */ - devfs_make_node(makedev (ret, 1), DEVFS_CHAR, + devfs_make_node(makedev (ret, URANDOM_MINOR), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, "urandom", 0); - /* setup yarrow and the mutex if needed*/ - PreliminarySetup(); } int @@ -459,11 +127,6 @@ random_ioctl( __unused dev_t dev, u_long cmd, __unused caddr_t data, int random_open(__unused dev_t dev, int flags, __unused int devtype, __unused struct proc *p) { - if 
(gRandomError != 0) { - /* forget it, yarrow didn't come up */ - return (ENOTSUP); - } - /* * if we are being opened for write, * make sure that we have privledges do so @@ -497,80 +160,29 @@ random_close(__unused dev_t dev, __unused int flags, __unused int mode, __unused * prng. */ int -random_write (__unused dev_t dev, struct uio *uio, __unused int ioflag) +random_write (dev_t dev, struct uio *uio, __unused int ioflag) { int retCode = 0; char rdBuffer[256]; - if (gRandomError != 0) { - return (ENOTSUP); - } - - /* get control of the Yarrow instance, Yarrow is NOT thread safe */ - lck_mtx_lock(gYarrowMutex); - + if (minor(dev) != RANDOM_MINOR) + return EPERM; + /* Security server is sending us entropy */ while (uio_resid(uio) > 0 && retCode == 0) { /* get the user's data */ - int bytesToInput = min(uio_resid(uio), sizeof (rdBuffer)); + int bytesToInput = MIN(uio_resid(uio), + (user_ssize_t) sizeof(rdBuffer)); retCode = uiomove(rdBuffer, bytesToInput, uio); if (retCode != 0) - goto /*ugh*/ error_exit; - - /* put it in Yarrow */ - if (prngInput(gPrngRef, (BYTE*) rdBuffer, - bytesToInput, SYSTEM_SOURCE, - bytesToInput * 8) != 0) { - retCode = EIO; - goto error_exit; - } - } - - /* force a reseed */ - if (prngForceReseed(gPrngRef, RESEED_TICKS) != 0) { - retCode = EIO; - goto error_exit; - } - - /* retCode should be 0 at this point */ - -error_exit: /* do this to make sure the mutex unlocks. */ - lck_mtx_unlock(gYarrowMutex); - return (retCode); -} - - -/* export good random numbers to the rest of the kernel */ -void -read_random(void* buffer, u_int numbytes) -{ - if (gYarrowMutex == 0) { /* are we initialized? */ - PreliminarySetup (); - } - - lck_mtx_lock(gYarrowMutex); - - - int bytes_read = 0; - - int bytes_remaining = numbytes; - while (bytes_remaining > 0) { - int bytes_to_read = min(bytes_remaining, kBlockSize - g_bytes_used); - if (bytes_to_read == 0) - { - random_block(g_random_data, TRUE); - g_bytes_used = 0; - bytes_to_read = min(bytes_remaining, kBlockSize); - } - - memmove ((u_int8_t*) buffer + bytes_read, ((u_int8_t*)g_random_data)+ g_bytes_used, bytes_to_read); - g_bytes_used += bytes_to_read; - bytes_read += bytes_to_read; - bytes_remaining -= bytes_to_read; + break; + retCode = write_random(rdBuffer, bytesToInput); + if (retCode != 0) + break; } - lck_mtx_unlock(gYarrowMutex); + return retCode; } /* @@ -579,30 +191,25 @@ read_random(void* buffer, u_int numbytes) int random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag) { - int retCode = 0; - - if (gRandomError != 0) - return (ENOTSUP); - - char buffer[64]; + int retCode = 0; + char buffer[512]; user_ssize_t bytes_remaining = uio_resid(uio); - while (bytes_remaining > 0 && retCode == 0) { - user_ssize_t bytesToRead = min(sizeof(buffer), bytes_remaining); - read_random(buffer, bytesToRead); - retCode = uiomove(buffer, bytesToRead, uio); - - if (retCode != 0) - goto error_exit; + while (bytes_remaining > 0 && retCode == 0) { + int bytesToRead = MIN(bytes_remaining, + (user_ssize_t) sizeof(buffer)); + read_random(buffer, bytesToRead); + + retCode = uiomove(buffer, bytesToRead, uio); + if (retCode != 0) + break; bytes_remaining = uio_resid(uio); - } - - retCode = 0; + } -error_exit: - return retCode; + return retCode; } + /* * Return an u_int32_t pseudo-random number. */ diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c index 0f36920f0..ddb2baa68 100644 --- a/bsd/dev/unix_startup.c +++ b/bsd/dev/unix_startup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -328,15 +328,18 @@ bsd_scale_setup(int scale) desiredvnodes = maxfiles; vnodes_sized = 1; if (scale > 4) { - /* clip them at 32G level */ + /* clip somaxconn at 32G level */ somaxconn = 2048; - /* 64G or more the hash size is 32k */ + /* + * For scale > 4 (> 32G), clip + * tcp_tcbhashsize to 32K + */ + tcp_tcbhashsize = 32 *1024; + if (scale > 7) { /* clip at 64G level */ - tcp_tcbhashsize = 16 *1024; max_cached_sock_count = 165000; } else { - tcp_tcbhashsize = 32 *1024; max_cached_sock_count = 60000 + ((scale-1) * 15000); } } else { diff --git a/bsd/dev/vn/vn.c b/bsd/dev/vn/vn.c index c5f9eae81..457e58370 100644 --- a/bsd/dev/vn/vn.c +++ b/bsd/dev/vn/vn.c @@ -492,7 +492,6 @@ vnread(dev_t dev, struct uio *uio, int ioflag) { struct vfs_context context; int error = 0; - boolean_t funnel_state; off_t offset; proc_t p; user_ssize_t resid; @@ -504,7 +503,6 @@ vnread(dev_t dev, struct uio *uio, int ioflag) return (ENXIO); } p = current_proc(); - funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + unit; if ((vn->sc_flags & VNF_INITED) == 0) { error = ENXIO; @@ -560,7 +558,6 @@ vnread(dev_t dev, struct uio *uio, int ioflag) } vnode_put(vn->sc_vp); done: - (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } @@ -569,7 +566,6 @@ vnwrite(dev_t dev, struct uio *uio, int ioflag) { struct vfs_context context; int error; - boolean_t funnel_state; off_t offset; proc_t p; user_ssize_t resid; @@ -581,7 +577,6 @@ vnwrite(dev_t dev, struct uio *uio, int ioflag) return (ENXIO); } p = current_proc(); - funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + unit; if ((vn->sc_flags & VNF_INITED) == 0) { error = ENXIO; @@ -640,7 +635,6 @@ vnwrite(dev_t dev, struct uio *uio, int ioflag) } vnode_put(vn->sc_vp); done: - (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } @@ -782,12 +776,10 @@ vnstrategy(struct buf *bp) int error = 0; long sz; /* in sc_secsize chunks */ daddr64_t blk_num; - boolean_t funnel_state; struct vnode * shadow_vp = NULL; struct vnode * vp = NULL; struct vfs_context context; - funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + vnunit(buf_device(bp)); if ((vn->sc_flags & VNF_INITED) == 0) { error = ENXIO; @@ -859,7 +851,6 @@ vnstrategy(struct buf *bp) } done: - (void) thread_funnel_set(kernel_flock, funnel_state); if (error) { buf_seterror(bp, error); } @@ -881,15 +872,13 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, int unit; struct vfsioattr ioattr; struct vn_ioctl_64 user_vnio; - boolean_t funnel_state; - struct vfs_context context; + struct vfs_context context; unit = vnunit(dev); if (vnunit(dev) >= NVNDEVICE) { return (ENXIO); } - funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + unit; error = proc_suser(p); if (error) { @@ -1102,7 +1091,6 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, break; } done: - (void) thread_funnel_set(kernel_flock, funnel_state); return(error); } @@ -1336,20 +1324,18 @@ vnsize(dev_t dev) int secsize; struct vn_softc *vn; int unit; - boolean_t funnel_state; unit = vnunit(dev); if (vnunit(dev) >= NVNDEVICE) { return (-1); } - funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + unit; if ((vn->sc_flags & VNF_INITED) == 0) secsize = -1; else secsize = vn->sc_secsize; - (void) thread_funnel_set(kernel_flock, funnel_state); + return (secsize); } diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index eb0ab4a16..0f3771a22 100644 --- 
a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,6 +67,7 @@ #include #include #include +#include #if CONFIG_PROTECT /* Forward declare the cprotect struct */ @@ -145,9 +146,20 @@ typedef struct hfsmount { /* Physical Description */ u_int32_t hfs_logical_block_size; /* Logical block size of the disk as reported by ioctl(DKIOCGETBLOCKSIZE), always a multiple of 512 */ - daddr64_t hfs_logical_block_count; /* Number of logical blocks on the disk */ + daddr64_t hfs_logical_block_count; /* Number of logical blocks on the disk, as reported by ioctl(DKIOCGETBLOCKCOUNT) */ u_int64_t hfs_logical_bytes; /* Number of bytes on the disk device this HFS is mounted on (blockcount * blocksize) */ - daddr64_t hfs_alt_id_sector; /* location of alternate VH/MDB */ + /* + * Regarding the two AVH sector fields below: + * Under normal circumstances, the filesystem's notion of the "right" location for the AVH is such that + * the partition and filesystem's are in sync. However, during a filesystem resize, HFS proactively + * writes a new AVH at the end of the filesystem, assuming that the partition will be resized accordingly. + * + * However, it is not technically a corruption if the partition size is never modified. As a result, we need + * to keep two copies of the AVH around "just in case" the partition size is not modified. + */ + daddr64_t hfs_partition_avh_sector; /* location of Alt VH w.r.t partition size */ + daddr64_t hfs_fs_avh_sector; /* location of Alt VH w.r.t filesystem size */ + u_int32_t hfs_physical_block_size; /* Physical block size of the disk as reported by ioctl(DKIOCGETPHYSICALBLOCKSIZE) */ u_int32_t hfs_log_per_phys; /* Number of logical blocks per physical block size */ @@ -250,16 +262,16 @@ typedef struct hfsmount { struct quotafile hfs_qfiles[MAXQUOTAS]; /* quota files */ /* Journaling variables: */ - void *jnl; // the journal for this volume (if one exists) + struct journal *jnl; // the journal for this volume (if one exists) struct vnode *jvp; // device where the journal lives (may be equal to devvp) u_int32_t jnl_start; // start block of the journal file (so we don't delete it) u_int32_t jnl_size; u_int32_t hfs_jnlfileid; u_int32_t hfs_jnlinfoblkid; - lck_rw_t hfs_global_lock; + lck_rw_t hfs_global_lock; u_int32_t hfs_global_lock_nesting; - void* hfs_global_lockowner; - + thread_t hfs_global_lockowner; + /* Notification variables: */ u_int32_t hfs_notification_conditions; u_int32_t hfs_freespace_notify_dangerlimit; @@ -295,24 +307,50 @@ typedef struct hfsmount { /* Sparse device variables: */ struct vnode * hfs_backingfs_rootvp; u_int32_t hfs_last_backingstatfs; - int hfs_sparsebandblks; + u_int32_t hfs_sparsebandblks; u_int64_t hfs_backingfs_maxblocks; #endif size_t hfs_max_inline_attrsize; lck_mtx_t hfs_mutex; /* protects access to hfsmount data */ - void *hfs_freezing_proc; /* who froze the fs */ - void *hfs_downgrading_proc; /* process who's downgrading to rdonly */ - lck_rw_t hfs_insync; /* protects sync/freeze interaction */ + + uint32_t hfs_syncers; // Count of the number of syncers running + enum { + HFS_THAWED, + HFS_WANT_TO_FREEZE, // This state stops hfs_sync from starting + HFS_FREEZING, // We're in this state whilst we're flushing + HFS_FROZEN // Everything gets blocked in hfs_lock_global + } hfs_freeze_state; + union { + /* + * When we're freezing (HFS_FREEZING) but not yet + * frozen 
(HFS_FROZEN), we record the freezing thread + * so that we stop other threads from taking locks, + * but allow the freezing thread. + */ + const struct thread *hfs_freezing_thread; + /* + * Once we have frozen (HFS_FROZEN), we record the + * process so that if it dies, we can automatically + * unfreeze. + */ + proc_t hfs_freezing_proc; + }; + + thread_t hfs_downgrading_thread; /* thread who's downgrading to rdonly */ /* Resize variables: */ u_int32_t hfs_resize_blocksmoved; u_int32_t hfs_resize_totalblocks; u_int32_t hfs_resize_progress; #if CONFIG_PROTECT + /* Data Protection fields */ struct cprotect *hfs_resize_cpentry; u_int16_t hfs_running_cp_major_vers; - uint32_t default_cp_class; + uint32_t default_cp_class; /* default effective class value */ + uint64_t cproot_flags; + uint8_t cp_crypto_generation; + uint8_t hfs_cp_lock_state; /* per-mount device lock state info */ #endif @@ -397,6 +435,7 @@ static __inline__ Boolean IsVCBDirty(ExtendedVCB *vcb) enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; #define HFS_ALLOCATOR_SCAN_INFLIGHT 0x0001 /* scan started */ +#define HFS_ALLOCATOR_SCAN_COMPLETED 0x0002 /* initial scan was completed */ /* HFS mount point flags */ #define HFS_READ_ONLY 0x00001 @@ -428,9 +467,9 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; #define HFS_RDONLY_DOWNGRADE 0x80000 #define HFS_DID_CONTIG_SCAN 0x100000 #define HFS_UNMAP 0x200000 -#define HFS_SSD 0x400000 -#define HFS_SUMMARY_TABLE 0x800000 -#define HFS_CS 0x1000000 +#define HFS_SSD 0x400000 +#define HFS_SUMMARY_TABLE 0x800000 +#define HFS_CS 0x1000000 /* Macro to update next allocation block in the HFS mount structure. If @@ -554,6 +593,11 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 }; */ #define MAC_GMT_FACTOR 2082844800UL +static inline __attribute__((const)) +uint64_t hfs_blk_to_bytes(uint32_t blk, uint32_t blk_size) +{ + return (uint64_t)blk * blk_size; // Avoid the overflow +} /***************************************************************************** FUNCTION PROTOTYPES @@ -563,6 +607,7 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 }; hfs_vnop_xxx functions from different files ******************************************************************************/ int hfs_vnop_readdirattr(struct vnop_readdirattr_args *); /* in hfs_attrlist.c */ +int hfs_vnop_getattrlistbulk(struct vnop_getattrlistbulk_args *); /* in hfs_attrlist.c */ int hfs_vnop_inactive(struct vnop_inactive_args *); /* in hfs_cnode.c */ int hfs_vnop_reclaim(struct vnop_reclaim_args *); /* in hfs_cnode.c */ @@ -657,10 +702,11 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp); extern int hfs_relocate(struct vnode *, u_int32_t, kauth_cred_t, struct proc *); /* Flags for HFS truncate */ -#define HFS_TRUNCATE_SKIPUPDATE 0x00000001 -#define HFS_TRUNCATE_SKIPTIMES 0x00000002 /* implied by skipupdate; it is a subset */ +#define HFS_TRUNCATE_SKIPUPDATE 0x00000001 +#define HFS_TRUNCATE_SKIPTIMES 0x00000002 /* implied by skipupdate; it is a subset */ + -extern int hfs_truncate(struct vnode *, off_t, int, int, int, vfs_context_t); +extern int hfs_truncate(struct vnode *, off_t, int, int, vfs_context_t); extern int hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, struct filefork *rsrcfork, u_int32_t fileid); @@ -683,6 +729,14 @@ extern int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t numBlocks, u_int32_t *alloc_count); extern int hfs_isrbtree_active (struct hfsmount *hfsmp); +extern errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool 
have_cnode_lock); + + +/***************************************************************************** + Functions from hfs_resize.c +******************************************************************************/ +int hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context); +int hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context); /***************************************************************************** @@ -710,7 +764,22 @@ extern int hfs_resize_progress(struct hfsmount *, u_int32_t *); /* If a runtime corruption is detected, mark the volume inconsistent * bit in the volume attributes. */ -void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp); + +typedef enum { + HFS_INCONSISTENCY_DETECTED, + + // Used when unable to rollback an operation that failed + HFS_ROLLBACK_FAILED, + + // Used when the latter part of an operation failed, but we chose not to roll back + HFS_OP_INCOMPLETE, + + // Used when someone told us to force an fsck on next mount + HFS_FSCK_FORCED, +} hfs_inconsistency_reason_t; + +void hfs_mark_inconsistent(struct hfsmount *hfsmp, + hfs_inconsistency_reason_t reason); void hfs_scan_blocks (struct hfsmount *hfsmp); @@ -730,7 +799,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, extern int hfsUnmount(struct hfsmount *hfsmp, struct proc *p); -extern int overflow_extents(struct filefork *fp); +extern bool overflow_extents(struct filefork *fp); extern int hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred, struct proc *p, int invokesuperuserstatus); @@ -739,6 +808,8 @@ extern int check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_ty extern int check_for_dataless_file(struct vnode *vp, uint64_t op_type); extern int hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid); +/* Return information about number of metadata blocks for volume */ +extern int hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo); /* * Journal lock function prototypes @@ -757,7 +828,8 @@ void hfs_unlock_mount (struct hfsmount *hfsmp); #define SFL_BITMAP 0x0004 #define SFL_ATTRIBUTE 0x0008 #define SFL_STARTUP 0x0010 -#define SFL_VALIDMASK (SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE | SFL_STARTUP) +#define SFL_VM_PRIV 0x0020 +#define SFL_VALIDMASK (SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE | SFL_STARTUP | SFL_VM_PRIV) extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype); extern void hfs_systemfile_unlock(struct hfsmount *, int); @@ -791,6 +863,8 @@ extern int hfs_virtualmetafile(struct cnode *); extern int hfs_start_transaction(struct hfsmount *hfsmp); extern int hfs_end_transaction(struct hfsmount *hfsmp); +extern void hfs_journal_lock(struct hfsmount *hfsmp); +extern void hfs_journal_unlock(struct hfsmount *hfsmp); extern int hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO); extern void hfs_syncer_lock(struct hfsmount *hfsmp); extern void hfs_syncer_unlock(struct hfsmount *hfsmp); @@ -806,6 +880,9 @@ extern int hfs_erase_unused_nodes(struct hfsmount *hfsmp); extern uint64_t hfs_usecs_to_deadline(uint64_t usecs); +extern int hfs_freeze(struct hfsmount *hfsmp); +extern int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process); + /***************************************************************************** Functions from hfs_vnops.c @@ -823,7 +900,7 @@ extern int hfs_btsync(struct vnode *vp, int sync_transaction); extern void replace_desc(struct cnode *cp, struct 
cat_desc *cdp); extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, - struct vnode **rvpp, int can_drop_lock, int error_on_unlink); + struct vnode **rvpp); extern int hfs_update(struct vnode *, int); @@ -846,11 +923,12 @@ int hfs_buildattrkey(u_int32_t fileID, const char *attrname, HFSPlusAttrKey *ke void hfs_xattr_init(struct hfsmount * hfsmp); int file_attribute_exist(struct hfsmount *hfsmp, uint32_t fileID); int init_attrdata_vnode(struct hfsmount *hfsmp); -int hfs_getxattr_internal(struct cnode *, struct vnop_getxattr_args *, - struct hfsmount *, u_int32_t); -int hfs_setxattr_internal(struct cnode *, caddr_t, size_t, - struct vnop_setxattr_args *, struct hfsmount *, u_int32_t); - +int hfs_xattr_read(vnode_t vp, const char *name, void *data, size_t *size); +int hfs_getxattr_internal(cnode_t *, struct vnop_getxattr_args *, + struct hfsmount *, u_int32_t); +int hfs_xattr_write(vnode_t vp, const char *name, const void *data, size_t size); +int hfs_setxattr_internal(struct cnode *, const void *, size_t, + struct vnop_setxattr_args *, struct hfsmount *, u_int32_t); /***************************************************************************** diff --git a/bsd/hfs/hfs_attrlist.c b/bsd/hfs/hfs_attrlist.c index c57f811e2..cb14646e1 100644 --- a/bsd/hfs/hfs_attrlist.c +++ b/bsd/hfs/hfs_attrlist.c @@ -71,6 +71,25 @@ static void packdirattr(struct attrblock *abp, struct hfsmount *hfsmp, static u_int32_t hfs_real_user_access(vnode_t vp, vfs_context_t ctx); +static void get_vattr_data_for_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct vnode *, struct cat_desc *, struct cat_attr *, + struct cat_fork *, struct cat_fork *, vfs_context_t); + +static void vattr_data_for_common_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct vnode *, struct cat_desc *, struct cat_attr *, + vfs_context_t); + +static void vattr_data_for_dir_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct vnode *, struct cat_desc *, struct cat_attr *); + +static void vattr_data_for_file_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct cat_attr *, struct cat_fork *, struct cat_fork *, + struct vnode *vp); + +static int hfs_readdirattr_internal(struct vnode *, struct attrlist *, + struct vnode_attr *, uio_t, uint64_t, int, uint32_t *, int *, int *, + vfs_context_t); + /* * readdirattr operation will return attributes for the items in the * directory specified. @@ -95,22 +114,94 @@ hfs_vnop_readdirattr(ap) vfs_context_t a_context; } */ *ap; { - struct vnode *dvp = ap->a_vp; + int error; + struct attrlist *alist = ap->a_alist; + + /* Check for invalid options and buffer space. */ + if (((ap->a_options & ~(FSOPT_NOINMEMUPDATE | FSOPT_NOFOLLOW)) != 0) || + (ap->a_maxcount <= 0)) { + return (EINVAL); + } + /* + * Reject requests for unsupported attributes. + */ + if ((alist->bitmapcount != ATTR_BIT_MAP_COUNT) || + (alist->commonattr & ~HFS_ATTR_CMN_VALID) || + (alist->volattr != 0) || + (alist->dirattr & ~HFS_ATTR_DIR_VALID) || + (alist->fileattr & ~HFS_ATTR_FILE_VALID) || + (alist->forkattr != 0)) { + return (EINVAL); + } + + error = hfs_readdirattr_internal(ap->a_vp, alist, NULL, ap->a_uio, + (uint64_t)ap->a_options, ap->a_maxcount, ap->a_newstate, + ap->a_eofflag, (int *)ap->a_actualcount, ap->a_context); + + return (error); +} + + +/* + * getattrlistbulk, like readdirattr, will return attributes for the items in + * the directory specified. + * + * It does not do . and .. entries. 
The problem is if you are at the root of the + * hfs directory and go to .. you could be crossing a mountpoint into a + * different (ufs) file system. The attributes that apply for it may not + * apply for the file system you are doing the readdirattr on. To make life + * simpler, this call will only return entries in its directory, hfs like. + */ +int +hfs_vnop_getattrlistbulk(ap) + struct vnop_getattrlistbulk_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct attrlist *a_alist; + struct vnode_attr *a_vap; + struct uio *a_uio; + void *a_private; + uint64_t a_options; + int32_t *a_eofflag; + int32_t *a_actualcount; + vfs_context_t a_context; + } */ *ap; +{ + int error = 0; + + error = hfs_readdirattr_internal(ap->a_vp, ap->a_alist, ap->a_vap, + ap->a_uio, (uint64_t)ap->a_options, 0, NULL, ap->a_eofflag, + (int *)ap->a_actualcount, ap->a_context); + + return (error); +} + +/* + * Common function for both hfs_vnop_readdirattr and hfs_vnop_getattrlistbulk. + * This either fills in a vnode_attr structure or fills in an attribute buffer. + * Currently the difference in behaviour required for the two vnops is keyed + * on whether the passed in vnode_attr pointer is null or not. If the pointer + * is null we fill in the buffer passed, and if it is not null we fill in the fields + * of the vnode_attr structure. + */ +int +hfs_readdirattr_internal(struct vnode *dvp, struct attrlist *alist, + struct vnode_attr *vap, uio_t uio, uint64_t options, int maxcount, + uint32_t *newstate, int *eofflag, int *actualcount, vfs_context_t ctx) +{ struct cnode *dcp; struct hfsmount * hfsmp; - struct attrlist *alist = ap->a_alist; - uio_t uio = ap->a_uio; - int maxcount = ap->a_maxcount; u_int32_t fixedblocksize; u_int32_t maxattrblocksize; u_int32_t currattrbufsize; void *attrbufptr = NULL; - void *attrptr; - void *varptr; + void *attrptr = NULL; + void *varptr = NULL; + caddr_t namebuf = NULL; struct attrblock attrblk; int error = 0; int index = 0; - int i, dir_entries = 0; + int i = 0; struct cat_desc *lastdescp = NULL; struct cat_entrylist *ce_list = NULL; directoryhint_t *dirhint = NULL; @@ -118,26 +209,13 @@ hfs_vnop_readdirattr(ap) int maxentries; int lockflags; u_int32_t dirchg = 0; + int reachedeof = 0; - *(ap->a_actualcount) = 0; - *(ap->a_eofflag) = 0; + *(actualcount) = 0; + *(eofflag) = 0; - /* Check for invalid options and buffer space. */ - if (((ap->a_options & ~(FSOPT_NOINMEMUPDATE | FSOPT_NOFOLLOW)) != 0) || - (uio_resid(uio) <= 0) || (uio_iovcnt(uio) > 1) || (maxcount <= 0)) { + if ((uio_resid(uio) <= 0) || (uio_iovcnt(uio) > 1)) return (EINVAL); - } - /* - * Reject requests for unsupported attributes. 
- */ - if ((alist->bitmapcount != ATTR_BIT_MAP_COUNT) || - (alist->commonattr & ~HFS_ATTR_CMN_VALID) || - (alist->volattr != 0) || - (alist->dirattr & ~HFS_ATTR_DIR_VALID) || - (alist->fileattr & ~HFS_ATTR_FILE_VALID) || - (alist->forkattr != 0)) { - return (EINVAL); - } if (VTOC(dvp)->c_bsdflags & UF_COMPRESSED) { int compressed = hfs_file_is_compressed(VTOC(dvp), 0); /* 0 == take the cnode lock */ @@ -150,7 +228,6 @@ hfs_vnop_readdirattr(ap) } } - /* * Take an exclusive directory lock since we manipulate the directory hints */ @@ -160,31 +237,45 @@ hfs_vnop_readdirattr(ap) dcp = VTOC(dvp); hfsmp = VTOHFS(dvp); - dir_entries = dcp->c_entries; dirchg = dcp->c_dirchangecnt; /* Extract directory index and tag (sequence number) from uio_offset */ index = uio_offset(uio) & HFS_INDEX_MASK; tag = uio_offset(uio) & ~HFS_INDEX_MASK; - if ((index + 1) > dir_entries) { - *(ap->a_eofflag) = 1; - error = 0; - goto exit2; - } + + /* + * We can't just use the valence as an optimization to avoid + * going to the catalog. It might be wrong (== 0), and that would + * cause us to avoid iterating the directory when it might actually have + * contents. Instead, use the catalog to tell us when we've hit EOF + * for this directory. + */ /* Get a buffer to hold packed attributes. */ fixedblocksize = (sizeof(u_int32_t) + hfs_attrblksize(alist)); /* 4 bytes for length */ - maxattrblocksize = fixedblocksize; - if (alist->commonattr & ATTR_CMN_NAME) - maxattrblocksize += kHFSPlusMaxFileNameBytes + 1; - MALLOC(attrbufptr, void *, maxattrblocksize, M_TEMP, M_WAITOK); - if (attrbufptr == NULL) { - error = ENOMEM; - goto exit2; - } - attrptr = attrbufptr; - varptr = (char *)attrbufptr + fixedblocksize; /* Point to variable-length storage */ + if (!vap) { + maxattrblocksize = fixedblocksize; + if (alist->commonattr & ATTR_CMN_NAME) + maxattrblocksize += kHFSPlusMaxFileNameBytes + 1; + + MALLOC(attrbufptr, void *, maxattrblocksize, M_TEMP, M_WAITOK); + if (attrbufptr == NULL) { + error = ENOMEM; + goto exit2; + } + attrptr = attrbufptr; + varptr = (char *)attrbufptr + fixedblocksize; /* Point to variable-length storage */ + } else { + if ((alist->commonattr & ATTR_CMN_NAME) && !vap->va_name) { + MALLOC(namebuf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK); + if (!namebuf) { + error = ENOMEM; + goto exit2; + } + vap->va_name = namebuf; + } + } /* Get a detached directory hint (cnode must be locked exclusive) */ dirhint = hfs_getdirhint(dcp, ((index - 1) & HFS_INDEX_MASK) | tag, TRUE); @@ -203,7 +294,9 @@ hfs_vnop_readdirattr(ap) * Constrain our list size. */ maxentries = uio_resid(uio) / (fixedblocksize + HFS_AVERAGE_NAME_SIZE); - maxentries = min(maxentries, maxcount); + /* There is no maxcount for the bulk vnop */ + if (!vap) + maxentries = min(maxentries, maxcount); maxentries = min(maxentries, MAXCATENTRIES); if (maxentries < 1) { error = EINVAL; @@ -224,19 +317,33 @@ hfs_vnop_readdirattr(ap) */ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_getentriesattr(hfsmp, dirhint, ce_list); + error = cat_getentriesattr(hfsmp, dirhint, ce_list, &reachedeof); /* Don't forget to release the descriptors later! */ hfs_systemfile_unlock(hfsmp, lockflags); - if (error == ENOENT) { - *(ap->a_eofflag) = TRUE; + if ((error == ENOENT) || (reachedeof != 0)) { + *(eofflag) = TRUE; error = 0; } if (error) { goto exit1; } + /* + * Check for a FS corruption in the valence.
We're holding the cnode lock + * exclusive since we need to serialize the directory hints, so if we found + * that the valence reported 0, but we actually found some items here, then + * silently minimally self-heal and bump the valence to 1. + */ + if ((dcp->c_entries == 0) && (ce_list->realentries > 0)) { + dcp->c_entries++; + dcp->c_flag |= (C_MODIFIED | C_FORCEUPDATE); + printf("hfs_vnop_readdirattr: repairing valence to non-zero! \n"); + /* force an update on dcp while we're still holding the lock. */ + hfs_update(dvp, 0); + } + /* * Drop the directory lock so we don't deadlock when we: * - acquire a child cnode lock @@ -264,15 +371,20 @@ hfs_vnop_readdirattr(ap) c_rsrcfork.cf_size = ce_list->entry[i].ce_rsrcsize; c_rsrcfork.cf_blocks = ce_list->entry[i].ce_rsrcblks; - if ((alist->commonattr & ATTR_CMN_USERACCESS) && - (cattrp->ca_recflags & kHFSHasSecurityMask)) { + if (((alist->commonattr & ATTR_CMN_USERACCESS) && + (cattrp->ca_recflags & kHFSHasSecurityMask)) +#if CONFIG_PROTECT + || + ((alist->commonattr & ATTR_CMN_DATA_PROTECT_FLAGS) && (vap)) +#endif + ) { /* * Obtain vnode for our vnode_authorize() calls. */ if (hfs_vget(hfsmp, cattrp->ca_fileid, &vp, 0, 0) != 0) { vp = NULL; } - } else if (!(ap->a_options & FSOPT_NOINMEMUPDATE)) { + } else if (vap || !(options & FSOPT_NOINMEMUPDATE)) { /* Get in-memory cnode data (if any). */ vp = hfs_chash_getvnode(hfsmp, cattrp->ca_fileid, 0, 0, 0); } @@ -295,58 +407,97 @@ hfs_vnop_readdirattr(ap) cp = NULL; } - *((u_int32_t *)attrptr) = 0; - attrptr = ((u_int32_t *)attrptr) + 1; - attrblk.ab_attrlist = alist; - attrblk.ab_attrbufpp = &attrptr; - attrblk.ab_varbufpp = &varptr; - attrblk.ab_flags = 0; - attrblk.ab_blocksize = maxattrblocksize; - attrblk.ab_context = ap->a_context; - - /* Pack catalog entries into attribute buffer. */ - hfs_packattrblk(&attrblk, hfsmp, vp, cdescp, cattrp, &c_datafork, &c_rsrcfork, ap->a_context); - currattrbufsize = ((char *)varptr - (char *)attrbufptr); - - /* All done with vnode. */ - if (vp != NULL) { - vnode_put(vp); - vp = NULL; - } + if (!vap) { + *((u_int32_t *)attrptr) = 0; + attrptr = ((u_int32_t *)attrptr) + 1; + attrblk.ab_attrlist = alist; + attrblk.ab_attrbufpp = &attrptr; + attrblk.ab_varbufpp = &varptr; + attrblk.ab_flags = 0; + attrblk.ab_blocksize = maxattrblocksize; + attrblk.ab_context = ctx; + + /* Pack catalog entries into attribute buffer. */ + hfs_packattrblk(&attrblk, hfsmp, vp, cdescp, cattrp, &c_datafork, &c_rsrcfork, ctx); + currattrbufsize = ((char *)varptr - (char *)attrbufptr); + + /* All done with vnode. */ + if (vp != NULL) { + vnode_put(vp); + vp = NULL; + } - /* Make sure there's enough buffer space remaining. */ - // LP64todo - fix this! - if (uio_resid(uio) < 0 || currattrbufsize > (u_int32_t)uio_resid(uio)) { - break; - } else { - *((u_int32_t *)attrbufptr) = currattrbufsize; - error = uiomove((caddr_t)attrbufptr, currattrbufsize, uio); - if (error != E_NONE) { + /* Make sure there's enough buffer space remaining. */ + // LP64todo - fix this! 
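For the readdirattr path the buffer layout matters: each packed entry is a 32-bit total-length word, a fixed-size attribute region, then variable-length data, which is what the attrptr/varptr arithmetic above maintains. A runnable sketch under those assumptions (sizes and names hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FIXED_SIZE 16   /* stand-in for hfs_attrblksize(alist) */

/* Pack one entry as: [u_int32_t total length][fixed region][variable data].
 * Returns 0 when the entry does not fit in the remaining space. */
static size_t
pack_entry(uint8_t *buf, size_t resid, const char *name)
{
    size_t varlen = strlen(name) + 1;
    uint32_t total = (uint32_t)(sizeof(uint32_t) + FIXED_SIZE + varlen);

    if (total > resid)                            /* not enough room left */
        return 0;

    memcpy(buf, &total, sizeof(total));           /* length prefix */
    memset(buf + sizeof(total), 0, FIXED_SIZE);   /* fixed attributes */
    memcpy(buf + sizeof(total) + FIXED_SIZE, name, varlen); /* variable */
    return total;
}

int
main(void)
{
    uint8_t buf[64];
    size_t used = pack_entry(buf, sizeof(buf), "file.txt");

    printf("packed %zu bytes\n", used);
    return 0;
}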
+ if (uio_resid(uio) < 0 || + currattrbufsize > (u_int32_t)uio_resid(uio)) { break; + } else { + *((u_int32_t *)attrbufptr) = currattrbufsize; + error = uiomove((caddr_t)attrbufptr, currattrbufsize, uio); + if (error != E_NONE) { + break; + } + attrptr = attrbufptr; + /* Point to variable-length storage */ + varptr = (char *)attrbufptr + fixedblocksize; + /* Save the last valid catalog entry */ + lastdescp = &ce_list->entry[i].ce_desc; + index++; + *actualcount += 1; + + /* Termination checks */ + if ((--maxcount <= 0) || + // LP64todo - fix this! + uio_resid(uio) < 0 || + ((u_int32_t)uio_resid(uio) < (fixedblocksize + HFS_AVERAGE_NAME_SIZE))){ + break; + } + } + } else { + size_t orig_resid = (size_t)uio_resid(uio); + size_t resid; + + get_vattr_data_for_attrs(alist, vap, hfsmp, vp, cdescp, + cattrp, &c_datafork, &c_rsrcfork, ctx); + +#if CONFIG_PROTECT + if ((alist->commonattr & ATTR_CMN_DATA_PROTECT_FLAGS) && + vp) { + int class; + + if (!cp_vnode_getclass(vp, &class)) { + VATTR_RETURN(vap, va_dataprotect_class, + (uint32_t)class); + } } - attrptr = attrbufptr; - /* Point to variable-length storage */ - varptr = (char *)attrbufptr + fixedblocksize; +#endif + error = vfs_attr_pack(vp, uio, alist, options, vap, + NULL, ctx); + + /* All done with vnode. */ + if (vp) { + vnode_put(vp); + vp = NULL; + } + + resid = uio_resid(uio); + + /* Was this entry successful? */ + if (error || resid == orig_resid) + break; + /* Save the last valid catalog entry */ lastdescp = &ce_list->entry[i].ce_desc; index++; - *ap->a_actualcount += 1; + *actualcount += 1; - /* Termination checks */ - if ((--maxcount <= 0) || - // LP64todo - fix this! - uio_resid(uio) < 0 || - ((u_int32_t)uio_resid(uio) < (fixedblocksize + HFS_AVERAGE_NAME_SIZE))){ + /* Do we have the bare minimum for the next entry? */ + if (resid < sizeof(uint32_t)) break; - } } } /* for each catalog entry */ - /* For small directories, check if we're all done. */ - if (*ap->a_actualcount == (u_long)dir_entries) { - *(ap->a_eofflag) = TRUE; - } - /* If we skipped catalog entries for reserved files that should * not be listed in namespace, update the index accordingly. */ @@ -355,10 +506,14 @@ hfs_vnop_readdirattr(ap) ce_list->skipentries = 0; } - /* If there are more entries then save the last name. */ - if (index < dir_entries - && !(*(ap->a_eofflag)) - && lastdescp != NULL) { + /* + * If there are more entries then save the last name. + * Key this behavior based on whether or not we observed EOFFLAG. + * + * Do not use the valence as a way to determine if we hit EOF, since + * it can be wrong. Use the catalog's output only. + */ + if ((*(eofflag) == 0) && lastdescp != NULL) { /* Remember last entry */ if ((dirhint->dh_desc.cd_flags & CD_HASBUF) && @@ -374,11 +529,8 @@ hfs_vnop_readdirattr(ap) dirhint->dh_desc.cd_cnid = lastdescp->cd_cnid; dirhint->dh_desc.cd_hint = lastdescp->cd_hint; dirhint->dh_desc.cd_encoding = lastdescp->cd_encoding; - } else { - /* No more entries. */ - *(ap->a_eofflag) = TRUE; - } - + } + /* All done with the catalog descriptors. */ for (i = 0; i < (int)ce_list->realentries; ++i) cat_releasedesc(&ce_list->entry[i].ce_desc); @@ -394,20 +546,31 @@ exit1: dirhint->dh_index |= tag; exit2: - *ap->a_newstate = dirchg; + if (newstate) + *newstate = dirchg; - /* Drop directory hint on error or if there are no more entries */ + /* + * Drop directory hint on error or if there are no more entries, + * only if EOF was seen.
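In the bulk path, vfs_attr_pack() consumes the uio directly, so the only way to tell whether an entry actually landed is to compare the residual count before and after, as the check above does. A runnable toy version of that no-progress test:

#include <stdio.h>

/* Stand-in for vfs_attr_pack(): consumes 'cost' bytes if they fit. */
static int
pack_one(size_t *resid, size_t cost)
{
    if (cost > *resid)
        return 0;        /* success, but nothing consumed: didn't fit */
    *resid -= cost;
    return 0;
}

int
main(void)
{
    size_t resid = 100;
    size_t costs[] = { 40, 40, 40 };
    int packed = 0;

    for (int i = 0; i < 3; i++) {
        size_t before = resid;

        if (pack_one(&resid, costs[i]) || resid == before)
            break;       /* error or no forward progress: stop, don't spin */
        packed++;        /* only now count the entry as returned */
    }
    printf("packed %d entries, %zu bytes left\n", packed, resid);
    return 0;
}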
+ */ if (dirhint) { - if ((error != 0) || (index >= dir_entries) || *(ap->a_eofflag)) + if ((error != 0) || *(eofflag)) hfs_reldirhint(dcp, dirhint); else hfs_insertdirhint(dcp, dirhint); } + if (namebuf) { + FREE(namebuf, M_TEMP); + vap->va_name = NULL; + } if (attrbufptr) FREE(attrbufptr, M_TEMP); if (ce_list) FREE(ce_list, M_TEMP); + if (vap && *actualcount && error) + error = 0; + hfs_unlock(dcp); return (error); } @@ -527,7 +690,6 @@ packnameattr( *abp->ab_varbufpp = varbufptr; } - static void packcommonattr( struct attrblock *abp, @@ -742,7 +904,6 @@ packcommonattr( *((u_int32_t *)attrbufptr) = cap->ca_flags; attrbufptr = ((u_int32_t *)attrbufptr) + 1; } - if (ATTR_CMN_USERACCESS & attr) { u_int32_t user_access; @@ -1091,3 +1252,470 @@ Exit: return (permissions); } + +/* + * =========================================================================== + * Support functions for filling up a vnode_attr structure based on attributes + * requested. + * =========================================================================== + */ +void +get_vattr_data_for_attrs(struct attrlist *alp, struct vnode_attr *vap, + struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc *descp, + struct cat_attr *atrp, struct cat_fork *datafork, struct cat_fork *rsrcfork, + vfs_context_t ctx) +{ + if (alp->commonattr) + vattr_data_for_common_attrs(alp, vap, hfsmp, vp, descp, atrp, + ctx); + + if (alp->dirattr && S_ISDIR(atrp->ca_mode)) + vattr_data_for_dir_attrs(alp, vap, hfsmp, vp, descp, atrp); + + if (alp->fileattr && !S_ISDIR(atrp->ca_mode)) { + vattr_data_for_file_attrs(alp, vap, hfsmp, atrp, datafork, + rsrcfork, vp); + } +} + +static void +copy_name_attr(struct vnode_attr *vap, struct vnode *vp, const u_int8_t *name, + int namelen) +{ + char *mpname; + size_t mpnamelen; + u_int32_t attrlength; + u_int8_t empty = 0; + + /* A cnode's name may be incorrect for the root of a mounted + * filesystem (it can be mounted on a different directory name + * than the name of the volume, such as "blah-1"). So for the + * root directory, it's best to return the last element of the + * location where the volume's mounted: + */ + if ((vp != NULL) && vnode_isvroot(vp) && + (mpname = mountpointname(vnode_mount(vp)))) { + mpnamelen = strlen(mpname); + + /* Trim off any trailing slashes: */ + while ((mpnamelen > 0) && (mpname[mpnamelen-1] == '/')) + --mpnamelen; + + /* If there's anything left, use it instead of the volume's name */ + if (mpnamelen > 0) { + name = (u_int8_t *)mpname; + namelen = mpnamelen; + } + } + + if (name == NULL) { + name = &empty; + namelen = 0; + } + + attrlength = namelen + 1; + (void) strncpy((char *)vap->va_name, (const char *) name, attrlength); + /* + * round up to 8 and zero out the rounded up bytes.
+ */ + attrlength = min(kHFSPlusMaxFileNameBytes, ((attrlength + 7) & ~0x07)); + bzero(vap->va_name + attrlength, kHFSPlusMaxFileNameBytes - attrlength); +} + +static void +vattr_data_for_common_attrs( struct attrlist *alp, struct vnode_attr *vap, + struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc *cdp, + struct cat_attr *cap, vfs_context_t ctx) +{ + attrgroup_t attr = alp->commonattr; + struct mount *mp = HFSTOVFS(hfsmp); + uid_t cuid = 1; + int isroot = 0; + + if (attr & (ATTR_CMN_OWNERID | ATTR_CMN_GRPID)) { + cuid = kauth_cred_getuid(vfs_context_ucred(ctx)); + isroot = cuid == 0; + } + + if (ATTR_CMN_NAME & attr) { + if (vap->va_name) { + copy_name_attr(vap, vp, cdp->cd_nameptr, + cdp->cd_namelen); + VATTR_SET_SUPPORTED(vap, va_name); + } else { + VATTR_CLEAR_SUPPORTED(vap, va_name); + } + } + + if (ATTR_CMN_DEVID & attr) { + vap->va_devid = hfsmp->hfs_raw_dev; + VATTR_SET_SUPPORTED(vap, va_devid); + } + + if (ATTR_CMN_FSID & attr) { + vap->va_fsid64.val[0] = hfsmp->hfs_raw_dev; + vap->va_fsid64.val[1] = vfs_typenum(mp); + VATTR_SET_SUPPORTED(vap, va_fsid64); + } + /* + * We always provide the objtype even if not asked because VFS helper + * functions depend on knowing the object's type. + */ + vap->va_objtype = IFTOVT(cap->ca_mode); + VATTR_SET_SUPPORTED(vap, va_objtype); + + if (ATTR_CMN_OBJTAG & attr) { + vap->va_objtag = VT_HFS; + VATTR_SET_SUPPORTED(vap, va_objtag); + } + /* + * Exporting file IDs from HFS Plus: + * + * For "normal" files the c_fileid is the same value as the + * c_cnid. But for hard link files, they are different - the + * c_cnid belongs to the active directory entry (ie the link) + * and the c_fileid is for the actual inode (ie the data file). + * + * The stat call (getattr) will always return the c_fileid + * and Carbon APIs, which are hardlink-ignorant, will always + * receive the c_cnid (from getattrlist). + */ + if ((ATTR_CMN_OBJID & attr) || + (ATTR_CMN_OBJPERMANENTID & attr)) { + vap->va_linkid = cdp->cd_cnid; + VATTR_SET_SUPPORTED(vap, va_linkid); + } + + if (ATTR_CMN_PAROBJID & attr) { + vap->va_parentid = cdp->cd_parentcnid; + VATTR_SET_SUPPORTED(vap, va_parentid); + } + + if (ATTR_CMN_SCRIPT & attr) { + vap->va_encoding = cdp->cd_encoding; + VATTR_SET_SUPPORTED(vap, va_encoding); + } + + if (ATTR_CMN_CRTIME & attr) { + vap->va_create_time.tv_sec = cap->ca_itime; + vap->va_create_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_create_time); + } + + if (ATTR_CMN_MODTIME & attr) { + vap->va_modify_time.tv_sec = cap->ca_mtime; + vap->va_modify_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_modify_time); + } + + if (ATTR_CMN_CHGTIME & attr) { + vap->va_change_time.tv_sec = cap->ca_ctime; + vap->va_change_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_change_time); + } + + if (ATTR_CMN_ACCTIME & attr) { + vap->va_access_time.tv_sec = cap->ca_atime; + vap->va_access_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_access_time); + } + + if (ATTR_CMN_BKUPTIME & attr) { + vap->va_backup_time.tv_sec = cap->ca_btime; + vap->va_backup_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_backup_time); + } + + if (ATTR_CMN_FNDRINFO & attr) { + u_int8_t *finfo = NULL; + + bcopy(&cap->ca_finderinfo, &vap->va_finderinfo[0], + sizeof(u_int8_t) * 32); + finfo = (u_int8_t*)(&vap->va_finderinfo[0]); + + /* Don't expose a symlink's private type/creator. 
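copy_name_attr() above does two easily-missed things: it prefers the mount point's last path element (minus trailing slashes) for the volume root, and it rounds the copied length up to a multiple of 8, zeroing the tail so no stale bytes reach userspace. A runnable sketch of both steps, with the buffer size as a stand-in:

#include <stdio.h>
#include <string.h>

#define NAMEBUF 255                 /* stand-in for kHFSPlusMaxFileNameBytes */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Trim trailing slashes from a mount point path, as copy_name_attr()
 * does; returns the trimmed length (0 means "nothing usable"). */
static size_t
trimmed_len(const char *mpname)
{
    size_t n = strlen(mpname);

    while (n > 0 && mpname[n - 1] == '/')
        --n;
    return n;
}

int
main(void)
{
    char name[NAMEBUF] = { 0 };
    const char *src = "blah-1///";
    size_t n = trimmed_len(src);
    unsigned attrlength;

    memcpy(name, src, n);           /* use mount point name, slashes gone */

    /* Round the used length (incl. NUL) up to 8 and zero the tail. */
    attrlength = (unsigned)n + 1;
    attrlength = MIN(NAMEBUF, (attrlength + 7) & ~0x07u);
    memset(name + attrlength, 0, NAMEBUF - attrlength);

    printf("name=\"%s\" rounded=%u\n", name, attrlength);   /* 8 */
    return 0;
}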
*/ + if (S_ISLNK(cap->ca_mode)) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)finfo; + fip->fdType = 0; + fip->fdCreator = 0; + } + + /* advance 16 bytes into the attrbuf */ + finfo = finfo + 16; + + /* also don't expose the date_added or write_gen_counter fields */ + if (S_ISREG(cap->ca_mode) || S_ISLNK(cap->ca_mode)) { + struct FndrExtendedFileInfo *extinfo = + (struct FndrExtendedFileInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } else if (S_ISDIR(cap->ca_mode)) { + struct FndrExtendedDirInfo *extinfo = + (struct FndrExtendedDirInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } + + VATTR_SET_SUPPORTED(vap, va_finderinfo); + } + + if (ATTR_CMN_OWNERID & attr) { + uid_t nuid = cap->ca_uid; + + if (!isroot) { + if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) + nuid = cuid; + else if (nuid == UNKNOWNUID) + nuid = cuid; + } + + vap->va_uid = nuid; + VATTR_SET_SUPPORTED(vap, va_uid); + } + + if (ATTR_CMN_GRPID & attr) { + gid_t ngid = cap->ca_gid; + + if (!isroot) { + gid_t cgid = kauth_cred_getgid(vfs_context_ucred(ctx)); + if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) + ngid = cgid; + else if (ngid == UNKNOWNUID) + ngid = cgid; + } + + vap->va_gid = ngid; + VATTR_SET_SUPPORTED(vap, va_gid); + } + + if (ATTR_CMN_ACCESSMASK & attr) { + uint32_t nmode; + /* + * [2856576] Since we are dynamically changing the owner, also + * effectively turn off the set-user-id and set-group-id bits, + * just like chmod(2) would when changing ownership. This prevents + * a security hole where set-user-id programs run as whoever is + * logged on (or root if nobody is logged in yet!) + */ + nmode = (cap->ca_uid == UNKNOWNUID) ? + cap->ca_mode & ~(S_ISUID | S_ISGID) : cap->ca_mode; + + vap->va_mode = nmode; + VATTR_SET_SUPPORTED(vap, va_mode); + } + + if (ATTR_CMN_FLAGS & attr) { + vap->va_flags = cap->ca_flags; + VATTR_SET_SUPPORTED(vap, va_flags); + } + + if (ATTR_CMN_GEN_COUNT & attr) { + vap->va_write_gencount = hfs_get_gencount_from_blob( + (const uint8_t *)cap->ca_finderinfo, cap->ca_mode); + VATTR_SET_SUPPORTED(vap, va_write_gencount); + } + + if (ATTR_CMN_DOCUMENT_ID & attr) { + vap->va_document_id = hfs_get_document_id_from_blob( + (const uint8_t *)cap->ca_finderinfo, cap->ca_mode); + VATTR_SET_SUPPORTED(vap, va_document_id); + } + + if (ATTR_CMN_USERACCESS & attr) { + u_int32_t user_access; + + /* Take the long path when we have an ACL */ + if ((vp != NULLVP) && (cap->ca_recflags & kHFSHasSecurityMask)) { + user_access = hfs_real_user_access(vp, ctx); + } else { + user_access = DerivePermissionSummary(cap->ca_uid, cap->ca_gid, + cap->ca_mode, mp, vfs_context_ucred(ctx), 0); + } + /* Also consider READ-ONLY file system. */ + if (vfs_flags(mp) & MNT_RDONLY) { + user_access &= ~W_OK; + } + /* Locked objects are not writable either */ + if ((cap->ca_flags & UF_IMMUTABLE) && (vfs_context_suser(ctx) != 0)) + user_access &= ~W_OK; + if ((cap->ca_flags & SF_IMMUTABLE) && (vfs_context_suser(ctx) == 0)) + user_access &= ~W_OK; + + vap->va_user_access = user_access; + VATTR_SET_SUPPORTED(vap, va_user_access); + } + + /* + * Right now the best we can do is tell if we *don't* have extended + * security (like hfs_vnop_getattr). 
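The OWNERID/GRPID handling above substitutes the caller's IDs when the volume was mounted with MNT_UNKNOWNPERMISSIONS or when the on-disk owner is the "unknown" marker, except for root. A runnable distillation of that rule; the flag value below is a placeholder for the demo, though UNKNOWNUID itself is 99 in HFS:

#include <stdio.h>
#include <sys/types.h>

#define UNKNOWNUID ((uid_t)99)          /* HFS's "unknown owner" marker */
#define MNT_UNKNOWNPERMISSIONS 0x1      /* placeholder bit for this demo */

/* Report the calling user as owner when on-disk ownership is unusable,
 * unless the caller is root, who always sees the raw owner. */
static uid_t
effective_owner(uid_t on_disk, uid_t caller, int mntflags)
{
    if (caller == 0)
        return on_disk;
    if ((mntflags & MNT_UNKNOWNPERMISSIONS) || on_disk == UNKNOWNUID)
        return caller;
    return on_disk;
}

int
main(void)
{
    printf("%u\n", (unsigned)effective_owner(UNKNOWNUID, 501, 0)); /* 501 */
    return 0;
}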
*/ + if (ATTR_CMN_EXTENDED_SECURITY & attr) { + if (!(cap->ca_recflags & kHFSHasSecurityMask)) { + vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; + VATTR_SET_SUPPORTED(vap, va_acl); + } + } + + if (ATTR_CMN_FILEID & attr) { + vap->va_fileid = cap->ca_fileid; + VATTR_SET_SUPPORTED(vap, va_fileid); + } + + if (ATTR_CMN_PARENTID & attr) { + vap->va_parentid = cdp->cd_parentcnid; + VATTR_SET_SUPPORTED(vap, va_parentid); + } + + if (ATTR_CMN_ADDEDTIME & attr) { + if (cap->ca_recflags & kHFSHasDateAddedMask) { + vap->va_addedtime.tv_sec = hfs_get_dateadded_from_blob( + (const uint8_t *)cap->ca_finderinfo, cap->ca_mode); + vap->va_addedtime.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_addedtime); + } + } +} + +static void +vattr_data_for_dir_attrs(struct attrlist *alp, struct vnode_attr *vap, + struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc * descp, + struct cat_attr * cattrp) +{ + attrgroup_t attr = alp->dirattr; + u_int32_t entries; + + /* + * The DIR_LINKCOUNT is the count of real directory hard links. + * (i.e. it's not the sum of the implied "." and ".." references + * typically used in stat's st_nlink field) + */ + if (ATTR_DIR_LINKCOUNT & attr) { + vap->va_dirlinkcount = cattrp->ca_linkcount; + VATTR_SET_SUPPORTED(vap, va_dirlinkcount); + } + if (ATTR_DIR_ENTRYCOUNT & attr) { + entries = cattrp->ca_entries; + + if (descp->cd_parentcnid == kHFSRootParentID) { + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->jnl || + ((hfsmp->vcbAtrb & kHFSVolumeJournaledMask) && + (hfsmp->hfs_flags & HFS_READ_ONLY))) + entries -= 2; /* hide the journal files */ + } + + vap->va_nchildren = entries; + VATTR_SET_SUPPORTED(vap, va_nchildren); + } + + if (ATTR_DIR_MOUNTSTATUS & attr) { + /* + * There is no vnode_attr for mount point status. + * XXX. Should there be?
+ */ + u_int32_t mstatus = 0; + + if (vp != NULL && vnode_mountedhere(vp) != NULL) + mstatus = DIR_MNTSTATUS_MNTPOINT; + } +} + +static void +vattr_data_for_file_attrs(struct attrlist *alp, struct vnode_attr *vap, + struct hfsmount *hfsmp, struct cat_attr *cattrp, struct cat_fork *datafork, + struct cat_fork *rsrcfork, struct vnode *vp) +{ +#if !HFS_COMPRESSION +#pragma unused(vp) +#endif + attrgroup_t attr = alp->fileattr; + off_t da_size, rsrc_len, rsrc_alloc; + u_int32_t allocblksize; + + allocblksize = HFSTOVCB(hfsmp)->blockSize; + + off_t datasize = datafork->cf_size; + off_t totalsize = datasize + rsrcfork->cf_size; +#if HFS_COMPRESSION + int handle_compressed; + handle_compressed = (cattrp->ca_flags & UF_COMPRESSED);// && hfs_file_is_compressed(VTOC(vp), 1); + + if (handle_compressed) { + if (attr & (ATTR_FILE_DATALENGTH|ATTR_FILE_TOTALSIZE)) { + if ( 0 == hfs_uncompressed_size_of_compressed_file(hfsmp, vp, cattrp->ca_fileid, &datasize, 1) ) { /* 1 == don't take the cnode lock */ + /* total size of a compressed file is just the data size */ + totalsize = datasize; + } + } + } +#endif + + if (ATTR_FILE_LINKCOUNT & attr) { + vap->va_nlink = cattrp->ca_linkcount; + VATTR_SET_SUPPORTED(vap, va_nlink); + } + if (ATTR_FILE_TOTALSIZE & attr) { + VATTR_RETURN(vap, va_total_size, totalsize); + } + if (ATTR_FILE_ALLOCSIZE & attr) { + VATTR_RETURN(vap, va_total_alloc, + (off_t)cattrp->ca_blocks * (off_t)allocblksize ); + } + if (ATTR_FILE_IOBLOCKSIZE & attr) { + VATTR_RETURN(vap, va_iosize, hfsmp->hfs_logBlockSize); + } + + /* ATTR_FILE_CLUMPSIZE is obsolete */ + + if (ATTR_FILE_DEVTYPE & attr) { + dev_t dev = 0; + + if (S_ISBLK(cattrp->ca_mode) || S_ISCHR(cattrp->ca_mode)) + dev = (u_int32_t)cattrp->ca_rdev; + + VATTR_RETURN(vap, va_rdev, dev); + } + + if (ATTR_FILE_DATALENGTH & attr) { + VATTR_RETURN(vap, va_data_size, datasize); + } +#if HFS_COMPRESSION + /* fake the data fork size on a decmpfs compressed file to reflect the + * uncompressed size. This ensures proper reading and copying of these + * files. + * NOTE: we may need to get the vnode here because the vnode parameter + * passed by hfs_vnop_readdirattr() may be null. + */ + + if (handle_compressed) { + da_size = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + rsrc_len = 0; + rsrc_alloc = 0; + } + else +#endif + { + da_size = (off_t)datafork->cf_blocks * (off_t)allocblksize; + rsrc_len = rsrcfork->cf_size; + rsrc_alloc = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + } + + if (ATTR_FILE_DATAALLOCSIZE & attr) { + VATTR_RETURN(vap, va_data_alloc, da_size); + } + + if (ATTR_FILE_RSRCLENGTH & attr) { + VATTR_RETURN(vap, va_rsrc_length, rsrc_len); + } + + if (ATTR_FILE_RSRCALLOCSIZE & attr) { + VATTR_RETURN(vap, va_rsrc_alloc, rsrc_alloc); + } +} diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 5fc36da95..fefc36ad3 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -663,7 +663,7 @@ again: hfs_lock_mount (hfsmp); if (hfsmp->hfs_flags & HFS_CREATING_BTREE) { /* Someone else beat us, wait for them to finish. 
*/ - (void) msleep(hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex, + (void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex, PDROP | PINOD, "hfs_create_attr_btree", 0); if (hfsmp->hfs_attribute_vp) { return (0); @@ -924,7 +924,7 @@ exit: */ hfs_lock_mount (hfsmp); hfsmp->hfs_flags &= ~HFS_CREATING_BTREE; - wakeup((caddr_t)hfsmp->hfs_attribute_cp); + wakeup((caddr_t)&hfsmp->hfs_attribute_cp); hfs_unlock_mount (hfsmp); return (result); diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index c7aa7b38e..ef0b4a61e 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,8 +67,8 @@ struct btobj { struct update_state { struct cat_desc * s_desc; struct cat_attr * s_attr; - struct cat_fork * s_datafork; - struct cat_fork * s_rsrcfork; + const struct cat_fork * s_datafork; + const struct cat_fork * s_rsrcfork; struct hfsmount * s_hfsmp; }; @@ -139,7 +139,7 @@ static int buildthread(void *keyp, void *recp, int std_hfs, int directory); static int cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalogFile *crp); static int cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc *descp, struct cat_attr *attrp, - struct cat_fork *dataforkp, struct cat_fork *rsrcforkp); + const struct cat_fork *dataforkp, const struct cat_fork *rsrcforkp); @@ -233,8 +233,8 @@ int cat_remove_idhash (cat_preflightid_t *preflight) { * catalog by checking the ID hash table. */ int -cat_acquire_cnid (struct hfsmount *hfsmp, cnid_t *new_cnid) { - +cat_acquire_cnid (struct hfsmount *hfsmp, cnid_t *new_cnid) +{ uint32_t nextCNID; struct BTreeIterator *iterator; FSBufferDescriptor btdata; @@ -466,7 +466,7 @@ cat_convertkey( cnid = getcnid(recp); if (cnid == 0) { /* If this CNID == 0, it's invalid. Mark as corrupt */ - hfs_mark_volume_inconsistent (hfsmp); + hfs_mark_inconsistent (hfsmp, HFS_INCONSISTENCY_DETECTED); err = EINVAL; } else { @@ -898,7 +898,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h cnid = getcnid(recp); if (cnid == 0) { /* CNID of 0 is invalid. Mark as corrupt */ - hfs_mark_volume_inconsistent (hfsmp); + hfs_mark_inconsistent (hfsmp, HFS_INCONSISTENCY_DETECTED); result = EINVAL; goto exit; } @@ -1088,7 +1088,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h * than that which is in its extent records.
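The msleep/wakeup hunks above switch both sides from the value of hfsmp->hfs_attribute_cp to its address, presumably because the field is still NULL (and later changes) while the attributes B-tree is being created; the field's address is the only wait channel both sides can agree on. A runnable userspace analogue of the address-as-channel idea, using pthreads:

#include <pthread.h>
#include <stdio.h>

/* Toy wait-channel: a sleeper blocks until a wakeup names the same
 * address it slept on -- the property the fix above restores by using
 * &field (stable) instead of the field's value (NULL, then changing). */
static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv  = PTHREAD_COND_INITIALIZER;
static const void *posted;

static void
sleep_on(const void *chan)
{
    pthread_mutex_lock(&mtx);
    while (posted != chan)
        pthread_cond_wait(&cv, &mtx);
    posted = NULL;                 /* consume the wakeup */
    pthread_mutex_unlock(&mtx);
}

static void
wakeup_chan(const void *chan)
{
    pthread_mutex_lock(&mtx);
    posted = chan;
    pthread_cond_broadcast(&cv);
    pthread_mutex_unlock(&mtx);
}

static void *
creator(void *field)
{
    wakeup_chan(field);            /* wake anyone sleeping on &field */
    return NULL;
}

int
main(void)
{
    void *attribute_cp = NULL;     /* still NULL while the B-tree is built */
    pthread_t t;

    pthread_create(&t, NULL, creator, &attribute_cp);
    sleep_on(&attribute_cp);       /* sleep on the ADDRESS, not the value */
    pthread_join(t, NULL);
    puts("woken");
    return 0;
}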
*/ - (void) hfs_mark_volume_inconsistent (hfsmp); + (void) hfs_mark_inconsistent (hfsmp, HFS_INCONSISTENCY_DETECTED); forkp->cf_blocks = validblks; if (attrp != NULL) { @@ -1225,7 +1225,7 @@ cat_create(struct hfsmount *hfsmp, cnid_t new_fileid, struct cat_desc *descp, st * volume inconsistent */ printf ("hfs: cat_create() failed to delete thread record id=%u on vol=%s\n", new_fileid, hfsmp->vcbVN); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); } } goto exit; @@ -1466,7 +1466,7 @@ cat_rename ( /* Get the CNID after calling searchrecord */ cnid = getcnid (recp); if (cnid == 0) { - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); result = EINVAL; goto exit; } @@ -1492,7 +1492,7 @@ cat_rename ( err = BTInsertRecord(fcb, from_iterator, &btdata, datasize); if (err) { printf("hfs: cat_create: could not undo (BTInsert = %d)\n", err); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); result = err; goto exit; } @@ -1519,7 +1519,7 @@ cat_rename ( err = BTDeleteRecord(fcb, to_iterator); if (err) { printf("hfs: cat_create: could not undo (BTDelete = %d)\n", err); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); result = err; goto exit; } @@ -1685,7 +1685,7 @@ cat_delete(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr if (BTDeleteRecord(fcb, iterator)) { if (!std_hfs) { printf ("hfs: cat_delete() failed to delete thread record id=%u on vol=%s\n", cnid, hfsmp->vcbVN); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); } } @@ -1704,7 +1704,7 @@ exit: */ static int cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc *descp, struct cat_attr *attrp, - struct cat_fork *dataforkp, struct cat_fork *rsrcforkp) + const struct cat_fork *dataforkp, const struct cat_fork *rsrcforkp) { FCB * fcb; BTreeIterator * iterator; @@ -1764,7 +1764,7 @@ exit: */ int cat_update(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp, - struct cat_fork *dataforkp, struct cat_fork *rsrcforkp) + const struct cat_fork *dataforkp, const struct cat_fork *rsrcforkp) { return cat_update_internal(hfsmp, false, descp, attrp, dataforkp, rsrcforkp); } @@ -1778,7 +1778,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st { struct cat_desc *descp; struct cat_attr *attrp; - struct cat_fork *forkp; + const struct cat_fork *forkp; struct hfsmount *hfsmp; long blksize; @@ -2460,18 +2460,12 @@ cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr * fcb = hfsmp->hfs_catalog_cp->c_datafork; /* - * Get the next CNID. We can change it since we hold the catalog lock. + * Get the next CNID. Note that we are currently holding catalog lock. */ - nextCNID = hfsmp->vcbNxtCNID; - if (nextCNID == 0xFFFFFFFF) { - hfs_lock_mount (hfsmp); - hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; - hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; - hfs_unlock_mount(hfsmp); - } else { - hfsmp->vcbNxtCNID++; + result = cat_acquire_cnid(hfsmp, &nextCNID); + if (result) { + return result; } - MarkVCBDirty(hfsmp); /* Get space for iterator, key and data */ MALLOC(bto, struct btobj *, sizeof(struct btobj), M_TEMP, M_WAITOK); @@ -2487,59 +2481,20 @@ cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr * /* This is our only chance to set the encoding (other than a rename). 
*/ encoding = hfs_pickencoding(bto->key.nodeName.unicode, bto->key.nodeName.length); - /* Insert the thread record first. */ + /* + * Insert the thread record first. + */ datalen = buildthread((void*)&bto->key, &bto->data, 0, 0); btdata.bufferAddress = &bto->data; btdata.itemSize = datalen; btdata.itemCount = 1; - - for (;;) { - buildthreadkey(nextCNID, 0, (CatalogKey *) &bto->iterator.key); - - /* - * If the CNID wraparound bit is set, then we need to validate if there - * is a cnode in the hash already with this ID (even if it no longer exists - * on disk). If so, then just skip this ID and move on to the next one. - */ - if (!std_hfs && (hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { - /* Verify that the CNID does not already exist in the cnode hash... */ - if (hfs_chash_snoop (hfsmp, nextCNID, 1, NULL, NULL) == 0) { - /* It was found in the cnode hash!*/ - result = btExists; - } - } - - if (result == 0) { - result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); - } - if ((result == btExists) && (hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { - /* - * Allow CNIDs on HFS Plus volumes to wrap around - */ - if (++nextCNID < kHFSFirstUserCatalogNodeID) { - nextCNID = kHFSFirstUserCatalogNodeID; - } - continue; - } - if (result == 0) { - thread_inserted = 1; - } - break; - } - if (result) + buildthreadkey(nextCNID, 0, (CatalogKey *) &bto->iterator.key); + result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); + if (result) { goto exit; - - /* - * CNID is now established. If we have wrapped then - * update the vcbNxtCNID. - */ - if ((hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { - hfsmp->vcbNxtCNID = nextCNID + 1; - if (hfsmp->vcbNxtCNID < kHFSFirstUserCatalogNodeID) { - hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; - } } + thread_inserted = 1; /* * Now insert the link record. 
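The deleted block above open-coded CNID wraparound and in-use probing at every call site; cat_acquire_cnid() now owns that policy. A hypothetical userspace analogue of such an allocator, with the in-use test stubbed and demo-grade termination:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FIRST_USER_ID 16u   /* analogue of kHFSFirstUserCatalogNodeID */

static uint32_t next_id = FIRST_USER_ID;

static bool id_in_use(uint32_t id) { return id == 20; }   /* stub */

/* Hand out the next ID, wrapping past UINT32_MAX back into the user
 * range and skipping IDs still in use. */
static int
acquire_id(uint32_t *out)
{
    uint32_t start = next_id;

    do {
        uint32_t id = next_id++;

        if (next_id < FIRST_USER_ID)   /* wrapped: reuse the user range */
            next_id = FIRST_USER_ID;
        if (!id_in_use(id)) {
            *out = id;
            return 0;
        }
    } while (next_id != start);
    return -1;                         /* every ID in use */
}

int
main(void)
{
    uint32_t id;

    for (int i = 0; i < 6; i++)
        if (acquire_id(&id) == 0)
            printf("got %u\n", id);    /* 16..19, then 21, 22 (20 skipped) */
    return 0;
}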
@@ -2579,8 +2534,8 @@ exit: buildthreadkey(nextCNID, 0, (CatalogKey *)&bto->iterator.key); if (BTDeleteRecord(fcb, &bto->iterator)) { - printf("hfs: cat_createlink() failed to delete thread record on volume %s\n", hfsmp->vcbVN); - hfs_mark_volume_inconsistent(hfsmp); + printf("hfs: cat_createlink() failed to delete thread record on volume %s\n", hfsmp->vcbVN); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); } } if (alias_allocated && rsrcforkp->extents[0].startBlock != 0) { @@ -2832,6 +2787,7 @@ struct readattr_state { cnid_t dir_cnid; int stdhfs; int error; + int reached_eof; }; static int @@ -2857,6 +2813,7 @@ getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec, #endif if (parentcnid != state->dir_cnid) { state->error = ENOENT; + state->reached_eof = 1; return (0); /* stop */ } break; @@ -2941,7 +2898,7 @@ getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec, * Note: index is zero relative */ int -cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_entrylist *ce_list) +cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_entrylist *ce_list, int *reachedeof) { FCB* fcb; CatalogKey * key; @@ -2953,6 +2910,7 @@ cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_ int index; int have_key; int result = 0; + int reached_eof = 0; ce_list->realentries = 0; @@ -2960,6 +2918,8 @@ cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_ std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); parentcnid = dirhint->dh_desc.cd_parentcnid; + bzero (&state, sizeof(struct readattr_state)); + state.hfsmp = hfsmp; state.list = ce_list; state.dir_cnid = parentcnid; @@ -3016,7 +2976,15 @@ cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_ result = ps.error; else result = MacToVFSError(result); + if (result) { + /* + * Note: the index may now point to EOF if the directory + * was modified in between system calls. We will return + * ENOENT from cat_findposition if this is the case, and + * when we bail out with an error, our caller (hfs_readdirattr_internal) + * will suppress the error and indicate EOF to its caller. + */ result = MacToVFSError(result); goto exit; } @@ -3027,12 +2995,17 @@ cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_ result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, (IterateCallBackProcPtr)getentriesattr_callback, &state); - if (state.error) + if (state.error) { result = state.error; - else if (ce_list->realentries == 0) + reached_eof = state.reached_eof; + } + else if (ce_list->realentries == 0) { result = ENOENT; - else + reached_eof = 1; + } + else { result = MacToVFSError(result); + } if (std_hfs) goto exit; @@ -3076,9 +3049,10 @@ cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_ cep->ce_rsrcblks = filerec.resourceFork.totalBlocks; } } + exit: FREE(iterator, M_TEMP); - + *reachedeof = reached_eof; return MacToVFSError(result); } @@ -3653,6 +3627,15 @@ cat_getdirentries(struct hfsmount *hfsmp, u_int32_t entrycnt, directoryhint_t *d result = MacToVFSError(result); if (result) { result = MacToVFSError(result); + if (result == ENOENT) { + /* + * ENOENT means we've hit the EOF. + * suppress the error, and set the eof flag. 
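This ENOENT-to-EOF translation shows up at several call sites in the hunks above and below: the catalog iterator reports "nothing at this position" and the caller converts that into success-plus-EOF rather than an error. A runnable distillation:

#include <errno.h>
#include <stdio.h>

/* The iterator reports ENOENT when the hint points past the last entry;
 * the caller turns that into "success + EOF". */
static int
read_entries(int simulate_past_end, int *eofflag)
{
    int result = simulate_past_end ? ENOENT : 0;

    if (result == ENOENT) {     /* hit the end of the directory */
        result = 0;
        *eofflag = 1;
    }
    return result;
}

int
main(void)
{
    int eof = 0;
    int err = read_entries(1, &eof);

    printf("err=%d eof=%d\n", err, eof);   /* err=0 eof=1 */
    return 0;
}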
+ */ + result = 0; + dirhint->dh_desc.cd_flags |= CD_EOF; + *eofflag = 1; + } goto cleanup; } } @@ -3814,7 +3797,11 @@ cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, /* Make sure parent directory didn't change */ if (state->parentID != curID) { - state->error = EINVAL; + /* + * The parent ID is different from curID this means we've hit + * the EOF for the directory. + */ + state->error = ENOENT; return (0); /* stop */ } @@ -4791,13 +4778,13 @@ cat_lookup_dirlink(struct hfsmount *hfsmp, cnid_t dirlink_id, } if (error) { printf ("hfs: cat_lookup_dirlink(): Error looking up file record for id=%u (error=%d)\n", dirlink_id, error); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); goto out; } /* Just for sanity, make sure that id in catalog record and thread record match */ if ((outdescp != NULL) && (dirlink_id != outdescp->cd_cnid)) { printf ("hfs: cat_lookup_dirlink(): Requested cnid=%u != found_cnid=%u\n", dirlink_id, outdescp->cd_cnid); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); error = ENOENT; } diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h index 3eea08ac8..a48ca2fb6 100644 --- a/bsd/hfs/hfs_catalog.h +++ b/bsd/hfs/hfs_catalog.h @@ -107,7 +107,17 @@ struct cat_attr { u_int32_t cau_dircount; /* count of sub dirs (for posix nlink) */ u_int32_t cau_firstlink; /* first hardlink link (files only) */ } ca_union3; - u_int8_t ca_finderinfo[32]; /* Opaque Finder information */ + union { + u_int8_t ca_finderinfo[32]; /* Opaque Finder information */ + struct { + FndrFileInfo ca_finderfileinfo; + struct FndrExtendedFileInfo ca_finderextendedfileinfo; + }; + struct { + FndrDirInfo ca_finderdirinfo; + struct FndrExtendedDirInfo ca_finderextendeddirinfo; + }; + }; }; /* Aliases for common fields */ @@ -353,7 +363,8 @@ extern int cat_findname (struct hfsmount *hfsmp, extern int cat_getentriesattr( struct hfsmount *hfsmp, directoryhint_t *dirhint, - struct cat_entrylist *ce_list); + struct cat_entrylist *ce_list, + int *reachedeof); extern int cat_rename ( struct hfsmount * hfsmp, struct cat_desc * from_cdp, @@ -364,8 +375,8 @@ extern int cat_rename ( struct hfsmount * hfsmp, extern int cat_update ( struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp, - struct cat_fork *dataforkp, - struct cat_fork *rsrcforkp); + const struct cat_fork *dataforkp, + const struct cat_fork *rsrcforkp); extern int cat_getdirentries( struct hfsmount *hfsmp, diff --git a/bsd/hfs/hfs_chash.c b/bsd/hfs/hfs_chash.c index b162a53b2..c52dc7521 100644 --- a/bsd/hfs/hfs_chash.c +++ b/bsd/hfs/hfs_chash.c @@ -222,8 +222,8 @@ exit: * */ int -hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int existence_only, int (*callout)(const struct cat_desc *, - const struct cat_attr *, void *), void * arg) +hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int existence_only, + int (*callout)(const cnode_t *cp, void *), void * arg) { struct cnode *cp; int result = ENOENT; @@ -260,11 +260,13 @@ hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int existence_only, int (*ca /* Skip cnodes that have been removed from the catalog */ if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + result = EACCES; break; } + /* Skip cnodes being created or reclaimed. 
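With the signature change above, hfs_chash_snoop() callouts receive the whole cnode instead of separate cat_desc/cat_attr pointers. A kernel-style sketch of a conforming callout; illustrative only, not buildable outside the kernel:

/* A callout matching the new signature: it can consult any cnode field
 * it needs instead of only the two structures the old form passed. */
static int
my_snoop_callout(const cnode_t *cp, void *arg)
{
    cnid_t *cnidp = arg;

    *cnidp = cp->c_desc.cd_cnid;   /* formerly delivered via a cat_desc */
    return (0);
}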
*/ if (!ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { - result = callout(&cp->c_desc, &cp->c_attr, arg); + result = callout(cp, arg); } break; } diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 574afc762..c2f92f02a 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2013 Apple Inc. All rights reserved. + * Copyright (c) 2002-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -49,6 +49,7 @@ #include #include #include +#include extern int prtactive; @@ -175,8 +176,8 @@ int hfs_is_backingstore (struct vnode *vp, int *val) { * Assumes that both truncate and cnode locks for 'cp' are held. */ static -int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { - +int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) +{ int forkcount = 0; enum vtype v_type; struct cnode *cp; @@ -254,7 +255,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { * and we entered hfs_vnop_inactive. As a result, the only time we can guarantee * that there aren't any references is during vnop_reclaim. */ - hfs_filedone(vp, ctx); + hfs_filedone(vp, ctx, 0); } /* @@ -367,7 +368,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { * context because we're only removing blocks, not zero-filling new * ones. The C_DELETED check above makes things much simpler. */ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx); + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx); if (error) { goto out; } @@ -592,7 +593,22 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { } hfs_update(vp, 0); } - + + /* + * Since we are about to finish what might be an inactive call, propagate + * any remaining modified or touch bits from the cnode to the vnode. This + * serves as a hint to vnode recycling that we shouldn't recycle this vnode + * synchronously. + */ + if (ISSET(cp->c_flag, C_MODIFIED) || ISSET(cp->c_flag, C_FORCEUPDATE) || + cp->c_touch_acctime || cp->c_touch_chgtime || + cp->c_touch_modtime || ISSET(cp->c_flag, C_NEEDS_DATEADDED) || + ISSET(cp->c_flag, C_DELETED)) { + vnode_setdirty(vp); + } else { + vnode_cleardirty(vp); + } + out: if (cat_reserve) cat_postflight(hfsmp, &cookie, p); @@ -625,7 +641,7 @@ out: */ if (forkcount == 1) { struct cprotect *entry = cp->c_cpentry; - if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) { + if ((entry) && ( CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F)) { if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) { cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED; bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len); @@ -726,7 +742,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) if (took_trunc_lock) { hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } - + hfs_unlock(cp); inactive_done: @@ -740,7 +756,8 @@ inactive_done: */ int -hfs_filedone(struct vnode *vp, vfs_context_t context) +hfs_filedone(struct vnode *vp, vfs_context_t context, + hfs_file_done_opts_t opts) { struct cnode *cp; struct filefork *fp; @@ -760,39 +777,31 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0)) return (0); + if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) { #if CONFIG_PROTECT - /* - * Figure out if we need to do synchronous IO. - * - * If the file represents a content-protected file, we may need - * to issue synchronous IO when we dispatch to the cluster layer. 
- * If we didn't, then the IO would go out to the disk asynchronously. - * If the vnode hits the end of inactive before getting reclaimed, the - * content protection keys would be wiped/bzeroed out, and we'd end up - * trying to issue the IO with an invalid key. This will lead to file - * corruption. IO_SYNC will force the cluster_push to wait until all IOs - * have completed (though they may be in the track cache). - */ - if (cp_fs_protected(VTOVFS(vp))) { - cluster_flags |= IO_SYNC; - cluster_zero_flags |= IO_SYNC; - } + /* + * Figure out if we need to do synchronous IO. + * + * If the file represents a content-protected file, we may need + * to issue synchronous IO when we dispatch to the cluster layer. + * If we didn't, then the IO would go out to the disk asynchronously. + * If the vnode hits the end of inactive before getting reclaimed, the + * content protection keys would be wiped/bzeroed out, and we'd end up + * trying to issue the IO with an invalid key. This will lead to file + * corruption. IO_SYNC will force the cluster_push to wait until all IOs + * have completed (though they may be in the track cache). + */ + if (cp_fs_protected(VTOVFS(vp))) { + cluster_flags |= IO_SYNC; + cluster_zero_flags |= IO_SYNC; + } #endif - /* - * If we are being invoked from F_SWAPDATAEXTENTS, then we - * need to issue synchronous IO; Unless we are sure that all - * of the data has been written to the disk, we won't know - * that all of the blocks have been allocated properly. - */ - if (cp->c_flag & C_SWAPINPROGRESS) { - cluster_flags |= IO_SYNC; + hfs_unlock(cp); + (void) cluster_push(vp, cluster_flags); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } - hfs_unlock(cp); - (void) cluster_push(vp, cluster_flags); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - /* * Explicitly zero out the areas of file * that are currently marked invalid. @@ -823,21 +832,24 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) * Shrink the peof to the smallest size necessary to contain the leof.
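hfs_filedone() now takes an options mask (hfs_cnode_teardown above passes 0), and HFS_FILE_DONE_NO_SYNC skips the cluster_push/update passes. A runnable toy showing the mask idiom; the flag's numeric value below is a placeholder, and ISSET mirrors the kernel macro:

#include <stdio.h>

#define ISSET(t, f)            ((t) & (f))
#define HFS_FILE_DONE_NO_SYNC  0x01   /* placeholder value for the demo */

/* Sketch of the two paths keyed on the options mask. */
static void
filedone(unsigned opts)
{
    if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC))
        puts("cluster_push + update");   /* normal path: flush to disk */
    else
        puts("skip sync");               /* caller will sync later */
}

int
main(void)
{
    filedone(0);                         /* teardown-style call */
    filedone(HFS_FILE_DONE_NO_SYNC);
    return 0;
}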
*/ if (blks < fp->ff_blocks) { - (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context); + (void) hfs_truncate(vp, leof, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, context); } - hfs_unlock(cp); - (void) cluster_push(vp, cluster_flags); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) { + hfs_unlock(cp); + (void) cluster_push(vp, cluster_flags); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - /* - * If the hfs_truncate didn't happen to flush the vnode's - * information out to disk, force it to be updated now that - * all invalid ranges have been zero-filled and validated: - */ - if (cp->c_flag & C_MODIFIED) { - hfs_update(vp, 0); + /* + * If the hfs_truncate didn't happen to flush the vnode's + * information out to disk, force it to be updated now that + * all invalid ranges have been zero-filled and validated: + */ + if (cp->c_flag & C_MODIFIED) { + hfs_update(vp, 0); + } } + return (0); } @@ -1020,7 +1032,7 @@ hfs_getnewvnode( /* Sanity check the vtype and mode */ if (vtype == VBAD) { /* Mark the FS as corrupt and bail out */ - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); return EINVAL; } @@ -1397,7 +1409,8 @@ hfs_getnewvnode( */ if (VNODE_IS_RSRC(vp)) { int err; - KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0); + + KERNEL_DEBUG_CONSTANT(HFSDBG_GETNEWVNODE, VM_KERNEL_ADDRPERM(cp->c_vp), VM_KERNEL_ADDRPERM(cp->c_rsrc_vp), 0, 0, 0); /* Force VL_NEEDINACTIVE on this vnode */ err = vnode_ref(vp); @@ -1641,30 +1654,26 @@ void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) { return; } - -u_int32_t hfs_get_dateadded (struct cnode *cp) { +static u_int32_t +hfs_get_dateadded_internal(const uint8_t *finderinfo, mode_t mode) +{ u_int8_t *finfo = NULL; u_int32_t dateadded = 0; - if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) { - /* Date added was never set. Return 0. */ - return dateadded; - } /* overlay the FinderInfo to the correct pointer, and advance */ - finfo = (u_int8_t*)cp->c_finderinfo; - finfo = finfo + 16; + finfo = (u_int8_t*)finderinfo + 16; /* * FinderInfo is written out in big endian... make sure to convert it to host * native before we use it. */ - if (S_ISREG(cp->c_attr.ca_mode)) { + if (S_ISREG(mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; dateadded = OSSwapBigToHostInt32 (extinfo->date_added); } - else if (S_ISDIR(cp->c_attr.ca_mode)) { + else if (S_ISDIR(mode)) { struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; dateadded = OSSwapBigToHostInt32 (extinfo->date_added); } @@ -1672,16 +1681,64 @@ u_int32_t hfs_get_dateadded (struct cnode *cp) { return dateadded; } +u_int32_t +hfs_get_dateadded(struct cnode *cp) +{ + if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) { + /* Date added was never set. Return 0. */ + return (0); + } + + return (hfs_get_dateadded_internal((u_int8_t*)cp->c_finderinfo, + cp->c_attr.ca_mode)); +} + +u_int32_t +hfs_get_dateadded_from_blob(const uint8_t *finderinfo, mode_t mode) +{ + return (hfs_get_dateadded_internal(finderinfo, mode)); +} + /* - * Per HI and Finder requirements, HFS maintains a "write/generation count" - * for each file that is incremented on any write & pageout. It should start - * at 1 to reserve "0" as a special value. If it should ever wrap around, - * it will skip using 0. 
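Reserving 0 as "no generation information" means the increment must skip it on wraparound, exactly as described above and implemented in hfs_incr_gencount() below. A runnable check of that edge case:

#include <stdint.h>
#include <stdio.h>

/* The counter reserves 0 as a special value: on wraparound the
 * increment steps over it. */
static uint32_t
next_gen(uint32_t gcount)
{
    gcount++;
    if (gcount == 0)    /* wrapped: 0 is reserved, skip it */
        gcount++;
    return gcount;
}

int
main(void)
{
    printf("%u -> %u\n", 1u, next_gen(1));                   /* 1 -> 2 */
    printf("%u -> %u\n", UINT32_MAX, next_gen(UINT32_MAX));  /* ... -> 1 */
    return 0;
}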
+ * Per HI and Finder requirements, HFS maintains a "write/generation + * count" for each file that is incremented on any write & pageout. + * It should start at 1 to reserve "0" as a special value. If it + * should ever wrap around, it will skip using 0. * - * Note that this field is also set explicitly in the hfs_vnop_setxattr code. - * We must ignore user attempts to set this part of the finderinfo, and - * so we need to save a local copy of the date added, write in the user - * finderinfo, then stuff the value back in. + * Note that finderinfo is manipulated in hfs_vnop_setxattr and care + * is and should be taken to ignore user attempts to set the part of + * the finderinfo that records the generation counter. + * + * Any change to the generation counter *must* not be visible before + * the change that caused it (for obvious reasons), and given the + * limitations of our current architecture, the change to the + * generation counter may occur some time afterwards (particularly in + * the case where a file is mapped writable---more on that below). + * + * We make no guarantees about the consistency of a file. In other + * words, a reader that is operating concurrently with a writer might + * see some, but not all of writer's changes, and the generation + * counter will *not* necessarily tell you this has happened. To + * enforce consistency, clients must make their own arrangements + * e.g. use file locking. + * + * We treat files that are mapped writable as a special case: when + * that happens, clients requesting the generation count will be told + * it has a generation count of zero and they use that knowledge as a + * hint that the file is changing and it therefore might be prudent to + * wait until it is no longer mapped writable. Clients should *not* + * rely on this behaviour however; we might decide that it's better + * for us to publish the fact that a file is mapped writable via + * alternate means and return the generation counter when it is mapped + * writable as it still has some, albeit limited, use. We reserve the + * right to make this change. + * + * Lastly, it's important to realise that because data and metadata + * take different paths through the system, it's possible upon crash + * or sudden power loss and after a restart, that a change may be + * visible to the rest of the system without a corresponding change to + * the generation counter. The reverse may also be true, but for all + * practical applications this shouldn't be an issue. */ void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) { u_int8_t *finfo = NULL; @@ -1705,7 +1762,22 @@ void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) { return; } -/* Increase the gen count by 1; if it wraps around to 0, increment by two */ +/* + * Increase the gen count by 1; if it wraps around to 0, increment by + * two. The cnode *must* be locked exclusively by the caller. + * + * You may think holding the lock is unnecessary because we only need + * to change the counter, but consider this sequence of events: thread + * A calls hfs_incr_gencount and the generation counter is 2 upon + * entry. A context switch occurs and thread B increments the counter + * to 3, thread C now gets the generation counter (for whatever + * purpose), and then another thread makes another change and the + * generation counter is incremented again---it's now 4. Now thread A + * continues and it sets the generation counter back to 3. 
So you can + * see, thread C would miss the change that caused the generation + * counter to increment to 4 and for this reason the cnode *must* + * always be locked exclusively. + */ uint32_t hfs_incr_gencount (struct cnode *cp) { u_int8_t *finfo = NULL; u_int32_t gcount = 0; @@ -1717,8 +1789,12 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { /* * FinderInfo is written out in big endian... make sure to convert it to host * native before we use it. + * + * NOTE: the write_gen_counter is stored in the same location in both the + * FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the + * last 32-bit word) so it is safe to have one code path here. */ - if (S_ISREG(cp->c_attr.ca_mode)) { + if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); @@ -1735,6 +1811,8 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { gcount++; } extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount); + + SET(cp->c_flag, C_MODIFIED); } else { gcount = 0; @@ -1743,6 +1821,11 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { return gcount; } +/* + * There is no need for any locks here (other than an iocount on an + * associated vnode) because reading and writing an aligned 32 bit + * integer should be atomic on all platforms we support. + */ static u_int32_t hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode) { @@ -1773,16 +1856,7 @@ hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode) if (gcount == 0) { gcount++; } - } else if (S_ISDIR(mode)) { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)finderinfo + 16); - gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); - - if (gcount == 0) { - gcount++; - } - } else { - gcount = 0; - } + } return gcount; } @@ -1797,6 +1871,17 @@ u_int32_t hfs_get_gencount_from_blob (const uint8_t *finfoblob, mode_t mode) { return hfs_get_gencount_internal(finfoblob, mode); } +void hfs_clear_might_be_dirty_flag(cnode_t *cp) +{ + /* + * If we're about to touch both mtime and ctime, we can clear the + * C_MIGHT_BE_DIRTY_FROM_MAPPING since we can guarantee that + * subsequent page-outs can only be for data made dirty before + * now. + */ + CLR(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING); +} + /* * Touch cnode times based on c_touch_xxx flags * @@ -1832,7 +1917,7 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) */ if (cp->c_touch_acctime) { if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) || - (hfsmp->hfs_freezing_proc != NULL) || + hfsmp->hfs_freeze_state != HFS_THAWED || (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) || (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) { @@ -1844,6 +1929,9 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) struct timeval tv; int touchvol = 0; + if (cp->c_touch_modtime && cp->c_touch_chgtime) + hfs_clear_might_be_dirty_flag(cp); + microtime(&tv); if (cp->c_touch_acctime) { @@ -1896,13 +1984,20 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) } } +// Use this if you don't want to check the return code +void hfs_lock_always(cnode_t *cp, enum hfs_locktype locktype) +{ + hfs_lock(cp, locktype, HFS_LOCK_ALWAYS); +} + /* * Lock a cnode. + * N.B. 
If you add any failure cases, *make* sure hfs_lock_always works */ int hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) { - void * thread = current_thread(); + thread_t thread = current_thread(); if (cp->c_lockowner == thread) { /* Only the extents and bitmap files support lock recursion. */ @@ -2102,10 +2197,9 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, void hfs_unlock(struct cnode *cp) { - vnode_t rvp = NULLVP; - vnode_t vp = NULLVP; - u_int32_t c_flag; - void *lockowner; + vnode_t rvp = NULLVP; + vnode_t vp = NULLVP; + u_int32_t c_flag; /* * Only the extents and bitmap file's support lock recursion. @@ -2116,18 +2210,36 @@ hfs_unlock(struct cnode *cp) return; } } - c_flag = cp->c_flag; - cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE); - if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) { + const thread_t thread = current_thread(); + + if (cp->c_lockowner == thread) { + c_flag = cp->c_flag; + + // If we have the truncate lock, we must defer the puts + if (cp->c_truncatelockowner == thread) { + if (ISSET(c_flag, C_NEED_DVNODE_PUT) + && !cp->c_need_dvnode_put_after_truncate_unlock) { + CLR(c_flag, C_NEED_DVNODE_PUT); + cp->c_need_dvnode_put_after_truncate_unlock = true; + } + if (ISSET(c_flag, C_NEED_RVNODE_PUT) + && !cp->c_need_rvnode_put_after_truncate_unlock) { + CLR(c_flag, C_NEED_RVNODE_PUT); + cp->c_need_rvnode_put_after_truncate_unlock = true; + } + } + + CLR(cp->c_flag, (C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE + | C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT)); + + if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) { vp = cp->c_vp; - } - if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) { + } + if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) { rvp = cp->c_rsrc_vp; - } + } - lockowner = cp->c_lockowner; - if (lockowner == current_thread()) { cp->c_lockowner = NULL; lck_rw_unlock_exclusive(&cp->c_rwlock); } else { @@ -2136,14 +2248,29 @@ hfs_unlock(struct cnode *cp) /* Perform any vnode post processing after cnode lock is dropped. */ if (vp) { - if (c_flag & C_NEED_DATA_SETSIZE) - ubc_setsize(vp, 0); + if (c_flag & C_NEED_DATA_SETSIZE) { + ubc_setsize(vp, VTOF(vp)->ff_size); +#if HFS_COMPRESSION + /* + * If this is a compressed file, we need to reset the + * compression state. We will have set the size to zero + * above and it will get fixed up later (in exactly the + * same way that new vnodes are fixed up). Note that we + * should only be able to get here if the truncate lock is + * held exclusively and so we do the reset when that's + * unlocked. 
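The deferral flags above exist because callers sometimes drop the cnode lock and the truncate lock in the less convenient order; releasing the cnode lock first lets the pending setsize and vnode_put work happen immediately. A kernel-style fragment of that preferred ordering, illustrative only and not buildable standalone:

	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

	/* ... manipulate the cnode ... */

	hfs_unlock(cp);                             /* cnode lock first ... */
	hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);  /* ... truncate lock last */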
+ */ + decmpfs_cnode *dp = VTOCMP(vp); + if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN) + cp->c_need_decmpfs_reset = true; +#endif + } if (c_flag & C_NEED_DVNODE_PUT) vnode_put(vp); } if (rvp) { if (c_flag & C_NEED_RSRC_SETSIZE) - ubc_setsize(rvp, 0); + ubc_setsize(rvp, VTOF(rvp)->ff_size); if (c_flag & C_NEED_RVNODE_PUT) vnode_put(rvp); } @@ -2215,7 +2342,7 @@ skip2: void hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) { - void * thread = current_thread(); + thread_t thread = current_thread(); if (cp->c_truncatelockowner == thread) { /* @@ -2250,7 +2377,7 @@ hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockfla */ int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) { - void * thread = current_thread(); + thread_t thread = current_thread(); boolean_t didlock = false; if (cp->c_truncatelockowner == thread) { @@ -2296,7 +2423,7 @@ int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lo void hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags) { - void *thread = current_thread(); + thread_t thread = current_thread(); /* * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current @@ -2318,8 +2445,51 @@ hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags) /* HFS_LOCK_EXCLUSIVE */ if (thread == cp->c_truncatelockowner) { + vnode_t vp = NULL, rvp = NULL; + + /* + * Deal with any pending set sizes. We need to call + * ubc_setsize before we drop the exclusive lock. Ideally, + * hfs_unlock should be called before hfs_unlock_truncate but + * that's a lot to ask people to remember :-) + */ + if (cp->c_lockowner == thread + && ISSET(cp->c_flag, C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE)) { + // hfs_unlock will do the setsize calls for us + hfs_unlock(cp); + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + } + + if (cp->c_need_dvnode_put_after_truncate_unlock) { + vp = cp->c_vp; + cp->c_need_dvnode_put_after_truncate_unlock = false; + } + if (cp->c_need_rvnode_put_after_truncate_unlock) { + rvp = cp->c_rsrc_vp; + cp->c_need_rvnode_put_after_truncate_unlock = false; + } + +#if HFS_COMPRESSION + bool reset_decmpfs = cp->c_need_decmpfs_reset; + cp->c_need_decmpfs_reset = false; +#endif + cp->c_truncatelockowner = NULL; lck_rw_unlock_exclusive(&cp->c_truncatelock); + +#if HFS_COMPRESSION + if (reset_decmpfs) { + decmpfs_cnode *dp = cp->c_decmp; + if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN) + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } +#endif + + // Do the puts now + if (vp) + vnode_put(vp); + if (rvp) + vnode_put(rvp); } else { /* HFS_LOCK_SHARED */ lck_rw_unlock_shared(&cp->c_truncatelock); } diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h index 9c45e7912..1fa4f2d8b 100644 --- a/bsd/hfs/hfs_cnode.h +++ b/bsd/hfs/hfs_cnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2012 Apple Inc. All rights reserved. + * Copyright (c) 2002-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,6 +32,7 @@ #ifdef KERNEL #ifdef __APPLE_API_PRIVATE +#include #include #include #include @@ -132,9 +133,9 @@ typedef struct linkorigin linkorigin_t; */ struct cnode { lck_rw_t c_rwlock; /* cnode's lock */ - void * c_lockowner; /* cnode's lock owner (exclusive case only) */ + thread_t c_lockowner; /* cnode's lock owner (exclusive case only) */ lck_rw_t c_truncatelock; /* protects file from truncation during read/write */ - void * c_truncatelockowner; /* truncate lock owner (exclusive case only) */ + thread_t c_truncatelockowner; /* truncate lock owner (exclusive case only) */ LIST_ENTRY(cnode) c_hash; /* cnode's hash chain */ u_int32_t c_flag; /* cnode's runtime flags */ u_int32_t c_hflag; /* cnode's flags for maintaining hash - protected by global hash lock */ @@ -158,6 +159,19 @@ struct cnode { atomicflag_t c_touch_acctime; atomicflag_t c_touch_chgtime; atomicflag_t c_touch_modtime; + + // The following flags are protected by the truncate lock + union { + struct { + bool c_need_dvnode_put_after_truncate_unlock : 1; + bool c_need_rvnode_put_after_truncate_unlock : 1; +#if HFS_COMPRESSION + bool c_need_decmpfs_reset : 1; +#endif + }; + uint8_t c_tflags; + }; + #if HFS_COMPRESSION decmpfs_cnode *c_decmp; #endif /* HFS_COMPRESSION */ @@ -202,43 +216,55 @@ typedef struct cnode cnode_t; #define H_WAITING 0x00008 /* CNode is being waited for */ -/* Runtime cnode flags (kept in c_flag) */ -#define C_NEED_RVNODE_PUT 0x00001 /* Need to do a vnode_put on c_rsrc_vp after the unlock */ -#define C_NEED_DVNODE_PUT 0x00002 /* Need to do a vnode_put on c_vp after the unlock */ -#define C_ZFWANTSYNC 0x00004 /* fsync requested and file has holes */ -#define C_FROMSYNC 0x00008 /* fsync was called from sync */ - -#define C_MODIFIED 0x00010 /* CNode has been modified */ -#define C_NOEXISTS 0x00020 /* CNode has been deleted, catalog entry is gone */ -#define C_DELETED 0x00040 /* CNode has been marked to be deleted */ -#define C_HARDLINK 0x00080 /* CNode is a hard link (file or dir) */ - -#define C_FORCEUPDATE 0x00100 /* force the catalog entry update */ -#define C_HASXATTRS 0x00200 /* cnode has extended attributes */ -#define C_NEG_ENTRIES 0x00400 /* directory has negative name entries */ +/* + * Runtime cnode flags (kept in c_flag) + */ +#define C_NEED_RVNODE_PUT 0x0000001 /* Need to do a vnode_put on c_rsrc_vp after the unlock */ +#define C_NEED_DVNODE_PUT 0x0000002 /* Need to do a vnode_put on c_vp after the unlock */ +#define C_ZFWANTSYNC 0x0000004 /* fsync requested and file has holes */ +#define C_FROMSYNC 0x0000008 /* fsync was called from sync */ + +#define C_MODIFIED 0x0000010 /* CNode has been modified */ +#define C_NOEXISTS 0x0000020 /* CNode has been deleted, catalog entry is gone */ +#define C_DELETED 0x0000040 /* CNode has been marked to be deleted */ +#define C_HARDLINK 0x0000080 /* CNode is a hard link (file or dir) */ + +#define C_FORCEUPDATE 0x0000100 /* force the catalog entry update */ +#define C_HASXATTRS 0x0000200 /* cnode has extended attributes */ +#define C_NEG_ENTRIES 0x0000400 /* directory has negative name entries */ /* * For C_SSD_STATIC: SSDs may want to deal with the file payload data in a * different manner knowing that the content is not likely to be modified. This is * purely advisory at the HFS level, and is not maintained after the cnode goes out of core. 
*/ -#define C_SSD_STATIC 0x00800 /* Assume future writes contain static content */ +#define C_SSD_STATIC 0x0000800 /* Assume future writes contain static content */ + +#define C_NEED_DATA_SETSIZE 0x0001000 /* Do a ubc_setsize() on c_vp after the unlock */ +#define C_NEED_RSRC_SETSIZE 0x0002000 /* Do a ubc_setsize() on c_rsrc_vp after the unlock */ +#define C_DIR_MODIFICATION 0x0004000 /* Directory is being modified, wait for lookups */ +#define C_ALWAYS_ZEROFILL 0x0008000 /* Always zero-fill the file on an fsync */ -#define C_NEED_DATA_SETSIZE 0x01000 /* Do a ubc_setsize(0) on c_rsrc_vp after the unlock */ -#define C_NEED_RSRC_SETSIZE 0x02000 /* Do a ubc_setsize(0) on c_vp after the unlock */ -#define C_DIR_MODIFICATION 0x04000 /* Directory is being modified, wait for lookups */ -#define C_ALWAYS_ZEROFILL 0x08000 /* Always zero-fill the file on an fsync */ +#define C_RENAMED 0x0010000 /* cnode was deleted as part of rename; C_DELETED should also be set */ +#define C_NEEDS_DATEADDED 0x0020000 /* cnode needs date-added written to the finderinfo bit */ +#define C_BACKINGSTORE 0x0040000 /* cnode is a backing store for an existing or currently-mounting filesystem */ -#define C_RENAMED 0x10000 /* cnode was deleted as part of rename; C_DELETED should also be set */ -#define C_NEEDS_DATEADDED 0x20000 /* cnode needs date-added written to the finderinfo bit */ -#define C_BACKINGSTORE 0x40000 /* cnode is a backing store for an existing or currently-mounting filesystem */ -#define C_SWAPINPROGRESS 0x80000 /* cnode's data is about to be swapped. Issue synchronous cluster io */ +/* + * This flag indicates the cnode might be dirty because it + * was mapped writable, so if we get any page-outs, update + * the modification and change times. + */ +#define C_MIGHT_BE_DIRTY_FROM_MAPPING 0x0080000 /* * For C_SSD_GREEDY_MODE: SSDs may want to write the file payload data using the greedy mode knowing * that the content needs to be written out to the disk quicker than normal at the expense of storage efficiency. * This is purely advisory at the HFS level, and is not maintained after the cnode goes out of core.
*/ -#define C_SSD_GREEDY_MODE 0x100000 /* Assume future writes are recommended to be written in SLC mode */ +#define C_SSD_GREEDY_MODE 0x0100000 /* Assume future writes are recommended to be written in SLC mode */ + +/* 0x0200000 is currently unused */ + +#define C_IO_ISOCHRONOUS 0x0400000 /* device-specific isochronous throughput I/O */ #define ZFTIMELIMIT (5 * 60) @@ -340,19 +366,26 @@ extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct com extern void hfs_touchtimes(struct hfsmount *, struct cnode *); extern void hfs_write_dateadded (struct cat_attr *cattrp, u_int32_t dateadded); extern u_int32_t hfs_get_dateadded (struct cnode *cp); +extern u_int32_t hfs_get_dateadded_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */); /* Gen counter methods */ extern void hfs_write_gencount(struct cat_attr *cattrp, uint32_t gencount); extern uint32_t hfs_get_gencount(struct cnode *cp); -extern uint32_t hfs_get_gencount_from_blob (const uint8_t *finfoblob, mode_t mode); extern uint32_t hfs_incr_gencount (struct cnode *cp); +extern uint32_t hfs_get_gencount_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */); /* Document id methods */ extern uint32_t hfs_get_document_id(struct cnode * /* cp */); extern uint32_t hfs_get_document_id_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */); /* Zero-fill file and push regions out to disk */ -extern int hfs_filedone(struct vnode *vp, vfs_context_t context); +enum { + // Use this flag if you're going to sync later + HFS_FILE_DONE_NO_SYNC = 1, +}; +typedef uint32_t hfs_file_done_opts_t; +extern int hfs_filedone(struct vnode *vp, vfs_context_t context, + hfs_file_done_opts_t opts); /* * HFS cnode hash functions. @@ -370,11 +403,10 @@ extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int int skiplock, int allow_deleted); extern struct cnode * hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock, int *out_flags, int *hflags); -extern int hfs_chash_snoop(struct hfsmount *, ino_t, int, int (*)(const struct cat_desc *, - const struct cat_attr *, void *), void *); +extern int hfs_chash_snoop(struct hfsmount *, ino_t, int, int (*)(const cnode_t *, void *), void *); extern int hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, cnid_t cnid, struct cat_attr *cattr, int *error); - + extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid); /* @@ -382,24 +414,97 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid); * * HFS Locking Order: * - * 1. cnode truncate lock (if needed) - * hfs_vnop_pagein/out can skip grabbing of this lock by flag option by - * HFS_LOCK_SKIP_IF_EXCLUSIVE if the truncate lock is already held exclusive - * by current thread from an earlier vnop. + * 1. cnode truncate lock (if needed) -- see below for more on this + * + * + hfs_vnop_pagein/out handles recursive use of this lock (by + * using flag option HFS_LOCK_SKIP_IF_EXCLUSIVE) although there + * are issues with this (see #16620278). + * + * + If locking multiple cnodes then the truncate lock must be taken on + * both (in address order), before taking the cnode locks. + * * 2. cnode lock (in parent-child order if related, otherwise by address order) + * * 3. journal (if needed) + * * 4. system files (as needed) + * * A. Catalog B-tree file * B. Attributes B-tree file * C. Startup file (if there is one) * D. Allocation Bitmap file (always exclusive, supports recursion) * E. 
Overflow Extents B-tree file (always exclusive, supports recursion) + * * 5. hfs mount point (always last) * * * I. HFS cnode hash lock (must not acquire any new locks while holding this lock, always taken last) */ +/* + * -- The Truncate Lock -- + * + * The truncate lock is used for a few purposes (more than its name + * might suggest). The first thing to note is that the cnode lock + * cannot be held whilst issuing any I/O other than metadata changes, + * so the truncate lock, in either shared or exclusive form, must + * usually be held in these cases. This includes calls to ubc_setsize + * where the new size is less than the current size known to the VM + * subsystem (for two reasons: a) because reaping pages can block + * (e.g. on pages that are busy or being cleaned); b) reaping pages + * might require page-in for tasks that have that region mapped + * privately). The same applies to other calls into the VM subsystem. + * + * Here are some (but not necessarily all) of the cases that the truncate + * lock protects: + * + * + When reading and writing a file, we hold the truncate lock + * shared to ensure that the underlying blocks cannot be deleted + * and on systems that use content protection, this also ensures + * the keys remain valid (which might be in use by the + * underlying layers). + * + * + We need to protect against the following sequence of events: + * + * A file is initially size X. A thread issues an append to that + * file. Another thread truncates the file and then extends it + * to a new size Y. Now the append can be applied at offset X + * and then the data is lost when the file is truncated; or it + * could be applied after the truncate, i.e. at offset 0; or it + * can be applied at offset Y. What we *cannot* do is apply the + * append at offset X and have the data still be visible at the end. + * (Note that we are free to choose when we apply the append + * operation.) + * + * To solve this, we keep things simple and take the truncate lock + * exclusively in order to sequence the append with other size + * changes. Therefore any size change must take the truncate lock + * exclusively. + * + * (N.B. we could do better and allow readers to run concurrently + * during the append and other size changes.) + * + * So here are the rules: + * + * + If you plan to change ff_size, you must take the truncate lock + * exclusively, *but* be careful what I/O you do whilst you have + * the truncate lock exclusively and try to avoid it if you can: + * if the VM subsystem tries to do something with some pages on a + * different thread and you try to do some I/O with those same + * pages, we will deadlock. (See #16620278.) + * + * + If you do anything that requires blocks to not be deleted or + * encryption keys to remain valid, you must take the truncate lock + * shared. + * + * + And it follows, therefore, that if you want to delete blocks or + * delete keys, you must take the truncate lock exclusively. + * + * N.B. ff_size is actually protected by the cnode lock and so you + * must hold the cnode lock exclusively to change it and shared to + * read it.
+ * + */ enum hfs_locktype { HFS_SHARED_LOCK = 1, @@ -410,10 +515,14 @@ enum hfs_locktype { enum hfs_lockflags { HFS_LOCK_DEFAULT = 0x0, /* Default flag, no options provided */ HFS_LOCK_ALLOW_NOEXISTS = 0x1, /* Allow locking of all cnodes, including cnode marked deleted with no catalog entry */ - HFS_LOCK_SKIP_IF_EXCLUSIVE = 0x2 /* Skip locking if the current thread already holds the lock exclusive */ + HFS_LOCK_SKIP_IF_EXCLUSIVE = 0x2, /* Skip locking if the current thread already holds the lock exclusive */ + + // Used when you do not want to check return from hfs_lock + HFS_LOCK_ALWAYS = HFS_LOCK_ALLOW_NOEXISTS, }; #define HFS_SHARED_OWNER (void *)0xffffffff +void hfs_lock_always(cnode_t *cnode, enum hfs_locktype); int hfs_lock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); int hfs_lockpair(struct cnode *, struct cnode *, enum hfs_locktype); int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *, @@ -427,6 +536,18 @@ void hfs_lock_truncate(struct cnode *, enum hfs_locktype, enum hfs_lockflags); void hfs_unlock_truncate(struct cnode *, enum hfs_lockflags); int hfs_try_trunclock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +void hfs_clear_might_be_dirty_flag(cnode_t *cp); + +// cnode must be locked +static inline __attribute__((pure)) +bool hfs_has_rsrc(const cnode_t *cp) +{ + if (cp->c_rsrcfork) + return cp->c_rsrcfork->ff_blocks > 0; + else + return cp->c_datafork && cp->c_blocks > cp->c_datafork->ff_blocks; +} + #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/hfs_cprotect.c b/bsd/hfs/hfs_cprotect.c index 742b095eb..ebb58b7ff 100644 --- a/bsd/hfs/hfs_cprotect.c +++ b/bsd/hfs/hfs_cprotect.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -41,10 +41,15 @@ #include "hfs.h" #include "hfs_cnode.h" +#include "hfs_fsctl.h" #if CONFIG_PROTECT -static struct cp_wrap_func g_cp_wrap_func = {}; -static struct cp_global_state g_cp_state = {0, 0, 0}; +/* + * The wrap function pointers and the variable to indicate if they + * are initialized are system-wide, and hence are defined globally. 
+ */ +static struct cp_wrap_func g_cp_wrap_func = {}; +static int are_wraps_initialized = false; extern int (**hfs_vnodeop_p) (void *); @@ -59,13 +64,20 @@ static int cp_restore_keys(struct cprotect *, struct hfsmount *hfsmp, struct cno static int cp_lock_vfs_callback(mount_t, void *); static int cp_lock_vnode_callback(vnode_t, void *); static int cp_vnode_is_eligible (vnode_t); -static int cp_check_access (cnode_t *, int); -static int cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, struct cprotect **output_entry); +static int cp_check_access (cnode_t *cp, struct hfsmount *hfsmp, int vnop); +static int cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, + uint32_t flags, struct cprotect **output_entry); static int cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass); static int cp_unwrap(struct hfsmount *, struct cprotect *, struct cnode *); static int cp_setup_aes_ctx(struct cprotect *entry); static void cp_init_access(cp_cred_t access, struct cnode *cp); +static inline int cp_get_crypto_generation (uint32_t protclass) { + if (protclass & CP_CRYPTO_G1) { + return 1; + } + else return 0; +} #if DEVELOPMENT || DEBUG @@ -84,22 +96,19 @@ cp_key_store_action(int action) if (action < 0 || action > CP_MAX_STATE) { return -1; } + + /* + * The lock state is kept locally to each data protected filesystem to + * avoid using globals. Pass along the lock request to each filesystem + * we iterate through. + */ - /* this truncates the upper 3 bytes */ - g_cp_state.lock_state = (uint8_t)action; - - if (action == CP_LOCKED_STATE) { - /* - * Upcast the value in 'action' to be a pointer-width unsigned integer. - * This avoids issues relating to pointer-width. - */ - unsigned long action_arg = (unsigned long) action; - return vfs_iterate(0, cp_lock_vfs_callback, (void*)action_arg); - } - - /* Do nothing on unlock events */ - return 0; - + /* + * Upcast the value in 'action' to be a pointer-width unsigned integer. + * This avoids issues relating to pointer-width. + */ + unsigned long action_arg = (unsigned long) action; + return vfs_iterate(0, cp_lock_vfs_callback, (void*)action_arg); } @@ -111,8 +120,10 @@ cp_register_wraps(cp_wrap_func_t key_store_func) g_cp_wrap_func.rewrapper = key_store_func->rewrapper; /* do not use invalidater until rdar://12170050 goes in ! 
*/ g_cp_wrap_func.invalidater = key_store_func->invalidater; + g_cp_wrap_func.backup_key = key_store_func->backup_key; - g_cp_state.wrap_functions_set = 1; + /* Mark the functions as initialized in the function pointer container */ + are_wraps_initialized = true; return 0; } @@ -148,7 +159,7 @@ cp_entry_init(struct cnode *cp, struct mount *mp) return 0; } - if (!g_cp_state.wrap_functions_set) { + if (are_wraps_initialized == false) { printf("hfs: cp_update_entry: wrap functions not yet set\n"); return ENXIO; } @@ -189,7 +200,9 @@ cp_entry_init(struct cnode *cp, struct mount *mp) if (S_ISDIR(cp->c_mode)) { target_class = PROTECTION_CLASS_DIR_NONE; } - error = cp_new (target_class, hfsmp, cp, cp->c_mode, &entry); + /* allow keybag to override our class preferences */ + uint32_t keyflags = CP_KEYWRAP_DIFFCLASS; + error = cp_new (target_class, hfsmp, cp, cp->c_mode, keyflags, &entry); if (error == 0) { error = cp_setxattr (cp, entry, hfsmp, cp->c_fileid, XATTR_CREATE); } @@ -239,6 +252,7 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli int isdir = 0; struct cprotect *entry = NULL; uint32_t target_class = hfsmp->default_cp_class; + suppliedclass = CP_CLASS(suppliedclass); if (hfsmp->hfs_running_cp_major_vers == 0) { panic ("CP: major vers not set in mount!"); @@ -279,7 +293,7 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli * has a NONE class set, then we can continue to use that. */ if ((dcp) && (dcp->c_cpentry)) { - uint32_t parentclass = dcp->c_cpentry->cp_pclass; + uint32_t parentclass = CP_CLASS(dcp->c_cpentry->cp_pclass); /* If the parent class is not valid, default to the mount point value */ if (cp_is_valid_class(1, parentclass)) { if (isdir) { @@ -307,6 +321,7 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli * target class. */ entry->cp_flags = (CP_NEEDS_KEYS | CP_NO_XATTR); + /* Note this is only the effective class */ entry->cp_pclass = target_class; *tmpentry = entry; @@ -370,7 +385,10 @@ int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp) *entry_ptr = NULL; return ENOMEM; } + /* This is generated in-kernel so we leave it at the max key*/ entry->cp_cache_key_len = CP_MAX_KEYSIZE; + + /* This pclass is only the effective class */ entry->cp_pclass = PROTECTION_CLASS_F; entry->cp_persistent_key_len = 0; @@ -482,7 +500,7 @@ cp_vnode_getclass(struct vnode *vp, int *class) /* Note that we may not have keys yet, but we know the target class. */ if (error == 0) { - *class = entry->cp_pclass; + *class = CP_CLASS(entry->cp_pclass); } if (took_truncate_lock) { @@ -514,6 +532,9 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass) isdir = 1; } + /* Ensure we only use the effective class here */ + newclass = CP_CLASS(newclass); + if (!cp_is_valid_class(isdir, newclass)) { printf("hfs: CP: cp_setclass called with invalid class %d\n", newclass); return EINVAL; @@ -543,6 +564,12 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass) hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); took_truncate_lock = 1; + /* + * The truncate lock is not sufficient to guarantee the CP blob + * isn't being used. We must wait for existing writes to finish. 
+ */ + vnode_waitforwrites(vp, 0, 0, 0, "cp_vnode_setclass"); + if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) { return EINVAL; } @@ -578,7 +605,10 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass) goto out; } + /* newclass is only the effective class */ entry->cp_pclass = newclass; + + /* Class F files are not wrapped, so they continue to use MAX_KEYSIZE */ entry->cp_cache_key_len = CP_MAX_KEYSIZE; read_random (&entry->cp_cache_key[0], entry->cp_cache_key_len); if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { @@ -596,7 +626,13 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass) /* We cannot call cp_rewrap unless the keys were already in existence. */ if (entry->cp_flags & CP_NEEDS_KEYS) { struct cprotect *newentry = NULL; - error = cp_generate_keys (hfsmp, cp, newclass, &newentry); + /* + * We want to fail if we can't wrap to the target class. By not setting + * CP_KEYWRAP_DIFFCLASS, we tell keygeneration that if it can't wrap + * to 'newclass' then error out. + */ + uint32_t flags = 0; + error = cp_generate_keys (hfsmp, cp, newclass, flags, &newentry); if (error == 0) { cp_replace_entry (cp, newentry); } @@ -613,7 +649,7 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass) } } else if (vnode_isdir(vp)) { - /* For directories, just update the pclass */ + /* For directories, just update the pclass. newclass is only effective class */ entry->cp_pclass = newclass; error = 0; } @@ -643,7 +679,7 @@ out: } -int cp_vnode_transcode(vnode_t vp) +int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len) { struct cnode *cp; struct cprotect *entry = 0; @@ -653,7 +689,7 @@ int cp_vnode_transcode(vnode_t vp) /* Structures passed between HFS and AKS */ cp_cred_s access_in; - cp_wrapped_key_s wrapped_key_in; + cp_wrapped_key_s wrapped_key_in, wrapped_key_out; /* Is this an interesting vp? */ if (!cp_vnode_is_eligible(vp)) { @@ -704,7 +740,7 @@ int cp_vnode_transcode(vnode_t vp) /* Picked up the following from cp_wrap(). * If needed, more comments available there. */ - if (entry->cp_pclass == PROTECTION_CLASS_F) { + if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) { error = EINVAL; goto out; } @@ -712,17 +748,22 @@ int cp_vnode_transcode(vnode_t vp) cp_init_access(&access_in, cp); bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + bzero(&wrapped_key_out, sizeof(wrapped_key_out)); wrapped_key_in.key = entry->cp_persistent_key; wrapped_key_in.key_len = entry->cp_persistent_key_len; + /* Use the actual persistent class when talking to AKS */ wrapped_key_in.dp_class = entry->cp_pclass; + wrapped_key_out.key = key; + wrapped_key_out.key_len = *len; - error = g_cp_wrap_func.rewrapper(&access_in, - entry->cp_pclass, + error = g_cp_wrap_func.backup_key(&access_in, &wrapped_key_in, - NULL); + &wrapped_key_out); if(error) error = EPERM; + else + *len = wrapped_key_out.key_len; } out: @@ -820,7 +861,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) } hfsmp = VTOHFS(vp); - if ((error = cp_check_access(cp, vnop))) { + if ((error = cp_check_access(cp, hfsmp, vnop))) { /* check for raw encrypted access before bailing out */ if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) { /* @@ -851,7 +892,13 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) /* generate new keys if none have ever been saved */ if ((entry->cp_flags & CP_NEEDS_KEYS)) { struct cprotect *newentry = NULL; - error = cp_generate_keys (hfsmp, cp, cp->c_cpentry->cp_pclass, &newentry); + /* + * It's ok if this ends up being wrapped in a different class than 'pclass'. 
+ * class modification is OK here. + */ + uint32_t flags = CP_KEYWRAP_DIFFCLASS; + + error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry); if (error == 0) { cp_replace_entry (cp, newentry); entry = newentry; @@ -904,7 +951,7 @@ cp_handle_open(struct vnode *vp, int mode) return 0; } - /* We know the vnode is in a valid state. acquire cnode and validate */ + /* We know the vnode is in a valid state. Acquire cnode and validate */ cp = VTOC(vp); hfsmp = VTOHFS(vp); @@ -932,7 +979,9 @@ cp_handle_open(struct vnode *vp, int mode) */ if (entry->cp_flags & CP_NEEDS_KEYS) { struct cprotect *newentry = NULL; - error = cp_generate_keys (hfsmp, cp, cp->c_cpentry->cp_pclass, &newentry); + /* Allow the keybag to override our class preferences */ + uint32_t flags = CP_KEYWRAP_DIFFCLASS; + error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry); if (error == 0) { cp_replace_entry (cp, newentry); entry = newentry; @@ -946,7 +995,7 @@ cp_handle_open(struct vnode *vp, int mode) * We want to minimize the number of unwraps that we'll have to do since * the cost can vary, depending on the platform we're running. */ - switch (entry->cp_pclass) { + switch (CP_CLASS(entry->cp_pclass)) { case PROTECTION_CLASS_B: if (mode & O_CREAT) { /* @@ -977,6 +1026,7 @@ cp_handle_open(struct vnode *vp, int mode) bzero(&wrapped_key_in, sizeof(wrapped_key_in)); wrapped_key_in.key = entry->cp_persistent_key; wrapped_key_in.key_len = entry->cp_persistent_key_len; + /* Use the persistent class when talking to AKS */ wrapped_key_in.dp_class = entry->cp_pclass; error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, NULL); if (error) { @@ -1046,7 +1096,7 @@ cp_handle_relocate (struct cnode *cp, struct hfsmount *hfsmp) * Still need to validate whether to permit access to the file or not * based on lock status */ - if ((error = cp_check_access(cp, CP_READ_ACCESS | CP_WRITE_ACCESS))) { + if ((error = cp_check_access(cp, hfsmp, CP_READ_ACCESS | CP_WRITE_ACCESS))) { goto out; } @@ -1196,7 +1246,12 @@ int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp /* Note that it's OK to write out an XATTR without keys. */ /* Disable flags that will be invalid as we're writing the EA out at this point. */ tempflags = entry->cp_flags; + + /* we're writing the EA; CP_NO_XATTR is invalid */ tempflags &= ~CP_NO_XATTR; + + /* CP_SEP_WRAPPEDKEY is informational/runtime only. 
*/ + tempflags &= ~CP_SEP_WRAPPEDKEY; switch(hfsmp->hfs_running_cp_major_vers) { case CP_NEW_MAJOR_VERS: { @@ -1492,7 +1547,7 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry entry->cp_flags |= CP_OFF_IV_ENABLED; } - if (entry->cp_pclass != PROTECTION_CLASS_F ) { + if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F ) { bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); } @@ -1562,7 +1617,7 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry entry->cp_flags = xattr->flags; - if (entry->cp_pclass != PROTECTION_CLASS_F ) { + if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F ) { bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); } @@ -1608,6 +1663,7 @@ cp_lock_vfs_callback(mount_t mp, void *arg) /* Use a pointer-width integer field for casting */ unsigned long new_state; + struct hfsmount *hfsmp; /* * When iterating the various mount points that may @@ -1617,8 +1673,15 @@ cp_lock_vfs_callback(mount_t mp, void *arg) if (!cp_fs_protected(mp)) { return 0; } - new_state = (unsigned long) arg; + + hfsmp = VFSTOHFS(mp); + + hfs_lock_mount(hfsmp); + /* this loses all of the upper bytes of precision; that's OK */ + hfsmp->hfs_cp_lock_state = (uint8_t) new_state; + hfs_unlock_mount(hfsmp); + if (new_state == CP_LOCKED_STATE) { /* * We respond only to lock events. Since cprotect structs @@ -1637,11 +1700,17 @@ cp_lock_vfs_callback(mount_t mp, void *arg) * Deny access to protected files if keys have been locked. */ static int -cp_check_access(struct cnode *cp, int vnop __unused) +cp_check_access(struct cnode *cp, struct hfsmount *hfsmp, int vnop __unused) { int error = 0; - if (g_cp_state.lock_state == CP_UNLOCKED_STATE) { + /* + * For now it's OK to examine the state variable here without + * holding the HFS lock. This is only a short-circuit; if the state + * transitions (or is in transition) after we examine this field, we'd + * have to handle that anyway. + */ + if (hfsmp->hfs_cp_lock_state == CP_UNLOCKED_STATE) { return 0; } @@ -1655,7 +1724,7 @@ cp_check_access(struct cnode *cp, int vnop __unused) } /* Deny all access for class A files */ - switch (cp->c_cpentry->cp_pclass) { + switch (CP_CLASS(cp->c_cpentry->cp_pclass)) { case PROTECTION_CLASS_A: { error = EPERM; break; @@ -1713,7 +1782,7 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) switch (action) { case CP_LOCKED_STATE: { vfs_context_t ctx; - if (entry->cp_pclass != PROTECTION_CLASS_A || + if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_A || vnode_isdir(vp)) { /* * There is no change at lock for other classes than A. 
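The hunks above consistently wrap cp_pclass in CP_CLASS() before comparing it against a protection class: the persistent class value stored on disk can carry flag bits (such as CP_CRYPTO_G1, tested by cp_get_crypto_generation earlier) alongside the effective class that policy decisions actually use. A minimal standalone sketch of that masking idea follows; it is not part of the patch, and the mask and flag values below are illustrative assumptions, not the real sys/cprotect.h definitions.

#include <stdint.h>
#include <stdio.h>

/* Assumed values for illustration only; not the actual sys/cprotect.h macros. */
#define DEMO_EFFECTIVE_CLASSMASK 0x0000001f  /* low bits: the protection class */
#define DEMO_CRYPTO_G1           0x00000200  /* a flag bit riding alongside it */
#define DEMO_CP_CLASS(x) ((uint32_t)(x) & DEMO_EFFECTIVE_CLASSMASK)

int main(void)
{
    uint32_t pclass = 3 | DEMO_CRYPTO_G1;  /* persistent value: class 3 plus a flag */

    /* Comparing the raw persistent value against a class constant fails... */
    printf("raw compare:       %s\n", pclass == 3 ? "match" : "no match");

    /* ...which is why every policy comparison above goes through CP_CLASS(). */
    printf("effective compare: %s\n",
           DEMO_CP_CLASS(pclass) == 3 ? "match" : "no match");
    return 0;
}

The same split explains why the patch stores what AKS actually returned (wrapped_key_out.dp_class, the persistent value) in cp_pclass, while comparisons and keybag requests use only the effective class.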
@@ -1734,7 +1803,7 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) /* Before doing anything else, zero-fill sparse ranges as needed */ ctx = vfs_context_current(); - (void) hfs_filedone (vp, ctx); + (void) hfs_filedone (vp, ctx, 0); /* first, sync back dirty pages */ hfs_unlock (cp); @@ -1797,6 +1866,7 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE]; size_t keylen = CP_MAX_WRAPPEDKEYSIZE; int error = 0; + newclass = CP_CLASS(newclass); /* Structures passed between HFS and AKS */ cp_cred_s access_in; @@ -1817,6 +1887,7 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) bzero(&wrapped_key_in, sizeof(wrapped_key_in)); wrapped_key_in.key = entry->cp_persistent_key; wrapped_key_in.key_len = entry->cp_persistent_key_len; + /* Use the persistent class when talking to AKS */ wrapped_key_in.dp_class = entry->cp_pclass; bzero(&wrapped_key_out, sizeof(wrapped_key_out)); @@ -1839,9 +1910,23 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) if (error == 0) { struct cprotect *newentry = NULL; - /* - * v2 EA's don't support the larger class B keys + /* + * Verify that AKS returned to us a wrapped key of the + * target class requested. */ + /* Get the effective class here */ + int effective = CP_CLASS(wrapped_key_out.dp_class); + if (effective != newclass) { + /* + * Fail the operation if defaults or some other enforcement + * dictated that the class be wrapped differently. + */ + + /* TODO: Invalidate the key when 12170074 unblocked */ + return EPERM; + } + + /* v2 EA's don't support the larger class B keys */ if ((keylen != CP_V2_WRAPPEDKEYSIZE) && (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { return EINVAL; @@ -1855,7 +1940,9 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) bcopy (new_persistent_key, newentry->cp_persistent_key, keylen); newentry->cp_persistent_key_len = keylen; newentry->cp_backing_cnode = cp; - newentry->cp_pclass = newclass; + + /* Actually record/store what AKS reported back, not the effective class stored in newclass */ + newentry->cp_pclass = wrapped_key_out.dp_class; /* Attach the new entry to the cnode */ cp->c_cpentry = newentry; @@ -1887,7 +1974,7 @@ cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) * key that is only good as long as the file is open. There is no * wrapped key, so there isn't anything to unwrap. */ - if (entry->cp_pclass == PROTECTION_CLASS_F) { + if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) { return EPERM; } @@ -1896,16 +1983,31 @@ cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) bzero(&wrapped_key_in, sizeof(wrapped_key_in)); wrapped_key_in.key = entry->cp_persistent_key; wrapped_key_in.key_len = entry->cp_persistent_key_len; + /* Use the persistent class when talking to AKS */ wrapped_key_in.dp_class = entry->cp_pclass; bzero(&key_out, sizeof(key_out)); - key_out.key = entry->cp_cache_key; - key_out.key_len = CP_MAX_KEYSIZE; key_out.iv_key = iv_key; + key_out.key = entry->cp_cache_key; + /* + * The unwrapper should validate/set the key length for + * the IV key length and the cache key length, however we need + * to supply the correct buffer length so that AKS knows how + * many bytes it has to work with. 
+ */ key_out.iv_key_len = CP_IV_KEYSIZE; key_out.key_len = CP_MAX_CACHEBUFLEN; error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, &key_out); if (!error) { + if (key_out.key_len == 0 || key_out.key_len > CP_MAX_CACHEBUFLEN) { + panic ("cp_unwrap: invalid key length! (%u)\n", key_out.key_len); + } + + if (key_out.iv_key_len == 0 || key_out.iv_key_len > CP_IV_KEYSIZE) { + panic ("cp_unwrap: invalid iv key length! (%u)\n", key_out.iv_key_len); + } + entry->cp_cache_key_len = key_out.key_len; /* No need to go here for older EAs */ @@ -1913,6 +2015,13 @@ cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx); entry->cp_flags |= CP_OFF_IV_ENABLED; } + + /* Is the key a raw wrapped key? */ + if (key_out.flags & CP_RAW_KEY_WRAPPEDKEY) { + /* OR in the right bit for the cprotect */ + entry->cp_flags |= CP_SEP_WRAPPEDKEY; + } + } else { error = EPERM; } @@ -1929,6 +2038,11 @@ cp_setup_aes_ctx(struct cprotect *entry) /* First init the cp_cache_iv_key[] */ SHA1Init(&sha1ctxt); + + /* + * We can only use this when the keys are generated in the AP; as a result + * we only use the first 32 bytes of the key in the cache key + */ SHA1Update(&sha1ctxt, &entry->cp_cache_key[0], CP_MAX_KEYSIZE); SHA1Final(&cp_cache_iv_key[0], &sha1ctxt); @@ -1946,13 +2060,17 @@ * on 'cp'. * */ -int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, struct cprotect **newentry) +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, + uint32_t keyflags, struct cprotect **newentry) { int error = 0; struct cprotect *newcp = NULL; *newentry = NULL; + /* Target class must be an effective class only */ + targetclass = CP_CLASS(targetclass); + /* Validate that it has a cprotect already */ if (cp->c_cpentry == NULL) { /* We can't do anything if it shouldn't be protected. */ @@ -1973,7 +2091,7 @@ } } - error = cp_new (targetclass, hfsmp, cp, cp->c_mode, &newcp); + error = cp_new (targetclass, hfsmp, cp, cp->c_mode, keyflags, &newcp); if (error) { /* * Key generation failed. This is not necessarily fatal @@ -2039,16 +2157,20 @@ void cp_replace_entry (struct cnode *cp, struct cprotect *newentry) */ static int -cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, struct cprotect **output_entry) +cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, + uint32_t keyflags, struct cprotect **output_entry) { struct cprotect *entry = NULL; int error = 0; - uint8_t new_key[CP_MAX_KEYSIZE]; - size_t new_key_len = CP_MAX_KEYSIZE; + uint8_t new_key[CP_MAX_CACHEBUFLEN]; + size_t new_key_len = CP_MAX_CACHEBUFLEN; /* AKS tells us the proper key length, how much of this is used */ uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE]; size_t new_persistent_len = CP_MAX_WRAPPEDKEYSIZE; uint8_t iv_key[CP_IV_KEYSIZE]; size_t iv_key_len = CP_IV_KEYSIZE; + int iswrapped = 0; + + newclass_eff = CP_CLASS(newclass_eff); /* Structures passed between HFS and AKS */ cp_cred_s access_in; @@ -2059,11 +2181,16 @@ cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, str panic ("cp_new with non-null entry!"); } - if (!g_cp_state.wrap_functions_set) { + if (are_wraps_initialized == false) { printf("hfs: cp_new: wrap/gen functions not yet set\n"); return ENXIO; } + /* Sanity check that it's a file or directory here */ + if (!(S_ISREG(cmode)) && !(S_ISDIR(cmode))) { + return EPERM; + } + /* * Step 1: Generate Keys if needed. * @@ -2083,9 +2210,10 @@ cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, str error = 0; } - else if (S_ISREG(cmode)) { - /* Files */ - if (newclass == PROTECTION_CLASS_F) { + else { + /* Must be a file */ + if (newclass_eff == PROTECTION_CLASS_F) { + /* class F files are not wrapped; they can still use the max key size */ new_key_len = CP_MAX_KEYSIZE; read_random (&new_key[0], new_key_len); new_persistent_len = 0; @@ -2104,8 +2232,13 @@ cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, str bzero(&key_out, sizeof(key_out)); key_out.key = new_key; - key_out.key_len = new_key_len; key_out.iv_key = iv_key; + /* + * AKS will override our key length fields, but we need to supply + * the length of the buffer in those length fields so that + * AKS knows how many bytes it has to work with. + */ + key_out.key_len = new_key_len; key_out.iv_key_len = iv_key_len; bzero(&wrapped_key_out, sizeof(wrapped_key_out)); @@ -2113,69 +2246,119 @@ wrapped_key_out.key_len = new_persistent_len; error = g_cp_wrap_func.new_key(&access_in, - newclass, + newclass_eff, &key_out, &wrapped_key_out); + if (error) { + /* keybag returned failure */ + error = EPERM; + goto cpnew_fail; + } + + /* Now sanity-check the output from new_key */ + if (key_out.key_len == 0 || key_out.key_len > CP_MAX_CACHEBUFLEN) { + panic ("cp_new: invalid key length! (%u)\n", key_out.key_len); + } + + if (key_out.iv_key_len == 0 || key_out.iv_key_len > CP_IV_KEYSIZE) { + panic ("cp_new: invalid iv key length! (%u)\n", key_out.iv_key_len); + } + + /* + * AKS is allowed to override our preferences and wrap with a + * different class key for policy reasons. If we were told that + * any class other than the one specified is unacceptable, then error out + * if that occurred. Check that the effective class returned by
Check that the effective class returned by + * AKS is the same as our effective new class + */ + if ((int)(CP_CLASS(wrapped_key_out.dp_class)) != newclass_eff) { + if (keyflags & CP_KEYWRAP_DIFFCLASS) { + newclass_eff = CP_CLASS(wrapped_key_out.dp_class); + } + else { + error = EPERM; + /* TODO: When 12170074 fixed, release/invalidate the key! */ + goto cpnew_fail; + } + } + new_key_len = key_out.key_len; iv_key_len = key_out.iv_key_len; new_persistent_len = wrapped_key_out.key_len; - } - } - else { - /* Something other than file or dir? */ - error = EPERM; + /* Is the key a SEP wrapped key? */ + if (key_out.flags & CP_RAW_KEY_WRAPPEDKEY) { + iswrapped = 1; + } + } } /* - * Step 2: Allocate cprotect and initialize it. + * Step 2: allocate cprotect and initialize it. */ - if (error == 0) { - /* - * v2 EA's don't support the larger class B keys - */ - if ((new_persistent_len != CP_V2_WRAPPEDKEYSIZE) && - (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { - return EINVAL; - } - entry = cp_entry_alloc (new_persistent_len); - if (entry == NULL) { - return ENOMEM; - } + /* + * v2 EA's don't support the larger class B keys + */ + if ((new_persistent_len != CP_V2_WRAPPEDKEYSIZE) && + (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { + return EINVAL; + } - *output_entry = entry; + entry = cp_entry_alloc (new_persistent_len); + if (entry == NULL) { + return ENOMEM; + } - entry->cp_pclass = newclass; + *output_entry = entry; - /* Copy the cache key & IV keys into place if needed. */ - if (new_key_len > 0) { - bcopy (new_key, entry->cp_cache_key, new_key_len); - entry->cp_cache_key_len = new_key_len; + /* + * For directories and class F files, just store the effective new class. + * AKS does not interact with us in generating keys for F files, and directories + * don't actually have keys. + */ + if ( S_ISDIR (cmode) || (newclass_eff == PROTECTION_CLASS_F)) { + entry->cp_pclass = newclass_eff; + } + else { + /* + * otherwise, store what AKS actually returned back to us. + * wrapped_key_out is only valid if we have round-tripped to AKS + */ + entry->cp_pclass = wrapped_key_out.dp_class; + } - /* Initialize the IV key */ - if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { - if (newclass == PROTECTION_CLASS_F) { - /* class F needs a full IV initialize */ - cp_setup_aes_ctx(entry); - } - else { - /* Key store gave us an iv key. Just need to wrap it.*/ - aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx); - } - entry->cp_flags |= CP_OFF_IV_ENABLED; + /* Copy the cache key & IV keys into place if needed. */ + if (new_key_len > 0) { + bcopy (new_key, entry->cp_cache_key, new_key_len); + entry->cp_cache_key_len = new_key_len; + + + /* Initialize the IV key */ + if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { + if (newclass_eff == PROTECTION_CLASS_F) { + /* class F needs a full IV initialize */ + cp_setup_aes_ctx(entry); } - } - if (new_persistent_len > 0) { - bcopy(new_persistent_key, entry->cp_persistent_key, new_persistent_len); + else { + /* Key store gave us an iv key. 
Just need to wrap it.*/ + aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx); + } + entry->cp_flags |= CP_OFF_IV_ENABLED; } } - else { - error = EPERM; + if (new_persistent_len > 0) { + bcopy(new_persistent_key, entry->cp_persistent_key, new_persistent_len); + } + + /* Mark it as a wrapped key if necessary */ + if (iswrapped) { + entry->cp_flags |= CP_SEP_WRAPPEDKEY; } +cpnew_fail: return error; } diff --git a/bsd/hfs/hfs_encodings.c b/bsd/hfs/hfs_encodings.c index 65fae3049..d4fc65fc6 100644 --- a/bsd/hfs/hfs_encodings.c +++ b/bsd/hfs/hfs_encodings.c @@ -571,7 +571,7 @@ static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 break; case 0x2200: - if (lsb <= 0x68) + if (lsb < 0x68) mc = gMathTable[lsb]; break; diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index 4e7dfea24..50fb1ddd9 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -390,7 +390,7 @@ fail: */ printf("hfs: node=%lld fileID=%u volume=%s device=%s\n", src->blockNum, VTOC(vp)->c_fileid, VTOVCB(vp)->vcbVN, vfs_statfs(vnode_mount(vp))->f_mntfromname); - hfs_mark_volume_inconsistent(VTOVCB(vp)); + hfs_mark_inconsistent(VTOVCB(vp), HFS_INCONSISTENCY_DETECTED); } return (error); diff --git a/bsd/hfs/hfs_fsctl.h b/bsd/hfs/hfs_fsctl.h index 9739a8555..f7f3c26b1 100644 --- a/bsd/hfs/hfs_fsctl.h +++ b/bsd/hfs/hfs_fsctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -34,7 +34,7 @@ #include #include #include - +#include #ifdef __APPLE_API_UNSTABLE @@ -54,6 +54,17 @@ struct hfs_journal_info { }; +struct hfsinfo_metadata { + uint32_t total; + uint32_t extents; + uint32_t catalog; + uint32_t allocation; + uint32_t attribute; + uint32_t journal; + uint32_t reserved[4]; +}; + + /* HFS FS CONTROL COMMANDS */ #define HFSIOC_RESIZE_PROGRESS _IOR('h', 1, u_int32_t) @@ -145,19 +156,28 @@ struct hfs_journal_info { #define HFSIOC_GET_DESIRED_DISK _IOR('h', 29, u_int32_t) #define HFS_FSCTL_GET_DESIRED_DISK IOCBASECMD(HFSIOC_GET_DESIRED_DISK) -#define HFSIOC_GET_WRITE_GEN_COUNTER _IOR('h', 30, u_int32_t) -#define HFS_GET_WRITE_GEN_COUNTER IOCBASECMD(HFSIOC_GET_WRITE_GEN_COUNTER) - -/* revisiond uses this to allocate a doc-id for files from Cab and earlier systems that are marked tracked but don't have a doc-id */ -#define HFS_DOCUMENT_ID_ALLOCATE 0x1 +/* 30 was HFSIOC_GET_WRITE_GEN_COUNTER and is now deprecated */ -#define HFSIOC_GET_DOCUMENT_ID _IOR('h', 31, u_int32_t) -#define HFS_GET_DOCUMENT_ID IOCBASECMD(HFSIOC_GET_DOCUMENT_ID) +/* 31 was HFSIOC_GET_DOCUMENT_ID and is now deprecated */ /* revisiond only uses this when something transforms in a way the kernel can't track such as "foo.rtf" -> "foo.rtfd" */ #define HFSIOC_TRANSFER_DOCUMENT_ID _IOW('h', 32, u_int32_t) #define HFS_TRANSFER_DOCUMENT_ID IOCBASECMD(HFSIOC_TRANSFER_DOCUMENT_ID) + +/* + * Get information about number of file system allocation blocks used by metadata + * files on the volume, including individual btrees and journal file. The caller + * can determine the size of file system allocation block using value returned as + * f_bsize by statfs(2). 
+ */ +#define HFSIOC_FSINFO_METADATA_BLOCKS _IOWR('h', 38, struct hfsinfo_metadata) +#define HFS_FSINFO_METADATA_BLOCKS IOCBASECMD(HFSIOC_FSINFO_METADATA_BLOCKS) + +/* Send TRIMs for all free blocks to the underlying device */ +#define HFSIOC_CS_FREESPACE_TRIM _IOWR('h', 39, u_int32_t) +#define HFS_CS_FREESPACE_TRIM IOCBASECMD(HFSIOC_CS_FREESPACE_TRIM) + #endif /* __APPLE_API_UNSTABLE */ #endif /* ! _HFS_FSCTL_H_ */ diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c index bde852475..7ba80c737 100644 --- a/bsd/hfs/hfs_hotfiles.c +++ b/bsd/hfs/hfs_hotfiles.c @@ -1931,7 +1931,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent index[(nodesize / 2) - 4] = SWAP_BE16 (offset); vnode_setnoflush(vp); - error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, 0, ctx); + error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, ctx); if (error) { printf("hfs: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); goto out; diff --git a/bsd/hfs/hfs_kdebug.h b/bsd/hfs/hfs_kdebug.h index f8451a64c..324a15f16 100644 --- a/bsd/hfs/hfs_kdebug.h +++ b/bsd/hfs/hfs_kdebug.h @@ -9,8 +9,13 @@ #define HFSDBG_CODE(code) FSDBG_CODE(DBG_HFS, code) enum { - HFSDBG_UNMAP_FREE = HFSDBG_CODE(0), /* 0x03080000 */ - HFSDBG_UNMAP_ALLOC = HFSDBG_CODE(1), /* 0x03080004 */ + HFSDBG_WRITE = FSDBG_CODE(DBG_FSRW, 0), /* 0x3010000 */ + HFSDBG_TRUNCATE = FSDBG_CODE(DBG_FSRW, 7), /* 0x301001C */ + HFSDBG_READ = FSDBG_CODE(DBG_FSRW, 12), /* 0x3010030 */ + HFSDBG_GETNEWVNODE = FSDBG_CODE(DBG_FSRW, 37), /* 0x3010094 */ + HFSDBG_UPDATE = FSDBG_CODE(DBG_FSRW, 8192), /* 0x3018000 */ + HFSDBG_UNMAP_FREE = HFSDBG_CODE(0), /* 0x03080000 */ + HFSDBG_UNMAP_ALLOC = HFSDBG_CODE(1), /* 0x03080004 */ HFSDBG_UNMAP_CALLBACK = HFSDBG_CODE(2), /* 0x03080008 */ /* 0x0308000C is unused */ HFSDBG_BLOCK_ALLOCATE = HFSDBG_CODE(4), /* 0x03080010 */ @@ -23,39 +28,55 @@ enum { HFSDBG_MARK_ALLOC_BITMAP = HFSDBG_CODE(11), /* 0x0308002C */ HFSDBG_MARK_FREE_BITMAP = HFSDBG_CODE(12), /* 0x03080030 */ HFSDBG_BLOCK_FIND_CONTIG = HFSDBG_CODE(13), /* 0x03080034 */ - HFSDBG_IS_ALLOCATED = HFSDBG_CODE(14), /* 0x03080038 */ + HFSDBG_IS_ALLOCATED = HFSDBG_CODE(14), /* 0x03080038 */ /* 0x0308003C is unused */ HFSDBG_RESET_EXTENT_CACHE = HFSDBG_CODE(16), /* 0x03080040 */ HFSDBG_REMOVE_EXTENT_CACHE = HFSDBG_CODE(17), /* 0x03080044 */ HFSDBG_ADD_EXTENT_CACHE = HFSDBG_CODE(18), /* 0x03080048 */ - HFSDBG_READ_BITMAP_RANGE = HFSDBG_CODE(19), /* 0x0308004C */ - HFSDBG_RELEASE_SCAN_BITMAP = HFSDBG_CODE(20), /* 0x03080050 */ - HFSDBG_SYNCER = HFSDBG_CODE(21), /* 0x03080054 */ - HFSDBG_SYNCER_TIMED = HFSDBG_CODE(22), /* 0x03080058 */ + HFSDBG_READ_BITMAP_RANGE = HFSDBG_CODE(19), /* 0x0308004C */ + HFSDBG_RELEASE_SCAN_BITMAP = HFSDBG_CODE(20), /* 0x03080050 */ + HFSDBG_SYNCER = HFSDBG_CODE(21), /* 0x03080054 */ + HFSDBG_SYNCER_TIMED = HFSDBG_CODE(22), /* 0x03080058 */ + HFSDBG_UNMAP_SCAN = HFSDBG_CODE(23), /* 0x0308005C */ + HFSDBG_UNMAP_SCAN_TRIM = HFSDBG_CODE(24) /* 0x03080060 */ }; /* - Parameters logged by the above - EVENT CODE DBG_FUNC_START arg1, arg2, arg3, arg4 ... DBG_FUNC_END arg1, arg2, arg3, arg4 - --------------------------- - HFSDBG_UNMAP_CALLBACK 0, extentCount, 0, 0 ... 0, 0, 0, 0 - HFSDBG_UNMAP_FREE startBlock, blockCount, 0, 0 ... err, 0, 0, 0 - HFSDBG_UNMAP_ALLOC startBlock, blockCount, 0, 0 ... err, 0, 0, 0 - HFSDBG_REMOVE_EXTENT_CACHE startBlock, blockCount, vcbFreeExtCnt, 0 ... 0, 0, vcbFreeExtCnt, extentsRemoved - HFSDBG_ADD_EXTENT_CACHE startBlock, blockCount, vcbFreeExtCnt, 0 ... 
0, 0, vcbFreeExtCnt, retval - HFSDBG_MARK_ALLOC_BITMAP startBlock, blockCount, 0, 0 ... err, 0, 0, 0 - HFSDBG_MARK_FREE_BITMAP startBlock, blockCount, valid, 0 ... err, 0, 0, 0 - HFSDBG_BLOCK_DEALLOCATE startBlock, blockCount, flags, 0 ... err, 0, 0, 0 - HFSDBG_IS_ALLOCATED startBlock, blockCount, stop, 0 ... err, 0, actualBlockCount, 0 - HFSDBG_BLOCK_ALLOCATE startBlock, minBlocks, maxBlocks, flags ... err, actualStartBlock, actualBlockCount, 0 - HFSDBG_ALLOC_CONTIG_BITMAP startBlock, minBlocks, maxBlocks, useMeta ... err, actualStartBlock, actualBlockCount, 0 - HFSDBG_ALLOC_ANY_BITMAP startBlock, endBlock, maxBlocks, useMeta ... err, actualStartBlock, actualBlockCount, 0 - HFSDBG_ALLOC_KNOWN_BITMAP 0, 0, maxBlocks, 0 ... err, actualStartBlock, actualBlockCount, 0 - HFSDBG_BLOCK_FIND_CONTIG startBlock, endBlock, minBlocks, maxBlocks ... err, actualStartBlock, actualBlockCount, 0 - HFSDBG_READ_BITMAP_BLOCK startBlock, 0, 0, 0 ... err, 0, 0, 0 - HFSDBG_RELEASE_BITMAP_BLOCK dirty, 0, 0, 0 ... 0, 0, 0, 0 - HFSDBG_RESET_EXTENT_CACHE 0, 0, 0, 0 ... 0, 0, 0, 0 - HFSDBG_READ_BITMAP_RANGE startBlock, iosize, 0, 0 ... err, 0, 0, 0 - HFSDBG_RELEASE_SCAN_BITMAP 0, 0, 0, 0, ... 0, 0, 0, 0 - + Parameters logged by the above tracepoints: +--------------------------------------------------------------------------------------------------------------------------------- + CODE EVENT NAME DBG_FUNC_START arg1, arg2, arg3, arg4, arg5 ... DBG_FUNC_END arg1, arg2, arg3, arg4, arg5 + DBG_FUNC_NONE arg1, arg2, arg3, arg4, arg5 +--------------------------------------------------------------------------------------------------------------------------------- +0x3010000 HFSDBG_WRITE offset, uio_resid, ff_size, filebytes, 0 ... uio_offset, uio_resid, ff_size, filebytes, 0 + offset, uio_resid, ff_size, filebytes, 0 +0x301001C HFSDBG_TRUNCATE length, ff_size, filebytes, 0, 0 ... length, ff_size, filebytes, retval, 0 + length, ff_size, filebytes, 0, 0 +0x3010030 HFSDBG_READ uio_offset, uio_resid, filesize, filebytes, 0 ... uio_offset, uio_resid, filesize, filebytes, 0 +0x3010094 HFSDBG_GETNEWVNODE c_vp, c_rsrc_vp, 0, 0, 0 +0x3018000 HFSDBG_UPDATE vp, tstate, 0, 0, 0 ... vp, tstate, error, 0/-1, 0 + 0 HFSDBG_UNMAP_FREE startBlock, blockCount, 0, 0, 0 ... err, 0, 0, 0, 0 + 1 HFSDBG_UNMAP_ALLOC startBlock, blockCount, 0, 0, 0 ... err, 0, 0, 0, 0 + 2 HFSDBG_UNMAP_CALLBACK 0, extentCount, 0, 0, 0 ... 0, 0, 0, 0, 0 + 3 unused + 4 HFSDBG_BLOCK_ALLOCATE startBlock, minBlocks, maxBlocks, flags, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 5 HFSDBG_BLOCK_DEALLOCATE startBlock, blockCount, flags, 0, 0 ... err, 0, 0, 0, 0 + 6 HFSDBG_READ_BITMAP_BLOCK startBlock, 0, 0, 0, 0 ... err, 0, 0, 0, 0 + 7 HFSDBG_RELEASE_BITMAP_BLOCK dirty, 0, 0, 0, 0 ... 0, 0, 0, 0, 0 + 8 HFSDBG_ALLOC_CONTIG_BITMAP startBlock, minBlocks, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 9 HFSDBG_ALLOC_ANY_BITMAP startBlock, endBlock, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 10 HFSDBG_ALLOC_KNOWN_BITMAP 0, 0, maxBlocks, 0, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 11 HFSDBG_MARK_ALLOC_BITMAP startBlock, blockCount, 0, 0, 0 ... err, 0, 0, 0, 0 + 12 HFSDBG_MARK_FREE_BITMAP startBlock, blockCount, valid, 0, 0 ... err, 0, 0, 0, 0 + 13 HFSDBG_BLOCK_FIND_CONTIG startBlock, endBlock, minBlocks, maxBlocks, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 14 HFSDBG_IS_ALLOCATED startBlock, blockCount, stop, 0, 0 ... 
err, 0, actualBlockCount, 0, 0 + 15 unused + 16 HFSDBG_RESET_EXTENT_CACHE 0, 0, 0, 0, 0 ... 0, 0, 0, 0, 0 + 17 HFSDBG_REMOVE_EXTENT_CACHE startBlock, blockCount, vcbFreeExtCnt, 0, 0 ... 0, 0, vcbFreeExtCnt, extentsRemoved, 0 + 18 HFSDBG_ADD_EXTENT_CACHE startBlock, blockCount, vcbFreeExtCnt, 0, 0 ... 0, 0, vcbFreeExtCnt, retval, 0 + 19 HFSDBG_READ_BITMAP_RANGE startBlock, iosize, 0, 0, 0 ... err, 0, 0, 0, 0 + 20 HFSDBG_RELEASE_SCAN_BITMAP 0, 0, 0, 0, 0 ... 0, 0, 0, 0, 0 + 21 HFSDBG_SYNCER hfsmp, now, mnt_last_write_completed_timestamp, mnt_pending_write_size, 0 ... err, deadline, 0, 0, 0 + 22 HFSDBG_SYNCER_TIMED now, last_write_completed, hfs_mp->mnt_last_write_issued_timestamp, mnt_pending_write_size, 0 ... now, mnt_last_write_completed_timestamp, mnt_last_write_issued_timestamp, hfs_mp->mnt_pending_write_size, 0 + 23 HFSDBG_UNMAP_SCAN hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0 + 24 HFSDBG_UNMAP_SCAN_TRIM hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0 */ diff --git a/bsd/hfs/hfs_link.c index 287fa6253..667bad9c6 100644 --- a/bsd/hfs/hfs_link.c +++ b/bsd/hfs/hfs_link.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2013 Apple Inc. All rights reserved. + * Copyright (c) 1999-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -238,8 +238,15 @@ hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, /* Put the original file back. */ err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); - if (err && err != EIO && err != ENXIO) - panic("hfs_makelink: error %d from cat_rename backout 1", err); + if (err) { + if (err != EIO && err != ENXIO) + printf("hfs_makelink: error %d from cat_rename backout 1\n", err); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + if (retval != EIO && retval != ENXIO) { + printf("hfs_makelink: createindirectlink (1) failed: %d\n", retval); + retval = EIO; + } goto out; } cp->c_attr.ca_linkref = indnodeno; @@ -287,10 +294,18 @@ hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, /* Put the original file back. */ err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); - if (err && err != EIO && err != ENXIO) - panic("hfs_makelink: error %d from cat_rename backout 2", err); + if (err) { + if (err != EIO && err != ENXIO) + printf("hfs_makelink: error %d from cat_rename backout 2\n", err); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } cp->c_attr.ca_linkref = 0; + + if (retval != EIO && retval != ENXIO) { + printf("hfs_makelink: createindirectlink (2) failed: %d\n", retval); + retval = EIO; + } goto out; } else if (retval == 0) { @@ -314,10 +329,6 @@ hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, if (newlink) { vnode_t vp; - if (retval != 0) { - panic("hfs_makelink: retval %d but newlink = 1!\n", retval); - } - hfsmp->hfs_private_attr[type].ca_entries++; /* From application perspective, directory hard link is a * normal directory. Therefore count the new directory @@ -328,8 +339,12 @@ } retval = cat_update(hfsmp, &hfsmp->hfs_private_desc[type], &hfsmp->hfs_private_attr[type], NULL, NULL); - if (retval != 0 && retval != EIO && retval != ENXIO) { - panic("hfs_makelink: cat_update of privdir failed! (%d)\n", retval); + if (retval) { + if (retval != EIO && retval != ENXIO) { + printf("hfs_makelink: cat_update of privdir failed! (%d)\n", retval); + retval = EIO; + } + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); } cp->c_flag |= C_HARDLINK; @@ -427,6 +442,14 @@ hfs_vnop_link(struct vnop_link_args *ap) if (v_type == VBLK || v_type == VCHR) { return (EPERM); } + + /* + * For now, return ENOTSUP for a symlink target. This can happen + * for linkat(2) when called without AT_SYMLINK_FOLLOW. + */ + if (v_type == VLNK) + return (ENOTSUP); + if (v_type == VDIR) { #if CONFIG_HFS_DIRLINK /* Make sure our private directory exists. */ @@ -616,10 +639,14 @@ hfs_vnop_link(struct vnop_link_args *ap) tdcp->c_flag |= C_FORCEUPDATE; error = hfs_update(tdvp, 0); - if (error && error != EIO && error != ENXIO) { - panic("hfs_vnop_link: error %d updating tdvp %p\n", error, tdvp); + if (error) { + if (error != EIO && error != ENXIO) { + printf("hfs_vnop_link: error %d updating tdvp %p\n", error, tdvp); + error = EIO; + } + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); } - + if ((v_type == VDIR) && (fdcp != NULL) && ((fdcp->c_attr.ca_recflags & kHFSHasChildLinkMask) == 0)) { @@ -628,8 +655,12 @@ hfs_vnop_link(struct vnop_link_args *ap) fdcp->c_touch_chgtime = TRUE; fdcp->c_flag |= C_FORCEUPDATE; error = hfs_update(fdvp, 0); - if (error && error != EIO && error != ENXIO) { - panic("hfs_vnop_link: error %d updating fdvp %p\n", error, fdvp); + if (error) { + if (error != EIO && error != ENXIO) { + printf("hfs_vnop_link: error %d updating fdvp %p\n", error, fdvp); + // No point changing error as it's set immediately below + } + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); } /* Set kHFSHasChildLinkBit in the source hierarchy */ @@ -645,8 +676,10 @@ hfs_vnop_link(struct vnop_link_args *ap) /* Make sure update occurs inside transaction */ cp->c_flag |= C_FORCEUPDATE; - if ((error == 0) && (ret = hfs_update(vp, TRUE)) != 0 && ret != EIO && ret != ENXIO) { - panic("hfs_vnop_link: error %d updating vp @ %p\n", ret, vp); + if (error == 0 && (ret = hfs_update(vp, TRUE)) != 0) { + if (ret != EIO && ret != ENXIO) + printf("hfs_vnop_link: error %d updating vp @ %p\n", ret, vp); + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); } out: @@ -1107,7 +1140,7 @@ void hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid) { linkorigin_t *origin = NULL; - void * thread = current_thread(); + thread_t thread = current_thread(); int count = 0; int maxorigins = (S_ISDIR(cp->c_mode)) ?
MAX_CACHED_ORIGINS : MAX_CACHED_FILE_ORIGINS; /* @@ -1162,7 +1195,7 @@ void hfs_relorigin(struct cnode *cp, cnid_t parentcnid) { linkorigin_t *origin, *prev; - void * thread = current_thread(); + thread_t thread = current_thread(); TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) { if ((origin->lo_thread == thread) || @@ -1185,7 +1218,7 @@ hfs_haslinkorigin(cnode_t *cp) { if (cp->c_flag & C_HARDLINK) { linkorigin_t *origin; - void * thread = current_thread(); + thread_t thread = current_thread(); TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { if (origin->lo_thread == thread) { @@ -1207,7 +1240,7 @@ hfs_currentparent(cnode_t *cp) { if (cp->c_flag & C_HARDLINK) { linkorigin_t *origin; - void * thread = current_thread(); + thread_t thread = current_thread(); TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { if (origin->lo_thread == thread) { @@ -1229,7 +1262,7 @@ hfs_currentcnid(cnode_t *cp) { if (cp->c_flag & C_HARDLINK) { linkorigin_t *origin; - void * thread = current_thread(); + thread_t thread = current_thread(); TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { if (origin->lo_thread == thread) { diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c index c2599a8e3..e198d3190 100644 --- a/bsd/hfs/hfs_lookup.c +++ b/bsd/hfs/hfs_lookup.c @@ -289,10 +289,7 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int * directory has not been removed, then can consider * allowing file to be created. */ - if ((nameiop == CREATE || nameiop == RENAME || - (nameiop == DELETE && - (cnp->cn_flags & DOWHITEOUT) && - (cnp->cn_flags & ISWHITEOUT))) && + if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN) && !(ISSET(dcp->c_flag, C_DELETED | C_NOEXISTS))) { retval = EJUSTRETURN; diff --git a/bsd/hfs/hfs_macos_defs.h b/bsd/hfs/hfs_macos_defs.h index f150e2905..b4a303b88 100644 --- a/bsd/hfs/hfs_macos_defs.h +++ b/bsd/hfs/hfs_macos_defs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -149,7 +149,7 @@ typedef const UniChar * ConstUniCharArrayPtr; "Boolean" will remain an unsigned char for compatibility with source code written before "bool" existed. */ -#if !TYPE_BOOL +#if !TYPE_BOOL && !__bool_true_false_are_defined enum { false = 0, diff --git a/bsd/hfs/hfs_notification.c b/bsd/hfs/hfs_notification.c index c63d6307a..621c58de5 100644 --- a/bsd/hfs/hfs_notification.c +++ b/bsd/hfs/hfs_notification.c @@ -83,16 +83,21 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp) printf("hfs: set VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_dangerlimit); #if HFS_SPARSE_DEV - if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { - if (hfsmp->hfs_backingfs_rootvp) { - struct mount *mp = vnode_mount (hfsmp->hfs_backingfs_rootvp); - /* If we're a sparse device, dump some info about the backing store... */ - if (mp) { - printf("hfs: set VeryLowDisk: vol:%s, backingstore b_avail:%lld, tag:%d\n", hfsmp->vcbVN, mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag); - } - } + // If we're a sparse device, dump some info about the backing store.. 
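
The void * to thread_t changes above tighten the type of the per-thread hard-link origin cache. A minimal sketch of the lookup that cache serves, using the field names visible in the hunks (the struct and function names below are illustrative, not the kernel's):

    #include <sys/queue.h>   /* TAILQ macros; cnid_t and thread_t from kernel headers */

    struct my_linkorigin {
        TAILQ_ENTRY(my_linkorigin) lo_link;
        thread_t lo_thread;       /* owner thread; was "void *" before this patch */
        cnid_t   lo_parentcnid;   /* the parent this thread reached the link through */
    };
    TAILQ_HEAD(my_originhead, my_linkorigin);

    static cnid_t
    my_currentparent(struct my_originhead *list, cnid_t fallback)
    {
        struct my_linkorigin *origin;
        thread_t thread = current_thread();

        TAILQ_FOREACH(origin, list, lo_link) {
            if (origin->lo_thread == thread)
                return origin->lo_parentcnid;   /* this thread's view of the link */
        }
        return fallback;
    }
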
+ hfs_lock_mount(hfsmp); + vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp; + if (backing_vp && vnode_get(backing_vp) != 0) + backing_vp = NULL; + hfs_unlock_mount(hfsmp); + + if (backing_vp) { + struct mount *mp = vnode_mount(backing_vp); + printf("hfs: set VeryLowDisk: vol:%s, backingstore b_avail:%lld, tag:%d\n", + hfsmp->vcbVN, mp->mnt_vfsstat.f_bavail, backing_vp->v_tag); + vnode_put(backing_vp); } #endif + hfsmp->hfs_notification_conditions |= (VQ_VERYLOWDISK|VQ_LOWDISK); vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); } else if (state == 1) { diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index 690f30464..96e8c20ed 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -126,6 +126,7 @@ hfs_vnop_read(struct vnop_read_args *ap) int retval = 0; int took_truncate_lock = 0; int io_throttle = 0; + int throttled_count = 0; /* Preflight checks */ if (!vnode_isreg(vp)) { @@ -139,8 +140,14 @@ hfs_vnop_read(struct vnop_read_args *ap) return (0); /* Nothing left to do */ if (offset < 0) return (EINVAL); /* cant read from a negative offset */ - - + + if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == + (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { + /* Don't allow unencrypted io request from user space */ + return EPERM; + } + + #if HFS_COMPRESSION if (VNODE_IS_RSRC(vp)) { @@ -205,6 +212,13 @@ read_again: filesize = fp->ff_size; filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + + /* + * Check the file size. Note that per POSIX spec, we return 0 at + * file EOF, so attempting a read at an offset that is too big + * should just return 0 on HFS+. Since the return value was initialized + * to 0 above, we just jump to exit. HFS Standard has its own behavior. + */ if (offset > filesize) { if ((hfsmp->hfs_flags & HFS_STANDARD) && (offset > (off_t)MAXHFSFILESIZE)) { @@ -213,14 +227,14 @@ read_again: goto exit; } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, + KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START, (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle); cp->c_touch_acctime = TRUE; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, + KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END, (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); /* @@ -259,10 +273,14 @@ exit: } if (retval == EAGAIN) { throttle_lowpri_io(1); + throttled_count++; retval = 0; goto read_again; } + if (throttled_count) { + throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread())); + } return (retval); } @@ -295,6 +313,7 @@ hfs_vnop_write(struct vnop_write_args *ap) time_t orig_ctime=VTOC(vp)->c_ctime; int took_truncate_lock = 0; int io_return_on_throttle = 0; + int throttled_count = 0; struct rl_entry *invalid_range; #if HFS_COMPRESSION @@ -328,6 +347,13 @@ hfs_vnop_write(struct vnop_write_args *ap) #endif + if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == + (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { + /* Don't allow unencrypted io request from user space */ + return EPERM; + } + + resid = uio_resid(uio); offset = uio_offset(uio); @@ -368,7 +394,6 @@ hfs_vnop_write(struct vnop_write_args *ap) } again: - /* Protect against a size change. */ /* * Protect against a size change. 
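
Both hfs_vnop_read() and hfs_vnop_write() above now refuse requests that carry IO_SKIP_ENCRYPTION together with IO_SYSCALL_DISPATCH. A hedged restatement of that guard as a standalone helper (the helper name is hypothetical; the flag semantics follow the patch's own comment):

    static int
    reject_unencrypted_user_io(int ioflag)
    {
        /* IO_SKIP_ENCRYPTION is legitimate only for kernel-internal
         * callers such as swap; a request that also carries
         * IO_SYSCALL_DISPATCH came from user space, so refuse it. */
        if ((ioflag & (IO_SKIP_ENCRYPTION | IO_SYSCALL_DISPATCH)) ==
            (IO_SKIP_ENCRYPTION | IO_SYSCALL_DISPATCH))
            return EPERM;
        return 0;
    }
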
* @@ -447,10 +472,6 @@ again: } cnode_locked = 1; - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { - hfs_incr_gencount (cp); - } - /* * Now that we have the cnode lock, see if there are delayed zero fill ranges * overlapping our write. If so, we need the truncate lock exclusive (see above). @@ -470,7 +491,7 @@ again: goto again; } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START, (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); @@ -519,7 +540,7 @@ again: if (retval != E_NONE) break; filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE, + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE, (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); } (void) hfs_update(vp, TRUE); @@ -709,6 +730,7 @@ sizeok: cp->c_touch_chgtime = TRUE; cp->c_touch_modtime = TRUE; + hfs_incr_gencount(cp); } if (filesize > fp->ff_size) { /* @@ -738,13 +760,7 @@ sizeok: fp->ff_bytesread = 0; } } - fp->ff_new_size = 0; /* ff_size now has the correct size */ - - /* If we wrote some bytes, then touch the change and mod times */ - if (resid > uio_resid(uio)) { - cp->c_touch_chgtime = TRUE; - cp->c_touch_modtime = TRUE; - } + fp->ff_new_size = 0; /* ff_size now has the correct size */ } if (partialwrite) { uio_setresid(uio, (uio_resid(uio) + bytesToAdd)); @@ -759,44 +775,43 @@ sizeok: } ioerr_exit: - /* - * If we successfully wrote any data, and we are not the superuser - * we clear the setuid and setgid bits as a precaution against - * tampering. - */ - if (cp->c_mode & (S_ISUID | S_ISGID)) { - cred = vfs_context_ucred(ap->a_context); - if (resid > uio_resid(uio) && cred && suser(cred, NULL)) { - if (!cnode_locked) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; + if (resid > uio_resid(uio)) { + if (!cnode_locked) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + cnode_locked = 1; + } + + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + hfs_incr_gencount(cp); + + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. + */ + if (cp->c_mode & (S_ISUID | S_ISGID)) { + cred = vfs_context_ucred(ap->a_context); + if (cred && suser(cred, NULL)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); } - cp->c_mode &= ~(S_ISUID | S_ISGID); } } if (retval) { if (ioflag & IO_UNIT) { - if (!cnode_locked) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; - } (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, - 0, 0, ap->a_context); + 0, ap->a_context); uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); uio_setresid(uio, resid); filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; } - } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) { - if (!cnode_locked) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; - } + } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) retval = hfs_update(vp, TRUE); - } + /* Updating vcbWrCnt doesn't need to be atomic. 
*/ hfsmp->vcbWrCnt++; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END, + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END, (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); exit: if (cnode_locked) @@ -807,10 +822,14 @@ exit: } if (retval == EAGAIN) { throttle_lowpri_io(1); + throttled_count++; retval = 0; goto again; } + if (throttled_count) { + throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread())); + } return (retval); } @@ -1032,15 +1051,15 @@ struct cinfo { }; static int -snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg) +snoop_callback(const cnode_t *cp, void *arg) { - struct cinfo *cip = (struct cinfo *)arg; + struct cinfo *cip = arg; - cip->uid = attrp->ca_uid; - cip->gid = attrp->ca_gid; - cip->mode = attrp->ca_mode; - cip->parentcnid = descp->cd_parentcnid; - cip->recflags = attrp->ca_recflags; + cip->uid = cp->c_uid; + cip->gid = cp->c_gid; + cip->mode = cp->c_mode; + cip->parentcnid = cp->c_parentcnid; + cip->recflags = cp->c_attr.ca_recflags; return (0); } @@ -1057,36 +1076,41 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid, /* if this id matches the one the fsctl was called with, skip the lookup */ if (cnid == skip_cp->c_cnid) { - cnattrp->ca_uid = skip_cp->c_uid; - cnattrp->ca_gid = skip_cp->c_gid; - cnattrp->ca_mode = skip_cp->c_mode; - cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; - keyp->hfsPlus.parentID = skip_cp->c_parentcnid; + cnattrp->ca_uid = skip_cp->c_uid; + cnattrp->ca_gid = skip_cp->c_gid; + cnattrp->ca_mode = skip_cp->c_mode; + cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; + keyp->hfsPlus.parentID = skip_cp->c_parentcnid; } else { - struct cinfo c_info; - - /* otherwise, check the cnode hash incase the file/dir is incore */ - if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) { - cnattrp->ca_uid = c_info.uid; - cnattrp->ca_gid = c_info.gid; - cnattrp->ca_mode = c_info.mode; - cnattrp->ca_recflags = c_info.recflags; - keyp->hfsPlus.parentID = c_info.parentcnid; - } else { - int lockflags; - - if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp))) - throttle_lowpri_io(1); + struct cinfo c_info; + + /* otherwise, check the cnode hash incase the file/dir is incore */ + error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info); + + if (error == EACCES) { + // File is deleted + return ENOENT; + } else if (!error) { + cnattrp->ca_uid = c_info.uid; + cnattrp->ca_gid = c_info.gid; + cnattrp->ca_mode = c_info.mode; + cnattrp->ca_recflags = c_info.recflags; + keyp->hfsPlus.parentID = c_info.parentcnid; + } else { + int lockflags; + + if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp))) + throttle_lowpri_io(1); - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - /* lookup this cnid in the catalog */ - error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); + /* lookup this cnid in the catalog */ + error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); - hfs_systemfile_unlock(hfsmp, lockflags); + hfs_systemfile_unlock(hfsmp, lockflags); - cache->lookups++; - } + cache->lookups++; + } } return (error); @@ -1548,17 +1572,6 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, /* end "bulk-access" support */ -/* - * Callback for use with freeze ioctl. 
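
The throttled_count bookkeeping added to both the read and write paths above follows one shape: retry on EAGAIN, and if any retry happened, reset the throttle window on the way out so the thread is not penalised again for I/O it already waited on. A minimal sketch (do_cluster_io() is a hypothetical stand-in for the cluster call):

    extern int do_cluster_io(void);   /* hypothetical stand-in */

    static int
    throttled_io_loop(void)
    {
        int throttled_count = 0;
        int retval;

        for (;;) {
            retval = do_cluster_io();
            if (retval != EAGAIN)
                break;
            throttle_lowpri_io(1);    /* sleep out the current throttle window */
            throttled_count++;
        }
        if (throttled_count)
            throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
        return retval;
    }
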
- */ -static int -hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs) -{ - vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze"); - - return 0; -} - /* * Control filesystem operating characteristics. */ @@ -1655,169 +1668,6 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { return (error); } - case HFS_GET_WRITE_GEN_COUNTER: - { - struct cnode *cp = NULL; - int error; - u_int32_t *counter = (u_int32_t *)ap->a_data; - - cp = VTOC(vp); - - if (!vnode_isdir(vp) && !(vnode_isreg(vp)) && - !(vnode_islnk(vp))) { - error = EBADF; - *counter = 0; - return error; - } - - error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - if (error == 0) { - struct ubc_info *uip; - int is_mapped_writable = 0; - - if (UBCINFOEXISTS(vp)) { - uip = vp->v_ubcinfo; - if ((uip->ui_flags & UI_ISMAPPED) && (uip->ui_flags & UI_MAPPEDWRITE)) { - is_mapped_writable = 1; - } - } - - - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { - uint32_t gcount = hfs_get_gencount(cp); - // - // Even though we return EBUSY for files that are mmap'ed - // we also want to bump the value so that the write-gen - // counter will always be different once the file is unmapped - // (since the file may be unmapped but the pageouts have not - // yet happened). - // - if (is_mapped_writable) { - hfs_incr_gencount (cp); - gcount = hfs_get_gencount(cp); - } - - *counter = gcount; - } else if (S_ISDIR(cp->c_attr.ca_mode)) { - *counter = hfs_get_gencount(cp); - } else { - /* not a file or dir? silently return */ - *counter = 0; - } - hfs_unlock (cp); - - if (is_mapped_writable) { - error = EBUSY; - } - } - - return error; - } - - case HFS_GET_DOCUMENT_ID: - { - struct cnode *cp = NULL; - int error=0; - u_int32_t *document_id = (u_int32_t *)ap->a_data; - - cp = VTOC(vp); - - if (cp->c_desc.cd_cnid == kHFSRootFolderID) { - // the root-dir always has document id '2' (aka kHFSRootFolderID) - *document_id = kHFSRootFolderID; - - } else if ((S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode))) { - int mark_it = 0; - uint32_t tmp_doc_id; - - // - // we can use the FndrExtendedFileInfo because the doc-id is the first - // thing in both it and the FndrExtendedDirInfo struct which is fixed - // in format and can not change layout - // - struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16); - - hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); - - // - // if the cnode isn't UF_TRACKED and the doc-id-allocate flag isn't set - // then just return a zero for the doc-id - // - if (!(cp->c_bsdflags & UF_TRACKED) && !(ap->a_fflag & HFS_DOCUMENT_ID_ALLOCATE)) { - *document_id = 0; - hfs_unlock(cp); - return 0; - } - - // - // if the cnode isn't UF_TRACKED and the doc-id-allocate flag IS set, - // then set mark_it so we know to set the UF_TRACKED flag once the - // cnode is locked. - // - if (!(cp->c_bsdflags & UF_TRACKED) && (ap->a_fflag & HFS_DOCUMENT_ID_ALLOCATE)) { - mark_it = 1; - } - - tmp_doc_id = extinfo->document_id; // get a copy of this - - hfs_unlock(cp); // in case we have to call hfs_generate_document_id() - - // - // If the document_id isn't set, get a new one and then set it. - // Note: we first get the document id, then lock the cnode to - // avoid any deadlock potential between cp and the root vnode. 
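
The removed HFS_GET_DOCUMENT_ID handler relies on the document id living at a fixed spot in the Finder info, as its own comment notes. A short sketch of just that layout rule (the accessor name is hypothetical; FndrExtendedFileInfo per hfs_format.h):

    /* The 32-byte Finder info ends with FndrExtendedFileInfo (files) or
     * FndrExtendedDirInfo (directories); both begin with document_id at
     * byte offset 16, so one accessor serves files and directories. */
    static uint32_t
    my_get_document_id(const uint8_t finderinfo[32])
    {
        const struct FndrExtendedFileInfo *extinfo =
            (const struct FndrExtendedFileInfo *)(finderinfo + 16);
        return extinfo->document_id;
    }
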
- // - uint32_t new_id; - if (tmp_doc_id == 0 && (error = hfs_generate_document_id(hfsmp, &new_id)) == 0) { - - if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) { - extinfo->document_id = tmp_doc_id = new_id; - //printf("ASSIGNING: doc-id %d to ino %d\n", extinfo->document_id, cp->c_fileid); - - if (mark_it) { - cp->c_bsdflags |= UF_TRACKED; - } - - // mark the cnode dirty - cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; - - int lockflags; - if ((error = hfs_start_transaction(hfsmp)) == 0) { - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); - - (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); - - hfs_systemfile_unlock (hfsmp, lockflags); - (void) hfs_end_transaction(hfsmp); - } - -#if CONFIG_FSE - add_fsevent(FSE_DOCID_CHANGED, context, - FSE_ARG_DEV, hfsmp->hfs_raw_dev, - FSE_ARG_INO, (ino64_t)0, // src inode # - FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # - FSE_ARG_INT32, extinfo->document_id, - FSE_ARG_DONE); - - hfs_unlock (cp); // so we can send the STAT_CHANGED event without deadlocking - - if (need_fsevent(FSE_STAT_CHANGED, vp)) { - add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE); - } -#else - hfs_unlock (cp); -#endif - } - } - - *document_id = tmp_doc_id; - } else { - *document_id = 0; - } - - return error; - } - case HFS_TRANSFER_DOCUMENT_ID: { struct cnode *cp = NULL; @@ -1943,6 +1793,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { return error; } + + case HFS_PREV_LINK: case HFS_NEXT_LINK: { @@ -2114,20 +1966,11 @@ fail_change_next_allocation: vnode_ref(bsfs_rootvp); vnode_put(bsfs_rootvp); + hfs_lock_mount(hfsmp); hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; - hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; - /* The free extent cache is managed differently for sparse devices. - * There is a window between which the volume is mounted and the - * device is marked as sparse, so the free extent cache for this - * volume is currently initialized as normal volume (sorted by block - * count). Reset the cache so that it will be rebuilt again - * for sparse device (sorted by start block). - */ - ResetVCBFreeExtCache(hfsmp); - - hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; - hfsmp->hfs_sparsebandblks *= 4; + hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4; + hfs_unlock_mount(hfsmp); /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */ @@ -2150,6 +1993,15 @@ fail_change_next_allocation: } } + /* The free extent cache is managed differently for sparse devices. + * There is a window between which the volume is mounted and the + * device is marked as sparse, so the free extent cache for this + * volume is currently initialized as normal volume (sorted by block + * count). Reset the cache so that it will be rebuilt again + * for sparse device (sorted by start block). 
+ */ + ResetVCBFreeExtCache(hfsmp); + (void)vnode_put(di_vp); file_drop(bsdata->backingfd); return (0); @@ -2169,10 +2021,13 @@ fail_change_next_allocation: if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { + hfs_lock_mount(hfsmp); hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; tmpvp = hfsmp->hfs_backingfs_rootvp; hfsmp->hfs_backingfs_rootvp = NULLVP; hfsmp->hfs_sparsebandblks = 0; + hfs_unlock_mount(hfsmp); + vnode_rele(tmpvp); } return (0); @@ -2238,38 +2093,7 @@ fail_change_next_allocation: !kauth_cred_issuser(cred)) return (EACCES); - lck_rw_lock_exclusive(&hfsmp->hfs_insync); - - // flush things before we get started to try and prevent - // dirty data from being paged out while we're frozen. - // note: can't do this after taking the lock as it will - // deadlock against ourselves. - vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); - hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); - - // DO NOT call hfs_journal_flush() because that takes a - // shared lock on the global exclusive lock! - journal_flush(hfsmp->jnl, TRUE); - - // don't need to iterate on all vnodes, we just need to - // wait for writes to the system files and the device vnode - // - // Now that journal flush waits for all metadata blocks to - // be written out, waiting for btree writes is probably no - // longer required. - if (HFSTOVCB(hfsmp)->extentsRefNum) - vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze"); - if (HFSTOVCB(hfsmp)->catalogRefNum) - vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze"); - if (HFSTOVCB(hfsmp)->allocationsRefNum) - vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze"); - if (hfsmp->hfs_attribute_vp) - vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze"); - vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze"); - - hfsmp->hfs_freezing_proc = current_proc(); - - return (0); + return hfs_freeze(hfsmp); } case F_THAW_FS: { @@ -2278,20 +2102,7 @@ fail_change_next_allocation: !kauth_cred_issuser(cred)) return (EACCES); - // if we're not the one who froze the fs then we - // can't thaw it. - if (hfsmp->hfs_freezing_proc != current_proc()) { - return EPERM; - } - - // NOTE: if you add code here, also go check the - // code that "thaws" the fs in hfs_vnop_close() - // - hfsmp->hfs_freezing_proc = NULL; - hfs_unlock_global (hfsmp); - lck_rw_unlock_exclusive(&hfsmp->hfs_insync); - - return (0); + return hfs_thaw(hfsmp, current_proc()); } case HFS_BULKACCESS_FSCTL: { @@ -2430,6 +2241,52 @@ fail_change_next_allocation: return error; } + case F_SETIOTYPE: { + int error; + uint32_t iotypeflag = 0; + + struct cnode *cp = NULL; + /* + * lock the cnode, decorate the cnode flag, and bail out. + * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data == NULL) { + return EINVAL; + } + + /* + * Note that even though ap->a_data is of type caddr_t, we + * can only use 32 bits of flag values. 
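
F_FREEZE_FS and F_THAW_FS above now delegate to hfs_freeze() and hfs_thaw(), whose bodies are not part of this file. A hedged outline of what they must preserve from the inline code they replace (these are not the real implementations):

    static int
    freeze_outline(struct hfsmount *hfsmp)
    {
        lck_rw_lock_exclusive(&hfsmp->hfs_insync);

        /* Dirty data must be flushed before taking the global lock;
         * iterating vnodes afterwards would deadlock against ourselves. */
        hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);

        /* Call journal_flush() directly: hfs_journal_flush() takes a
         * shared global lock and would deadlock against the exclusive
         * lock held here. */
        journal_flush(hfsmp->jnl, TRUE);

        hfsmp->hfs_freezing_proc = current_proc();
        return 0;
    }

    static int
    thaw_outline(struct hfsmount *hfsmp, proc_t p)
    {
        if (hfsmp->hfs_freezing_proc != p)
            return EPERM;                    /* only the freezer may thaw */
        hfsmp->hfs_freezing_proc = NULL;
        hfs_unlock_global(hfsmp);
        lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
        return 0;
    }
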
+ */ + iotypeflag = (uint32_t) ap->a_data; + switch (iotypeflag) { + case F_IOTYPE_ISOCHRONOUS: + break; + default: + return EINVAL; + } + + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + cp = VTOC(vp); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + switch (iotypeflag) { + case F_IOTYPE_ISOCHRONOUS: + cp->c_flag |= C_IO_ISOCHRONOUS; + break; + default: + break; + } + hfs_unlock (cp); + } + return error; + } + case F_MAKECOMPRESSED: { int error = 0; uint32_t gen_counter; @@ -2466,7 +2323,7 @@ fail_change_next_allocation: hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); return error; } - + /* Are there any other usecounts/FDs? */ if (vnode_isinuse(vp, 1)) { hfs_unlock(cp); @@ -2474,7 +2331,6 @@ fail_change_next_allocation: return EBUSY; } - /* now we have the cnode locked down; Validate arguments */ if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) { /* EINVAL if you are trying to manipulate an IMMUTABLE file */ @@ -2490,8 +2346,9 @@ fail_change_next_allocation: */ reset_decmp = 1; cp->c_bsdflags |= UF_COMPRESSED; - - error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context); + + error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, + ap->a_context); } else { error = ESTALE; @@ -2734,7 +2591,7 @@ fail_change_next_allocation: return (EROFS); } printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED); break; case HFS_FSCTL_GET_JOURNAL_INFO: @@ -2780,7 +2637,113 @@ fail_change_next_allocation: printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN); break; } - + + + case HFS_FSINFO_METADATA_BLOCKS: { + int error; + struct hfsinfo_metadata *hinfo; + + hinfo = (struct hfsinfo_metadata *)ap->a_data; + + /* Get information about number of metadata blocks */ + error = hfs_getinfo_metadata_blocks(hfsmp, hinfo); + if (error) { + return error; + } + + break; + } + + case HFS_CS_FREESPACE_TRIM: { + int error = 0; + int lockflags = 0; + + /* Only root allowed */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + /* + * This core functionality is similar to hfs_scan_blocks(). + * The main difference is that hfs_scan_blocks() is called + * as part of mount where we are assured that the journal is + * empty to start with. This fcntl() can be called on a + * mounted volume, therefore it has to flush the content of + * the journal as well as ensure the state of summary table. + * + * This fcntl scans over the entire allocation bitmap, + * creates list of all the free blocks, and issues TRIM + * down to the underlying device. This can take long time + * as it can generate up to 512MB of read I/O. + */ + + if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) { + error = hfs_init_summary(hfsmp); + if (error) { + printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN); + return error; + } + } + + /* + * The journal maintains list of recently deallocated blocks to + * issue DKIOCUNMAPs when the corresponding journal transaction is + * flushed to the disk. To avoid any race conditions, we only + * want one active trim list and only one thread issuing DKIOCUNMAPs. + * Therefore we make sure that the journal trim list is sync'ed, + * empty, and not modifiable for the duration of our scan. + * + * Take the journal lock before flushing the journal to the disk. 
+ * We will keep on holding the journal lock till we don't get the + * bitmap lock to make sure that no new journal transactions can + * start. This will make sure that the journal trim list is not + * modified after the journal flush and before getting bitmap lock. + * We can release the journal lock after we acquire the bitmap + * lock as it will prevent any further block deallocations. + */ + hfs_journal_lock(hfsmp); + + /* Flush the journal and wait for all I/Os to finish up */ + error = hfs_journal_flush(hfsmp, TRUE); + if (error) { + hfs_journal_unlock(hfsmp); + return error; + } + + /* Take bitmap lock to ensure it is not being modified */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* Release the journal lock */ + hfs_journal_unlock(hfsmp); + + /* + * ScanUnmapBlocks reads the bitmap in large block size + * (up to 1MB) unlike the runtime which reads the bitmap + * in the 4K block size. This can cause buf_t collisions + * and potential data corruption. To avoid this, we + * invalidate all the existing buffers associated with + * the bitmap vnode before scanning it. + * + * Note: ScanUnmapBlock() cleans up all the buffers + * after itself, so there won't be any large buffers left + * for us to clean up after it returns. + */ + error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); + if (error) { + hfs_systemfile_unlock(hfsmp, lockflags); + return error; + } + + /* Traverse bitmap and issue DKIOCUNMAPs */ + error = ScanUnmapBlocks(hfsmp); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + return error; + } + + break; + } + default: return (ENOTTY); } @@ -3213,13 +3176,19 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) /* Mark buffer as containing static data if cnode flag set */ if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) { - bufattr_markgreedymode((bufattr_t)(&bp->b_attr)); + bufattr_markgreedymode(&bp->b_attr); + } + + /* mark buffer as containing burst mode data if cnode flag set */ + if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) { + bufattr_markisochronous(&bp->b_attr); } #if CONFIG_PROTECT cnode_t *cp = NULL; - if ((cp = cp_get_protected_cnode(vp)) != NULL) { + if ((!bufattr_rawencrypted(&bp->b_attr)) && + ((cp = cp_get_protected_cnode(vp)) != NULL)) { /* * We rely upon the truncate lock to protect the * CP cache key from getting tossed prior to our IO finishing here. @@ -3242,8 +3211,31 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) * with the CP blob being wiped out in the middle of the IO * because there isn't anything to toss; the VM swapfile key stays * in-core as long as the file is open. - * - * NB: + */ + + + /* + * Last chance: If this data protected I/O does not have unwrapped keys + * present, then try to get them. We already know that it should, by this point. + */ + if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) { + int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS); + if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) { + /* + * We have to be careful here. By this point in the I/O path, VM or the cluster + * engine has prepared a buf_t with the proper file offsets and all the rest, + * so simply erroring out will result in us leaking this particular buf_t. + * We need to properly decorate the buf_t just as buf_strategy would so as + * to make it appear that the I/O errored out with the particular error code. 
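
The HFS_CS_FREESPACE_TRIM handler above depends entirely on its lock order. Condensed to the three essential steps, with names as in the patch (the wrapper itself is hypothetical):

    static int
    trim_lock_order(struct hfsmount *hfsmp, int *lockflags)
    {
        int error;

        hfs_journal_lock(hfsmp);                  /* 1: block new transactions */
        error = hfs_journal_flush(hfsmp, TRUE);   /* 2: drain the journal trim list */
        if (error) {
            hfs_journal_unlock(hfsmp);
            return error;
        }
        *lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
        hfs_journal_unlock(hfsmp);                /* 3: safe now; the bitmap lock
                                                   * prevents further deallocations */
        return 0;
    }
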
+ */ + buf_seterror (bp, error); + buf_biodone(bp); + return error; + } + } + + /* + *NB: * For filesystem resize, we may not have access to the underlying * file's cache key for whatever reason (device may be locked). However, * we do not need it since we are going to use the temporary HFS-wide resize key @@ -3278,7 +3270,6 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf { register struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); - struct proc *p = vfs_context_proc(context);; kauth_cred_t cred = vfs_context_ucred(context); int retval; off_t bytesToAdd; @@ -3290,12 +3281,12 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf int lockflags; int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE); int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES); - + blksize = VTOVCB(vp)->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START, (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); if (length < 0) @@ -3349,8 +3340,9 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf /* All or nothing and don't round up to clumpsize. */ eflags = kEFAllMask | kEFNoClumpMask; - if (cred && suser(cred, NULL) != 0) + if (cred && (suser(cred, NULL) != 0)) { eflags |= kEFReserveMask; /* keep a reserve */ + } /* * Allocate Journal and Quota files in metadata zone. @@ -3372,6 +3364,10 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf lockflags |= SFL_EXTENTS; lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + /* + * Keep growing the file as long as the current EOF is + * less than the desired value. + */ while ((length > filebytes) && (retval == E_NONE)) { bytesToAdd = length - filebytes; retval = MacToVFSError(ExtendFileC(VTOVCB(vp), @@ -3406,11 +3402,15 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf if (retval) goto Err_Exit; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); } - if (!(flags & IO_NOZEROFILL)) { + if (ISSET(flags, IO_NOZEROFILL)) { + // An optimisation for the hibernation file + if (vnode_isswap(vp)) + rl_remove_all(&fp->ff_invalidranges); + } else { if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) { struct rl_entry *invalid_range; off_t zero_limit; @@ -3465,7 +3465,10 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf } else { /* Shorten the size of the file */ - if ((off_t)fp->ff_size > length) { + // An optimisation for the hibernation file + if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) { + rl_remove_all(&fp->ff_invalidranges); + } else if ((off_t)fp->ff_size > length) { /* Any space previously marked as invalid is now irrelevant: */ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges); } @@ -3499,55 +3502,48 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf hfs_unlock_mount (hfsmp); } - /* - * For a TBE process the deallocation of the file blocks is - * delayed until the file is closed. And hfs_close calls - * truncate with the IO_NDELAY flag set. So when IO_NDELAY - * isn't set, we make sure this isn't a TBE process. 
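
The buf_seterror()/buf_biodone() pair at the start of this hunk is the canonical way to fail an I/O that VM or the cluster layer has already packaged into a buf_t; simply returning the error would leak the buffer. As a tiny helper (name hypothetical):

    static int
    fail_buf_io(buf_t bp, int error)
    {
        buf_seterror(bp, error);   /* decorate the buffer with the error code */
        buf_biodone(bp);           /* complete it so it is reclaimed, not leaked */
        return error;
    }
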
- */ - if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) { #if QUOTA - off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); + off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); #endif /* QUOTA */ - if (hfs_start_transaction(hfsmp) != 0) { - retval = EINVAL; - goto Err_Exit; - } + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; + } - if (fp->ff_unallocblocks == 0) { - /* Protect extents b-tree and allocation bitmap */ - lockflags = SFL_BITMAP; - if (overflow_extents(fp)) - lockflags |= SFL_EXTENTS; - lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + if (fp->ff_unallocblocks == 0) { + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, - FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false)); + retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, + FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false)); - hfs_systemfile_unlock(hfsmp, lockflags); + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (hfsmp->jnl) { + if (retval == 0) { + fp->ff_size = length; } - if (hfsmp->jnl) { - if (retval == 0) { - fp->ff_size = length; - } - if (skipupdate) { - (void) hfs_minorupdate(vp); - } - else { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); - } + if (skipupdate) { + (void) hfs_minorupdate(vp); } - hfs_end_transaction(hfsmp); + else { + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } + } + hfs_end_transaction(hfsmp); - filebytes = (off_t)fp->ff_blocks * (off_t)blksize; - if (retval) - goto Err_Exit; + filebytes = (off_t)fp->ff_blocks * (off_t)blksize; + if (retval) + goto Err_Exit; #if QUOTA - /* These are bytesreleased */ - (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); + /* These are bytesreleased */ + (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); #endif /* QUOTA */ - } + /* * Only set update flag if the logical length changes & we aren't * suppressing modtime updates. 
@@ -3583,13 +3579,13 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf retval = hfs_update(vp, MNT_WAIT); } if (retval) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, -1, -1, -1, retval, 0); } Err_Exit: - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END, (int)length, (int)fp->ff_size, (int)filebytes, retval, 0); return (retval); @@ -3700,14 +3696,16 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, blksize = hfsmp->blockSize; /* Data Fork */ - if ((datafork != NULL) && (datafork->ff_blocks > 0)) { + if (datafork) { + datafork->ff_size = 0; + fileblocks = datafork->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ while (filebytes > 0) { - if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) { + if (filebytes > HFS_BIGFILE_SIZE) { filebytes -= HFS_BIGFILE_SIZE; } else { filebytes = 0; @@ -3730,9 +3728,6 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, hfs_systemfile_unlock(hfsmp, lockflags); } - if (error == 0) { - datafork->ff_size = filebytes; - } (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); /* Finish the transaction and start over if necessary */ @@ -3745,14 +3740,16 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, } /* Resource fork */ - if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) { + if (error == 0 && rsrcfork) { + rsrcfork->ff_size = 0; + fileblocks = rsrcfork->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ while (filebytes > 0) { - if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) { + if (filebytes > HFS_BIGFILE_SIZE) { filebytes -= HFS_BIGFILE_SIZE; } else { filebytes = 0; @@ -3775,9 +3772,6 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, hfs_systemfile_unlock(hfsmp, lockflags); } - if (error == 0) { - rsrcfork->ff_size = filebytes; - } (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); /* Finish the transaction and start over if necessary */ @@ -3792,19 +3786,52 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, return error; } +errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock) +{ + errno_t error; + + /* + * Call ubc_setsize to give the VM subsystem a chance to do + * whatever it needs to with existing pages before we delete + * blocks. Note that symlinks don't use the UBC so we'll + * get back ENOENT in that case. + */ + if (have_cnode_lock) { + error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY); + if (error == EAGAIN) { + cnode_t *cp = VTOC(vp); + + if (cp->c_truncatelockowner != current_thread()) { +#if DEVELOPMENT || DEBUG + panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!"); +#else + printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n"); +#endif + } + + hfs_unlock(cp); + error = ubc_setsize_ex(vp, len, 0); + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + } + } else + error = ubc_setsize_ex(vp, len, 0); + + return error == ENOENT ? 0 : error; +} /* * Truncate a cnode to at most length size, freeing (or adding) the * disk blocks. 
*/ int -hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, - int truncateflags, vfs_context_t context) +hfs_truncate(struct vnode *vp, off_t length, int flags, + int truncateflags, vfs_context_t context) { - struct filefork *fp = VTOF(vp); + struct filefork *fp = VTOF(vp); off_t filebytes; u_int32_t fileblocks; - int blksize, error = 0; + int blksize; + errno_t error = 0; struct cnode *cp = VTOC(vp); /* Cannot truncate an HFS directory! */ @@ -3812,7 +3839,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, return (EISDIR); } /* A swap file cannot change size. */ - if (vnode_isswap(vp) && (length != 0)) { + if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) { return (EPERM); } @@ -3820,24 +3847,17 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; - // - // Have to do this here so that we don't wind up with - // i/o pending for blocks that are about to be released - // if we truncate the file. - // - // If skipsetsize is set, then the caller is responsible - // for the ubc_setsize. - // - // Even if skipsetsize is set, if the length is zero we - // want to call ubc_setsize() because as of SnowLeopard - // it will no longer cause any page-ins and it will drop - // any dirty pages so that we don't do any i/o that we - // don't have to. This also prevents a race where i/o - // for truncated blocks may overwrite later data if the - // blocks get reallocated to a different file. - // - if (!skipsetsize || length == 0) - ubc_setsize(vp, length); + bool caller_has_cnode_lock = (cp->c_lockowner == current_thread()); + + error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock); + if (error) + return error; + + if (!caller_has_cnode_lock) { + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) + return error; + } // have to loop truncating or growing files that are // really big because otherwise transactions can get @@ -3845,7 +3865,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, if (length < filebytes) { while (filebytes > length) { - if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) { + if ((filebytes - length) > HFS_BIGFILE_SIZE) { filebytes -= HFS_BIGFILE_SIZE; } else { filebytes = length; @@ -3857,7 +3877,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, } } else if (length > filebytes) { while (filebytes < length) { - if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) { + if ((length - filebytes) > HFS_BIGFILE_SIZE) { filebytes += HFS_BIGFILE_SIZE; } else { filebytes = length; @@ -3876,9 +3896,16 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, fp->ff_bytesread = 0; } - return (error); -} + if (!caller_has_cnode_lock) + hfs_unlock(cp); + // Make sure UBC's size matches up (in case we didn't completely succeed) + errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock); + if (!error) + error = err2; + + return error; +} /* @@ -4016,13 +4043,13 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { /* Protect extents b-tree and allocation bitmap */ lockflags = SFL_BITMAP; if (overflow_extents(fp)) - lockflags |= SFL_EXTENTS; + lockflags |= SFL_EXTENTS; lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); if (moreBytesRequested >= HFS_BIGFILE_SIZE) { - bytesRequested = HFS_BIGFILE_SIZE; + bytesRequested = HFS_BIGFILE_SIZE; } else { - bytesRequested = moreBytesRequested; 
+ bytesRequested = moreBytesRequested; } if (extendFlags & kEFContigMask) { @@ -4079,14 +4106,18 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { } else { /* Shorten the size of the file */ - if (fp->ff_size > length) { - /* - * Any buffers that are past the truncation point need to be - * invalidated (to maintain buffer cache consistency). - */ - } + /* + * N.B. At present, this code is never called. If and when we + * do start using it, it looks like there might be slightly + * strange semantics with the file size: it's possible for the + * file size to *increase* e.g. if current file size is 5, + * length is 1024 and filebytes is 4096, the file size will + * end up being 1024 bytes. This isn't necessarily a problem + * but it's not consistent with the code above which doesn't + * change the file size. + */ - retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context); + retval = hfs_truncate(vp, length, 0, 0, ap->a_context); filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; /* @@ -4102,9 +4133,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { if (fp->ff_size > filebytes) { fp->ff_size = filebytes; - hfs_unlock(cp); - ubc_setsize(vp, fp->ff_size); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + hfs_ubc_setsize(vp, fp->ff_size, true); } } @@ -4146,8 +4175,10 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) upl_t upl; upl_page_info_t *pl; off_t f_offset; + off_t page_needed_f_offset; int offset; int isize; + int upl_size; int pg_index; boolean_t truncate_lock_held = FALSE; boolean_t file_converted = FALSE; @@ -4196,6 +4227,8 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) goto pagein_done; } + page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset; + retry_pagein: /* * take truncate lock (shared/recursive) to guard against @@ -4258,9 +4291,9 @@ retry_pagein: } ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1); - isize = ap->a_size; + upl_size = isize = ap->a_size; - /* + /* * Scan from the back to find the last page in the UPL, so that we * aren't looking at a UPL that may have already been freed by the * preceding aborts/completions. @@ -4328,6 +4361,7 @@ retry_pagein: int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ if (compressed) { + if (truncate_lock_held) { /* * can't hold the truncate lock when calling into the decmpfs layer @@ -4367,6 +4401,19 @@ retry_pagein: * indication that the pagein needs to be redriven */ ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART); + } else if (error == ENOSPC) { + + if (upl_size == PAGE_SIZE) + panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n"); + + ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY); + + ap->a_size = PAGE_SIZE; + ap->a_pl = NULL; + ap->a_pl_offset = 0; + ap->a_f_offset = page_needed_f_offset; + + goto retry_pagein; } goto pagein_next_range; } @@ -4502,10 +4549,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) a_flags = ap->a_flags; a_pl_offset = ap->a_pl_offset; - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { - hfs_incr_gencount (cp); - } - /* * we can tell if we're getting the new or old behavior from the UPL */ @@ -4734,20 +4777,41 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) } /* - * If data was written, update the modification time of the file. - * If setuid or setgid bits are set and this process is not the - * superuser then clear the setuid and setgid bits as a precaution - * against tampering. 
+ * If data was written, update the modification time of the file + * but only if it's mapped writable; we will have touched the + * modifcation time for direct writes. */ - if (retval == 0) { - cp->c_touch_modtime = TRUE; - cp->c_touch_chgtime = TRUE; - if ((cp->c_mode & (S_ISUID | S_ISGID)) && - (vfs_context_suser(ap->a_context) != 0)) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cp->c_mode &= ~(S_ISUID | S_ISGID); - hfs_unlock(cp); + if (retval == 0 && (ubc_is_mapped_writable(vp) + || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + // Check again with lock + bool mapped_writable = ubc_is_mapped_writable(vp); + if (mapped_writable + || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) { + cp->c_touch_modtime = TRUE; + cp->c_touch_chgtime = TRUE; + + /* + * We only need to increment the generation counter if + * it's currently mapped writable because we incremented + * the counter in hfs_vnop_mnomap. + */ + if (mapped_writable) + hfs_incr_gencount(VTOC(vp)); + + /* + * If setuid or setgid bits are set and this process is + * not the superuser then clear the setuid and setgid bits + * as a precaution against tampering. + */ + if ((cp->c_mode & (S_ISUID | S_ISGID)) && + (vfs_context_suser(ap->a_context) != 0)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); + } } + + hfs_unlock(cp); } pageout_done: @@ -5203,7 +5267,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY); } else { /* - * No need to call ubc_sync_range or hfs_invalbuf + * No need to call ubc_msync or hfs_invalbuf * since the file was copied using IO_NOCACHE and * the copy was done starting and ending on a page * boundary in the file. diff --git a/bsd/hfs/hfs_resize.c b/bsd/hfs/hfs_resize.c new file mode 100644 index 000000000..ceaa4d572 --- /dev/null +++ b/bsd/hfs/hfs_resize.c @@ -0,0 +1,3497 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" +#include "hfs_endian.h" +#include "hfs_btreeio.h" + +#if CONFIG_PROTECT +#include +#endif + +/* Enable/disable debugging code for live volume resizing */ +int hfs_resize_debug = 0; + +static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec); +static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context); +static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context); + +/* + * Extend a file system. + */ +int +hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) +{ + struct proc *p = vfs_context_proc(context); + kauth_cred_t cred = vfs_context_ucred(context); + struct vnode *vp; + struct vnode *devvp; + struct buf *bp; + struct filefork *fp = NULL; + ExtendedVCB *vcb; + struct cat_fork forkdata; + u_int64_t oldsize; + u_int64_t newblkcnt; + u_int64_t prev_phys_block_count; + u_int32_t addblks; + u_int64_t sector_count; + u_int32_t sector_size; + u_int32_t phys_sector_size; + u_int32_t overage_blocks; + daddr64_t prev_fs_alt_sector; + daddr_t bitmapblks; + int lockflags = 0; + int error; + int64_t oldBitmapSize; + + Boolean usedExtendFileC = false; + int transaction_begun = 0; + + devvp = hfsmp->hfs_devvp; + vcb = HFSTOVCB(hfsmp); + + /* + * - HFS Plus file systems only. + * - Journaling must be enabled. + * - No embedded volumes. + */ + if ((vcb->vcbSigWord == kHFSSigWord) || + (hfsmp->jnl == NULL) || + (vcb->hfsPlusIOPosOffset != 0)) { + return (EPERM); + } + /* + * If extending file system by non-root, then verify + * ownership and check permissions. + */ + if (suser(cred, NULL)) { + error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0); + + if (error) + return (error); + error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0); + if (error == 0) { + error = hfs_write_access(vp, cred, p, false); + } + hfs_unlock(VTOC(vp)); + vnode_put(vp); + if (error) + return (error); + + error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context); + if (error) + return (error); + } + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) { + return (ENXIO); + } + if (sector_size != hfsmp->hfs_logical_block_size) { + return (ENXIO); + } + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) { + return (ENXIO); + } + /* Check if partition size is correct for new file system size */ + if ((sector_size * sector_count) < newsize) { + printf("hfs_extendfs: not enough space on device (vol=%s)\n", hfsmp->vcbVN); + return (ENOSPC); + } + error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context); + if (error) { + if ((error != ENOTSUP) && (error != ENOTTY)) { + return (ENXIO); + } + /* If ioctl is not supported, force physical and logical sector size to be same */ + phys_sector_size = sector_size; + } + oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; + + /* + * Validate new size.
+ */ + if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) { + printf("hfs_extendfs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize); + return (EINVAL); + } + newblkcnt = newsize / vcb->blockSize; + if (newblkcnt > (u_int64_t)0xFFFFFFFF) { + printf ("hfs_extendfs: current blockSize=%u too small for newsize=%qu\n", hfsmp->blockSize, newsize); + return (EOVERFLOW); + } + + addblks = newblkcnt - vcb->totalBlocks; + + if (hfs_resize_debug) { + printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks); + printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks); + } + printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks); + + hfs_lock_mount (hfsmp); + if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { + hfs_unlock_mount(hfsmp); + error = EALREADY; + goto out; + } + hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; + hfs_unlock_mount (hfsmp); + + /* Start with a clean journal. */ + hfs_journal_flush(hfsmp, TRUE); + + /* + * Enclose changes inside a transaction. + */ + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + transaction_begun = 1; + + + /* Update the hfsmp fields for the physical information about the device */ + prev_phys_block_count = hfsmp->hfs_logical_block_count; + prev_fs_alt_sector = hfsmp->hfs_fs_avh_sector; + + hfsmp->hfs_logical_block_count = sector_count; + hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size; + + /* + * It is possible that the new file system is smaller than the partition size. + * Therefore, update offsets for AVH accordingly. + */ + if (hfs_resize_debug) { + printf ("hfs_extendfs: old: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) + + HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count); + + hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) + + HFS_ALT_SECTOR(sector_size, (newsize/hfsmp->hfs_logical_block_size)); + if (hfs_resize_debug) { + printf ("hfs_extendfs: new: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + + /* + * Note: we take the attributes lock in case we have an attribute data vnode + * which needs to change size. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + vp = vcb->allocationsRefNum; + fp = VTOF(vp); + bcopy(&fp->ff_data, &forkdata, sizeof(forkdata)); + + /* + * Calculate additional space required (if any) by allocation bitmap. + */ + oldBitmapSize = fp->ff_size; + bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize; + if (bitmapblks > (daddr_t)fp->ff_blocks) + bitmapblks -= fp->ff_blocks; + else + bitmapblks = 0; + + /* + * The allocation bitmap can contain unused bits that are beyond end of + * current volume's allocation blocks. Usually they are supposed to be + * zero'ed out but there can be cases where they might be marked as used. + * After extending the file system, those bits can represent valid + * allocation blocks, so we mark all the bits from the end of current + * volume to end of allocation bitmap as "free". + * + * Figure out the number of overage blocks before proceeding though, + * so we don't add more bytes to our I/O than necessary. 
+ * First figure out the total number of blocks representable by the + * end of the bitmap file vs. the total number of blocks in the new FS. + * Then subtract away the number of blocks in the current FS. This is how much + * we can mark as free right now without having to grow the bitmap file. + */ + overage_blocks = fp->ff_blocks * vcb->blockSize * 8; + overage_blocks = MIN (overage_blocks, newblkcnt); + overage_blocks -= vcb->totalBlocks; + + BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks); + + if (bitmapblks > 0) { + daddr64_t blkno; + daddr_t blkcnt; + off_t bytesAdded; + + /* + * Get the bitmap's current size (in allocation blocks) so we know + * where to start zero filling once the new space is added. We've + * got to do this before the bitmap is grown. + */ + blkno = (daddr64_t)fp->ff_blocks; + + /* + * Try to grow the allocation file in the normal way, using allocation + * blocks already existing in the file system. This way, we might be + * able to grow the bitmap contiguously, or at least in the metadata + * zone. + */ + error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0, + kEFAllMask | kEFNoClumpMask | kEFReserveMask + | kEFMetadataMask | kEFContigMask, &bytesAdded); + + if (error == 0) { + usedExtendFileC = true; + } else { + /* + * If the above allocation failed, fall back to allocating the new + * extent of the bitmap from the space we're going to add. Since those + * blocks don't yet belong to the file system, we have to update the + * extent list directly, and manually adjust the file size. + */ + bytesAdded = 0; + error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks); + if (error) { + printf("hfs_extendfs: error %d adding extents\n", error); + goto out; + } + fp->ff_blocks += bitmapblks; + VTOC(vp)->c_blocks = fp->ff_blocks; + VTOC(vp)->c_flag |= C_MODIFIED; + } + + /* + * Update the allocation file's size to include the newly allocated + * blocks. Note that ExtendFileC doesn't do this, which is why this + * statement is outside the above "if" statement. + */ + fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; + + /* + * Zero out the new bitmap blocks. + */ + { + + bp = NULL; + blkcnt = bitmapblks; + while (blkcnt > 0) { + error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp); + if (error) { + if (bp) { + buf_brelse(bp); + } + break; + } + bzero((char *)buf_dataptr(bp), vcb->blockSize); + buf_markaged(bp); + error = (int)buf_bwrite(bp); + if (error) + break; + --blkcnt; + ++blkno; + } + } + if (error) { + printf("hfs_extendfs: error %d clearing blocks\n", error); + goto out; + } + /* + * Mark the new bitmap space as allocated. + * + * Note that ExtendFileC will have marked any blocks it allocated, so + * this is only needed if we used AddFileExtent. Also note that this + * has to come *after* the zero filling of new blocks in the case where + * we used AddFileExtent (since the part of the bitmap we're touching + * is in those newly allocated blocks). + */ + if (!usedExtendFileC) { + error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks); + if (error) { + printf("hfs_extendfs: error %d setting bitmap\n", error); + goto out; + } + vcb->freeBlocks -= bitmapblks; + } + } + + /* + * Mark the new alternate VH as allocated. 
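+	 * The alternate volume header occupies the last 1024 bytes of the
+	 * volume, so with 512-byte allocation blocks it spans the last two
+	 * blocks; with any larger block size it fits within the last block.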
+ */ + if (vcb->blockSize == 512) + error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2); + else + error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1); + if (error) { + printf("hfs_extendfs: error %d setting bitmap (VH)\n", error); + goto out; + } + + /* + * Mark the old alternate VH as free. + */ + if (vcb->blockSize == 512) + (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2); + else + (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1); + + /* + * Adjust file system variables for new space. + */ + vcb->totalBlocks += addblks; + vcb->freeBlocks += addblks; + MarkVCBDirty(vcb); + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (error) { + printf("hfs_extendfs: couldn't flush volume headers (%d)", error); + /* + * Restore to old state. + */ + if (usedExtendFileC) { + (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp), + FTOC(fp)->c_fileid, false); + } else { + fp->ff_blocks -= bitmapblks; + fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; + /* + * No need to mark the excess blocks free since those bitmap blocks + * are no longer part of the bitmap. But we do need to undo the + * effect of the "vcb->freeBlocks -= bitmapblks" above. + */ + vcb->freeBlocks += bitmapblks; + } + vcb->totalBlocks -= addblks; + vcb->freeBlocks -= addblks; + hfsmp->hfs_logical_block_count = prev_phys_block_count; + hfsmp->hfs_fs_avh_sector = prev_fs_alt_sector; + /* Do not revert hfs_partition_avh_sector because the + * partition size is larger than file system size + */ + MarkVCBDirty(vcb); + if (vcb->blockSize == 512) { + if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) { + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + } else { + if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) { + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + } + goto out; + } + /* + * Invalidate the old alternate volume header. We are growing the filesystem so + * this sector must be returned to the FS as free space. + */ + bp = NULL; + if (prev_fs_alt_sector) { + if (buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(prev_fs_alt_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) { + journal_modify_block_start(hfsmp->jnl, bp); + + bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize); + + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); + } else if (bp) { + buf_brelse(bp); + } + } + + /* + * Update the metadata zone size based on current volume size + */ + hfs_metadatazone_init(hfsmp, false); + + /* + * Adjust the size of hfsmp->hfs_attrdata_vp + */ + if (hfsmp->hfs_attrdata_vp) { + struct cnode *attr_cp; + struct filefork *attr_fp; + + if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { + attr_cp = VTOC(hfsmp->hfs_attrdata_vp); + attr_fp = VTOF(hfsmp->hfs_attrdata_vp); + + attr_cp->c_blocks = newblkcnt; + attr_fp->ff_blocks = newblkcnt; + attr_fp->ff_extents[0].blockCount = newblkcnt; + attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; + ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size); + vnode_put(hfsmp->hfs_attrdata_vp); + } + } + + /* + * We only update hfsmp->allocLimit if totalBlocks actually increased. 
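+	 * If the volume header flush above failed, we already jumped to the
+	 * "out" label with totalBlocks rolled back to its old value, so the
+	 * limit is only raised once the new block count is final.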
+ */
+	if (error == 0) {
+		UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
+	}
+
+	/* Release all locks and sync up journal content before
+	 * checking and extending, if required, the journal
+	 */
+	if (lockflags) {
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		lockflags = 0;
+	}
+	if (transaction_begun) {
+		hfs_end_transaction(hfsmp);
+		hfs_journal_flush(hfsmp, TRUE);
+		transaction_begun = 0;
+	}
+
+	/* Increase the journal size, if required. */
+	error = hfs_extend_journal(hfsmp, sector_size, sector_count, context);
+	if (error) {
+		printf ("hfs_extendfs: Could not extend journal size\n");
+		goto out_noalloc;
+	}
+
+	/* Log successful extending */
+	printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
+	       hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
+
+out:
+	if (error && fp) {
+		/* Restore allocation fork. */
+		bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
+		VTOC(vp)->c_blocks = fp->ff_blocks;
+
+	}
+
+out_noalloc:
+	hfs_lock_mount (hfsmp);
+	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
+	hfs_unlock_mount (hfsmp);
+	if (lockflags) {
+		hfs_systemfile_unlock(hfsmp, lockflags);
+	}
+	if (transaction_begun) {
+		hfs_end_transaction(hfsmp);
+		hfs_journal_flush(hfsmp, FALSE);
+		/* Just to be sure, sync all data to the disk */
+		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
+	}
+	if (error) {
+		printf ("hfs_extendfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
+	}
+
+	return MacToVFSError(error);
+}
+
+#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
+
+/*
+ * Truncate a file system (while still mounted).
+ */
+int
+hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
+{
+	u_int64_t oldsize;
+	u_int32_t newblkcnt;
+	u_int32_t reclaimblks = 0;
+	int lockflags = 0;
+	int transaction_begun = 0;
+	Boolean updateFreeBlocks = false;
+	Boolean disable_sparse = false;
+	int error = 0;
+
+	hfs_lock_mount (hfsmp);
+	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
+		hfs_unlock_mount (hfsmp);
+		return (EALREADY);
+	}
+	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
+	hfsmp->hfs_resize_blocksmoved = 0;
+	hfsmp->hfs_resize_totalblocks = 0;
+	hfsmp->hfs_resize_progress = 0;
+	hfs_unlock_mount (hfsmp);
+
+	/*
+	 * - Journaled HFS Plus volumes only.
+	 * - No embedded volumes.
+	 */
+	if ((hfsmp->jnl == NULL) ||
+	    (hfsmp->hfsPlusIOPosOffset != 0)) {
+		error = EPERM;
+		goto out;
+	}
+	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
+	newblkcnt = newsize / hfsmp->blockSize;
+	reclaimblks = hfsmp->totalBlocks - newblkcnt;
+
+	if (hfs_resize_debug) {
+		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
+		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
+	}
+
+	/* Make sure new size is valid. */
+	if ((newsize < HFS_MIN_SIZE) ||
+	    (newsize >= oldsize) ||
+	    (newsize % hfsmp->hfs_logical_block_size) ||
+	    (newsize % hfsmp->hfs_physical_block_size)) {
+		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Make sure that the file system has enough free blocks to reclaim.
+	 *
+	 * Before resize, the disk is divided into four zones -
+	 *	A. Allocated_Stationary - These are allocated blocks that exist
+	 *	   before the new end of disk.  These blocks will not be
+	 *	   relocated or modified during resize.
+	 *	B. Free_Stationary - These are free blocks that exist before the
+	 *	   new end of disk.  These blocks can be used for any new
+	 *	   allocations during resize, including allocation for relocating
+	 *	   data from the area of disk being reclaimed.
+	 *	C. Allocated_To-Reclaim - These are allocated blocks that exist
+	 *	   beyond the new end of disk.  These blocks need to be reclaimed
+	 *	   during resize by allocating equal number of blocks in Free
+	 *	   Stationary zone and copying the data.
+	 *	D. Free_To-Reclaim - These are free blocks that exist beyond the
+	 *	   new end of disk.  Nothing special needs to be done to reclaim
+	 *	   them.
+	 *
+	 * Total number of blocks on the disk before resize:
+	 * ------------------------------------------------
+	 *	Total Blocks = Allocated_Stationary + Free_Stationary +
+	 *	               Allocated_To-Reclaim + Free_To-Reclaim
+	 *
+	 * Total number of blocks that need to be reclaimed:
+	 * ------------------------------------------------
+	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
+	 *
+	 * Note that the check below also makes sure that we have enough space
+	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
+	 * Therefore we do not need to check total number of blocks to relocate
+	 * later in the code.
+	 *
+	 * The condition below gets converted to:
+	 *
+	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
+	 *
+	 * which is equivalent to:
+	 *
+	 *	Allocated To-Reclaim >= Free Stationary
+	 */
+	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
+		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
+		error = ENOSPC;
+		goto out;
+	}
+
+	/* Start with a clean journal. */
+	hfs_journal_flush(hfsmp, TRUE);
+
+	if (hfs_start_transaction(hfsmp) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+	transaction_begun = 1;
+
+	/* Take the bitmap lock to update the alloc limit field */
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+	/*
+	 * Prevent new allocations from using the part we're trying to truncate.
+	 *
+	 * NOTE: allocLimit is set to the allocation block number where the new
+	 * alternate volume header will be.  That way there will be no files to
+	 * interfere with allocating the new alternate volume header, and no files
+	 * in the allocation blocks beyond (i.e. the blocks we're trying to
+	 * truncate away).
+	 */
+	if (hfsmp->blockSize == 512) {
+		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
+	}
+	else {
+		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
+	}
+
+	/* Sparse devices use first fit allocation which is not ideal
+	 * for volume resize which requires best fit allocation.  If a
+	 * sparse device is being truncated, disable the sparse device
+	 * property temporarily for the duration of resize.  Also reset
+	 * the free extent cache so that it is rebuilt as sorted by
+	 * totalBlocks instead of startBlock.
+	 *
+	 * Note that this will affect all allocations on the volume and
+	 * ideal fix would be just to modify resize-related allocations,
+	 * but it will result in complexity like handling of two free
+	 * extent caches sorted differently, etc.  So we stick to this
+	 * solution for now.
+	 */
+	hfs_lock_mount (hfsmp);
+	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
+		ResetVCBFreeExtCache(hfsmp);
+		disable_sparse = true;
+	}
+
+	/*
+	 * Update the volume free block count to reflect the total number
+	 * of free blocks that will exist after a successful resize.
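+	 * (Hypothetical numbers: with freeBlocks = 500 and reclaimblks = 300,
+	 * the count drops to 200 here.  Each relocation below then allocates
+	 * and deallocates with HFS_ALLOC_SKIPFREEBLKS, so the count stays at
+	 * 200 throughout the copy phase.)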
+	 * Relocation of extents will result in no net change in the total
+	 * free space on the disk.  Therefore the code that allocates
+	 * space for new extent and deallocates the old extent explicitly
+	 * prevents updating the volume free block count.  It will also
+	 * prevent false disk full error when the number of blocks in
+	 * an extent being relocated is more than the free blocks that
+	 * will exist after the volume is resized.
+	 */
+	hfsmp->freeBlocks -= reclaimblks;
+	updateFreeBlocks = true;
+	hfs_unlock_mount(hfsmp);
+
+	if (lockflags) {
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		lockflags = 0;
+	}
+
+	/*
+	 * Update the metadata zone size to match the new volume size,
+	 * and if it is too small, the metadata zone might be disabled.
+	 */
+	hfs_metadatazone_init(hfsmp, false);
+
+	/*
+	 * If some files have blocks at or beyond the location of the
+	 * new alternate volume header, recalculate free blocks and
+	 * reclaim blocks.  Otherwise just update free blocks count.
+	 *
+	 * The current allocLimit is set to the location of new alternate
+	 * volume header, and reclaimblks are the total number of blocks
+	 * that need to be reclaimed.  So the check below is really
+	 * ignoring the blocks allocated for old alternate volume header.
+	 */
+	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
+		/*
+		 * hfs_reclaimspace will use separate transactions when
+		 * relocating files (so we don't overwhelm the journal).
+		 */
+		hfs_end_transaction(hfsmp);
+		transaction_begun = 0;
+
+		/* Attempt to reclaim some space. */
+		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
+		if (error != 0) {
+			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
+			error = ENOSPC;
+			goto out;
+		}
+		if (hfs_start_transaction(hfsmp) != 0) {
+			error = EINVAL;
+			goto out;
+		}
+		transaction_begun = 1;
+
+		/* Check if we're clear now. */
+		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
+		if (error != 0) {
+			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
+			error = EAGAIN;	/* tell client to try again */
+			goto out;
+		}
+	}
+
+	/*
+	 * Note: we take the attributes lock in case we have an attribute data vnode
+	 * which needs to change size.
+	 */
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+	/*
+	 * Allocate last 1KB for alternate volume header.
+	 */
+	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
+	if (error) {
+		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
+		goto out;
+	}
+
+	/*
+	 * Mark the old alternate volume header as free.
+	 * We don't bother shrinking allocation bitmap file.
+	 */
+	if (hfsmp->blockSize == 512)
+		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
+	else
+		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
+
+	/* Don't invalidate the old AltVH yet.  It is still valid until the partition size is updated ! */
+
+	/* Log successful shrinking. */
+	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
+	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);
+
+	/*
+	 * Adjust file system variables and flush them to disk.
+	 *
+	 * Note that although the logical block count is updated here, it is only
+	 * done for the benefit/convenience of the partition management software.  The
+	 * logical block count change has not actually been propagated to
+	 * the disk device yet (and we won't get any notification when it does).
+ */ + hfsmp->totalBlocks = newblkcnt; + hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size; + hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; + + /* + * At this point, a smaller HFS file system exists in a larger volume. + * As per volume format, the alternate volume header is located 1024 bytes + * before end of the partition. So, until the partition is also resized, + * a valid alternate volume header will need to be updated at 1024 bytes + * before end of the volume. Under normal circumstances, a file system + * resize is always followed by a volume resize, so we also need to + * write a copy of the new alternate volume header at 1024 bytes before + * end of the new file system. + */ + if (hfs_resize_debug) { + printf ("hfs_truncatefs: old: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + hfsmp->hfs_fs_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + /* Note hfs_partition_avh_sector stays unchanged! partition size has not yet been modified */ + if (hfs_resize_debug) { + printf ("hfs_truncatefs: new: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + + MarkVCBDirty(hfsmp); + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (error) { + panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); + } + + /* + * Adjust the size of hfsmp->hfs_attrdata_vp + */ + if (hfsmp->hfs_attrdata_vp) { + struct cnode *cp; + struct filefork *fp; + + if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { + cp = VTOC(hfsmp->hfs_attrdata_vp); + fp = VTOF(hfsmp->hfs_attrdata_vp); + + cp->c_blocks = newblkcnt; + fp->ff_blocks = newblkcnt; + fp->ff_extents[0].blockCount = newblkcnt; + fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; + ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size); + vnode_put(hfsmp->hfs_attrdata_vp); + } + } + +out: + /* + * Update the allocLimit to acknowledge the last one or two blocks now. + * Add it to the tree as well if necessary. + */ + UpdateAllocLimit (hfsmp, hfsmp->totalBlocks); + + hfs_lock_mount (hfsmp); + if (disable_sparse == true) { + /* Now that resize is completed, set the volume to be sparse + * device again so that all further allocations will be first + * fit instead of best fit. Reset free extent cache so that + * it is rebuilt. 
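+	 * (First fit tends to reuse the lowest-numbered free blocks, which
+	 * keeps a sparse backing store from growing; best fit is what gives
+	 * the resize code the tight contiguous runs it needs.)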
+ */ + hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; + ResetVCBFreeExtCache(hfsmp); + } + + if (error && (updateFreeBlocks == true)) { + hfsmp->freeBlocks += reclaimblks; + } + + if (hfsmp->nextAllocation >= hfsmp->allocLimit) { + hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; + } + hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; + hfs_unlock_mount (hfsmp); + + /* On error, reset the metadata zone for original volume size */ + if (error && (updateFreeBlocks == true)) { + hfs_metadatazone_init(hfsmp, false); + } + + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (transaction_begun) { + hfs_end_transaction(hfsmp); + hfs_journal_flush(hfsmp, FALSE); + /* Just to be sure, sync all data to the disk */ + (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + } + + if (error) { + printf ("hfs_truncatefs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); + } + + return MacToVFSError(error); +} + + +/* + * Invalidate the physical block numbers associated with buffer cache blocks + * in the given extent of the given vnode. + */ +struct hfs_inval_blk_no { + daddr64_t sectorStart; + daddr64_t sectorCount; +}; +static int +hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in) +{ + daddr64_t blkno; + struct hfs_inval_blk_no *args; + + blkno = buf_blkno(bp); + args = args_in; + + if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount) + buf_setblkno(bp, buf_lblkno(bp)); + + return BUF_RETURNED; +} +static void +hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount) +{ + struct hfs_inval_blk_no args; + args.sectorStart = sectorStart; + args.sectorCount = sectorCount; + + buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args); +} + + +/* + * Copy the contents of an extent to a new location. Also invalidates the + * physical block number of any buffer cache block in the copied extent + * (so that if the block is written, it will go through VNOP_BLOCKMAP to + * determine the new physical block number). + * + * At this point, for regular files, we hold the truncate lock exclusive + * and the cnode lock exclusive. + */ +static int +hfs_copy_extent( + struct hfsmount *hfsmp, + struct vnode *vp, /* The file whose extent is being copied. */ + u_int32_t oldStart, /* The start of the source extent. */ + u_int32_t newStart, /* The start of the destination extent. */ + u_int32_t blockCount, /* The number of allocation blocks to copy. */ + vfs_context_t context) +{ + int err = 0; + size_t bufferSize; + void *buffer = NULL; + struct vfsioattr ioattr; + buf_t bp = NULL; + off_t resid; + size_t ioSize; + u_int32_t ioSizeSectors; /* Device sectors in this I/O */ + daddr64_t srcSector, destSector; + u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size; +#if CONFIG_PROTECT + int cpenabled = 0; +#endif + + /* + * Sanity check that we have locked the vnode of the file we're copying. + * + * But since hfs_systemfile_lock() doesn't actually take the lock on + * the allocation file if a journal is active, ignore the check if the + * file being copied is the allocation file. + */ + struct cnode *cp = VTOC(vp); + if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread()) + panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp); + +#if CONFIG_PROTECT + /* + * Prepare the CP blob and get it ready for use, if necessary. 
+ * + * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs), + * because they are implicitly protected via the media key on iOS. As such, they + * must not be relocated except with the media key. So it is OK to not pass down + * a special cpentry to the IOMedia/LwVM code for handling. + */ + if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) { + int cp_err = 0; + /* + * Ideally, the file whose extents we are about to manipulate is using the + * newer offset-based IVs so that we can manipulate it regardless of the + * current lock state. However, we must maintain support for older-style + * EAs. + * + * For the older EA case, the IV was tied to the device LBA for file content. + * This means that encrypted data cannot be moved from one location to another + * in the filesystem without garbling the IV data. As a result, we need to + * access the file's plaintext because we cannot do our AES-symmetry trick + * here. This requires that we attempt a key-unwrap here (via cp_handle_relocate) + * to make forward progress. If the keys are unavailable then we will + * simply stop the resize in its tracks here since we cannot move + * this extent at this time. + */ + if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) { + cp_err = cp_handle_relocate(cp, hfsmp); + } + + if (cp_err) { + printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err); + return cp_err; + } + + cpenabled = 1; + } +#endif + + + /* + * Determine the I/O size to use + * + * NOTE: Many external drives will result in an ioSize of 128KB. + * TODO: Should we use a larger buffer, doing several consecutive + * reads, then several consecutive writes? + */ + vfs_ioattr(hfsmp->hfs_mp, &ioattr); + bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt); + if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize)) + return ENOMEM; + + /* Get a buffer for doing the I/O */ + bp = buf_alloc(hfsmp->hfs_devvp); + buf_setdataptr(bp, (uintptr_t)buffer); + + resid = (off_t) blockCount * (off_t) hfsmp->blockSize; + srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; + destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; + while (resid > 0) { + ioSize = MIN(bufferSize, (size_t) resid); + ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size; + + /* Prepare the buffer for reading */ + buf_reset(bp, B_READ); + buf_setsize(bp, ioSize); + buf_setcount(bp, ioSize); + buf_setblkno(bp, srcSector); + buf_setlblkno(bp, srcSector); + + /* + * Note that because this is an I/O to the device vp + * it is correct to have lblkno and blkno both point to the + * start sector being read from. If it were being issued against the + * underlying file then that would be different. + */ + + /* Attach the new CP blob to the buffer if needed */ +#if CONFIG_PROTECT + if (cpenabled) { + if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) { + /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */ + cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT; + buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry); + } + else { + /* + * Use the cnode's cp key. This file is tied to the + * LBAs of the physical blocks that it occupies. 
+ */ + buf_setcpaddr (bp, cp->c_cpentry); + } + + /* Initialize the content protection file offset to start at 0 */ + buf_setcpoff (bp, 0); + } +#endif + + /* Do the read */ + err = VNOP_STRATEGY(bp); + if (!err) + err = buf_biowait(bp); + if (err) { +#if CONFIG_PROTECT + /* Turn the flag off in error cases. */ + if (cpenabled) { + cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT; + } +#endif + printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err); + break; + } + + /* Prepare the buffer for writing */ + buf_reset(bp, B_WRITE); + buf_setsize(bp, ioSize); + buf_setcount(bp, ioSize); + buf_setblkno(bp, destSector); + buf_setlblkno(bp, destSector); + if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl)) + buf_markfua(bp); + +#if CONFIG_PROTECT + /* Attach the CP to the buffer if needed */ + if (cpenabled) { + if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) { + buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry); + } + else { + /* + * Use the cnode's CP key. This file is still tied + * to the LBAs of the physical blocks that it occupies. + */ + buf_setcpaddr (bp, cp->c_cpentry); + } + /* + * The last STRATEGY call may have updated the cp file offset behind our + * back, so we cannot trust it. Re-initialize the content protection + * file offset back to 0 before initiating the write portion of this I/O. + */ + buf_setcpoff (bp, 0); + } +#endif + + /* Do the write */ + vnode_startwrite(hfsmp->hfs_devvp); + err = VNOP_STRATEGY(bp); + if (!err) { + err = buf_biowait(bp); + } +#if CONFIG_PROTECT + /* Turn the flag off regardless once the strategy call finishes. */ + if (cpenabled) { + cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT; + } +#endif + if (err) { + printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err); + break; + } + + resid -= ioSize; + srcSector += ioSizeSectors; + destSector += ioSizeSectors; + } + if (bp) + buf_free(bp); + if (buffer) + kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize); + + /* Make sure all writes have been flushed to disk. */ + if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) { + err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + if (err) { + printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err); + err = 0; /* Don't fail the copy. */ + } + } + + if (!err) + hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock); + + return err; +} + + +/* Structure to store state of reclaiming extents from a + * given file. hfs_reclaim_file()/hfs_reclaim_xattr() + * initializes the values in this structure which are then + * used by code that reclaims and splits the extents. 
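+ *
+ * Rough call flow through this file:
+ *
+ *   hfs_truncatefs()
+ *     hfs_reclaimspace()
+ *       hfs_reclaim_file() / hfs_reclaim_xattr()
+ *         hfs_reclaim_extent()
+ *           hfs_split_extent()  (when no contiguous run is big enough)
+ *           hfs_copy_extent()   (copies the data to its new location)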
+ */
+struct hfs_reclaim_extent_info {
+	struct vnode *vp;
+	u_int32_t fileID;
+	u_int8_t forkType;
+	u_int8_t is_dirlink;		/* Extent belongs to directory hard link */
+	u_int8_t is_sysfile;		/* Extent belongs to system file */
+	u_int8_t is_xattr;		/* Extent belongs to extent-based xattr */
+	u_int8_t extent_index;
+	int lockflags;			/* Locks that reclaim and split code should grab before modifying the extent record */
+	u_int32_t blocks_relocated;	/* Total blocks relocated for this file till now */
+	u_int32_t recStartBlock;	/* File allocation block number (FABN) for current extent record */
+	u_int32_t cur_blockCount;	/* Number of allocation blocks that have been checked for reclaim */
+	struct filefork *catalog_fp;	/* If non-NULL, extent is from catalog record */
+	union record {
+		HFSPlusExtentRecord overflow;	/* Extent record from overflow extents btree */
+		HFSPlusAttrRecord xattr;	/* Attribute record for large EAs */
+	} record;
+	HFSPlusExtentDescriptor *extents;	/* Pointer to current extent record being processed.
+						 * For catalog extent record, points to the correct
+						 * extent information in filefork.  For overflow extent
+						 * record, or xattr record, points to extent record
+						 * in the structure above
+						 */
+	struct cat_desc *dirlink_desc;
+	struct cat_attr *dirlink_attr;
+	struct filefork *dirlink_fork;	/* For directory hard links, fp actually points to this */
+	struct BTreeIterator *iterator;	/* Shared read/write iterator, hfs_reclaim_file/xattr()
+					 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
+					 * use it for writing updated extent record
+					 */
+	struct FSBufferDescriptor btdata;	/* Shared btdata for reading/writing extent record, same as iterator above */
+	u_int16_t recordlen;
+	int overflow_count;		/* For debugging, counter for overflow extent record */
+	FCB *fcb;			/* Pointer to the current btree being traversed */
+};
+
+/*
+ * Split the current extent into two extents, with first extent
+ * to contain given number of allocation blocks.  Splitting of
+ * extent creates one new extent entry which can result in
+ * shifting of many entries through all the extent records of a
+ * file, and/or creating a new extent record in the overflow
+ * extent btree.
+ *
+ * Example:
+ * The diagram below represents two consecutive extent records;
+ * for simplicity, let's call them record X and X+1 respectively.
+ * Interesting extent entries have been denoted by letters.
+ * If the letter is unchanged before and after split, it means
+ * that the extent entry was not modified during the split.
+ * A '.' means that the entry remains unchanged after the split
+ * and is not relevant for our example.  A '0' means that the
+ * extent entry is empty.
+ *
+ * If there isn't sufficient contiguous free space to relocate
+ * an extent (extent "C" below), we will have to break the one
+ * extent into multiple smaller extents, and relocate each of
+ * the smaller extents individually.  The way we do this is by
+ * finding the largest contiguous free space that is currently
+ * available (N allocation blocks), and then convert extent "C"
+ * into two extents, C1 and C2, that occupy exactly the same
+ * allocation blocks as extent C.  Extent C1 is the first
+ * N allocation blocks of extent C, and extent C2 is the remainder
+ * of extent C.  Then we can relocate extent C1 since we know
+ * we have enough contiguous free space to relocate it in its
+ * entirety.  We then repeat the process starting with extent C2.
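+ * (Hypothetical numbers: if C = (startBlock 1000, blockCount 50) and the
+ * largest contiguous free run is N = 30 blocks, the split yields
+ * C1 = (1000, 30) and C2 = (1030, 20); C1 is relocated first, then the
+ * process repeats for C2.)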
+ *
+ * In record X, only the entries following entry C are shifted, and
+ * the original entry C is replaced with two entries C1 and C2 which
+ * are actually two extent entries for contiguous allocation blocks.
+ *
+ * Note that the entry E from record X is shifted into record X+1 as
+ * the new first entry.  Since the first entry of record X+1 is updated,
+ * the FABN will also get updated with the blockCount of entry E.
+ * This also results in shifting of all extent entries in record X+1.
+ * Note that the number of empty entries after the split has been
+ * changed from 3 to 2.
+ *
+ * Before:
+ *              record X                       record X+1
+ * ---------------------===--------- ---------------------------------
+ * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
+ * ---------------------===--------- ---------------------------------
+ *
+ * After:
+ * ---------------------=======----- ---------------------------------
+ * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
+ * ---------------------=======----- ---------------------------------
+ *
+ * C1.startBlock = C.startBlock
+ * C1.blockCount = N
+ *
+ * C2.startBlock = C.startBlock + N
+ * C2.blockCount = C.blockCount - N
+ *
+ * FABN = old FABN - E.blockCount
+ *
+ * Inputs:
+ *	extent_info -   This is the structure that contains state about
+ *	                the current file, extent, and extent record that
+ *	                is being relocated.  This structure is shared
+ *	                among code that traverses through all the extents
+ *	                of the file, code that relocates extents, and
+ *	                code that splits the extent.
+ *	newBlockCount - The blockCount of the extent to be split after
+ *	                a successful split operation.
+ * Output:
+ *	Zero on success, non-zero on failure.
+ */
+static int
+hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
+{
+	int error = 0;
+	int index = extent_info->extent_index;
+	int i;
+	HFSPlusExtentDescriptor shift_extent;	/* Extent entry that should be shifted into next extent record */
+	HFSPlusExtentDescriptor last_extent;
+	HFSPlusExtentDescriptor *extents;	/* Pointer to current extent record being manipulated */
+	HFSPlusExtentRecord *extents_rec = NULL;
+	HFSPlusExtentKey *extents_key = NULL;
+	HFSPlusAttrRecord *xattr_rec = NULL;
+	HFSPlusAttrKey *xattr_key = NULL;
+	struct BTreeIterator iterator;
+	struct FSBufferDescriptor btdata;
+	uint16_t reclen;
+	uint32_t read_recStartBlock;	/* Starting allocation block number to read old extent record */
+	uint32_t write_recStartBlock;	/* Starting allocation block number to insert newly updated extent record */
+	Boolean create_record = false;
+	Boolean is_xattr;
+	struct cnode *cp;
+
+	is_xattr = extent_info->is_xattr;
+	extents = extent_info->extents;
+	cp = VTOC(extent_info->vp);
+
+	if (newBlockCount == 0) {
+		if (hfs_resize_debug) {
+			printf ("hfs_split_extent: No splitting required for newBlockCount=0\n");
+		}
+		return error;
+	}
+
+	if (hfs_resize_debug) {
+		printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
+	}
+
+	/* Extents overflow btree cannot have more than 8 extents.
+	 * No split allowed if the 8th extent is already used.
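+	 * (That is, when fileID is kHFSExtentsFileID the file being split is
+	 * the extents overflow btree itself; its own extents can only live in
+	 * the eight slots of its catalog record, since they cannot be pushed
+	 * into overflow records stored in the very tree being relocated.)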
+	 */
+	if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) {
+		printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n");
+		error = ENOSPC;
+		goto out;
+	}
+
+	/* Determine the starting allocation block number for the following
+	 * overflow extent record, if any, before the current record
+	 * gets modified.
+	 */
+	read_recStartBlock = extent_info->recStartBlock;
+	for (i = 0; i < kHFSPlusExtentDensity; i++) {
+		if (extents[i].blockCount == 0) {
+			break;
+		}
+		read_recStartBlock += extents[i].blockCount;
+	}
+
+	/* Shift and split */
+	if (index == kHFSPlusExtentDensity-1) {
+		/* The new extent created after split will go into following overflow extent record */
+		shift_extent.startBlock = extents[index].startBlock + newBlockCount;
+		shift_extent.blockCount = extents[index].blockCount - newBlockCount;
+
+		/* Last extent in the record will be split, so nothing to shift */
+	} else {
+		/* Splitting of extents can result in at most one extent entry
+		 * to be shifted into following overflow extent record.  So,
+		 * store the last extent entry for later.
+		 */
+		shift_extent = extents[kHFSPlusExtentDensity-1];
+		if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) {
+			printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount);
+		}
+
+		/* Start shifting extent information from the end of the extent
+		 * record to the index where we want to insert the new extent.
+		 * Note that kHFSPlusExtentDensity-1 is already saved above, and
+		 * does not need to be shifted.  The extent entry that is being
+		 * split does not get shifted.
+		 */
+		for (i = kHFSPlusExtentDensity-2; i > index; i--) {
+			if (hfs_resize_debug) {
+				if (extents[i].blockCount) {
+					printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
+				}
+			}
+			extents[i+1] = extents[i];
+		}
+	}
+
+	if (index == kHFSPlusExtentDensity-1) {
+		/* The second half of the extent being split will be the overflow
+		 * entry that will go into following overflow extent record.  The
+		 * value has been stored in 'shift_extent' above, so there is
+		 * nothing to be done here.
+		 */
+	} else {
+		/* Update the values in the second half of the extent being split
+		 * before updating the first half of the split.  Note that the
+		 * extent to split or first half of the split is at index 'index'
+		 * and a new extent or second half of the split will be inserted at
+		 * 'index+1' or into following overflow extent record.
+		 */
+		extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
+		extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
+	}
+	/* Update the extent being split, only the block count will change */
+	extents[index].blockCount = newBlockCount;
+
+	if (hfs_resize_debug) {
+		printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
+		if (index != kHFSPlusExtentDensity-1) {
+			printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
+		} else {
+			printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
+		}
+	}
+
+	/* Write out information about the newly split extent to the disk */
+	if (extent_info->catalog_fp) {
+		/* (extent_info->catalog_fp != NULL) means the newly split
+		 * extent exists in the catalog record.  This means that
+		 * the cnode was updated.
Therefore, to write out the changes, + * mark the cnode as modified. We cannot call hfs_update() + * in this function because the caller hfs_reclaim_extent() + * is holding the catalog lock currently. + */ + cp->c_flag |= C_MODIFIED; + } else { + /* The newly split extent is for large EAs or is in overflow + * extent record, so update it directly in the btree using the + * iterator information from the shared extent_info structure + */ + error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), extent_info->recordlen); + if (error) { + printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error); + goto out; + } + } + + /* No extent entry to be shifted into another extent overflow record */ + if (shift_extent.blockCount == 0) { + if (hfs_resize_debug) { + printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n"); + } + error = 0; + goto out; + } + + /* The overflow extent entry has to be shifted into an extent + * overflow record. This means that we might have to shift + * extent entries from all subsequent overflow records by one. + * We start iteration from the first record to the last record, + * and shift the extent entry from one record to another. + * We might have to create a new extent record for the last + * extent entry for the file. + */ + + /* Initialize iterator to search the next record */ + bzero(&iterator, sizeof(iterator)); + if (is_xattr) { + /* Copy the key from the iterator that was used to update the modified attribute record. */ + xattr_key = (HFSPlusAttrKey *)&(iterator.key); + bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey)); + /* Note: xattr_key->startBlock will be initialized later in the iteration loop */ + + MALLOC(xattr_rec, HFSPlusAttrRecord *, + sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK); + if (xattr_rec == NULL) { + error = ENOMEM; + goto out; + } + btdata.bufferAddress = xattr_rec; + btdata.itemSize = sizeof(HFSPlusAttrRecord); + btdata.itemCount = 1; + extents = xattr_rec->overflowExtents.extents; + } else { + /* Initialize the extent key for the current file */ + extents_key = (HFSPlusExtentKey *) &(iterator.key); + extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; + extents_key->forkType = extent_info->forkType; + extents_key->fileID = extent_info->fileID; + /* Note: extents_key->startBlock will be initialized later in the iteration loop */ + + MALLOC(extents_rec, HFSPlusExtentRecord *, + sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK); + if (extents_rec == NULL) { + error = ENOMEM; + goto out; + } + btdata.bufferAddress = extents_rec; + btdata.itemSize = sizeof(HFSPlusExtentRecord); + btdata.itemCount = 1; + extents = extents_rec[0]; + } + + /* The overflow extent entry has to be shifted into an extent + * overflow record. This means that we might have to shift + * extent entries from all subsequent overflow records by one. + * We start iteration from the first record to the last record, + * examine one extent record in each iteration and shift one + * extent entry from one record to another. We might have to + * create a new extent record for the last extent entry for the + * file. + * + * If shift_extent.blockCount is non-zero, it means that there is + * an extent entry that needs to be shifted into the next + * overflow extent record. We keep on going till there are no such + * entries left to be shifted. 
This will also change the starting
+	 * allocation block number of the extent record, which is part of
+	 * the key for the extent record in each iteration.  Note that
+	 * because the extent record key is changing while we are searching,
+	 * the record cannot be updated directly; instead it has to be
+	 * deleted and inserted again.
+	 */
+	while (shift_extent.blockCount) {
+		if (hfs_resize_debug) {
+			printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
+		}
+
+		/* Search if there is any existing overflow extent record
+		 * that matches the current file and the logical start block
+		 * number.
+		 *
+		 * For this, the logical start block number in the key is
+		 * the value calculated based on the logical start block
+		 * number of the current extent record and the total number
+		 * of blocks existing in the current extent record.
+		 */
+		if (is_xattr) {
+			xattr_key->startBlock = read_recStartBlock;
+		} else {
+			extents_key->startBlock = read_recStartBlock;
+		}
+		error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
+		if (error) {
+			if (error != btNotFound) {
+				printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
+				goto out;
+			}
+			/* No matching record was found, so create a new extent record.
+			 * Note:  Since no record was found, we can't rely on the
+			 * btree key in the iterator any longer.  This will be initialized
+			 * later before we insert the record.
+			 */
+			create_record = true;
+		}
+
+		/* The extra extent entry from the previous record is being inserted
+		 * as the first entry in the current extent record.  This will change
+		 * the file allocation block number (FABN) of the current extent
+		 * record, which is the startBlock value from the extent record key.
+		 * Since one extra entry is being inserted in the record, the new
+		 * FABN for the record will be less than the old FABN by the number
+		 * of blocks in the new extent entry being inserted at the start.
+		 * We have to do this before we update read_recStartBlock to point
+		 * at the startBlock of the following record.
+		 */
+		write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
+		if (hfs_resize_debug) {
+			if (create_record) {
+				printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
+			}
+		}
+
+		/* Now update the read_recStartBlock to account for total number
+		 * of blocks in this extent record.  It will now point to the
+		 * starting allocation block number for the next extent record.
+		 */
+		for (i = 0; i < kHFSPlusExtentDensity; i++) {
+			if (extents[i].blockCount == 0) {
+				break;
+			}
+			read_recStartBlock += extents[i].blockCount;
+		}
+
+		if (create_record == true) {
+			/* Initialize new record content with only one extent entry */
+			bzero(extents, sizeof(HFSPlusExtentRecord));
+			/* The new record will contain only one extent entry */
+			extents[0] = shift_extent;
+			/* There are no more overflow extents to be shifted */
+			shift_extent.startBlock = shift_extent.blockCount = 0;
+
+			if (is_xattr) {
+				/* BTSearchRecord above returned btNotFound,
+				 * but since the attribute btree is never empty
+				 * if we are trying to insert new overflow
+				 * record for the xattrs, the xattr_key will
+				 * contain correct data.  So we don't need to
+				 * re-initialize it again like below.
+ */ + + /* Initialize the new xattr record */ + xattr_rec->recordType = kHFSPlusAttrExtents; + xattr_rec->overflowExtents.reserved = 0; + reclen = sizeof(HFSPlusAttrExtents); + } else { + /* BTSearchRecord above returned btNotFound, + * which means that extents_key content might + * not correspond to the record that we are + * trying to create, especially when the extents + * overflow btree is empty. So we reinitialize + * the extents_key again always. + */ + extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; + extents_key->forkType = extent_info->forkType; + extents_key->fileID = extent_info->fileID; + + /* Initialize the new extent record */ + reclen = sizeof(HFSPlusExtentRecord); + } + } else { + /* The overflow extent entry from previous record will be + * the first entry in this extent record. If the last + * extent entry in this record is valid, it will be shifted + * into the following extent record as its first entry. So + * save the last entry before shifting entries in current + * record. + */ + last_extent = extents[kHFSPlusExtentDensity-1]; + + /* Shift all entries by one index towards the end */ + for (i = kHFSPlusExtentDensity-2; i >= 0; i--) { + extents[i+1] = extents[i]; + } + + /* Overflow extent entry saved from previous record + * is now the first entry in the current record. + */ + extents[0] = shift_extent; + + if (hfs_resize_debug) { + printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock); + } + + /* The last entry from current record will be the + * overflow entry which will be the first entry for + * the following extent record. + */ + shift_extent = last_extent; + + /* Since the key->startBlock is being changed for this record, + * it should be deleted and inserted with the new key. + */ + error = BTDeleteRecord(extent_info->fcb, &iterator); + if (error) { + printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error); + goto out; + } + if (hfs_resize_debug) { + printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock)); + } + } + + /* Insert the newly created or modified extent record */ + bzero(&iterator.hint, sizeof(iterator.hint)); + if (is_xattr) { + xattr_key->startBlock = write_recStartBlock; + } else { + extents_key->startBlock = write_recStartBlock; + } + error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen); + if (error) { + printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error); + goto out; + } + if (hfs_resize_debug) { + printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock); + } + } + +out: + /* + * Extents overflow btree or attributes btree headers might have + * been modified during the split/shift operation, so flush the + * changes to the disk while we are inside journal transaction. + * We should only be able to generate I/O that modifies the B-Tree + * header nodes while we're in the middle of a journal transaction. + * Otherwise it might result in panic during unmount. + */ + BTFlushPath(extent_info->fcb); + + if (extents_rec) { + FREE (extents_rec, M_TEMP); + } + if (xattr_rec) { + FREE (xattr_rec, M_TEMP); + } + return error; +} + + +/* + * Relocate an extent if it lies beyond the expected end of volume. 
+ *
+ * This function is called for every extent of the file being relocated.
+ * It allocates space for relocation, copies the data, deallocates
+ * the old extent, and updates the corresponding on-disk extent.  If the
+ * function does not find contiguous space to relocate an extent, it
+ * splits the extent into smaller pieces to be able to relocate it out
+ * of the area of disk being reclaimed.  As an optimization, if an extent
+ * lies partially in the area of the disk being reclaimed, it is split
+ * so that we only have to relocate the area that was overlapping with
+ * the area of disk being reclaimed.
+ *
+ * Note that every extent is relocated in its own transaction so that
+ * it does not overwhelm the journal.  This function handles extent
+ * records that exist in the catalog record, extent records from the
+ * overflow extents btree, and extents for large EAs.
+ *
+ * Inputs:
+ *	extent_info - This is the structure that contains state about
+ *	              the current file, extent, and extent record that
+ *	              is being relocated.  This structure is shared
+ *	              among code that traverses through all the extents
+ *	              of the file, code that relocates extents, and
+ *	              code that splits the extent.
+ */
+static int
+hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
+{
+	int error = 0;
+	int index;
+	struct cnode *cp;
+	u_int32_t oldStartBlock;
+	u_int32_t oldBlockCount;
+	u_int32_t newStartBlock;
+	u_int32_t newBlockCount;
+	u_int32_t roundedBlockCount;
+	uint16_t node_size;
+	uint32_t remainder_blocks;
+	u_int32_t alloc_flags;
+	int blocks_allocated = false;
+
+	index = extent_info->extent_index;
+	cp = VTOC(extent_info->vp);
+
+	oldStartBlock = extent_info->extents[index].startBlock;
+	oldBlockCount = extent_info->extents[index].blockCount;
+
+	if (0 && hfs_resize_debug) {
+		printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
+	}
+
+	/* If the current extent lies completely within allocLimit,
+	 * it does not require any relocation.
+	 */
+	if ((oldStartBlock + oldBlockCount) <= allocLimit) {
+		extent_info->cur_blockCount += oldBlockCount;
+		return error;
+	}
+
+	/* Every extent should be relocated in its own transaction
+	 * to make sure that we don't overflow the journal buffer.
+	 */
+	error = hfs_start_transaction(hfsmp);
+	if (error) {
+		return error;
+	}
+	extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
+
+	/* Check if the extent lies partially in the area to reclaim,
+	 * i.e. it starts before allocLimit and ends beyond allocLimit.
+	 * We have already skipped extents that lie completely within
+	 * allocLimit in the check above, so we only check for the
+	 * startBlock.  If it lies partially, split it so that we
+	 * only relocate part of the extent.
+	 */
+	if (oldStartBlock < allocLimit) {
+		newBlockCount = allocLimit - oldStartBlock;
+
+		if (hfs_resize_debug) {
+			int idx = extent_info->extent_index;
+			printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
+		}
+
+		/* If the extent belongs to a btree, check and trim
+		 * it to be a multiple of the node size.
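+		 * (Hypothetical numbers: with 8 KB btree nodes on a volume with
+		 * 4 KB allocation blocks, a node spans 2 blocks; a tentative
+		 * newBlockCount of 7 leaves remainder_blocks = 1 and is trimmed
+		 * to 6 so that whole nodes stay within one extent.)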
+		 */
+		if (extent_info->is_sysfile) {
+			node_size = get_btree_nodesize(extent_info->vp);
+			/* If the btree node size is less than the block size,
+			 * splitting this extent will not split a node across
+			 * different extents.  So we only check and trim if
+			 * node size is more than the allocation block size.
+			 */
+			if (node_size > hfsmp->blockSize) {
+				remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
+				if (remainder_blocks) {
+					newBlockCount -= remainder_blocks;
+					if (hfs_resize_debug) {
+						printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
+					}
+				}
+			}
+			/* The newBlockCount is zero because of rounding-down so that
+			 * btree nodes are not split across extents.  Therefore this
+			 * straddling extent across resize-boundary does not require
+			 * splitting.  Skip over to relocating of complete extent.
+			 */
+			if (newBlockCount == 0) {
+				if (hfs_resize_debug) {
+					printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n");
+				}
+				goto relocate_full_extent;
+			}
+		}
+
+		/* Split the extents into two parts --- the first extent lies
+		 * completely within allocLimit and therefore does not require
+		 * relocation.  The second extent will require relocation which
+		 * will be handled when the caller calls this function again
+		 * for the next extent.
+		 */
+		error = hfs_split_extent(extent_info, newBlockCount);
+		if (error == 0) {
+			/* Split success, no relocation required */
+			goto out;
+		}
+		/* Split failed, so try to relocate entire extent */
+		if (hfs_resize_debug) {
+			int idx = extent_info->extent_index;
+			printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
+		}
+	}
+
+relocate_full_extent:
+	/* At this point, the current extent requires relocation.
+	 * We will try to allocate space equal to the size of the extent
+	 * being relocated first to try to relocate it without splitting.
+	 * If the allocation fails, we will try to allocate contiguous
+	 * blocks out of metadata zone.  If that allocation also fails,
+	 * then we will take whatever contiguous block run is returned
+	 * by the allocation, split the extent into two parts, and then
+	 * relocate the first split extent.
+	 */
+	alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
+	if (extent_info->is_sysfile) {
+		alloc_flags |= HFS_ALLOC_METAZONE;
+	}
+
+	error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
+			&newStartBlock, &newBlockCount);
+	if ((extent_info->is_sysfile == false) &&
+	    ((error == dskFulErr) || (error == ENOSPC))) {
+		/* For non-system files, try reallocating space in metadata zone */
+		alloc_flags |= HFS_ALLOC_METAZONE;
+		error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
+				alloc_flags, &newStartBlock, &newBlockCount);
+	}
+	if ((error == dskFulErr) || (error == ENOSPC)) {
+		/*
+		 * We did not find the desired contiguous space for this extent
+		 * when we asked for it, including the metazone allocations.
+		 * At this point we are not worrying about getting contiguity anymore.
+		 *
+		 * HOWEVER, if we now allow blocks to be used which were recently
+		 * de-allocated, we may find a contiguous range (though this seems
+		 * unlikely).
As a result, assume that we will have to split the + * current extent into two pieces, but if we are able to satisfy + * the request with a single extent, detect that as well. + */ + alloc_flags &= ~HFS_ALLOC_FORCECONTIG; + alloc_flags |= HFS_ALLOC_FLUSHTXN; + + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, + alloc_flags, &newStartBlock, &newBlockCount); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + + /* + * Allowing recently deleted extents may now allow us to find + * a single contiguous extent in the amount & size desired. If so, + * do NOT split this extent into two pieces. This is technically a + * check for "< oldBlockCount", but we use != to highlight the point + * that the special case is when they're equal. The allocator should + * never vend back more blocks than were requested. + */ + if (newBlockCount != oldBlockCount) { + blocks_allocated = true; + + /* The number of blocks allocated is less than the requested + * number of blocks. For btree extents, check and trim the + * extent to be multiple of the node size. + */ + if (extent_info->is_sysfile) { + node_size = get_btree_nodesize(extent_info->vp); + if (node_size > hfsmp->blockSize) { + remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize); + if (remainder_blocks) { + roundedBlockCount = newBlockCount - remainder_blocks; + /* Free tail-end blocks of the newly allocated extent */ + BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount, + newBlockCount - roundedBlockCount, + HFS_ALLOC_SKIPFREEBLKS); + newBlockCount = roundedBlockCount; + if (hfs_resize_debug) { + printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount); + } + if (newBlockCount == 0) { + printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID); + error = ENOSPC; + goto out; + } + } + } + } + + /* The number of blocks allocated is less than the number of + * blocks requested, so split this extent --- the first extent + * will be relocated as part of this function call and the caller + * will handle relocating the second extent by calling this + * function again for the second extent. 
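+		 * (Hypothetical numbers: if oldBlockCount = 100 but the allocator
+		 * could only vend newBlockCount = 60, the extent is split 60/40
+		 * below; the 60-block half is relocated now and the 40-block
+		 * remainder is handled on the caller's next pass.)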
+ */ + error = hfs_split_extent(extent_info, newBlockCount); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + oldBlockCount = newBlockCount; + } /* end oldBlockCount != newBlockCount */ + } /* end allocation request for any available free space */ + + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + blocks_allocated = true; + + /* Copy data from old location to new location */ + error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock, + newStartBlock, newBlockCount, context); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error); + goto out; + } + + /* Update the extent record with the new start block information */ + extent_info->extents[index].startBlock = newStartBlock; + + /* Sync the content back to the disk */ + if (extent_info->catalog_fp) { + /* Update the extents in catalog record */ + if (extent_info->is_dirlink) { + error = cat_update_dirlink(hfsmp, extent_info->forkType, + extent_info->dirlink_desc, extent_info->dirlink_attr, + &(extent_info->dirlink_fork->ff_data)); + } else { + cp->c_flag |= C_MODIFIED; + /* If this is a system file, sync volume headers on disk */ + if (extent_info->is_sysfile) { + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + } + } + } else { + /* Replace record for extents overflow or extents-based xattrs */ + error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), extent_info->recordlen); + } + if (error) { + printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error); + goto out; + } + + /* Deallocate the old extent */ + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + extent_info->blocks_relocated += newBlockCount; + + if (hfs_resize_debug) { + printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + } + +out: + if (error != 0) { + if (blocks_allocated == true) { + BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); + } + } else { + /* On success, increment the total allocation blocks processed */ + extent_info->cur_blockCount += newBlockCount; + } + + hfs_systemfile_unlock(hfsmp, extent_info->lockflags); + + /* For a non-system file, if an extent entry from catalog record + * was modified, sync the in-memory changes to the catalog record + * on disk before ending the transaction. 
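The node-size round-down earlier in this path is the subtle part of the split logic: a straddling extent of a B-tree file is trimmed so that no node ends up split across two extents. Below is a minimal user-space sketch of that arithmetic; the names are hypothetical, not HFS symbols.

#include <stdio.h>
#include <stdint.h>

/* Trim a block count down to a whole number of B-tree nodes. */
static uint32_t
trim_to_node_multiple(uint32_t block_count, uint32_t node_size,
                      uint32_t alloc_block_size)
{
    /* Only trim when one node spans more than one allocation block. */
    if (node_size > alloc_block_size) {
        uint32_t blocks_per_node = node_size / alloc_block_size;
        block_count -= block_count % blocks_per_node;
    }
    return block_count;
}

int main(void)
{
    /* 8 KB nodes on a 4 KB-block volume: 9 blocks round down to 8. */
    printf("%u\n", trim_to_node_multiple(9, 8192, 4096));
    return 0;
}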
+ */ + if ((extent_info->catalog_fp) && + (extent_info->is_sysfile == false)) { + (void) hfs_update(extent_info->vp, MNT_WAIT); + } + + hfs_end_transaction(hfsmp); + + return error; +} + +/* Report intermediate progress during volume resize */ +static void +hfs_truncatefs_progress(struct hfsmount *hfsmp) +{ + u_int32_t cur_progress = 0; + + hfs_resize_progress(hfsmp, &cur_progress); + if (cur_progress > (hfsmp->hfs_resize_progress + 9)) { + printf("hfs_truncatefs: %d%% done...\n", cur_progress); + hfsmp->hfs_resize_progress = cur_progress; + } + return; +} + +/* + * Reclaim space at the end of a volume for the given file and fork type. + * + * This routine attempts to move any extent which contains allocation blocks + * at or after "allocLimit." A separate transaction is used for every extent + * that needs to be moved. If there is no contiguous space available for + * moving an extent, it can be split into smaller extents. The contents of + * any moved extents are read and written via the volume's device vnode -- + * NOT via "vp." During the move, moved blocks which are part of a transaction + * have their physical block numbers invalidated so they will eventually be + * written to their new locations. + * + * This function is also called for directory hard links. Directory hard links + * are regular files with no data fork and a resource fork that contains alias + * information for backward compatibility with pre-Leopard systems. However, + * non-Mac OS X implementations can add/modify data fork or resource fork + * information to directory hard links, so we check, and if required, relocate + * both data fork and resource fork. + * + * Inputs: + * hfsmp The volume being resized. + * vp The vnode for the system file. + * fileID ID of the catalog record that needs to be relocated + * forktype The type of fork that needs to be relocated, + * kHFSResourceForkType for resource fork, + * kHFSDataForkType for data fork + * allocLimit Allocation limit for the new volume size, + * do not use this block or beyond. All extents + * that use this block or any blocks beyond this limit + * will be relocated. + * + * Side Effects: + * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation + * blocks that were relocated. + */ +static int +hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, + u_int8_t forktype, u_long allocLimit, vfs_context_t context) +{ + int error = 0; + struct hfs_reclaim_extent_info *extent_info; + int i; + int lockflags = 0; + struct cnode *cp; + struct filefork *fp; + int took_truncate_lock = false; + int release_desc = false; + HFSPlusExtentKey *key; + + /* If there is no vnode for this file, then there's nothing to do.
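hfs_truncatefs_progress above deliberately under-reports: it logs only when the completion percentage has advanced by at least ten points since the last report, keeping the kernel log quiet during long resizes. A stand-alone sketch of that throttle, with hypothetical names:

#include <stdio.h>
#include <stdint.h>

static uint32_t last_reported;   /* stands in for hfsmp->hfs_resize_progress */

static void
report_progress(uint32_t cur_progress)
{
    /* Print only when we have advanced by ten points or more. */
    if (cur_progress > last_reported + 9) {
        printf("%u%% done...\n", cur_progress);
        last_reported = cur_progress;
    }
}

int main(void)
{
    for (uint32_t p = 0; p <= 100; p++)
        report_progress(p);   /* prints 10%, 20%, ..., 100% */
    return 0;
}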
*/ + if (vp == NULL) { + return 0; + } + + cp = VTOC(vp); + + if (hfs_resize_debug) { + const char *filename = (const char *) cp->c_desc.cd_nameptr; + int namelen = cp->c_desc.cd_namelen; + + if (filename == NULL) { + filename = ""; + namelen = 0; + } + printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename); + } + + MALLOC(extent_info, struct hfs_reclaim_extent_info *, + sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK); + if (extent_info == NULL) { + return ENOMEM; + } + bzero(extent_info, sizeof(struct hfs_reclaim_extent_info)); + extent_info->vp = vp; + extent_info->fileID = fileID; + extent_info->forkType = forktype; + extent_info->is_sysfile = vnode_issystem(vp); + if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) { + extent_info->is_dirlink = true; + } + /* We always need allocation bitmap and extent btree lock */ + lockflags = SFL_BITMAP | SFL_EXTENTS; + if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) { + lockflags |= SFL_CATALOG; + } else if (fileID == kHFSAttributesFileID) { + lockflags |= SFL_ATTRIBUTE; + } else if (fileID == kHFSStartupFileID) { + lockflags |= SFL_STARTUP; + } + extent_info->lockflags = lockflags; + extent_info->fcb = VTOF(hfsmp->hfs_extents_vp); + + /* Flush data associated with current file on disk. + * + * If the current vnode is directory hard link, no flushing of + * journal or vnode is required. The current kernel does not + * modify data/resource fork of directory hard links, so nothing + * will be in the cache. If a directory hard link is newly created, + * the resource fork data is written directly using devvp and + * the code that actually relocates data (hfs_copy_extent()) also + * uses devvp for its I/O --- so they will see a consistent copy. + */ + if (extent_info->is_sysfile) { + /* If the current vnode is system vnode, flush journal + * to make sure that all data is written to the disk. + */ + error = hfs_journal_flush(hfsmp, TRUE); + if (error) { + printf ("hfs_reclaim_file: journal_flush returned %d\n", error); + goto out; + } + } else if (extent_info->is_dirlink == false) { + /* Flush all blocks associated with this regular file vnode. + * Normally there should not be buffer cache blocks for regular + * files, but for objects like symlinks, we can have buffer cache + * blocks associated with the vnode. Therefore we call + * buf_flushdirtyblks() also. + */ + buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file"); + + hfs_unlock(cp); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = true; + (void) cluster_push(vp, 0); + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + if (error) { + goto out; + } + + /* If the file no longer exists, nothing left to do */ + if (cp->c_flag & C_NOEXISTS) { + error = 0; + goto out; + } + + /* Wait for any in-progress writes to this vnode to complete, so that we'll + * be copying consistent bits. (Otherwise, it's possible that an async + * write will complete to the old extent after we read from it. That + * could lead to corruption.) + */ + error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file"); + if (error) { + goto out; + } + } + + if (hfs_resize_debug) { + printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? 
"dirlink" : "file"), fileID); + } + + if (extent_info->is_dirlink) { + MALLOC(extent_info->dirlink_desc, struct cat_desc *, + sizeof(struct cat_desc), M_TEMP, M_WAITOK); + MALLOC(extent_info->dirlink_attr, struct cat_attr *, + sizeof(struct cat_attr), M_TEMP, M_WAITOK); + MALLOC(extent_info->dirlink_fork, struct filefork *, + sizeof(struct filefork), M_TEMP, M_WAITOK); + if ((extent_info->dirlink_desc == NULL) || + (extent_info->dirlink_attr == NULL) || + (extent_info->dirlink_fork == NULL)) { + error = ENOMEM; + goto out; + } + + /* Lookup catalog record for directory hard link and + * create a fake filefork for the value looked up from + * the disk. + */ + fp = extent_info->dirlink_fork; + bzero(extent_info->dirlink_fork, sizeof(struct filefork)); + extent_info->dirlink_fork->ff_cp = cp; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + error = cat_lookup_dirlink(hfsmp, fileID, forktype, + extent_info->dirlink_desc, extent_info->dirlink_attr, + &(extent_info->dirlink_fork->ff_data)); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error); + goto out; + } + release_desc = true; + } else { + fp = VTOF(vp); + } + + extent_info->catalog_fp = fp; + extent_info->recStartBlock = 0; + extent_info->extents = extent_info->catalog_fp->ff_extents; + /* Relocate extents from the catalog record */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (fp->ff_extents[i].blockCount == 0) { + break; + } + extent_info->extent_index = i; + error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); + if (error) { + printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error); + goto out; + } + } + + /* If the number of allocation blocks processed for reclaiming + * are less than total number of blocks for the file, continuing + * working on overflow extents record. 
+ */ + if (fp->ff_blocks <= extent_info->cur_blockCount) { + if (0 && hfs_resize_debug) { + printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount); + } + goto out; + } + + if (hfs_resize_debug) { + printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount); + } + + MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); + if (extent_info->iterator == NULL) { + error = ENOMEM; + goto out; + } + bzero(extent_info->iterator, sizeof(struct BTreeIterator)); + key = (HFSPlusExtentKey *) &(extent_info->iterator->key); + key->keyLength = kHFSPlusExtentKeyMaximumLength; + key->forkType = forktype; + key->fileID = fileID; + key->startBlock = extent_info->cur_blockCount; + + extent_info->btdata.bufferAddress = extent_info->record.overflow; + extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord); + extent_info->btdata.itemCount = 1; + + extent_info->catalog_fp = NULL; + + /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */ + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + error = BTSearchRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), &(extent_info->recordlen), + extent_info->iterator); + hfs_systemfile_unlock(hfsmp, lockflags); + while (error == 0) { + extent_info->overflow_count++; + extent_info->recStartBlock = key->startBlock; + extent_info->extents = extent_info->record.overflow; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (extent_info->record.overflow[i].blockCount == 0) { + goto out; + } + extent_info->extent_index = i; + error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); + if (error) { + printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error); + goto out; + } + } + + /* Look for more overflow records */ + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, + extent_info->iterator, &(extent_info->btdata), + &(extent_info->recordlen)); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + break; + } + /* Stop when we encounter a different file or fork. 
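The search above relies on overflow extent records being keyed by (fileID, forkType, startBlock), so positioning the iterator at startBlock == cur_blockCount and walking forward visits exactly the remaining extents of this fork in order. A simplified sketch of that key ordering; the struct below is a stand-in, not the on-disk HFSPlusExtentKey layout:

#include <stdio.h>
#include <stdint.h>

struct extent_key {
    uint32_t fileID;
    uint8_t  forkType;
    uint32_t startBlock;
};

/* Compare keys in B-tree order: fileID, then forkType, then startBlock. */
static int
key_cmp(const struct extent_key *a, const struct extent_key *b)
{
    if (a->fileID != b->fileID)         return a->fileID < b->fileID ? -1 : 1;
    if (a->forkType != b->forkType)     return a->forkType < b->forkType ? -1 : 1;
    if (a->startBlock != b->startBlock) return a->startBlock < b->startBlock ? -1 : 1;
    return 0;
}

int main(void)
{
    struct extent_key search = { 25, 0, 800 };  /* resume at block 800 */
    struct extent_key rec    = { 25, 0, 1024 }; /* next overflow record */
    printf("%d\n", key_cmp(&search, &rec));     /* -1: iterate forward */
    return 0;
}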
*/ + if ((key->fileID != fileID) || (key->forkType != forktype)) { + break; + } + } + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + +out: + /* If any blocks were relocated, account them and report progress */ + if (extent_info->blocks_relocated) { + hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated; + hfs_truncatefs_progress(hfsmp); + if (fileID < kHFSFirstUserCatalogNodeID) { + printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n", + extent_info->blocks_relocated, fileID, hfsmp->vcbVN); + } + } + if (extent_info->iterator) { + FREE(extent_info->iterator, M_TEMP); + } + if (release_desc == true) { + cat_releasedesc(extent_info->dirlink_desc); + } + if (extent_info->dirlink_desc) { + FREE(extent_info->dirlink_desc, M_TEMP); + } + if (extent_info->dirlink_attr) { + FREE(extent_info->dirlink_attr, M_TEMP); + } + if (extent_info->dirlink_fork) { + FREE(extent_info->dirlink_fork, M_TEMP); + } + if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) { + (void) hfs_update(vp, MNT_WAIT); + } + if (took_truncate_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + if (extent_info) { + FREE(extent_info, M_TEMP); + } + if (hfs_resize_debug) { + printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error); + } + + return error; +} + + +/* + * This journal_relocate callback updates the journal info block to point + * at the new journal location. This write must NOT be done using the + * transaction. We must write the block immediately. We must also force + * it to get to the media so that the new journal location will be seen by + * the replay code before we can safely let journaled blocks be written + * to their normal locations. + * + * The tests for journal_uses_fua below are mildly hacky. Since the journal + * and the file system are both on the same device, I'm leveraging what + * the journal has decided about FUA. + */ +struct hfs_journal_relocate_args { + struct hfsmount *hfsmp; + vfs_context_t context; + u_int32_t newStartBlock; + u_int32_t newBlockCount; +}; + +static errno_t +hfs_journal_relocate_callback(void *_args) +{ + int error; + struct hfs_journal_relocate_args *args = _args; + struct hfsmount *hfsmp = args->hfsmp; + buf_t bp; + JournalInfoBlock *jibp; + + error = buf_meta_bread(hfsmp->hfs_devvp, + hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, vfs_context_ucred(args->context), &bp); + if (error) { + printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error); + if (bp) { + buf_brelse(bp); + } + return error; + } + jibp = (JournalInfoBlock*) buf_dataptr(bp); + jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize); + jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize); + if (journal_uses_fua(hfsmp->jnl)) + buf_markfua(bp); + error = buf_bwrite(bp); + if (error) { + printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error); + return error; + } + if (!journal_uses_fua(hfsmp->jnl)) { + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context); + if (error) { + printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); + error = 0; /* Don't fail the operation. 
*/ + } + } + + return error; +} + + +/* Type of resize operation in progress */ +#define HFS_RESIZE_TRUNCATE 1 +#define HFS_RESIZE_EXTEND 2 + +/* + * Core function to relocate the journal file. This function takes the + * journal size of the newly relocated journal --- the caller can + * provide a new journal size if they want to change the size of + * the journal. The function takes care of updating the journal info + * block and all other data structures correctly. + * + * Note: This function starts a transaction and grabs the btree locks. + */ +static int +hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context) +{ + int error; + int journal_err; + int lockflags; + u_int32_t oldStartBlock; + u_int32_t newStartBlock; + u_int32_t oldBlockCount; + u_int32_t newBlockCount; + u_int32_t jnlBlockCount; + u_int32_t alloc_skipfreeblks; + struct cat_desc journal_desc; + struct cat_attr journal_attr; + struct cat_fork journal_fork; + struct hfs_journal_relocate_args callback_args; + + /* Calculate the number of allocation blocks required for the journal */ + jnlBlockCount = howmany(jnl_size, hfsmp->blockSize); + + /* + * During truncatefs(), the volume free block count is updated + * before relocating data and reflects the total number of free + * blocks that will exist on volume after the resize is successful. + * This means that the allocation blocks required for relocation + * have already been reserved and accounted for in the free block + * count. Therefore, block allocation and deallocation routines + * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS + * flag. + * + * This special handling is not required when the file system + * is being extended as we want all the allocated and deallocated + * blocks to be accounted for correctly. 
+ */ + if (resize_type == HFS_RESIZE_TRUNCATE) { + alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS; + } else { + alloc_skipfreeblks = 0; + } + + error = hfs_start_transaction(hfsmp); + if (error) { + printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error); + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_FLUSHTXN | alloc_skipfreeblks, + &newStartBlock, &newBlockCount); + if (error) { + printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error); + goto fail; + } + if (newBlockCount != jnlBlockCount) { + printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount); + goto free_fail; + } + + error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork); + if (error) { + printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error); + goto free_fail; + } + + oldStartBlock = journal_fork.cf_extents[0].startBlock; + oldBlockCount = journal_fork.cf_extents[0].blockCount; + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks); + if (error) { + printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); + goto free_fail; + } + + /* Update the catalog record for .journal */ + journal_fork.cf_size = newBlockCount * hfsmp->blockSize; + journal_fork.cf_extents[0].startBlock = newStartBlock; + journal_fork.cf_extents[0].blockCount = newBlockCount; + journal_fork.cf_blocks = newBlockCount; + error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL); + cat_releasedesc(&journal_desc); /* all done with cat descriptor */ + if (error) { + printf("hfs_relocate_journal_file: cat_update returned %d\n", error); + goto free_fail; + } + + /* + * If the journal is part of the file system, then tell the journal + * code about the new location. If the journal is on an external + * device, then just keep using it as-is. + */ + if (hfsmp->jvp == hfsmp->hfs_devvp) { + callback_args.hfsmp = hfsmp; + callback_args.context = context; + callback_args.newStartBlock = newStartBlock; + callback_args.newBlockCount = newBlockCount; + + error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize, + (off_t)newBlockCount*hfsmp->blockSize, 0, + hfs_journal_relocate_callback, &callback_args); + if (error) { + /* NOTE: journal_relocate will mark the journal invalid. 
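The HFS_ALLOC_SKIPFREEBLKS reasoning above reduces to a small invariant: truncatefs debits the volume's free-block count once, up front, so allocations and deallocations done purely to relocate data must leave the counter untouched. A toy model of that accounting, under that assumption and with hypothetical names:

#include <stdio.h>
#include <stdint.h>

struct vol { uint32_t freeBlocks; };

static void alloc_blocks(struct vol *v, uint32_t n, int skip_accounting)
{
    if (!skip_accounting)
        v->freeBlocks -= n;
}

static void dealloc_blocks(struct vol *v, uint32_t n, int skip_accounting)
{
    if (!skip_accounting)
        v->freeBlocks += n;
}

int main(void)
{
    struct vol v = { 1000 };
    v.freeBlocks -= 200;          /* truncatefs reserves the shrink up front */
    alloc_blocks(&v, 50, 1);      /* relocation: allocate new home, no recount */
    dealloc_blocks(&v, 50, 1);    /* relocation: free old home, no recount */
    printf("%u\n", v.freeBlocks); /* still 800 */
    return 0;
}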
*/ + printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error); + goto fail; + } + if (hfs_resize_debug) { + printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + } + hfsmp->jnl_start = newStartBlock; + hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize; + } + + hfs_systemfile_unlock(hfsmp, lockflags); + error = hfs_end_transaction(hfsmp); + if (error) { + printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error); + } + + return error; + +free_fail: + journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); + if (journal_err) { + printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } +fail: + hfs_systemfile_unlock(hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error); + } + return error; +} + + +/* + * Relocate the journal file when the file system is being truncated. + * We do not down-size the journal when the file system size is + * reduced, so we always provide the current journal size to the + * relocate code. + */ +static int +hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error = 0; + u_int32_t startBlock; + u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize; + + /* + * Figure out the location of the .journal file. When the journal + * is on an external device, we need to look up the .journal file. + */ + if (hfsmp->jvp == hfsmp->hfs_devvp) { + startBlock = hfsmp->jnl_start; + blockCount = hfsmp->jnl_size / hfsmp->blockSize; + } else { + u_int32_t fileid; + u_int32_t old_jnlfileid; + struct cat_attr attr; + struct cat_fork fork; + + /* + * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid + * is set, and it is trying to hide the .journal file. So temporarily + * unset the field while calling GetFileInfo. + */ + old_jnlfileid = hfsmp->hfs_jnlfileid; + hfsmp->hfs_jnlfileid = 0; + fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork); + hfsmp->hfs_jnlfileid = old_jnlfileid; + if (fileid != old_jnlfileid) { + printf("hfs_reclaim_journal_file: cannot find .journal file!\n"); + return EIO; + } + + startBlock = fork.cf_extents[0].startBlock; + blockCount = fork.cf_extents[0].blockCount; + } + + if (startBlock + blockCount <= allocLimit) { + /* The journal file does not require relocation */ + return 0; + } + + error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context); + if (error == 0) { + hfsmp->hfs_resize_blocksmoved += blockCount; + hfs_truncatefs_progress(hfsmp); + printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n", + blockCount, hfsmp->vcbVN); + } + + return error; +} + + +/* + * Move the journal info block to a new location. We have to make sure the + * new copy of the journal info block gets to the media first, then change + * the field in the volume header and the catalog record. 
+ */ +static int +hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error; + int journal_err; + int lockflags; + u_int32_t oldBlock; + u_int32_t newBlock; + u_int32_t blockCount; + struct cat_desc jib_desc; + struct cat_attr jib_attr; + struct cat_fork jib_fork; + buf_t old_bp, new_bp; + + if (hfsmp->vcbJinfoBlock <= allocLimit) { + /* The journal info block does not require relocation */ + return 0; + } + + error = hfs_start_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error); + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + error = BlockAllocate(hfsmp, 1, 1, 1, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS | HFS_ALLOC_FLUSHTXN, + &newBlock, &blockCount); + if (error) { + printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); + goto fail; + } + if (blockCount != 1) { + printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); + goto free_fail; + } + + /* Copy the old journal info block content to the new location */ + error = buf_meta_bread(hfsmp->hfs_devvp, + hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, vfs_context_ucred(context), &old_bp); + if (error) { + printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error); + if (old_bp) { + buf_brelse(old_bp); + } + goto free_fail; + } + new_bp = buf_getblk(hfsmp->hfs_devvp, + newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, 0, 0, BLK_META); + bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize); + buf_brelse(old_bp); + if (journal_uses_fua(hfsmp->jnl)) + buf_markfua(new_bp); + error = buf_bwrite(new_bp); + if (error) { + printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error); + goto free_fail; + } + if (!journal_uses_fua(hfsmp->jnl)) { + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + if (error) { + printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); + /* Don't fail the operation. */ + } + } + + /* Deallocate the old block once the new one has the new valid content */ + error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); + goto free_fail; + } + + + /* Update the catalog record for .journal_info_block */ + error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork); + if (error) { + printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error); + goto fail; + } + oldBlock = jib_fork.cf_extents[0].startBlock; + jib_fork.cf_size = hfsmp->blockSize; + jib_fork.cf_extents[0].startBlock = newBlock; + jib_fork.cf_extents[0].blockCount = 1; + jib_fork.cf_blocks = 1; + error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL); + cat_releasedesc(&jib_desc); /* all done with cat descriptor */ + if (error) { + printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error); + goto fail; + } + + /* Update the pointer to the journal info block in the volume header. 
*/ + hfsmp->vcbJinfoBlock = newBlock; + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error); + goto fail; + } + hfs_systemfile_unlock(hfsmp, lockflags); + error = hfs_end_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); + } + error = hfs_journal_flush(hfsmp, FALSE); + if (error) { + printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error); + } + + /* Account for the block relocated and print progress */ + hfsmp->hfs_resize_blocksmoved += 1; + hfs_truncatefs_progress(hfsmp); + if (!error) { + printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n", + hfsmp->vcbVN); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount); + } + } + return error; + +free_fail: + journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); + if (journal_err) { + printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + +fail: + hfs_systemfile_unlock(hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error); + } + return error; +} + + +static u_int64_t +calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count) +{ + u_int64_t journal_size; + u_int32_t journal_scale; + +#define DEFAULT_JOURNAL_SIZE (8*1024*1024) +#define MAX_JOURNAL_SIZE (512*1024*1024) + + /* Calculate the journal size for this volume. We want + * at least 8 MB of journal for each 100 GB of disk space. + * We cap the size at 512 MB, unless the allocation block + * size is larger, in which case, we use one allocation + * block. + */ + journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024); + journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1); + if (journal_size > MAX_JOURNAL_SIZE) { + journal_size = MAX_JOURNAL_SIZE; + } + if (journal_size < hfsmp->blockSize) { + journal_size = hfsmp->blockSize; + } + return journal_size; +} + + +/* + * Calculate the expected journal size based on current partition size. + * If the size of the current journal is less than the calculated size, + * force journal relocation with the new journal size. 
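calculate_journal_size above encodes a simple rule: at least 8 MB of journal per 100 GB of volume, capped at 512 MB and floored at one allocation block. A sketch with a worked example; volume_bytes stands in for the sector_size * sector_count product:

#include <stdio.h>
#include <stdint.h>

#define DEFAULT_JOURNAL_SIZE (8 * 1024 * 1024)
#define MAX_JOURNAL_SIZE     (512 * 1024 * 1024)

static uint64_t
journal_size_for(uint64_t volume_bytes, uint32_t alloc_block_size)
{
    /* One extra DEFAULT_JOURNAL_SIZE for every full 100 GiB of volume. */
    uint64_t scale = volume_bytes / ((uint64_t)100 * 1024 * 1024 * 1024);
    uint64_t size = (uint64_t)DEFAULT_JOURNAL_SIZE * (scale + 1);
    if (size > MAX_JOURNAL_SIZE)
        size = MAX_JOURNAL_SIZE;
    if (size < alloc_block_size)
        size = alloc_block_size;
    return size;
}

int main(void)
{
    /* A 250 GiB volume scales to 3 * 8 MB = 24 MB of journal. */
    printf("%llu MB\n",
           (unsigned long long)(journal_size_for(250ULL << 30, 4096) >> 20));
    return 0;
}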
+ */ +static int +hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context) +{ + int error = 0; + u_int64_t calc_journal_size; + + if (hfsmp->jvp != hfsmp->hfs_devvp) { + if (hfs_resize_debug) { + printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n"); + } + return 0; + } + + calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count); + if (calc_journal_size <= hfsmp->jnl_size) { + /* The journal size requires no modification */ + goto out; + } + + if (hfs_resize_debug) { + printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size); + } + + /* Extend the journal to the new calculated size */ + error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context); + if (error == 0) { + printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n", + hfsmp->jnl_size, hfsmp->vcbVN); + } +out: + return error; +} + + +/* + * This function traverses all extended attribute records for a given + * fileID, and calls a function that reclaims data blocks that exist in the + * area of the disk being reclaimed, which in turn is responsible for allocating + * new space, copying extent data, deallocating the old space, and if required, + * splitting the extent. + * + * Note: The caller has already acquired the cnode lock on the file. Therefore + * we are assured that no other thread would be creating/deleting/modifying + * extended attributes for this file. + * + * Side Effects: + * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation + * blocks that were relocated. + * + * Returns: + * 0 on success, non-zero on failure. + */ +static int +hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context) +{ + int error = 0; + struct hfs_reclaim_extent_info *extent_info; + int i; + HFSPlusAttrKey *key; + int *lockflags; + + if (hfs_resize_debug) { + printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID); + } + + MALLOC(extent_info, struct hfs_reclaim_extent_info *, + sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK); + if (extent_info == NULL) { + return ENOMEM; + } + bzero(extent_info, sizeof(struct hfs_reclaim_extent_info)); + extent_info->vp = vp; + extent_info->fileID = fileID; + extent_info->is_xattr = true; + extent_info->is_sysfile = vnode_issystem(vp); + extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp); + lockflags = &(extent_info->lockflags); + *lockflags = SFL_ATTRIBUTE | SFL_BITMAP; + + /* Initialize iterator from the extent_info structure */ + MALLOC(extent_info->iterator, struct BTreeIterator *, + sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); + if (extent_info->iterator == NULL) { + error = ENOMEM; + goto out; + } + bzero(extent_info->iterator, sizeof(struct BTreeIterator)); + + /* Build attribute key */ + key = (HFSPlusAttrKey *)&(extent_info->iterator->key); + error = hfs_buildattrkey(fileID, NULL, key); + if (error) { + goto out; + } + + /* Initialize btdata from extent_info structure. Note that the + * buffer pointer actually points to the xattr record from the + * extent_info structure itself. + */ + extent_info->btdata.bufferAddress = &(extent_info->record.xattr); + extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord); + extent_info->btdata.itemCount = 1; + + /* + * Sync all extent-based attribute data to the disk.
+ * + * All extent-based attribute data I/O is performed via cluster + * I/O using a virtual file that spans across entire file system + * space. + */ + hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + (void)cluster_push(hfsmp->hfs_attrdata_vp, 0); + error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr"); + hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_LOCK_DEFAULT); + if (error) { + goto out; + } + + /* Search for extended attribute for current file. This + * will place the iterator before the first matching record. + */ + *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); + error = BTSearchRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), &(extent_info->recordlen), + extent_info->iterator); + hfs_systemfile_unlock(hfsmp, *lockflags); + if (error) { + if (error != btNotFound) { + goto out; + } + /* btNotFound is expected here, so just mask it */ + error = 0; + } + + while (1) { + /* Iterate to the next record */ + *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); + error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, + extent_info->iterator, &(extent_info->btdata), + &(extent_info->recordlen)); + hfs_systemfile_unlock(hfsmp, *lockflags); + + /* Stop the iteration if we encounter end of btree or xattr with different fileID */ + if (error || key->fileID != fileID) { + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + + /* We only care about extent-based EAs */ + if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) && + (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) { + continue; + } + + if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) { + extent_info->overflow_count = 0; + extent_info->extents = extent_info->record.xattr.forkData.theFork.extents; + } else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) { + extent_info->overflow_count++; + extent_info->extents = extent_info->record.xattr.overflowExtents.extents; + } + + extent_info->recStartBlock = key->startBlock; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (extent_info->extents[i].blockCount == 0) { + break; + } + extent_info->extent_index = i; + error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); + if (error) { + printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error); + goto out; + } + } + } + +out: + /* If any blocks were relocated, account them and report progress */ + if (extent_info->blocks_relocated) { + hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated; + hfs_truncatefs_progress(hfsmp); + } + if (extent_info->iterator) { + FREE(extent_info->iterator, M_TEMP); + } + if (extent_info) { + FREE(extent_info, M_TEMP); + } + if (hfs_resize_debug) { + printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error); + } + return error; +} + +/* + * Reclaim any extent-based extended attributes allocation blocks from + * the area of the disk that is being truncated. + * + * The function traverses the attribute btree to find out the fileIDs + * of the extended attributes that need to be relocated. For every + * file whose large EA requires relocation, it looks up the cnode and + * calls hfs_reclaim_xattr() to do all the work for allocating + * new space, copying data, deallocating old space, and if required, + * splitting the extents. 
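Only two attribute record types own allocation blocks, which is why the loop above filters on them: fork-data records carry the first eight extents of an extent-based EA, and overflow records carry the rest; inline records are skipped. A simplified sketch of that dispatch, using stand-in types and constants rather than the on-disk values:

#include <stdio.h>
#include <stdint.h>

enum { ATTR_FORK_DATA = 1, ATTR_EXTENTS = 2, ATTR_INLINE = 3 };

struct extent { uint32_t startBlock, blockCount; };

struct attr_rec {
    uint32_t recordType;
    struct extent extents[8];
};

/* Return the extent array a record owns, or NULL if it owns no blocks. */
static const struct extent *
extents_of(const struct attr_rec *r)
{
    switch (r->recordType) {
    case ATTR_FORK_DATA:  /* first extents of an extent-based EA */
    case ATTR_EXTENTS:    /* overflow extents of the same EA */
        return r->extents;
    default:              /* inline EAs own no allocation blocks */
        return NULL;
    }
}

int main(void)
{
    struct attr_rec r = { ATTR_INLINE, {{0, 0}} };
    printf("%s\n", extents_of(&r) ? "has extents" : "inline, skip");
    return 0;
}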
+ * + * Inputs: + * allocLimit - starting block of the area being reclaimed + * + * Returns: + * returns 0 on success, non-zero on failure. + */ +static int +hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error = 0; + FCB *fcb; + struct BTreeIterator *iterator = NULL; + struct FSBufferDescriptor btdata; + HFSPlusAttrKey *key; + HFSPlusAttrRecord rec; + int lockflags = 0; + cnid_t prev_fileid = 0; + struct vnode *vp; + int need_relocate; + int btree_operation; + u_int32_t files_moved = 0; + u_int32_t prev_blocksmoved; + int i; + + fcb = VTOF(hfsmp->hfs_attribute_vp); + /* Store the value to print total blocks moved by this function in end */ + prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; + + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + return ENOMEM; + } + bzero(iterator, sizeof(*iterator)); + key = (HFSPlusAttrKey *)&iterator->key; + btdata.bufferAddress = &rec; + btdata.itemSize = sizeof(rec); + btdata.itemCount = 1; + + need_relocate = false; + btree_operation = kBTreeFirstRecord; + /* Traverse the attribute btree to find extent-based EAs to reclaim */ + while (1) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + btree_operation = kBTreeNextRecord; + + /* If the extents of current fileID were already relocated, skip it */ + if (prev_fileid == key->fileID) { + continue; + } + + /* Check if any of the extents in the current record need to be relocated */ + need_relocate = false; + switch(rec.recordType) { + case kHFSPlusAttrForkData: + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (rec.forkData.theFork.extents[i].blockCount == 0) { + break; + } + if ((rec.forkData.theFork.extents[i].startBlock + + rec.forkData.theFork.extents[i].blockCount) > allocLimit) { + need_relocate = true; + break; + } + } + break; + + case kHFSPlusAttrExtents: + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (rec.overflowExtents.extents[i].blockCount == 0) { + break; + } + if ((rec.overflowExtents.extents[i].startBlock + + rec.overflowExtents.extents[i].blockCount) > allocLimit) { + need_relocate = true; + break; + } + } + break; + }; + + /* Continue iterating to next attribute record */ + if (need_relocate == false) { + continue; + } + + /* Look up the vnode for corresponding file. The cnode + * will be locked which will ensure that no one modifies + * the xattrs when we are relocating them. + * + * We want to allow open-unlinked files to be moved, + * so provide allow_deleted == 1 for hfs_vget(). + */ + if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) { + continue; + } + + error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + if (error) { + printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error); + break; + } + prev_fileid = key->fileID; + files_moved++; + } + + if (files_moved) { + printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n", + (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), + files_moved, hfsmp->vcbVN); + } + + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); + return error; +} + +/* + * Reclaim blocks from regular files. 
+ * + * This function iterates over all the records in the catalog btree looking + * for files with extents that overlap into the space we're trying to + * free up. If a file extent requires relocation, it looks up the vnode + * and calls a function to relocate the data. + * + * Returns: + * Zero on success, non-zero on failure. + */ +static int +hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error; + FCB *fcb; + struct BTreeIterator *iterator = NULL; + struct FSBufferDescriptor btdata; + int btree_operation; + int lockflags; + struct HFSPlusCatalogFile filerec; + struct vnode *vp; + struct vnode *rvp; + struct filefork *datafork; + u_int32_t files_moved = 0; + u_int32_t prev_blocksmoved; + +#if CONFIG_PROTECT + int keys_generated = 0; +#endif + + fcb = VTOF(hfsmp->hfs_catalog_vp); + /* Store the value to print total blocks moved by this function at the end */ + prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; + + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + error = ENOMEM; + goto reclaim_filespace_done; + } + +#if CONFIG_PROTECT + /* + * For content-protected filesystems, we may need to relocate files that + * are encrypted. If they use the new-style offset-based IVs, then + * we can move them regardless of the lock state. We create a temporary + * key here that we use to read/write the data, then we discard it at the + * end of the function. + */ + if (cp_fs_protected (hfsmp->hfs_mp)) { + int needs = 0; + error = cp_needs_tempkeys(hfsmp, &needs); + + if ((error == 0) && (needs)) { + error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp); + if (error == 0) { + keys_generated = 1; + } + } + + if (error) { + printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error); + goto reclaim_filespace_done; + } + } + +#endif + + bzero(iterator, sizeof(*iterator)); + + btdata.bufferAddress = &filerec; + btdata.itemSize = sizeof(filerec); + btdata.itemCount = 1; + + btree_operation = kBTreeFirstRecord; + while (1) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + btree_operation = kBTreeNextRecord; + + if (filerec.recordType != kHFSPlusFileRecord) { + continue; + } + + /* Check if any of the extents require relocation */ + if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) { + continue; + } + + /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */ + if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) { + if (hfs_resize_debug) { + printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID); + } + continue; + } + + /* If data fork exists or item is a directory hard link, relocate blocks */ + datafork = VTOF(vp); + if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) { + error = hfs_reclaim_file(hfsmp, vp, filerec.fileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + } + + /* If resource fork exists or item is a directory hard link, relocate blocks */ + if (((VTOC(vp)->c_blocks - (datafork ?
datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) { + if (vnode_isdir(vp)) { + /* Resource fork vnode lookup is invalid for directory hard link. + * So we fake data fork vnode as resource fork vnode. + */ + rvp = vp; + } else { + error = hfs_vgetrsrc(hfsmp, vp, &rvp); + if (error) { + printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT; + } + + error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID, + kHFSResourceForkType, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + } + + /* The file forks were relocated successfully, now drop the + * cnode lock and vnode reference, and continue iterating to + * next catalog record. + */ + hfs_unlock(VTOC(vp)); + vnode_put(vp); + files_moved++; + } + + if (files_moved) { + printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n", + (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), + files_moved, hfsmp->vcbVN); + } + +reclaim_filespace_done: + if (iterator) { + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); + } + +#if CONFIG_PROTECT + if (keys_generated) { + cp_entry_destroy(hfsmp->hfs_resize_cpentry); + hfsmp->hfs_resize_cpentry = NULL; + } +#endif + return error; +} + +/* + * Reclaim space at the end of a file system. + * + * Inputs - + * allocLimit - start block of the space being reclaimed + * reclaimblks - number of allocation blocks to reclaim + */ +static int +hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context) +{ + int error = 0; + + /* + * Preflight the bitmap to find out total number of blocks that need + * relocation. + * + * Note: Since allocLimit is set to the location of new alternate volume + * header, the check below does not account for blocks allocated for old + * alternate volume header. + */ + error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks)); + if (error) { + printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error); + return error; + } + if (hfs_resize_debug) { + printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks); + } + + /* Just to be safe, sync the content of the journal to the disk before we proceed */ + hfs_journal_flush(hfsmp, TRUE); + + /* First, relocate journal file blocks if they're in the way. + * Doing this first will make sure that journal relocate code + * gets access to contiguous blocks on disk first. The journal + * file has to be contiguous on the disk, otherwise resize will + * fail. + */ + error = hfs_reclaim_journal_file(hfsmp, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error); + return error; + } + + /* Relocate journal info block blocks if they're in the way. */ + error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error); + return error; + } + + /* Relocate extents of the Extents B-tree if they're in the way. + * Relocating extents btree before other btrees is important as + * this will provide access to largest contiguous block range on + * the disk for relocating extents btree. 
Note that extents btree + * can only have maximum of 8 extents. + */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error); + return error; + } + + /* Relocate extents of the Allocation file if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error); + return error; + } + + /* Relocate extents of the Catalog B-tree if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error); + return error; + } + + /* Relocate extents of the Attributes B-tree if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error); + return error; + } + + /* Relocate extents of the Startup File if there is one and they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim startup file returned %d\n", error); + return error; + } + + /* + * We need to make sure the alternate volume header gets flushed if we moved + * any extents in the volume header. But we need to do that before + * shrinking the size of the volume, or else the journal code will panic + * with an invalid (too large) block number. + * + * Note that blks_moved will be set if ANY extent was moved, even + * if it was just an overflow extent. In this case, the journal_flush isn't + * strictly required, but shouldn't hurt. + */ + if (hfsmp->hfs_resize_blocksmoved) { + hfs_journal_flush(hfsmp, TRUE); + } + + /* Reclaim extents from catalog file records */ + error = hfs_reclaim_filespace(hfsmp, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error); + return error; + } + + /* Reclaim extents from extent-based extended attributes, if any */ + error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error); + return error; + } + + return error; +} + + +/* + * Check if there are any extents (including overflow extents) that overlap + * into the disk space that is being reclaimed. 
+ * + * Output - + * true - One of the extents needs to be relocated + * false - No overflow extents need to be relocated, or there was an error + */ +static int +hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec) +{ + struct BTreeIterator * iterator = NULL; + struct FSBufferDescriptor btdata; + HFSPlusExtentRecord extrec; + HFSPlusExtentKey *extkeyptr; + FCB *fcb; + int overlapped = false; + int i, j; + int error; + int lockflags = 0; + u_int32_t endblock; + + /* Check if data fork overlaps the target space */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (filerec->dataFork.extents[i].blockCount == 0) { + break; + } + endblock = filerec->dataFork.extents[i].startBlock + + filerec->dataFork.extents[i].blockCount; + if (endblock > allocLimit) { + overlapped = true; + goto out; + } + } + + /* Check if resource fork overlaps the target space */ + for (j = 0; j < kHFSPlusExtentDensity; ++j) { + if (filerec->resourceFork.extents[j].blockCount == 0) { + break; + } + endblock = filerec->resourceFork.extents[j].startBlock + + filerec->resourceFork.extents[j].blockCount; + if (endblock > allocLimit) { + overlapped = true; + goto out; + } + } + + /* Return if there are no overflow extents for this file */ + if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) { + goto out; + } + + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + return 0; + } + bzero(iterator, sizeof(*iterator)); + extkeyptr = (HFSPlusExtentKey *)&iterator->key; + extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; + extkeyptr->forkType = 0; + extkeyptr->fileID = filerec->fileID; + extkeyptr->startBlock = 0; + + btdata.bufferAddress = &extrec; + btdata.itemSize = sizeof(extrec); + btdata.itemCount = 1; + + fcb = VTOF(hfsmp->hfs_extents_vp); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); + + /* This will position the iterator just before the first overflow + * extent record for the given fileID. It will always return btNotFound, + * so we special case the error code. + */ + error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); + if (error && (error != btNotFound)) { + goto out; + } + + /* BTIterateRecord() might return an error if the btree is empty, + * in which case we report to the caller that the extents do not overlap. + */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + while (error == 0) { + /* Stop when we encounter a different file. */ + if (extkeyptr->fileID != filerec->fileID) { + break; + } + /* Check if any of the forks exist in the target space. */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (extrec[i].blockCount == 0) { + break; + } + endblock = extrec[i].startBlock + extrec[i].blockCount; + if (endblock > allocLimit) { + overlapped = true; + goto out; + } + } + /* Look for more records. */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + } + +out: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (iterator) { + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); + } + return overlapped; +} + + +/* + * Calculate the progress of a file system resize operation.
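Every relocation decision in this file comes down to the same comparison hfs_file_extent_overlaps applies above: an extent [startBlock, startBlock + blockCount) must move exactly when it reaches past allocLimit, the first block beyond the shrunken volume. A minimal sketch:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* True iff the extent reaches into the area being reclaimed. */
static bool
extent_overlaps_reclaim_area(uint32_t start, uint32_t count, uint32_t alloc_limit)
{
    return (start + count) > alloc_limit;
}

int main(void)
{
    /* A 10-block extent at 95 crosses an allocLimit of 100; at 90 it fits. */
    printf("%d %d\n",
           extent_overlaps_reclaim_area(95, 10, 100),
           extent_overlaps_reclaim_area(90, 10, 100));
    return 0;
}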
+ */ +__private_extern__ +int +hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) +{ + if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) { + return (ENXIO); + } + + if (hfsmp->hfs_resize_totalblocks > 0) { + *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks); + } else { + *progress = 0; + } + + return (0); +} diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index d2e76f7b3..88712db1e 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -128,8 +128,8 @@ int hfs_dbg_all = 0; int hfs_dbg_err = 0; #endif -/* Enable/disable debugging code for live volume resizing */ -int hfs_resize_debug = 0; +/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */ +extern int hfs_resize_debug; lck_grp_attr_t * hfs_group_attr; lck_attr_t * hfs_lock_attr; @@ -157,10 +157,8 @@ static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t cont static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context); static int hfs_start(struct mount *mp, int flags, vfs_context_t context); static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); -static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec); static int hfs_journal_replay(vnode_t devvp, vfs_context_t context); -static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context); -static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context); +static void hfs_syncer_free(struct hfsmount *hfsmp); void hfs_initialize_allocator (struct hfsmount *hfsmp); int hfs_teardown_allocator (struct hfsmount *hfsmp); @@ -210,6 +208,11 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) vfsp = vfs_statfs(mp); (void)hfs_statfs(mp, vfsp, NULL); + /* Invoke ioctl that asks if the underlying device is Core Storage or not */ + error = VNOP_IOCTL(rvp, _DKIOCCORESTORAGE, NULL, 0, context); + if (error == 0) { + hfsmp->hfs_flags |= HFS_CS; + } return (0); } @@ -267,14 +270,15 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte */ hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE; - hfsmp->hfs_downgrading_proc = current_thread(); + hfsmp->hfs_downgrading_thread = current_thread(); hfs_unlock_global (hfsmp); - + hfs_syncer_free(hfsmp); + /* use VFS_SYNC to push out System (btree) files */ retval = VFS_SYNC(mp, MNT_WAIT, context); if (retval && ((cmdflags & MNT_FORCE) == 0)) { hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; - hfsmp->hfs_downgrading_proc = NULL; + hfsmp->hfs_downgrading_thread = NULL; if (HFS_MOUNT_DEBUG) { printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN); } @@ -287,7 +291,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte if ((retval = hfs_flushfiles(mp, flags, p))) { hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; - hfsmp->hfs_downgrading_proc = NULL; + hfsmp->hfs_downgrading_thread = NULL; if (HFS_MOUNT_DEBUG) { printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN); } @@ -325,8 +329,9 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte // it later if we go back to being read-write. 
hfs_unlock_global (hfsmp); - } + vfs_clearflags(hfsmp->hfs_mp, MNT_JOURNALED); + } /* * Write out any pending I/O still outstanding against the device node @@ -343,7 +348,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN); } hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; - hfsmp->hfs_downgrading_proc = NULL; + hfsmp->hfs_downgrading_thread = NULL; hfsmp->hfs_flags &= ~HFS_READ_ONLY; goto out; } @@ -366,7 +371,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte } } - hfsmp->hfs_downgrading_proc = NULL; + hfsmp->hfs_downgrading_thread = NULL; } /* Change to a writable file system. */ @@ -435,8 +440,8 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte goto out; } else { hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET; + vfs_setflags(hfsmp->hfs_mp, MNT_JOURNALED); } - } /* See if we need to erase unused Catalog nodes due to . */ @@ -453,7 +458,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte * moving back to read-write. */ hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; - hfsmp->hfs_downgrading_proc = NULL; + hfsmp->hfs_downgrading_thread = NULL; /* mark the volume dirty (clear clean unmount bit) */ hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask; @@ -524,25 +529,10 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte */ if (retval == 0) { errno_t err; - vnode_t root_vnode; - err = hfs_vfs_root(mp, &root_vnode, context); + /* Invoke ioctl that asks if the underlying device is Core Storage or not */ + err = VNOP_IOCTL(devvp, _DKIOCCORESTORAGE, NULL, 0, context); if (err == 0) { - if (VNOP_IOCTL(devvp, _DKIOCCSSETFSVNODE, - (caddr_t)&root_vnode, 0, context) == 0) { - err = vnode_ref(root_vnode); - if (err == 0) { - hfsmp->hfs_flags |= HFS_CS; - } - } - - err = vnode_put(root_vnode); - if (err) { - printf("hfs: could not release io count on root vnode with error: %d\n", - err); - } - } else { - printf("hfs: could not get root vnode with error: %d\n", - err); + hfsmp->hfs_flags |= HFS_CS; } } } @@ -1081,13 +1071,14 @@ hfs_syncer(void *arg0, void *unused) hfsmp->hfs_mp->mnt_pending_write_size, 0); if (hfsmp->hfs_syncer_thread) { - printf("hfs: syncer already running!"); + printf("hfs: syncer already running!\n"); return; } hfsmp->hfs_syncer_thread = current_thread(); - hfs_start_transaction(hfsmp); // so we hold off any new writes + if (hfs_start_transaction(hfsmp) != 0) // so we hold off any new writes + goto out; /* * We intentionally do a synchronous flush (of the journal or entire volume) here. 
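The hfs_syncer() change at the end of this hunk is a small but real fix: hfs_start_transaction() can fail, and the old code ignored its return value after having already published current_thread() in hfs_syncer_thread. The next hunk adds the matching out: label, so the failure path now clears the marker. A condensed sketch of the resulting control flow (bodies elided; this is not the literal kernel function):

    /* Sketch: every exit path must clear the ownership marker. */
    static void syncer_flow_sketch(struct hfsmount *hfsmp)
    {
        if (hfsmp->hfs_syncer_thread)
            return;                             /* a syncer is already running */
        hfsmp->hfs_syncer_thread = current_thread();

        if (hfs_start_transaction(hfsmp) != 0)  /* holds off any new writes */
            goto out;                           /* failure must not strand the marker */

        /* ... synchronous journal/volume flush elided ... */

        hfs_end_transaction(hfsmp);
    out:
        hfsmp->hfs_syncer_thread = NULL;        /* always drop ownership */
    }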
@@ -1120,6 +1111,8 @@ hfs_syncer(void *arg0, void *unused) hfs_end_transaction(hfsmp); +out: + hfsmp->hfs_syncer_thread = NULL; hfs_syncer_lock(hfsmp); @@ -1199,6 +1192,8 @@ void hfs_scan_blocks (struct hfsmount *hfsmp) { */ (void) ScanUnmapBlocks(hfsmp); + hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED; + hfs_systemfile_unlock(hfsmp, flags); } @@ -1434,7 +1429,6 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr); lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr); lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr); - lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr); lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr); vfs_setfsprivate(mp, hfsmp); @@ -2041,37 +2035,9 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) if (hfsmp->hfs_flags & HFS_METADATA_ZONE) (void) hfs_recording_suspend(hfsmp); - - // Tidy up the syncer - if (hfsmp->hfs_syncer) - { - hfs_syncer_lock(hfsmp); - - /* First, make sure everything else knows we don't want any more - requests queued. */ - thread_call_t syncer = hfsmp->hfs_syncer; - hfsmp->hfs_syncer = NULL; - - hfs_syncer_unlock(hfsmp); - - // Now deal with requests that are outstanding - if (hfsmp->hfs_sync_incomplete) { - if (thread_call_cancel(syncer)) { - // We managed to cancel the timer so we're done - hfsmp->hfs_sync_incomplete = FALSE; - } else { - // Syncer must be running right now so we have to wait - hfs_syncer_lock(hfsmp); - while (hfsmp->hfs_sync_incomplete) - hfs_syncer_wait(hfsmp); - hfs_syncer_unlock(hfsmp); - } - } - - // Now we're safe to free the syncer - thread_call_free(syncer); - } - + + hfs_syncer_free(hfsmp); + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { if (hfsmp->hfs_summary_table) { int err = 0; @@ -2437,14 +2403,32 @@ hfs_sync_metadata(void *arg) } else if (bp) { buf_brelse(bp); } + + /* Note that these I/Os bypass the journal (no calls to journal_start_modify_block) */ // the alternate super block... // XXXdbg - we probably don't need to do this each and every time. // hfs_btreeio.c:FlushAlternate() should flag when it was // written... - if (hfsmp->hfs_alt_id_sector) { + if (hfsmp->hfs_partition_avh_sector) { + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_partition_avh_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { + /* + * note this I/O can fail if the partition shrank behind our backs! + * So failure should be OK here. + */ + buf_bwrite(bp); + } else if (bp) { + buf_brelse(bp); + } + } + + /* Is the FS's idea of the AVH different than the partition ? 
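The buffer-flag test used for the AVH writes in hfs_sync_metadata() above is easy to misread. Masking with (B_DELWRI | B_LOCKED) and comparing the result against B_DELWRI accepts a buffer only when it holds delayed-write data and is not locked (a locked buffer is typically owned by the journal). Restated as a helper, under that reading:

    /* True only when B_DELWRI is set AND B_LOCKED is clear. */
    static int avh_buf_wants_write(buf_t bp)
    {
        return (buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI;
    }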
*/ + if ((hfsmp->hfs_fs_avh_sector) && (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)) { retval = (int)buf_meta_bread(hfsmp->hfs_devvp, - HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, NOCRED, &bp); if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { buf_bwrite(bp); @@ -2452,6 +2436,7 @@ hfs_sync_metadata(void *arg) buf_brelse(bp); } } + } @@ -2512,18 +2497,21 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) hfsmp = VFSTOHFS(mp); - /* - * hfs_changefs might be manipulating vnodes so back off - */ - if (hfsmp->hfs_flags & HFS_IN_CHANGEFS) - return (0); + // Back off if hfs_changefs or a freeze is underway + hfs_lock_mount(hfsmp); + if ((hfsmp->hfs_flags & HFS_IN_CHANGEFS) + || hfsmp->hfs_freeze_state != HFS_THAWED) { + hfs_unlock_mount(hfsmp); + return 0; + } - if (hfsmp->hfs_flags & HFS_READ_ONLY) + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + hfs_unlock_mount(hfsmp); return (EROFS); + } - /* skip over frozen volumes */ - if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync)) - return 0; + ++hfsmp->hfs_syncers; + hfs_unlock_mount(hfsmp); args.cred = kauth_cred_get(); args.waitfor = waitfor; @@ -2609,7 +2597,13 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) hfs_journal_flush(hfsmp, FALSE); } - lck_rw_unlock_shared(&hfsmp->hfs_insync); + hfs_lock_mount(hfsmp); + boolean_t wake = (!--hfsmp->hfs_syncers + && hfsmp->hfs_freeze_state == HFS_WANT_TO_FREEZE); + hfs_unlock_mount(hfsmp); + if (wake) + wakeup(&hfsmp->hfs_freeze_state); + return (allerror); } @@ -2737,7 +2731,6 @@ hfs_locks_destroy(struct hfsmount *hfsmp) lck_mtx_destroy(&hfsmp->hfs_mutex, hfs_mutex_group); lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); lck_rw_destroy(&hfsmp->hfs_global_lock, hfs_rwlock_group); - lck_rw_destroy(&hfsmp->hfs_insync, hfs_rwlock_group); lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group); return; @@ -2827,7 +2820,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, goto encodinghint_exit; } MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK); - if (filename == NULL) { + if (unicode_name == NULL) { error = ENOMEM; goto encodinghint_exit; } @@ -3159,6 +3152,9 @@ encodinghint_exit: struct vfsquery vq; req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */ + if (req == NULL) { + return EFAULT; + } error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32)); if (error) return (error); @@ -3454,9 +3450,6 @@ hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) } } #endif /* QUOTA */ - if (hfsmp->hfs_flags & HFS_CS) { - ++accounted_root_usecounts; - } if (accounted_root_usecounts > 0) { /* Obtain the root vnode so we can skip over it. */ @@ -3495,16 +3488,7 @@ hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) } } #endif /* QUOTA */ - if (hfsmp->hfs_flags & HFS_CS) { - error = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSSETFSVNODE, - (caddr_t)NULL, 0, vfs_context_kernel()); - vnode_rele(skipvp); - printf("hfs_flushfiles: VNOP_IOCTL(_DKIOCCSSETFSVNODE) failed with error code %d\n", - error); - - /* ignore the CS error and proceed with the unmount. 
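Reassembled from the interleaved +/- lines, the new sync/freeze interlock in hfs_sync() replaces the old hfs_insync reader/writer lock with a counter guarded by the mount lock: each active syncer is counted in hfs_syncers, and the last one to leave wakes a thread parked in the HFS_WANT_TO_FREEZE state. In outline (identifiers are the ones introduced by this diff; the freeze side itself is not part of this patch hunk):

    /* Outline of hfs_sync()'s new entry/exit interlock (sketch only). */
    static int sync_interlock_sketch(struct hfsmount *hfsmp)
    {
        hfs_lock_mount(hfsmp);
        if ((hfsmp->hfs_flags & HFS_IN_CHANGEFS)
            || hfsmp->hfs_freeze_state != HFS_THAWED) {
            hfs_unlock_mount(hfsmp);
            return 0;                    /* back off: changefs or freeze under way */
        }
        ++hfsmp->hfs_syncers;            /* counted, instead of holding a rwlock */
        hfs_unlock_mount(hfsmp);

        /* ... the actual sync work happens here ... */

        hfs_lock_mount(hfsmp);
        boolean_t wake = (!--hfsmp->hfs_syncers
                          && hfsmp->hfs_freeze_state == HFS_WANT_TO_FREEZE);
        hfs_unlock_mount(hfsmp);
        if (wake)
            wakeup(&hfsmp->hfs_freeze_state);  /* last one out wakes the freezer */
        return 0;
    }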
*/ - error = 0; - } + if (skipvp) { error = vflush(mp, NULLVP, SKIPSYSTEM | flags); } @@ -3605,6 +3589,7 @@ hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) #if CONFIG_HFS_STD +/* HFS Standard MDB flush */ static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) { @@ -3681,7 +3666,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) if (altflush) { struct buf *alt_bp = NULL; - if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sector_size, NOCRED, &alt_bp) == 0) { + if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_partition_avh_sector, sector_size, NOCRED, &alt_bp) == 0) { bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize); (void) VNOP_BWRITE(alt_bp); @@ -3719,6 +3704,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) int critical; u_int16_t signature; u_int16_t hfsversion; + daddr64_t avh_sector; if (hfsmp->hfs_flags & HFS_READ_ONLY) { return(0); } @@ -3759,15 +3745,62 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) { - printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n", - vcb->vcbVN, signature, hfsversion, - SWAP_BE32 (volumeHeader->blockSize), - hfsmp->hfs_alt_id_sector ? "; trying alternate" : ""); - hfs_mark_volume_inconsistent(hfsmp); - - if (hfsmp->hfs_alt_id_sector) { + printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d\n", + vcb->vcbVN, signature, hfsversion, + SWAP_BE32 (volumeHeader->blockSize)); + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); + + /* Almost always we read AVH relative to the partition size */ + avh_sector = hfsmp->hfs_partition_avh_sector; + + if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector) { + /* + * The two altVH offsets do not match --- which means that a smaller file + * system exists in a larger partition. Verify that we have the correct + * alternate volume header sector as per the current partition size. + * The GPT device that we are mounted on top of could have changed sizes + * without us knowing. + * + * We're in a transaction, so it's safe to modify the partition_avh_sector + * field if necessary. + */ + + uint64_t sector_count; + + /* Get underlying device block count */ + if ((retval = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCGETBLOCKCOUNT, + (caddr_t)&sector_count, 0, vfs_context_current()))) { + printf("hfs_flushVH: err %d getting block count (%s) \n", retval, vcb->vcbVN); + retval = ENXIO; + goto err_exit; + } + + /* Partition size was changed without our knowledge */ + if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count) { + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count); + /* Note: hfs_fs_avh_sector will remain unchanged */ + printf ("hfs_flushVH: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + + /* + * We just updated the offset for AVH relative to + * the partition size, so the content of that AVH + * will be invalid. But since we are also maintaining + * a valid AVH relative to the file system size, we + * can read that copy when the primary VH and the + * partition AVH are not valid.
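For concreteness, the partition-relative AVH sector computed above can be sanity-checked with byte arithmetic: the alternate volume header always occupies the 1024 bytes immediately before the end of the space it describes. An illustrative helper (a restatement of the intent; the kernel's exact HFS_ALT_SECTOR macro body is not reproduced here):

    #include <stdint.h>

    /* Sector containing the byte at (end - 1024), i.e. where the AVH starts. */
    static uint64_t avh_sector_sketch(uint64_t embed_offset_bytes,  /* hfsPlusIOPosOffset */
                                      uint32_t sector_size,
                                      uint64_t sector_count)
    {
        uint64_t end = embed_offset_bytes + sector_count * (uint64_t)sector_size;
        return (end - 1024) / sector_size;
    }

For a non-embedded volume on a 512-byte-sector device of N sectors this yields N - 2, i.e. the AVH begins two sectors before the device end, matching the "1024 bytes before the end" convention used throughout this file.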
+ */ + avh_sector = hfsmp->hfs_fs_avh_sector; + } + } + + printf ("hfs: trying alternate (for %s) avh_sector=%qu\n", + (avh_sector == hfsmp->hfs_fs_avh_sector) ? "file system" : "partition", avh_sector); + + if (avh_sector) { retval = buf_meta_bread(hfsmp->hfs_devvp, - HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + HFS_PHYSBLK_ROUNDDOWN(avh_sector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, NOCRED, &alt_bp); if (retval) { printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN); @@ -3963,39 +3996,137 @@ done: hfs_unlock_mount (hfsmp); /* If requested, flush out the alternate volume header */ - if (altflush && hfsmp->hfs_alt_id_sector) { - if (buf_meta_bread(hfsmp->hfs_devvp, - HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), - hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { - if (hfsmp->jnl) { - journal_modify_block_start(hfsmp->jnl, alt_bp); + if (altflush) { + /* + * If the two altVH offsets do not match, then a smaller file + * system exists in a larger partition. Verify that we have the correct + * alternate volume header sector as per the current partition size. + * The GPT device that we are mounted on top of could have changed sizes + * without us knowing. + * + * We're in a transaction, so it's safe to modify the partition_avh_sector + * field if necessary. + */ + if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector) { + uint64_t sector_count; + + /* Get underlying device block count */ + if ((retval = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCGETBLOCKCOUNT, + (caddr_t)&sector_count, 0, vfs_context_current()))) { + printf("hfs_flushVH: err %d getting block count (%s) \n", retval, vcb->vcbVN); + retval = ENXIO; + goto err_exit; + } + + /* Partition size was changed without our knowledge */ + if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count) { + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count); + /* Note: hfs_fs_avh_sector will remain unchanged */ + printf ("hfs_flushVH: altflush: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); } + } + + /* + * First see if we need to write I/O to the "secondary" AVH + * located at FS Size - 1024 bytes, because this one will + * always go into the journal. We put this AVH into the journal + * because even if the filesystem size has shrunk, this LBA should be + * reachable after the partition-size modification has occurred. + * The one where we need to be careful is partitionsize-1024, since the + * partition size should hopefully shrink. + * + * Most of the time this block will not execute.
+ */ + if ((hfsmp->hfs_fs_avh_sector) && + (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)) { + if (buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, alt_bp); + } - bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + - HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), - kMDBSize); + bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), + kMDBSize); - if (hfsmp->jnl) { - journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL); - } else { - (void) VNOP_BWRITE(alt_bp); + if (hfsmp->jnl) { + journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL); + } else { + (void) VNOP_BWRITE(alt_bp); + } + } else if (alt_bp) { + buf_brelse(alt_bp); } - } else if (alt_bp) - buf_brelse(alt_bp); + } + + /* + * Flush out alternate volume header located at 1024 bytes before + * end of the partition as part of journal transaction. In + * most cases, this will be the only alternate volume header + * that we need to worry about because the file system size is + * same as the partition size, therefore hfs_fs_avh_sector is + * same as hfs_partition_avh_sector. This is the "priority" AVH. + * + * However, do not always put this I/O into the journal. If we skipped the + * FS-Size AVH write above, then we will put this I/O into the journal as + * that indicates the two were in sync. However, if the FS size is + * not the same as the partition size, we are tracking two. We don't + * put it in the journal in that case, since if the partition + * size changes between uptimes, and we need to replay the journal, + * this I/O could generate an EIO if during replay it is now trying + * to access blocks beyond the device EOF. + */ + if (hfsmp->hfs_partition_avh_sector) { + if (buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_partition_avh_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { + + /* only one AVH, put this I/O in the journal. */ + if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) { + journal_modify_block_start(hfsmp->jnl, alt_bp); + } + + bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), + kMDBSize); + + /* If journaled and we only have one AVH to track */ + if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) { + journal_modify_block_end (hfsmp->jnl, alt_bp, NULL, NULL); + } else { + /* + * If we don't have a journal or there are two AVH's at the + * moment, then this one doesn't go in the journal. Note that + * this one may generate I/O errors, since the partition + * can be resized behind our backs at any moment and this I/O + * may now appear to be beyond the device EOF. 
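The journaling policy spread across the two AVH writes above condenses to a single predicate: the partition-relative AVH write goes through the journal only when a journal exists and the two AVH locations coincide, because a journal replay must never be pointed at an LBA that a later partition shrink could push beyond the device EOF. Distilled (an illustrative helper, not a function in this file):

    /* Journal the partition AVH only when it is the one and only AVH. */
    static int partition_avh_goes_in_journal(struct hfsmount *hfsmp)
    {
        return (hfsmp->jnl != NULL) &&
               (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector);
    }

When the predicate is false the write is issued raw and followed by DKIOCSYNCHRONIZECACHE, and an I/O error there is tolerated, since the target sector may legitimately no longer exist.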
+ */ + (void) VNOP_BWRITE(alt_bp); + (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, + NULL, FWRITE, NULL); + } + } else if (alt_bp) { + buf_brelse(alt_bp); + } + } } + /* Finish modifying the block for the primary VH */ if (hfsmp->jnl) { journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); } else { - if (waitfor != MNT_WAIT) + if (waitfor != MNT_WAIT) { buf_bawrite(bp); - else { - retval = VNOP_BWRITE(bp); - /* When critical data changes, flush the device cache */ - if (critical && (retval == 0)) { - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, - NULL, FWRITE, NULL); - } + } else { + retval = VNOP_BWRITE(bp); + /* When critical data changes, flush the device cache */ + if (critical && (retval == 0)) { + (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, + NULL, FWRITE, NULL); + } } } hfs_end_transaction(hfsmp); @@ -4013,3499 +4144,82 @@ err_exit: /* - * Extend a file system. + * Creates a UUID from a unique "name" in the HFS UUID Name space. + * See version 3 UUID. */ -int -hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) +static void +hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result) { - struct proc *p = vfs_context_proc(context); - kauth_cred_t cred = vfs_context_ucred(context); - struct vnode *vp; - struct vnode *devvp; - struct buf *bp; - struct filefork *fp = NULL; - ExtendedVCB *vcb; - struct cat_fork forkdata; - u_int64_t oldsize; - u_int64_t newblkcnt; - u_int64_t prev_phys_block_count; - u_int32_t addblks; - u_int64_t sector_count; - u_int32_t sector_size; - u_int32_t phys_sector_size; - u_int32_t overage_blocks; - daddr64_t prev_alt_sector; - daddr_t bitmapblks; - int lockflags = 0; - int error; - int64_t oldBitmapSize; - Boolean usedExtendFileC = false; - int transaction_begun = 0; - - devvp = hfsmp->hfs_devvp; - vcb = HFSTOVCB(hfsmp); + MD5_CTX md5c; + uint8_t rawUUID[8]; - /* - * - HFS Plus file systems only. - * - Journaling must be enabled. - * - No embedded volumes. - */ - if ((vcb->vcbSigWord == kHFSSigWord) || - (hfsmp->jnl == NULL) || - (vcb->hfsPlusIOPosOffset != 0)) { - return (EPERM); - } - /* - * If extending file system by non-root, then verify - * ownership and check permissions. 
- */ - if (suser(cred, NULL)) { - error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0); + ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6]; + ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7]; - if (error) - return (error); - error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0); - if (error == 0) { - error = hfs_write_access(vp, cred, p, false); - } - hfs_unlock(VTOC(vp)); - vnode_put(vp); - if (error) - return (error); + MD5Init( &md5c ); + MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) ); + MD5Update( &md5c, rawUUID, sizeof (rawUUID) ); + MD5Final( result, &md5c ); - error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context); - if (error) - return (error); - } - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) { - return (ENXIO); - } - if (sector_size != hfsmp->hfs_logical_block_size) { - return (ENXIO); - } - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) { - return (ENXIO); - } - if ((sector_size * sector_count) < newsize) { - printf("hfs_extendfs: not enough space on device (vol=%s)\n", hfsmp->vcbVN); - return (ENOSPC); - } - error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context); - if (error) { - if ((error != ENOTSUP) && (error != ENOTTY)) { - return (ENXIO); - } - /* If ioctl is not supported, force physical and logical sector size to be same */ - phys_sector_size = sector_size; - } - oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; + result[6] = 0x30 | ( result[6] & 0x0F ); + result[8] = 0x80 | ( result[8] & 0x3F ); +} - /* - * Validate new size. - */ - if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) { - printf("hfs_extendfs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize); - return (EINVAL); - } - newblkcnt = newsize / vcb->blockSize; - if (newblkcnt > (u_int64_t)0xFFFFFFFF) { - printf ("hfs_extendfs: current blockSize=%u too small for newsize=%qu\n", hfsmp->blockSize, newsize); - return (EOVERFLOW); - } +/* + * Get file system attributes. + */ +static int +hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) +{ +#define HFS_ATTR_CMN_VALIDMASK ATTR_CMN_VALIDMASK +#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST)) +#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_ACCTIME)) - addblks = newblkcnt - vcb->totalBlocks; + ExtendedVCB *vcb = VFSTOVCB(mp); + struct hfsmount *hfsmp = VFSTOHFS(mp); + u_int32_t freeCNIDs; - if (hfs_resize_debug) { - printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks); - printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks); - } - printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks); + int searchfs_on = 0; + int exchangedata_on = 1; - hfs_lock_mount (hfsmp); - if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { - hfs_unlock_mount(hfsmp); - error = EALREADY; - goto out; - } - hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; - hfs_unlock_mount (hfsmp); - - /* Start with a clean journal. */ - hfs_journal_flush(hfsmp, TRUE); +#if CONFIG_SEARCHFS + searchfs_on = 1; +#endif - /* - * Enclose changes inside a transaction.
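hfs_getvoluuid() above is the version-3 (name-based, MD5) UUID construction: hash the namespace UUID concatenated with the "name" (the volume's 64-bit finder-info ID), then force the version nibble and the RFC 4122 variant bits. The final bit-stamping step in isolation (a standalone sketch; HFS_UUID_NAMESPACE_ID itself is defined elsewhere in the kernel and not reproduced here):

    #include <stdint.h>

    /* Turn a raw 16-byte MD5 digest into a v3 UUID, as done above. */
    static void md5_digest_to_uuid_v3(uint8_t uuid[16])
    {
        uuid[6] = 0x30 | (uuid[6] & 0x0F);   /* version nibble = 3 (MD5, name-based) */
        uuid[8] = 0x80 | (uuid[8] & 0x3F);   /* variant bits   = 10xxxxxx (RFC 4122) */
    }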
- */ - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; - goto out; +#if CONFIG_PROTECT + if (cp_fs_protected(mp)) { + exchangedata_on = 0; } - transaction_begun = 1; - - - /* Update the hfsmp fields for the physical information about the device */ - prev_phys_block_count = hfsmp->hfs_logical_block_count; - prev_alt_sector = hfsmp->hfs_alt_id_sector; - - hfsmp->hfs_logical_block_count = sector_count; - /* - * Note that the new AltVH location must be based on the device's EOF rather than the new - * filesystem's EOF, so we use logical_block_count here rather than newsize. - */ - hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) + - HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count); - hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size; - - - /* - * Note: we take the attributes lock in case we have an attribute data vnode - * which needs to change size. - */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - vp = vcb->allocationsRefNum; - fp = VTOF(vp); - bcopy(&fp->ff_data, &forkdata, sizeof(forkdata)); - - /* - * Calculate additional space required (if any) by allocation bitmap. - */ - oldBitmapSize = fp->ff_size; - bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize; - if (bitmapblks > (daddr_t)fp->ff_blocks) - bitmapblks -= fp->ff_blocks; - else - bitmapblks = 0; +#endif - /* - * The allocation bitmap can contain unused bits that are beyond end of - * current volume's allocation blocks. Usually they are supposed to be - * zero'ed out but there can be cases where they might be marked as used. - * After extending the file system, those bits can represent valid - * allocation blocks, so we mark all the bits from the end of current - * volume to end of allocation bitmap as "free". - * - * Figure out the number of overage blocks before proceeding though, - * so we don't add more bytes to our I/O than necessary. - * First figure out the total number of blocks representable by the - * end of the bitmap file vs. the total number of blocks in the new FS. - * Then subtract away the number of blocks in the current FS. This is how much - * we can mark as free right now without having to grow the bitmap file. - */ - overage_blocks = fp->ff_blocks * vcb->blockSize * 8; - overage_blocks = MIN (overage_blocks, newblkcnt); - overage_blocks -= vcb->totalBlocks; + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID; - BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks); + VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt); + VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF); + VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0)); + VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks); + VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0)); + VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1)); + VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize); + /* XXX needs clarification */ + VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1)); + /* Maximum files is constrained by total blocks. 
*/ + VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2)); + VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1))); - if (bitmapblks > 0) { - daddr64_t blkno; - daddr_t blkcnt; - off_t bytesAdded; + fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev; + fsap->f_fsid.val[1] = vfs_typenum(mp); + VFSATTR_SET_SUPPORTED(fsap, f_fsid); - /* - * Get the bitmap's current size (in allocation blocks) so we know - * where to start zero filling once the new space is added. We've - * got to do this before the bitmap is grown. - */ - blkno = (daddr64_t)fp->ff_blocks; + VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord); + VFSATTR_RETURN(fsap, f_carbon_fsid, 0); - /* - * Try to grow the allocation file in the normal way, using allocation - * blocks already existing in the file system. This way, we might be - * able to grow the bitmap contiguously, or at least in the metadata - * zone. - */ - error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0, - kEFAllMask | kEFNoClumpMask | kEFReserveMask - | kEFMetadataMask | kEFContigMask, &bytesAdded); - - if (error == 0) { - usedExtendFileC = true; - } else { - /* - * If the above allocation failed, fall back to allocating the new - * extent of the bitmap from the space we're going to add. Since those - * blocks don't yet belong to the file system, we have to update the - * extent list directly, and manually adjust the file size. - */ - bytesAdded = 0; - error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks); - if (error) { - printf("hfs_extendfs: error %d adding extents\n", error); - goto out; - } - fp->ff_blocks += bitmapblks; - VTOC(vp)->c_blocks = fp->ff_blocks; - VTOC(vp)->c_flag |= C_MODIFIED; - } - - /* - * Update the allocation file's size to include the newly allocated - * blocks. Note that ExtendFileC doesn't do this, which is why this - * statement is outside the above "if" statement. - */ - fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; - - /* - * Zero out the new bitmap blocks. - */ - { - - bp = NULL; - blkcnt = bitmapblks; - while (blkcnt > 0) { - error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp); - if (error) { - if (bp) { - buf_brelse(bp); - } - break; - } - bzero((char *)buf_dataptr(bp), vcb->blockSize); - buf_markaged(bp); - error = (int)buf_bwrite(bp); - if (error) - break; - --blkcnt; - ++blkno; - } - } - if (error) { - printf("hfs_extendfs: error %d clearing blocks\n", error); - goto out; - } - /* - * Mark the new bitmap space as allocated. - * - * Note that ExtendFileC will have marked any blocks it allocated, so - * this is only needed if we used AddFileExtent. Also note that this - * has to come *after* the zero filling of new blocks in the case where - * we used AddFileExtent (since the part of the bitmap we're touching - * is in those newly allocated blocks). - */ - if (!usedExtendFileC) { - error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks); - if (error) { - printf("hfs_extendfs: error %d setting bitmap\n", error); - goto out; - } - vcb->freeBlocks -= bitmapblks; - } - } - /* - * Mark the new alternate VH as allocated. - */ - if (vcb->blockSize == 512) - error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2); - else - error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1); - if (error) { - printf("hfs_extendfs: error %d setting bitmap (VH)\n", error); - goto out; - } - /* - * Mark the old alternate VH as free. 
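One recurring constant in the resize code being removed here (and in its new home in hfs_resize.c) is the "one or two blocks" reserved for the AVH: since the header occupies the 1024 bytes before the volume end, it spans two allocation blocks only for the minimum 512-byte block size. As a sketch:

    /* Allocation blocks covered by the 1024-byte AVH area. */
    static uint32_t avh_alloc_blocks(uint32_t alloc_block_size)
    {
        return (alloc_block_size == 512) ? 2 : 1;   /* 1024 / blockSize, minimum 1 */
    }

This is exactly the (blockSize == 512) ? 2 : 1 pattern visible in the BlockMarkAllocated()/BlockMarkFree() calls throughout these hunks.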
- */ - if (vcb->blockSize == 512) - (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2); - else - (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1); - /* - * Adjust file system variables for new space. - */ - vcb->totalBlocks += addblks; - vcb->freeBlocks += addblks; - MarkVCBDirty(vcb); - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); - if (error) { - printf("hfs_extendfs: couldn't flush volume headers (%d)", error); - /* - * Restore to old state. - */ - if (usedExtendFileC) { - (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp), - FTOC(fp)->c_fileid, false); - } else { - fp->ff_blocks -= bitmapblks; - fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; - /* - * No need to mark the excess blocks free since those bitmap blocks - * are no longer part of the bitmap. But we do need to undo the - * effect of the "vcb->freeBlocks -= bitmapblks" above. - */ - vcb->freeBlocks += bitmapblks; - } - vcb->totalBlocks -= addblks; - vcb->freeBlocks -= addblks; - hfsmp->hfs_logical_block_count = prev_phys_block_count; - hfsmp->hfs_alt_id_sector = prev_alt_sector; - MarkVCBDirty(vcb); - if (vcb->blockSize == 512) { - if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) { - hfs_mark_volume_inconsistent(hfsmp); - } - } else { - if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) { - hfs_mark_volume_inconsistent(hfsmp); - } - } - goto out; - } - /* - * Invalidate the old alternate volume header. - */ - bp = NULL; - if (prev_alt_sector) { - if (buf_meta_bread(hfsmp->hfs_devvp, - HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys), - hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) { - journal_modify_block_start(hfsmp->jnl, bp); - - bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize); - - journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); - } else if (bp) { - buf_brelse(bp); - } - } - - /* - * Update the metadata zone size based on current volume size - */ - hfs_metadatazone_init(hfsmp, false); - - /* - * Adjust the size of hfsmp->hfs_attrdata_vp - */ - if (hfsmp->hfs_attrdata_vp) { - struct cnode *attr_cp; - struct filefork *attr_fp; - - if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { - attr_cp = VTOC(hfsmp->hfs_attrdata_vp); - attr_fp = VTOF(hfsmp->hfs_attrdata_vp); - - attr_cp->c_blocks = newblkcnt; - attr_fp->ff_blocks = newblkcnt; - attr_fp->ff_extents[0].blockCount = newblkcnt; - attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; - ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size); - vnode_put(hfsmp->hfs_attrdata_vp); - } - } - - /* - * Update the R/B Tree if necessary. Since we don't have to drop the systemfile - * locks in the middle of these operations like we do in the truncate case - * where we have to relocate files, we can only update the red-black tree - * if there were actual changes made to the bitmap. Also, we can't really scan the - * new portion of the bitmap before it has been allocated. The BlockMarkAllocated - * routines are smart enough to avoid the r/b tree if the portion they are manipulating is - * not currently controlled by the tree. - * - * We only update hfsmp->allocLimit if totalBlocks actually increased. 
- */ - if (error == 0) { - UpdateAllocLimit(hfsmp, hfsmp->totalBlocks); - } - - /* Release all locks and sync up journal content before - * checking and extending, if required, the journal - */ - if (lockflags) { - hfs_systemfile_unlock(hfsmp, lockflags); - lockflags = 0; - } - if (transaction_begun) { - hfs_end_transaction(hfsmp); - hfs_journal_flush(hfsmp, TRUE); - transaction_begun = 0; - } - - /* Increase the journal size, if required. */ - error = hfs_extend_journal(hfsmp, sector_size, sector_count, context); - if (error) { - printf ("hfs_extendfs: Could not extend journal size\n"); - goto out_noalloc; - } - - /* Log successful extending */ - printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n", - hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize)); - -out: - if (error && fp) { - /* Restore allocation fork. */ - bcopy(&forkdata, &fp->ff_data, sizeof(forkdata)); - VTOC(vp)->c_blocks = fp->ff_blocks; - - } - -out_noalloc: - hfs_lock_mount (hfsmp); - hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; - hfs_unlock_mount (hfsmp); - if (lockflags) { - hfs_systemfile_unlock(hfsmp, lockflags); - } - if (transaction_begun) { - hfs_end_transaction(hfsmp); - hfs_journal_flush(hfsmp, FALSE); - /* Just to be sure, sync all data to the disk */ - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); - } - if (error) { - printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); - } - - return MacToVFSError(error); -} - -#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL) - -/* - * Truncate a file system (while still mounted). - */ -int -hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) -{ - struct buf *bp = NULL; - u_int64_t oldsize; - u_int32_t newblkcnt; - u_int32_t reclaimblks = 0; - int lockflags = 0; - int transaction_begun = 0; - Boolean updateFreeBlocks = false; - Boolean disable_sparse = false; - int error = 0; - - hfs_lock_mount (hfsmp); - if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { - hfs_unlock_mount (hfsmp); - return (EALREADY); - } - hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; - hfsmp->hfs_resize_blocksmoved = 0; - hfsmp->hfs_resize_totalblocks = 0; - hfsmp->hfs_resize_progress = 0; - hfs_unlock_mount (hfsmp); - - /* - * - Journaled HFS Plus volumes only. - * - No embedded volumes. - */ - if ((hfsmp->jnl == NULL) || - (hfsmp->hfsPlusIOPosOffset != 0)) { - error = EPERM; - goto out; - } - oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; - newblkcnt = newsize / hfsmp->blockSize; - reclaimblks = hfsmp->totalBlocks - newblkcnt; - - if (hfs_resize_debug) { - printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1)); - printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks); - } - - /* Make sure new size is valid. */ - if ((newsize < HFS_MIN_SIZE) || - (newsize >= oldsize) || - (newsize % hfsmp->hfs_logical_block_size) || - (newsize % hfsmp->hfs_physical_block_size)) { - printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize); - error = EINVAL; - goto out; - } - - /* - * Make sure that the file system has enough free blocks reclaim. - * - * Before resize, the disk is divided into four zones - - * A. Allocated_Stationary - These are allocated blocks that exist - * before the new end of disk. These blocks will not be - * relocated or modified during resize. - * B. 
Free_Stationary - These are free blocks that exist before the - * new end of disk. These blocks can be used for any new - * allocations during resize, including allocation for relocating - * data from the area of disk being reclaimed. - * C. Allocated_To-Reclaim - These are allocated blocks that exist - * beyond the new end of disk. These blocks need to be reclaimed - * during resize by allocating equal number of blocks in Free - * Stationary zone and copying the data. - * D. Free_To-Reclaim - These are free blocks that exist beyond the - * new end of disk. Nothing special needs to be done to reclaim - * them. - * - * Total number of blocks on the disk before resize: - * ------------------------------------------------ - * Total Blocks = Allocated_Stationary + Free_Stationary + - * Allocated_To-Reclaim + Free_To-Reclaim - * - * Total number of blocks that need to be reclaimed: - * ------------------------------------------------ - * Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim - * - * Note that the check below also makes sure that we have enough space - * to relocate data from Allocated_To-Reclaim to Free_Stationary. - * Therefore we do not need to check total number of blocks to relocate - * later in the code. - * - * The condition below gets converted to: - * - * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim - * - * which is equivalent to: - * - * Allocated To-Reclaim >= Free Stationary - */ - if (reclaimblks >= hfs_freeblks(hfsmp, 1)) { - printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1)); - error = ENOSPC; - goto out; - } - - /* Start with a clean journal. */ - hfs_journal_flush(hfsmp, TRUE); - - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; - goto out; - } - transaction_begun = 1; - - /* Take the bitmap lock to update the alloc limit field */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - - /* - * Prevent new allocations from using the part we're trying to truncate. - * - * NOTE: allocLimit is set to the allocation block number where the new - * alternate volume header will be. That way there will be no files to - * interfere with allocating the new alternate volume header, and no files - * in the allocation blocks beyond (i.e. the blocks we're trying to - * truncate away. - * - * Also shrink the red-black tree if needed. - */ - if (hfsmp->blockSize == 512) { - error = UpdateAllocLimit (hfsmp, newblkcnt - 2); - } - else { - error = UpdateAllocLimit (hfsmp, newblkcnt - 1); - } - - /* Sparse devices use first fit allocation which is not ideal - * for volume resize which requires best fit allocation. If a - * sparse device is being truncated, disable the sparse device - * property temporarily for the duration of resize. Also reset - * the free extent cache so that it is rebuilt as sorted by - * totalBlocks instead of startBlock. - * - * Note that this will affect all allocations on the volume and - * ideal fix would be just to modify resize-related allocations, - * but it will result in complexity like handling of two free - * extent caches sorted differently, etc. So we stick to this - * solution for now. - */ - hfs_lock_mount (hfsmp); - if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { - hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; - ResetVCBFreeExtCache(hfsmp); - disable_sparse = true; - } - - /* - * Update the volume free block count to reflect the total number - * of free blocks that will exist after a successful resize. 
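The zone algebra above is worth restating with numbers. Shrinking a 1000-block volume to 600 blocks gives reclaimblks = 400 (zones C + D); if 150 of those blocks are allocated (zone C) and 250 are free (zone D), then hfs_freeblks() (zones B + D) must exceed 400, i.e. zone B must exceed 150: the free stationary space has to out-size the data to be relocated. The ENOSPC check reduces to exactly that comparison:

    /*
     * reclaim = C + D and free = B + D, so (reclaim >= free) <=> (C >= B):
     * refuse the shrink unless everything allocated past the new end
     * fits in the free space before it.
     */
    static int shrink_is_feasible(uint32_t reclaim_blocks, uint32_t free_blocks)
    {
        return reclaim_blocks < free_blocks;   /* mirrors the check above */
    }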
- * Relocation of extents will result in no net change in the total - * free space on the disk. Therefore the code that allocates - * space for new extent and deallocates the old extent explicitly - * prevents updating the volume free block count. It will also - * prevent false disk full error when the number of blocks in - * an extent being relocated is more than the free blocks that - * will exist after the volume is resized. - */ - hfsmp->freeBlocks -= reclaimblks; - updateFreeBlocks = true; - hfs_unlock_mount(hfsmp); - - if (lockflags) { - hfs_systemfile_unlock(hfsmp, lockflags); - lockflags = 0; - } - - /* - * Update the metadata zone size to match the new volume size, - * and if it too less, metadata zone might be disabled. - */ - hfs_metadatazone_init(hfsmp, false); - - /* - * If some files have blocks at or beyond the location of the - * new alternate volume header, recalculate free blocks and - * reclaim blocks. Otherwise just update free blocks count. - * - * The current allocLimit is set to the location of new alternate - * volume header, and reclaimblks are the total number of blocks - * that need to be reclaimed. So the check below is really - * ignoring the blocks allocated for old alternate volume header. - */ - if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) { - /* - * hfs_reclaimspace will use separate transactions when - * relocating files (so we don't overwhelm the journal). - */ - hfs_end_transaction(hfsmp); - transaction_begun = 0; - - /* Attempt to reclaim some space. */ - error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context); - if (error != 0) { - printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error); - error = ENOSPC; - goto out; - } - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; - goto out; - } - transaction_begun = 1; - - /* Check if we're clear now. */ - error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks); - if (error != 0) { - printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error); - error = EAGAIN; /* tell client to try again */ - goto out; - } - } - - /* - * Note: we take the attributes lock in case we have an attribute data vnode - * which needs to change size. - */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - - /* - * Allocate last 1KB for alternate volume header. - */ - error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1); - if (error) { - printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error); - goto out; - } - - /* - * Mark the old alternate volume header as free. - * We don't bother shrinking allocation bitmap file. - */ - if (hfsmp->blockSize == 512) - (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2); - else - (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1); - - /* - * Invalidate the existing alternate volume header. - * - * Don't include this in a transaction (don't call journal_modify_block) - * since this block will be outside of the truncated file system! 
- */ - if (hfsmp->hfs_alt_id_sector) { - error = buf_meta_bread(hfsmp->hfs_devvp, - HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), - hfsmp->hfs_physical_block_size, NOCRED, &bp); - if (error == 0) { - bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize); - (void) VNOP_BWRITE(bp); - } else { - if (bp) { - buf_brelse(bp); - } - } - bp = NULL; - } - - /* Log successful shrinking. */ - printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n", - hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks); - - /* - * Adjust file system variables and flush them to disk. - */ - hfsmp->totalBlocks = newblkcnt; - hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size; - hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; - - /* - * Note that although the logical block size is updated here, it is only done for - * the benefit of the partition management software. The logical block count change - * has not yet actually been propagated to the disk device yet. - */ - - hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); - MarkVCBDirty(hfsmp); - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); - if (error) - panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); - - /* - * Adjust the size of hfsmp->hfs_attrdata_vp - */ - if (hfsmp->hfs_attrdata_vp) { - struct cnode *cp; - struct filefork *fp; - - if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { - cp = VTOC(hfsmp->hfs_attrdata_vp); - fp = VTOF(hfsmp->hfs_attrdata_vp); - - cp->c_blocks = newblkcnt; - fp->ff_blocks = newblkcnt; - fp->ff_extents[0].blockCount = newblkcnt; - fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; - ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size); - vnode_put(hfsmp->hfs_attrdata_vp); - } - } - -out: - /* - * Update the allocLimit to acknowledge the last one or two blocks now. - * Add it to the tree as well if necessary. - */ - UpdateAllocLimit (hfsmp, hfsmp->totalBlocks); - - hfs_lock_mount (hfsmp); - if (disable_sparse == true) { - /* Now that resize is completed, set the volume to be sparse - * device again so that all further allocations will be first - * fit instead of best fit. Reset free extent cache so that - * it is rebuilt. - */ - hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; - ResetVCBFreeExtCache(hfsmp); - } - - if (error && (updateFreeBlocks == true)) { - hfsmp->freeBlocks += reclaimblks; - } - - if (hfsmp->nextAllocation >= hfsmp->allocLimit) { - hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; - } - hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; - hfs_unlock_mount (hfsmp); - - /* On error, reset the metadata zone for original volume size */ - if (error && (updateFreeBlocks == true)) { - hfs_metadatazone_init(hfsmp, false); - } - - if (lockflags) { - hfs_systemfile_unlock(hfsmp, lockflags); - } - if (transaction_begun) { - hfs_end_transaction(hfsmp); - hfs_journal_flush(hfsmp, FALSE); - /* Just to be sure, sync all data to the disk */ - (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); - } - - if (error) { - printf ("hfs_truncatefs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); - } - - return MacToVFSError(error); -} - - -/* - * Invalidate the physical block numbers associated with buffer cache blocks - * in the given extent of the given vnode. 
- */ -struct hfs_inval_blk_no { - daddr64_t sectorStart; - daddr64_t sectorCount; -}; -static int -hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in) -{ - daddr64_t blkno; - struct hfs_inval_blk_no *args; - - blkno = buf_blkno(bp); - args = args_in; - - if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount) - buf_setblkno(bp, buf_lblkno(bp)); - - return BUF_RETURNED; -} -static void -hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount) -{ - struct hfs_inval_blk_no args; - args.sectorStart = sectorStart; - args.sectorCount = sectorCount; - - buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args); -} - - -/* - * Copy the contents of an extent to a new location. Also invalidates the - * physical block number of any buffer cache block in the copied extent - * (so that if the block is written, it will go through VNOP_BLOCKMAP to - * determine the new physical block number). - * - * At this point, for regular files, we hold the truncate lock exclusive - * and the cnode lock exclusive. - */ -static int -hfs_copy_extent( - struct hfsmount *hfsmp, - struct vnode *vp, /* The file whose extent is being copied. */ - u_int32_t oldStart, /* The start of the source extent. */ - u_int32_t newStart, /* The start of the destination extent. */ - u_int32_t blockCount, /* The number of allocation blocks to copy. */ - vfs_context_t context) -{ - int err = 0; - size_t bufferSize; - void *buffer = NULL; - struct vfsioattr ioattr; - buf_t bp = NULL; - off_t resid; - size_t ioSize; - u_int32_t ioSizeSectors; /* Device sectors in this I/O */ - daddr64_t srcSector, destSector; - u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size; -#if CONFIG_PROTECT - int cpenabled = 0; -#endif - - /* - * Sanity check that we have locked the vnode of the file we're copying. - * - * But since hfs_systemfile_lock() doesn't actually take the lock on - * the allocation file if a journal is active, ignore the check if the - * file being copied is the allocation file. - */ - struct cnode *cp = VTOC(vp); - if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread()) - panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp); - -#if CONFIG_PROTECT - /* - * Prepare the CP blob and get it ready for use, if necessary. - * - * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs), - * because they are implicitly protected via the media key on iOS. As such, they - * must not be relocated except with the media key. So it is OK to not pass down - * a special cpentry to the IOMedia/LwVM code for handling. - */ - if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) { - int cp_err = 0; - /* - * Ideally, the file whose extents we are about to manipulate is using the - * newer offset-based IVs so that we can manipulate it regardless of the - * current lock state. However, we must maintain support for older-style - * EAs. - * - * For the older EA case, the IV was tied to the device LBA for file content. - * This means that encrypted data cannot be moved from one location to another - * in the filesystem without garbling the IV data. As a result, we need to - * access the file's plaintext because we cannot do our AES-symmetry trick - * here. This requires that we attempt a key-unwrap here (via cp_handle_relocate) - * to make forward progress. 
If the keys are unavailable then we will - * simply stop the resize in its tracks here since we cannot move - * this extent at this time. - */ - if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) { - cp_err = cp_handle_relocate(cp, hfsmp); - } - - if (cp_err) { - printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err); - return cp_err; - } - - cpenabled = 1; - } -#endif - - - /* - * Determine the I/O size to use - * - * NOTE: Many external drives will result in an ioSize of 128KB. - * TODO: Should we use a larger buffer, doing several consecutive - * reads, then several consecutive writes? - */ - vfs_ioattr(hfsmp->hfs_mp, &ioattr); - bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt); - if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize)) - return ENOMEM; - - /* Get a buffer for doing the I/O */ - bp = buf_alloc(hfsmp->hfs_devvp); - buf_setdataptr(bp, (uintptr_t)buffer); - - resid = (off_t) blockCount * (off_t) hfsmp->blockSize; - srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; - destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; - while (resid > 0) { - ioSize = MIN(bufferSize, (size_t) resid); - ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size; - - /* Prepare the buffer for reading */ - buf_reset(bp, B_READ); - buf_setsize(bp, ioSize); - buf_setcount(bp, ioSize); - buf_setblkno(bp, srcSector); - buf_setlblkno(bp, srcSector); - - /* - * Note that because this is an I/O to the device vp - * it is correct to have lblkno and blkno both point to the - * start sector being read from. If it were being issued against the - * underlying file then that would be different. - */ - - /* Attach the new CP blob to the buffer if needed */ -#if CONFIG_PROTECT - if (cpenabled) { - if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) { - /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */ - cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT; - buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry); - } - else { - /* - * Use the cnode's cp key. This file is tied to the - * LBAs of the physical blocks that it occupies. - */ - buf_setcpaddr (bp, cp->c_cpentry); - } - - /* Initialize the content protection file offset to start at 0 */ - buf_setcpoff (bp, 0); - } -#endif - - /* Do the read */ - err = VNOP_STRATEGY(bp); - if (!err) - err = buf_biowait(bp); - if (err) { -#if CONFIG_PROTECT - /* Turn the flag off in error cases. */ - if (cpenabled) { - cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT; - } -#endif - printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err); - break; - } - - /* Prepare the buffer for writing */ - buf_reset(bp, B_WRITE); - buf_setsize(bp, ioSize); - buf_setcount(bp, ioSize); - buf_setblkno(bp, destSector); - buf_setlblkno(bp, destSector); - if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl)) - buf_markfua(bp); - -#if CONFIG_PROTECT - /* Attach the CP to the buffer if needed */ - if (cpenabled) { - if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) { - buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry); - } - else { - /* - * Use the cnode's CP key. This file is still tied - * to the LBAs of the physical blocks that it occupies. - */ - buf_setcpaddr (bp, cp->c_cpentry); - } - /* - * The last STRATEGY call may have updated the cp file offset behind our - * back, so we cannot trust it. Re-initialize the content protection - * file offset back to 0 before initiating the write portion of this I/O. 
- */ - buf_setcpoff (bp, 0); - } -#endif - - /* Do the write */ - vnode_startwrite(hfsmp->hfs_devvp); - err = VNOP_STRATEGY(bp); - if (!err) { - err = buf_biowait(bp); - } -#if CONFIG_PROTECT - /* Turn the flag off regardless once the strategy call finishes. */ - if (cpenabled) { - cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT; - } -#endif - if (err) { - printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err); - break; - } - - resid -= ioSize; - srcSector += ioSizeSectors; - destSector += ioSizeSectors; - } - if (bp) - buf_free(bp); - if (buffer) - kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize); - - /* Make sure all writes have been flushed to disk. */ - if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) { - err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); - if (err) { - printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err); - err = 0; /* Don't fail the copy. */ - } - } - - if (!err) - hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock); - - return err; -} - - -/* Structure to store state of reclaiming extents from a - * given file. hfs_reclaim_file()/hfs_reclaim_xattr() - * initializes the values in this structure which are then - * used by code that reclaims and splits the extents. - */ -struct hfs_reclaim_extent_info { - struct vnode *vp; - u_int32_t fileID; - u_int8_t forkType; - u_int8_t is_dirlink; /* Extent belongs to directory hard link */ - u_int8_t is_sysfile; /* Extent belongs to system file */ - u_int8_t is_xattr; /* Extent belongs to extent-based xattr */ - u_int8_t extent_index; - int lockflags; /* Locks that reclaim and split code should grab before modifying the extent record */ - u_int32_t blocks_relocated; /* Total blocks relocated for this file till now */ - u_int32_t recStartBlock; /* File allocation block number (FABN) for current extent record */ - u_int32_t cur_blockCount; /* Number of allocation blocks that have been checked for reclaim */ - struct filefork *catalog_fp; /* If non-NULL, extent is from catalog record */ - union record { - HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */ - HFSPlusAttrRecord xattr; /* Attribute record for large EAs */ - } record; - HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed. - * For catalog extent record, points to the correct - * extent information in filefork. For overflow extent - * record, or xattr record, points to extent record - * in the structure above - */ - struct cat_desc *dirlink_desc; - struct cat_attr *dirlink_attr; - struct filefork *dirlink_fork; /* For directory hard links, fp points actually to this */ - struct BTreeIterator *iterator; /* Shared read/write iterator, hfs_reclaim_file/xattr() - * use it for reading and hfs_reclaim_extent()/hfs_split_extent() - * use it for writing updated extent record - */ - struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */ - u_int16_t recordlen; - int overflow_count; /* For debugging, counter for overflow extent record */ - FCB *fcb; /* Pointer to the current btree being traversed */ -}; - -/* - * Split the current extent into two extents, with first extent - * to contain given number of allocation blocks. Splitting of - * extent creates one new extent entry which can result in - * shifting of many entries through all the extent records of a - * file, and/or creating a new extent record in the overflow - * extent btree. 
- * - * Example: - * The diagram below represents two consecutive extent records, - * for simplicity, let's call them record X and X+1 respectively. - * Interesting extent entries have been denoted by letters. - * If the letter is unchanged before and after split, it means - * that the extent entry was not modified during the split. - * A '.' means that the entry remains unchanged after the split - * and is not relevant for our example. A '0' means that the - * extent entry is empty. - * - * If there isn't sufficient contiguous free space to relocate - * an extent (extent "C" below), we will have to break the one - * extent into multiple smaller extents, and relocate each of - * the smaller extents individually. The way we do this is by - * finding the largest contiguous free space that is currently - * available (N allocation blocks), and then converting extent "C" - * into two extents, C1 and C2, that occupy exactly the same - * allocation blocks as extent C. Extent C1 is the first - * N allocation blocks of extent C, and extent C2 is the remainder - * of extent C. Then we can relocate extent C1 since we know - * we have enough contiguous free space to relocate it in its - * entirety. We then repeat the process starting with extent C2. - * - * In record X, only the entries following entry C are shifted, and - * the original entry C is replaced with two entries C1 and C2 which - * are actually two extent entries for contiguous allocation blocks. - * - * Note that the entry E from record X is shifted into record X+1 as - * the new first entry. Since the first entry of record X+1 is updated, - * the FABN (the record's key) decreases by the blockCount of entry E. - * This also results in shifting of all extent entries in record X+1. - * Note that the number of empty entries after the split has been - * changed from 3 to 2. - * - * Before: - * record X record X+1 - * ---------------------===--------- --------------------------------- - * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 | - * ---------------------===--------- --------------------------------- - * - * After: - * ---------------------=======----- --------------------------------- - * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 | - * ---------------------=======----- --------------------------------- - * - * C1.startBlock = C.startBlock - * C1.blockCount = N - * - * C2.startBlock = C.startBlock + N - * C2.blockCount = C.blockCount - N - * - * FABN = old FABN - E.blockCount - * - * Inputs: - * extent_info - This is the structure that contains state about - * the current file, extent, and extent record that - * is being relocated. This structure is shared - * among code that traverses through all the extents - * of the file, code that relocates extents, and - * code that splits the extent. - * newBlockCount - The blockCount of the extent to be split after - * a successful split operation. - * Output: - * Zero on success, non-zero on failure.
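The C1/C2 arithmetic in the diagram is small enough to state directly in code. Here is a self-contained sketch of just that arithmetic (the real routine additionally shifts entries and rewrites btree records); the struct and function names are illustrative only:

#include <assert.h>
#include <stdint.h>

struct extent { uint32_t startBlock; uint32_t blockCount; };

/* Split extent c into c1 (its first n blocks) and c2 (the remainder),
 * exactly as described for C, C1 and C2 above. */
static void
split_extent(struct extent c, uint32_t n, struct extent *c1, struct extent *c2)
{
    assert(n > 0 && n < c.blockCount);
    c1->startBlock = c.startBlock;
    c1->blockCount = n;
    c2->startBlock = c.startBlock + n;
    c2->blockCount = c.blockCount - n;
}

Because entry E moves into record X+1, that record's key changes by exactly E.blockCount, which is why the code below deletes and reinserts the overflow record rather than updating it in place.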
- */ -static int -hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount) -{ - int error = 0; - int index = extent_info->extent_index; - int i; - HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */ - HFSPlusExtentDescriptor last_extent; - HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */ - HFSPlusExtentRecord *extents_rec = NULL; - HFSPlusExtentKey *extents_key = NULL; - HFSPlusAttrRecord *xattr_rec = NULL; - HFSPlusAttrKey *xattr_key = NULL; - struct BTreeIterator iterator; - struct FSBufferDescriptor btdata; - uint16_t reclen; - uint32_t read_recStartBlock; /* Starting allocation block number to read old extent record */ - uint32_t write_recStartBlock; /* Starting allocation block number to insert newly updated extent record */ - Boolean create_record = false; - Boolean is_xattr; - struct cnode *cp; - - is_xattr = extent_info->is_xattr; - extents = extent_info->extents; - cp = VTOC(extent_info->vp); - - if (newBlockCount == 0) { - if (hfs_resize_debug) { - printf ("hfs_split_extent: No splitting required for newBlockCount=0\n"); - } - return error; - } - - if (hfs_resize_debug) { - printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount); - } - - /* The extents overflow btree itself cannot have more than 8 extents. - * No split is allowed if its 8th extent is already in use. - */ - if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) { - printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n"); - error = ENOSPC; - goto out; - } - - /* Determine the starting allocation block number for the following - * overflow extent record, if any, before the current record - * gets modified. - */ - read_recStartBlock = extent_info->recStartBlock; - for (i = 0; i < kHFSPlusExtentDensity; i++) { - if (extents[i].blockCount == 0) { - break; - } - read_recStartBlock += extents[i].blockCount; - } - - /* Shift and split */ - if (index == kHFSPlusExtentDensity-1) { - /* The new extent created after the split will go into the following overflow extent record */ - shift_extent.startBlock = extents[index].startBlock + newBlockCount; - shift_extent.blockCount = extents[index].blockCount - newBlockCount; - - /* The last extent in the record will be split, so nothing to shift */ - } else { - /* Splitting an extent can result in at most one - * extent entry being shifted into the following overflow extent - * record. So, store the last extent entry for later. - */ - shift_extent = extents[kHFSPlusExtentDensity-1]; - if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) { - printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount); - } - - /* Start shifting extent information from the end of the extent - * record to the index where we want to insert the new extent. - * Note that kHFSPlusExtentDensity-1 is already saved above, and - * does not need to be shifted. The extent entry that is being - * split does not get shifted.
- */ - for (i = kHFSPlusExtentDensity-2; i > index; i--) { - if (hfs_resize_debug) { - if (extents[i].blockCount) { - printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount); - } - } - extents[i+1] = extents[i]; - } - } - - if (index == kHFSPlusExtentDensity-1) { - /* The second half of the extent being split will be the overflow - * entry that will go into following overflow extent record. The - * value has been stored in 'shift_extent' above, so there is - * nothing to be done here. - */ - } else { - /* Update the values in the second half of the extent being split - * before updating the first half of the split. Note that the - * extent to split or first half of the split is at index 'index' - * and a new extent or second half of the split will be inserted at - * 'index+1' or into following overflow extent record. - */ - extents[index+1].startBlock = extents[index].startBlock + newBlockCount; - extents[index+1].blockCount = extents[index].blockCount - newBlockCount; - } - /* Update the extent being split, only the block count will change */ - extents[index].blockCount = newBlockCount; - - if (hfs_resize_debug) { - printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount); - if (index != kHFSPlusExtentDensity-1) { - printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount); - } else { - printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount); - } - } - - /* Write out information about the newly split extent to the disk */ - if (extent_info->catalog_fp) { - /* (extent_info->catalog_fp != NULL) means the newly split - * extent exists in the catalog record. This means that - * the cnode was updated. Therefore, to write out the changes, - * mark the cnode as modified. We cannot call hfs_update() - * in this function because the caller hfs_reclaim_extent() - * is holding the catalog lock currently. - */ - cp->c_flag |= C_MODIFIED; - } else { - /* The newly split extent is for large EAs or is in overflow - * extent record, so update it directly in the btree using the - * iterator information from the shared extent_info structure - */ - error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, - &(extent_info->btdata), extent_info->recordlen); - if (error) { - printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error); - goto out; - } - } - - /* No extent entry to be shifted into another extent overflow record */ - if (shift_extent.blockCount == 0) { - if (hfs_resize_debug) { - printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n"); - } - error = 0; - goto out; - } - - /* The overflow extent entry has to be shifted into an extent - * overflow record. This means that we might have to shift - * extent entries from all subsequent overflow records by one. - * We start iteration from the first record to the last record, - * and shift the extent entry from one record to another. - * We might have to create a new extent record for the last - * extent entry for the file. - */ - - /* Initialize iterator to search the next record */ - bzero(&iterator, sizeof(iterator)); - if (is_xattr) { - /* Copy the key from the iterator that was used to update the modified attribute record. 
*/ - xattr_key = (HFSPlusAttrKey *)&(iterator.key); - bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey)); - /* Note: xattr_key->startBlock will be initialized later in the iteration loop */ - - MALLOC(xattr_rec, HFSPlusAttrRecord *, - sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK); - if (xattr_rec == NULL) { - error = ENOMEM; - goto out; - } - btdata.bufferAddress = xattr_rec; - btdata.itemSize = sizeof(HFSPlusAttrRecord); - btdata.itemCount = 1; - extents = xattr_rec->overflowExtents.extents; - } else { - /* Initialize the extent key for the current file */ - extents_key = (HFSPlusExtentKey *) &(iterator.key); - extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; - extents_key->forkType = extent_info->forkType; - extents_key->fileID = extent_info->fileID; - /* Note: extents_key->startBlock will be initialized later in the iteration loop */ - - MALLOC(extents_rec, HFSPlusExtentRecord *, - sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK); - if (extents_rec == NULL) { - error = ENOMEM; - goto out; - } - btdata.bufferAddress = extents_rec; - btdata.itemSize = sizeof(HFSPlusExtentRecord); - btdata.itemCount = 1; - extents = extents_rec[0]; - } - - /* The overflow extent entry has to be shifted into an extent - * overflow record. This means that we might have to shift - * extent entries from all subsequent overflow records by one. - * We start iteration from the first record to the last record, - * examine one extent record in each iteration and shift one - * extent entry from one record to another. We might have to - * create a new extent record for the last extent entry for the - * file. - * - * If shift_extent.blockCount is non-zero, it means that there is - * an extent entry that needs to be shifted into the next - * overflow extent record. We keep on going till there are no such - * entries left to be shifted. This will also change the starting - * allocation block number of the extent record which is part of - * the key for the extent record in each iteration. Note that - * because the extent record key is changing while we are searching, - * the record can not be updated directly, instead it has to be - * deleted and inserted again. - */ - while (shift_extent.blockCount) { - if (hfs_resize_debug) { - printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock); - } - - /* Search if there is any existing overflow extent record - * that matches the current file and the logical start block - * number. - * - * For this, the logical start block number in the key is - * the value calculated based on the logical start block - * number of the current extent record and the total number - * of blocks existing in the current extent record. - */ - if (is_xattr) { - xattr_key->startBlock = read_recStartBlock; - } else { - extents_key->startBlock = read_recStartBlock; - } - error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator); - if (error) { - if (error != btNotFound) { - printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error); - goto out; - } - /* No matching record was found, so create a new extent record. - * Note: Since no record was found, we can't rely on the - * btree key in the iterator any longer. This will be initialized - * later before we insert the record. 
- */ - create_record = true; - } - - /* The extra extent entry from the previous record is being inserted - * as the first entry in the current extent record. This will change - * the file allocation block number (FABN) of the current extent - * record, which is the startBlock value from the extent record key. - * Since one extra entry is being inserted in the record, the new - * FABN for the record will be less than the old FABN by the number of blocks - * in the new extent entry being inserted at the start. We have to - * do this before we update read_recStartBlock to point at the - * startBlock of the following record. - */ - write_recStartBlock = read_recStartBlock - shift_extent.blockCount; - if (hfs_resize_debug) { - if (create_record) { - printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock); - } - } - - /* Now update the read_recStartBlock to account for the total number - * of blocks in this extent record. It will now point to the - * starting allocation block number for the next extent record. - */ - for (i = 0; i < kHFSPlusExtentDensity; i++) { - if (extents[i].blockCount == 0) { - break; - } - read_recStartBlock += extents[i].blockCount; - } - - if (create_record == true) { - /* Initialize new record content with only one extent entry */ - bzero(extents, sizeof(HFSPlusExtentRecord)); - /* The new record will contain only one extent entry */ - extents[0] = shift_extent; - /* There are no more overflow extents to be shifted */ - shift_extent.startBlock = shift_extent.blockCount = 0; - - if (is_xattr) { - /* BTSearchRecord above returned btNotFound, - * but since the attribute btree is never empty - * when we are trying to insert a new overflow - * record for the xattrs, the xattr_key will - * contain correct data. So we don't need to - * re-initialize it here, unlike the extents case below. - */ - - /* Initialize the new xattr record */ - xattr_rec->recordType = kHFSPlusAttrExtents; - xattr_rec->overflowExtents.reserved = 0; - reclen = sizeof(HFSPlusAttrExtents); - } else { - /* BTSearchRecord above returned btNotFound, - * which means that the extents_key content might - * not correspond to the record that we are - * trying to create, especially when the extents - * overflow btree is empty. So we always reinitialize - * the extents_key. - */ - extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; - extents_key->forkType = extent_info->forkType; - extents_key->fileID = extent_info->fileID; - - /* Initialize the new extent record */ - reclen = sizeof(HFSPlusExtentRecord); - } - } else { - /* The overflow extent entry from the previous record will be - * the first entry in this extent record. If the last - * extent entry in this record is valid, it will be shifted - * into the following extent record as its first entry. So - * save the last entry before shifting entries in the current - * record. - */ - last_extent = extents[kHFSPlusExtentDensity-1]; - - /* Shift all entries by one index towards the end */ - for (i = kHFSPlusExtentDensity-2; i >= 0; i--) { - extents[i+1] = extents[i]; - } - - /* The overflow extent entry saved from the previous record - * is now the first entry in the current record.
- */ - extents[0] = shift_extent; - - if (hfs_resize_debug) { - printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock); - } - - /* The last entry from the current record will be the - * overflow entry which will be the first entry for - * the following extent record. - */ - shift_extent = last_extent; - - /* Since the key->startBlock is being changed for this record, - * it should be deleted and inserted with the new key. - */ - error = BTDeleteRecord(extent_info->fcb, &iterator); - if (error) { - printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error); - goto out; - } - if (hfs_resize_debug) { - printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock)); - } - } - - /* Insert the newly created or modified extent record */ - bzero(&iterator.hint, sizeof(iterator.hint)); - if (is_xattr) { - xattr_key->startBlock = write_recStartBlock; - } else { - extents_key->startBlock = write_recStartBlock; - } - error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen); - if (error) { - printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error); - goto out; - } - if (hfs_resize_debug) { - printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock); - } - } - -out: - /* - * The extents overflow btree or attributes btree headers might have - * been modified during the split/shift operation, so flush the - * changes to the disk while we are inside the journal transaction. - * We should only be able to generate I/O that modifies the B-Tree - * header nodes while we're in the middle of a journal transaction. - * Otherwise it might result in a panic during unmount. - */ - BTFlushPath(extent_info->fcb); - - if (extents_rec) { - FREE (extents_rec, M_TEMP); - } - if (xattr_rec) { - FREE (xattr_rec, M_TEMP); - } - return error; -} - - -/* - * Relocate an extent if it lies beyond the expected end of the volume. - * - * This function is called for every extent of the file being relocated. - * It allocates space for relocation, copies the data, deallocates - * the old extent, and updates the corresponding on-disk extent. If the function - * does not find contiguous space to relocate an extent, it splits the - * extent into smaller pieces so that it can be relocated out of the area of the - * disk being reclaimed. As an optimization, if an extent lies partially - * in the area of the disk being reclaimed, it is split so that we only - * have to relocate the part that overlaps with the area of the disk - * being reclaimed. - * - * Note that every extent is relocated in its own transaction so that - * the relocations do not overwhelm the journal. This function handles extent - * records that exist in the catalog record, extent records from the overflow - * extents btree, and extents for large EAs. - * - * Inputs: - * extent_info - This is the structure that contains state about - * the current file, extent, and extent record that - * is being relocated. This structure is shared - * among code that traverses through all the extents - * of the file, code that relocates extents, and - * code that splits the extent.
- */ -static int -hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context) -{ - int error = 0; - int index; - struct cnode *cp; - u_int32_t oldStartBlock; - u_int32_t oldBlockCount; - u_int32_t newStartBlock; - u_int32_t newBlockCount; - u_int32_t roundedBlockCount; - uint16_t node_size; - uint32_t remainder_blocks; - u_int32_t alloc_flags; - int blocks_allocated = false; - - index = extent_info->extent_index; - cp = VTOC(extent_info->vp); - - oldStartBlock = extent_info->extents[index].startBlock; - oldBlockCount = extent_info->extents[index].blockCount; - - if (0 && hfs_resize_debug) { - printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount); - } - - /* If the current extent lies completely within allocLimit, - * it does not require any relocation. - */ - if ((oldStartBlock + oldBlockCount) <= allocLimit) { - extent_info->cur_blockCount += oldBlockCount; - return error; - } - - /* Every extent should be relocated in its own transaction - * to make sure that we don't overflow the journal buffer. - */ - error = hfs_start_transaction(hfsmp); - if (error) { - return error; - } - extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK); - - /* Check if the extent lies partially in the area to reclaim, - * i.e. it starts before allocLimit and ends beyond allocLimit. - * We have already skipped extents that lie completely within - * allocLimit in the check above, so we only check for the - * startBlock. If it lies partially, split it so that we - * only relocate part of the extent. - */ - if (oldStartBlock < allocLimit) { - newBlockCount = allocLimit - oldStartBlock; - - if (hfs_resize_debug) { - int idx = extent_info->extent_index; - printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount); - } - - /* If the extent belongs to a btree, check and trim - * it to be a multiple of the node size. - */ - if (extent_info->is_sysfile) { - node_size = get_btree_nodesize(extent_info->vp); - /* If the btree node size is less than the block size, - * splitting this extent will not split a node across - * different extents. So we only check and trim if - * the node size is more than the allocation block size. - */ - if (node_size > hfsmp->blockSize) { - remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize); - if (remainder_blocks) { - newBlockCount -= remainder_blocks; - if (hfs_resize_debug) { - printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount); - } - } - } - /* If newBlockCount is now zero because of the round-down - * (done so that btree nodes are not split across extents), - * this extent straddling the resize boundary cannot be - * usefully split. Skip over to relocating the complete extent. - */ - if (newBlockCount == 0) { - if (hfs_resize_debug) { - printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n"); - } - goto relocate_full_extent; - } - } - - /* Split the extent into two parts --- the first extent lies - * completely within allocLimit and therefore does not require - * relocation.
The second extent will require relocation, which - * will be handled when the caller calls this function again - * for the next extent. - */ - error = hfs_split_extent(extent_info, newBlockCount); - if (error == 0) { - /* Split success, no relocation required */ - goto out; - } - /* Split failed, so try to relocate entire extent */ - if (hfs_resize_debug) { - int idx = extent_info->extent_index; - printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount); - } - } - -relocate_full_extent: - /* At this point, the current extent requires relocation. - * We first try to allocate space equal to the size of the extent - * being relocated so that we can relocate it without splitting. - * If the allocation fails, we will try to allocate contiguous - * blocks out of the metadata zone. If that allocation also fails, - * we take whatever contiguous block run is returned - * by the allocation, split the extent into two parts, and then - * relocate the first split extent. - */ - alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; - if (extent_info->is_sysfile) { - alloc_flags |= HFS_ALLOC_METAZONE; - } - - error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, - &newStartBlock, &newBlockCount); - if ((extent_info->is_sysfile == false) && - ((error == dskFulErr) || (error == ENOSPC))) { - /* For non-system files, try reallocating space in metadata zone */ - alloc_flags |= HFS_ALLOC_METAZONE; - error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, - alloc_flags, &newStartBlock, &newBlockCount); - } - if ((error == dskFulErr) || (error == ENOSPC)) { - /* We did not find the desired contiguous space for this extent. - * So don't worry about getting contiguity anymore. Also, allow using - * blocks that were recently deallocated. - */ - alloc_flags &= ~HFS_ALLOC_FORCECONTIG; - alloc_flags |= HFS_ALLOC_FLUSHTXN; - - error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, - alloc_flags, &newStartBlock, &newBlockCount); - if (error) { - printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); - goto out; - } - blocks_allocated = true; - - /* The number of blocks allocated may be less than the requested - * number of blocks. For btree extents, check and trim the - * extent to be a multiple of the node size.
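The three-step fallback just described (contiguous, then contiguous in the metadata zone, then any block run with a forced journal flush) can be condensed into a sketch. alloc_fn and the ALLOC_* flags below are hypothetical stand-ins for BlockAllocate() and the HFS_ALLOC_* flags, and the dskFulErr case is folded into ENOSPC for brevity:

#include <errno.h>
#include <stdint.h>

#define ALLOC_FORCECONTIG 0x1   /* stand-ins for the HFS_ALLOC_* flags */
#define ALLOC_METAZONE    0x2
#define ALLOC_FLUSHTXN    0x4

typedef int (*alloc_fn)(uint32_t want, uint32_t flags,
    uint32_t *start, uint32_t *got);

/* Mirror the cascade in hfs_reclaim_extent(): contiguous first, then the
 * metadata zone, then any block run (flushing the journal to free space).
 * On success *got may still be smaller than want; the caller splits. */
static int
allocate_with_fallback(alloc_fn alloc, uint32_t want, int is_sysfile,
    uint32_t *start, uint32_t *got)
{
    uint32_t flags = ALLOC_FORCECONTIG | (is_sysfile ? ALLOC_METAZONE : 0);
    int err = alloc(want, flags, start, got);

    if (err == ENOSPC && !is_sysfile) {
        flags |= ALLOC_METAZONE;        /* retry in the metadata zone */
        err = alloc(want, flags, start, got);
    }
    if (err == ENOSPC) {
        flags &= ~ALLOC_FORCECONTIG;    /* give up on contiguity */
        flags |= ALLOC_FLUSHTXN;
        err = alloc(want, flags, start, got);
    }
    return err;
}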
- */ - if (extent_info->is_sysfile) { - node_size = get_btree_nodesize(extent_info->vp); - if (node_size > hfsmp->blockSize) { - remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize); - if (remainder_blocks) { - roundedBlockCount = newBlockCount - remainder_blocks; - /* Free tail-end blocks of the newly allocated extent */ - BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount, - newBlockCount - roundedBlockCount, - HFS_ALLOC_SKIPFREEBLKS); - newBlockCount = roundedBlockCount; - if (hfs_resize_debug) { - printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount); - } - if (newBlockCount == 0) { - printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID); - error = ENOSPC; - goto out; - } - } - } - } - - /* The number of blocks allocated is less than the number of - * blocks requested, so split this extent --- the first extent - * will be relocated as part of this function call and the caller - * will handle relocating the second extent by calling this - * function again for the second extent. - */ - error = hfs_split_extent(extent_info, newBlockCount); - if (error) { - printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); - goto out; - } - oldBlockCount = newBlockCount; - } - if (error) { - printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); - goto out; - } - blocks_allocated = true; - - /* Copy data from old location to new location */ - error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock, - newStartBlock, newBlockCount, context); - if (error) { - printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error); - goto out; - } - - /* Update the extent record with the new start block information */ - extent_info->extents[index].startBlock = newStartBlock; - - /* Sync the content back to the disk */ - if (extent_info->catalog_fp) { - /* Update the extents in catalog record */ - if (extent_info->is_dirlink) { - error = cat_update_dirlink(hfsmp, extent_info->forkType, - extent_info->dirlink_desc, extent_info->dirlink_attr, - &(extent_info->dirlink_fork->ff_data)); - } else { - cp->c_flag |= C_MODIFIED; - /* If this is a system file, sync volume headers on disk */ - if (extent_info->is_sysfile) { - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); - } - } - } else { - /* Replace record for extents overflow or extents-based xattrs */ - error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, - &(extent_info->btdata), extent_info->recordlen); - } - if (error) { - printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error); - goto out; - } - - /* Deallocate the old extent */ - error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); - if (error) { - printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); - goto out; - } - extent_info->blocks_relocated += newBlockCount; - - if (hfs_resize_debug) { - 
printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); - } - -out: - if (error != 0) { - if (blocks_allocated == true) { - BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); - } - } else { - /* On success, increment the total allocation blocks processed */ - extent_info->cur_blockCount += newBlockCount; - } - - hfs_systemfile_unlock(hfsmp, extent_info->lockflags); - - /* For a non-system file, if an extent entry from catalog record - * was modified, sync the in-memory changes to the catalog record - * on disk before ending the transaction. - */ - if ((extent_info->catalog_fp) && - (extent_info->is_sysfile == false)) { - (void) hfs_update(extent_info->vp, MNT_WAIT); - } - - hfs_end_transaction(hfsmp); - - return error; -} - -/* Report intermediate progress during volume resize */ -static void -hfs_truncatefs_progress(struct hfsmount *hfsmp) -{ - u_int32_t cur_progress = 0; - - hfs_resize_progress(hfsmp, &cur_progress); - if (cur_progress > (hfsmp->hfs_resize_progress + 9)) { - printf("hfs_truncatefs: %d%% done...\n", cur_progress); - hfsmp->hfs_resize_progress = cur_progress; - } - return; -} - -/* - * Reclaim space at the end of a volume for given file and forktype. - * - * This routine attempts to move any extent which contains allocation blocks - * at or after "allocLimit." A separate transaction is used for every extent - * that needs to be moved. If there is not contiguous space available for - * moving an extent, it can be split into smaller extents. The contents of - * any moved extents are read and written via the volume's device vnode -- - * NOT via "vp." During the move, moved blocks which are part of a transaction - * have their physical block numbers invalidated so they will eventually be - * written to their new locations. - * - * This function is also called for directory hard links. Directory hard links - * are regular files with no data fork and resource fork that contains alias - * information for backward compatibility with pre-Leopard systems. However - * non-Mac OS X implementation can add/modify data fork or resource fork - * information to directory hard links, so we check, and if required, relocate - * both data fork and resource fork. - * - * Inputs: - * hfsmp The volume being resized. - * vp The vnode for the system file. - * fileID ID of the catalog record that needs to be relocated - * forktype The type of fork that needs relocated, - * kHFSResourceForkType for resource fork, - * kHFSDataForkType for data fork - * allocLimit Allocation limit for the new volume size, - * do not use this block or beyond. All extents - * that use this block or any blocks beyond this limit - * will be relocated. - * - * Side Effects: - * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation - * blocks that were relocated. - */ -static int -hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, - u_int8_t forktype, u_long allocLimit, vfs_context_t context) -{ - int error = 0; - struct hfs_reclaim_extent_info *extent_info; - int i; - int lockflags = 0; - struct cnode *cp; - struct filefork *fp; - int took_truncate_lock = false; - int release_desc = false; - HFSPlusExtentKey *key; - - /* If there is no vnode for this file, then there's nothing to do. 
*/ - if (vp == NULL) { - return 0; - } - - cp = VTOC(vp); - - if (hfs_resize_debug) { - const char *filename = (const char *) cp->c_desc.cd_nameptr; - int namelen = cp->c_desc.cd_namelen; - - if (filename == NULL) { - filename = ""; - namelen = 0; - } - printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename); - } - - MALLOC(extent_info, struct hfs_reclaim_extent_info *, - sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK); - if (extent_info == NULL) { - return ENOMEM; - } - bzero(extent_info, sizeof(struct hfs_reclaim_extent_info)); - extent_info->vp = vp; - extent_info->fileID = fileID; - extent_info->forkType = forktype; - extent_info->is_sysfile = vnode_issystem(vp); - if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) { - extent_info->is_dirlink = true; - } - /* We always need allocation bitmap and extent btree lock */ - lockflags = SFL_BITMAP | SFL_EXTENTS; - if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) { - lockflags |= SFL_CATALOG; - } else if (fileID == kHFSAttributesFileID) { - lockflags |= SFL_ATTRIBUTE; - } else if (fileID == kHFSStartupFileID) { - lockflags |= SFL_STARTUP; - } - extent_info->lockflags = lockflags; - extent_info->fcb = VTOF(hfsmp->hfs_extents_vp); - - /* Flush data associated with current file on disk. - * - * If the current vnode is directory hard link, no flushing of - * journal or vnode is required. The current kernel does not - * modify data/resource fork of directory hard links, so nothing - * will be in the cache. If a directory hard link is newly created, - * the resource fork data is written directly using devvp and - * the code that actually relocates data (hfs_copy_extent()) also - * uses devvp for its I/O --- so they will see a consistent copy. - */ - if (extent_info->is_sysfile) { - /* If the current vnode is system vnode, flush journal - * to make sure that all data is written to the disk. - */ - error = hfs_journal_flush(hfsmp, TRUE); - if (error) { - printf ("hfs_reclaim_file: journal_flush returned %d\n", error); - goto out; - } - } else if (extent_info->is_dirlink == false) { - /* Flush all blocks associated with this regular file vnode. - * Normally there should not be buffer cache blocks for regular - * files, but for objects like symlinks, we can have buffer cache - * blocks associated with the vnode. Therefore we call - * buf_flushdirtyblks() also. - */ - buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file"); - - hfs_unlock(cp); - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - took_truncate_lock = true; - (void) cluster_push(vp, 0); - error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - if (error) { - goto out; - } - - /* If the file no longer exists, nothing left to do */ - if (cp->c_flag & C_NOEXISTS) { - error = 0; - goto out; - } - - /* Wait for any in-progress writes to this vnode to complete, so that we'll - * be copying consistent bits. (Otherwise, it's possible that an async - * write will complete to the old extent after we read from it. That - * could lead to corruption.) - */ - error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file"); - if (error) { - goto out; - } - } - - if (hfs_resize_debug) { - printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? 
"dirlink" : "file"), fileID); - } - - if (extent_info->is_dirlink) { - MALLOC(extent_info->dirlink_desc, struct cat_desc *, - sizeof(struct cat_desc), M_TEMP, M_WAITOK); - MALLOC(extent_info->dirlink_attr, struct cat_attr *, - sizeof(struct cat_attr), M_TEMP, M_WAITOK); - MALLOC(extent_info->dirlink_fork, struct filefork *, - sizeof(struct filefork), M_TEMP, M_WAITOK); - if ((extent_info->dirlink_desc == NULL) || - (extent_info->dirlink_attr == NULL) || - (extent_info->dirlink_fork == NULL)) { - error = ENOMEM; - goto out; - } - - /* Lookup catalog record for directory hard link and - * create a fake filefork for the value looked up from - * the disk. - */ - fp = extent_info->dirlink_fork; - bzero(extent_info->dirlink_fork, sizeof(struct filefork)); - extent_info->dirlink_fork->ff_cp = cp; - lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - error = cat_lookup_dirlink(hfsmp, fileID, forktype, - extent_info->dirlink_desc, extent_info->dirlink_attr, - &(extent_info->dirlink_fork->ff_data)); - hfs_systemfile_unlock(hfsmp, lockflags); - if (error) { - printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error); - goto out; - } - release_desc = true; - } else { - fp = VTOF(vp); - } - - extent_info->catalog_fp = fp; - extent_info->recStartBlock = 0; - extent_info->extents = extent_info->catalog_fp->ff_extents; - /* Relocate extents from the catalog record */ - for (i = 0; i < kHFSPlusExtentDensity; ++i) { - if (fp->ff_extents[i].blockCount == 0) { - break; - } - extent_info->extent_index = i; - error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); - if (error) { - printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error); - goto out; - } - } - - /* If the number of allocation blocks processed for reclaiming - * are less than total number of blocks for the file, continuing - * working on overflow extents record. 
- */ - if (fp->ff_blocks <= extent_info->cur_blockCount) { - if (0 && hfs_resize_debug) { - printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount); - } - goto out; - } - - if (hfs_resize_debug) { - printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount); - } - - MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); - if (extent_info->iterator == NULL) { - error = ENOMEM; - goto out; - } - bzero(extent_info->iterator, sizeof(struct BTreeIterator)); - key = (HFSPlusExtentKey *) &(extent_info->iterator->key); - key->keyLength = kHFSPlusExtentKeyMaximumLength; - key->forkType = forktype; - key->fileID = fileID; - key->startBlock = extent_info->cur_blockCount; - - extent_info->btdata.bufferAddress = extent_info->record.overflow; - extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord); - extent_info->btdata.itemCount = 1; - - extent_info->catalog_fp = NULL; - - /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */ - lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - error = BTSearchRecord(extent_info->fcb, extent_info->iterator, - &(extent_info->btdata), &(extent_info->recordlen), - extent_info->iterator); - hfs_systemfile_unlock(hfsmp, lockflags); - while (error == 0) { - extent_info->overflow_count++; - extent_info->recStartBlock = key->startBlock; - extent_info->extents = extent_info->record.overflow; - for (i = 0; i < kHFSPlusExtentDensity; i++) { - if (extent_info->record.overflow[i].blockCount == 0) { - goto out; - } - extent_info->extent_index = i; - error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); - if (error) { - printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error); - goto out; - } - } - - /* Look for more overflow records */ - lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, - extent_info->iterator, &(extent_info->btdata), - &(extent_info->recordlen)); - hfs_systemfile_unlock(hfsmp, lockflags); - if (error) { - break; - } - /* Stop when we encounter a different file or fork. 
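The walk through the overflow records above is the standard btree pattern: position once with a search, then step with iterate-next until the key stops matching. Reduced to a sorted array, with the array position standing in for what BTSearchRecord()/BTIterateRecord() return (all names here are illustrative):

#include <stddef.h>
#include <stdint.h>

struct ovkey { uint32_t fileID; uint8_t forkType; uint32_t startBlock; };

/* Visit the overflow records belonging to (fileID, forkType), starting
 * at the position a search would return; stop on the first foreign key,
 * the same termination rule as the loop in hfs_reclaim_file(). */
static int
walk_overflow(const struct ovkey *recs, size_t nrecs, size_t first,
    uint32_t fileID, uint8_t forkType)
{
    int visited = 0;
    for (size_t i = first; i < nrecs; i++) {
        if (recs[i].fileID != fileID || recs[i].forkType != forkType)
            break;  /* different file or fork: done */
        visited++;  /* relocate this record's extents here */
    }
    return visited;
}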
*/ - if ((key->fileID != fileID) || (key->forkType != forktype)) { - break; - } - } - if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { - error = 0; - } - -out: - /* If any blocks were relocated, account them and report progress */ - if (extent_info->blocks_relocated) { - hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated; - hfs_truncatefs_progress(hfsmp); - if (fileID < kHFSFirstUserCatalogNodeID) { - printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n", - extent_info->blocks_relocated, fileID, hfsmp->vcbVN); - } - } - if (extent_info->iterator) { - FREE(extent_info->iterator, M_TEMP); - } - if (release_desc == true) { - cat_releasedesc(extent_info->dirlink_desc); - } - if (extent_info->dirlink_desc) { - FREE(extent_info->dirlink_desc, M_TEMP); - } - if (extent_info->dirlink_attr) { - FREE(extent_info->dirlink_attr, M_TEMP); - } - if (extent_info->dirlink_fork) { - FREE(extent_info->dirlink_fork, M_TEMP); - } - if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) { - (void) hfs_update(vp, MNT_WAIT); - } - if (took_truncate_lock) { - hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); - } - if (extent_info) { - FREE(extent_info, M_TEMP); - } - if (hfs_resize_debug) { - printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error); - } - - return error; -} - - -/* - * This journal_relocate callback updates the journal info block to point - * at the new journal location. This write must NOT be done using the - * transaction. We must write the block immediately. We must also force - * it to get to the media so that the new journal location will be seen by - * the replay code before we can safely let journaled blocks be written - * to their normal locations. - * - * The tests for journal_uses_fua below are mildly hacky. Since the journal - * and the file system are both on the same device, I'm leveraging what - * the journal has decided about FUA. - */ -struct hfs_journal_relocate_args { - struct hfsmount *hfsmp; - vfs_context_t context; - u_int32_t newStartBlock; - u_int32_t newBlockCount; -}; - -static errno_t -hfs_journal_relocate_callback(void *_args) -{ - int error; - struct hfs_journal_relocate_args *args = _args; - struct hfsmount *hfsmp = args->hfsmp; - buf_t bp; - JournalInfoBlock *jibp; - - error = buf_meta_bread(hfsmp->hfs_devvp, - hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), - hfsmp->blockSize, vfs_context_ucred(args->context), &bp); - if (error) { - printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error); - if (bp) { - buf_brelse(bp); - } - return error; - } - jibp = (JournalInfoBlock*) buf_dataptr(bp); - jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize); - jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize); - if (journal_uses_fua(hfsmp->jnl)) - buf_markfua(bp); - error = buf_bwrite(bp); - if (error) { - printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error); - return error; - } - if (!journal_uses_fua(hfsmp->jnl)) { - error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context); - if (error) { - printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); - error = 0; /* Don't fail the operation. 
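The FUA-or-DKIOCSYNCHRONIZECACHE dance in the callback above enforces "bits on the media before the pointer moves". A rough userspace approximation of the same ordering guarantee on OS X is F_FULLFSYNC, falling back to fsync() where it is unsupported; this is a sketch of the idea, not the kernel's mechanism:

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

/* Force previously written data to the media before any dependent
 * metadata update is allowed to proceed. */
static int
flush_to_media(int fd)
{
#ifdef F_FULLFSYNC
    if (fcntl(fd, F_FULLFSYNC) == 0)
        return 0;
    /* Fall through: some filesystems/devices do not support it. */
#endif
    return (fsync(fd) == 0) ? 0 : errno;
}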
*/ - } - } - - return error; -} - - -/* Type of resize operation in progress */ -#define HFS_RESIZE_TRUNCATE 1 -#define HFS_RESIZE_EXTEND 2 - -/* - * Core function to relocate the journal file. This function takes the - * journal size of the newly relocated journal --- the caller can - * provide a new journal size if they want to change the size of - * the journal. The function takes care of updating the journal info - * block and all other data structures correctly. - * - * Note: This function starts a transaction and grabs the btree locks. - */ -static int -hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context) -{ - int error; - int journal_err; - int lockflags; - u_int32_t oldStartBlock; - u_int32_t newStartBlock; - u_int32_t oldBlockCount; - u_int32_t newBlockCount; - u_int32_t jnlBlockCount; - u_int32_t alloc_skipfreeblks; - struct cat_desc journal_desc; - struct cat_attr journal_attr; - struct cat_fork journal_fork; - struct hfs_journal_relocate_args callback_args; - - /* Calculate the number of allocation blocks required for the journal */ - jnlBlockCount = howmany(jnl_size, hfsmp->blockSize); - - /* - * During truncatefs(), the volume free block count is updated - * before relocating data and reflects the total number of free - * blocks that will exist on volume after the resize is successful. - * This means that the allocation blocks required for relocation - * have already been reserved and accounted for in the free block - * count. Therefore, block allocation and deallocation routines - * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS - * flag. - * - * This special handling is not required when the file system - * is being extended as we want all the allocated and deallocated - * blocks to be accounted for correctly. 
- */ - if (resize_type == HFS_RESIZE_TRUNCATE) { - alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS; - } else { - alloc_skipfreeblks = 0; - } - - error = hfs_start_transaction(hfsmp); - if (error) { - printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error); - return error; - } - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - - error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount, - HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_FLUSHTXN | alloc_skipfreeblks, - &newStartBlock, &newBlockCount); - if (error) { - printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error); - goto fail; - } - if (newBlockCount != jnlBlockCount) { - printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount); - goto free_fail; - } - - error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork); - if (error) { - printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error); - goto free_fail; - } - - oldStartBlock = journal_fork.cf_extents[0].startBlock; - oldBlockCount = journal_fork.cf_extents[0].blockCount; - error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks); - if (error) { - printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); - goto free_fail; - } - - /* Update the catalog record for .journal */ - journal_fork.cf_size = newBlockCount * hfsmp->blockSize; - journal_fork.cf_extents[0].startBlock = newStartBlock; - journal_fork.cf_extents[0].blockCount = newBlockCount; - journal_fork.cf_blocks = newBlockCount; - error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL); - cat_releasedesc(&journal_desc); /* all done with cat descriptor */ - if (error) { - printf("hfs_relocate_journal_file: cat_update returned %d\n", error); - goto free_fail; - } - - /* - * If the journal is part of the file system, then tell the journal - * code about the new location. If the journal is on an external - * device, then just keep using it as-is. - */ - if (hfsmp->jvp == hfsmp->hfs_devvp) { - callback_args.hfsmp = hfsmp; - callback_args.context = context; - callback_args.newStartBlock = newStartBlock; - callback_args.newBlockCount = newBlockCount; - - error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize, - (off_t)newBlockCount*hfsmp->blockSize, 0, - hfs_journal_relocate_callback, &callback_args); - if (error) { - /* NOTE: journal_relocate will mark the journal invalid. 
*/ - printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error); - goto fail; - } - if (hfs_resize_debug) { - printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); - } - hfsmp->jnl_start = newStartBlock; - hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize; - } - - hfs_systemfile_unlock(hfsmp, lockflags); - error = hfs_end_transaction(hfsmp); - if (error) { - printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error); - } - - return error; - -free_fail: - journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); - if (journal_err) { - printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); - hfs_mark_volume_inconsistent(hfsmp); - } -fail: - hfs_systemfile_unlock(hfsmp, lockflags); - (void) hfs_end_transaction(hfsmp); - if (hfs_resize_debug) { - printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error); - } - return error; -} - - -/* - * Relocate the journal file when the file system is being truncated. - * We do not down-size the journal when the file system size is - * reduced, so we always provide the current journal size to the - * relocate code. - */ -static int -hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) -{ - int error = 0; - u_int32_t startBlock; - u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize; - - /* - * Figure out the location of the .journal file. When the journal - * is on an external device, we need to look up the .journal file. - */ - if (hfsmp->jvp == hfsmp->hfs_devvp) { - startBlock = hfsmp->jnl_start; - blockCount = hfsmp->jnl_size / hfsmp->blockSize; - } else { - u_int32_t fileid; - u_int32_t old_jnlfileid; - struct cat_attr attr; - struct cat_fork fork; - - /* - * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid - * is set, and it is trying to hide the .journal file. So temporarily - * unset the field while calling GetFileInfo. - */ - old_jnlfileid = hfsmp->hfs_jnlfileid; - hfsmp->hfs_jnlfileid = 0; - fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork); - hfsmp->hfs_jnlfileid = old_jnlfileid; - if (fileid != old_jnlfileid) { - printf("hfs_reclaim_journal_file: cannot find .journal file!\n"); - return EIO; - } - - startBlock = fork.cf_extents[0].startBlock; - blockCount = fork.cf_extents[0].blockCount; - } - - if (startBlock + blockCount <= allocLimit) { - /* The journal file does not require relocation */ - return 0; - } - - error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context); - if (error == 0) { - hfsmp->hfs_resize_blocksmoved += blockCount; - hfs_truncatefs_progress(hfsmp); - printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n", - blockCount, hfsmp->vcbVN); - } - - return error; -} - - -/* - * Move the journal info block to a new location. We have to make sure the - * new copy of the journal info block gets to the media first, then change - * the field in the volume header and the catalog record. 
- */ -static int -hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) -{ - int error; - int journal_err; - int lockflags; - u_int32_t oldBlock; - u_int32_t newBlock; - u_int32_t blockCount; - struct cat_desc jib_desc; - struct cat_attr jib_attr; - struct cat_fork jib_fork; - buf_t old_bp, new_bp; - - if (hfsmp->vcbJinfoBlock <= allocLimit) { - /* The journal info block does not require relocation */ - return 0; - } - - error = hfs_start_transaction(hfsmp); - if (error) { - printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error); - return error; - } - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - - error = BlockAllocate(hfsmp, 1, 1, 1, - HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS | HFS_ALLOC_FLUSHTXN, - &newBlock, &blockCount); - if (error) { - printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); - goto fail; - } - if (blockCount != 1) { - printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); - goto free_fail; - } - - /* Copy the old journal info block content to the new location */ - error = buf_meta_bread(hfsmp->hfs_devvp, - hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), - hfsmp->blockSize, vfs_context_ucred(context), &old_bp); - if (error) { - printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error); - if (old_bp) { - buf_brelse(old_bp); - } - goto free_fail; - } - new_bp = buf_getblk(hfsmp->hfs_devvp, - newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), - hfsmp->blockSize, 0, 0, BLK_META); - bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize); - buf_brelse(old_bp); - if (journal_uses_fua(hfsmp->jnl)) - buf_markfua(new_bp); - error = buf_bwrite(new_bp); - if (error) { - printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error); - goto free_fail; - } - if (!journal_uses_fua(hfsmp->jnl)) { - error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); - if (error) { - printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); - /* Don't fail the operation. */ - } - } - - /* Deallocate the old block once the new one has the new valid content */ - error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); - if (error) { - printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); - goto free_fail; - } - - - /* Update the catalog record for .journal_info_block */ - error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork); - if (error) { - printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error); - goto fail; - } - oldBlock = jib_fork.cf_extents[0].startBlock; - jib_fork.cf_size = hfsmp->blockSize; - jib_fork.cf_extents[0].startBlock = newBlock; - jib_fork.cf_extents[0].blockCount = 1; - jib_fork.cf_blocks = 1; - error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL); - cat_releasedesc(&jib_desc); /* all done with cat descriptor */ - if (error) { - printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error); - goto fail; - } - - /* Update the pointer to the journal info block in the volume header. 
*/ - hfsmp->vcbJinfoBlock = newBlock; - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); - if (error) { - printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error); - goto fail; - } - hfs_systemfile_unlock(hfsmp, lockflags); - error = hfs_end_transaction(hfsmp); - if (error) { - printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); - } - error = hfs_journal_flush(hfsmp, FALSE); - if (error) { - printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error); - } - - /* Account for the block relocated and print progress */ - hfsmp->hfs_resize_blocksmoved += 1; - hfs_truncatefs_progress(hfsmp); - if (!error) { - printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n", - hfsmp->vcbVN); - if (hfs_resize_debug) { - printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount); - } - } - return error; - -free_fail: - journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); - if (journal_err) { - printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); - hfs_mark_volume_inconsistent(hfsmp); - } - -fail: - hfs_systemfile_unlock(hfsmp, lockflags); - (void) hfs_end_transaction(hfsmp); - if (hfs_resize_debug) { - printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error); - } - return error; -} - - -static u_int64_t -calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count) -{ - u_int64_t journal_size; - u_int32_t journal_scale; - -#define DEFAULT_JOURNAL_SIZE (8*1024*1024) -#define MAX_JOURNAL_SIZE (512*1024*1024) - - /* Calculate the journal size for this volume. We want - * at least 8 MB of journal for each 100 GB of disk space. - * We cap the size at 512 MB, unless the allocation block - * size is larger, in which case, we use one allocation - * block. - */ - journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024); - journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1); - if (journal_size > MAX_JOURNAL_SIZE) { - journal_size = MAX_JOURNAL_SIZE; - } - if (journal_size < hfsmp->blockSize) { - journal_size = hfsmp->blockSize; - } - return journal_size; -} - - -/* - * Calculate the expected journal size based on current partition size. - * If the size of the current journal is less than the calculated size, - * force journal relocation with the new journal size. 
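As a worked example of calculate_journal_size() above: a 1 TiB volume gives journal_scale = 2^40 / (100 * 2^30) = 10, hence a journal of 8 MB * 11 = 88 MB, well under the 512 MB cap. A standalone restatement of the sizing rule (the allocation-block-size floor is omitted for brevity):

#include <stdint.h>
#include <stdio.h>

#define DEFAULT_JOURNAL_SIZE (8ULL * 1024 * 1024)
#define MAX_JOURNAL_SIZE     (512ULL * 1024 * 1024)

/* ~8 MB of journal per 100 GB of disk, capped at 512 MB. */
static uint64_t
journal_size_for(uint64_t volume_bytes)
{
    uint64_t scale = volume_bytes / (100ULL * 1024 * 1024 * 1024);
    uint64_t size = DEFAULT_JOURNAL_SIZE * (scale + 1);
    return (size > MAX_JOURNAL_SIZE) ? MAX_JOURNAL_SIZE : size;
}

int
main(void)
{
    /* 1 TiB volume: scale = 10, so 8 MB * 11 = 88 MB */
    printf("%llu\n", (unsigned long long)journal_size_for(1ULL << 40));
    return 0;
}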
- */ -static int -hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context) -{ - int error = 0; - u_int64_t calc_journal_size; - - if (hfsmp->jvp != hfsmp->hfs_devvp) { - if (hfs_resize_debug) { - printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n"); - } - return 0; - } - - calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count); - if (calc_journal_size <= hfsmp->jnl_size) { - /* The journal size requires no modification */ - goto out; - } - - if (hfs_resize_debug) { - printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size); - } - - /* Extend the journal to the new calculated size */ - error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context); - if (error == 0) { - printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n", - hfsmp->jnl_size, hfsmp->vcbVN); - } -out: - return error; -} - - -/* - * This function traverses through all extended attribute records for a given - * fileID, and calls function that reclaims data blocks that exist in the - * area of the disk being reclaimed which in turn is responsible for allocating - * new space, copying extent data, deallocating new space, and if required, - * splitting the extent. - * - * Note: The caller has already acquired the cnode lock on the file. Therefore - * we are assured that no other thread would be creating/deleting/modifying - * extended attributes for this file. - * - * Side Effects: - * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation - * blocks that were relocated. - * - * Returns: - * 0 on success, non-zero on failure. - */ -static int -hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context) -{ - int error = 0; - struct hfs_reclaim_extent_info *extent_info; - int i; - HFSPlusAttrKey *key; - int *lockflags; - - if (hfs_resize_debug) { - printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID); - } - - MALLOC(extent_info, struct hfs_reclaim_extent_info *, - sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK); - if (extent_info == NULL) { - return ENOMEM; - } - bzero(extent_info, sizeof(struct hfs_reclaim_extent_info)); - extent_info->vp = vp; - extent_info->fileID = fileID; - extent_info->is_xattr = true; - extent_info->is_sysfile = vnode_issystem(vp); - extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp); - lockflags = &(extent_info->lockflags); - *lockflags = SFL_ATTRIBUTE | SFL_BITMAP; - - /* Initialize iterator from the extent_info structure */ - MALLOC(extent_info->iterator, struct BTreeIterator *, - sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); - if (extent_info->iterator == NULL) { - error = ENOMEM; - goto out; - } - bzero(extent_info->iterator, sizeof(struct BTreeIterator)); - - /* Build attribute key */ - key = (HFSPlusAttrKey *)&(extent_info->iterator->key); - error = hfs_buildattrkey(fileID, NULL, key); - if (error) { - goto out; - } - - /* Initialize btdata from extent_info structure. Note that the - * buffer pointer actually points to the xattr record from the - * extent_info structure itself. - */ - extent_info->btdata.bufferAddress = &(extent_info->record.xattr); - extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord); - extent_info->btdata.itemCount = 1; - - /* - * Sync all extent-based attribute data to the disk. 
- * - * All extent-based attribute data I/O is performed via cluster - * I/O using a virtual file that spans across entire file system - * space. - */ - hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - (void)cluster_push(hfsmp->hfs_attrdata_vp, 0); - error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr"); - hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_LOCK_DEFAULT); - if (error) { - goto out; - } - - /* Search for extended attribute for current file. This - * will place the iterator before the first matching record. - */ - *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); - error = BTSearchRecord(extent_info->fcb, extent_info->iterator, - &(extent_info->btdata), &(extent_info->recordlen), - extent_info->iterator); - hfs_systemfile_unlock(hfsmp, *lockflags); - if (error) { - if (error != btNotFound) { - goto out; - } - /* btNotFound is expected here, so just mask it */ - error = 0; - } - - while (1) { - /* Iterate to the next record */ - *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); - error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, - extent_info->iterator, &(extent_info->btdata), - &(extent_info->recordlen)); - hfs_systemfile_unlock(hfsmp, *lockflags); - - /* Stop the iteration if we encounter end of btree or xattr with different fileID */ - if (error || key->fileID != fileID) { - if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { - error = 0; - } - break; - } - - /* We only care about extent-based EAs */ - if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) && - (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) { - continue; - } - - if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) { - extent_info->overflow_count = 0; - extent_info->extents = extent_info->record.xattr.forkData.theFork.extents; - } else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) { - extent_info->overflow_count++; - extent_info->extents = extent_info->record.xattr.overflowExtents.extents; - } - - extent_info->recStartBlock = key->startBlock; - for (i = 0; i < kHFSPlusExtentDensity; i++) { - if (extent_info->extents[i].blockCount == 0) { - break; - } - extent_info->extent_index = i; - error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); - if (error) { - printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error); - goto out; - } - } - } - -out: - /* If any blocks were relocated, account them and report progress */ - if (extent_info->blocks_relocated) { - hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated; - hfs_truncatefs_progress(hfsmp); - } - if (extent_info->iterator) { - FREE(extent_info->iterator, M_TEMP); - } - if (extent_info) { - FREE(extent_info, M_TEMP); - } - if (hfs_resize_debug) { - printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error); - } - return error; -} - -/* - * Reclaim any extent-based extended attributes allocation blocks from - * the area of the disk that is being truncated. - * - * The function traverses the attribute btree to find out the fileIDs - * of the extended attributes that need to be relocated. For every - * file whose large EA requires relocation, it looks up the cnode and - * calls hfs_reclaim_xattr() to do all the work for allocating - * new space, copying data, deallocating old space, and if required, - * splitting the extents. 
- * - * Inputs: - * allocLimit - starting block of the area being reclaimed - * - * Returns: - * returns 0 on success, non-zero on failure. - */ -static int -hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) -{ - int error = 0; - FCB *fcb; - struct BTreeIterator *iterator = NULL; - struct FSBufferDescriptor btdata; - HFSPlusAttrKey *key; - HFSPlusAttrRecord rec; - int lockflags = 0; - cnid_t prev_fileid = 0; - struct vnode *vp; - int need_relocate; - int btree_operation; - u_int32_t files_moved = 0; - u_int32_t prev_blocksmoved; - int i; - - fcb = VTOF(hfsmp->hfs_attribute_vp); - /* Store the value to print total blocks moved by this function in end */ - prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; - - if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { - return ENOMEM; - } - bzero(iterator, sizeof(*iterator)); - key = (HFSPlusAttrKey *)&iterator->key; - btdata.bufferAddress = &rec; - btdata.itemSize = sizeof(rec); - btdata.itemCount = 1; - - need_relocate = false; - btree_operation = kBTreeFirstRecord; - /* Traverse the attribute btree to find extent-based EAs to reclaim */ - while (1) { - lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); - error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); - hfs_systemfile_unlock(hfsmp, lockflags); - if (error) { - if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { - error = 0; - } - break; - } - btree_operation = kBTreeNextRecord; - - /* If the extents of current fileID were already relocated, skip it */ - if (prev_fileid == key->fileID) { - continue; - } - - /* Check if any of the extents in the current record need to be relocated */ - need_relocate = false; - switch(rec.recordType) { - case kHFSPlusAttrForkData: - for (i = 0; i < kHFSPlusExtentDensity; i++) { - if (rec.forkData.theFork.extents[i].blockCount == 0) { - break; - } - if ((rec.forkData.theFork.extents[i].startBlock + - rec.forkData.theFork.extents[i].blockCount) > allocLimit) { - need_relocate = true; - break; - } - } - break; - - case kHFSPlusAttrExtents: - for (i = 0; i < kHFSPlusExtentDensity; i++) { - if (rec.overflowExtents.extents[i].blockCount == 0) { - break; - } - if ((rec.overflowExtents.extents[i].startBlock + - rec.overflowExtents.extents[i].blockCount) > allocLimit) { - need_relocate = true; - break; - } - } - break; - }; - - /* Continue iterating to next attribute record */ - if (need_relocate == false) { - continue; - } - - /* Look up the vnode for corresponding file. The cnode - * will be locked which will ensure that no one modifies - * the xattrs when we are relocating them. - * - * We want to allow open-unlinked files to be moved, - * so provide allow_deleted == 1 for hfs_vget(). - */ - if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) { - continue; - } - - error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context); - hfs_unlock(VTOC(vp)); - vnode_put(vp); - if (error) { - printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error); - break; - } - prev_fileid = key->fileID; - files_moved++; - } - - if (files_moved) { - printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n", - (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), - files_moved, hfsmp->vcbVN); - } - - kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - return error; -} - -/* - * Reclaim blocks from regular files. 
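Both the need_relocate scan above and hfs_file_extent_overlaps() later in this hunk reduce to the same predicate: an extent must move when it reaches past allocLimit, i.e. startBlock + blockCount > allocLimit, and a zero blockCount ends the scan of the (at most eight) extents in a record. A standalone sketch with made-up extents:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define kHFSPlusExtentDensity 8

struct extent {
	uint32_t startBlock;
	uint32_t blockCount;
};

/* true if any extent in the record reaches past allocLimit, mirroring
 * the need_relocate scan above; extents are packed, so the first zero
 * blockCount ends the record. */
static bool
record_needs_relocation(const struct extent ext[kHFSPlusExtentDensity],
    uint32_t allocLimit)
{
	for (int i = 0; i < kHFSPlusExtentDensity; i++) {
		if (ext[i].blockCount == 0)
			break;
		if (ext[i].startBlock + ext[i].blockCount > allocLimit)
			return true;
	}
	return false;
}

int
main(void)
{
	/* hypothetical record: two extents, the second straddles the limit */
	struct extent rec[kHFSPlusExtentDensity] = {
		{ .startBlock = 100,  .blockCount = 50 },
		{ .startBlock = 9990, .blockCount = 20 },
	};
	printf("needs relocation below block 10000: %s\n",
	    record_needs_relocation(rec, 10000) ? "yes" : "no");
	return 0;
}
```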
- * - * This function iterates over all the records in the catalog btree looking - * for files with extents that overlap into the space we're trying to - * free up. If a file extent requires relocation, it looks up the vnode - * and calls a function to relocate the data. - * - * Returns: - * Zero on success, non-zero on failure. - */ -static int -hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) -{ - int error; - FCB *fcb; - struct BTreeIterator *iterator = NULL; - struct FSBufferDescriptor btdata; - int btree_operation; - int lockflags; - struct HFSPlusCatalogFile filerec; - struct vnode *vp; - struct vnode *rvp; - struct filefork *datafork; - u_int32_t files_moved = 0; - u_int32_t prev_blocksmoved; - -#if CONFIG_PROTECT - int keys_generated = 0; -#endif - - fcb = VTOF(hfsmp->hfs_catalog_vp); - /* Store the value to print total blocks moved by this function at the end */ - prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; - - if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { - error = ENOMEM; - goto reclaim_filespace_done; - } - -#if CONFIG_PROTECT - /* - * For content-protected filesystems, we may need to relocate files that - * are encrypted. If they use the new-style offset-based IVs, then - * we can move them regardless of the lock state. We create a temporary - * key here that we use to read/write the data, then we discard it at the - * end of the function. - */ - if (cp_fs_protected (hfsmp->hfs_mp)) { - int needs = 0; - error = cp_needs_tempkeys(hfsmp, &needs); - - if ((error == 0) && (needs)) { - error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp); - if (error == 0) { - keys_generated = 1; - } - } - - if (error) { - printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error); - goto reclaim_filespace_done; - } - } - -#endif - - bzero(iterator, sizeof(*iterator)); - - btdata.bufferAddress = &filerec; - btdata.itemSize = sizeof(filerec); - btdata.itemCount = 1; - - btree_operation = kBTreeFirstRecord; - while (1) { - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); - hfs_systemfile_unlock(hfsmp, lockflags); - if (error) { - if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { - error = 0; - } - break; - } - btree_operation = kBTreeNextRecord; - - if (filerec.recordType != kHFSPlusFileRecord) { - continue; - } - - /* Check if any of the extents require relocation */ - if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) { - continue; - } - - /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */ - if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) { - if (hfs_resize_debug) { - printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID); - } - continue; - } - - /* If data fork exists or item is a directory hard link, relocate blocks */ - datafork = VTOF(vp); - if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) { - error = hfs_reclaim_file(hfsmp, vp, filerec.fileID, - kHFSDataForkType, allocLimit, context); - if (error) { - printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); - hfs_unlock(VTOC(vp)); - vnode_put(vp); - break; - } - } - - /* If resource fork exists or item is a directory hard link, relocate blocks */ - if (((VTOC(vp)->c_blocks - (datafork ?
datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) { - if (vnode_isdir(vp)) { - /* Resource fork vnode lookup is invalid for directory hard link. - * So we fake data fork vnode as resource fork vnode. - */ - rvp = vp; - } else { - error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); - if (error) { - printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error); - hfs_unlock(VTOC(vp)); - vnode_put(vp); - break; - } - VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT; - } - - error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID, - kHFSResourceForkType, allocLimit, context); - if (error) { - printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); - hfs_unlock(VTOC(vp)); - vnode_put(vp); - break; - } - } - - /* The file forks were relocated successfully, now drop the - * cnode lock and vnode reference, and continue iterating to - * next catalog record. - */ - hfs_unlock(VTOC(vp)); - vnode_put(vp); - files_moved++; - } - - if (files_moved) { - printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n", - (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), - files_moved, hfsmp->vcbVN); - } - -reclaim_filespace_done: - if (iterator) { - kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - } - -#if CONFIG_PROTECT - if (keys_generated) { - cp_entry_destroy(hfsmp->hfs_resize_cpentry); - hfsmp->hfs_resize_cpentry = NULL; - } -#endif - return error; -} - -/* - * Reclaim space at the end of a file system. - * - * Inputs - - * allocLimit - start block of the space being reclaimed - * reclaimblks - number of allocation blocks to reclaim - */ -static int -hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context) -{ - int error = 0; - - /* - * Preflight the bitmap to find out total number of blocks that need - * relocation. - * - * Note: Since allocLimit is set to the location of new alternate volume - * header, the check below does not account for blocks allocated for old - * alternate volume header. - */ - error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks)); - if (error) { - printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error); - return error; - } - if (hfs_resize_debug) { - printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks); - } - - /* Just to be safe, sync the content of the journal to the disk before we proceed */ - hfs_journal_flush(hfsmp, TRUE); - - /* First, relocate journal file blocks if they're in the way. - * Doing this first will make sure that journal relocate code - * gets access to contiguous blocks on disk first. The journal - * file has to be contiguous on the disk, otherwise resize will - * fail. - */ - error = hfs_reclaim_journal_file(hfsmp, allocLimit, context); - if (error) { - printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error); - return error; - } - - /* Relocate journal info block blocks if they're in the way. */ - error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context); - if (error) { - printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error); - return error; - } - - /* Relocate extents of the Extents B-tree if they're in the way. - * Relocating extents btree before other btrees is important as - * this will provide access to largest contiguous block range on - * the disk for relocating extents btree. 
Note that extents btree - * can only have maximum of 8 extents. - */ - error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID, - kHFSDataForkType, allocLimit, context); - if (error) { - printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error); - return error; - } - - /* Relocate extents of the Allocation file if they're in the way. */ - error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID, - kHFSDataForkType, allocLimit, context); - if (error) { - printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error); - return error; - } - - /* Relocate extents of the Catalog B-tree if they're in the way. */ - error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID, - kHFSDataForkType, allocLimit, context); - if (error) { - printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error); - return error; - } - - /* Relocate extents of the Attributes B-tree if they're in the way. */ - error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID, - kHFSDataForkType, allocLimit, context); - if (error) { - printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error); - return error; - } - - /* Relocate extents of the Startup File if there is one and they're in the way. */ - error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID, - kHFSDataForkType, allocLimit, context); - if (error) { - printf("hfs_reclaimspace: reclaim startup file returned %d\n", error); - return error; - } - - /* - * We need to make sure the alternate volume header gets flushed if we moved - * any extents in the volume header. But we need to do that before - * shrinking the size of the volume, or else the journal code will panic - * with an invalid (too large) block number. - * - * Note that blks_moved will be set if ANY extent was moved, even - * if it was just an overflow extent. In this case, the journal_flush isn't - * strictly required, but shouldn't hurt. - */ - if (hfsmp->hfs_resize_blocksmoved) { - hfs_journal_flush(hfsmp, TRUE); - } - - /* Reclaim extents from catalog file records */ - error = hfs_reclaim_filespace(hfsmp, allocLimit, context); - if (error) { - printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error); - return error; - } - - /* Reclaim extents from extent-based extended attributes, if any */ - error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context); - if (error) { - printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error); - return error; - } - - return error; -} - - -/* - * Check if there are any extents (including overflow extents) that overlap - * into the disk space that is being reclaimed. 
- * - * Output - - * true - One of the extents need to be relocated - * false - No overflow extents need to be relocated, or there was an error - */ -static int -hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec) -{ - struct BTreeIterator * iterator = NULL; - struct FSBufferDescriptor btdata; - HFSPlusExtentRecord extrec; - HFSPlusExtentKey *extkeyptr; - FCB *fcb; - int overlapped = false; - int i, j; - int error; - int lockflags = 0; - u_int32_t endblock; - - /* Check if data fork overlaps the target space */ - for (i = 0; i < kHFSPlusExtentDensity; ++i) { - if (filerec->dataFork.extents[i].blockCount == 0) { - break; - } - endblock = filerec->dataFork.extents[i].startBlock + - filerec->dataFork.extents[i].blockCount; - if (endblock > allocLimit) { - overlapped = true; - goto out; - } - } - - /* Check if resource fork overlaps the target space */ - for (j = 0; j < kHFSPlusExtentDensity; ++j) { - if (filerec->resourceFork.extents[j].blockCount == 0) { - break; - } - endblock = filerec->resourceFork.extents[j].startBlock + - filerec->resourceFork.extents[j].blockCount; - if (endblock > allocLimit) { - overlapped = true; - goto out; - } - } - - /* Return back if there are no overflow extents for this file */ - if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) { - goto out; - } - - if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { - return 0; - } - bzero(iterator, sizeof(*iterator)); - extkeyptr = (HFSPlusExtentKey *)&iterator->key; - extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; - extkeyptr->forkType = 0; - extkeyptr->fileID = filerec->fileID; - extkeyptr->startBlock = 0; - - btdata.bufferAddress = &extrec; - btdata.itemSize = sizeof(extrec); - btdata.itemCount = 1; - - fcb = VTOF(hfsmp->hfs_extents_vp); - - lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); - - /* This will position the iterator just before the first overflow - * extent record for given fileID. It will always return btNotFound, - * so we special case the error code. - */ - error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); - if (error && (error != btNotFound)) { - goto out; - } - - /* BTIterateRecord() might return error if the btree is empty, and - * therefore we return that the extent does not overflow to the caller - */ - error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); - while (error == 0) { - /* Stop when we encounter a different file. */ - if (extkeyptr->fileID != filerec->fileID) { - break; - } - /* Check if any of the forks exist in the target space. */ - for (i = 0; i < kHFSPlusExtentDensity; ++i) { - if (extrec[i].blockCount == 0) { - break; - } - endblock = extrec[i].startBlock + extrec[i].blockCount; - if (endblock > allocLimit) { - overlapped = true; - goto out; - } - } - /* Look for more records. */ - error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); - } - -out: - if (lockflags) { - hfs_systemfile_unlock(hfsmp, lockflags); - } - if (iterator) { - kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - } - return overlapped; -} - - -/* - * Calculate the progress of a file system resize operation. 
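The function that follows converts the hfs_resize_blocksmoved / hfs_resize_totalblocks counters maintained throughout this file into a percentage; the 100ULL widens the multiplication to 64 bits so a counter close to 2^32 cannot overflow. A user-space sketch of the same arithmetic:

```c
#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as hfs_resize_progress: percentage of relocated blocks,
 * computed in 64 bits so blocksmoved * 100 cannot overflow a u_int32_t. */
static uint32_t
resize_progress(uint32_t blocksmoved, uint32_t totalblocks)
{
	if (totalblocks == 0)
		return 0;
	return (uint32_t)(((uint64_t)blocksmoved * 100ULL) / totalblocks);
}

int
main(void)
{
	/* hypothetical counters: 3 of 8 million blocks moved so far */
	printf("progress: %u%%\n", resize_progress(3000000, 8000000));
	return 0;
}
```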
- */ -__private_extern__ -int -hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) -{ - if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) { - return (ENXIO); - } - - if (hfsmp->hfs_resize_totalblocks > 0) { - *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks); - } else { - *progress = 0; - } - - return (0); -} - - -/* - * Creates a UUID from a unique "name" in the HFS UUID Name space. - * See version 3 UUID. - */ -static void -hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result) -{ - MD5_CTX md5c; - uint8_t rawUUID[8]; - - ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6]; - ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7]; - - MD5Init( &md5c ); - MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) ); - MD5Update( &md5c, rawUUID, sizeof (rawUUID) ); - MD5Final( result, &md5c ); - - result[6] = 0x30 | ( result[6] & 0x0F ); - result[8] = 0x80 | ( result[8] & 0x3F ); -} - -/* - * Get file system attributes. - */ -static int -hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) -{ -#define HFS_ATTR_CMN_VALIDMASK ATTR_CMN_VALIDMASK -#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST)) -#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_ACCTIME)) - - ExtendedVCB *vcb = VFSTOVCB(mp); - struct hfsmount *hfsmp = VFSTOHFS(mp); - u_int32_t freeCNIDs; - - int searchfs_on = 0; - int exchangedata_on = 1; - -#if CONFIG_SEARCHFS - searchfs_on = 1; -#endif - -#if CONFIG_PROTECT - if (cp_fs_protected(mp)) { - exchangedata_on = 0; - } -#endif - - freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID; - - VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); - VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt); - VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt); - VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF); - VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0)); - VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks); - VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0)); - VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1)); - VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize); - /* XXX needs clarification */ - VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1)); - /* Maximum files is constrained by total blocks. */ - VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2)); - VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1))); - - fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev; - fsap->f_fsid.val[1] = vfs_typenum(mp); - VFSATTR_SET_SUPPORTED(fsap, f_fsid); - - VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord); - VFSATTR_RETURN(fsap, f_carbon_fsid, 0); - - if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { - vol_capabilities_attr_t *cap; - - cap = &fsap->f_capabilities; + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + vol_capabilities_attr_t *cap; + + cap = &fsap->f_capabilities; if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { /* HFS+ & variants */ @@ -7828,17 +4542,34 @@ out: * a repair operation fails. The bit should be cleared only from file system * verify/repair utility like fsck_hfs when a verify/repair succeeds. 
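Stepping back to hfs_getvoluuid() above: after hashing the namespace ID and eight bytes of Finder info with MD5, it stamps the version-3 nibble and the RFC 4122 variant bits onto the digest. The sketch below applies the same masking to a dummy digest (the MD5 step is omitted; the byte values are invented for illustration):

```c
#include <stdint.h>
#include <stdio.h>

/* Applies the same post-processing hfs_getvoluuid performs on the raw
 * MD5 digest: byte 6 gets version 3 in its high nibble, byte 8 gets the
 * RFC 4122 variant bits (10xxxxxx). */
static void
stamp_v3_uuid(uint8_t uuid[16])
{
	uuid[6] = 0x30 | (uuid[6] & 0x0F);
	uuid[8] = 0x80 | (uuid[8] & 0x3F);
}

int
main(void)
{
	uint8_t u[16] = { 0xde, 0xad, 0xbe, 0xef, 0x01, 0x23, 0x45, 0x67,
			  0x89, 0xab, 0xcd, 0xef, 0x10, 0x32, 0x54, 0x76 };
	stamp_v3_uuid(u);
	for (int i = 0; i < 16; i++)
		printf("%02x%s", u[i],
		    (i == 3 || i == 5 || i == 7 || i == 9) ? "-" : "");
	printf("\n");
	return 0;
}
```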
*/ -void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp) +__private_extern__ +void hfs_mark_inconsistent(struct hfsmount *hfsmp, + hfs_inconsistency_reason_t reason) { hfs_lock_mount (hfsmp); if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) { hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask; MarkVCBDirty(hfsmp); } - if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) { - /* Log information to ASL log */ - fslog_fs_corrupt(hfsmp->hfs_mp); - printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); + if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) { + switch (reason) { + case HFS_INCONSISTENCY_DETECTED: + printf("hfs_mark_inconsistent: Runtime corruption detected on %s, fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + case HFS_ROLLBACK_FAILED: + printf("hfs_mark_inconsistent: Failed to roll back; volume `%s' might be inconsistent; fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + case HFS_OP_INCOMPLETE: + printf("hfs_mark_inconsistent: Failed to complete operation; volume `%s' might be inconsistent; fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + case HFS_FSCK_FORCED: + printf("hfs_mark_inconsistent: fsck requested for `%s'; fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + } } hfs_unlock_mount (hfsmp); } @@ -7895,6 +4626,44 @@ out: return retval; } + +/* + * Cancel the syncer + */ +static void +hfs_syncer_free(struct hfsmount *hfsmp) +{ + if (hfsmp && hfsmp->hfs_syncer) { + hfs_syncer_lock(hfsmp); + + /* + * First, make sure everything else knows we don't want any more + * requests queued. + */ + thread_call_t syncer = hfsmp->hfs_syncer; + hfsmp->hfs_syncer = NULL; + + hfs_syncer_unlock(hfsmp); + + // Now deal with requests that are outstanding + if (hfsmp->hfs_sync_incomplete) { + if (thread_call_cancel(syncer)) { + // We managed to cancel the timer so we're done + hfsmp->hfs_sync_incomplete = FALSE; + } else { + // Syncer must be running right now so we have to wait + hfs_syncer_lock(hfsmp); + while (hfsmp->hfs_sync_incomplete) + hfs_syncer_wait(hfsmp); + hfs_syncer_unlock(hfsmp); + } + } + + // Now we're safe to free the syncer + thread_call_free(syncer); + } +} + /* * hfs vfs operations. */ diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 5fe09c2ed..9e2adb7d9 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
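Returning to the hfs_mark_inconsistent() change above: it now takes a reason code and selects one of four log messages. The patch uses the values HFS_INCONSISTENCY_DETECTED, HFS_ROLLBACK_FAILED, HFS_OP_INCOMPLETE, and HFS_FSCK_FORCED but does not show the enum's definition (it lives in a header not included here), so the declaration below is an assumption made for illustration:

```c
#include <stdio.h>

/* Assumed shape of the reason codes consumed by hfs_mark_inconsistent()
 * in the hunk above; only the names are taken from the patch. */
typedef enum {
	HFS_INCONSISTENCY_DETECTED,
	HFS_ROLLBACK_FAILED,
	HFS_OP_INCOMPLETE,
	HFS_FSCK_FORCED,
} hfs_inconsistency_reason_t;

static const char *
inconsistency_reason_str(hfs_inconsistency_reason_t reason)
{
	switch (reason) {
	case HFS_INCONSISTENCY_DETECTED: return "runtime corruption detected";
	case HFS_ROLLBACK_FAILED:        return "failed to roll back";
	case HFS_OP_INCOMPLETE:          return "failed to complete operation";
	case HFS_FSCK_FORCED:            return "fsck explicitly requested";
	}
	return "unknown";
}

int
main(void)
{
	for (int r = HFS_INCONSISTENCY_DETECTED; r <= HFS_FSCK_FORCED; r++)
		printf("%d: %s\n", r,
		    inconsistency_reason_str((hfs_inconsistency_reason_t)r));
	return 0;
}
```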
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -71,10 +72,14 @@ #include "hfscommon/headers/BTreesInternal.h" #include "hfscommon/headers/HFSUnicodeWrappers.h" +/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */ +extern int hfs_resize_debug; + static void ReleaseMetaFileVNode(struct vnode *vp); static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args); static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *); +static void hfs_thaw_locked(struct hfsmount *hfsmp); #define HFS_MOUNT_DEBUG 1 @@ -165,8 +170,12 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size); vcb->vcbVBMIOSize = kHFSBlockSize; - hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, + /* Generate the partition-based AVH location */ + hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + /* HFS standard is read-only, so just stuff the FS location in here, too */ + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; bzero(&cndesc, sizeof(cndesc)); cndesc.cd_parentcnid = kHFSRootParentID; @@ -452,17 +461,73 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* * Validate and initialize the location of the alternate volume header. + * + * Note that there may be spare sectors beyond the end of the filesystem that still + * belong to our partition. */ + spare_sectors = hfsmp->hfs_logical_block_count - (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size); + /* + * Differentiate between "innocuous" spare sectors and the more unusual + * degenerate case: + * + * *** Innocuous spare sectors exist if: + * + * A) the number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and allocation block size) + * + * and + * + * B) the remainder is less than the size of a full allocation block's worth of bytes. + * + * This handles the normal case where there may be a few extra sectors, but the two + * are fundamentally in sync. + * + * *** Degenerate spare sectors exist if: + * A) The number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and block size). + * + * and + * + * B) the remainder is greater than a full allocation block's worth of bytes. + * In this case, a smaller file system exists in a larger partition. + * This can happen in various ways, including when the volume is resized but the + * partition has not yet been resized. Under this condition, we have to assume that + * partition management software may resize the partition to match + * the file system size in the future. Therefore we should update + * the alternate volume header at two locations on the disk: + * a. 1024 bytes before the end of the partition + * b. 1024 bytes before the end of the file system */ + if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) { - hfsmp->hfs_alt_id_sector = 0; /* partition has grown! */ - } else { - hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + /* + * Handle the degenerate case above. FS < partition size.
+ * AVH located at 1024 bytes from the end of the partition + */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + /* AVH located at 1024 bytes from the end of the filesystem */ + hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, - hfsmp->hfs_logical_block_count); + (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size)); + } + else { + /* Innocuous spare sectors; Partition & FS notion are in sync */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; + } + if (hfs_resize_debug) { + printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); } bzero(&cndesc, sizeof(cndesc)); @@ -867,7 +932,8 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, int cperr = 0; uint16_t majorversion; uint16_t minorversion; - + uint64_t flags; + uint8_t cryptogen = 0; struct cp_root_xattr *xattr = NULL; MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); if (xattr == NULL) { @@ -895,11 +961,15 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, bzero(xattr, sizeof(struct cp_root_xattr)); xattr->major_version = CP_NEW_MAJOR_VERS; xattr->minor_version = CP_MINOR_VERS; - xattr->flags = 0; cperr = cp_setrootxattr (hfsmp, xattr); } majorversion = xattr->major_version; minorversion = xattr->minor_version; + flags = xattr->flags; + if (xattr->flags & CP_ROOT_CRYPTOG1) { + cryptogen = 1; + } + if (xattr) { FREE(xattr, M_TEMP); } @@ -909,15 +979,23 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* If we got here, then the CP version is valid. Set it in the mount point */ hfsmp->hfs_running_cp_major_vers = majorversion; printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion); + hfsmp->cproot_flags = flags; + hfsmp->cp_crypto_generation = cryptogen; /* - * Acquire the boot-arg for the AKS default key. + * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree. * Ensure that the boot-arg's value is valid for FILES (not directories), * since only files are actually protected for now. */ + PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + } + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { - hfsmp->default_cp_class = PROTECTION_CLASS_D; + hfsmp->default_cp_class = PROTECTION_CLASS_C; } } else { @@ -1085,8 +1163,7 @@ hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p) * zero - overflow extents do not exist */ __private_extern__ -int -overflow_extents(struct filefork *fp) +bool overflow_extents(struct filefork *fp) { u_int32_t blocks; @@ -1096,29 +1173,42 @@ overflow_extents(struct filefork *fp) // and therefore it has to be an HFS+ volume. Otherwise // we check through the volume header to see what type // of volume we're on. 
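A runnable sketch of the spare-sector classification above, using invented geometry: the remainder between partition size and filesystem size is innocuous when it is at most one allocation block's worth of sectors, and degenerate (a smaller filesystem inside a larger partition, so two AVH copies must be maintained) when it is larger:

```c
#include <stdint.h>
#include <stdio.h>

/* Classify the gap between partition size and filesystem size the way
 * the mount code above does. All input values are illustrative. */
int
main(void)
{
	uint32_t logical_block_size = 512;	/* device sector size */
	uint32_t block_size = 4096;		/* allocation block size */
	uint64_t logical_block_count = 2048000;	/* sectors in the partition */
	uint64_t total_blocks = 250000;		/* allocation blocks in the FS */

	uint64_t fs_sectors = total_blocks * block_size / logical_block_size;
	uint64_t spare = logical_block_count - fs_sectors;
	uint64_t one_alloc_block = block_size / logical_block_size;

	printf("spare sectors: %llu (%s)\n", (unsigned long long)spare,
	    spare > one_alloc_block ? "degenerate: keep two AVH copies"
				    : "innocuous: partition and FS AVH agree");
	return 0;
}
```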
- // - if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) { - if (fp->ff_extents[7].blockCount == 0) - return (0); + // - blocks = fp->ff_extents[0].blockCount + - fp->ff_extents[1].blockCount + - fp->ff_extents[2].blockCount + - fp->ff_extents[3].blockCount + - fp->ff_extents[4].blockCount + - fp->ff_extents[5].blockCount + - fp->ff_extents[6].blockCount + - fp->ff_extents[7].blockCount; - } else { +#if CONFIG_HFS_STD + if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) { if (fp->ff_extents[2].blockCount == 0) return false; - + blocks = fp->ff_extents[0].blockCount + - fp->ff_extents[1].blockCount + - fp->ff_extents[2].blockCount; - } + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount; - return (fp->ff_blocks > blocks); + return fp->ff_blocks > blocks; + } +#endif + + if (fp->ff_extents[7].blockCount == 0) + return false; + + blocks = fp->ff_extents[0].blockCount + + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount + + fp->ff_extents[3].blockCount + + fp->ff_extents[4].blockCount + + fp->ff_extents[5].blockCount + + fp->ff_extents[6].blockCount + + fp->ff_extents[7].blockCount; + + return fp->ff_blocks > blocks; +} + +static __attribute__((pure)) +boolean_t hfs_is_frozen(struct hfsmount *hfsmp) +{ + return (hfsmp->hfs_freeze_state == HFS_FROZEN + || (hfsmp->hfs_freeze_state == HFS_FREEZING + && current_thread() != hfsmp->hfs_freezing_thread)); } /* @@ -1127,23 +1217,62 @@ overflow_extents(struct filefork *fp) int hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) { - void *thread = current_thread(); + thread_t thread = current_thread(); if (hfsmp->hfs_global_lockowner == thread) { panic ("hfs_lock_global: locking against myself!"); } - /* HFS_SHARED_LOCK */ + /* + * This check isn't really necessary but this stops us taking + * the mount lock in most cases. The essential check is below. + */ + if (hfs_is_frozen(hfsmp)) { + /* + * Unfortunately, there is no easy way of getting a notification + * for when a process is exiting and it's possible for the exiting + * process to get blocked somewhere else. To catch this, we + * periodically monitor the frozen process here and thaw if + * we spot that it's exiting. + */ +frozen: + hfs_lock_mount(hfsmp); + + struct timespec ts = { 0, 500 * NSEC_PER_MSEC }; + + while (hfs_is_frozen(hfsmp)) { + if (hfsmp->hfs_freeze_state == HFS_FROZEN + && proc_exiting(hfsmp->hfs_freezing_proc)) { + hfs_thaw_locked(hfsmp); + break; + } + + msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT, "hfs_lock_global (frozen)", &ts); + } + hfs_unlock_mount(hfsmp); + } + + /* HFS_SHARED_LOCK */ if (locktype == HFS_SHARED_LOCK) { lck_rw_lock_shared (&hfsmp->hfs_global_lock); hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER; } - /* HFS_EXCLUSIVE_LOCK */ + /* HFS_EXCLUSIVE_LOCK */ else { lck_rw_lock_exclusive (&hfsmp->hfs_global_lock); hfsmp->hfs_global_lockowner = thread; } + /* + * We have to check if we're frozen again because of the time + * between when we checked and when we took the global lock. 
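The overflow_extents() rewrite above boils down to a sum: a fork has records in the extents overflow B-tree when its total block count exceeds what the eight catalog-resident extents cover (three on HFS standard). A simplified user-space version, with a hypothetical fork layout:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified HFS+ fork: a file has overflow extents in the extents
 * B-tree when its total block count exceeds what the eight extents
 * stored in its catalog record cover, which is the check the rewritten
 * overflow_extents() performs above. */
struct fork {
	uint32_t total_blocks;		/* ff_blocks */
	uint32_t extent_blocks[8];	/* ff_extents[i].blockCount */
};

static bool
has_overflow_extents(const struct fork *fp)
{
	if (fp->extent_blocks[7] == 0)	/* last slot unused: everything fits */
		return false;

	uint32_t covered = 0;
	for (int i = 0; i < 8; i++)
		covered += fp->extent_blocks[i];
	return fp->total_blocks > covered;
}

int
main(void)
{
	struct fork fragmented = {
		.total_blocks = 900,
		.extent_blocks = { 100, 100, 100, 100, 100, 100, 100, 100 },
	};
	printf("overflow extents: %s\n",
	    has_overflow_extents(&fragmented) ? "yes" : "no");
	return 0;
}
```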
+ */ + if (hfs_is_frozen(hfsmp)) { + hfs_unlock_global(hfsmp); + goto frozen; + } + return 0; } @@ -1153,16 +1282,15 @@ hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) */ void hfs_unlock_global (struct hfsmount *hfsmp) -{ - - void *thread = current_thread(); +{ + thread_t thread = current_thread(); - /* HFS_LOCK_EXCLUSIVE */ + /* HFS_LOCK_EXCLUSIVE */ if (hfsmp->hfs_global_lockowner == thread) { hfsmp->hfs_global_lockowner = NULL; lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock); } - /* HFS_LOCK_SHARED */ + /* HFS_LOCK_SHARED */ else { lck_rw_unlock_shared (&hfsmp->hfs_global_lock); } @@ -1303,6 +1431,24 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp */ if (hfsmp->hfs_extents_cp) { (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * because we may need this lock on the pageout path (if a swapfile allocation + * spills into the extents overflow tree), we will grant the holder of this + * lock the privilege of dipping into the reserve free pool in order to prevent + * a deadlock from occurring if we need those pageouts to complete before we + * will make any new pages available on the free list... the deadlock can occur + * if this thread needs to allocate memory while this lock is held + */ + if (set_vm_privilege(TRUE) == FALSE) { + /* + * indicate that we need to drop vm_privilege + * when we unlock + */ + flags |= SFL_VM_PRIV; + } + } } else { flags &= ~SFL_EXTENTS; } @@ -1366,6 +1512,14 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) } } hfs_unlock(hfsmp->hfs_extents_cp); + + if (flags & SFL_VM_PRIV) { + /* + * revoke the vm_privilege we granted this thread + * now that we have unlocked the overflow extents + */ + set_vm_privilege(FALSE); + } } } @@ -1387,7 +1541,7 @@ void RequireFileLock(FileReference vp, int shareable) shareable = 0; } - locked = VTOC(vp)->c_lockowner == (void *)current_thread(); + locked = VTOC(vp)->c_lockowner == current_thread(); if (!locked && !shareable) { switch (VTOC(vp)->c_fileid) { @@ -1654,7 +1808,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp) cnode.c_rsrcfork = NULL; fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize; while (fsize > 0) { - if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) { + if (fsize > HFS_BIGFILE_SIZE) { fsize -= HFS_BIGFILE_SIZE; } else { fsize = 0; @@ -1808,6 +1962,81 @@ u_int32_t logBlockSize; return logBlockSize; } +#if HFS_SPARSE_DEV +static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks) +{ + struct vfsstatfs *vfsp; /* 272 bytes */ + uint64_t vfreeblks; + struct timeval now; + + hfs_lock_mount(hfsmp); + + vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp; + if (!backing_vp) { + hfs_unlock_mount(hfsmp); + return false; + } + + // usecount is not enough; we need iocount + if (vnode_get(backing_vp)) { + hfs_unlock_mount(hfsmp); + *pfree_blks = 0; + return true; + } + + uint32_t loanedblks = hfsmp->loanedBlocks; + uint32_t bandblks = hfsmp->hfs_sparsebandblks; + uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks; + + hfs_unlock_mount(hfsmp); + + mount_t backingfs_mp = vnode_mount(backing_vp); + + microtime(&now); + if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) { + vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT); + hfsmp->hfs_last_backingstatfs = now.tv_sec; + } + + if (!(vfsp = vfs_statfs(backingfs_mp))) { + vnode_put(backing_vp); + return false; + } + + vfreeblks = vfsp->f_bavail; + /* 
Normalize block count if needed. */ + if (vfsp->f_bsize != hfsmp->blockSize) + vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize; + if (vfreeblks > bandblks) + vfreeblks -= bandblks; + else + vfreeblks = 0; + + /* + * Take into account any delayed allocations. It is not + * certain what the original reason for the "2 *" is. Most + * likely it is to allow for additional requirements in the + * host file system and metadata required by disk images. The + * number of loaned blocks is likely to be small and we will + * stop using them as we get close to the limit. + */ + loanedblks = 2 * loanedblks; + if (vfreeblks > loanedblks) + vfreeblks -= loanedblks; + else + vfreeblks = 0; + + if (maxblks) + vfreeblks = MIN(vfreeblks, maxblks); + + vnode_put(backing_vp); + + *pfree_blks = vfreeblks; + + return true; +} +#endif + u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) { @@ -1840,48 +2069,11 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) * When the underlying device is sparse, check the * available space on the backing store volume. */ - if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { - struct vfsstatfs *vfsp; /* 272 bytes */ - u_int64_t vfreeblks; - u_int32_t loanedblks; - struct mount * backingfs_mp; - struct timeval now; - - backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp); - - microtime(&now); - if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) { - vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT); - hfsmp->hfs_last_backingstatfs = now.tv_sec; - } - - if ((vfsp = vfs_statfs(backingfs_mp))) { - hfs_lock_mount (hfsmp); - vfreeblks = vfsp->f_bavail; - /* Normalize block count if needed. */ - if (vfsp->f_bsize != hfsmp->blockSize) { - vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize; - } - if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks) - vfreeblks -= hfsmp->hfs_sparsebandblks; - else - vfreeblks = 0; - - /* Take into account any delayed allocations. */ - loanedblks = 2 * hfsmp->loanedBlocks; - if (vfreeblks > loanedblks) - vfreeblks -= loanedblks; - else - vfreeblks = 0; - - if (hfsmp->hfs_backingfs_maxblocks) { - vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks); - } - freeblks = MIN(vfreeblks, freeblks); - hfs_unlock_mount (hfsmp); - } - } + uint64_t vfreeblks; + if (hfs_get_backing_free_blks(hfsmp, &vfreeblks)) + freeblks = MIN(freeblks, vfreeblks); #endif /* HFS_SPARSE_DEV */ + if (hfsmp->hfs_flags & HFS_CS) { uint64_t cs_free_bytes; uint64_t cs_free_blks; @@ -3049,7 +3241,7 @@ void hfs_syncer_queue(thread_call_t syncer) hfs_usecs_to_deadline(HFS_META_DELAY), 0, THREAD_CALL_DELAY_SYS_BACKGROUND)) { - printf ("hfs: syncer already scheduled!"); + printf("hfs: syncer already scheduled!\n"); } } @@ -3089,8 +3281,8 @@ hfs_sync_ejectable(struct hfsmount *hfsmp) int hfs_start_transaction(struct hfsmount *hfsmp) { - int ret, unlock_on_err=0; - void * thread = current_thread(); + int ret = 0, unlock_on_err = 0; + thread_t thread = current_thread(); #ifdef HFS_CHECK_LOCK_ORDER /* @@ -3111,17 +3303,28 @@ hfs_start_transaction(struct hfsmount *hfsmp) #endif /* HFS_CHECK_LOCK_ORDER */ if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) { + /* + * The global lock should be held shared if journal is + * active to prevent disabling. If we're not the owner + * of the journal lock, verify that we're not already + * holding the global lock exclusive before moving on. 
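For reference, the free-space math in hfs_get_backing_free_blks() above, reduced to a user-space function with illustrative inputs: scale the backing store's available blocks into this volume's allocation-block units, reserve the sparse band and twice the loaned-block count, then honor any configured cap:

```c
#include <stdint.h>
#include <stdio.h>

/* The normalization from hfs_get_backing_free_blks() above. All input
 * values are invented for the example. */
static uint64_t
backing_free_blocks(uint64_t f_bavail, uint32_t f_bsize,
    uint32_t hfs_block_size, uint32_t band_blocks,
    uint32_t loaned_blocks, uint64_t max_blocks)
{
	uint64_t freeblks = f_bavail;

	/* convert the backing store's block units into ours */
	if (f_bsize != hfs_block_size)
		freeblks = freeblks * f_bsize / hfs_block_size;

	/* reserve the sparse band */
	freeblks = (freeblks > band_blocks) ? freeblks - band_blocks : 0;

	/* account for delayed allocations, doubled as in the original */
	uint64_t loaned = 2ULL * loaned_blocks;
	freeblks = (freeblks > loaned) ? freeblks - loaned : 0;

	if (max_blocks)
		freeblks = (freeblks < max_blocks) ? freeblks : max_blocks;
	return freeblks;
}

int
main(void)
{
	/* 1M free 4K blocks on the backing store, a 16384-block sparse
	 * band, 100 loaned blocks, no configured cap */
	printf("usable blocks: %llu\n",
	    (unsigned long long)backing_free_blocks(1000000, 4096, 4096,
	    16384, 100, 0));
	return 0;
}
```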
+ */ + if (hfsmp->hfs_global_lockowner == thread) { + ret = EBUSY; + goto out; + } + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); unlock_on_err = 1; } /* If a downgrade to read-only mount is in progress, no other - * process than the downgrade process is allowed to modify + * thread than the downgrade thread is allowed to modify * the file system. */ if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) && - (hfsmp->hfs_downgrading_proc != thread)) { + hfsmp->hfs_downgrading_thread != thread) { ret = EROFS; goto out; } @@ -3170,6 +3373,28 @@ hfs_end_transaction(struct hfsmount *hfsmp) } +void +hfs_journal_lock(struct hfsmount *hfsmp) +{ + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + if (hfsmp->jnl) { + journal_lock(hfsmp->jnl); + } + hfs_unlock_global (hfsmp); +} + +void +hfs_journal_unlock(struct hfsmount *hfsmp) +{ + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + if (hfsmp->jnl) { + journal_unlock(hfsmp->jnl); + } + hfs_unlock_global (hfsmp); +} + /* * Flush the contents of the journal to the disk. * @@ -3292,7 +3517,7 @@ extern time_t snapshot_timestamp; int check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg) { - int tracked_error = 0, snapshot_error = 0; + int snapshot_error = 0; if (vp == NULL) { return 0; @@ -3303,23 +3528,6 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a return 0; } - if (VTOC(vp)->c_bsdflags & UF_TRACKED) { - // the file has the tracked bit set, so send an event to the tracked-file handler - int error; - - // printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp); - error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT); - if (error) { - if (error == EAGAIN) { - printf("hfs: tracked-file: timed out waiting for namespace handler...\n"); - - } else if (error == EINTR) { - // printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n"); - tracked_error = EINTR; - } - } - } - if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) { // the change time is within this epoch int error; @@ -3337,7 +3545,6 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a } } - if (tracked_error) return tracked_error; if (snapshot_error) return snapshot_error; return 0; @@ -3440,3 +3647,166 @@ hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid) return 0; } + + +/* + * Return information about number of file system allocation blocks + * taken by metadata on a volume. + * + * This function populates struct hfsinfo_metadata with allocation blocks + * used by extents overflow btree, catalog btree, bitmap, attribute btree, + * journal file, and sum of all of the above. 
+ */ +int +hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo) +{ + int lockflags = 0; + int ret_lockflags = 0; + + /* Zero out the output buffer */ + bzero(hinfo, sizeof(struct hfsinfo_metadata)); + + /* + * Getting number of allocation blocks for all btrees + * should be a quick operation, so we grab locks for + * all of them at the same time + */ + lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE; + ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + /* + * Make sure that we were able to acquire all locks requested + * to protect us against conditions like unmount in progress. + */ + if ((lockflags & ret_lockflags) != lockflags) { + /* Release any locks that were acquired */ + hfs_systemfile_unlock(hfsmp, ret_lockflags); + return EPERM; + } + + /* Get information about all the btrees */ + hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks; + hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks; + hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks; + hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks; + + /* Done with btrees, give up the locks */ + hfs_systemfile_unlock(hfsmp, ret_lockflags); + + /* Get information about journal file */ + hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize); + + /* Calculate total number of metadata blocks */ + hinfo->total = hinfo->extents + hinfo->catalog + + hinfo->allocation + hinfo->attribute + + hinfo->journal; + + return 0; +} + +static int +hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs) +{ + vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8"); + + return 0; +} + +__private_extern__ +int hfs_freeze(struct hfsmount *hfsmp) +{ + // First make sure some other process isn't freezing + hfs_lock_mount(hfsmp); + while (hfsmp->hfs_freeze_state != HFS_THAWED) { + if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) { + hfs_unlock_mount(hfsmp); + return EINTR; + } + } + + // Stop new syncers from starting + hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE; + + // Now wait for all syncers to finish + while (hfsmp->hfs_syncers) { + if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) { + hfs_thaw_locked(hfsmp); + hfs_unlock_mount(hfsmp); + return EINTR; + } + } + hfs_unlock_mount(hfsmp); + + // flush things before we get started to try and prevent + // dirty data from being paged out while we're frozen. + // note: we can't do this once we're in the freezing state because + // other threads will need to take the global lock + vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL); + + // Block everything in hfs_lock_global now + hfs_lock_mount(hfsmp); + hfsmp->hfs_freeze_state = HFS_FREEZING; + hfsmp->hfs_freezing_thread = current_thread(); + hfs_unlock_mount(hfsmp); + + /* Take the exclusive lock to flush out anything else that + might have the global lock at the moment and also so we + can flush the journal. */ + hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); + journal_flush(hfsmp->jnl, TRUE); + hfs_unlock_global(hfsmp); + + // don't need to iterate on all vnodes, we just need to + // wait for writes to the system files and the device vnode + // + // Now that journal flush waits for all metadata blocks to + // be written out, waiting for btree writes is probably no + // longer required. 
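hfs_getinfo_metadata_blocks() above sums the four B-tree forks plus the journal, converting the journal's byte size to allocation blocks with howmany() (round-up division). A standalone sketch with hypothetical counts:

```c
#include <stdint.h>
#include <stdio.h>

/* Round-up division, like the howmany() macro used above to convert the
 * journal's byte size into allocation blocks. */
#define HOWMANY(x, y) (((x) + ((y) - 1)) / (y))

int
main(void)
{
	/* hypothetical per-file block counts, as gathered under the four
	 * systemfile locks in hfs_getinfo_metadata_blocks() */
	uint32_t extents = 64, catalog = 2048, allocation = 512,
	    attribute = 1024;
	uint64_t jnl_size = 8 * 1024 * 1024;	/* bytes */
	uint32_t block_size = 4096;

	uint64_t journal = HOWMANY(jnl_size, block_size);
	uint64_t total = extents + catalog + allocation + attribute + journal;

	printf("journal blocks: %llu, total metadata blocks: %llu\n",
	    (unsigned long long)journal, (unsigned long long)total);
	return 0;
}
```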
+ if (HFSTOVCB(hfsmp)->extentsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3"); + if (HFSTOVCB(hfsmp)->catalogRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4"); + if (HFSTOVCB(hfsmp)->allocationsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5"); + if (hfsmp->hfs_attribute_vp) + vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6"); + vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7"); + + // We're done, mark frozen + hfs_lock_mount(hfsmp); + hfsmp->hfs_freeze_state = HFS_FROZEN; + hfsmp->hfs_freezing_proc = current_proc(); + hfs_unlock_mount(hfsmp); + + return 0; +} + +__private_extern__ +int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process) +{ + hfs_lock_mount(hfsmp); + + if (hfsmp->hfs_freeze_state != HFS_FROZEN) { + hfs_unlock_mount(hfsmp); + return EINVAL; + } + if (process && hfsmp->hfs_freezing_proc != process) { + hfs_unlock_mount(hfsmp); + return EPERM; + } + + hfs_thaw_locked(hfsmp); + + hfs_unlock_mount(hfsmp); + + return 0; +} + +static void hfs_thaw_locked(struct hfsmount *hfsmp) +{ + hfsmp->hfs_freezing_proc = NULL; + hfsmp->hfs_freeze_state = HFS_THAWED; + + wakeup(&hfsmp->hfs_freeze_state); +} diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 414d6de78..5bfc09c3e 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,6 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include #include #include #include @@ -60,6 +61,7 @@ #include #include +#include #include "hfs.h" #include "hfs_catalog.h" @@ -68,6 +70,7 @@ #include "hfs_mount.h" #include "hfs_quota.h" #include "hfs_endian.h" +#include "hfs_kdebug.h" #include "hfscommon/headers/BTreesInternal.h" #include "hfscommon/headers/FileMgrInternal.h" @@ -95,9 +98,23 @@ int hfs_removefile(struct vnode *, struct vnode *, struct componentname *, /* Used here and in cnode teardown -- for symlinks */ int hfs_removefile_callback(struct buf *bp, void *hfsmp); -int hfs_movedata (struct vnode *, struct vnode*); -static int hfs_move_fork (struct filefork *srcfork, struct cnode *src, - struct filefork *dstfork, struct cnode *dst); +enum { + HFS_MOVE_DATA_INCLUDE_RSRC = 1, +}; +typedef uint32_t hfs_move_data_options_t; + +static int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, + hfs_move_data_options_t options); +static int hfs_move_fork(filefork_t *srcfork, cnode_t *src, + filefork_t *dstfork, cnode_t *dst); +static const struct cat_fork * +hfs_prepare_fork_for_update(const filefork_t *pfork, + struct cat_fork *fork_buf, + uint32_t block_size); + +#if HFS_COMPRESSION +static int hfs_move_compressed(cnode_t *from_vp, cnode_t *to_vp); +#endif decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp); @@ -125,7 +142,6 @@ int hfs_vnop_symlink(struct vnop_symlink_args*); int hfs_vnop_setattr(struct vnop_setattr_args*); int hfs_vnop_readlink(struct vnop_readlink_args *); int hfs_vnop_pathconf(struct vnop_pathconf_args *); -int hfs_vnop_whiteout(struct vnop_whiteout_args *); int hfs_vnop_mmap(struct vnop_mmap_args *ap); int hfsspec_read(struct vnop_read_args *); int hfsspec_write(struct vnop_write_args *); @@ -169,42 +185,20 @@ hfs_is_journal_file(struct hfsmount *hfsmp, struct cnode *cp) int hfs_vnop_create(struct vnop_create_args *ap) { - int error; - -again: - error = 
hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); - /* - * We speculatively skipped the original lookup of the leaf - * for CREATE. Since it exists, go get it as long as they - * didn't want an exclusive create. + * We leave handling of certain race conditions here to the caller + * which will have a better understanding of the semantics it + * requires. For example, if it turns out that the file exists, + * it would be wrong of us to return a reference to the existing + * file because the caller might not want that and it would be + * misleading to suggest the file had been created when it hadn't + * been. Note that our NFS server code does not set the + * VA_EXCLUSIVE flag so you cannot assume that callers don't want + * EEXIST errors if it's not set. The common case, where users + * are calling open with the O_CREAT mode, is handled in VFS; when + * we return EEXIST, it will loop and do the look-up again. */ - if ((error == EEXIST) && !(ap->a_vap->va_vaflags & VA_EXCLUSIVE)) { - struct vnop_lookup_args args; - - args.a_desc = &vnop_lookup_desc; - args.a_dvp = ap->a_dvp; - args.a_vpp = ap->a_vpp; - args.a_cnp = ap->a_cnp; - args.a_context = ap->a_context; - args.a_cnp->cn_nameiop = LOOKUP; - error = hfs_vnop_lookup(&args); - /* - * We can also race with remove for this file. - */ - if (error == ENOENT) { - goto again; - } - - /* Make sure it was file. */ - if ((error == 0) && !vnode_isreg(*args.a_vpp)) { - vnode_put(*args.a_vpp); - *args.a_vpp = NULLVP; - error = EEXIST; - } - args.a_cnp->cn_nameiop = CREATE; - } - return (error); + return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); } /* @@ -482,7 +476,7 @@ get_uthread_doc_tombstone(void) // where we have the necessary info. // static void -clear_tombstone_docid(struct doc_tombstone *ut, struct hfsmount *hfsmp, struct cnode *dst_cnode) +clear_tombstone_docid(struct doc_tombstone *ut, __unused struct hfsmount *hfsmp, struct cnode *dst_cnode) { uint32_t old_id = ut->t_lastop_document_id; @@ -821,7 +815,8 @@ hfs_vnop_close(ap) // now we can truncate the file, if necessary blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize); if (blks < VTOF(vp)->ff_blocks){ - (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, 0, 0, ap->a_context); + (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, + 0, ap->a_context); } } } @@ -830,10 +825,9 @@ hfs_vnop_close(ap) // if we froze the fs and we're exiting, then "thaw" the fs - if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) { - hfsmp->hfs_freezing_proc = NULL; - hfs_unlock_global (hfsmp); - lck_rw_unlock_exclusive(&hfsmp->hfs_insync); + if (hfsmp->hfs_freeze_state == HFS_FROZEN + && hfsmp->hfs_freezing_proc == p && proc_exiting(p)) { + hfs_thaw(hfsmp, p); } busy = vnode_isinuse(vp, 1); @@ -859,6 +853,14 @@ hfs_vnop_close(ap) return (0); } +static bool hfs_should_generate_document_id(hfsmount_t *hfsmp, cnode_t *cp) +{ + return (!ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) + && ISSET(cp->c_bsdflags, UF_TRACKED) + && cp->c_desc.cd_cnid != kHFSRootFolderID + && (S_ISDIR(cp->c_mode) || S_ISREG(cp->c_mode) || S_ISLNK(cp->c_mode))); +} + /* * Get basic attributes. 
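The hfs_vnop_create() rewrite above leaves EEXIST handling to the upper layers: VFS retries the lookup for a plain open(O_CREAT), while O_CREAT|O_EXCL surfaces the error to the caller. The same contract is observable from user space:

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

/* Demonstrates the semantics the hfs_vnop_create() rewrite above defers
 * to the upper layers: plain O_CREAT on an existing file succeeds (VFS
 * retries the lookup when the FS reports EEXIST), while O_CREAT|O_EXCL
 * propagates EEXIST to the caller. */
int
main(void)
{
	const char *path = "/tmp/hfs_create_demo";	/* scratch file */

	int fd = open(path, O_CREAT | O_WRONLY, 0644);
	if (fd >= 0)
		close(fd);

	/* a second O_CREAT simply opens the existing file */
	fd = open(path, O_CREAT | O_WRONLY, 0644);
	printf("O_CREAT on existing file: %s\n",
	    fd >= 0 ? "ok" : strerror(errno));
	if (fd >= 0)
		close(fd);

	/* O_EXCL turns the file's existence into an error */
	fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0644);
	printf("O_CREAT|O_EXCL on existing file: %s\n",
	    fd >= 0 ? "ok (unexpected)" : strerror(errno));
	if (fd >= 0)
		close(fd);

	unlink(path);
	return 0;
}
```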
*/ @@ -936,6 +938,65 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) hfsmp = VTOHFS(vp); v_type = vnode_vtype(vp); + + if (VATTR_IS_ACTIVE(vap, va_document_id)) { + uint32_t document_id; + + if (cp->c_desc.cd_cnid == kHFSRootFolderID) + document_id = kHFSRootFolderID; + else { + /* + * This is safe without a lock because we're just reading + * a 32 bit aligned integer which should be atomic on all + * platforms we support. + */ + document_id = hfs_get_document_id(cp); + + if (!document_id && hfs_should_generate_document_id(hfsmp, cp)) { + uint32_t new_document_id; + + error = hfs_generate_document_id(hfsmp, &new_document_id); + if (error) + return error; + + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) + return error; + + bool want_docid_fsevent = false; + + // Need to check again now that we have the lock + document_id = hfs_get_document_id(cp); + if (!document_id && hfs_should_generate_document_id(hfsmp, cp)) { + cp->c_attr.ca_finderextendeddirinfo.document_id = document_id = new_document_id; + want_docid_fsevent = true; + SET(cp->c_flag, C_MODIFIED); + } + + hfs_unlock(cp); + + if (want_docid_fsevent) { +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, ap->a_context, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, document_id, + FSE_ARG_DONE); + + if (need_fsevent(FSE_STAT_CHANGED, vp)) { + add_fsevent(FSE_STAT_CHANGED, ap->a_context, + FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } +#endif + } + } + } + + vap->va_document_id = document_id; + VATTR_SET_SUPPORTED(vap, va_document_id); + } + /* * If time attributes are requested and we have cnode times * that require updating, then acquire an exclusive lock on @@ -1065,7 +1126,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if (cp->c_blocks - VTOF(vp)->ff_blocks) { /* We deal with rsrc fork vnode iocount at the end of the function */ - error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); + error = hfs_vgetrsrc(hfsmp, vp, &rvp); if (error) { /* * Note that we call hfs_vgetrsrc with error_on_unlinked @@ -1203,26 +1264,27 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) vap->va_supported |= VNODE_ATTR_va_data_size; #endif - if (VATTR_IS_ACTIVE(vap, va_gen)) { - if (UBCINFOEXISTS(vp) && (vp->v_ubcinfo->ui_flags & UI_ISMAPPED)) { - /* While file is mmapped the generation count is invalid. - * However, bump the value so that the write-gen counter - * will be different once the file is unmapped (since, - * when unmapped the pageouts may not yet have happened) - */ - if (vp->v_ubcinfo->ui_flags & UI_MAPPEDWRITE) { - hfs_incr_gencount (cp); - } - vap->va_gen = 0; +#if CONFIG_PROTECT + if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) { + vap->va_dataprotect_class = cp->c_cpentry ? cp->c_cpentry->cp_pclass : 0; + VATTR_SET_SUPPORTED(vap, va_dataprotect_class); + } +#endif + if (VATTR_IS_ACTIVE(vap, va_write_gencount)) { + if (ubc_is_mapped_writable(vp)) { + /* + * Return 0 to the caller to indicate the file may be + * changing. There is no need for us to increment the + * generation counter here because it gets done as part of + * page-out and also when the file is unmapped (to account + * for changes we might not have seen). 
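The va_document_id block just above is a double-checked publication pattern: read the field unlocked (a 32-bit aligned load), do the expensive generation without the lock held (hfs_generate_document_id may start a transaction and sleep), then re-check under the cnode lock before publishing. Stripped of HFS specifics -- obj, field, generate_expensive, lock/unlock are placeholders -- the shape is:

    uint32_t v = obj->field;                      /* unlocked read of an aligned u32 */
    if (!v) {
        uint32_t fresh;
        int error = generate_expensive(&fresh);   /* may sleep; no locks held */
        if (error)
            return error;
        lock(obj);
        v = obj->field;                           /* re-check now that we hold the lock */
        if (!v)
            obj->field = v = fresh;               /* publish exactly once */
        unlock(obj);
        /* If we lost the race, 'fresh' is simply discarded. */
    }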
+ */ + vap->va_write_gencount = 0; } else { - vap->va_gen = hfs_get_gencount(cp); + vap->va_write_gencount = hfs_get_gencount(cp); } - - VATTR_SET_SUPPORTED(vap, va_gen); - } - if (VATTR_IS_ACTIVE(vap, va_document_id)) { - vap->va_document_id = hfs_get_document_id(cp); - VATTR_SET_SUPPORTED(vap, va_document_id); + + VATTR_SET_SUPPORTED(vap, va_write_gencount); } /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */ @@ -1428,29 +1490,19 @@ hfs_vnop_setattr(ap) } #endif - /* Take truncate lock before taking cnode lock. */ + // Take truncate lock hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - - /* Perform the ubc_setsize before taking the cnode lock. */ - ubc_setsize(vp, vap->va_data_size); - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { - hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); -#if HFS_COMPRESSION - decmpfs_unlock_compressed_data(dp, 1); -#endif - return (error); - } - cp = VTOC(vp); + // hfs_truncate will deal with the cnode lock + error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, + 0, ap->a_context); - error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, 0, ap->a_context); - - hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); #if HFS_COMPRESSION decmpfs_unlock_compressed_data(dp, 1); #endif if (error) - goto out; + return error; } if (cp == NULL) { if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) @@ -1600,6 +1652,8 @@ hfs_vnop_setattr(ap) cp->c_touch_modtime = FALSE; cp->c_touch_chgtime = TRUE; + hfs_clear_might_be_dirty_flag(cp); + /* * The utimes system call can reset the modification * time but it doesn't know about HFS create times. @@ -1868,6 +1922,47 @@ good: return (0); } +#if HFS_COMPRESSION +/* + * Flush the resource fork if it exists. vp is the data fork and has + * an iocount. + */ +static int hfs_flush_rsrc(vnode_t vp, vfs_context_t ctx) +{ + cnode_t *cp = VTOC(vp); + + hfs_lock(cp, HFS_SHARED_LOCK, 0); + + vnode_t rvp = cp->c_rsrc_vp; + + if (!rvp) { + hfs_unlock(cp); + return 0; + } + + int vid = vnode_vid(rvp); + + hfs_unlock(cp); + + int error = vnode_getwithvid(rvp, vid); + + if (error) + return error == ENOENT ? 0 : error; + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, 0); + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + hfs_filedone(rvp, ctx, HFS_FILE_DONE_NO_SYNC); + hfs_unlock(cp); + hfs_unlock_truncate(cp, 0); + + error = ubc_msync(rvp, 0, ubc_getsize(rvp), NULL, + UBC_PUSHALL | UBC_SYNC); + + vnode_put(rvp); + + return error; +} +#endif // HFS_COMPRESSION /* * hfs_vnop_exchange: @@ -1917,6 +2012,7 @@ hfs_vnop_exchange(ap) int error = 0, started_tr = 0, got_cookie = 0; cat_cookie_t cookie; time_t orig_from_ctime, orig_to_ctime; + bool have_cnode_locks = false, have_from_trunc_lock = false, have_to_trunc_lock = false; /* * VFS does the following checks: @@ -1925,9 +2021,12 @@ hfs_vnop_exchange(ap) * 3. Validate that they're not the same vnode. 
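Given the va_write_gencount semantics above, a plausible consumer (this is an assumption about client logic, not code from the patch) would treat a zero count as "possibly changing" and otherwise compare counters between scans:

    /* Hypothetical change-tracking client logic. */
    static bool may_have_changed(uint32_t prev_gen, uint32_t cur_gen)
    {
        if (prev_gen == 0 || cur_gen == 0)
            return true;               /* file was mmapped writable: unknown */
        return prev_gen != cur_gen;    /* changed iff the counter moved */
    }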
*/ - orig_from_ctime = VTOC(from_vp)->c_ctime; - orig_to_ctime = VTOC(to_vp)->c_ctime; + from_cp = VTOC(from_vp); + to_cp = VTOC(to_vp); + hfsmp = VTOHFS(from_vp); + orig_from_ctime = from_cp->c_ctime; + orig_to_ctime = to_cp->c_ctime; #if CONFIG_PROTECT /* @@ -1941,19 +2040,25 @@ hfs_vnop_exchange(ap) #endif #if HFS_COMPRESSION - if ( hfs_file_is_compressed(VTOC(from_vp), 0) ) { - if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) { - return error; + if (!ISSET(ap->a_options, FSOPT_EXCHANGE_DATA_ONLY)) { + if ( hfs_file_is_compressed(from_cp, 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) { + return error; + } } - } - - if ( hfs_file_is_compressed(VTOC(to_vp), 0) ) { - if ( 0 != ( error = decmpfs_decompress_file(to_vp, VTOCMP(to_vp), -1, 0, 1) ) ) { - return error; + + if ( hfs_file_is_compressed(to_cp, 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(to_vp, VTOCMP(to_vp), -1, 0, 1) ) ) { + return error; + } } } #endif // HFS_COMPRESSION - + + // Resource forks cannot be exchanged. + if (VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) + return EINVAL; + /* * Normally, we want to notify the user handlers about the event, * except if it's a handler driving the event. @@ -1962,69 +2067,100 @@ hfs_vnop_exchange(ap) check_for_tracked_file(from_vp, orig_from_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); check_for_tracked_file(to_vp, orig_to_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); } else { - /* - * We're doing a data-swap. - * Take the truncate lock/cnode lock, then verify there are no mmap references. - * Issue a hfs_filedone to flush out all of the remaining state for this file. - * Allow the rest of the codeflow to re-acquire the cnode locks in order. + /* + * This is currently used by mtmd so we should tidy up the + * file now because the data won't be used again in the + * destination file. */ - - hfs_lock_truncate (VTOC(from_vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); - - if ((error = hfs_lock(VTOC(from_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { - hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT); - return error; - } + hfs_lock_truncate(from_cp, HFS_EXCLUSIVE_LOCK, 0); + hfs_lock_always(from_cp, HFS_EXCLUSIVE_LOCK); + hfs_filedone(from_vp, ap->a_context, HFS_FILE_DONE_NO_SYNC); + hfs_unlock(from_cp); + hfs_unlock_truncate(from_cp, 0); + + // Flush all the data from the source file + error = ubc_msync(from_vp, 0, ubc_getsize(from_vp), NULL, + UBC_PUSHALL | UBC_SYNC); + if (error) + goto exit; - /* Verify the source file is not in use by anyone besides us (including mmap refs) */ - if (vnode_isinuse(from_vp, 1)) { - error = EBUSY; - hfs_unlock(VTOC(from_vp)); - hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT); - return error; +#if HFS_COMPRESSION + /* + * If this is a compressed file, we need to do the same for + * the resource fork. + */ + if (ISSET(from_cp->c_bsdflags, UF_COMPRESSED)) { + error = hfs_flush_rsrc(from_vp, ap->a_context); + if (error) + goto exit; } +#endif - /* Flush out the data in the source file */ - VTOC(from_vp)->c_flag |= C_SWAPINPROGRESS; - error = hfs_filedone (from_vp, ap->a_context); - VTOC(from_vp)->c_flag &= ~C_SWAPINPROGRESS; - hfs_unlock(VTOC(from_vp)); - hfs_unlock_truncate(VTOC(from_vp), HFS_LOCK_DEFAULT); + /* + * We're doing a data-swap so we need to take the truncate + * lock exclusively. 
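hfs_vnop_exchange is the HFS+ backend for the exchangedata(2) system call; the FSOPT_EXCHANGE_DATA_ONLY path discussed here is the internal data-only variant. A minimal user-space sketch of the ordinary full exchange:

    #include <unistd.h>

    /* Atomically swap the contents of two files on the same HFS+ volume. */
    static int swap_files(const char *path1, const char *path2)
    {
        return exchangedata(path1, path2, 0);   /* 0 = default options */
    }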
We need an exclusive lock because we + * will be completely truncating the source file and we must + * make sure nobody else sneaks in and tries to issue I/O + * whilst we don't have the cnode lock. + * + * After taking the truncate lock we do a quick check to + * verify there are no other references (including mmap + * references), but we must remember that this does not stop + * anybody coming in later and taking a reference. We will + * have the truncate lock exclusively so that will prevent + * them from issuing any I/O. + */ - if (error) { - return error; + if (to_cp < from_cp) { + hfs_lock_truncate(to_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + have_to_trunc_lock = true; } - } - if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK))) - return (error); + hfs_lock_truncate(from_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + have_from_trunc_lock = true; - from_cp = VTOC(from_vp); - to_cp = VTOC(to_vp); - hfsmp = VTOHFS(from_vp); + /* + * Do an early check to verify the source is not in use by + * anyone. We should be called from an FD opened as F_EVTONLY + * so that doesn't count as a reference. + */ + if (vnode_isinuse(from_vp, 0)) { + error = EBUSY; + goto exit; + } - /* Resource forks cannot be exchanged. */ - if ( VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { - error = EINVAL; - goto exit; + if (to_cp >= from_cp) { + hfs_lock_truncate(to_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + have_to_trunc_lock = true; + } } + if ((error = hfs_lockpair(from_cp, to_cp, HFS_EXCLUSIVE_LOCK))) + goto exit; + have_cnode_locks = true; + // Don't allow modification of the journal or journal_info_block if (hfs_is_journal_file(hfsmp, from_cp) || hfs_is_journal_file(hfsmp, to_cp)) { error = EPERM; goto exit; } - + /* * Ok, now that all of the pre-flighting is done, call the underlying * function if needed. */ - if (ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) { - error = hfs_movedata(from_vp, to_vp); + if (ISSET(ap->a_options, FSOPT_EXCHANGE_DATA_ONLY)) { +#if HFS_COMPRESSION + if (ISSET(from_cp->c_bsdflags, UF_COMPRESSED)) { + error = hfs_move_compressed(from_cp, to_cp); + goto exit; + } +#endif + + error = hfs_move_data(from_cp, to_cp, 0); goto exit; } - if ((error = hfs_start_transaction(hfsmp)) != 0) { goto exit; @@ -2068,21 +2204,31 @@ hfs_vnop_exchange(ap) } /* - * ExchangeFileIDs swaps the extent information attached to two - * different file IDs. It also swaps the extent information that - * may live in the extents-overflow B-Tree. + * ExchangeFileIDs swaps the on-disk, or in-BTree extent information + * attached to two different file IDs. It also swaps the extent + * information that may live in the extents-overflow B-Tree. * * We do this in a transaction as this may require a lot of B-Tree nodes * to do completely, particularly if one of the files in question - * has a lot of extents. + * has a lot of extents. * * For example, assume "file1" has fileID 50, and "file2" has fileID 52. * For the on-disk records, which are assumed to be synced, we will * first swap the resident inline-8 extents as part of the catalog records. * Then we will swap any extents overflow records for each file. * - * When this function is done, "file1" will have fileID 52, and "file2" will - * have fileID 50. + * When ExchangeFileIDs returns successfully, "file1" will have fileID 52, + * and "file2" will have fileID 50. However, note that this is only + * approximately half of the work that exchangedata(2) will need to + * accomplish.
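The pointer comparisons above (take to_cp's truncate lock first only when to_cp < from_cp) are the usual address-ordered acquisition idiom for taking two locks of the same class without deadlock. In generic form, with placeholder lock_t/lock() rather than the HFS calls:

    static void lock_pair_ordered(lock_t *a, lock_t *b)
    {
        if (a == b) {
            lock(a);                /* degenerate case: only one lock */
            return;
        }
        /* Every thread agrees on the order, so no lock cycle can form. */
        if (a < b) {
            lock(a);
            lock(b);
        } else {
            lock(b);
            lock(a);
        }
    }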
In other words, we swap "too much" of the information + * because if we only called ExchangeFileIDs, both the fileID and extent + * information would be the invariants of this operation. We don't + * actually want that; we want to conclude with "file1" having + * file ID 50, and "file2" having fileID 52. + * + * The remainder of hfs_vnop_exchange will swap the file ID and other cnode + * data back to the proper ownership, while still allowing the cnode to remain + * pointing at the same set of extents that it did originally. */ error = ExchangeFileIDs(hfsmp, from_nameptr, to_nameptr, from_parid, to_parid, from_cp->c_hint, to_cp->c_hint); @@ -2110,7 +2256,6 @@ hfs_vnop_exchange(ap) hfs_incr_gencount (to_cp); } - /* Save a copy of "from" attributes before swapping. */ bcopy(&from_cp->c_desc, &tempdesc, sizeof(struct cat_desc)); bcopy(&from_cp->c_attr, &tempattr, sizeof(struct cat_attr)); @@ -2124,7 +2269,7 @@ hfs_vnop_exchange(ap) to_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); /* - * Complete the in-memory portion of the copy. + * Now complete the in-memory portion of the copy. * * ExchangeFileIDs swaps the on-disk records involved. We complete the * operation by swapping the in-memory contents of the two files here. @@ -2252,29 +2397,89 @@ hfs_vnop_exchange(ap) exit: if (got_cookie) { - cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context)); + cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context)); } if (started_tr) { hfs_end_transaction(hfsmp); } - hfs_unlockpair(from_cp, to_cp); - return (error); + if (have_from_trunc_lock) + hfs_unlock_truncate(from_cp, 0); + + if (have_to_trunc_lock) + hfs_unlock_truncate(to_cp, 0); + + if (have_cnode_locks) + hfs_unlockpair(from_cp, to_cp); + + return (error); } +#if HFS_COMPRESSION +/* + * This function is used specifically for the case when a namespace + * handler is trying to steal data before it's deleted. Note that we + * don't bother deleting the xattr from the source because it will get + * deleted a short time later anyway. + * + * cnodes must be locked + */ +static int hfs_move_compressed(cnode_t *from_cp, cnode_t *to_cp) +{ + int ret; + void *data = NULL; + + CLR(from_cp->c_bsdflags, UF_COMPRESSED); + SET(from_cp->c_flag, C_MODIFIED); + + ret = hfs_move_data(from_cp, to_cp, HFS_MOVE_DATA_INCLUDE_RSRC); + if (ret) + goto exit; + + /* + * Transfer the xattr that decmpfs uses. Ideally, this code + * should be with the other decmpfs code but it's file system + * agnostic and this path is currently, and likely to remain, HFS+ + * specific. It's easier and more performant if we implement it + * here. 
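For comparison, the xattr transfer that hfs_move_compressed performs below can be expressed from user space with getxattr(2)/setxattr(2); the buffer size here is an assumption matching MAX_DECMPFS_XATTR_SIZE of this era, and XATTR_SHOWCOMPRESSION is needed because the xattr is normally hidden:

    #include <sys/xattr.h>

    /* Illustrative user-space analogue of the in-kernel transfer. */
    static int copy_decmpfs_xattr(const char *src, const char *dst)
    {
        char buf[3802];    /* assumed MAX_DECMPFS_XATTR_SIZE */
        ssize_t len = getxattr(src, "com.apple.decmpfs", buf, sizeof(buf),
                               0, XATTR_SHOWCOMPRESSION);
        if (len < 0)
            return -1;
        return setxattr(dst, "com.apple.decmpfs", buf, (size_t)len, 0, 0);
    }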
+ */ + + size_t size = MAX_DECMPFS_XATTR_SIZE; + MALLOC(data, void *, size, M_TEMP, M_WAITOK); + + ret = hfs_xattr_read(from_cp->c_vp, DECMPFS_XATTR_NAME, data, &size); + if (ret) + goto exit; + + ret = hfs_xattr_write(to_cp->c_vp, DECMPFS_XATTR_NAME, data, size); + if (ret) + goto exit; + + SET(to_cp->c_bsdflags, UF_COMPRESSED); + SET(to_cp->c_flag, C_MODIFIED); + +exit: + if (data) + FREE(data, M_TEMP); + + return ret; +} +#endif // HFS_COMPRESSION + int hfs_vnop_mmap(struct vnop_mmap_args *ap) { struct vnode *vp = ap->a_vp; + cnode_t *cp = VTOC(vp); int error; if (VNODE_IS_RSRC(vp)) { /* allow pageins of the resource fork */ } else { - int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ - time_t orig_ctime = VTOC(vp)->c_ctime; + int compressed = hfs_file_is_compressed(cp, 1); /* 1 == don't take the cnode lock */ + time_t orig_ctime = cp->c_ctime; - if (!compressed && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { + if (!compressed && (cp->c_bsdflags & UF_COMPRESSED)) { error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); if (error != 0) { return error; @@ -2283,19 +2488,9 @@ hfs_vnop_mmap(struct vnop_mmap_args *ap) if (ap->a_fflags & PROT_WRITE) { check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); - - /* even though we're manipulating a cnode field here, we're only monotonically increasing - * the generation counter. The vnode can't be recycled (because we hold a FD in order to cause the - * map to happen). So it's safe to do this without holding the cnode lock. The caller's only - * requirement is that the number has been changed. - */ - struct cnode *cp = VTOC(vp); - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { - hfs_incr_gencount(cp); - } } } - + // // NOTE: we return ENOTSUP because we want the cluster layer // to actually do all the real work. @@ -2303,64 +2498,106 @@ return (ENOTSUP); } +static errno_t hfs_vnop_mnomap(struct vnop_mnomap_args *ap) +{ + vnode_t vp = ap->a_vp; + + /* + * Whilst the file was mapped, there may not have been any + * page-outs so we need to increment the generation counter now. + * Unfortunately this may lead to a change in the generation + * counter when no actual change has been made, but there is + * little we can do about that with our current architecture. + */ + if (ubc_is_mapped_writable(vp)) { + cnode_t *cp = VTOC(vp); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + hfs_incr_gencount(cp); + + /* + * We don't want to set the modification time here since a + * change to that is not acceptable if no changes were made. + * Instead we set a flag so that if we get any page-outs we + * know to update the modification time. It's possible that + * they weren't actually caused by changes made whilst the + * file was mapped, but that's not easy to fix now. + */ + SET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING); + + hfs_unlock(cp); + } + + return 0; +} + /* - * hfs_movedata + * Mark the resource fork as needing a ubc_setsize when we drop the + * cnode lock later. + */ +static void hfs_rsrc_setsize(cnode_t *cp) +{ + /* + * We need to take an iocount if we don't have one. vnode_get + * will return ENOENT if the vnode is terminating which is what we + * want as it's not safe to call ubc_setsize in that case. + */ + if (cp->c_rsrc_vp && !vnode_get(cp->c_rsrc_vp)) { + // Shouldn't happen, but better safe...
+ if (ISSET(cp->c_flag, C_NEED_RVNODE_PUT)) + vnode_put(cp->c_rsrc_vp); + SET(cp->c_flag, C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE); + } +} + +/* + * hfs_move_data * * This is a non-symmetric variant of exchangedata. In this function, - * the contents of the fork in from_vp are moved to the fork - * specified by to_vp. + * the contents of the data fork (and optionally the resource fork) + * are moved from from_cp to to_cp. * - * The cnodes pointed to by 'from_vp' and 'to_vp' must be locked. + * The cnodes must be locked. * - * The vnode pointed to by 'to_vp' *must* be empty prior to invoking this function. - * We impose this restriction because we may not be able to fully delete the entire - * file's contents in a single transaction, particularly if it has a lot of extents. - * In the normal file deletion codepath, the file is screened for two conditions: - * 1) bigger than 400MB, and 2) more than 8 extents. If so, the file is relocated to - * the hidden directory and the deletion is broken up into multiple truncates. We can't - * do that here because both files need to exist in the namespace. The main reason this - * is imposed is that we may have to touch a whole lot of bitmap blocks if there are - * many extents. + * The cnode pointed to by 'to_cp' *must* be empty prior to invoking + * this function. We impose this restriction because we may not be + * able to fully delete the entire file's contents in a single + * transaction, particularly if it has a lot of extents. In the + * normal file deletion codepath, the file is screened for two + * conditions: 1) bigger than 400MB, and 2) more than 8 extents. If + * so, the file is relocated to the hidden directory and the deletion + * is broken up into multiple truncates. We can't do that here + * because both files need to exist in the namespace. The main reason + * this is imposed is that we may have to touch a whole lot of bitmap + * blocks if there are many extents. * - * Any data written to 'from_vp' after this call completes is not guaranteed - * to be moved. + * Any data written to 'from_cp' after this call completes is not + * guaranteed to be moved. 
* * Arguments: - * vnode from_vp: source file - * vnode to_vp: destination file; must be empty + * cnode_t *from_cp : source file + * cnode_t *to_cp : destination file; must be empty * * Returns: + * + * EBUSY - File has been deleted or is in use * EFBIG - Destination file was not empty - * 0 - success - * - * + * EIO - An I/O error + * 0 - success + * other - Other errors that can be returned from called functions */ -int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) { - - struct cnode *from_cp; - struct cnode *to_cp; - struct hfsmount *hfsmp = NULL; - int error = 0; - int started_tr = 0; - int lockflags = 0; - int overflow_blocks; - int rsrc = 0; - - - /* Get the HFS pointers */ - from_cp = VTOC(from_vp); - to_cp = VTOC(to_vp); - hfsmp = VTOHFS(from_vp); - - /* Verify that neither source/dest file is open-unlinked */ - if (from_cp->c_flag & (C_DELETED | C_NOEXISTS)) { - error = EBUSY; - goto movedata_exit; - } +int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, + hfs_move_data_options_t options) +{ + hfsmount_t *hfsmp = VTOHFS(from_cp->c_vp); + int error = 0; + int lockflags = 0; + bool return_EIO_on_error = false; + const bool include_rsrc = ISSET(options, HFS_MOVE_DATA_INCLUDE_RSRC); - if (to_cp->c_flag & (C_DELETED | C_NOEXISTS)) { - error = EBUSY; - goto movedata_exit; + /* Verify that neither source/dest file is open-unlinked */ + if (ISSET(from_cp->c_flag, C_DELETED | C_NOEXISTS) + || ISSET(to_cp->c_flag, C_DELETED | C_NOEXISTS)) { + return EBUSY; } /* @@ -2375,230 +2612,257 @@ int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) { * As a result, we shouldn't have any active usecounts against * this vnode when we go to check it below. */ - if (vnode_isinuse(from_vp, 0)) { - error = EBUSY; - goto movedata_exit; - } + if (vnode_isinuse(from_cp->c_vp, 0)) + return EBUSY; - if (from_cp->c_rsrc_vp == from_vp) { - rsrc = 1; - } - - /* - * We assume that the destination file is already empty. - * Verify that it is. - */ - if (rsrc) { - if (to_cp->c_rsrcfork->ff_size > 0) { - error = EFBIG; - goto movedata_exit; - } - } - else { - if (to_cp->c_datafork->ff_size > 0) { - error = EFBIG; - goto movedata_exit; - } - } - - /* If the source has the rsrc open, make sure the destination is also the rsrc */ - if (rsrc) { - if (to_vp != to_cp->c_rsrc_vp) { - error = EINVAL; - goto movedata_exit; + if (include_rsrc && from_cp->c_rsrc_vp) { + if (vnode_isinuse(from_cp->c_rsrc_vp, 0)) + return EBUSY; + + /* + * In the code below, if the destination file doesn't have a + * c_rsrcfork then we don't create it which means we cannot + * transfer the ff_invalidranges and cf_vblocks fields. These + * shouldn't be set because we flush the resource fork before + * calling this function but there is a tiny window when we + * did not have any locks... + */ + if (!to_cp->c_rsrcfork + && (!TAILQ_EMPTY(&from_cp->c_rsrcfork->ff_invalidranges) + || from_cp->c_rsrcfork->ff_unallocblocks)) { + /* + * The file isn't really busy now but something did slip + * in and tinker with the file while we didn't have any + * locks, so this is the most meaningful return code for + * the caller. + */ + return EBUSY; } } - else { - /* Verify that both forks are data forks */ - if (to_vp != to_cp->c_vp) { - error = EINVAL; - goto movedata_exit; - } - } - - /* - * See if the source file has overflow extents. If it doesn't, we don't - * need to call into MoveData, and the catalog will be enough.
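That last point deserves a concrete sketch: a fork whose extents all fit in the catalog record's eight inline descriptors never touches the extents-overflow B-Tree, which is essentially what overflow_extents() tests. A sketch under that assumption, using the standard HFS+ on-disk types (this is not the kernel's overflow_extents() itself):

    #include <stdbool.h>
    #include <hfs/hfs_format.h>

    static bool fork_needs_overflow_btree(const HFSPlusForkData *fork)
    {
        u_int32_t covered = 0;
        for (int i = 0; i < kHFSPlusExtentDensity; i++)   /* 8 inline extents */
            covered += fork->extents[i].blockCount;
        /* Anything not covered inline must live in the overflow B-Tree. */
        return covered < fork->totalBlocks;
    }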
*/ - if (rsrc) { - overflow_blocks = overflow_extents(from_cp->c_rsrcfork); - } - else { - overflow_blocks = overflow_extents(from_cp->c_datafork); - } - - if ((error = hfs_start_transaction (hfsmp)) != 0) { - goto movedata_exit; + + // Check the destination file is empty + if (to_cp->c_datafork->ff_blocks + || to_cp->c_datafork->ff_size + || (include_rsrc + && (to_cp->c_blocks + || (to_cp->c_rsrcfork && to_cp->c_rsrcfork->ff_size)))) { + return EFBIG; } - started_tr = 1; - - /* Lock the system files: catalog, extents, attributes */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); - - /* Copy over any catalog allocation data into the new spot. */ - if (rsrc) { - if ((error = hfs_move_fork (from_cp->c_rsrcfork, from_cp, to_cp->c_rsrcfork, to_cp))){ - hfs_systemfile_unlock(hfsmp, lockflags); - goto movedata_exit; + + if ((error = hfs_start_transaction (hfsmp))) + return error; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, + HFS_EXCLUSIVE_LOCK); + + // filefork_t is 128 bytes which should be OK + filefork_t rfork_buf, *from_rfork = NULL; + + if (include_rsrc) { + from_rfork = from_cp->c_rsrcfork; + + /* + * Creating resource fork vnodes is expensive, so just get + * the fork data if we need it. + */ + if (!from_rfork && hfs_has_rsrc(from_cp)) { + from_rfork = &rfork_buf; + + from_rfork->ff_cp = from_cp; + TAILQ_INIT(&from_rfork->ff_invalidranges); + + error = cat_idlookup(hfsmp, from_cp->c_fileid, 0, 1, NULL, NULL, + &from_rfork->ff_data); + + if (error) + goto exit; } } - else { - if ((error = hfs_move_fork (from_cp->c_datafork, from_cp, to_cp->c_datafork, to_cp))) { - hfs_systemfile_unlock(hfsmp, lockflags); - goto movedata_exit; - } + + /* + * From here on, any failures mean that we might be leaving things + * in a weird or inconsistent state. Ideally, we should back out + * all the changes, but to do that properly we need to fix + * MoveData. We'll save fixing that for another time. For now, + * just return EIO in all cases to the caller so that they know. + */ + return_EIO_on_error = true; + + bool data_overflow_extents = overflow_extents(from_cp->c_datafork); + + // Move the data fork + if ((error = hfs_move_fork (from_cp->c_datafork, from_cp, + to_cp->c_datafork, to_cp))) { + goto exit; } - + + SET(from_cp->c_flag, C_NEED_DATA_SETSIZE); + SET(to_cp->c_flag, C_NEED_DATA_SETSIZE); + + // We move the resource fork later + /* - * Note that because all we're doing is moving the extents around, we can - * probably do this in a single transaction: Each extent record (group of 8) - * is 64 bytes. A extent overflow B-Tree node is typically 4k. This means - * each node can hold roughly ~60 extent records == (480 extents). + * Note that because all we're doing is moving the extents around, + * we can probably do this in a single transaction: Each extent + * record (group of 8) is 64 bytes. An extent overflow B-Tree node + * is typically 4k. This means each node can hold roughly ~60 + * extent records == (480 extents).
+ * If a file was massively fragmented and had 20k extents, this + * means we'd roughly touch 20k/480 == 41 to 42 nodes, plus the + * index nodes, for half of the operation. (inserting or + * deleting). So if we're manipulating 80-100 nodes, this is + * basically 320k of data to write to the journal in a bad case. */ - if (overflow_blocks != 0) { - if (rsrc) { - error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1); - } - else { - error = MoveData (hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0); - } + if (data_overflow_extents) { + if ((error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0))) + goto exit; } - - if (error) { - /* Reverse the operation. Copy the fork data back into the source */ - if (rsrc) { - hfs_move_fork (to_cp->c_rsrcfork, to_cp, from_cp->c_rsrcfork, from_cp); - } - else { - hfs_move_fork (to_cp->c_datafork, to_cp, from_cp->c_datafork, from_cp); - } + + if (from_rfork && overflow_extents(from_rfork)) { + if ((error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1))) + goto exit; + } + + // Touch times + from_cp->c_touch_acctime = TRUE; + from_cp->c_touch_chgtime = TRUE; + from_cp->c_touch_modtime = TRUE; + hfs_touchtimes(hfsmp, from_cp); + + to_cp->c_touch_acctime = TRUE; + to_cp->c_touch_chgtime = TRUE; + to_cp->c_touch_modtime = TRUE; + hfs_touchtimes(hfsmp, to_cp); + + struct cat_fork dfork_buf; + const struct cat_fork *dfork, *rfork; + + dfork = hfs_prepare_fork_for_update(to_cp->c_datafork, &dfork_buf, + hfsmp->blockSize); + rfork = hfs_prepare_fork_for_update(from_rfork, &rfork_buf.ff_data, + hfsmp->blockSize); + + // Update the catalog nodes, to_cp first + if ((error = cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, + dfork, rfork))) { + goto exit; } - else { - struct cat_fork *src_data = NULL; - struct cat_fork *src_rsrc = NULL; - struct cat_fork *dst_data = NULL; - struct cat_fork *dst_rsrc = NULL; - - /* Touch the times*/ - to_cp->c_touch_acctime = TRUE; - to_cp->c_touch_chgtime = TRUE; - to_cp->c_touch_modtime = TRUE; - - from_cp->c_touch_acctime = TRUE; - from_cp->c_touch_chgtime = TRUE; - from_cp->c_touch_modtime = TRUE; - - hfs_touchtimes(hfsmp, to_cp); - hfs_touchtimes(hfsmp, from_cp); - - if (from_cp->c_datafork) { - src_data = &from_cp->c_datafork->ff_data; - } - if (from_cp->c_rsrcfork) { - src_rsrc = &from_cp->c_rsrcfork->ff_data; - } - - if (to_cp->c_datafork) { - dst_data = &to_cp->c_datafork->ff_data; - } - if (to_cp->c_rsrcfork) { - dst_rsrc = &to_cp->c_rsrcfork->ff_data; + + CLR(to_cp->c_flag, C_MODIFIED); + + // Update in-memory resource fork data here + if (from_rfork) { + // Update c_blocks + uint32_t moving = from_rfork->ff_blocks + from_rfork->ff_unallocblocks; + + from_cp->c_blocks -= moving; + to_cp->c_blocks += moving; + + // Update to_cp's resource data if it has it + filefork_t *to_rfork = to_cp->c_rsrcfork; + if (to_rfork) { + to_rfork->ff_invalidranges = from_rfork->ff_invalidranges; + to_rfork->ff_data = from_rfork->ff_data; + + // Deal with ubc_setsize + hfs_rsrc_setsize(to_cp); } - - /* Update the catalog nodes */ - (void) cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, - src_data, src_rsrc); - - (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, - dst_data, dst_rsrc); - + + // Wipe out the resource fork in from_cp + rl_init(&from_rfork->ff_invalidranges); + bzero(&from_rfork->ff_data, sizeof(from_rfork->ff_data)); + + // Deal with ubc_setsize + hfs_rsrc_setsize(from_cp); } - /* unlock the system files */ - hfs_systemfile_unlock(hfsmp, lockflags); - - -movedata_exit: - if (started_tr) { + + // Currently 
unnecessary, but might be useful in future... + dfork = hfs_prepare_fork_for_update(from_cp->c_datafork, &dfork_buf, + hfsmp->blockSize); + rfork = hfs_prepare_fork_for_update(from_rfork, &rfork_buf.ff_data, + hfsmp->blockSize); + + // Update from_cp + if ((error = cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, + dfork, rfork))) { + goto exit; + } + + CLR(from_cp->c_flag, C_MODIFIED); + +exit: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); hfs_end_transaction(hfsmp); } - + + if (error && error != EIO && return_EIO_on_error) { + printf("hfs_move_data: encountered error %d\n", error); + error = EIO; + } + return error; - -} +} /* - * Copy all of the catalog and runtime data in srcfork to dstfork. + * Move all of the catalog and runtime data in srcfork to dstfork. * - * This allows us to maintain the invalid ranges across the movedata operation so - * we don't need to force all of the pending IO right now. In addition, we move all - * non overflow-extent extents into the destination here. + * This allows us to maintain the invalid ranges across the move data + * operation so we don't need to force all of the pending IO right + * now. In addition, we move all non overflow-extent extents into the + * destination here. + * + * The destination fork must be empty and should have been checked + * prior to calling this. */ -static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp, - struct filefork *dstfork, struct cnode *dst_cp) { - struct rl_entry *invalid_range; - int size = sizeof(struct HFSPlusExtentDescriptor); - size = size * kHFSPlusExtentDensity; - - /* If the dstfork has any invalid ranges, bail out */ - invalid_range = TAILQ_FIRST(&dstfork->ff_invalidranges); - if (invalid_range != NULL) { - return EFBIG; - } - - if (dstfork->ff_data.cf_size != 0 || dstfork->ff_data.cf_new_size != 0) { - return EFBIG; - } - - /* First copy the invalid ranges */ - while ((invalid_range = TAILQ_FIRST(&srcfork->ff_invalidranges))) { - off_t start = invalid_range->rl_start; - off_t end = invalid_range->rl_end; - - /* Remove it from the srcfork and add it to dstfork */ - rl_remove(start, end, &srcfork->ff_invalidranges); - rl_add(start, end, &dstfork->ff_invalidranges); - } - - /* - * Ignore the ff_union. We don't move symlinks or system files. - * Now copy the in-catalog extent information - */ - dstfork->ff_data.cf_size = srcfork->ff_data.cf_size; - dstfork->ff_data.cf_new_size = srcfork->ff_data.cf_new_size; - dstfork->ff_data.cf_vblocks = srcfork->ff_data.cf_vblocks; - dstfork->ff_data.cf_blocks = srcfork->ff_data.cf_blocks; - - /* just memcpy the whole array of extents to the new location. */ - memcpy (dstfork->ff_data.cf_extents, srcfork->ff_data.cf_extents, size); - - /* - * Copy the cnode attribute data. 
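Plugging numbers into the transaction-size comment inside hfs_move_data above (64-byte extent records holding 8 extents each, roughly 60 records per 4 KiB B-Tree node) reproduces the figures quoted there; a throwaway user-space check:

    #include <stdio.h>

    int main(void)
    {
        const int extents_per_record = 8;
        const int records_per_node = 60;   /* ~4096/64, less node overhead */
        const int extents_per_node = records_per_node * extents_per_record; /* 480 */
        const int file_extents = 20000;

        /* Ceiling division: leaf nodes touched for one half of the move. */
        int nodes = (file_extents + extents_per_node - 1) / extents_per_node;
        printf("~%d nodes per half-op; ~%d KiB journaled for 80 nodes\n",
               nodes, 80 * 4096 / 1024);   /* prints ~42 and ~320 */
        return 0;
    }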
- * - */ - src_cp->c_blocks -= srcfork->ff_data.cf_vblocks; - src_cp->c_blocks -= srcfork->ff_data.cf_blocks; - - dst_cp->c_blocks += srcfork->ff_data.cf_vblocks; - dst_cp->c_blocks += srcfork->ff_data.cf_blocks; - - /* Now delete the entries in the source fork */ - srcfork->ff_data.cf_size = 0; - srcfork->ff_data.cf_new_size = 0; - srcfork->ff_data.cf_union.cfu_bytesread = 0; - srcfork->ff_data.cf_vblocks = 0; - srcfork->ff_data.cf_blocks = 0; - - /* Zero out the old extents */ - bzero (srcfork->ff_data.cf_extents, size); +static int hfs_move_fork(filefork_t *srcfork, cnode_t *src_cp, + filefork_t *dstfork, cnode_t *dst_cp) +{ + // Move the invalid ranges + dstfork->ff_invalidranges = srcfork->ff_invalidranges; + rl_init(&srcfork->ff_invalidranges); + + // Move the fork data (copy whole structure) + dstfork->ff_data = srcfork->ff_data; + bzero(&srcfork->ff_data, sizeof(srcfork->ff_data)); + + // Update c_blocks + src_cp->c_blocks -= dstfork->ff_blocks + dstfork->ff_unallocblocks; + dst_cp->c_blocks += dstfork->ff_blocks + dstfork->ff_unallocblocks; + return 0; } - + + +#include + +struct hfs_fsync_panic_hook { + panic_hook_t hook; + struct cnode *cp; +}; + +static void hfs_fsync_panic_hook(panic_hook_t *hook_) +{ + struct hfs_fsync_panic_hook *hook = (struct hfs_fsync_panic_hook *)hook_; + extern int kdb_log(const char *fmt, ...); + + // Get the physical region just before cp + panic_phys_range_t range; + uint64_t phys; + + if (panic_phys_range_before(hook->cp, &phys, &range)) { + kdb_log("cp = %p, phys = %p, prev (%p: %p-%p)\n", + hook->cp, phys, range.type, range.phys_start, + range.phys_start + range.len); + } else + kdb_log("cp = %p, phys = %p, prev (!)\n", hook->cp, phys); + + panic_dump_mem((void *)(((vm_offset_t)hook->cp - 4096) & ~4095), 12288); + + kdb_log("\n"); +} + /* * cnode must be locked @@ -2649,11 +2913,18 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); took_trunc_lock = 1; + struct hfs_fsync_panic_hook hook; + hook.cp = cp; + panic_hook(&hook.hook, hfs_fsync_panic_hook); + if (fp->ff_unallocblocks != 0) { hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); } + + panic_unhook(&hook.hook); + /* Don't hold cnode lock when calling into cluster layer. */ (void) cluster_push(vp, waitdata ? IO_SYNC : 0); @@ -3184,10 +3455,8 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, dcp->c_entries--; DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); dcp->c_dirchangecnt++; - { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); - extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); - } + hfs_incr_gencount(dcp); + dcp->c_touch_chgtime = TRUE; dcp->c_touch_modtime = TRUE; hfs_touchtimes(hfsmp, cp); @@ -3209,6 +3478,7 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, /* Mark C_NOEXISTS since the catalog entry is now gone */ cp->c_flag |= C_NOEXISTS; + out: dcp->c_flag &= ~C_DIR_MODIFICATION; wakeup((caddr_t)&dcp->c_flag); @@ -3572,9 +3842,8 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, } /* Check if we have to break the deletion into multiple pieces. 
*/ - if (isdir == 0) { - isbigfile = ((cp->c_datafork->ff_size >= HFS_BIGFILE_SIZE) && overflow_extents(VTOF(vp))); - } + if (isdir == 0) + isbigfile = cp->c_datafork->ff_size >= HFS_BIGFILE_SIZE; /* Check if the file has xattrs. If it does we'll have to delete them in individual transactions in case there are too many */ @@ -3763,10 +4032,8 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); } dcp->c_dirchangecnt++; - { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); - extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); - } + hfs_incr_gencount(dcp); + dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); @@ -3848,10 +4115,8 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (dcp->c_entries > 0) dcp->c_entries--; dcp->c_dirchangecnt++; - { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); - extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); - } + hfs_incr_gencount(dcp); + dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); @@ -3919,7 +4184,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, * to proceed and update the volume header and mark the cnode C_NOEXISTS. * The subsequent fsck should be able to recover the free space for us. */ - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); } else { /* reset update_vh to 0, since hfs_release_storage should have done it for us */ @@ -4199,10 +4464,9 @@ relock: goto retry; } - if (emit_rename && is_tracked) { - resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT); - } - + /* If we want to reintroduce notifications for failed renames, this + is the place to do it. */ + return (error); } @@ -4475,6 +4739,54 @@ relock: /* Preflighting done, take fvp out of the name space. */ cache_purge(fvp); +#if CONFIG_SECLUDED_RENAME + /* + * Check for "secure" rename that imposes additional restrictions on the + * source vnode. We wait until here to check in order to prevent a race + * with other threads that manage to look up fvp, but their open or link + * is blocked by our locks. At this point, with fvp out of the name cache, + * and holding the lock on fdvp, no other thread can find fvp. + * + * TODO: Do we need to limit these checks to regular files only? + */ + if (fcnp->cn_flags & CN_SECLUDE_RENAME) { + if (vnode_isdir(fvp)) { + error = EISDIR; + goto out; + } + + /* + * Neither fork of source may be open or memory mapped. + * We also don't want it in use by any other system call. + * The file must not have hard links. + * + * We can't simply use vnode_isinuse() because that does not + * count opens with O_EVTONLY. We don't want a malicious + * process using O_EVTONLY to subvert a secluded rename. + */ + if (fcp->c_linkcount != 1) { + error = EMLINK; + goto out; + } + + if (fcp->c_rsrc_vp && (fcp->c_rsrc_vp->v_usecount > 0 || + fcp->c_rsrc_vp->v_iocount > 0)) { + /* Resource fork is in use (including O_EVTONLY) */ + error = EBUSY; + goto out; + } + if (fcp->c_vp && (fcp->c_vp->v_usecount > (fcp->c_rsrc_vp ? 
1 : 0) || + fcp->c_vp->v_iocount > 1)) { + /* + * Data fork is in use, including O_EVTONLY, but not + * including a reference from the resource fork. + */ + error = EBUSY; + goto out; + } + } +#endif + bzero(&from_desc, sizeof(from_desc)); from_desc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr; from_desc.cd_namelen = fcnp->cn_namelen; @@ -4843,10 +5155,8 @@ skip_rm: } tdcp->c_entries++; tdcp->c_dirchangecnt++; - { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)tdcp->c_finderinfo + 16); - extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); - } + hfs_incr_gencount(tdcp); + if (fdcp->c_entries > 0) fdcp->c_entries--; fdcp->c_dirchangecnt++; @@ -4856,10 +5166,7 @@ skip_rm: fdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! (void) hfs_update(fdvp, 0); } - { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)fdcp->c_finderinfo + 16); - extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); - } + hfs_incr_gencount(fdcp); tdcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ tdcp->c_touch_chgtime = TRUE; @@ -4933,13 +5240,8 @@ out: if (error && tvp_deleted) error = EIO; - if (emit_rename && is_tracked) { - if (error) { - resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT); - } else { - resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_SUCCESS_OP | NAMESPACE_HANDLER_TRACK_EVENT); - } - } + /* If we want to reintroduce notifications for renames, this is the + place to do it. */ return (error); } @@ -5020,11 +5322,10 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) * Allocate space for the link. * * Since we're already inside a transaction, - * tell hfs_truncate to skip the ubc_setsize. * * Don't need truncate lock since a symlink is treated as a system file. */ - error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, 0, ap->a_context); + error = hfs_truncate(vp, len, IO_NOZEROFILL, 0, ap->a_context); /* On errors, remove the symlink file */ if (error) { @@ -5065,14 +5366,6 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) } else { buf_bawrite(bp); } - /* - * We defered the ubc_setsize for hfs_truncate - * since we were inside a transaction. - * - * We don't need to drop the cnode lock here - * since this is a symlink. - */ - ubc_setsize(vp, len); out: if (started_tr) hfs_end_transaction(hfsmp); @@ -5172,6 +5465,7 @@ hfs_vnop_readdir(ap) int extended; int nfs_cookies; cnid_t cnid_hint = 0; + int bump_valence = 0; items = 0; startoffset = offset = uio_offset(uio); @@ -5264,14 +5558,13 @@ hfs_vnop_readdir(ap) offset += 2; } - /* If there are no real entries then we're done. */ - if (cp->c_entries == 0) { - error = 0; - eofflag = 1; - uio_setoffset(uio, offset); - goto seekoffcalc; - } - + /* + * Intentionally avoid checking the valence here. If we + * have FS corruption that reports the valence is 0, even though it + * has contents, we might artificially skip over iterating + * this directory. + */ + // // We have to lock the user's buffer here so that we won't // fault on it after we've acquired a shared lock on the @@ -5373,6 +5666,31 @@ hfs_vnop_readdir(ap) if (items >= (int)cp->c_entries) { eofflag = 1; } + + /* + * Detect valence FS corruption. + * + * We are holding the cnode lock exclusive, so there should not be + * anybody modifying the valence field of this cnode. 
If we enter + this block, that means we observed filesystem corruption, because + this directory reported a valence of 0, yet we found at least one + item. In this case, we need to minimally self-heal this + directory to prevent userland from tripping over a directory + that appears empty (getattr of valence reports 0), but actually + has contents. + * + * We'll force the cnode update at the end of the function after + * completing all of the normal getdirentries steps. + */ + if ((cp->c_entries == 0) && (items > 0)) { + /* disk corruption */ + cp->c_entries++; + /* Mark the cnode as dirty. */ + cp->c_flag |= (C_MODIFIED | C_FORCEUPDATE); + printf("hfs_vnop_readdir: repairing valence to non-zero! \n"); + bump_valence++; + } + /* Convert catalog directory index back into an offset. */ while (tag == 0) @@ -5406,7 +5724,14 @@ out: if (dirhint == &localhint) { cat_releasedesc(&localhint.dh_desc); } + + if (bump_valence) { + /* force the update before dropping the cnode lock */ + hfs_update(vp, 0); + } + hfs_unlock(cp); + return (error); } @@ -5583,6 +5908,62 @@ hfs_vnop_pathconf(ap) return (0); } +/* + * Prepares a fork for cat_update by making sure ff_size and ff_blocks + * are no bigger than the valid data on disk, thus reducing the chance + * of exposing uninitialised data in the event of a non-clean unmount. + * fork_buf is where to put the temporary copy if required. (It can + * be inside pfork.) + */ +static const struct cat_fork * +hfs_prepare_fork_for_update(const filefork_t *pfork, + struct cat_fork *fork_buf, + uint32_t block_size) +{ + if (!pfork) + return NULL; + + off_t max_size = pfork->ff_size; + + // Check first invalid range + if (!TAILQ_EMPTY(&pfork->ff_invalidranges)) + max_size = TAILQ_FIRST(&pfork->ff_invalidranges)->rl_start; + + if (!pfork->ff_unallocblocks && pfork->ff_size <= max_size) + return &pfork->ff_data; // Nothing to do + + if (pfork->ff_blocks < pfork->ff_unallocblocks) { + panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", + pfork->ff_blocks, pfork->ff_unallocblocks); + } + + struct cat_fork *out = fork_buf; + + if (out != &pfork->ff_data) + bcopy(&pfork->ff_data, out, sizeof(*out)); + + // Adjust cf_blocks for cf_vblocks + out->cf_blocks -= out->cf_vblocks; + + /* + * We have to trim the size with the updated cf_blocks. You might + * think that this is unnecessary because the invalid ranges + * should catch this, but we update invalid ranges *before* we do + * I/O whereas cf_vblocks is updated later in hfs_vnop_blockmap. + * There might still be a chance that we will be exposing + * uninitialised data because the metadata takes a different path to + * data but the window should be tiny (if it exists at all). + */ + off_t alloc_bytes = hfs_blk_to_bytes(out->cf_blocks, block_size); + if (out->cf_size > alloc_bytes) + out->cf_size = alloc_bytes; + + // Trim cf_size to first invalid range + if (out->cf_size > max_size) + out->cf_size = max_size; + + return out; +} /* * Update a cnode's on-disk metadata.
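To make the trimming order in hfs_prepare_fork_for_update concrete, here is a worked example with invented numbers: 4096-byte blocks, ff_blocks = 32 of which ff_unallocblocks = 8 are still borrowed, ff_size = 100000, and a first invalid range starting at byte 90000:

    uint32_t cf_blocks   = 32 - 8;                    /* 24 blocks truly allocated */
    off_t    alloc_bytes = (off_t)cf_blocks * 4096;   /* 98304 bytes */
    off_t    cf_size     = 100000;

    if (cf_size > alloc_bytes)
        cf_size = alloc_bytes;                        /* 100000 -> 98304 */
    if (cf_size > 90000)
        cf_size = 90000;                              /* trimmed to the first hole */
    /* The catalog records 24 blocks / 90000 bytes: never more than is valid on disk. */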
@@ -5597,8 +5978,8 @@ hfs_update(struct vnode *vp, __unused int waitfor) { struct cnode *cp = VTOC(vp); struct proc *p; - struct cat_fork *dataforkp = NULL; - struct cat_fork *rsrcforkp = NULL; + const struct cat_fork *dataforkp = NULL; + const struct cat_fork *rsrcforkp = NULL; struct cat_fork datafork; struct cat_fork rsrcfork; struct hfsmount *hfsmp; @@ -5667,11 +6048,10 @@ hfs_update(struct vnode *vp, __unused int waitfor) return (0); } - KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_START, vp, tstate, 0, 0, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_START, VM_KERNEL_ADDRPERM(vp), tstate, 0, 0, 0); if ((error = hfs_start_transaction(hfsmp)) != 0) { - - KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, -1, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error, -1, 0); return error; } @@ -5679,59 +6059,9 @@ hfs_update(struct vnode *vp, __unused int waitfor) * Modify the values passed to cat_update based on whether or not * the file has invalid ranges or borrowed blocks. */ - if (dataforkp) { - off_t numbytes = 0; - - /* copy the datafork into a temporary copy so we don't pollute the cnode's */ - bcopy(dataforkp, &datafork, sizeof(datafork)); - dataforkp = &datafork; - - /* - * If there are borrowed blocks, ensure that they are subtracted - * from the total block count before writing the cnode entry to disk. - * Only extents that have actually been marked allocated in the bitmap - * should be reflected in the total block count for this fork. - */ - if (cp->c_datafork->ff_unallocblocks != 0) { - // make sure that we don't assign a negative block count - if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) { - panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", - cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks); - } - - /* Also cap the LEOF to the total number of bytes that are allocated. */ - datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks); - datafork.cf_size = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; - } - - /* - * For files with invalid ranges (holes) the on-disk - * field representing the size of the file (cf_size) - * must be no larger than the start of the first hole. - * However, note that if the first invalid range exists - * solely within borrowed blocks, then our LEOF and block - * count should both be zero. As a result, set it to the - * min of the current cf_size and the start of the first - * invalid range, because it may have already been reduced - * to zero by the borrowed blocks check above. - */ - if (!TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges)) { - numbytes = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start; - datafork.cf_size = MIN((numbytes), (datafork.cf_size)); - } - } - - /* - * For resource forks with delayed allocations, make sure - * the block count and file size match the number of blocks - * actually allocated to the file on disk. 
- */ - if (rsrcforkp && (cp->c_rsrcfork->ff_unallocblocks != 0)) { - bcopy(rsrcforkp, &rsrcfork, sizeof(rsrcfork)); - rsrcfork.cf_blocks = (cp->c_rsrcfork->ff_blocks - cp->c_rsrcfork->ff_unallocblocks); - rsrcfork.cf_size = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; - rsrcforkp = &rsrcfork; - } + dataforkp = hfs_prepare_fork_for_update(cp->c_datafork, &datafork, hfsmp->blockSize); + rsrcforkp = hfs_prepare_fork_for_update(cp->c_rsrcfork, &rsrcfork, hfsmp->blockSize); + if (kdebug_enable) { long dbg_parms[NUMPARMS]; int dbg_namelen; @@ -5760,14 +6090,13 @@ hfs_update(struct vnode *vp, __unused int waitfor) hfs_end_transaction(hfsmp); - KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, 0, 0); + KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error, 0, 0); return (error); } /* * Allocate a new node - * Note - Function does not create and return a vnode for whiteout creation. */ int hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, @@ -5792,8 +6121,22 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, #if CONFIG_PROTECT struct cprotect *entry = NULL; int32_t cp_class = -1; + + /* + * By default, it's OK for AKS to override our target class preferences. + */ + uint32_t keywrap_flags = CP_KEYWRAP_DIFFCLASS; + if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) { cp_class = (int32_t)vap->va_dataprotect_class; + /* + * Since the user specifically requested this target class be used, + * we want to fail this creation operation if we cannot wrap to their + * target class. The CP_KEYWRAP_DIFFCLASS bit says that it is OK to + * use a different class than the one specified, so we turn that off + * now. + */ + keywrap_flags &= ~CP_KEYWRAP_DIFFCLASS; } int protected_mount = 0; #endif @@ -5933,8 +6276,6 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, fip->fdType = SWAP_BE32(kSymLinkFileType); fip->fdCreator = SWAP_BE32(kSymLinkCreator); } - if (cnp->cn_flags & ISWHITEOUT) - attr.ca_flags |= UF_OPAQUE; /* Setup the descriptor */ in_desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; @@ -5989,18 +6330,13 @@ /* Update the parent directory */ dcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ dcp->c_entries++; - { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); - extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); - } + if (vnodetype == VDIR) { INC_FOLDERCOUNT(hfsmp, dcp->c_attr); } dcp->c_dirchangecnt++; - { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); - extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); - } + hfs_incr_gencount(dcp); + dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); @@ -6080,12 +6416,6 @@ ... entry = NULL; } #endif - - /* Do not create vnode for whiteouts */ - if (S_ISWHT(mode)) { - goto exit; - } - gnv_flags |= GNV_CREATE; if (nocache) { gnv_flags |= GNV_NOCACHE; @@ -6171,7 +6501,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, panic ("hfs_makenode: no cpentry for cnode (%p)", cp); } - error = cp_generate_keys (hfsmp, cp,
cp->c_cpentry->cp_pclass, &keyed_entry); + error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), keywrap_flags, &keyed_entry); if (error == 0) { /* * Upon success, the keys were generated and written out. @@ -6275,108 +6605,46 @@ exit: /* - * hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode that is - * found in 'vp'. The rsrc fork vnode is returned with the cnode locked and iocount - * on the rsrc vnode. - * - * *rvpp is an output argument for returning the pointer to the resource fork vnode. - * In most cases, the resource fork vnode will not be set if we return an error. - * However, if error_on_unlinked is set, we may have already acquired the resource fork vnode - * before we discover the error (the file has gone open-unlinked). In this case only, - * we may return a vnode in the output argument despite an error. + * hfs_vgetrsrc acquires a resource fork vnode corresponding to the + * cnode that is found in 'vp'. The cnode should be locked upon entry + * and will be returned locked, but it may be dropped temporarily. * - * If can_drop_lock is set, then it is safe for this function to temporarily drop - * and then re-acquire the cnode lock. We may need to do this, for example, in order to - * acquire an iocount or promote our lock. - * - * error_on_unlinked is an argument which indicates that we are to return an error if we - * discover that the cnode has gone into an open-unlinked state ( C_DELETED or C_NOEXISTS) - * is set in the cnode flags. This is only necessary if can_drop_lock is true, otherwise - * there's really no reason to double-check for errors on the cnode. + * On success, *rvpp will hold the resource fork vnode with an + * iocount. *Don't* forget the vnode_put. */ - int -hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, - int can_drop_lock, int error_on_unlinked) +hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp) { struct vnode *rvp; struct vnode *dvp = NULLVP; struct cnode *cp = VTOC(vp); int error; int vid; - int delete_status = 0; if (vnode_vtype(vp) == VDIR) { return EINVAL; } - /* - * Need to check the status of the cnode to validate it hasn't gone - * open-unlinked on us before we can actually do work with it. - */ - delete_status = hfs_checkdeleted(cp); - if ((delete_status) && (error_on_unlinked)) { - return delete_status; - } - restart: /* Attempt to use existing vnode */ if ((rvp = cp->c_rsrc_vp)) { - vid = vnode_vid(rvp); + vid = vnode_vid(rvp); - /* - * It is not safe to hold the cnode lock when calling vnode_getwithvid() - * for the alternate fork -- vnode_getwithvid() could deadlock waiting - * for a VL_WANTTERM while another thread has an iocount on the alternate - * fork vnode and is attempting to acquire the common cnode lock. - * - * But it's also not safe to drop the cnode lock when we're holding - * multiple cnode locks, like during a hfs_removefile() operation - * since we could lock out of order when re-acquiring the cnode lock. - * - * So we can only drop the lock here if its safe to drop it -- which is - * most of the time with the exception being hfs_removefile().
- */ - if (can_drop_lock) - hfs_unlock(cp); + // vnode_getwithvid can block so we need to drop the cnode lock + hfs_unlock(cp); error = vnode_getwithvid(rvp, vid); - if (can_drop_lock) { - (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); - /* - * When we relinquished our cnode lock, the cnode could have raced - * with a delete and gotten deleted. If the caller did not want - * us to ignore open-unlinked files, then re-check the C_DELETED - * state and see if we need to return an ENOENT here because the item - * got deleted in the intervening time. - */ - if (error_on_unlinked) { - if ((delete_status = hfs_checkdeleted(cp))) { - /* - * If error == 0, this means that we succeeded in acquiring an iocount on the - * rsrc fork vnode. However, if we're in this block of code, that means that we noticed - * that the cnode has gone open-unlinked. In this case, the caller requested that we - * not do any other work and return an errno. The caller will be responsible for - * dropping the iocount we just acquired because we can't do it until we've released - * the cnode lock. - */ - if (error == 0) { - *rvpp = rvp; - } - return delete_status; - } - } + /* + * When our lock was relinquished, the resource fork + * could have been recycled. Check for this and try + * again. + */ + if (error == ENOENT) + goto restart; - /* - * When our lock was relinquished, the resource fork - * could have been recycled. Check for this and try - * again. - */ - if (error == ENOENT) - goto restart; - } if (error) { const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr; @@ -6393,7 +6661,7 @@ restart: char delname[32]; int lockflags; int newvnode_flags = 0; - + /* * Make sure cnode lock is exclusive, if not upgrade it. * @@ -6401,9 +6669,6 @@ restart: * and that its safe to have the cnode lock dropped and reacquired. */ if (cp->c_lockowner != current_thread()) { - if (!can_drop_lock) { - return (EINVAL); - } /* * If the upgrade fails we lose the lock and * have to take the exclusive lock on our own. @@ -6418,17 +6683,8 @@ restart: * C_DELETED. This is because we need to continue to provide rsrc * fork access to open-unlinked files. In this case, build a fake descriptor * like in hfs_removefile. If we don't do this, buildkey will fail in - * cat_lookup because this cnode has no name in its descriptor. However, - * only do this if the caller did not specify that they wanted us to - * error out upon encountering open-unlinked files. + * cat_lookup because this cnode has no name in its descriptor. */ - - if ((error_on_unlinked) && (can_drop_lock)) { - if ((error = hfs_checkdeleted(cp))) { - return error; - } - } - if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) { bzero (&to_desc, sizeof(to_desc)); bzero (delname, 32); @@ -6522,6 +6778,11 @@ restart: cn.cn_namelen = snprintf(cn.cn_nameptr, MAXPATHLEN, "%s%s", descptr->cd_nameptr, _PATH_RSRCFORKSPEC); + // Should never happen because cn.cn_nameptr won't ever be long... + if (cn.cn_namelen >= MAXPATHLEN) { + FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); + return ENAMETOOLONG; + } } dvp = vnode_getparent(vp); error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL, @@ -6736,15 +6997,14 @@ hfs_vnop_fsync(ap) return 0; } -#if CONFIG_PROTECT - if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { - return (error); - } -#endif /* CONFIG_PROTECT */ - /* - * We need to allow ENOENT lock errors since unlink - * systenm call can call VNOP_FSYNC during vclean. 
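[Editor's note: the ENAMETOOLONG guard added in the hfs_vgetrsrc() hunk above relies on snprintf() returning the length it would have written, so a result >= the buffer size means truncation. A self-contained demo follows; the suffix constant is an illustrative stand-in for _PATH_RSRCFORKSPEC.]

    #include <errno.h>
    #include <stdio.h>

    #define RSRCFORKSPEC "/..namedfork/rsrc"   /* illustrative stand-in */

    /* Build "<name>/..namedfork/rsrc", failing cleanly on truncation. */
    static int
    build_rsrc_name(char *buf, size_t bufsize, const char *name)
    {
        int len = snprintf(buf, bufsize, "%s%s", name, RSRCFORKSPEC);
        /* snprintf() reports the untruncated length, so a value >= bufsize
         * means the buffer was too small -- the same check the hunk adds. */
        if (len < 0 || (size_t)len >= bufsize)
            return ENAMETOOLONG;
        return 0;
    }

    int
    main(void)
    {
        char buf[24];
        printf("%d\n", build_rsrc_name(buf, sizeof(buf), "ok"));  /* 0 */
        printf("%d\n", build_rsrc_name(buf, sizeof(buf),
            "a-rather-long-file-name"));       /* ENAMETOOLONG */
    }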
+ * No need to call cp_handle_vnop to resolve fsync(). Any dirty data + * should have caused the keys to be unwrapped at the time the data was + * put into the UBC, either at mmap/pagein/read-write. If we did manage + * to let this by, then strategy will auto-resolve for us. + * + * We also need to allow ENOENT lock errors since unlink + * system call can call VNOP_FSYNC during vclean. */ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error) @@ -6756,74 +7016,6 @@ hfs_vnop_fsync(ap) return (error); } - -int -hfs_vnop_whiteout(ap) - struct vnop_whiteout_args /* { - struct vnode *a_dvp; - struct componentname *a_cnp; - int a_flags; - vfs_context_t a_context; - } */ *ap; -{ - int error = 0; - struct vnode *vp = NULL; - struct vnode_attr va; - struct vnop_lookup_args lookup_args; - struct vnop_remove_args remove_args; - struct hfsmount *hfsmp; - - hfsmp = VTOHFS(ap->a_dvp); - if (hfsmp->hfs_flags & HFS_STANDARD) { - error = ENOTSUP; - goto exit; - } - - switch (ap->a_flags) { - case LOOKUP: - error = 0; - break; - - case CREATE: - VATTR_INIT(&va); - VATTR_SET(&va, va_type, VREG); - VATTR_SET(&va, va_mode, S_IFWHT); - VATTR_SET(&va, va_uid, 0); - VATTR_SET(&va, va_gid, 0); - - error = hfs_makenode(ap->a_dvp, &vp, ap->a_cnp, &va, ap->a_context); - /* No need to release the vnode as no vnode is created for whiteouts */ - break; - - case DELETE: - lookup_args.a_dvp = ap->a_dvp; - lookup_args.a_vpp = &vp; - lookup_args.a_cnp = ap->a_cnp; - lookup_args.a_context = ap->a_context; - - error = hfs_vnop_lookup(&lookup_args); - if (error) { - break; - } - - remove_args.a_dvp = ap->a_dvp; - remove_args.a_vp = vp; - remove_args.a_cnp = ap->a_cnp; - remove_args.a_flags = 0; - remove_args.a_context = ap->a_context; - - error = hfs_vnop_remove(&remove_args); - vnode_put(vp); - break; - - default: - panic("hfs_vnop_whiteout: unknown operation (flag = %x)\n", ap->a_flags); - }; - -exit: - return (error); -} - int (**hfs_vnodeop_p)(void *); #define VOPFUNC int (*)(void *) @@ -6886,12 +7078,12 @@ struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = { { &vnop_setxattr_desc, (VOPFUNC)hfs_readonly_op}, /* set xattr (READONLY) */ { &vnop_removexattr_desc, (VOPFUNC)hfs_readonly_op}, /* remove xattr (READONLY) */ { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, - { &vnop_whiteout_desc, (VOPFUNC)hfs_readonly_op}, /* whiteout (READONLY) */ #if NAMEDSTREAMS { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream }, { &vnop_makenamedstream_desc, (VOPFUNC)hfs_readonly_op }, { &vnop_removenamedstream_desc, (VOPFUNC)hfs_readonly_op }, #endif + { &vnop_getattrlistbulk_desc, (VOPFUNC)hfs_vnop_getattrlistbulk }, /* getattrlistbulk */ { NULL, (VOPFUNC)NULL } }; @@ -6948,12 +7140,13 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, - { &vnop_whiteout_desc, (VOPFUNC)hfs_vnop_whiteout}, #if NAMEDSTREAMS { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream }, { &vnop_makenamedstream_desc, (VOPFUNC)hfs_vnop_makenamedstream }, { &vnop_removenamedstream_desc, (VOPFUNC)hfs_vnop_removenamedstream }, #endif + { &vnop_getattrlistbulk_desc, (VOPFUNC)hfs_vnop_getattrlistbulk }, /* getattrlistbulk */ + { &vnop_mnomap_desc, (VOPFUNC)hfs_vnop_mnomap }, { NULL, (VOPFUNC)NULL } }; diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c index 29145dd4a..3a989c132 100644 --- a/bsd/hfs/hfs_xattr.c +++ b/bsd/hfs/hfs_xattr.c @@ -1,5 +1,5 
@@ /* - * Copyright (c) 2004-2013 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include "hfs.h" #include "hfs_cnode.h" @@ -72,10 +74,6 @@ struct listattr_callback_state { #define XATTR_EXTENDEDSECURITY_NAME "system.extendedsecurity" #define XATTR_XATTREXTENTS_NAME "system.xattrextents" -/* Faster version if we already know this is the data fork. */ -#define RSRC_FORK_EXISTS(CP) \ - (((CP)->c_attr.ca_blocks - (CP)->c_datafork->ff_data.cf_blocks) > 0) - static u_int32_t emptyfinfo[8] = {0}; static int hfs_zero_hidden_fields (struct cnode *cp, u_int8_t *finderinfo); @@ -133,7 +131,7 @@ hfs_vnop_getnamedstream(struct vnop_getnamedstream_args* ap) if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (error); } - if ((!RSRC_FORK_EXISTS(cp) + if ((!hfs_has_rsrc(cp) #if HFS_COMPRESSION || hide_rsrc #endif /* HFS_COMPRESSION */ @@ -141,7 +139,7 @@ hfs_vnop_getnamedstream(struct vnop_getnamedstream_args* ap) hfs_unlock(cp); return (ENOATTR); } - error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE, FALSE); + error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp); hfs_unlock(cp); return (error); @@ -184,7 +182,7 @@ hfs_vnop_makenamedstream(struct vnop_makenamedstream_args* ap) if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (error); } - error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE, FALSE); + error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp); hfs_unlock(cp); return (error); @@ -197,7 +195,7 @@ int hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap) { vnode_t svp = ap->a_svp; - struct cnode *scp; + cnode_t *scp = VTOC(svp); int error = 0; /* @@ -207,26 +205,20 @@ hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap) return (ENOATTR); } #if HFS_COMPRESSION - if (hfs_hides_rsrc(ap->a_context, VTOC(svp), 1)) { + if (hfs_hides_rsrc(ap->a_context, scp, 1)) { /* do nothing */ return 0; } #endif /* HFS_COMPRESSION */ - - scp = VTOC(svp); - /* Take truncate lock before taking cnode lock. 
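[Editor's note: the comment kept just above ("take truncate lock before taking cnode lock") is the HFS lock-ordering rule these hunks observe throughout. A hedged kernel-style sketch of the discipline, not taken verbatim from any hunk:]

    /* Hedged sketch: HFS lock ordering -- the truncate lock comes strictly
     * before the cnode lock, and they are released in the reverse order.
     * Kernel-only fragment; cp is a locked-candidate cnode. */
    hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
    if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) {
        /* ... modify the fork ... */
        hfs_unlock(cp);
    }
    hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);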
*/ hfs_lock_truncate(scp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - if ((error = hfs_lock(scp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { - goto out; + if (VTOF(svp)->ff_size) { + // hfs_truncate will deal with the cnode lock + error = hfs_truncate(svp, 0, IO_NDELAY, 0, ap->a_context); } - if (VTOF(svp)->ff_size != 0) { - error = hfs_truncate(svp, 0, IO_NDELAY, 0, 0, ap->a_context); - } - hfs_unlock(scp); -out: hfs_unlock_truncate(scp, HFS_LOCK_DEFAULT); - return (error); + + return error; } #endif @@ -341,7 +333,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) } namelen = cp->c_desc.cd_namelen; - if ( !RSRC_FORK_EXISTS(cp)) { + if (!hfs_has_rsrc(cp)) { hfs_unlock(cp); return (ENOATTR); } @@ -350,7 +342,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) openunlinked = 1; } - result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); + result = hfs_vgetrsrc(hfsmp, vp, &rvp); hfs_unlock(cp); if (result) { return (result); @@ -418,7 +410,23 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) return MacToVFSError(result); } +// Has same limitations as hfs_getxattr_internal below +int hfs_xattr_read(vnode_t vp, const char *name, void *data, size_t *size) +{ + char uio_buf[UIO_SIZEOF(1)]; + uio_t uio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, uio_buf, + sizeof(uio_buf)); + + uio_addiov(uio, CAST_USER_ADDR_T(data), *size); + struct vnop_getxattr_args args = { + .a_uio = uio, + .a_name = name, + .a_size = size + }; + + return hfs_getxattr_internal(VTOC(vp), &args, VTOHFS(vp), 0); +} /* * getxattr_internal @@ -766,6 +774,12 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) dateadded = hfs_get_dateadded (cp); if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16); + /* + * Grab generation counter directly from the cnode + * instead of calling hfs_get_gencount(), because + * for zero generation count values hfs_get_gencount() + * lies and bumps it up to one. + */ write_gen_counter = extinfo->write_gen_counter; document_id = extinfo->document_id; } else if (S_ISDIR(cp->c_attr.ca_mode)) { @@ -774,7 +788,11 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) document_id = extinfo->document_id; } - /* Zero out the date added field to ignore user's attempts to set it */ + /* + * Zero out the finder info's reserved fields like date added, + * generation counter, and document id to ignore user's attempts + * to set it + */ hfs_zero_hidden_fields(cp, finderinfo); if (bcmp(finderinfo_start, emptyfinfo, attrsize)) { @@ -792,16 +810,15 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) } /* - * Now restore the date added to the finderinfo to be written out. - * Advance to the 2nd half of the finderinfo to write out the date added - * into the buffer. + * Now restore the date added and other reserved fields to the finderinfo to + * be written out. Advance to the 2nd half of the finderinfo to write them + * out into the buffer. * * Make sure to endian swap the date added back into big endian. When we used * hfs_get_dateadded above to retrieve it, it swapped into local endianness * for us. But now that we're writing it out, put it back into big endian. 
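[Editor's note: the comment above turns on the host versus big-endian distinction for on-disk Finder info fields. A tiny self-contained roundtrip demo of the OSSwap pair these hunks use; on OS X, <libkern/OSByteOrder.h> is available in userland too.]

    #include <stdio.h>
    #include <stdint.h>
    #include <libkern/OSByteOrder.h>

    int
    main(void)
    {
        uint32_t host = 0x12345678;                  /* in-memory value */
        uint32_t disk = OSSwapHostToBigInt32(host);  /* on-disk (big-endian) form */
        uint32_t back = OSSwapBigToHostInt32(disk);  /* read back into host order */

        printf("host=%#x disk=%#x roundtrip=%#x\n", host, disk, back);
        return back == host ? 0 : 1;
    }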
*/ finfo = &finderinfo[16]; - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; extinfo->date_added = OSSwapHostToBigInt32(dateadded); @@ -859,7 +876,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) cp = VTOC(vp); namelen = cp->c_desc.cd_namelen; - if (RSRC_FORK_EXISTS(cp)) { + if (hfs_has_rsrc(cp)) { /* attr exists and "create" was specified. */ if (ap->a_options & XATTR_CREATE) { hfs_unlock(cp); @@ -881,7 +898,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) openunlinked = 1; } - result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); + result = hfs_vgetrsrc(hfsmp, vp, &rvp); hfs_unlock(cp); if (result) { return (result); @@ -970,6 +987,16 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) return (result == btNotFound ? ENOATTR : MacToVFSError(result)); } +// Has same limitations as hfs_setxattr_internal below +int hfs_xattr_write(vnode_t vp, const char *name, const void *data, size_t size) +{ + struct vnop_setxattr_args args = { + .a_vp = vp, + .a_name = name, + }; + + return hfs_setxattr_internal(VTOC(vp), data, size, &args, VTOHFS(vp), 0); +} /* * hfs_setxattr_internal @@ -987,7 +1014,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) * 3. If data originates entirely in-kernel, use a null UIO, and ensure the size is less than * hfsmp->hfs_max_inline_attrsize bytes long. */ -int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, +int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsize, struct vnop_setxattr_args *ap, struct hfsmount *hfsmp, u_int32_t fileid) { @@ -1339,17 +1366,21 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (result); } - if ( !RSRC_FORK_EXISTS(cp)) { + if (!hfs_has_rsrc(cp)) { hfs_unlock(cp); return (ENOATTR); } - result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); + result = hfs_vgetrsrc(hfsmp, vp, &rvp); hfs_unlock(cp); if (result) { return (result); } hfs_lock_truncate(VTOC(rvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + // Tell UBC now before we take the cnode lock and start the transaction + hfs_ubc_setsize(rvp, 0, false); + if ((result = hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); vnode_put(rvp); @@ -1366,7 +1397,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) return (result); } - result = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 0, 0, ap->a_context); + result = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 0, ap->a_context); if (result == 0) { cp->c_touch_chgtime = TRUE; cp->c_flag |= C_MODIFIED; @@ -1764,11 +1795,11 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap) } } /* If Resource Fork is non-empty then export it's name. 
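[Editor's note: the new hfs_xattr_read()/hfs_xattr_write() wrappers added in this file let other kernel code move an xattr to or from a kernel buffer without hand-building vnop argument structs. A hedged usage sketch, not from the patch; kernel-only fragment, the attribute name is hypothetical, and the vnode must carry an iocount.]

    /* Read a small named attribute into buf (*size is capacity in, bytes
     * read out), then write it straight back -- mirroring the wrappers'
     * contracts as declared above. */
    static int
    rewrite_small_xattr(vnode_t vp, char *buf, size_t bufsize)
    {
        size_t size = bufsize;
        int error = hfs_xattr_read(vp, "com.example.tag", buf, &size);
        if (error)
            return error;
        /* ... fix the value up in place ... */
        return hfs_xattr_write(vp, "com.example.tag", buf, size);
    }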
*/ - if (S_ISREG(cp->c_mode) && RSRC_FORK_EXISTS(cp)) { + if (S_ISREG(cp->c_mode) && hfs_has_rsrc(cp)) { #if HFS_COMPRESSION if ((ap->a_options & XATTR_SHOWCOMPRESSION) || !compressed || - !hfs_hides_rsrc(ap->a_context, VTOC(vp), 1) /* 1 == don't take the cnode lock */ + !decmpfs_hides_rsrc(ap->a_context, VTOCMP(vp)) ) #endif /* HFS_COMPRESSION */ { @@ -1903,7 +1934,7 @@ listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *dat return (1); /* continue */ #if HFS_COMPRESSION - if (!state->showcompressed && hfs_hides_xattr(state->ctx, VTOC(state->vp), attrname, 1) ) /* 1 == don't take the cnode lock */ + if (!state->showcompressed && decmpfs_hides_xattr(state->ctx, VTOCMP(state->vp), attrname) ) return 1; /* continue */ #endif /* HFS_COMPRESSION */ @@ -2589,3 +2620,4 @@ count_extent_blocks(int maxblks, HFSPlusExtentRecord extents) } return (blocks); } + diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c index be52f5900..99888cafd 100644 --- a/bsd/hfs/hfscommon/BTree/BTree.c +++ b/bsd/hfs/hfscommon/BTree/BTree.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008, 2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -722,7 +722,7 @@ OSStatus BTIterateRecord (FCB *filePtr, err = fsBTInvalidNodeErr; printf ("hfs: BTIterateRecord() found invalid btree node on volume %s\n", FCBTOVCB(filePtr)->vcbVN); - hfs_mark_volume_inconsistent(FCBTOVCB(filePtr)); + hfs_mark_inconsistent(FCBTOVCB(filePtr), HFS_INCONSISTENCY_DETECTED); goto ErrorExit; } @@ -890,7 +890,7 @@ CopyData: { err = fsBTInvalidNodeErr; printf ("hfs: BTIterateRecord() found invalid btree node on volume %s\n", FCBTOVCB(filePtr)->vcbVN); - hfs_mark_volume_inconsistent(FCBTOVCB(filePtr)); + hfs_mark_inconsistent(FCBTOVCB(filePtr), HFS_INCONSISTENCY_DETECTED); goto ErrorExit; } #endif @@ -1030,7 +1030,7 @@ BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator err = fsBTInvalidNodeErr; printf ("hfs: BTIterateRecords() found invalid btree node on volume %s\n", FCBTOVCB(filePtr)->vcbVN); - hfs_mark_volume_inconsistent(FCBTOVCB(filePtr)); + hfs_mark_inconsistent(FCBTOVCB(filePtr), HFS_INCONSISTENCY_DETECTED); goto ErrorExit; } diff --git a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c index e4b86f2ec..6a76e1df7 100644 --- a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c +++ b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c @@ -294,7 +294,7 @@ FlushCatalog(ExtendedVCB *volume) { //--- check if catalog's fcb is dirty... - if ( 0 /*fcb->fcbFlags & fcbModifiedMask*/ ) + if ( (0) /*fcb->fcbFlags & fcbModifiedMask*/ ) { hfs_lock_mount (hfsmp); MarkVCBDirty(volume); // Mark the VCB dirty diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c index 6d6c228d9..018a8701e 100644 --- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c +++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c @@ -97,7 +97,9 @@ Internal Routines: and was in the extents file, then delete the record instead. 
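[Editor's note: the BTree.c hunks above convert hfs_mark_volume_inconsistent(vcb) to hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED) everywhere. When an API grows a reason argument like this, a transition shim can keep unconverted call sites compiling while they are migrated; the macro below is a hypothetical illustration and is NOT something this patch adds.]

    /* Hypothetical compatibility shim -- not in the patch. Old call sites
     * keep building while each is converted to pass an explicit reason. */
    #define hfs_mark_volume_inconsistent(vcb) \
            hfs_mark_inconsistent((vcb), HFS_INCONSISTENCY_DETECTED)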
*/ +#if CONFIG_HFS_STD static const int64_t kTwoGigabytes = 0x80000000LL; +#endif enum { @@ -1169,6 +1171,8 @@ OSErr ExtendFileC ( else { wantContig = true; } + + useMetaZone = flags & kEFMetadataMask; do { if (blockHint != 0) @@ -1185,10 +1189,12 @@ OSErr ExtendFileC ( if (availbytes <= 0) { err = dskFulErr; } else { - if (wantContig && (availbytes < bytesToAdd)) + if (wantContig && (availbytes < bytesToAdd)) { err = dskFulErr; + } else { uint32_t ba_flags = 0; + if (wantContig) { ba_flags |= HFS_ALLOC_FORCECONTIG; } @@ -1367,9 +1373,9 @@ Exit: #if CONFIG_HFS_STD HFS_Std_Overflow: -#endif err = fileBoundsErr; goto ErrorExit; +#endif } diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c index 0fa70d27b..b49cf439c 100644 --- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c +++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -526,21 +526,46 @@ static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, ;________________________________________________________________________________ */ -static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) { +static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) +{ dk_unmap_t unmap; int error = 0; + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0); + } + if (list->extent_count > 0) { bzero(&unmap, sizeof(unmap)); unmap.extents = list->extents; unmap.extentsCount = list->extent_count; + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0); + } + +#if CONFIG_PROTECT + /* + * If we have not yet completed the first scan through the bitmap, then + * optionally inform the block driver below us that this is an initialization + * TRIM scan, if it can deal with this information. + */ + if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) { + unmap.options |= _DK_UNMAP_INITIALIZE; + } +#endif /* Issue a TRIM and flush them out */ error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t))); + bzero (&unmap, sizeof(unmap)); list->extent_count = 0; } + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0); + } + return error; } @@ -727,6 +752,15 @@ CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks ; up-to-date as possible with which blocks are unmapped. ; Additionally build up the summary table as needed. ; + ; This function reads the bitmap in large block size + ; (up to 1MB) unlike the runtime which reads the bitmap + ; in 4K block size. So if this function is being called + ; after the volume is mounted and actively modified, the + ; caller needs to invalidate all of the existing buffers + ; associated with the bitmap vnode before calling this + ; function. If the buffers are not invalidated, it can + ; cause buf_t collision and potential data corruption. + ; ; Input Arguments: ; hfsmp - The volume containing the allocation blocks.
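[Editor's note: hfs_issue_unmap() above batches free extents into a dk_unmap_t and hands them to the block driver. A minimal hedged sketch of the same call for a single extent; kernel-only fragment, with offsets and lengths as byte quantities per <sys/disk.h>.]

    /* Issue a single-extent TRIM the way hfs_issue_unmap() does for a
     * batch. devvp is the device vnode; TRIM errors are advisory. */
    static int
    trim_one_extent(vnode_t devvp, uint64_t offset_bytes, uint64_t length_bytes)
    {
        dk_extent_t extent = { .offset = offset_bytes, .length = length_bytes };
        dk_unmap_t  unmap;

        bzero(&unmap, sizeof(unmap));
        unmap.extents      = &extent;
        unmap.extentsCount = 1;

        return VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0,
                          vfs_context_kernel());
    }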
;________________________________________________________________________________ @@ -739,6 +773,10 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) int error = 0; struct jnl_trim_list trimlist; + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0); + } + /* *struct jnl_trim_list { uint32_t allocated_count; @@ -807,6 +845,10 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) } #endif + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0); + } + return error; } @@ -2190,7 +2232,7 @@ static OSErr BlockAllocateKnown( if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN); - hfs_mark_volume_inconsistent(vcb); + hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); *actualStartBlock = 0; *actualNumBlocks = 0; err = EIO; @@ -2601,7 +2643,7 @@ OSErr BlockMarkFreeInternal( } printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN); - hfs_mark_volume_inconsistent(vcb); + hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); err = EIO; goto Exit; } @@ -2788,7 +2830,7 @@ Corruption: panic("hfs: BlockMarkFreeInternal: blocks not allocated!"); #else printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN); - hfs_mark_volume_inconsistent(vcb); + hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); err = EIO; goto Exit; #endif diff --git a/bsd/hfs/rangelist.c b/bsd/hfs/rangelist.c index 74ced2e58..0a1b412b6 100644 --- a/bsd/hfs/rangelist.c +++ b/bsd/hfs/rangelist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2006-2008 Apple Inc. All rights reserved. + * Copyright (c) 2001-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,20 +97,18 @@ rl_add(off_t start, off_t end, struct rl_head *rangelist) switch (ovcase) { case RL_NOOVERLAP: /* 0: no overlap */ /* - * If the list was empty 'prev' is undisturbed and 'overlap' == NULL; - * if the search hit a non-overlapping entry PAST the start of the - * new range, 'prev' points to ITS predecessor, and 'overlap' points - * to that entry: - */ + * overlap points to the entry we should insert before, or + * if NULL, we should insert at the end. 
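[Editor's note: the ScanUnmapBlocks()/hfs_issue_unmap() hunks above bracket the scan with DBG_FUNC_START/DBG_FUNC_END kdebug events, gated by a boot-time mask. A hedged sketch of that bracketing idiom; kernel-only fragment, with work() a stand-in for the traced operation.]

    static int
    traced_scan(struct hfsmount *hfsmp)
    {
        int error;

        if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
            KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START,
                                  hfsmp->hfs_raw_dev, 0, 0, 0, 0);

        error = work(hfsmp);    /* stand-in for the real scan */

        /* Cut the END event on every exit path, or the interval never
         * closes in the trace. */
        if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
            KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END,
                                  error, hfsmp->hfs_raw_dev, 0, 0, 0);
        return error;
    }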
+ */ MALLOC(range, struct rl_entry *, sizeof(*range), M_TEMP, M_WAITOK); range->rl_start = start; range->rl_end = end; /* Link in the new range: */ if (overlap) { - TAILQ_INSERT_AFTER(rangelist, overlap, range, rl_link); + TAILQ_INSERT_BEFORE(overlap, range, rl_link); } else { - TAILQ_INSERT_HEAD(rangelist, range, rl_link); + TAILQ_INSERT_TAIL(rangelist, range, rl_link); } /* Check to see if any ranges can be combined (possibly including the immediately @@ -314,13 +312,12 @@ rl_scan_from(struct rl_head *rangelist, return RL_NOOVERLAP; }; - range = TAILQ_NEXT(range, rl_link); /* Check the other entries in the list: */ - if (range == NULL) { + range = TAILQ_NEXT(range, rl_link); + *overlap = range; + if (range == NULL) return RL_NOOVERLAP; - } - *overlap = range; continue; } @@ -361,8 +358,6 @@ rl_scan_from(struct rl_head *rangelist, panic("hfs: rl_scan_from: unhandled overlap condition?!"); #endif } - - return RL_NOOVERLAP; } @@ -418,6 +413,14 @@ rl_collapse_neighbors(struct rl_head *rangelist, struct rl_entry *range) rl_collapse_backwards(rangelist, range); } +void rl_remove_all(struct rl_head *rangelist) +{ + struct rl_entry *r, *nextr; + TAILQ_FOREACH_SAFE(r, rangelist, rl_link, nextr) + FREE(r, M_TEMP); + TAILQ_INIT(rangelist); +} + #else /* not HFS - temp workaround until 4277828 is fixed */ /* stubs for exported routines that aren't present when we build kernel without HFS */ @@ -448,4 +451,8 @@ int rl_scan(__unused void *rangelist, __unused off_t start, __unused off_t end, return(0); } +void rl_remove_all(struct rl_head *rangelist) +{ +} + #endif /* HFS */ diff --git a/bsd/hfs/rangelist.h b/bsd/hfs/rangelist.h index 7cfa0e8d9..0f66d34c9 100644 --- a/bsd/hfs/rangelist.h +++ b/bsd/hfs/rangelist.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2001-2014 Apple Computer, Inc. All rights reserved. 
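[Editor's note: the rl_add() fix above switches to TAILQ_INSERT_BEFORE/TAILQ_INSERT_TAIL to match the new meaning of 'overlap', and the new rl_remove_all() frees entries mid-walk via TAILQ_FOREACH_SAFE. Both macros in one self-contained userland demo using <sys/queue.h>:]

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    struct node { int v; TAILQ_ENTRY(node) link; };
    TAILQ_HEAD(nhead, node);

    static struct node *
    mk(int v)
    {
        struct node *n = malloc(sizeof(*n));
        n->v = v;
        return n;
    }

    int
    main(void)
    {
        struct nhead list = TAILQ_HEAD_INITIALIZER(list);
        struct node *a = mk(10), *m = mk(20), *b = mk(30);
        struct node *n, *next;

        TAILQ_INSERT_TAIL(&list, a, link);   /* the 'overlap == NULL' case */
        TAILQ_INSERT_TAIL(&list, b, link);
        TAILQ_INSERT_BEFORE(b, m, link);     /* insert before 'overlap' */

        TAILQ_FOREACH(n, &list, link)
            printf("%d\n", n->v);            /* prints 10 20 30 */

        /* _SAFE caches the successor before the body runs, so freeing n
         * mid-walk is legal -- the same idiom rl_remove_all() uses. */
        TAILQ_FOREACH_SAFE(n, &list, link, next)
            free(n);
        TAILQ_INIT(&list);
        return 0;
    }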
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -58,6 +58,7 @@ __BEGIN_DECLS void rl_init(struct rl_head *rangelist); void rl_add(off_t start, off_t end, struct rl_head *rangelist); void rl_remove(off_t start, off_t end, struct rl_head *rangelist); +void rl_remove_all(struct rl_head *rangelist); enum rl_overlaptype rl_scan(struct rl_head *rangelist, off_t start, off_t end, diff --git a/bsd/i386/Makefile b/bsd/i386/Makefile index 5dd87eecc..7433ece89 100644 --- a/bsd/i386/Makefile +++ b/bsd/i386/Makefile @@ -9,14 +9,14 @@ include $(MakeInc_def) DATAFILES = \ endian.h fasttrap_isa.h param.h \ - profile.h setjmp.h signal.h limits.h _limits.h \ - types.h vmparam.h _structs.h _types.h _param.h \ + profile.h signal.h limits.h _limits.h \ + types.h vmparam.h _types.h _param.h \ _mcontext.h KERNELFILES = \ endian.h param.h \ - profile.h setjmp.h signal.h limits.h _limits.h \ - types.h vmparam.h _structs.h _types.h _param.h \ + profile.h signal.h limits.h _limits.h \ + types.h vmparam.h _types.h _param.h \ _mcontext.h diff --git a/bsd/i386/_mcontext.h b/bsd/i386/_mcontext.h index 917e80d43..0abb4c87a 100644 --- a/bsd/i386/_mcontext.h +++ b/bsd/i386/_mcontext.h @@ -120,11 +120,4 @@ typedef _STRUCT_MCONTEXT32 *mcontext_t; #endif #endif /* _MCONTEXT_T */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef I386_MCONTEXT_SIZE -#define I386_MCONTEXT_SIZE sizeof(struct mcontext) -#endif /* I386_MCONTEXT_SIZE */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - #endif /* __I386_MCONTEXT_H_ */ diff --git a/bsd/i386/dis_tables.h b/bsd/i386/dis_tables.h index a0db708fe..5367b5277 100644 --- a/bsd/i386/dis_tables.h +++ b/bsd/i386/dis_tables.h @@ -41,15 +41,9 @@ extern "C" { #endif -#if !defined(__APPLE__) -#include -#include -#include -#else #include #include #include -#endif /* __APPLE__ */ /* * values for cpu mode diff --git a/bsd/i386/fasttrap_isa.h b/bsd/i386/fasttrap_isa.h index d51c7ddfb..a71101a2d 100644 --- a/bsd/i386/fasttrap_isa.h +++ b/bsd/i386/fasttrap_isa.h @@ -48,10 +48,8 @@ typedef uint8_t fasttrap_instr_t; typedef struct fasttrap_machtp { uint8_t ftmt_instr[FASTTRAP_MAX_INSTR_SIZE]; /* orig. instr. */ uint8_t ftmt_size; /* instruction size */ -#if __sol64 || defined(__APPLE__) uint8_t ftmt_ripmode; /* %rip-relative handling mode */ uint8_t ftmt_modrm; /* saved modrm byte */ -#endif uint8_t ftmt_type; /* emulation type */ uint8_t ftmt_code; /* branch condition */ uint8_t ftmt_base; /* branch base */ @@ -62,10 +60,8 @@ typedef struct fasttrap_machtp { } fasttrap_machtp_t; #define ftt_instr ftt_mtp.ftmt_instr -#if __sol64 || defined(__APPLE__) #define ftt_ripmode ftt_mtp.ftmt_ripmode #define ftt_modrm ftt_mtp.ftmt_modrm -#endif #define ftt_size ftt_mtp.ftmt_size #define ftt_type ftt_mtp.ftmt_type #define ftt_code ftt_mtp.ftmt_code diff --git a/bsd/i386/setjmp.h b/bsd/i386/setjmp.h deleted file mode 100644 index ad23a339b..000000000 --- a/bsd/i386/setjmp.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1992 NeXT Computer, Inc. All rights reserved. - * - * File: setjmp.h - * - * Declaration of setjmp routines and data structures. - */ -#ifndef _BSD_I386_SETJMP_H -#define _BSD_I386_SETJMP_H - -#include - -#if defined(__x86_64__) -/* - * _JBLEN is number of ints required to save the following: - * rflags, rip, rbp, rsp, rbx, r12, r13, r14, r15... these are 8 bytes each - * mxcsr, fp control word, sigmask... these are 4 bytes each - * add 16 ints for future expansion needs... - */ -#define _JBLEN ((9 * 2) + 3 + 16) -typedef int jmp_buf[_JBLEN]; -typedef int sigjmp_buf[_JBLEN + 1]; - -#else - -/* - * _JBLEN is number of ints required to save the following: - * eax, ebx, ecx, edx, edi, esi, ebp, esp, ss, eflags, eip, - * cs, de, es, fs, gs == 16 ints - * onstack, mask = 2 ints - */ - -#if !defined(KERNEL) - -#define _JBLEN (18) -typedef int jmp_buf[_JBLEN]; -typedef int sigjmp_buf[_JBLEN + 1]; -#endif - -#endif - -__BEGIN_DECLS -int setjmp(jmp_buf); -void longjmp(jmp_buf, int); - -#ifndef _ANSI_SOURCE -int _setjmp(jmp_buf); -void _longjmp(jmp_buf, int); -int sigsetjmp(sigjmp_buf, int); -void siglongjmp(sigjmp_buf, int); -#endif /* _ANSI_SOURCE */ - -#if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) -void longjmperror(void); -#endif /* neither ANSI nor POSIX */ -__END_DECLS -#endif /* !_BSD_I386_SETJMP_H */ diff --git a/bsd/i386/types.h b/bsd/i386/types.h index 301114f86..eec91fb3e 100644 --- a/bsd/i386/types.h +++ b/bsd/i386/types.h @@ -145,7 +145,5 @@ typedef __int64_t user32_off_t __attribute__((aligned(4))); /* This defines the size of syscall arguments after copying into the kernel: */ typedef u_int64_t syscall_arg_t; -#include - #endif /* __ASSEMBLER__ */ #endif /* _MACHTYPES_H_ */ diff --git a/bsd/i386/vmparam.h b/bsd/i386/vmparam.h index b6389b47d..6ea9e94b9 100644 --- a/bsd/i386/vmparam.h +++ b/bsd/i386/vmparam.h @@ -31,9 +31,7 @@ #include -/* Rosetta dependency on this address */ #define USRSTACK VM_USRSTACK32 - #define USRSTACK64 VM_USRSTACK64 diff --git a/bsd/kern/Makefile b/bsd/kern/Makefile index 2e14acc85..32c7ab729 100644 --- a/bsd/kern/Makefile +++ b/bsd/kern/Makefile @@ -17,7 +17,7 @@ $(INSTALL_SHARE_MISC_FILES): $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR)/% : % @echo INSTALL $(@F) $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ -do_build_install_primary:: $(INSTALL_SHARE_MISC_FILES) +do_textfiles_install:: $(INSTALL_SHARE_MISC_FILES) include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index ace8c58ba..105922628 100644 --- 
a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -155,11 +155,15 @@ #include /* for tty_init() */ #include /* proc_uuid_policy_init() */ #include /* flow_divert_init() */ +#include /* for cfil_init() */ +#include /* for necp_init() */ +#include /* for pkt_mnglr_init() */ #include /* for utun_register_control() */ #include /* for ipsec_register_control() */ #include /* for net_str_id_init() */ #include /* for netsrc_init() */ #include /* for nstat_init() */ +#include /* for tcp_cc_init() */ #include /* for assert() */ #include /* for init_system_override() */ @@ -248,6 +252,8 @@ extern void file_lock_init(void); extern void kmeminit(void); extern void bsd_bufferinit(void); extern void throttle_init(void); +extern void macx_init(void); +extern void acct_init(void); extern int serverperfmode; extern int ncl; @@ -282,14 +288,15 @@ void bsd_utaskbootstrap(void); static void parse_bsd_args(void); extern task_t bsd_init_task; extern char init_task_failure_data[]; +#if CONFIG_DEV_KMEM +extern void dev_kmem_init(void); +#endif extern void time_zone_slock_init(void); extern void select_wait_queue_init(void); static void process_name(const char *, proc_t); static void setconf(void); -funnel_t *kernel_flock; - #if SYSV_SHM extern void sysv_shm_lock_init(void); #endif @@ -300,12 +307,6 @@ extern void sysv_sem_lock_init(void); extern void sysv_msg_lock_init(void); #endif -#if !defined(SECURE_KERNEL) -/* kmem access not enabled by default; can be changed with boot-args */ -/* We don't need to keep this symbol around in RELEASE kernel */ -int setup_kmem = 0; -#endif - #if CONFIG_MACF #if defined (__i386__) || defined (__x86_64__) /* MACF policy_check configuration flags; see policy_check.c for details */ @@ -347,11 +348,11 @@ process_name(const char *s, proc_t p) /* To allow these values to be patched, they're globals here */ #include -struct rlimit vm_initial_limit_stack = { DFLSSIZ, MAXSSIZ - PAGE_SIZE }; +struct rlimit vm_initial_limit_stack = { DFLSSIZ, MAXSSIZ - PAGE_MAX_SIZE }; struct rlimit vm_initial_limit_data = { DFLDSIZ, MAXDSIZ }; struct rlimit vm_initial_limit_core = { DFLCSIZ, MAXCSIZ }; -extern thread_t cloneproc(task_t, proc_t, int, int); +extern thread_t cloneproc(task_t, coalition_t, proc_t, int, int); extern int (*mountroot)(void); lck_grp_t * proc_lck_grp; @@ -384,8 +385,6 @@ void (*unmountroot_pre_hook)(void); * used like any other. */ -extern void IOServicePublishResource(const char *, boolean_t); - void bsd_init(void) { @@ -403,11 +402,6 @@ bsd_init(void) throttle_init(); - kernel_flock = funnel_alloc(KERNEL_FUNNEL); - if (kernel_flock == (funnel_t *)0 ) { - panic("bsd_init: Failed to allocate kernel funnel"); - } - printf(copyright); bsd_init_kprintf("calling kmeminit\n"); @@ -416,6 +410,11 @@ bsd_init(void) bsd_init_kprintf("calling parse_bsd_args\n"); parse_bsd_args(); +#if CONFIG_DEV_KMEM + bsd_init_kprintf("calling dev_kmem_init\n"); + dev_kmem_init(); +#endif + /* Initialize kauth subsystem before instancing the first credential */ bsd_init_kprintf("calling kauth_init\n"); kauth_init(); @@ -599,7 +598,6 @@ bsd_init(void) #if CONFIG_MACF mac_cred_label_associate_kernel(kernproc->p_ucred); - mac_task_label_update_cred (kernproc->p_ucred, (struct task *) kernproc->task); #endif /* Create the file descriptor table. */ @@ -627,7 +625,7 @@ bsd_init(void) kernproc->p_sigacts = &sigacts0; /* - * Charge root for two processes: init and mach_init. + * Charge root for one process: launchd. 
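[Editor's note: parse_bsd_args() in the hunks just below gains and loses PE_parse_boot_argn() probes (the kmem= probe moves behind CONFIG_DEV_KMEM, and -vnode_cache_defeat is documented). A hedged sketch of the two probe styles; kernel-only fragment, and both argument names are hypothetical.]

    static int      my_feature = 0;   /* hypothetical default */
    static uint32_t my_bufs = 0;      /* hypothetical tunable */

    static void
    parse_my_args(void)
    {
        char namep[16];

        /* Flag style: mere presence of "-my_flag" in boot-args flips it. */
        if (PE_parse_boot_argn("-my_flag", namep, sizeof(namep)))
            my_feature = 1;

        /* Value style: "my_bufs=4096" parses into the integer directly. */
        if (PE_parse_boot_argn("my_bufs", &my_bufs, sizeof(my_bufs))) {
            if (my_bufs > 65536)
                my_bufs = 65536;      /* sanity-clamp before use */
        }
    }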
*/ bsd_init_kprintf("calling chgproccnt\n"); (void)chgproccnt(0, 1); @@ -797,6 +795,12 @@ bsd_init(void) memorystatus_init(); #endif /* CONFIG_MEMORYSTATUS */ + bsd_init_kprintf("calling macx_init\n"); + macx_init(); + + bsd_init_kprintf("calling acct_init\n"); + acct_init(); + #ifdef GPROF /* Initialize kernel profiling. */ kmstartup(); @@ -840,6 +844,19 @@ bsd_init(void) bsd_init_kprintf("calling net_init_run\n"); net_init_run(); +#if CONTENT_FILTER + cfil_init(); +#endif + +#if PACKET_MANGLER + pkt_mnglr_init(); +#endif + +#if NECP + /* Initialize Network Extension Control Policies */ + necp_init(); +#endif + /* register user tunnel kernel control handler */ utun_register_control(); #if IPSEC @@ -847,16 +864,11 @@ bsd_init(void) #endif /* IPSEC */ netsrc_init(); nstat_init(); + tcp_cc_init(); #endif /* NETWORKING */ bsd_init_kprintf("calling vnode_pager_bootstrap\n"); vnode_pager_bootstrap(); -#if 0 - /* XXX Hack for early debug stop */ - printf("\nabout to sleep for 10 seconds\n"); - IOSleep( 10 * 1000 ); - /* Debugger("hello"); */ -#endif bsd_init_kprintf("calling inittodr\n"); inittodr(0); @@ -978,10 +990,8 @@ bsd_init(void) #endif bsd_init_kprintf("done\n"); - } -/* Called with kernel funnel held */ void bsdinit_task(void) { @@ -1007,7 +1017,6 @@ bsdinit_task(void) #if CONFIG_MACF mac_cred_label_associate_user(p->p_ucred); - mac_task_label_update_cred (p->p_ucred, (struct task *) p->task); #endif load_init_program(p); lock_trace = 1; @@ -1041,11 +1050,6 @@ setconf(void) u_int32_t flags; kern_return_t err; - /* - * calls into IOKit can generate networking registrations - * which needs to be under network funnel. Right thing to do - * here is to drop the funnel alltogether and regrab it afterwards - */ err = IOFindBSDRoot(rootdevice, sizeof(rootdevice), &rootdev, &flags); if( err) { printf("setconf: IOFindBSDRoot returned an error (%d);" @@ -1079,7 +1083,7 @@ bsd_utaskbootstrap(void) * Clone the bootstrap process from the kernel process, without * inheriting either task characteristics or memory from the kernel; */ - thread = cloneproc(TASK_NULL, kernproc, FALSE, TRUE); + thread = cloneproc(TASK_NULL, COALITION_NULL, kernproc, FALSE, TRUE); /* Hold the reference as it will be dropped during shutdown */ initproc = proc_find(1); @@ -1115,6 +1119,7 @@ parse_bsd_args(void) if (PE_parse_boot_argn("-x", namep, sizeof (namep))) /* safe boot */ boothowto |= RB_SAFEBOOT; + /* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */ if (PE_parse_boot_argn("-vnode_cache_defeat", namep, sizeof (namep))) bootarg_vnode_cache_defeat = 1; @@ -1129,9 +1134,6 @@ parse_bsd_args(void) sizeof (max_nbuf_headers))) { customnbuf = 1; } -#if !defined(SECURE_KERNEL) - PE_parse_boot_argn("kmem", &setup_kmem, sizeof (setup_kmem)); -#endif #if CONFIG_MACF #if defined (__i386__) || defined (__x86_64__) @@ -1146,7 +1148,6 @@ parse_bsd_args(void) if (PE_parse_boot_argn("-novfscache", namep, sizeof(namep))) { nc_disabled = 1; } - } void diff --git a/bsd/kern/bsd_stubs.c b/bsd/kern/bsd_stubs.c index bc4537d35..648d6e305 100644 --- a/bsd/kern/bsd_stubs.c +++ b/bsd/kern/bsd_stubs.c @@ -49,7 +49,6 @@ extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int); /* XXX most of these just exist to export; there's no good header for them*/ void pcb_synch(void); -void tbeproc(void *); TAILQ_HEAD(,devsw_lock) devsw_locks; lck_mtx_t devsw_lock_list_mtx; @@ -317,16 +316,6 @@ cdevsw_setkqueueok(int index, struct cdevsw *csw, int use_offset) #include /* for PE_parse_boot_arg */ -void -tbeproc(void *procp) -{ - struct proc *p 
= procp; - - if (p) - OSBitOrAtomic(P_TBE, &p->p_flag); - return; -} - /* * Copy the "hostname" variable into a caller-provided buffer * Returns: 0 for success, ENAMETOOLONG for insufficient buffer space. diff --git a/bsd/kern/decmpfs.c b/bsd/kern/decmpfs.c index ec784aebd..ce43a4785 100644 --- a/bsd/kern/decmpfs.c +++ b/bsd/kern/decmpfs.c @@ -1040,6 +1040,10 @@ static kern_return_t commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abort) { kern_return_t kr = 0; + +#if CONFIG_IOSCHED + upl_unmark_decmp(upl); +#endif /* CONFIG_IOSCHED */ /* commit the upl pages */ if (abort) { @@ -1056,6 +1060,7 @@ commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abo return kr; } + errno_t decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmpfs_cnode *cp) { @@ -1098,16 +1103,25 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp err = ENOTSUP; goto out; } - + +#if CONFIG_IOSCHED + /* Mark the UPL as the requesting UPL for decompression */ + upl_mark_decmp(pl); +#endif /* CONFIG_IOSCHED */ + /* map the upl so we can fetch into it */ kern_return_t kr = ubc_upl_map(pl, (vm_offset_t*)&data); if ((kr != KERN_SUCCESS) || (data == NULL)) { + err = ENOSPC; +#if CONFIG_IOSCHED + upl_unmark_decmp(pl); +#endif /* CONFIG_IOSCHED */ goto out; } uplPos = f_offset; uplSize = size; - + /* clip the size to the size of the file */ if ((uint64_t)uplPos + uplSize > cachedSize) { /* truncate the read to the size of the file */ @@ -1159,7 +1173,11 @@ decompress: if (did_read < total_size) { memset((char*)vec.buf + did_read, 0, total_size - did_read); } - + +#if CONFIG_IOSCHED + upl_unmark_decmp(pl); +#endif /* CONFIG_IOSCHED */ + kr = ubc_upl_unmap(pl); data = NULL; /* make sure to set data to NULL so we don't try to unmap again below */ if (kr != KERN_SUCCESS) ErrorLog("ubc_upl_unmap error %d\n", (int)kr); @@ -1299,8 +1317,8 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c /* clip to max upl size */ curUplSize = uplRemaining; - if (curUplSize > MAX_UPL_SIZE * PAGE_SIZE) { - curUplSize = MAX_UPL_SIZE * PAGE_SIZE; + if (curUplSize > MAX_UPL_SIZE_BYTES) { + curUplSize = MAX_UPL_SIZE_BYTES; } /* create the upl */ @@ -1311,10 +1329,18 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c goto out; } VerboseLog("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize); - + +#if CONFIG_IOSCHED + /* Mark the UPL as the requesting UPL for decompression */ + upl_mark_decmp(upl); +#endif /* CONFIG_IOSCHED */ + /* map the upl */ kr = ubc_upl_map(upl, (vm_offset_t*)&data); if (kr != KERN_SUCCESS) { + + commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1); + ErrorLog("ubc_upl_map error %d\n", (int)kr); err = EINVAL; goto out; @@ -1322,6 +1348,9 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c /* make sure the map succeeded */ if (!data) { + + commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1); + ErrorLog("ubc_upl_map mapped null\n"); err = EINVAL; goto out; @@ -1395,11 +1424,12 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c } else { ErrorLog("ubc_upl_unmap error %d\n", (int)kr); } - + uplRemaining -= curUplSize; } out: + if (hdr) FREE(hdr, M_TEMP); if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0); if (err) {/* something went wrong */ diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index 71f1ae7c2..a9dad0b4e 100644 --- a/bsd/kern/kdebug.c +++ 
b/bsd/kern/kdebug.c @@ -32,6 +32,7 @@ #include #include #include +#include #define HZ 100 #include @@ -117,7 +118,6 @@ int cpu_number(void); /* XXX include path broken */ int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t); void kdbg_control_chud(int, void *); int kdbg_control(int *, u_int, user_addr_t, size_t *); -int kdbg_getentropy (user_addr_t, size_t *, int); int kdbg_readcpumap(user_addr_t, size_t *); int kdbg_readcurcpumap(user_addr_t, size_t *); int kdbg_readthrmap(user_addr_t, size_t *, vnode_t, vfs_context_t); @@ -145,18 +145,15 @@ extern void IOSleep(int); /* trace enable status */ unsigned int kdebug_enable = 0; -/* track timestamps for security server's entropy needs */ -uint64_t * kd_entropy_buffer = 0; -unsigned int kd_entropy_bufsize = 0; -unsigned int kd_entropy_count = 0; -unsigned int kd_entropy_indx = 0; -vm_offset_t kd_entropy_buftomem = 0; - -#define MAX_ENTROPY_COUNT (128 * 1024) +/* A static buffer to record events prior to the start of regular logging */ +#define KD_EARLY_BUFFER_MAX 64 +static kd_buf kd_early_buffer[KD_EARLY_BUFFER_MAX]; +static int kd_early_index = 0; +static boolean_t kd_early_overflow = FALSE; #define SLOW_NOLOG 0x01 #define SLOW_CHECKS 0x02 -#define SLOW_ENTROPY 0x04 +#define SLOW_ENTROPY 0x04 /* Obsolescent */ #define SLOW_CHUD 0x08 #define EVENTS_PER_STORAGE_UNIT 2048 @@ -197,7 +194,6 @@ int n_storage_units = 0; int n_storage_buffers = 0; int n_storage_threshold = 0; int kds_waiter = 0; -int kde_waiter = 0; #pragma pack(0) struct kd_bufinfo { @@ -337,6 +333,29 @@ __private_extern__ void stackshot_lock_init( void ); static uint8_t *type_filter_bitmap; +/* + * This allows kperf to swap out the global state pid when kperf ownership is + * passed from one process to another. It checks the old global state pid so + * that kperf can't accidentally steal control of trace when a non-kperf trace user has + * control of trace. + */ +void +kdbg_swap_global_state_pid(pid_t old_pid, pid_t new_pid); + +void +kdbg_swap_global_state_pid(pid_t old_pid, pid_t new_pid) +{ + if (!(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT)) + return; + + lck_mtx_lock(kd_trace_mtx_sysctl); + + if (old_pid == global_state_pid) + global_state_pid = new_pid; + + lck_mtx_unlock(kd_trace_mtx_sysctl); +} + static uint32_t kdbg_cpu_count(boolean_t early_trace) { @@ -356,62 +375,6 @@ kdbg_cpu_count(boolean_t early_trace) } #if MACH_ASSERT -static boolean_t -kdbg_iop_list_is_valid(kd_iop_t* iop) -{ - if (iop) { - /* Is list sorted by cpu_id? */ - kd_iop_t* temp = iop; - do { - assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1); - assert(temp->next || (temp->cpu_id == kdbg_cpu_count(FALSE) || temp->cpu_id == kdbg_cpu_count(TRUE))); - } while ((temp = temp->next)); - - /* Does each entry have a function and a name? */ - temp = iop; - do { - assert(temp->callback.func); - assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name)); - } while ((temp = temp->next)); - } - - return TRUE; -} - -static boolean_t -kdbg_iop_list_contains_cpu_id(kd_iop_t* list, uint32_t cpu_id) -{ - while (list) { - if (list->cpu_id == cpu_id) - return TRUE; - list = list->next; - } - - return FALSE; -} - -/* - * This is a temporary workaround for - * - * DO NOT CALL IN RELEASE BUILD, LEAKS ADDRESS INFORMATION! 
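[Editor's note: the new kd_early_buffer machinery, declared above and drained by kernel_debug_early_end() further on, records events into a small static array until the real trace buffers exist, and remembers overflow so a lost-events record can be cut later. A self-contained userland analogue of the pattern:]

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define EARLY_MAX 4                 /* deliberately tiny for the demo */

    static uint32_t early_buf[EARLY_MAX];
    static int      early_index;
    static bool     early_overflow;

    static void
    record_early(uint32_t event)
    {
        if (early_index >= EARLY_MAX) { /* full: note the loss, drop it */
            early_overflow = true;
            return;
        }
        early_buf[early_index++] = event;
    }

    static void
    drain_early(void)
    {
        for (int i = 0; i < early_index; i++)
            printf("event %#x\n", early_buf[i]);
        if (early_overflow)
            printf("events lost\n");    /* analogue of the lost-events record */
    }

    int
    main(void)
    {
        for (uint32_t i = 0; i < 6; i++)
            record_early(0x100 + i);
        drain_early();                  /* four events, then "events lost" */
        return 0;
    }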
- */ -static boolean_t -kdbg_iop_list_check_for_timestamp_rollback(kd_iop_t* list, uint32_t cpu_id, uint64_t timestamp) -{ - while (list) { - if (list->cpu_id == cpu_id) { - if (list->last_timestamp > timestamp) { - kprintf("%s is sending trace events that have gone backwards in time. Run the following command: \"symbols -2 -lookup 0x%p\" and file a radar against the matching kext.\n", list->callback.iop_name, (void*)list->callback.func); - } - /* Unconditional set mitigates syslog spam */ - list->last_timestamp = timestamp; - return TRUE; - } - list = list->next; - } - - return FALSE; -} #endif /* MACH_ASSERT */ static void @@ -524,7 +487,6 @@ create_buffers(boolean_t early_trace) */ kd_ctrl_page.kdebug_iops = kd_iops; - assert(kdbg_iop_list_is_valid(kd_ctrl_page.kdebug_iops)); /* * If the list is valid, it is sorted, newest -> oldest. Each iop entry @@ -906,9 +868,6 @@ kernel_debug_enter( } record_event: - assert(kdbg_iop_list_contains_cpu_id(kd_ctrl_page.kdebug_iops, coreid)); - /* Remove when is closed. */ - assert(kdbg_iop_list_check_for_timestamp_rollback(kd_ctrl_page.kdebug_iops, coreid, timestamp)); disable_preemption(); @@ -1000,8 +959,7 @@ kernel_debug_internal( uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, - uintptr_t arg5, - int entropy_flag); + uintptr_t arg5); __attribute__((always_inline)) void kernel_debug_internal( @@ -1010,8 +968,7 @@ kernel_debug_internal( uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, - uintptr_t arg5, - int entropy_flag) + uintptr_t arg5) { struct proc *curproc; uint64_t now; @@ -1040,30 +997,6 @@ kernel_debug_internal( chudhook(debugid, arg1, arg2, arg3, arg4, arg5); ml_set_interrupts_enabled(s); } - if ((kdebug_enable & KDEBUG_ENABLE_ENTROPY) && entropy_flag) { - - now = mach_absolute_time(); - - s = ml_set_interrupts_enabled(FALSE); - lck_spin_lock(kds_spin_lock); - - if (kdebug_enable & KDEBUG_ENABLE_ENTROPY) { - - if (kd_entropy_indx < kd_entropy_count) { - kd_entropy_buffer[kd_entropy_indx] = now; - kd_entropy_indx++; - } - if (kd_entropy_indx == kd_entropy_count) { - /* - * Disable entropy collection - */ - kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY; - kd_ctrl_page.kdebug_slowcheck &= ~SLOW_ENTROPY; - } - } - lck_spin_unlock(kds_spin_lock); - ml_set_interrupts_enabled(s); - } if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT))) goto out1; @@ -1168,8 +1101,7 @@ retry_q: out: enable_preemption(); out1: - if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) || - (kde_waiter && kd_entropy_indx >= kd_entropy_count)) { + if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) { uint32_t etype; uint32_t stype; @@ -1180,7 +1112,6 @@ out1: stype == BSC_SysCall || stype == MACH_SysCall) { boolean_t need_kds_wakeup = FALSE; - boolean_t need_kde_wakeup = FALSE; /* * try to take the lock here to synchronize with the @@ -1200,18 +1131,12 @@ out1: kds_waiter = 0; need_kds_wakeup = TRUE; } - if (kde_waiter && kd_entropy_indx >= kd_entropy_count) { - kde_waiter = 0; - need_kde_wakeup = TRUE; - } lck_spin_unlock(kdw_spin_lock); } ml_set_interrupts_enabled(s); if (need_kds_wakeup == TRUE) wakeup(&kds_waiter); - if (need_kde_wakeup == TRUE) - wakeup(&kde_waiter); } } } @@ -1225,7 +1150,7 @@ kernel_debug( uintptr_t arg4, __unused uintptr_t arg5) { - kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, (uintptr_t)thread_tid(current_thread()), 1); + kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, (uintptr_t)thread_tid(current_thread())); } void @@ -1237,7 +1162,96 @@ 
kernel_debug1( uintptr_t arg4, uintptr_t arg5) { - kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, 1); + kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5); +} + +void +kernel_debug_string(const char *message) +{ + uintptr_t arg[4] = {0, 0, 0, 0}; + + /* Stuff the message string in the args and log it. */ + strncpy((char *)arg, message, MIN(sizeof(arg), strlen(message))); + KERNEL_DEBUG_EARLY( + (TRACEDBG_CODE(DBG_TRACE_INFO, 4)) | DBG_FUNC_NONE, + arg[0], arg[1], arg[2], arg[3]); +} + +extern int master_cpu; /* MACH_KERNEL_PRIVATE */ +/* + * Used prior to start_kern_tracing() being called. + * Log temporarily into a static buffer. + */ +void +kernel_debug_early( + uint32_t debugid, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4) +{ + /* If tracing is already initialized, use it */ + if (nkdbufs) + KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, 0); + + /* Do nothing if the buffer is full or we're not on the boot cpu */ + kd_early_overflow = kd_early_index >= KD_EARLY_BUFFER_MAX; + if (kd_early_overflow || + cpu_number() != master_cpu) + return; + + kd_early_buffer[kd_early_index].debugid = debugid; + kd_early_buffer[kd_early_index].timestamp = mach_absolute_time(); + kd_early_buffer[kd_early_index].arg1 = arg1; + kd_early_buffer[kd_early_index].arg2 = arg2; + kd_early_buffer[kd_early_index].arg3 = arg3; + kd_early_buffer[kd_early_index].arg4 = arg4; + kd_early_buffer[kd_early_index].arg5 = 0; + kd_early_index++; +} + +/* + * Transfer the contents of the temporary buffer into the trace buffers. + * Precede that by logging the rebase time (offset) - the TSC-based time (in ns) + * when mach_absolute_time is set to 0. + */ +static void +kernel_debug_early_end(void) +{ + int i; + + if (cpu_number() != master_cpu) + panic("kernel_debug_early_end() not called on boot processor"); + + /* Fake sentinel marking the start of kernel time relative to TSC */ + kernel_debug_enter( + 0, + (TRACEDBG_CODE(DBG_TRACE_INFO, 1)) | DBG_FUNC_NONE, + 0, + (uint32_t)(tsc_rebase_abs_time >> 32), + (uint32_t)tsc_rebase_abs_time, + 0, + 0, + 0); + for (i = 0; i < kd_early_index; i++) { + kernel_debug_enter( + 0, + kd_early_buffer[i].debugid, + kd_early_buffer[i].timestamp, + kd_early_buffer[i].arg1, + kd_early_buffer[i].arg2, + kd_early_buffer[i].arg3, + kd_early_buffer[i].arg4, + 0); + } + + /* Cut events-lost event on overflow */ + if (kd_early_overflow) + KERNEL_DEBUG_CONSTANT( + TRACEDBG_CODE(DBG_TRACE_INFO, 2), 0, 0, 0, 0, 0); + + /* This trace marks the start of kernel tracing */ + kernel_debug_string("early trace done"); } /* @@ -1247,9 +1261,9 @@ int kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused int32_t *retval) { if ( __probable(kdebug_enable == 0) ) - return(EINVAL); - - kernel_debug_internal(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, (uintptr_t)thread_tid(current_thread()), 0); + return(0); + + kernel_debug_internal(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, (uintptr_t)thread_tid(current_thread())); return(0); } @@ -2076,115 +2090,6 @@ write_error: return(ret); } -int -kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout) -{ - int avail = *number; - int ret = 0; - int s; - u_int64_t abstime; - u_int64_t ns; - int wait_result = THREAD_AWAKENED; - - - if (kd_entropy_buffer) - return(EBUSY); - - if (ms_timeout < 0) - return(EINVAL); - - kd_entropy_count = avail/sizeof(uint64_t); - - if (kd_entropy_count > MAX_ENTROPY_COUNT || kd_entropy_count == 0) { - /* - * Enforce maximum entropy
entries - */ - return(EINVAL); - } - kd_entropy_bufsize = kd_entropy_count * sizeof(uint64_t); - - /* - * allocate entropy buffer - */ - if (kmem_alloc(kernel_map, &kd_entropy_buftomem, (vm_size_t)kd_entropy_bufsize) == KERN_SUCCESS) { - kd_entropy_buffer = (uint64_t *) kd_entropy_buftomem; - } else { - kd_entropy_buffer = (uint64_t *) 0; - kd_entropy_count = 0; - - return (ENOMEM); - } - kd_entropy_indx = 0; - - KERNEL_DEBUG_CONSTANT(0xbbbbf000 | DBG_FUNC_START, ms_timeout, kd_entropy_count, 0, 0, 0); - - /* - * Enable entropy sampling - */ - kdbg_set_flags(SLOW_ENTROPY, KDEBUG_ENABLE_ENTROPY, TRUE); - - if (ms_timeout) { - ns = (u_int64_t)ms_timeout * (u_int64_t)(1000 * 1000); - nanoseconds_to_absolutetime(ns, &abstime ); - clock_absolutetime_interval_to_deadline( abstime, &abstime ); - } else - abstime = 0; - - s = ml_set_interrupts_enabled(FALSE); - lck_spin_lock(kdw_spin_lock); - - while (wait_result == THREAD_AWAKENED && kd_entropy_indx < kd_entropy_count) { - - kde_waiter = 1; - - if (abstime) { - /* - * wait for the specified timeout or - * until we've hit our sample limit - */ - wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kde_waiter, THREAD_ABORTSAFE, abstime); - } else { - /* - * wait until we've hit our sample limit - */ - wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kde_waiter, THREAD_ABORTSAFE); - } - kde_waiter = 0; - } - lck_spin_unlock(kdw_spin_lock); - ml_set_interrupts_enabled(s); - - /* - * Disable entropy sampling - */ - kdbg_set_flags(SLOW_ENTROPY, KDEBUG_ENABLE_ENTROPY, FALSE); - - KERNEL_DEBUG_CONSTANT(0xbbbbf000 | DBG_FUNC_END, ms_timeout, kd_entropy_indx, 0, 0, 0); - - *number = 0; - ret = 0; - - if (kd_entropy_indx > 0) { - /* - * copyout the buffer - */ - if (copyout(kd_entropy_buffer, buffer, kd_entropy_indx * sizeof(uint64_t))) - ret = EINVAL; - else - *number = kd_entropy_indx * sizeof(uint64_t); - } - /* - * Always cleanup - */ - kd_entropy_count = 0; - kd_entropy_indx = 0; - kd_entropy_buftomem = 0; - kmem_free(kernel_map, (vm_offset_t)kd_entropy_buffer, kd_entropy_bufsize); - kd_entropy_buffer = (uint64_t *) 0; - - return(ret); -} - static int kdbg_set_nkdbufs(unsigned int value) @@ -2331,28 +2236,26 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret = EINVAL; } goto out; - break; - case KERN_KDGETENTROPY: - if (kd_entropy_buffer) - ret = EBUSY; - else - ret = kdbg_getentropy(where, sizep, value); + case KERN_KDGETENTROPY: { + /* Obsolescent - just fake with a random buffer */ + char *buffer = (char *) kalloc(size); + read_frandom((void *) buffer, size); + ret = copyout(buffer, where, size); + kfree(buffer, size); goto out; - break; + } case KERN_KDENABLE_BG_TRACE: bg_nkdbufs = kdbg_set_nkdbufs(value); kdlog_bg_trace = TRUE; ret = kdbg_enable_bg_trace(); goto out; - break; case KERN_KDDISABLE_BG_TRACE: kdlog_bg_trace = FALSE; kdbg_disable_bg_trace(); goto out; - break; } if ((curproc = current_proc()) != NULL) @@ -2523,7 +2426,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) proc_fdunlock(p); if ((ret = vnode_getwithref(vp)) == 0) { - + RAW_file_offset = fp->f_fglob->fg_offset; if (name[0] == KERN_KDWRITETR) { number = nkdbufs * sizeof(kd_buf); @@ -2536,6 +2439,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) number = kd_mapcount * sizeof(kd_threadmap); kdbg_readthrmap(0, &number, vp, &context); } + fp->f_fglob->fg_offset = RAW_file_offset; vnode_put(vp); } fp_drop(p, fd, fp, 0); @@ -3017,6 +2921,23 @@ stack_snapshot2(pid_t pid, user_addr_t tracebuf, 
uint32_t tracebuf_size, uint32_ *retval = 0; return (0); } + + if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE) { + error = telemetry_enable_window(); + + if (error != KERN_SUCCESS) { + /* We are probably out of memory */ + *retval = -1; + return ENOMEM; + } + + *retval = 0; + return (0); + } else if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE) { + telemetry_disable_window(); + *retval = 0; + return (0); + } #endif *retval = -1; @@ -3047,6 +2968,23 @@ stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_ goto error_exit; } + if (flags & STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS) { + + if (tracebuf_size > SANE_TRACEBUF_SIZE) { + error = EINVAL; + goto error_exit; + } + + bytesTraced = tracebuf_size; + error = telemetry_gather_windowed(tracebuf, &bytesTraced); + if (error == KERN_NO_SPACE) { + error = ENOSPC; + } + + *retval = (int)bytesTraced; + goto error_exit; + } + if (flags & STACKSHOT_GET_BOOT_PROFILE) { if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) { @@ -3123,13 +3061,16 @@ error_exit: } void -start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map) { +start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map) +{ if (!new_nkdbufs) return; nkdbufs = kdbg_set_nkdbufs(new_nkdbufs); kdbg_lock_init(); + kernel_debug_string("start_kern_tracing"); + if (0 == kdbg_reinit(TRUE)) { if (need_map == TRUE) { @@ -3139,22 +3080,43 @@ start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map) { disable_wrap(&old1, &old2); } + + /* Hold off interrupts until the early traces are cut */ + boolean_t s = ml_set_interrupts_enabled(FALSE); + kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE); -#if defined(__i386__) || defined(__x86_64__) - uint64_t now = mach_absolute_time(); + /* + * Transfer all very early events from the static buffer + * into the real buffers. + */ + kernel_debug_early_end(); + + ml_set_interrupts_enabled(s); - KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 1)) | DBG_FUNC_NONE, - (uint32_t)(tsc_rebase_abs_time >> 32), (uint32_t)tsc_rebase_abs_time, - (uint32_t)(now >> 32), (uint32_t)now, - 0); -#endif printf("kernel tracing started\n"); } else { printf("error from kdbg_reinit,kernel tracing not started\n"); } } +void +start_kern_tracing_with_typefilter(unsigned int new_nkdbufs, + boolean_t need_map, + unsigned int typefilter) +{ + /* startup tracing */ + start_kern_tracing(new_nkdbufs, need_map); + + /* check that tracing was actually enabled */ + if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) + return; + + /* setup the typefiltering */ + if (0 == kdbg_enable_typefilter()) + setbit(type_filter_bitmap, typefilter & (CSC_MASK >> CSC_OFFSET)); +} + void kdbg_dump_trace_to_file(const char *filename) { diff --git a/bsd/kern/kern_acct.c b/bsd/kern/kern_acct.c index 516de08dc..aec90c9e0 100644 --- a/bsd/kern/kern_acct.c +++ b/bsd/kern/kern_acct.c @@ -116,7 +116,7 @@ */ comp_t encode_comp_t(uint32_t, uint32_t); void acctwatch(void *); -void acctwatch_funnel(void *); +void acct_init(void); /* * Accounting vnode pointer, and suspended accounting vnode pointer. 
States @@ -139,6 +139,21 @@ int acctsuspend = 2; /* stop accounting when < 2% free space left */ int acctresume = 4; /* resume when free space risen to > 4% */ int acctchkfreq = 15; /* frequency (in seconds) to check space */ + +static lck_grp_t *acct_subsys_lck_grp; +static lck_mtx_t *acct_subsys_mutex; + +#define ACCT_SUBSYS_LOCK() lck_mtx_lock(acct_subsys_mutex) +#define ACCT_SUBSYS_UNLOCK() lck_mtx_unlock(acct_subsys_mutex) + +void +acct_init(void) +{ + acct_subsys_lck_grp = lck_grp_alloc_init("acct", NULL); + acct_subsys_mutex = lck_mtx_alloc_init(acct_subsys_lck_grp, NULL); +} + + /* * Accounting system call. Written based on the specification and * previous implementation done by Mark Tinguely. @@ -191,21 +206,26 @@ acct(proc_t p, struct acct_args *uap, __unused int *retval) * If accounting was previously enabled, kill the old space-watcher, * close the file, and (if no new file was specified, leave). */ + ACCT_SUBSYS_LOCK(); if (acctp != NULLVP || suspend_acctp != NULLVP) { - untimeout(acctwatch_funnel, NULL); + untimeout(acctwatch, NULL); error = vn_close((acctp != NULLVP ? acctp : suspend_acctp), FWRITE, vfs_context_current()); acctp = suspend_acctp = NULLVP; } - if (uap->path == USER_ADDR_NULL) + if (uap->path == USER_ADDR_NULL) { + ACCT_SUBSYS_UNLOCK(); return (error); + } /* * Save the new accounting file vnode, and schedule the new * free space watcher. */ acctp = nd.ni_vp; + ACCT_SUBSYS_UNLOCK(); + acctwatch(NULL); return (error); } @@ -230,9 +250,12 @@ acct_process(proc_t p) struct tty *tp; /* If accounting isn't enabled, don't bother */ + ACCT_SUBSYS_LOCK(); vp = acctp; - if (vp == NULLVP) + if (vp == NULLVP) { + ACCT_SUBSYS_UNLOCK(); return (0); + } /* * Get process accounting information. @@ -301,6 +324,8 @@ acct_process(proc_t p) } kauth_cred_unref(&safecred); + ACCT_SUBSYS_UNLOCK(); + return (error); } @@ -342,16 +367,6 @@ encode_comp_t(uint32_t s, uint32_t us) return (exp); } -/* XXX The acctwatch() thread need to be protected by a mutex instead. */ -void -acctwatch_funnel(void *a) -{ - thread_funnel_set(kernel_flock, TRUE); - acctwatch(a); - thread_funnel_set(kernel_flock, FALSE); -} - - /* * Periodically check the file system to see if accounting * should be turned on or off. Beware the case where the vnode @@ -369,6 +384,7 @@ acctwatch(__unused void *a) VFSATTR_WANTED(&va, f_blocks); VFSATTR_WANTED(&va, f_bavail); + ACCT_SUBSYS_LOCK(); if (suspend_acctp != NULLVP) { /* * Resuming accounting when accounting is suspended, and the @@ -378,6 +394,7 @@ acctwatch(__unused void *a) if (suspend_acctp->v_type == VBAD) { (void) vn_close(suspend_acctp, FWRITE, vfs_context_kernel()); suspend_acctp = NULLVP; + ACCT_SUBSYS_UNLOCK(); return; } (void)vfs_getattr(suspend_acctp->v_mount, &va, ctx); @@ -395,6 +412,7 @@ acctwatch(__unused void *a) if (acctp->v_type == VBAD) { (void) vn_close(acctp, FWRITE, vfs_context_kernel()); acctp = NULLVP; + ACCT_SUBSYS_UNLOCK(); return; } (void)vfs_getattr(acctp->v_mount, &va, ctx); @@ -404,8 +422,10 @@ acctwatch(__unused void *a) log(LOG_NOTICE, "Accounting suspended\n"); } } else { + ACCT_SUBSYS_UNLOCK(); return; - } - - timeout(acctwatch_funnel, NULL, acctchkfreq * hz); + } + ACCT_SUBSYS_UNLOCK(); + + timeout(acctwatch, NULL, acctchkfreq * hz); } diff --git a/bsd/kern/kern_aio.c b/bsd/kern/kern_aio.c index 1627d4964..2513122e6 100644 --- a/bsd/kern/kern_aio.c +++ b/bsd/kern/kern_aio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2008 Apple Inc. All rights reserved. + * Copyright (c) 2003-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -2415,8 +2415,11 @@ do_aio_write( aio_workq_entry *entryp ) entryp->aiocb.aio_offset, flags, &entryp->returnval); - - fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); + + if (entryp->returnval) + fp_drop_written(entryp->procp, entryp->aiocb.aio_fildes, fp); + else + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); return( error ); diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index 5a4cfd5f7..16a66ae82 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1999-2012 Apple Inc. All rights reserved. + * Copyright (c) 1999-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -49,42 +49,53 @@ #include #include #include +#include #include #include #include +#ifndef ROUNDUP64 +#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t)) +#endif + +#ifndef ADVANCE64 +#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n)) +#endif + /* * Definitions and vars for we support */ -#define CTL_SENDSIZE (2 * 1024) /* default buffer size */ -#define CTL_RECVSIZE (8 * 1024) /* default buffer size */ +#define CTL_SENDSIZE (2 * 1024) /* default buffer size */ +#define CTL_RECVSIZE (8 * 1024) /* default buffer size */ /* * Definitions and vars for we support */ -static u_int32_t ctl_maxunit = 65536; +static u_int32_t ctl_maxunit = 65536; static lck_grp_attr_t *ctl_lck_grp_attr = 0; -static lck_attr_t *ctl_lck_attr = 0; -static lck_grp_t *ctl_lck_grp = 0; -static lck_mtx_t *ctl_mtx; - +static lck_attr_t *ctl_lck_attr = 0; +static lck_grp_t *ctl_lck_grp = 0; +static lck_mtx_t *ctl_mtx; /* all the controllers are chained */ TAILQ_HEAD(kctl_list, kctl) ctl_head; + static int ctl_attach(struct socket *, int, struct proc *); static int ctl_detach(struct socket *); static int ctl_sofreelastref(struct socket *so); static int ctl_connect(struct socket *, struct sockaddr *, struct proc *); static int ctl_disconnect(struct socket *); static int ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct proc *p); + struct ifnet *ifp, struct proc *p); static int ctl_send(struct socket *, int, struct mbuf *, - struct sockaddr *, struct mbuf *, struct proc *); + struct sockaddr *, struct mbuf *, struct proc *); +static int ctl_send_list(struct socket *, int, struct mbuf *, + struct sockaddr *, struct mbuf *, struct proc *); static int ctl_ctloutput(struct socket *, struct sockopt *); static int ctl_peeraddr(struct socket 
*so, struct sockaddr **nam); static int ctl_usr_rcvd(struct socket *so, int flags); @@ -109,35 +120,70 @@ static struct pr_usrreqs ctl_usrreqs = { .pru_peeraddr = ctl_peeraddr, .pru_rcvd = ctl_usr_rcvd, .pru_send = ctl_send, + .pru_send_list = ctl_send_list, .pru_sosend = sosend, + .pru_sosend_list = sosend_list, .pru_soreceive = soreceive, + .pru_soreceive_list = soreceive_list, }; static struct protosw kctlsw[] = { { - .pr_type = SOCK_DGRAM, - .pr_protocol = SYSPROTO_CONTROL, - .pr_flags = PR_ATOMIC|PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD, - .pr_ctloutput = ctl_ctloutput, - .pr_usrreqs = &ctl_usrreqs, - .pr_lock = ctl_lock, - .pr_unlock = ctl_unlock, - .pr_getlock = ctl_getlock, + .pr_type = SOCK_DGRAM, + .pr_protocol = SYSPROTO_CONTROL, + .pr_flags = PR_ATOMIC|PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD, + .pr_ctloutput = ctl_ctloutput, + .pr_usrreqs = &ctl_usrreqs, + .pr_lock = ctl_lock, + .pr_unlock = ctl_unlock, + .pr_getlock = ctl_getlock, }, { - .pr_type = SOCK_STREAM, - .pr_protocol = SYSPROTO_CONTROL, - .pr_flags = PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD, - .pr_ctloutput = ctl_ctloutput, - .pr_usrreqs = &ctl_usrreqs, - .pr_lock = ctl_lock, - .pr_unlock = ctl_unlock, - .pr_getlock = ctl_getlock, + .pr_type = SOCK_STREAM, + .pr_protocol = SYSPROTO_CONTROL, + .pr_flags = PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD, + .pr_ctloutput = ctl_ctloutput, + .pr_usrreqs = &ctl_usrreqs, + .pr_lock = ctl_lock, + .pr_unlock = ctl_unlock, + .pr_getlock = ctl_getlock, } }; +__private_extern__ int kctl_reg_list SYSCTL_HANDLER_ARGS; +__private_extern__ int kctl_pcblist SYSCTL_HANDLER_ARGS; +__private_extern__ int kctl_getstat SYSCTL_HANDLER_ARGS; + static int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw)); +SYSCTL_NODE(_net_systm, OID_AUTO, kctl, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel control family"); + +struct kctlstat kctlstat; +SYSCTL_PROC(_net_systm_kctl, OID_AUTO, stats, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + kctl_getstat, "S,kctlstat", ""); + +SYSCTL_PROC(_net_systm_kctl, OID_AUTO, reg_list, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + kctl_reg_list, "S,xkctl_reg", ""); + +SYSCTL_PROC(_net_systm_kctl, OID_AUTO, pcblist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + kctl_pcblist, "S,xkctlpcb", ""); + +u_int32_t ctl_autorcvbuf_max = 256 * 1024; +SYSCTL_INT(_net_systm_kctl, OID_AUTO, autorcvbufmax, + CTLFLAG_RW | CTLFLAG_LOCKED, &ctl_autorcvbuf_max, 0, ""); + +u_int32_t ctl_autorcvbuf_high = 0; +SYSCTL_INT(_net_systm_kctl, OID_AUTO, autorcvbufhigh, + CTLFLAG_RD | CTLFLAG_LOCKED, &ctl_autorcvbuf_high, 0, ""); + +u_int32_t ctl_debug = 0; +SYSCTL_INT(_net_systm_kctl, OID_AUTO, debug, + CTLFLAG_RW | CTLFLAG_LOCKED, &ctl_debug, 0, ""); + /* * Install the protosw's for the Kernel Control manager. 
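* Both kctlsw entries share the same ctloutput, usrreqs and lock callbacks; * the SOCK_DGRAM and SOCK_STREAM rows differ only in pr_type and pr_flags.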
*/ @@ -190,16 +236,16 @@ kcb_delete(struct ctl_cb *kcb) } } - /* * Kernel Controller user-request functions - * attach function must exist and succeed - * detach not necessary + * attach function must exist and succeed + * detach not necessary * we need a pcb for the per socket mutex */ static int -ctl_attach(__unused struct socket *so, __unused int proto, __unused struct proc *p) -{ +ctl_attach(struct socket *so, int proto, struct proc *p) +{ +#pragma unused(proto, p) int error = 0; struct ctl_cb *kcb = 0; @@ -209,7 +255,7 @@ ctl_attach(__unused struct socket *so, __unused int proto, __unused struct proc goto quit; } bzero(kcb, sizeof(struct ctl_cb)); - + kcb->mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr); if (kcb->mtx == NULL) { error = ENOMEM; @@ -217,139 +263,150 @@ ctl_attach(__unused struct socket *so, __unused int proto, __unused struct proc } kcb->so = so; so->so_pcb = (caddr_t)kcb; - + quit: if (error != 0) { kcb_delete(kcb); kcb = 0; } - return error; + return (error); } static int ctl_sofreelastref(struct socket *so) { - struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - - so->so_pcb = 0; - - if (kcb != 0) { - struct kctl *kctl; - if ((kctl = kcb->kctl) != 0) { - lck_mtx_lock(ctl_mtx); - TAILQ_REMOVE(&kctl->kcb_head, kcb, next); - lck_mtx_unlock(ctl_mtx); - } - kcb_delete(kcb); - } - sofreelastref(so, 1); - return 0; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + so->so_pcb = 0; + + if (kcb != 0) { + struct kctl *kctl; + if ((kctl = kcb->kctl) != 0) { + lck_mtx_lock(ctl_mtx); + TAILQ_REMOVE(&kctl->kcb_head, kcb, next); + kctlstat.kcs_pcbcount--; + kctlstat.kcs_gencnt++; + lck_mtx_unlock(ctl_mtx); + } + kcb_delete(kcb); + } + sofreelastref(so, 1); + return (0); } static int ctl_detach(struct socket *so) { - struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - - if (kcb == 0) - return 0; - - soisdisconnected(so); - so->so_flags |= SOF_PCBCLEARING; - return 0; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + if (kcb == 0) + return (0); + + soisdisconnected(so); + so->so_flags |= SOF_PCBCLEARING; + return (0); } static int -ctl_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) -{ - struct kctl *kctl; - int error = 0; - struct sockaddr_ctl sa; - struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - struct ctl_cb *kcb_next = NULL; - - if (kcb == 0) - panic("ctl_connect so_pcb null\n"); - - if (nam->sa_len != sizeof(struct sockaddr_ctl)) - return(EINVAL); - - bcopy(nam, &sa, sizeof(struct sockaddr_ctl)); - - lck_mtx_lock(ctl_mtx); - kctl = ctl_find_by_id_unit(sa.sc_id, sa.sc_unit); - if (kctl == NULL) { - lck_mtx_unlock(ctl_mtx); - return ENOENT; - } - - if (((kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && (so->so_type != SOCK_STREAM)) || - (!(kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && (so->so_type != SOCK_DGRAM))) { - lck_mtx_unlock(ctl_mtx); - return EPROTOTYPE; - } - - if (kctl->flags & CTL_FLAG_PRIVILEGED) { - if (p == 0) { - lck_mtx_unlock(ctl_mtx); - return(EINVAL); - } - if (kauth_cred_issuser(kauth_cred_get()) == 0) { - lck_mtx_unlock(ctl_mtx); - return EPERM; - } - } +ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) +{ +#pragma unused(p) + struct kctl *kctl; + int error = 0; + struct sockaddr_ctl sa; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct ctl_cb *kcb_next = NULL; + + if (kcb == 0) + panic("ctl_connect so_pcb null\n"); + + if (nam->sa_len != sizeof(struct sockaddr_ctl)) + return (EINVAL); + + bcopy(nam, &sa, sizeof(struct sockaddr_ctl)); + + lck_mtx_lock(ctl_mtx); + kctl = 
ctl_find_by_id_unit(sa.sc_id, sa.sc_unit); + if (kctl == NULL) { + lck_mtx_unlock(ctl_mtx); + return (ENOENT); + } + + if (((kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && + (so->so_type != SOCK_STREAM)) || + (!(kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && + (so->so_type != SOCK_DGRAM))) { + lck_mtx_unlock(ctl_mtx); + return (EPROTOTYPE); + } + + if (kctl->flags & CTL_FLAG_PRIVILEGED) { + if (p == 0) { + lck_mtx_unlock(ctl_mtx); + return (EINVAL); + } + if (kauth_cred_issuser(kauth_cred_get()) == 0) { + lck_mtx_unlock(ctl_mtx); + return (EPERM); + } + } if ((kctl->flags & CTL_FLAG_REG_ID_UNIT) || sa.sc_unit != 0) { if (kcb_find(kctl, sa.sc_unit) != NULL) { lck_mtx_unlock(ctl_mtx); - return EBUSY; + return (EBUSY); } } else { - /* Find an unused ID, assumes control IDs are listed in order */ - u_int32_t unit = 1; - - TAILQ_FOREACH(kcb_next, &kctl->kcb_head, next) { - if (kcb_next->unit > unit) { - /* Found a gap, lets fill it in */ - break; - } - unit = kcb_next->unit + 1; - if (unit == ctl_maxunit) - break; - } - + /* Find an unused ID, assumes control IDs are in order */ + u_int32_t unit = 1; + + TAILQ_FOREACH(kcb_next, &kctl->kcb_head, next) { + if (kcb_next->unit > unit) { + /* Found a gap, lets fill it in */ + break; + } + unit = kcb_next->unit + 1; + if (unit == ctl_maxunit) + break; + } + if (unit == ctl_maxunit) { lck_mtx_unlock(ctl_mtx); - return EBUSY; + return (EBUSY); } - + sa.sc_unit = unit; - } + } kcb->unit = sa.sc_unit; - kcb->kctl = kctl; - if (kcb_next != NULL) { - TAILQ_INSERT_BEFORE(kcb_next, kcb, next); - } - else { + kcb->kctl = kctl; + if (kcb_next != NULL) { + TAILQ_INSERT_BEFORE(kcb_next, kcb, next); + } else { TAILQ_INSERT_TAIL(&kctl->kcb_head, kcb, next); } - lck_mtx_unlock(ctl_mtx); + kctlstat.kcs_pcbcount++; + kctlstat.kcs_gencnt++; + kctlstat.kcs_connections++; + lck_mtx_unlock(ctl_mtx); - error = soreserve(so, kctl->sendbufsize, kctl->recvbufsize); - if (error) + error = soreserve(so, kctl->sendbufsize, kctl->recvbufsize); + if (error) { + printf("%s - soreserve(%llx, %u, %u) error %d\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), + kctl->sendbufsize, kctl->recvbufsize, error); goto done; - soisconnecting(so); - + } + soisconnecting(so); + socket_unlock(so, 0); - error = (*kctl->connect)(kctl, &sa, &kcb->userdata); + error = (*kctl->connect)(kctl, &sa, &kcb->userdata); socket_lock(so, 0); - if (error) + if (error) goto end; - - soisconnected(so); + + soisconnected(so); end: if (error && kctl->disconnect) { @@ -358,45 +415,50 @@ end: socket_lock(so, 0); } done: - if (error) { - soisdisconnected(so); - lck_mtx_lock(ctl_mtx); - kcb->kctl = 0; - kcb->unit = 0; - TAILQ_REMOVE(&kctl->kcb_head, kcb, next); - lck_mtx_unlock(ctl_mtx); - } - return error; + if (error) { + soisdisconnected(so); + lck_mtx_lock(ctl_mtx); + kcb->kctl = 0; + kcb->unit = 0; + TAILQ_REMOVE(&kctl->kcb_head, kcb, next); + kctlstat.kcs_pcbcount--; + kctlstat.kcs_gencnt++; + kctlstat.kcs_conn_fail++; + lck_mtx_unlock(ctl_mtx); + } + return (error); } static int ctl_disconnect(struct socket *so) { - struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - - if ((kcb = (struct ctl_cb *)so->so_pcb)) { - struct kctl *kctl = kcb->kctl; - - if (kctl && kctl->disconnect) { - socket_unlock(so, 0); - (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata); - socket_lock(so, 0); - } - - soisdisconnected(so); - + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + if ((kcb = (struct ctl_cb *)so->so_pcb)) { + struct kctl *kctl = kcb->kctl; + + if (kctl && kctl->disconnect) { + socket_unlock(so, 0); + 
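/* Call the kext's disconnect handler with the socket unlocked */ +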
(*kctl->disconnect)(kctl, kcb->unit, kcb->userdata); + socket_lock(so, 0); + } + + soisdisconnected(so); + socket_unlock(so, 0); - lck_mtx_lock(ctl_mtx); - kcb->kctl = 0; - kcb->unit = 0; - while (kcb->usecount != 0) { - msleep(&kcb->usecount, ctl_mtx, 0, "kcb->usecount", 0); - } - TAILQ_REMOVE(&kctl->kcb_head, kcb, next); - lck_mtx_unlock(ctl_mtx); + lck_mtx_lock(ctl_mtx); + kcb->kctl = 0; + kcb->unit = 0; + while (kcb->usecount != 0) { + msleep(&kcb->usecount, ctl_mtx, 0, "kcb->usecount", 0); + } + TAILQ_REMOVE(&kctl->kcb_head, kcb, next); + kctlstat.kcs_pcbcount--; + kctlstat.kcs_gencnt++; + lck_mtx_unlock(ctl_mtx); socket_lock(so, 0); - } - return 0; + } + return (0); } static int @@ -405,23 +467,54 @@ ctl_peeraddr(struct socket *so, struct sockaddr **nam) struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; struct kctl *kctl; struct sockaddr_ctl sc; - + if (kcb == NULL) /* sanity check */ - return(ENOTCONN); - + return (ENOTCONN); + if ((kctl = kcb->kctl) == NULL) - return(EINVAL); - + return (EINVAL); + bzero(&sc, sizeof(struct sockaddr_ctl)); sc.sc_len = sizeof(struct sockaddr_ctl); sc.sc_family = AF_SYSTEM; sc.ss_sysaddr = AF_SYS_CONTROL; sc.sc_id = kctl->id; sc.sc_unit = kcb->unit; - + *nam = dup_sockaddr((struct sockaddr *)&sc, 1); - - return 0; + + return (0); +} + +static void +ctl_sbrcv_trim(struct socket *so) +{ + struct sockbuf *sb = &so->so_rcv; + + if (sb->sb_hiwat > sb->sb_idealsize) { + u_int32_t diff; + int32_t trim; + + /* + * The difference between the ideal size and the + * current size is the upper bound on the amount + * we can trim + */ + diff = sb->sb_hiwat - sb->sb_idealsize; + /* + * We cannot trim below the outstanding data + */ + trim = sb->sb_hiwat - sb->sb_cc; + + trim = imin(trim, (int32_t)diff); + + if (trim > 0) { + sbreserve(sb, (sb->sb_hiwat - trim)); + + if (ctl_debug) + printf("%s - shrunk to %d\n", + __func__, sb->sb_hiwat); + } + } } static int @@ -431,7 +524,7 @@ ctl_usr_rcvd(struct socket *so, int flags) struct kctl *kctl; if ((kctl = kcb->kctl) == NULL) { - return EINVAL; + return (EINVAL); } if (kctl->rcvd) { @@ -440,27 +533,32 @@ socket_lock(so, 0); } - return 0; + ctl_sbrcv_trim(so); + + return (0); } static int ctl_send(struct socket *so, int flags, struct mbuf *m, - __unused struct sockaddr *addr, struct mbuf *control, - __unused struct proc *p) + struct sockaddr *addr, struct mbuf *control, + struct proc *p) { - int error = 0; +#pragma unused(addr, p) + int error = 0; struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - struct kctl *kctl; - - if (control) m_freem(control); - + struct kctl *kctl; + + if (control) + m_freem(control); + if (kcb == NULL) /* sanity check */ error = ENOTCONN; - + if (error == 0 && (kctl = kcb->kctl) == NULL) error = EINVAL; - + if (error == 0 && kctl->send) { + so_tc_update_stats(m, so, m_get_service_class(m)); socket_unlock(so, 0); error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, flags); socket_lock(so, 0); @@ -469,71 +567,330 @@ ctl_send(struct socket *so, int flags, struct mbuf *m, if (error == 0) error = ENOTSUP; } - return error; + if (error != 0) + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_fail); + return (error); +} + +static int +ctl_send_list(struct socket *so, int flags, struct mbuf *m, + __unused struct sockaddr *addr, struct mbuf *control, + __unused struct proc *p) +{ + int error = 0; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kctl *kctl; + + if (control) + m_freem_list(control); + + if (kcb == NULL) /* sanity check */ + error = ENOTCONN; + +
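/* Resolve the kctl, then prefer its send_list callback; fall back + * to handing packets to the per-packet send callback one at a time */ +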
if (error == 0 && (kctl = kcb->kctl) == NULL) + error = EINVAL; + + if (error == 0 && kctl->send_list) { + struct mbuf *nxt; + + for (nxt = m; nxt != NULL; nxt = nxt->m_nextpkt) + so_tc_update_stats(nxt, so, m_get_service_class(nxt)); + + socket_unlock(so, 0); + error = (*kctl->send_list)(kctl, kcb->unit, kcb->userdata, m, + flags); + socket_lock(so, 0); + } else if (error == 0 && kctl->send) { + while (m != NULL && error == 0) { + struct mbuf *nextpkt = m->m_nextpkt; + + m->m_nextpkt = NULL; + so_tc_update_stats(m, so, m_get_service_class(m)); + socket_unlock(so, 0); + error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, + flags); + socket_lock(so, 0); + m = nextpkt; + } + if (m != NULL) + m_freem_list(m); + } else { + m_freem_list(m); + if (error == 0) + error = ENOTSUP; + } + if (error != 0) + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_list_fail); + return (error); +} + +static errno_t +ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize, + u_int32_t flags) +{ + struct sockbuf *sb = &so->so_rcv; + u_int32_t space = sbspace(sb); + errno_t error; + + if ((kctl->flags & CTL_FLAG_REG_CRIT) == 0) { + if ((u_int32_t) space >= datasize) + error = 0; + else + error = ENOBUFS; + } else if ((flags & CTL_DATA_CRIT) == 0) { + /* + * Reserve 25% for critical messages + */ + if (space < (sb->sb_hiwat >> 2) || + space < datasize) + error = ENOBUFS; + else + error = 0; + } else { + u_int32_t autorcvbuf_max; + + /* + * Allow overcommit of 25% + */ + autorcvbuf_max = min(sb->sb_idealsize + (sb->sb_idealsize >> 2), + ctl_autorcvbuf_max); + + if ((u_int32_t) space >= datasize) { + error = 0; + } else if (tcp_cansbgrow(sb) && + sb->sb_hiwat < autorcvbuf_max) { + /* + * Grow with a little bit of leeway + */ + u_int32_t grow = datasize - space + MSIZE; + + if (sbreserve(sb, + min((sb->sb_hiwat + grow), autorcvbuf_max)) == 1) { + + if (sb->sb_hiwat > ctl_autorcvbuf_high) + ctl_autorcvbuf_high = sb->sb_hiwat; + + if (ctl_debug) + printf("%s - grown to %d\n", + __func__, sb->sb_hiwat); + error = 0; + } else { + error = ENOBUFS; + } + } else { + error = ENOBUFS; + } + } + return (error); } errno_t ctl_enqueuembuf(void *kctlref, u_int32_t unit, struct mbuf *m, u_int32_t flags) { struct socket *so; - errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; - + errno_t error = 0; + struct kctl *kctl = (struct kctl *)kctlref; + int len = m->m_pkthdr.len; + if (kctl == NULL) - return EINVAL; - + return (EINVAL); + so = kcb_find_socket(kctl, unit); - + if (so == NULL) - return EINVAL; - - if (sbspace(&so->so_rcv) < m->m_pkthdr.len) { + return (EINVAL); + + if (ctl_rcvbspace(kctl, so, len, flags) != 0) { error = ENOBUFS; + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); goto bye; } if ((flags & CTL_DATA_EOR)) m->m_flags |= M_EOR; - if (sbappend(&so->so_rcv, m) && (flags & CTL_DATA_NOWAKEUP) == 0) - sorwakeup(so); + + so_recv_data_stat(so, m, 0); + if (sbappend(&so->so_rcv, m) != 0) { + if ((flags & CTL_DATA_NOWAKEUP) == 0) + sorwakeup(so); + } else { + error = ENOBUFS; + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); + } bye: + if (ctl_debug && error != 0 && (flags & CTL_DATA_CRIT)) + printf("%s - crit data err %d len %d hiwat %d cc: %d\n", + __func__, error, len, + so->so_rcv.sb_hiwat, so->so_rcv.sb_cc); + socket_unlock(so, 1); - return error; + if (error != 0) + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fail); + + return (error); +} + +/* + * Compute space occupied by mbuf like sbappendrecord + */ +static int +m_space(struct mbuf *m) +{ + 
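/* Sum m_len over the m_next chain of a single record */ +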
int space = 0; + struct mbuf *nxt; + + for (nxt = m; nxt != NULL; nxt = nxt->m_next) + space += nxt->m_len; + + return (space); +} + +errno_t +ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list, + u_int32_t flags, struct mbuf **m_remain) +{ + struct socket *so = NULL; + errno_t error = 0; + struct kctl *kctl = (struct kctl *)kctlref; + struct mbuf *m, *nextpkt; + int needwakeup = 0; + int len; + + /* + * Need to point the beginning of the list in case of early exit + */ + m = m_list; + + if (kctl == NULL) { + error = EINVAL; + goto done; + } + if (kctl->flags & CTL_FLAG_REG_SOCK_STREAM) { + error = EOPNOTSUPP; + goto done; + } + if (flags & CTL_DATA_EOR) { + error = EINVAL; + goto done; + } + /* + * kcb_find_socket takes the socket lock with a reference + */ + so = kcb_find_socket(kctl, unit); + if (so == NULL) { + error = EINVAL; + goto done; + } + + for (m = m_list; m != NULL; m = nextpkt) { + nextpkt = m->m_nextpkt; + + if (m->m_pkthdr.len == 0) + printf("%s: %llx m_pkthdr.len is 0", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)); + + /* + * The mbuf is either appended or freed by sbappendrecord() + * so it's not reliable from a data standpoint + */ + len = m_space(m); + if (ctl_rcvbspace(kctl, so, len, flags) != 0) { + error = ENOBUFS; + OSIncrementAtomic64( + (SInt64 *)&kctlstat.kcs_enqueue_fullsock); + break; + } else { + /* + * Unlink from the list, m is on its own + */ + m->m_nextpkt = NULL; + so_recv_data_stat(so, m, 0); + if (sbappendrecord(&so->so_rcv, m) != 0) { + needwakeup = 1; + } else { + /* + * We free or return the remaining + * mbufs in the list + */ + m = nextpkt; + error = ENOBUFS; + OSIncrementAtomic64( + (SInt64 *)&kctlstat.kcs_enqueue_fullsock); + break; + } + } + } + if (needwakeup && (flags & CTL_DATA_NOWAKEUP) == 0) + sorwakeup(so); + +done: + if (so != NULL) { + if (ctl_debug && error != 0 && (flags & CTL_DATA_CRIT)) + printf("%s - crit data err %d len %d hiwat %d cc: %d\n", + __func__, error, len, + so->so_rcv.sb_hiwat, so->so_rcv.sb_cc); + + socket_unlock(so, 1); + } + if (m_remain) { + *m_remain = m; + + if (m != NULL && socket_debug && so != NULL && + (so->so_options & SO_DEBUG)) { + struct mbuf *n; + + printf("%s m_list %llx\n", __func__, + (uint64_t) VM_KERNEL_ADDRPERM(m_list)); + for (n = m; n != NULL; n = n->m_nextpkt) + printf(" remain %llx m_next %llx\n", + (uint64_t) VM_KERNEL_ADDRPERM(n), + (uint64_t) VM_KERNEL_ADDRPERM(n->m_next)); + } + } else { + if (m != NULL) + m_freem_list(m); + } + if (error != 0) + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fail); + return (error); } errno_t -ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, u_int32_t flags) +ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, + u_int32_t flags) { struct socket *so; struct mbuf *m; - errno_t error = 0; - struct kctl *kctl = (struct kctl *)kctlref; + errno_t error = 0; + struct kctl *kctl = (struct kctl *)kctlref; unsigned int num_needed; struct mbuf *n; - size_t curlen = 0; - + size_t curlen = 0; + if (kctlref == NULL) - return EINVAL; - + return (EINVAL); + so = kcb_find_socket(kctl, unit); if (so == NULL) - return EINVAL; - - if (sbspace(&so->so_rcv) < (int)len) { + return (EINVAL); + + if (ctl_rcvbspace(kctl, so, len, flags) != 0) { error = ENOBUFS; + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); goto bye; } num_needed = 1; m = m_allocpacket_internal(&num_needed, len, NULL, M_NOWAIT, 1, 0); if (m == NULL) { - printf("ctl_enqueuedata: m_allocpacket_internal(%lu) failed\n", len); - error = 
ENOBUFS; + printf("ctl_enqueuedata: m_allocpacket_internal(%lu) failed\n", + len); + error = ENOMEM; goto bye; } - + for (n = m; n != NULL; n = n->m_next) { size_t mlen = mbuf_maxlen(n); - + if (mlen + curlen > len) mlen = len - curlen; n->m_len = mlen; @@ -544,33 +901,71 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, u_int32_t if ((flags & CTL_DATA_EOR)) m->m_flags |= M_EOR; - if (sbappend(&so->so_rcv, m) && (flags & CTL_DATA_NOWAKEUP) == 0) - sorwakeup(so); + so_recv_data_stat(so, m, 0); + if (sbappend(&so->so_rcv, m) != 0) { + if ((flags & CTL_DATA_NOWAKEUP) == 0) + sorwakeup(so); + } else { + error = ENOBUFS; + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock); + } + bye: + if (ctl_debug && error != 0 && (flags & CTL_DATA_CRIT)) + printf("%s - crit data err %d len %d hiwat %d cc: %d\n", + __func__, error, (int)len, + so->so_rcv.sb_hiwat, so->so_rcv.sb_cc); + socket_unlock(so, 1); - return error; + if (error != 0) + OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fail); + return (error); } -errno_t +errno_t ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space) { struct kctl *kctl = (struct kctl *)kctlref; struct socket *so; long avail; - + if (kctlref == NULL || space == NULL) - return EINVAL; - + return (EINVAL); + so = kcb_find_socket(kctl, unit); if (so == NULL) - return EINVAL; - + return (EINVAL); + avail = sbspace(&so->so_rcv); *space = (avail < 0) ? 0 : avail; socket_unlock(so, 1); - - return 0; + + return (0); +} + +errno_t +ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit, + u_int32_t *difference) +{ + struct kctl *kctl = (struct kctl *)kctlref; + struct socket *so; + + if (kctlref == NULL || difference == NULL) + return (EINVAL); + + so = kcb_find_socket(kctl, unit); + if (so == NULL) + return (EINVAL); + + if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat) { + *difference = 0; + } else { + *difference = (so->so_rcv.sb_lowat - so->so_rcv.sb_cc); + } + socket_unlock(so, 1); + + return (0); } static int @@ -581,76 +976,91 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) int error = 0; void *data; size_t len; - + if (sopt->sopt_level != SYSPROTO_CONTROL) { - return(EINVAL); + return (EINVAL); } - + if (kcb == NULL) /* sanity check */ - return(ENOTCONN); - + return (ENOTCONN); + if ((kctl = kcb->kctl) == NULL) - return(EINVAL); - + return (EINVAL); + switch (sopt->sopt_dir) { case SOPT_SET: if (kctl->setopt == NULL) - return(ENOTSUP); + return (ENOTSUP); if (sopt->sopt_valsize == 0) { data = NULL; } else { - MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK); + MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, + M_WAITOK); if (data == NULL) - return(ENOMEM); - error = sooptcopyin(sopt, data, sopt->sopt_valsize, sopt->sopt_valsize); + return (ENOMEM); + error = sooptcopyin(sopt, data, + sopt->sopt_valsize, + sopt->sopt_valsize); } if (error == 0) { socket_unlock(so, 0); - error = (*kctl->setopt)(kcb->kctl, kcb->unit, kcb->userdata, sopt->sopt_name, - data, sopt->sopt_valsize); + error = (*kctl->setopt)(kcb->kctl, kcb->unit, + kcb->userdata, + sopt->sopt_name, + data, + sopt->sopt_valsize); socket_lock(so, 0); } FREE(data, M_TEMP); break; - + case SOPT_GET: if (kctl->getopt == NULL) - return(ENOTSUP); + return (ENOTSUP); data = NULL; if (sopt->sopt_valsize && sopt->sopt_val) { - MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK); + MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, + M_WAITOK); if (data == NULL) - return(ENOMEM); - /* 4108337 - copy in data for get socket option */ - error = 
sooptcopyin(sopt, data, sopt->sopt_valsize, sopt->sopt_valsize); + return (ENOMEM); + /* + * 4108337 - copy user data in case the + * kernel control needs it + */ + error = sooptcopyin(sopt, data, + sopt->sopt_valsize, sopt->sopt_valsize); } len = sopt->sopt_valsize; socket_unlock(so, 0); - error = (*kctl->getopt)(kcb->kctl, kcb->unit, kcb->userdata, sopt->sopt_name, + error = (*kctl->getopt)(kcb->kctl, kcb->unit, + kcb->userdata, sopt->sopt_name, data, &len); if (data != NULL && len > sopt->sopt_valsize) - panic_plain("ctl_ctloutput: ctl %s returned len (%lu) > sopt_valsize (%lu)\n", - kcb->kctl->name, len, sopt->sopt_valsize); - socket_lock(so, 0); + panic_plain("ctl_ctloutput: ctl %s returned " + "len (%lu) > sopt_valsize (%lu)\n", + kcb->kctl->name, len, + sopt->sopt_valsize); + socket_lock(so, 0); if (error == 0) { if (data != NULL) error = sooptcopyout(sopt, data, len); - else + else sopt->sopt_valsize = len; } if (data != NULL) - FREE(data, M_TEMP); + FREE(data, M_TEMP); break; } - return error; + return (error); } -static int -ctl_ioctl(__unused struct socket *so, u_long cmd, caddr_t data, - __unused struct ifnet *ifp, __unused struct proc *p) +static int +ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct proc *p) { +#pragma unused(so, ifp, p) int error = ENOTSUP; - + switch (cmd) { /* get the number of controllers */ case CTLIOCGCOUNT: { @@ -661,7 +1071,7 @@ ctl_ioctl(__unused struct socket *so, u_long cmd, caddr_t data, TAILQ_FOREACH(kctl, &ctl_head, next) n++; lck_mtx_unlock(ctl_mtx); - + bcopy(&n, data, sizeof (n)); error = 0; break; @@ -690,12 +1100,12 @@ ctl_ioctl(__unused struct socket *so, u_long cmd, caddr_t data, error = 0; break; } - + /* add controls to get list of NKEs */ - + } - - return error; + + return (error); } /* @@ -709,22 +1119,23 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) u_int32_t id = 1; size_t name_len; int is_extended = 0; - + u_quad_t sbmaxsize; + if (userkctl == NULL) /* sanity check */ - return(EINVAL); + return (EINVAL); if (userkctl->ctl_connect == NULL) - return(EINVAL); + return (EINVAL); name_len = strlen(userkctl->ctl_name); if (name_len == 0 || name_len + 1 > MAX_KCTL_NAME) - return(EINVAL); - + return (EINVAL); + MALLOC(kctl, struct kctl *, sizeof(*kctl), M_TEMP, M_WAITOK); if (kctl == NULL) - return(ENOMEM); + return (ENOMEM); bzero((char *)kctl, sizeof(*kctl)); - + lck_mtx_lock(ctl_mtx); - + /* * Kernel Control IDs * @@ -735,45 +1146,45 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) * this leads to wrapping the id around, we start at the front of the * list and look for a gap. */ - + if ((userkctl->ctl_flags & CTL_FLAG_REG_ID_UNIT) == 0) { /* Must dynamically assign an unused ID */ - + /* Verify the same name isn't already registered */ if (ctl_find_by_name(userkctl->ctl_name) != NULL) { lck_mtx_unlock(ctl_mtx); FREE(kctl, M_TEMP); - return(EEXIST); + return (EEXIST); } - + /* Start with 1 in case the list is empty */ id = 1; kctl_next = TAILQ_LAST(&ctl_head, kctl_list); - + if (kctl_next != NULL) { - /* List was not empty, add one to the last item in the list */ + /* List was not empty, add one to the last item */ id = kctl_next->id + 1; kctl_next = NULL; - + /* - * If this wrapped the id number, start looking at the front - * of the list for an unused id. + * If this wrapped the id number, start looking at + * the front of the list for an unused id. 
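+ * The list is kept sorted by id, so the first gap + * found is the lowest id not currently in use.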
*/ if (id == 0) { /* Find the next unused ID */ id = 1; - + TAILQ_FOREACH(kctl_next, &ctl_head, next) { if (kctl_next->id > id) { /* We found a gap */ break; } - + id = kctl_next->id + 1; } } } - + userkctl->ctl_id = id; kctl->id = id; kctl->reg_unit = -1; @@ -782,11 +1193,11 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) if (kctl_next->id > userkctl->ctl_id) break; } - - if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit) != NULL) { + + if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit)) { lck_mtx_unlock(ctl_mtx); FREE(kctl, M_TEMP); - return(EEXIST); + return (EEXIST); } kctl->id = userkctl->ctl_id; kctl->reg_unit = userkctl->ctl_unit; @@ -797,14 +1208,29 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) strlcpy(kctl->name, userkctl->ctl_name, MAX_KCTL_NAME); kctl->flags = userkctl->ctl_flags; - /* Let the caller know the default send and receive sizes */ + /* + * Let the caller know the default send and receive sizes + * + * rdar://15526688: Limit the send and receive sizes to sb_max + * by using the same scaling as sbreserve() + */ + sbmaxsize = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); + if (userkctl->ctl_sendsize == 0) - userkctl->ctl_sendsize = CTL_SENDSIZE; + kctl->sendbufsize = CTL_SENDSIZE; + else if (userkctl->ctl_sendsize > sbmaxsize) + kctl->sendbufsize = sbmaxsize; + else kctl->sendbufsize = userkctl->ctl_sendsize; + userkctl->ctl_sendsize = kctl->sendbufsize; if (userkctl->ctl_recvsize == 0) - userkctl->ctl_recvsize = CTL_RECVSIZE; + kctl->recvbufsize = CTL_RECVSIZE; + else if (userkctl->ctl_recvsize > sbmaxsize) + kctl->recvbufsize = sbmaxsize; + else kctl->recvbufsize = userkctl->ctl_recvsize; + userkctl->ctl_recvsize = kctl->recvbufsize; kctl->connect = userkctl->ctl_connect; kctl->disconnect = userkctl->ctl_disconnect; @@ -813,52 +1239,59 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) kctl->getopt = userkctl->ctl_getopt; if (is_extended) { kctl->rcvd = userkctl->ctl_rcvd; + kctl->send_list = userkctl->ctl_send_list; } - + TAILQ_INIT(&kctl->kcb_head); - + if (kctl_next) TAILQ_INSERT_BEFORE(kctl_next, kctl, next); else TAILQ_INSERT_TAIL(&ctl_head, kctl, next); - + + kctlstat.kcs_reg_count++; + kctlstat.kcs_gencnt++; + lck_mtx_unlock(ctl_mtx); - + *kctlref = kctl; - + ctl_post_msg(KEV_CTL_REGISTERED, kctl->id); - return(0); + return (0); } errno_t ctl_deregister(void *kctlref) -{ - struct kctl *kctl; - - if (kctlref == NULL) /* sanity check */ - return(EINVAL); - - lck_mtx_lock(ctl_mtx); - TAILQ_FOREACH(kctl, &ctl_head, next) { - if (kctl == (struct kctl *)kctlref) - break; - } - if (kctl != (struct kctl *)kctlref) { - lck_mtx_unlock(ctl_mtx); - return EINVAL; - } +{ + struct kctl *kctl; + + if (kctlref == NULL) /* sanity check */ + return (EINVAL); + + lck_mtx_lock(ctl_mtx); + TAILQ_FOREACH(kctl, &ctl_head, next) { + if (kctl == (struct kctl *)kctlref) + break; + } + if (kctl != (struct kctl *)kctlref) { + lck_mtx_unlock(ctl_mtx); + return (EINVAL); + } if (!TAILQ_EMPTY(&kctl->kcb_head)) { - lck_mtx_unlock(ctl_mtx); - return EBUSY; + lck_mtx_unlock(ctl_mtx); + return (EBUSY); } - TAILQ_REMOVE(&ctl_head, kctl, next); + TAILQ_REMOVE(&ctl_head, kctl, next); + + kctlstat.kcs_reg_count--; + kctlstat.kcs_gencnt++; - lck_mtx_unlock(ctl_mtx); - - ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id); - FREE(kctl, M_TEMP); - return(0); + lck_mtx_unlock(ctl_mtx); + + ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id); + FREE(kctl, M_TEMP); + return (0); } /* @@ -866,54 +1299,54 @@ ctl_deregister(void 
*kctlref) */ static struct kctl * ctl_find_by_name(const char *name) -{ - struct kctl *kctl; +{ + struct kctl *kctl; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); - TAILQ_FOREACH(kctl, &ctl_head, next) - if (strncmp(kctl->name, name, sizeof(kctl->name)) == 0) - return kctl; + TAILQ_FOREACH(kctl, &ctl_head, next) + if (strncmp(kctl->name, name, sizeof(kctl->name)) == 0) + return (kctl); - return NULL; + return (NULL); } u_int32_t ctl_id_by_name(const char *name) { u_int32_t ctl_id = 0; - + struct kctl *kctl; + lck_mtx_lock(ctl_mtx); - struct kctl *kctl = ctl_find_by_name(name); - if (kctl) ctl_id = kctl->id; + kctl = ctl_find_by_name(name); + if (kctl) + ctl_id = kctl->id; lck_mtx_unlock(ctl_mtx); - - return ctl_id; + + return (ctl_id); } errno_t -ctl_name_by_id( - u_int32_t id, - char *out_name, - size_t maxsize) +ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize) { int found = 0; - - lck_mtx_lock(ctl_mtx); struct kctl *kctl; - TAILQ_FOREACH(kctl, &ctl_head, next) { - if (kctl->id == id) - break; - } - - if (kctl && kctl->name) - { - if (maxsize > MAX_KCTL_NAME) - maxsize = MAX_KCTL_NAME; - strlcpy(out_name, kctl->name, maxsize); - found = 1; - } + + lck_mtx_lock(ctl_mtx); + TAILQ_FOREACH(kctl, &ctl_head, next) { + if (kctl->id == id) + break; + } + + if (kctl && kctl->name) { + if (maxsize > MAX_KCTL_NAME) + maxsize = MAX_KCTL_NAME; + strlcpy(out_name, kctl->name, maxsize); + found = 1; + } lck_mtx_unlock(ctl_mtx); - - return found ? 0 : ENOENT; + + return (found ? 0 : ENOENT); } /* @@ -922,16 +1355,18 @@ ctl_name_by_id( */ static struct kctl * ctl_find_by_id_unit(u_int32_t id, u_int32_t unit) -{ - struct kctl *kctl; - - TAILQ_FOREACH(kctl, &ctl_head, next) { - if (kctl->id == id && (kctl->flags & CTL_FLAG_REG_ID_UNIT) == 0) - return kctl; - else if (kctl->id == id && kctl->reg_unit == unit) - return kctl; - } - return NULL; +{ + struct kctl *kctl; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); + + TAILQ_FOREACH(kctl, &ctl_head, next) { + if (kctl->id == id && (kctl->flags & CTL_FLAG_REG_ID_UNIT) == 0) + return (kctl); + else if (kctl->id == id && kctl->reg_unit == unit) + return (kctl); + } + return (NULL); } /* @@ -939,23 +1374,29 @@ ctl_find_by_id_unit(u_int32_t id, u_int32_t unit) */ static struct ctl_cb * kcb_find(struct kctl *kctl, u_int32_t unit) -{ - struct ctl_cb *kcb; +{ + struct ctl_cb *kcb; - TAILQ_FOREACH(kcb, &kctl->kcb_head, next) - if (kcb->unit == unit) - return kcb; + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED); - return NULL; + TAILQ_FOREACH(kcb, &kctl->kcb_head, next) + if (kcb->unit == unit) + return (kcb); + + return (NULL); } static struct socket * kcb_find_socket(struct kctl *kctl, u_int32_t unit) { struct socket *so = NULL; - + struct ctl_cb *kcb; + void *lr_saved; + + lr_saved = __builtin_return_address(0); + lck_mtx_lock(ctl_mtx); - struct ctl_cb *kcb = kcb_find(kctl, unit); + kcb = kcb_find(kctl, unit); if (kcb && kcb->kctl == kctl) { so = kcb->so; if (so) { @@ -963,53 +1404,60 @@ kcb_find_socket(struct kctl *kctl, u_int32_t unit) } } lck_mtx_unlock(ctl_mtx); - + if (so == NULL) { - return NULL; + return (NULL); } - + socket_lock(so, 1); - + lck_mtx_lock(ctl_mtx); - if (kcb->kctl == NULL) - { + if (kcb->kctl == NULL) { lck_mtx_unlock(ctl_mtx); socket_unlock(so, 1); so = NULL; lck_mtx_lock(ctl_mtx); + } else { + /* + * The socket lock history is more useful if we store + * the address of the caller. 
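+ * Replace the slot socket_lock() just filled with the + * return address captured on entry to kcb_find_socket().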
+ */ + int i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX; + + so->lock_lr[i] = lr_saved; } kcb->usecount--; if (kcb->usecount == 0) wakeup((event_t)&kcb->usecount); lck_mtx_unlock(ctl_mtx); - - return so; + + return (so); } -static void -ctl_post_msg(u_int32_t event_code, u_int32_t id) +static void +ctl_post_msg(u_int32_t event_code, u_int32_t id) { - struct ctl_event_data ctl_ev_data; - struct kev_msg ev_msg; - - lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_NOTOWNED); - - bzero(&ev_msg, sizeof(struct kev_msg)); - ev_msg.vendor_code = KEV_VENDOR_APPLE; - - ev_msg.kev_class = KEV_SYSTEM_CLASS; - ev_msg.kev_subclass = KEV_CTL_SUBCLASS; - ev_msg.event_code = event_code; - - /* common nke subclass data */ - bzero(&ctl_ev_data, sizeof(ctl_ev_data)); - ctl_ev_data.ctl_id = id; - ev_msg.dv[0].data_ptr = &ctl_ev_data; - ev_msg.dv[0].data_length = sizeof(ctl_ev_data); - - ev_msg.dv[1].data_length = 0; - - kev_post_msg(&ev_msg); + struct ctl_event_data ctl_ev_data; + struct kev_msg ev_msg; + + lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_NOTOWNED); + + bzero(&ev_msg, sizeof(struct kev_msg)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + + ev_msg.kev_class = KEV_SYSTEM_CLASS; + ev_msg.kev_subclass = KEV_CTL_SUBCLASS; + ev_msg.event_code = event_code; + + /* common nke subclass data */ + bzero(&ctl_ev_data, sizeof(ctl_ev_data)); + ctl_ev_data.ctl_id = id; + ev_msg.dv[0].data_ptr = &ctl_ev_data; + ev_msg.dv[0].data_length = sizeof(ctl_ev_data); + + ev_msg.dv[1].data_length = 0; + + kev_post_msg(&ev_msg); } static int @@ -1025,14 +1473,15 @@ ctl_lock(struct socket *so, int refcount, void *lr) if (so->so_pcb != NULL) { lck_mtx_lock(((struct ctl_cb *)so->so_pcb)->mtx); } else { - panic("ctl_lock: so=%p NO PCB! lr=%p lrh= %s\n", + panic("ctl_lock: so=%p NO PCB! lr=%p lrh= %s\n", so, lr_saved, solockhistory_nr(so)); /* NOTREACHED */ } if (so->so_usecount < 0) { panic("ctl_lock: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", - so, so->so_pcb, lr_saved, so->so_usecount, solockhistory_nr(so)); + so, so->so_pcb, lr_saved, so->so_usecount, + solockhistory_nr(so)); /* NOTREACHED */ } @@ -1056,21 +1505,24 @@ ctl_unlock(struct socket *so, int refcount, void *lr) lr_saved = lr; #ifdef MORE_KCTLLOCK_DEBUG - printf("ctl_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%p\n", - so, so->so_pcb, ((struct ctl_cb *)so->so_pcb)->mtx, - so->so_usecount, lr_saved); + printf("ctl_unlock: so=%llx sopcb=%llx lock=%llx ref=%u lr=%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), + (uint64_t)VM_KERNEL_ADDRPERM(((struct ctl_cb *)so->so_pcb)->mtx), + so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved)); #endif if (refcount) so->so_usecount--; if (so->so_usecount < 0) { - panic("ctl_unlock: so=%p usecount=%x lrh= %s\n", + panic("ctl_unlock: so=%p usecount=%x lrh= %s\n", so, so->so_usecount, solockhistory_nr(so)); /* NOTREACHED */ } if (so->so_pcb == NULL) { - panic("ctl_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", - so, so->so_usecount, (void *)lr_saved, solockhistory_nr(so)); + panic("ctl_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", + so, so->so_usecount, (void *)lr_saved, + solockhistory_nr(so)); /* NOTREACHED */ } mutex_held = ((struct ctl_cb *)so->so_pcb)->mtx; @@ -1087,18 +1539,263 @@ } static lck_mtx_t * -ctl_getlock(struct socket *so, __unused int locktype) +ctl_getlock(struct socket *so, int locktype) { +#pragma unused(locktype) struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - + if (so->so_pcb) { if (so->so_usecount < 0) -
panic("ctl_getlock: so=%p usecount=%x lrh= %s\n", + panic("ctl_getlock: so=%p usecount=%x lrh= %s\n", so, so->so_usecount, solockhistory_nr(so)); - return(kcb->mtx); + return (kcb->mtx); } else { - panic("ctl_getlock: so=%p NULL NO so_pcb %s\n", + panic("ctl_getlock: so=%p NULL NO so_pcb %s\n", so, solockhistory_nr(so)); return (so->so_proto->pr_domain->dom_mtx); } } + +__private_extern__ int +kctl_reg_list SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + int n, i; + struct xsystmgen xsg; + void *buf = NULL; + struct kctl *kctl; + size_t item_size = ROUNDUP64(sizeof (struct xkctl_reg)); + + buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO); + if (buf == NULL) + return (ENOMEM); + + lck_mtx_lock(ctl_mtx); + + n = kctlstat.kcs_reg_count; + + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = (n + n/8) * sizeof(struct xkctl_reg); + goto done; + } + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + goto done; + } + bzero(&xsg, sizeof (xsg)); + xsg.xg_len = sizeof (xsg); + xsg.xg_count = n; + xsg.xg_gen = kctlstat.kcs_gencnt; + xsg.xg_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); + if (error) { + goto done; + } + /* + * We are done if there is no pcb + */ + if (n == 0) { + goto done; + } + + i = 0; + for (i = 0, kctl = TAILQ_FIRST(&ctl_head); + i < n && kctl != NULL; + i++, kctl = TAILQ_NEXT(kctl, next)) { + struct xkctl_reg *xkr = (struct xkctl_reg *)buf; + struct ctl_cb *kcb; + u_int32_t pcbcount = 0; + + TAILQ_FOREACH(kcb, &kctl->kcb_head, next) + pcbcount++; + + bzero(buf, item_size); + + xkr->xkr_len = sizeof(struct xkctl_reg); + xkr->xkr_kind = XSO_KCREG; + xkr->xkr_id = kctl->id; + xkr->xkr_reg_unit = kctl->reg_unit; + xkr->xkr_flags = kctl->flags; + xkr->xkr_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl); + xkr->xkr_recvbufsize = kctl->recvbufsize; + xkr->xkr_sendbufsize = kctl->sendbufsize; + xkr->xkr_lastunit = kctl->lastunit; + xkr->xkr_pcbcount = pcbcount; + xkr->xkr_connect = (uint64_t)VM_KERNEL_ADDRPERM(kctl->connect); + xkr->xkr_disconnect = + (uint64_t)VM_KERNEL_ADDRPERM(kctl->disconnect); + xkr->xkr_send = (uint64_t)VM_KERNEL_ADDRPERM(kctl->send); + xkr->xkr_send_list = + (uint64_t)VM_KERNEL_ADDRPERM(kctl->send_list); + xkr->xkr_setopt = (uint64_t)VM_KERNEL_ADDRPERM(kctl->setopt); + xkr->xkr_getopt = (uint64_t)VM_KERNEL_ADDRPERM(kctl->getopt); + xkr->xkr_rcvd = (uint64_t)VM_KERNEL_ADDRPERM(kctl->rcvd); + strlcpy(xkr->xkr_name, kctl->name, sizeof(xkr->xkr_name)); + + error = SYSCTL_OUT(req, buf, item_size); + } + + if (error == 0) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. 
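+ * A caller detects this by comparing xg_gen in this + * trailing xsystmgen against the one in the header.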
+ */ + bzero(&xsg, sizeof (xsg)); + xsg.xg_len = sizeof (xsg); + xsg.xg_count = n; + xsg.xg_gen = kctlstat.kcs_gencnt; + xsg.xg_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); + if (error) { + goto done; + } + } + +done: + lck_mtx_unlock(ctl_mtx); + + if (buf != NULL) + FREE(buf, M_TEMP); + + return (error); +} + +__private_extern__ int +kctl_pcblist SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + int n, i; + struct xsystmgen xsg; + void *buf = NULL; + struct kctl *kctl; + size_t item_size = ROUNDUP64(sizeof (struct xkctlpcb)) + + ROUNDUP64(sizeof (struct xsocket_n)) + + 2 * ROUNDUP64(sizeof (struct xsockbuf_n)) + + ROUNDUP64(sizeof (struct xsockstat_n)); + + buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO); + if (buf == NULL) + return (ENOMEM); + + lck_mtx_lock(ctl_mtx); + + n = kctlstat.kcs_pcbcount; + + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = (n + n/8) * item_size; + goto done; + } + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + goto done; + } + bzero(&xsg, sizeof (xsg)); + xsg.xg_len = sizeof (xsg); + xsg.xg_count = n; + xsg.xg_gen = kctlstat.kcs_gencnt; + xsg.xg_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); + if (error) { + goto done; + } + /* + * We are done if there is no pcb + */ + if (n == 0) { + goto done; + } + + i = 0; + for (i = 0, kctl = TAILQ_FIRST(&ctl_head); + i < n && kctl != NULL; + kctl = TAILQ_NEXT(kctl, next)) { + struct ctl_cb *kcb; + + for (kcb = TAILQ_FIRST(&kctl->kcb_head); + i < n && kcb != NULL; + i++, kcb = TAILQ_NEXT(kcb, next)) { + struct xkctlpcb *xk = (struct xkctlpcb *)buf; + struct xsocket_n *xso = (struct xsocket_n *) + ADVANCE64(xk, sizeof (*xk)); + struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *) + ADVANCE64(xso, sizeof (*xso)); + struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *) + ADVANCE64(xsbrcv, sizeof (*xsbrcv)); + struct xsockstat_n *xsostats = (struct xsockstat_n *) + ADVANCE64(xsbsnd, sizeof (*xsbsnd)); + + bzero(buf, item_size); + + xk->xkp_len = sizeof(struct xkctlpcb); + xk->xkp_kind = XSO_KCB; + xk->xkp_unit = kcb->unit; + xk->xkp_kctpcb = (uint64_t)VM_KERNEL_ADDRPERM(kcb); + xk->xkp_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl); + xk->xkp_kctlid = kctl->id; + strlcpy(xk->xkp_kctlname, kctl->name, + sizeof(xk->xkp_kctlname)); + + sotoxsocket_n(kcb->so, xso); + sbtoxsockbuf_n(kcb->so ? + &kcb->so->so_rcv : NULL, xsbrcv); + sbtoxsockbuf_n(kcb->so ? + &kcb->so->so_snd : NULL, xsbsnd); + sbtoxsockstat_n(kcb->so, xsostats); + + error = SYSCTL_OUT(req, buf, item_size); + } + } + + if (error == 0) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. 
+ */ + bzero(&xsg, sizeof (xsg)); + xsg.xg_len = sizeof (xsg); + xsg.xg_count = n; + xsg.xg_gen = kctlstat.kcs_gencnt; + xsg.xg_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); + if (error) { + goto done; + } + } + +done: + lck_mtx_unlock(ctl_mtx); + + return (error); +} + +int +kctl_getstat SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + + lck_mtx_lock(ctl_mtx); + + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + goto done; + } + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = sizeof(struct kctlstat); + goto done; + } + + error = SYSCTL_OUT(req, &kctlstat, + MIN(sizeof(struct kctlstat), req->oldlen)); +done: + lck_mtx_unlock(ctl_mtx); + return (error); +} diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c index fd832d4c5..2bd9de059 100644 --- a/bsd/kern/kern_core.c +++ b/bsd/kern/kern_core.c @@ -241,6 +241,10 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) int is_64 = 0; size_t mach_header_sz = sizeof(struct mach_header); size_t segment_command_sz = sizeof(struct segment_command); + + if (current_proc() != core_proc) { + panic("coredump() called against proc that is not current_proc: %p", core_proc); + } if (do_coredump == 0 || /* Not dumping at all */ ( (sugid_coredump == 0) && /* Not dumping SUID/SGID binaries */ diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c index 0098c54d3..911f0e710 100644 --- a/bsd/kern/kern_credential.c +++ b/bsd/kern/kern_credential.c @@ -61,7 +61,7 @@ #include #include -#include +#include #ifdef MACH_ASSERT # undef MACH_ASSERT #endif @@ -4380,11 +4380,12 @@ kauth_cred_label_update(kauth_cred_t cred, struct label *label) * that is returned to them, if it is not intended to be a * persistent reference. */ + static kauth_cred_t kauth_cred_label_update_execve(kauth_cred_t cred, vfs_context_t ctx, - struct vnode *vp, struct vnode *scriptvp, struct label *scriptl, - struct label *execl, void *macextensions, int *disjointp) + struct vnode *vp, off_t offset, struct vnode *scriptvp, struct label *scriptl, + struct label *execl, unsigned int *csflags, void *macextensions, int *disjointp, int *labelupdateerror) { kauth_cred_t newcred; struct ucred temp_cred; @@ -4393,9 +4394,9 @@ kauth_cred_label_update_execve(kauth_cred_t cred, vfs_context_t ctx, mac_cred_label_init(&temp_cred); mac_cred_label_associate(cred, &temp_cred); - *disjointp = mac_cred_label_update_execve(ctx, &temp_cred, - vp, scriptvp, scriptl, execl, - macextensions); + mac_cred_label_update_execve(ctx, &temp_cred, + vp, offset, scriptvp, scriptl, execl, csflags, + macextensions, disjointp, labelupdateerror); newcred = kauth_cred_update(cred, &temp_cred, TRUE); mac_cred_label_destroy(&temp_cred); @@ -4479,6 +4480,8 @@ int kauth_proc_label_update(struct proc *p, struct label *label) * vp The vnode being exec'ed * scriptl The script MAC label * execl The executable MAC label + * lupdateerror The error place holder for MAC label authority + * to update about possible termination * * Returns: 0 Label update did not make credential * disjoint @@ -4489,14 +4492,13 @@ int kauth_proc_label_update(struct proc *p, struct label *label) * result of this call. The caller should not assume the process * reference to the old credential still exists. 
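+ * The disjoint result and any label update error are now + * reported through the disjoint and update_return out + * parameters instead of the former int return value.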
*/ -int + +void kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx, - struct vnode *vp, struct vnode *scriptvp, struct label *scriptl, - struct label *execl, void *macextensions) + struct vnode *vp, off_t offset, struct vnode *scriptvp, struct label *scriptl, + struct label *execl, unsigned int *csflags, void *macextensions, int *disjoint, int *update_return) { kauth_cred_t my_cred, my_new_cred; - int disjoint = 0; - my_cred = kauth_cred_proc_ref(p); DEBUG_CRED_ENTER("kauth_proc_label_update_execve: %p\n", my_cred); @@ -4511,7 +4513,7 @@ kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx, * passed in. The subsequent compare is safe, because it is * a pointer compare rather than a contents compare. */ - my_new_cred = kauth_cred_label_update_execve(my_cred, ctx, vp, scriptvp, scriptl, execl, macextensions, &disjoint); + my_new_cred = kauth_cred_label_update_execve(my_cred, ctx, vp, offset, scriptvp, scriptl, execl, csflags, macextensions, disjoint, update_return); if (my_cred != my_new_cred) { DEBUG_CRED_CHANGE("kauth_proc_label_update_execve_unlocked CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags); @@ -4540,8 +4542,6 @@ kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx, } /* Drop old proc reference or our extra reference */ kauth_cred_unref(&my_cred); - - return (disjoint); } #if 1 diff --git a/bsd/kern/kern_cs.c b/bsd/kern/kern_cs.c index 648aaa100..66af46613 100644 --- a/bsd/kern/kern_cs.c +++ b/bsd/kern/kern_cs.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -63,8 +64,6 @@ #include #include -#include -#include #include @@ -80,14 +79,22 @@ int cs_force_hard = 0; int cs_debug = 0; #if SECURE_KERNEL const int cs_enforcement_enable=1; +const int cs_library_val_enable=1; #else #if CONFIG_ENFORCE_SIGNED_CODE int cs_enforcement_enable=1; #else int cs_enforcement_enable=0; -#endif +#endif /* CONFIG_ENFORCE_SIGNED_CODE */ + +#if CONFIG_ENFORCE_LIBRARY_VALIDATION +int cs_library_val_enable = 1; +#else +int cs_library_val_enable = 0; +#endif /* CONFIG_ENFORCE_LIBRARY_VALIDATION */ + int cs_enforcement_panic=0; -#endif +#endif /* SECURE_KERNEL */ int cs_all_vnodes = 0; static lck_grp_t *cs_lockgrp; @@ -108,9 +115,9 @@ int panic_on_cs_killed = 0; void cs_init(void) { -#if MACH_ASSERT +#if MACH_ASSERT && __x86_64__ panic_on_cs_killed = 1; -#endif +#endif /* MACH_ASSERT && __x86_64__ */ PE_parse_boot_argn("panic_on_cs_killed", &panic_on_cs_killed, sizeof (panic_on_cs_killed)); #if !SECURE_KERNEL @@ -171,11 +178,6 @@ cs_invalid_page( p = current_proc(); - /* - * XXX revisit locking when proc is no longer protected - * by the kernel funnel... - */ - if (verbose) printf("CODE SIGNING: cs_invalid_page(0x%llx): p=%d[%s]\n", vaddr, p->p_pid, p->p_comm); @@ -222,24 +224,12 @@ cs_invalid_page( csflags = p->p_csflags; proc_unlock(p); - if (verbose) { - char pid_str[10]; - snprintf(pid_str, sizeof(pid_str), "%d", p->p_pid); - kern_asl_msg(LOG_NOTICE, "messagetracer", - 5, - "com.apple.message.domain", "com.apple.kernel.cs.invalidate", - "com.apple.message.signature", send_kill ? "kill" : retval ? "deny" : "invalidate", - "com.apple.message.signature4", pid_str, - "com.apple.message.signature3", p->p_comm, - "com.apple.message.summarize", "YES", - NULL - ); + if (verbose) printf("CODE SIGNING: cs_invalid_page(0x%llx): " "p=%d[%s] final status 0x%x, %s page%s\n", vaddr, p->p_pid, p->p_comm, p->p_csflags, retval ? "denying" : "allowing (remove VALID)", send_kill ? 
" sending SIGKILL" : ""); - } if (send_kill) threadsignal(current_thread(), SIGKILL, EXC_BAD_ACCESS); @@ -493,3 +483,237 @@ cs_register_cscsr(struct cscsr_functions *funcs) return; csr_state.funcs = funcs; } + +/* + * Library validation functions + */ +int +cs_require_lv(struct proc *p) +{ + + if (cs_library_val_enable) + return 1; + + if (p == NULL) + p = current_proc(); + + if (p != NULL && (p->p_csflags & CS_REQUIRE_LV)) + return 1; + + return 0; +} + +/* + * Function: csblob_get_teamid + * + * Description: This function returns a pointer to the team id + stored within the codedirectory of the csblob. + If the codedirectory predates team-ids, it returns + NULL. + This does not copy the name but returns a pointer to + it within the CD. Subsequently, the CD must be + available when this is used. +*/ +const char * +csblob_get_teamid(struct cs_blob *csblob) +{ + const CS_CodeDirectory *cd; + + if ((cd = (const CS_CodeDirectory *)cs_find_blob( + csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) + return NULL; + + if (ntohl(cd->version) < CS_SUPPORTSTEAMID) + return NULL; + + if (ntohl(cd->teamOffset) == 0) + return NULL; + + const char *name = ((const char *)cd) + ntohl(cd->teamOffset); + if (cs_debug > 1) + printf("found team-id %s in cdblob\n", name); + + return name; +} + +/* + * Function: csproc_get_blob + * + * Description: This function returns the cs_blob + * for the process p + */ +static struct cs_blob * +csproc_get_blob(struct proc *p) +{ + if (NULL == p) + return NULL; + + if (NULL == p->p_textvp) + return NULL; + + return ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff); +} + +/* + * Function: csproc_get_teamid + * + * Description: This function returns a pointer to the + * team id of the process p +*/ +const char * +csproc_get_teamid(struct proc *p) +{ + struct cs_blob *csblob; + + csblob = csproc_get_blob(p); + + return (csblob == NULL) ? NULL : csblob->csb_teamid; +} + +/* + * Function: csvnode_get_teamid + * + * Description: This function returns a pointer to the + * team id of the binary at the given offset in vnode vp +*/ +const char * +csvnode_get_teamid(struct vnode *vp, off_t offset) +{ + struct cs_blob *csblob; + + if (vp == NULL) + return NULL; + + csblob = ubc_cs_blob_get(vp, -1, offset); + + return (csblob == NULL) ? NULL : csblob->csb_teamid; +} + +/* + * Function: csproc_get_platform_binary + * + * Description: This function returns the value + * of the platform_binary field for proc p + */ +int +csproc_get_platform_binary(struct proc *p) +{ + struct cs_blob *csblob; + + csblob = csproc_get_blob(p); + + /* If there is no csblob this returns 0 because + it is true that it is not a platform binary */ + return (csblob == NULL) ? 
0 : csblob->csb_platform_binary;
+}
+
+/*
+ * Function: csfg_get_platform_binary
+ *
+ * Description: This function returns the
+ *		platform binary field for the
+ *		fileglob fg
+ */
+int
+csfg_get_platform_binary(struct fileglob *fg)
+{
+	int platform_binary = 0;
+	struct ubc_info *uip;
+	vnode_t vp;
+
+	if (FILEGLOB_DTYPE(fg) != DTYPE_VNODE)
+		return 0;
+
+	vp = (struct vnode *)fg->fg_data;
+	if (vp == NULL)
+		return 0;
+
+	vnode_lock(vp);
+	if (!UBCINFOEXISTS(vp))
+		goto out;
+
+	uip = vp->v_ubcinfo;
+	if (uip == NULL)
+		goto out;
+
+	if (uip->cs_blobs == NULL)
+		goto out;
+
+	/* It is OK to extract the platform-binary flag from the first blob
+	   because all blobs of a vnode must agree on it */
+	platform_binary = uip->cs_blobs->csb_platform_binary;
+out:
+	vnode_unlock(vp);
+
+	return platform_binary;
+}
+
+/*
+ * Function: csfg_get_teamid
+ *
+ * Description: This returns a pointer to
+ *		the teamid for the fileglob fg
+ */
+const char *
+csfg_get_teamid(struct fileglob *fg)
+{
+	struct ubc_info *uip;
+	const char *str = NULL;
+	vnode_t vp;
+
+	if (FILEGLOB_DTYPE(fg) != DTYPE_VNODE)
+		return NULL;
+
+	vp = (struct vnode *)fg->fg_data;
+	if (vp == NULL)
+		return NULL;
+
+	vnode_lock(vp);
+	if (!UBCINFOEXISTS(vp))
+		goto out;
+
+	uip = vp->v_ubcinfo;
+	if (uip == NULL)
+		goto out;
+
+	if (uip->cs_blobs == NULL)
+		goto out;
+
+	/* It is OK to extract the teamid from the first blob
+	   because all blobs of a vnode must have the same teamid */
+	str = uip->cs_blobs->csb_teamid;
+out:
+	vnode_unlock(vp);
+
+	return str;
+}
+
+uint32_t
+cs_entitlement_flags(struct proc *p)
+{
+	return (p->p_csflags & CS_ENTITLEMENT_FLAGS);
+}
+
+/*
+ * Function: csfg_get_path
+ *
+ * Description: This populates the buffer passed in
+ *		with the path of the vnode.
+ *		When calling this, the fileglob
+ *		cannot go away: the caller must hold
+ *		a reference on the fileglob or fileproc
+ */
+int
+csfg_get_path(struct fileglob *fg, char *path, int *len)
+{
+	vnode_t vp = NULL;
+
+	if (FILEGLOB_DTYPE(fg) != DTYPE_VNODE)
+		return -1;
+
+	vp = (struct vnode *)fg->fg_data;
+
+	/* vn_getpath returns 0 for success,
+	   or an error code */
+	return vn_getpath(vp, path, len);
+}
diff --git a/bsd/kern/kern_csr.c b/bsd/kern/kern_csr.c
new file mode 100644
index 000000000..7badfcc58
--- /dev/null
+++ b/bsd/kern/kern_csr.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +/* allow everything by default? */ +/* XXX: set this to 0 later: */ +static int csr_allow_all = 1; + +/* allow everything if CSR_ALLOW_APPLE_INTERNAL is set */ +static int csr_allow_internal = 1; + +/* Current boot-arg policy: + * rootless=0 + * csr_allow_all = 1 + * rootless=1 + * csr_allow_all = 0 + * csr_allow_internal = 0 + * + * After : + * rootless=0 + * no effect + * rootless=1 + * csr_allow_internal = 0 + * + * Enforcement policy: + * =============================== + * | csr_allow_internal + * | 0 1 + * =============================== + * csr_ 0 | always customer + * allow_ | + * all 1 | never never + * =============================== + * NB: "customer" means enforce when + * CSR_ALLOW_APPLE_INTERNAL not set */ + +void +csr_init(void) +{ + boot_args *args = (boot_args *)PE_state.bootArgs; + if (args->flags & kBootArgsFlagCSRBoot) { + /* special booter; allow everything */ + csr_allow_all = 1; + } + + int rootless_boot_arg; + if (PE_parse_boot_argn("rootless", &rootless_boot_arg, sizeof(rootless_boot_arg))) { + /* XXX: set csr_allow_all to boot arg value for now + * (to be removed by ) */ + csr_allow_all = !rootless_boot_arg; + /* if rootless=1, do not allow everything when CSR_ALLOW_APPLE_INTERNAL is set */ + csr_allow_internal &= !rootless_boot_arg; + } +} + +int +csrctl(__unused proc_t p, struct csrctl_args *uap, __unused int32_t *retval) +{ + int error = 0; + + if (uap->useraddr == 0) + return EINVAL; + if (uap->usersize != sizeof(csr_config_t)) + return EINVAL; + + switch (uap->op) { + case CSR_OP_CHECK: + { + csr_config_t mask; + error = copyin(uap->useraddr, &mask, sizeof(csr_config_t)); + + if (error) + return error; + + error = csr_check(mask); + break; + } + + case CSR_OP_GET_ACTIVE_CONFIG: + case CSR_OP_GET_PENDING_CONFIG: /* fall through */ + { + csr_config_t config = 0; + if (uap->op == CSR_OP_GET_ACTIVE_CONFIG) + error = csr_get_active_config(&config); + else + error = csr_get_pending_config(&config); + + if (error) + return error; + + error = copyout(&config, uap->useraddr, sizeof(csr_config_t)); + break; + } + + default: + error = EINVAL; + break; + } + + return error; +} + +int +csr_get_active_config(csr_config_t *config) +{ + boot_args *args = (boot_args *)PE_state.bootArgs; + if (args->flags & kBootArgsFlagCSRActiveConfig) { + *config = args->csrActiveConfig & CSR_VALID_FLAGS; + } else { + /* XXX: change to 0 when is in the build */ + *config = CSR_ALLOW_APPLE_INTERNAL; + } + + return 0; +} + +int +csr_get_pending_config(csr_config_t *config) +{ + boot_args *args = (boot_args *)PE_state.bootArgs; + if (args->flags & kBootArgsFlagCSRPendingConfig) { + *config = args->csrPendingConfig & CSR_VALID_FLAGS; + return 0; + } else { + return ENOENT; + } +} + +int +csr_check(csr_config_t mask) +{ + if (csr_allow_all) { + return 0; + } + + csr_config_t config; + int error = csr_get_active_config(&config); + if (error) { + return error; + } + + if (csr_allow_internal && (config & CSR_ALLOW_APPLE_INTERNAL)) { + return 0; + } + + if (mask == 0) { + /* pass 0 to check if Rootless enforcement is active */ + return -1; + } + + error = (config & mask) ? 
0 : EPERM; + return error; +} + +void +csr_set_allow_all(int value) +{ + csr_allow_all = !!value; // force value to 0 or 1 +} diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index 7f53765a2..722fcfdff 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -93,9 +93,10 @@ #include #include #include -#include +#include #include #include +#include #include @@ -114,6 +115,7 @@ #include #include +#include #if CONFIG_PROTECT #include @@ -148,7 +150,6 @@ static void _fdrelse(struct proc * p, int fd); extern void file_lock_init(void); -extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat4, proc_t p); extern kauth_scope_t kauth_scope_fileop; @@ -442,6 +443,7 @@ fd_rdwr( uio_t auio = NULL; char uio_buf[ UIO_SIZEOF(1) ]; struct vfs_context context = *(vfs_context_current()); + bool wrote_some = false; p = current_proc(); @@ -477,9 +479,11 @@ fd_rdwr( if ( !(io_flg & IO_APPEND)) flags = FOF_OFFSET; - if (rw == UIO_WRITE) + if (rw == UIO_WRITE) { + user_ssize_t orig_resid = uio_resid(auio); error = fo_write(fp, auio, flags, &context); - else + wrote_some = uio_resid(auio) < orig_resid; + } else error = fo_read(fp, auio, flags, &context); if (aresid) @@ -489,7 +493,7 @@ fd_rdwr( error = EIO; } out: - if (rw == UIO_WRITE && error == 0) + if (wrote_some) fp_drop_written(p, fd, fp); else fp_drop(p, fd, fp, 0); @@ -1623,7 +1627,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) { struct user_fsignatures fs; kern_return_t kr; - off_t kernel_blob_offset; vm_offset_t kernel_blob_addr; vm_size_t kernel_blob_size; @@ -1658,11 +1661,16 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) vnode_put(vp); goto outdrop; } -#if defined(__LP64__) -#define CS_MAX_BLOB_SIZE (2560ULL * 1024ULL) /* max shared cache file XXX ? */ -#else -#define CS_MAX_BLOB_SIZE (1600ULL * 1024ULL) /* max shared cache file XXX ? */ -#endif +/* + * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover + * our use cases for the immediate future, but note that at the time of this commit, some + * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB). + * + * We should consider how we can manage this more effectively; the above means that some + * platforms are using megabytes of memory for signing data; it merely hasn't crossed the + * threshold considered ridiculous at the time of this change. 
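+ * For scale: the new ceiling works out to 10 MiB, up from the previous
+ * 2560 KiB (LP64) and 1600 KiB limits; an F_ADDSIGS/F_ADDFILESIGS request
+ * whose fs_blob_size exceeds it fails with E2BIG, as checked just below.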
+ */ +#define CS_MAX_BLOB_SIZE (10ULL * 1024ULL * 1024ULL) if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) { error = E2BIG; vnode_put(vp); @@ -1678,12 +1686,10 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } if(uap->cmd == F_ADDSIGS) { - kernel_blob_offset = 0; error = copyin(fs.fs_blob_start, (void *) kernel_blob_addr, kernel_blob_size); } else /* F_ADDFILESIGS */ { - kernel_blob_offset = fs.fs_blob_start; error = vn_rdwr(UIO_READ, vp, (caddr_t) kernel_blob_addr, @@ -1708,7 +1714,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) CPU_TYPE_ANY, /* not for a specific architecture */ fs.fs_file_start, kernel_blob_addr, - kernel_blob_offset, kernel_blob_size); if (error) { ubc_cs_blob_deallocate(kernel_blob_addr, @@ -1813,6 +1818,9 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } case F_TRANSCODEKEY: { + + char *backup_keyp = NULL; + unsigned backup_key_len = CP_MAX_WRAPPEDKEYSIZE; if (fp->f_type != DTYPE_VNODE) { error = EBADF; @@ -1826,9 +1834,23 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) error = ENOENT; goto outdrop; } - - error = cp_vnode_transcode (vp); + + MALLOC(backup_keyp, char *, backup_key_len, M_TEMP, M_WAITOK); + if (backup_keyp == NULL) { + error = ENOMEM; + goto outdrop; + } + + error = cp_vnode_transcode (vp, backup_keyp, &backup_key_len); vnode_put(vp); + + if (error == 0) { + error = copyout((caddr_t)backup_keyp, argp, backup_key_len); + *retval = backup_key_len; + } + + FREE(backup_keyp, M_TEMP); + break; } @@ -2103,7 +2125,187 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context); (void)vnode_put(vp); + + break; + } + + /* + * SPI (private) for indicating to the lower level storage driver that the + * subsequent writes should be of a particular IO type (burst, greedy, static), + * or other flavors that may be necessary. + */ + case F_SETIOTYPE: { + caddr_t param_ptr; + uint32_t param; + + if (uap->arg) { + /* extract 32 bits of flags from userland */ + param_ptr = (caddr_t) uap->arg; + param = (uint32_t) param_ptr; + } + else { + /* If no argument is specified, error out */ + error = EINVAL; + goto out; + } + /* + * Validate the different types of flags that can be specified: + * all of them are mutually exclusive for now. 
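+	 * The only value accepted today is F_IOTYPE_ISOCHRONOUS.  A minimal
+	 * userland sketch (illustrative only; assumes fd is a descriptor
+	 * open for writing on a vnode-backed file):
+	 *
+	 *	if (fcntl(fd, F_SETIOTYPE, F_IOTYPE_ISOCHRONOUS) == -1)
+	 *		err(1, "F_SETIOTYPE");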
+ */ + switch (param) { + case F_IOTYPE_ISOCHRONOUS: + break; + + default: + error = EINVAL; + goto out; + } + + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + error = vnode_getwithref(vp); + if (error) { + error = ENOENT; + goto outdrop; + } + + /* Only go forward if you have write access */ + vfs_context_t ctx = vfs_context_current(); + if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) { + vnode_put(vp); + error = EBADF; + goto outdrop; + } + + error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context); + (void)vnode_put(vp); + + break; + } + + + /* + * Extract the CodeDirectory of the vnode associated with + * the file descriptor and copy it back to user space + */ + case F_GETCODEDIR: { + struct user_fcodeblobs args; + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + + vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + if ((fp->f_flag & FREAD) == 0) { + error = EBADF; + goto outdrop; + } + + if (IS_64BIT_PROCESS(p)) { + struct user64_fcodeblobs args64; + + error = copyin(argp, &args64, sizeof(args64)); + if (error) + goto outdrop; + + args.f_cd_hash = args64.f_cd_hash; + args.f_hash_size = args64.f_hash_size; + args.f_cd_buffer = args64.f_cd_buffer; + args.f_cd_size = args64.f_cd_size; + args.f_out_size = args64.f_out_size; + args.f_arch = args64.f_arch; + } else { + struct user32_fcodeblobs args32; + + error = copyin(argp, &args32, sizeof(args32)); + if (error) + goto outdrop; + + args.f_cd_hash = CAST_USER_ADDR_T(args32.f_cd_hash); + args.f_hash_size = args32.f_hash_size; + args.f_cd_buffer = CAST_USER_ADDR_T(args32.f_cd_buffer); + args.f_cd_size = args32.f_cd_size; + args.f_out_size = CAST_USER_ADDR_T(args32.f_out_size); + args.f_arch = args32.f_arch; + } + + if (vp->v_ubcinfo == NULL) { + error = EINVAL; + goto outdrop; + } + + struct cs_blob *t_blob = vp->v_ubcinfo->cs_blobs; + + /* + * This call fails if there is no cs_blob corresponding to the + * vnode, or if there are multiple cs_blobs present, and the caller + * did not specify which cpu_type they want the cs_blob for + */ + if (t_blob == NULL) { + error = ENOENT; /* there is no codesigning blob for this process */ + goto outdrop; + } else if (args.f_arch == 0 && t_blob->csb_next != NULL) { + error = ENOENT; /* too many architectures and none specified */ + goto outdrop; + } + + /* If the user specified an architecture, find the right blob */ + if (args.f_arch != 0) { + while (t_blob) { + if (t_blob->csb_cpu_type == args.f_arch) + break; + t_blob = t_blob->csb_next; + } + /* The cpu_type the user requested could not be found */ + if (t_blob == NULL) { + error = ENOENT; + goto outdrop; + } + } + + const CS_SuperBlob *super_blob = (void *)t_blob->csb_mem_kaddr; + const CS_CodeDirectory *cd = findCodeDirectory(super_blob, + (char *) super_blob, + (char *) super_blob + t_blob->csb_mem_size); + if (cd == NULL) { + error = ENOENT; + goto outdrop; + } + + uint64_t buffer_size = ntohl(cd->length); + + if (buffer_size > UINT_MAX) { + error = ERANGE; + goto outdrop; + } + + error = copyout(&buffer_size, args.f_out_size, sizeof(unsigned int)); + if (error) + goto outdrop; + + if (sizeof(t_blob->csb_sha1) > args.f_hash_size || + buffer_size > args.f_cd_size) { + error = ERANGE; + goto outdrop; + } + + error = copyout(t_blob->csb_sha1, args.f_cd_hash, sizeof(t_blob->csb_sha1)); + if (error) + goto outdrop; + error = copyout(cd, args.f_cd_buffer, buffer_size); + if (error) + goto outdrop; + break; 
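+		/*
+		 * Usage note (illustrative): callers typically issue
+		 * F_GETCODEDIR twice, first as a probe with a small f_cd_size
+		 * to learn the CodeDirectory length through f_out_size (that
+		 * probe then fails with ERANGE), then again with a buffer of
+		 * at least that size to receive the CodeDirectory and the
+		 * blob's SHA-1 in f_cd_hash.
+		 */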
} @@ -2628,7 +2830,6 @@ fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec struct user32_stat64 user32_sb64; } dest; int error, my_size; - int funnel_state; file_type_t type; caddr_t data; kauth_filesec_t fsec; @@ -2683,9 +2884,7 @@ fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec break; case DTYPE_KQUEUE: - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = kqueue_stat(fp, sbptr, isstat64, p); - thread_funnel_set(kernel_flock, funnel_state); + error = kqueue_stat((void *)data, sbptr, isstat64, p); break; default: @@ -4297,6 +4496,11 @@ fg_free(struct fileglob *fg) { OSAddAtomic(-1, &nfiles); + if (fg->fg_vn_data) { + fg_vn_data_free(fg->fg_vn_data); + fg->fg_vn_data = NULL; + } + if (IS_VALID_CRED(fg->fg_cred)) { kauth_cred_unref(&fg->fg_cred); } @@ -4327,18 +4531,6 @@ fg_free(struct fileglob *fg) * * Locks: This function internally takes and drops proc_fdlock() * - * Notes: This function drops and retakes the kernel funnel; this is - * inherently unsafe, since another thread may have the - * proc_fdlock. - * - * XXX: We should likely reverse the lock and funnel drop/acquire - * order to avoid the small race window; it's also possible that - * if the program doing the exec has an outstanding listen socket - * and a network connection is completed asynchronously that we - * will end up with a "ghost" socket reference in the new process. - * - * This needs reworking to make it safe to remove the funnel from - * the execve and posix_spawn system calls. */ void fdexec(proc_t p, short flags) diff --git a/bsd/kern/kern_ecc.c b/bsd/kern/kern_ecc.c new file mode 100644 index 000000000..c2f018e52 --- /dev/null +++ b/bsd/kern/kern_ecc.c @@ -0,0 +1,67 @@ +/* +* Copyright (c) 2013 Apple Inc. All rights reserved. +* +* @APPLE_OSREFERENCE_LICENSE_HEADER_START@ +* +* This file contains Original Code and/or Modifications of Original Code +* as defined in and that are subject to the Apple Public Source License +* Version 2.0 (the 'License'). You may not use this file except in +* compliance with the License. The rights granted to you under the License +* may not be used to create, or enable the creation or redistribution of, +* unlawful or unlicensed copies of an Apple operating system, or to +* circumvent, violate, or enable the circumvention or violation of, any +* terms of an Apple operating system software license agreement. +* +* Please obtain a copy of the License at +* http://www.opensource.apple.com/apsl/ and read it before using this file. +* +* The Original Code and all software distributed under the License are +* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +* Please see the License for the specific language governing rights and +* limitations under the License. 
+* +* @APPLE_OSREFERENCE_LICENSE_HEADER_END@ +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int +get_ecc_data_handler(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, + struct sysctl_req *req) +{ + struct ecc_event ev; + int changed, retval; + + if (priv_check_cred(kauth_cred_get(), PRIV_HW_DEBUG_DATA, 0) != 0) { + return EPERM; + } + + if (KERN_SUCCESS != ecc_log_get_next_event(&ev)) { + /* + * EAGAIN would be better, but sysctl infrastructure + * interprets that */ + return EBUSY; + } + + retval = sysctl_io_opaque(req, &ev, sizeof(ev), &changed); + assert(!changed); + + return retval; +} + +SYSCTL_PROC(_kern, OID_AUTO, next_ecc_event, + CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLTYPE_STRUCT, + 0, 0, get_ecc_data_handler, + "-", ""); diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index e74047e2c..3b16c4ca0 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -83,7 +83,7 @@ #include #include -#include +#include #include #include #include @@ -128,8 +128,6 @@ static int kqueue_close(struct fileglob *fg, vfs_context_t ctx); static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx); static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx); -extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat64, - vfs_context_t ctx); static const struct fileops kqueueops = { .fo_type = DTYPE_KQUEUE, @@ -601,6 +599,13 @@ filt_proc(struct knote *kn, long hint) } #pragma clang diagnostic pop + + /* + * The kernel has a wrapper in place that returns the same data + * as is collected here, in kn_data. Any changes to how + * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected + * should also be reflected in the proc_pidnoteexit() wrapper. + */ if (event == NOTE_EXIT) { kn->kn_data = 0; if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) { @@ -624,6 +629,9 @@ filt_proc(struct knote *kn, long hint) case P_JETSAM_VMTHRASHING: kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING; break; + case P_JETSAM_FCTHRASHING: + kn->kn_data |= NOTE_EXIT_MEMORY_FCTHRASHING; + break; case P_JETSAM_VNODE: kn->kn_data |= NOTE_EXIT_MEMORY_VNODE; break; @@ -644,7 +652,6 @@ filt_proc(struct knote *kn, long hint) } } } - } /* atomic check, no locking need when called from above */ @@ -2469,10 +2476,9 @@ kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx) /*ARGSUSED*/ int -kqueue_stat(struct fileproc *fp, void *ub, int isstat64, __unused vfs_context_t ctx) +kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p) { - - struct kqueue *kq = (struct kqueue *)fp->f_data; + kqlock(kq); if (isstat64 != 0) { struct stat64 *sb64 = (struct stat64 *)ub; @@ -2481,7 +2487,7 @@ kqueue_stat(struct fileproc *fp, void *ub, int isstat64, __unused vfs_context_t if (kq->kq_state & KQ_KEV64) sb64->st_blksize = sizeof(struct kevent64_s); else - sb64->st_blksize = sizeof(struct kevent); + sb64->st_blksize = IS_64BIT_PROCESS(p) ? 
sizeof(struct user64_kevent) : sizeof(struct user32_kevent); sb64->st_mode = S_IFIFO; } else { struct stat *sb = (struct stat *)ub; @@ -2491,10 +2497,10 @@ kqueue_stat(struct fileproc *fp, void *ub, int isstat64, __unused vfs_context_t if (kq->kq_state & KQ_KEV64) sb->st_blksize = sizeof(struct kevent64_s); else - sb->st_blksize = sizeof(struct kevent); + sb->st_blksize = IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) : sizeof(struct user32_kevent); sb->st_mode = S_IFIFO; } - + kqunlock(kq); return (0); } @@ -2862,6 +2868,14 @@ knote_free(struct knote *kn) #include #include +#ifndef ROUNDUP64 +#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t)) +#endif + +#ifndef ADVANCE64 +#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n)) +#endif + static lck_grp_attr_t *kev_lck_grp_attr; static lck_attr_t *kev_lck_attr; static lck_grp_t *kev_lck_grp; @@ -2898,6 +2912,21 @@ static struct protosw eventsw[] = { } }; +__private_extern__ int kevt_getstat SYSCTL_HANDLER_ARGS; +__private_extern__ int kevt_pcblist SYSCTL_HANDLER_ARGS; + +SYSCTL_NODE(_net_systm, OID_AUTO, kevt, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel event family"); + +struct kevtstat kevtstat; +SYSCTL_PROC(_net_systm_kevt, OID_AUTO, stats, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + kevt_getstat, "S,kevtstat", ""); + +SYSCTL_PROC(_net_systm_kevt, OID_AUTO, pcblist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + kevt_pcblist, "S,xkevtpcb", ""); + static lck_mtx_t * event_getlock(struct socket *so, int locktype) { @@ -3007,12 +3036,14 @@ event_sofreelastref(struct socket *so) */ so->so_rcv.sb_flags &= ~SB_UPCALL; so->so_snd.sb_flags &= ~SB_UPCALL; - so->so_event = NULL; + so->so_event = sonullevent; lck_mtx_unlock(&(ev_pcb->evp_mtx)); lck_mtx_assert(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_NOTOWNED); lck_rw_lock_exclusive(kev_rwlock); LIST_REMOVE(ev_pcb, evp_link); + kevtstat.kes_pcbcount--; + kevtstat.kes_gencnt++; lck_rw_done(kev_rwlock); kev_delete(ev_pcb); @@ -3103,6 +3134,8 @@ kev_attach(struct socket *so, __unused int proto, __unused struct proc *p) so->so_pcb = (caddr_t) ev_pcb; lck_rw_lock_exclusive(kev_rwlock); LIST_INSERT_HEAD(&kern_event_head, ev_pcb, evp_link); + kevtstat.kes_pcbcount++; + kevtstat.kes_gencnt++; lck_rw_done(kev_rwlock); return (error); @@ -3159,9 +3192,10 @@ kev_msg_post(struct kev_msg *event_msg) * only */ if (event_msg->vendor_code < min_vendor || - event_msg->vendor_code > max_vendor) + event_msg->vendor_code > max_vendor) { + OSIncrementAtomic64((SInt64 *)&kevtstat.kes_badvendor); return (EINVAL); - + } return (kev_post_msg(event_msg)); } @@ -3185,13 +3219,15 @@ kev_post_msg(struct kev_msg *event_msg) } if (total_size > MLEN) { + OSIncrementAtomic64((SInt64 *)&kevtstat.kes_toobig); return (EMSGSIZE); } m = m_get(M_DONTWAIT, MT_DATA); - if (m == 0) - return (ENOBUFS); - + if (m == 0) { + OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem); + return (ENOMEM); + } ev = mtod(m, struct kern_event_msg *); total_size = KEV_MSG_HEADER_SIZE; @@ -3235,8 +3271,10 @@ kev_post_msg(struct kev_msg *event_msg) continue; } - if ((ev_pcb->evp_subclass_filter != KEV_ANY_SUBCLASS) && - (ev_pcb->evp_subclass_filter != ev->kev_subclass)) { + if ((ev_pcb->evp_subclass_filter != + KEV_ANY_SUBCLASS) && + (ev_pcb->evp_subclass_filter != + ev->kev_subclass)) { lck_mtx_unlock(&ev_pcb->evp_mtx); continue; } @@ -3245,13 +3283,25 @@ kev_post_msg(struct kev_msg *event_msg) m2 = m_copym(m, 0, m->m_len, M_NOWAIT); if (m2 == 0) { + OSIncrementAtomic64((SInt64 *)&kevtstat.kes_nomem); m_free(m); 
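+			/*
+			 * Out of mbufs: the copy failure has been counted and
+			 * the original record freed; back out of both locks
+			 * and report ENOMEM (formerly ENOBUFS) to the poster.
+			 */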
lck_mtx_unlock(&ev_pcb->evp_mtx); lck_rw_done(kev_rwlock); - return (ENOBUFS); + return (ENOMEM); } - if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) + if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) { + /* + * We use "m" for the socket stats as it would be + * unsafe to use "m2" + */ + so_inc_recv_data_stat(ev_pcb->evp_socket, + 1, m->m_len, SO_TC_BE); + sorwakeup(ev_pcb->evp_socket); + OSIncrementAtomic64((SInt64 *)&kevtstat.kes_posted); + } else { + OSIncrementAtomic64((SInt64 *)&kevtstat.kes_fullsock); + } lck_mtx_unlock(&ev_pcb->evp_mtx); } m_free(m); @@ -3301,6 +3351,139 @@ kev_control(struct socket *so, return (0); } +int +kevt_getstat SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + + lck_rw_lock_shared(kev_rwlock); + + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + goto done; + } + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = sizeof(struct kevtstat); + goto done; + } + + error = SYSCTL_OUT(req, &kevtstat, + MIN(sizeof(struct kevtstat), req->oldlen)); +done: + lck_rw_done(kev_rwlock); + + return (error); +} + +__private_extern__ int +kevt_pcblist SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + int n, i; + struct xsystmgen xsg; + void *buf = NULL; + size_t item_size = ROUNDUP64(sizeof (struct xkevtpcb)) + + ROUNDUP64(sizeof (struct xsocket_n)) + + 2 * ROUNDUP64(sizeof (struct xsockbuf_n)) + + ROUNDUP64(sizeof (struct xsockstat_n)); + struct kern_event_pcb *ev_pcb; + + buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO); + if (buf == NULL) + return (ENOMEM); + + lck_rw_lock_shared(kev_rwlock); + + n = kevtstat.kes_pcbcount; + + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = (n + n/8) * item_size; + goto done; + } + if (req->newptr != USER_ADDR_NULL) { + error = EPERM; + goto done; + } + bzero(&xsg, sizeof (xsg)); + xsg.xg_len = sizeof (xsg); + xsg.xg_count = n; + xsg.xg_gen = kevtstat.kes_gencnt; + xsg.xg_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); + if (error) { + goto done; + } + /* + * We are done if there is no pcb + */ + if (n == 0) { + goto done; + } + + i = 0; + for (i = 0, ev_pcb = LIST_FIRST(&kern_event_head); + i < n && ev_pcb != NULL; + i++, ev_pcb = LIST_NEXT(ev_pcb, evp_link)) { + struct xkevtpcb *xk = (struct xkevtpcb *)buf; + struct xsocket_n *xso = (struct xsocket_n *) + ADVANCE64(xk, sizeof (*xk)); + struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *) + ADVANCE64(xso, sizeof (*xso)); + struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *) + ADVANCE64(xsbrcv, sizeof (*xsbrcv)); + struct xsockstat_n *xsostats = (struct xsockstat_n *) + ADVANCE64(xsbsnd, sizeof (*xsbsnd)); + + bzero(buf, item_size); + + lck_mtx_lock(&ev_pcb->evp_mtx); + + xk->kep_len = sizeof(struct xkevtpcb); + xk->kep_kind = XSO_EVT; + xk->kep_evtpcb = (uint64_t)VM_KERNEL_ADDRPERM(ev_pcb); + xk->kep_vendor_code_filter = ev_pcb->evp_vendor_code_filter; + xk->kep_class_filter = ev_pcb->evp_class_filter; + xk->kep_subclass_filter = ev_pcb->evp_subclass_filter; + + sotoxsocket_n(ev_pcb->evp_socket, xso); + sbtoxsockbuf_n(ev_pcb->evp_socket ? + &ev_pcb->evp_socket->so_rcv : NULL, xsbrcv); + sbtoxsockbuf_n(ev_pcb->evp_socket ? + &ev_pcb->evp_socket->so_snd : NULL, xsbsnd); + sbtoxsockstat_n(ev_pcb->evp_socket, xsostats); + + lck_mtx_unlock(&ev_pcb->evp_mtx); + + error = SYSCTL_OUT(req, buf, item_size); + } + + if (error == 0) { + /* + * Give the user an updated idea of our state. 
+ * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. + */ + bzero(&xsg, sizeof (xsg)); + xsg.xg_len = sizeof (xsg); + xsg.xg_count = n; + xsg.xg_gen = kevtstat.kes_gencnt; + xsg.xg_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xsg, sizeof (xsg)); + if (error) { + goto done; + } + } + +done: + lck_rw_done(kev_rwlock); + + return (error); +} + #endif /* SOCKETS */ @@ -3309,7 +3492,6 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) { struct vinfo_stat * st; - /* No need for the funnel as fd is kept alive */ st = &kinfo->kq_stat; st->vst_size = kq->kq_count; diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index 0fb4a347a..3d2710538 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -33,8 +33,6 @@ * the terms and conditions for use and redistribution. */ -#include - /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. @@ -133,6 +131,7 @@ #include #include #include +#include #if CONFIG_MACF #include @@ -159,14 +158,21 @@ #if CONFIG_DTRACE /* Do not include dtrace.h, it redefines kmem_[alloc/free] */ extern void (*dtrace_fasttrap_exec_ptr)(proc_t); +extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t); extern void (*dtrace_helpers_cleanup)(proc_t); extern void dtrace_lazy_dofs_destroy(proc_t); +/* + * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c, + * we will store its value before actually calling it. + */ +static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL; + #include #endif /* support for child creation in exec after vfork */ -thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit); +thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit); void vfork_exit(proc_t p, int rv); int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart); extern void proc_apply_task_networkbg_internal(proc_t, thread_t); @@ -185,8 +191,13 @@ kern_return_t ipc_object_copyin( ipc_object_t *objectp); void ipc_port_release_send(ipc_port_t); +#if DEVELOPMENT || DEBUG +void task_importance_update_owner_info(task_t); +#endif + extern struct savearea *get_user_regs(thread_t); +__attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid); #include #include @@ -238,8 +249,9 @@ static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size); static void exec_resettextvp(proc_t, struct image_params *); static int check_for_signature(proc_t, struct image_params *); static void exec_prefault_data(proc_t, struct image_params *, load_result_t *); -static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch); -static errno_t exec_handle_spawnattr_apptype(proc_t p, int psa_apptype); +static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports); +static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, + ipc_port_t * portwatch_ports, int portwatch_count); /* * exec_add_user_string @@ -585,6 +597,44 @@ exec_fat_imgact(struct image_params *imgp) goto bad; } +#if DEVELOPMENT || DEBUG + if (cpu_type() == CPU_TYPE_ARM64) { + uint32_t fat_nfat_arch = 
OSSwapBigToHostInt32(fat_header->nfat_arch); + struct fat_arch *archs; + int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); + int spawn = (imgp->ip_flags & IMGPF_SPAWN); + + archs = (struct fat_arch *)(imgp->ip_vdata + sizeof(struct fat_header)); + + /* ip_vdata always has PAGE_SIZE of data */ + if (PAGE_SIZE >= (sizeof(struct fat_header) + (fat_nfat_arch + 1) * sizeof(struct fat_arch))) { + if (fat_nfat_arch > 0 + && OSSwapBigToHostInt32(archs[fat_nfat_arch].cputype) == CPU_TYPE_ARM64) { + + /* rdar://problem/15001727 */ + printf("Attempt to execute malformed binary %s\n", imgp->ip_strings); + + proc_lock(p); + p->p_csflags |= CS_KILLED; + proc_unlock(p); + + /* + * We can't stop the system call, so make sure the child never executes + * For vfork exec, the current implementation has not set up the thread in the + * child process, so we cannot signal it. Return an error code in that case. + */ + if (!vfexec && !spawn) { + psignal(p, SIGKILL); + error = 0; + } else { + error = EBADEXEC; + } + goto bad; + } + } + } +#endif + /* If posix_spawn binprefs exist, respect those prefs. */ psa = (struct _posix_spawnattr *) imgp->ip_px_sa; if (psa != NULL && psa->psa_binprefs[0] != 0) { @@ -693,7 +743,6 @@ exec_mach_imgact(struct image_params *imgp) struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata; proc_t p = vfs_context_proc(imgp->ip_vfs_context); int error = 0; - int vfexec = 0; task_t task; task_t new_task = NULL; /* protected by vfexec */ thread_t thread; @@ -703,7 +752,8 @@ exec_mach_imgact(struct image_params *imgp) load_return_t lret; load_result_t load_result; struct _posix_spawnattr *psa = NULL; - int spawn = (imgp->ip_flags & IMGPF_SPAWN); + int spawn = (imgp->ip_flags & IMGPF_SPAWN); + int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); /* * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference @@ -739,15 +789,6 @@ exec_mach_imgact(struct image_params *imgp) thread = current_thread(); uthread = get_bsdthread_info(thread); - /* - * Save off the vfexec state up front; we have to do this, because - * we need to know if we were in this state initially subsequent to - * creating the backing task, thread, and uthread for the child - * process (from the vfs_context_t from in img_parms). - */ - if (uthread->uu_flag & UT_VFORK) - vfexec = 1; /* Mark in exec */ - if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) imgp->ip_flags |= IMGPF_IS_64BIT; @@ -805,7 +846,7 @@ grade: */ if (vfexec || spawn) { if (vfexec) { - imgp->ip_new_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); + imgp->ip_new_thread = fork_create_child(task, COALITION_NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); if (imgp->ip_new_thread == NULL) { error = ENOMEM; goto bad; @@ -840,6 +881,10 @@ grade: * NOTE: An error after this point indicates we have potentially * destroyed or overwritten some process state while attempting an * execve() following a vfork(), which is an unrecoverable condition. + * We send the new process an immediate SIGKILL to avoid it executing + * any instructions in the mutated address space. For true spawns, + * this is not the case, and "too late" is still not too late to + * return an error code to the parent process. 
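+	 * Concretely, the badtoolate: path below implements this recovery:
+	 * a failed vfork-exec sends the half-constructed child SIGKILL via
+	 * psignal_vfork(), a failed plain exec sends SIGKILL to the current
+	 * process and reports success, and only a failed posix_spawn hands
+	 * the error back to the parent.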
*/ /* @@ -852,6 +897,11 @@ grade: goto badtoolate; } + proc_lock(p); + p->p_cputype = imgp->ip_origcputype; + p->p_cpusubtype = imgp->ip_origcpusubtype; + proc_unlock(p); + vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); /* @@ -861,7 +911,7 @@ grade: if (load_result.csflags & CS_VALID) { imgp->ip_csflags |= load_result.csflags & (CS_VALID| - CS_HARD|CS_KILL|CS_ENFORCEMENT| + CS_HARD|CS_KILL|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM| CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT); } else { imgp->ip_csflags &= ~CS_VALID; @@ -873,6 +923,8 @@ grade: imgp->ip_csflags |= CS_KILL; if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT) imgp->ip_csflags |= CS_ENFORCEMENT; + if (p->p_csflags & CS_EXEC_SET_INSTALLER) + imgp->ip_csflags |= CS_INSTALLER; /* @@ -892,15 +944,20 @@ grade: * deal with set[ug]id. */ error = exec_handle_sugid(imgp); + if (error) { + goto badtoolate; + } + + /* + * deal with voucher on exec-calling thread. + */ + if (imgp->ip_new_thread == NULL) + thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL); /* Make sure we won't interrupt ourself signalling a partial process */ if (!vfexec && !spawn && (p->p_lflag & P_LTRACED)) psignal(p, SIGTRAP); - if (error) { - goto badtoolate; - } - if (load_result.unixproc && create_unix_stack(get_task_map(task), &load_result, @@ -940,8 +997,8 @@ grade: error = copyoutptr(load_result.mach_header, ap, new_ptr_size); if (error) { - if (vfexec || spawn) - vm_map_switch(old_map); + if (vfexec || spawn) + vm_map_switch(old_map); goto badtoolate; } task_set_dyld_info(task, load_result.all_image_info_addr, @@ -1002,6 +1059,13 @@ grade: pal_dbg_set_task_name( p->task ); +#if DEVELOPMENT || DEBUG + /* + * Update the pid an proc name for importance base if any + */ + task_importance_update_owner_info(p->task); +#endif + memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid)); // dtrace code cleanup needed @@ -1036,8 +1100,8 @@ grade: */ proc_lock(p); if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) { - (*dtrace_fasttrap_exec_ptr)(p); - } + (*dtrace_fasttrap_exec_ptr)(p); + } proc_unlock(p); #endif @@ -1082,15 +1146,6 @@ grade: } } - /* - * Apply the apptype state (which primes the task for importance donation) - * This must be done after the exec so that the child's thread is ready - */ - if (imgp->ip_px_sa != NULL) { - psa = (struct _posix_spawnattr *) imgp->ip_px_sa; - exec_handle_spawnattr_apptype(p, psa->psa_apptype); - } - /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back @@ -1112,17 +1167,32 @@ grade: psignal_vfork(p, new_task, thread, SIGTRAP); } + goto done; + badtoolate: -if (!spawn) - /* notify only if it has not failed due to FP Key error */ - if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) - proc_knote(p, NOTE_EXEC); + /* Don't allow child process to execute any instructions */ + if (!spawn) { + if (vfexec) { + psignal_vfork(p, new_task, thread, SIGKILL); + } else { + psignal(p, SIGKILL); + } - if (vfexec || spawn) { + /* We can't stop this system call at this point, so just pretend we succeeded */ + error = 0; + } + +done: + if (!spawn) { + /* notify only if it has not failed due to FP Key error */ + if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) + proc_knote(p, NOTE_EXEC); + } + + /* Drop extra references for cases where we don't expect the caller to clean up */ + if (vfexec || (spawn && error == 0)) { task_deallocate(new_task); thread_deallocate(thread); - if (error) - error = 0; } bad: @@ -1178,7 +1248,7 @@ struct 
execsw { static int exec_activate_image(struct image_params *imgp) { - struct nameidata nd; + struct nameidata *ndp = NULL; int error; int resid; int once = 1; /* save SGUID-ness for interpreted files */ @@ -1198,15 +1268,21 @@ exec_activate_image(struct image_params *imgp) /* Use imgp->ip_strings, which contains the copyin-ed exec path */ DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings); - NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO); + if (ndp == NULL) { + error = ENOMEM; + goto bad_notrans; + } + + NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context); again: - error = namei(&nd); + error = namei(ndp); if (error) goto bad_notrans; - imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */ - imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */ + imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */ + imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */ /* * Before we start the transition from binary A to binary B, make @@ -1220,7 +1296,7 @@ again: proc_unlock(p); goto bad_notrans; } - error = proc_transstart(p, 1); + error = proc_transstart(p, 1, 0); proc_unlock(p); if (error) goto bad_notrans; @@ -1284,14 +1360,14 @@ encapsulated_binary: imgp->ip_scriptvp = imgp->ip_vp; #endif - nameidone(&nd); + nameidone(ndp); vnode_put(imgp->ip_vp); imgp->ip_vp = NULL; /* already put */ imgp->ip_ndp = NULL; /* already nameidone */ /* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */ - NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, + NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context); proc_transend(p, 0); @@ -1309,7 +1385,7 @@ encapsulated_binary: if (error == 0 && kauth_authorize_fileop_has_listeners()) { kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context), KAUTH_FILEOP_EXEC, - (uintptr_t)nd.ni_vp, 0); + (uintptr_t)ndp->ni_vp, 0); } bad: @@ -1320,15 +1396,17 @@ bad_notrans: execargs_free(imgp); if (imgp->ip_ndp) nameidone(imgp->ip_ndp); + if (ndp) + FREE(ndp, M_TEMP); return (error); } /* - * exec_handle_spawnattr_apptype + * exec_handle_spawnattr_policy * - * Description: Decode and apply the posix_spawn apptype to the task. + * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task. * * Parameters: proc_t p process to apply attributes to * int psa_apptype posix spawn attribute apptype @@ -1336,10 +1414,13 @@ bad_notrans: * Returns: 0 Success */ static errno_t -exec_handle_spawnattr_apptype(proc_t p, int psa_apptype) +exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, + ipc_port_t * portwatch_ports, int portwatch_count) { + int apptype = TASK_APPTYPE_NONE; + int qos_clamp = THREAD_QOS_UNSPECIFIED; + if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) { - int apptype = TASK_APPTYPE_NONE; int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK; switch(proctype) { @@ -1363,13 +1444,32 @@ exec_handle_spawnattr_apptype(proc_t p, int psa_apptype) break; default: apptype = TASK_APPTYPE_NONE; + /* TODO: Should an invalid value here fail the spawn? 
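+			 * As implemented, an unrecognized proctype silently
+			 * degrades to TASK_APPTYPE_NONE.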
*/ break; } + } - proc_set_task_apptype(p->task, apptype); + if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) { + switch (psa_qos_clamp) { + case POSIX_SPAWN_PROC_CLAMP_UTILITY: + qos_clamp = THREAD_QOS_UTILITY; + break; + case POSIX_SPAWN_PROC_CLAMP_BACKGROUND: + qos_clamp = THREAD_QOS_BACKGROUND; + break; + case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE: + qos_clamp = THREAD_QOS_MAINTENANCE; + break; + default: + qos_clamp = THREAD_QOS_UNSPECIFIED; + /* TODO: Should an invalid value here fail the spawn? */ + break; + } + } - /* TODO: Should an invalid value here fail the spawn? */ - return (0); + if (psa_apptype != TASK_APPTYPE_NONE || qos_clamp != THREAD_QOS_UNSPECIFIED) { + proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, + portwatch_ports, portwatch_count); } return (0); @@ -1391,7 +1491,7 @@ exec_handle_spawnattr_apptype(proc_t p, int psa_apptype) * ENOTSUP Illegal posix_spawn attr flag was set */ static errno_t -exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch_ports) +exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports) { _posix_spawn_port_actions_t pacts = imgp->ip_px_spa; proc_t p = vfs_context_proc(imgp->ip_vfs_context); @@ -1401,8 +1501,7 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_ errno_t ret = 0; int i; - if (need_portwatch != NULL) - *need_portwatch = 0; + *portwatch_present = FALSE; for (i = 0; i < pacts->pspa_count; i++) { act = &pacts->pspa_actions[i]; @@ -1440,8 +1539,7 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_ #endif case PSPA_IMP_WATCHPORTS: if (portwatch_ports != NULL) { - if (need_portwatch != NULL) - *need_portwatch = 1; + *portwatch_present = TRUE; /* hold on to this till end of spawn */ portwatch_ports[i] = port; ret = 0; @@ -1507,29 +1605,41 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) * context of UIO_SYSSPACE, and casts the address * argument to a user_addr_t. */ - struct vnode_attr va; - struct nameidata nd; + char *bufp = NULL; + struct vnode_attr *vap; + struct nameidata *ndp; int mode = psfa->psfaa_openargs.psfao_mode; struct dup2_args dup2a; struct close_nocancel_args ca; int origfd; - VATTR_INIT(&va); + MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO); + if (bufp == NULL) { + error = ENOMEM; + break; + } + + vap = (struct vnode_attr *) bufp; + ndp = (struct nameidata *) (bufp + sizeof(*vap)); + + VATTR_INIT(vap); /* Mask off all but regular access permissions */ mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT; - VATTR_SET(&va, va_mode, mode & ACCESSPERMS); + VATTR_SET(vap, va_mode, mode & ACCESSPERMS); - NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE, + NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE, CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path), imgp->ip_vfs_context); error = open1(imgp->ip_vfs_context, - &nd, + ndp, psfa->psfaa_openargs.psfao_oflag, - &va, + vap, fileproc_alloc_init, NULL, ival); + FREE(bufp, M_TEMP); + /* * If there's an error, or we get the right fd by * accident, then drop out here. 
This is easier than @@ -1818,7 +1928,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) boolean_t spawn_no_exec = FALSE; boolean_t proc_transit_set = TRUE; boolean_t exec_done = FALSE; - int need_portwatch = 0, portwatch_count = 0; + int portwatch_count = 0; ipc_port_t * portwatch_ports = NULL; vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports); @@ -1845,6 +1955,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE); imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); + imgp->ip_mac_return = 0; if (uap->adesc != USER_ADDR_NULL) { if(is_64) { @@ -1874,8 +1985,8 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) /* * We are not copying the port_actions pointer, * because we already have it from px_args. + * This is a bit fragile: */ - if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0)) goto bad; @@ -1964,15 +2075,63 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) * and execve(). */ if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){ - if ((error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN)) != 0) + + /* + * Set the new task's coalition, if it is requested. + * TODO: privilege check - 15365900 + */ + coalition_t coal = COALITION_NULL; +#if CONFIG_COALITIONS + if (imgp->ip_px_sa) { + uint64_t cid = px_sa.psa_coalitionid; + if (cid != 0) { +#if COALITION_DEBUG + printf("%s: searching for coalition ID %llu\n", __func__, cid); +#endif + coal = coalition_find_and_activate_by_id(cid); + if (coal == COALITION_NULL) { +#if COALITION_DEBUG + printf("%s: could not find coalition ID %llu (perhaps it has been terminated or reaped)\n", __func__, cid); +#endif + error = ESRCH; + goto bad; + } + } + } +#endif /* CONFIG_COALITIONS */ + + error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal); + + if (error != 0) { + if (coal != COALITION_NULL) { +#if CONFIG_COALITIONS + coalition_remove_active(coal); + coalition_release(coal); +#endif /* CONFIG_COALITIONS */ + } goto bad; + } imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */ spawn_no_exec = TRUE; /* used in later tests */ - DTRACE_PROC1(create, proc_t, p); + + if (coal != COALITION_NULL) { +#if CONFIG_COALITIONS + coalition_remove_active(coal); + coalition_release(coal); +#endif /* CONFIG_COALITIONS */ + } } - if (spawn_no_exec) + if (spawn_no_exec) { p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread); + + /* + * We had to wait until this point before firing the + * proc:::create probe, otherwise p would not point to the + * child process. + */ + DTRACE_PROC1(create, proc_t, p); + } assert(p != NULL); /* By default, the thread everyone plays with is the parent */ @@ -2007,6 +2166,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) /* Has spawn port actions? */ if (imgp->ip_px_spa != NULL) { boolean_t is_adaptive = FALSE; + boolean_t portwatch_present = FALSE; /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */ if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE) @@ -2025,8 +2185,14 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) } if ((error = exec_handle_port_actions(imgp, - imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &need_portwatch, portwatch_ports)) != 0) + imgp->ip_px_sa != NULL ? 
px_sa.psa_flags : 0, &portwatch_present, portwatch_ports)) != 0) goto bad; + + if (portwatch_present == FALSE && portwatch_ports != NULL) { + FREE(portwatch_ports, M_TEMP); + portwatch_ports = NULL; + portwatch_count = 0; + } } /* Has spawn attr? */ @@ -2120,7 +2286,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) * Activate the image */ error = exec_activate_image(imgp); - + if (error == 0) { /* process completed the exec */ exec_done = TRUE; @@ -2204,41 +2370,6 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) bad: - if (portwatch_ports != NULL) { - int needboost = 0; - - /* - * Mark the ports as destined to be handed off to the new task, and - * transfer any boosts to the new task. - * We need to release the rights even if the posix_spawn has failed. - */ - if (need_portwatch != 0) { - for (int i = 0; i < portwatch_count; i++) { - ipc_port_t port = NULL; - - if ((port = portwatch_ports[i]) != NULL) { - int boost = 0; - if (error == 0) - task_add_importance_watchport(p->task, p->p_pid, port, &boost); - ipc_port_release_send(port); - needboost += boost; - } - } - } - - if (needboost != 0) { - /* - * Apply the boost count found on the ports, which will keep the - * newly created process out of background until it handles the incoming messages. - */ - task_hold_multiple_assertion(p->task, needboost); - } - - FREE(portwatch_ports, M_TEMP); - portwatch_ports = NULL; - portwatch_count = 0; - } - if (error == 0) { /* reset delay idle sleep status if set */ if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) @@ -2267,7 +2398,8 @@ bad: /* Has jetsam attributes? */ if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) { memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), - TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND)); + TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND), + (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL)); } #endif } @@ -2297,6 +2429,51 @@ bad: /* notify only if it has not failed due to FP Key error */ if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) proc_knote(p, NOTE_EXEC); + } else { + /* reset the importance attribute from our previous life */ + task_importance_reset(p->task); + + /* reset atm context from task */ + task_atm_reset(p->task); + } + + /* + * Apply the spawnattr policy, apptype (which primes the task for importance donation), + * and bind any portwatch ports to the new task. + * This must be done after the exec so that the child's thread is ready, + * and after the in transit state has been released, because priority is + * dropped here so we need to be prepared for a potentially long preemption interval + * + * TODO: Consider splitting this up into separate phases + */ + if (error == 0 && imgp->ip_px_sa != NULL) { + struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; + + exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, + portwatch_ports, portwatch_count); + } + + /* Apply the main thread qos */ + if (error == 0) { + thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread(); + + task_set_main_thread_qos(p->task, main_thread); + } + + /* + * Release any ports we kept around for binding to the new task + * We need to release the rights even if the posix_spawn has failed. 
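+	 * (Binding the ports to the new task, including any importance
+	 * boost hand-off, is now done earlier by
+	 * exec_handle_spawnattr_policy(); the only cleanup left here is
+	 * dropping our send rights via ipc_port_release_send().)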
+ */ + if (portwatch_ports != NULL) { + for (int i = 0; i < portwatch_count; i++) { + ipc_port_t port = NULL; + if ((port = portwatch_ports[i]) != NULL) { + ipc_port_release_send(port); + } + } + FREE(portwatch_ports, M_TEMP); + portwatch_ports = NULL; + portwatch_count = 0; } /* @@ -2377,6 +2554,9 @@ bad: DTRACE_PROC(exec__success); } } + + if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) + (*dtrace_proc_waitfor_hook)(p); #endif /* Return to both the parent and the child? */ @@ -2508,6 +2688,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ int is_64 = IS_64BIT_PROCESS(p); struct vfs_context context; + struct uthread *uthread; context.vc_thread = current_thread(); context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */ @@ -2534,6 +2715,12 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE); imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); + imgp->ip_mac_return = 0; + + uthread = get_bsdthread_info(current_thread()); + if (uthread->uu_flag & UT_VFORK) { + imgp->ip_flags |= IMGPF_VFORK_EXEC; + } #if CONFIG_MACF if (uap->mac_p != USER_ADDR_NULL) { @@ -2570,14 +2757,27 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) mac_vnode_label_free(imgp->ip_scriptlabelp); #endif if (!error) { - struct uthread *uthread; - /* Sever any extant thread affinity */ thread_affinity_exec(current_thread()); + thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread(); + + task_set_main_thread_qos(p->task, main_thread); + + /* reset task importance */ + task_importance_reset(p->task); + + /* reset atm context from task */ + task_atm_reset(p->task); + DTRACE_PROC(exec__success); - uthread = get_bsdthread_info(current_thread()); - if (uthread->uu_flag & UT_VFORK) { + +#if CONFIG_DTRACE + if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) + (*dtrace_proc_waitfor_hook)(p); +#endif + + if (imgp->ip_flags & IMGPF_VFORK_EXEC) { vfork_return(p, retval, p->p_pid); (void)thread_resume(imgp->ip_new_thread); } @@ -3227,17 +3427,17 @@ exec_add_apple_strings(struct image_params *imgp) /* adding the NANO_ENGAGE_KEY key */ if (imgp->ip_px_sa) { - int proc_type = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_apptype) & POSIX_SPAWN_PROC_TYPE_MASK; + int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags); - if (proc_type == POSIX_SPAWN_PROC_TYPE_APP_DEFAULT || proc_type == POSIX_SPAWN_PROC_TYPE_APP_TAL) { - char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1]; + if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) { + char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1]; - snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY); - error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE); - if(error) - goto bad; - imgp->ip_applec++; - } + snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY); + error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE); + if (error) + goto bad; + imgp->ip_applec++; + } } /* @@ -3419,9 +3619,11 @@ exec_handle_sugid(struct image_params *imgp) proc_t p = vfs_context_proc(imgp->ip_vfs_context); int i; int leave_sugid_clear = 0; + int mac_reset_ipc = 0; int error = 0; #if CONFIG_MACF - 
int mac_transition; + int mac_transition, disjoint_cred = 0; + int label_update_return = 0; /* * Determine whether a call to update the MAC label will result in the @@ -3435,6 +3637,7 @@ exec_handle_sugid(struct image_params *imgp) mac_transition = mac_cred_check_label_update_execve( imgp->ip_vfs_context, imgp->ip_vp, + imgp->ip_arch_offset, imgp->ip_scriptvp, imgp->ip_scriptlabelp, imgp->ip_execlabelp, @@ -3504,14 +3707,19 @@ handle_mac_transition: * modifying any others sharing it. */ if (mac_transition) { - kauth_cred_t my_cred; - if (kauth_proc_label_update_execve(p, + kauth_proc_label_update_execve(p, imgp->ip_vfs_context, imgp->ip_vp, + imgp->ip_arch_offset, imgp->ip_scriptvp, imgp->ip_scriptlabelp, imgp->ip_execlabelp, - imgp->ip_px_smpx)) { + &imgp->ip_csflags, + imgp->ip_px_smpx, + &disjoint_cred, /* will be non zero if disjoint */ + &label_update_return); + + if (disjoint_cred) { /* * If updating the MAC label resulted in a * disjoint credential, flag that we need to @@ -3523,11 +3731,12 @@ handle_mac_transition: */ leave_sugid_clear = 0; } - - my_cred = kauth_cred_proc_ref(p); - mac_task_label_update_cred(my_cred, p->task); - kauth_cred_unref(&my_cred); + + imgp->ip_mac_return = label_update_return; } + + mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp); + #endif /* CONFIG_MACF */ /* @@ -3539,17 +3748,19 @@ handle_mac_transition: * So we don't set the P_SUGID or reset mach ports and fds * on the basis of simply running this code. */ - if (!leave_sugid_clear) { + if (mac_reset_ipc || !leave_sugid_clear) { /* - * Have mach reset the task and thread ports. - * We don't want anyone who had the ports before - * a setuid exec to be able to access/control the - * task/thread after. - */ + * Have mach reset the task and thread ports. + * We don't want anyone who had the ports before + * a setuid exec to be able to access/control the + * task/thread after. + */ ipc_task_reset(p->task); ipc_thread_reset((imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread()); + } + if (!leave_sugid_clear) { /* * Flag the process as setuid. 
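/*
 * [Editor's note: illustrative sketch, not part of the patch. The rewritten
 * exec_handle_sugid() above now has two independent reasons to scrub Mach
 * ports across exec: a set[ug]id or MAC label transition (!leave_sugid_clear)
 * and a MAC policy veto on port inheritance (mac_proc_check_inherit_ipc_ports).
 * Condensed into one helper, assuming the helpers behave as they are named:]
 */
static void
exec_maybe_reset_ipc(proc_t p, struct image_params *imgp,
    int leave_sugid_clear, int mac_reset_ipc)
{
	if (mac_reset_ipc || !leave_sugid_clear) {
		/*
		 * Nobody who held the task/thread ports before the
		 * privilege transition may control the task afterwards.
		 */
		ipc_task_reset(p->task);
		ipc_thread_reset((imgp->ip_new_thread != NULL) ?
		    imgp->ip_new_thread : current_thread());
	}
}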
*/ @@ -3580,6 +3791,7 @@ handle_mac_transition: struct fileproc *fp; int indx; int flag; + struct nameidata *ndp = NULL; if (i == 0) flag = FREAD; @@ -3590,13 +3802,17 @@ handle_mac_transition: &fp, &indx, imgp->ip_vfs_context)) != 0) continue; - struct nameidata nd1; + MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO); + if (ndp == NULL) { + error = ENOMEM; + break; + } - NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, + NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T("/dev/null"), imgp->ip_vfs_context); - if ((error = vn_open(&nd1, flag, 0)) != 0) { + if ((error = vn_open(ndp, flag, 0)) != 0) { fp_free(p, indx, fp); break; } @@ -3605,14 +3821,16 @@ handle_mac_transition: fg->fg_flag = flag; fg->fg_ops = &vnops; - fg->fg_data = nd1.ni_vp; + fg->fg_data = ndp->ni_vp; - vnode_put(nd1.ni_vp); + vnode_put(ndp->ni_vp); proc_fdlock(p); procfdtbl_releasefd(p, indx, NULL); fp_drop(p, indx, fp, 1); proc_fdunlock(p); + + FREE(ndp, M_TEMP); } } } @@ -3630,6 +3848,7 @@ handle_mac_transition: goto handle_mac_transition; } } + #endif /* CONFIG_MACF */ /* @@ -3737,9 +3956,12 @@ create_unix_stack(vm_map_t map, load_result_t* load_result, #include -static char init_program_name[128] = "/sbin/launchd"; - -struct execve_args init_exec_args; +static const char * init_programs[] = { +#if DEVELOPMENT || DEBUG + "/usr/local/sbin/launchd.development", +#endif + "/sbin/launchd", +}; /* * load_init_program @@ -3759,75 +3981,77 @@ struct execve_args init_exec_args; void load_init_program(proc_t p) { - vm_offset_t init_addr; - int argc = 0; + vm_offset_t init_addr, addr; + int argc; uint32_t argv[3]; + unsigned int i; int error; int retval[2]; - - /* - * Copy out program name. - */ + const char *init_program_name; + struct execve_args init_exec_args; init_addr = VM_MIN_ADDRESS; - (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE, - VM_FLAGS_ANYWHERE); + (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE); if (init_addr == 0) init_addr++; + + for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) { + + init_program_name = init_programs[i]; + addr = init_addr; + argc = 0; - (void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr), - (unsigned) sizeof(init_program_name)+1); - - argv[argc++] = (uint32_t)init_addr; - init_addr += sizeof(init_program_name); - init_addr = (vm_offset_t)ROUND_PTR(char, init_addr); + /* + * Copy out program name. + */ + (void) copyout(init_program_name, CAST_USER_ADDR_T(addr), strlen(init_program_name)+1); - /* - * Put out first (and only) argument, similarly. - * Assumes everything fits in a page as allocated - * above. - */ - if (boothowto & RB_SINGLE) { - const char *init_args = "-s"; + argv[argc++] = (uint32_t)addr; + addr += strlen(init_program_name)+1; + addr = (vm_offset_t)ROUND_PTR(char, addr); - copyout(init_args, CAST_USER_ADDR_T(init_addr), - strlen(init_args)); + /* + * Put out first (and only) argument, similarly. + * Assumes everything fits in a page as allocated above. + */ + if (boothowto & RB_SINGLE) { + const char *init_args = "-s"; - argv[argc++] = (uint32_t)init_addr; - init_addr += strlen(init_args); - init_addr = (vm_offset_t)ROUND_PTR(char, init_addr); + copyout(init_args, CAST_USER_ADDR_T(addr), strlen(init_args)+1); - } + argv[argc++] = (uint32_t)addr; + addr += strlen(init_args)+1; + addr = (vm_offset_t)ROUND_PTR(char, addr); + } - /* - * Null-end the argument list - */ - argv[argc] = 0; - - /* - * Copy out the argument list. 
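/*
 * [Editor's note: illustrative sketch, not part of the patch. The sugid
 * hunk above moves the large struct nameidata used to re-open fds 0-2 on
 * /dev/null from the kernel stack into an M_TEMP heap allocation. The
 * general shape of that conversion; the caller owns the iocount on the
 * returned vnode and must vnode_put() it:]
 */
static int
open_devnull_nd_on_heap(int flag, vfs_context_t ctx, vnode_t *vpp)
{
	struct nameidata *ndp = NULL;
	int error;

	MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP,
	    M_WAITOK | M_ZERO);
	if (ndp == NULL)
		return (ENOMEM);

	NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
	    CAST_USER_ADDR_T("/dev/null"), ctx);

	error = vn_open(ndp, flag, 0);
	if (error == 0)
		*vpp = ndp->ni_vp;

	/* The nameidata is only needed for the lookup itself. */
	FREE(ndp, M_TEMP);
	return (error);
}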
- */ + /* + * Null-end the argument list + */ + argv[argc] = 0; - (void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr), - (unsigned) sizeof(argv)); - - /* - * Set up argument block for fake call to execve. - */ + /* + * Copy out the argument list. + */ + (void) copyout(argv, CAST_USER_ADDR_T(addr), sizeof(argv)); - init_exec_args.fname = CAST_USER_ADDR_T(argv[0]); - init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr); - init_exec_args.envp = CAST_USER_ADDR_T(0); + /* + * Set up argument block for fake call to execve. + */ + init_exec_args.fname = CAST_USER_ADDR_T(argv[0]); + init_exec_args.argp = CAST_USER_ADDR_T((char **)addr); + init_exec_args.envp = CAST_USER_ADDR_T(0); - /* - * So that mach_init task is set with uid,gid 0 token - */ - set_security_token(p); + /* + * So that init task is set with uid,gid 0 token + */ + set_security_token(p); - error = execve(p,&init_exec_args,retval); - if (error) - panic("Process 1 exec of %s failed, errno %d", - init_program_name, error); + error = execve(p, &init_exec_args, retval); + if (!error) + return; + } + + panic("Process 1 exec of %s failed, errno %d", init_program_name, error); } /* @@ -4126,7 +4350,20 @@ taskgated_required(proc_t p, boolean_t *require_success) error = cs_entitlements_blob_get(p, &blob, &length); if (error == 0 && blob != NULL) { - *require_success = TRUE; /* fatal on the desktop when entitlements are present */ + /* + * fatal on the desktop when entitlements are present, + * unless we started in single-user mode + */ + if ((boothowto & RB_SINGLE) == 0) + *require_success = TRUE; + /* + * Allow initproc to run without causing taskgated to launch + */ + if (p == initproc) { + *require_success = FALSE; + return FALSE; + } + return TRUE; } @@ -4134,6 +4371,18 @@ taskgated_required(proc_t p, boolean_t *require_success) return 0; } +/* + * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__ + * + * Description: Waits for the userspace daemon to respond to the request + * we made. Function declared non inline to be visible in + * stackshots and spindumps as well as debugging. + */ +__attribute__((noinline)) int +__EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid) +{ + return find_code_signature(task_access_port, new_pid); +} static int check_for_signature(proc_t p, struct image_params *imgp) @@ -4144,6 +4393,8 @@ check_for_signature(proc_t p, struct image_params *imgp) boolean_t unexpected_failure = FALSE; unsigned char hash[SHA1_RESULTLEN]; boolean_t require_success = FALSE; + int spawn = (imgp->ip_flags & IMGPF_SPAWN); + int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); /* * Override inherited code signing flags with the @@ -4158,6 +4409,17 @@ check_for_signature(proc_t p, struct image_params *imgp) if(p->p_csflags & (CS_HARD|CS_KILL)) { vm_map_switch_protect(get_task_map(p->task), TRUE); } + + /* + * image activation may be failed due to policy + * which is unexpected but security framework does not + * approve of exec, kill and return immediately. + */ + if (imgp->ip_mac_return != 0) { + error = imgp->ip_mac_return; + unexpected_failure = TRUE; + goto done; + } /* check if callout to taskgated is needed */ if (!taskgated_required(p, &require_success)) { @@ -4181,7 +4443,7 @@ check_for_signature(proc_t p, struct image_params *imgp) * rpc call, taskgated died, mig server died etc.). 
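/*
 * [Editor's note: illustrative sketch, not part of the patch. The
 * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__ wrapper above is a
 * debugging idiom worth naming: an __attribute__((noinline)) function whose
 * only job is to leave a recognizable frame on the stack while the thread
 * blocks on an upcall, so stackshots and spindumps show what is being
 * waited on. The same idiom in generic form; the names below are
 * hypothetical:]
 */
__attribute__((noinline)) static kern_return_t
__WAITING_ON_SOME_DAEMON_UPCALL__(mach_port_t port, int32_t pid)
{
	/* Must stay out of line, or the marker frame disappears. */
	return some_daemon_upcall(port, pid);
}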
*/ - kr = find_code_signature(port, p->p_pid); + kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid); switch (kr) { case KERN_SUCCESS: error = 0; @@ -4213,7 +4475,12 @@ done: if (!unexpected_failure) p->p_csflags |= CS_KILLED; /* make very sure execution fails */ - psignal(p, SIGKILL); + if (vfexec || spawn) { + psignal_vfork(p, p->task, imgp->ip_new_thread, SIGKILL); + error = 0; + } else { + psignal(p, SIGKILL); + } } return error; } diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index 1ef04170d..c1bff3128 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -145,6 +145,8 @@ extern void dtrace_lazy_dofs_destroy(proc_t); #include #include +#include + #include extern char init_task_failure_data[]; @@ -158,16 +160,14 @@ static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int re /* * Things which should have prototypes in headers, but don't */ -void *get_bsduthreadarg(thread_t); void proc_exit(proc_t p); int wait1continue(int result); int waitidcontinue(int result); -int *get_bsduthreadrval(thread_t); kern_return_t sys_perf_notify(thread_t thread, int pid); kern_return_t task_exception_notify(exception_type_t exception, mach_exception_data_type_t code, mach_exception_data_type_t subcode); void delay(int); -void gather_rusage_info_v2(proc_t p, struct rusage_info_v2 *ru, int flavor); +void gather_rusage_info(proc_t p, rusage_info_current *ru, int flavor); /* * NOTE: Source and target may *NOT* overlap! @@ -230,7 +230,6 @@ exit(proc_t p, struct exit_args *uap, int *retval) { exit1(p, W_EXITCODE(uap->rval, 0), retval); - /* drop funnel before we return */ thread_exception_return(); /* NOTREACHED */ while (TRUE) @@ -299,7 +298,7 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE); proc_lock(p); - error = proc_transstart(p, 1); + error = proc_transstart(p, 1, ((jetsam_flags & P_JETSAM_VNODE) ? 1 : 0)); if (error == EDEADLK) { /* Temp: If deadlock error, then it implies multithreaded exec is * in progress. Instead of letting exit continue and @@ -337,10 +336,24 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo } sig_lock_to_exit(p); } - if (p == initproc) { + if (p == initproc && current_proc() == p) { proc_unlock(p); printf("pid 1 exited (signal %d, exit %d)", WTERMSIG(rv), WEXITSTATUS(rv)); +#if (DEVELOPMENT || DEBUG) + int err; + /* + * For debugging purposes, generate a core file of initproc before + * panicking. Leave at least 300 MB free on the root volume, and ignore + * the process's corefile ulimit. + */ + if ((err = coredump(p, 300, 1)) != 0) { + printf("Failed to generate initproc core file: error %d", err); + } else { + printf("Generated initproc core file"); + sync(p, (void *)NULL, (int *)NULL); + } +#endif panic("%s died\nState at Last Exception:\n\n%s", (p->p_comm[0] != '\0' ?
p->p_comm : @@ -411,7 +424,7 @@ skipcheck: MALLOC_ZONE(rup, struct rusage_superset *, sizeof (*rup), M_ZOMBIE, M_WAITOK); if (rup != NULL) { - gather_rusage_info_v2(p, &rup->ri, RUSAGE_INFO_V2); + gather_rusage_info(p, &rup->ri, RUSAGE_INFO_CURRENT); rup->ri.ri_phys_footprint = 0; rup->ri.ri_proc_exit_abstime = mach_absolute_time(); @@ -471,10 +484,10 @@ proc_exit(proc_t p) int exitval; int knote_hint; - uth = (struct uthread *)get_bsdthread_info(current_thread()); + uth = current_uthread(); proc_lock(p); - proc_transstart(p, 1); + proc_transstart(p, 1, 0); if( !(p->p_lflag & P_LEXIT)) { /* * This can happen if a thread_terminate() occurs @@ -616,6 +629,17 @@ proc_exit(proc_t p) if ((tp != TTY_NULL) && (tp->t_session == sessp)) { session_unlock(sessp); + /* + * We're going to SIGHUP the foreground process + * group. It can't change from this point on + * until the revoke is complete. + * The process group changes under both the tty + * lock and proc_list_lock but we need only one + */ + tty_lock(tp); + ttysetpgrphup(tp); + tty_unlock(tp); + tty_pgsignal(tp, SIGHUP, 1); session_lock(sessp); @@ -639,12 +663,14 @@ proc_exit(proc_t p) } context.vc_thread = proc_thread(p); /* XXX */ context.vc_ucred = kauth_cred_proc_ref(p); - vnode_rele(ttyvp); VNOP_REVOKE(ttyvp, REVOKEALL, &context); if (cttyflag) { /* * Release the extra usecount taken in cttyopen. * usecount should be released after VNOP_REVOKE is called. + * This usecount was taken to ensure that + * the VNOP_REVOKE results in a close to + * the tty since cttyclose is a no-op. */ vnode_rele(ttyvp); } @@ -652,10 +678,17 @@ proc_exit(proc_t p) kauth_cred_unref(&context.vc_ucred); ttyvp = NULLVP; } - if (ttyvp) - vnode_rele(ttyvp); - if (tp) + if (tp) { + /* + * This is cleared even if not set. This is also done in + * spec_close to ensure that the flag is cleared. + */ + tty_lock(tp); + ttyclrpgrphup(tp); + tty_unlock(tp); + ttyfree(tp); + } } session_lock(sessp); sessp->s_leader = NULL; @@ -835,6 +868,7 @@ proc_exit(proc_t p) proc_limitdrop(p, 1); p->p_limit = NULL; + vm_purgeable_disown(p->task); /* * Finish up by terminating the task @@ -1169,19 +1203,24 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi int wait1continue(int result) { - void *vt; + proc_t p; thread_t thread; + uthread_t uth; + struct _wait4_data *wait4_data; + struct wait4_nocancel_args *uap; int *retval; - proc_t p; if (result) return(result); p = current_proc(); thread = current_thread(); - vt = get_bsduthreadarg(thread); - retval = get_bsduthreadrval(thread); - return(wait4(p, (struct wait4_args *)vt, retval)); + uth = (struct uthread *)get_bsdthread_info(thread); + + wait4_data = &uth->uu_kevent.uu_wait4_data; + uap = wait4_data->args; + retval = wait4_data->retval; + return(wait4_nocancel(p, uap, retval)); } int @@ -1198,6 +1237,8 @@ wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, int32_t *retval) int sibling_count; proc_t p; int status, error; + uthread_t uth; + struct _wait4_data *wait4_data; AUDIT_ARG(pid, uap->pid); @@ -1278,7 +1319,7 @@ loop1: */ if ( sibling_count == 0 ) { int mask = sigmask(SIGCHLD); - uthread_t uth = (struct uthread *)get_bsdthread_info(current_thread()); + uth = current_uthread(); if ( (uth->uu_sigmask & mask) != 0 ) { /* we are blocking SIGCHLD signals. clear any pending SIGCHLD. @@ -1355,6 +1396,12 @@ loop1: return (0); } + /* Save arguments for continuation. 
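/*
 * [Editor's note: illustrative sketch, not part of the patch. The
 * wait1continue() rewrite above depends on the msleep0() continuation
 * pattern: the continuation runs on a fresh kernel stack, so the syscall
 * arguments must first be parked in storage that survives the block, here
 * the per-thread uthread. Schematically, with field names as in the patch:]
 */
static int
wait4_block_with_continuation(proc_t q, struct wait4_nocancel_args *uap,
    int32_t *retval)
{
	uthread_t uth = current_uthread();
	struct _wait4_data *wait4_data = &uth->uu_kevent.uu_wait4_data;

	/* Backing storage is uthread->uu_arg; it is not deallocated. */
	wait4_data->args = uap;
	wait4_data->retval = retval;

	/* On wakeup, wait1continue() re-enters wait4_nocancel(). */
	return (msleep0((caddr_t)q, proc_list_mlock,
	    PWAIT | PCATCH | PDROP, "wait", 0, wait1continue));
}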
Backing storage is in uthread->uu_arg, and will not be deallocated */ + uth = current_uthread(); + wait4_data = &uth->uu_kevent.uu_wait4_data; + wait4_data->args = uap; + wait4_data->retval = retval; + if ((error = msleep0((caddr_t)q, proc_list_mlock, PWAIT | PCATCH | PDROP, "wait", 0, wait1continue))) return (error); @@ -1377,17 +1424,24 @@ out: int waitidcontinue(int result) { - void *vt; + proc_t p; thread_t thread; + uthread_t uth; + struct _waitid_data *waitid_data; + struct waitid_nocancel_args *uap; int *retval; if (result) return (result); + p = current_proc(); thread = current_thread(); - vt = get_bsduthreadarg(thread); - retval = get_bsduthreadrval(thread); - return (waitid(current_proc(), (struct waitid_args *)vt, retval)); + uth = (struct uthread *)get_bsdthread_info(thread); + + waitid_data = &uth->uu_kevent.uu_waitid_data; + uap = waitid_data->args; + retval = waitid_data->retval; + return(waitid_nocancel(p, uap, retval)); } /* @@ -1419,6 +1473,8 @@ waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, int nfound; proc_t p; int error; + uthread_t uth; + struct _waitid_data *waitid_data; if (uap->options == 0 || (uap->options & ~(WNOHANG|WNOWAIT|WCONTINUED|WSTOPPED|WEXITED))) @@ -1603,6 +1659,12 @@ loop1: return (0); } + /* Save arguments for continuation. Backing storage is in uthread->uu_arg, and will not be deallocated */ + uth = current_uthread(); + waitid_data = &uth->uu_kevent.uu_waitid_data; + waitid_data->args = uap; + waitid_data->retval = retval; + if ((error = msleep0(q, proc_list_mlock, PWAIT | PCATCH | PDROP, "waitid", 0, waitidcontinue)) != 0) return (error); @@ -1807,6 +1869,17 @@ vproc_exit(proc_t p) if ((tp != TTY_NULL) && (tp->t_session == sessp)) { session_unlock(sessp); + /* + * We're going to SIGHUP the foreground process + * group. It can't change from this point on + * until the revoke is complete. + * The process group changes under both the tty + * lock and proc_list_lock but we need only one + */ + tty_lock(tp); + ttysetpgrphup(tp); + tty_unlock(tp); + tty_pgsignal(tp, SIGHUP, 1); session_lock(sessp); @@ -1830,12 +1903,14 @@ vproc_exit(proc_t p) } context.vc_thread = proc_thread(p); /* XXX */ context.vc_ucred = kauth_cred_proc_ref(p); - vnode_rele(ttyvp); VNOP_REVOKE(ttyvp, REVOKEALL, &context); if (cttyflag) { /* * Release the extra usecount taken in cttyopen. * usecount should be released after VNOP_REVOKE is called. + * This usecount was taken to ensure that + * the VNOP_REVOKE results in a close to + * the tty since cttyclose is a no-op. */ vnode_rele(ttyvp); } @@ -1843,10 +1918,17 @@ vproc_exit(proc_t p) kauth_cred_unref(&context.vc_ucred); ttyvp = NULLVP; } - if (ttyvp) - vnode_rele(ttyvp); - if (tp) + if (tp) { + /* + * This is cleared even if not set. This is also done in + * spec_close to ensure that the flag is cleared. 
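/*
 * [Editor's note: illustrative sketch, not part of the patch. proc_exit()
 * and vproc_exit() above now bracket the controlling-tty revoke with the
 * same pair: pin the foreground process group before the SIGHUP, and
 * unconditionally unpin it afterwards (spec_close clears the flag too, so
 * clearing twice is harmless). The shape of that bracket, with the session
 * locking elided:]
 */
static void
session_hup_and_revoke(struct tty *tp, vnode_t ttyvp, vfs_context_t ctx)
{
	tty_lock(tp);
	ttysetpgrphup(tp);	/* the pgrp can no longer change under us */
	tty_unlock(tp);

	tty_pgsignal(tp, SIGHUP, 1);
	VNOP_REVOKE(ttyvp, REVOKEALL, ctx);

	tty_lock(tp);
	ttyclrpgrphup(tp);	/* cleared even if it was never set */
	tty_unlock(tp);
}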
+ */ + tty_lock(tp); + ttyclrpgrphup(tp); + tty_unlock(tp); + ttyfree(tp); + } } session_lock(sessp); sessp->s_leader = NULL; @@ -1989,7 +2071,7 @@ vproc_exit(proc_t p) ruadd(&rup->ru, &p->p_stats->p_cru); - gather_rusage_info_v2(p, &rup->ri, RUSAGE_INFO_V2); + gather_rusage_info(p, &rup->ri, RUSAGE_INFO_CURRENT); rup->ri.ri_phys_footprint = 0; rup->ri.ri_proc_exit_abstime = mach_absolute_time(); diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index da7a5395c..9417b0911 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -97,14 +97,22 @@ /* Do not include dtrace.h, it redefines kmem_[alloc/free] */ extern void dtrace_fasttrap_fork(proc_t, proc_t); extern void (*dtrace_helpers_fork)(proc_t, proc_t); +extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t); extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t); +/* + * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c, + * we will store its value before actually calling it. + */ +static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL; + #include #endif #include #include +#include #include #include #include @@ -141,10 +149,10 @@ void thread_set_child(thread_t child, int pid); void *act_thread_csave(void); -thread_t cloneproc(task_t, proc_t, int, int); +thread_t cloneproc(task_t, coalition_t, proc_t, int, int); proc_t forkproc(proc_t); void forkproc_free(proc_t); -thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit); +thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child, int inherit_memory, int is64bit); void proc_vfork_begin(proc_t parent_proc); void proc_vfork_end(proc_t parent_proc); @@ -278,7 +286,7 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) thread_t child_thread; int err; - if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK)) != 0) { + if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK, COALITION_NULL)) != 0) { retval[1] = 0; } else { uthread_t ut = get_bsdthread_info(current_thread()); @@ -315,6 +323,11 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) * Mach thread_t of the child process * created * kind kind of creation being requested + * coalition if spawn, coalition the child process + * should join, or COALITION_NULL to + * inherit the parent's. On non-spawns, + * this param is ignored and the child + * always inherits the parent's coalition. * * Notes: Permissible values for 'kind': * @@ -346,7 +359,7 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) * back to the other information. */ int -fork1(proc_t parent_proc, thread_t *child_threadp, int kind) +fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t coalition) { thread_t parent_thread = (thread_t)current_thread(); uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread); @@ -538,7 +551,11 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind) * will, in effect, create a duplicate of it, with only minor * differences. Contrarily, spawned processes do not inherit. */ - if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE, FALSE)) == NULL) { + if ((child_thread = cloneproc(parent_proc->task, + spawn ? coalition : COALITION_NULL, + parent_proc, + spawn ?
FALSE : TRUE, + FALSE)) == NULL) { /* Failed to create thread */ err = EAGAIN; goto bad; @@ -741,6 +758,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval) * process * * Parameters: parent_task parent task + * parent_coalition parent_coalition * child_proc child process * inherit_memory TRUE, if the parents address space is * to be inherited by the child @@ -754,17 +772,18 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval) * vfork() equivalent call, and in the system bootstrap case. * * It creates a new task and thread (and as a side effect of the - * thread creation, a uthread), which is then associated with the - * process 'child'. If the parent process address space is to - * be inherited, then a flag indicates that the newly created - * task should inherit this from the child task. + * thread creation, a uthread) in the parent coalition, which is + * then associated with the process 'child'. If the parent + * process address space is to be inherited, then a flag + * indicates that the newly created task should inherit this from + * the child task. * * As a special concession to bootstrapping the initial process * in the system, it's possible for 'parent_task' to be TASK_NULL; * in this case, 'inherit_memory' MUST be FALSE. */ thread_t -fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit) +fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit) { thread_t child_thread = NULL; task_t child_task; @@ -772,6 +791,7 @@ fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int /* Create a new task for the child process */ result = task_create_internal(parent_task, + parent_coalition, inherit_memory, is64bit, &child_task); @@ -797,12 +817,6 @@ fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int else vm_map_set_32bit(get_task_map(child_task)); -#if CONFIG_MACF - /* Update task for MAC framework */ - /* valid to use p_ucred as child is still not running ... */ - mac_task_label_update_cred(child_proc->p_ucred, child_task); -#endif - /* * Set child process BSD visible scheduler priority if nice value * inherited from parent @@ -873,7 +887,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) retval[1] = 0; /* flag parent return for user space */ - if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK)) == 0) { + if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK, COALITION_NULL)) == 0) { task_t child_task; proc_t child_proc; @@ -893,6 +907,11 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid); DTRACE_PROC1(create, proc_t, child_proc); +#if CONFIG_DTRACE + if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) + (*dtrace_proc_waitfor_hook)(child_proc); +#endif + /* "Return" to the child */ (void)thread_resume(child_thread); @@ -946,7 +965,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) * live with this being somewhat awkward. 
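/*
 * [Editor's note: illustrative sketch, not part of the patch. The fork()
 * and posix_spawn() call sites above read dtrace_proc_waitfor_exec_ptr
 * exactly once before calling through it, because dtrace_subr.c can install
 * or remove the hook at any moment. The patch snapshots it into the
 * file-static dtrace_proc_waitfor_hook; a local works the same way:]
 */
static void
notify_dtrace_waitfor(proc_t child)
{
	void (*hook)(proc_t);

	/* Single load: the pointer cannot change between the test and the call. */
	if ((hook = dtrace_proc_waitfor_exec_ptr) != NULL)
		(*hook)(child);
}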
*/ thread_t -cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory, int memstat_internal) +cloneproc(task_t parent_task, coalition_t parent_coalition, proc_t parent_proc, int inherit_memory, int memstat_internal) { #if !CONFIG_MEMORYSTATUS #pragma unused(memstat_internal) @@ -960,7 +979,7 @@ cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory, int memsta goto bad; } - child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64)); + child_thread = fork_create_child(parent_task, parent_coalition, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64)); if (child_thread == NULL) { /* @@ -1305,9 +1324,10 @@ retry: * but indicate that the process is in (the creation) transition. */ proc_signalstart(child_proc, 0); - proc_transstart(child_proc, 0); + proc_transstart(child_proc, 0, 0); + + child_proc->p_pcaction = 0; - child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX; TAILQ_INIT(&child_proc->p_uthlist); TAILQ_INIT(&child_proc->p_aio_activeq); TAILQ_INIT(&child_proc->p_aio_doneq); @@ -1517,7 +1537,7 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) struct _select *sel; uthread_t uth = (uthread_t)uthread; proc_t p = (proc_t)bsd_info; - + void *pth_name; if (uth->uu_lowpri_window || uth->uu_throttle_info) { /* @@ -1558,12 +1578,22 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) uth->uu_allocsize = 0; uth->uu_wqset = 0; } - - if(uth->pth_name != NULL) - { - kfree(uth->pth_name, MAXTHREADNAMESIZE); - uth->pth_name = 0; + + /* + * + * Set pth_name to NULL before calling free(). + * Previously there was a race condition in the + * case this code was executing during a stackshot + * where the stackshot could try and copy pth_name + * after it had been freed and before if was marked + * as null. + */ + if (uth->pth_name != NULL) { + pth_name = uth->pth_name; + uth->pth_name = NULL; + kfree(pth_name, MAXTHREADNAMESIZE); } + if ((task != kernel_task) && p) { if (((uth->uu_flag & UT_VFORK) == UT_VFORK) && (uth->uu_proc != PROC_NULL)) { diff --git a/bsd/kern/kern_guarded.c b/bsd/kern/kern_guarded.c index 5c175c7bb..c8223153a 100644 --- a/bsd/kern/kern_guarded.c +++ b/bsd/kern/kern_guarded.c @@ -35,8 +35,27 @@ #include #include #include +#include +#include +#include #include #include +#include +#include +#include +#include +#include +#if CONFIG_PROTECT +#include +#endif + + +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_ops->fo_type +extern int dofilewrite(vfs_context_t ctx, struct fileproc *fp, + user_addr_t bufp, user_size_t nbyte, off_t offset, + int flags, user_ssize_t *retval ); +extern int wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval); /* * Experimental guarded file descriptor support. 
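/*
 * [Editor's note: illustrative sketch, not part of the patch. The
 * uthread_cleanup() change above closes a race against stackshot by
 * publishing NULL before freeing, instead of freeing and then clearing:]
 */
static void
uthread_free_name(uthread_t uth)
{
	void *pth_name;

	if (uth->pth_name != NULL) {
		/*
		 * Clear first: a concurrent stackshot that loads
		 * uth->pth_name now observes NULL, never freed memory.
		 */
		pth_name = uth->pth_name;
		uth->pth_name = NULL;
		kfree(pth_name, MAXTHREADNAMESIZE);
	}
}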
@@ -312,7 +331,7 @@ guarded_open_np(proc_t p, struct guarded_open_np_args *uap, int32_t *retval) #define GUARD_REQUIRED (GUARD_DUP) #define GUARD_ALL (GUARD_REQUIRED | \ - (GUARD_CLOSE | GUARD_SOCKET_IPC | GUARD_FILEPORT)) + (GUARD_CLOSE | GUARD_SOCKET_IPC | GUARD_FILEPORT | GUARD_WRITE)) if (((uap->guardflags & GUARD_REQUIRED) != GUARD_REQUIRED) || ((uap->guardflags & ~GUARD_ALL) != 0)) @@ -350,6 +369,78 @@ guarded_open_np(proc_t p, struct guarded_open_np_args *uap, int32_t *retval) guarded_fileproc_alloc_init, &crarg, retval)); } +/* + * int guarded_open_dprotected_np(const char *pathname, int flags, + * const guardid_t *guard, u_int guardflags, int dpclass, int dpflags, ...); + * + * This SPI is extension of guarded_open_np() to include dataprotection class on creation + * in "dpclass" and dataprotection flags 'dpflags'. Otherwise behaviors are same as in + * guarded_open_np() + */ +int +guarded_open_dprotected_np(proc_t p, struct guarded_open_dprotected_np_args *uap, int32_t *retval) +{ + if ((uap->flags & O_CLOEXEC) == 0) + return (EINVAL); + +#define GUARD_REQUIRED (GUARD_DUP) +#define GUARD_ALL (GUARD_REQUIRED | \ + (GUARD_CLOSE | GUARD_SOCKET_IPC | GUARD_FILEPORT | GUARD_WRITE)) + + if (((uap->guardflags & GUARD_REQUIRED) != GUARD_REQUIRED) || + ((uap->guardflags & ~GUARD_ALL) != 0)) + return (EINVAL); + + int error; + struct gfp_crarg crarg = { + .gca_attrs = uap->guardflags + }; + + if ((error = copyin(uap->guard, + &(crarg.gca_guard), sizeof (crarg.gca_guard))) != 0) + return (error); + + /* + * Disallow certain guard values -- is zero enough? + */ + if (crarg.gca_guard == 0) + return (EINVAL); + + struct filedesc *fdp = p->p_fd; + struct vnode_attr va; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + int cmode; + + VATTR_INIT(&va); + cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + VATTR_SET(&va, va_mode, cmode & ACCESSPERMS); + + NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, + uap->path, ctx); + + /* + * Initialize the extra fields in vnode_attr to pass down dataprotection + * extra fields. + * 1. target cprotect class. + * 2. set a flag to mark it as requiring open-raw-encrypted semantics. + */ + if (uap->flags & O_CREAT) { + VATTR_SET(&va, va_dataprotect_class, uap->dpclass); + } + + if (uap->dpflags & O_DP_GETRAWENCRYPTED) { + if ( uap->flags & (O_RDWR | O_WRONLY)) { + /* Not allowed to write raw encrypted bytes */ + return EINVAL; + } + VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); + } + + return (open1(ctx, &nd, uap->flags | O_CLOFORK, &va, + guarded_fileproc_alloc_init, &crarg, retval)); +} + /* * int guarded_kqueue_np(const guardid_t *guard, u_int guardflags); * @@ -591,12 +682,11 @@ restart: }; struct fileproc *nfp = guarded_fileproc_alloc_init(&crarg); + struct guarded_fileproc *gfp; proc_fdlock(p); switch (error = fp_tryswap(p, fd, nfp)) { - struct guarded_fileproc *gfp; - case 0: /* guarded-ness comes with side-effects */ gfp = FP_TO_GFP(nfp); if (gfp->gf_attrs & GUARD_CLOSE) @@ -680,4 +770,188 @@ dropout: proc_fdunlock(p); return (error); } - + +/* + * user_ssize_t guarded_write_np(int fd, const guardid_t *guard, + * user_addr_t cbuf, user_ssize_t nbyte); + * + * Initial implementation of guarded writes. 
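/*
 * [Editor's note: hypothetical userspace usage, not part of the patch,
 * assuming a libsyscall wrapper with the prototype quoted in the comment
 * above. O_CLOEXEC and GUARD_DUP are mandatory per the checks in
 * guarded_open_np(); the guard value, path, and 'dpclass' argument are
 * illustrative only:]
 */
#if 0	/* userspace illustration, not kernel code */
static int
open_guarded_protected(const guardid_t *guard, int dpclass)
{
	return guarded_open_dprotected_np("/tmp/example",
	    O_CREAT | O_WRONLY | O_CLOEXEC,
	    guard, GUARD_CLOSE | GUARD_DUP | GUARD_WRITE,
	    dpclass, 0 /* dpflags */, 0600);
}
#endif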
+ */ +int +guarded_write_np(struct proc *p, struct guarded_write_np_args *uap, user_ssize_t *retval) +{ + int error; + int fd = uap->fd; + guardid_t uguard; + struct fileproc *fp; + struct guarded_fileproc *gfp; + bool wrote_some = false; + + AUDIT_ARG(fd, fd); + + if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0) + return (error); + + error = fp_lookup_guarded(p, fd, uguard, &gfp); + if (error) + return(error); + + fp = GFP_TO_FP(gfp); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + } else { + + struct vfs_context context = *(vfs_context_current()); + context.vc_ucred = fp->f_fglob->fg_cred; + + error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte, + (off_t)-1, 0, retval); + wrote_some = *retval > 0; + } + if (wrote_some) + fp_drop_written(p, fd, fp); + else + fp_drop(p, fd, fp, 0); + return(error); +} + +/* + * user_ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, + * user_addr_t buf, user_size_t nbyte, off_t offset); + * + * Initial implementation of guarded pwrites. + */ + int + guarded_pwrite_np(struct proc *p, struct guarded_pwrite_np_args *uap, user_ssize_t *retval) + { + struct fileproc *fp; + int error; + int fd = uap->fd; + vnode_t vp = (vnode_t)0; + guardid_t uguard; + struct guarded_fileproc *gfp; + bool wrote_some = false; + + AUDIT_ARG(fd, fd); + + if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0) + return (error); + + error = fp_lookup_guarded(p, fd, uguard, &gfp); + if (error) + return(error); + + fp = GFP_TO_FP(gfp); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + } else { + struct vfs_context context = *vfs_context_current(); + context.vc_ucred = fp->f_fglob->fg_cred; + + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + goto errout; + } + vp = (vnode_t)fp->f_fglob->fg_data; + if (vnode_isfifo(vp)) { + error = ESPIPE; + goto errout; + } + if ((vp->v_flag & VISTTY)) { + error = ENXIO; + goto errout; + } + if (uap->offset == (off_t)-1) { + error = EINVAL; + goto errout; + } + + error = dofilewrite(&context, fp, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET, retval); + wrote_some = *retval > 0; + } +errout: + if (wrote_some) + fp_drop_written(p, fd, fp); + else + fp_drop(p, fd, fp, 0); + + KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_guarded_pwrite_np) | DBG_FUNC_NONE), + uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0); + + return(error); +} + +/* + * user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, + * struct iovec *iovp, u_int iovcnt); + * + * Initial implementation of guarded writev. + * + */ +int +guarded_writev_np(struct proc *p, struct guarded_writev_np_args *uap, user_ssize_t *retval) +{ + uio_t auio = NULL; + int error; + struct fileproc *fp; + struct user_iovec *iovp; + guardid_t uguard; + struct guarded_fileproc *gfp; + bool wrote_some = false; + + AUDIT_ARG(fd, uap->fd); + + /* Verify range before calling uio_create() */ + if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) + return (EINVAL); + + /* allocate a uio large enough to hold the number of iovecs passed */ + auio = uio_create(uap->iovcnt, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_WRITE); + + /* get location of iovecs within the uio. then copyin the iovecs from + * user space. + */ + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto ExitThisRoutine; + } + error = copyin_user_iovec_array(uap->iovp, + IS_64BIT_PROCESS(p) ?
UIO_USERSPACE64 : UIO_USERSPACE32, + uap->iovcnt, iovp); + if (error) { + goto ExitThisRoutine; + } + + /* finalize uio_t for use and do the IO + */ + uio_calculateresid(auio); + + if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0) + goto ExitThisRoutine; + + error = fp_lookup_guarded(p, uap->fd, uguard, &gfp); + if (error) + goto ExitThisRoutine; + + fp = GFP_TO_FP(gfp); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + } else { + error = wr_uio(p, fp, auio, retval); + wrote_some = *retval > 0; + } + + if (wrote_some) + fp_drop_written(p, uap->fd, fp); + else + fp_drop(p, uap->fd, fp, 0); +ExitThisRoutine: + if (auio != NULL) { + uio_free(auio); + } + return (error); +} diff --git a/bsd/kern/kern_kpc.c b/bsd/kern/kern_kpc.c index 9cf0ab817..dde93bbce 100644 --- a/bsd/kern/kern_kpc.c +++ b/bsd/kern/kern_kpc.c @@ -27,6 +27,7 @@ */ #include +#include #include #include #include @@ -51,8 +52,7 @@ #define REQ_CONFIG (9) #define REQ_PERIOD (10) #define REQ_ACTIONID (11) -#define REQ_FORCE_ALL_CTRS (12) -#define REQ_DISABLE_WHITELIST (13) +#define REQ_SW_INC (14) /* Type-munging casts */ typedef int (*getint_t)(void); @@ -62,6 +62,7 @@ typedef int (*setint_t)(int); static int kpc_initted = 0; /* locking and buffer for large data requests */ +#define SYSCTL_BUFFER_SIZE (33 * sizeof(uint64_t)) static lck_grp_attr_t *sysctl_buffer_lckgrp_attr = NULL; static lck_grp_t *sysctl_buffer_lckgrp = NULL; static lck_mtx_t sysctl_buffer_lock; @@ -70,8 +71,9 @@ static void *sysctl_buffer = NULL; typedef int (*setget_func_t)(int); /* init our stuff */ -extern void kpc_thread_init(void); /* osfmk/kern/kpc_thread.c */ extern void kpc_arch_init(void); +extern void kpc_common_init(void); +extern void kpc_thread_init(void); /* osfmk/kern/kpc_thread.c */ void kpc_init(void) @@ -82,6 +84,7 @@ kpc_init(void) lck_mtx_init(&sysctl_buffer_lock, sysctl_buffer_lckgrp, LCK_ATTR_NULL); kpc_arch_init(); + kpc_common_init(); kpc_thread_init(); kpc_initted = 1; @@ -100,6 +103,21 @@ sysctl_get_int( struct sysctl_oid *oidp, struct sysctl_req *req, return error; } +static int +sysctl_set_int( struct sysctl_req *req, int (*set_func)(int)) +{ + int error = 0; + int value = 0; + + error = SYSCTL_IN( req, &value, sizeof(value) ); + if( error ) + return error; + + error = set_func( value ); + + return error; +} + static int sysctl_getset_int( struct sysctl_oid *oidp, struct sysctl_req *req, int (*get_func)(void), int (*set_func)(int) ) @@ -121,6 +139,7 @@ sysctl_getset_int( struct sysctl_oid *oidp, struct sysctl_req *req, return error; } + static int sysctl_setget_int( struct sysctl_req *req, int (*setget_func)(int) ) @@ -143,7 +162,13 @@ static int kpc_sysctl_acquire_buffer(void) { if( sysctl_buffer == NULL ) - sysctl_buffer = kpc_counterbuf_alloc(); + { + sysctl_buffer = kalloc(SYSCTL_BUFFER_SIZE); + if( sysctl_buffer ) + { + bzero( sysctl_buffer, SYSCTL_BUFFER_SIZE ); + } + } if( !sysctl_buffer ) { @@ -255,7 +280,7 @@ sysctl_get_bigarray( struct sysctl_req *req, int (*get_fn)(uint32_t, uint32_t*, void*) ) { int error = 0; - uint32_t bufsize = KPC_MAX_COUNTERS * sizeof(uint64_t); /* XXX? */ + uint32_t bufsize = SYSCTL_BUFFER_SIZE; uint32_t arg = 0; /* get the argument */ @@ -310,7 +335,7 @@ sysctl_getset_bigarray( struct sysctl_req *req, int (*set_fn)(uint32_t, void*) ) { int error = 0; - uint32_t bufsize = KPC_MAX_COUNTERS * sizeof(uint64_t); /* XXX? 
*/ + uint32_t bufsize = SYSCTL_BUFFER_SIZE; uint32_t regsize = 0; uint64_t arg; @@ -470,6 +495,11 @@ kpc_sysctl SYSCTL_HANDLER_ARGS sysctl_kpc_set_actionid ); break; + + case REQ_SW_INC: + ret = sysctl_set_int( req, (setget_func_t)kpc_set_sw_inc ); + break; + default: ret = ENOENT; break; @@ -514,6 +544,11 @@ SYSCTL_PROC(_kpc, OID_AUTO, counter_count, (void*)REQ_COUNTER_COUNT, sizeof(int), kpc_sysctl, "S", "Counter count"); +SYSCTL_PROC(_kpc, OID_AUTO, sw_inc, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_SW_INC, + sizeof(int), kpc_sysctl, "S", "Software increment"); + /* arrays */ SYSCTL_PROC(_kpc, OID_AUTO, thread_counters, CTLFLAG_RD|CTLFLAG_WR|CTLFLAG_ANYBODY, @@ -550,3 +585,5 @@ SYSCTL_PROC(_kpc, OID_AUTO, actionid, (void*)REQ_ACTIONID, sizeof(uint32_t), kpc_sysctl, "QU", "Set counter actionids"); + + diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c index 2b5f0906a..3bbf77a45 100644 --- a/bsd/kern/kern_lockf.c +++ b/bsd/kern/kern_lockf.c @@ -606,13 +606,15 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) /* Check if current task can donate importance. The * check of imp_donor bit is done without holding - * task lock. The value may change after you read it, + * any lock. The value may change after you read it, * but it is ok to boost a task while someone else is * unboosting you. + * + * TODO: Support live inheritance on file locks. */ if (task_is_importance_donor(boosting_task)) { if (block->lf_boosted != LF_BOOSTED && - task_is_importance_receiver(block_task)) { + task_is_importance_receiver_type(block_task)) { lf_hold_assertion(block_task, block); } lf_jump_to_queue_head(block, lock); @@ -1383,8 +1385,9 @@ lf_printlist(const char *tag, struct lockf *lock) static void lf_hold_assertion(task_t block_task, struct lockf *block) { - task_importance_hold_internal_assertion(block_task, 1); - block->lf_boosted = LF_BOOSTED; + if (task_importance_hold_file_lock_assertion(block_task, 1)) { + block->lf_boosted = LF_BOOSTED; + } } @@ -1425,7 +1428,7 @@ lf_drop_assertion(struct lockf *block) task_t current_task; current_task = proc_task((proc_t) block->lf_id); - task_importance_drop_internal_assertion(current_task, 1); + task_importance_drop_file_lock_assertion(current_task, 1); block->lf_boosted = LF_NOT_BOOSTED; } diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c index 3f689cf35..860a232dd 100644 --- a/bsd/kern/kern_malloc.c +++ b/bsd/kern/kern_malloc.c @@ -74,6 +74,7 @@ #include #include +#include #include #include @@ -303,6 +304,20 @@ const char *memname[] = { "", /* 116 M_FLOW_DIVERT_GROUP */ #endif "ip6cga", /* 117 M_IP6CGA */ +#if NECP + "necp", /* 118 M_NECP */ + "necp_session_policy", /* 119 M_NECP_SESSION_POLICY */ + "necp_socket_policy", /* 120 M_NECP_SOCKET_POLICY */ + "necp_ip_policy", /* 121 M_NECP_IP_POLICY */ +#else + "", /* 118 M_NECP */ + "", /* 119 M_NECP_SESSION_POLICY */ + "", /* 120 M_NECP_SOCKET_POLICY */ + "", /* 121 M_NECP_IP_POLICY */ +#endif + "fdvnodedata" /* 122 M_FD_VN_DATA */ + "fddirbuf", /* 123 M_FD_DIRBUF */ + "" }; /* for use with kmzones.kz_zalloczone */ @@ -489,6 +504,16 @@ struct kmzones { { 0, KMZ_MALLOC, FALSE }, /* 116 M_FLOW_DIVERT_GROUP */ #endif /* FLOW_DIVERT */ { 0, KMZ_MALLOC, FALSE }, /* 117 M_IP6CGA */ + { 0, KMZ_MALLOC, FALSE }, /* 118 M_NECP */ +#if NECP + { SOS(necp_session_policy), KMZ_CREATEZONE, TRUE }, /* 119 M_NECP_SESSION_POLICY */ + { SOS(necp_kernel_socket_policy), KMZ_CREATEZONE, TRUE }, /* 120 M_NECP_SOCKET_POLICY */ + { SOS(necp_kernel_ip_output_policy), KMZ_CREATEZONE, TRUE }, /* 121 
M_NECP_IP_POLICY */ +#else + { 0, KMZ_MALLOC, FALSE }, /* 119 M_NECP_SESSION_POLICY */ + { 0, KMZ_MALLOC, FALSE }, /* 120 M_NECP_SOCKET_POLICY */ + { 0, KMZ_MALLOC, FALSE }, /* 121 M_NECP_IP_POLICY */ +#endif /* NECP */ #undef SOS #undef SOX }; diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c index 852037af4..0d46cec14 100644 --- a/bsd/kern/kern_memorystatus.c +++ b/bsd/kern/kern_memorystatus.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -60,6 +60,38 @@ #include +#if CONFIG_JETSAM +/* For logging clarity */ +static const char *jetsam_kill_cause_name[] = { + "" , + "jettisoned" , /* kMemorystatusKilled */ + "highwater" , /* kMemorystatusKilledHiwat */ + "vnode-limit" , /* kMemorystatusKilledVnodes */ + "vm-pageshortage" , /* kMemorystatusKilledVMPageShortage */ + "vm-thrashing" , /* kMemorystatusKilledVMThrashing */ + "fc-thrashing" , /* kMemorystatusKilledFCThrashing */ + "per-process-limit" , /* kMemorystatusKilledPerProcessLimit */ + "diagnostic" , /* kMemorystatusKilledDiagnostic */ + "idle-exit" , /* kMemorystatusKilledIdleExit */ +}; + +/* Does cause indicate vm or fc thrashing? */ +static boolean_t +is_thrashing(unsigned cause) +{ + switch (cause) { + case kMemorystatusKilledVMThrashing: + case kMemorystatusKilledFCThrashing: + return TRUE; + default: + return FALSE; + } +} + +/* Callback into vm_compressor.c to signal that thrashing has been mitigated. */ +extern void vm_thrashing_jetsam_done(void); +#endif + /* These are very verbose printfs(), enable with * MEMORYSTATUS_DEBUG_LOG */ @@ -105,8 +137,9 @@ struct filterops memorystatus_filtops = { }; enum { - kMemorystatusNoPressure = 1, - kMemorystatusPressure = 2 + kMemorystatusNoPressure = 0x1, + kMemorystatusPressure = 0x2, + kMemorystatusLowSwap = 0x4 }; /* Idle guard handling */ @@ -120,7 +153,10 @@ static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_s static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clean_state); static void memorystatus_reschedule_idle_demotion_locked(void); -static void memorystatus_update_priority_locked(proc_t p, int priority); +static void memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert); + +boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t); +void memorystatus_send_low_swap_note(void); int memorystatus_wakeup = 0; @@ -141,10 +177,6 @@ uint64_t memstat_idle_demotion_deadline = 0; static unsigned int memorystatus_dirty_count = 0; -#if !CONFIG_JETSAM -static boolean_t kill_idle_exit = FALSE; -#endif - int memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret) @@ -169,28 +201,20 @@ static void memorystatus_thread(void *param __unused, wait_result_t wr __unused) #if CONFIG_JETSAM +int proc_get_memstat_priority(proc_t, boolean_t); + /* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */ #define LEGACY_HIWATER 1 -static int memorystatus_highwater_enabled = 1; +static boolean_t memorystatus_idle_snapshot = 0; -extern unsigned int vm_page_free_count; -extern unsigned int vm_page_active_count; -extern unsigned int vm_page_inactive_count; -extern unsigned int vm_page_throttled_count; -extern unsigned int vm_page_purgeable_count; -extern unsigned int vm_page_wire_count; +static int memorystatus_highwater_enabled = 1; unsigned int memorystatus_delta = 0; -static unsigned int 
memorystatus_available_pages = (unsigned int)-1; -static unsigned int memorystatus_available_pages_pressure = 0; -static unsigned int memorystatus_available_pages_critical = 0; static unsigned int memorystatus_available_pages_critical_base = 0; -static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1; -#if !LATENCY_JETSAM +//static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1; static unsigned int memorystatus_available_pages_critical_idle_offset = 0; -#endif #if DEVELOPMENT || DEBUG static unsigned int memorystatus_jetsam_panic_debug = 0; @@ -199,7 +223,9 @@ static unsigned int memorystatus_jetsam_policy = kPolicyDefault; static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0; #endif -static boolean_t kill_under_pressure = FALSE; +static unsigned int memorystatus_thread_wasted_wakeup = 0; + +static uint32_t kill_under_pressure_cause = 0; static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot; #define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries @@ -208,11 +234,10 @@ static unsigned int memorystatus_jetsam_snapshot_count = 0; static unsigned int memorystatus_jetsam_snapshot_max = 0; static void memorystatus_clear_errors(void); -static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint); -static int memorystatus_send_note(int event_code, void *data, size_t data_length); +static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages); static uint32_t memorystatus_build_state(proc_t p); static void memorystatus_update_levels_locked(boolean_t critical_only); -static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured); +//static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured); static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause); static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors); @@ -227,14 +252,45 @@ static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause /* VM pressure */ +extern unsigned int vm_page_free_count; +extern unsigned int vm_page_active_count; +extern unsigned int vm_page_inactive_count; +extern unsigned int vm_page_throttled_count; +extern unsigned int vm_page_purgeable_count; +extern unsigned int vm_page_wire_count; + #if VM_PRESSURE_EVENTS #include "vm_pressure.h" -extern boolean_t memorystatus_warn_process(pid_t pid); +extern boolean_t memorystatus_warn_process(pid_t pid, boolean_t critical); vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal; +#if CONFIG_MEMORYSTATUS +unsigned int memorystatus_available_pages = (unsigned int)-1; +unsigned int memorystatus_available_pages_pressure = 0; +unsigned int memorystatus_available_pages_critical = 0; +unsigned int memorystatus_frozen_count = 0; +unsigned int memorystatus_suspended_count = 0; + +/* + * We use this flag to signal if we have any HWM offenders + * on the system. This way we can reduce the number of wakeups + * of the memorystatus_thread when the system is between the + * "pressure" and "critical" threshold. + * + * The (re-)setting of this variable is done without any locks + * or synchronization simply because it is not possible (currently) + * to keep track of HWM offenders that drop down below their memory + * limit and/or exit. 
So, we choose to burn a couple of wasted wakeups + * by allowing the unguarded modification of this variable. + */ +boolean_t memorystatus_hwm_candidates = 0; + +static int memorystatus_send_note(int event_code, void *data, size_t data_length); +#endif /* CONFIG_MEMORYSTATUS */ + #endif /* VM_PRESSURE_EVENTS */ /* Freeze */ @@ -252,10 +308,8 @@ static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __ /* Thresholds */ static unsigned int memorystatus_freeze_threshold = 0; -static unsigned int memorystatus_freeze_pages_min = FREEZE_PAGES_MIN; -static unsigned int memorystatus_freeze_pages_max = FREEZE_PAGES_MAX; - -static unsigned int memorystatus_frozen_count = 0; +static unsigned int memorystatus_freeze_pages_min = 0; +static unsigned int memorystatus_freeze_pages_max = 0; static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; @@ -271,13 +325,14 @@ static throttle_interval_t throttle_intervals[] = { static uint64_t memorystatus_freeze_throttle_count = 0; -static unsigned int memorystatus_suspended_count = 0; static unsigned int memorystatus_suspended_footprint_total = 0; #endif /* CONFIG_FREEZE */ /* Debug */ +extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *); + #if DEVELOPMENT || DEBUG #if CONFIG_JETSAM @@ -322,6 +377,14 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS } task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); + if (memlimit == -1) { + p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; + } else { + if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { + p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; + } + } + p = memorystatus_get_next_proc_locked(&b, p, TRUE); } @@ -332,14 +395,14 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS return 0; } +SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, ""); + SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", ""); SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, ""); SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, ""); SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, ""); -#if !LATENCY_JETSAM SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, ""); -#endif /* Diagnostic code */ @@ -418,34 +481,53 @@ SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, ""); -static int -sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS -{ -#pragma unused(arg1, arg2, oidp) - int error = 0; - - error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); - if (error) - return (error); - - return SYSCTL_OUT(req, &memorystatus_vm_pressure_level, sizeof(memorystatus_vm_pressure_level)); -} -SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED, - 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); +/* + * This routine is used 
for targeted notifications + * regardless of system memory pressure. + * "memnote" is the current user. + */ static int sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS { #pragma unused(arg1, arg2) - int error, pid = 0; + int error = 0, pid = 0; + int ret = 0; + struct knote *kn = NULL; error = sysctl_handle_int(oidp, &pid, 0, req); if (error || !req->newptr) return (error); - return vm_dispatch_pressure_note_to_pid(pid, FALSE); + /* + * We inspect 3 lists here for targeted notifications: + * - memorystatus_klist + * - vm_pressure_klist + * - vm_pressure_dormant_klist + * + * The vm_pressure_* lists are tied to the old VM_PRESSURE + * notification mechanism. We intend to stop using that + * mechanism and, in turn, get rid of the 2 lists and + * vm_dispatch_pressure_note_to_pid() too. + */ + + memorystatus_klist_lock(); + kn = vm_find_knote_from_pid(pid, &memorystatus_klist); + if (kn) { + /* + * Forcibly send this pid a "warning" memory pressure notification. + */ + kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; + KNOTE(&memorystatus_klist, kMemorystatusPressure); + ret = 0; + } else { + ret = vm_dispatch_pressure_note_to_pid(pid, FALSE); + } + memorystatus_klist_unlock(); + + return ret; } SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, @@ -455,8 +537,6 @@ SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_ #endif /* CONFIG_JETSAM */ -#endif /* DEVELOPMENT || DEBUG */ - #if CONFIG_FREEZE SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, ""); @@ -473,10 +553,7 @@ boolean_t memorystatus_freeze_throttle_enabled = TRUE; SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, ""); /* - * Enabled via: Enable the sysctl_memorystatus_freeze/thaw sysctls on Release KC - * - * TODO: Manual trigger of freeze and thaw for dev / debug kernels only. - * Disable/restrict the sysctl_memorystatus_freeze/thaw sysctls on Release KC + * Manual trigger of freeze and thaw for dev / debug kernels only. */ static int sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS @@ -486,6 +563,10 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS int error, pid = 0; proc_t p; + if (memorystatus_freeze_enabled == FALSE) { + return ENOTSUP; + } + error = sysctl_handle_int(oidp, &pid, 0, req); if (error || !req->newptr) return (error); @@ -496,7 +577,7 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS boolean_t shared; uint32_t max_pages = 0; - if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); } else { max_pages = UINT32_MAX - 1; @@ -522,6 +603,10 @@ sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS int error, pid = 0; proc_t p; + if (memorystatus_freeze_enabled == FALSE) { + return ENOTSUP; + } + error = sysctl_handle_int(oidp, &pid, 0, req); if (error || !req->newptr) return (error); @@ -544,11 +629,79 @@ SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_L #endif /* CONFIG_FREEZE */ +#endif /* DEVELOPMENT || DEBUG */ + extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation, void *parameter, integer_t priority, thread_t *new_thread); +#if CONFIG_JETSAM +/* + * Sort processes by size for a single jetsam bucket. 
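/*
 * [Editor's note: illustrative sketch, not part of the patch. The rewritten
 * sysctl_memorystatus_vm_pressure_send above forces a warning-level
 * notification at a single pid by flagging its knote and broadcasting,
 * falling back to the legacy VM_PRESSURE path when no knote is registered.
 * Distilled:]
 */
static int
memorystatus_force_pressure_warn(pid_t pid)
{
	struct knote *kn;
	int ret;

	memorystatus_klist_lock();
	kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
	if (kn != NULL) {
		kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
		KNOTE(&memorystatus_klist, kMemorystatusPressure);
		ret = 0;
	} else {
		/* Legacy mechanism, slated for removal per the comment above. */
		ret = vm_dispatch_pressure_note_to_pid(pid, FALSE);
	}
	memorystatus_klist_unlock();
	return (ret);
}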
+ */
+
+static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index)
+{
+	proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL;
+	uint32_t pages = 0, max_pages = 0;
+	memstat_bucket_t *current_bucket;
+
+	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
+		return;
+	}
+
+	current_bucket = &memstat_bucket[bucket_index];
+
+	p = TAILQ_FIRST(&current_bucket->list);
+
+	if (p) {
+		memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
+		max_pages = pages;
+		insert_after_proc = NULL;
+
+		p = TAILQ_NEXT(p, p_memstat_list);
+
+restart:
+		while (p) {
+
+			memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
+
+			if (pages > max_pages) {
+				max_pages = pages;
+				max_proc = p;
+			}
+
+			p = TAILQ_NEXT(p, p_memstat_list);
+		}
+
+		if (max_proc) {
+
+			TAILQ_REMOVE(&current_bucket->list, max_proc, p_memstat_list);
+
+			if (insert_after_proc == NULL) {
+				TAILQ_INSERT_HEAD(&current_bucket->list, max_proc, p_memstat_list);
+			} else {
+				TAILQ_INSERT_AFTER(&current_bucket->list, insert_after_proc, max_proc, p_memstat_list);
+			}
+
+			insert_after_proc = max_proc;
+
+			/* Reset parameters for the new search. */
+			p = TAILQ_NEXT(max_proc, p_memstat_list);
+			if (p) {
+				memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
+				max_pages = pages;
+			}
+			max_proc = NULL;
+
+			goto restart;
+		}
+	}
+}
+
+#endif /* CONFIG_JETSAM */
+
 static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) {
 	memstat_bucket_t *current_bucket;
 	proc_t next_p;
@@ -593,6 +746,11 @@ memorystatus_init(void)
 	kern_return_t result;
 	int i;
 
+#if CONFIG_FREEZE
+	memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
+	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
+#endif
+
 	nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);
 
 	/* Init buckets */
@@ -617,10 +775,7 @@ memorystatus_init(void)
 
 #if CONFIG_JETSAM
 	memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
-#if !LATENCY_JETSAM
 	memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
-#endif
-
 	memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;
 
 	memorystatus_jetsam_snapshot_max = maxproc;
@@ -651,10 +806,29 @@ memorystatus_init(void)
 
 extern void vm_wake_compactor_swapper(void);
 
+/*
+ * The jetsam no-frills kill call
+ *	Return: 0 on success
+ *		error code on failure (EINVAL...)
+ */ +static int +jetsam_do_kill(proc_t p, int jetsam_flags) { + int error = 0; + error = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags); + return(error); +} + +/* + * Wrapper for processes exiting with memorystatus details + */ static boolean_t memorystatus_do_kill(proc_t p, uint32_t cause) { - int retval = 0; + int error = 0; + __unused pid_t victim_pid = p->p_pid; + + KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START, + victim_pid, cause, vm_page_free_count, 0, 0); #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG) if (memorystatus_jetsam_panic_debug & (1 << cause)) { @@ -669,16 +843,20 @@ memorystatus_do_kill(proc_t p, uint32_t cause) { case kMemorystatusKilledVnodes: jetsam_flags |= P_JETSAM_VNODE; break; case kMemorystatusKilledVMPageShortage: jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break; case kMemorystatusKilledVMThrashing: jetsam_flags |= P_JETSAM_VMTHRASHING; break; + case kMemorystatusKilledFCThrashing: jetsam_flags |= P_JETSAM_FCTHRASHING; break; case kMemorystatusKilledPerProcessLimit: jetsam_flags |= P_JETSAM_PID; break; case kMemorystatusKilledIdleExit: jetsam_flags |= P_JETSAM_IDLEEXIT; break; } - retval = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags); + error = jetsam_do_kill(p, jetsam_flags); + + KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END, + victim_pid, cause, vm_page_free_count, error, 0); if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { vm_wake_compactor_swapper(); } - - return (retval == 0); + + return (error == 0); } /* @@ -721,11 +899,12 @@ memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2) if (current_time >= p->p_memstat_idledeadline) { #if DEBUG || DEVELOPMENT if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) { - printf("memorystatus_perform_idle_demotion: moving process %d to idle band, but never dirtied (0x%x)!\n", p->p_pid, p->p_memstat_dirty); + printf("memorystatus_perform_idle_demotion: moving process %d [%s] to idle band, but never dirtied (0x%x)!\n", + p->p_pid, (p->p_comm ? 
p->p_comm : "(unknown)"), p->p_memstat_dirty); } #endif memorystatus_invalidate_idle_demotion_locked(p, TRUE); - memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE); + memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, false); // The prior process has moved out of the demotion bucket, so grab the new head and continue p = TAILQ_FIRST(&demotion_bucket->list); @@ -746,35 +925,53 @@ memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2) static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state) { + boolean_t present_in_deferred_bucket = FALSE; + + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + present_in_deferred_bucket = TRUE; + } + MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n", p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions); - assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)); + assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED); if (set_state) { assert(p->p_memstat_idledeadline == 0); + p->p_memstat_dirty |= P_DIRTY_DEFER_IN_PROGRESS; p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time; } - assert(p->p_memstat_idledeadline); + assert(p->p_memstat_idledeadline); - memorystatus_scheduled_idle_demotions++; + if (present_in_deferred_bucket == FALSE) { + memorystatus_scheduled_idle_demotions++; + } } static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state) { + boolean_t present_in_deferred_bucket = FALSE; + + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + present_in_deferred_bucket = TRUE; + assert(p->p_memstat_idledeadline); + } + MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n", p->p_pid, clear_state, memorystatus_scheduled_idle_demotions); - assert(p->p_memstat_idledeadline); if (clear_state) { p->p_memstat_idledeadline = 0; p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS; } - memorystatus_scheduled_idle_demotions--; + if (present_in_deferred_bucket == TRUE) { + memorystatus_scheduled_idle_demotions--; + } + assert(memorystatus_scheduled_idle_demotions >= 0); } @@ -791,11 +988,12 @@ memorystatus_reschedule_idle_demotion_locked(void) { proc_t p; demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED]; p = TAILQ_FIRST(&demotion_bucket->list); - assert(p && p->p_memstat_idledeadline); - if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){ - thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline); - memstat_idle_demotion_deadline = p->p_memstat_idledeadline; + assert(p && p->p_memstat_idledeadline); + + if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){ + thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline); + memstat_idle_demotion_deadline = p->p_memstat_idledeadline; } } } @@ -809,7 +1007,7 @@ memorystatus_add(proc_t p, boolean_t locked) { memstat_bucket_t *bucket; - MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->pid, priority); + MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority); if (!locked) { proc_list_lock(); @@ -822,6 
+1020,10 @@ memorystatus_add(proc_t p, boolean_t locked) bucket = &memstat_bucket[p->p_memstat_effectivepriority]; + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + assert(bucket->count == memorystatus_scheduled_idle_demotions); + } + TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list); bucket->count++; @@ -838,7 +1040,7 @@ exit: } static void -memorystatus_update_priority_locked(proc_t p, int priority) +memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert) { memstat_bucket_t *old_bucket, *new_bucket; @@ -849,22 +1051,41 @@ memorystatus_update_priority_locked(proc_t p, int priority) return; } - MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d\n", p->p_pid, priority); + MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d, inserting at %s\n", + p->p_pid, priority, head_insert ? "head" : "tail"); old_bucket = &memstat_bucket[p->p_memstat_effectivepriority]; + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + assert(old_bucket->count == (memorystatus_scheduled_idle_demotions + 1)); + } + TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list); old_bucket->count--; new_bucket = &memstat_bucket[priority]; - TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list); + if (head_insert) + TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list); + else + TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list); new_bucket->count++; #if CONFIG_JETSAM if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) { + + /* + * Adjust memory limit based on if the task is going to/from foreground and background. + */ + if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) || ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) { int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit; task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); + + if (memlimit <= 0) { + p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; + } else { + p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; + } } } #endif @@ -875,12 +1096,13 @@ memorystatus_update_priority_locked(proc_t p, int priority) } int -memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background) +memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit) { int ret; + boolean_t head_insert = false; #if !CONFIG_JETSAM -#pragma unused(update_memlimit, memlimit, memlimit_background) +#pragma unused(update_memlimit, memlimit, memlimit_background, is_fatal_limit) #endif MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data); @@ -893,6 +1115,10 @@ memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effect } else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) { /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. 
*/ priority = JETSAM_PRIORITY_IDLE; + } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) { + /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */ + priority = JETSAM_PRIORITY_IDLE; + head_insert = true; } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) { /* Sanity check */ ret = EINVAL; @@ -906,7 +1132,16 @@ memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effect if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) { ret = EALREADY; proc_list_unlock(); - MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", pid); + MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid); + goto out; + } + + if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) { + /* + * This could happen when a process calling posix_spawn() is exiting on the jetsam thread. + */ + ret = EBUSY; + proc_list_unlock(); goto out; } @@ -920,16 +1155,37 @@ memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effect if (memlimit_background) { /* Will be set as priority is updated */ p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND; + + /* Cannot have a background memory limit and be fatal. */ + is_fatal_limit = FALSE; + } else { /* Otherwise, apply now */ if (memorystatus_highwater_enabled) { task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); } } + + if (is_fatal_limit || memlimit <= 0) { + p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; + } else { + p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; + } } #endif - memorystatus_update_priority_locked(p, priority); + /* + * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here. + * But, we could be removing it from the bucket. + * Check and take appropriate steps if so. + */ + + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + + memorystatus_invalidate_idle_demotion_locked(p, TRUE); + } + + memorystatus_update_priority_locked(p, priority, head_insert); proc_list_unlock(); ret = 0; @@ -946,15 +1202,19 @@ memorystatus_remove(proc_t p, boolean_t locked) int ret; memstat_bucket_t *bucket; - MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", pid); + MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid); if (!locked) { proc_list_lock(); } assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); - + bucket = &memstat_bucket[p->p_memstat_effectivepriority]; + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + assert(bucket->count == memorystatus_scheduled_idle_demotions); + } + TAILQ_REMOVE(&bucket->list, p, p_memstat_list); bucket->count--; @@ -1005,6 +1265,12 @@ memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) { return FALSE; } + /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. 
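+	   e.g. pcontrol = (PROC_DIRTY_TRACK | PROC_DIRTY_LAUNCH_IN_PROGRESS) passes
+	   this check; PROC_DIRTY_LAUNCH_IN_PROGRESS alone is rejected (returns FALSE).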
*/ + if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) && + !(pcontrol & PROC_DIRTY_TRACK)) { + return FALSE; + } + /* Deferral is only relevant if idle exit is specified */ if ((pcontrol & PROC_DIRTY_DEFER) && !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) { @@ -1026,7 +1292,9 @@ memorystatus_update_idle_priority_locked(proc_t p) { priority = p->p_memstat_requestedpriority; } - memorystatus_update_priority_locked(p, priority); + if (priority != p->p_memstat_effectivepriority) { + memorystatus_update_priority_locked(p, priority, false); + } } /* @@ -1050,10 +1318,23 @@ int memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { unsigned int old_dirty; boolean_t reschedule = FALSE; + boolean_t already_deferred = FALSE; + boolean_t defer_now = FALSE; int ret; + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK), + p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0); + proc_list_lock(); + if ((p->p_listflag & P_LIST_EXITED) != 0) { + /* + * Process is on its way out. + */ + ret = EBUSY; + goto exit; + } + if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { ret = EPERM; goto exit; @@ -1075,28 +1356,82 @@ memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT; } + if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { + p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS; + } + + if (old_dirty & P_DIRTY_DEFER_IN_PROGRESS) { + already_deferred = TRUE; + } + /* This can be set and cleared exactly once. */ - if ((pcontrol & PROC_DIRTY_DEFER) && !(old_dirty & P_DIRTY_DEFER)) { - p->p_memstat_dirty |= (P_DIRTY_DEFER|P_DIRTY_DEFER_IN_PROGRESS); - } else { - p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS; + if (pcontrol & PROC_DIRTY_DEFER) { + + if ( !(old_dirty & P_DIRTY_DEFER)) { + p->p_memstat_dirty |= P_DIRTY_DEFER; + } + + defer_now = TRUE; } - MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / deferred %s / dirty %s for process %d\n", + MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for process %d\n", ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N", - p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS ? "Y" : "N", + defer_now ? "Y" : "N", p->p_memstat_dirty & P_DIRTY ? "Y" : "N", p->p_pid); /* Kick off or invalidate the idle exit deferment if there's a state transition. */ if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) { if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) && - (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && !(old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) { + defer_now && !already_deferred) { + + /* + * Request to defer a clean process that's idle-exit enabled + * and not already in the jetsam deferred band. + */ memorystatus_schedule_idle_demotion_locked(p, TRUE); reschedule = TRUE; - } else if (!(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && (old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) { + + } else if (!defer_now && already_deferred) { + + /* + * Either the process is no longer idle-exit enabled OR + * there's a request to cancel a currently active deferral. + */ + memorystatus_invalidate_idle_demotion_locked(p, TRUE); + reschedule = TRUE; + } + } else { + + /* + * We are trying to operate on a dirty process. Dirty processes have to + * be removed from the deferred band. The question is do we reset the + * deferred state or not? + * + * This could be a legal request like: + * - this process had opted into the JETSAM_DEFERRED band + * - but it's now dirty and requests to opt out. 
+		 * In this case, we remove the process from the band and reset its
+		 * state too. It'll opt back in properly when needed.
+		 *
+		 * OR, this request could be a user-space bug. E.g.:
+		 * - this process had opted into the JETSAM_DEFERRED band when clean
+		 * - and then issues another request to again put it into the band except
+		 *   this time the process is dirty.
+		 * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of
+		 * the deferred band with its state intact. So our request below is a no-op.
+		 * But we do it here anyway for coverage.
+		 *
+		 * memorystatus_update_idle_priority_locked()
+		 * single-mindedly treats a dirty process as "cannot be in the deferred band".
+		 */
+
+		if (!defer_now && already_deferred) {
 			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
 			reschedule = TRUE;
+		} else {
+			memorystatus_invalidate_idle_demotion_locked(p, FALSE);
+			reschedule = TRUE;
 		}
 	}
 
@@ -1123,9 +1458,19 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
 	boolean_t now_dirty = FALSE;
 
 	MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty);
+
+	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0);
 
 	proc_list_lock();
 
+	if ((p->p_listflag & P_LIST_EXITED) != 0) {
+		/*
+		 * Process is on its way out.
+		 */
+		ret = EBUSY;
+		goto exit;
+	}
+
 	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
 		ret = EPERM;
 		goto exit;
@@ -1181,15 +1526,59 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
 
 		/* Manage idle exit deferral, if applied */
 		if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) ==
 			(P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) {
+
+			/*
+			 * P_DIRTY_DEFER_IN_PROGRESS means the process is in the deferred band OR it might be heading back
+			 * there once it's clean again and has some protection window left.
+			 */
+
 			if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
+				/*
+				 * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE"
+				 *
+				 * The process will move from the deferred band to its higher requested
+				 * jetsam band. But we don't clear its state i.e. we want to remember that
+				 * this process was part of the "deferred" band and will return to it.
+				 *
+				 * This way, we don't let it age beyond the protection
+				 * window when it returns to "clean". All the while giving
+				 * it a chance to perform its work while "dirty".
+				 *
+				 */
 				memorystatus_invalidate_idle_demotion_locked(p, FALSE);
 				reschedule = TRUE;
 			} else {
-				/* We evaluate lazily, so reset the idle-deadline if it's expired by the time the process becomes clean. */
+
+				/*
+				 * Process is back from "dirty" to "clean".
+				 *
+				 * Is its timer up OR does it still have some protection
+				 * window left?
+				 */
+
 				if (mach_absolute_time() >= p->p_memstat_idledeadline) {
-					p->p_memstat_idledeadline = 0;
-					p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
+					/*
+					 * The process' deadline has expired. It currently
+					 * does not reside in the DEFERRED bucket.
+					 *
+					 * It's on its way to the JETSAM_PRIORITY_IDLE
+					 * bucket via memorystatus_update_idle_priority_locked()
+					 * below.
+					 *
+					 * So all we need to do is reset all the state on the
+					 * process that's related to the DEFERRED bucket i.e.
+					 * the DIRTY_DEFER_IN_PROGRESS flag and the timer deadline.
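+					 *
+					 * For reference, the P_DIRTY_DEFER_IN_PROGRESS lifecycle
+					 * across these paths is roughly:
+					 *
+					 *	track(DEFER) while clean    -> arm timer, enter IDLE_DEFERRED
+					 *	process goes dirty          -> leave the band, keep flag + deadline
+					 *	clean again, timer live     -> re-arm timer, re-enter IDLE_DEFERRED
+					 *	clean again, timer expired  -> clear flag + deadline, fall to IDLE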
+ * + */ + + memorystatus_invalidate_idle_demotion_locked(p, TRUE); + reschedule = TRUE; } else { + /* + * It still has some protection window left and so + * we just re-arm the timer without modifying any + * state on the process. + */ memorystatus_schedule_idle_demotion_locked(p, FALSE); reschedule = TRUE; } @@ -1215,12 +1604,71 @@ exit: } int -memorystatus_dirty_get(proc_t p) { +memorystatus_dirty_clear(proc_t p, uint32_t pcontrol) { + int ret = 0; - - proc_list_lock(); + + MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty); - if (p->p_memstat_dirty & P_DIRTY_TRACK) { + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0); + + proc_list_lock(); + + if ((p->p_listflag & P_LIST_EXITED) != 0) { + /* + * Process is on its way out. + */ + ret = EBUSY; + goto exit; + } + + if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { + ret = EPERM; + goto exit; + } + + if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { + /* Dirty tracking not enabled */ + ret = EINVAL; + goto exit; + } + + if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER)) == 0) { + ret = EINVAL; + goto exit; + } + + if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { + p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS; + } + + /* This can be set and cleared exactly once. */ + if (pcontrol & PROC_DIRTY_DEFER) { + + if (p->p_memstat_dirty & P_DIRTY_DEFER) { + + p->p_memstat_dirty &= ~P_DIRTY_DEFER; + + memorystatus_invalidate_idle_demotion_locked(p, TRUE); + memorystatus_update_idle_priority_locked(p); + memorystatus_reschedule_idle_demotion_locked(); + } + } + + ret = 0; +exit: + proc_list_unlock(); + + return ret; +} + +int +memorystatus_dirty_get(proc_t p) { + int ret = 0; + + proc_list_lock(); + + if (p->p_memstat_dirty & P_DIRTY_TRACK) { ret |= PROC_DIRTY_TRACKED; if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT; @@ -1228,6 +1676,9 @@ memorystatus_dirty_get(proc_t p) { if (p->p_memstat_dirty & P_DIRTY) { ret |= PROC_DIRTY_IS_DIRTY; } + if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) { + ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS; + } } proc_list_unlock(); @@ -1261,7 +1712,7 @@ memorystatus_on_suspend(proc_t p) { #if CONFIG_FREEZE uint32_t pages; - memorystatus_get_task_page_counts(p->task, &pages, NULL); + memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); #endif proc_list_lock(); #if CONFIG_FREEZE @@ -1392,10 +1843,14 @@ kill_idle_exit_proc(void) } #endif +#if CONFIG_JETSAM static void memorystatus_thread_wake(void) { thread_wakeup((event_t)&memorystatus_wakeup); } +#endif /* CONFIG_JETSAM */ + +extern void vm_pressure_response(void); static int memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation) @@ -1409,9 +1864,6 @@ memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation) return thread_block(continuation); } -extern boolean_t vm_compressor_thrashing_detected; -extern uint64_t vm_compressor_total_compressions(void); - static void memorystatus_thread(void *param __unused, wait_result_t wr __unused) { @@ -1419,6 +1871,7 @@ memorystatus_thread(void *param __unused, wait_result_t wr __unused) #if CONFIG_JETSAM boolean_t post_snapshot = FALSE; uint32_t errors = 0; + uint32_t hwm_kill = 0; #endif if (is_vm_privileged == FALSE) { @@ -1437,75 +1890,102 @@ memorystatus_thread(void *param __unused, wait_result_t wr __unused) KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | 
		DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
 
-	uint32_t cause = vm_compressor_thrashing_detected ? kMemorystatusKilledVMThrashing : kMemorystatusKilledVMPageShortage;
-
-	/* Jetsam aware version.
+	/*
+	 * Jetsam aware version.
+	 *
+	 * The VM pressure notification thread is working its way through clients in parallel.
 	 *
-	 * If woken under pressure, go down the path of killing:
+	 * So, while the pressure notification thread is targeting processes in order of
+	 * increasing jetsam priority, we can hopefully reduce / stop its work by killing
+	 * any processes that have exceeded their highwater mark.
 	 *
-	 * - processes exceeding their highwater mark if no clean victims available
-	 * - the least recently used process if no highwater mark victims available
+	 * If we run out of HWM processes and our available pages drop below the critical threshold,
+	 * then we target the least recently used process in order of increasing jetsam priority (exception: the FG band).
 	 */
-#if !LATENCY_JETSAM
-	while (vm_compressor_thrashing_detected || memorystatus_available_pages <= memorystatus_available_pages_critical) {
-#else
-	while (kill_under_pressure) {
-		const uint32_t SNAPSHOT_WAIT_TIMEOUT_MS = 100;
-		wait_result_t wait_result;
-#endif
+	while (is_thrashing(kill_under_pressure_cause) ||
+	       memorystatus_available_pages <= memorystatus_available_pages_pressure) {
 		boolean_t killed;
 		int32_t priority;
+		uint32_t cause;
+
+		if (kill_under_pressure_cause) {
+			cause = kill_under_pressure_cause;
+		} else {
+			cause = kMemorystatusKilledVMPageShortage;
+		}
 
 #if LEGACY_HIWATER
 		/* Highwater */
 		killed = memorystatus_kill_hiwat_proc(&errors);
 		if (killed) {
+			hwm_kill++;
 			post_snapshot = TRUE;
 			goto done;
+		} else {
+			memorystatus_hwm_candidates = FALSE;
+		}
+
+		/* No highwater processes to kill. Continue or stop for now? */
+		if (!is_thrashing(kill_under_pressure_cause) &&
+		    (memorystatus_available_pages > memorystatus_available_pages_critical)) {
+			/*
+			 * We are _not_ out of pressure but we are above the critical threshold and there's:
+			 * - no compressor thrashing
+			 * - no more HWM processes left.
+			 * For now, don't kill any other processes.
+			 */
+
+			if (hwm_kill == 0) {
+				memorystatus_thread_wasted_wakeup++;
+			}
+
+			break;
 		}
 #endif
 
 		/* LRU */
 		killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors);
 		if (killed) {
-			if (!kill_under_pressure && (priority != JETSAM_PRIORITY_IDLE)) {
-				/* Don't generate logs for steady-state idle-exit kills */
+			/* Don't generate logs for steady-state idle-exit kills (unless overridden for debug) */
+			if ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot) {
 				post_snapshot = TRUE;
 			}
 			goto done;
 		}
-
-		/* Under pressure and unable to kill a process - panic */
-		panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages);
+
+		if (memorystatus_available_pages <= memorystatus_available_pages_critical) {
+			/* Under pressure and unable to kill a process - panic */
+			panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages);
+		}
 
 done:
-	kill_under_pressure = FALSE;
-	vm_compressor_thrashing_detected = FALSE;
-
-#if LATENCY_JETSAM
-	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_START,
-		memorystatus_available_pages, 0, 0, 0, 0);
-	thread_wakeup((event_t)&latency_jetsam_wakeup);
-	/*
-	 * Coalesce snapshot reports in the face of repeated jetsams by blocking here with a timeout.
-	 * If the wait expires, issue the note.
+ + /* + * We do not want to over-kill when thrashing has been detected. + * To avoid that, we reset the flag here and notify the + * compressor. */ - wait_result = memorystatus_thread_block(SNAPSHOT_WAIT_TIMEOUT_MS, THREAD_CONTINUE_NULL); - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_END, - memorystatus_available_pages, 0, 0, 0, 0); - if (wait_result != THREAD_AWAKENED) { - /* Catch-all */ - break; + if (is_thrashing(kill_under_pressure_cause)) { + kill_under_pressure_cause = 0; + vm_thrashing_jetsam_done(); } -#endif } - + + kill_under_pressure_cause = 0; + if (errors) { memorystatus_clear_errors(); } #if VM_PRESSURE_EVENTS - memorystatus_update_vm_pressure(TRUE); + /* + * LD: We used to target the foreground process first and foremost here. + * Now, we target all processes, starting from the non-suspended, background + * processes first. We will target foreground too. + * + * memorystatus_update_vm_pressure(TRUE); + */ + //vm_pressure_response(); #endif if (post_snapshot) { @@ -1520,29 +2000,27 @@ done: #else /* CONFIG_JETSAM */ - /* Simple version. - * - * Jetsam not enabled, so just kill the first suitable clean process - * and sleep. + /* + * Jetsam not enabled */ - if (kill_idle_exit) { - kill_idle_exit_proc(); - kill_idle_exit = FALSE; - } - #endif /* CONFIG_JETSAM */ memorystatus_thread_block(0, memorystatus_thread); } #if !CONFIG_JETSAM +/* + * Returns TRUE: + * when an idle-exitable proc was killed + * Returns FALSE: + * when there are no more idle-exitable procs found + * when the attempt to kill an idle-exitable proc failed + */ boolean_t memorystatus_idle_exit_from_VM(void) { - kill_idle_exit = TRUE; - memorystatus_thread_wake(); - return TRUE; + return(kill_idle_exit_proc()); } -#endif +#endif /* !CONFIG_JETSAM */ #if CONFIG_JETSAM @@ -1551,41 +2029,70 @@ boolean_t memorystatus_idle_exit_from_VM(void) { * (dirty pages + IOKit mappings) * * This is invoked for both advisory, non-fatal per-task high watermarks, - * as well as the fatal system-wide task memory limit. + * as well as the fatal task memory limits. */ void memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb) { proc_t p = current_proc(); - - printf("process %d (%s) %s physical memory footprint limit of %d MB\n", - p->p_pid, p->p_comm, - warning ? "approaching" : "exceeded", - max_footprint_mb); + + if (warning == FALSE) { + printf("process %d (%s) exceeded physical memory footprint limit of %d MB\n", + p->p_pid, p->p_comm, max_footprint_mb); + } #if VM_PRESSURE_EVENTS if (warning == TRUE) { - if (memorystatus_warn_process(p->p_pid) != TRUE) { + if (memorystatus_warn_process(p->p_pid, TRUE /* critical? */) != TRUE) { /* Print warning, since it's possible that task has not registered for pressure notifications */ - printf("task_exceeded_footprint: failed to warn the current task (exiting?).\n"); + printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n"); } return; } #endif /* VM_PRESSURE_EVENTS */ - if (p->p_memstat_memlimit <= 0) { + if ((p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT) == P_MEMSTAT_FATAL_MEMLIMIT) { /* - * If this process has no high watermark, then we have been invoked because the task - * has violated the system-wide per-task memory limit. + * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task + * has violated either the system-wide per-task memory limit OR its own task limit. 
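+ *
+ * In other words, the paths out of this routine are roughly:
+ *
+ *	warning == TRUE                 -> notify the task (memorystatus_warn_process)
+ *	warning == FALSE, fatal limit   -> kill synchronously (below)
+ *	warning == FALSE, non-fatal HWM -> set memorystatus_hwm_candidates and
+ *	                                   let the jetsam thread reap HWM offenders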
*/ if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) { printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n"); } + } else { + /* + * HWM offender exists. Done without locks or synchronization. + * See comment near its declaration for more details. + */ + memorystatus_hwm_candidates = TRUE; + } +} + +/* + * This is invoked when cpulimits have been exceeded while in fatal mode. + * The jetsam_flags do not apply as those are for memory related kills. + * We call this routine so that the offending process is killed with + * a non-zero exit status. + */ +void +jetsam_on_ledger_cpulimit_exceeded(void) +{ + int retval = 0; + int jetsam_flags = 0; /* make it obvious */ + proc_t p = current_proc(); + + printf("task_exceeded_cpulimit: killing pid %d [%s]\n", + p->p_pid, (p->p_comm ? p->p_comm : "(unknown)")); + + retval = jetsam_do_kill(p, jetsam_flags); + + if (retval) { + printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n"); } } static void -memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint) +memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages) { assert(task); assert(footprint); @@ -1594,31 +2101,15 @@ memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *ma if (max_footprint) { *max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64); } -} - -static int -memorystatus_send_note(int event_code, void *data, size_t data_length) { - int ret; - struct kev_msg ev_msg; - - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_SYSTEM_CLASS; - ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS; - - ev_msg.event_code = event_code; - - ev_msg.dv[0].data_length = data_length; - ev_msg.dv[0].data_ptr = data; - ev_msg.dv[1].data_length = 0; - - ret = kev_post_msg(&ev_msg); - if (ret) { - printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); + if (max_footprint_lifetime) { + *max_footprint_lifetime = (uint32_t)(get_task_resident_max(task) / PAGE_SIZE_64); + } + if (purgeable_pages) { + *purgeable_pages = (uint32_t)(get_task_purgeable_size(task) / PAGE_SIZE_64); } - - return ret; } + static void memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause) { @@ -1642,6 +2133,25 @@ memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause) void memorystatus_pages_update(unsigned int pages_avail) { + memorystatus_available_pages = pages_avail; + +#if VM_PRESSURE_EVENTS + /* + * Since memorystatus_available_pages changes, we should + * re-evaluate the pressure levels on the system and + * check if we need to wake the pressure thread. + * We also update memorystatus_level in that routine. + */ + vm_pressure_response(); + + if (memorystatus_available_pages <= memorystatus_available_pages_pressure) { + + if (memorystatus_hwm_candidates || (memorystatus_available_pages <= memorystatus_available_pages_critical)) { + memorystatus_thread_wake(); + } + } +#else /* VM_PRESSURE_EVENTS */ + boolean_t critical, delta; if (!memorystatus_delta) { @@ -1653,32 +2163,32 @@ void memorystatus_pages_update(unsigned int pages_avail) || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? 
TRUE : FALSE; if (critical || delta) { - memorystatus_available_pages = pages_avail; memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem); - -#if LATENCY_JETSAM - /* Bail early to avoid excessive wake-ups */ - if (critical) { - return; - } -#endif - memorystatus_thread_wake(); } +#endif /* VM_PRESSURE_EVENTS */ } static boolean_t memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry) { + clock_sec_t tv_sec; + clock_usec_t tv_usec; + memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t)); entry->pid = p->p_pid; strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1); entry->priority = p->p_memstat_effectivepriority; - memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages); + memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages, &entry->max_pages_lifetime, &entry->purgeable_pages); entry->state = memorystatus_build_state(p); entry->user_data = p->p_memstat_userdata; memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid)); + entry->fds = p->p_fd->fd_nfiles; + + absolutetime_to_microtime(get_task_cpu_time(p->task), &tv_sec, &tv_usec); + entry->cpu_time.tv_sec = tv_sec; + entry->cpu_time.tv_usec = tv_usec; return TRUE; } @@ -1779,8 +2289,9 @@ memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) { return FALSE; } - printf("memorystatus: specifically killing pid %d [%s] - memorystatus_available_pages: %d\n", - victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages); + printf("memorystatus: specifically killing pid %d [%s] (%s) - memorystatus_available_pages: %d\n", + victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), + jetsam_kill_cause_name[cause], memorystatus_available_pages); proc_list_lock(); @@ -1817,6 +2328,8 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, proc_list_lock(); + memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND); + next_p = memorystatus_get_first_proc_locked(&i, TRUE); while (next_p) { #if DEVELOPMENT || DEBUG @@ -1845,6 +2358,22 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, } #endif /* DEVELOPMENT || DEBUG */ + if (cause == kMemorystatusKilledVnodes) + { + /* + * If the system runs out of vnodes, we systematically jetsam + * processes in hopes of stumbling onto a vnode gain that helps + * the system recover. The process that happens to trigger + * this path has no known relationship to the vnode consumption. + * We attempt to safeguard that process e.g: do not jetsam it. 
+ */ + + if (p == current_proc()) { + /* do not jetsam the current process */ + continue; + } + } + #if CONFIG_FREEZE boolean_t skip; boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM)); @@ -1869,7 +2398,7 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, * - the priority was requested *and* the targeted process is not at idle priority */ if ((memorystatus_jetsam_snapshot_count == 0) && - ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE)))) { + (memorystatus_idle_snapshot || ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE))))) { memorystatus_jetsam_snapshot_procs_locked(); new_snapshot = TRUE; } @@ -1911,8 +2440,11 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, p = proc_ref_locked(p); proc_list_unlock(); if (p) { - printf("memorystatus: jetsam killing pid %d [%s] - memorystatus_available_pages: %d\n", - aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages); + printf("memorystatus: %s %d [%s] (%s) - memorystatus_available_pages: %d\n", + ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) ? + "idle exiting pid" : "jetsam killing pid"), + aPid, (p->p_comm ? p->p_comm : "(unknown)"), + jetsam_kill_cause_name[cause], memorystatus_available_pages); killed = memorystatus_do_kill(p, cause); } @@ -1962,6 +2494,7 @@ memorystatus_kill_hiwat_proc(uint32_t *errors) memorystatus_available_pages, 0, 0, 0, 0); proc_list_lock(); + memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND); next_p = memorystatus_get_first_proc_locked(&i, TRUE); while (next_p) { @@ -2010,8 +2543,8 @@ memorystatus_kill_hiwat_proc(uint32_t *errors) if (skip) { continue; } else { - MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d pages > 1 (%d)\n", - (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, pages, hiwat); + MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d Mb > 1 (%d Mb)\n", + (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? 
"suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit); if (memorystatus_jetsam_snapshot_count == 0) { memorystatus_jetsam_snapshot_procs_locked(); @@ -2085,11 +2618,12 @@ exit: static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) { /* TODO: allow a general async path */ - if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage || cause != kMemorystatusKilledVMThrashing)) { + if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing && + cause != kMemorystatusKilledFCThrashing)) { return FALSE; } - kill_under_pressure = TRUE; + kill_under_pressure_cause = cause; memorystatus_thread_wake(); return TRUE; } @@ -2139,6 +2673,15 @@ memorystatus_kill_on_VM_thrashing(boolean_t async) { } } +boolean_t +memorystatus_kill_on_FC_thrashing(boolean_t async) { + if (async) { + return memorystatus_kill_process_async(-1, kMemorystatusKilledFCThrashing); + } else { + return memorystatus_kill_process_sync(-1, kMemorystatusKilledFCThrashing); + } +} + boolean_t memorystatus_kill_on_vnode_limit(void) { return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes); @@ -2196,12 +2739,12 @@ memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) } /* Only freeze processes meeting our minimum resident page criteria */ - memorystatus_get_task_page_counts(p->task, &pages, NULL); + memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); if (pages < memorystatus_freeze_pages_min) { continue; // with lock held } - if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { /* Ensure there's enough free space to freeze this process. */ max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); if (max_pages < memorystatus_freeze_pages_min) { @@ -2409,8 +2952,9 @@ memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) if (memorystatus_freeze_enabled) { if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { - /* Only freeze if we've not exceeded our pageout budgets */ - if (!memorystatus_freeze_update_throttle()) { + /* Only freeze if we've not exceeded our pageout budgets or we're not backed by swap. 
*/ + if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS || + !memorystatus_freeze_update_throttle()) { memorystatus_freeze_top_process(&memorystatus_freeze_swap_low); } else { printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n"); @@ -2425,113 +2969,70 @@ memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) #endif /* CONFIG_FREEZE */ -#if CONFIG_JETSAM && VM_PRESSURE_EVENTS +#if VM_PRESSURE_EVENTS -boolean_t -memorystatus_warn_process(pid_t pid) { - return (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0); -} +#if CONFIG_MEMORYSTATUS -static inline boolean_t -memorystatus_update_pressure_locked(boolean_t *pressured) { - vm_pressure_level_t old_level, new_level; - - old_level = memorystatus_vm_pressure_level; - - if (memorystatus_available_pages > memorystatus_available_pages_pressure) { - /* Too many free pages */ - new_level = kVMPressureNormal; - } -#if CONFIG_FREEZE - else if (memorystatus_frozen_count > 0) { - /* Frozen processes exist */ - new_level = kVMPressureNormal; - } -#endif - else if (memorystatus_suspended_count > MEMORYSTATUS_SUSPENDED_THRESHOLD) { - /* Too many supended processes */ - new_level = kVMPressureNormal; - } - else if (memorystatus_suspended_count > 0) { - /* Some suspended processes - warn */ - new_level = kVMPressureWarning; - } - else { - /* Otherwise, pressure level is urgent */ - new_level = kVMPressureUrgent; - } - - *pressured = (new_level != kVMPressureNormal); +static int +memorystatus_send_note(int event_code, void *data, size_t data_length) { + int ret; + struct kev_msg ev_msg; - /* Did the pressure level change? */ - if (old_level != new_level) { - MEMORYSTATUS_DEBUG(1, "memorystatus_update_pressure_locked(): memory pressure changed %d -> %d; memorystatus_available_pages: %d\n ", - old_level, new_level, memorystatus_available_pages); - memorystatus_vm_pressure_level = new_level; - return TRUE; + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_SYSTEM_CLASS; + ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS; + + ev_msg.event_code = event_code; + + ev_msg.dv[0].data_length = data_length; + ev_msg.dv[0].data_ptr = data; + ev_msg.dv[1].data_length = 0; + + ret = kev_post_msg(&ev_msg); + if (ret) { + printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); } - return FALSE; + return ret; } -kern_return_t -memorystatus_update_vm_pressure(boolean_t target_foreground) { - boolean_t pressure_changed, pressured; - boolean_t warn = FALSE; - - /* - * Centralised pressure handling routine. Called from: - * - The main jetsam thread. In this case, we update the pressure level and dispatch warnings to the foreground - * process *only*, each time the available page % drops. - * - The pageout scan path. In this scenario, every other registered process is targeted in footprint order. - * - * This scheme guarantees delivery to the foreground app, while providing for warnings to the remaining processes - * driven by the pageout scan. - */ +boolean_t +memorystatus_warn_process(pid_t pid, boolean_t critical) { - MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): foreground %d; available %d, critical %d, pressure %d\n", - target_foreground, memorystatus_available_pages, memorystatus_available_pages_critical, memorystatus_available_pages_pressure); + boolean_t ret = FALSE; + struct knote *kn = NULL; - proc_list_lock(); + /* + * See comment in sysctl_memorystatus_vm_pressure_send. 
+ */ - pressure_changed = memorystatus_update_pressure_locked(&pressured); + memorystatus_klist_lock(); + kn = vm_find_knote_from_pid(pid, &memorystatus_klist); + if (kn) { + /* + * By setting the "fflags" here, we are forcing + * a process to deal with the case where it's + * bumping up into its memory limits. If we don't + * do this here, we will end up depending on the + * system pressure snapshot evaluation in + * filt_memorystatus(). + */ - if (pressured) { - if (target_foreground) { - if (memorystatus_available_pages != memorystatus_last_foreground_pressure_pages) { - if (memorystatus_available_pages < memorystatus_last_foreground_pressure_pages) { - warn = TRUE; - } - memorystatus_last_foreground_pressure_pages = memorystatus_available_pages; - } + if (critical) { + kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; } else { - warn = TRUE; + kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; } - } else if (pressure_changed) { - memorystatus_last_foreground_pressure_pages = (unsigned int)-1; - } - - proc_list_unlock(); - - /* Target foreground processes if specified */ - if (warn) { - if (target_foreground) { - MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_foreground_candidates()\n"); - vm_find_pressure_foreground_candidates(); - } else { - MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_candidate()\n"); - /* Defer to VM code. This can race with the foreground priority, but - * it's preferable to holding onto locks for an extended period. */ - vm_find_pressure_candidate(); + KNOTE(&memorystatus_klist, kMemorystatusPressure); + ret = TRUE; + } else { + if (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0) { + ret = TRUE; } } - - /* Dispatch the global kevent to privileged listeners */ - if (pressure_changed) { - memorystatus_issue_pressure_kevent(pressured); - } + memorystatus_klist_unlock(); - return KERN_SUCCESS; + return ret; } int @@ -2540,6 +3041,20 @@ memorystatus_send_pressure_note(pid_t pid) { return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid)); } +void +memorystatus_send_low_swap_note(void) { + + struct knote *kn = NULL; + + memorystatus_klist_lock(); + SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { + if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) { + KNOTE(&memorystatus_klist, kMemorystatusLowSwap); + } + } + memorystatus_klist_unlock(); +} + boolean_t memorystatus_bg_pressure_eligible(proc_t p) { boolean_t eligible = FALSE; @@ -2563,8 +3078,7 @@ memorystatus_is_foreground_locked(proc_t p) { return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) || (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT)); } - -#else /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ +#endif /* CONFIG_MEMORYSTATUS */ /* * Trigger levels to test the mechanism. 
@@ -2581,10 +3095,10 @@ boolean_t memorystatus_manual_testing_on = FALSE; vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal; extern struct knote * -vm_pressure_select_optimal_candidate_to_notify(struct klist *, int); +vm_pressure_select_optimal_candidate_to_notify(struct klist *, int, boolean_t); extern -kern_return_t vm_pressure_notification_without_levels(void); +kern_return_t vm_pressure_notification_without_levels(boolean_t); extern void vm_pressure_klist_lock(void); extern void vm_pressure_klist_unlock(void); @@ -2611,8 +3125,6 @@ void memorystatus_on_pageout_scan_end(void) { * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately. * */ -boolean_t -is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t); boolean_t is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set) @@ -2631,19 +3143,48 @@ is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pr return FALSE; } -extern kern_return_t vm_pressure_notify_dispatch_vm_clients(void); +extern kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process); + +#define VM_PRESSURE_DECREASED_SMOOTHING_PERIOD 5000 /* milliseconds */ kern_return_t -memorystatus_update_vm_pressure(boolean_t target_best_process) +memorystatus_update_vm_pressure(boolean_t target_foreground_process) { struct knote *kn_max = NULL; pid_t target_pid = -1; struct klist dispatch_klist = { NULL }; proc_t target_proc = PROC_NULL; - static vm_pressure_level_t level_snapshot = kVMPressureNormal; struct task *task = NULL; boolean_t found_candidate = FALSE; + static vm_pressure_level_t level_snapshot = kVMPressureNormal; + static vm_pressure_level_t prev_level_snapshot = kVMPressureNormal; + boolean_t smoothing_window_started = FALSE; + struct timeval smoothing_window_start_tstamp = {0, 0}; + struct timeval curr_tstamp = {0, 0}; + int elapsed_msecs = 0; + +#if !CONFIG_JETSAM +#define MAX_IDLE_KILLS 100 /* limit the number of idle kills allowed */ + + int idle_kill_counter = 0; + + /* + * On desktop we take this opportunity to free up memory pressure + * by immediately killing idle exitable processes. We use a delay + * to avoid overkill. And we impose a max counter as a fail safe + * in case daemons re-launch too fast. + */ + while ((memorystatus_vm_pressure_level != kVMPressureNormal) && (idle_kill_counter < MAX_IDLE_KILLS)) { + if (memorystatus_idle_exit_from_VM() == FALSE) { + /* No idle exitable processes left to kill */ + break; + } + idle_kill_counter++; + delay(1000000); /* 1 second */ + } +#endif /* !CONFIG_JETSAM */ + while (1) { /* @@ -2652,8 +3193,33 @@ memorystatus_update_vm_pressure(boolean_t target_best_process) */ level_snapshot = memorystatus_vm_pressure_level; + if (prev_level_snapshot > level_snapshot) { + /* + * Pressure decreased? Let's take a little breather + * and see if this condition stays. 
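+			 * e.g. with VM_PRESSURE_DECREASED_SMOOTHING_PERIOD at 5000 msecs,
+			 * a drop from Warning back to Normal is re-sampled (sleeping
+			 * INTER_NOTIFICATION_DELAY between samples) for up to 5 seconds
+			 * before any lower-pressure notification goes out.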
+			 */
+			if (smoothing_window_started == FALSE) {
+
+				smoothing_window_started = TRUE;
+				microuptime(&smoothing_window_start_tstamp);
+			}
+
+			microuptime(&curr_tstamp);
+			timevalsub(&curr_tstamp, &smoothing_window_start_tstamp);
+			elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;
+
+			if (elapsed_msecs < VM_PRESSURE_DECREASED_SMOOTHING_PERIOD) {
+
+				delay(INTER_NOTIFICATION_DELAY);
+				continue;
+			}
+		}
+
+		prev_level_snapshot = level_snapshot;
+		smoothing_window_started = FALSE;
+
 		memorystatus_klist_lock();
-		kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot);
+		kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot, target_foreground_process);
 
 		if (kn_max == NULL) {
 			memorystatus_klist_unlock();
@@ -2737,25 +3303,17 @@ memorystatus_update_vm_pressure(boolean_t target_best_process)
 		microuptime(&target_proc->vm_pressure_last_notify_tstamp);
 		proc_rele(target_proc);
 
-		if (target_best_process == TRUE) {
+		if (memorystatus_manual_testing_on == TRUE && target_foreground_process == TRUE) {
 			break;
 		}
 
try_dispatch_vm_clients:
-		if (level_snapshot != kVMPressureNormal) {
-			/*
-			 * Wake up idle-exit thread.
-			 * Targets one process per invocation.
-			 *
-			 * TODO: memorystatus_idle_exit_from_VM should return FALSE once it's
-			 * done with all idle-exitable processes. Currently, we will exit this
-			 * loop when we are done with notification clients (level and non-level based)
-			 * but we may still have some idle-exitable processes around.
-			 *
+		if (kn_max == NULL && level_snapshot != kVMPressureNormal) {
+			/*
+			 * We will exit this loop when we are done with
+			 * notification clients (level and non-level based).
 			 */
-			memorystatus_idle_exit_from_VM();
-
-			if ((vm_pressure_notify_dispatch_vm_clients() == KERN_FAILURE) && (kn_max == NULL)) {
+			if ((vm_pressure_notify_dispatch_vm_clients(target_foreground_process) == KERN_FAILURE) && (kn_max == NULL)) {
 				/*
 				 * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications
 				 * AND
@@ -2767,8 +3325,45 @@ try_dispatch_vm_clients:
 			}
 		}
 
-		if (memorystatus_manual_testing_on == FALSE) {
-			delay(INTER_NOTIFICATION_DELAY);
+		/*
+		 * LD: This block of code below used to be invoked in the older memory notification scheme on embedded every time
+		 * a process was sent a memory pressure notification. The "memorystatus_klist" list was used to hold these
+		 * privileged listeners. But now we have moved to the newer scheme and are trying to move away from the extra
+		 * notifications. So the code is here in case we break compatibility and need to send out notifications to the
+		 * privileged apps.
+		 */
+#if 0
+#endif /* 0 */
+
+		if (memorystatus_manual_testing_on == TRUE) {
+			/*
+			 * Testing out the pressure notification scheme.
+			 * No need for delays etc.
+			 */
+		} else {
+
+			uint32_t sleep_interval = INTER_NOTIFICATION_DELAY;
+#if CONFIG_JETSAM
+			unsigned int page_delta = 0;
+			unsigned int skip_delay_page_threshold = 0;
+
+			assert(memorystatus_available_pages_pressure >= memorystatus_available_pages_critical_base);
+
+			page_delta = (memorystatus_available_pages_pressure - memorystatus_available_pages_critical_base) / 2;
+			skip_delay_page_threshold = memorystatus_available_pages_pressure - page_delta;
+
+			if (memorystatus_available_pages <= skip_delay_page_threshold) {
+				/*
+				 * We are nearing the critical mark fast and can't afford to wait between
+				 * notifications.
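+				 *
+				 * e.g. with a pressure threshold of 4000 pages and a critical
+				 * base of 1600 pages (hypothetical values),
+				 * page_delta = (4000 - 1600) / 2 = 1200, so the delay is
+				 * skipped once available pages <= 4000 - 1200 = 2800.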
+ */ + sleep_interval = 0; + } +#endif /* CONFIG_JETSAM */ + + if (sleep_interval) { + delay(sleep_interval); + } } } @@ -2815,15 +3410,22 @@ static int sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS { #pragma unused(arg1, arg2, oidp) - vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level); return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level)); } +#if DEBUG || DEVELOPMENT + SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); +#else /* DEBUG || DEVELOPMENT */ + +SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED, + 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); + +#endif /* DEBUG || DEVELOPMENT */ extern int memorystatus_purge_on_warning; extern int memorystatus_purge_on_critical; @@ -2928,7 +3530,7 @@ sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS } else { vm_pressure_klist_lock(); - vm_pressure_notification_without_levels(); + vm_pressure_notification_without_levels(FALSE); vm_pressure_klist_unlock(); } @@ -2948,7 +3550,7 @@ SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, ""); -#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ +#endif /* VM_PRESSURE_EVENTS */ /* Return both allocated and actual size, since there's a race between allocation and list compilation */ static int @@ -3072,19 +3674,34 @@ memorystatus_clear_errors(void) static void memorystatus_update_levels_locked(boolean_t critical_only) { + memorystatus_available_pages_critical = memorystatus_available_pages_critical_base; -#if !LATENCY_JETSAM - { - // If there's an entry in the first bucket, we have idle processes - memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; - if (first_bucket->count) { - memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset; + + /* + * If there's an entry in the first bucket, we have idle processes. + */ + memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; + if (first_bucket->count) { + memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset; + + if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) { + /* + * The critical threshold must never exceed the pressure threshold + */ + memorystatus_available_pages_critical = memorystatus_available_pages_pressure; } } -#endif + #if DEBUG || DEVELOPMENT if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) { memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic; + + if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) { + /* + * The critical threshold must never exceed the pressure threshold + */ + memorystatus_available_pages_critical = memorystatus_available_pages_pressure; + } } #endif @@ -3127,6 +3744,7 @@ memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *sna return 0; } + static int memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) { int error = EINVAL; @@ -3154,6 +3772,206 @@ out: return error; } +/* + * Routine: memorystatus_cmd_grp_set_properties + * Purpose: Update properties for a group of processes. 
+ *
+ * Supported Properties:
+ * [priority]
+ * Move each process out of its effective priority
+ * band and into a new priority band.
+ * Maintains relative order from lowest to highest priority.
+ * In single band, maintains relative order from head to tail.
+ *
+ * eg: before [effectivepriority | pid]
+ * [18 | p101 ]
+ * [17 | p55, p67, p19 ]
+ * [12 | p103 p10 ]
+ * [ 7 | p25 ]
+ * [ 0 | p71, p82, ]
+ *
+ * after [ new band | pid]
+ * [ xxx | p71, p82, p25, p103, p10, p55, p67, p19, p101]
+ *
+ * Returns: 0 on success, else non-zero.
+ *
+ * Caveat: We know there is a race window regarding recycled pids.
+ * A process could be killed before the kernel can act on it here.
+ * If a pid cannot be found in any of the jetsam priority bands,
+ * then we simply ignore it. No harm.
+ * But, if the pid has been recycled then it could be an issue.
+ * In that scenario, we might move an unsuspecting process to the new
+ * priority band. It's not clear how the kernel can safeguard
+ * against this, but it would be an extremely rare case anyway.
+ * The caller of this api might avoid such race conditions by
+ * ensuring that the processes passed in the pid list are suspended.
+ */
+
+
+/* This internal structure can expand when we add support for more properties */
+typedef struct memorystatus_internal_properties
+{
+ proc_t proc;
+ int32_t priority; /* see memorystatus_priority_entry_t : priority */
+} memorystatus_internal_properties_t;
+
+
+static int
+memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
+
+#pragma unused (flags)
+
+ /*
+ * We only handle setting priority
+ * per process
+ */
+
+ int error = 0;
+ memorystatus_priority_entry_t *entries = NULL;
+ uint32_t entry_count = 0;
+
+ /* This will be the ordered proc list */
+ memorystatus_internal_properties_t *table = NULL;
+ size_t table_size = 0;
+ uint32_t table_count = 0;
+
+ uint32_t i = 0;
+ uint32_t bucket_index = 0;
+ boolean_t head_insert;
+ int32_t new_priority;
+
+ proc_t p;
+
+ /* Verify inputs */
+ if ((buffer == USER_ADDR_NULL) || (buffer_size == 0) || ((buffer_size % sizeof(memorystatus_priority_entry_t)) != 0)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ entry_count = (buffer_size / sizeof(memorystatus_priority_entry_t));
+ if ((entries = (memorystatus_priority_entry_t *)kalloc(buffer_size)) == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, entry_count, 0, 0, 0, 0);
+
+ if ((error = copyin(buffer, entries, buffer_size)) != 0) {
+ goto out;
+ }
+
+ /* Verify sanity of input priorities */
+ for (i=0; i < entry_count; i++) {
+ if (entries[i].priority == -1) {
+ /* Use as shorthand for default priority */
+ entries[i].priority = JETSAM_PRIORITY_DEFAULT;
+ } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
+ /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use;
+ * if requested, adjust to JETSAM_PRIORITY_IDLE.
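The admission check above rejects any buffer that is not a whole multiple of the entry size before a single copyin() is attempted. A small userspace sketch of the same arithmetic follows; the struct is a hypothetical stand-in with only the fields this path touches, not the real memorystatus_priority_entry_t layout.

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/types.h>

    /* Hypothetical mirror of memorystatus_priority_entry_t. */
    typedef struct {
        pid_t    pid;
        int32_t  priority;
        uint64_t user_data;
    } prio_entry_t;

    /* Same admission checks the handler applies before copying entries in. */
    static int validate_entries(const void *buf, size_t buf_size, uint32_t *count_out)
    {
        if (buf == NULL || buf_size == 0 || (buf_size % sizeof(prio_entry_t)) != 0)
            return EINVAL;               /* must be a whole number of entries */
        *count_out = (uint32_t)(buf_size / sizeof(prio_entry_t));
        return 0;
    }

    int main(void)
    {
        prio_entry_t entries[3] = {{100, -1, 0}, {200, 7, 0}, {300, 18, 0}};
        uint32_t count = 0;

        if (validate_entries(entries, sizeof(entries), &count) == 0)
            printf("accepted %u entries\n", count);   /* prints 3 */
        return 0;
    }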
*/
+ entries[i].priority = JETSAM_PRIORITY_IDLE;
+ } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
+ /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle
+ * queue */
+ /* Deal with this later */
+ } else if ((entries[i].priority < 0) || (entries[i].priority >= MEMSTAT_BUCKET_COUNT)) {
+ /* Sanity check */
+ error = EINVAL;
+ goto out;
+ }
+ }
+
+ table_size = sizeof(memorystatus_internal_properties_t) * entry_count;
+ if ( (table = (memorystatus_internal_properties_t *)kalloc(table_size)) == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ memset(table, 0, table_size);
+
+
+ /*
+ * For each jetsam bucket entry, spin through the input property list.
+ * When a matching pid is found, populate an adjacent table with the
+ * appropriate proc pointer and new property values.
+ * This traversal automatically preserves order from lowest
+ * to highest priority.
+ */
+
+ bucket_index=0;
+
+ proc_list_lock();
+
+ /* Create the ordered table */
+ p = memorystatus_get_first_proc_locked(&bucket_index, TRUE);
+ while (p && (table_count < entry_count)) {
+ for (i=0; i < entry_count; i++ ) {
+ if (p->p_pid == entries[i].pid) {
+ /* Build the table data */
+ table[table_count].proc = p;
+ table[table_count].priority = entries[i].priority;
+ table_count++;
+ break;
+ }
+ }
+ p = memorystatus_get_next_proc_locked(&bucket_index, p, TRUE);
+ }
+
+ /* We now have an ordered list of procs ready to move */
+ for (i=0; i < table_count; i++) {
+ p = table[i].proc;
+ assert(p != NULL);
+
+ /* Allow head inserts -- but relative order is now */
+ if (table[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
+ new_priority = JETSAM_PRIORITY_IDLE;
+ head_insert = true;
+ } else {
+ new_priority = table[i].priority;
+ head_insert = false;
+ }
+
+ /* Not allowed */
+ if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
+ continue;
+ }
+
+ /*
+ * Take appropriate steps if moving proc out of the
+ * JETSAM_PRIORITY_IDLE_DEFERRED band.
+ */
+ if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
+ memorystatus_invalidate_idle_demotion_locked(p, TRUE);
+ }
+
+ memorystatus_update_priority_locked(p, new_priority, head_insert);
+ }
+
+ proc_list_unlock();
+
+ /*
+ * if (table_count != entry_count)
+ * then some pids were not found in a jetsam band.
+ * harmless but interesting...
+ */
+ KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, entry_count, table_count, 0, 0, 0);
+
+out:
+ if (entries)
+ kfree(entries, buffer_size);
+ if (table)
+ kfree(table, table_size);
+
+ return (error);
+}
+
+
+/*
+ * This routine is meant solely for the purpose of adjusting jetsam priorities and bands.
+ * It is _not_ meant to be used for the setting of memory limits, especially, since we can't
+ * tell if the memory limit being set is fatal or not.
+ *
+ * So the last 5 args to the memorystatus_update() call below, related to memory limits, are all 0 or FALSE.
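The single pass from the lowest band upward is what makes the resulting table come out pre-sorted: matches are appended in the order the buckets yield them, so no explicit sort is needed. The toy program below reproduces that property with plain arrays in place of the memstat buckets (the band contents echo the pids from the example in the grp-set-properties comment above; everything else is invented for illustration).

    #include <stdio.h>

    #define NBUCKETS  4
    #define MAXPERBKT 4

    /* Toy stand-in for the jetsam bands; bucket 0 is the lowest priority. */
    static int buckets[NBUCKETS][MAXPERBKT] = {
        {71, 82},        /* band 0 */
        {25},            /* band 1 */
        {103, 10},       /* band 2 */
        {55, 67, 19},    /* band 3 */
    };

    int main(void)
    {
        int wanted[] = {10, 19, 71, 25};   /* unordered input pid list */
        int table[8], n = 0;

        /* One walk from lowest to highest band: matches come out already
         * ordered by current priority, like the kernel's ordered table. */
        for (int b = 0; b < NBUCKETS; b++)
            for (int s = 0; s < MAXPERBKT && buckets[b][s] != 0; s++)
                for (unsigned i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
                    if (buckets[b][s] == wanted[i]) {
                        table[n++] = buckets[b][s];
                        break;
                    }

        for (int i = 0; i < n; i++)
            printf("%d ", table[i]);       /* prints: 71 25 10 19 */
        printf("\n");
        return 0;
    }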
+ */ + static int memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */ @@ -3196,8 +4014,8 @@ memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t b proc_rele(p); break; } - - error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0); + + error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0, FALSE); proc_rele(p); } @@ -3222,8 +4040,12 @@ memorystatus_cmd_get_pressure_status(int32_t *retval) { return error; } +/* + * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM. + */ + static int -memorystatus_cmd_set_jetsam_high_water_mark(pid_t pid, int32_t high_water_mark, __unused int32_t *retval) { +memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit) { int error = 0; proc_t p = proc_find(pid); @@ -3237,27 +4059,56 @@ memorystatus_cmd_set_jetsam_high_water_mark(pid_t pid, int32_t high_water_mark, proc_list_lock(); - if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { - error = EPERM; - goto exit; - } - p->p_memstat_memlimit = high_water_mark; if (memorystatus_highwater_enabled) { if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { - memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority); + + memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority, false); + + /* + * The update priority call above takes care to set/reset the fatal memory limit state + * IF the process is transitioning between foreground <-> background and has a background + * memory limit. + * Here, however, the process won't be doing any such transitions and so we explicitly tackle + * the fatal limit state. + */ + is_fatal_limit = FALSE; + } else { error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL; } } -exit: + if (error == 0) { + if (is_fatal_limit == TRUE) { + p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; + } else { + p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; + } + } + proc_list_unlock(); proc_rele(p); return error; } +/* + * Returns the jetsam priority (effective or requested) of the process + * associated with this task. + */ +int +proc_get_memstat_priority(proc_t p, boolean_t effective_priority) +{ + if (p) { + if (effective_priority) { + return p->p_memstat_effectivepriority; + } else { + return p->p_memstat_requestedpriority; + } + } + return 0; +} #endif /* CONFIG_JETSAM */ int @@ -3288,6 +4139,9 @@ memorystatus_control(struct proc *p __unused, struct memorystatus_control_args * case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES: error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret); break; + case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES: + error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret); + break; case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT: error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret); break; @@ -3295,8 +4149,10 @@ memorystatus_control(struct proc *p __unused, struct memorystatus_control_args * error = memorystatus_cmd_get_pressure_status(ret); break; case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK: - /* TODO: deprecate. Keeping it in as there's no pid based way to set the ledger limit right now. 
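Both memory-limit commands funnel into memorystatus_cmd_set_jetsam_memory_limit() and differ only in the is_fatal_limit argument, which ultimately reduces to setting or clearing one state bit once the footprint limit has been accepted. A compact sketch of that final step follows; the flag value here is an arbitrary placeholder, not the real P_MEMSTAT_FATAL_MEMLIMIT bit.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FATAL_MEMLIMIT 0x1000u   /* placeholder bit value */

    /* Set or clear the fatal bit exactly once, after the limit is accepted. */
    static uint32_t apply_fatal_bit(uint32_t state, bool is_fatal)
    {
        return is_fatal ? (state | FATAL_MEMLIMIT)
                        : (state & ~FATAL_MEMLIMIT);
    }

    int main(void)
    {
        uint32_t st = 0;
        st = apply_fatal_bit(st, true);    /* SET_JETSAM_TASK_LIMIT path */
        printf("fatal set:   0x%x\n", st);
        st = apply_fatal_bit(st, false);   /* SET_JETSAM_HIGH_WATER_MARK path */
        printf("fatal clear: 0x%x\n", st);
        return 0;
    }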
*/ - error = memorystatus_cmd_set_jetsam_high_water_mark(args->pid, (int32_t)args->flags, ret); + error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE); + break; + case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT: + error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE); break; /* Test commands */ #if DEVELOPMENT || DEBUG @@ -3352,6 +4208,11 @@ filt_memorystatus(struct knote *kn __unused, long hint) } } break; + case kMemorystatusLowSwap: + if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { + kn->kn_fflags |= NOTE_MEMORYSTATUS_LOW_SWAP; + } + break; default: break; } @@ -3382,14 +4243,13 @@ memorystatus_knote_register(struct knote *kn) { memorystatus_klist_lock(); - if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL)) { + if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP)) { -#if CONFIG_JETSAM && VM_PRESSURE_EVENTS - /* Need a privilege to register */ - error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); -#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ + if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { + error = suser(kauth_cred_get(), 0); + } - if (!error) { + if (error == 0) { KNOTE_ATTACH(&memorystatus_klist, kn); } } else { @@ -3408,6 +4268,8 @@ memorystatus_knote_unregister(struct knote *kn __unused) { memorystatus_klist_unlock(); } + +#if 0 #if CONFIG_JETSAM && VM_PRESSURE_EVENTS static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured) { @@ -3416,5 +4278,5 @@ memorystatus_issue_pressure_kevent(boolean_t pressured) { memorystatus_klist_unlock(); return TRUE; } - #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ +#endif /* 0 */ diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c index c03c98c97..0783d51d4 100644 --- a/bsd/kern/kern_mib.c +++ b/bsd/kern/kern_mib.c @@ -426,30 +426,30 @@ sysctl_cpu_capability } -SYSCTL_PROC(_hw_optional, OID_AUTO, mmx, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasMMX, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, sse, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, sse2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE2, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, sse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE3, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, supplementalsse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSupplementalSSE3, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_1, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_1, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_2, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, mmx, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasMMX, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, sse, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, sse2, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE2, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, 
sse3, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE3, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, supplementalsse3, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSupplementalSSE3, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_1, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_1, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_2, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_2, 0, sysctl_cpu_capability, "I", ""); /* "x86_64" is actually a preprocessor symbol on the x86_64 kernel, so we have to hack this */ #undef x86_64 -SYSCTL_PROC(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) k64Bit, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, aes, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAES, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, avx1_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX1_0, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, rdrand, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRDRAND, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, f16c, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasF16C, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, enfstrg, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasENFSTRG, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, fma, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasFMA, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, avx2_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX2_0, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, bmi1, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI1, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, bmi2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI2, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, rtm, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRTM, 0, sysctl_cpu_capability, "I", ""); -SYSCTL_PROC(_hw_optional, OID_AUTO, hle, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasHLE, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, x86_64, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) k64Bit, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, aes, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAES, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, avx1_0, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX1_0, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, rdrand, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRDRAND, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, f16c, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasF16C, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, enfstrg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasENFSTRG, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, fma, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasFMA, 0, sysctl_cpu_capability, "I", ""); 
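Adding CTLTYPE_INT to these hw.optional nodes matters to userspace callers, which typically probe them through the public sysctlbyname(3) interface. A minimal check, which should work as-is on an x86 Mac (the leaf is absent on other architectures, so the error path is expected there):

    #include <stdio.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        int has = 0;
        size_t len = sizeof(has);

        if (sysctlbyname("hw.optional.sse4_2", &has, &len, NULL, 0) == -1) {
            perror("sysctlbyname");   /* expected on non-x86 machines */
            return 1;
        }
        printf("sse4_2 supported: %s\n", has ? "yes" : "no");
        return 0;
    }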
+SYSCTL_PROC(_hw_optional, OID_AUTO, avx2_0, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX2_0, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, bmi1, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI1, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, bmi2, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI2, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, rtm, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRTM, 0, sysctl_cpu_capability, "I", ""); +SYSCTL_PROC(_hw_optional, OID_AUTO, hle, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasHLE, 0, sysctl_cpu_capability, "I", ""); #else #error Unsupported arch -#endif /* !__i386__ && !__x86_64 && !__arm__ */ +#endif /* !__i386__ && !__x86_64 && !__arm__ && ! __arm64__ */ /****************************************************************************** @@ -506,6 +506,6 @@ sysctl_mib_init(void) #else #error unknown architecture -#endif /* !__i386__ && !__x86_64 && !__arm__ */ +#endif /* !__i386__ && !__x86_64 && !__arm__ && !__arm64__ */ } diff --git a/bsd/kern/kern_mman.c b/bsd/kern/kern_mman.c index 575110d69..27bee276c 100644 --- a/bsd/kern/kern_mman.c +++ b/bsd/kern/kern_mman.c @@ -101,6 +101,7 @@ #include #include +#include #include #include @@ -120,16 +121,15 @@ #include #include #include +#include + +#include #include #include #include #include -/* XXX the following function should probably be static */ -kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *, - boolean_t, vm_size_t); - /* * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct * XXX usage is PROT_* from an interface perspective. Thus the values of @@ -1103,211 +1103,119 @@ munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int return(ENOSYS); } -/* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */ -kern_return_t -map_fd(struct map_fd_args *args) -{ - int fd = args->fd; - vm_offset_t offset = args->offset; - vm_offset_t *va = args->va; - boolean_t findspace = args->findspace; - vm_size_t size = args->size; - kern_return_t ret; - - AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD); - AUDIT_ARG(addr, CAST_DOWN(user_addr_t, args->va)); - AUDIT_ARG(fd, fd); - - ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size); - - AUDIT_MACH_SYSCALL_EXIT(ret); - return ret; -} - -kern_return_t -map_fd_funneled( - int fd, - vm_object_offset_t offset, - vm_offset_t *va, - boolean_t findspace, - vm_size_t size) +#if CONFIG_CODE_DECRYPTION +int +mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval) { - kern_return_t result; - struct fileproc *fp; - struct vnode *vp; - void * pager; - vm_offset_t map_addr=0; - vm_size_t map_size; - int err=0; - vm_prot_t maxprot = VM_PROT_ALL; - vm_map_t my_map; - proc_t p = current_proc(); - struct vnode_attr vattr; - - my_map = current_map(); - - /* - * Find the inode; verify that it's a regular file. 
- */ - - err = fp_lookup(p, fd, &fp, 0); - if (err) - return(err); - - if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { - err = KERN_INVALID_ARGUMENT; - goto bad; - } - - if (!(fp->f_fglob->fg_flag & FREAD)) { - err = KERN_PROTECTION_FAILURE; - goto bad; - } - - vp = (struct vnode *)fp->f_fglob->fg_data; - err = vnode_getwithref(vp); - if(err != 0) - goto bad; - - if (vp->v_type != VREG) { - (void)vnode_put(vp); - err = KERN_INVALID_ARGUMENT; - goto bad; - } - -#if CONFIG_MACF - err = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), - fp->f_fglob, VM_PROT_DEFAULT, MAP_FILE, &maxprot); - if (err) { - (void)vnode_put(vp); - goto bad; - } -#endif /* MAC */ - -#if CONFIG_PROTECT - /* check for content protection access */ - { - err = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0); - if (err != 0) { - (void) vnode_put(vp); - goto bad; - } - } -#endif /* CONFIG_PROTECT */ - - AUDIT_ARG(vnpath, vp, ARG_VNODE1); - - /* - * POSIX: mmap needs to update access time for mapped files - */ - if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { - VATTR_INIT(&vattr); - nanotime(&vattr.va_access_time); - VATTR_SET_ACTIVE(&vattr, va_access_time); - vnode_setattr(vp, &vattr, vfs_context_current()); - } - - if (offset & vm_map_page_mask(my_map)) { - printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm); - (void)vnode_put(vp); - err = KERN_INVALID_ARGUMENT; - goto bad; - } - map_size = vm_map_round_page(size, vm_map_page_mask(my_map)); - - /* - * Allow user to map in a zero length file. - */ - if (size == 0) { - (void)vnode_put(vp); - err = KERN_SUCCESS; - goto bad; - } - /* - * Map in the file. - */ - pager = (void *)ubc_getpager(vp); - if (pager == NULL) { - (void)vnode_put(vp); - err = KERN_FAILURE; - goto bad; - } - - result = vm_map_64( - my_map, - &map_addr, map_size, (vm_offset_t)0, - VM_FLAGS_ANYWHERE, pager, offset, TRUE, - VM_PROT_DEFAULT, maxprot, - VM_INHERIT_DEFAULT); - if (result != KERN_SUCCESS) { - (void)vnode_put(vp); - err = result; - goto bad; - } - - - if (!findspace) { - //K64todo fix for 64bit user? 
- uint32_t dst_addr; - vm_map_copy_t tmp; - - if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) || - trunc_page(dst_addr) != dst_addr) { - (void) vm_map_remove( - my_map, - map_addr, map_addr + map_size, - VM_MAP_NO_FLAGS); - (void)vnode_put(vp); - err = KERN_INVALID_ADDRESS; - goto bad; - } - - result = vm_map_copyin(my_map, (vm_map_address_t)map_addr, - (vm_map_size_t)map_size, TRUE, &tmp); - if (result != KERN_SUCCESS) { - - (void) vm_map_remove( - my_map, - vm_map_trunc_page(map_addr, - vm_map_page_mask(my_map)), - vm_map_round_page(map_addr + map_size, - vm_map_page_mask(my_map)), - VM_MAP_NO_FLAGS); - (void)vnode_put(vp); - err = result; - goto bad; - } - - result = vm_map_copy_overwrite(my_map, - (vm_map_address_t)dst_addr, tmp, FALSE); - if (result != KERN_SUCCESS) { - vm_map_copy_discard(tmp); - (void)vnode_put(vp); - err = result; - goto bad; - } - } else { - // K64todo bug compatible now, should fix for 64bit user - uint32_t user_map_addr = CAST_DOWN_EXPLICIT(uint32_t, map_addr); - if (copyout(&user_map_addr, CAST_USER_ADDR_T(va), sizeof (user_map_addr))) { - (void) vm_map_remove( - my_map, - vm_map_trunc_page(map_addr, - vm_map_page_mask(my_map)), - vm_map_round_page(map_addr + map_size, - vm_map_page_mask(my_map)), - VM_MAP_NO_FLAGS); - (void)vnode_put(vp); - err = KERN_INVALID_ADDRESS; - goto bad; - } - } - - ubc_setthreadcred(vp, current_proc(), current_thread()); - (void)vnode_put(vp); - err = 0; -bad: - fp_drop(p, fd, fp, 0); - return (err); + mach_vm_offset_t user_addr; + mach_vm_size_t user_size; + kern_return_t result; + vm_map_t user_map; + uint32_t cryptid; + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + pager_crypt_info_t crypt_info; + const char * cryptname = 0; + char *vpath; + int len, ret; + struct proc_regioninfo_internal pinfo; + vnode_t vp; + uintptr_t vnodeaddr; + uint32_t vid; + + AUDIT_ARG(addr, uap->addr); + AUDIT_ARG(len, uap->len); + + user_map = current_map(); + user_addr = (mach_vm_offset_t) uap->addr; + user_size = (mach_vm_size_t) uap->len; + + cryptid = uap->cryptid; + cputype = uap->cputype; + cpusubtype = uap->cpusubtype; + + if (user_addr & vm_map_page_mask(user_map)) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } + + switch(cryptid) { + case 0: + /* not encrypted, just an empty load command */ + return 0; + case 1: + cryptname="com.apple.unfree"; + break; + case 0x10: + /* some random cryptid that you could manually put into + * your binary if you want NULL */ + cryptname="com.apple.null"; + break; + default: + return EINVAL; + } + + if (NULL == text_crypter_create) return ENOTSUP; + + ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid); + if (ret == 0 || !vnodeaddr) { + /* No really, this returns 0 if the memory address is not backed by a file */ + return (EINVAL); + } + + vp = (vnode_t)vnodeaddr; + if ((vnode_getwithvid(vp, vid)) == 0) { + MALLOC_ZONE(vpath, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if(vpath == NULL) { + vnode_put(vp); + return (ENOMEM); + } + + len = MAXPATHLEN; + ret = vn_getpath(vp, vpath, &len); + if(ret) { + FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI); + vnode_put(vp); + return (ret); + } + + vnode_put(vp); + } else { + return (EINVAL); + } + +#if 0 + kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n", + __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size); +#endif + + /* set up decrypter first */ + crypt_file_data_t crypt_data = { + 
.filename = vpath, + .cputype = cputype, + .cpusubtype = cpusubtype }; + result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data); + FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI); + + if(result) { + printf("%s: unable to create decrypter %s, kr=%d\n", + __FUNCTION__, cryptname, result); + if (result == kIOReturnNotPrivileged) { + /* text encryption returned decryption failure */ + return (EPERM); + } else { + return (ENOMEM); + } + } + + /* now remap using the decrypter */ + result = vm_map_apple_protected(user_map, user_addr, user_addr+user_size, &crypt_info); + if (result) { + printf("%s: mapping failed with %d\n", __FUNCTION__, result); + crypt_info.crypt_end(crypt_info.crypt_ops); + return (EPERM); + } + + return 0; } - +#endif /* CONFIG_CODE_DECRYPTION */ diff --git a/bsd/kern/kern_newsysctl.c b/bsd/kern/kern_newsysctl.c index 2d872d54c..a0a72cb5c 100644 --- a/bsd/kern/kern_newsysctl.c +++ b/bsd/kern/kern_newsysctl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -73,18 +73,15 @@ #include #include #include +#include #include +#include #include -/* -struct sysctl_oid_list sysctl__debug_children; -struct sysctl_oid_list sysctl__kern_children; -struct sysctl_oid_list sysctl__net_children; -struct sysctl_oid_list sysctl__sysctl_children; -*/ - +lck_grp_t * sysctl_lock_group = NULL; lck_rw_t * sysctl_geometry_lock = NULL; +lck_mtx_t * sysctl_unlocked_node_lock = NULL; /* * Conditionally allow dtrace to see these functions for debugging purposes. @@ -99,7 +96,6 @@ lck_rw_t * sysctl_geometry_lock = NULL; #endif /* forward declarations of static functions */ -STATIC funnel_t *spl_kernel_funnel(void); STATIC void sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i); STATIC int sysctl_sysctl_debug(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); @@ -110,51 +106,24 @@ STATIC int sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, struct sysctl_oid **oidpp); STATIC int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l); STATIC int sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l); -STATIC int name2oid (char *name, int *oid, int *len); +STATIC int name2oid (char *name, int *oid, u_int *len); STATIC int sysctl_sysctl_name2oid(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_sysctl_next(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_sysctl_oidfmt(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); -STATIC void splx_kernel_funnel(funnel_t *saved); STATIC int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l); STATIC int sysctl_new_user(struct sysctl_req *req, void *p, size_t l); -STATIC int sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -STATIC int sysctlnametomib(const char *name, int *mibp, size_t *sizep); - +STATIC void sysctl_create_user_req(struct sysctl_req *req, struct proc *p, user_addr_t oldp, + size_t oldlen, user_addr_t newp, size_t newlen); +STATIC int sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical, char *namestring, size_t namestringlen, int *name, u_int namelen, struct sysctl_req *req); -/* - * Locking and stats - */ - -/* - * XXX this does not belong here - */ -STATIC funnel_t * -spl_kernel_funnel(void) -{ - funnel_t *cfunnel; - - cfunnel = thread_funnel_get(); - if (cfunnel != kernel_flock) { - if 
(cfunnel != NULL) - thread_funnel_set(cfunnel, FALSE); - thread_funnel_set(kernel_flock, TRUE); - } - return(cfunnel); -} - -STATIC void -splx_kernel_funnel(funnel_t *saved) -{ - if (saved != kernel_flock) { - thread_funnel_set(kernel_flock, FALSE); - if (saved != NULL) - thread_funnel_set(saved, TRUE); - } -} - -STATIC int sysctl_root SYSCTL_HANDLER_ARGS; +int kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen); +int kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int userland_sysctl(boolean_t string_is_canonical, + char *namestring, size_t namestringlen, + int *name, u_int namelen, struct sysctl_req *req, + size_t *retval); struct sysctl_oid_list sysctl__children; /* root list */ @@ -172,7 +141,6 @@ sysctl_register_oid(struct sysctl_oid *new_oidp) struct sysctl_oid *p; struct sysctl_oid *q; int n; - funnel_t *fnl = NULL; /* compiler doesn't notice CTLFLAG_LOCKED */ /* * The OID can be old-style (needs copy), new style without an earlier @@ -209,13 +177,6 @@ sysctl_register_oid(struct sysctl_oid *new_oidp) } } - /* - * If it's a locked OID being registered, we can assume that the - * caller is doing their own reentrancy locking before calling us. - */ - if (!(oidp->oid_kind & CTLFLAG_LOCKED)) - fnl = spl_kernel_funnel(); - if(sysctl_geometry_lock == NULL) { /* @@ -225,8 +186,9 @@ sysctl_register_oid(struct sysctl_oid *new_oidp) * sysctl_register_fixed(). */ - lck_grp_t* lck_grp = lck_grp_alloc_init("sysctl", NULL); - sysctl_geometry_lock = lck_rw_alloc_init(lck_grp, NULL); + sysctl_lock_group = lck_grp_alloc_init("sysctl", NULL); + sysctl_geometry_lock = lck_rw_alloc_init(sysctl_lock_group, NULL); + sysctl_unlocked_node_lock = lck_mtx_alloc_init(sysctl_lock_group, NULL); } /* Get the write lock to modify the geometry */ lck_rw_lock_exclusive(sysctl_geometry_lock); @@ -268,9 +230,6 @@ sysctl_register_oid(struct sysctl_oid *new_oidp) /* Release the write lock */ lck_rw_unlock_exclusive(sysctl_geometry_lock); - - if (!(oidp->oid_kind & CTLFLAG_LOCKED)) - splx_kernel_funnel(fnl); } void @@ -278,10 +237,6 @@ sysctl_unregister_oid(struct sysctl_oid *oidp) { struct sysctl_oid *removed_oidp = NULL; /* OID removed from tree */ struct sysctl_oid *old_oidp = NULL; /* OID compatibility copy */ - funnel_t *fnl = NULL; /* compiler doesn't notice CTLFLAG_LOCKED */ - - if (!(oidp->oid_kind & CTLFLAG_LOCKED)) - fnl = spl_kernel_funnel(); /* Get the write lock to modify the geometry */ lck_rw_lock_exclusive(sysctl_geometry_lock); @@ -335,10 +290,6 @@ sysctl_unregister_oid(struct sysctl_oid *oidp) if (old_oidp != NULL) { FREE(old_oidp, M_TEMP); } - - /* And drop the funnel interlock, if needed */ - if (!(oidp->oid_kind & CTLFLAG_LOCKED)) - splx_kernel_funnel(fnl); } /* @@ -910,7 +861,7 @@ SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_next, " * Locks: Assumes sysctl_geometry_lock is held prior to calling */ STATIC int -name2oid (char *name, int *oid, int *len) +name2oid (char *name, int *oid, u_int *len) { int i; struct sysctl_oid *oidp; @@ -954,6 +905,7 @@ name2oid (char *name, int *oid, int *len) lsp = (struct sysctl_oid_list *)oidp->oid_arg1; oidp = SLIST_FIRST(lsp); + *p = i; /* restore */ name = p+1; for (p = name; *p && *p != '.'; p++) ; @@ -1009,7 +961,7 @@ sysctl_sysctl_name2oid(__unused struct sysctl_oid *oidp, __unused void *arg1, { char *p; int error, oid[CTL_MAXNAME]; - int len = 0; /* set by name2oid() */ + u_int len = 0; /* set by name2oid() */ if (req->newlen < 
1) return ENOENT; @@ -1306,11 +1258,7 @@ kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldle req.lock = 1; /* make the request */ - error = sysctl_root(0, name, namelen, &req); - - /* unlock memory if required */ - if (req.lock == 2) - vsunlock(req.oldptr, (user_size_t)req.oldlen, B_WRITE); + error = sysctl_root(TRUE, FALSE, NULL, 0, name, namelen, &req); if (error && error != ENOMEM) return (error); @@ -1367,37 +1315,52 @@ sysctl_new_user(struct sysctl_req *req, void *p, size_t l) */ int -sysctl_root(__unused struct sysctl_oid *oidp, void *arg1, int arg2, - struct sysctl_req *req) +sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical, char *namestring, size_t namestringlen, int *name, u_int namelen, struct sysctl_req *req) { - int *name = (int *) arg1; - u_int namelen = arg2; u_int indx; int i; struct sysctl_oid *oid; struct sysctl_oid_list *lsp = &sysctl__children; int error; - funnel_t *fnl = NULL; - boolean_t funnel_held = FALSE; + boolean_t unlocked_node_found = FALSE; + boolean_t namestring_started = FALSE; /* Get the read lock on the geometry */ lck_rw_lock_shared(sysctl_geometry_lock); + if (string_is_canonical) { + /* namestring is actually canonical, name/namelen needs to be populated */ + error = name2oid(namestring, name, &namelen); + if (error) { + goto err; + } + } + oid = SLIST_FIRST(lsp); indx = 0; while (oid && indx < CTL_MAXNAME) { if (oid->oid_number == name[indx]) { + + if (!from_kernel && !string_is_canonical) { + if (namestring_started) { + if (strlcat(namestring, ".", namestringlen) >= namestringlen) { + error = ENAMETOOLONG; + goto err; + } + } + + if (strlcat(namestring, oid->oid_name, namestringlen) >= namestringlen) { + error = ENAMETOOLONG; + goto err; + } + namestring_started = TRUE; + } + indx++; if (!(oid->oid_kind & CTLFLAG_LOCKED)) { -/* -printf("sysctl_root: missing CTLFLAG_LOCKED: "); -for(i = 0; i < (int)(indx - 1); i++) -printf("oid[%d] = %d ", i, name[i]); -printf("\n"); -*/ - funnel_held = TRUE; + unlocked_node_found = TRUE; } if (oid->oid_kind & CTLFLAG_NOLOCK) req->lock = 0; @@ -1439,6 +1402,33 @@ printf("\n"); error = ENOENT; goto err; found: + + /* + * indx is the index of the first remaining OID name, + * for sysctls that take them as arguments + */ + if (!from_kernel && !string_is_canonical && (indx < namelen)) { + char tempbuf[10]; + u_int indx2; + + for (indx2 = indx; indx2 < namelen; indx2++) { + snprintf(tempbuf, sizeof(tempbuf), "%d",name[indx2]); + + if (namestring_started) { + if (strlcat(namestring, ".", namestringlen) >= namestringlen) { + error = ENAMETOOLONG; + goto err; + } + } + + if (strlcat(namestring, tempbuf, namestringlen) >= namestringlen) { + error = ENAMETOOLONG; + goto err; + } + namestring_started = TRUE; + } + } + /* If writing isn't allowed */ if (req->newptr && (!(oid->oid_kind & CTLFLAG_WR) || ((oid->oid_kind & CTLFLAG_SECURE) && securelevel > 0))) { @@ -1448,9 +1438,8 @@ found: /* * If we're inside the kernel, the OID must be marked as kernel-valid. - * XXX This mechanism for testing is bad. 
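The namestring assembly in sysctl_root() leans on the strlcat() contract: the return value is the total length it tried to create, so a result >= the buffer size signals truncation and maps cleanly to ENAMETOOLONG. Here is the same idiom in isolation as a userspace sketch (strlcat is available in BSD-derived libcs; the helper and strings are invented):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Append one dotted component the way sysctl_root() builds the
     * canonical name, failing cleanly on truncation. */
    static int append_component(char *dst, size_t dstlen, const char *comp, int started)
    {
        if (started && strlcat(dst, ".", dstlen) >= dstlen)
            return ENAMETOOLONG;
        if (strlcat(dst, comp, dstlen) >= dstlen)
            return ENAMETOOLONG;
        return 0;
    }

    int main(void)
    {
        char name[32] = "";
        const char *parts[] = { "kern", "ostype" };

        for (int i = 0; i < 2; i++)
            if (append_component(name, sizeof(name), parts[i], i > 0) != 0)
                return 1;
        printf("%s\n", name);   /* prints: kern.ostype */
        return 0;
    }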
*/ - if ((req->oldfunc == sysctl_old_kernel) && !(oid->oid_kind & CTLFLAG_KERN)) + if (from_kernel && !(oid->oid_kind & CTLFLAG_KERN)) { error = (EPERM); goto err; @@ -1484,14 +1473,28 @@ found: lck_rw_done(sysctl_geometry_lock); +#if CONFIG_MACF + if (!from_kernel) { + error = mac_system_check_sysctlbyname(kauth_cred_get(), + namestring, + name, + namelen, + req->oldptr, + req->oldlen, + req->newptr, + req->newlen); + if (error) + goto dropref; + } +#endif + /* - * ...however, we still have to grab the funnel for those calls which - * may be into code whose reentrancy is protected by the funnel; a - * blocking operation should not prevent reentrancy, at this point. + * ...however, we still have to grab the mutex for those calls which + * may be into code whose reentrancy is protected by it. */ - if (funnel_held) + if (unlocked_node_found) { - fnl = spl_kernel_funnel(); + lck_mtx_lock(sysctl_unlocked_node_lock); } if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { @@ -1505,11 +1508,16 @@ found: } error = i; - if (funnel_held) + if (unlocked_node_found) { - splx_kernel_funnel(fnl); + lck_mtx_unlock(sysctl_unlocked_node_lock); } +#if CONFIG_MACF + /* only used from another CONFIG_MACF block */ +dropref: +#endif + /* * This is tricky... we re-grab the geometry lock in order to drop * the reference and wake on the address; since the geometry @@ -1533,41 +1541,155 @@ err: return (error); } -#ifndef _SYS_SYSPROTO_H_ -struct sysctl_args { - int *name; - u_int namelen; - void *old; - size_t *oldlenp; - void *new; - size_t newlen; -}; -#endif +void sysctl_create_user_req(struct sysctl_req *req, struct proc *p, user_addr_t oldp, + size_t oldlen, user_addr_t newp, size_t newlen) +{ + bzero(req, sizeof(*req)); + + req->p = p; + + req->oldlen = oldlen; + req->oldptr = oldp; + + if (newlen) { + req->newlen = newlen; + req->newptr = newp; + } + + req->oldfunc = sysctl_old_user; + req->newfunc = sysctl_new_user; + req->lock = 1; + + return; +} int -/* __sysctl(struct proc *p, struct sysctl_args *uap) */ -new_sysctl(struct proc *p, struct sysctl_args *uap) +sysctl(proc_t p, struct sysctl_args *uap, __unused int32_t *retval) { - int error, i, name[CTL_MAXNAME]; - size_t j; - + int error; + size_t oldlen = 0, newlen; + int name[CTL_MAXNAME]; + struct sysctl_req req; + char *namestring; + size_t namestringlen = MAXPATHLEN; + + /* + * all top-level sysctl names are non-terminal + */ if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); + error = copyin(uap->name, &name[0], uap->namelen * sizeof(int)); + if (error) + return (error); + + AUDIT_ARG(ctlname, name, uap->namelen); + + if (uap->newlen > SIZE_T_MAX) + return (EINVAL); + newlen = (size_t)uap->newlen; + + if (uap->oldlenp != USER_ADDR_NULL) { + uint64_t oldlen64 = fuulong(uap->oldlenp); + + /* + * If more than 4G, clamp to 4G + */ + if (oldlen64 > SIZE_T_MAX) + oldlen = SIZE_T_MAX; + else + oldlen = (size_t)oldlen64; + } + + sysctl_create_user_req(&req, p, uap->old, oldlen, uap->new, newlen); + + /* Guess that longest length for the passed-in MIB, if we can be more aggressive than MAXPATHLEN */ + if (uap->namelen == 2) { + if (name[0] == CTL_KERN && name[1] < KERN_MAXID) { + namestringlen = 32; /* "kern.speculative_reads_disabled" */ + } else if (name[0] == CTL_HW && name[1] < HW_MAXID) { + namestringlen = 32; /* "hw.cachelinesize_compat" */ + } + } + + MALLOC(namestring, char *, namestringlen, M_TEMP, M_WAITOK); + if (!namestring) { + oldlen = 0; + goto err; + } - error = copyin(CAST_USER_ADDR_T(uap->name), &name, uap->namelen * 
sizeof(int)); - if (error) + error = userland_sysctl(FALSE, namestring, namestringlen, name, uap->namelen, &req, &oldlen); + + FREE(namestring, M_TEMP); + + if ((error) && (error != ENOMEM)) return (error); + +err: + if (uap->oldlenp != USER_ADDR_NULL) + error = suulong(uap->oldlenp, oldlen); + + return (error); +} - error = userland_sysctl(p, name, uap->namelen, - CAST_USER_ADDR_T(uap->old), uap->oldlenp, - CAST_USER_ADDR_T(uap->new), uap->newlen, &j); - if (error && error != ENOMEM) +int +sysctlbyname(proc_t p, struct sysctlbyname_args *uap, __unused int32_t *retval) +{ + int error; + size_t oldlen = 0, newlen; + char *name; + size_t namelen = 0; + struct sysctl_req req; + int oid[CTL_MAXNAME]; + + if (uap->namelen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ + return (ENAMETOOLONG); + namelen = (size_t)uap->namelen; + + MALLOC(name, char *, namelen+1, M_TEMP, M_WAITOK); + if (!name) + return ENOMEM; + + error = copyin(uap->name, name, namelen); + if (error) { + FREE(name, M_TEMP); return (error); - if (uap->oldlenp) { - i = copyout(&j, CAST_USER_ADDR_T(uap->oldlenp), sizeof(j)); - if (i) - return (i); } + name[namelen] = '\0'; + + /* XXX + * AUDIT_ARG(ctlname, name, uap->namelen); + */ + + if (uap->newlen > SIZE_T_MAX) { + FREE(name, M_TEMP); + return (EINVAL); + } + newlen = (size_t)uap->newlen; + + if (uap->oldlenp != USER_ADDR_NULL) { + uint64_t oldlen64 = fuulong(uap->oldlenp); + + /* + * If more than 4G, clamp to 4G + */ + if (oldlen64 > SIZE_T_MAX) + oldlen = SIZE_T_MAX; + else + oldlen = (size_t)oldlen64; + } + + sysctl_create_user_req(&req, p, uap->old, oldlen, uap->new, newlen); + + error = userland_sysctl(TRUE, name, namelen+1, oid, CTL_MAXNAME, &req, &oldlen); + + FREE(name, M_TEMP); + + if ((error) && (error != ENOMEM)) + return (error); + + if (uap->oldlenp != USER_ADDR_NULL) + error = suulong(uap->oldlenp, oldlen); + return (error); } @@ -1576,56 +1698,35 @@ new_sysctl(struct proc *p, struct sysctl_args *uap) * must be in kernel space. */ int -userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t oldp, - size_t *oldlenp, user_addr_t newp, size_t newlen, +userland_sysctl(boolean_t string_is_canonical, + char *namestring, size_t namestringlen, + int *name, u_int namelen, struct sysctl_req *req, size_t *retval) { int error = 0; - struct sysctl_req req, req2; - - bzero(&req, sizeof req); - - req.p = p; - - if (oldlenp) { - req.oldlen = *oldlenp; - } - - if (oldp) { - req.oldptr = oldp; - } - - if (newlen) { - req.newlen = newlen; - req.newptr = newp; - } - - req.oldfunc = sysctl_old_user; - req.newfunc = sysctl_new_user; - req.lock = 1; + struct sysctl_req req2; do { - req2 = req; - error = sysctl_root(0, name, namelen, &req2); - } while (error == EAGAIN); + /* if EAGAIN, reset output cursor */ + req2 = *req; + if (!string_is_canonical) + namestring[0] = '\0'; - req = req2; + error = sysctl_root(FALSE, string_is_canonical, namestring, namestringlen, name, namelen, &req2); + } while (error == EAGAIN); if (error && error != ENOMEM) return (error); if (retval) { - if (req.oldptr && req.oldidx > req.oldlen) - *retval = req.oldlen; + if (req2.oldptr && req2.oldidx > req2.oldlen) + *retval = req2.oldlen; else - *retval = req.oldidx; + *retval = req2.oldidx; } return (error); } -/* Non-standard BSDI extension - only present on their 4.3 net-2 releases */ -#define KINFO_BSDI_SYSINFO (101<<8) - /* * Kernel versions of the userland sysctl helper functions. 
* @@ -1634,54 +1735,29 @@ userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t oldp, * * Note that some sysctl handlers use copyin/copyout, which * may not work correctly. + * + * The "sysctlbyname" KPI for use by kexts is aliased to this function. */ -STATIC int -sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ - - return(kernel_sysctl(current_proc(), name, namelen, oldp, oldlenp, newp, newlen)); -} - -STATIC int -sysctlnametomib(const char *name, int *mibp, size_t *sizep) -{ - int oid[2]; - int error; - char *non_const_name; - - /* - * NOTE: This cast is safe because the service node does not modify - * the contents of the string as part of its operation. - */ - non_const_name = __CAST_AWAY_QUALIFIER(name, const, char *); - - /* magic service node */ - oid[0] = 0; - oid[1] = 3; - - /* look up OID for name */ - *sizep *= sizeof(int); - error = sysctl(oid, 2, mibp, sizep, non_const_name, strlen(name)); - *sizep /= sizeof(int); - - return(error); -} - int -sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) +kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int oid[CTL_MAXNAME + 2]; + int oid[CTL_MAXNAME]; + int name2mib_oid[2]; int error; size_t oidlen; - /* look up the OID */ - oidlen = CTL_MAXNAME; - error = sysctlnametomib(name, oid, &oidlen); + /* look up the OID with magic service node */ + name2mib_oid[0] = 0; + name2mib_oid[1] = 3; + oidlen = sizeof(oid); + error = kernel_sysctl(current_proc(), name2mib_oid, 2, oid, &oidlen, (void *)name, strlen(name)); + oidlen /= sizeof(int); + /* now use the OID */ if (error == 0) - error = sysctl(oid, oidlen, oldp, oldlenp, newp, newlen); + error = kernel_sysctl(current_proc(), oid, oidlen, oldp, oldlenp, newp, newlen); return(error); } diff --git a/bsd/kern/kern_overrides.c b/bsd/kern/kern_overrides.c index e2055aae6..7b2a4622c 100644 --- a/bsd/kern/kern_overrides.c +++ b/bsd/kern/kern_overrides.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include @@ -67,6 +67,12 @@ static int64_t cpu_throttle_assert_cnt; /* Wait Channel for system override */ static uint64_t sys_override_wait; +/* Global variable to indicate if system_override is enabled */ +int sys_override_enabled; + +/* Sysctl definition for sys_override_enabled */ +SYSCTL_INT(_debug, OID_AUTO, sys_override_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &sys_override_enabled, 0, ""); + /* Forward Declarations */ static void enable_system_override(uint64_t flags); static void disable_system_override(uint64_t flags); @@ -85,6 +91,7 @@ init_system_override() sys_override_mtx_attr = lck_attr_alloc_init(); lck_mtx_init(&sys_override_lock, sys_override_mtx_grp, sys_override_mtx_attr); io_throttle_assert_cnt = cpu_throttle_assert_cnt = 0; + sys_override_enabled = 1; } /* system call implementation */ @@ -100,12 +107,30 @@ system_override(__unused struct proc *p, struct system_override_args * uap, __un goto out; } - /* Check to see if some flags are specified. Zero flags are invalid. */ - if ((flags == 0) || ((flags & ~SYS_OVERRIDE_FLAGS_MASK) != 0)) { + /* Check to see if some flags are specified. 
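kernel_sysctlbyname() above does its lookup in two steps: resolve the name to a numeric MIB through the {0, 3} name2oid service node, then reissue the query by number. Userspace has the same split via the public sysctlnametomib(3) and sysctl(3) calls, which the following sketch demonstrates with kern.maxproc as an arbitrary example:

    #include <stdio.h>
    #include <sys/sysctl.h>
    #include <sys/types.h>

    int main(void)
    {
        int mib[CTL_MAXNAME];
        size_t miblen = CTL_MAXNAME;
        int maxproc = 0;
        size_t len = sizeof(maxproc);

        /* Step 1: name -> MIB (libc uses the same {0,3} service node). */
        if (sysctlnametomib("kern.maxproc", mib, &miblen) == -1) {
            perror("sysctlnametomib");
            return 1;
        }
        /* Step 2: query by number. */
        if (sysctl(mib, (u_int)miblen, &maxproc, &len, NULL, 0) == -1) {
            perror("sysctl");
            return 1;
        }
        printf("kern.maxproc = %d\n", maxproc);
        return 0;
    }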
*/ + if ((flags & ~SYS_OVERRIDE_FLAGS_MASK) != 0) { error = EINVAL; goto out; } + if (flags == SYS_OVERRIDE_DISABLE) { + + printf("Process %s [%d] disabling system_override()\n", current_proc()->p_comm, current_proc()->p_pid); + + lck_mtx_lock(&sys_override_lock); + + if (io_throttle_assert_cnt > 0) + sys_override_io_throttle(THROTTLE_IO_ENABLE); + if (cpu_throttle_assert_cnt > 0) + sys_override_cpu_throttle(CPU_THROTTLE_ENABLE); + + sys_override_enabled = 0; + + lck_mtx_unlock(&sys_override_lock); + + goto out; + } + lck_mtx_lock(&sys_override_lock); enable_system_override(flags); @@ -129,22 +154,22 @@ enable_system_override(uint64_t flags) { if (flags & SYS_OVERRIDE_IO_THROTTLE) { - if (io_throttle_assert_cnt == 0) { + if ((io_throttle_assert_cnt == 0) && sys_override_enabled) { /* Disable I/O Throttling */ printf("Process %s [%d] disabling system-wide I/O Throttling\n", current_proc()->p_comm, current_proc()->p_pid); - KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0); sys_override_io_throttle(THROTTLE_IO_DISABLE); } + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0); io_throttle_assert_cnt++; } if (flags & SYS_OVERRIDE_CPU_THROTTLE) { - if (cpu_throttle_assert_cnt == 0) { + if ((cpu_throttle_assert_cnt == 0) && sys_override_enabled) { /* Disable CPU Throttling */ printf("Process %s [%d] disabling system-wide CPU Throttling\n", current_proc()->p_comm, current_proc()->p_pid); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0); sys_override_cpu_throttle(CPU_THROTTLE_DISABLE); } + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0); cpu_throttle_assert_cnt++; } @@ -161,9 +186,9 @@ disable_system_override(uint64_t flags) if (flags & SYS_OVERRIDE_IO_THROTTLE) { assert(io_throttle_assert_cnt > 0); io_throttle_assert_cnt--; - if (io_throttle_assert_cnt == 0) { + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0); + if ((io_throttle_assert_cnt == 0) && sys_override_enabled) { /* Enable I/O Throttling */ - KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0); sys_override_io_throttle(THROTTLE_IO_ENABLE); } } @@ -171,9 +196,9 @@ disable_system_override(uint64_t flags) if (flags & SYS_OVERRIDE_CPU_THROTTLE) { assert(cpu_throttle_assert_cnt > 0); cpu_throttle_assert_cnt--; - if (cpu_throttle_assert_cnt == 0) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0); + if ((cpu_throttle_assert_cnt == 0) && sys_override_enabled) { /* Enable CPU Throttling */ - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0); sys_override_cpu_throttle(CPU_THROTTLE_ENABLE); } } diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 5125e49b6..ce7a65a8e 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -99,11 +99,17 @@ #include #include #include +#include +#include #include #include #include /* vm_map_switch_protect() */ +#include #include #include +#include +#include +#include #if CONFIG_MEMORYSTATUS #include @@ -172,6 +178,8 @@ __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"}; static void 
orphanpg(struct pgrp *pg); void proc_name_kdp(task_t t, char * buf, int size); +int proc_threadname_kdp(void *uth, char *buf, size_t size); +void proc_starttime_kdp(void *p, uint64_t *tv_sec, uint64_t *tv_usec); char *proc_name_address(void *p); static void pgrp_add(struct pgrp * pgrp, proc_t parent, proc_t child); @@ -712,6 +720,44 @@ proc_name_kdp(task_t t, char * buf, int size) strlcpy(buf, &p->p_comm[0], size); } + +int +proc_threadname_kdp(void *uth, char *buf, size_t size) +{ + if (size < MAXTHREADNAMESIZE) { + /* this is really just a protective measure for the future in + * case the thread name size in stackshot gets out of sync with + * the BSD max thread name size. Note that bsd_getthreadname + * doesn't take input buffer size into account. */ + return -1; + } + + if (uth != NULL) { + bsd_getthreadname(uth, buf); + } + return 0; +} + +/* note that this function is generally going to be called from stackshot, + * and the arguments will be coming from a struct which is declared packed + * thus the input arguments will in general be unaligned. We have to handle + * that here. */ +void +proc_starttime_kdp(void *p, uint64_t *tv_sec, uint64_t *tv_usec) +{ + proc_t pp = (proc_t)p; + struct uint64p { + uint64_t val; + } __attribute__((packed)); + + if (pp != PROC_NULL) { + if (tv_sec != NULL) + ((struct uint64p *)tv_sec)->val = pp->p_start.tv_sec; + if (tv_usec != NULL) + ((struct uint64p *)tv_usec)->val = pp->p_start.tv_usec; + } +} + char * proc_name_address(void *p) { @@ -785,17 +831,6 @@ proc_forcequota(proc_t p) } -int -proc_tbe(proc_t p) -{ - int retval = 0; - - if (p) - retval = p->p_flag & P_TBE; - return(retval? 1: 0); - -} - int proc_suser(proc_t p) { @@ -872,6 +907,16 @@ proc_puniqueid(proc_t p) return(p->p_puniqueid); } +uint64_t +proc_coalitionid(__unused proc_t p) +{ +#if CONFIG_COALITIONS + return(task_coalition_id(p->task)); +#else + return 0; +#endif +} + uint64_t proc_was_throttled(proc_t p) { @@ -898,6 +943,21 @@ proc_getexecutableuuid(proc_t p, unsigned char *uuidbuf, unsigned long size) } } +/* Return vnode for executable with an iocount. 
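proc_starttime_kdp() deliberately goes through a one-member packed struct so the compiler emits stores that are safe for the unaligned, packed stackshot buffer it writes into. The trick is self-contained and easy to demonstrate in userspace (GCC/Clang attribute syntax):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* A packed one-member struct tells the compiler the target may be
     * unaligned, so it emits a safe (byte-wise if needed) store. */
    struct uint64p {
        uint64_t val;
    } __attribute__((packed));

    static void store_u64_unaligned(void *dst, uint64_t v)
    {
        ((struct uint64p *)dst)->val = v;
    }

    int main(void)
    {
        char buf[16] = { 0 };
        uint64_t check = 0;

        store_u64_unaligned(buf + 1, 0x1122334455667788ULL);  /* odd address */

        memcpy(&check, buf + 1, sizeof(check));
        printf("0x%llx\n", (unsigned long long)check);
        return 0;
    }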
Must be released with vnode_put() */ +vnode_t +proc_getexecutablevnode(proc_t p) +{ + vnode_t tvp = p->p_textvp; + + if ( tvp != NULLVP) { + if (vnode_getwithref(tvp) == 0) { + return tvp; + } + } + + return NULLVP; +} + void bsd_set_dependency_capable(task_t task) @@ -1866,6 +1926,7 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user case CS_OPS_CDHASH: case CS_OPS_PIDOFFSET: case CS_OPS_ENTITLEMENTS_BLOB: + case CS_OPS_IDENTITY: case CS_OPS_BLOB: break; /* unrestricted */ default: @@ -2018,6 +2079,7 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user CS_HARD | CS_EXEC_SET_HARD | CS_KILL | CS_EXEC_SET_KILL | CS_RESTRICT | + CS_REQUIRE_LV | CS_ENFORCEMENT | CS_EXEC_SET_ENFORCEMENT; proc_lock(pt); @@ -2172,13 +2234,11 @@ proc_iterate(flags, callout, arg, filterfn, filterarg) switch (retval) { case PROC_RETURNED: + proc_rele(p); + break; case PROC_RETURNED_DONE: proc_rele(p); - if (retval == PROC_RETURNED_DONE) { - goto out; - } - break; - + goto out; case PROC_CLAIMED_DONE: goto out; case PROC_CLAIMED: @@ -2192,13 +2252,11 @@ proc_iterate(flags, callout, arg, filterfn, filterarg) switch (retval) { case PROC_RETURNED: + proc_drop_zombref(p); + break; case PROC_RETURNED_DONE: proc_drop_zombref(p); - if (retval == PROC_RETURNED_DONE) { - goto out; - } - break; - + goto out; case PROC_CLAIMED_DONE: goto out; case PROC_CLAIMED: @@ -2750,12 +2808,12 @@ session_rele(struct session *sess) } int -proc_transstart(proc_t p, int locked) +proc_transstart(proc_t p, int locked, int non_blocking) { if (locked == 0) proc_lock(p); while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) { - if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT) { + if (((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT) || non_blocking) { if (locked == 0) proc_unlock(p); return EDEADLK; @@ -2906,7 +2964,7 @@ proc_getpcontrol(int pid, int * pcontrolp) } int -proc_dopcontrol(proc_t p, void *num_found) +proc_dopcontrol(proc_t p) { int pcontrol; @@ -2914,13 +2972,12 @@ proc_dopcontrol(proc_t p, void *num_found) pcontrol = PROC_CONTROL_STATE(p); - if (PROC_ACTION_STATE(p) ==0) { + if (PROC_ACTION_STATE(p) == 0) { switch(pcontrol) { case P_PCTHROTTLE: PROC_SETACTION_STATE(p); proc_unlock(p); printf("low swap: throttling pid %d (%s)\n", p->p_pid, p->p_comm); - (*(int *)num_found)++; break; case P_PCSUSP: @@ -2928,7 +2985,6 @@ proc_dopcontrol(proc_t p, void *num_found) proc_unlock(p); printf("low swap: suspending pid %d (%s)\n", p->p_pid, p->p_comm); task_suspend(p->task); - (*(int *)num_found)++; break; case P_PCKILL: @@ -2936,7 +2992,6 @@ proc_dopcontrol(proc_t p, void *num_found) proc_unlock(p); printf("low swap: killing pid %d (%s)\n", p->p_pid, p->p_comm); psignal(p, SIGKILL); - (*(int *)num_found)++; break; default: @@ -3010,97 +3065,246 @@ proc_resetpcontrol(int pid) } -/* - * Return true if the specified process has an action state specified for it and it isn't - * already in an action state and it's using more physical memory than the specified threshold. - * Note: the memory_threshold argument is specified in bytes and is of type uint64_t. 
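The proc_iterate() rework above tightens the callback contract: PROC_RETURNED means the iterator drops the reference and keeps walking, the *_DONE variants end the walk, and CLAIMED means the callee took ownership of the reference. A toy rendering of that contract (names and values simplified; the real constants live in the proc headers):

    #include <stdio.h>

    enum { RETURNED, RETURNED_DONE, CLAIMED, CLAIMED_DONE };

    static void release_ref(int id) { printf("rele %d\n", id); }

    /* Pretend callback: stop the walk at id 3. */
    static int callout(int id) { return (id == 3) ? RETURNED_DONE : RETURNED; }

    int main(void)
    {
        for (int id = 1; id <= 5; id++) {
            switch (callout(id)) {
            case RETURNED:          /* drop the ref, keep iterating */
                release_ref(id);
                break;
            case RETURNED_DONE:     /* drop the ref, then stop */
                release_ref(id);
                goto out;
            case CLAIMED:           /* callee kept the ref, keep iterating */
                break;
            case CLAIMED_DONE:      /* callee kept the ref, stop */
                goto out;
            }
        }
    out:
        return 0;
    }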
- */
+
+struct no_paging_space
+{
+ uint64_t pcs_max_size;
+ uint64_t pcs_uniqueid;
+ int pcs_pid;
+ int pcs_proc_count;
+ uint64_t pcs_total_size;
+
+ uint64_t npcs_max_size;
+ uint64_t npcs_uniqueid;
+ int npcs_pid;
+ int npcs_proc_count;
+ uint64_t npcs_total_size;
+
+ int apcs_proc_count;
+ uint64_t apcs_total_size;
+};
+
 static int
-proc_pcontrol_filter(proc_t p, void *memory_thresholdp)
+proc_pcontrol_filter(proc_t p, void *arg)
 {
-
- return PROC_CONTROL_STATE(p) && /* if there's an action state specified... */
- (PROC_ACTION_STATE(p) == 0) && /* and we're not in the action state yet... */
- (get_task_resident_size(p->task) > *((uint64_t *)memory_thresholdp)); /* and this proc is over the mem threshold, */
- /* then return true to take action on this proc */
+ struct no_paging_space *nps;
+ uint64_t compressed;
+
+ nps = (struct no_paging_space *)arg;
+
+ compressed = get_task_compressed(p->task);
+
+ if (PROC_CONTROL_STATE(p)) {
+ if (PROC_ACTION_STATE(p) == 0) {
+ if (compressed > nps->pcs_max_size) {
+ nps->pcs_pid = p->p_pid;
+ nps->pcs_uniqueid = p->p_uniqueid;
+ nps->pcs_max_size = compressed;
+ }
+ nps->pcs_total_size += compressed;
+ nps->pcs_proc_count++;
+ } else {
+ nps->apcs_total_size += compressed;
+ nps->apcs_proc_count++;
+ }
+ } else {
+ if (compressed > nps->npcs_max_size) {
+ nps->npcs_pid = p->p_pid;
+ nps->npcs_uniqueid = p->p_uniqueid;
+ nps->npcs_max_size = compressed;
+ }
+ nps->npcs_total_size += compressed;
+ nps->npcs_proc_count++;
+
+ }
+ return (0);
 }

+static int
+proc_pcontrol_null(__unused proc_t p, __unused void *arg)
+{
+ return(PROC_RETURNED);
+}
+
 /*
- * Deal with the out of swap space condition. This routine gets called when
- * we want to swap something out but there's no more space left. Since this
- * creates a memory deadlock situtation, we need to take action to free up
- * some memory resources in order to prevent the system from hanging completely.
- * The action we take is based on what the system processes running at user level
- * have specified. Processes are marked in one of four categories: ones that
- * can be killed immediately, ones that should be suspended, ones that should
- * be throttled, and all the rest which are basically none of the above. Which
- * processes are marked as being in which category is a user level policy decision;
- * we just take action based on those decisions here.
+ * Deal with the low on compressor pool space condition... this function
+ * gets called when we are approaching the limits of the compressor pool or
+ * we are unable to create a new swap file.
+ * Since this eventually creates a memory deadlock situation, we need to take action to free up
+ * memory resources (both compressed and uncompressed) in order to prevent the system from hanging completely.
+ * There are 2 categories of processes to deal with. Those that have an action
+ * associated with them by the task itself and those that do not. Actionable
+ * tasks can have one of three categories specified: ones that
+ * can be killed immediately, ones that should be suspended, and ones that should
+ * be throttled. Processes that do not have an action associated with them are normally
+ * ignored unless they are utilizing such a large percentage of the compressor pool (currently 50%)
+ * that only by killing them can we hope to put the system back into a usable state.
*/ -#define STARTING_PERCENTAGE 50 /* memory threshold expressed as a percentage */ - /* of physical memory */ +#define NO_PAGING_SPACE_DEBUG 0 + +extern uint64_t vm_compressor_pages_compressed(void); struct timeval last_no_space_action = {0, 0}; -void -no_paging_space_action(void) +int +no_paging_space_action() { - - uint64_t memory_threshold; - int num_found; + proc_t p; + struct no_paging_space nps; struct timeval now; /* - * Throttle how often we come through here. Once every 20 seconds should be plenty. + * Throttle how often we come through here. Once every 5 seconds should be plenty. */ microtime(&now); - if (now.tv_sec <= last_no_space_action.tv_sec + 20) - return; - - last_no_space_action = now; + if (now.tv_sec <= last_no_space_action.tv_sec + 5) + return (0); /* - * Examine all processes and find those that have been marked to have some action - * taken when swap space runs out. Of those processes, select one or more and - * apply the specified action to them. The idea is to only take action against - * a few processes rather than hitting too many at once. If the low swap condition - * persists, this routine will get called again and we'll take action against more - * processes. + * Examine all processes and find the biggest (biggest is based on the number of pages this + * task has in the compressor pool) that has been marked to have some action + * taken when swap space runs out... we also find the biggest that hasn't been marked for + * action. * - * Of the processes that have been marked, we choose which ones to take action - * against according to how much physical memory they're presently using. We - * start with the STARTING_THRESHOLD and any processes using more physical memory - * than the percentage threshold will have action taken against it. If there - * are no processes over the threshold, then the threshold is cut in half and we - * look again for processes using more than this threshold. We continue in - * this fashion until we find at least one process to take action against. This - * iterative approach is less than ideally efficient, however we only get here - * when the system is almost in a memory deadlock and is pretty much just - * thrashing if it's doing anything at all. Therefore, the cpu overhead of - * potentially multiple passes here probably isn't revelant. + * If the biggest non-actionable task is over the "dangerously big" threshold (currently 50% of + * the total number of pages held by the compressor), we go ahead and kill it since no other task + * can have any real effect on the situation. Otherwise, we go after the actionable process.
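+ *
+ * Condensed, the decision made by the code below takes this shape (a sketch
+ * of the logic that follows, not a stable contract; the 50% figure is the
+ * current tuning):
+ *
+ *   if (nps.npcs_max_size > vm_compressor_pages_compressed() / 2)
+ *       -- kill the biggest non-actionable task
+ *   else if (nps.pcs_max_size > 0)
+ *       -- apply the chosen action (throttle/suspend/kill) to the
+ *       -- biggest actionable task via proc_dopcontrol()
+ *   else
+ *       -- log that no eligible process was found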
*/ + bzero(&nps, sizeof(nps)); - memory_threshold = (sane_size * STARTING_PERCENTAGE) / 100; /* resident threshold in bytes */ - - for (num_found = 0; num_found == 0; memory_threshold = memory_threshold / 2) { - proc_iterate(PROC_ALLPROCLIST, proc_dopcontrol, (void *)&num_found, proc_pcontrol_filter, (void *)&memory_threshold); + proc_iterate(PROC_ALLPROCLIST, proc_pcontrol_null, (void *)NULL, proc_pcontrol_filter, (void *)&nps); +#if NO_PAGING_SPACE_DEBUG + printf("low swap: npcs_proc_count = %d, npcs_total_size = %qd, npcs_max_size = %qd\n", + nps.npcs_proc_count, nps.npcs_total_size, nps.npcs_max_size); + printf("low swap: pcs_proc_count = %d, pcs_total_size = %qd, pcs_max_size = %qd\n", + nps.pcs_proc_count, nps.pcs_total_size, nps.pcs_max_size); + printf("low swap: apcs_proc_count = %d, apcs_total_size = %qd\n", + nps.apcs_proc_count, nps.apcs_total_size); +#endif + if (nps.npcs_max_size > (vm_compressor_pages_compressed() * 50) / 100) { /* - * If we just looked with memory_threshold == 0, then there's no need to iterate any further since - * we won't find any eligible processes at this point. + * for now we'll knock out any task that has more than 50% of the pages + * held by the compressor */ + if ((p = proc_find(nps.npcs_pid)) != PROC_NULL) { + + if (nps.npcs_uniqueid == p->p_uniqueid) { + /* + * verify this is still the same process + * in case the proc exited and the pid got reused while + * we were finishing the proc_iterate and getting to this point + */ + last_no_space_action = now; + + printf("low swap: killing pid %d (%s)\n", p->p_pid, p->p_comm); + psignal(p, SIGKILL); + + proc_rele(p); - if (memory_threshold == 0) { - if (num_found == 0) /* log that we couldn't do anything in this case */ - printf("low swap: unable to find any eligible processes to take action on\n"); + return (0); + } + + proc_rele(p); + } + } - break; + if (nps.pcs_max_size > 0) { + if ((p = proc_find(nps.pcs_pid)) != PROC_NULL) { + + if (nps.pcs_uniqueid == p->p_uniqueid) { + /* + * verify this is still the same process + * in case the proc exited and the pid got reused while + * we were finishing the proc_iterate and getting to this point + */ + last_no_space_action = now; + + proc_dopcontrol(p); + + proc_rele(p); + + return (1); + } + + proc_rele(p); } } + last_no_space_action = now; + + printf("low swap: unable to find any eligible processes to take action on\n"); + + return (0); +} + +int +proc_trace_log(__unused proc_t p, struct proc_trace_log_args *uap, __unused int *retval) +{ + int ret = 0; + proc_t target_proc = PROC_NULL; + pid_t target_pid = uap->pid; + uint64_t target_uniqueid = uap->uniqueid; + task_t target_task = NULL; + + if (priv_check_cred(kauth_cred_get(), PRIV_PROC_TRACE_INSPECT, 0)) { + ret = EPERM; + goto out; + } + target_proc = proc_find(target_pid); + if (target_proc != PROC_NULL) { + if (target_uniqueid != proc_uniqueid(target_proc)) { + ret = ENOENT; + goto out; + } + + target_task = proc_task(target_proc); + if (task_send_trace_memory(target_task, target_pid, target_uniqueid)) { + ret = EINVAL; + goto out; + } + } else + ret = ENOENT; + +out: + if (target_proc != PROC_NULL) + proc_rele(target_proc); + return (ret); +} + +#if VM_SCAN_FOR_SHADOW_CHAIN +extern int vm_map_shadow_max(vm_map_t map); +int proc_shadow_max(void); +int proc_shadow_max(void) +{ + int retval, max; + proc_t p; + task_t task; + vm_map_t map; + + max = 0; + proc_list_lock(); + for (p = allproc.lh_first; (p != 0); p = p->p_list.le_next) { + if (p->p_stat == SIDL) + continue; + task = p->task; + if (task ==
NULL) { + continue; + } + map = get_task_map(task); + if (map == NULL) { + continue; + } + retval = vm_map_shadow_max(map); + if (retval > max) { + max = retval; + } + } + proc_list_unlock(); + return max; } +#endif /* VM_SCAN_FOR_SHADOW_CHAIN */ diff --git a/bsd/kern/kern_prot.c b/bsd/kern/kern_prot.c index 73806d055..1301dbeea 100644 --- a/bsd/kern/kern_prot.c +++ b/bsd/kern/kern_prot.c @@ -104,9 +104,6 @@ #if CONFIG_MACF #include -#if CONFIG_MACF_MACH -#include -#endif #endif #include @@ -138,6 +135,9 @@ extern void kauth_cred_print(kauth_cred_t cred); #define DEBUG_CRED_CHANGE(fmt, ...) do {} while (0) #endif /* !DEBUG_CRED */ +#if DEVELOPMENT || DEBUG +extern void task_importance_update_owner_info(task_t); +#endif /* @@ -1997,10 +1997,6 @@ set_security_token(proc_t p) audit_token.val[6] = my_cred->cr_audit.as_aia_p->ai_asid; audit_token.val[7] = p->p_idversion; -#if CONFIG_MACF_MACH - mac_task_label_update_cred(my_cred, p->task); -#endif - host_priv = (sec_token.val[0]) ? HOST_PRIV_NULL : host_priv_self(); #if CONFIG_MACF if (host_priv != HOST_PRIV_NULL && mac_system_check_host_priv(my_cred)) @@ -2008,6 +2004,13 @@ set_security_token(proc_t p) #endif kauth_cred_unref(&my_cred); +#if DEVELOPMENT || DEBUG + /* + * Update the pid and proc name for importance base if any + */ + task_importance_update_owner_info(p->task); +#endif + return (host_security_set_task_token(host_security_self(), p->task, sec_token, diff --git a/bsd/kern/kern_resource.c b/bsd/kern/kern_resource.c index ce669fc02..2900cd52b 100644 --- a/bsd/kern/kern_resource.c +++ b/bsd/kern/kern_resource.c @@ -97,7 +97,6 @@ #include #include #include /* for thread_policy_set( ) */ -#include #include #include @@ -113,22 +112,27 @@ int donice(struct proc *curp, struct proc *chgp, int n); int dosetrlimit(struct proc *p, u_int which, struct rlimit *limp); int uthread_get_background_state(uthread_t); -static void do_background_socket(struct proc *p, thread_t thread, int priority); +static void do_background_socket(struct proc *p, thread_t thread); static int do_background_thread(struct proc *curp, thread_t thread, int priority); static int do_background_proc(struct proc *curp, struct proc *targetp, int priority); +static int set_gpudeny_proc(struct proc *curp, struct proc *targetp, int priority); +static int proc_set_darwin_role(proc_t curp, proc_t targetp, int priority); +static int proc_get_darwin_role(proc_t curp, proc_t targetp, int *priority); static int get_background_proc(struct proc *curp, struct proc *targetp, int *priority); void proc_apply_task_networkbg_internal(proc_t, thread_t); void proc_restore_task_networkbg_internal(proc_t, thread_t); int proc_pid_rusage(int pid, int flavor, user_addr_t buf, int32_t *retval); -void gather_rusage_info_v2(proc_t p, struct rusage_info_v2 *ru, int flavor); -int fill_task_rusage_v2(task_t task, struct rusage_info_v2 *ri); -static void rusage_info_v2_to_v0(struct rusage_info_v0 *ri_v0, struct rusage_info_v2 *ri_v2); -static void rusage_info_v2_to_v1(struct rusage_info_v1 *ri_v1, struct rusage_info_v2 *ri_v2); +void gather_rusage_info(proc_t p, rusage_info_current *ru, int flavor); +int fill_task_rusage(task_t task, rusage_info_current *ri); +void fill_task_billed_usage(task_t task, rusage_info_current *ri); +int fill_task_io_rusage(task_t task, rusage_info_current *ri); +int fill_task_qos_rusage(task_t task, rusage_info_current *ri); +static void rusage_info_conversion(rusage_info_t ri_info, rusage_info_current *ri_current, int flavor); int proc_get_rusage(proc_t p, int flavor, user_addr_t
buffer, __unused int is_zombie); rlim_t maxdmap = MAXDSIZ; /* XXX */ -rlim_t maxsmap = MAXSSIZ - PAGE_SIZE; /* XXX */ +rlim_t maxsmap = MAXSSIZ - PAGE_MAX_SIZE; /* XXX */ /* * Limits on the number of open files per process, and the number @@ -261,6 +265,24 @@ getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval) return (error); break; + case PRIO_DARWIN_ROLE: + if (uap->who == 0) { + p = curp; + } else { + p = proc_find(uap->who); + if (p == PROC_NULL) + break; + refheld = 1; + } + + error = proc_get_darwin_role(curp, p, &low); + + if (refheld) + proc_rele(p); + if (error) + return (error); + break; + default: return (EINVAL); } @@ -321,7 +343,7 @@ ppgrp_donice_callback(proc_t p, void * arg) */ /* ARGSUSED */ int -setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *retval) +setpriority(struct proc *curp, struct setpriority_args *uap, int32_t *retval) { struct proc *p; int found = 0, error = 0; @@ -417,11 +439,48 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r break; } + case PRIO_DARWIN_GPU: { + if (uap->who == 0) + return (EINVAL); + + p = proc_find(uap->who); + if (p == PROC_NULL) + break; + + error = set_gpudeny_proc(curp, p, uap->prio); + + found++; + proc_rele(p); + break; + } + + case PRIO_DARWIN_ROLE: { + if (uap->who == 0) { + p = curp; + } else { + p = proc_find(uap->who); + if (p == PROC_NULL) + break; + refheld = 1; + } + + error = proc_set_darwin_role(curp, p, uap->prio); + + found++; + if (refheld != 0) + proc_rele(p); + break; + } + default: return (EINVAL); } if (found == 0) return (ESRCH); + if (error == EIDRM) { + *retval = -2; + error = 0; + } return (error); } @@ -471,6 +530,163 @@ out: return (error); } +static int +set_gpudeny_proc(struct proc *curp, struct proc *targetp, int priority) +{ + int error = 0; + kauth_cred_t ucred; + kauth_cred_t target_cred; + + ucred = kauth_cred_get(); + target_cred = kauth_cred_proc_ref(targetp); + + /* TODO: Entitlement instead of uid check */ + + if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) && + kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) && + kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) { + error = EPERM; + goto out; + } + + if (curp == targetp) { + error = EPERM; + goto out; + } + +#if CONFIG_MACF + error = mac_proc_check_sched(curp, targetp); + if (error) + goto out; +#endif + + switch (priority) { + case PRIO_DARWIN_GPU_DENY: + task_set_gpu_denied(proc_task(targetp), TRUE); + break; + case PRIO_DARWIN_GPU_ALLOW: + task_set_gpu_denied(proc_task(targetp), FALSE); + break; + default: + error = EINVAL; + goto out; + } + +out: + kauth_cred_unref(&target_cred); + return (error); + +} + +static int +proc_set_darwin_role(proc_t curp, proc_t targetp, int priority) +{ + int error = 0; + uint32_t flagsp; + + kauth_cred_t ucred, target_cred; + + ucred = kauth_cred_get(); + target_cred = kauth_cred_proc_ref(targetp); + + if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) && + kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) && + kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) { + error = EPERM; + goto out; + } + + if (curp != targetp) { +#if CONFIG_MACF + if ((error = mac_proc_check_sched(curp, targetp))) + goto out; +#endif + } + + proc_get_darwinbgstate(proc_task(targetp), &flagsp); + if ((flagsp & PROC_FLAG_APPLICATION) != PROC_FLAG_APPLICATION) { + error = ENOTSUP; + goto out; + } + + integer_t role = 0; + + switch (priority) { + case PRIO_DARWIN_ROLE_DEFAULT: + role = 
TASK_UNSPECIFIED; + break; + case PRIO_DARWIN_ROLE_UI_FOCAL: + role = TASK_FOREGROUND_APPLICATION; + break; + case PRIO_DARWIN_ROLE_UI: + role = TASK_BACKGROUND_APPLICATION; + break; + case PRIO_DARWIN_ROLE_NON_UI: + role = TASK_NONUI_APPLICATION; + break; + default: + error = EINVAL; + goto out; + } + + proc_set_task_policy(proc_task(targetp), THREAD_NULL, + TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, role); + +out: + kauth_cred_unref(&target_cred); + return (error); +} + +static int +proc_get_darwin_role(proc_t curp, proc_t targetp, int *priority) +{ + int error = 0; + int role = 0; + + kauth_cred_t ucred, target_cred; + + ucred = kauth_cred_get(); + target_cred = kauth_cred_proc_ref(targetp); + + if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) && + kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) && + kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) { + error = EPERM; + goto out; + } + + if (curp != targetp) { +#if CONFIG_MACF + if ((error = mac_proc_check_sched(curp, targetp))) + goto out; +#endif + } + + role = proc_get_task_policy(proc_task(targetp), THREAD_NULL, + TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE); + + switch (role) { + case TASK_FOREGROUND_APPLICATION: + *priority = PRIO_DARWIN_ROLE_UI_FOCAL; + break; + case TASK_BACKGROUND_APPLICATION: + *priority = PRIO_DARWIN_ROLE_UI; + break; + case TASK_NONUI_APPLICATION: + *priority = PRIO_DARWIN_ROLE_NON_UI; + break; + case TASK_UNSPECIFIED: + default: + *priority = PRIO_DARWIN_ROLE_DEFAULT; + break; + } + +out: + kauth_cred_unref(&target_cred); + return (error); +} + + static int get_background_proc(struct proc *curp, struct proc *targetp, int *priority) { @@ -507,7 +723,6 @@ do_background_proc(struct proc *curp, struct proc *targetp, int priority) kauth_cred_t ucred; kauth_cred_t target_cred; int external; - int flavor; int enable; ucred = kauth_cred_get(); @@ -530,27 +745,19 @@ do_background_proc(struct proc *curp, struct proc *targetp, int priority) external = (curp == targetp) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL; switch (priority) { - case PRIO_DARWIN_NONUI: - flavor = TASK_POLICY_GPU_DENY; - enable = TASK_POLICY_ENABLE; - break; case PRIO_DARWIN_BG: - flavor = TASK_POLICY_DARWIN_BG; enable = TASK_POLICY_ENABLE; break; + case PRIO_DARWIN_NONUI: + /* ignored for compatibility */ + goto out; default: - /* - * DARWIN_BG and GPU_DENY disable are overloaded, - * so we need to turn them both off at the same time - * - * TODO: It would be nice to fail if priority != 0 - */ - flavor = TASK_POLICY_DARWIN_BG_AND_GPU; + /* TODO: EINVAL if priority != 0 */ enable = TASK_POLICY_DISABLE; break; } - proc_set_task_policy(proc_task(targetp), THREAD_NULL, external, flavor, enable); + proc_set_task_policy(proc_task(targetp), THREAD_NULL, external, TASK_POLICY_DARWIN_BG, enable); out: kauth_cred_unref(&target_cred); @@ -558,21 +765,27 @@ out: } static void -do_background_socket(struct proc *p, thread_t thread, int priority) +do_background_socket(struct proc *p, thread_t thread) { #if SOCKETS struct filedesc *fdp; struct fileproc *fp; - int i; + int i, background; - if (priority == PRIO_DARWIN_BG) { + proc_fdlock(p); + + if (thread != THREAD_NULL) + background = proc_get_effective_thread_policy(thread, TASK_POLICY_ALL_SOCKETS_BG); + else + background = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ALL_SOCKETS_BG); + + if (background) { /* * For PRIO_DARWIN_PROCESS (thread is NULL), simply mark * the sockets with the background flag. There's nothing * to do here for the PRIO_DARWIN_THREAD case. 
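 *
 * With the priority argument gone, the decision is read back from the
 * effective policy layer (TASK_POLICY_ALL_SOCKETS_BG above) rather than
 * trusted from the caller, so a caller only has to keep the policy current
 * before invoking this routine; an illustrative (not verbatim) sequence:
 *
 *   proc_set_task_policy(proc_task(p), THREAD_NULL,
 *       TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG, TASK_POLICY_ENABLE);
 *   do_background_socket(p, THREAD_NULL);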
*/ if (thread == THREAD_NULL) { - proc_fdlock(p); fdp = p->p_fd; for (i = 0; i < fdp->fd_nfiles; i++) { @@ -587,17 +800,13 @@ do_background_socket(struct proc *p, thread_t thread, int priority) socket_set_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND); sockp->so_background_thread = NULL; } - proc_fdunlock(p); } - } else { - /* disable networking IO throttle. * NOTE - It is a known limitation of the current design that we * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for * sockets created by other threads within this process. */ - proc_fdlock(p); fdp = p->p_fd; for ( i = 0; i < fdp->fd_nfiles; i++ ) { struct socket *sockp; @@ -615,10 +824,11 @@ do_background_socket(struct proc *p, thread_t thread, int priority) socket_clear_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND); sockp->so_background_thread = NULL; } - proc_fdunlock(p); } + + proc_fdunlock(p); #else -#pragma unused(p, thread, priority) +#pragma unused(p, thread) #endif } @@ -634,6 +844,7 @@ do_background_thread(struct proc *curp, thread_t thread, int priority) { struct uthread *ut; int enable, external; + int rv = 0; ut = get_bsdthread_info(thread); @@ -641,6 +852,16 @@ do_background_thread(struct proc *curp, thread_t thread, int priority) if ((ut->uu_flag & UT_VFORK) != 0) return(EPERM); + if (thread_is_static_param(thread)) { + return(EPERM); + } + + /* Not allowed to combine QoS and DARWIN_BG, doing so strips the QoS */ + if (thread_has_qos_policy(thread)) { + thread_remove_qos_policy(thread); + rv = EIDRM; + } + /* TODO: Fail if someone passes something besides 0 or PRIO_DARWIN_BG */ enable = (priority == PRIO_DARWIN_BG) ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE; external = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL; @@ -648,7 +869,7 @@ do_background_thread(struct proc *curp, thread_t thread, int priority) proc_set_task_policy_thread(curp->task, thread_tid(thread), external, TASK_POLICY_DARWIN_BG, enable); - return(0); + return rv; } @@ -1087,20 +1308,20 @@ ruadd(struct rusage *ru, struct rusage *ru2) * Note: proc lock of parent should be held while calling this function. 
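 *
 * Each ri_child_* counter folds in both the exiting child's own usage and
 * whatever that child had already accumulated from its descendants; shown
 * here for user time (the remaining fields follow the same pattern):
 *
 *   ri->ri_child_user_time += ri_current->ri_user_time +
 *                             ri_current->ri_child_user_time;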
*/ void -update_rusage_info_child(struct rusage_info_child *ri, struct rusage_info_v2 *ri2) +update_rusage_info_child(struct rusage_info_child *ri, rusage_info_current *ri_current) { - ri->ri_child_user_time += (ri2->ri_user_time + - ri2->ri_child_user_time); - ri->ri_child_system_time += (ri2->ri_system_time + - ri2->ri_child_system_time); - ri->ri_child_pkg_idle_wkups += (ri2->ri_pkg_idle_wkups + - ri2->ri_child_pkg_idle_wkups); - ri->ri_child_interrupt_wkups += (ri2->ri_interrupt_wkups + - ri2->ri_child_interrupt_wkups); - ri->ri_child_pageins += (ri2->ri_pageins + - ri2->ri_child_pageins); - ri->ri_child_elapsed_abstime += ((ri2->ri_proc_exit_abstime - - ri2->ri_proc_start_abstime) + ri2->ri_child_elapsed_abstime); + ri->ri_child_user_time += (ri_current->ri_user_time + + ri_current->ri_child_user_time); + ri->ri_child_system_time += (ri_current->ri_system_time + + ri_current->ri_child_system_time); + ri->ri_child_pkg_idle_wkups += (ri_current->ri_pkg_idle_wkups + + ri_current->ri_child_pkg_idle_wkups); + ri->ri_child_interrupt_wkups += (ri_current->ri_interrupt_wkups + + ri_current->ri_child_interrupt_wkups); + ri->ri_child_pageins += (ri_current->ri_pageins + + ri_current->ri_child_pageins); + ri->ri_child_elapsed_abstime += ((ri_current->ri_proc_exit_abstime - + ri_current->ri_proc_start_abstime) + ri_current->ri_child_elapsed_abstime); } void @@ -1227,7 +1448,7 @@ static int iopolicysys_vfs(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param); int -iopolicysys(struct proc *p, struct iopolicysys_args *uap, __unused int32_t *retval) +iopolicysys(struct proc *p, struct iopolicysys_args *uap, int32_t *retval) { int error = 0; struct _iopol_param_t iop_param; @@ -1238,6 +1459,10 @@ iopolicysys(struct proc *p, struct iopolicysys_args *uap, __unused int32_t *retv switch (iop_param.iop_iotype) { case IOPOL_TYPE_DISK: error = iopolicysys_disk(p, uap->cmd, iop_param.iop_scope, iop_param.iop_policy, &iop_param); + if (error == EIDRM) { + *retval = -2; + error = 0; + } if (error) goto out; break; @@ -1279,6 +1504,32 @@ iopolicysys_disk(struct proc *p __unused, int cmd, int scope, int policy, struct case IOPOL_SCOPE_THREAD: thread = current_thread(); policy_flavor = TASK_POLICY_IOPOL; + + /* Not allowed to combine QoS and (non-PASSIVE) IO policy, doing so strips the QoS */ + if (cmd == IOPOL_CMD_SET && thread_has_qos_policy(thread)) { + switch (policy) { + case IOPOL_DEFAULT: + case IOPOL_PASSIVE: + break; + case IOPOL_UTILITY: + case IOPOL_THROTTLE: + case IOPOL_IMPORTANT: + case IOPOL_STANDARD: + if (!thread_is_static_param(thread)) { + thread_remove_qos_policy(thread); + /* + * This is not an error case, this is to return a marker to user-space that + * we stripped the thread of its QoS class. + */ + error = EIDRM; + break; + } + /* otherwise, fall through to the error case. */ + default: + error = EINVAL; + goto out; + } + } break; case IOPOL_SCOPE_DARWIN_BG: @@ -1413,34 +1664,40 @@ out: } /* BSD call back function for task_policy */ -void proc_apply_task_networkbg(void * bsd_info, thread_t thread, int bg); +void proc_apply_task_networkbg(void * bsd_info, thread_t thread); void -proc_apply_task_networkbg(void * bsd_info, thread_t thread, int bg) +proc_apply_task_networkbg(void * bsd_info, thread_t thread) { - proc_t p = PROC_NULL; - proc_t curp = (proc_t)bsd_info; - pid_t pid; - int prio = (bg ? 
PRIO_DARWIN_BG : 0); + assert(bsd_info != PROC_NULL); + + pid_t pid = proc_pid((proc_t)bsd_info); + + proc_t p = proc_find(pid); - pid = curp->p_pid; - p = proc_find(pid); if (p != PROC_NULL) { - do_background_socket(p, thread, prio); + assert(p == (proc_t)bsd_info); + + do_background_socket(p, thread); proc_rele(p); } } void -gather_rusage_info_v2(proc_t p, struct rusage_info_v2 *ru, int flavor) +gather_rusage_info(proc_t p, rusage_info_current *ru, int flavor) { struct rusage_info_child *ri_child; assert(p->p_stats != NULL); switch(flavor) { + + case RUSAGE_INFO_V3: + fill_task_qos_rusage(p->task, ru); + fill_task_billed_usage(p->task, ru); + /* fall through */ + case RUSAGE_INFO_V2: - ru->ri_diskio_bytesread = p->p_stats->ri_diskiobytes.ri_bytesread; - ru->ri_diskio_byteswritten = p->p_stats->ri_diskiobytes.ri_byteswritten; + fill_task_io_rusage(p->task, ru); /* fall through */ case RUSAGE_INFO_V1: @@ -1462,58 +1719,67 @@ gather_rusage_info_v2(proc_t p, struct rusage_info_v2 *ru, int flavor) case RUSAGE_INFO_V0: proc_getexecutableuuid(p, (unsigned char *)&ru->ri_uuid, sizeof (ru->ri_uuid)); - fill_task_rusage_v2(p->task, ru); + fill_task_rusage(p->task, ru); ru->ri_proc_start_abstime = p->p_stats->ps_start; } } -/* - * Temporary function to copy value from rusage_info_v2 to rusage_info_v0. - */ static void -rusage_info_v2_to_v0(struct rusage_info_v0 *ri_v0, struct rusage_info_v2 *ri_v2) +rusage_info_conversion(rusage_info_t ri_info, rusage_info_current *ri_current, int flavor) { - memcpy(&ri_v0->ri_uuid[0], &ri_v2->ri_uuid[0], sizeof(ri_v0->ri_uuid)); - ri_v0->ri_user_time = ri_v2->ri_user_time; - ri_v0->ri_system_time = ri_v2->ri_system_time; - ri_v0->ri_pkg_idle_wkups = ri_v2->ri_pkg_idle_wkups; - ri_v0->ri_interrupt_wkups = ri_v2->ri_interrupt_wkups; - ri_v0->ri_pageins = ri_v2->ri_pageins; - ri_v0->ri_wired_size = ri_v2->ri_wired_size; - ri_v0->ri_resident_size = ri_v2->ri_resident_size; - ri_v0->ri_phys_footprint = ri_v2->ri_phys_footprint; - ri_v0->ri_proc_start_abstime = ri_v2->ri_proc_start_abstime; - ri_v0->ri_proc_exit_abstime = ri_v2->ri_proc_exit_abstime; -} + struct rusage_info_v0 *ri_v0; + struct rusage_info_v1 *ri_v1; + struct rusage_info_v2 *ri_v2; -static void -rusage_info_v2_to_v1(struct rusage_info_v1 *ri_v1, struct rusage_info_v2 *ri_v2) -{ - memcpy(&ri_v1->ri_uuid[0], &ri_v2->ri_uuid[0], sizeof(ri_v1->ri_uuid)); - ri_v1->ri_user_time = ri_v2->ri_user_time; - ri_v1->ri_system_time = ri_v2->ri_system_time; - ri_v1->ri_pkg_idle_wkups = ri_v2->ri_pkg_idle_wkups; - ri_v1->ri_interrupt_wkups = ri_v2->ri_interrupt_wkups; - ri_v1->ri_pageins = ri_v2->ri_pageins; - ri_v1->ri_wired_size = ri_v2->ri_wired_size; - ri_v1->ri_resident_size = ri_v2->ri_resident_size; - ri_v1->ri_phys_footprint = ri_v2->ri_phys_footprint; - ri_v1->ri_proc_start_abstime = ri_v2->ri_proc_start_abstime; - ri_v1->ri_proc_exit_abstime = ri_v2->ri_proc_exit_abstime; - ri_v1->ri_child_user_time = ri_v2->ri_child_user_time; - ri_v1->ri_child_system_time = ri_v2->ri_child_system_time; - ri_v1->ri_child_pkg_idle_wkups = ri_v2->ri_child_pkg_idle_wkups; - ri_v1->ri_child_interrupt_wkups = ri_v2->ri_child_interrupt_wkups; - ri_v1->ri_child_pageins = ri_v2->ri_child_pageins; - ri_v1->ri_child_elapsed_abstime = ri_v2->ri_child_elapsed_abstime; + switch (flavor) { + + case RUSAGE_INFO_V2: + ri_v2 = (struct rusage_info_v2 *)ri_info; + ri_v2->ri_diskio_bytesread = ri_current->ri_diskio_bytesread; + ri_v2->ri_diskio_byteswritten = ri_current->ri_diskio_byteswritten; + /* fall through */ + + case RUSAGE_INFO_V1: + 
ri_v1 = (struct rusage_info_v1 *)ri_info; + ri_v1->ri_child_user_time = ri_current->ri_child_user_time; + ri_v1->ri_child_system_time = ri_current->ri_child_system_time; + ri_v1->ri_child_pkg_idle_wkups = ri_current->ri_child_pkg_idle_wkups; + ri_v1->ri_child_interrupt_wkups = ri_current->ri_child_interrupt_wkups; + ri_v1->ri_child_pageins = ri_current->ri_child_pageins; + ri_v1->ri_child_elapsed_abstime = ri_current->ri_child_elapsed_abstime; + /* fall through */ + + case RUSAGE_INFO_V0: + ri_v0 = (struct rusage_info_v0 *)ri_info; + memcpy(&ri_v0->ri_uuid[0], &ri_current->ri_uuid[0], sizeof(ri_v0->ri_uuid)); + ri_v0->ri_user_time = ri_current->ri_user_time; + ri_v0->ri_system_time = ri_current->ri_system_time; + ri_v0->ri_pkg_idle_wkups = ri_current->ri_pkg_idle_wkups; + ri_v0->ri_interrupt_wkups = ri_current->ri_interrupt_wkups; + ri_v0->ri_pageins = ri_current->ri_pageins; + ri_v0->ri_wired_size = ri_current->ri_wired_size; + ri_v0->ri_resident_size = ri_current->ri_resident_size; + ri_v0->ri_phys_footprint = ri_current->ri_phys_footprint; + ri_v0->ri_proc_start_abstime = ri_current->ri_proc_start_abstime; + ri_v0->ri_proc_exit_abstime = ri_current->ri_proc_exit_abstime; + + break; + + default: + break; + } } + int proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie) { struct rusage_info_v0 ri_v0; struct rusage_info_v1 ri_v1; struct rusage_info_v2 ri_v2; + struct rusage_info_v3 ri_v3; + + rusage_info_current ri_current; int error = 0; @@ -1524,11 +1790,11 @@ proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie * Otherwise, look to the cached info in the zombie proc. */ if (p->p_ru == NULL) { - gather_rusage_info_v2(p, &ri_v2, flavor); - ri_v2.ri_proc_exit_abstime = 0; - rusage_info_v2_to_v0(&ri_v0, &ri_v2); + gather_rusage_info(p, &ri_current, flavor); + ri_current.ri_proc_exit_abstime = 0; + rusage_info_conversion(&ri_v0, &ri_current, flavor); } else { - rusage_info_v2_to_v0(&ri_v0, &p->p_ru->ri); + rusage_info_conversion(&ri_v0, &p->p_ru->ri, flavor); } error = copyout(&ri_v0, buffer, sizeof (ri_v0)); break; @@ -1539,11 +1805,11 @@ proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie * Otherwise, look to the cached info in the zombie proc. */ if (p->p_ru == NULL) { - gather_rusage_info_v2(p, &ri_v2, flavor); - ri_v2.ri_proc_exit_abstime = 0; - rusage_info_v2_to_v1(&ri_v1, &ri_v2); + gather_rusage_info(p, &ri_current, flavor); + ri_current.ri_proc_exit_abstime = 0; + rusage_info_conversion(&ri_v1, &ri_current, flavor); } else { - rusage_info_v2_to_v1(&ri_v1, &p->p_ru->ri); + rusage_info_conversion(&ri_v1, &p->p_ru->ri, flavor); } error = copyout(&ri_v1, buffer, sizeof (ri_v1)); break; @@ -1554,14 +1820,29 @@ proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie * Otherwise, look to the cached info in the zombie proc. */ if (p->p_ru == NULL) { - gather_rusage_info_v2(p, &ri_v2, flavor); - ri_v2.ri_proc_exit_abstime = 0; + gather_rusage_info(p, &ri_current, flavor); + ri_current.ri_proc_exit_abstime = 0; + rusage_info_conversion(&ri_v2, &ri_current, flavor); } else { - ri_v2 = p->p_ru->ri; + rusage_info_conversion(&ri_v2, &p->p_ru->ri, flavor); } error = copyout(&ri_v2, buffer, sizeof (ri_v2)); break; + case RUSAGE_INFO_V3: + /* + * If task is still alive, collect info from the live task itself. + * Otherwise, look to the cached info in the zombie proc. 
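+ *
+ * From user space this flavor is reachable through the libproc wrapper;
+ * a minimal consumer, assuming the usual proc_pid_rusage() declaration
+ * from <libproc.h>, would look like:
+ *
+ *   struct rusage_info_v3 ri;
+ *   if (proc_pid_rusage(getpid(), RUSAGE_INFO_V3,
+ *           (rusage_info_t *)&ri) == 0)
+ *       printf("phys footprint: %llu\n",
+ *           (unsigned long long)ri.ri_phys_footprint);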
+ */ + if (p->p_ru == NULL) { + gather_rusage_info(p, &ri_v3, flavor); + ri_v3.ri_proc_exit_abstime = 0; + } else { + ri_v3 = p->p_ru->ri; + } + error = copyout(&ri_v3, buffer, sizeof (ri_v3)); + break; + default: error = EINVAL; break; @@ -1597,18 +1878,24 @@ mach_to_bsd_rv(int mach_rv) * RLIMIT_WAKEUPS_MONITOR */ int -proc_rlimit_control(__unused struct proc *p, struct proc_rlimit_control_args *uap, int32_t *retval) +proc_rlimit_control(__unused struct proc *p, struct proc_rlimit_control_args *uap, __unused int32_t *retval) { proc_t targetp; int error = 0; struct proc_rlimit_control_wakeupmon wakeupmon_args; uint32_t cpumon_flags; + uint32_t cpulimits_flags; kauth_cred_t my_cred, target_cred; - *retval = 0; + /* -1 implicitly means our own process (perhaps even the current thread for per-thread attributes) */ + if (uap->pid == -1) { + targetp = proc_self(); + } else { + targetp = proc_find(uap->pid); + } - if ((targetp = proc_find(uap->pid)) == PROC_NULL) { - *retval = -1; + /* proc_self() can return NULL for an exiting process */ + if (targetp == PROC_NULL) { return (ESRCH); } @@ -1620,9 +1907,7 @@ proc_rlimit_control(__unused struct proc *p, struct proc_rlimit_control_args *ua kauth_cred_getruid(my_cred) != kauth_cred_getuid(target_cred)) { proc_rele(targetp); kauth_cred_unref(&target_cred); - *retval = -1; - error = EACCES; - return (error); + return (EACCES); } switch (uap->flavor) { @@ -1640,6 +1925,29 @@ proc_rlimit_control(__unused struct proc *p, struct proc_rlimit_control_args *ua cpumon_flags = uap->arg; // XXX temporarily stashing flags in argp (12592127) error = mach_to_bsd_rv(task_cpu_usage_monitor_ctl(targetp->task, &cpumon_flags)); break; + case RLIMIT_THREAD_CPULIMITS: + cpulimits_flags = (uint32_t)uap->arg; // only need a limited set of bits, pass in void * argument + + if (uap->pid != -1) { + error = EINVAL; + break; + } + + uint8_t percent = 0; + uint32_t ms_refill = 0; + uint64_t ns_refill; + + percent = (uint8_t)(cpulimits_flags & 0xffU); /* low 8 bits for percent */ + ms_refill = (cpulimits_flags >> 8) & 0xffffff; /* next 24 bits represent ms refill value */ + if (percent >= 100) { + error = EINVAL; + break; + } + + ns_refill = ((uint64_t)ms_refill) * NSEC_PER_MSEC; + + error = mach_to_bsd_rv(thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, percent, ns_refill)); + break; default: error = EINVAL; break; @@ -1648,13 +1956,21 @@ proc_rlimit_control(__unused struct proc *p, struct proc_rlimit_control_args *ua proc_rele(targetp); kauth_cred_unref(&target_cred); - if (error != 0) { - *retval = -1; - } - /* * Return value from this function becomes errno to userland caller. - * *retval is what the system call invocation returns. */ return (error); } + +/* + * Return the current amount of CPU consumed by this thread (in either user or kernel mode) + */ +int thread_selfusage(struct proc *p __unused, struct thread_selfusage_args *uap __unused, uint64_t *retval) +{ + uint64_t runtime; + + runtime = thread_get_runtime_self(); + *retval = runtime; + + return (0); +} diff --git a/bsd/kern/kern_sfi.c b/bsd/kern/kern_sfi.c new file mode 100644 index 000000000..a9787dfb9 --- /dev/null +++ b/bsd/kern/kern_sfi.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This file provides the syscall-based configuration facility + * for Selective Forced Idle (SFI). Input arguments have basic checking + * applied here, although more specific semantic checking is done in + * osfmk/kern/sfi.c. All copyin()/copyout() operations are performed + * in this source file. + */ + +#define SFI_DEBUG 0 + +#if SFI_DEBUG +#define dprintf(...) printf(__VA_ARGS__) +#else +#define dprintf(...) do { } while(0) +#endif + +static int proc_apply_sfi_managed(proc_t p, void * arg); + +int sfi_ctl(struct proc *p __unused, struct sfi_ctl_args *uap, int32_t *retval __unused) +{ + uint32_t operation = uap->operation; + int error = 0; + kern_return_t kret = KERN_SUCCESS; + uint64_t out_time = 0; + + switch (operation) { + case SFI_CTL_OPERATION_SFI_SET_WINDOW: + if (uap->out_time != USER_ADDR_NULL) { + return EINVAL; + } + if (uap->sfi_class != SFI_CLASS_UNSPECIFIED) { + return EINVAL; + } + + error = priv_check_cred(kauth_cred_get(), PRIV_SELECTIVE_FORCED_IDLE, 0); + if (error) { + dprintf("%s failed privilege check for sfi_ctl: %d\n", p->p_comm, error); + return (error); + } else { + dprintf("%s succeeded privilege check for sfi_ctl\n", p->p_comm); + } + + if (uap->time == 0) { + /* actually a cancel */ + kret = sfi_window_cancel(); + } else { + kret = sfi_set_window(uap->time); + } + + if (kret) { + error = EINVAL; + } + + break; + case SFI_CTL_OPERATION_SFI_GET_WINDOW: + if (uap->time != 0) { + return EINVAL; + } + if (uap->sfi_class != SFI_CLASS_UNSPECIFIED) { + return EINVAL; + } + + kret = sfi_get_window(&out_time); + if (kret == KERN_SUCCESS) { + error = copyout(&out_time, uap->out_time, sizeof(out_time)); + } else { + error = EINVAL; + } + + break; + case SFI_CTL_OPERATION_SET_CLASS_OFFTIME: + if (uap->out_time != USER_ADDR_NULL) { + return EINVAL; + } + + error = priv_check_cred(kauth_cred_get(), PRIV_SELECTIVE_FORCED_IDLE, 0); + if (error) { + dprintf("%s failed privilege check for sfi_ctl: %d\n", p->p_comm, error); + return (error); + } else { + dprintf("%s succeeded privilege check for sfi_ctl\n", p->p_comm); + } + + if (uap->time == 0) { + /* actually a cancel */ + kret = sfi_class_offtime_cancel(uap->sfi_class); + } else { + kret = sfi_set_class_offtime(uap->sfi_class, uap->time); + } + + if (kret) { + error = EINVAL; + } + + break; + case SFI_CTL_OPERATION_GET_CLASS_OFFTIME: + if (uap->time != 0) { + return EINVAL; + } + + kret = 
sfi_get_class_offtime(uap->sfi_class, &out_time); + if (kret == KERN_SUCCESS) { + error = copyout(&out_time, uap->out_time, sizeof(out_time)); + } else { + error = EINVAL; + } + + break; + default: + error = ENOTSUP; + break; + } + + return error; +} + +static int proc_apply_sfi_managed(proc_t p, void * arg) +{ + uint32_t flags = *(uint32_t *)arg; + pid_t pid = p->p_pid; + boolean_t managed_enabled = (flags == SFI_PROCESS_SET_MANAGED)? TRUE : FALSE; + + if (pid == 0) { /* ignore setting on kernproc */ + return PROC_RETURNED; + } + + if (managed_enabled) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_PID_SET_MANAGED) | DBG_FUNC_NONE, pid, 0, 0, 0, 0); + } else { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_PID_CLEAR_MANAGED) | DBG_FUNC_NONE, pid, 0, 0, 0, 0); + } + + proc_set_task_policy(p->task, THREAD_NULL, + TASK_POLICY_ATTRIBUTE, TASK_POLICY_SFI_MANAGED, + managed_enabled ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE); + return PROC_RETURNED; +} + +int sfi_pidctl(struct proc *p __unused, struct sfi_pidctl_args *uap, int32_t *retval __unused) +{ + uint32_t operation = uap->operation; + pid_t pid = uap->pid; + int error = 0; + uint32_t out_flags = 0; + boolean_t managed_enabled; + proc_t targetp; + + switch (operation) { + case SFI_PIDCTL_OPERATION_PID_SET_FLAGS: + if (uap->out_sfi_flags != USER_ADDR_NULL + || !(uap->sfi_flags & SFI_PROCESS_SET_MANAGED_MASK) + || uap->sfi_flags == SFI_PROCESS_SET_MANAGED_MASK) { + return EINVAL; + } + + error = priv_check_cred(kauth_cred_get(), PRIV_SELECTIVE_FORCED_IDLE, 0); + if (error) { + dprintf("%s failed privilege check for sfi_pidctl: %d\n", p->p_comm, error); + return (error); + } else { + dprintf("%s succeeded privilege check for sfi_pidctl\n", p->p_comm); + } + + if (uap->pid == 0) { + /* only allow SFI_PROCESS_SET_UNMANAGED for pid 0 */ + if (uap->sfi_flags != SFI_PROCESS_SET_UNMANAGED) { + return EINVAL; + } + + proc_iterate(PROC_ALLPROCLIST, proc_apply_sfi_managed, (void *)&uap->sfi_flags, NULL, NULL); + break; + } + + targetp = proc_find(pid); + if (!targetp) { + error = ESRCH; + break; + } + + proc_apply_sfi_managed(targetp, (void *)&uap->sfi_flags); + + proc_rele(targetp); + + break; + case SFI_PIDCTL_OPERATION_PID_GET_FLAGS: + if (uap->sfi_flags != 0) { + return EINVAL; + } + if (uap->pid == 0) { + return EINVAL; + } + + targetp = proc_find(pid); + if (!targetp) { + error = ESRCH; + break; + } + + managed_enabled = proc_get_task_policy(targetp->task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_SFI_MANAGED); + + proc_rele(targetp); + + out_flags = managed_enabled ? 
SFI_PROCESS_SET_MANAGED : SFI_PROCESS_SET_UNMANAGED; + + error = copyout(&out_flags, uap->out_sfi_flags, sizeof(out_flags)); + + break; + default: + error = ENOTSUP; + break; + } + + return error; +} diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c index 9605be839..bb44111fe 100644 --- a/bsd/kern/kern_sig.c +++ b/bsd/kern/kern_sig.c @@ -101,7 +101,6 @@ #include #include /* for coredump */ #include /* for APC support */ -#include #include /* extern void *get_bsdtask_info(task_t); */ #include #include @@ -1712,7 +1711,7 @@ static void psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) { int prop; - sig_t action = NULL; + user_addr_t action = USER_ADDR_NULL; proc_t sig_proc; thread_t sig_thread; register task_t sig_task; @@ -3089,6 +3088,10 @@ proc_signalstart(proc_t p, int locked) { if (!locked) proc_lock(p); + + if(p->p_signalholder == current_thread()) + panic("proc_signalstart: thread attempting to signal a process for which it holds the signal lock"); + p->p_sigwaitcnt++; while ((p->p_lflag & P_LINSIGNAL) == P_LINSIGNAL) msleep(&p->p_sigmask, &p->p_mlock, 0, "proc_signstart", NULL); diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index 062ffa524..0e9d6c9c6 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -133,7 +133,8 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, filechunk = 1*1024*1024*1024; if (filechunk > (size_t)(end - offset)) filechunk = (size_t)(end - offset); - error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, &filechunk, NULL, 0, NULL); + error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, + &filechunk, NULL, VNODE_WRITE, NULL); if (error) break; fileblk = blkno * ref->blksize; } @@ -156,7 +157,7 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, { pin.cp_extent.offset = fileblk; pin.cp_extent.length = filechunk; - pin.cp_flags = _DKIOCSPINDISCARDDATA; + pin.cp_flags = _DKIOCCSPINFORHIBERNATION; error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin); if (error && (ENOTTY != error)) { @@ -172,11 +173,10 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, return (error); } -int -kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len); struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, + boolean_t create_file, kern_get_file_extents_callback_t callback, void * callback_ref, off_t set_file_size, @@ -223,7 +223,8 @@ kern_open_file_for_direct_io(const char * name, p = kernproc; ref->ctx = vfs_context_create(vfs_context_current()); - if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx))) + if ((error = vnode_open(name, (create_file) ? (O_CREAT | FWRITE) : FWRITE, + (0), 0, &ref->vp, ref->ctx))) goto out; if (ref->vp->v_type == VREG) @@ -235,7 +236,7 @@ kern_open_file_for_direct_io(const char * name, if (write_file_addr && write_file_len) { - if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len))) + if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, 0))) goto out; } @@ -255,9 +256,9 @@ kern_open_file_for_direct_io(const char * name, if (ref->vp->v_type == VREG) { - /* Don't dump files with links. */ - if (va.va_nlink != 1) - goto out; + /* Don't dump files with links. 
*/ + if (va.va_nlink != 1) + goto out; device = va.va_fsid; ref->filelength = va.va_data_size; @@ -266,37 +267,14 @@ kern_open_file_for_direct_io(const char * name, p2 = p; do_ioctl = &file_ioctl; - if (set_file_size && (set_file_size != (off_t) va.va_data_alloc)) - { - u_int32_t alloc_flags = PREALLOCATE | ALLOCATEFROMPEOF | ALLOCATEALL; - - vnode_lock_spin(ref->vp); - CLR(ref->vp->v_flag, VSWAP); - vnode_unlock(ref->vp); - - if (set_file_size < (off_t) va.va_data_alloc) - { - struct vnode_attr setva; - VATTR_INIT(&setva); - VATTR_SET(&setva, va_data_size, set_file_size); - error = vnode_setattr(ref->vp, &setva, ref->ctx); - } - else - { - off_t bytesallocated = set_file_size - va.va_data_alloc; - error = VNOP_ALLOCATE(ref->vp, bytesallocated, alloc_flags, - &bytesallocated, 0 /*fst_offset*/, - ref->ctx); - HIBLOG("VNOP_ALLOCATE(%d) %qd\n", error, bytesallocated); - } - // F_SETSIZE: - (void) vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL, ref->ctx); - ref->filelength = set_file_size; - - vnode_lock_spin(ref->vp); - SET(ref->vp->v_flag, VSWAP); - vnode_unlock(ref->vp); - } + if (set_file_size) + { + error = vnode_setsize(ref->vp, set_file_size, + IO_NOZEROFILL | IO_NOAUTH, ref->ctx); + if (error) + goto out; + ref->filelength = set_file_size; + } } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { @@ -350,7 +328,8 @@ kern_open_file_for_direct_io(const char * name, filechunk = 1*1024*1024*1024; daddr64_t blkno; - error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL); + error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, + &filechunk, NULL, VNODE_WRITE, NULL); if (error) goto out; @@ -402,9 +381,12 @@ kern_open_file_for_direct_io(const char * name, // get partition base - error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result); - if (error) - goto out; + if (partitionbase_result) + { + error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result); + if (error) + goto out; + } // get block size & constraints @@ -470,7 +452,7 @@ kern_open_file_for_direct_io(const char * name, *partition_device_result = device; if (image_device_result) *image_device_result = target; - if (flags) + if (oflags) *oflags = flags; out: @@ -484,6 +466,15 @@ out: if (error && ref) { + if (ref->pinned) + { + _dk_cs_pin_t pin; + bzero(&pin, sizeof(pin)); + + pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST; + p1 = &device; + (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin); + } if (ref->vp) { vnode_close(ref->vp, FWRITE, ref->ctx); @@ -498,11 +489,11 @@ out: } int -kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len) +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len, int ioflag) { return (vn_rdwr(UIO_WRITE, ref->vp, addr, len, offset, - UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT, + UIO_SYSSPACE, ioflag|IO_SYNC|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ref->ctx), (int *) 0, vfs_context_proc(ref->ctx))); } @@ -514,6 +505,7 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, off_t discard_offset, off_t discard_end) { int error; + _dk_cs_pin_t pin; kprintf("kern_close_file_for_direct_io\n"); if (!ref) return; @@ -538,6 +530,14 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, do_ioctl = &device_ioctl; } (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); + + if (ref->pinned) + { + bzero(&pin, sizeof(pin)); + pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST; + (void) do_ioctl(p1, p2, 
_DKIOCCSUNPINEXTENT, (caddr_t)&pin); + } + if (discard_offset && discard_end && !ref->pinned) { @@ -545,7 +545,7 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, } if (addr && write_length) { - (void) kern_write_file(ref, write_offset, addr, write_length); + (void) kern_write_file(ref, write_offset, addr, write_length, 0); } error = vnode_close(ref->vp, FWRITE, ref->ctx); @@ -557,4 +557,3 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, ref->ctx = NULL; kfree(ref, sizeof(struct kern_direct_file_io_ref_t)); } - diff --git a/bsd/kern/kern_synch.c b/bsd/kern/kern_synch.c index 34cb1520a..b1c4eda1c 100644 --- a/bsd/kern/kern_synch.c +++ b/bsd/kern/kern_synch.c @@ -53,7 +53,7 @@ #include #include -#include +#include #include /* for unix_syscall_return() */ #include diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index c97123066..251629b5b 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -113,7 +113,6 @@ #include #include #include -#include #include #include #include @@ -122,7 +121,6 @@ #include #include -#include #include #include @@ -133,6 +131,7 @@ #include #include #include +#include #if defined(__i386__) || defined(__x86_64__) #include @@ -146,14 +145,16 @@ #include #endif +#if HYPERVISOR +#include +#endif + /* * deliberately setting max requests to really high number * so that runaway settings do not cause MALLOC overflows */ #define AIO_MAX_REQUESTS (128 * CONFIG_AIO_MAX) -extern sysctlfn net_sysctl; -extern sysctlfn cpu_sysctl; extern int aio_max_requests; extern int aio_max_requests_per_process; extern int aio_worker_threads; @@ -164,8 +165,8 @@ extern int speculative_reads_disabled; extern int ignore_is_ssd; extern unsigned int speculative_prefetch_max; extern unsigned int speculative_prefetch_max_iosize; -extern unsigned int preheat_pages_max; -extern unsigned int preheat_pages_min; +extern unsigned int preheat_max_bytes; +extern unsigned int preheat_min_bytes; extern long numvnodes; extern uuid_string_t bootsessionuuid_string; @@ -247,17 +248,12 @@ STATIC int sysctl_handle_kern_threadname(struct sysctl_oid *oidp, void *arg1, in STATIC int sysctl_sched_stats(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_sched_stats_enable(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_kdebug_ops SYSCTL_HANDLER_ARGS; -STATIC int sysctl_dotranslate SYSCTL_HANDLER_ARGS; -STATIC int sysctl_doaffinity SYSCTL_HANDLER_ARGS; #if COUNT_SYSCALLS STATIC int sysctl_docountsyscalls SYSCTL_HANDLER_ARGS; #endif /* COUNT_SYSCALLS */ STATIC int sysctl_doprocargs SYSCTL_HANDLER_ARGS; STATIC int sysctl_doprocargs2 SYSCTL_HANDLER_ARGS; STATIC int sysctl_prochandle SYSCTL_HANDLER_ARGS; -#if DEBUG -STATIC int sysctl_dodebug SYSCTL_HANDLER_ARGS; -#endif STATIC int sysctl_aiomax(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_aioprocmax(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_aiothreads(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); @@ -288,9 +284,6 @@ STATIC int sysctl_nx(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysct STATIC int sysctl_loadavg(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_vm_toggle_address_reuse(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_swapusage(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); -#if 
defined(__i386__) || defined(__x86_64__) -STATIC int sysctl_sysctl_exec_affinity(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); -#endif STATIC int fetch_process_cputype( proc_t cur_proc, int *name, u_int namelen, cpu_type_t *cputype); STATIC int sysctl_sysctl_native(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); STATIC int sysctl_sysctl_cputype(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); @@ -319,243 +312,6 @@ fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32) la32->fscale = (user32_long_t)la->fscale; } -/* - * sysctl_mem_hold - * - * Description: Wire down the callers address map on behalf of sysctl's - * that perform their own copy operations while holding - * locks e.g. in the paging path, which could lead to a - * deadlock, or while holding a spinlock. - * - * Parameters: addr User buffer address - * len User buffer length - * - * Returns: 0 Success - * vslock:ENOMEM Insufficient physical pages to wire - * vslock:EACCES Bad protection mode - * vslock:EINVAL Invalid parameters - * - * Notes: This code is invoked for the first OID element where the - * CTLFLAG_LOCKED is not specified for a given OID node - * element durng OID traversal, and is held for all - * subsequent node traversals, and only released after the - * leaf node handler invocation is complete. - * - * Legacy: For legacy scyctl's provided by third party code which - * expect funnel protection for calls into their code, this - * routine will also take the funnel, which will also only - * be released after the leaf node handler is complete. - * - * This is to support legacy 32 bit BSD KEXTs and legacy 32 - * bit single threaded filesystem KEXTs and similar code - * which relies on funnel protection, e.g. for things like - * FSID based sysctl's. - * - * NEW CODE SHOULD NOT RELY ON THIS BEHAVIOUR! IT WILL BE - * REMOVED IN A FUTURE RELASE OF Mac OS X! - * - * Bugs: This routine does nothing with the new_addr and new_len - * at present, but it should, since read from the user space - * process adddress space which could potentially trigger - * paging may also be occurring deep down. This is due to - * a current limitation of the vslock() routine, which will - * always request a wired mapping be read/write, due to not - * taking an access mode parameter. Note that this could - * also cause problems for output on architectures where - * write access does not require read acccess if the current - * mapping lacks read access. 
- * - * XXX: To be moved to kern_newsysctl.c to avoid __private_extern__ - */ -int sysctl_mem_lock(user_addr_t old_addr, user_size_t old_len, user_addr_t new_addr, user_size_t new_len); -int -sysctl_mem_lock(__unused user_addr_t old_addr, __unused user_size_t old_len, __unused user_addr_t new_addr, __unused user_size_t new_len) -{ - return 0; -} - -/* - * Locking and stats - */ - -/* sysctl() syscall */ -int -__sysctl(proc_t p, struct __sysctl_args *uap, __unused int32_t *retval) -{ - boolean_t funnel_state = FALSE; /* not held if unknown */ - int error; - size_t savelen = 0, oldlen = 0, newlen; - int name[CTL_MAXNAME]; - int error1; - boolean_t vslock_taken = FALSE; - boolean_t funnel_taken = FALSE; -#if CONFIG_MACF - kauth_cred_t my_cred; -#endif - - /* - * all top-level sysctl names are non-terminal - */ - if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) - return (EINVAL); - error = copyin(uap->name, &name[0], uap->namelen * sizeof(int)); - if (error) - return (error); - - AUDIT_ARG(ctlname, name, uap->namelen); - - if (proc_is64bit(p)) { - /* uap->newlen is a size_t value which grows to 64 bits - * when coming from a 64-bit process. since it's doubtful we'll - * have a sysctl newp buffer greater than 4GB we shrink it to size_t - */ - newlen = CAST_DOWN(size_t, uap->newlen); - } - else { - newlen = uap->newlen; - } - -/* - * XXX TODO: push down rights check for CTL_HW OIDs; most duplicate - * XXX it anyway, which is a performance sink, and requires use - * XXX of SUID root programs (see ). - * - * Note: Opt out of non-leaf node enforcement by removing this - * check for the top level OID value, and then adding - * CTLFLAG_ANYBODY to the leaf nodes in question. Enforce as - * suser for writed in leaf nodes by omitting this flag. - * Enforce with a higher granularity by making the leaf node - * of type SYSCTL_PROC() in order to provide a procedural - * enforcement call site. - * - * NOTE: This function is called prior to any subfunctions being - * called with a fallback to userland_sysctl(); as such, this - * permissions check here will veto the fallback operation. - */ - /* CTL_UNSPEC is used to get oid to AUTO_OID */ - if (uap->new != USER_ADDR_NULL - && ((name[0] == CTL_HW) - || (name[0] == CTL_VM)) - && (error = suser(kauth_cred_get(), &p->p_acflag))) - return (error); - -// XXX need to relocate into each terminal instead of leaving this here... -// XXX macf preemptory check. -#if CONFIG_MACF - my_cred = kauth_cred_proc_ref(p); - error = mac_system_check_sysctl( - my_cred, - (int *) name, - uap->namelen, - uap->old, - uap->oldlenp, - 0, /* XXX 1 for CTL_KERN checks */ - uap->new, - newlen - ); - kauth_cred_unref(&my_cred); - if (error) - return (error); -#endif - - if (uap->oldlenp != USER_ADDR_NULL) { - uint64_t oldlen64 = fuulong(uap->oldlenp); - - oldlen = CAST_DOWN(size_t, oldlen64); - /* - * If more than 4G, clamp to 4G - useracc() below will catch - * with an EFAULT, if it's actually necessary. - */ - if (oldlen64 > 0x00000000ffffffffULL) - oldlen = 0xffffffffUL; - } - - if ((name[0] == CTL_VFS || name[0] == CTL_VM)) { - /* - * Always take the funnel for CTL_VFS and CTL_VM - * - * XXX We should also take it for any OID without the - * XXX CTLFLAG_LOCKED set on it; fix this later! 
- */ - funnel_state = thread_funnel_set(kernel_flock, TRUE); - funnel_taken = TRUE; - - /* - * XXX Take the vslock() only when we are copying out; this - * XXX erroneously assumes that the copy in will not cause - * XXX a fault if caled from the paging path due to the - * XXX having been recently touched in order to establish - * XXX the input data. This is a bad assumption. - * - * Note: This is overkill, but third parties might - * already call sysctl internally in KEXTs that - * implement mass storage drivers. If you are - * writing a new KEXT, don't do that. - */ - if(uap->old != USER_ADDR_NULL) { - if (!useracc(uap->old, (user_size_t)oldlen, B_WRITE)) { - thread_funnel_set(kernel_flock, funnel_state); - return (EFAULT); - } - - if (oldlen) { - if ((error = vslock(uap->old, (user_size_t)oldlen))) { - thread_funnel_set(kernel_flock, funnel_state); - return(error); - } - savelen = oldlen; - vslock_taken = TRUE; - } - } - } - - /* - * XXX convert vfs_sysctl subelements to newsysctl; this is hard - * XXX because of VFS_NUMMNTOPS being top level. - */ - error = ENOTSUP; - if (name[0] == CTL_VFS) { - error = vfs_sysctl(name + 1, uap->namelen - 1, uap->old, - &oldlen, uap->new, newlen, p); - } - - if (vslock_taken == TRUE) { - error1 = vsunlock(uap->old, (user_size_t)savelen, B_WRITE); - if (!error) - error = error1; - } - - if ( (name[0] != CTL_VFS) && (error == ENOTSUP) ) { - size_t tmp = oldlen; - error = userland_sysctl(p, name, uap->namelen, uap->old, &tmp, - uap->new, newlen, &oldlen); - } - - /* - * If we took the funnel, which we only do for CTL_VFS and CTL_VM on - * 32 bit architectures, then drop it. - * - * XXX the grabbing and dropping need to move into the leaf nodes, - * XXX for sysctl's that are not marked CTLFLAG_LOCKED, but this is - * XXX true for the vslock, as well. We have a start at a routine - * to wrapper this (above), but it's not turned on. The current code - * removed the funnel and the vslock() from all but these two top - * level OIDs. Note that VFS only needs to take the funnel if the FS - * against which it's operating is not thread safe (but since an FS - * can be in the paging path, it still needs to take the vslock()). - */ - if (funnel_taken) - thread_funnel_set(kernel_flock, funnel_state); - - if ((error) && (error != ENOMEM)) - return (error); - - if (uap->oldlenp != USER_ADDR_NULL) - error = suulong(uap->oldlenp, oldlen); - - return (error); -} - /* * Attributes stored in the kernel. */ @@ -573,105 +329,6 @@ int securelevel = -1; int securelevel; #endif -STATIC int -sysctl_doaffinity SYSCTL_HANDLER_ARGS -{ - __unused int cmd = oidp->oid_arg2; /* subcommand*/ - int *name = arg1; /* oid element argument vector */ - int namelen = arg2; /* number of oid element arguments */ - user_addr_t oldp = req->oldptr; /* user buffer copy out address */ - size_t *oldlenp = &req->oldlen; /* user buffer copy out size */ - user_addr_t newp = req->newptr; /* user buffer copy in address */ -// size_t newlen = req->newlen; /* user buffer copy in size */ - - int error = ENOTSUP; /* Default to failure */ - - proc_t cur_proc = current_proc(); - - if (namelen < 1) - return (ENOTSUP); - - if (name[0] == 0 && 1 == namelen) { - error = sysctl_rdint(oldp, oldlenp, newp, - (cur_proc->p_flag & P_AFFINITY) ? 
1 : 0); - } else if (name[0] == 1 && 2 == namelen) { - if (name[1] == 0) { - OSBitAndAtomic(~((uint32_t)P_AFFINITY), &cur_proc->p_flag); - } else { - OSBitOrAtomic(P_AFFINITY, &cur_proc->p_flag); - } - error = 0; - } - - /* adjust index so we return the right required/consumed amount */ - if (!error) - req->oldidx += req->oldlen; - - return (error); -} -SYSCTL_PROC(_kern, KERN_AFFINITY, affinity, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED, - 0, /* Pointer argument (arg1) */ - 0, /* Integer argument (arg2) */ - sysctl_doaffinity, /* Handler function */ - NULL, /* Data pointer */ - ""); - -STATIC int -sysctl_dotranslate SYSCTL_HANDLER_ARGS -{ - __unused int cmd = oidp->oid_arg2; /* subcommand*/ - int *name = arg1; /* oid element argument vector */ - int namelen = arg2; /* number of oid element arguments */ - user_addr_t oldp = req->oldptr; /* user buffer copy out address */ - size_t *oldlenp = &req->oldlen; /* user buffer copy out size */ - user_addr_t newp = req->newptr; /* user buffer copy in address */ -// size_t newlen = req->newlen; /* user buffer copy in size */ - int error; - - proc_t cur_proc = current_proc(); - proc_t p; - int istranslated = 0; - kauth_cred_t my_cred; - uid_t uid; - - if (namelen != 1) - return (ENOTSUP); - - p = proc_find(name[0]); - if (p == NULL) - return (EINVAL); - - my_cred = kauth_cred_proc_ref(p); - uid = kauth_cred_getuid(my_cred); - kauth_cred_unref(&my_cred); - if ((uid != kauth_cred_getuid(kauth_cred_get())) - && suser(kauth_cred_get(), &cur_proc->p_acflag)) { - proc_rele(p); - return (EPERM); - } - - istranslated = (p->p_flag & P_TRANSLATED); - proc_rele(p); - error = sysctl_rdint(oldp, oldlenp, newp, - (istranslated != 0) ? 1 : 0); - - /* adjust index so we return the right required/consumed amount */ - if (!error) - req->oldidx += req->oldlen; - - return (error); -} -/* - * XXX make CTLFLAG_RW so sysctl_rdint() will EPERM on attempts to write; - * XXX this may not be necessary. - */ -SYSCTL_PROC(_kern, KERN_TRANSLATE, translate, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_LOCKED, - 0, /* Pointer argument (arg1) */ - 0, /* Integer argument (arg2) */ - sysctl_dotranslate, /* Handler function */ - NULL, /* Data pointer */ - ""); - STATIC int sysctl_handle_kern_threadname( __unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) @@ -867,82 +524,6 @@ SYSCTL_PROC(_kern, KERN_COUNT_SYSCALLS, count_syscalls, CTLTYPE_NODE|CTLFLAG_RD ""); #endif /* COUNT_SYSCALLS */ -#if DEBUG -/* - * Debugging related system variables. 
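The two handlers deleted above still went through the legacy sysctl_rdint() copy-out path. For contrast, a hedged sketch of how the same kind of OID is written against the kern_newsysctl.c interface used elsewhere in this file (see sysctl_coredump later in this patch); the OID name and backing variable are illustrative only:

static int example_value;

STATIC int
sysctl_example(__unused struct sysctl_oid *oidp, __unused void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int new_value, changed;
	/* copies the old value out and, for RW OIDs, the new value in */
	int error = sysctl_io_number(req, example_value, sizeof(int),
	    &new_value, &changed);

	if (error == 0 && changed)
		example_value = new_value;
	return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, example,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_example, "I", "illustrative replacement-style OID");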
- */ -#if DIAGNOSTIC -extern -#endif /* DIAGNOSTIC */ -struct ctldebug debug0, debug1; -struct ctldebug debug2, debug3, debug4; -struct ctldebug debug5, debug6, debug7, debug8, debug9; -struct ctldebug debug10, debug11, debug12, debug13, debug14; -struct ctldebug debug15, debug16, debug17, debug18, debug19; -STATIC struct ctldebug *debugvars[CTL_DEBUG_MAXID] = { - &debug0, &debug1, &debug2, &debug3, &debug4, - &debug5, &debug6, &debug7, &debug8, &debug9, - &debug10, &debug11, &debug12, &debug13, &debug14, - &debug15, &debug16, &debug17, &debug18, &debug19, -}; -STATIC int -sysctl_dodebug SYSCTL_HANDLER_ARGS -{ - int cmd = oidp->oid_arg2; /* subcommand*/ - int *name = arg1; /* oid element argument vector */ - int namelen = arg2; /* number of oid element arguments */ - user_addr_t oldp = req->oldptr; /* user buffer copy out address */ - size_t *oldlenp = &req->oldlen; /* user buffer copy out size */ - user_addr_t newp = req->newptr; /* user buffer copy in address */ - size_t newlen = req->newlen; /* user buffer copy in size */ - int error; - - struct ctldebug *cdp; - - /* all sysctl names at this level are name and field */ - if (namelen != 1) - return (ENOTSUP); /* overloaded */ - if (cmd < 0 || cmd >= CTL_DEBUG_MAXID) - return (ENOTSUP); - cdp = debugvars[cmd]; - if (cdp->debugname == 0) - return (ENOTSUP); - switch (name[0]) { - case CTL_DEBUG_NAME: - error = sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname); - break; - case CTL_DEBUG_VALUE: - error = sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar); - break; - default: - error = ENOTSUP; - break; - } - - /* adjust index so we return the right required/consumed amount */ - if (!error) - req->oldidx += req->oldlen; - - return (error); -} -/* - * XXX We mark this RW instead of RD to let sysctl_rdstring() return the - * XXX historical error. - */ -SYSCTL_PROC(_debug, CTL_DEBUG_NAME, name, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_LOCKED, - 0, /* Pointer argument (arg1) */ - CTL_DEBUG_NAME, /* Integer argument (arg2) */ - sysctl_dodebug, /* Handler function */ - NULL, /* Data pointer */ - "Debugging"); -SYSCTL_PROC(_debug, CTL_DEBUG_VALUE, value, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_LOCKED, - 0, /* Pointer argument (arg1) */ - CTL_DEBUG_VALUE, /* Integer argument (arg2) */ - sysctl_dodebug, /* Handler function */ - NULL, /* Data pointer */ - "Debugging"); -#endif /* DEBUG */ - /* * The following sysctl_* functions should not be used * any more, as they can only cope with callers in @@ -979,26 +560,6 @@ sysctl_int(user_addr_t oldp, size_t *oldlenp, return (error); } -/* - * As above, but read-only. - */ -int -sysctl_rdint(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, int val) -{ - int error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && *oldlenp < sizeof(int)) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = sizeof(int); - if (oldp) - error = copyout((caddr_t)&val, oldp, sizeof(int)); - return (error); -} - /* * Validate parameters and get old / set new parameters * for an quad(64bit)-valued sysctl function. @@ -1023,167 +584,6 @@ sysctl_quad(user_addr_t oldp, size_t *oldlenp, return (error); } -/* - * As above, but read-only. 
- */ -int -sysctl_rdquad(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, quad_t val) -{ - int error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && *oldlenp < sizeof(quad_t)) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = sizeof(quad_t); - if (oldp) - error = copyout((caddr_t)&val, oldp, sizeof(quad_t)); - return (error); -} - -/* - * Validate parameters and get old / set new parameters - * for a string-valued sysctl function. Unlike sysctl_string, if you - * give it a too small (but larger than 0 bytes) buffer, instead of - * returning ENOMEM, it truncates the returned string to the buffer - * size. This preserves the semantics of some library routines - * implemented via sysctl, which truncate their returned data, rather - * than simply returning an error. The returned string is always NUL - * terminated. - */ -int -sysctl_trstring(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, char *str, int maxlen) -{ - int len, copylen, error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - copylen = len = strlen(str) + 1; - if (oldp && (len < 0 || *oldlenp < 1)) - return (ENOMEM); - if (oldp && (*oldlenp < (size_t)len)) - copylen = *oldlenp + 1; - if (newp && (maxlen < 0 || newlen >= (size_t)maxlen)) - return (EINVAL); - *oldlenp = copylen - 1; /* deal with NULL strings correctly */ - if (oldp) { - error = copyout(str, oldp, copylen); - if (!error) { - unsigned char c = 0; - /* NUL terminate */ - oldp += *oldlenp; - error = copyout((void *)&c, oldp, sizeof(char)); - } - } - if (error == 0 && newp) { - error = copyin(newp, str, newlen); - str[newlen] = 0; - AUDIT_ARG(text, (char *)str); - } - return (error); -} - -/* - * Validate parameters and get old / set new parameters - * for a string-valued sysctl function. - */ -int -sysctl_string(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, char *str, int maxlen) -{ - int len, error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - len = strlen(str) + 1; - if (oldp && (len < 0 || *oldlenp < (size_t)len)) - return (ENOMEM); - if (newp && (maxlen < 0 || newlen >= (size_t)maxlen)) - return (EINVAL); - *oldlenp = len -1; /* deal with NULL strings correctly */ - if (oldp) { - error = copyout(str, oldp, len); - } - if (error == 0 && newp) { - error = copyin(newp, str, newlen); - str[newlen] = 0; - AUDIT_ARG(text, (char *)str); - } - return (error); -} - -/* - * As above, but read-only. - */ -int -sysctl_rdstring(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, char *str) -{ - int len, error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - len = strlen(str) + 1; - if (oldp && *oldlenp < (size_t)len) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = len; - if (oldp) - error = copyout(str, oldp, len); - return (error); -} - -/* - * Validate parameters and get old / set new parameters - * for a structure oriented sysctl function. 
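The truncating versus non-truncating distinction documented for the two string helpers above survives in the modern interface: sysctl_io_string() takes a trunc flag that selects between failing a short user buffer and truncating into it with NUL termination. A hedged sketch; the handler and backing string are illustrative, not part of this patch:

static char example_str[64] = "example";

STATIC int
sysctl_example_string(__unused struct sysctl_oid *oidp, __unused void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int changed;

	/*
	 * trunc == 0 behaves like sysctl_string() (short buffer fails
	 * with ENOMEM); trunc != 0 behaves like sysctl_trstring()
	 * (truncate, always NUL terminate), preserving the semantics
	 * of the library routines described above.
	 */
	return (sysctl_io_string(req, example_str, sizeof(example_str),
	    1, &changed));
}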
- */ -int -sysctl_struct(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, void *sp, int len) -{ - int error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && (len < 0 || *oldlenp < (size_t)len)) - return (ENOMEM); - if (newp && (len < 0 || newlen > (size_t)len)) - return (EINVAL); - if (oldp) { - *oldlenp = len; - error = copyout(sp, oldp, len); - } - if (error == 0 && newp) - error = copyin(newp, sp, len); - return (error); -} - -/* - * Validate parameters and get old parameters - * for a structure oriented sysctl function. - */ -int -sysctl_rdstruct(user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, void *sp, int len) -{ - int error = 0; - - if (oldp != USER_ADDR_NULL && oldlenp == NULL) - return (EFAULT); - if (oldp && (len < 0 || *oldlenp < (size_t)len)) - return (ENOMEM); - if (newp) - return (EPERM); - *oldlenp = len; - if (oldp) - error = copyout(sp, oldp, len); - return (error); -} - STATIC int sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg) { @@ -1205,12 +605,9 @@ sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg) STATIC int sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg) { - boolean_t funnel_state; int retval; struct tty *tp; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); /* This is very racy but list lock is held.. Hmmm. */ if ((p->p_flag & P_CONTROLT) == 0 || (p->p_pgrp == NULL) || (p->p_pgrp->pg_session == NULL) || @@ -1220,8 +617,6 @@ sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg) else retval = 1; - thread_funnel_set(kernel_flock, funnel_state); - return(retval); } @@ -2368,6 +1763,13 @@ SYSCTL_PROC(_kern, KERN_AIOTHREADS, aiothreads, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_aiothreads, "I", ""); +#if (DEVELOPMENT || DEBUG) +extern int sched_smt_balance; +SYSCTL_INT(_kern, OID_AUTO, sched_smt_balance, + CTLFLAG_KERN| CTLFLAG_RW| CTLFLAG_LOCKED, + &sched_smt_balance, 0, ""); +#endif + STATIC int sysctl_securelvl (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) @@ -2449,13 +1851,13 @@ SYSCTL_INT(_kern, OID_AUTO, ignore_is_ssd, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, &ignore_is_ssd, 0, ""); -SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_max, +SYSCTL_UINT(_kern, OID_AUTO, preheat_max_bytes, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, - &preheat_pages_max, 0, ""); + &preheat_max_bytes, 0, ""); -SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_min, +SYSCTL_UINT(_kern, OID_AUTO, preheat_min_bytes, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, - &preheat_pages_min, 0, ""); + &preheat_min_bytes, 0, ""); SYSCTL_UINT(_kern, OID_AUTO, speculative_prefetch_max, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, @@ -2518,7 +1920,7 @@ sysctl_boottime } SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, - CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + CTLTYPE_STRUCT | CTLFLAG_KERN | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_boottime, "S,timeval", ""); STATIC int @@ -2660,6 +2062,7 @@ SYSCTL_PROC(_kern, OID_AUTO, imgsrcinfo, SYSCTL_DECL(_kern_timer); SYSCTL_NODE(_kern, OID_AUTO, timer, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "timer"); + SYSCTL_INT(_kern_timer, OID_AUTO, coalescing_enabled, CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, &mach_timer_coalescing_enabled, 0, ""); @@ -2674,6 +2077,7 @@ SYSCTL_QUAD(_kern_timer, OID_AUTO, deadline_tracking_bin_2, SYSCTL_DECL(_kern_timer_longterm); SYSCTL_NODE(_kern_timer, OID_AUTO, longterm, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "longterm"); + /* Must match definition in osfmk/kern/timer_call.c */ enum { THRESHOLD, QCOUNT, @@ -2764,8 
+2168,9 @@ sysctl_coredump (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { #ifdef SECURE_KERNEL + (void)req; return (ENOTSUP); -#endif +#else int new_value, changed; int error = sysctl_io_number(req, do_coredump, sizeof(int), &new_value, &changed); if (changed) { @@ -2775,6 +2180,7 @@ sysctl_coredump error = EINVAL; } return(error); +#endif } SYSCTL_PROC(_kern, KERN_COREDUMP, coredump, @@ -2786,8 +2192,9 @@ sysctl_suid_coredump (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { #ifdef SECURE_KERNEL + (void)req; return (ENOTSUP); -#endif +#else int new_value, changed; int error = sysctl_io_number(req, sugid_coredump, sizeof(int), &new_value, &changed); if (changed) { @@ -2797,6 +2204,7 @@ sysctl_suid_coredump error = EINVAL; } return(error); +#endif } SYSCTL_PROC(_kern, KERN_SUGID_COREDUMP, sugid_coredump, @@ -2956,8 +2364,9 @@ sysctl_nx (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { #ifdef SECURE_KERNEL + (void)req; return ENOTSUP; -#endif +#else int new_value, changed; int error; @@ -2976,6 +2385,7 @@ sysctl_nx nx_enabled = new_value; } return(error); +#endif /* SECURE_KERNEL */ } @@ -3071,7 +2481,7 @@ sysctl_freeze_enabled SYSCTL_HANDLER_ARGS if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { //assert(req->newptr); - printf("Failed this request to set the sysctl\n"); + printf("Failed attempt to set vm.freeze_enabled sysctl\n"); return EINVAL; } @@ -3097,38 +2507,6 @@ SYSCTL_INT(_kern, KERN_SHREG_PRIVATIZABLE, shreg_private, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)NULL, 0, ""); -#if defined(__i386__) || defined(__x86_64__) -STATIC int -sysctl_sysctl_exec_affinity(__unused struct sysctl_oid *oidp, - __unused void *arg1, __unused int arg2, - struct sysctl_req *req) -{ - proc_t cur_proc = req->p; - int error; - - if (req->oldptr != USER_ADDR_NULL) { - cpu_type_t oldcputype = (cur_proc->p_flag & P_AFFINITY) ? 
CPU_TYPE_POWERPC : CPU_TYPE_I386; - if ((error = SYSCTL_OUT(req, &oldcputype, sizeof(oldcputype)))) - return error; - } - - if (req->newptr != USER_ADDR_NULL) { - cpu_type_t newcputype; - if ((error = SYSCTL_IN(req, &newcputype, sizeof(newcputype)))) - return error; - if (newcputype == CPU_TYPE_I386) - OSBitAndAtomic(~((uint32_t)P_AFFINITY), &cur_proc->p_flag); - else if (newcputype == CPU_TYPE_POWERPC) - OSBitOrAtomic(P_AFFINITY, &cur_proc->p_flag); - else - return (EINVAL); - } - - return 0; -} -SYSCTL_PROC(_sysctl, OID_AUTO, proc_exec_affinity, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY | CTLFLAG_LOCKED, 0, 0, sysctl_sysctl_exec_affinity ,"I","proc_exec_affinity"); -#endif - STATIC int fetch_process_cputype( proc_t cur_proc, @@ -3153,17 +2531,10 @@ fetch_process_cputype( goto out; } -#if defined(__i386__) || defined(__x86_64__) - if (p->p_flag & P_TRANSLATED) { - ret = CPU_TYPE_POWERPC; - } - else -#endif - { - ret = cpu_type() & ~CPU_ARCH_MASK; - if (IS_64BIT_PROCESS(p)) - ret |= CPU_ARCH_ABI64; - } + ret = cpu_type() & ~CPU_ARCH_MASK; + if (IS_64BIT_PROCESS(p)) + ret |= CPU_ARCH_ABI64; + *cputype = ret; if (refheld != 0) @@ -3315,14 +2686,26 @@ SYSCTL_INT(_vm, OID_AUTO, compressor_majorcompact_threshold_divisor, CTLFLAG_RW SYSCTL_INT(_vm, OID_AUTO, compressor_unthrottle_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_unthrottle_threshold_divisor, 0, ""); SYSCTL_INT(_vm, OID_AUTO, compressor_catchup_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_catchup_threshold_divisor, 0, ""); +SYSCTL_STRING(_vm, OID_AUTO, swapfileprefix, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, swapfilename, sizeof(swapfilename) - SWAPFILENAME_INDEX_LEN, ""); + +#if CONFIG_PHANTOM_CACHE +extern uint32_t phantom_cache_thrashing_threshold; +extern uint32_t phantom_cache_eval_period_in_msecs; +extern uint32_t phantom_cache_thrashing_threshold_ssd; + + +SYSCTL_INT(_vm, OID_AUTO, phantom_cache_eval_period_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &phantom_cache_eval_period_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, phantom_cache_thrashing_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &phantom_cache_thrashing_threshold, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, phantom_cache_thrashing_threshold_ssd, CTLFLAG_RW | CTLFLAG_LOCKED, &phantom_cache_thrashing_threshold_ssd, 0, ""); +#endif + /* - * enable back trace events for thread blocks + * Enable tracing of voucher contents */ +extern uint32_t ipc_voucher_trace_contents; -extern uint32_t kdebug_thread_block; - -SYSCTL_INT (_kern, OID_AUTO, kdebug_thread_block, - CTLFLAG_RW | CTLFLAG_LOCKED, &kdebug_thread_block, 0, "kdebug thread_block"); +SYSCTL_INT (_kern, OID_AUTO, ipc_voucher_trace_contents, + CTLFLAG_RW | CTLFLAG_LOCKED, &ipc_voucher_trace_contents, 0, "Enable tracing voucher contents"); /* * Kernel stack size and depth @@ -3361,3 +2744,184 @@ SYSCTL_STRING(_kern, OID_AUTO, sched, * Only support runtime modification on embedded platforms * with development config enabled */ + + +/* Parameters related to timer coalescing tuning, to be replaced + * with a dedicated systemcall in the future. 
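The handler that follows exchanges values with user space in nanoseconds while the kernel stores Mach absolute-time units, so every read and write round-trips through the existing clock conversion routines. A condensed sketch of that pattern:

uint64_t ns, abstime;

/* copy-out path: stored abstime becomes nanoseconds for the user */
absolutetime_to_nanoseconds(abstime, &ns);

/* copy-in path: user nanoseconds become abstime before storing */
nanoseconds_to_absolutetime(ns, &abstime);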
+ */ +/* Enable processing pending timers in the context of any other interrupt + * Coalescing tuning parameters for various thread/task attributes */ +STATIC int +sysctl_timer_user_us_kernel_abstime SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp) + int size = arg2; /* subcommand*/ + int error; + int changed = 0; + uint64_t old_value_ns; + uint64_t new_value_ns; + uint64_t value_abstime; + if (size == sizeof(uint32_t)) + value_abstime = *((uint32_t *)arg1); + else if (size == sizeof(uint64_t)) + value_abstime = *((uint64_t *)arg1); + else return ENOTSUP; + + absolutetime_to_nanoseconds(value_abstime, &old_value_ns); + error = sysctl_io_number(req, old_value_ns, sizeof(old_value_ns), &new_value_ns, &changed); + if ((error) || (!changed)) + return error; + + nanoseconds_to_absolutetime(new_value_ns, &value_abstime); + if (size == sizeof(uint32_t)) + *((uint32_t *)arg1) = (uint32_t)value_abstime; + else + *((uint64_t *)arg1) = value_abstime; + return error; +} + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_bg_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_bg_shift, 0, ""); +SYSCTL_PROC(_kern, OID_AUTO, timer_resort_threshold_ns, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_resort_threshold_abstime, + sizeof(tcoal_prio_params.timer_resort_threshold_abstime), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_bg_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_bg_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_bg_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_kt_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_kt_shift, 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_kt_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_kt_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_kt_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_fp_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_fp_shift, 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_fp_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_fp_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_fp_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_ts_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_ts_shift, 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_ts_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_ts_abstime_max, + sizeof(tcoal_prio_params.timer_coalesce_ts_abstime_max), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier0_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[0], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier0_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[0], + sizeof(tcoal_prio_params.latency_qos_abstime_max[0]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier1_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + 
&tcoal_prio_params.latency_qos_scale[1], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier1_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[1], + sizeof(tcoal_prio_params.latency_qos_abstime_max[1]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier2_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[2], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier2_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[2], + sizeof(tcoal_prio_params.latency_qos_abstime_max[2]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier3_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[3], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier3_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[3], + sizeof(tcoal_prio_params.latency_qos_abstime_max[3]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier4_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[4], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier4_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[4], + sizeof(tcoal_prio_params.latency_qos_abstime_max[4]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier5_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[5], 0, ""); + +SYSCTL_PROC(_kern, OID_AUTO, timer_coalesce_tier5_ns_max, + CTLTYPE_QUAD | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_abstime_max[5], + sizeof(tcoal_prio_params.latency_qos_abstime_max[5]), + sysctl_timer_user_us_kernel_abstime, + "Q", ""); + +/* Communicate the "user idle level" heuristic to the timer layer, and + * potentially other layers in the future. + */ + +static int +timer_user_idle_level(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { + int new_value = 0, old_value = 0, changed = 0, error; + + old_value = timer_get_user_idle_level(); + + error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed); + + if (error == 0 && changed) { + if (timer_set_user_idle_level(new_value) != KERN_SUCCESS) + error = ERANGE; + } + + return error; +} + +SYSCTL_PROC(_machdep, OID_AUTO, user_idle_level, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, + timer_user_idle_level, "I", "User idle level heuristic, 0-128"); + +#if HYPERVISOR +SYSCTL_INT(_kern, OID_AUTO, hv_support, + CTLFLAG_KERN | CTLFLAG_RD | CTLFLAG_LOCKED, + &hv_support_available, 0, ""); +#endif diff --git a/bsd/kern/kern_xxx.c b/bsd/kern/kern_xxx.c index 293808838..fc94a14e0 100644 --- a/bsd/kern/kern_xxx.c +++ b/bsd/kern/kern_xxx.c @@ -67,8 +67,6 @@ * Version 2.0. */ -#include - #include #include #include diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c index d3cfc8deb..faaf98c29 100644 --- a/bsd/kern/kpi_mbuf.c +++ b/bsd/kern/kpi_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. 
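A user-space sketch (not part of this patch) of exercising the machdep.user_idle_level OID registered above; per the handler, a write that timer_set_user_idle_level() rejects comes back as ERANGE:

#include <sys/types.h>
#include <sys/sysctl.h>

int level;
size_t len = sizeof(level);

/* read the current heuristic (0-128) */
sysctlbyname("machdep.user_idle_level", &level, &len, NULL, 0);

/* propose a new one; 64 is an arbitrary mid-range value */
level = 64;
sysctlbyname("machdep.user_idle_level", NULL, NULL, &level, sizeof(level));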
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * @@ -492,12 +492,22 @@ errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags) { errno_t ret = 0; + mbuf_flags_t oflags = mbuf->m_flags; - if ((flags | (mbuf->m_flags & mbuf_flags_mask)) & - (~mbuf_flags_mask | mbuf_cflags_mask)) { + /* + * 1. Return error if public but un-alterable flags are changed + * in the flags argument. + * 2. Return error if bits other than public flags are set in the + * passed flags argument. + * Note that kexts must pass private flag bits as cleared, since they + * must use the mbuf_flags KPI to obtain the current set of mbuf flags + * and that KPI does not expose private flags. + */ + if ((flags ^ oflags) & mbuf_cflags_mask) { + ret = EINVAL; + } else if (flags & ~mbuf_flags_mask) { ret = EINVAL; } else { - mbuf_flags_t oflags = mbuf->m_flags; mbuf->m_flags = flags | (mbuf->m_flags & ~mbuf_flags_mask); /* * If M_PKTHDR bit has changed, we have work to do; @@ -519,7 +529,7 @@ mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags, mbuf_flags_t mask) { errno_t ret = 0; - if ((flags | mask) & (~mbuf_flags_mask | mbuf_cflags_mask)) { + if (mask & (~mbuf_flags_mask | mbuf_cflags_mask)) { ret = EINVAL; } else { mbuf_flags_t oflags = mbuf->m_flags; @@ -554,6 +564,18 @@ size_t mbuf_pkthdr_len(const mbuf_t mbuf) { return mbuf->m_pkthdr.len; } +__private_extern__ size_t mbuf_pkthdr_maxlen(mbuf_t m) +{ + size_t maxlen = 0; + mbuf_t n = m; + + while (n) { + maxlen += mbuf_maxlen(n); + n = mbuf_next(n); + } + return (maxlen); +} + void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len) { mbuf->m_pkthdr.len = len; @@ -1111,7 +1133,31 @@ out: return error; } +__private_extern__ size_t +mbuf_pkt_list_len(mbuf_t m) +{ + size_t len = 0; + mbuf_t n = m; + + while (n) { + len += mbuf_pkthdr_len(n); + n = mbuf_nextpkt(n); + } + return (len); +} +__private_extern__ size_t +mbuf_pkt_list_maxlen(mbuf_t m) +{ + size_t maxlen = 0; + mbuf_t n = m; + + while (n) { + maxlen += mbuf_pkthdr_maxlen(n); + n = mbuf_nextpkt(n); + } + return (maxlen); +} /* * mbuf_copyback differs from m_copyback in a few ways: diff --git a/bsd/kern/kpi_socket.c b/bsd/kern/kpi_socket.c index 1db81b353..07c65bf39 100644 --- a/bsd/kern/kpi_socket.c +++ b/bsd/kern/kpi_socket.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include @@ -1183,7 +1183,7 @@ sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext, sock->so_eventarg = econtext; sock->so_eventmask = emask; } else { - sock->so_event = NULL; + sock->so_event = sonullevent; sock->so_eventarg = NULL; sock->so_eventmask = 0; } @@ -1191,3 +1191,12 @@ sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext, return (0); } + +/* + * Returns true if the socket belongs to the kernel.
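A worked example of why mbuf_setflags() switched to the XOR test above, with hypothetical mask values standing in for the real kpi_mbuf.c constants: under the old test, merely passing back an un-alterable bit that was already set (as the mbuf_flags KPI reports it) was rejected; the new test only rejects an actual change to such a bit.

#include <stdint.h>
#include <assert.h>

#define FLAGS_MASK	0x00ffu	/* public flags (hypothetical value) */
#define CFLAGS_MASK	0x00c0u	/* public but un-alterable (hypothetical) */

int
main(void)
{
	uint32_t oflags = 0x0041;	/* current: one un-alterable bit set */
	uint32_t flags  = 0x0043;	/* caller adds one ordinary public bit */

	/* new test: no un-alterable bit differs, so the call is allowed */
	assert(((flags ^ oflags) & CFLAGS_MASK) == 0);

	/* old test: the already-set 0x40 bit tripped it regardless */
	assert(((flags | (oflags & FLAGS_MASK)) &
	    (~FLAGS_MASK | CFLAGS_MASK)) != 0);
	return (0);
}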
+ */ +int +sock_iskernel(socket_t so) +{ + return (so && so->last_pid == 0); +} diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c index 1b55a8053..ac9dfcb1c 100644 --- a/bsd/kern/kpi_socketfilter.c +++ b/bsd/kern/kpi_socketfilter.c @@ -1375,8 +1375,8 @@ sflt_register(const struct sflt_filter *filter, int domain, int type, sflt_handle handle = filter->sf_handle; so = solisthead->so; + socket_lock(so, 0); sflt_initsock(so); - if (so->so_state & SS_ISCONNECTING) sflt_notify_after_register(so, sock_evt_connecting, handle); @@ -1399,6 +1399,7 @@ sflt_register(const struct sflt_filter *filter, int domain, int type, else if (so->so_state & SS_CANTRCVMORE) sflt_notify_after_register(so, sock_evt_cantrecvmore, handle); + socket_unlock(so, 0); /* XXX no easy way to post the sock_evt_closing event */ sock_release(so); solist = solisthead; diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c index 711729eeb..c590c52d6 100644 --- a/bsd/kern/mach_loader.c +++ b/bsd/kern/mach_loader.c @@ -305,6 +305,7 @@ load_machfile( load_result_t myresult; load_return_t lret; boolean_t create_map = FALSE; + boolean_t enforce_hard_pagezero = TRUE; int spawn = (imgp->ip_flags & IMGPF_SPAWN); task_t task = current_task(); proc_t p = current_proc(); @@ -333,8 +334,15 @@ load_machfile( } if (create_map) { - pmap = pmap_create(get_task_ledger(task), (vm_map_size_t) 0, - (imgp->ip_flags & IMGPF_IS_64BIT)); + task_t ledger_task; + if (imgp->ip_new_thread) { + ledger_task = get_threadtask(imgp->ip_new_thread); + } else { + ledger_task = task; + } + pmap = pmap_create(get_task_ledger(ledger_task), + (vm_map_size_t) 0, + (imgp->ip_flags & IMGPF_IS_64BIT)); pal_switch_pmap(thread, pmap, imgp->ip_flags & IMGPF_IS_64BIT); map = vm_map_create(pmap, 0, @@ -343,6 +351,7 @@ load_machfile( } else map = new_map; + #ifndef CONFIG_ENFORCE_SIGNED_CODE /* This turns off faulting for executable pages, which allows * to circumvent Code Signing Enforcement. The per process @@ -390,16 +399,25 @@ load_machfile( return(lret); } +#if __x86_64__ /* - * For 64-bit users, check for presence of a 4GB page zero - * which will enable the kernel to share the user's address space - * and hence avoid TLB flushes on kernel entry/exit + * On x86, for compatibility, don't enforce the hard page-zero restriction for 32-bit binaries. + */ + if ((imgp->ip_flags & IMGPF_IS_64BIT) == 0) { + enforce_hard_pagezero = FALSE; + } +#endif + /* + * Check to see if the page zero is enforced by the map->min_offset. */ - - if ((imgp->ip_flags & IMGPF_IS_64BIT) && - vm_map_has_4GB_pagezero(map)) { - vm_map_set_4GB_pagezero(map); + if (enforce_hard_pagezero && (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) { + if (create_map) { + vm_map_deallocate(map); /* will lose pmap reference too */ + } + printf("Cannot enforce a hard page-zero for %s\n", imgp->ip_strings); + return (LOAD_BADMACHO); } + /* * Commit to new map. * @@ -437,7 +455,8 @@ load_machfile( */ kret = task_start_halt(task); if (kret != KERN_SUCCESS) { - return(kret); + vm_map_deallocate(map); /* will lose pmap reference too */ + return (LOAD_FAILURE); } proc_transcommit(p, 0); workqueue_mark_exiting(p); @@ -445,7 +464,6 @@ load_machfile( workqueue_exit(p); } old_map = swap_task_map(old_task, thread, map, !spawn); - vm_map_clear_4GB_pagezero(old_map); vm_map_deallocate(old_map); } return(LOAD_SUCCESS); @@ -745,7 +763,7 @@ parse_machfile( file_offset, macho_size, header->cputype, - (depth == 1) ? 
result : NULL); + result); if (ret != LOAD_SUCCESS) { printf("proc %d: load code signature error %d " "for file \"%s\"\n", @@ -795,14 +813,23 @@ parse_machfile( if (ret != LOAD_SUCCESS) break; } + if (ret == LOAD_SUCCESS) { if (! got_code_signatures) { struct cs_blob *blob; /* no embedded signatures: look for detached ones */ blob = ubc_cs_blob_get(vp, -1, file_offset); if (blob != NULL) { - /* get flags to be applied to the process */ - result->csflags |= blob->csb_flags; + unsigned int cs_flag_data = blob->csb_flags; + if(0 != ubc_cs_generation_check(vp)) { + if (0 != ubc_cs_blob_revalidate(vp, blob)) { + /* clear out the flag data if revalidation fails */ + cs_flag_data = 0; + result->csflags &= ~CS_VALID; + } + } + /* get flags to be applied to the process */ + result->csflags |= cs_flag_data; } } @@ -812,14 +839,13 @@ parse_machfile( } if ((ret == LOAD_SUCCESS) && (dlp != 0)) { - /* - * load the dylinker, and slide it by the independent DYLD ASLR - * offset regardless of the PIE-ness of the main binary. - */ - - ret = load_dylinker(dlp, dlarchbits, map, thread, depth, - dyld_aslr_offset, result); - } + /* + * load the dylinker, and slide it by the independent DYLD ASLR + * offset regardless of the PIE-ness of the main binary. + */ + ret = load_dylinker(dlp, dlarchbits, map, thread, depth, + dyld_aslr_offset, result); + } if((ret == LOAD_SUCCESS) && (depth == 1)) { if (result->thread_count == 0) { @@ -839,7 +865,7 @@ parse_machfile( #define APPLE_UNPROTECTED_HEADER_SIZE (3 * PAGE_SIZE_64) static load_return_t -unprotect_segment( +unprotect_dsmos_segment( uint64_t file_off, uint64_t file_size, struct vnode *vp, @@ -892,7 +918,7 @@ unprotect_segment( } #else /* CONFIG_CODE_DECRYPTION */ static load_return_t -unprotect_segment( +unprotect_dsmos_segment( __unused uint64_t file_off, __unused uint64_t file_size, __unused struct vnode *vp, @@ -927,7 +953,6 @@ load_segment( vm_prot_t maxprot; size_t segment_command_size, total_section_size, single_section_size; - boolean_t prohibit_pagezero_mapping = FALSE; if (LC_SEGMENT_64 == lcp->cmd) { segment_command_size = sizeof(struct segment_command_64); @@ -991,25 +1016,19 @@ load_segment( */ seg_size += slide; slide = 0; - /* XXX (4596982) this interferes with Rosetta, so limit to 64-bit tasks */ - if (scp->cmd == LC_SEGMENT_64) { - prohibit_pagezero_mapping = TRUE; - } - - if (prohibit_pagezero_mapping) { - /* - * This is a "page zero" segment: it starts at address 0, - * is not mapped from the binary file and is not accessible. - * User-space should never be able to access that memory, so - * make it completely off limits by raising the VM map's - * minimum offset. - */ - ret = vm_map_raise_min_offset(map, seg_size); - if (ret != KERN_SUCCESS) { - return (LOAD_FAILURE); - } - return (LOAD_SUCCESS); + + /* + * This is a "page zero" segment: it starts at address 0, + * is not mapped from the binary file and is not accessible. + * User-space should never be able to access that memory, so + * make it completely off limits by raising the VM map's + * minimum offset. 
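The practical effect of the raised minimum offset, as a user-space sketch: once __PAGEZERO is enforced this way, nothing can be mapped at address 0, so NULL dereferences always fault. This is illustrative of the expected behavior, not a test from this patch:

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	/* try to place a page at address 0; a hard page zero forbids it */
	void *p = mmap((void *)0, 0x1000, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);

	/* expected: MAP_FAILED, since 0 lies below the map's minimum offset */
	printf("mmap at 0: %s\n", p == MAP_FAILED ? "refused" : "mapped!");
	return (0);
}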
+ */ + ret = vm_map_raise_min_offset(map, seg_size); + if (ret != KERN_SUCCESS) { + return (LOAD_FAILURE); } + return (LOAD_SUCCESS); } /* If a non-zero slide was specified by the caller, apply now */ @@ -1086,7 +1105,7 @@ load_segment( result->mach_header = map_addr; if (scp->flags & SG_PROTECTED_VERSION_1) { - ret = unprotect_segment(scp->fileoff, + ret = unprotect_dsmos_segment(scp->fileoff, scp->filesize, vp, pager_offset, @@ -1527,6 +1546,9 @@ load_dylinker( result->validentry = myresult->validentry; result->all_image_info_addr = myresult->all_image_info_addr; result->all_image_info_size = myresult->all_image_info_size; + if (myresult->platform_binary) { + result->csflags |= CS_DYLD_PLATFORM; + } } out: vnode_put(vp); @@ -1563,20 +1585,23 @@ load_code_signature( } blob = ubc_cs_blob_get(vp, cputype, -1); - if (blob != NULL && - blob->csb_cpu_type == cputype && - blob->csb_base_offset == macho_offset && - blob->csb_blob_offset == lcp->dataoff && - blob->csb_mem_size == lcp->datasize) { - /* - * we already have a blob for this vnode and cputype - * and its at the same offset in Mach-O. Optimize to - * not reload, revalidate, and compare the blob hashes. - * Security will not be compromised, but we might miss - * out on some messagetracer info about the differences - * in blob content. - */ - ret = LOAD_SUCCESS; + if (blob != NULL) { + /* we already have a blob for this vnode and cputype */ + if (blob->csb_cpu_type == cputype && + blob->csb_base_offset == macho_offset && + blob->csb_mem_size == lcp->datasize) { + /* it matches the blob we want here, let's verify the version */ + if(0 != ubc_cs_generation_check(vp)) { + if (0 != ubc_cs_blob_revalidate(vp, blob)) { + ret = LOAD_FAILURE; /* set error same as from ubc_cs_blob_add */ + goto out; + } + } + ret = LOAD_SUCCESS; + } else { + /* the blob has changed for this vnode: fail!
*/ + ret = LOAD_BADMACHO; + } goto out; } @@ -1607,7 +1632,6 @@ load_code_signature( cputype, macho_offset, addr, - lcp->dataoff, lcp->datasize)) { ret = LOAD_FAILURE; goto out; @@ -1624,8 +1648,9 @@ load_code_signature( ret = LOAD_SUCCESS; out: - if (result && ret == LOAD_SUCCESS) { + if (ret == LOAD_SUCCESS) { result->csflags |= blob->csb_flags; + result->platform_binary = blob->csb_platform_binary; } if (addr != 0) { ubc_cs_blob_deallocate(addr, blob_size); diff --git a/bsd/kern/mach_loader.h b/bsd/kern/mach_loader.h index 295199d43..d1c83d1f9 100644 --- a/bsd/kern/mach_loader.h +++ b/bsd/kern/mach_loader.h @@ -69,6 +69,7 @@ typedef struct _load_result { unsigned char uuid[16]; mach_vm_address_t min_vm_addr; mach_vm_address_t max_vm_addr; + unsigned int platform_binary; } load_result_t; struct image_params; @@ -89,6 +90,6 @@ load_return_t load_machfile( #define LOAD_RESOURCE 7 /* resource allocation failure */ #define LOAD_ENOENT 8 /* resource not found */ #define LOAD_IOERROR 9 /* IO error */ -#define LOAD_DECRYPTFAIL 10 /* FP decrpty failure */ +#define LOAD_DECRYPTFAIL 10 /* FP decrypt failure */ #endif /* _BSD_KERN_MACH_LOADER_H_ */ diff --git a/bsd/kern/mach_process.c b/bsd/kern/mach_process.c index b0cda592a..ef8ebffcd 100644 --- a/bsd/kern/mach_process.c +++ b/bsd/kern/mach_process.c @@ -134,7 +134,7 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE, p->p_pid, W_EXITCODE(ENOTSUP, 0), 4, 0, 0); exit1(p, W_EXITCODE(ENOTSUP, 0), retval); - /* drop funnel before we return */ + thread_exception_return(); /* NOTREACHED */ } @@ -474,5 +474,13 @@ cantrace(proc_t cur_procp, kauth_cred_t creds, proc_t traced_procp, int *errp) *errp = EBUSY; return (0); } + +#if CONFIG_MACF + if ((my_err = mac_proc_check_debug(cur_procp, traced_procp)) != 0) { + *errp = my_err; + return (0); + } +#endif + return(1); } diff --git a/bsd/kern/makesyscalls.sh b/bsd/kern/makesyscalls.sh index 405c027d4..13d56f58a 100755 --- a/bsd/kern/makesyscalls.sh +++ b/bsd/kern/makesyscalls.sh @@ -195,7 +195,7 @@ s/\$//g printf " * argument structures with elements large enough for any of them.\n" > sysarg printf "*/\n" > sysarg printf "\n" > sysarg - printf "#ifndef __arm__\n" > sysarg + printf "#if CONFIG_REQUIRES_U32_MUNGING\n" > sysarg printf "#define\tPAD_(t)\t(sizeof(uint64_t) <= sizeof(t) \\\n " > sysarg printf "\t\t? 
0 : sizeof(uint64_t) - sizeof(t))\n" > sysarg printf "#else\n" > sysarg @@ -210,83 +210,7 @@ s/\$//g printf "#define\tPADR_(t)\t0\n" > sysarg printf "#endif\n" > sysarg printf "\n__BEGIN_DECLS\n" > sysarg - printf "#if !defined(__arm__)\n" > sysarg - printf "void munge_w(const void *, void *); \n" > sysarg - printf "void munge_ww(const void *, void *); \n" > sysarg - printf "void munge_www(const void *, void *); \n" > sysarg - printf "void munge_wwww(const void *, void *); \n" > sysarg - printf "void munge_wwwww(const void *, void *); \n" > sysarg - printf "void munge_wwwwww(const void *, void *); \n" > sysarg - printf "void munge_wwwwwww(const void *, void *); \n" > sysarg - printf "void munge_wwwwwwww(const void *, void *); \n" > sysarg - printf "void munge_wl(const void *, void *); \n" > sysarg - printf "void munge_wlw(const void *, void *); \n" > sysarg - printf "void munge_wlwwwll(const void *, void *); \n" > sysarg - printf "void munge_wlwwwllw(const void *, void *); \n" > sysarg - printf "void munge_wlwwlwlw(const void *, void *); \n" > sysarg - printf "void munge_wllwwll(const void *, void *); \n" > sysarg - printf "void munge_wwwl(const void *, void *); \n" > sysarg - printf "void munge_wwwlw(const void *, void *); \n" > sysarg - printf "void munge_wwwlww(const void *, void *); \n" > sysarg - printf "void munge_wwlwww(const void *, void *); \n" > sysarg - printf "void munge_wwwwlw(const void *, void *); \n" > sysarg - printf "void munge_wwwwl(const void *, void *); \n" > sysarg - printf "void munge_wwwwwl(const void *, void *); \n" > sysarg - printf "void munge_wwwwwlww(const void *, void *); \n" > sysarg - printf "void munge_wwwwwllw(const void *, void *); \n" > sysarg - printf "void munge_wwwwwlll(const void *, void *); \n" > sysarg - printf "void munge_wwwwwwll(const void *, void *); \n" > sysarg - printf "void munge_wwwwwwl(const void *, void *); \n" > sysarg - printf "void munge_wwwwwwlw(const void *, void *); \n" > sysarg - printf "void munge_wsw(const void *, void *); \n" > sysarg - printf "void munge_wws(const void *, void *); \n" > sysarg - printf "void munge_wwwsw(const void *, void *); \n" > sysarg - printf "void munge_llllll(const void *, void *); \n" > sysarg - printf "void munge_ll(const void *, void *); \n" > sysarg - printf "#else \n" > sysarg - printf "/* ARM does not need mungers for BSD system calls... 
*/\n" > sysarg - printf "#define munge_w NULL \n" > sysarg - printf "#define munge_ww NULL \n" > sysarg - printf "#define munge_www NULL \n" > sysarg - printf "#define munge_wwww NULL \n" > sysarg - printf "#define munge_wwwww NULL \n" > sysarg - printf "#define munge_wwwwww NULL \n" > sysarg - printf "#define munge_wwwwwww NULL \n" > sysarg - printf "#define munge_wwwwwwww NULL \n" > sysarg - printf "#define munge_wl NULL \n" > sysarg - printf "#define munge_wlw NULL \n" > sysarg - printf "#define munge_wlwwwll NULL \n" > sysarg - printf "#define munge_wlwwwllw NULL \n" > sysarg - printf "#define munge_wlwwlwlw NULL \n" > sysarg - printf "#define munge_wllwwll NULL \n" > sysarg - printf "#define munge_wwwl NULL \n" > sysarg - printf "#define munge_wwwlw NULL \n" > sysarg - printf "#define munge_wwwlww NULL\n" > sysarg - printf "#define munge_wwlwww NULL \n" > sysarg - printf "#define munge_wwwwl NULL \n" > sysarg - printf "#define munge_wwwwlw NULL \n" > sysarg - printf "#define munge_wwwwwl NULL \n" > sysarg - printf "#define munge_wwwwwlww NULL \n" > sysarg - printf "#define munge_wwwwwllw NULL \n" > sysarg - printf "#define munge_wwwwwlll NULL \n" > sysarg - printf "#define munge_wwwwwwl NULL \n" > sysarg - printf "#define munge_wwwwwwlw NULL \n" > sysarg - printf "#define munge_wsw NULL \n" > sysarg - printf "#define munge_wws NULL \n" > sysarg - printf "#define munge_wwwsw NULL \n" > sysarg - printf "#define munge_llllll NULL \n" > sysarg - printf "#define munge_ll NULL \n" > sysarg - printf "#endif /* __arm__ */\n" > sysarg - printf "\n" > sysarg - printf "/* Active 64-bit user ABIs do not need munging */\n" > sysarg - printf "#define munge_d NULL \n" > sysarg - printf "#define munge_dd NULL \n" > sysarg - printf "#define munge_ddd NULL \n" > sysarg - printf "#define munge_dddd NULL \n" > sysarg - printf "#define munge_ddddd NULL \n" > sysarg - printf "#define munge_dddddd NULL \n" > sysarg - printf "#define munge_ddddddd NULL \n" > sysarg - printf "#define munge_dddddddd NULL \n" > sysarg + printf "#include \n" > sysarg printf "\n" > sysarg @@ -522,7 +446,6 @@ s/\$//g # output function argument structures to sysproto.h and build the # name of the appropriate argument mungers munge32 = "NULL" - munge64 = "NULL" size32 = 0 if ((funcname != "nosys" && funcname != "enosys") || (syscall_num == 0 && funcname == "nosys")) { @@ -531,53 +454,45 @@ s/\$//g printf("struct %s {\n", argalias) > sysarg } munge32 = "munge_" - munge64 = "munge_" for (i = 1; i <= argc; i++) { # Build name of argument munger. # We account for all sys call argument types here. # This is where you add any new types. With LP64 support # each argument consumes 64-bits. - # see .../xnu/bsd/dev/ppc/munge.s for munge argument types. + # see .../xnu/bsd/dev/munge.c for munge argument types. 
if (argtype[i] == "long") { ext_argtype[i] = "user_long_t"; munge32 = munge32 "s" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "u_long") { ext_argtype[i] = "user_ulong_t"; munge32 = munge32 "w" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "size_t") { ext_argtype[i] = "user_size_t"; munge32 = munge32 "w" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "ssize_t") { ext_argtype[i] = "user_ssize_t"; munge32 = munge32 "s" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "user_ssize_t" || argtype[i] == "user_long_t") { munge32 = munge32 "s" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "user_addr_t" || argtype[i] == "user_size_t" || argtype[i] == "user_ulong_t") { munge32 = munge32 "w" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "caddr_t" || argtype[i] == "semun_t" || argtype[i] == "uuid_t" || match(argtype[i], "[\*]") != 0) { ext_argtype[i] = "user_addr_t"; munge32 = munge32 "w" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "int" || argtype[i] == "u_int" || @@ -589,12 +504,10 @@ s/\$//g argtype[i] == "mach_port_name_t" || argtype[i] == "au_asid_t" || argtype[i] == "associd_t" || argtype[i] == "connid_t") { munge32 = munge32 "w" - munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "off_t" || argtype[i] == "int64_t" || argtype[i] == "uint64_t") { munge32 = munge32 "l" - munge64 = munge64 "d" size32 += 8 } else { @@ -626,7 +539,6 @@ s/\$//g if (add_sysent_entry == 0) { argssize = "0" munge32 = "NULL" - munge64 = "NULL" munge_ret = "_SYSCALL_RET_NONE" if (tempname != "enosys") { tempname = "nosys" @@ -670,12 +582,17 @@ s/\$//g } } - printf("\t{ \(sy_call_t *\)%s, %s, %s, %s, %s, %s},", - tempname, munge32, munge64, munge_ret, argssize, size32) > sysent - linesize = length(tempname) + length(munge32) + length(munge64) + \ + printf("#if CONFIG_REQUIRES_U32_MUNGING\n") > sysent + printf("\t{ \(sy_call_t *\)%s, %s, %s, %s, %s},", + tempname, munge32, munge_ret, argssize, size32) > sysent + linesize = length(tempname) + length(munge32) + \ length(munge_ret) + length(argssize) + length(size32) + 28 align_comment(linesize, 88, sysent) printf("/* %d = %s%s*/\n", syscall_num, funcname, additional_comments) > sysent + printf("#else\n") > sysent + printf("\t{ \(sy_call_t *\)%s, %s, %s, %s},\n", + tempname, munge_ret, argssize, size32) > sysent + printf("#endif\n") > sysent # output to syscalls.c if (add_sysnames_entry == 1) { diff --git a/bsd/kern/mcache.c b/bsd/kern/mcache.c index 0ffea0719..823fbf95e 100644 --- a/bsd/kern/mcache.c +++ b/bsd/kern/mcache.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2013 Apple Inc. All rights reserved. + * Copyright (c) 2006-2014 Apple Inc. All rights reserved. 
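Illustrative output of the sysent emission above (hand-written here, not literal generator output): the same entry for read(2), whose three register-sized arguments yield munge_www and a 12-byte 32-bit argument area, with and without 32-bit munging compiled in.

#if CONFIG_REQUIRES_U32_MUNGING
	{ (sy_call_t *)read, munge_www, _SYSCALL_RET_SSIZE_T, AC(read_args), 12 },
#else
	{ (sy_call_t *)read, _SYSCALL_RET_SSIZE_T, AC(read_args), 12 },
#endif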
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -106,7 +107,8 @@ static lck_attr_t *mcache_llock_attr; static lck_grp_t *mcache_llock_grp; static lck_grp_attr_t *mcache_llock_grp_attr; static struct zone *mcache_zone; -static unsigned int mcache_reap_interval; +static const uint32_t mcache_reap_interval = 15; +static const uint32_t mcache_reap_interval_leeway = 2; static UInt32 mcache_reaping; static int mcache_ready; static int mcache_updating; @@ -118,6 +120,8 @@ static unsigned int mcache_flags = MCF_DEBUG; static unsigned int mcache_flags = 0; #endif +int mca_trn_max = MCA_TRN_MAX; + #define DUMP_MCA_BUF_SIZE 512 static char *mca_dump_buf; @@ -156,18 +160,21 @@ static void mcache_cache_reap(mcache_t *); static void mcache_cache_update(mcache_t *); static void mcache_cache_bkt_resize(void *); static void mcache_cache_enable(void *); -static void mcache_update(void *); +static void mcache_update(thread_call_param_t __unused, thread_call_param_t __unused); static void mcache_update_timeout(void *); static void mcache_applyall(void (*)(mcache_t *)); static void mcache_reap_start(void *); static void mcache_reap_done(void *); -static void mcache_reap_timeout(void *); +static void mcache_reap_timeout(thread_call_param_t __unused, thread_call_param_t); static void mcache_notify(mcache_t *, u_int32_t); static void mcache_purge(void *); static LIST_HEAD(, mcache) mcache_head; mcache_t *mcache_audit_cache; +static thread_call_t mcache_reap_tcall; +static thread_call_t mcache_update_tcall; + /* * Initialize the framework; this is currently called as part of BSD init. */ @@ -178,6 +185,8 @@ mcache_init(void) unsigned int i; char name[32]; + VERIFY(mca_trn_max >= 2); + ncpu = ml_get_max_cpus(); (void) mcache_cache_line_size(); /* prime it */ @@ -187,6 +196,11 @@ mcache_init(void) mcache_llock_attr = lck_attr_alloc_init(); mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr); + mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL); + mcache_update_tcall = thread_call_allocate(mcache_update, NULL); + if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL) + panic("mcache_init: thread_call_allocate failed"); + mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE, PAGE_SIZE, "mcache"); if (mcache_zone == NULL) @@ -203,13 +217,12 @@ mcache_init(void) (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP); } - PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags)); + PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof(mcache_flags)); mcache_flags &= MCF_FLAGS_MASK; mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t), 0, 0, MCR_SLEEP); - mcache_reap_interval = 15 * hz; mcache_applyall(mcache_cache_bkt_enable); mcache_ready = 1; @@ -659,11 +672,10 @@ mcache_purge(void *arg) lck_mtx_lock_spin(&cp->mc_sync_lock); cp->mc_enable_cnt++; lck_mtx_unlock(&cp->mc_sync_lock); - } __private_extern__ boolean_t -mcache_purge_cache(mcache_t *cp) +mcache_purge_cache(mcache_t *cp, boolean_t async) { /* * Purging a cache that has no per-CPU caches or is already @@ -680,7 +692,10 @@ mcache_purge_cache(mcache_t *cp) cp->mc_purge_cnt++; lck_mtx_unlock(&cp->mc_sync_lock); - mcache_dispatch(mcache_purge, cp); + if (async) + mcache_dispatch(mcache_purge, cp); + else + mcache_purge(cp); return (TRUE); } @@ -1253,7 +1268,8 @@ mcache_bkt_ws_reap(mcache_t *cp) } static void -mcache_reap_timeout(void *arg) +mcache_reap_timeout(thread_call_param_t dummy __unused, + 
thread_call_param_t arg) { volatile UInt32 *flag = arg; @@ -1265,7 +1281,14 @@ mcache_reap_timeout(void *arg) static void mcache_reap_done(void *flag) { - timeout(mcache_reap_timeout, flag, mcache_reap_interval); + uint64_t deadline, leeway; + + clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC, + &deadline); + clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway, + NSEC_PER_SEC, &leeway); + thread_call_enter_delayed_with_leeway(mcache_reap_tcall, flag, + deadline, leeway, THREAD_CALL_DELAY_LEEWAY); } static void @@ -1390,14 +1413,22 @@ mcache_cache_enable(void *arg) static void mcache_update_timeout(__unused void *arg) { - timeout(mcache_update, NULL, mcache_reap_interval); + uint64_t deadline, leeway; + + clock_interval_to_deadline(mcache_reap_interval, NSEC_PER_SEC, + &deadline); + clock_interval_to_absolutetime_interval(mcache_reap_interval_leeway, + NSEC_PER_SEC, &leeway); + thread_call_enter_delayed_with_leeway(mcache_update_tcall, NULL, + deadline, leeway, THREAD_CALL_DELAY_LEEWAY); } static void -mcache_update(__unused void *arg) +mcache_update(thread_call_param_t arg __unused, + thread_call_param_t dummy __unused) { mcache_applyall(mcache_cache_update); - mcache_dispatch(mcache_update_timeout, NULL); + mcache_update_timeout(NULL); } static void @@ -1425,25 +1456,30 @@ mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp, { struct timeval now, base = { 0, 0 }; void *stack[MCACHE_STACK_DEPTH + 1]; + struct mca_trn *transaction; + + transaction = &mca->mca_trns[mca->mca_next_trn]; mca->mca_addr = addr; mca->mca_cache = cp; - mca->mca_pthread = mca->mca_thread; - mca->mca_thread = current_thread(); - bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack)); - mca->mca_pdepth = mca->mca_depth; + + transaction->mca_thread = current_thread(); + bzero(stack, sizeof (stack)); - mca->mca_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1; - bcopy(&stack[1], mca->mca_stack, sizeof (mca->mca_pstack)); + transaction->mca_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1; + bcopy(&stack[1], transaction->mca_stack, + sizeof (transaction->mca_stack)); - mca->mca_ptstamp = mca->mca_tstamp; microuptime(&now); if (base_ts != NULL) base = *base_ts; /* tstamp is in ms relative to base_ts */ - mca->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000); + transaction->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000); if ((now.tv_sec - base.tv_sec) > 0) - mca->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000); + transaction->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000); + + mca->mca_next_trn = + (mca->mca_next_trn + 1) % mca_trn_max; } __private_extern__ void @@ -1546,6 +1582,26 @@ mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset, #undef panic +#define DUMP_TRN_FMT() \ + "%s transaction thread %p saved PC stack (%d deep):\n" \ + "\t%p, %p, %p, %p, %p, %p, %p, %p\n" \ + "\t%p, %p, %p, %p, %p, %p, %p, %p\n" + +#define DUMP_TRN_FIELDS(s, x) \ + s, \ + mca->mca_trns[x].mca_thread, mca->mca_trns[x].mca_depth, \ + mca->mca_trns[x].mca_stack[0], mca->mca_trns[x].mca_stack[1], \ + mca->mca_trns[x].mca_stack[2], mca->mca_trns[x].mca_stack[3], \ + mca->mca_trns[x].mca_stack[4], mca->mca_trns[x].mca_stack[5], \ + mca->mca_trns[x].mca_stack[6], mca->mca_trns[x].mca_stack[7], \ + mca->mca_trns[x].mca_stack[8], mca->mca_trns[x].mca_stack[9], \ + mca->mca_trns[x].mca_stack[10], mca->mca_trns[x].mca_stack[11], \ + mca->mca_trns[x].mca_stack[12], mca->mca_trns[x].mca_stack[13], \ + mca->mca_trns[x].mca_stack[14], 
mca->mca_trns[x].mca_stack[15] + +#define MCA_TRN_LAST ((mca->mca_next_trn + mca_trn_max) % mca_trn_max) +#define MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max) + __private_extern__ char * mcache_dump_mca(mcache_audit_t *mca) { @@ -1553,29 +1609,16 @@ mcache_dump_mca(mcache_audit_t *mca) return (NULL); snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE, - "mca %p: addr %p, cache %p (%s)\n" - "last transaction; thread %p, saved PC stack (%d deep):\n" - "\t%p, %p, %p, %p, %p, %p, %p, %p\n" - "\t%p, %p, %p, %p, %p, %p, %p, %p\n" - "previous transaction; thread %p, saved PC stack (%d deep):\n" - "\t%p, %p, %p, %p, %p, %p, %p, %p\n" - "\t%p, %p, %p, %p, %p, %p, %p, %p\n", + "mca %p: addr %p, cache %p (%s) nxttrn %d\n" + DUMP_TRN_FMT() + DUMP_TRN_FMT(), + mca, mca->mca_addr, mca->mca_cache, mca->mca_cache ? mca->mca_cache->mc_name : "?", - mca->mca_thread, mca->mca_depth, - mca->mca_stack[0], mca->mca_stack[1], mca->mca_stack[2], - mca->mca_stack[3], mca->mca_stack[4], mca->mca_stack[5], - mca->mca_stack[6], mca->mca_stack[7], mca->mca_stack[8], - mca->mca_stack[9], mca->mca_stack[10], mca->mca_stack[11], - mca->mca_stack[12], mca->mca_stack[13], mca->mca_stack[14], - mca->mca_stack[15], - mca->mca_pthread, mca->mca_pdepth, - mca->mca_pstack[0], mca->mca_pstack[1], mca->mca_pstack[2], - mca->mca_pstack[3], mca->mca_pstack[4], mca->mca_pstack[5], - mca->mca_pstack[6], mca->mca_pstack[7], mca->mca_pstack[8], - mca->mca_pstack[9], mca->mca_pstack[10], mca->mca_pstack[11], - mca->mca_pstack[12], mca->mca_pstack[13], mca->mca_pstack[14], - mca->mca_pstack[15]); + mca->mca_next_trn, + + DUMP_TRN_FIELDS("last", MCA_TRN_LAST), + DUMP_TRN_FIELDS("previous", MCA_TRN_PREV)); return (mca_dump_buf); } diff --git a/bsd/kern/policy_check.c b/bsd/kern/policy_check.c index 5bf5c5cba..a8acdd2b0 100644 --- a/bsd/kern/policy_check.c +++ b/bsd/kern/policy_check.c @@ -118,7 +118,7 @@ common_hook(void) return rv; } -#if (MAC_POLICY_OPS_VERSION != 24) +#if (MAC_POLICY_OPS_VERSION != 31) # error "struct mac_policy_ops doesn't match definition in mac_policy.h" #endif /* @@ -262,28 +262,28 @@ static struct mac_policy_ops policy_ops = { .mpo_policy_initbsd = hook_policy_initbsd, CHECK_SET_HOOK(policy_syscall) - CHECK_SET_HOOK(port_check_copy_send) - CHECK_SET_HOOK(port_check_hold_receive) - CHECK_SET_HOOK(port_check_hold_send_once) - CHECK_SET_HOOK(port_check_hold_send) - CHECK_SET_HOOK(port_check_label_update) - CHECK_SET_HOOK(port_check_make_send_once) - CHECK_SET_HOOK(port_check_make_send) - CHECK_SET_HOOK(port_check_method) - CHECK_SET_HOOK(port_check_move_receive) - CHECK_SET_HOOK(port_check_move_send_once) - CHECK_SET_HOOK(port_check_move_send) - CHECK_SET_HOOK(port_check_receive) - CHECK_SET_HOOK(port_check_send) - CHECK_SET_HOOK(port_check_service) - CHECK_SET_HOOK(port_label_associate_kernel) - CHECK_SET_HOOK(port_label_associate) - CHECK_SET_HOOK(port_label_compute) - CHECK_SET_HOOK(port_label_copy) - CHECK_SET_HOOK(port_label_destroy) - CHECK_SET_HOOK(port_label_init) - CHECK_SET_HOOK(port_label_update_cred) - CHECK_SET_HOOK(port_label_update_kobject) + CHECK_SET_HOOK(system_check_sysctlbyname) + CHECK_SET_HOOK(proc_check_inherit_ipc_ports) + CHECK_SET_HOOK(vnode_check_rename) + .mpo_reserved4 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved5 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved6 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved7 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved8 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved9 = (mpo_reserved_hook_t *)common_hook, + 
.mpo_reserved10 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved11 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved12 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved13 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved14 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved15 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved16 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved17 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved18 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved19 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved20 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved21 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved22 = (mpo_reserved_hook_t *)common_hook, CHECK_SET_HOOK(posixsem_check_create) CHECK_SET_HOOK(posixsem_check_open) @@ -361,7 +361,7 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(system_check_settime) CHECK_SET_HOOK(system_check_swapoff) CHECK_SET_HOOK(system_check_swapon) - CHECK_SET_HOOK(system_check_sysctl) + .mpo_reserved31 = (mpo_reserved_hook_t *)common_hook, CHECK_SET_HOOK(sysvmsg_label_associate) CHECK_SET_HOOK(sysvmsg_label_destroy) @@ -394,14 +394,14 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(sysvshm_label_init) CHECK_SET_HOOK(sysvshm_label_recycle) - CHECK_SET_HOOK(task_label_associate_kernel) - CHECK_SET_HOOK(task_label_associate) - CHECK_SET_HOOK(task_label_copy) - CHECK_SET_HOOK(task_label_destroy) - CHECK_SET_HOOK(task_label_externalize) - CHECK_SET_HOOK(task_label_init) - CHECK_SET_HOOK(task_label_internalize) - CHECK_SET_HOOK(task_label_update) + .mpo_reserved23 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved24 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved25 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved26 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved27 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved28 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved29 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved30 = (mpo_reserved_hook_t *)common_hook, CHECK_SET_HOOK(iokit_check_hid_control) @@ -510,8 +510,8 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(vnode_notify_link) - .mpo_reserved28 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved29 = (mpo_reserved_hook_t *)common_hook, + CHECK_SET_HOOK(iokit_check_filter_properties) + CHECK_SET_HOOK(iokit_check_get_property) }; /* diff --git a/bsd/kern/posix_sem.c b/bsd/kern/posix_sem.c index 18f5d4b73..42a12cb7e 100644 --- a/bsd/kern/posix_sem.c +++ b/bsd/kern/posix_sem.c @@ -992,24 +992,6 @@ out: return(error); } -int -sem_init(__unused proc_t p, __unused struct sem_init_args *uap, __unused int32_t *retval) -{ - return(ENOSYS); -} - -int -sem_destroy(__unused proc_t p, __unused struct sem_destroy_args *uap, __unused int32_t *retval) -{ - return(ENOSYS); -} - -int -sem_getvalue(__unused proc_t p, __unused struct sem_getvalue_args *uap, __unused int32_t *retval) -{ - return(ENOSYS); -} - static int psem_close(struct psemnode *pnode, __unused int flags) { diff --git a/bsd/kern/posix_shm.c b/bsd/kern/posix_shm.c index aa57783cb..4a0a848a2 100644 --- a/bsd/kern/posix_shm.c +++ b/bsd/kern/posix_shm.c @@ -273,7 +273,6 @@ pshm_cache_search(struct pshminfo **pshmp, struct pshmname *pnp, /* * We found a "negative" match, ENOENT notifies client of this match. - * The nc_vpid field records whether this is a whiteout. 
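Context for the posix_sem.c deletion above: sem_init(), sem_destroy(), and sem_getvalue() only ever returned ENOSYS, since unnamed POSIX semaphores were never implemented, so removing the stubs leaves named semaphores as the supported path. A user-space sketch; the semaphore name is hypothetical:

#include <semaphore.h>
#include <fcntl.h>

sem_t *s = sem_open("/example.sem", O_CREAT, 0644, 1);
if (s != SEM_FAILED) {
	sem_wait(s);
	/* ... critical section ... */
	sem_post(s);
	sem_close(s);
	sem_unlink("/example.sem");
}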
*/ pshmstats.neghits++; return (ENOENT); @@ -304,9 +303,6 @@ pshm_cache_add(struct pshminfo *pshmp, struct pshmname *pnp, struct pshmcache *p /* * Fill in cache info, if vp is NULL this is a "negative" cache entry. - * For negative entries, we have to record whether it is a whiteout. - * the whiteout flag is stored in the nc_vpid field which is - * otherwise unused. */ pcp->pshminfo = pshmp; pcp->pshm_nlen = pnp->pshm_namelen; diff --git a/bsd/kern/proc_info.c b/bsd/kern/proc_info.c index 7a06b0a97..2af5cc29e 100644 --- a/bsd/kern/proc_info.c +++ b/bsd/kern/proc_info.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -82,6 +81,10 @@ #include +/* Needed by proc_pidnoteexit() */ +#include +#include + struct pshmnode; struct psemnode; struct pipe; @@ -92,44 +95,76 @@ uint64_t get_dispatchqueue_offset_from_proc(void *); uint64_t get_dispatchqueue_serialno_offset_from_proc(void *); int proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +/* + * TODO: Replace the noinline attribute below. Currently, it serves + * to avoid stack bloat caused by inlining multiple functions that + * have large stack footprints; when the functions are independent + * of each other (will not both be called in any given call to the + * caller), this only serves to bloat the stack, as we allocate + * space for both functions, despite the fact that we only need a + * fraction of that space. + * + * Long term, these functions should not be allocating everything on + * the stack, and should move large allocations (the huge structs + * that proc info deals in) to the heap, or eliminate them if + * possible. + * + * The functions that most desperately need to improve stack usage + * (starting with the worst offenders): + * proc_pidvnodepathinfo + * proc_pidinfo + * proc_pidregionpathinfo + * pid_vnodeinfopath + * pid_pshminfo + * pid_pseminfo + * pid_socketinfo + * proc_pid_rusage + * proc_pidoriginatorinfo + */ + /* protos for proc_info calls */ -int proc_listpids(uint32_t type, uint32_t tyoneinfo, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int proc_pidfdinfo(int pid, int flavor,int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t * retval); -int proc_terminate(int pid, int32_t * retval); -int proc_pid_rusage(int pid, int flavor, user_addr_t buffer, int32_t * retval); +int __attribute__ ((noinline)) proc_listpids(uint32_t type, uint32_t tyoneinfo, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) proc_pidfdinfo(int pid, int flavor,int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, 
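/*
 * (On the __attribute__((noinline)) annotations in this block: per the
 * TODO above, each of these handlers keeps multi-KB structs in its locals,
 * e.g. struct proc_regionwithpathinfo embeds a MAXPATHLEN path buffer.
 * Keeping the handlers out of line means the dispatcher's frame stays
 * small and each handler's frame exists only while it actually runs.
 * A minimal sketch of the effect, with hypothetical names:
 *
 *	static int __attribute__((noinline))
 *	flavor_a(void) { char big[4096]; return fill(big); }
 *	static int __attribute__((noinline))
 *	flavor_b(void) { char big[4096]; return fill(big); }
 *
 *	int dispatch(int f)	// without noinline, an inlining compiler
 *	{			// may reserve ~8KB here for both paths
 *		return f ? flavor_a() : flavor_b();
 *	}
 *
 * "fill" is a placeholder, not a kernel function.)
 */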
uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t * retval); +int __attribute__ ((noinline)) proc_terminate(int pid, int32_t * retval); +int __attribute__ ((noinline)) proc_pid_rusage(int pid, int flavor, user_addr_t buffer, int32_t * retval); +int __attribute__ ((noinline)) proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffersize, int32_t * retval); /* protos for procpidinfo calls */ -int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd, int zombie); -int proc_pidshortbsdinfo(proc_t p, struct proc_bsdshortinfo *pbsd_shortp, int zombie); -int proc_pidtaskinfo(proc_t p, struct proc_taskinfo *ptinfo); -int proc_pidallinfo(proc_t p, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_pidthreadinfo(proc_t p, uint64_t arg, int thuniqueid, struct proc_threadinfo *pthinfo); -int proc_pidthreadpathinfo(proc_t p, uint64_t arg, struct proc_threadwithpathinfo *pinfo); -int proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_pidvnodepathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -int proc_pidworkqueueinfo(proc_t p, struct proc_workqueueinfo *pwqinfo); -int proc_pidfileportlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); -void proc_piduniqidentifierinfo(proc_t p, struct proc_uniqidentifierinfo *p_uniqidinfo); +int __attribute__ ((noinline)) proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd, int zombie); +int __attribute__ ((noinline)) proc_pidshortbsdinfo(proc_t p, struct proc_bsdshortinfo *pbsd_shortp, int zombie); +int __attribute__ ((noinline)) proc_pidtaskinfo(proc_t p, struct proc_taskinfo *ptinfo); +int __attribute__ ((noinline)) proc_pidthreadinfo(proc_t p, uint64_t arg, int thuniqueid, struct proc_threadinfo *pthinfo); +int __attribute__ ((noinline)) proc_pidthreadpathinfo(proc_t p, uint64_t arg, struct proc_threadwithpathinfo *pinfo); +int __attribute__ ((noinline)) proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_pidregionpathinfo2(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_pidregionpathinfo3(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_pidvnodepathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ 
((noinline)) proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int __attribute__ ((noinline)) proc_pidworkqueueinfo(proc_t p, struct proc_workqueueinfo *pwqinfo); +int __attribute__ ((noinline)) proc_pidfileportlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +void __attribute__ ((noinline)) proc_piduniqidentifierinfo(proc_t p, struct proc_uniqidentifierinfo *p_uniqidinfo); +void __attribute__ ((noinline)) proc_archinfo(proc_t p, struct proc_archinfo *pai); +void __attribute__ ((noinline)) proc_pidcoalitioninfo(proc_t p, struct proc_pidcoalitioninfo *pci); +int __attribute__ ((noinline)) proc_pidnoteexit(proc_t p, uint64_t arg, uint32_t *data); /* protos for proc_pidfdinfo calls */ -int pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int pid_pseminfo(struct psemnode * psem, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int pid_pipeinfo(struct pipe * p, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); -int pid_atalkinfo(struct atalk * at, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_pseminfo(struct psemnode * psem, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_pipeinfo(struct pipe * p, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int __attribute__ ((noinline)) pid_atalkinfo(struct atalk * at, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); /* protos for misc */ @@ -138,6 +173,7 @@ int fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo); void fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * finfo); int proc_security_policy(proc_t targetp, int callnum, int flavor, boolean_t check_same_user); static void munge_vinfo_stat(struct stat64 *sbp, struct 
vinfo_stat *vsbp); +static int proc_piduuidinfo(pid_t pid, uuid_t uuid_buf, uint32_t buffersize); extern int cansignal(struct proc *, kauth_cred_t, struct proc *, int, int); extern int proc_get_rusage(proc_t proc, int flavor, user_addr_t buffer, int is_zombie); @@ -198,6 +234,8 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b return(proc_dirtycontrol(pid, flavor, arg, retval)); case PROC_INFO_CALL_PIDRUSAGE: return (proc_pid_rusage(pid, flavor, buffer, retval)); + case PROC_INFO_CALL_PIDORIGINATORINFO: + return (proc_pidoriginatorinfo(pid, flavor, buffer, buffersize, retval)); default: return(EINVAL); } @@ -329,7 +367,7 @@ proc_loop: } -/********************************** proc_pidinfo routines ********************************/ +/********************************** proc_pidfdlist routines ********************************/ int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval) @@ -849,6 +887,105 @@ proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint return(error); } +int +proc_pidregionpathinfo2(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, int32_t *retval) +{ + struct proc_regionwithpathinfo preginfo; + int ret, error = 0; + uintptr_t vnodeaddr= 0; + uint32_t vnodeid= 0; + vnode_t vp; + int count; + + bzero(&preginfo, sizeof(struct proc_regionwithpathinfo)); + + ret = fill_procregioninfo_onlymappedvnodes( p->task, arg, (struct proc_regioninfo_internal *)&preginfo.prp_prinfo, (uintptr_t *)&vnodeaddr, (uint32_t *)&vnodeid); + if (ret == 0) + return(EINVAL); + if (!vnodeaddr) + return(EINVAL); + + vp = (vnode_t)vnodeaddr; + if ((vnode_getwithvid(vp, vnodeid)) == 0) { + /* FILL THE VNODEINFO */ + error = fill_vnodeinfo(vp, &preginfo.prp_vip.vip_vi); + count = MAXPATHLEN; + vn_getpath(vp, &preginfo.prp_vip.vip_path[0], &count); + /* Always make sure it is null terminated */ + preginfo.prp_vip.vip_path[MAXPATHLEN-1] = 0; + vnode_put(vp); + } else { + return(EINVAL); + } + + error = copyout(&preginfo, buffer, sizeof(struct proc_regionwithpathinfo)); + if (error == 0) + *retval = sizeof(struct proc_regionwithpathinfo); + return(error); +} + +int +proc_pidregionpathinfo3(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, int32_t *retval) +{ + struct proc_regionwithpathinfo preginfo; + int ret, error = 0; + uintptr_t vnodeaddr; + uint32_t vnodeid; + vnode_t vp; + int count; + uint64_t addr = 0; + + /* Loop while looking for vnodes that match dev_t filter */ + do { + bzero(&preginfo, sizeof(struct proc_regionwithpathinfo)); + vnodeaddr = 0; + vnodeid = 0; + + ret = fill_procregioninfo_onlymappedvnodes( p->task, addr, (struct proc_regioninfo_internal *)&preginfo.prp_prinfo, (uintptr_t *)&vnodeaddr, (uint32_t *)&vnodeid); + if (ret == 0) + return(EINVAL); + if (!vnodeaddr) + return(EINVAL); + + vp = (vnode_t)vnodeaddr; + if ((vnode_getwithvid(vp, vnodeid)) == 0) { + /* Check if the vnode matches the filter, otherwise loop looking for the next memory region backed by a vnode */ + struct vnode_attr va; + + memset(&va, 0, sizeof(va)); + VATTR_INIT(&va); + VATTR_WANTED(&va, va_fsid); + + ret = vnode_getattr(vp, &va, vfs_context_current()); + if (ret) { + vnode_put(vp); + return(EINVAL); + } + + if (va.va_fsid == arg) { + /* FILL THE VNODEINFO */ + error = fill_vnodeinfo(vp, &preginfo.prp_vip.vip_vi); + count = MAXPATHLEN; + vn_getpath(vp, &preginfo.prp_vip.vip_path[0], &count); + /* Always make sure it is null terminated */ + 
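/*
 * (vn_getpath() takes `count' in/out -- buffer size on entry, bytes used
 * on exit -- and on failure or truncation the buffer is not guaranteed to
 * be terminated, hence the explicit store below. As a standalone sketch:
 *
 *	char path[MAXPATHLEN];
 *	int len = sizeof(path);
 *	(void)vn_getpath(vp, path, &len);	// result deliberately ignored
 *	path[MAXPATHLEN - 1] = '\0';		// force termination
 *
 * sketch only; the surrounding code stores into vip_path instead.)
 */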
preginfo.prp_vip.vip_path[MAXPATHLEN-1] = 0; + vnode_put(vp); + break; + } + vnode_put(vp); + } else { + return(EINVAL); + } + + addr = preginfo.prp_prinfo.pri_address + preginfo.prp_prinfo.pri_size; + } while (1); + + error = copyout(&preginfo, buffer, sizeof(struct proc_regionwithpathinfo)); + if (error == 0) + *retval = sizeof(struct proc_regionwithpathinfo); + return(error); +} + /* * Path is relative to current process directory; may different from current * thread directory. @@ -980,6 +1117,122 @@ proc_piduniqidentifierinfo(proc_t p, struct proc_uniqidentifierinfo *p_uniqidinf p_uniqidinfo->p_reserve4 = 0; } + +static int +proc_piduuidinfo(pid_t pid, uuid_t uuid_buf, uint32_t buffersize) +{ + struct proc * p = PROC_NULL; + int zombref = 0; + + if (buffersize < sizeof(uuid_t)) + return EINVAL; + + if ((p = proc_find(pid)) == PROC_NULL) { + p = proc_find_zombref(pid); + zombref = 1; + } + if (p == PROC_NULL) { + return ESRCH; + } + + proc_getexecutableuuid(p, (unsigned char *)uuid_buf, buffersize); + + if (zombref) + proc_drop_zombref(p); + else + proc_rele(p); + + return 0; +} + +/* + * Function to get the uuid of the originator of the voucher. + */ +int +proc_pidoriginatoruuid(uuid_t uuid, uint32_t buffersize) +{ + pid_t originator_pid; + kern_return_t kr; + int error; + + /* + * Get the current voucher origin pid. The pid returned here + * might not be valid or may have been recycled. + */ + kr = thread_get_current_voucher_origin_pid(&originator_pid); + /* If errors, convert errors to appropriate format */ + if (kr) { + if (kr == KERN_INVALID_TASK) + error = ESRCH; + else if (kr == KERN_INVALID_VALUE) + error = ENOATTR; + else + error = EINVAL; + return error; + } + + error = proc_piduuidinfo(originator_pid, uuid, buffersize); + return error; +} + +/***************************** proc_pidoriginatorinfo ***************************/ + +int +proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffersize, int32_t * retval) +{ + int error = ENOTSUP; + uint32_t size; + + switch (flavor) { + case PROC_PIDORIGINATOR_UUID: + size = PROC_PIDORIGINATOR_UUID_SIZE; + break; + case PROC_PIDORIGINATOR_BGSTATE: + size = PROC_PIDORIGINATOR_BGSTATE_SIZE; + break; + default: + return(EINVAL); + } + + if (buffersize < size) + return(ENOMEM); + + if (pid != 0 && pid != proc_selfpid()) + return (EINVAL); + + switch (flavor) { + case PROC_PIDORIGINATOR_UUID: { + uuid_t uuid; + + error = proc_pidoriginatoruuid(uuid, sizeof(uuid)); + if (error != 0) + goto out; + + error = copyout(uuid, buffer, size); + if (error == 0) + *retval = size; + } + break; + + case PROC_PIDORIGINATOR_BGSTATE: { + uint32_t is_backgrounded; + error = proc_get_originatorbgstate(&is_backgrounded); + if (error) + goto out; + + error = copyout(&is_backgrounded, buffer, size); + if (error == 0) + *retval = size; + } + break; + + default: + error = ENOTSUP; + } +out: + return error; +} + /********************************** proc_pidinfo ********************************/ @@ -1057,6 +1310,26 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu case PROC_PIDT_BSDINFOWITHUNIQID: size = PROC_PIDT_BSDINFOWITHUNIQID_SIZE; break; + case PROC_PIDARCHINFO: + size = PROC_PIDARCHINFO_SIZE; + break; + case PROC_PIDCOALITIONINFO: + size = PROC_PIDCOALITIONINFO_SIZE; + break; + case PROC_PIDNOTEEXIT: + /* + * Set findzomb explicitly because arg passed + * in is used as note exit status bits. 
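	 * For example, a caller may pass (NOTE_EXITSTATUS | NOTE_EXIT_DETAIL)
	 * in arg to select which exit bits proc_pidnoteexit() reports, so the
	 * convention used by the flavors above -- nonzero arg means "also
	 * search the zombie list" -- cannot apply here; an exiting or exited
	 * process is precisely the interesting case for this flavor.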
+	 */
+	size = PROC_PIDNOTEEXIT_SIZE;
+	findzomb = 1;
+	break;
+	case PROC_PIDREGIONPATHINFO2:
+		size = PROC_PIDREGIONPATHINFO2_SIZE;
+		break;
+	case PROC_PIDREGIONPATHINFO3:
+		size = PROC_PIDREGIONPATHINFO3_SIZE;
+		break;
	default:
		return(EINVAL);
	}
@@ -1070,7 +1343,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu
	/* Check if we need to look for zombies */
	if ((flavor == PROC_PIDTBSDINFO) || (flavor == PROC_PIDT_SHORTBSDINFO) || (flavor == PROC_PIDT_BSDINFOWITHUNIQID)
-	    || (flavor == PROC_PIDUNIQIDENTIFIERINFO)) {
+	    || (flavor == PROC_PIDUNIQIDENTIFIERINFO)) {
		if (arg)
			findzomb = 1;
	}
@@ -1213,6 +1486,16 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu
		}
		break;
+		case PROC_PIDREGIONPATHINFO2:{
+			error = proc_pidregionpathinfo2(p, arg, buffer, buffersize, retval);
+		}
+		break;
+
+		case PROC_PIDREGIONPATHINFO3:{
+			error = proc_pidregionpathinfo3(p, arg, buffer, buffersize, retval);
+		}
+		break;
+
		case PROC_PIDVNODEPATHINFO:{
			error = proc_pidvnodepathinfo(p, arg, buffer, buffersize, retval);
		}
@@ -1255,6 +1538,37 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu
		}
		break;
+		case PROC_PIDARCHINFO: {
+			struct proc_archinfo pai;
+			proc_archinfo(p, &pai);
+			error = copyout(&pai, buffer, sizeof(struct proc_archinfo));
+			if (error == 0) {
+				*retval = sizeof(struct proc_archinfo);
+			}
+		}
+		break;
+
+		case PROC_PIDCOALITIONINFO: {
+			struct proc_pidcoalitioninfo pci;
+			proc_pidcoalitioninfo(p, &pci);
+			error = copyout(&pci, buffer, sizeof(struct proc_pidcoalitioninfo));
+			if (error == 0) {
+				*retval = sizeof(struct proc_pidcoalitioninfo);
+			}
+		}
+		break;
+
+		case PROC_PIDNOTEEXIT: {
+			uint32_t data;
+			error = proc_pidnoteexit(p, arg, &data);
+			if (error == 0) {
+				error = copyout(&data, buffer, sizeof(data));
+				if (error == 0) {
+					*retval = sizeof(data);
+				}
+			}
+		}
+		break;
+
		default:
			error = ENOTSUP;
	}
@@ -1951,6 +2265,17 @@ proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t *retval) {
			}
		}
		break;
+
+		case PROC_DIRTYCONTROL_CLEAR: {
+			/* Check privileges; use cansignal() here since the process could be terminated */
+			if (!cansignal(current_proc(), my_cred, target_p, SIGKILL, 0)) {
+				error = EPERM;
+				goto out;
+			}
+
+			error = memorystatus_dirty_clear(target_p, pcontrol);
+		}
+		break;
	}
out:
@@ -2097,3 +2422,104 @@ out:
	return (error);
}
+void
+proc_archinfo(proc_t p, struct proc_archinfo *pai)
+{
+	proc_lock(p);
+	pai->p_cputype = p->p_cputype;
+	pai->p_cpusubtype = p->p_cpusubtype;
+	proc_unlock(p);
+}
+
+void
+proc_pidcoalitioninfo(proc_t p, struct proc_pidcoalitioninfo *ppci)
+{
+	bzero(ppci, sizeof(*ppci));
+	ppci->coalition_id = proc_coalitionid(p);
+}
+
+
+
+/*
+ * Wrapper to provide NOTE_EXIT_DETAIL and NOTE_EXITSTATUS.
+ * It mimics the data that is typically captured by the
+ * EVFILT_PROC, NOTE_EXIT event mechanism.
+ * See filt_proc() in kern_event.c.
+ */
+int
+proc_pidnoteexit(proc_t p, uint64_t flags, uint32_t *data)
+{
+	uint32_t exit_data = 0;
+	uint32_t exit_flags = (uint32_t)flags;
+
+	proc_lock(p);
+
+	/*
+	 * Allow access to the parent of the exiting
+	 * child or the parent debugger only.
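	 * (When a debugger attaches via ptrace(2) the child is re-parented to
	 * the tracer, so p_ppid then names the tracer while p_oppid preserves
	 * the original parent; checking both lets either observer collect the
	 * exit status.)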
+ */ + do { + pid_t selfpid = proc_selfpid(); + + if (p->p_ppid == selfpid) + break; /* parent => ok */ + + if ((p->p_lflag & P_LTRACED) != 0 && + (p->p_oppid == selfpid)) + break; /* parent-in-waiting => ok */ + + proc_unlock(p); + return (EACCES); + } while (0); + + if ((exit_flags & NOTE_EXITSTATUS) != 0) { + /* The signal and exit status */ + exit_data |= (p->p_xstat & NOTE_PDATAMASK); + } + + if ((exit_flags & NOTE_EXIT_DETAIL) != 0) { + /* The exit detail */ + if ((p->p_lflag & P_LTERM_DECRYPTFAIL) != 0) { + exit_data |= NOTE_EXIT_DECRYPTFAIL; + } + + if ((p->p_lflag & P_LTERM_JETSAM) != 0) { + exit_data |= NOTE_EXIT_MEMORY; + + switch (p->p_lflag & P_JETSAM_MASK) { + case P_JETSAM_VMPAGESHORTAGE: + exit_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE; + break; + case P_JETSAM_VMTHRASHING: + exit_data |= NOTE_EXIT_MEMORY_VMTHRASHING; + break; + case P_JETSAM_FCTHRASHING: + exit_data |= NOTE_EXIT_MEMORY_FCTHRASHING; + break; + case P_JETSAM_VNODE: + exit_data |= NOTE_EXIT_MEMORY_VNODE; + break; + case P_JETSAM_HIWAT: + exit_data |= NOTE_EXIT_MEMORY_HIWAT; + break; + case P_JETSAM_PID: + exit_data |= NOTE_EXIT_MEMORY_PID; + break; + case P_JETSAM_IDLEEXIT: + exit_data |= NOTE_EXIT_MEMORY_IDLE; + break; + } + } + + if ((p->p_csflags & CS_KILLED) != 0) { + exit_data |= NOTE_EXIT_CSERROR; + } + } + + proc_unlock(p); + + *data = exit_data; + + return (0); +} + diff --git a/bsd/kern/proc_uuid_policy.c b/bsd/kern/proc_uuid_policy.c index 5c69d488a..bc930ad14 100644 --- a/bsd/kern/proc_uuid_policy.c +++ b/bsd/kern/proc_uuid_policy.c @@ -89,13 +89,16 @@ static int proc_uuid_policy_insert(uuid_t uuid, uint32_t flags); static struct proc_uuid_policy_entry * -proc_uuid_policy_remove_locked(uuid_t uuid); +proc_uuid_policy_remove_locked(uuid_t uuid, uint32_t flags, int *should_delete); static int -proc_uuid_policy_remove(uuid_t uuid); +proc_uuid_policy_remove(uuid_t uuid, uint32_t flags); + +static struct proc_uuid_policy_entry * +proc_uuid_policy_lookup_locked(uuid_t uuid); static int -proc_uuid_policy_clear(void); +proc_uuid_policy_clear(uint32_t flags); void proc_uuid_policy_init(void) @@ -113,7 +116,7 @@ proc_uuid_policy_init(void) static int proc_uuid_policy_insert(uuid_t uuid, uint32_t flags) { - struct proc_uuid_policy_entry *entry, *delentry = NULL; + struct proc_uuid_policy_entry *entry, *foundentry = NULL; int error; #if PROC_UUID_POLICY_DEBUG @@ -131,25 +134,28 @@ proc_uuid_policy_insert(uuid_t uuid, uint32_t flags) PROC_UUID_POLICY_SUBSYS_LOCK(); - delentry = proc_uuid_policy_remove_locked(uuid); - - /* Our target UUID is not in the list, insert it now */ - if (proc_uuid_policy_count < MAX_PROC_UUID_POLICY_COUNT) { - LIST_INSERT_HEAD(UUIDHASH(uuid), entry, entries); - proc_uuid_policy_count++; + foundentry = proc_uuid_policy_lookup_locked(uuid); + if (foundentry != NULL) { + /* The UUID is already in the list. Update the flags. 
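 * Updates are additive: inserting (uuid, PROC_UUID_NO_CELLULAR) and later
 * (uuid, FLAG_X) -- FLAG_X being some hypothetical second policy flag --
 * leaves the entry holding the OR of both, and
 * proc_uuid_policy_remove_locked() below only unlinks the entry once a
 * remove clears the last of those bits.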
*/ + foundentry->flags |= flags; error = 0; + FREE(entry, M_PROC_UUID_POLICY); + entry = NULL; BUMP_PROC_UUID_POLICY_GENERATION_COUNT(); } else { - error = ENOMEM; + /* Our target UUID is not in the list, insert it now */ + if (proc_uuid_policy_count < MAX_PROC_UUID_POLICY_COUNT) { + LIST_INSERT_HEAD(UUIDHASH(uuid), entry, entries); + proc_uuid_policy_count++; + error = 0; + BUMP_PROC_UUID_POLICY_GENERATION_COUNT(); + } else { + error = ENOMEM; + } } PROC_UUID_POLICY_SUBSYS_UNLOCK(); - /* If we had found a pre-existing entry, deallocate its memory now */ - if (delentry) { - FREE(delentry, M_PROC_UUID_POLICY); - } - if (error) { FREE(entry, M_PROC_UUID_POLICY); dprintf("Failed to insert proc uuid policy (%s,0x%08x), table full\n", uuidstr, flags); @@ -161,28 +167,35 @@ proc_uuid_policy_insert(uuid_t uuid, uint32_t flags) } static struct proc_uuid_policy_entry * -proc_uuid_policy_remove_locked(uuid_t uuid) +proc_uuid_policy_remove_locked(uuid_t uuid, uint32_t flags, int *should_delete) { - struct proc_uuid_policy_entry *tmpentry, *searchentry, *delentry = NULL; - - LIST_FOREACH_SAFE(searchentry, UUIDHASH(uuid), entries, tmpentry) { - if (0 == memcmp(searchentry->uuid, uuid, sizeof(uuid_t))) { - /* Existing entry under same UUID. Remove it and save for de-allocation */ - delentry = searchentry; - LIST_REMOVE(searchentry, entries); + struct proc_uuid_policy_entry *foundentry = NULL; + if (should_delete) { + *should_delete = 0; + } + + foundentry = proc_uuid_policy_lookup_locked(uuid); + if (foundentry) { + if (foundentry->flags == flags) { + LIST_REMOVE(foundentry, entries); proc_uuid_policy_count--; - break; + if (should_delete) { + *should_delete = 1; + } + } else { + foundentry->flags &= ~flags; } } - - return delentry; + + return foundentry; } static int -proc_uuid_policy_remove(uuid_t uuid) +proc_uuid_policy_remove(uuid_t uuid, uint32_t flags) { struct proc_uuid_policy_entry *delentry = NULL; int error; + int should_delete = 0; #if PROC_UUID_POLICY_DEBUG uuid_string_t uuidstr; @@ -194,7 +207,7 @@ proc_uuid_policy_remove(uuid_t uuid) PROC_UUID_POLICY_SUBSYS_LOCK(); - delentry = proc_uuid_policy_remove_locked(uuid); + delentry = proc_uuid_policy_remove_locked(uuid, flags, &should_delete); if (delentry) { error = 0; @@ -206,7 +219,7 @@ proc_uuid_policy_remove(uuid_t uuid) PROC_UUID_POLICY_SUBSYS_UNLOCK(); /* If we had found a pre-existing entry, deallocate its memory now */ - if (delentry) { + if (delentry && should_delete) { FREE(delentry, M_PROC_UUID_POLICY); } @@ -219,10 +232,25 @@ proc_uuid_policy_remove(uuid_t uuid) return error; } +static struct proc_uuid_policy_entry * +proc_uuid_policy_lookup_locked(uuid_t uuid) +{ + struct proc_uuid_policy_entry *tmpentry, *searchentry, *foundentry = NULL; + + LIST_FOREACH_SAFE(searchentry, UUIDHASH(uuid), entries, tmpentry) { + if (0 == memcmp(searchentry->uuid, uuid, sizeof(uuid_t))) { + foundentry = searchentry; + break; + } + } + + return foundentry; +} + int proc_uuid_policy_lookup(uuid_t uuid, uint32_t *flags, int32_t *gencount) { - struct proc_uuid_policy_entry *tmpentry, *searchentry, *foundentry = NULL; + struct proc_uuid_policy_entry *foundentry = NULL; int error; #if PROC_UUID_POLICY_DEBUG @@ -244,13 +272,7 @@ proc_uuid_policy_lookup(uuid_t uuid, uint32_t *flags, int32_t *gencount) PROC_UUID_POLICY_SUBSYS_LOCK(); - LIST_FOREACH_SAFE(searchentry, UUIDHASH(uuid), entries, tmpentry) { - if (0 == memcmp(searchentry->uuid, uuid, sizeof(uuid_t))) { - /* Found existing entry */ - foundentry = searchentry; - break; - } - } + foundentry = 
proc_uuid_policy_lookup_locked(uuid); if (foundentry) { *flags = foundentry->flags; @@ -270,11 +292,16 @@ proc_uuid_policy_lookup(uuid_t uuid, uint32_t *flags, int32_t *gencount) } static int -proc_uuid_policy_clear(void) +proc_uuid_policy_clear(uint32_t flags) { struct proc_uuid_policy_entry *tmpentry, *searchentry; struct proc_uuid_policy_hashhead deletehead = LIST_HEAD_INITIALIZER(deletehead); unsigned long hashslot; + + /* If clear call includes no flags, infer 'No Cellular' flag */ + if (flags == PROC_UUID_POLICY_FLAGS_NONE) { + flags = PROC_UUID_NO_CELLULAR; + } PROC_UUID_POLICY_SUBSYS_LOCK(); @@ -284,10 +311,14 @@ proc_uuid_policy_clear(void) struct proc_uuid_policy_hashhead *headp = &proc_uuid_policy_hashtbl[hashslot]; LIST_FOREACH_SAFE(searchentry, headp, entries, tmpentry) { - /* Move each entry to our delete list */ - LIST_REMOVE(searchentry, entries); - proc_uuid_policy_count--; - LIST_INSERT_HEAD(&deletehead, searchentry, entries); + if ((searchentry->flags & flags) == searchentry->flags) { + /* We are clearing all flags for this entry, move entry to our delete list */ + LIST_REMOVE(searchentry, entries); + proc_uuid_policy_count--; + LIST_INSERT_HEAD(&deletehead, searchentry, entries); + } else { + searchentry->flags &= ~flags; + } } } @@ -307,6 +338,31 @@ proc_uuid_policy_clear(void) return 0; } +int proc_uuid_policy_kernel(uint32_t operation, uuid_t uuid, uint32_t flags) +{ + int error = 0; + + switch (operation) { + case PROC_UUID_POLICY_OPERATION_CLEAR: + error = proc_uuid_policy_clear(flags); + break; + + case PROC_UUID_POLICY_OPERATION_ADD: + error = proc_uuid_policy_insert(uuid, flags); + break; + + case PROC_UUID_POLICY_OPERATION_REMOVE: + error = proc_uuid_policy_remove(uuid, flags); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + int proc_uuid_policy(struct proc *p __unused, struct proc_uuid_policy_args *uap, int32_t *retval __unused) { int error = 0; @@ -321,41 +377,14 @@ int proc_uuid_policy(struct proc *p __unused, struct proc_uuid_policy_args *uap, dprintf("%s succeeded privilege check for proc_uuid_policy\n", p->p_comm); } - switch (uap->operation) { - case PROC_UUID_POLICY_OPERATION_CLEAR: - error = proc_uuid_policy_clear(); - break; - - case PROC_UUID_POLICY_OPERATION_ADD: - if (uap->uuidlen != sizeof(uuid_t)) { - error = ERANGE; - break; - } - - error = copyin(uap->uuid, uuid, sizeof(uuid_t)); - if (error) - break; - - error = proc_uuid_policy_insert(uuid, uap->flags); - break; - - case PROC_UUID_POLICY_OPERATION_REMOVE: - if (uap->uuidlen != sizeof(uuid_t)) { - error = ERANGE; - break; - } - - error = copyin(uap->uuid, uuid, sizeof(uuid_t)); - if (error) - break; - - error = proc_uuid_policy_remove(uuid); - break; - - default: - error = EINVAL; - break; + if (uap->uuid) { + if (uap->uuidlen != sizeof(uuid_t)) + return ERANGE; + + error = copyin(uap->uuid, uuid, sizeof(uuid_t)); + if (error) + return error; } - - return error; + + return proc_uuid_policy_kernel(uap->operation, uuid, uap->flags); } diff --git a/bsd/kern/process_policy.c b/bsd/kern/process_policy.c index 9ae4b32bc..ded6d7215 100644 --- a/bsd/kern/process_policy.c +++ b/bsd/kern/process_policy.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -206,6 +205,7 @@ handle_resourceuse(__unused int scope, __unused int action, __unused int policy, int entitled = TRUE; uint64_t interval = -1ULL; int error = 0; + uint8_t percentage; switch(policy_subtype) { case PROC_POLICY_RUSAGE_NONE: @@ -238,8 +238,6 @@ handle_resourceuse(__unused int 
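/*
 * (The pair of hunks around this point moves `uint8_t percentage' from the
 * body of the switch up to function scope. A declaration sitting between
 * "switch (action) {" and the first case label is legal C but is bypassed
 * by every jump to a case, so it can never be safely initialized there:
 *
 *	switch (x) {
 *		int y = 1;	// jumped over: y is indeterminate
 *	case 0:
 *		use(y);		// undefined behavior
 *	}
 *
 * Hoisting the declaration makes its lifetime and initialization explicit.)
 */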
scope, __unused int action, __unused int policy, #endif switch (action) { - uint8_t percentage; - case PROC_POLICY_ACTION_GET: error = proc_get_task_ruse_cpu(proc->task, &cpuattr.ppattr_cpu_attr, &percentage, @@ -325,7 +323,7 @@ handle_apptype( int scope, return (EINVAL); /* PROCESS ENABLE APPTYPE HOLDIMP */ - error = task_importance_hold_external_assertion(current_task(), 1); + error = task_importance_hold_legacy_external_assertion(current_task(), 1); return(error); @@ -336,7 +334,7 @@ handle_apptype( int scope, return (EINVAL); /* PROCESS ENABLE APPTYPE DROPIMP */ - error = task_importance_drop_external_assertion(current_task(), 1); + error = task_importance_drop_legacy_external_assertion(current_task(), 1); return(error); @@ -397,17 +395,17 @@ handle_boost(int scope, switch(policy_subtype) { case PROC_POLICY_IMP_IMPORTANT: - if (task_is_importance_receiver(target_proc->task) == FALSE) + if (task_is_importance_receiver_type(target_proc->task) == FALSE) return (EINVAL); switch (action) { case PROC_POLICY_ACTION_HOLD: /* PROCESS HOLD BOOST IMPORTANT */ - error = task_importance_hold_external_assertion(current_task(), 1); + error = task_importance_hold_legacy_external_assertion(current_task(), 1); break; case PROC_POLICY_ACTION_DROP: /* PROCESS DROP BOOST IMPORTANT */ - error = task_importance_drop_external_assertion(current_task(), 1); + error = task_importance_drop_legacy_external_assertion(current_task(), 1); break; default: error = (EINVAL); @@ -469,6 +467,56 @@ proc_pidbackgrounded(pid_t pid, uint32_t* state) return (0); } +/* + * Get the darwin background state of the originator. If the current + * process app type is App, then it is the originator, else if it is + * a Daemon, then creator of the Resource Accounting attribute of + * the current thread voucher is the originator of the work. + */ +int +proc_get_originatorbgstate(uint32_t *is_backgrounded) +{ + uint32_t bgstate; + proc_t p = current_proc(); + uint32_t flagsp; + kern_return_t kr; + pid_t pid; + int ret; + thread_t thread = current_thread(); + + bgstate = proc_get_effective_thread_policy(thread, TASK_POLICY_DARWIN_BG); + + /* If current thread or task backgrounded, return background */ + if (bgstate) { + *is_backgrounded = 1; + return 0; + } + + /* Check if current process app type is App, then return foreground */ + proc_get_darwinbgstate(p->task, &flagsp); + if ((flagsp & PROC_FLAG_APPLICATION) == PROC_FLAG_APPLICATION) { + *is_backgrounded = 0; + return 0; + } + + /* + * Get the current voucher origin pid and it's bgstate.The pid + * returned here might not be valid or may have been recycled. 
+ */ + kr = thread_get_current_voucher_origin_pid(&pid); + if (kr != KERN_SUCCESS) { + if (kr == KERN_INVALID_TASK) + return ESRCH; + else if (kr == KERN_INVALID_VALUE) + return ENOATTR; + else + return EINVAL; + } + + ret = proc_pidbackgrounded(pid, is_backgrounded); + return ret; +} + int proc_apply_resource_actions(void * bsdinfo, __unused int type, int action) { @@ -499,7 +547,6 @@ proc_apply_resource_actions(void * bsdinfo, __unused int type, int action) return(0); } - int proc_restore_resource_actions(void * bsdinfo, __unused int type, int action) { diff --git a/bsd/kern/pthread_shims.c b/bsd/kern/pthread_shims.c index d951de700..4e41a19e4 100644 --- a/bsd/kern/pthread_shims.c +++ b/bsd/kern/pthread_shims.c @@ -49,6 +49,13 @@ /* version number of the in-kernel shims given to pthread.kext */ #define PTHREAD_SHIMS_VERSION 1 +/* on arm, the callbacks function has two #ifdef arm ponters */ +#define PTHREAD_CALLBACK_MEMBER ml_get_max_cpus + +/* compile time asserts to check the length of structures in pthread_shims.h */ +char pthread_functions_size_compile_assert[(sizeof(struct pthread_functions_s) - offsetof(struct pthread_functions_s, psynch_rw_yieldwrlock) - sizeof(void*)) == (sizeof(void*) * 100) ? 1 : -1]; +char pthread_callbacks_size_compile_assert[(sizeof(struct pthread_callbacks_s) - offsetof(struct pthread_callbacks_s, PTHREAD_CALLBACK_MEMBER) - sizeof(void*)) == (sizeof(void*) * 100) ? 1 : -1]; + /* old pthread code had definitions for these as they don't exist in headers */ extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t); extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t); @@ -67,8 +74,10 @@ PTHREAD_STRUCT_ACCESSOR(proc_get_threadstart, proc_set_threadstart, user_addr_t, PTHREAD_STRUCT_ACCESSOR(proc_get_pthsize, proc_set_pthsize, int, struct proc*, p_pthsize); PTHREAD_STRUCT_ACCESSOR(proc_get_wqthread, proc_set_wqthread, user_addr_t, struct proc*, p_wqthread); PTHREAD_STRUCT_ACCESSOR(proc_get_targconc, proc_set_targconc, user_addr_t, struct proc*, p_targconc); +PTHREAD_STRUCT_ACCESSOR(proc_get_stack_addr_hint, proc_set_stack_addr_hint, user_addr_t, struct proc *, p_stack_addr_hint); PTHREAD_STRUCT_ACCESSOR(proc_get_dispatchqueue_offset, proc_set_dispatchqueue_offset, uint64_t, struct proc*, p_dispatchqueue_offset); PTHREAD_STRUCT_ACCESSOR(proc_get_dispatchqueue_serialno_offset, proc_set_dispatchqueue_serialno_offset, uint64_t, struct proc*, p_dispatchqueue_serialno_offset); +PTHREAD_STRUCT_ACCESSOR(proc_get_pthread_tsd_offset, proc_set_pthread_tsd_offset, uint32_t, struct proc *, p_pth_tsd_offset); PTHREAD_STRUCT_ACCESSOR(proc_get_wqptr, proc_set_wqptr, void*, struct proc*, p_wqptr); PTHREAD_STRUCT_ACCESSOR(proc_get_wqsize, proc_set_wqsize, int, struct proc*, p_wqsize); PTHREAD_STRUCT_ACCESSOR(proc_get_pthhash, proc_set_pthhash, void*, struct proc*, p_pthhash); @@ -131,6 +140,48 @@ _current_map(void) return current_map(); } +static boolean_t +qos_main_thread_active(void) +{ + return TRUE; +} + + +static int proc_usynch_get_requested_thread_qos(struct uthread *uth) +{ + task_t task = current_task(); + thread_t thread = uth ? uth->uu_thread : current_thread(); + int requested_qos; + + requested_qos = proc_get_task_policy(task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS); + + /* + * For the purposes of userspace synchronization, it doesn't make sense to place an override of UNSPECIFIED + * on another thread, if the current thread doesn't have any QoS set. In these cases, upgrade to + * THREAD_QOS_USER_INTERACTIVE. 
+ */ + if (requested_qos == THREAD_QOS_UNSPECIFIED) { + requested_qos = THREAD_QOS_USER_INTERACTIVE; + } + + return requested_qos; +} + +static boolean_t proc_usynch_thread_qos_add_override(struct uthread *uth, uint64_t tid, int override_qos, boolean_t first_override_for_resource) +{ + task_t task = current_task(); + thread_t thread = uth ? uth->uu_thread : THREAD_NULL; + + return proc_thread_qos_add_override(task, thread, tid, override_qos, first_override_for_resource); +} + +static boolean_t proc_usynch_thread_qos_remove_override(struct uthread *uth, uint64_t tid) +{ + task_t task = current_task(); + thread_t thread = uth ? uth->uu_thread : THREAD_NULL; + + return proc_thread_qos_remove_override(task, thread, tid); +} /* kernel (core) to kext shims */ @@ -210,8 +261,17 @@ bsdthread_create(struct proc *p, struct bsdthread_create_args *uap, user_addr_t int bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused int32_t *retval) { - return pthread_functions->bsdthread_register(p, uap->threadstart, uap->wqthread, uap->pthsize, uap->dummy_value, - uap->targetconc_ptr, uap->dispatchqueue_offset, retval); + if (pthread_functions->version >= 1) { + return pthread_functions->bsdthread_register2(p, uap->threadstart, uap->wqthread, + uap->flags, uap->stack_addr_hint, + uap->targetconc_ptr, uap->dispatchqueue_offset, + uap->tsd_offset, retval); + } else { + return pthread_functions->bsdthread_register(p, uap->threadstart, uap->wqthread, + uap->flags, uap->stack_addr_hint, + uap->targetconc_ptr, uap->dispatchqueue_offset, + retval); + } } int @@ -220,6 +280,13 @@ bsdthread_terminate(struct proc *p, struct bsdthread_terminate_args *uap, int32_ return pthread_functions->bsdthread_terminate(p, uap->stackaddr, uap->freesize, uap->port, uap->sem, retval); } +int +bsdthread_ctl(struct proc *p, struct bsdthread_ctl_args *uap, int *retval) +{ + return pthread_functions->bsdthread_ctl(p, uap->cmd, uap->arg1, uap->arg2, uap->arg3, retval); +} + + int thread_selfid(struct proc *p, __unused struct thread_selfid_args *uap, uint64_t *retval) { @@ -324,14 +391,6 @@ psynch_rw_downgrade(__unused proc_t p, __unused struct psynch_rw_downgrade_args return 0; } -/* unimplemented guard */ - -// static void -// unhooked_panic(void) -// { -// panic("pthread system call not hooked up"); -// } - /* * The callbacks structure (defined in pthread_shims.h) contains a collection * of kernel functions that were not deemed sensible to expose as a KPI to all @@ -398,6 +457,8 @@ static struct pthread_callbacks_s pthread_callbacks = { .thread_static_param = thread_static_param, .thread_create_workq = thread_create_workq, .thread_policy_set_internal = thread_policy_set_internal, + .thread_policy_get = thread_policy_get, + .thread_set_voucher_name = thread_set_voucher_name, .thread_affinity_set = thread_affinity_set, @@ -419,6 +480,19 @@ static struct pthread_callbacks_s pthread_callbacks = { .proc_get_dispatchqueue_serialno_offset = proc_get_dispatchqueue_serialno_offset, .proc_set_dispatchqueue_serialno_offset = proc_set_dispatchqueue_serialno_offset, + + .proc_get_stack_addr_hint = proc_get_stack_addr_hint, + .proc_set_stack_addr_hint = proc_set_stack_addr_hint, + .proc_get_pthread_tsd_offset = proc_get_pthread_tsd_offset, + .proc_set_pthread_tsd_offset = proc_set_pthread_tsd_offset, + + .thread_set_tsd_base = thread_set_tsd_base, + + .proc_usynch_get_requested_thread_qos = proc_usynch_get_requested_thread_qos, + .proc_usynch_thread_qos_add_override = proc_usynch_thread_qos_add_override, + 
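/*
 * (This table and the version-gated dispatch in bsdthread_register() above
 * are the two halves of the kext shim contract: the kernel calls an entry
 * point added in a later table revision only after checking
 * pthread_functions->version. As a sketch, with hypothetical names:
 *
 *	struct fns { int version; int (*op)(void); int (*op2)(void); };
 *	int call(struct fns *f)
 *	{
 *		return (f->version >= 1) ? f->op2() : f->op();
 *	}
 *
 * The compile-time asserts near the top of this file are the other half:
 * the negative-array-size trick breaks the build if either structure's
 * block of trailing reserved slots is ever resized by accident.)
 */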
.proc_usynch_thread_qos_remove_override = proc_usynch_thread_qos_remove_override, + + .qos_main_thread_active = qos_main_thread_active, }; pthread_callbacks_t pthread_kern = &pthread_callbacks; diff --git a/bsd/kern/socket_info.c b/bsd/kern/socket_info.c index 157b47dc3..73725bbb4 100644 --- a/bsd/kern/socket_info.c +++ b/bsd/kern/socket_info.c @@ -149,12 +149,14 @@ fill_socketinfo(struct socket *so, struct socket_info *si) si->soi_kind = SOCKINFO_TCP; tcpsi->tcpsi_state = tp->t_state; - tcpsi->tcpsi_timer[TCPT_REXMT] = + tcpsi->tcpsi_timer[TSI_T_REXMT] = tp->t_timer[TCPT_REXMT]; - tcpsi->tcpsi_timer[TCPT_PERSIST] = + tcpsi->tcpsi_timer[TSI_T_PERSIST] = tp->t_timer[TCPT_PERSIST]; - tcpsi->tcpsi_timer[TCPT_KEEP] = tp->t_timer[TCPT_KEEP]; - tcpsi->tcpsi_timer[TCPT_2MSL] = tp->t_timer[TCPT_2MSL]; + tcpsi->tcpsi_timer[TSI_T_KEEP] = + tp->t_timer[TCPT_KEEP]; + tcpsi->tcpsi_timer[TSI_T_2MSL] = + tp->t_timer[TCPT_2MSL]; tcpsi->tcpsi_mss = tp->t_maxseg; tcpsi->tcpsi_flags = tp->t_flags; tcpsi->tcpsi_tp = diff --git a/bsd/kern/subr_xxx.c b/bsd/kern/subr_xxx.c index 8879e5d87..946f938e3 100644 --- a/bsd/kern/subr_xxx.c +++ b/bsd/kern/subr_xxx.c @@ -70,7 +70,7 @@ #include #include /* for psignal() */ - +#include #ifdef GPROF #include @@ -195,4 +195,22 @@ cfreemem(caddr_t cp, int size) } #endif +#if !CRYPTO +#include + +/* Stubs must be present in all configs for Unsupported KPI exports */ + +void +rc4_init(struct rc4_state *state __unused, const u_char *key __unused, int keylen __unused) +{ + panic("rc4_init: unsupported kernel configuration"); +} + +void +rc4_crypt(struct rc4_state *state __unused, + const u_char *inbuf __unused, u_char *outbuf __unused, int buflen __unused) +{ + panic("rc4_crypt: unsupported kernel configuration"); +} +#endif /* !CRYPTO */ diff --git a/bsd/kern/sys_coalition.c b/bsd/kern/sys_coalition.c new file mode 100644 index 000000000..3255fb0d6 --- /dev/null +++ b/bsd/kern/sys_coalition.c @@ -0,0 +1,281 @@ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +/* Coalitions syscalls */ + +/* + * Create a new, empty coalition and return its ID. + * + * Returns: + * EINVAL Flags parameter was invalid + * ENOMEM Unable to allocate kernel resources for a new coalition + * EFAULT cidp parameter pointed to invalid memory. + * + * Returns with reference held for userspace caller. + */ +static +int +coalition_create_syscall(user_addr_t cidp, uint32_t flags) +{ + int error = 0; + kern_return_t kr; + uint64_t cid; + coalition_t coal; + + if ((flags & (~COALITION_CREATE_FLAG_MASK)) != 0) { + return EINVAL; + } + + boolean_t privileged = flags & COALITION_CREATE_FLAG_PRIVILEGED; + + kr = coalition_create_internal(&coal, privileged); + if (kr != KERN_SUCCESS) { + /* for now, the only kr is KERN_RESOURCE_SHORTAGE */ + error = ENOMEM; + goto out; + } + + cid = coalition_id(coal); + +#if COALITION_DEBUG + printf("%s(addr, %u) -> %llu\n", __func__, flags, cid); +#endif + error = copyout(&cid, cidp, sizeof(cid)); +out: + return error; +} + +/* + * Request to terminate the coalition identified by ID. + * Attempts to spawn into this coalition using the posix_spawnattr will begin + * failing. Processes already within the coalition may still fork. + * Arms the 'coalition is empty' notification when the coalition's active + * count reaches zero. + * + * Returns: + * ESRCH No coalition with that ID could be found. + * EALREADY The coalition with that ID has already been terminated. + * EFAULT cidp parameter pointed to invalid memory. 
+ * EPERM	Caller doesn't have permission to terminate that coalition.
+ */
+static
+int
+coalition_request_terminate_syscall(user_addr_t cidp, uint32_t flags)
+{
+	kern_return_t kr;
+	int error = 0;
+	uint64_t cid;
+	coalition_t coal;
+
+	if (flags != 0) {
+		return EINVAL;
+	}
+
+	error = copyin(cidp, &cid, sizeof(cid));
+	if (error) {
+		return error;
+	}
+
+	coal = coalition_find_by_id(cid);
+	if (coal == COALITION_NULL) {
+		return ESRCH;
+	}
+
+	kr = coalition_request_terminate_internal(coal);
+	coalition_release(coal);
+
+	switch (kr) {
+	case KERN_SUCCESS:
+		break;
+	case KERN_DEFAULT_SET:
+		error = EPERM;
+		break;
+	case KERN_TERMINATED:
+		error = EALREADY;
+		break;
+	case KERN_INVALID_NAME:
+		error = ESRCH;
+		break;
+	default:
+		error = EIO;
+		break;
+	}
+
+#if COALITION_DEBUG
+	printf("%s(%llu, %u) -> %d\n", __func__, cid, flags, error);
+#endif
+
+	return error;
+}
+
+/*
+ * Request the kernel to deallocate the coalition identified by ID, which
+ * must be both terminated and empty. This balances the reference taken
+ * in coalition_create.
+ * The memory containing the coalition object may not be freed just yet, if
+ * other kernel operations still hold references to it.
+ *
+ * Returns:
+ * EINVAL	Flags parameter was invalid
+ * ESRCH	Coalition ID refers to a coalition that doesn't exist.
+ * EBUSY	Coalition has not yet been terminated.
+ * EBUSY	Coalition is still active.
+ * EFAULT	cidp parameter pointed to invalid memory.
+ * EPERM	Caller doesn't have permission to reap that coalition.
+ * Consumes one reference, "held" by caller since coalition_create
+ */
+static
+int
+coalition_reap_syscall(user_addr_t cidp, uint32_t flags)
+{
+	kern_return_t kr;
+	int error = 0;
+	uint64_t cid;
+	coalition_t coal;
+
+	if (flags != 0) {
+		return EINVAL;
+	}
+
+	error = copyin(cidp, &cid, sizeof(cid));
+	if (error) {
+		return error;
+	}
+
+	coal = coalition_find_by_id(cid);
+	if (coal == COALITION_NULL) {
+		return ESRCH;
+	}
+
+	kr = coalition_reap_internal(coal);
+	coalition_release(coal);
+
+	switch (kr) {
+	case KERN_SUCCESS:
+		break;
+	case KERN_DEFAULT_SET:
+		error = EPERM;
+		break;
+	case KERN_TERMINATED:
+		error = ESRCH;
+		break;
+	case KERN_FAILURE:
+		error = EBUSY;
+		break;
+	default:
+		error = EIO;
+		break;
+	}
+
+#if COALITION_DEBUG
+	printf("%s(%llu, %u) -> %d\n", __func__, cid, flags, error);
+#endif
+
+	return error;
+}
+
+/* Syscall demux.
+ * Returns EPERM if the calling process is not privileged to make this call.
+ */
+int coalition(proc_t p, struct coalition_args *cap, __unused int32_t *retval)
+{
+	uint32_t operation = cap->operation;
+	user_addr_t cidp = cap->cid;
+	uint32_t flags = cap->flags;
+	int error = 0;
+
+	if (!task_is_in_privileged_coalition(p->task)) {
+		return EPERM;
+	}
+
+	switch (operation) {
+	case COALITION_OP_CREATE:
+		error = coalition_create_syscall(cidp, flags);
+		break;
+	case COALITION_OP_REAP:
+		error = coalition_reap_syscall(cidp, flags);
+		break;
+	case COALITION_OP_TERMINATE:
+		error = coalition_request_terminate_syscall(cidp, flags);
+		break;
+	default:
+		error = ENOSYS;
+	}
+	return error;
+}
+
+/* This is a temporary interface, likely to be changed by 15385642.
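 * (The kern_return_t -> errno switches in this file, including the one
 * below, share one shape: map each code the callee can actually produce to
 * a specific errno and reserve EIO for anything unexpected. Equivalently,
 * as a hypothetical table-style helper:
 *
 *	static int kr_to_errno(kern_return_t kr)
 *	{
 *		switch (kr) {
 *		case KERN_SUCCESS:		return 0;
 *		case KERN_INVALID_ARGUMENT:	return EINVAL;
 *		case KERN_RESOURCE_SHORTAGE:	return ENOMEM;
 *		default:			return EIO;
 *		}
 *	}
 * )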
*/ +static int __attribute__ ((noinline)) +coalition_info_resource_usage(coalition_t coal, user_addr_t buffer, user_size_t bufsize) +{ + kern_return_t kr; + struct coalition_resource_usage cru; + + if (bufsize != sizeof(cru)) { + return EINVAL; + } + + kr = coalition_resource_usage_internal(coal, &cru); + + switch (kr) { + case KERN_INVALID_ARGUMENT: + return EINVAL; + case KERN_RESOURCE_SHORTAGE: + return ENOMEM; + case KERN_SUCCESS: + break; + default: + return EIO; /* shrug */ + } + + return copyout(&cru, buffer, bufsize); +} + +int coalition_info(proc_t p, struct coalition_info_args *uap, __unused int32_t *retval) +{ + user_addr_t cidp = uap->cid; + user_addr_t buffer = uap->buffer; + user_addr_t bufsizep = uap->bufsize; + user_size_t bufsize; + uint32_t flavor = uap->flavor; + int error; + uint64_t cid; + coalition_t coal; + + error = copyin(cidp, &cid, sizeof(cid)); + if (error) { + return error; + } + + coal = coalition_find_by_id(cid); + if (coal == COALITION_NULL) { + return ESRCH; + } + /* TODO: priv check? EPERM or ESRCH? */ + + if (IS_64BIT_PROCESS(p)) { + user64_size_t size64; + error = copyin(bufsizep, &size64, sizeof(size64)); + bufsize = (user_size_t)size64; + } else { + user32_size_t size32; + error = copyin(bufsizep, &size32, sizeof(size32)); + bufsize = (user_size_t)size32; + } + if (error) { + goto bad; + } + + switch (flavor) { + case COALITION_INFO_RESOURCE_USAGE: + error = coalition_info_resource_usage(coal, buffer, bufsize); + break; + default: + error = EINVAL; + } + +bad: + coalition_release(coal); + return error; +} diff --git a/bsd/kern/sys_domain.c b/bsd/kern/sys_domain.c index d28648810..76741295c 100644 --- a/bsd/kern/sys_domain.c +++ b/bsd/kern/sys_domain.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -35,6 +35,7 @@ #include #include #include +#include struct domain *systemdomain = NULL; @@ -47,6 +48,10 @@ struct domain systemdomain_s = { .dom_init = systemdomain_init, }; +SYSCTL_NODE(_net, PF_SYSTEM, systm, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "System domain"); + + static void systemdomain_init(struct domain *dp) { diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index f41cd0018..c839e868f 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include @@ -104,9 +105,7 @@ #include #include #include -#if CONFIG_TELEMETRY #include -#endif #include #include @@ -139,8 +138,6 @@ #include #include -#include - /* XXX should be in a header file somewhere */ void evsofree(struct socket *); void evpipefree(struct pipe *); @@ -149,9 +146,7 @@ void postevent(struct socket *, struct sockbuf *, int); extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp); int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval); -int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval); -extern void *get_bsduthreadarg(thread_t); -extern int *get_bsduthreadrval(thread_t); +int wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval); __private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp, user_addr_t bufp, user_size_t nbyte, @@ -166,10 +161,6 @@ __private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, in /* Conflict wait queue for when selects collide (opaque type) */ struct wait_queue select_conflict_queue; -#if 13841988 -int temp_debug_13841988 = 0; -#endif - /* * Init routine called from bsd_init.c */ @@ -178,11 +169,6 @@ void select_wait_queue_init(void) { wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO); -#if 13841988 - if (PE_parse_boot_argn("temp_debug_13841988", &temp_debug_13841988, sizeof(temp_debug_13841988))) { - kprintf("Temporary debugging for 13841988 enabled\n"); - } -#endif } #define f_flag f_fglob->fg_flag @@ -470,6 +456,7 @@ write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *re struct fileproc *fp; int error; int fd = uap->fd; + bool wrote_some = false; AUDIT_ARG(fd, fd); @@ -478,14 +465,20 @@ write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *re return(error); if ((fp->f_flag & FWRITE) == 0) { error = EBADF; + } else if (FP_ISGUARDED(fp, GUARD_WRITE)) { + proc_fdlock(p); + error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE); + proc_fdunlock(p); } else { struct vfs_context context = *(vfs_context_current()); context.vc_ucred = fp->f_fglob->fg_cred; error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte, (off_t)-1, 0, retval); + + wrote_some = *retval > 0; } - if (error == 0) + if (wrote_some) fp_drop_written(p, fd, fp); else fp_drop(p, fd, fp, 0); @@ -517,6 +510,7 @@ pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t * int error; int fd = uap->fd; vnode_t vp = (vnode_t)0; + bool wrote_some = false; AUDIT_ARG(fd, fd); @@ -526,6 +520,10 @@ pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t * if ((fp->f_flag & FWRITE) == 0) { error = EBADF; + } else if (FP_ISGUARDED(fp, GUARD_WRITE)) { + proc_fdlock(p); + error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE); + proc_fdunlock(p); } else { struct vfs_context context = *vfs_context_current(); context.vc_ucred = fp->f_fglob->fg_cred; @@ -550,9 +548,10 @@ pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t * error = dofilewrite(&context, fp, uap->buf, uap->nbyte, uap->offset, FOF_OFFSET, retval); + wrote_some = *retval > 0; } errout: - if (error == 0) + if (wrote_some) fp_drop_written(p, fd, fp); else fp_drop(p, fd, fp, 0); @@ -579,8 +578,10 @@ dofilewrite(vfs_context_t ctx, struct fileproc *fp, user_ssize_t bytecnt; char uio_buf[ UIO_SIZEOF(1) ]; - if (nbyte > INT_MAX) + if (nbyte > INT_MAX) { + *retval = 0; 
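/*
 * (Zeroing *retval on this early-error path matters now that the wrappers
 * above decide fd accounting from the byte count rather than from
 * error == 0 -- see "wrote_some = *retval > 0": a stale positive *retval
 * here would make a failed write call fp_drop_written().)
 */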
return (EINVAL); + } if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE, @@ -624,7 +625,9 @@ writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t * { uio_t auio = NULL; int error; + struct fileproc *fp; struct user_iovec *iovp; + bool wrote_some = false; AUDIT_ARG(fd, uap->fd); @@ -658,7 +661,26 @@ writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t * if (error) { goto ExitThisRoutine; } - error = wr_uio(p, uap->fd, auio, retval); + + error = fp_lookup(p, uap->fd, &fp, 0); + if (error) + goto ExitThisRoutine; + + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + } else if (FP_ISGUARDED(fp, GUARD_WRITE)) { + proc_fdlock(p); + error = fp_guard_exception(p, uap->fd, fp, kGUARD_EXC_WRITE); + proc_fdunlock(p); + } else { + error = wr_uio(p, fp, auio, retval); + wrote_some = *retval > 0; + } + + if (wrote_some) + fp_drop_written(p, uap->fd, fp); + else + fp_drop(p, uap->fd, fp, 0); ExitThisRoutine: if (auio != NULL) { @@ -669,21 +691,12 @@ ExitThisRoutine: int -wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval) +wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval) { - struct fileproc *fp; int error; user_ssize_t count; struct vfs_context context = *vfs_context_current(); - error = fp_lookup(p,fdes,&fp,0); - if (error) - return(error); - - if ((fp->f_flag & FWRITE) == 0) { - error = EBADF; - goto out; - } count = uio_resid(uio); context.vc_ucred = fp->f_cred; @@ -699,11 +712,6 @@ wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval) } *retval = count - uio_resid(uio); -out: - if (error == 0) - fp_drop_written(p, fdes, fp); - else - fp_drop(p, fdes, fp, 0); return(error); } @@ -924,7 +932,7 @@ int selwait, nselcoll; #define SEL_SECONDPASS 2 extern int selcontinue(int error); extern int selprocess(int error, int sel_pass); -static int selscan(struct proc *p, struct _select * sel, +static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata, int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub); static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count); static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount); @@ -953,16 +961,19 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva thread_t th_act; struct uthread *uth; struct _select *sel; + struct _select_data *seldata; int needzerofill = 1; int count = 0; th_act = current_thread(); uth = get_bsdthread_info(th_act); sel = &uth->uu_select; - sel->data = &uth->uu_kevent.ss_select_data; - retval = (int *)get_bsduthreadrval(th_act); + seldata = &uth->uu_kevent.ss_select_data; *retval = 0; + seldata->args = uap; + seldata->retval = retval; + if (uap->nd < 0) { return (EINVAL); } @@ -1054,16 +1065,16 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva } clock_absolutetime_interval_to_deadline( - tvtoabstime(&atv), &sel->data->abstime); + tvtoabstime(&atv), &seldata->abstime); } else - sel->data->abstime = 0; + seldata->abstime = 0; if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) { goto continuation; } - sel->data->count = count; + seldata->count = count; size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK); if (uth->uu_allocsize) { if (uth->uu_wqset == 0) @@ -1083,7 +1094,7 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva panic("failed to allocate memory for 
waitqueue\n"); } bzero(uth->uu_wqset, size); - sel->data->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET; + seldata->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET; wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST)); continuation: @@ -1122,9 +1133,10 @@ selprocess(int error, int sel_pass) thread_t th_act; struct uthread *uth; struct proc *p; - struct select_args *uap; + struct select_nocancel_args *uap; int *retval; struct _select *sel; + struct _select_data *seldata; int unwind = 1; int prepost = 0; int somewakeup = 0; @@ -1133,14 +1145,15 @@ selprocess(int error, int sel_pass) p = current_proc(); th_act = current_thread(); - uap = (struct select_args *)get_bsduthreadarg(th_act); - retval = (int *)get_bsduthreadrval(th_act); uth = get_bsdthread_info(th_act); sel = &uth->uu_select; + seldata = &uth->uu_kevent.ss_select_data; + uap = seldata->args; + retval = seldata->retval; if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) unwind = 0; - if (sel->data->count == 0) + if (seldata->count == 0) unwind = 0; retry: if (error != 0) { @@ -1151,7 +1164,7 @@ retry: ncoll = nselcoll; OSBitOrAtomic(P_SELECT, &p->p_flag); /* skip scans if the select is just for timeouts */ - if (sel->data->count) { + if (seldata->count) { /* * Clear out any dangling refs from prior calls; technically * there should not be any. @@ -1159,7 +1172,7 @@ retry: if (sel_pass == SEL_FIRSTPASS) wait_queue_sub_clearrefs(uth->uu_wqset); - error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset); + error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset); if (error || *retval) { goto done; } @@ -1180,7 +1193,7 @@ retry: uint64_t now; clock_get_uptime(&now); - if (now >= sel->data->abstime) + if (now >= seldata->abstime) goto done; } @@ -1195,7 +1208,7 @@ retry: * To effect a poll, the timeout argument should be * non-nil, pointing to a zero-valued timeval structure. */ - if (uap->tv && sel->data->abstime == 0) { + if (uap->tv && seldata->abstime == 0) { goto done; } @@ -1208,13 +1221,13 @@ retry: OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag); /* if the select is just for timeout skip check */ - if (sel->data->count &&(sel_pass == SEL_SECONDPASS)) + if (seldata->count &&(sel_pass == SEL_SECONDPASS)) panic("selprocess: 2nd pass assertwaiting"); /* Wait Queue Subordinate has waitqueue as first element */ wait_result = wait_queue_assert_wait_with_leeway((wait_queue_t)uth->uu_wqset, NULL, THREAD_ABORTSAFE, - TIMEOUT_URGENCY_USER_NORMAL, sel->data->abstime, 0); + TIMEOUT_URGENCY_USER_NORMAL, seldata->abstime, 0); if (wait_result != THREAD_AWAKENED) { /* there are no preposted events */ error = tsleep1(NULL, PSOCK | PCATCH, @@ -1280,7 +1293,7 @@ done: * invalid. 
*/ static int -selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval, +selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub) { struct filedesc *fdp = p->p_fd; @@ -1308,11 +1321,11 @@ selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval, } ibits = sel->ibits; obits = sel->obits; - wql = sel->data->wql; + wql = seldata->wql; nw = howmany(nfd, NFDBITS); - count = sel->data->count; + count = seldata->count; nc = 0; if (count) { @@ -1326,7 +1339,11 @@ selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval, while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); - fp = fdp->fd_ofiles[fd]; + + if (fd < fdp->fd_nfiles) + fp = fdp->fd_ofiles[fd]; + else + fp = NULL; if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) { /* @@ -1555,9 +1572,7 @@ poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data) if (fds->revents & POLLHUP) mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND ); else { - mask = 0; - if (kevp->data != 0) - mask |= (POLLIN | POLLRDNORM ); + mask = (POLLIN | POLLRDNORM ); if (kevp->flags & EV_OOBAND) mask |= ( POLLPRI | POLLRDBAND ); } @@ -1649,7 +1664,12 @@ selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp) bits = iptr[i/NFDBITS]; while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); - fp = fdp->fd_ofiles[fd]; + + if (fd < fdp->fd_nfiles) + fp = fdp->fd_ofiles[fd]; + else + fp = NULL; + if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) { *countp = 0; @@ -2762,6 +2782,7 @@ waitevent_close(struct proc *p, struct fileproc *fp) * Returns: 0 Success * EWOULDBLOCK Timeout is too short * copyout:EFAULT Bad user buffer + * mac_system_check_info:EPERM Client not allowed to perform this operation * * Notes: A timeout seems redundant, since if it's tolerable to not * have a system UUID in hand, then why ask for one? @@ -2775,15 +2796,6 @@ gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retv __darwin_uuid_t uuid_kern; /* for IOKit call */ if (!uap->spi) { -#if 13841988 - uint32_t flags; - if (temp_debug_13841988 && (0 == proc_get_darwinbgstate(p->task, &flags)) && (flags & PROC_FLAG_IOS_APPLICATION)) { - printf("Unauthorized access to gethostuuid() by %s(%d)\n", p->p_comm, proc_pid(p)); - return (EPERM); - } -#else - /* Perform sandbox check */ -#endif } /* Convert the 32/64 bit timespec into a mach_timespec_t */ @@ -2931,16 +2943,22 @@ ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) return (rval); } -#if CONFIG_TELEMETRY int telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval) { int error = 0; switch (args->cmd) { +#if CONFIG_TELEMETRY case TELEMETRY_CMD_TIMER_EVENT: error = telemetry_timer_event(args->deadline, args->interval, args->leeway); break; +#endif /* CONFIG_TELEMETRY */ + case TELEMETRY_CMD_VOUCHER_NAME: + if (thread_set_voucher_name((mach_port_name_t)args->deadline)) + error = EINVAL; + break; + default: error = EINVAL; break; @@ -2948,4 +2966,3 @@ telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t return (error); } -#endif /* CONFIG_TELEMETRY */ diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c index 83ea24a35..374d82381 100644 --- a/bsd/kern/sys_pipe.c +++ b/bsd/kern/sys_pipe.c @@ -17,7 +17,7 @@ * are met. */ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2014 Apple Inc. All rights reserved. 
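The pipe_select() hunk just below tightens writability from "any free space" to "at least PIPE_BUF bytes free", so a write of up to PIPE_BUF bytes issued after select() reports the pipe ready can complete atomically without blocking. A small user-space probe of that behavior (illustrative only; not part of the patch):

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>      /* PIPE_BUF */
#include <sys/select.h>

int main(void)
{
	int pfd[2];
	char byte = 'x';

	if (pipe(pfd) == -1)
		return 1;
	fcntl(pfd[1], F_SETFL, O_NONBLOCK);

	while (write(pfd[1], &byte, 1) == 1)   /* fill the pipe completely */
		;
	(void)read(pfd[0], &byte, 1);          /* leave exactly 1 byte free */

	fd_set wfds;
	struct timeval tv = { 0, 0 };
	FD_ZERO(&wfds);
	FD_SET(pfd[1], &wfds);
	/* With the patched check, 1 free byte < PIPE_BUF, so this reports 0;
	 * the old ">(space) > 0" test would have reported the fd writable. */
	int n = select(pfd[1] + 1, NULL, &wfds, NULL, &tv);
	printf("writable fds: %d (PIPE_BUF = %d)\n", n, PIPE_BUF);
	return 0;
}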
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -317,7 +317,7 @@ pipe_touch(struct pipe *tpipe, int touch) } } -static const unsigned int pipesize_blocks[] = {128,256,1024,2048,PAGE_SIZE, PAGE_SIZE * 2, PIPE_SIZE , PIPE_SIZE * 4 }; +static const unsigned int pipesize_blocks[] = {128,256,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 }; /* * finds the right size from possible sizes in pipesize_blocks @@ -1216,7 +1216,7 @@ pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) wpipe->pipe_state |= PIPE_WSELECT; if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && - (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) > 0)) { + (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) { retnum = 1; } else { diff --git a/bsd/kern/syscalls.master b/bsd/kern/syscalls.master index 67e8898b8..d19267e84 100644 --- a/bsd/kern/syscalls.master +++ b/bsd/kern/syscalls.master @@ -49,21 +49,21 @@ 5 AUE_OPEN_RWTC ALL { int open(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } 6 AUE_CLOSE ALL { int close(int fd); } 7 AUE_WAIT4 ALL { int wait4(int pid, user_addr_t status, int options, user_addr_t rusage) NO_SYSCALL_STUB; } -8 AUE_NULL ALL { int nosys(void); } { old creat } +8 AUE_NULL ALL { int enosys(void); } { old creat } 9 AUE_LINK ALL { int link(user_addr_t path, user_addr_t link); } 10 AUE_UNLINK ALL { int unlink(user_addr_t path) NO_SYSCALL_STUB; } -11 AUE_NULL ALL { int nosys(void); } { old execv } +11 AUE_NULL ALL { int enosys(void); } { old execv } 12 AUE_CHDIR ALL { int chdir(user_addr_t path); } 13 AUE_FCHDIR ALL { int fchdir(int fd); } 14 AUE_MKNOD ALL { int mknod(user_addr_t path, int mode, int dev); } 15 AUE_CHMOD ALL { int chmod(user_addr_t path, int mode) NO_SYSCALL_STUB; } 16 AUE_CHOWN ALL { int chown(user_addr_t path, int uid, int gid); } -17 AUE_NULL ALL { int nosys(void); } { old break } +17 AUE_NULL ALL { int enosys(void); } { old break } 18 AUE_GETFSSTAT ALL { int getfsstat(user_addr_t buf, int bufsize, int flags); } -19 AUE_NULL ALL { int nosys(void); } { old lseek } +19 AUE_NULL ALL { int enosys(void); } { old lseek } 20 AUE_GETPID ALL { int getpid(void); } -21 AUE_NULL ALL { int nosys(void); } { old mount } -22 AUE_NULL ALL { int nosys(void); } { old umount } +21 AUE_NULL ALL { int enosys(void); } { old mount } +22 AUE_NULL ALL { int enosys(void); } { old umount } 23 AUE_SETUID ALL { int setuid(uid_t uid); } 24 AUE_GETUID ALL { int getuid(void); } 25 AUE_GETEUID ALL { int geteuid(void); } @@ -146,7 +146,7 @@ 93 AUE_SELECT ALL { int select(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv) NO_SYSCALL_STUB; } 94 AUE_NULL ALL { int nosys(void); } { old setdopt } 95 AUE_FSYNC ALL { int fsync(int fd); } -96 AUE_SETPRIORITY ALL { int setpriority(int which, id_t who, int prio); } +96 AUE_SETPRIORITY ALL { int setpriority(int which, id_t who, int prio) NO_SYSCALL_STUB; } #if SOCKETS 97 AUE_SOCKET ALL { int socket(int domain, int type, int protocol); } 98 AUE_CONNECT ALL { int connect(int s, caddr_t name, socklen_t namelen) NO_SYSCALL_STUB; } @@ -290,7 +290,7 @@ 199 AUE_LSEEK ALL { off_t lseek(int fd, off_t offset, int whence); } 200 AUE_TRUNCATE ALL { int truncate(char *path, off_t length); } 201 AUE_FTRUNCATE ALL { int ftruncate(int fd, off_t length); } -202 AUE_SYSCTL ALL { int __sysctl(int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen); } +202 AUE_SYSCTL ALL { int sysctl(int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t 
newlen) NO_SYSCALL_STUB; } 203 AUE_MLOCK ALL { int mlock(caddr_t addr, size_t len); } 204 AUE_MUNLOCK ALL { int munlock(caddr_t addr, size_t len); } 205 AUE_UNDELETE ALL { int undelete(user_addr_t path); } @@ -417,9 +417,9 @@ 271 AUE_SEMWAIT ALL { int sem_wait(sem_t *sem); } 272 AUE_SEMTRYWAIT ALL { int sem_trywait(sem_t *sem); } 273 AUE_SEMPOST ALL { int sem_post(sem_t *sem); } -274 AUE_SEMGETVALUE ALL { int sem_getvalue(sem_t *sem, int *sval); } -275 AUE_SEMINIT ALL { int sem_init(sem_t *sem, int phsared, u_int value); } -276 AUE_SEMDESTROY ALL { int sem_destroy(sem_t *sem); } +274 AUE_SYSCTL ALL { int sysctlbyname(const char *name, size_t namelen, void *old, size_t *oldlenp, void *new, size_t newlen) NO_SYSCALL_STUB; } +275 AUE_NULL ALL { int enosys(void); } { old sem_init } +276 AUE_NULL ALL { int enosys(void); } { old sem_destroy } 277 AUE_OPEN_EXTENDED_RWTC ALL { int open_extended(user_addr_t path, int flags, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } 278 AUE_UMASK_EXTENDED ALL { int umask_extended(int newmask, user_addr_t xsecurity) NO_SYSCALL_STUB; } 279 AUE_STAT_EXTENDED ALL { int stat_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } @@ -549,7 +549,7 @@ 364 AUE_LCHOWN ALL { int lchown(user_addr_t path, uid_t owner, gid_t group) NO_SYSCALL_STUB; } 365 AUE_STACKSNAPSHOT ALL { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset) NO_SYSCALL_STUB; } #if CONFIG_WORKQUEUE -366 AUE_NULL ALL { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize,user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset) NO_SYSCALL_STUB; } +366 AUE_NULL ALL { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, uint32_t flags, user_addr_t stack_addr_hint, user_addr_t targetconc_ptr, uint32_t dispatchqueue_offset, uint32_t tsd_offset) NO_SYSCALL_STUB; } 367 AUE_WORKQOPEN ALL { int workq_open(void) NO_SYSCALL_STUB; } 368 AUE_WORKQOPS ALL { int workq_kernreturn(int options, user_addr_t item, int affinity, int prio) NO_SYSCALL_STUB; } #else @@ -699,11 +699,7 @@ 449 AUE_NULL ALL { int nosys(void); } 450 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -#if CONFIG_TELEMETRY 451 AUE_NULL ALL { int telemetry(uint64_t cmd, uint64_t deadline, uint64_t interval, uint64_t leeway, uint64_t arg4, uint64_t arg5) NO_SYSCALL_STUB; } -#else -451 AUE_NULL ALL { int nosys(void); } -#endif /* TELEMETRY */ #if CONFIG_PROC_UUID_POLICY 452 AUE_NULL ALL { int proc_uuid_policy(uint32_t operation, uuid_t uuid, size_t uuidlen, uint32_t flags); } #else @@ -716,3 +712,63 @@ #endif 454 AUE_NULL ALL { int system_override(uint64_t timeout, uint64_t flags); } 455 AUE_NULL ALL { int vfs_purge(void); } +456 AUE_NULL ALL { int sfi_ctl(uint32_t operation, uint32_t sfi_class, uint64_t time, uint64_t *out_time) NO_SYSCALL_STUB; } +457 AUE_NULL ALL { int sfi_pidctl(uint32_t operation, pid_t pid, uint32_t sfi_flags, uint32_t *out_sfi_flags) NO_SYSCALL_STUB; } +#if CONFIG_COALITIONS +458 AUE_NULL ALL { int coalition(uint32_t operation, uint64_t *cid, uint32_t flags) NO_SYSCALL_STUB; } +459 AUE_NULL ALL { int coalition_info(uint32_t flavor, uint64_t *cid, void *buffer, size_t *bufsize) NO_SYSCALL_STUB; } +#else +458 AUE_NULL ALL { int enosys(void); } +459 AUE_NULL ALL { int enosys(void); } +#endif /* COALITIONS */ +#if NECP +460 AUE_NULL ALL { int necp_match_policy(uint8_t *parameters, size_t parameters_size, struct 
necp_aggregate_result *returned_result); } +#else +460 AUE_NULL ALL { int nosys(void); } +#endif /* NECP */ +461 AUE_GETATTRLISTBULK ALL { int getattrlistbulk(int dirfd, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, uint64_t options); } +462 AUE_NULL ALL { int enosys(void); } /* PLACEHOLDER for CLONEFILE */ +463 AUE_OPENAT_RWTC ALL { int openat(int fd, user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } +464 AUE_OPENAT_RWTC ALL { int openat_nocancel(int fd, user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } +465 AUE_RENAMEAT ALL { int renameat(int fromfd, char *from, int tofd, char *to) NO_SYSCALL_STUB; } +466 AUE_FACCESSAT ALL { int faccessat(int fd, user_addr_t path, int amode, int flag); } +467 AUE_FCHMODAT ALL { int fchmodat(int fd, user_addr_t path, int mode, int flag); } +468 AUE_FCHOWNAT ALL { int fchownat(int fd, user_addr_t path, uid_t uid,gid_t gid, int flag); } +469 AUE_FSTATAT ALL { int fstatat(int fd, user_addr_t path, user_addr_t ub, int flag); } +470 AUE_FSTATAT ALL { int fstatat64(int fd, user_addr_t path, user_addr_t ub, int flag); } +471 AUE_LINKAT ALL { int linkat(int fd1, user_addr_t path, int fd2, user_addr_t link, int flag); } +472 AUE_UNLINKAT ALL { int unlinkat(int fd, user_addr_t path, int flag) NO_SYSCALL_STUB; } +473 AUE_READLINKAT ALL { int readlinkat(int fd, user_addr_t path, user_addr_t buf, size_t bufsize); } +474 AUE_SYMLINKAT ALL { int symlinkat(user_addr_t *path1, int fd, user_addr_t path2); } +475 AUE_MKDIRAT ALL { int mkdirat(int fd, user_addr_t path, int mode); } +476 AUE_GETATTRLISTAT ALL { int getattrlistat(int fd, const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } +477 AUE_NULL ALL { int proc_trace_log(pid_t pid, uint64_t uniqueid); } +478 AUE_NULL ALL { int bsdthread_ctl(user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3) NO_SYSCALL_STUB; } +479 AUE_OPENBYID_RWT ALL { int openbyid_np(user_addr_t fsid, user_addr_t objid, int oflags); } +#if SOCKETS +480 AUE_NULL ALL { user_ssize_t recvmsg_x(int s, struct msghdr_x *msgp, u_int cnt, int flags); } +481 AUE_NULL ALL { user_ssize_t sendmsg_x(int s, struct msghdr_x *msgp, u_int cnt, int flags); } +#else +480 AUE_NULL ALL { int nosys(void); } +481 AUE_NULL ALL { int nosys(void); } +#endif /* SOCKETS */ +482 AUE_NULL ALL { uint64_t thread_selfusage(void) NO_SYSCALL_STUB; } +#if CONFIG_CSR +483 AUE_NULL ALL { int csrctl(uint32_t op, user_addr_t useraddr, user_addr_t usersize) NO_SYSCALL_STUB; } +#else +483 AUE_NULL ALL { int enosys(void); } +#endif /* CSR */ +484 AUE_NULL ALL { int guarded_open_dprotected_np(const char *path, const guardid_t *guard, u_int guardflags, int flags, int dpclass, int dpflags, int mode) NO_SYSCALL_STUB; } +485 AUE_NULL ALL { user_ssize_t guarded_write_np(int fd, const guardid_t *guard, user_addr_t cbuf, user_size_t nbyte); } +486 AUE_PWRITE ALL { user_ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, user_addr_t buf, user_size_t nbyte, off_t offset); } +487 AUE_WRITEV ALL { user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, u_int iovcnt); } +#if CONFIG_SECLUDED_RENAME +488 AUE_RENAME ALL { int rename_ext(char *from, char *to, u_int flags) NO_SYSCALL_STUB; } +#else +488 AUE_NULL ALL { int enosys(void); } +#endif +#if CONFIG_CODE_DECRYPTION +489 AUE_MPROTECT ALL { int mremap_encrypted(caddr_t addr, size_t len, uint32_t cryptid, uint32_t cputype, uint32_t cpusubtype); } +#else +489 AUE_NULL ALL { int enosys(void); } +#endif diff --git 
a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c index f98530962..418a4c6e0 100644 --- a/bsd/kern/sysv_sem.c +++ b/bsd/kern/sysv_sem.c @@ -1473,10 +1473,7 @@ semexit(struct proc *p) /* Maybe we should build a list of semakptr's to wake * up, finish all access to data structures, release the * subsystem lock, and wake all the processes. Something - * to think about. It wouldn't buy us anything unless - * wakeup had the potential to block, or the syscall - * funnel state was changed to allow multiple threads - * in the BSD code at once. + * to think about. */ #ifdef SEM_WAKEUP sem_wakeup((caddr_t)semakptr); diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes index ef6b4f3c8..904d2311f 100644 --- a/bsd/kern/trace.codes +++ b/bsd/kern/trace.codes @@ -56,6 +56,7 @@ 0x1090034 TMR_TimerEscalate 0x1090038 TMR_TimerOverdue 0x109003C TMR_Rescan +0x1090040 TMR_set_apic_deadline 0x10c0000 MACH_SysCall 0x10c0004 MSC_kern_invalid_#1 0x10c0008 MSC_kern_invalid_#2 @@ -67,8 +68,11 @@ 0x10c0020 MSC_kern_invalid_#8 0x10c0024 MSC_kern_invalid_#9 0x10c0028 MSC_mach_vm_allocate_trap +0x10c002c MSC_kern_invalid_#11 0x10c0030 MSC_mach_vm_deallocate_trap +0x10c0034 MSC_kern_invalid_#13 0x10c0038 MSC_mach_vm_protect_trap +0x10c003c MSC_mach_vm_map_trap 0x10c0040 MSC_mach_port_allocate_trap 0x10c0044 MSC_mach_port_destroy_trap 0x10c0048 MSC_mach_port_deallocate_trap @@ -77,8 +81,8 @@ 0x10c0054 MSC_mach_port_insert_right_trap 0x10c0058 MSC_mach_port_insert_member_trap 0x10c005c MSC_mach_port_extract_member_trap -0x10c0060 MSC_kern_invalid_#24 -0x10c0064 MSC_kern_invalid_#25 +0x10c0060 MSC_mach_port_construct_trap +0x10c0064 MSC_mach_port_destruct_trap 0x10c0068 MSC_mach_reply_port 0x10c006c MSC_thread_self_trap 0x10c0070 MSC_task_self_trap @@ -94,9 +98,9 @@ 0x10c0098 MSC_semaphore_timedwait_trap 0x10c009c MSC_semaphore_timedwait_signal_trap 0x10c00a0 MSC_kern_invalid_#40 -0x10c00a4 MSC_kern_invalid_#41 -0x10c00a8 MSC_kern_invalid_#42 -0x10c00ac MSC_map_fd +0x10c00a4 MSC_mach_port_guard_trap +0x10c00a8 MSC_mach_port_unguard_trap +0x10c00ac MSC_kern_invalid_#43 0x10c00b0 MSC_task_name_for_pid 0x10c00b4 MSC_task_for_pid 0x10c00b8 MSC_pid_for_task @@ -183,6 +187,14 @@ 0x10c01fc MSC_kern_invalid_#127 0x1200000 MACH_task_suspend 0x1200004 MACH_task_resume +0x1200008 MACH_thread_set_voucher +0x120000C MACH_IPC_msg_send +0x1200010 MACH_IPC_msg_recv +0x1200014 MACH_IPC_msg_recv_voucher_refused +0x1200018 MACH_IPC_kmsg_free +0x120001c MACH_IPC_voucher_create +0x1200020 MACH_IPC_voucher_create_attr_data +0x1200024 MACH_IPC_voucher_destroy 0x1300004 MACH_Pageout 0x1300008 MACH_vmfault 0x1300100 MACH_purgable_token_add @@ -193,6 +205,7 @@ 0x1300124 MACH_purgable_object_remove 0x1300128 MACH_purgable_object_purge 0x130012c MACH_purgable_object_purge_all +0x1300150 MACH_vm_map_partial_reap 0x1300400 MACH_vm_check_zf_delay 0x1300404 MACH_vm_cow_delay 0x1300408 MACH_vm_zf_delay @@ -204,10 +217,11 @@ 0x1300420 MACH_vm_pageout_cache_evict 0x1300424 MACH_vm_pageout_thread_block 0x1300428 MACH_vm_pageout_jetsam -0x130042C MACH_vm_pageout_token 0x1300480 MACH_vm_upl_page_wait 0x1300484 MACH_vm_iopl_page_wait 0x1300488 MACH_vm_page_wait_block +0x130048C MACH_vm_page_sleep +0x1300490 MACH_vm_page_expedite 0x1400000 MACH_SCHED 0x1400004 MACH_STKATTACH 0x1400008 MACH_STKHANDOFF @@ -237,6 +251,11 @@ 0x140006C MACH_CPU_THROTTLE_DISABLE 0x1400070 MACH_RW_PROMOTE 0x1400074 MACH_RW_DEMOTE +0x140007C MACH_SCHED_MAINTENANCE +0x1400080 MACH_DISPATCH +0x1400084 MACH_QUANTUM_HANDOFF +0x1400088 MACH_MULTIQ_DEQUEUE +0x140008C MACH_SCHED_THREAD_SWITCH 
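The new entries above follow the standard kdebug debugid layout: an 8-bit class, an 8-bit subclass, a 14-bit event code, and 2 function-qualifier bits. A small decoder using those conventional masks (the layout is long-standing kdebug practice, stated here as background rather than something this patch introduces):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t debugid = 0x1400080;               /* MACH_DISPATCH from the table */
	unsigned cls  = (debugid >> 24) & 0xff;     /* 0x01: DBG_MACH */
	unsigned sub  = (debugid >> 16) & 0xff;     /* 0x40: scheduler subclass */
	unsigned code = (debugid >>  2) & 0x3fff;   /* 0x20: event code */
	unsigned func = debugid & 0x3;              /* DBG_FUNC_START/END bits */
	printf("class 0x%02x subclass 0x%02x code 0x%x func %u\n",
	    cls, sub, code, func);
	return 0;
}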
0x1500000 MACH_MSGID_INVALID 0x1600000 MTX_SLEEP 0x1600004 MTX_SLEEP_DEADLINE @@ -281,6 +300,7 @@ 0x1700030 PMAP_query_resident 0x1700034 PMAP_flush_kernel_TLBS 0x1700038 PMAP_flush_delayed_TLBS +0x170003c PMAP_flush_TLBS_TO 0x1900000 MP_TLB_FLUSH 0x1900004 MP_CPUS_CALL 0x1900008 MP_CPUS_CALL_LOCAL @@ -291,6 +311,18 @@ 0x190001c MP_CPU_DEACTIVATE 0x1a10000 MICROSTACKSHOT_RECORD 0x1a10004 MICROSTACKSHOT_GATHER +0x1a20000 SFI_SET_WINDOW +0x1a20004 SFI_CANCEL_WINDOW +0x1a20008 SFI_SET_CLASS_OFFTIME +0x1a2000c SFI_CANCEL_CLASS_OFFTIME +0x1a20010 SFI_THREAD_DEFER +0x1a20014 SFI_OFF_TIMER +0x1a20018 SFI_ON_TIMER +0x1a2001c SFI_WAIT_CANCELED +0x1a20020 SFI_PID_SET_MANAGED +0x1a20024 SFI_PID_CLEAR_MANAGED +0x1a30004 ENERGY_PERF_GPU_DESCRIPTION +0x1a30008 ENERGY_PERF_GPU_TIME 0x2010000 L_IP_In_Beg 0x2010004 L_IP_Out_Beg 0x2010008 L_IP_In_End @@ -330,33 +362,29 @@ 0x20b0c04 F_SendIt 0x20b1004 F_SoSend 0x20b1008 F_SoSend_CopyD +0x20b100C F_SoSend_List 0x20b1400 F_RecvFrom 0x20b1800 F_RecvMsg 0x20b1c00 F_RecvIt 0x20b2000 F_SoReceive +0x20b200C F_SoReceive_List 0x20b2100 F_SoShutdown 0x20b2400 F_SoAccept 0x20b2800 F_sendfile 0x20b2804 F_sendfile_wait 0x20b2808 F_sendfile_read 0x20b280c F_sendfile_send +0x20b2c00 F_sendmsg_x +0x20b3000 F_recvmsg_x 0x2650004 AT_DDPinput 0x2f00000 F_FreemList 0x2f00004 F_m_copym 0x2f00008 F_getpackets 0x2f0000c F_getpackethdrs 0x3010000 HFS_Write -0x3010004 HFS_Fsync -0x3010008 HFS_Close -0x301000c HFS_Remove -0x3010010 HFS_Create -0x3010014 HFS_Inactive -0x3010018 HFS_Reclaim 0x301001C HFS_Truncate 0x3010028 vinvalbuf 0x3010030 HFS_Read -0x3010034 HFS_RL_ADD -0x3010038 HFS_RL_REMOVE 0x301003c MACH_copyiostr 0x3010040 UIO_copyout 0x3010044 UIO_copyin @@ -378,8 +406,7 @@ 0x3010088 Cl_phys_uiomove 0x301008c Cl_read_commit 0x3010090 VFS_LOOKUP -0x3010094 Cl_read_uplmap -0x3010098 Cl_read_uplunmap +0x3010094 HFS_getnewvnode 0x301009C VFS_LOOKUP_DONE 0x30100a0 Cl_write_copy 0x30100a4 Cl_write_list_req @@ -512,7 +539,7 @@ 0x3011024 UPL_map_enter_upl 0x3011028 UPL_map_remove_upl 0x301102c UPL_commit_range_speculative -0x3018000 hfs_update +0x3018000 HFS_update 0x3020000 P_WrData 0x3020004 P_WrDataDone 0x3020008 P_RdData @@ -678,9 +705,103 @@ 0x3060004 SPEC_trim_extent 0x3070004 BootCache_tag 0x3070008 BootCache_batch +0x3070010 BC_IO_HIT +0x3070020 BC_IO_HIT_STALLED +0x3070040 BC_IO_MISS +0x3070080 BC_IO_MISS_CUT_THROUGH +0x3070100 BC_PLAYBACK_IO +0x3080000 HFS_Unmap_free +0x3080004 HFS_Unmap_alloc +0x3080008 HFS_Unmap_callback +0x3080010 HFS_BlockAllocate +0x3080014 HFS_BlockDeallocate +0x3080018 HFS_ReadBitmapBlock +0x308001C HFS_ReleaseBitmapBlock +0x3080020 HFS_BlockAllocateContig +0x3080024 HFS_BlockAllocateAnyBitmap +0x3080028 HFS_BlockAllocateKnown +0x308002C HFS_BlockMarkAllocated +0x3080030 HFS_BlockMarkFree +0x3080034 HFS_BlockFindContiguous +0x3080038 HFS_IsAllocated +0x3080040 HFS_ResetFreeExtCache +0x3080044 HFS_remove_free_extcache +0x3080048 HFS_add_free_extcache +0x308004C HFS_ReadBitmapRange +0x3080050 HFS_ReleaseScanBitmapRange +0x3080054 HFS_syncer +0x3080058 HFS_syncer_timed +0x308005C HFS_ScanUnmapBlocks +0x3080060 HFS_issue_unmap +0x30A0000 SMB_vop_mount +0x30A0004 SMB_vop_unmount +0x30A0008 SMB_vop_root +0x30A000C SMB_vop_getattr +0x30A0010 SMB_vop_sync +0x30A0014 SMB_vop_vget +0x30A0018 SMB_vop_sysctl +0x30A001C SMB_vnop_advlock +0x30A0020 SMB_vnop_close +0x30A0024 SMB_vnop_create +0x30A0028 SMB_vnop_fsync +0x30A002C SMB_vnop_get_attr +0x30A0030 SMB_vnop_page_in +0x30A0034 SMB_vnop_inactive +0x30A0038 SMB_vnop_ioctl +0x30A003C SMB_vnop_link +0x30A0040 
SMB_vnop_lookup +0x30A0044 SMB_vnop_mkdir +0x30A0048 SMB_vnop_mknode +0x30A004C SMB_vnop_mmap +0x30A0050 SMB_vnop_mnomap +0x30A0054 SMB_vnop_open +0x30A0058 SMB_vnop_cmpd_open +0x30A005C SMB_vnop_pathconf +0x30A0060 SMB_vnop_page_out +0x30A0064 SMB_vnop_copyfile +0x30A0068 SMB_vnop_read +0x30A006C SMB_vnop_read_dir +0x30A0070 SMB_vnop_read_dir_attr +0x30A0074 SMB_vnop_read_link +0x30A0078 SMB_vnop_reclaim +0x30A007C SMB_vnop_remove +0x30A0080 SMB_vnop_rename +0x30A0084 SMB_vnop_rm_dir +0x30A0088 SMB_vnop_set_attr +0x30A008C SMB_vnop_sym_link +0x30A0090 SMB_vnop_write +0x30A0094 SMB_vnop_strategy +0x30A0098 SMB_vnop_get_xattr +0x30A009C SMB_vnop_set_xattr +0x30A00A0 SMB_vnop_rm_xattr +0x30A00A4 SMB_vnop_list_xattr +0x30A00A8 SMB_vnop_monitor +0x30A00AC SMB_vnop_get_nstream +0x30A00B0 SMB_vnop_make_nstream +0x30A00B4 SMB_vnop_rm_nstream +0x30A00B8 SMB_vnop_access +0x30A00BC SMB_vnop_allocate +0x30A00C0 SMB_smbfs_close +0x30A00C4 SMB_smbfs_create +0x30A00C8 SMB_smbfs_fsync +0x30A00CC SMB_smb_fsync +0x30A00D0 SMB_smbfs_update_cache +0x30A00D4 SMB_smbfs_open +0x30A00D8 SMB_smb_read +0x30A00DC SMB_smb_rw_async +0x30A00E0 SMB_smb_rw_fill +0x30A00E4 SMB_pack_attr_blk +0x30A00E8 SMB_smbfs_remove +0x30A00EC SMB_smbfs_setattr +0x30A00F0 SMB_smbfs_get_sec +0x30A00F4 SMB_smbfs_set_sec +0x30A00F8 SMB_smbfs_get_max_access +0x30A00FC SMB_smbfs_lookup +0x30A0100 SMB_smbfs_notify 0x3110004 OpenThrottleWindow 0x3110008 CauseIOThrottle 0x311000C IO_THROTTLE_DISABLE +0x3CF0000 CP_OFFSET_IO 0x4010004 proc_exit 0x4010008 force_exit 0x4020004 MEMSTAT_scan @@ -691,6 +812,11 @@ 0x4020018 MEMSTAT_update 0x402001C MEMSTAT_idle_demote 0x4020020 MEMSTAT_clear_errors +0x4020024 MEMSTAT_dirty_track +0x4020028 MEMSTAT_dirty_set +0x402002C MEMSTAT_dirty_clear +0x4020030 MEMSTAT_grp_set_properties +0x4020034 MEMSTAT_do_kill 0x40c0000 BSC_SysCall 0x40c0004 BSC_exit 0x40c0008 BSC_fork @@ -965,9 +1091,7 @@ 0x40c043c BSC_sem_wait 0x40c0440 BSC_sem_trywait 0x40c0444 BSC_sem_post -0x40c0448 BSC_sem_getvalue -0x40c044c BSC_sem_init -0x40c0450 BSC_sem_destroy +0x40c0448 BSC_sysctlbyname 0x40c0454 BSC_open_extended 0x40c0458 BSC_umask_extended 0x40c045c BSC_stat_extended @@ -1143,6 +1267,30 @@ 0x40c0708 BSC_socket_delegate 0x40c070c BSC_telemetry 0x40c0710 BSC_proc_uuid_policy +0x40c0714 BSC_memorystatus_get_level +0x40c0718 BSC_system_override +0x40c071c BSC_vfs_purge +0x40c0720 BSC_sfi_ctl +0x40c0724 BSC_sfi_pidctl +0x40c0734 BSC_getattrlistbulk +0x40c073c BSC_openat +0x40c0740 BSC_openat_nocancel +0x40c0744 BSC_renameat +0x40c074c BSC_chmodat +0x40c0750 BSC_chownat +0x40c0754 BSC_fstatat +0x40c0758 BSC_fstatat64 +0x40c075c BSC_linkat +0x40c0760 BSC_unlinkat +0x40c0764 BSC_readlinkat +0x40c0768 BSC_symlinkat +0x40c076c BSC_mkdirat +0x40c0770 BSC_getattrlistat +0x40c0778 BSC_bsdthread_ctl +0x40c0780 BSC_recvmsg_x +0x40c0784 BSC_sendmsg_x +0x40c0788 BSC_thread_selfusage +0x40c07a4 BSC_mremap_extended 0x40e0104 BSC_msync_extended_info 0x40e0264 BSC_pread_extended_info 0x40e0268 BSC_pwrite_extended_info @@ -1397,6 +1545,10 @@ 0x5310264 CPUPM_CPU_OFFLINE 0x5310268 CPUPM_CPU_EXIT_HALT 0x531026C CPUPM_PST_QOS_CHARGE +0x5310270 CPUPM_PST_QOS_APPLY +0x5310274 CPUPM_PST_QOS_SWITCH2 +0x5310278 CPUPM_PST_UIB +0x531027C CPUPM_PST_PLIMIT_UIB 0x5330000 HIBERNATE 0x5330004 HIBERNATE_WRITE_IMAGE 0x5330008 HIBERNATE_MACHINE_INIT @@ -1411,12 +1563,14 @@ 0x533002c HIBERNATE_fastwake_warmup 0x7000004 TRACE_DATA_NEWTHREAD 0x7000008 TRACE_DATA_EXEC +0x700000c TRACE_DATA_THREAD_TERMINATE 0x7010004 TRACE_STRING_NEWTHREAD 0x7010008 TRACE_STRING_EXEC 
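The BSC_* codes below are derived mechanically from syscall numbers: class 4 (BSD), subclass 0x0c, code equal to the syscall number, i.e. debugid = 0x40c0000 | (num << 2). That is why BSC_sysctlbyname lands at 0x40c0448 in place of the removed sem_* codes: 0x448 >> 2 = 274, the slot syscalls.master reassigns to sysctlbyname earlier in this patch. A quick check of the arithmetic (the formula is the conventional kdebug encoding, noted here as an assumption):

#include <stdio.h>
#include <stdint.h>

static uint32_t bsc_debugid(unsigned sysnum)
{
	return 0x40c0000u | (sysnum << 2);   /* class 4, subclass 0x0c, code = sysnum */
}

int main(void)
{
	printf("syscall 274 -> 0x%x\n", bsc_debugid(274));  /* 0x40c0448 BSC_sysctlbyname */
	printf("syscall 482 -> 0x%x\n", bsc_debugid(482));  /* 0x40c0788 BSC_thread_selfusage */
	return 0;
}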
0x7020000 TRACE_PANIC 0x7020004 TRACE_TIMESTAMPS 0x7020008 TRACE_LOST_EVENTS 0x702000c TRACE_WRITING_EVENTS +0x7020010 TRACE_INFO_STRING 0x8000000 USER_TEST 0x8000004 USER_run 0x8000008 USER_join @@ -1598,7 +1752,6 @@ 0x1100001c DNC_update_identity 0x11000020 DNC_PURGE 0x11000030 DNC_LOOKUP_PATH -0x11000034 HFS_vnop_lookup 0x11000038 NAMEI 0x11000048 VFS_SUSPENDED 0x1100004C VFS_CACHEPURGE @@ -1609,19 +1762,18 @@ 0x11000060 VFS_GETIOCOUNT 0x11000064 VFS_vnode_recycle 0x11000068 VFS_vnode_reclaim -0x11000070 HFS_getnewvnode1 -0x11000074 HFS_getnewvnode2 -0x11000078 HFS_chash_getcnode -0x1100007c HFS_vfs_getpath 0x11000080 VOLFS_lookup 0x11000084 lookup_mountedhere 0x11000088 VNOP_LOOKUP -0x1100008c HFS_chash_getvnode 0x11000090 VFS_vnode_rele 0x11000094 VFS_vnode_put 0x11004100 NC_lock_shared 0x11004104 NC_lock_exclusive 0x11004108 NC_unlock +0x1e000000 SEC_ENTROPY_READ0 +0x1e000004 SEC_ENTROPY_READ1 +0x1e000008 SEC_ENTROPY_READ2 +0x1e00000c SEC_ENTROPY_READ3 0x1f000000 DYLD_initialize 0x1f010000 DYLD_CALL_image_init_routine 0x1f010004 DYLD_CALL_dependent_init_routine @@ -1733,7 +1885,7 @@ 0x21030518 LOGINWINDOW_UNLOCKUI_TRIGGERED 0x2103051c LOGINWINDOW_UNLOCKUI_SHOWN 0x21030520 LOGINWINDOW_NO_LOCKUI_SHOWN -0x21040000 APP_DIDActivateWindow +0x21040000 APP_AudioOverload 0x21050000 TOOL_PRIVATE_1 0x21050004 TOOL_PRIVATE_2 0x21050008 TOOL_PRIVATE_3 @@ -1779,6 +1931,7 @@ 0x21090004 PHD_DAEMON_FINISH 0x21090010 PHD_SYNCNOW_START 0x21090014 PHD_SYNCNOW_FINISH +0x210a0fac DISPATCH_voucher_transport 0x210b0000 TAL_APP_LAUNCH_START 0x210b0004 TAL_APP_LAUNCH_UNSUSPENDED 0x210b0008 TAL_APP_LAUNCH_UNTHROTTLED @@ -1788,172 +1941,6 @@ 0x210c0000 NSAPPLICATION_RECEIVED_KEYEVENT 0x210c0004 NSWINDOW_FLUSHED 0x210c0008 NSTEXTVIEW_PROCESSED_KEYEVENT -0x21800000 SMB_smbd_idle -0x21800004 SMB_syscall_opendir -0x21800008 SMB_syscall_readdir -0x2180000c SMB_syscall_seekdir -0x21800010 SMB_syscall_telldir -0x21800014 SMB_syscall_rewinddir -0x21800018 SMB_syscall_mkdir -0x2180001c SMB_syscall_rmdir -0x21800020 SMB_syscall_closedir -0x21800024 SMB_syscall_open -0x21800028 SMB_syscall_close -0x2180002c SMB_syscall_read -0x21800030 SMB_syscall_pread -0x21800034 SMB_syscall_write -0x21800038 SMB_syscall_pwrite -0x2180003c SMB_syscall_lseek -0x21800040 SMB_syscall_sendfile -0x21800044 SMB_syscall_rename -0x21800048 SMB_syscall_fsync -0x2180004c SMB_syscall_stat -0x21800050 SMB_syscall_fstat -0x21800054 SMB_syscall_lstat -0x21800058 SMB_syscall_unlink -0x2180005c SMB_syscall_chmod -0x21800060 SMB_syscall_fchmod -0x21800064 SMB_syscall_chown -0x21800068 SMB_syscall_fchown -0x2180006c SMB_syscall_chdir -0x21800070 SMB_syscall_getwd -0x21800074 SMB_syscall_utime -0x21800078 SMB_syscall_ftruncate -0x2180007c SMB_syscall_fcntl_lock -0x21800080 SMB_syscall_kernel_flock -0x21800084 SMB_syscall_fcntl_getlock -0x21800088 SMB_syscall_readlink -0x2180008c SMB_syscall_symlink -0x21800090 SMB_syscall_link -0x21800094 SMB_syscall_mknod -0x21800098 SMB_syscall_realpath -0x2180009c SMB_syscall_get_quota -0x218000a0 SMB_syscall_set_quota -0x218000a4 SMB_smbmkdir -0x218000a8 SMB_smbrmdir -0x218000ac SMB_smbopen -0x218000b0 SMB_smbcreate -0x218000b4 SMB_smbclose -0x218000b8 SMB_smbflush -0x218000bc SMB_smbunlink -0x218000c0 SMB_smbmv -0x218000c4 SMB_smbgetatr -0x218000c8 SMB_smbsetatr -0x218000cc SMB_smbread -0x218000d0 SMB_smbwrite -0x218000d4 SMB_smblock -0x218000d8 SMB_smbunlock -0x218000dc SMB_smbctemp -0x218000e0 SMB_smbmknew -0x218000e4 SMB_smbcheckpath -0x218000e8 SMB_smbexit -0x218000ec SMB_smblseek -0x218000f0 SMB_smblockread 
-0x218000f4 SMB_smbwriteunlock -0x218000f8 SMB_smbreadbraw -0x218000fc SMB_smbreadbmpx -0x21800100 SMB_smbreadbs -0x21800104 SMB_smbwritebraw -0x21800108 SMB_smbwritebmpx -0x2180010c SMB_smbwritebs -0x21800110 SMB_smbwritec -0x21800114 SMB_smbsetattre -0x21800118 SMB_smbgetattre -0x2180011c SMB_smblockingx -0x21800120 SMB_smbtrans -0x21800124 SMB_smbtranss -0x21800128 SMB_smbioctl -0x2180012c SMB_smbioctls -0x21800130 SMB_smbcopy -0x21800134 SMB_smbmove -0x21800138 SMB_smbecho -0x2180013c SMB_smbwriteclose -0x21800140 SMB_smbopenx -0x21800144 SMB_smbreadx -0x21800148 SMB_smbwritex -0x2180014c SMB_smbtrans2 -0x21800150 SMB_smbtranss2 -0x21800154 SMB_smbfindclose -0x21800158 SMB_smbfindnclose -0x2180015c SMB_smbtcon -0x21800160 SMB_smbtdis -0x21800164 SMB_smbnegprot -0x21800168 SMB_smbsesssetupx -0x2180016c SMB_smbulogoffx -0x21800170 SMB_smbtconx -0x21800174 SMB_smbdskattr -0x21800178 SMB_smbsearch -0x2180017c SMB_smbffirst -0x21800180 SMB_smbfunique -0x21800184 SMB_smbfclose -0x21800188 SMB_smbnttrans -0x2180018c SMB_smbnttranss -0x21800190 SMB_smbntcreatex -0x21800194 SMB_smbntcancel -0x21800198 SMB_smbntrename -0x2180019c SMB_smbsplopen -0x218001a0 SMB_smbsplwr -0x218001a4 SMB_smbsplclose -0x218001a8 SMB_smbsplretq -0x218001ac SMB_smbsends -0x218001b0 SMB_smbsendb -0x218001b4 SMB_smbfwdname -0x218001b8 SMB_smbcancelf -0x218001bc SMB_smbgetmac -0x218001c0 SMB_smbsendstrt -0x218001c4 SMB_smbsendend -0x218001c8 SMB_smbsendtxt -0x218001cc SMB_smbinvalid -0x218001d0 SMB_pathworks_setdir -0x218001d4 SMB_trans2_open -0x218001d8 SMB_trans2_findfirst -0x218001dc SMB_trans2_findnext -0x218001e0 SMB_trans2_qfsinfo -0x218001e4 SMB_trans2_setfsinfo -0x218001e8 SMB_trans2_qpathinfo -0x218001ec SMB_trans2_setpathinfo -0x218001f0 SMB_trans2_qfileinfo -0x218001f4 SMB_trans2_setfileinfo -0x218001f8 SMB_trans2_fsctl -0x218001fc SMB_trans2_ioctl -0x21800200 SMB_trans2_findnotifyfirst -0x21800204 SMB_trans2_findnotifynext -0x21800208 SMB_trans2_mkdir -0x2180020c SMB_trans2_session_setup -0x21800210 SMB_trans2_get_dfs_referral -0x21800214 SMB_trans2_report_dfs_inconsistancy -0x21800218 SMB_nt_transact_create -0x2180021c SMB_nt_transact_ioctl -0x21800220 SMB_nt_transact_set_security_desc -0x21800224 SMB_nt_transact_notify_change -0x21800228 SMB_nt_transact_rename -0x2180022c SMB_nt_transact_query_security_desc -0x21800230 SMB_nt_transact_get_user_quota -0x21800234 SMB_nt_transact_set_user_quota -0x21800238 SMB_get_nt_acl -0x2180023c SMB_fget_nt_acl -0x21800240 SMB_set_nt_acl -0x21800244 SMB_fset_nt_acl -0x21800248 SMB_chmod_acl -0x2180024c SMB_fchmod_acl -0x21800250 SMB_name_release -0x21800254 SMB_name_refresh -0x21800258 SMB_name_registration -0x2180025c SMB_node_status -0x21800260 SMB_name_query -0x21800264 SMB_host_announce -0x21800268 SMB_workgroup_announce -0x2180026c SMB_local_master_announce -0x21800270 SMB_master_browser_announce -0x21800274 SMB_lm_host_announce -0x21800278 SMB_get_backup_list -0x2180027c SMB_reset_browser -0x21800280 SMB_announce_request -0x21800284 SMB_lm_announce_request -0x21800288 SMB_domain_logon -0x2180028c SMB_sync_browse_lists -0x21800290 SMB_run_elections -0x21800294 SMB_election 0x22000004 LAUNCHD_starting 0x22000008 LAUNCHD_exiting 0x2200000c LAUNCHD_finding_stray_pg @@ -2021,6 +2008,10 @@ 0x26180018 imp_apptype_tal_app 0x26190010 imp_update_task 0x26190020 imp_update_thread +0x261a0000 imp_usynch_add_override +0x261a0004 imp_usynch_remove_override +0x261b0000 imp_donor_update_live_donor +0x261b0004 imp_donor_init_donor_state 0x26210010 imp_task_int_bg 0x26210014 
imp_task_ext_bg 0x26210020 imp_thread_int_bg @@ -2037,10 +2028,6 @@ 0x26240014 imp_task_ext_passive_io 0x26240020 imp_thread_int_passive_io 0x26240024 imp_thread_ext_passive_io -0x26250010 imp_task_int_bg_plus_gpu -0x26250014 imp_task_ext_bg_plus_gpu -0x26260010 imp_task_int_gpu_deny -0x26260014 imp_task_ext_gpu_deny 0x26270018 imp_task_dbg_iopol 0x26280018 imp_task_tal 0x26290018 imp_task_boost @@ -2078,6 +2065,12 @@ 0x2700E010 PERF_SRAMEMA_DOM1 0x2700E020 PERF_SRAMEMA_DOM2 0x2700E030 PERF_SRAMEMA_DOM3 +0x2a100004 ATM_MIN_CALLED +0x2a100008 ATM_MIN_LINK_LIST +0x2a200004 ATM_VALUE_REPLACED +0x2a200008 ATM_VALUE_ADDED +0x2a300004 ATM_VALUE_UNREGISTERED +0x2a300008 ATM_VALUE_DIFF_MAILBOX 0xff000104 MSG_mach_notify_port_deleted 0xff000114 MSG_mach_notify_port_destroyed 0xff000118 MSG_mach_notify_no_senders diff --git a/bsd/kern/tty.c b/bsd/kern/tty.c index 01ea986ea..2586c482f 100644 --- a/bsd/kern/tty.c +++ b/bsd/kern/tty.c @@ -130,7 +130,7 @@ static lck_grp_t *tty_lck_grp; static lck_grp_attr_t *tty_lck_grp_attr; static lck_attr_t *tty_lck_attr; -static int ttnread(struct tty *tp); +__private_extern__ int ttnread(struct tty *tp); static void ttyecho(int c, struct tty *tp); static int ttyoutput(int c, struct tty *tp); static void ttypend(struct tty *tp); @@ -960,6 +960,29 @@ ttyoutput(int c, struct tty *tp) return (-1); } +/* + * Sets the tty state to not allow any more changes of foreground process + * group. This is required to be done so that a subsequent revoke on a vnode + * is able to always successfully complete. + * + * Locks : Assumes tty_lock held on entry + */ +void +ttysetpgrphup(struct tty *tp) +{ + TTY_LOCK_OWNED(tp); /* debug assert */ + SET(tp->t_state, TS_PGRPHUP); +} + +/* + * Locks : Assumes tty lock held on entry + */ +void +ttyclrpgrphup(struct tty *tp) +{ + TTY_LOCK_OWNED(tp); /* debug assert */ + CLR(tp->t_state, TS_PGRPHUP); +} /* * ttioctl @@ -1453,6 +1476,15 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p) error = EPERM; goto out; } + /* + * The session leader is going away and is possibly going to revoke + * the terminal, we can't change the process group when that is the + * case. + */ + if (ISSET(tp->t_state, TS_PGRPHUP)) { + error = EPERM; + goto out; + } proc_list_lock(); oldpg = tp->t_pgrp; tp->t_pgrp = pgrp; @@ -1570,7 +1602,7 @@ ttselect(dev_t dev, int rw, void *wql, proc_t p) /* * Locks: Assumes tp is locked on entry, remains locked on exit */ -static int +__private_extern__ int ttnread(struct tty *tp) { int nread; diff --git a/bsd/kern/tty_conf.c b/bsd/kern/tty_conf.c index 585af4b71..b72d464d2 100644 --- a/bsd/kern/tty_conf.c +++ b/bsd/kern/tty_conf.c @@ -109,7 +109,7 @@ struct linesw linesw[MAXLDISC] = NODISC(7), /* loadable */ }; -int nlinesw = sizeof (linesw) / sizeof (linesw[0]); +const int nlinesw = sizeof (linesw) / sizeof (linesw[0]); static struct linesw nodisc = NODISC(0); diff --git a/bsd/kern/tty_dev.c b/bsd/kern/tty_dev.c new file mode 100644 index 000000000..87f3cd7ee --- /dev/null +++ b/bsd/kern/tty_dev.c @@ -0,0 +1,1205 @@ +/* + * Copyright (c) 1997-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tty_pty.c 8.4 (Berkeley) 2/20/95 + */ + +/* Common callbacks for the pseudo-teletype driver (pty/tty) + * and cloning pseudo-teletype driver (ptmx/pts). 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/ioctl.h> +#include <sys/proc_internal.h> +#include <sys/kauth.h> +#include <sys/tty.h> +#include <sys/conf.h> +#include <sys/file_internal.h> +#include <sys/uio_internal.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/vnode_internal.h> /* _devfs_setattr() */ +#include <sys/stat.h> /* _devfs_setattr() */ +#include <sys/user.h> +#include <sys/signalvar.h> +#include <sys/sysctl.h> +#include <miscfs/devfs/devfs.h> +#include <miscfs/devfs/devfsdefs.h> /* DEVFS_LOCK()/DEVFS_UNLOCK() */ + +#if CONFIG_MACF +#include <security/mac_framework.h> +#endif + +#include "tty_dev.h" + +/* XXX belongs in devfs somewhere - LATER */ +static int _devfs_setattr(void *, unsigned short, uid_t, gid_t); + +/* + * Forward declarations + */ +static void ptcwakeup(struct tty *tp, int flag); +__XNU_PRIVATE_EXTERN d_open_t ptsopen; +__XNU_PRIVATE_EXTERN d_close_t ptsclose; +__XNU_PRIVATE_EXTERN d_read_t ptsread; +__XNU_PRIVATE_EXTERN d_write_t ptswrite; +__XNU_PRIVATE_EXTERN d_ioctl_t ptyioctl; /* common ioctl */ +__XNU_PRIVATE_EXTERN d_stop_t ptsstop; +__XNU_PRIVATE_EXTERN d_reset_t ptsreset; +__XNU_PRIVATE_EXTERN d_select_t ptsselect; +__XNU_PRIVATE_EXTERN d_open_t ptcopen; +__XNU_PRIVATE_EXTERN d_close_t ptcclose; +__XNU_PRIVATE_EXTERN d_read_t ptcread; +__XNU_PRIVATE_EXTERN d_write_t ptcwrite; +__XNU_PRIVATE_EXTERN d_stop_t ptcstop; /* NO-OP */ +__XNU_PRIVATE_EXTERN d_reset_t ptcreset; +__XNU_PRIVATE_EXTERN d_select_t ptcselect; + +/* + * XXX Should be devfs function... and use VATTR mechanisms, per + * XXX vnode_setattr2(); only we maybe can't really get back to the + * XXX vnode here for cloning devices (but it works for *cloned* devices + * XXX that are not themselves cloning). + * + * Returns: 0 Success + * namei:??? + * vnode_setattr:??? + */ +static int +_devfs_setattr(void * handle, unsigned short mode, uid_t uid, gid_t gid) +{ + devdirent_t *direntp = (devdirent_t *)handle; + devnode_t *devnodep; + int error = EACCES; + vfs_context_t ctx = vfs_context_current(); + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_uid, uid); + VATTR_SET(&va, va_gid, gid); + VATTR_SET(&va, va_mode, mode & ALLPERMS); + + /* + * If the TIOCPTYGRANT loses the race with the clone operation because + * this function is not part of devfs, and therefore can't take the + * devfs lock to protect the direntp update, then force user space to + * redrive the grant request. + */ + if (direntp == NULL || (devnodep = direntp->de_dnp) == NULL) { + error = ERESTART; + goto out; + } + + /* + * Only do this if we are operating on a device that doesn't clone + * each time it's referenced. We perform a lookup on the device + * to ensure we get the right instance. We can't just use the call + * to devfs_dntovn() to get the vp for the operation, because + * dn_dvm may not have been initialized. + */ + if (devnodep->dn_clone == NULL) { + struct nameidata nd; + char name[128]; + + snprintf(name, sizeof(name), "/dev/%s", direntp->de_name); + NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ctx); + error = namei(&nd); + if (error) + goto out; + error = vnode_setattr(nd.ni_vp, &va, ctx); + vnode_put(nd.ni_vp); + nameidone(&nd); + goto out; + } + +out: + return(error); +} + +#define BUFSIZ 100 /* Chunk size iomoved to/from user */ + +static struct tty_dev_t *tty_dev_head; + +__private_extern__ void +tty_dev_register(struct tty_dev_t *driver) +{ + if (driver) { + driver->next = tty_dev_head; + tty_dev_head = driver; + } +} + +/* + * Given a minor number, return the corresponding structure for that minor + * number. If there isn't one, and the create flag is specified, we create + * one if possible. 
+ * + * Parameters: minor Minor number of ptmx device + * open_flag PF_OPEN_M First open of master + * PF_OPEN_S First open of slave + * 0 Just want ioctl struct + * + * Returns: NULL Did not exist/could not create + * !NULL structure corresponding to the minor number + * + * Locks: tty_lock() on ptmx_ioctl->pt_tty NOT held on entry or exit. + */ + +static struct tty_dev_t * +pty_get_driver(dev_t dev) +{ + int major = major(dev); + struct tty_dev_t *driver; + for (driver = tty_dev_head; driver != NULL; driver = driver->next) { + if ((driver->master == major || driver->slave == major)) { + break; + } + } + return driver; +} + +static struct ptmx_ioctl * +pty_get_ioctl(dev_t dev, int open_flag, struct tty_dev_t **out_driver) +{ + struct tty_dev_t *driver = pty_get_driver(dev); + if (out_driver) { + *out_driver = driver; + } + if (driver && driver->open) { + return driver->open(minor(dev), open_flag); + } + return NULL; +} + +/* + * Locks: tty_lock() of old_ptmx_ioctl->pt_tty NOT held for this call. + */ +static int +pty_free_ioctl(dev_t dev, int open_flag) +{ + struct tty_dev_t *driver = pty_get_driver(dev); + if (driver && driver->free) { + return driver->free(minor(dev), open_flag); + } + return 0; +} + +static int +pty_get_name(dev_t dev, char *buffer, size_t size) +{ + struct tty_dev_t *driver = pty_get_driver(dev); + if (driver && driver->name) { + return driver->name(minor(dev), buffer, size); + } + return 0; +} + +__private_extern__ int +ptsopen(dev_t dev, int flag, __unused int devtype, __unused struct proc *p) +{ + int error; + struct tty_dev_t *driver; + struct ptmx_ioctl *pti = pty_get_ioctl(dev, PF_OPEN_S, &driver); + if (pti == NULL) { + return ENXIO; + } + if (!(pti->pt_flags & PF_UNLOCKED)) { + return EAGAIN; + } + + struct tty *tp = pti->pt_tty; + tty_lock(tp); + + if ((tp->t_state & TS_ISOPEN) == 0) { + termioschars(&tp->t_termios); /* Set up default chars */ + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_cflag = TTYDEF_CFLAG; + tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; + ttsetwater(tp); /* would be done in xxparam() */ + } else if ((tp->t_state & TS_XCLUDE) && kauth_cred_issuser(kauth_cred_get()) == 0) { + error = EBUSY; + goto out; + } + if (tp->t_oproc) /* Ctrlr still around. */ + (void)(*linesw[tp->t_line].l_modem)(tp, 1); + while ((tp->t_state & TS_CARR_ON) == 0) { + if (flag&FNONBLOCK) + break; + error = ttysleep(tp, TSA_CARR_ON(tp), TTIPRI | PCATCH, __FUNCTION__, 0); + if (error) + goto out; + } + error = (*linesw[tp->t_line].l_open)(dev, tp); + /* Successful open; mark as open by the slave */ + + pti->pt_flags |= PF_OPEN_S; + CLR(tp->t_state, TS_IOCTL_NOT_OK); + if (error == 0) + ptcwakeup(tp, FREAD|FWRITE); + +out: + tty_unlock(tp); + return (error); +} + +__private_extern__ int +ptsclose(dev_t dev, int flag, __unused int mode, __unused proc_t p) +{ + int err; + + /* + * This is temporary until the VSX conformance tests + * are fixed. 
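tty_dev_register() and the pty_get_*() helpers above give the shared pty code a small driver table keyed by device major: each front end (the classic pty driver and the cloning ptmx/pts driver) registers one tty_dev_t whose callbacks the common code dispatches through. A hypothetical registration might look like the sketch below; the callback names, the majors, and the stub bodies are made up for illustration, while the field names come from this file and tty_dev.h:

#include <stddef.h>
#include "tty_dev.h"    /* struct tty_dev_t, struct ptmx_ioctl */

static struct ptmx_ioctl *
my_open(int minor, int open_flag)
{
	/* allocate or look up the per-minor ptmx_ioctl here */
	return NULL;
}

static int
my_free(int minor, int open_flag)
{
	/* release the per-minor state here */
	return 0;
}

static int
my_name(int minor, char *buffer, size_t size)
{
	/* fill in the slave name, e.g. snprintf(buffer, size, "ttys%03d", minor) */
	return 0;
}

static struct tty_dev_t my_driver = {
	.master = 15,      /* controller major (made up) */
	.slave  = 16,      /* slave major (made up) */
	.open   = my_open,
	.free   = my_free,
	.name   = my_name,
};

void
my_driver_init(void)
{
	tty_dev_register(&my_driver);   /* links onto the tty_dev_head list */
}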
They are hanging with a deadlock + * where close() will not complete without t_timeout set + */ +#define FIX_VSX_HANG 1 +#ifdef FIX_VSX_HANG + int save_timeout; +#endif + struct tty_dev_t *driver; + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, &driver); + struct tty *tp; + + if (pti == NULL) + return (ENXIO); + + tp = pti->pt_tty; + tty_lock(tp); +#ifdef FIX_VSX_HANG + save_timeout = tp->t_timeout; + tp->t_timeout = 60; +#endif + err = (*linesw[tp->t_line].l_close)(tp, flag); + ptsstop(tp, FREAD|FWRITE); + (void) ttyclose(tp); +#ifdef FIX_VSX_HANG + tp->t_timeout = save_timeout; +#endif + tty_unlock(tp); + + if ((flag & IO_REVOKE) == IO_REVOKE && driver->revoke) { + driver->revoke(minor(dev), tp); + } + /* unconditional, just like ttyclose() */ + pty_free_ioctl(dev, PF_OPEN_S); + + return (err); +} + +__private_extern__ int +ptsread(dev_t dev, struct uio *uio, int flag) +{ + proc_t p = current_proc(); + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, NULL); + struct tty *tp; + int error = 0; + struct uthread *ut; + struct pgrp *pg; + + if (pti == NULL) + return (ENXIO); + tp = pti->pt_tty; + tty_lock(tp); + + ut = (struct uthread *)get_bsdthread_info(current_thread()); +again: + if (pti->pt_flags & PF_REMOTE) { + while (isbackground(p, tp)) { + if ((p->p_sigignore & sigmask(SIGTTIN)) || + (ut->uu_sigmask & sigmask(SIGTTIN)) || + p->p_lflag & P_LPPWAIT) { + error = EIO; + goto out; + } + + + pg = proc_pgrp(p); + if (pg == PGRP_NULL) { + error = EIO; + goto out; + } + /* + * SAFE: We are about to drop the lock ourselves by + * SAFE: erroring out or sleeping anyway. + */ + tty_unlock(tp); + if (pg->pg_jobc == 0) { + pg_rele(pg); + tty_lock(tp); + error = EIO; + goto out; + } + pgsignal(pg, SIGTTIN, 1); + pg_rele(pg); + tty_lock(tp); + + error = ttysleep(tp, &ptsread, TTIPRI | PCATCH | PTTYBLOCK, __FUNCTION__, hz); + if (error) + goto out; + } + if (tp->t_canq.c_cc == 0) { + if (flag & IO_NDELAY) { + error = EWOULDBLOCK; + goto out; + } + error = ttysleep(tp, TSA_PTS_READ(tp), TTIPRI | PCATCH, __FUNCTION__, 0); + if (error) + goto out; + goto again; + } + while (tp->t_canq.c_cc > 1 && uio_resid(uio) > 0) { + int cc; + char buf[BUFSIZ]; + + cc = MIN((int)uio_resid(uio), BUFSIZ); + // Don't copy the very last byte + cc = MIN(cc, tp->t_canq.c_cc - 1); + cc = q_to_b(&tp->t_canq, (u_char *)buf, cc); + error = uiomove(buf, cc, uio); + if (error) + break; + } + if (tp->t_canq.c_cc == 1) + (void) getc(&tp->t_canq); + if (tp->t_canq.c_cc) + goto out; + } else + if (tp->t_oproc) + error = (*linesw[tp->t_line].l_read)(tp, uio, flag); + ptcwakeup(tp, FWRITE); +out: + tty_unlock(tp); + return (error); +} + +/* + * Write to pseudo-tty. + * Wakeups of controlling tty will happen + * indirectly, when tty driver calls ptsstart. + */ +__private_extern__ int +ptswrite(dev_t dev, struct uio *uio, int flag) +{ + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, NULL); + struct tty *tp; + int error; + + if (pti == NULL) + return (ENXIO); + tp = pti->pt_tty; + tty_lock(tp); + + if (tp->t_oproc == 0) + error = EIO; + else + error = (*linesw[tp->t_line].l_write)(tp, uio, flag); + + tty_unlock(tp); + + return (error); +} + +/* + * Start output on pseudo-tty. + * Wake up process selecting or sleeping for input from controlling tty. 
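The isbackground() path in ptsread() above is the kernel side of tty job control: a process reading its controlling terminal from a background process group is signalled with SIGTTIN, or gets EIO outright when it ignores or blocks that signal (or its group is orphaned). A user-space illustration of the EIO branch (standard POSIX behavior, not specific to this patch; run it from an interactive shell so stdin is a tty):

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <errno.h>
#include <sys/wait.h>

int main(void)
{
	/* With SIGTTIN ignored (inherited across fork), a background read()
	 * fails with EIO instead of stopping the process -- the first
	 * branch in ptsread() above. */
	signal(SIGTTIN, SIG_IGN);

	pid_t pid = fork();
	if (pid == 0) {
		setpgid(0, 0);               /* leave the foreground process group */
		char c;
		ssize_t n = read(STDIN_FILENO, &c, 1);
		if (n < 0 && errno == EIO)
			printf("background read: EIO as expected\n");
		_exit(0);
	}
	int status;
	waitpid(pid, &status, 0);
	return 0;
}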
+ * + * t_oproc for this driver; called from within the line discipline + * + * Locks: Assumes tp is locked on entry, remains locked on exit + */ +static void +ptsstart(struct tty *tp) +{ + struct ptmx_ioctl *pti = pty_get_ioctl(tp->t_dev, 0, NULL); + if (pti == NULL) + goto out; + if (tp->t_state & TS_TTSTOP) + goto out; + if (pti->pt_flags & PF_STOPPED) { + pti->pt_flags &= ~PF_STOPPED; + pti->pt_send = TIOCPKT_START; + } + ptcwakeup(tp, FREAD); +out: + return; +} + +/* + * Locks: Assumes tty_lock() is held over this call. + */ +static void +ptcwakeup(struct tty *tp, int flag) +{ + struct ptmx_ioctl *pti = pty_get_ioctl(tp->t_dev, 0, NULL); + if (pti == NULL) + return; + + if (flag & FREAD) { + selwakeup(&pti->pt_selr); + wakeup(TSA_PTC_READ(tp)); + } + if (flag & FWRITE) { + selwakeup(&pti->pt_selw); + wakeup(TSA_PTC_WRITE(tp)); + } +} + +__private_extern__ int +ptcopen(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) +{ + struct tty_dev_t *driver; + struct ptmx_ioctl *pti = pty_get_ioctl(dev, PF_OPEN_M, &driver); + if (pti == NULL) { + return (ENXIO); + } else if (pti == (struct ptmx_ioctl*)-1) { + return (EREDRIVEOPEN); + } + + struct tty *tp = pti->pt_tty; + tty_lock(tp); + + /* If master is open OR slave is still draining, pty is still busy */ + if (tp->t_oproc || (tp->t_state & TS_ISOPEN)) { + tty_unlock(tp); + /* + * If master is closed, we are the only reference, so we + * need to clear the master open bit + */ + if (!tp->t_oproc) { + pty_free_ioctl(dev, PF_OPEN_M); + } + return EBUSY; + } + tp->t_oproc = ptsstart; + CLR(tp->t_state, TS_ZOMBIE); + SET(tp->t_state, TS_IOCTL_NOT_OK); +#ifdef sun4c + tp->t_stop = ptsstop; +#endif + (void)(*linesw[tp->t_line].l_modem)(tp, 1); + tp->t_lflag &= ~EXTPROC; + + if (driver->open_reset) { + pti->pt_flags = PF_UNLOCKED; + pti->pt_send = 0; + pti->pt_ucntl = 0; + } + + tty_unlock(tp); + return 0; +} + +__private_extern__ int +ptcclose(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) +{ + struct tty_dev_t *driver; + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, &driver); + struct tty *tp; + + if (pti == NULL) + return (ENXIO); + tp = pti->pt_tty; + tty_lock(tp); + + (void)(*linesw[tp->t_line].l_modem)(tp, 0); + + /* + * XXX MDMBUF makes no sense for ptys but would inhibit the above + * l_modem(). CLOCAL makes sense but isn't supported. Special + * l_modem()s that ignore carrier drop make no sense for ptys but + * may be in use because other parts of the line discipline make + * sense for ptys. Recover by doing everything that a normal + * ttymodem() would have done except for sending a SIGHUP. + */ + if (tp->t_state & TS_ISOPEN) { + tp->t_state &= ~(TS_CARR_ON | TS_CONNECTED); + tp->t_state |= TS_ZOMBIE; + ttyflush(tp, FREAD | FWRITE); + } + + tp->t_oproc = 0; /* mark closed */ + + tty_unlock(tp); + + pty_free_ioctl(dev, PF_OPEN_M); +#if CONFIG_MACF + if (driver->mac_notify) { + mac_pty_notify_close(p, tp, dev, NULL); + } +#endif + + return (0); +} + +__private_extern__ int +ptcread(dev_t dev, struct uio *uio, int flag) +{ + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, NULL); + struct tty *tp; + char buf[BUFSIZ]; + int error = 0, cc; + + if (pti == NULL) + return (ENXIO); + tp = pti->pt_tty; + tty_lock(tp); + + /* + * We want to block until the slave + * is open, and there's something to read; + * but if we lost the slave or we're NBIO, + * then return the appropriate error instead. 
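ptcread() here is where packet mode shows up on the wire: with PF_PKT set, each master-side read is prefixed by a status byte, either pt_send (e.g. TIOCPKT_START/TIOCPKT_STOP) or TIOCPKT_DATA (0) before ordinary data. From user space that is the TIOCPKT protocol; a short demonstration using the standard pty calls (error handling trimmed for brevity):

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

int main(void)
{
	int mfd = posix_openpt(O_RDWR | O_NOCTTY);
	if (mfd < 0 || grantpt(mfd) || unlockpt(mfd))
		return 1;

	int on = 1;
	ioctl(mfd, TIOCPKT, &on);            /* sets PF_PKT in ptyioctl() */

	int sfd = open(ptsname(mfd), O_RDWR | O_NOCTTY);
	write(sfd, "hi\n", 3);               /* slave output -> master's read queue */

	char buf[64];
	ssize_t n = read(mfd, buf, sizeof(buf));
	if (n > 0)                           /* first byte is the packet status */
		printf("status byte %#x, %zd data bytes\n", buf[0], n - 1);
	return 0;
}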
+ */ + for (;;) { + if (tp->t_state & TS_ISOPEN) { + if (pti->pt_flags & PF_PKT && pti->pt_send) { + error = ureadc((int)pti->pt_send, uio); + if (error) + goto out; + if (pti->pt_send & TIOCPKT_IOCTL) { + cc = MIN((int)uio_resid(uio), + (int)sizeof(tp->t_termios)); + uiomove((caddr_t)&tp->t_termios, cc, + uio); + } + pti->pt_send = 0; + goto out; + } + if (pti->pt_flags & PF_UCNTL && pti->pt_ucntl) { + error = ureadc((int)pti->pt_ucntl, uio); + if (error) + goto out; + pti->pt_ucntl = 0; + goto out; + } + if (tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) + break; + } + if ((tp->t_state & TS_CONNECTED) == 0) + goto out; /* EOF */ + if (flag & IO_NDELAY) { + error = EWOULDBLOCK; + goto out; + } + error = ttysleep(tp, TSA_PTC_READ(tp), TTIPRI | PCATCH, __FUNCTION__, 0); + if (error) + goto out; + } + if (pti->pt_flags & (PF_PKT|PF_UCNTL)) + error = ureadc(0, uio); + while (uio_resid(uio) > 0 && error == 0) { + cc = q_to_b(&tp->t_outq, (u_char *)buf, MIN((int)uio_resid(uio), BUFSIZ)); + if (cc <= 0) + break; + error = uiomove(buf, cc, uio); + } + (*linesw[tp->t_line].l_start)(tp); + +out: + tty_unlock(tp); + + return (error); +} + +/* + * Line discipline callback + * + * Locks: tty_lock() is assumed held on entry and exit. + */ +__private_extern__ int +ptsstop(struct tty* tp, int flush) +{ + struct ptmx_ioctl *pti = pty_get_ioctl(tp->t_dev, 0, NULL); + int flag; + + if (pti == NULL) + return (ENXIO); + + /* note: FLUSHREAD and FLUSHWRITE already ok */ + if (flush == 0) { + flush = TIOCPKT_STOP; + pti->pt_flags |= PF_STOPPED; + } else + pti->pt_flags &= ~PF_STOPPED; + pti->pt_send |= flush; + /* change of perspective */ + flag = 0; + if (flush & FREAD) + flag |= FWRITE; + if (flush & FWRITE) + flag |= FREAD; + ptcwakeup(tp, flag); + return 0; +} + +__private_extern__ int +ptsreset(__unused int uban) +{ + return (0); +} + +int +ptsselect(dev_t dev, int rw, void *wql, proc_t p) +{ + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, NULL); + struct tty *tp; + int retval = 0; + + if (pti == NULL) + return (ENXIO); + tp = pti->pt_tty; + if (tp == NULL) + return (ENXIO); + + tty_lock(tp); + + switch (rw) { + case FREAD: + if (ISSET(tp->t_state, TS_ZOMBIE)) { + retval = 1; + break; + } + + retval = ttnread(tp); + if (retval > 0) { + break; + } + + selrecord(p, &tp->t_rsel, wql); + break; + case FWRITE: + if (ISSET(tp->t_state, TS_ZOMBIE)) { + retval = 1; + break; + } + + if ((tp->t_outq.c_cc <= tp->t_lowat) && + ISSET(tp->t_state, TS_CONNECTED)) { + retval = tp->t_hiwat - tp->t_outq.c_cc; + break; + } + + selrecord(p, &tp->t_wsel, wql); + break; + } + + tty_unlock(tp); + return (retval); +} + +__private_extern__ int +ptcselect(dev_t dev, int rw, void *wql, proc_t p) +{ + struct tty_dev_t *driver; + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, &driver); + struct tty *tp; + int retval = 0; + + if (pti == NULL) + return (ENXIO); + tp = pti->pt_tty; + tty_lock(tp); + + if ((tp->t_state & TS_CONNECTED) == 0) { + retval = 1; + goto out; + } + switch (rw) { + + case FREAD: + /* + * Need to block timeouts (ttrstart). + */ + if ((tp->t_state&TS_ISOPEN) && + tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) { + retval = (driver->fix_7828447) ? 
tp->t_outq.c_cc : 1; + break; + } + /* FALLTHROUGH */ + + case 0: /* exceptional */ + if ((tp->t_state&TS_ISOPEN) && + ((pti->pt_flags & PF_PKT && pti->pt_send) || + (pti->pt_flags & PF_UCNTL && pti->pt_ucntl))) { + retval = 1; + break; + } + selrecord(p, &pti->pt_selr, wql); + break; + + + case FWRITE: + if (tp->t_state&TS_ISOPEN) { + if (pti->pt_flags & PF_REMOTE) { + if (tp->t_canq.c_cc == 0) { + retval = (driver->fix_7828447) ? (TTYHOG - 1) : 1; + break; + } + } else { + retval = (TTYHOG - 2) - (tp->t_rawq.c_cc + tp->t_canq.c_cc); + if (retval > 0) { + retval = (driver->fix_7828447) ? retval : 1; + break; + } + if (tp->t_canq.c_cc == 0 && (tp->t_lflag&ICANON)) { + retval = 1; + break; + } + retval = 0; + } + } + selrecord(p, &pti->pt_selw, wql); + break; + + } +out: + tty_unlock(tp); + + return (retval); +} + +__private_extern__ int +ptcstop(__unused struct tty *tp, __unused int flush) +{ + return (0); +} + +__private_extern__ int +ptcreset(__unused int uban) +{ + return (0); +} + +__private_extern__ int +ptcwrite(dev_t dev, struct uio *uio, int flag) +{ + struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, NULL); + struct tty *tp; + u_char *cp = NULL; + int cc = 0; + u_char locbuf[BUFSIZ]; + int wcnt = 0; + int error = 0; + + if (pti == NULL) + return (ENXIO); + tp = pti->pt_tty; + tty_lock(tp); + +again: + if ((tp->t_state & TS_ISOPEN) == 0) + goto block; + if (pti->pt_flags & PF_REMOTE) { + if (tp->t_canq.c_cc) + goto block; + while ((uio_resid(uio) > 0 || cc > 0) && + tp->t_canq.c_cc < TTYHOG - 1) { + if (cc == 0) { + cc = MIN((int)uio_resid(uio), BUFSIZ); + cc = MIN(cc, TTYHOG - 1 - tp->t_canq.c_cc); + cp = locbuf; + error = uiomove((caddr_t)cp, cc, uio); + if (error) + goto out; + /* check again for safety */ + if ((tp->t_state & TS_ISOPEN) == 0) { + /* adjust as usual */ + uio_setresid(uio, (uio_resid(uio) + cc)); + error = EIO; + goto out; + } + } + if (cc > 0) { + cc = b_to_q((u_char *)cp, cc, &tp->t_canq); + /* + * XXX we don't guarantee that the canq size + * is >= TTYHOG, so the above b_to_q() may + * leave some bytes uncopied. However, space + * is guaranteed for the null terminator if + * we don't fail here since (TTYHOG - 1) is + * not a multiple of CBSIZE. + */ + if (cc > 0) + break; + } + } + /* adjust for data copied in but not written */ + uio_setresid(uio, (uio_resid(uio) + cc)); + (void) putc(0, &tp->t_canq); + ttwakeup(tp); + wakeup(TSA_PTS_READ(tp)); + goto out; + } + while (uio_resid(uio) > 0 || cc > 0) { + if (cc == 0) { + cc = MIN((int)uio_resid(uio), BUFSIZ); + cp = locbuf; + error = uiomove((caddr_t)cp, cc, uio); + if (error) + goto out; + /* check again for safety */ + if ((tp->t_state & TS_ISOPEN) == 0) { + /* adjust for data copied in but not written */ + uio_setresid(uio, (uio_resid(uio) + cc)); + error = EIO; + goto out; + } + } + while (cc > 0) { + if ((tp->t_rawq.c_cc + tp->t_canq.c_cc) >= TTYHOG - 2 && + (tp->t_canq.c_cc > 0 || !(tp->t_lflag&ICANON))) { + wakeup(TSA_HUP_OR_INPUT(tp)); + goto block; + } + (*linesw[tp->t_line].l_rint)(*cp++, tp); + wcnt++; + cc--; + } + cc = 0; + } +out: + tty_unlock(tp); + + return (error); + +block: + /* + * Come here to wait for slave to open, for space + * in outq, or space in rawq, or an empty canq. 
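The block path here is the writer-side flow control: once the slave's raw and canonical queues hold roughly TTYHOG - 2 bytes with nobody reading the slave, a master-side write sleeps, or fails with EWOULDBLOCK under IO_NDELAY. A user-space probe of that limit (illustrative; the observed count depends on TTYHOG and on echo/termios settings):

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int mfd = posix_openpt(O_RDWR | O_NOCTTY);
	if (mfd < 0 || grantpt(mfd) || unlockpt(mfd))
		return 1;
	int sfd = open(ptsname(mfd), O_RDWR | O_NOCTTY);
	(void)sfd;                           /* open but never read the slave */

	fcntl(mfd, F_SETFL, O_NONBLOCK);

	char byte = 'a';                     /* no newline: bytes pile up in rawq */
	long total = 0;
	while (write(mfd, &byte, 1) == 1)
		total++;
	if (errno == EAGAIN)                 /* EWOULDBLOCK from the path above */
		printf("input queue full after %ld bytes (~TTYHOG)\n", total);
	return 0;
}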
+	 */
+	if ((tp->t_state & TS_CONNECTED) == 0) {
+		/* adjust for data copied in but not written */
+		uio_setresid(uio, (uio_resid(uio) + cc));
+		error = EIO;
+		goto out;
+	}
+	if (flag & IO_NDELAY) {
+		/* adjust for data copied in but not written */
+		uio_setresid(uio, (uio_resid(uio) + cc));
+		if (wcnt == 0)
+			error = EWOULDBLOCK;
+		goto out;
+	}
+	error = ttysleep(tp, TSA_PTC_WRITE(tp), TTOPRI | PCATCH, __FUNCTION__, 0);
+	if (error) {
+		/* adjust for data copied in but not written */
+		uio_setresid(uio, (uio_resid(uio) + cc));
+		goto out;
+	}
+	goto again;
+}
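Note on the blocking contract above: ptcwrite() returns EWOULDBLOCK only when nothing at all was consumed (wcnt == 0); a short write is reported as success with uio_resid() already adjusted, so a user-space writer on a non-blocking master has to loop. A minimal sketch of that loop, assuming a master fd already opened with O_NONBLOCK (the helper name is illustrative, not part of this change):

	#include <errno.h>
	#include <poll.h>
	#include <unistd.h>

	/* Illustrative helper: push a whole buffer through a non-blocking master. */
	static ssize_t
	write_all_to_master(int mfd, const char *buf, size_t len)
	{
		size_t off = 0;

		while (off < len) {
			ssize_t n = write(mfd, buf + off, len - off);

			if (n > 0) {
				off += (size_t)n;	/* short write: keep looping */
				continue;
			}
			if (n < 0 && (errno == EWOULDBLOCK || errno == EAGAIN)) {
				struct pollfd pfd = { .fd = mfd, .events = POLLOUT };

				/* block until ptcselect() reports room for input */
				if (poll(&pfd, 1, -1) < 0 && errno != EINTR)
					return (-1);
				continue;
			}
			if (n < 0 && errno == EINTR)
				continue;
			return (-1);	/* e.g. EIO once the slave side is gone */
		}
		return ((ssize_t)off);
	}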
+
+__private_extern__ int
+ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
+{
+	struct tty_dev_t *driver;
+	struct ptmx_ioctl *pti = pty_get_ioctl(dev, 0, &driver);
+	struct tty *tp;
+	int stop, error = 0;
+	int allow_ext_ioctl = 1;
+
+	if (pti == NULL)
+		return (ENXIO);
+	tp = pti->pt_tty;
+	tty_lock(tp);
+
+	u_char *cc = tp->t_cc;
+
+	/*
+	 * Do not permit extended ioctls on the master side of the pty unless
+	 * the slave side has been successfully opened and initialized.
+	 */
+	if (major(dev) == driver->master &&
+	    driver->fix_7070978 &&
+	    ISSET(tp->t_state, TS_IOCTL_NOT_OK)) {
+		allow_ext_ioctl = 0;
+	}
+
+	/*
+	 * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG.
+	 * ttywflush(tp) will hang if there are characters in the outq.
+	 */
+	if (cmd == TIOCEXT && allow_ext_ioctl) {
+		/*
+		 * When the EXTPROC bit is being toggled, we need
+		 * to send a TIOCPKT_IOCTL if the packet driver
+		 * is turned on.
+		 */
+		if (*(int *)data) {
+			if (pti->pt_flags & PF_PKT) {
+				pti->pt_send |= TIOCPKT_IOCTL;
+				ptcwakeup(tp, FREAD);
+			}
+			tp->t_lflag |= EXTPROC;
+		} else {
+			if ((tp->t_lflag & EXTPROC) &&
+			    (pti->pt_flags & PF_PKT)) {
+				pti->pt_send |= TIOCPKT_IOCTL;
+				ptcwakeup(tp, FREAD);
+			}
+			tp->t_lflag &= ~EXTPROC;
+		}
+		goto out;
+	} else
+	if (cdevsw[major(dev)].d_open == ptcopen) {
+		switch (cmd) {
+
+		case TIOCGPGRP:
+			/*
+			 * We avoid calling ttioctl on the controller since,
+			 * in that case, tp must be the controlling terminal.
+			 */
+			*(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : 0;
+			goto out;
+
+		case TIOCPKT:
+			if (*(int *)data) {
+				if (pti->pt_flags & PF_UCNTL) {
+					error = EINVAL;
+					goto out;
+				}
+				pti->pt_flags |= PF_PKT;
+			} else
+				pti->pt_flags &= ~PF_PKT;
+			goto out;
+
+		case TIOCUCNTL:
+			if (*(int *)data) {
+				if (pti->pt_flags & PF_PKT) {
+					error = EINVAL;
+					goto out;
+				}
+				pti->pt_flags |= PF_UCNTL;
+			} else
+				pti->pt_flags &= ~PF_UCNTL;
+			goto out;
+
+		case TIOCREMOTE:
+			if (*(int *)data)
+				pti->pt_flags |= PF_REMOTE;
+			else
+				pti->pt_flags &= ~PF_REMOTE;
+			ttyflush(tp, FREAD|FWRITE);
+			goto out;
+
+		case TIOCSETP:
+		case TIOCSETN:
+		case TIOCSETD:
+		case TIOCSETA_32:
+		case TIOCSETAW_32:
+		case TIOCSETAF_32:
+		case TIOCSETA_64:
+		case TIOCSETAW_64:
+		case TIOCSETAF_64:
+			ndflush(&tp->t_outq, tp->t_outq.c_cc);
+			break;
+
+		case TIOCSIG:
+			if (*(unsigned int *)data >= NSIG ||
+			    *(unsigned int *)data == 0) {
+				error = EINVAL;
+				goto out;
+			}
+			if ((tp->t_lflag&NOFLSH) == 0)
+				ttyflush(tp, FREAD|FWRITE);
+			if ((*(unsigned int *)data == SIGINFO) &&
+			    ((tp->t_lflag&NOKERNINFO) == 0))
+				ttyinfo_locked(tp);
+			/*
+			 * SAFE: All callers drop the lock on return and
+			 * SAFE: the linesw[] will short circuit this call
+			 * SAFE: if the ioctl() is eaten before the lower
+			 * SAFE: level code gets to see it.
+			 */
+			tty_unlock(tp);
+			tty_pgsignal(tp, *(unsigned int *)data, 1);
+			tty_lock(tp);
+			goto out;
+
+		case TIOCPTYGRANT:	/* grantpt(3) */
+			/*
+			 * Change the uid of the slave to that of the calling
+			 * thread, change the gid of the slave to GID_TTY,
+			 * change the mode to 0620 (rw--w----).
+			 */
+			{
+				error = _devfs_setattr(pti->pt_devhandle, 0620, kauth_getuid(), GID_TTY);
+				if (major(dev) == driver->master) {
+					if (driver->mac_notify) {
+#if CONFIG_MACF
+						if (!error) {
+							tty_unlock(tp);
+							mac_pty_notify_grant(p, tp, dev, NULL);
+							tty_lock(tp);
+						}
+#endif
+					} else {
+						error = 0;
+					}
+				}
+				goto out;
+			}
+
+		case TIOCPTYGNAME:	/* ptsname(3) */
+			/*
+			 * Report the name of the slave device in *data
+			 * (128 bytes max.). Use the same template string
+			 * used for calling devfs_make_node() to create it.
+			 */
+			pty_get_name(dev, data, 128);
+			error = 0;
+			goto out;
+
+		case TIOCPTYUNLK:	/* unlockpt(3) */
+			/*
+			 * Unlock the slave device so that it can be opened.
+			 */
+			if (major(dev) == driver->master) {
+				pti->pt_flags |= PF_UNLOCKED;
+			}
+			error = 0;
+			goto out;
+		}
+
+		/*
+		 * Fail all other calls; pty masters are not serial devices;
+		 * we only pretend they are when the slave side of the pty is
+		 * already open.
+		 */
+		if (!allow_ext_ioctl) {
+			error = ENOTTY;
+			goto out;
+		}
+	}
+	error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p);
+	if (error == ENOTTY) {
+		error = ttioctl_locked(tp, cmd, data, flag, p);
+		if (error == ENOTTY) {
+			if (pti->pt_flags & PF_UCNTL && (cmd & ~0xff) == UIOCCMD(0)) {
+				/* Process the UIOCCMD ioctl group */
+				if (cmd & 0xff) {
+					pti->pt_ucntl = (u_char)cmd;
+					ptcwakeup(tp, FREAD);
+				}
+				error = 0;
+				goto out;
+			} else if (cmd == TIOCSBRK || cmd == TIOCCBRK) {
+				/*
+				 * POSIX conformance; rdar://3936338
+				 *
+				 * Clear ENOTTY in the case of setting or
+				 * clearing a break failing because ptys
+				 * don't support break like real serial
+				 * ports.
+				 */
+				error = 0;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * If external processing and packet mode, send an ioctl packet.
+	 */
+	if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) {
+		switch (cmd) {
+		case TIOCSETA_32:
+		case TIOCSETAW_32:
+		case TIOCSETAF_32:
+		case TIOCSETA_64:
+		case TIOCSETAW_64:
+		case TIOCSETAF_64:
+		case TIOCSETP:
+		case TIOCSETN:
+		case TIOCSETC:
+		case TIOCSLTC:
+		case TIOCLBIS:
+		case TIOCLBIC:
+		case TIOCLSET:
+			pti->pt_send |= TIOCPKT_IOCTL;
+			ptcwakeup(tp, FREAD);
+		default:
+			break;
+		}
+	}
+	stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s'))
+		&& CCEQ(cc[VSTART], CTRL('q'));
+	if (pti->pt_flags & PF_NOSTOP) {
+		if (stop) {
+			pti->pt_send &= ~TIOCPKT_NOSTOP;
+			pti->pt_send |= TIOCPKT_DOSTOP;
+			pti->pt_flags &= ~PF_NOSTOP;
+			ptcwakeup(tp, FREAD);
+		}
+	} else {
+		if (!stop) {
+			pti->pt_send &= ~TIOCPKT_DOSTOP;
+			pti->pt_send |= TIOCPKT_NOSTOP;
+			pti->pt_flags |= PF_NOSTOP;
+			ptcwakeup(tp, FREAD);
+		}
+	}
+out:
+	tty_unlock(tp);
+
+	return (error);
+}
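The TIOCPTYGRANT, TIOCPTYGNAME and TIOCPTYUNLK cases above are the kernel side of grantpt(3), ptsname(3) and unlockpt(3). A minimal user-space sketch of the allocation handshake they back, finishing with the TIOCPKT packet mode consumed by ptcread(); the helper name and the trimmed error handling are illustrative only, not part of this change:

	#include <fcntl.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	/* Illustrative helper: open a master/slave pair, then enable packet mode. */
	static int
	open_pty_pair(int *mfdp, int *sfdp)
	{
		int on = 1;
		int mfd = posix_openpt(O_RDWR | O_NOCTTY);	/* clones a ptmx minor */

		if (mfd < 0)
			return (-1);
		if (grantpt(mfd) == -1 ||	/* TIOCPTYGRANT: uid, GID_TTY, 0620 */
		    unlockpt(mfd) == -1) {	/* TIOCPTYUNLK: sets PF_UNLOCKED */
			(void)close(mfd);
			return (-1);
		}
		const char *name = ptsname(mfd);	/* TIOCPTYGNAME, 128-byte buffer */
		if (name == NULL) {
			(void)close(mfd);
			return (-1);
		}
		int sfd = open(name, O_RDWR | O_NOCTTY);
		if (sfd < 0) {
			(void)close(mfd);
			return (-1);
		}
		(void)ioctl(mfd, TIOCPKT, &on);	/* master reads now carry a control byte */
		*mfdp = mfd;
		*sfdp = sfd;
		return (0);
	}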
diff --git a/bsd/kern/tty_dev.h b/bsd/kern/tty_dev.h
new file mode 100644
index 000000000..49fc715db
--- /dev/null
+++ b/bsd/kern/tty_dev.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef __TTY_DEV_H__
+#define __TTY_DEV_H__
+
+/*
+ * ptmx_ioctl is a pointer to a list of pointers to tty structures which is
+ * grown, as necessary, copied, and replaced, but never shrunk. The ioctl
+ * structures themselves pointed to from this list come and go as needed.
+ */
+struct ptmx_ioctl {
+	struct tty	*pt_tty;	/* pointer to ttymalloc()'ed data */
+	int		pt_flags;
+	struct selinfo	pt_selr;
+	struct selinfo	pt_selw;
+	u_char		pt_send;
+	u_char		pt_ucntl;
+	void		*pt_devhandle;	/* cloned slave device handle */
+};
+
+#define PF_PKT		0x0008		/* packet mode */
+#define PF_STOPPED	0x0010		/* user told stopped */
+#define PF_REMOTE	0x0020		/* remote and flow controlled input */
+#define PF_NOSTOP	0x0040
+#define PF_UCNTL	0x0080		/* user control mode */
+#define PF_UNLOCKED	0x0100		/* slave unlock (master open resets) */
+#define PF_OPEN_M	0x0200		/* master is open */
+#define PF_OPEN_S	0x0400		/* slave is open */
+
+struct tty_dev_t {
+	int	master;		// master major device number
+	int	slave;		// slave major device number
+	unsigned int	fix_7828447:1,
+			fix_7070978:1,
+			mac_notify:1,
+			open_reset:1,
+			_reserved:28;
+#if __LP64__
+	int	_pad;
+#endif
+
+	struct tty_dev_t *next;
+
+	struct ptmx_ioctl *(*open)(int minor, int flags);
+	int (*free)(int minor, int flags);
+	int (*name)(int minor, char *buffer, size_t size);
+	void (*revoke)(int minor, struct tty *tp);
+};
+
+extern void tty_dev_register(struct tty_dev_t *dev);
+
+extern int ttnread(struct tty *tp);
+
+#endif // __TTY_DEV_H__
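The only two registrations of this interface in the tree are ptmx_init() and pty_init() in the files below. Purely as an illustration of the contract, a hypothetical third pty flavor would plug in the same way; every name here is invented for the example and is not part of xnu:

	/* Illustrative only: a made-up pty flavor registering with tty_dev. */
	static struct ptmx_ioctl *foo_get_ioctl(int minor, int open_flag);
	static int foo_free_ioctl(int minor, int open_flag);
	static int foo_get_name(int minor, char *buffer, size_t size);

	static struct tty_dev_t foo_driver;

	static void
	foo_register(int master_major, int slave_major)
	{
		foo_driver.master = master_major;
		foo_driver.slave = slave_major;
		foo_driver.fix_7828447 = 1;	/* select reports real byte counts */
		foo_driver.open = &foo_get_ioctl;
		foo_driver.free = &foo_free_ioctl;
		foo_driver.name = &foo_get_name;
		/* revoke left NULL, as pty_init() below also leaves it */
		tty_dev_register(&foo_driver);	/* links onto the driver list */
	}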
diff --git a/bsd/kern/tty_ptmx.c b/bsd/kern/tty_ptmx.c
index 0f2133634..6f1c71c62 100644
--- a/bsd/kern/tty_ptmx.c
+++ b/bsd/kern/tty_ptmx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2010 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1997-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -77,8 +77,6 @@
 #include
 #include
 #include
-#include	/* _devfs_setattr() */
-#include	/* _devfs_setattr() */
 #include
 #include
 #include
@@ -89,118 +87,50 @@
 #include
 #endif
 
-/* XXX belongs in devfs somewhere - LATER */
-int _devfs_setattr(void *, unsigned short, uid_t, gid_t);
-
-
-#define FREE_BSDSTATIC __private_extern__
-#define d_devtotty_t struct tty **
+#include "tty_dev.h"
 
 /*
  * Forward declarations
  */
 int ptmx_init(int n_ptys);
-static void ptsd_start(struct tty *tp);
-static void ptmx_wakeup(struct tty *tp, int flag);
-__XNU_PRIVATE_EXTERN d_open_t ptsd_open;
-__XNU_PRIVATE_EXTERN d_close_t ptsd_close;
-__XNU_PRIVATE_EXTERN d_read_t ptsd_read;
-__XNU_PRIVATE_EXTERN d_write_t ptsd_write;
-__XNU_PRIVATE_EXTERN d_ioctl_t cptyioctl;	/* common ioctl */
-__XNU_PRIVATE_EXTERN d_stop_t ptsd_stop;
-__XNU_PRIVATE_EXTERN d_reset_t ptsd_reset;
-__XNU_PRIVATE_EXTERN d_open_t ptmx_open;
-__XNU_PRIVATE_EXTERN d_close_t ptmx_close;
-__XNU_PRIVATE_EXTERN d_read_t ptmx_read;
-__XNU_PRIVATE_EXTERN d_write_t ptmx_write;
-__XNU_PRIVATE_EXTERN d_stop_t ptmx_stop;	/* NO-OP */
-__XNU_PRIVATE_EXTERN d_reset_t ptmx_reset;
-__XNU_PRIVATE_EXTERN d_select_t ptmx_select;
-__XNU_PRIVATE_EXTERN d_select_t ptsd_select;
-
-extern d_devtotty_t ptydevtotty;
+static struct ptmx_ioctl *ptmx_get_ioctl(int minor, int open_flag);
+static int ptmx_free_ioctl(int minor, int open_flag);
+static int ptmx_get_name(int minor, char *buffer, size_t size);
+static void ptsd_revoke_knotes(int minor, struct tty *tp);
+
+extern d_open_t ptsopen;
+extern d_close_t ptsclose;
+extern d_read_t ptsread;
+extern d_write_t ptswrite;
+extern d_ioctl_t ptyioctl;
+extern d_stop_t ptsstop;
+extern d_reset_t ptsreset;
+extern d_select_t ptsselect;
+
+extern d_open_t ptcopen;
+extern d_close_t ptcclose;
+extern d_read_t ptcread;
+extern d_write_t ptcwrite;
+extern d_stop_t ptcstop;
+extern d_reset_t ptcreset;
+extern d_select_t ptcselect;
 
 static int ptmx_major;		/* dynamically assigned major number */
 static struct cdevsw ptmx_cdev = {
-	ptmx_open, ptmx_close, ptmx_read, ptmx_write,
-	cptyioctl, ptmx_stop, ptmx_reset, 0,
-	ptmx_select, eno_mmap, eno_strat, eno_getc,
+	ptcopen, ptcclose, ptcread, ptcwrite,
+	ptyioctl, ptcstop, ptcreset, 0,
+	ptcselect, eno_mmap, eno_strat, eno_getc,
 	eno_putc, D_TTY
 };
 
 static int ptsd_major;		/* dynamically assigned major number */
 static struct cdevsw ptsd_cdev = {
-	ptsd_open, ptsd_close, ptsd_read, ptsd_write,
-	cptyioctl, ptsd_stop, ptsd_reset, 0,
-	ptsd_select, eno_mmap, eno_strat, eno_getc,
+	ptsopen, ptsclose, ptsread, ptswrite,
+	ptyioctl, ptsstop, ptsreset, 0,
+	ptsselect, eno_mmap, eno_strat, eno_getc,
 	eno_putc, D_TTY
 };
 
-/*
- * XXX Should be devfs function... and use VATTR mechanisms, per
- * XXX vnode_setattr2(); only we maybe can't really get back to the
- * XXX vnode here for cloning devices (but it works for *cloned* devices
- * XXX that are not themselves cloning).
- *
- * Returns:	0			Success
- *	namei:???
- *	vnode_setattr:???
- */ -int -_devfs_setattr(void * handle, unsigned short mode, uid_t uid, gid_t gid) -{ - devdirent_t *direntp = (devdirent_t *)handle; - devnode_t *devnodep; - int error = EACCES; - vfs_context_t ctx = vfs_context_current();; - struct vnode_attr va; - - VATTR_INIT(&va); - VATTR_SET(&va, va_uid, uid); - VATTR_SET(&va, va_gid, gid); - VATTR_SET(&va, va_mode, mode & ALLPERMS); - - /* - * If the TIOCPTYGRANT loses the race with the clone operation because - * this function is not part of devfs, and therefore can't take the - * devfs lock to protect the direntp update, then force user space to - * redrive the grant request. - */ - if (direntp == NULL || (devnodep = direntp->de_dnp) == NULL) { - error = ERESTART; - goto out; - } - - /* - * Only do this if we are operating on device that doesn't clone - * each time it's referenced. We perform a lookup on the device - * to insure we get the right instance. We can't just use the call - * to devfs_dntovn() to get the vp for the operation, because - * dn_dvm may not have been initialized. - */ - if (devnodep->dn_clone == NULL) { - struct nameidata nd; - char name[128]; - - snprintf(name, sizeof(name), "/dev/%s", direntp->de_name); - NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ctx); - error = namei(&nd); - if (error) - goto out; - error = vnode_setattr(nd.ni_vp, &va, ctx); - vnode_put(nd.ni_vp); - nameidone(&nd); - goto out; - } - -out: - return(error); -} - - - -#define BUFSIZ 100 /* Chunk size iomoved to/from user */ - /* * ptmx == /dev/ptmx * ptsd == /dev/pts[0123456789]{3} @@ -237,31 +167,6 @@ SYSCTL_PROC(_kern_tty, OID_AUTO, ptmx_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ptmx_max, 0, &sysctl_ptmx_max, "I", "ptmx_max"); - -/* - * ptmx_ioctl is a pointer to a list of pointers to tty structures which is - * grown, as necessary, copied, and replaced, but never shrunk. The ioctl - * structures themselves pointed to from this list come and go as needed. 
- */ -struct ptmx_ioctl { - struct tty *pt_tty; /* pointer to ttymalloc()'ed data */ - int pt_flags; - struct selinfo pt_selr; - struct selinfo pt_selw; - u_char pt_send; - u_char pt_ucntl; - void *pt_devhandle; /* cloned slave device handle */ -}; - -#define PF_PKT 0x0008 /* packet mode */ -#define PF_STOPPED 0x0010 /* user told stopped */ -#define PF_REMOTE 0x0020 /* remote and flow controlled input */ -#define PF_NOSTOP 0x0040 -#define PF_UCNTL 0x0080 /* user control mode */ -#define PF_UNLOCKED 0x0100 /* slave unlock (master open resets) */ -#define PF_OPEN_M 0x0200 /* master is open */ -#define PF_OPEN_S 0x0400 /* slave is open */ - static int ptmx_clone(dev_t dev, int minor); /* @@ -297,6 +202,8 @@ ptsd_kevent_mtx_unlock(int minor) lck_mtx_unlock(&ptsd_kevent_lock[PTSD_KE_LOCK_INDEX(minor)]); } +static struct tty_dev_t _ptmx_driver; + int ptmx_init( __unused int config_count) { @@ -335,7 +242,21 @@ ptmx_init( __unused int config_count) (void)devfs_make_node_clone(makedev(ptmx_major, 0), DEVFS_CHAR, UID_ROOT, GID_TTY, 0666, ptmx_clone, PTMX_TEMPLATE); - return (0); + + _ptmx_driver.master = ptmx_major; + _ptmx_driver.slave = ptsd_major; + _ptmx_driver.fix_7828447 = 1; + _ptmx_driver.fix_7070978 = 1; +#if CONFIG_MACF + _ptmx_driver.mac_notify = 1; +#endif + _ptmx_driver.open = &ptmx_get_ioctl; + _ptmx_driver.free = &ptmx_free_ioctl; + _ptmx_driver.name = &ptmx_get_name; + _ptmx_driver.revoke = &ptsd_revoke_knotes; + tty_dev_register(&_ptmx_driver); + + return (0); } @@ -422,6 +343,14 @@ ptmx_get_ioctl(int minor, int open_flag) FREE(old_pis_ioctl_list, M_TTYS); } + /* is minor in range now? */ + if (minor < 0 || minor >= _state.pis_total) { + ttyfree(new_ptmx_ioctl->pt_tty); + DEVFS_UNLOCK(); + FREE(new_ptmx_ioctl, M_TTYS); + return (NULL); + } + if (_state.pis_ioctl_list[minor] != NULL) { ttyfree(new_ptmx_ioctl->pt_tty); DEVFS_UNLOCK(); @@ -451,11 +380,12 @@ ptmx_get_ioctl(int minor, int open_flag) if (_state.pis_ioctl_list[minor]->pt_devhandle == NULL) { printf("devfs_make_node() call failed for ptmx_get_ioctl()!!!!\n"); } - } else if (open_flag & PF_OPEN_S) { - DEVFS_LOCK(); - _state.pis_ioctl_list[minor]->pt_flags |= PF_OPEN_S; - DEVFS_UNLOCK(); } + + if (minor < 0 || minor >= _state.pis_total) { + return (NULL); + } + return (_state.pis_ioctl_list[minor]); } @@ -468,6 +398,12 @@ ptmx_free_ioctl(int minor, int open_flag) struct ptmx_ioctl *old_ptmx_ioctl = NULL; DEVFS_LOCK(); + + if (minor < 0 || minor >= _state.pis_total) { + DEVFS_UNLOCK(); + return (-1); + } + _state.pis_ioctl_list[minor]->pt_flags &= ~(open_flag); /* @@ -496,7 +432,7 @@ ptmx_free_ioctl(int minor, int open_flag) /* Don't remove the entry until the devfs slot is free */ DEVFS_LOCK(); - _state.pis_ioctl_list[ minor] = NULL; + _state.pis_ioctl_list[minor] = NULL; _state.pis_free++; DEVFS_UNLOCK(); } @@ -504,6 +440,11 @@ ptmx_free_ioctl(int minor, int open_flag) return (0); /* Success */ } +static int +ptmx_get_name(int minor, char *buffer, size_t size) +{ + return snprintf(buffer, size, "/dev/" PTSD_TEMPLATE, minor); +} @@ -557,983 +498,6 @@ ptmx_clone(__unused dev_t dev, int action) return(-1); } -FREE_BSDSTATIC int -ptsd_open(dev_t dev, int flag, __unused int devtype, __unused proc_t p) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - int error; - - if ((pti = ptmx_get_ioctl(minor(dev), 0)) == NULL) { - return (ENXIO); - } - - if (!(pti->pt_flags & PF_UNLOCKED)) { - return (EAGAIN); - } - - tp = pti->pt_tty; - tty_lock(tp); - - if ((tp->t_state & TS_ISOPEN) == 0) { - termioschars(&tp->t_termios); /* Set up default 
chars */ - tp->t_iflag = TTYDEF_IFLAG; - tp->t_oflag = TTYDEF_OFLAG; - tp->t_lflag = TTYDEF_LFLAG; - tp->t_cflag = TTYDEF_CFLAG; - tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; - ttsetwater(tp); /* would be done in xxparam() */ - } else if (tp->t_state&TS_XCLUDE && suser(kauth_cred_get(), NULL)) { - error = EBUSY; - goto out; - } - if (tp->t_oproc) /* Ctrlr still around. */ - (void)(*linesw[tp->t_line].l_modem)(tp, 1); - while ((tp->t_state & TS_CARR_ON) == 0) { - if (flag&FNONBLOCK) - break; - error = ttysleep(tp, TSA_CARR_ON(tp), TTIPRI | PCATCH, - "ptsd_opn", 0); - if (error) - goto out; - } - error = (*linesw[tp->t_line].l_open)(dev, tp); - /* Successful open; mark as open by the slave */ - pti->pt_flags |= PF_OPEN_S; - CLR(tp->t_state, TS_IOCTL_NOT_OK); - if (error == 0) - ptmx_wakeup(tp, FREAD|FWRITE); -out: - tty_unlock(tp); - return (error); -} - -static void ptsd_revoke_knotes(dev_t, struct tty *); - -FREE_BSDSTATIC int -ptsd_close(dev_t dev, int flag, __unused int mode, __unused proc_t p) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - int err; - - /* - * This is temporary until the VSX conformance tests - * are fixed. They are hanging with a deadlock - * where close(ptsd) will not complete without t_timeout set - */ -#define FIX_VSX_HANG 1 -#ifdef FIX_VSX_HANG - int save_timeout; -#endif - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - -#ifdef FIX_VSX_HANG - save_timeout = tp->t_timeout; - tp->t_timeout = 60; -#endif - err = (*linesw[tp->t_line].l_close)(tp, flag); - ptsd_stop(tp, FREAD|FWRITE); - (void) ttyclose(tp); -#ifdef FIX_VSX_HANG - tp->t_timeout = save_timeout; -#endif - tty_unlock(tp); - - if ((flag & IO_REVOKE) == IO_REVOKE) - ptsd_revoke_knotes(dev, tp); - - /* unconditional, just like ttyclose() */ - ptmx_free_ioctl(minor(dev), PF_OPEN_S); - - return (err); -} - -FREE_BSDSTATIC int -ptsd_read(dev_t dev, struct uio *uio, int flag) -{ - proc_t p = current_proc(); - - struct tty *tp; - struct ptmx_ioctl *pti; - int error = 0; - struct uthread *ut; - struct pgrp * pg; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - - ut = (struct uthread *)get_bsdthread_info(current_thread()); -again: - if (pti->pt_flags & PF_REMOTE) { - while (isbackground(p, tp)) { - if ((p->p_sigignore & sigmask(SIGTTIN)) || - (ut->uu_sigmask & sigmask(SIGTTIN)) || - p->p_lflag & P_LPPWAIT) { - error = EIO; - goto out; - } - pg = proc_pgrp(p); - if (pg == PGRP_NULL) { - error = EIO; - goto out; - } - /* - * SAFE: We about to drop the lock ourselves by - * SAFE: erroring out or sleeping anyway. 
- */ - tty_unlock(tp); - if (pg->pg_jobc == 0) { - pg_rele(pg); - tty_lock(tp); - error = EIO; - goto out; - } - pgsignal(pg, SIGTTIN, 1); - pg_rele(pg); - tty_lock(tp); - - error = ttysleep(tp, &ptsd_read, TTIPRI | PCATCH | PTTYBLOCK, "ptsd_bg", - hz); - if (error) - goto out; - } - if (tp->t_canq.c_cc == 0) { - if (flag & IO_NDELAY) { - error = EWOULDBLOCK; - goto out; - } - error = ttysleep(tp, TSA_PTS_READ(tp), TTIPRI | PCATCH, - "ptsd_in", 0); - if (error) - goto out; - goto again; - } - while (tp->t_canq.c_cc > 1 && uio_resid(uio) > 0) { - int cc; - char buf[BUFSIZ]; - - cc = MIN(uio_resid(uio), BUFSIZ); - // Don't copy the very last byte - cc = MIN(cc, tp->t_canq.c_cc - 1); - cc = q_to_b(&tp->t_canq, (u_char *)buf, cc); - error = uiomove(buf, cc, uio); - if (error) - break; - } - if (tp->t_canq.c_cc == 1) - (void) getc(&tp->t_canq); - if (tp->t_canq.c_cc) - goto out; - } else - if (tp->t_oproc) - error = (*linesw[tp->t_line].l_read)(tp, uio, flag); - ptmx_wakeup(tp, FWRITE); -out: - tty_unlock(tp); - return (error); -} - -/* - * Write to pseudo-tty. - * Wakeups of controlling tty will happen - * indirectly, when tty driver calls ptsd_start. - */ -FREE_BSDSTATIC int -ptsd_write(dev_t dev, struct uio *uio, int flag) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - int error; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - - if (tp->t_oproc == 0) - error = EIO; - else - error = (*linesw[tp->t_line].l_write)(tp, uio, flag); - - tty_unlock(tp); - return (error); -} - -/* - * Start output on pseudo-tty. - * Wake up process selecting or sleeping for input from controlling tty. - * - * t_oproc for this driver; called from within the line discipline - * - * Locks: Assumes tp is locked on entry, remains locked on exit - */ -static void -ptsd_start(struct tty *tp) -{ - struct ptmx_ioctl *pti; - - pti = ptmx_get_ioctl(minor(tp->t_dev), 0); - - if (tp->t_state & TS_TTSTOP) - goto out; - if (pti->pt_flags & PF_STOPPED) { - pti->pt_flags &= ~PF_STOPPED; - pti->pt_send = TIOCPKT_START; - } - ptmx_wakeup(tp, FREAD); -out: - return; -} - -/* - * Locks: Assumes tty_lock() is held over this call. 
- */ -static void -ptmx_wakeup(struct tty *tp, int flag) -{ - struct ptmx_ioctl *pti; - - pti = ptmx_get_ioctl(minor(tp->t_dev), 0); - - if (flag & FREAD) { - selwakeup(&pti->pt_selr); - wakeup(TSA_PTC_READ(tp)); - } - if (flag & FWRITE) { - selwakeup(&pti->pt_selw); - wakeup(TSA_PTC_WRITE(tp)); - } -} - -FREE_BSDSTATIC int -ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - int error = 0; - - pti = ptmx_get_ioctl(minor(dev), PF_OPEN_M); - if (pti == NULL) { - return (ENXIO); - } else if (pti == (struct ptmx_ioctl*)-1) { - return (EREDRIVEOPEN); - } - - tp = pti->pt_tty; - tty_lock(tp); - - /* If master is open OR slave is still draining, pty is still busy */ - if (tp->t_oproc || (tp->t_state & TS_ISOPEN)) { - tty_unlock(tp); - /* - * If master is closed, we are the only reference, so we - * need to clear the master open bit - */ - if (!tp->t_oproc) - ptmx_free_ioctl(minor(dev), PF_OPEN_M); - error = EBUSY; - goto err; - } - tp->t_oproc = ptsd_start; - CLR(tp->t_state, TS_ZOMBIE); - SET(tp->t_state, TS_IOCTL_NOT_OK); -#ifdef sun4c - tp->t_stop = ptsd_stop; -#endif - (void)(*linesw[tp->t_line].l_modem)(tp, 1); - tp->t_lflag &= ~EXTPROC; - - tty_unlock(tp); -err: - return (error); -} - -FREE_BSDSTATIC int -ptmx_close(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - - (void)(*linesw[tp->t_line].l_modem)(tp, 0); - - /* - * XXX MDMBUF makes no sense for ptys but would inhibit the above - * l_modem(). CLOCAL makes sense but isn't supported. Special - * l_modem()s that ignore carrier drop make no sense for ptys but - * may be in use because other parts of the line discipline make - * sense for ptys. Recover by doing everything that a normal - * ttymodem() would have done except for sending a SIGHUP. - */ - if (tp->t_state & TS_ISOPEN) { - tp->t_state &= ~(TS_CARR_ON | TS_CONNECTED); - tp->t_state |= TS_ZOMBIE; - ttyflush(tp, FREAD | FWRITE); - } - - tp->t_oproc = 0; /* mark closed */ - - tty_unlock(tp); - - ptmx_free_ioctl(minor(dev), PF_OPEN_M); - -#if CONFIG_MACF - mac_pty_notify_close(p, tp, dev, NULL); -#endif - - return (0); -} - -FREE_BSDSTATIC int -ptmx_read(dev_t dev, struct uio *uio, int flag) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - char buf[BUFSIZ]; - int error = 0, cc; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - - /* - * We want to block until the slave - * is open, and there's something to read; - * but if we lost the slave or we're NBIO, - * then return the appropriate error instead. 
- */ - for (;;) { - if (tp->t_state&TS_ISOPEN) { - if (pti->pt_flags & PF_PKT && pti->pt_send) { - error = ureadc((int)pti->pt_send, uio); - if (error) - goto out; - if (pti->pt_send & TIOCPKT_IOCTL) { - cc = MIN(uio_resid(uio), - (user_ssize_t)sizeof(tp->t_termios)); - uiomove((caddr_t)&tp->t_termios, cc, - uio); - } - pti->pt_send = 0; - goto out; - } - if (pti->pt_flags & PF_UCNTL && pti->pt_ucntl) { - error = ureadc((int)pti->pt_ucntl, uio); - if (error) - goto out; - pti->pt_ucntl = 0; - goto out; - } - if (tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) - break; - } - if ((tp->t_state & TS_CONNECTED) == 0) - goto out; /* EOF */ - if (flag & IO_NDELAY) { - error = EWOULDBLOCK; - goto out; - } - error = ttysleep(tp, TSA_PTC_READ(tp), TTIPRI | PCATCH, "ptmx_in", 0); - if (error) - goto out; - } - if (pti->pt_flags & (PF_PKT|PF_UCNTL)) - error = ureadc(0, uio); - while (uio_resid(uio) > 0 && error == 0) { - cc = q_to_b(&tp->t_outq, (u_char *)buf, MIN(uio_resid(uio), BUFSIZ)); - if (cc <= 0) - break; - error = uiomove(buf, cc, uio); - } - (*linesw[tp->t_line].l_start)(tp); - -out: - tty_unlock(tp); - return (error); -} - -/* - * Line discipline callback - * - * Locks: tty_lock() is assumed held on entry and exit. - */ -FREE_BSDSTATIC int -ptsd_stop(struct tty *tp, int flush) -{ - struct ptmx_ioctl *pti; - int flag; - - pti = ptmx_get_ioctl(minor(tp->t_dev), 0); - - /* note: FLUSHREAD and FLUSHWRITE already ok */ - if (flush == 0) { - flush = TIOCPKT_STOP; - pti->pt_flags |= PF_STOPPED; - } else - pti->pt_flags &= ~PF_STOPPED; - pti->pt_send |= flush; - /* change of perspective */ - flag = 0; - if (flush & FREAD) - flag |= FWRITE; - if (flush & FWRITE) - flag |= FREAD; - ptmx_wakeup(tp, flag); - - return (0); -} - -FREE_BSDSTATIC int -ptsd_reset(__unused int uban) -{ - return (0); -} - -/* - * Reinput pending characters after state switch - * call at spltty(). - * - * XXX Code duplication: static function, should be inlined - */ -static void -ttypend(struct tty *tp) -{ - struct clist tq; - int c; - - CLR(tp->t_lflag, PENDIN); - SET(tp->t_state, TS_TYPEN); - tq = tp->t_rawq; - tp->t_rawq.c_cc = 0; - tp->t_rawq.c_cf = tp->t_rawq.c_cl = 0; - while ((c = getc(&tq)) >= 0) - ttyinput(c, tp); - CLR(tp->t_state, TS_TYPEN); -} - -/* - * Must be called at spltty(). 
- * - * XXX Code duplication: static function, should be inlined - */ -static int -ttnread(struct tty *tp) -{ - int nread; - - if (ISSET(tp->t_lflag, PENDIN)) - ttypend(tp); - nread = tp->t_canq.c_cc; - if (!ISSET(tp->t_lflag, ICANON)) { - nread += tp->t_rawq.c_cc; - if (nread < tp->t_cc[VMIN] && tp->t_cc[VTIME] == 0) - nread = 0; - } - return (nread); -} - -int -ptsd_select(dev_t dev, int rw, void *wql, proc_t p) -{ - struct ptmx_ioctl *pti; - struct tty *tp; - int retval = 0; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - - if (tp == NULL) - return (ENXIO); - - tty_lock(tp); - - switch (rw) { - case FREAD: - if (ISSET(tp->t_state, TS_ZOMBIE)) { - retval = 1; - break; - } - - retval = ttnread(tp); - if (retval > 0) { - break; - } - - selrecord(p, &tp->t_rsel, wql); - break; - case FWRITE: - if (ISSET(tp->t_state, TS_ZOMBIE)) { - retval = 1; - break; - } - - if ((tp->t_outq.c_cc <= tp->t_lowat) && - ISSET(tp->t_state, TS_CONNECTED)) { - retval = tp->t_hiwat - tp->t_outq.c_cc; - break; - } - - selrecord(p, &tp->t_wsel, wql); - break; - } - - tty_unlock(tp); - return (retval); -} - -FREE_BSDSTATIC int -ptmx_select(dev_t dev, int rw, void *wql, proc_t p) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - int retval = 0; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - - if ((tp->t_state & TS_CONNECTED) == 0) { - retval = 1; - goto out; - } - switch (rw) { - case FREAD: - /* - * Need to block timeouts (ttrstart). - */ - if ((tp->t_state&TS_ISOPEN) && - tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) { - retval = tp->t_outq.c_cc; - break; - } - /* FALLTHROUGH */ - - case 0: /* exceptional */ - if ((tp->t_state&TS_ISOPEN) && - ((pti->pt_flags & PF_PKT && pti->pt_send) || - (pti->pt_flags & PF_UCNTL && pti->pt_ucntl))) { - retval = 1; - break; - } - selrecord(p, &pti->pt_selr, wql); - break; - - case FWRITE: - if (tp->t_state&TS_ISOPEN) { - if (pti->pt_flags & PF_REMOTE) { - if (tp->t_canq.c_cc == 0) { - retval = (TTYHOG -1) ; - break; - } - } else { - retval = (TTYHOG - 2) - (tp->t_rawq.c_cc + tp->t_canq.c_cc); - if (retval > 0) { - break; - } - if (tp->t_canq.c_cc == 0 && (tp->t_lflag&ICANON)) { - retval = 1; - break; - } - retval = 0; - } - } - selrecord(p, &pti->pt_selw, wql); - break; - - } -out: - tty_unlock(tp); - return (retval); -} - -FREE_BSDSTATIC int -ptmx_stop(__unused struct tty *tp, __unused int flush) -{ - return (0); -} - -FREE_BSDSTATIC int -ptmx_reset(__unused int uban) -{ - return (0); -} - -FREE_BSDSTATIC int -ptmx_write(dev_t dev, struct uio *uio, int flag) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - u_char *cp = NULL; - int cc = 0; - u_char locbuf[BUFSIZ]; - int wcnt = 0; - int error = 0; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - -again: - if ((tp->t_state&TS_ISOPEN) == 0) - goto block; - if (pti->pt_flags & PF_REMOTE) { - if (tp->t_canq.c_cc) - goto block; - while ((uio_resid(uio) > 0 || cc > 0) && - tp->t_canq.c_cc < TTYHOG - 1) { - if (cc == 0) { - cc = MIN(uio_resid(uio), BUFSIZ); - cc = MIN(cc, TTYHOG - 1 - tp->t_canq.c_cc); - cp = locbuf; - error = uiomove((caddr_t)cp, cc, uio); - if (error) - goto out; - /* check again for safety */ - if ((tp->t_state & TS_ISOPEN) == 0) { - /* adjust as usual */ - uio_setresid(uio, (uio_resid(uio) + cc)); - error = EIO; - goto out; - } - } - if (cc > 0) { - cc = b_to_q((u_char *)cp, cc, &tp->t_canq); - /* - * XXX we don't guarantee that the canq size - * is >= TTYHOG, so the above b_to_q() may - * leave some bytes uncopied. 
However, space - * is guaranteed for the null terminator if - * we don't fail here since (TTYHOG - 1) is - * not a multiple of CBSIZE. - */ - if (cc > 0) - break; - } - } - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - (void) putc(0, &tp->t_canq); - ttwakeup(tp); - wakeup(TSA_PTS_READ(tp)); - goto out; - } - while (uio_resid(uio) > 0 || cc > 0) { - if (cc == 0) { - cc = MIN(uio_resid(uio), BUFSIZ); - cp = locbuf; - error = uiomove((caddr_t)cp, cc, uio); - if (error) - goto out; - /* check again for safety */ - if ((tp->t_state & TS_ISOPEN) == 0) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - error = EIO; - goto out; - } - } - while (cc > 0) { - if ((tp->t_rawq.c_cc + tp->t_canq.c_cc) >= TTYHOG - 2 && - (tp->t_canq.c_cc > 0 || !(tp->t_lflag&ICANON))) { - wakeup(TSA_HUP_OR_INPUT(tp)); - goto block; - } - (*linesw[tp->t_line].l_rint)(*cp++, tp); - wcnt++; - cc--; - } - cc = 0; - } - -out: - tty_unlock(tp); - return (error); - -block: - /* - * Come here to wait for slave to open, for space - * in outq, or space in rawq, or an empty canq. - */ - if ((tp->t_state & TS_CONNECTED) == 0) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - error = EIO; - goto out; - } - if (flag & IO_NDELAY) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - if (wcnt == 0) - error = EWOULDBLOCK; - goto out; - } - error = ttysleep(tp, TSA_PTC_WRITE(tp), TTOPRI | PCATCH, "ptmx_out", 0); - if (error) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - goto out; - } - goto again; -} - - -FREE_BSDSTATIC int -cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) -{ - struct tty *tp; - struct ptmx_ioctl *pti; - u_char *cc; - int stop, error = 0; - int allow_ext_ioctl = 1; - - pti = ptmx_get_ioctl(minor(dev), 0); - - tp = pti->pt_tty; - tty_lock(tp); - - cc = tp->t_cc; - - /* - * Do not permit extended ioctls on the master side of the pty unless - * the slave side has been successfully opened and initialized. - */ - if (cdevsw[major(dev)].d_open == ptmx_open && ISSET(tp->t_state, TS_IOCTL_NOT_OK)) - allow_ext_ioctl = 0; - - /* - * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG. - * ttywflush(tp) will hang if there are characters in the outq. - */ - if (cmd == TIOCEXT && allow_ext_ioctl) { - /* - * When the EXTPROC bit is being toggled, we need - * to send an TIOCPKT_IOCTL if the packet driver - * is turned on. - */ - if (*(int *)data) { - if (pti->pt_flags & PF_PKT) { - pti->pt_send |= TIOCPKT_IOCTL; - ptmx_wakeup(tp, FREAD); - } - tp->t_lflag |= EXTPROC; - } else { - if ((tp->t_lflag & EXTPROC) && - (pti->pt_flags & PF_PKT)) { - pti->pt_send |= TIOCPKT_IOCTL; - ptmx_wakeup(tp, FREAD); - } - tp->t_lflag &= ~EXTPROC; - } - goto out; - } else - if (cdevsw[major(dev)].d_open == ptmx_open) { - switch (cmd) { - - case TIOCGPGRP: - /* - * We aviod calling ttioctl on the controller since, - * in that case, tp must be the controlling terminal. - */ - *(int *)data = tp->t_pgrp ? 
tp->t_pgrp->pg_id : 0; - goto out; - - case TIOCPKT: - if (*(int *)data) { - if (pti->pt_flags & PF_UCNTL) { - error = EINVAL; - goto out; - } - pti->pt_flags |= PF_PKT; - } else - pti->pt_flags &= ~PF_PKT; - goto out; - - case TIOCUCNTL: - if (*(int *)data) { - if (pti->pt_flags & PF_PKT) { - error = EINVAL; - goto out; - } - pti->pt_flags |= PF_UCNTL; - } else - pti->pt_flags &= ~PF_UCNTL; - goto out; - - case TIOCREMOTE: - if (*(int *)data) - pti->pt_flags |= PF_REMOTE; - else - pti->pt_flags &= ~PF_REMOTE; - ttyflush(tp, FREAD|FWRITE); - goto out; - - case TIOCSETP: - case TIOCSETN: - case TIOCSETD: - case TIOCSETA_32: - case TIOCSETAW_32: - case TIOCSETAF_32: - case TIOCSETA_64: - case TIOCSETAW_64: - case TIOCSETAF_64: - ndflush(&tp->t_outq, tp->t_outq.c_cc); - break; - - case TIOCSIG: - if (*(unsigned int *)data >= NSIG || - *(unsigned int *)data == 0) { - error = EINVAL; - goto out; - } - if ((tp->t_lflag&NOFLSH) == 0) - ttyflush(tp, FREAD|FWRITE); - if ((*(unsigned int *)data == SIGINFO) && - ((tp->t_lflag&NOKERNINFO) == 0)) - ttyinfo_locked(tp); - /* - * SAFE: All callers drop the lock on return and - * SAFE: the linesw[] will short circut this call - * SAFE: if the ioctl() is eaten before the lower - * SAFE: level code gets to see it. - */ - tty_unlock(tp); - tty_pgsignal(tp, *(unsigned int *)data, 1); - tty_lock(tp); - goto out; - - case TIOCPTYGRANT: /* grantpt(3) */ - /* - * Change the uid of the slave to that of the calling - * thread, change the gid of the slave to GID_TTY, - * change the mode to 0620 (rw--w----). - */ - { - error = _devfs_setattr(pti->pt_devhandle, 0620, kauth_getuid(), GID_TTY); -#if CONFIG_MACF - if (!error) { - tty_unlock(tp); - mac_pty_notify_grant(p, tp, dev, NULL); - tty_lock(tp); - } -#endif - goto out; - } - - case TIOCPTYGNAME: /* ptsname(3) */ - /* - * Report the name of the slave device in *data - * (128 bytes max.). Use the same template string - * used for calling devfs_make_node() to create it. - */ - snprintf(data, 128, "/dev/" PTSD_TEMPLATE, minor(dev)); - error = 0; - goto out; - - case TIOCPTYUNLK: /* unlockpt(3) */ - /* - * Unlock the slave device so that it can be opened. - */ - pti->pt_flags |= PF_UNLOCKED; - error = 0; - goto out; - } - - /* - * Fail all other calls; pty masters are not serial devices; - * we only pretend they are when the slave side of the pty is - * already open. - */ - if (!allow_ext_ioctl) { - error = ENOTTY; - goto out; - } - } - error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); - if (error == ENOTTY) { - error = ttioctl_locked(tp, cmd, data, flag, p); - if (error == ENOTTY) { - if (pti->pt_flags & PF_UCNTL && (cmd & ~0xff) == UIOCCMD(0)) { - /* Process the UIOCMD ioctl group */ - if (cmd & 0xff) { - pti->pt_ucntl = (u_char)cmd; - ptmx_wakeup(tp, FREAD); - } - error = 0; - goto out; - } else if (cmd == TIOCSBRK || cmd == TIOCCBRK) { - /* - * POSIX conformance; rdar://3936338 - * - * Clear ENOTTY in the case of setting or - * clearing a break failing because pty's - * don't support break like real serial - * ports. - */ - error = 0; - goto out; - } - } - } - - /* - * If external processing and packet mode send ioctl packet. 
- */ - if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) { - switch(cmd) { - case TIOCSETA_32: - case TIOCSETAW_32: - case TIOCSETAF_32: - case TIOCSETA_64: - case TIOCSETAW_64: - case TIOCSETAF_64: - case TIOCSETP: - case TIOCSETN: - case TIOCSETC: - case TIOCSLTC: - case TIOCLBIS: - case TIOCLBIC: - case TIOCLSET: - pti->pt_send |= TIOCPKT_IOCTL; - ptmx_wakeup(tp, FREAD); - default: - break; - } - } - stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s')) - && CCEQ(cc[VSTART], CTRL('q')); - if (pti->pt_flags & PF_NOSTOP) { - if (stop) { - pti->pt_send &= ~TIOCPKT_NOSTOP; - pti->pt_send |= TIOCPKT_DOSTOP; - pti->pt_flags &= ~PF_NOSTOP; - ptmx_wakeup(tp, FREAD); - } - } else { - if (!stop) { - pti->pt_send &= ~TIOCPKT_DOSTOP; - pti->pt_send |= TIOCPKT_NOSTOP; - pti->pt_flags |= PF_NOSTOP; - ptmx_wakeup(tp, FREAD); - } - } -out: - tty_unlock(tp); - return (error); -} /* * kqueue support. @@ -1651,7 +615,7 @@ ptsd_kqfilter(dev_t dev, struct knote *kn) int retval = 0; /* make sure we're talking about the right device type */ - if (cdevsw[major(dev)].d_open != ptsd_open) { + if (cdevsw[major(dev)].d_open != ptsopen) { return (EINVAL); } @@ -1694,14 +658,14 @@ ptsd_kqfilter(dev_t dev, struct knote *kn) * zero to make the final detach passively successful. */ static void -ptsd_revoke_knotes(dev_t dev, struct tty *tp) +ptsd_revoke_knotes(int minor, struct tty *tp) { struct klist *list; struct knote *kn, *tkn; /* (Hold and drop the right locks in the right order.) */ - ptsd_kevent_mtx_lock(minor(dev)); + ptsd_kevent_mtx_lock(minor); tty_lock(tp); list = &tp->t_rsel.si_note; @@ -1713,14 +677,14 @@ ptsd_revoke_knotes(dev_t dev, struct tty *tp) kn->kn_hook = PTSD_KNOTE_REVOKED; tty_unlock(tp); - ptsd_kevent_mtx_unlock(minor(dev)); + ptsd_kevent_mtx_unlock(minor); tty_lock(tp); ttwakeup(tp); ttwwakeup(tp); tty_unlock(tp); - ptsd_kevent_mtx_lock(minor(dev)); + ptsd_kevent_mtx_lock(minor); tty_lock(tp); list = &tp->t_rsel.si_note; @@ -1736,5 +700,5 @@ ptsd_revoke_knotes(dev_t dev, struct tty *tp) } tty_unlock(tp); - ptsd_kevent_mtx_unlock(minor(dev)); + ptsd_kevent_mtx_unlock(minor); } diff --git a/bsd/kern/tty_pty.c b/bsd/kern/tty_pty.c index 05931c126..f4bb4dac6 100644 --- a/bsd/kern/tty_pty.c +++ b/bsd/kern/tty_pty.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1997-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -80,34 +80,11 @@ #include #include -#define d_devtotty_t struct tty ** - -#ifdef d_stop_t -#undef d_stop_t +#if CONFIG_MACF +#include #endif -typedef void d_stop_t(struct tty *tp, int rw); - -/* XXX function should be removed??? 
*/ -int pty_init(int n_ptys); - -/* XXX should be a devfs function */ -int _devfs_setattr(void * handle, unsigned short mode, uid_t uid, gid_t gid); - -static void ptsstart(struct tty *tp); -static void ptcwakeup(struct tty *tp, int flag); -__XNU_PRIVATE_EXTERN d_open_t ptsopen; -__XNU_PRIVATE_EXTERN d_close_t ptsclose; -__XNU_PRIVATE_EXTERN d_read_t ptsread; -__XNU_PRIVATE_EXTERN d_write_t ptswrite; -__XNU_PRIVATE_EXTERN d_ioctl_t ptyioctl; -__XNU_PRIVATE_EXTERN d_stop_t ptsstop; -__XNU_PRIVATE_EXTERN d_devtotty_t ptydevtotty; -__XNU_PRIVATE_EXTERN d_open_t ptcopen; -__XNU_PRIVATE_EXTERN d_close_t ptcclose; -__XNU_PRIVATE_EXTERN d_read_t ptcread; -__XNU_PRIVATE_EXTERN d_write_t ptcwrite; -__XNU_PRIVATE_EXTERN d_select_t ptcselect; +#include "tty_dev.h" #if NPTY == 1 #undef NPTY @@ -115,868 +92,83 @@ __XNU_PRIVATE_EXTERN d_select_t ptcselect; #warning You have only one pty defined, redefining to 32. #endif -#define BUFSIZ 100 /* Chunk size iomoved to/from user */ +#define PTY_MAJOR 5 +#define TTY_MAJOR 4 /* * pts == /dev/tty[pqrsPQRS][0123456789abcdefghijklmnopqrstuv] * ptc == /dev/pty[pqrsPQRS][0123456789abcdefghijklmnopqrstuv] */ -/* All references to have been changed to indirections in the file */ -__private_extern__ struct tty *pt_tty[NPTY] = { NULL }; +static struct ptmx_ioctl pt_ioctl[NPTY]; -static struct pt_ioctl { - int pt_flags; - struct selinfo pt_selr, pt_selw; - u_char pt_send; - u_char pt_ucntl; - void *pt_devhandle; /* slave device handle for grantpt() */ -} pt_ioctl[NPTY]; /* XXX */ -static int npty = NPTY; /* for pstat -t */ - -#define PF_PKT 0x08 /* packet mode */ -#define PF_STOPPED 0x10 /* user told stopped */ -#define PF_REMOTE 0x20 /* remote and flow controlled input */ -#define PF_NOSTOP 0x40 -#define PF_UCNTL 0x80 /* user control mode */ +int pty_init(int n_ptys); #ifndef DEVFS int pty_init(__unused int n_ptys) { - return 0; + return 0; } -#else +#else // DEVFS #include #define START_CHAR 'p' #define HEX_BASE 16 -int -pty_init(int n_ptys) -{ - int i; - int j; - /* create the pseudo tty device nodes */ - for (j = 0; j < 10; j++) { - for (i = 0; i < HEX_BASE; i++) { - int m = j * HEX_BASE + i; - if (m == n_ptys) - goto done; - pt_ioctl[m].pt_devhandle = devfs_make_node(makedev(4, m), - DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, - "tty%c%x", j + START_CHAR, i); - (void)devfs_make_node(makedev(5, m), - DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, - "pty%c%x", j + START_CHAR, i); - } - } - done: - return (0); -} -#endif /* DEVFS */ +static struct tty_dev_t _pty_driver; -__private_extern__ int -ptsopen(dev_t dev, int flag, __unused int devtype, __unused struct proc *p) +static struct ptmx_ioctl * +pty_get_ioctl(int minor, int open_flag) { - struct tty *tp; - int error; - - /* - * You will see this sort of code coming up in diffs later both - * the ttymalloc and the tp indirection. - */ - if (minor(dev) >= npty) { - error = ENXIO; - goto err; + if (minor >= NPTY) { + return NULL; } - if (!pt_tty[minor(dev)]) { - /* - * If we can't allocate a new one, act as if we had run out - * of device nodes. 
- */ - if ((tp = pt_tty[minor(dev)] = ttymalloc()) == NULL) { - error = ENXIO; - goto err; + struct ptmx_ioctl *pti = &pt_ioctl[minor]; + if (open_flag & (PF_OPEN_M|PF_OPEN_S)) { + if (!pti->pt_tty) { + pti->pt_tty = ttymalloc(); } - } else - tp = pt_tty[minor(dev)]; - - tty_lock(tp); - - if ((tp->t_state & TS_ISOPEN) == 0) { - termioschars(&tp->t_termios); /* Set up default chars */ - tp->t_iflag = TTYDEF_IFLAG; - tp->t_oflag = TTYDEF_OFLAG; - tp->t_lflag = TTYDEF_LFLAG; - tp->t_cflag = TTYDEF_CFLAG; - tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; - ttsetwater(tp); /* would be done in xxparam() */ - } else if (tp->t_state&TS_XCLUDE && suser(kauth_cred_get(), NULL)) { - error = EBUSY; - goto out; - } - if (tp->t_oproc) /* Ctrlr still around. */ - (void)(*linesw[tp->t_line].l_modem)(tp, 1); - while ((tp->t_state & TS_CARR_ON) == 0) { - if (flag&FNONBLOCK) - break; - error = ttysleep(tp, TSA_CARR_ON(tp), TTIPRI | PCATCH, - "ptsopn", 0); - if (error) - goto out; - } - error = (*linesw[tp->t_line].l_open)(dev, tp); - if (error == 0) - ptcwakeup(tp, FREAD|FWRITE); - -out: - tty_unlock(tp); -err: - return (error); -} - -__private_extern__ int -ptsclose(dev_t dev, int flag, __unused int mode, __unused proc_t p) -{ - struct tty *tp; - int err; - - /* - * This is temporary until the VSX conformance tests - * are fixed. They are hanging with a deadlock - * where close(pts) will not complete without t_timeout set - */ -#define FIX_VSX_HANG 1 -#ifdef FIX_VSX_HANG - int save_timeout; -#endif - - tp = pt_tty[minor(dev)]; - tty_lock(tp); -#ifdef FIX_VSX_HANG - save_timeout = tp->t_timeout; - tp->t_timeout = 60; -#endif - err = (*linesw[tp->t_line].l_close)(tp, flag); - ptsstop(tp, FREAD|FWRITE); - (void) ttyclose(tp); -#ifdef FIX_VSX_HANG - tp->t_timeout = save_timeout; -#endif - tty_unlock(tp); - return (err); -} - -__private_extern__ int -ptsread(dev_t dev, struct uio *uio, int flag) -{ - struct proc *p = current_proc(); - struct tty *tp = pt_tty[minor(dev)]; - struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; - int error = 0; - struct uthread *ut; - struct pgrp *pg; - - tty_lock(tp); - - ut = (struct uthread *)get_bsdthread_info(current_thread()); -again: - if (pti->pt_flags & PF_REMOTE) { - while (isbackground(p, tp)) { - if ((p->p_sigignore & sigmask(SIGTTIN)) || - (ut->uu_sigmask & sigmask(SIGTTIN)) || - p->p_lflag & P_LPPWAIT) { - error = EIO; - goto out; - } - - - pg = proc_pgrp(p); - if (pg == PGRP_NULL) { - error = EIO; - goto out; - } - /* - * SAFE: We about to drop the lock ourselves by - * SAFE: erroring out or sleeping anyway. 
- */ - tty_unlock(tp); - if (pg->pg_jobc == 0) { - pg_rele(pg); - tty_lock(tp); - error = EIO; - goto out; - } - pgsignal(pg, SIGTTIN, 1); - pg_rele(pg); - tty_lock(tp); - - error = ttysleep(tp, &ptsread, TTIPRI | PCATCH | PTTYBLOCK, "ptsbg", - hz); - if (error) - goto out; - } - if (tp->t_canq.c_cc == 0) { - if (flag & IO_NDELAY) { - error = EWOULDBLOCK; - goto out; - } - error = ttysleep(tp, TSA_PTS_READ(tp), TTIPRI | PCATCH, - "ptsin", 0); - if (error) - goto out; - goto again; + if (!pti->pt_tty) { + return NULL; } - while (tp->t_canq.c_cc > 1 && uio_resid(uio) > 0) { - int cc; - char buf[BUFSIZ]; - - cc = min(uio_resid(uio), BUFSIZ); - // Don't copy the very last byte - cc = min(cc, tp->t_canq.c_cc - 1); - cc = q_to_b(&tp->t_canq, (u_char *)buf, cc); - error = uiomove(buf, cc, uio); - if (error) - break; - } - if (tp->t_canq.c_cc == 1) - (void) getc(&tp->t_canq); - if (tp->t_canq.c_cc) - goto out; - } else - if (tp->t_oproc) - error = (*linesw[tp->t_line].l_read)(tp, uio, flag); - ptcwakeup(tp, FWRITE); -out: - tty_unlock(tp); - return (error); -} - -/* - * Write to pseudo-tty. - * Wakeups of controlling tty will happen - * indirectly, when tty driver calls ptsstart. - */ -__private_extern__ int -ptswrite(dev_t dev, struct uio *uio, int flag) -{ - struct tty *tp; - int error; - - tp = pt_tty[minor(dev)]; - - tty_lock(tp); - - if (tp->t_oproc == 0) - error = EIO; - else - error = (*linesw[tp->t_line].l_write)(tp, uio, flag); - - tty_unlock(tp); - - return (error); -} - -/* - * Start output on pseudo-tty. - * Wake up process selecting or sleeping for input from controlling tty. - * - * t_oproc for this driver; called from within the line discipline - * - * Locks: Assumes tp is locked on entry, remains locked on exit - */ -static void -ptsstart(struct tty *tp) -{ - struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; - - if (tp->t_state & TS_TTSTOP) - goto out; - if (pti->pt_flags & PF_STOPPED) { - pti->pt_flags &= ~PF_STOPPED; - pti->pt_send = TIOCPKT_START; - } - ptcwakeup(tp, FREAD); -out: - return; -} - -/* - * Locks: Assumes tty_lock() is held over this call. 
- */ -static void -ptcwakeup(struct tty *tp, int flag) -{ - struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; - - if (flag & FREAD) { - selwakeup(&pti->pt_selr); - wakeup(TSA_PTC_READ(tp)); - } - if (flag & FWRITE) { - selwakeup(&pti->pt_selw); - wakeup(TSA_PTC_WRITE(tp)); } + return pti; } -__private_extern__ int -ptcopen(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) +static int +pty_get_name(int minor, char *buffer, size_t size) { - struct tty *tp; - struct pt_ioctl *pti; - int error = 0; - - if (minor(dev) >= npty) { - error = ENXIO; - goto out; - } - if(!pt_tty[minor(dev)]) { - tp = pt_tty[minor(dev)] = ttymalloc(); - } else - tp = pt_tty[minor(dev)]; - - tty_lock(tp); - - /* If master is open OR slave is still draining, pty is still busy */ - if (tp->t_oproc || (tp->t_state & TS_ISOPEN)) { - error = EBUSY; - } else { - tp->t_oproc = ptsstart; - CLR(tp->t_state, TS_ZOMBIE); -#ifdef sun4c - tp->t_stop = ptsstop; -#endif - (void)(*linesw[tp->t_line].l_modem)(tp, 1); - tp->t_lflag &= ~EXTPROC; - pti = &pt_ioctl[minor(dev)]; - pti->pt_flags = 0; - pti->pt_send = 0; - pti->pt_ucntl = 0; - } - - tty_unlock(tp); - -out: - return (error); + return snprintf(buffer, size, "/dev/tty%c%x", + START_CHAR + (minor / HEX_BASE), + minor % HEX_BASE); } -__private_extern__ int -ptcclose(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) +int +pty_init(int n_ptys) { - struct tty *tp = pt_tty[minor(dev)]; - - tty_lock(tp); - - (void)(*linesw[tp->t_line].l_modem)(tp, 0); - - /* - * XXX MDMBUF makes no sense for ptys but would inhibit the above - * l_modem(). CLOCAL makes sense but isn't supported. Special - * l_modem()s that ignore carrier drop make no sense for ptys but - * may be in use because other parts of the line discipline make - * sense for ptys. Recover by doing everything that a normal - * ttymodem() would have done except for sending a SIGHUP. - */ - if (tp->t_state & TS_ISOPEN) { - tp->t_state &= ~(TS_CARR_ON | TS_CONNECTED); - tp->t_state |= TS_ZOMBIE; - ttyflush(tp, FREAD | FWRITE); - } - - tp->t_oproc = 0; /* mark closed */ - - tty_unlock(tp); - + int i; + int j; + + /* create the pseudo tty device nodes */ + for (j = 0; j < 10; j++) { + for (i = 0; i < HEX_BASE; i++) { + int m = j * HEX_BASE + i; + if (m == n_ptys) + goto done; + pt_ioctl[m].pt_devhandle = devfs_make_node(makedev(TTY_MAJOR, m), + DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, + "tty%c%x", j + START_CHAR, i); + (void)devfs_make_node(makedev(PTY_MAJOR, m), + DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, + "pty%c%x", j + START_CHAR, i); + } + } +done: + _pty_driver.master = PTY_MAJOR; + _pty_driver.slave = TTY_MAJOR; + _pty_driver.open_reset = 1; + _pty_driver.open = &pty_get_ioctl; + _pty_driver.name = &pty_get_name; + tty_dev_register(&_pty_driver); return (0); } - -__private_extern__ int -ptcread(dev_t dev, struct uio *uio, int flag) -{ - struct tty *tp = pt_tty[minor(dev)]; - struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; - char buf[BUFSIZ]; - int error = 0, cc; - - tty_lock(tp); - - /* - * We want to block until the slave - * is open, and there's something to read; - * but if we lost the slave or we're NBIO, - * then return the appropriate error instead. 
- */ - for (;;) { - if (tp->t_state&TS_ISOPEN) { - if (pti->pt_flags&PF_PKT && pti->pt_send) { - error = ureadc((int)pti->pt_send, uio); - if (error) - goto out; - if (pti->pt_send & TIOCPKT_IOCTL) { - cc = min(uio_resid(uio), - sizeof(tp->t_termios)); - uiomove((caddr_t)&tp->t_termios, cc, - uio); - } - pti->pt_send = 0; - goto out; - } - if (pti->pt_flags&PF_UCNTL && pti->pt_ucntl) { - error = ureadc((int)pti->pt_ucntl, uio); - if (error) - goto out; - pti->pt_ucntl = 0; - goto out; - } - if (tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) - break; - } - if ((tp->t_state & TS_CONNECTED) == 0) - goto out; /* EOF */ - if (flag & IO_NDELAY) { - error = EWOULDBLOCK; - goto out; - } - error = ttysleep(tp, TSA_PTC_READ(tp), TTIPRI | PCATCH, "ptcin", 0); - if (error) - goto out; - } - if (pti->pt_flags & (PF_PKT|PF_UCNTL)) - error = ureadc(0, uio); - while (uio_resid(uio) > 0 && error == 0) { - cc = q_to_b(&tp->t_outq, (u_char *)buf, min(uio_resid(uio), BUFSIZ)); - if (cc <= 0) - break; - error = uiomove(buf, cc, uio); - } - (*linesw[tp->t_line].l_start)(tp); - -out: - tty_unlock(tp); - - return (error); -} - -/* - * Line discipline callback - * - * Locks: tty_lock() is assumed held on entry and exit. - */ -__private_extern__ void -ptsstop(struct tty *tp, int flush) -{ - struct pt_ioctl *pti; - int flag; - - pti = &pt_ioctl[minor(tp->t_dev)]; - - /* note: FLUSHREAD and FLUSHWRITE already ok */ - if (flush == 0) { - flush = TIOCPKT_STOP; - pti->pt_flags |= PF_STOPPED; - } else - pti->pt_flags &= ~PF_STOPPED; - pti->pt_send |= flush; - /* change of perspective */ - flag = 0; - if (flush & FREAD) - flag |= FWRITE; - if (flush & FWRITE) - flag |= FREAD; - ptcwakeup(tp, flag); -} - -__private_extern__ int -ptcselect(dev_t dev, int rw, void *wql, struct proc *p) -{ - struct tty *tp = pt_tty[minor(dev)]; - struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; - int retval = 0; - - tty_lock(tp); - - if ((tp->t_state & TS_CONNECTED) == 0) { - retval = 1; - goto out; - } - switch (rw) { - - case FREAD: - /* - * Need to block timeouts (ttrstart). 
- */ - if ((tp->t_state&TS_ISOPEN) && - tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) { - retval = 1; - goto out; - } - /* FALLTHROUGH */ - - case 0: /* exceptional */ - if ((tp->t_state&TS_ISOPEN) && - ((pti->pt_flags&PF_PKT && pti->pt_send) || - (pti->pt_flags&PF_UCNTL && pti->pt_ucntl))) { - retval = 1; - goto out; - } - selrecord(p, &pti->pt_selr, wql); - break; - - - case FWRITE: - if (tp->t_state&TS_ISOPEN) { - if (pti->pt_flags & PF_REMOTE) { - if (tp->t_canq.c_cc == 0) { - retval = 1; - goto out; - } - } else { - if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2) { - retval = 1; - goto out; - } - if (tp->t_canq.c_cc == 0 && (tp->t_lflag&ICANON)) { - retval = 1; - goto out; - } - } - } - selrecord(p, &pti->pt_selw, wql); - break; - - } -out: - tty_unlock(tp); - - return (retval); -} - -__private_extern__ int -ptcwrite(dev_t dev, struct uio *uio, int flag) -{ - struct tty *tp = pt_tty[minor(dev)]; - u_char *cp = NULL; - int cc = 0; - u_char locbuf[BUFSIZ]; - int wcnt = 0; - struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; - int error = 0; - - tty_lock(tp); - -again: - if ((tp->t_state&TS_ISOPEN) == 0) - goto block; - if (pti->pt_flags & PF_REMOTE) { - if (tp->t_canq.c_cc) - goto block; - while ((uio_resid(uio) > 0 || cc > 0) && - tp->t_canq.c_cc < TTYHOG - 1) { - if (cc == 0) { - cc = min(uio_resid(uio), BUFSIZ); - cc = min(cc, TTYHOG - 1 - tp->t_canq.c_cc); - cp = locbuf; - error = uiomove((caddr_t)cp, cc, uio); - if (error) - goto out; - /* check again for safety */ - if ((tp->t_state & TS_ISOPEN) == 0) { - /* adjust as usual */ - uio_setresid(uio, (uio_resid(uio) + cc)); - error = EIO; - goto out; - } - } - if (cc > 0) { - cc = b_to_q((u_char *)cp, cc, &tp->t_canq); - /* - * XXX we don't guarantee that the canq size - * is >= TTYHOG, so the above b_to_q() may - * leave some bytes uncopied. However, space - * is guaranteed for the null terminator if - * we don't fail here since (TTYHOG - 1) is - * not a multiple of CBSIZE. - */ - if (cc > 0) - break; - } - } - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - (void) putc(0, &tp->t_canq); - ttwakeup(tp); - wakeup(TSA_PTS_READ(tp)); - goto out; - } - while (uio_resid(uio) > 0 || cc > 0) { - if (cc == 0) { - cc = min(uio_resid(uio), BUFSIZ); - cp = locbuf; - error = uiomove((caddr_t)cp, cc, uio); - if (error) - goto out; - /* check again for safety */ - if ((tp->t_state & TS_ISOPEN) == 0) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - error = EIO; - goto out; - } - } - while (cc > 0) { - if ((tp->t_rawq.c_cc + tp->t_canq.c_cc) >= TTYHOG - 2 && - (tp->t_canq.c_cc > 0 || !(tp->t_lflag&ICANON))) { - wakeup(TSA_HUP_OR_INPUT(tp)); - goto block; - } - (*linesw[tp->t_line].l_rint)(*cp++, tp); - wcnt++; - cc--; - } - cc = 0; - } -out: - tty_unlock(tp); - - return (error); - -block: - /* - * Come here to wait for slave to open, for space - * in outq, or space in rawq, or an empty canq. 
- */ - if ((tp->t_state & TS_CONNECTED) == 0) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - error = EIO; - goto out; - } - if (flag & IO_NDELAY) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - if (wcnt == 0) - error = EWOULDBLOCK; - goto out; - } - error = ttysleep(tp, TSA_PTC_WRITE(tp), TTOPRI | PCATCH, "ptcout", 0); - if (error) { - /* adjust for data copied in but not written */ - uio_setresid(uio, (uio_resid(uio) + cc)); - goto out; - } - goto again; -} - -__private_extern__ int -ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) -{ - struct tty *tp = pt_tty[minor(dev)]; - struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; - u_char *cc = tp->t_cc; - int stop, error = 0; - - tty_lock(tp); - - /* - * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG. - * ttywflush(tp) will hang if there are characters in the outq. - */ - if (cmd == TIOCEXT) { - /* - * When the EXTPROC bit is being toggled, we need - * to send an TIOCPKT_IOCTL if the packet driver - * is turned on. - */ - if (*(int *)data) { - if (pti->pt_flags & PF_PKT) { - pti->pt_send |= TIOCPKT_IOCTL; - ptcwakeup(tp, FREAD); - } - tp->t_lflag |= EXTPROC; - } else { - if ((tp->t_lflag & EXTPROC) && - (pti->pt_flags & PF_PKT)) { - pti->pt_send |= TIOCPKT_IOCTL; - ptcwakeup(tp, FREAD); - } - tp->t_lflag &= ~EXTPROC; - } - goto out; - } else - if (cdevsw[major(dev)].d_open == ptcopen) - switch (cmd) { - - case TIOCGPGRP: - /* - * We aviod calling ttioctl on the controller since, - * in that case, tp must be the controlling terminal. - */ - *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : 0; - goto out; - - case TIOCPKT: - if (*(int *)data) { - if (pti->pt_flags & PF_UCNTL) { - error = EINVAL; - goto out; - } - pti->pt_flags |= PF_PKT; - } else - pti->pt_flags &= ~PF_PKT; - goto out; - - case TIOCUCNTL: - if (*(int *)data) { - if (pti->pt_flags & PF_PKT) { - error = EINVAL; - goto out; - } - pti->pt_flags |= PF_UCNTL; - } else - pti->pt_flags &= ~PF_UCNTL; - goto out; - - case TIOCREMOTE: - if (*(int *)data) - pti->pt_flags |= PF_REMOTE; - else - pti->pt_flags &= ~PF_REMOTE; - ttyflush(tp, FREAD|FWRITE); - goto out; - - case TIOCSETP: - case TIOCSETN: - case TIOCSETD: - case TIOCSETA_32: - case TIOCSETAW_32: - case TIOCSETAF_32: - case TIOCSETA_64: - case TIOCSETAW_64: - case TIOCSETAF_64: - ndflush(&tp->t_outq, tp->t_outq.c_cc); - break; - - case TIOCSIG: - if (*(unsigned int *)data >= NSIG || - *(unsigned int *)data == 0) { - error = EINVAL; - goto out; - } - if ((tp->t_lflag&NOFLSH) == 0) - ttyflush(tp, FREAD|FWRITE); - if ((*(unsigned int *)data == SIGINFO) && - ((tp->t_lflag&NOKERNINFO) == 0)) - ttyinfo_locked(tp); - /* - * SAFE: All callers drop the lock on return and - * SAFE: the linesw[] will short circut this call - * SAFE: if the ioctl() is eaten before the lower - * SAFE: level code gets to see it. - */ - tty_unlock(tp); - tty_pgsignal(tp, *(unsigned int *)data, 1); - tty_lock(tp); - goto out; - - case TIOCPTYGRANT: /* grantpt(3) */ - /* - * Change the uid of the slave to that of the calling - * thread, change the gid of the slave to GID_TTY, - * change the mode to 0620 (rw--w----). - */ - { - _devfs_setattr(pti->pt_devhandle, 0620, kauth_getuid(), GID_TTY); - goto out; - } - - case TIOCPTYGNAME: /* ptsname(3) */ - /* - * Report the name of the slave device in *data - * (128 bytes max.). Use the same derivation method - * used for calling devfs_make_node() to create it. 
- */ - snprintf(data, 128, "/dev/tty%c%x", - START_CHAR + (minor(dev) / HEX_BASE), - minor(dev) % HEX_BASE); - error = 0; - goto out; - - case TIOCPTYUNLK: /* unlockpt(3) */ - /* - * Unlock the slave device so that it can be opened. - */ - error = 0; - goto out; - } - error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); - if (error == ENOTTY) { - error = ttioctl_locked(tp, cmd, data, flag, p); - if (error == ENOTTY) { - if (pti->pt_flags & PF_UCNTL && (cmd & ~0xff) == UIOCCMD(0)) { - /* Process the UIOCMD ioctl group */ - if (cmd & 0xff) { - pti->pt_ucntl = (u_char)cmd; - ptcwakeup(tp, FREAD); - } - error = 0; - goto out; - } else if (cmd == TIOCSBRK || cmd == TIOCCBRK) { - /* - * POSIX conformance; rdar://3936338 - * - * Clear ENOTTY in the case of setting or - * clearing a break failing because pty's - * don't support break like real serial - * ports. - */ - error = 0; - goto out; - } - } - } - - /* - * If external processing and packet mode send ioctl packet. - */ - if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) { - switch(cmd) { - case TIOCSETA_32: - case TIOCSETAW_32: - case TIOCSETAF_32: - case TIOCSETA_64: - case TIOCSETAW_64: - case TIOCSETAF_64: - case TIOCSETP: - case TIOCSETN: - case TIOCSETC: - case TIOCSLTC: - case TIOCLBIS: - case TIOCLBIC: - case TIOCLSET: - pti->pt_send |= TIOCPKT_IOCTL; - ptcwakeup(tp, FREAD); - default: - break; - } - } - stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s')) - && CCEQ(cc[VSTART], CTRL('q')); - if (pti->pt_flags & PF_NOSTOP) { - if (stop) { - pti->pt_send &= ~TIOCPKT_NOSTOP; - pti->pt_send |= TIOCPKT_DOSTOP; - pti->pt_flags &= ~PF_NOSTOP; - ptcwakeup(tp, FREAD); - } - } else { - if (!stop) { - pti->pt_send &= ~TIOCPKT_DOSTOP; - pti->pt_send |= TIOCPKT_NOSTOP; - pti->pt_flags |= PF_NOSTOP; - ptcwakeup(tp, FREAD); - } - } -out: - tty_unlock(tp); - - return (error); -} +#endif // DEVFS diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index 2916f3e08..89e61f1a6 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Apple Inc. All rights reserved. + * Copyright (c) 1999-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include #include @@ -68,10 +70,8 @@ #include #include -#include -#include - #include +#include /* XXX These should be in a BSD accessible Mach header, but aren't. 
*/ extern kern_return_t memory_object_pages_resident(memory_object_control_t, @@ -109,7 +109,7 @@ static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *); static void ubc_cs_free(struct ubc_info *uip); struct zone *ubc_info_zone; - +static uint32_t cs_blob_generation_count = 1; /* * CODESIGNING @@ -118,6 +118,11 @@ struct zone *ubc_info_zone; extern int cs_debug; +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K ((1<<PAGE_SHIFT_4K)) +#define PAGE_MASK_4K ((vm_offset_t)PAGE_SIZE_4K - 1) +#define round_page_4K(x) (((vm_offset_t)(x) + PAGE_MASK_4K) & ~((vm_offset_t)PAGE_MASK_4K)) +#define trunc_page_4K(x) ((vm_offset_t)(x) & ~((vm_offset_t)PAGE_MASK_4K)) if (cd->hashSize != SHA1_RESULTLEN) return EBADEXEC; - if (cd->pageSize != PAGE_SHIFT) + if (cd->pageSize != PAGE_SHIFT_4K) return EBADEXEC; if (cd->hashType != CS_HASHTYPE_SHA1) return EBADEXEC; @@ -389,6 +382,16 @@ cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length) return EBADEXEC; } + /* team identifier is NULL terminated string */ + if (ntohl(cd->version) >= CS_SUPPORTSTEAMID && ntohl(cd->teamOffset)) { + if (length < ntohl(cd->teamOffset)) + return EBADEXEC; + + uint8_t *ptr = (uint8_t *)cd + ntohl(cd->teamOffset); + if (memchr(ptr, 0, length - ntohl(cd->teamOffset)) == NULL) + return EBADEXEC; + } + return 0; } @@ -533,7 +536,7 @@ cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t m } -static const CS_GenericBlob * +const CS_GenericBlob * cs_find_blob(struct cs_blob *csblob, uint32_t type, uint32_t magic) { if ((csblob->csb_flags & CS_VALID) == 0) @@ -917,39 +920,124 @@ ubc_info_deallocate(struct ubc_info *uip) ubc_info_free(uip); } +/* + * This should be public but currently it is only used below so we + * defer making that change. + */ +static errno_t mach_to_bsd_errno(kern_return_t mach_err) +{ + switch (mach_err) { + case KERN_SUCCESS: + return 0; + + case KERN_INVALID_ADDRESS: + case KERN_INVALID_ARGUMENT: + case KERN_NOT_IN_SET: + case KERN_INVALID_NAME: + case KERN_INVALID_TASK: + case KERN_INVALID_RIGHT: + case KERN_INVALID_VALUE: + case KERN_INVALID_CAPABILITY: + case KERN_INVALID_HOST: + case KERN_MEMORY_PRESENT: + case KERN_INVALID_PROCESSOR_SET: + case KERN_INVALID_POLICY: + case KERN_ALREADY_WAITING: + case KERN_DEFAULT_SET: + case KERN_EXCEPTION_PROTECTED: + case KERN_INVALID_LEDGER: + case KERN_INVALID_MEMORY_CONTROL: + case KERN_INVALID_SECURITY: + case KERN_NOT_DEPRESSED: + case KERN_LOCK_OWNED: + case KERN_LOCK_OWNED_SELF: + return EINVAL; + + case KERN_PROTECTION_FAILURE: + case KERN_NOT_RECEIVER: + case KERN_NO_ACCESS: + case KERN_POLICY_STATIC: + return EACCES; + + case KERN_NO_SPACE: + case KERN_RESOURCE_SHORTAGE: + case KERN_UREFS_OVERFLOW: + case KERN_INVALID_OBJECT: + return ENOMEM; + + case KERN_FAILURE: + return EIO; + + case KERN_MEMORY_FAILURE: + case KERN_POLICY_LIMIT: + case KERN_CODESIGN_ERROR: + return EPERM; + + case KERN_MEMORY_ERROR: + return EBUSY; + + case KERN_ALREADY_IN_SET: + case KERN_NAME_EXISTS: + case KERN_RIGHT_EXISTS: + return EEXIST; + + case KERN_ABORTED: + return EINTR; + + case KERN_TERMINATED: + case KERN_LOCK_SET_DESTROYED: + case KERN_LOCK_UNSTABLE: + case KERN_SEMAPHORE_DESTROYED: + return ENOENT; + + case KERN_RPC_SERVER_TERMINATED: + return ECONNRESET; + + case KERN_NOT_SUPPORTED: + return ENOTSUP; + + case KERN_NODE_DOWN: + return ENETDOWN; + + case KERN_NOT_WAITING: + return ENOENT; + + case KERN_OPERATION_TIMED_OUT: + return ETIMEDOUT; + + default: + return EIO; + } +} /* - * ubc_setsize + * ubc_setsize_ex * - * Tell the VM that the the size of the file represented by the vnode has + * Tell the VM that the size of the file represented by the vnode has * changed * - * Parameters: vp The vp whose backing file size is - * being changed - * nsize The new
size of the backing file - * - * Returns: 1 Success - * 0 Failure - * - * Notes: This function will indicate failure if the new size that's - * being attempted to be set is negative. - * - * This function will fail if there is no ubc_info currently - * associated with the vnode. - * - * This function will indicate success it the new size is the - * same or larger than the old size (in this case, the remainder - * of the file will require modification or use of an existing upl - * to access successfully). - * - * This function will fail if the new file size is smaller, and - * the memory region being invalidated was unable to actually be - * invalidated and/or the last page could not be flushed, if the - * new size is not aligned to a page boundary. This is usually - * indicative of an I/O error. + * Parameters: vp The vp whose backing file size is + * being changed + * nsize The new size of the backing file + * opts Options + * + * Returns: EINVAL for new size < 0 + * ENOENT if no UBC info exists + * EAGAIN if UBC_SETSIZE_NO_FS_REENTRY option is set and new_size < old size + * Other errors (mapped to errno_t) returned by VM functions + * + * Notes: This function will indicate success if the new size is the + * same or larger than the old size (in this case, the + * remainder of the file will require modification or use of + * an existing upl to access successfully). + * + * This function will fail if the new file size is smaller, + * and the memory region being invalidated was unable to + * actually be invalidated and/or the last page could not be + * flushed, if the new size is not aligned to a page + * boundary. This is usually indicative of an I/O error. */ -int -ubc_setsize(struct vnode *vp, off_t nsize) +errno_t ubc_setsize_ex(struct vnode *vp, off_t nsize, ubc_setsize_opts_t opts) { off_t osize; /* ui_size before change */ off_t lastpg, olastpgend, lastoff; @@ -958,13 +1046,17 @@ ubc_setsize(struct vnode *vp, off_t nsize) kern_return_t kret = KERN_SUCCESS; if (nsize < (off_t)0) - return (0); + return EINVAL; if (!UBCINFOEXISTS(vp)) - return (0); + return ENOENT; uip = vp->v_ubcinfo; osize = uip->ui_size; + + if (ISSET(opts, UBC_SETSIZE_NO_FS_REENTRY) && nsize < osize) + return EAGAIN; + /* * Update the size before flushing the VM */ @@ -975,7 +1067,7 @@ ubc_setsize(struct vnode *vp, off_t nsize) lock_vnode_and_post(vp, NOTE_EXTEND); } - return (1); /* return success */ + return 0; } /* @@ -991,17 +1083,16 @@ ubc_setsize(struct vnode *vp, off_t nsize) lastoff = (nsize & PAGE_MASK_64); if (lastoff) { - upl_t upl; + upl_t upl; upl_page_info_t *pl; - - /* + /* * new EOF ends up in the middle of a page - * zero the tail of this page if its currently + * zero the tail of this page if it's currently * present in the cache */ - kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE); - + kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE); + if (kret != KERN_SUCCESS) panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret); @@ -1019,20 +1110,25 @@ ubc_setsize(struct vnode *vp, off_t nsize) flags = MEMORY_OBJECT_DATA_FLUSH_ALL; else flags = MEMORY_OBJECT_DATA_FLUSH; - /* + /* * invalidate the pages beyond the new EOF page * */ - kret = memory_object_lock_request(control, - (memory_object_offset_t)lastpg, - (memory_object_size_t)(olastpgend - lastpg), NULL, NULL, - MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE); + kret = memory_object_lock_request(control, + (memory_object_offset_t)lastpg, + (memory_object_size_t)(olastpgend - lastpg), NULL, NULL, + 
MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE); if (kret != KERN_SUCCESS) printf("ubc_setsize: invalidate failed (error = %d)\n", kret); } - return ((kret == KERN_SUCCESS) ? 1 : 0); + return mach_to_bsd_errno(kret); } +// Returns true for success +int ubc_setsize(vnode_t vp, off_t nsize) +{ + return ubc_setsize_ex(vp, nsize, 0) == 0; +} /* * ubc_getsize @@ -1067,7 +1163,7 @@ ubc_getsize(struct vnode *vp) /* * ubc_umount * - * Call ubc_sync_range(vp, 0, EOF, UBC_PUSHALL) on all the vnodes for this + * Call ubc_msync(vp, 0, EOF, NULL, UBC_PUSHALL) on all the vnodes for this * mount point * * Parameters: mp The mount point @@ -1166,7 +1262,6 @@ ubc_getcred(struct vnode *vp) * This function is generally used only in the following cases: * * o a memory mapped file via the mmap() system call - * o a memory mapped file via the deprecated map_fd() call * o a swap store backing file * o subsequent to a successful write via vn_write() * @@ -1182,8 +1277,7 @@ ubc_getcred(struct vnode *vp) * * o Because a page-in may occur prior to a write, the * credential may not be set at this time, if the page-in - * is not the result of a mapping established via mmap() - * or map_fd(). + * is not the result of a mapping established via mmap(). * * In both these cases, this will be triggered from the paging * path, which will instead use the credential of the current @@ -1466,35 +1560,6 @@ ubc_pages_resident(vnode_t vp) return (0); } - -/* - * ubc_sync_range - * - * Clean and/or invalidate a range in the memory object that backs this vnode - * - * Parameters: vp The vnode whose associated ubc_info's - * associated memory object is to have a - * range invalidated within it - * beg_off The start of the range, as an offset - * end_off The end of the range, as an offset - * flags See ubc_msync_internal() - * - * Returns: 1 Success - * 0 Failure - * - * Notes: see ubc_msync_internal() for more detailed information. - * - * DEPRECATED: This interface is obsolete due to a failure to return error - * information needed in order to correct failures. The currently - * recommended interface is ubc_msync(). 
- */ -int -ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags) -{ - return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL)); -} - - /* * ubc_msync * @@ -1546,6 +1611,8 @@ ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags) /* + * ubc_msync_internal + * * Clean and/or invalidate a range in the memory object that backs this vnode * * Parameters: vp The vnode whose associated ubc_info's @@ -1662,7 +1729,7 @@ ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, i /* - * ubc_msync_internal + * ubc_map * * Explicitly map a vnode that has an associate ubc_info, and add a reference * to it for the ubc system, if there isn't one already, so it will not be @@ -1691,7 +1758,6 @@ ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, i * It is primarily used by: * * o mmap(), when mapping a file - * o The deprecated map_fd() interface, when mapping a file * o When mapping a shared file (a shared library in the * shared segment region) * o When loading a program image during the exec process @@ -1920,6 +1986,8 @@ ubc_unmap(struct vnode *vp) return; if (UBCINFOEXISTS(vp)) { + bool want_fsevent = false; + vnode_lock(vp); uip = vp->v_ubcinfo; @@ -1931,19 +1999,54 @@ ubc_unmap(struct vnode *vp) SET(uip->ui_flags, UI_MAPBUSY); if (ISSET(uip->ui_flags, UI_ISMAPPED)) { - CLR(uip->ui_flags, UI_ISMAPPED); + if (ISSET(uip->ui_flags, UI_MAPPEDWRITE)) + want_fsevent = true; + need_rele = 1; + + /* + * We want to clear the mapped flags after we've called + * VNOP_MNOMAP to avoid certain races and allow + * VNOP_MNOMAP to call ubc_is_mapped_writable. + */ } vnode_unlock(vp); - + if (need_rele) { - (void)VNOP_MNOMAP(vp, vfs_context_current()); + vfs_context_t ctx = vfs_context_current(); + + (void)VNOP_MNOMAP(vp, ctx); + +#if CONFIG_FSE + /* + * Why do we want an fsevent here? Normally the + * content modified fsevent is posted when a file is + * closed and only if it's written to via conventional + * means. It's perfectly legal to close a file and + * keep your mappings and we don't currently track + * whether it was written to via a mapping. + * Therefore, we need to post an fsevent here if the + * file was mapped writable. This may result in false + * events, i.e. we post a notification when nothing + * has really changed. 
+ */ + if (want_fsevent && need_fsevent(FSE_CONTENT_MODIFIED, vp)) { + add_fsevent(FSE_CONTENT_MODIFIED, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif + vnode_rele(vp); } vnode_lock_spin(vp); + if (need_rele) + CLR(uip->ui_flags, UI_ISMAPPED | UI_MAPPEDWRITE); + CLR(uip->ui_flags, UI_MAPBUSY); + if (ISSET(uip->ui_flags, UI_MAPWAITING)) { CLR(uip->ui_flags, UI_MAPWAITING); need_wakeup = 1; @@ -2195,7 +2298,7 @@ ubc_create_upl( if (bufsize & 0xfff) return KERN_INVALID_ARGUMENT; - if (bufsize > MAX_UPL_SIZE * PAGE_SIZE) + if (bufsize > MAX_UPL_SIZE_BYTES) return KERN_INVALID_ARGUMENT; if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) { @@ -2263,7 +2366,7 @@ upl_size_t ubc_upl_maxbufsize( void) { - return(MAX_UPL_SIZE * PAGE_SIZE); + return(MAX_UPL_SIZE_BYTES); } /* @@ -2343,7 +2446,7 @@ ubc_upl_commit( kern_return_t kr; pl = UPL_GET_INTERNAL_PAGE_LIST(upl); - kr = upl_commit(upl, pl, MAX_UPL_SIZE); + kr = upl_commit(upl, pl, MAX_UPL_SIZE_BYTES >> PAGE_SHIFT); upl_deallocate(upl); return kr; } @@ -2422,7 +2525,7 @@ ubc_upl_commit_range( pl = UPL_GET_INTERNAL_PAGE_LIST(upl); kr = upl_commit_range(upl, offset, size, flags, - pl, MAX_UPL_SIZE, &empty); + pl, MAX_UPL_SIZE_BYTES >> PAGE_SHIFT, &empty); if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty) upl_deallocate(upl); @@ -2580,7 +2683,7 @@ ubc_upl_pageinfo( int -UBCINFOEXISTS(struct vnode * vp) +UBCINFOEXISTS(const struct vnode * vp) { return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL)); } @@ -2595,6 +2698,21 @@ ubc_upl_range_needed( upl_range_needed(upl, index, count); } +boolean_t ubc_is_mapped(const struct vnode *vp, boolean_t *writable) +{ + if (!UBCINFOEXISTS(vp) || !ISSET(vp->v_ubcinfo->ui_flags, UI_ISMAPPED)) + return FALSE; + if (writable) + *writable = ISSET(vp->v_ubcinfo->ui_flags, UI_MAPPEDWRITE); + return TRUE; +} + +boolean_t ubc_is_mapped_writable(const struct vnode *vp) +{ + boolean_t writable; + return ubc_is_mapped(vp, &writable) && writable; +} + /* * CODE SIGNING @@ -2683,6 +2801,8 @@ ubc_cs_sigpup_add( blob->csb_mem_handle = IPC_PORT_NULL; blob->csb_mem_kaddr = address; blob->csb_sigpup = 1; + blob->csb_platform_binary = 0; + blob->csb_teamid = NULL; /* * Validate the blob's contents @@ -2698,13 +2818,13 @@ ubc_cs_sigpup_add( } blob->csb_flags = ntohl(cd->flags) | CS_VALID; - blob->csb_end_offset = round_page(ntohl(cd->codeLimit)); + blob->csb_end_offset = round_page_4K(ntohl(cd->codeLimit)); if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) { const SC_Scatter *scatter = (const SC_Scatter*) ((const char*)cd + ntohl(cd->scatterOffset)); - blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE; + blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE_4K; } else { - blob->csb_start_offset = (blob->csb_end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE)); + blob->csb_start_offset = (blob->csb_end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE_4K)); } /* @@ -2769,7 +2889,6 @@ ubc_cs_blob_add( cpu_type_t cputype, off_t base_offset, vm_address_t addr, - off_t blob_offset, vm_size_t size) { kern_return_t kr; @@ -2782,8 +2901,10 @@ ubc_cs_blob_add( off_t blob_start_offset, blob_end_offset; SHA1_CTX sha1ctxt; boolean_t record_mtime; + int is_platform_binary; record_mtime = FALSE; + is_platform_binary = 0; blob_handle = IPC_PORT_NULL; @@ -2822,12 +2943,13 @@ ubc_cs_blob_add( blob->csb_cpu_type = cputype; blob->csb_sigpup = 0; blob->csb_base_offset = base_offset; - blob->csb_blob_offset = blob_offset; blob->csb_mem_size = size; blob->csb_mem_offset = 0; 
blob->csb_mem_handle = blob_handle; blob->csb_mem_kaddr = addr; blob->csb_flags = 0; + blob->csb_platform_binary = 0; + blob->csb_teamid = NULL; /* * Validate the blob's contents @@ -2847,14 +2969,14 @@ ubc_cs_blob_add( int sha1_size; blob->csb_flags = (ntohl(cd->flags) & CS_ALLOWED_MACHO) | CS_VALID; - blob->csb_end_offset = round_page(ntohl(cd->codeLimit)); + blob->csb_end_offset = round_page_4K(ntohl(cd->codeLimit)); if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) { const SC_Scatter *scatter = (const SC_Scatter*) ((const char*)cd + ntohl(cd->scatterOffset)); - blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE; + blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE_4K; } else { blob->csb_start_offset = (blob->csb_end_offset - - (ntohl(cd->nCodeSlots) * PAGE_SIZE)); + (ntohl(cd->nCodeSlots) * PAGE_SIZE_4K)); } /* compute the blob's SHA1 hash */ sha1_base = (const unsigned char *) cd; @@ -2868,11 +2990,29 @@ ubc_cs_blob_add( * Let policy module check whether the blob's signature is accepted. */ #if CONFIG_MACF - error = mac_vnode_check_signature(vp, base_offset, blob->csb_sha1, (void*)addr, size); - if (error) + error = mac_vnode_check_signature(vp, base_offset, blob->csb_sha1, (const void*)cd, size, &is_platform_binary); + if (error) { + if (cs_debug) + printf("check_signature[pid: %d], error = %d\n", current_proc()->p_pid, error); goto out; + } #endif + if (is_platform_binary) { + if (cs_debug > 1) + printf("check_signature[pid: %d]: platform binary\n", current_proc()->p_pid); + blob->csb_platform_binary = 1; + } else { + blob->csb_platform_binary = 0; + blob->csb_teamid = csblob_get_teamid(blob); + if (cs_debug > 1) { + if (blob->csb_teamid) + printf("check_signature[pid: %d]: team-id is %s\n", current_proc()->p_pid, blob->csb_teamid); + else + printf("check_signature[pid: %d]: no team-id\n", current_proc()->p_pid); + } + } + /* * Validate the blob's coverage */ @@ -2901,6 +3041,30 @@ ubc_cs_blob_add( oblob = oblob->csb_next) { off_t oblob_start_offset, oblob_end_offset; + /* check for conflicting teamid */ + if (blob->csb_platform_binary) { //platform binary needs to be the same for app slices + if (!oblob->csb_platform_binary) { + vnode_unlock(vp); + error = EALREADY; + goto out; + } + } else if (blob->csb_teamid) { //teamid binary needs to be the same for app slices + if (oblob->csb_platform_binary || + oblob->csb_teamid == NULL || + strcmp(oblob->csb_teamid, blob->csb_teamid) != 0) { + vnode_unlock(vp); + error = EALREADY; + goto out; + } + } else { // non teamid binary needs to be the same for app slices + if (oblob->csb_platform_binary || + oblob->csb_teamid != NULL) { + vnode_unlock(vp); + error = EALREADY; + goto out; + } + } + oblob_start_offset = (oblob->csb_base_offset + oblob->csb_start_offset); oblob_end_offset = (oblob->csb_base_offset + @@ -2934,43 +3098,11 @@ ubc_cs_blob_add( */ oblob->csb_cpu_type = cputype; } - /* - * If the same blob moved around in the Mach-O, we - * want to remember the new blob offset to avoid - * coming back here again and again. - */ - oblob->csb_blob_offset = blob_offset; - vnode_unlock(vp); error = EAGAIN; goto out; } else { /* different blob: reject the new one */ - char pathbuf[MAXPATHLEN]; - char new_sha1_str[2*SHA1_RESULTLEN+1]; - char old_sha1_str[2*SHA1_RESULTLEN+1]; - char arch_str[20]; - const char *pathp = "?unknown"; - int pblen = sizeof(pathbuf); - if (vn_getpath(vp, pathbuf, &pblen) == 0) { - /* pblen == strlen(pathbuf) + 1. 
Assume strlen(pathbuf) > 0 */ - for (pathp = pathbuf + pblen - 2; pathp > pathbuf && pathp[-1] != '/'; pathp--) ; - } - snprintf(arch_str, sizeof(arch_str), "%x", cputype); - hex_str(oblob->csb_sha1, SHA1_RESULTLEN, old_sha1_str); - hex_str(blob->csb_sha1, SHA1_RESULTLEN, new_sha1_str); - kern_asl_msg(LOG_NOTICE, "messagetracer", - 6, - "com.apple.message.domain", "com.apple.kernel.cs.replace", - "com.apple.message.signature", pathp, - "com.apple.message.signature2", arch_str, - "com.apple.message.signature3", old_sha1_str, - "com.apple.message.result", new_sha1_str, - "com.apple.message.summarize", "YES", - NULL - ); - printf("CODESIGNING: rejected new signature for architecture %d of file %s\n", - cputype, pathbuf); vnode_unlock(vp); error = EALREADY; goto out; @@ -2979,6 +3111,7 @@ } + + /* mark this vnode's VM object as having "signed pages" */ kr = memory_object_signed(uip->ui_control, TRUE); if (kr != KERN_SUCCESS) { @@ -2992,6 +3125,9 @@ record_mtime = TRUE; } + /* set the generation count for cs_blobs */ + uip->cs_add_gen = cs_blob_generation_count; + /* * Add this blob to the list of blobs for this vnode. * We always add at the front of the list and we never remove a @@ -3040,6 +3176,9 @@ out: if (error) { + if (cs_debug) + printf("check_signature[pid: %d]: error = %d\n", current_proc()->p_pid, error); + /* we failed; release what we allocated */ if (blob) { kfree(blob, sizeof (*blob)); @@ -3067,7 +3206,6 @@ out: return error; } - struct cs_blob * ubc_cs_blob_get( struct vnode *vp, @@ -3102,8 +3240,10 @@ ubc_cs_blob_get( } } - if (cs_debug && blob != NULL && blob->csb_sigpup) + if (cs_debug && blob != NULL && blob->csb_sigpup) { printf("found sig pup blob\n"); + } + out: vnode_unlock(vp); @@ -3139,6 +3279,81 @@ ubc_cs_free( uip->cs_blobs = NULL; } +/* check cs blob generation on vnode + * returns: + * 0 : Success, the cs_blob attached is current + * ENEEDAUTH : Generation count mismatch. Needs authentication again. + */ +int +ubc_cs_generation_check( + struct vnode *vp) +{ + int retval = ENEEDAUTH; + + vnode_lock_spin(vp); + + if (UBCINFOEXISTS(vp) && vp->v_ubcinfo->cs_add_gen == cs_blob_generation_count) { + retval = 0; + } + + vnode_unlock(vp); + return retval; +} + +int +ubc_cs_blob_revalidate( + struct vnode *vp, + struct cs_blob *blob + ) +{ + int error = 0; +#if CONFIG_MACF + int is_platform_binary = 0; +#endif + const CS_CodeDirectory *cd = NULL; + + assert(vp != NULL); + assert(blob != NULL); + + error = cs_validate_csblob((const uint8_t *)blob->csb_mem_kaddr, blob->csb_mem_size, &cd); + if (error) { + if (cs_debug) { + printf("CODESIGNING: csblob invalid: %d\n", error); + } + goto out; + } + + /* callout to mac_vnode_check_signature */ +#if CONFIG_MACF + error = mac_vnode_check_signature(vp, blob->csb_base_offset, blob->csb_sha1, (const void*)cd, blob->csb_cpu_type, &is_platform_binary); + if (cs_debug && error) { + printf("revalidate: check_signature[pid: %d], error = %d\n", current_proc()->p_pid, error); + } +#endif + + /* update generation number if success */ + vnode_lock_spin(vp); + if (UBCINFOEXISTS(vp)) { + if (error == 0) + vp->v_ubcinfo->cs_add_gen = cs_blob_generation_count; + else + vp->v_ubcinfo->cs_add_gen = 0; + } + + vnode_unlock(vp); + +out: + return error; +} + +void +cs_blob_reset_cache() +{ + /* incrementing an odd number by 2 ensures '0' is never reached. */ + OSAddAtomic(+2, &cs_blob_generation_count); + printf("Resetting cs_blob cache from all vnodes. 
\n"); +} + struct cs_blob * ubc_get_cs_blobs( struct vnode *vp) @@ -3269,7 +3484,7 @@ cs_validate_page( embedded = (const CS_SuperBlob *) blob_addr; cd = findCodeDirectory(embedded, lower_bound, upper_bound); if (cd != NULL) { - if (cd->pageSize != PAGE_SHIFT || + if (cd->pageSize != PAGE_SHIFT_4K || cd->hashType != CS_HASHTYPE_SHA1 || cd->hashSize != SHA1_RESULTLEN) { /* bogus blob ? */ @@ -3291,7 +3506,7 @@ cs_validate_page( if (blob->csb_sigpup && cs_debug) printf("sigpup codesize %d\n", (int)codeLimit); - hash = hashes(cd, (unsigned)atop(offset), + hash = hashes(cd, (unsigned)(offset>>PAGE_SHIFT_4K), lower_bound, upper_bound); if (hash != NULL) { bcopy(hash, expected_hash, @@ -3328,41 +3543,34 @@ cs_validate_page( *tainted = FALSE; } else { - size = PAGE_SIZE; + size = PAGE_SIZE_4K; + const uint32_t *asha1, *esha1; if ((off_t)(offset + size) > codeLimit) { /* partial page at end of segment */ assert(offset < codeLimit); - size = (size_t) (codeLimit & PAGE_MASK); + size = (size_t) (codeLimit & PAGE_MASK_4K); } /* compute the actual page's SHA1 hash */ SHA1Init(&sha1ctxt); SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size); SHA1Final(actual_hash, &sha1ctxt); + asha1 = (const uint32_t *) actual_hash; + esha1 = (const uint32_t *) expected_hash; + if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) { - char asha1_str[2*SHA1_RESULTLEN+1]; - char esha1_str[2*SHA1_RESULTLEN+1]; - hex_str(actual_hash, SHA1_RESULTLEN, asha1_str); - hex_str(expected_hash, SHA1_RESULTLEN, esha1_str); if (cs_debug) { printf("CODE SIGNING: cs_validate_page: " - "mobj %p off 0x%llx size 0x%lx: actual %s expected %s\n", - pager, page_offset, size, asha1_str, esha1_str); + "mobj %p off 0x%llx size 0x%lx: " + "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != " + "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n", + pager, page_offset, size, + asha1[0], asha1[1], asha1[2], + asha1[3], asha1[4], + esha1[0], esha1[1], esha1[2], + esha1[3], esha1[4]); } cs_validate_page_bad_hash++; - if (!*tainted) { - char page_offset_str[20]; - snprintf(page_offset_str, sizeof(page_offset_str), "%llx", page_offset); - kern_asl_msg(LOG_NOTICE, "messagetracer", - 5, - "com.apple.message.domain", "com.apple.kernel.cs.mismatch", - "com.apple.message.signature", page_offset_str, - "com.apple.message.signature2", asha1_str, - "com.apple.message.signature3", esha1_str, - "com.apple.message.summarize", "YES", - NULL - ); - } *tainted = TRUE; } else { if (cs_debug > 10) { diff --git a/bsd/kern/uipc_domain.c b/bsd/kern/uipc_domain.c index 5a1c62a79..9c5801dde 100644 --- a/bsd/kern/uipc_domain.c +++ b/bsd/kern/uipc_domain.c @@ -102,8 +102,6 @@ static lck_grp_attr_t *domain_proto_mtx_grp_attr; decl_lck_mtx_data(static, domain_proto_mtx); decl_lck_mtx_data(static, domain_timeout_mtx); -extern sysctlfn net_sysctl; - static u_int64_t _net_uptime; static void @@ -909,53 +907,6 @@ pffindprotonotype(int family, int protocol) return (pp); } -int -net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, struct proc *p) -{ -#pragma unused(p) - int family, protocol, error = 0; - struct domain *dp; - struct protosw *pp; - domain_guard_t guard; - - /* - * All sysctl names at this level are nonterminal; - * next two components are protocol family and protocol number, - * then at least one addition component. 
- */ - if (namelen < 3) - return (EISDIR); /* overloaded */ - family = name[0]; - protocol = name[1]; - - if (family == 0) - return (0); - - guard = domain_guard_deploy(); - TAILQ_FOREACH(dp, &domains, dom_entry) { - if (dp->dom_family == family) - break; - } - if (dp == NULL) { - error = ENOPROTOOPT; - goto done; - } - - TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { - if (pp->pr_protocol == protocol && pp->pr_sysctl != NULL) { - error = (*pp->pr_sysctl)(name + 2, namelen - 2, - (void *)(uintptr_t)oldp, oldlenp, - (void *)(uintptr_t)newp, newlen); - goto done; - } - } - error = ENOPROTOOPT; -done: - domain_guard_release(guard); - return (error); -} - void pfctlinput(int cmd, struct sockaddr *sa) { diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 91d1ce4ca..d5f73128e 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2013 Apple Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -102,6 +102,7 @@ #endif /* MAC_NET */ #include +#include /* * MBUF IMPLEMENTATION NOTES. @@ -578,6 +579,7 @@ typedef struct { int mtbl_minlimit; /* minimum allowed */ int mtbl_maxlimit; /* maximum allowed */ u_int32_t mtbl_wantpurge; /* purge during next reclaim */ + uint32_t mtbl_avgtotal; /* average total on iOS */ } mbuf_table_t; #define m_class(c) mbuf_table[c].mtbl_class @@ -588,6 +590,7 @@ typedef struct { #define m_minlimit(c) mbuf_table[c].mtbl_minlimit #define m_maxlimit(c) mbuf_table[c].mtbl_maxlimit #define m_wantpurge(c) mbuf_table[c].mtbl_wantpurge +#define m_avgtotal(c) mbuf_table[c].mtbl_avgtotal #define m_cname(c) mbuf_table[c].mtbl_stats->mbcl_cname #define m_size(c) mbuf_table[c].mtbl_stats->mbcl_size #define m_total(c) mbuf_table[c].mtbl_stats->mbcl_total @@ -600,19 +603,23 @@ typedef struct { #define m_purge_cnt(c) mbuf_table[c].mtbl_stats->mbcl_purge_cnt #define m_fail_cnt(c) mbuf_table[c].mtbl_stats->mbcl_fail_cnt #define m_ctotal(c) mbuf_table[c].mtbl_stats->mbcl_ctotal +#define m_peak(c) mbuf_table[c].mtbl_stats->mbcl_peak_reported +#define m_release_cnt(c) mbuf_table[c].mtbl_stats->mbcl_release_cnt static mbuf_table_t mbuf_table[] = { /* * The caches for mbufs, regular clusters and big clusters. + * The average total values were based on data gathered by actual + * usage patterns on iOS. */ { MC_MBUF, NULL, TAILQ_HEAD_INITIALIZER(m_slablist(MC_MBUF)), - NULL, NULL, 0, 0, 0, 0 }, + NULL, NULL, 0, 0, 0, 0, 3000 }, { MC_CL, NULL, TAILQ_HEAD_INITIALIZER(m_slablist(MC_CL)), - NULL, NULL, 0, 0, 0, 0 }, + NULL, NULL, 0, 0, 0, 0, 2000 }, { MC_BIGCL, NULL, TAILQ_HEAD_INITIALIZER(m_slablist(MC_BIGCL)), - NULL, NULL, 0, 0, 0, 0 }, + NULL, NULL, 0, 0, 0, 0, 1000 }, { MC_16KCL, NULL, TAILQ_HEAD_INITIALIZER(m_slablist(MC_16KCL)), - NULL, NULL, 0, 0, 0, 0 }, + NULL, NULL, 0, 0, 0, 0, 1000 }, /* * The following are special caches; they serve as intermediate * caches backed by the above rudimentary caches. Each object @@ -621,9 +628,9 @@ static mbuf_table_t mbuf_table[] = { * deal with the slab structures; instead, the constructed * cached elements are simply stored in the freelists. 
*/ - { MC_MBUF_CL, NULL, { NULL, NULL }, NULL, NULL, 0, 0, 0, 0 }, - { MC_MBUF_BIGCL, NULL, { NULL, NULL }, NULL, NULL, 0, 0, 0, 0 }, - { MC_MBUF_16KCL, NULL, { NULL, NULL }, NULL, NULL, 0, 0, 0, 0 }, + { MC_MBUF_CL, NULL, { NULL, NULL }, NULL, NULL, 0, 0, 0, 0, 2000 }, + { MC_MBUF_BIGCL, NULL, { NULL, NULL }, NULL, NULL, 0, 0, 0, 0, 1000 }, + { MC_MBUF_16KCL, NULL, { NULL, NULL }, NULL, NULL, 0, 0, 0, 0, 1000 }, }; #define NELEM(a) (sizeof (a) / sizeof ((a)[0])) @@ -631,6 +638,12 @@ static mbuf_table_t mbuf_table[] = { static void *mb_waitchan = &mbuf_table; /* wait channel for all caches */ static int mb_waiters; /* number of waiters */ +boolean_t mb_peak_newreport = FALSE; +boolean_t mb_peak_firstreport = FALSE; + +/* generate a report by default after 1 week of uptime */ +#define MBUF_PEAK_FIRST_REPORT_THRESHOLD 604800 + #define MB_WDT_MAXTIME 10 /* # of secs before watchdog panic */ static struct timeval mb_wdtstart; /* watchdog start timestamp */ static char *mbuf_dump_buf; @@ -640,8 +653,12 @@ static char *mbuf_dump_buf; /* * mbuf watchdog is enabled by default on embedded platforms. It is * also toggeable via the kern.ipc.mb_watchdog sysctl. + * Garbage collection is also enabled by default on embedded platforms. + * mb_drain_maxint controls the amount of time to wait (in seconds) before + * consecutive calls to m_drain(). */ static unsigned int mb_watchdog = 0; +static unsigned int mb_drain_maxint = 0; /* Red zone */ static u_int32_t mb_redzone_cookie; @@ -691,6 +708,7 @@ static boolean_t mbuf_sleep(mbuf_class_t, unsigned int, int); static void mcl_audit_init(void *, mcache_audit_t **, mcache_obj_t **, size_t, unsigned int); +static void mcl_audit_free(void *, unsigned int); static mcache_audit_t *mcl_audit_buf2mca(mbuf_class_t, mcache_obj_t *); static void mcl_audit_mbuf(mcache_audit_t *, void *, boolean_t, boolean_t); static void mcl_audit_cluster(mcache_audit_t *, void *, size_t, boolean_t, @@ -720,6 +738,8 @@ static boolean_t slab_is_detached(mcl_slab_t *); static int m_copyback0(struct mbuf **, int, int, const void *, int, int); static struct mbuf *m_split0(struct mbuf *, int, int, int); +__private_extern__ void mbuf_report_peak_usage(void); +static boolean_t mbuf_report_usage(mbuf_class_t); /* flags for m_copyback0 */ #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ @@ -1064,6 +1084,7 @@ mb_stat_sysctl SYSCTL_HANDLER_ARGS oc->mbcl_purge_cnt = c->mbcl_purge_cnt; oc->mbcl_fail_cnt = c->mbcl_fail_cnt; oc->mbcl_ctotal = c->mbcl_ctotal; + oc->mbcl_release_cnt = c->mbcl_release_cnt; oc->mbcl_mc_state = c->mbcl_mc_state; oc->mbcl_mc_cached = c->mbcl_mc_cached; oc->mbcl_mc_waiter_cnt = c->mbcl_mc_waiter_cnt; @@ -1524,7 +1545,11 @@ mbinit(void) /* Make sure we didn't miss any */ VERIFY(m_minlimit(m_class(m)) == 0 || m_total(m_class(m)) >= m_minlimit(m_class(m))); + + /* populate the initial sizes and report from there on */ + m_peak(m_class(m)) = m_total(m_class(m)); } + mb_peak_newreport = FALSE; lck_mtx_unlock(mbuf_mlock); @@ -2756,6 +2781,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) for (i = 0; i < numpages; i++, page += NBPG) { ppnum_t offset = ((char *)page - (char *)mbutl) / NBPG; ppnum_t new_page = pmap_find_phys(kernel_pmap, page); + mbuf_class_t class = MC_BIGCL; /* * If there is a mapper the appropriate I/O page is returned; @@ -2796,6 +2822,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) m_infree(MC_MBUF_BIGCL); mbstat.m_bigclusters = ++m_total(MC_BIGCL); VERIFY(m_total(MC_BIGCL) <= 
m_maxlimit(MC_BIGCL)); + class = MC_BIGCL; } else if ((i % NSLABSP16KB) == 0) { union m16kcluster *m16kcl = (union m16kcluster *)page; mcl_slab_t *nsp; @@ -2831,7 +2858,10 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) m_infree(MC_16KCL)++; m_total(MC_16KCL)++; VERIFY(m_total(MC_16KCL) <= m_maxlimit(MC_16KCL)); + class = MC_16KCL; } + if (!mb_peak_newreport && mbuf_report_usage(class)) + mb_peak_newreport = TRUE; } VERIFY(mca_list == NULL && con_list == NULL); @@ -2987,6 +3017,8 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait) VERIFY(m_total(MC_BIGCL) >= m_minlimit(MC_BIGCL)); VERIFY(m_total(class) <= m_maxlimit(class)); + if (!mb_peak_newreport && mbuf_report_usage(class)) + mb_peak_newreport = TRUE; i = numobj; if (class == MC_MBUF) { @@ -3214,7 +3246,7 @@ m_reclaim(mbuf_class_t class, unsigned int num, boolean_t comp) /* Sigh; we have no other choices but to ask mcache to purge */ for (m = 0; m < NELEM(mbuf_table); m++) { if ((bmap & (1 << m)) && - mcache_purge_cache(m_cache(m))) { + mcache_purge_cache(m_cache(m), TRUE)) { lck_mtx_lock(mbuf_mlock); m_purge_cnt(m)++; mbstat.m_drain++; @@ -3719,7 +3751,6 @@ m_copy_classifier(struct mbuf *to, struct mbuf *from) to->m_pkthdr.pkt_flags = from->m_pkthdr.pkt_flags; (void) m_set_service_class(to, from->m_pkthdr.pkt_svc); to->m_pkthdr.pkt_ifainfo = from->m_pkthdr.pkt_ifainfo; - to->m_pkthdr.ipsec_policy = from->m_pkthdr.ipsec_policy; #if MEASURE_BW to->m_pkthdr.pkt_bwseq = from->m_pkthdr.pkt_bwseq; #endif /* MEASURE_BW */ @@ -4509,7 +4540,11 @@ m_copym_mode(struct mbuf *m, int off0, int len, int wait, uint32_t mode) if (off < 0 || len < 0) panic("m_copym: invalid offset %d or len %d", off, len); - if (off == 0 && (m->m_flags & M_PKTHDR)) { + VERIFY((mode != M_COPYM_MUST_COPY_HDR && + mode != M_COPYM_MUST_MOVE_HDR) || (m->m_flags & M_PKTHDR)); + + if ((off == 0 && (m->m_flags & M_PKTHDR)) || + mode == M_COPYM_MUST_COPY_HDR || mode == M_COPYM_MUST_MOVE_HDR) { mhdr = m; copyhdr = 1; } @@ -4530,16 +4565,21 @@ m_copym_mode(struct mbuf *m, int off0, int len, int wait, uint32_t mode) break; } - n = _M_RETRY(wait, m->m_type); + if (copyhdr) + n = _M_RETRYHDR(wait, m->m_type); + else + n = _M_RETRY(wait, m->m_type); *np = n; if (n == NULL) goto nospace; if (copyhdr != 0) { - if (mode == M_COPYM_MOVE_HDR) { + if ((mode == M_COPYM_MOVE_HDR) || + (mode == M_COPYM_MUST_MOVE_HDR)) { M_COPY_PKTHDR(n, mhdr); - } else if (mode == M_COPYM_COPY_HDR) { + } else if ((mode == M_COPYM_COPY_HDR) || + (mode == M_COPYM_MUST_COPY_HDR)) { if (m_dup_pkthdr(n, mhdr, wait) == 0) goto nospace; } @@ -4548,25 +4588,34 @@ m_copym_mode(struct mbuf *m, int off0, int len, int wait, uint32_t mode) else n->m_pkthdr.len = len; copyhdr = 0; - } - if (len == M_COPYALL) { - if (MIN(len, (m->m_len - off)) == len) { - printf("m->m_len %d - off %d = %d, %d\n", - m->m_len, off, m->m_len - off, - MIN(len, (m->m_len - off))); + /* + * There is data to copy from the packet header mbuf + * if it is empty or it is before the starting offset + */ + if (mhdr != m) { + np = &n->m_next; + continue; } } n->m_len = MIN(len, (m->m_len - off)); - if (n->m_len == M_COPYALL) { - printf("n->m_len == M_COPYALL, fixing\n"); - n->m_len = MHLEN; - } if (m->m_flags & M_EXT) { n->m_ext = m->m_ext; m_incref(m); n->m_data = m->m_data + off; n->m_flags |= M_EXT; } else { + /* + * Limit to the capacity of the destination + */ + if (n->m_flags & M_PKTHDR) + n->m_len = MIN(n->m_len, MHLEN); + else + n->m_len = MIN(n->m_len, MLEN); + + if (MTOD(n, char *) + n->m_len > 
((char *)n) + MSIZE) + panic("%s n %p copy overflow", + __func__, n); + bcopy(MTOD(m, caddr_t)+off, MTOD(n, caddr_t), (unsigned)n->m_len); } @@ -4602,10 +4651,10 @@ m_copym(struct mbuf *m, int off0, int len, int wait) * list (normally hung off of the socket) */ struct mbuf * -m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait, +m_copym_with_hdrs(struct mbuf *m0, int off0, int len0, int wait, struct mbuf **m_lastm, int *m_off, uint32_t mode) { - struct mbuf *n, **np = NULL; + struct mbuf *m = m0, *n, **np = NULL; int off = off0, len = len0; struct mbuf *top = NULL; int mcflags = MSLEEPF(wait); @@ -4616,8 +4665,8 @@ m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait, if (off == 0 && (m->m_flags & M_PKTHDR)) copyhdr = 1; - - if (*m_lastm != NULL) { + + if (m_lastm != NULL && *m_lastm != NULL) { m = *m_lastm; off = *m_off; } else { @@ -4676,9 +4725,11 @@ m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait, } if (copyhdr) { - if (mode == M_COPYM_MOVE_HDR) { + if ((mode == M_COPYM_MOVE_HDR) || + (mode == M_COPYM_MUST_MOVE_HDR)) { M_COPY_PKTHDR(n, m); - } else if (mode == M_COPYM_COPY_HDR) { + } else if ((mode == M_COPYM_COPY_HDR) || + (mode == M_COPYM_MUST_COPY_HDR)) { if (m_dup_pkthdr(n, m, wait) == 0) goto nospace; } @@ -4693,18 +4744,24 @@ m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait, n->m_data = m->m_data + off; n->m_flags |= M_EXT; } else { + if (MTOD(n, char *) + n->m_len > ((char *)n) + MSIZE) + panic("%s n %p copy overflow", + __func__, n); + bcopy(MTOD(m, caddr_t)+off, MTOD(n, caddr_t), (unsigned)n->m_len); } len -= n->m_len; if (len == 0) { - if ((off + n->m_len) == m->m_len) { - *m_lastm = m->m_next; - *m_off = 0; - } else { - *m_lastm = m; - *m_off = off + n->m_len; + if (m_lastm != NULL && m_off != NULL) { + if ((off + n->m_len) == m->m_len) { + *m_lastm = m->m_next; + *m_off = 0; + } else { + *m_lastm = m; + *m_off = off + n->m_len; + } } break; } @@ -6208,12 +6265,10 @@ slab_get(void *buf) if ((slg = slabstbl[ix]) == NULL) { /* - * In the current implementation, we never shrink the memory - * pool (hence the cluster map); if we attempt to reallocate - * a cluster group when it's already allocated, panic since - * this is a sign of a memory corruption (slabstbl[ix] got - * nullified). This also means that there shouldn't be any - * hole in the kernel sub-map for the mbuf pool. + * In the current implementation, we never shrink the slabs + * table; if we attempt to reallocate a cluster group when + * it's already allocated, panic since this is a sign of a + * memory corruption (slabstbl[ix] got nullified). */ ++slabgrp; VERIFY(ix < slabgrp); @@ -6421,6 +6476,29 @@ mcl_audit_init(void *buf, mcache_audit_t **mca_list, mca_tail->mca_next = NULL; } +static void +mcl_audit_free(void *buf, unsigned int num) +{ + unsigned int i, ix; + mcache_audit_t *mca, *mca_list; + + ix = MTOBG(buf); + VERIFY(ix < maxclaudit); + + if (mclaudit[ix].cl_audit[0] != NULL) { + mca_list = mclaudit[ix].cl_audit[0]; + for (i = 0; i < num; i++) { + mca = mclaudit[ix].cl_audit[i]; + mclaudit[ix].cl_audit[i] = NULL; + if (mca->mca_contents) + mcache_free(mcl_audit_con_cache, + mca->mca_contents); + } + mcache_free_ext(mcache_audit_cache, + (mcache_obj_t *)mca_list); + } +} + /* * Given an address of a buffer (mbuf/2KB/4KB/16KB), return * the corresponding audit structure for that buffer. 
@@ -6577,7 +6655,7 @@ mcl_audit_scratch(mcache_audit_t *mca) msa->msa_pdepth = msa->msa_depth; bzero(stack, sizeof (stack)); msa->msa_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1; - bcopy(&stack[1], msa->msa_stack, sizeof (mca->mca_pstack)); + bcopy(&stack[1], msa->msa_stack, sizeof (msa->msa_stack)); msa->msa_ptstamp = msa->msa_tstamp; microuptime(&now); @@ -7062,11 +7140,13 @@ mbuf_dump(void) if (i < mltr->mltr_depth) { if (mleak_stat->ml_isaddr64) { k = snprintf(c, clen, "0x%0llx ", - mltr->mltr_addr[i]); + (uint64_t)VM_KERNEL_UNSLIDE( + mltr->mltr_addr[i])); } else { k = snprintf(c, clen, "0x%08x ", - (u_int32_t)mltr->mltr_addr[i]); + (uint32_t)VM_KERNEL_UNSLIDE( + mltr->mltr_addr[i])); } } else { if (mleak_stat->ml_isaddr64) @@ -7107,9 +7187,13 @@ m_reinit(struct mbuf *m, int hdr) * as the packet header structure might overlap * with the data. */ - printf("%s: cannot set M_PKTHDR on altered mbuf %p, " - "m_data %p (expected %p), m_len %d (expected 0)\n", - __func__, m, m->m_data, m->m_dat, m->m_len); + printf("%s: cannot set M_PKTHDR on altered mbuf %llx, " + "m_data %llx (expected %llx), " + "m_len %d (expected 0)\n", + __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m), + (uint64_t)VM_KERNEL_ADDRPERM(m->m_data), + (uint64_t)VM_KERNEL_ADDRPERM(m->m_dat), m->m_len); ret = EBUSY; } else { VERIFY((m->m_flags & M_EXT) || m->m_data == m->m_dat); @@ -7130,16 +7214,41 @@ m_reinit(struct mbuf *m, int hdr) void m_scratch_init(struct mbuf *m) { + struct pkthdr *pkt = &m->m_pkthdr; + VERIFY(m->m_flags & M_PKTHDR); - bzero(&m->m_pkthdr.pkt_mpriv, sizeof (m->m_pkthdr.pkt_mpriv)); + /* See comments in */ + if (pkt->pkt_flags & PKTF_PRIV_GUARDED) { + panic_plain("Invalid attempt to modify guarded module-private " + "area: mbuf %p, pkt_flags 0x%x\n", m, pkt->pkt_flags); + /* NOTREACHED */ + } + + bzero(&pkt->pkt_mpriv, sizeof (pkt->pkt_mpriv)); } +/* + * This routine is reserved for mbuf_get_driver_scratch(); clients inside + * xnu that intend on utilizing the module-private area should directly + * refer to the pkt_mpriv structure in the pkthdr. They are also expected + * to set and clear PKTF_PRIV_GUARDED, while owning the packet and prior + * to handing it off to another module, respectively. + */ u_int32_t m_scratch_get(struct mbuf *m, u_int8_t **p) { + struct pkthdr *pkt = &m->m_pkthdr; + VERIFY(m->m_flags & M_PKTHDR); + /* See comments in */ + if (pkt->pkt_flags & PKTF_PRIV_GUARDED) { + panic_plain("Invalid attempt to access guarded module-private " + "area: mbuf %p, pkt_flags 0x%x\n", m, pkt->pkt_flags); + /* NOTREACHED */ + } + if (mcltrace) { mcache_audit_t *mca; @@ -7150,8 +7259,8 @@ m_scratch_get(struct mbuf *m, u_int8_t **p) lck_mtx_unlock(mbuf_mlock); } - *p = (u_int8_t *)&m->m_pkthdr.pkt_mpriv; - return (sizeof (m->m_pkthdr.pkt_mpriv)); + *p = (u_int8_t *)&pkt->pkt_mpriv; + return (sizeof (pkt->pkt_mpriv)); } static void @@ -7181,18 +7290,286 @@ m_redzone_verify(struct mbuf *m) } } +/* + * Send a report of mbuf usage if the usage is at least 6% of max limit + * or if there has been at least 3% increase since the last report. + * + * The values 6% and 3% are chosen so that we can do simple arithmetic + * with shift operations. 
+ */ +static boolean_t +mbuf_report_usage(mbuf_class_t cl) +{ + /* if a report is already in progress, nothing to do */ + if (mb_peak_newreport) + return (TRUE); + + if (m_total(cl) > m_peak(cl) && + m_total(cl) >= (m_maxlimit(cl) >> 4) && + (m_total(cl) - m_peak(cl)) >= (m_peak(cl) >> 5)) + return (TRUE); + return (FALSE); +} + +__private_extern__ void +mbuf_report_peak_usage(void) +{ + int i = 0; + u_int64_t uptime; + struct nstat_sysinfo_data ns_data; + uint32_t memreleased = 0; + + uptime = net_uptime(); + lck_mtx_lock(mbuf_mlock); + + /* Generate an initial report after 1 week of uptime */ + if (!mb_peak_firstreport && + uptime > MBUF_PEAK_FIRST_REPORT_THRESHOLD) { + mb_peak_newreport = TRUE; + mb_peak_firstreport = TRUE; + } + + if (!mb_peak_newreport) { + lck_mtx_unlock(mbuf_mlock); + return; + } + + /* + * Since a report is being generated before 1 week, + * we do not need to force another one later + */ + if (uptime < MBUF_PEAK_FIRST_REPORT_THRESHOLD) + mb_peak_firstreport = TRUE; + + for (i = 0; i < NELEM(mbuf_table); i++) { + m_peak(m_class(i)) = m_total(m_class(i)); + memreleased += m_release_cnt(i); + } + mb_peak_newreport = FALSE; + lck_mtx_unlock(mbuf_mlock); + + bzero(&ns_data, sizeof(ns_data)); + ns_data.flags = NSTAT_SYSINFO_MBUF_STATS; + ns_data.u.mb_stats.total_256b = m_peak(MC_MBUF); + ns_data.u.mb_stats.total_2kb = m_peak(MC_CL); + ns_data.u.mb_stats.total_4kb = m_peak(MC_BIGCL); + ns_data.u.mb_stats.sbmb_total = total_sbmb_cnt_peak; + ns_data.u.mb_stats.sb_atmbuflimit = sbmb_limreached; + ns_data.u.mb_stats.draincnt = mbstat.m_drain; + ns_data.u.mb_stats.memreleased = memreleased; + + nstat_sysinfo_send_data(&ns_data); +} + +/* + * Called by the VM when there's memory pressure. + */ +__private_extern__ void +m_drain(void) +{ + mbuf_class_t mc; + mcl_slab_t *sp, *sp_tmp, *nsp; + unsigned int num, k, interval, released = 0; + unsigned int total_mem = 0, use_mem = 0; + boolean_t ret, purge_caches = FALSE; + ppnum_t offset; + mcache_obj_t *obj; + float per; + static uint64_t last_drain = 0; + static unsigned char scratch[32]; + static ppnum_t scratch_pa = 0; + + if (mb_drain_maxint == 0 || mb_waiters) + return; + if (scratch_pa == 0) { + bzero(scratch, sizeof(scratch)); + scratch_pa = pmap_find_phys(kernel_pmap, (addr64_t)scratch); + VERIFY(scratch_pa); + } else if (mclverify) { + /* + * Panic if a driver wrote to our scratch memory. + */ + for (k = 0; k < sizeof(scratch); k++) + if (scratch[k]) + panic("suspect DMA to freed address"); + } + /* + * Don't free memory too often as that could cause excessive + * waiting times for mbufs. Purge caches if we were asked to drain + * in the last 5 minutes. + */ + lck_mtx_lock(mbuf_mlock); + if (last_drain == 0) { + last_drain = net_uptime(); + lck_mtx_unlock(mbuf_mlock); + return; + } + interval = net_uptime() - last_drain; + if (interval <= mb_drain_maxint) { + lck_mtx_unlock(mbuf_mlock); + return; + } + if (interval <= mb_drain_maxint * 5) + purge_caches = TRUE; + last_drain = net_uptime(); + /* + * Don't free any memory if we're using 60% or more. + */ + for (mc = 0; mc < NELEM(mbuf_table); mc++) { + total_mem += m_total(mc) * m_maxsize(mc); + use_mem += m_active(mc) * m_maxsize(mc); + } + per = (float)use_mem / (float)total_mem; + if (per >= 0.6) { + lck_mtx_unlock(mbuf_mlock); + return; + } + /* + * Purge all the caches. This effectively disables + * caching for a few seconds, but the mbuf worker thread will + * re-enable them again. 
+ */ + if (purge_caches == TRUE) + for (mc = 0; mc < NELEM(mbuf_table); mc++) { + if (m_total(mc) < m_avgtotal(mc)) + continue; + lck_mtx_unlock(mbuf_mlock); + ret = mcache_purge_cache(m_cache(mc), FALSE); + lck_mtx_lock(mbuf_mlock); + if (ret == TRUE) + m_purge_cnt(mc)++; + } + /* + * Move the objects from the composite class freelist to + * the rudimentary slabs list, but keep at least 10% of the average + * total in the freelist. + */ + for (mc = 0; mc < NELEM(mbuf_table); mc++) { + while (m_cobjlist(mc) && + m_total(mc) < m_avgtotal(mc) && + m_infree(mc) > 0.1 * m_avgtotal(mc) + m_minlimit(mc)) { + obj = m_cobjlist(mc); + m_cobjlist(mc) = obj->obj_next; + obj->obj_next = NULL; + num = cslab_free(mc, obj, 1); + VERIFY(num == 1); + m_free_cnt(mc)++; + m_infree(mc)--; + /* cslab_free() handles m_total */ + } + } + /* + * Free the buffers present in the slab list up to 10% of the total + * average per class. + * + * We walk the list backwards in an attempt to reduce fragmentation. + */ + for (mc = NELEM(mbuf_table) - 1; (int)mc >= 0; mc--) { + TAILQ_FOREACH_SAFE(sp, &m_slablist(mc), sl_link, sp_tmp) { + /* + * Process only unused slabs occupying memory. + */ + if (sp->sl_refcnt != 0 || sp->sl_len == 0 || + sp->sl_base == NULL) + continue; + if (m_total(mc) < m_avgtotal(mc) || + m_infree(mc) < 0.1 * m_avgtotal(mc) + m_minlimit(mc)) + break; + slab_remove(sp, mc); + switch (mc) { + case MC_MBUF: + m_infree(mc) -= NMBPBG; + m_total(mc) -= NMBPBG; + if (mclaudit != NULL) + mcl_audit_free(sp->sl_base, NMBPBG); + break; + case MC_CL: + m_infree(mc) -= NCLPBG; + m_total(mc) -= NCLPBG; + if (mclaudit != NULL) + mcl_audit_free(sp->sl_base, NMBPBG); + break; + case MC_BIGCL: + m_infree(mc)--; + m_total(mc)--; + if (mclaudit != NULL) + mcl_audit_free(sp->sl_base, NMBPBG); + break; + case MC_16KCL: + m_infree(mc)--; + m_total(mc)--; + for (nsp = sp, k = 1; k < NSLABSP16KB; k++) { + nsp = nsp->sl_next; + VERIFY(nsp->sl_refcnt == 0 && + nsp->sl_base != NULL && + nsp->sl_len == 0); + slab_init(nsp, 0, 0, NULL, NULL, 0, 0, + 0); + nsp->sl_flags = 0; + } + if (mclaudit != NULL) + mcl_audit_free(sp->sl_base, 1); + break; + default: + /* + * The composite classes have their own + * freelist (m_cobjlist), so we only + * process rudimentary classes here. + */ + VERIFY(0); + } + m_release_cnt(mc) += m_size(mc); + released += m_size(mc); + offset = ((char *)sp->sl_base - (char *)mbutl) / NBPG; + /* + * Make sure the IOMapper points to a valid, but + * bogus, address. This should prevent further DMA + * accesses to freed memory. 
+ */ + IOMapperInsertPage(mcl_paddr_base, offset, scratch_pa); + mcl_paddr[offset] = 0; + kmem_free(mb_map, (vm_offset_t)sp->sl_base, + sp->sl_len); + slab_init(sp, 0, 0, NULL, NULL, 0, 0, 0); + sp->sl_flags = 0; + } + } + mbstat.m_drain++; + mbstat.m_bigclusters = m_total(MC_BIGCL); + mbstat.m_clusters = m_total(MC_CL); + mbstat.m_mbufs = m_total(MC_MBUF); + mbuf_stat_sync(); + mbuf_mtypes_sync(TRUE); + lck_mtx_unlock(mbuf_mlock); +} + +static int +m_drain_force_sysctl SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int val = 0, err; + + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); + if (val) + m_drain(); + + return (err); +} + SYSCTL_DECL(_kern_ipc); SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, - CTLFLAG_RD | CTLFLAG_LOCKED, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, mbstat_sysctl, "S,mbstat", ""); SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_stat, - CTLFLAG_RD | CTLFLAG_LOCKED, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, mb_stat_sysctl, "S,mb_stat", ""); SYSCTL_PROC(_kern_ipc, OID_AUTO, mleak_top_trace, - CTLFLAG_RD | CTLFLAG_LOCKED, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, mleak_top_trace_sysctl, "S,mb_top_trace", ""); SYSCTL_PROC(_kern_ipc, OID_AUTO, mleak_table, - CTLFLAG_RD | CTLFLAG_LOCKED, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, mleak_table_sysctl, "S,mleak_table", ""); SYSCTL_INT(_kern_ipc, OID_AUTO, mleak_sample_factor, CTLFLAG_RW | CTLFLAG_LOCKED, &mleak_table.mleak_sample_factor, 0, ""); @@ -7200,3 +7577,10 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, mb_normalized, CTLFLAG_RD | CTLFLAG_LOCKED, &mb_normalized, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, mb_watchdog, CTLFLAG_RW | CTLFLAG_LOCKED, &mb_watchdog, 0, ""); +SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_drain_force, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0, + m_drain_force_sysctl, "I", + "Forces the mbuf garbage collection to run"); +SYSCTL_INT(_kern_ipc, OID_AUTO, mb_drain_maxint, + CTLFLAG_RW | CTLFLAG_LOCKED, &mb_drain_maxint, 0, + "Minimum time interval between garbage collection"); diff --git a/bsd/kern/uipc_mbuf2.c b/bsd/kern/uipc_mbuf2.c index fa97af53c..2d6f23f08 100644 --- a/bsd/kern/uipc_mbuf2.c +++ b/bsd/kern/uipc_mbuf2.c @@ -666,6 +666,7 @@ m_tag_init(struct mbuf *m, int all) if (all) { bzero(&m->m_pkthdr.pf_mtag, sizeof (m->m_pkthdr.pf_mtag)); bzero(&m->m_pkthdr.proto_mtag, sizeof (m->m_pkthdr.proto_mtag)); + bzero(&m->m_pkthdr.necp_mtag, sizeof (m->m_pkthdr.necp_mtag)); } } diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index f7f3d5202..6b57b3cf8 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2013 Apple Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -112,6 +114,7 @@ #include #include #include +#include #if CONFIG_MACF #include @@ -120,6 +123,7 @@ #if MULTIPATH #include +#include #endif /* MULTIPATH */ /* TODO: this should be in a header file somewhere */ @@ -170,8 +174,14 @@ static struct filterops sock_filtops = { .f_event = filt_sockev, }; +SYSCTL_DECL(_kern_ipc); + #define EVEN_MORE_LOCKING_DEBUG 0 + int socket_debug = 0; +SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug, + CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, ""); + static int socket_zone = M_SOCKET; so_gen_t so_gencnt; /* generation count for sockets */ @@ -183,13 +193,13 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1) #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3) #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1) +#define DBG_FNC_SOSEND_LIST NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3) #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8)) +#define DBG_FNC_SORECEIVE_LIST NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3) #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8)) #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES) -SYSCTL_DECL(_kern_ipc); - int somaxconn = SOMAXCONN; SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, ""); @@ -226,6 +236,10 @@ int sosendjcl_ignore_capab = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, ""); +int sosendbigcl_ignore_capab = 0; +SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab, + CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, ""); + int sodefunctlog = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED, &sodefunctlog, 0, ""); @@ -238,18 +252,10 @@ int sorestrictrecv = 1; SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED, &sorestrictrecv, 0, "Enable inbound interface restrictions"); -/* - * Socket operation routines. - * These routines are called by the routines in - * sys_socket.c or from a system process, and - * implement the semantics of socket operations by - * switching out to the protocol specific routines. 
- */ +int sorestrictsend = 1; +SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED, + &sorestrictsend, 0, "Enable outbound interface restrictions"); -/* sys_generic.c */ -extern void postevent(struct socket *, struct sockbuf *, int); -extern void evsofree(struct socket *); -extern int tcp_notsent_lowat_check(struct socket *so); extern struct inpcbinfo tcbinfo; /* TODO: these should be in header file */ @@ -280,6 +286,9 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED, void socketinit(void) { + _CASSERT(sizeof(so_gencnt) == sizeof(uint64_t)); + VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t))); + if (socketinit_done) { printf("socketinit: already called...\n"); return; @@ -443,6 +452,7 @@ so_update_last_owner_locked(struct socket *so, proc_t self) proc_getexecutableuuid(self, so->last_uuid, sizeof (so->last_uuid)); } + proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid)); } } @@ -453,6 +463,15 @@ so_update_policy(struct socket *so) (void) inp_update_policy(sotoinpcb(so)); } +#if NECP +static void +so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr) +{ + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) + inp_update_necp_policy(sotoinpcb(so), override_local_addr, override_remote_addr, 0); +} +#endif /* NECP */ + boolean_t so_cache_timer(void) { @@ -511,7 +530,7 @@ soalloc(int waitok, int dom, int type) bzero(so, sizeof (*so)); } if (so != NULL) { - so->so_gencnt = ++so_gencnt; + so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt); so->so_zone = socket_zone; #if CONFIG_MACF_SOCKET /* Convert waitok to M_WAITOK/M_NOWAIT for MAC Framework. */ @@ -580,6 +599,7 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, so->last_upid = proc_uniqueid(p); so->last_pid = proc_pid(p); proc_getexecutableuuid(p, so->last_uuid, sizeof (so->last_uuid)); + proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid)); if (ep != PROC_NULL && ep != p) { so->e_upid = proc_uniqueid(ep); @@ -656,6 +676,17 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, break; } + /* + * Entitlements can't be checked at socket creation time except if the + * application requested a feature guarded by a privilege (c.f., socket + * delegation). + * The priv(9) and the Sandboxing APIs are designed with the idea that + * a privilege check should only be triggered by a userland request. + * A privilege check at socket creation time is time consuming and + * could trigger many authorisation error messages from the security + * APIs. + */ + *aso = so; return (0); @@ -737,7 +768,11 @@ sobindlock(struct socket *so, struct sockaddr *nam, int dolock) so_update_last_owner_locked(so, p); so_update_policy(so); - + +#if NECP + so_update_necp_policy(so, nam, NULL); +#endif /* NECP */ + /* * If this is a bind request on a socket that has been marked * as inactive, reject it now before we go any further. 
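The socket hunks above replace the unlocked `so->so_gencnt = ++so_gencnt` with `OSIncrementAtomic64()`, and socketinit() now asserts the counter's width and alignment before any socket exists. Below is a minimal user-space sketch of the same idea, using C11 atomics in place of the kernel's OSAtomic primitives; every name in it is an illustrative stand-in, not part of xnu.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t gencnt;         /* plays the role of so_gencnt */

struct sock {
	uint64_t gen;                   /* per-socket generation number */
};

static void
sock_alloc(struct sock *so)
{
	/*
	 * atomic_fetch_add() returns the value before the addition, so
	 * adding 1 reproduces the pre-increment semantics of the old
	 * `++so_gencnt` while ruling out the lost update that two
	 * unlocked writers could produce.
	 */
	so->gen = atomic_fetch_add(&gencnt, 1) + 1;
}

int
main(void)
{
	struct sock a, b;

	sock_alloc(&a);
	sock_alloc(&b);
	printf("gen a=%llu b=%llu\n",
	    (unsigned long long)a.gen, (unsigned long long)b.gen);
	return 0;
}

The _CASSERT()/VERIFY() pair added to socketinit() serves the same end: a 64-bit counter only stays race-free if it is wide enough and suitably aligned for the atomic primitive that operates on it.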
@@ -773,6 +808,10 @@ sodealloc(struct socket *so) /* Remove any filters */ sflt_termsock(so); +#if CONTENT_FILTER + cfil_sock_detach(so); +#endif /* CONTENT_FILTER */ + /* Delete the state allocated for msg queues on a socket */ if (so->so_flags & SOF_ENABLE_MSGS) { FREE(so->so_msg_state, M_TEMP); @@ -780,7 +819,7 @@ sodealloc(struct socket *so) } VERIFY(so->so_msg_state == NULL); - so->so_gencnt = ++so_gencnt; + so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt); #if CONFIG_MACF_SOCKET mac_socket_label_destroy(so); @@ -822,7 +861,11 @@ solisten(struct socket *so, int backlog) so_update_last_owner_locked(so, p); so_update_policy(so); - + +#if NECP + so_update_necp_policy(so, NULL, NULL); +#endif /* NECP */ + if (so->so_proto == NULL) { error = EINVAL; goto out; @@ -902,7 +945,7 @@ sofreelastref(struct socket *so, int dealloc) selthreadclear(&so->so_rcv.sb_sel); so->so_rcv.sb_flags &= ~(SB_SEL|SB_UPCALL); so->so_snd.sb_flags &= ~(SB_SEL|SB_UPCALL); - so->so_event = NULL; + so->so_event = sonullevent; return; } if (head != NULL) { @@ -921,7 +964,7 @@ sofreelastref(struct socket *so, int dealloc) selthreadclear(&so->so_rcv.sb_sel); so->so_rcv.sb_flags &= ~(SB_SEL|SB_UPCALL); so->so_snd.sb_flags &= ~(SB_SEL|SB_UPCALL); - so->so_event = NULL; + so->so_event = sonullevent; socket_unlock(head, 1); return; } else { @@ -944,7 +987,7 @@ sofreelastref(struct socket *so, int dealloc) /* 3932268: disable upcall */ so->so_rcv.sb_flags &= ~SB_UPCALL; so->so_snd.sb_flags &= ~SB_UPCALL; - so->so_event = NULL; + so->so_event = sonullevent; if (dealloc) sodealloc(so); @@ -998,6 +1041,17 @@ soclose_locked(struct socket *so) if (so->so_upcallusecount) soclose_wait_locked(so); +#if CONTENT_FILTER + /* + * We have to wait until the content filters are done + */ + if ((so->so_flags & SOF_CONTENT_FILTER) != 0) { + cfil_sock_close_wait(so); + cfil_sock_is_closed(so); + cfil_sock_detach(so); + } +#endif /* CONTENT_FILTER */ + if ((so->so_options & SO_ACCEPTCONN)) { struct socket *sp, *sonext; int socklock = 0; @@ -1114,14 +1168,6 @@ drop: /* NOTREACHED */ } if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) { - /* - * Let NetworkStatistics know this PCB is going away - * before we detach it. - */ - if (nstat_collect && - (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) - nstat_pcb_detach(so->so_pcb); - int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; @@ -1214,6 +1260,9 @@ soacceptlock(struct socket *so, struct sockaddr **nam, int dolock) so_update_last_owner_locked(so, PROC_NULL); so_update_policy(so); +#if NECP + so_update_necp_policy(so, NULL, NULL); +#endif /* NECP */ if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); @@ -1315,6 +1364,10 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock) so_update_last_owner_locked(so, p); so_update_policy(so); +#if NECP + so_update_necp_policy(so, NULL, nam); +#endif /* NECP */ + /* * If this is a listening socket or if this is a previously-accepted * socket that has been marked as inactive, reject the connect request. @@ -1406,6 +1459,9 @@ soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl, { int error; + so_update_last_owner_locked(so, p); + so_update_policy(so); + /* * If this is a listening socket or if this is a previously-accepted * socket that has been marked as inactive, reject the connect request. 
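The soalloc()/sodealloc() hunks above also replace the bare "++so_gencnt" with OSIncrementAtomic64(), matching the size and alignment assertions socketinit() now makes. A reduced sketch of why, outside any socket context (the counter name here is illustrative):

	#include <libkern/OSAtomic.h>

	static volatile SInt64 gencnt;	/* shared; bumped with no lock held */

	static SInt64
	next_gen(void)
	{
		/*
		 * A plain gencnt++ is a read-modify-write that two CPUs
		 * can interleave, handing out duplicate generation
		 * numbers.  The atomic increment is indivisible, so
		 * every caller observes a distinct previous value.
		 */
		return (OSIncrementAtomic64(&gencnt));
	}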
@@ -1585,9 +1641,21 @@ defunct:
 		return (error);
 	}
-	if (so->so_state & SS_CANTSENDMORE)
-		return (EPIPE);
-
+	if (so->so_state & SS_CANTSENDMORE) {
+#if CONTENT_FILTER
+		/*
+		 * Can re-inject data of half-closed connections
+		 */
+		if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
+		    so->so_snd.sb_cfil_thread == current_thread() &&
+		    cfil_sock_data_pending(&so->so_snd) != 0)
+			CFIL_LOG(LOG_INFO,
+			    "so %llx ignore SS_CANTSENDMORE",
+			    (uint64_t)VM_KERNEL_ADDRPERM(so));
+		else
+#endif /* CONTENT_FILTER */
+			return (EPIPE);
+	}
 	if (so->so_error) {
 		error = so->so_error;
 		so->so_error = 0;
@@ -1596,9 +1664,19 @@ defunct:
 
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
-			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
-			    !(resid == 0 && clen != 0))
+			if (((so->so_state & SS_ISCONFIRMING) == 0) &&
+			    (resid != 0 || clen == 0)) {
+#if MPTCP
+				/*
+				 * MPTCP Fast Join sends data before the
+				 * socket is truly connected.
+				 */
+				if ((so->so_flags & (SOF_MP_SUBFLOW |
+				    SOF_MPTCP_FASTJOIN)) !=
+				    (SOF_MP_SUBFLOW | SOF_MPTCP_FASTJOIN))
+#endif /* MPTCP */
 				return (ENOTCONN);
+			}
 		} else if (addr == 0 && !(flags&MSG_HOLD)) {
 			return ((so->so_proto->pr_flags & PR_CONNREQUIRED) ?
 			    ENOTCONN : EDESTADDRREQ);
@@ -1717,9 +1795,19 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
 
 	socket_lock(so, 1);
+
+	/*
+	 * Re-injection should not affect process accounting
+	 */
+	if ((flags & MSG_SKIPCFIL) == 0) {
 	so_update_last_owner_locked(so, p);
 	so_update_policy(so);
-
+
+#if NECP
+		so_update_necp_policy(so, NULL, addr);
+#endif /* NECP */
+	}
+
 	if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
 		error = EOPNOTSUPP;
 		socket_unlock(so, 1);
@@ -1736,10 +1824,10 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
 	 * But it will be used by sockets doing message delivery.
 	 *
-	 * Note: We limit resid to be a positive 32 bits value as we use
+	 * Note: We limit resid to be a positive int value as we use
 	 * imin() to set bytes_to_copy -- radr://14558484
 	 */
-	if ((int32_t)resid < 0 || (so->so_type == SOCK_STREAM &&
+	if (resid < 0 || resid > INT_MAX || (so->so_type == SOCK_STREAM &&
 	    !(so->so_flags & SOF_ENABLE_MSGS) && (flags & MSG_EOR))) {
 		error = EINVAL;
 		socket_unlock(so, 1);
@@ -1779,6 +1867,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 			int chainlength;
 			int bytes_to_copy;
 			boolean_t jumbocl;
+			boolean_t bigcl;
 
 			bytes_to_copy = imin(resid, space);
 
@@ -1787,6 +1876,14 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 			else
 				chainlength = sosendmaxchain;
 
+			/*
+			 * Use big 4 KB cluster only when outgoing
+			 * interface does not want 2 KB clusters
+			 */
+			bigcl =
+			    !(so->so_flags1 & SOF1_IF_2KCL) ||
+			    sosendbigcl_ignore_capab;
+
 			/*
			 * Attempt to use larger than system page-size
 			 * clusters for large writes only if there is
@@ -1795,7 +1892,8 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 			 */
 			jumbocl = sosendjcl && njcl > 0 &&
 			    ((so->so_flags & SOF_MULTIPAGES) ||
-			    sosendjcl_ignore_capab);
+			    sosendjcl_ignore_capab) &&
+			    bigcl;
 
 			socket_unlock(so, 0);
 
@@ -1841,7 +1939,8 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 				}
 
 				if (freelist == NULL &&
-				    bytes_to_copy > MCLBYTES) {
+				    bytes_to_copy > MCLBYTES &&
+				    bigcl) {
 					num_needed =
 					    bytes_to_copy / MBIGCLBYTES;
 
@@ -1987,27 +2086,41 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 			    (resid <= 0)) ?
PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; - - /* - * Socket filter processing - */ - error = sflt_data_out(so, addr, &top, - &control, (sendflags & MSG_OOB) ? - sock_data_filt_flag_oob : 0); - if (error) { - if (error == EJUSTRETURN) { - error = 0; - clen = 0; - control = NULL; - top = NULL; + + if ((flags & MSG_SKIPCFIL) == 0) { + /* + * Socket filter processing + */ + error = sflt_data_out(so, addr, &top, + &control, (sendflags & MSG_OOB) ? + sock_data_filt_flag_oob : 0); + if (error) { + if (error == EJUSTRETURN) { + error = 0; + clen = 0; + control = NULL; + top = NULL; + } + goto release; } - - goto release; +#if CONTENT_FILTER + /* + * Content filter processing + */ + error = cfil_sock_data_out(so, addr, top, + control, (sendflags & MSG_OOB) ? + sock_data_filt_flag_oob : 0); + if (error) { + if (error == EJUSTRETURN) { + error = 0; + clen = 0; + control = NULL; + top = NULL; + } + goto release; + } +#endif /* CONTENT_FILTER */ } - /* - * End Socket filter processing - */ - if (so->so_flags & SOF_ENABLE_MSGS) { /* * Make a copy of control mbuf, @@ -2056,6 +2169,288 @@ out: return (error); } +int +sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray, + u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags) +{ + struct mbuf *m, *freelist = NULL; + user_ssize_t len, resid; + int clen = 0, error, dontroute, mlen; + int atomic = sosendallatonce(so) || top; + int sblocked = 0; + struct proc *p = current_proc(); + u_int uiofirst = 0; + u_int uiolast = 0; + + KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt, + so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat); + + if (so->so_type != SOCK_DGRAM) { + error = EINVAL; + goto out; + } + if (atomic == 0) { + error = EINVAL; + goto out; + } + if (so->so_proto->pr_usrreqs->pru_send_list == NULL) { + error = EPROTONOSUPPORT; + goto out; + } + if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) { + error = EINVAL; + goto out; + } + if (uioarray != NULL) + resid = uio_array_resid(uioarray, uiocnt); + else + resid = mbuf_pkt_list_len(top); + + /* + * In theory resid should be unsigned. + * However, space must be signed, as it might be less than 0 + * if we over-committed, and we must use a signed comparison + * of space and resid. On the other hand, a negative resid + * causes us to loop sending 0-length segments to the protocol. + * + * Note: We limit resid to be a positive int value as we use + * imin() to set bytes_to_copy -- radr://14558484 + */ + if (resid < 0 || resid > INT_MAX) { + error = EINVAL; + goto out; + } + /* + * Disallow functionality not currently supported + * Note: Will need to treat arrays of addresses and controls + */ + if (addr != NULL) { + printf("%s addr not supported\n", __func__); + error = EOPNOTSUPP; + goto out; + } + if (control != NULL) { + printf("%s control not supported\n", __func__); + error = EOPNOTSUPP; + goto out; + } + + socket_lock(so, 1); + so_update_last_owner_locked(so, p); + so_update_policy(so); + +#if NECP + so_update_necp_policy(so, NULL, addr); +#endif /* NECP */ + + dontroute = (flags & MSG_DONTROUTE) && + (so->so_options & SO_DONTROUTE) == 0 && + (so->so_proto->pr_flags & PR_ATOMIC); + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd); + + if (control != NULL) + clen = control->m_len; + + error = sosendcheck(so, addr, resid, clen, atomic, flags, + &sblocked, control); + if (error) + goto release; + + do { + int i; + + if (uioarray == NULL) { + /* + * Data is prepackaged in "top". 
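+			 * The caller already built a complete mbuf packet
+			 * chain, so there is no user data to copy in and
+			 * setting resid to 0 below ends the outer loop
+			 * after a single pass through the send path.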
+ */ + resid = 0; + } else { + int num_needed = 0; + int chainlength; + size_t maxpktlen = 0; + + if (sosendminchain > 0) + chainlength = 0; + else + chainlength = sosendmaxchain; + + socket_unlock(so, 0); + + /* + * Find a set of uio that fit in a reasonable number + * of mbuf packets + */ + for (i = uiofirst; i < uiocnt; i++) { + struct uio *auio = uioarray[i]; + + len = uio_resid(auio); + + /* Do nothing for empty messages */ + if (len == 0) + continue; + + num_needed += 1; + uiolast += 1; + + if (len > maxpktlen) + maxpktlen = len; + + chainlength += len; + if (chainlength > sosendmaxchain) + break; + } + /* + * Nothing left to send + */ + if (num_needed == 0) { + socket_lock(so, 0); + break; + } + /* + * Allocate the mbuf packets at once + */ + freelist = m_allocpacket_internal( + (unsigned int *)&num_needed, + maxpktlen, NULL, M_WAIT, 1, 0); + + if (freelist == NULL) { + socket_lock(so, 0); + error = ENOMEM; + goto release; + } + /* + * Copy each uio of the set into its own mbuf packet + */ + for (i = uiofirst, m = freelist; + i < uiolast && m != NULL; + i++) { + int bytes_to_copy; + struct mbuf *n; + struct uio *auio = uioarray[i]; + + bytes_to_copy = uio_resid(auio); + + /* Do nothing for empty messages */ + if (bytes_to_copy == 0) + continue; + + for (n = m; n != NULL; n = n->m_next) { + mlen = mbuf_maxlen(n); + + len = imin(mlen, bytes_to_copy); + + /* + * Note: uiomove() decrements the iovec + * length + */ + error = uiomove(mtod(n, caddr_t), + len, auio); + if (error != 0) + break; + n->m_len = len; + m->m_pkthdr.len += len; + + VERIFY(m->m_pkthdr.len <= maxpktlen); + + bytes_to_copy -= len; + resid -= len; + } + if (m->m_pkthdr.len == 0) { + printf("%s so %llx pkt %llx len null\n", + __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(m)); + } + if (error != 0) + break; + m = m->m_nextpkt; + } + + socket_lock(so, 0); + + if (error) + goto release; + top = freelist; + freelist = NULL; + } + + if (dontroute) + so->so_options |= SO_DONTROUTE; + + if ((flags & MSG_SKIPCFIL) == 0) { + struct mbuf **prevnextp = NULL; + + for (i = uiofirst, m = top; + i < uiolast && m != NULL; + i++) { + struct mbuf *nextpkt = m->m_nextpkt; + + /* + * Socket filter processing + */ + error = sflt_data_out(so, addr, &m, + &control, 0); + if (error != 0 && error != EJUSTRETURN) + goto release; + +#if CONTENT_FILTER + if (error == 0) { + /* + * Content filter processing + */ + error = cfil_sock_data_out(so, addr, m, + control, 0); + if (error != 0 && error != EJUSTRETURN) + goto release; + } +#endif /* CONTENT_FILTER */ + /* + * Remove packet from the list when + * swallowed by a filter + */ + if (error == EJUSTRETURN) { + error = 0; + if (prevnextp != NULL) + *prevnextp = nextpkt; + else + top = nextpkt; + } + + m = nextpkt; + if (m != NULL) + prevnextp = &m->m_nextpkt; + } + } + if (top != NULL) + error = (*so->so_proto->pr_usrreqs->pru_send_list) + (so, 0, top, addr, control, p); + + if (dontroute) + so->so_options &= ~SO_DONTROUTE; + + clen = 0; + top = NULL; + uiofirst = uiolast; + } while (resid > 0 && error == 0); +release: + if (sblocked) + sbunlock(&so->so_snd, FALSE); /* will unlock socket */ + else + socket_unlock(so, 1); +out: + if (top != NULL) + m_freem(top); + if (control != NULL) + m_freem(control); + if (freelist != NULL) + m_freem_list(freelist); + + KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, resid, + so->so_snd.sb_cc, 0, error); + + return (error); +} + /* * Implement receive operations on a socket. 
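+ * (In outline: records are pulled off so_rcv one at a time, the
+ * MSG_PEEK, MSG_WAITALL and out-of-band cases are handled, and the
+ * payload is copied out through the caller's uio.)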
* We depend on the way that records are added to the sockbuf @@ -2112,6 +2507,13 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat); + /* + * Sanity check on the length passed by caller as we are making 'int' + * comparisons + */ + if (orig_resid < 0 || orig_resid > INT_MAX) + return (EINVAL); + socket_lock(so, 1); so_update_last_owner_locked(so, p); so_update_policy(so); @@ -2208,16 +2610,18 @@ bad: nooob: if (mp != NULL) *mp = NULL; - if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) + + if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) { (*pr->pr_usrreqs->pru_rcvd)(so, 0); + } free_list = NULL; delayed_copy_len = 0; restart: #ifdef MORE_LOCKING_DEBUG if (so->so_usecount <= 1) - printf("soreceive: sblock so=%p ref=%d on socket\n", - so, so->so_usecount); + printf("soreceive: sblock so=0x%llx ref=%d on socket\n", + (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount); #endif /* * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE) @@ -2284,6 +2688,17 @@ restart: goto release; } if (so->so_state & SS_CANTRCVMORE) { +#if CONTENT_FILTER + /* + * Deal with half closed connections + */ + if ((so->so_state & SS_ISDISCONNECTED) == 0 && + cfil_sock_data_pending(&so->so_rcv) != 0) + CFIL_LOG(LOG_INFO, + "so %llx ignore SS_CANTRCVMORE", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + else +#endif /* CONTENT_FILTER */ if (m != NULL) goto dontblock; else @@ -2366,6 +2781,7 @@ dontblock: SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a"); socket_unlock(so, 0); + if (mac_socket_check_received(proc_ucred(p), so, mtod(m, struct sockaddr *)) != 0) { /* @@ -2826,7 +3242,11 @@ dontblock: while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == NULL && (uio_resid(uio) - delayed_copy_len) > 0 && !sosendallatonce(so) && !nextrecord) { - if (so->so_error || so->so_state & SS_CANTRCVMORE) + if (so->so_error || ((so->so_state & SS_CANTRCVMORE) +#if CONTENT_FILTER + && cfil_sock_data_pending(&so->so_rcv) == 0 +#endif /* CONTENT_FILTER */ + )) goto release; /* @@ -3005,37 +3425,486 @@ sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, return (error); } -/* - * Returns: 0 Success - * EINVAL - * ENOTCONN - * :EINVAL - * :EADDRNOTAVAIL[TCP] - * :ENOBUFS[TCP] - * :EMSGSIZE[TCP] - * :EHOSTUNREACH[TCP] - * :ENETUNREACH[TCP] - * :ENETDOWN[TCP] - * :ENOMEM[TCP] - * :EACCES[TCP] - * :EMSGSIZE[TCP] - * :ENOBUFS[TCP] - * :???[TCP] [ignorable: mostly IPSEC/firewall/DLIL] - * :??? 
[other protocol families] - */ int -soshutdown(struct socket *so, int how) +soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray, + u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { - int error; + struct mbuf *m, **mp; + struct mbuf *nextrecord; + struct mbuf *ml = NULL, *free_list = NULL; + int flags, error, offset; + user_ssize_t len; + struct protosw *pr = so->so_proto; + user_ssize_t orig_resid, resid; + struct proc *p = current_proc(); + struct uio *auio = NULL; + int i = 0; + int sblocked = 0; - switch (how) { - case SHUT_RD: - case SHUT_WR: - case SHUT_RDWR: - socket_lock(so, 1); - if ((so->so_state & - (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) == 0) { - error = ENOTCONN; + KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START, + so, uiocnt, + so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat); + + mp = mp0; + if (psa != NULL) + *psa = NULL; + if (controlp != NULL) + *controlp = NULL; + if (flagsp != NULL) + flags = *flagsp &~ MSG_EOR; + else + flags = 0; + /* + * Disallow functionality not currently supported + */ + if (mp0 != NULL) { + printf("%s mp0 not supported\n", __func__); + error = EOPNOTSUPP; + goto out; + } + if (psa != NULL) { + printf("%s sockaddr not supported\n", __func__); + error = EOPNOTSUPP; + goto out; + } + if (controlp != NULL) { + printf("%s control not supported\n", __func__); + error = EOPNOTSUPP; + goto out; + } + + /* + * Sanity checks: + * - Only supports don't wait flags + * - Only support datagram sockets (could be extended to raw) + * - Must be atomic + * - Protocol must support packet chains + * - The uio array is NULL (should we panic?) + */ + if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) { + printf("%s flags not supported\n", __func__); + error = EOPNOTSUPP; + goto out; + } + if (so->so_type != SOCK_DGRAM) { + error = EINVAL; + goto out; + } + if (sosendallatonce(so) == 0) { + error = EINVAL; + goto out; + } + if (so->so_proto->pr_usrreqs->pru_send_list == NULL) { + error = EPROTONOSUPPORT; + goto out; + } + if (uioarray == NULL) { + printf("%s uioarray is NULL\n", __func__); + error = EINVAL; + goto out; + } + if (uiocnt == 0) { + printf("%s uiocnt is 0\n", __func__); + error = EINVAL; + goto out; + } + /* + * Sanity check on the length passed by caller as we are making 'int' + * comparisons + */ + resid = orig_resid = uio_array_resid(uioarray, uiocnt); + if (orig_resid < 0 || orig_resid > INT_MAX) { + error = EINVAL; + goto out; + } + + socket_lock(so, 1); + so_update_last_owner_locked(so, p); + so_update_policy(so); + +#if NECP + so_update_necp_policy(so, NULL, NULL); +#endif /* NECP */ + + /* + * If a recv attempt is made on a previously-accepted socket + * that has been marked as inactive (disconnected), reject + * the request. + */ + if (so->so_flags & SOF_DEFUNCT) { + struct sockbuf *sb = &so->so_rcv; + + error = ENOTCONN; + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", + __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); + /* + * This socket should have been disconnected and flushed + * prior to being returned from sodefunct(); there should + * be no data on its receive list, so panic otherwise. + */ + if (so->so_state & SS_DEFUNCT) + sb_empty_assert(sb, __func__); + goto release; + } + if (mp != NULL) + *mp = NULL; +restart: + /* + * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE) + * and if so just return to the caller. 
This could happen when + * soreceive() is called by a socket upcall function during the + * time the socket is freed. The socket buffer would have been + * locked across the upcall, therefore we cannot put this thread + * to sleep (else we will deadlock) or return EWOULDBLOCK (else + * we may livelock), because the lock on the socket buffer will + * only be released when the upcall routine returns to its caller. + * Because the socket has been officially closed, there can be + * no further read on it. + */ + if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) == + (SS_NOFDREF | SS_CANTRCVMORE)) { + error = 0; + goto release; + } + + error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); + if (error) { + goto release; + } + sblocked = 1; + + /* + * Skip empty uio + */ + auio = uioarray[i]; + while (uio_resid(auio) == 0) { + i++; + if (i >= uiocnt) { + error = 0; + goto release; + } + } + + m = so->so_rcv.sb_mb; + /* + * Block awaiting more datagram if needed + */ + if (m == NULL) { + /* + * Panic if we notice inconsistencies in the socket's + * receive list; both sb_mb and sb_cc should correctly + * reflect the contents of the list, otherwise we may + * end up with false positives during select() or poll() + * which could put the application in a bad state. + */ + SB_MB_CHECK(&so->so_rcv); + + if (so->so_error) { + error = so->so_error; + goto release; + } + if (so->so_state & SS_CANTRCVMORE) { + goto release; + } + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) { + error = ENOTCONN; + goto release; + } + if ((so->so_state & SS_NBIO) || + (flags & (MSG_DONTWAIT|MSG_NBIO))) { + error = EWOULDBLOCK; + goto release; + } + /* + * Do not block if we got some data + * Note: We could use MSG_WAITALL to wait + */ + resid = uio_array_resid(uioarray, uiocnt); + if (resid != orig_resid) { + error = 0; + goto release; + } + + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); + + sbunlock(&so->so_rcv, TRUE); /* keep socket locked */ + sblocked = 0; + + error = sbwait(&so->so_rcv); + if (error) { + goto release; + } + goto restart; + } + + if (m->m_pkthdr.len == 0) { + printf("%s so %llx pkt %llx len is null\n", + __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(m)); + goto restart; + } + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv); + SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); + + /* + * Consume the current uio index as we have a datagram + */ + i += 1; + nextrecord = m->m_nextpkt; + +#if SO_RECEIVE_LIST_SOCKADDR_NOT_YET + if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { + /* + * to be adapted from soreceive() + */ + } +#endif /* SO_RECEIVE_LIST_SOCKADDR_NOT_YET */ + +#if SO_RECEIVE_LIST_CONTROL_NOT_YET + /* + * Process one or more MT_CONTROL mbufs present before any data mbufs + * in the first mbuf chain on the socket buffer. If MSG_PEEK, we + * just copy the data; if !MSG_PEEK, we call into the protocol to + * perform externalization. 
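+	 * ("Externalization" here is the usual BSD step that turns
+	 * kernel-internal control data into a form the receiving
+	 * process can use, e.g. SCM_RIGHTS descriptors being mapped
+	 * into the receiver's file table.)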
+ */ + if (m != NULL && m->m_type == MT_CONTROL) { + /* + * to be adapted from soreceive() + */ + } +#endif /* SO_RECEIVE_LIST_CONTROL_NOT_YET */ + + offset = 0; + + /* + * Loop to copy out the mbufs of the current record + */ + while (m != NULL && uio_resid(auio) > 0 && error == 0) { + len = uio_resid(auio); + + if (m->m_len == 0) + printf("%s: so %llx m %llx m_len is 0\n", + __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(m)); + + /* + * Clip to the residual length + */ + if (len > m->m_len) + len = m->m_len; + /* + * If mp is set, just pass back the mbufs. + * Otherwise copy them out via the uio, then free. + * Sockbuf must be consistent here (points to current mbuf, + * it points to next record) when we drop priority; + * we must note any additions to the sockbuf when we + * block interrupts again. + */ + if (mp != NULL) { + uio_setresid(auio, (uio_resid(auio) - len)); + } else { + SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); + + socket_unlock(so, 0); + error = uiomove(mtod(m, caddr_t), (int)len, auio); + socket_lock(so, 0); + + if (error) + goto release; + } + if (len == m->m_len) { + /* + * m was entirely copied + */ + nextrecord = m->m_nextpkt; + sbfree(&so->so_rcv, m); + m->m_nextpkt = NULL; + + /* + * Move to m_next + */ + if (mp != NULL) { + *mp = m; + mp = &m->m_next; + so->so_rcv.sb_mb = m = m->m_next; + *mp = NULL; + } else { + if (free_list == NULL) + free_list = m; + else + ml->m_next = m; + ml = m; + so->so_rcv.sb_mb = m = m->m_next; + ml->m_next = NULL; + ml->m_nextpkt = NULL; + } + if (m != NULL) { + m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_lastrecord = m; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); + } else { + /* + * Stop the loop on partial copy + */ + if (mp != NULL) { + int copy_flag; + + if (flags & MSG_DONTWAIT) + copy_flag = M_DONTWAIT; + else + copy_flag = M_WAIT; + *mp = m_copym(m, 0, len, copy_flag); + /* + * Failed to allocate an mbuf? + * Adjust uio_resid back, it was + * adjusted down by len bytes which + * we didn't copy over. + */ + if (*mp == NULL) { + uio_setresid(auio, + (uio_resid(auio) + len)); + error = ENOMEM; + break; + } + } + break; + } + } +#ifdef MORE_LOCKING_DEBUG + if (so->so_usecount <= 1) { + panic("%s: after big while so=%llx ref=%d on socket\n", + __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount); + /* NOTREACHED */ + } +#endif + /* + * Tell the caller we made a partial copy + */ + if (m != NULL) { + if (so->so_options & SO_DONTTRUNC) { + m->m_data += len; + m->m_len -= len; + so->so_rcv.sb_cc -= len; + flags |= MSG_RCVMORE; + } else { + (void) sbdroprecord(&so->so_rcv); + nextrecord = so->so_rcv.sb_mb; + m = NULL; + flags |= MSG_TRUNC; + } + } + + if (m == NULL) { + so->so_rcv.sb_mb = nextrecord; + /* + * First part is an inline SB_EMPTY_FIXUP(). Second + * part makes sure sb_lastrecord is up-to-date if + * there is still data in the socket buffer. 
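+		 * (Keeping sb_lastrecord accurate matters: a later
+		 * sbappendrecord() links new records through it, and
+		 * SBLASTRECORDCHK() panics on a stale tail.)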
+ */ + if (so->so_rcv.sb_mb == NULL) { + so->so_rcv.sb_mbtail = NULL; + so->so_rcv.sb_lastrecord = NULL; + } else if (nextrecord->m_nextpkt == NULL) { + so->so_rcv.sb_lastrecord = nextrecord; + } + SB_MB_CHECK(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); + + /* + * We can continue to the next packet as long as: + * - We haven't exhausted the uio array + * - There was no error + * - A packet was not truncated + * - We can still receive more data + */ + if (i < uiocnt && error == 0 && + (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 + && (so->so_state & SS_CANTRCVMORE) == 0) { + sbunlock(&so->so_rcv, TRUE); /* keep socket locked */ + sblocked = 0; + + goto restart; + } + +release: + /* + * pru_rcvd may cause more data to be received if the socket lock + * is dropped so we set MSG_HAVEMORE now based on what we know. + * That way the caller won't be surprised if it receives less data than requested. + */ + if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) + flags |= MSG_HAVEMORE; + + if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) + (*pr->pr_usrreqs->pru_rcvd)(so, flags); + + if (flagsp != NULL) + *flagsp |= flags; + if (sblocked) + sbunlock(&so->so_rcv, FALSE); /* will unlock socket */ + else + socket_unlock(so, 1); +out: + /* + * Amortize the cost + */ + if (free_list != NULL) + m_freem_list(free_list); + + KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error, + 0, 0, 0, 0); + return (error); +} + +/* + * Returns: 0 Success + * EINVAL + * ENOTCONN + * :EINVAL + * :EADDRNOTAVAIL[TCP] + * :ENOBUFS[TCP] + * :EMSGSIZE[TCP] + * :EHOSTUNREACH[TCP] + * :ENETUNREACH[TCP] + * :ENETDOWN[TCP] + * :ENOMEM[TCP] + * :EACCES[TCP] + * :EMSGSIZE[TCP] + * :ENOBUFS[TCP] + * :???[TCP] [ignorable: mostly IPSEC/firewall/DLIL] + * :??? 
[other protocol families] + */ +int +soshutdown(struct socket *so, int how) +{ + int error; + + KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0); + + switch (how) { + case SHUT_RD: + case SHUT_WR: + case SHUT_RDWR: + socket_lock(so, 1); + if ((so->so_state & + (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) == 0) { + error = ENOTCONN; } else { error = soshutdownlock(so, how); } @@ -3046,11 +3915,13 @@ soshutdown(struct socket *so, int how) break; } + KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0); + return (error); } int -soshutdownlock(struct socket *so, int how) +soshutdownlock_final(struct socket *so, int how) { struct protosw *pr = so->so_proto; int error = 0; @@ -3076,7 +3947,34 @@ soshutdownlock(struct socket *so, int how) postevent(so, 0, EV_WCLOSED); } done: - KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0, 0, 0, 0, 0); + KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0); + return (error); +} + +int +soshutdownlock(struct socket *so, int how) +{ + int error = 0; + +#if CONTENT_FILTER + /* + * A content filter may delay the actual shutdown until it + * has processed the pending data + */ + if (so->so_flags & SOF_CONTENT_FILTER) { + error = cfil_sock_shutdown(so, &how); + if (error == EJUSTRETURN) { + error = 0; + goto done; + } else if (error != 0) { + goto done; + } + } +#endif /* CONTENT_FILTER */ + + error = soshutdownlock_final(so, how); + +done: return (error); } @@ -3416,6 +4314,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) case SO_DONTTRUNC: case SO_WANTMORE: case SO_WANTOOBFLAG: + case SO_NOWAKEFROMSLEEP: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); if (error != 0) @@ -3463,17 +4362,39 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) * Make sure the low-water is never greater than * the high-water. */ - case SO_SNDLOWAT: + case SO_SNDLOWAT: { + int space = sbspace(&so->so_snd); + u_int32_t hiwat = so->so_snd.sb_hiwat; + + if (so->so_snd.sb_flags & SB_UNIX) { + struct unpcb *unp = + (struct unpcb *)(so->so_pcb); + if (unp != NULL && unp->unp_conn != NULL) { + hiwat += unp->unp_conn->unp_cc; + } + } + so->so_snd.sb_lowat = - (optval > so->so_snd.sb_hiwat) ? - so->so_snd.sb_hiwat : optval; + (optval > hiwat) ? + hiwat : optval; + + if (space >= so->so_snd.sb_lowat) { + sowwakeup(so); + } break; - case SO_RCVLOWAT: + } + case SO_RCVLOWAT: { + int64_t data_len; so->so_rcv.sb_lowat = (optval > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : optval; + data_len = so->so_rcv.sb_cc + - so->so_rcv.sb_ctl; + if (data_len >= so->so_rcv.sb_lowat) + sorwakeup(so); break; } + } break; case SO_SNDTIMEO: @@ -3561,6 +4482,39 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) error = so_set_restrictions(so, optval); break; + case SO_AWDL_UNRESTRICTED: + if (SOCK_DOM(so) != PF_INET && + SOCK_DOM(so) != PF_INET6) { + error = EOPNOTSUPP; + goto out; + } + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error != 0) + goto out; + if (optval != 0) { + kauth_cred_t cred = NULL; + proc_t ep = PROC_NULL; + + if (so->so_flags & SOF_DELEGATED) { + ep = proc_find(so->e_pid); + if (ep) + cred = kauth_cred_proc_ref(ep); + } + error = priv_check_cred( + cred ? 
cred : so->so_cred, + PRIV_NET_RESTRICTED_AWDL, 0); + if (error == 0) + inp_set_awdl_unrestricted( + sotoinpcb(so)); + if (cred) + kauth_cred_unref(&cred); + if (ep != PROC_NULL) + proc_rele(ep); + } else + inp_clear_awdl_unrestricted(sotoinpcb(so)); + break; + case SO_LABEL: #if CONFIG_MACF_SOCKET if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac), @@ -3782,6 +4736,32 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) error = so_set_effective_uuid(so, euuid, sopt->sopt_p); break; } + +#if NECP + case SO_NECP_ATTRIBUTES: + error = necp_set_socket_attributes(so, sopt); + break; +#endif /* NECP */ + +#if MPTCP + case SO_MPTCP_FASTJOIN: + if (!((so->so_flags & SOF_MP_SUBFLOW) || + ((SOCK_CHECK_DOM(so, PF_MULTIPATH)) && + (SOCK_CHECK_PROTO(so, IPPROTO_TCP))))) { + error = ENOPROTOOPT; + break; + } + + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval)); + if (error != 0) + goto out; + if (optval == 0) + so->so_flags &= ~SOF_MPTCP_FASTJOIN; + else + so->so_flags |= SOF_MPTCP_FASTJOIN; + break; +#endif /* MPTCP */ default: error = ENOPROTOOPT; @@ -3932,6 +4912,7 @@ sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock) case SO_DONTTRUNC: case SO_WANTMORE: case SO_WANTOOBFLAG: + case SO_NOWAKEFROMSLEEP: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof (optval)); @@ -3961,6 +4942,26 @@ integer: } goto integer; + case SO_NUMRCVPKT: + if (so->so_proto->pr_flags & PR_ATOMIC) { + int cnt = 0; + struct mbuf *m1; + + m1 = so->so_rcv.sb_mb; + while (m1 != NULL) { + if (m1->m_type == MT_DATA || + m1->m_type == MT_HEADER || + m1->m_type == MT_OOBDATA) + cnt += 1; + m1 = m1->m_nextpkt; + } + optval = cnt; + goto integer; + } else { + error = EINVAL; + break; + } + case SO_NWRITE: optval = so->so_snd.sb_cc; goto integer; @@ -3970,10 +4971,20 @@ integer: so->so_error = 0; goto integer; - case SO_SNDBUF: - optval = so->so_snd.sb_hiwat; - goto integer; + case SO_SNDBUF: { + u_int32_t hiwat = so->so_snd.sb_hiwat; + if (so->so_snd.sb_flags & SB_UNIX) { + struct unpcb *unp = + (struct unpcb *)(so->so_pcb); + if (unp != NULL && unp->unp_conn != NULL) { + hiwat += unp->unp_conn->unp_cc; + } + } + + optval = hiwat; + goto integer; + } case SO_RCVBUF: optval = so->so_rcv.sb_hiwat; goto integer; @@ -4015,6 +5026,16 @@ integer: optval = so_get_restrictions(so); goto integer; + case SO_AWDL_UNRESTRICTED: + if (SOCK_DOM(so) == PF_INET || + SOCK_DOM(so) == PF_INET6) { + optval = inp_get_awdl_unrestricted( + sotoinpcb(so)); + goto integer; + } else + error = EOPNOTSUPP; + break; + case SO_LABEL: #if CONFIG_MACF_SOCKET if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac), @@ -4119,6 +5140,36 @@ integer: error = flow_divert_token_get(so, sopt); break; #endif /* FLOW_DIVERT */ + +#if NECP + case SO_NECP_ATTRIBUTES: + error = necp_get_socket_attributes(so, sopt); + break; +#endif /* NECP */ + +#if CONTENT_FILTER + case SO_CFIL_SOCK_ID: { + cfil_sock_id_t sock_id; + + sock_id = cfil_sock_id_from_socket(so); + + error = sooptcopyout(sopt, &sock_id, + sizeof(cfil_sock_id_t)); + break; + } +#endif /* CONTENT_FILTER */ + +#if MPTCP + case SO_MPTCP_FASTJOIN: + if (!((so->so_flags & SOF_MP_SUBFLOW) || + ((SOCK_CHECK_DOM(so, PF_MULTIPATH)) && + (SOCK_CHECK_PROTO(so, IPPROTO_TCP))))) { + error = ENOPROTOOPT; + break; + } + optval = (so->so_flags & SOF_MPTCP_FASTJOIN); + break; +#endif /* MPTCP */ default: error = ENOPROTOOPT; @@ -4429,7 +5480,11 @@ filt_soread(struct knote *kn, long hint) kn->kn_data = so->so_oobmark; kn->kn_flags |= 
EV_OOBAND; } else { - if (so->so_state & SS_CANTRCVMORE) { + if ((so->so_state & SS_CANTRCVMORE) +#if CONTENT_FILTER + && cfil_sock_data_pending(&so->so_rcv) == 0 +#endif /* CONTENT_FILTER */ + ) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; if ((hint & SO_FILT_HINT_LOCKED) == 0) @@ -4530,8 +5585,21 @@ filt_sowrite(struct knote *kn, long hint) lowwat = kn->kn_sdata; } if (kn->kn_data >= lowwat) { - if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) { - ret = tcp_notsent_lowat_check(so); + if (so->so_flags & SOF_NOTSENT_LOWAT) { + if ((SOCK_DOM(so) == PF_INET + || SOCK_DOM(so) == PF_INET6) + && so->so_type == SOCK_STREAM) { + ret = tcp_notsent_lowat_check(so); + } +#if MPTCP + else if ((SOCK_DOM(so) == PF_MULTIPATH) && + (SOCK_PROTO(so) == IPPROTO_TCP)) { + ret = mptcp_notsent_lowat_check(so); + } +#endif + else { + return (1); + } } else { ret = 1; } @@ -4612,7 +5680,11 @@ filt_sockev(struct knote *kn, long hint) } if ((kn->kn_sfflags & NOTE_READCLOSED) && - (so->so_state & SS_CANTRCVMORE)) + (so->so_state & SS_CANTRCVMORE) +#if CONTENT_FILTER + && cfil_sock_data_pending(&so->so_rcv) == 0 +#endif /* CONTENT_FILTER */ + ) kn->kn_fflags |= NOTE_READCLOSED; if ((kn->kn_sfflags & NOTE_WRITECLOSED) && @@ -4794,6 +5866,15 @@ somultipages(struct socket *so, boolean_t set) so->so_flags &= ~SOF_MULTIPAGES; } +void +soif2kcl(struct socket *so, boolean_t set) +{ + if (set) + so->so_flags1 |= SOF1_IF_2KCL; + else + so->so_flags1 &= ~SOF1_IF_2KCL; +} + int so_isdstlocal(struct socket *so) { @@ -4919,6 +6000,7 @@ sodefunct(struct proc *p, struct socket *so, int level) sbwakeup(rcv); sbwakeup(snd); + so->so_flags1 |= SOF1_DEFUNCTINPROG; if (rcv->sb_flags & SB_LOCK) sbunlock(rcv, TRUE); /* keep socket locked */ if (snd->sb_flags & SB_LOCK) @@ -4930,8 +6012,8 @@ sodefunct(struct proc *p, struct socket *so, int level) * states are set for the socket. This would also flush out data * hanging off the receive list of this socket. */ - (void) soshutdownlock(so, SHUT_RD); - (void) soshutdownlock(so, SHUT_WR); + (void) soshutdownlock_final(so, SHUT_RD); + (void) soshutdownlock_final(so, SHUT_WR); (void) sodisconnectlocked(so); /* @@ -4999,7 +6081,7 @@ int so_set_restrictions(struct socket *so, uint32_t vals) { int nocell_old, nocell_new; - int ret = 0; + int noexpensive_old, noexpensive_new; /* * Deny-type restrictions are trapdoors; once set they cannot be @@ -5015,34 +6097,40 @@ so_set_restrictions(struct socket *so, uint32_t vals) * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued. 
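+	 * The same trapdoor behavior now covers SO_RESTRICT_DENY_EXPENSIVE,
+	 * which is tracked below exactly like the cellular restriction and
+	 * pushed down to the inpcb via inp_set_noexpensive().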
*/ nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); + noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE); so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN | - SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR)); + SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR | + SO_RESTRICT_DENY_EXPENSIVE)); nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); - - /* other than deny cellular, there's nothing more to do */ - if ((nocell_new - nocell_old) == 0) - return (ret); + noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE); /* we can only set, not clear restrictions */ - VERIFY((nocell_new - nocell_old) > 0); - + if ((nocell_new - nocell_old) == 0 && + (noexpensive_new - noexpensive_old) == 0) + return (0); #if INET6 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { #else if (SOCK_DOM(so) == PF_INET) { #endif /* !INET6 */ - /* if deny cellular is now set, do what's needed for INPCB */ - inp_set_nocellular(sotoinpcb(so)); + if (nocell_new - nocell_old != 0) { + /* if deny cellular is now set, do what's needed for INPCB */ + inp_set_nocellular(sotoinpcb(so)); + } + if (noexpensive_new - noexpensive_old != 0) { + inp_set_noexpensive(sotoinpcb(so)); + } } - return (ret); + return (0); } uint32_t so_get_restrictions(struct socket *so) { return (so->so_restrictions & (SO_RESTRICT_DENY_IN | - SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR)); + SO_RESTRICT_DENY_OUT | + SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE)); } struct sockaddr_entry * @@ -5227,7 +6315,6 @@ so_set_effective_pid(struct socket *so, int epid, struct proc *p) so->e_pid = proc_pid(ep); proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid)); } - done: if (error == 0 && net_io_policy_log) { uuid_string_t buf; @@ -5246,6 +6333,15 @@ done: proc_name_address(ep), error); } + /* Update this socket's policy upon success */ + if (error == 0) { + so->so_policy_gencnt *= -1; + so_update_policy(so); +#if NECP + so_update_necp_policy(so, NULL, NULL); +#endif /* NECP */ + } + if (ep != PROC_NULL) proc_rele(ep); @@ -5337,6 +6433,15 @@ done: SOCK_TYPE(so), buf, error); } + /* Update this socket's policy upon success */ + if (error == 0) { + so->so_policy_gencnt *= -1; + so_update_policy(so); +#if NECP + so_update_necp_policy(so, NULL, NULL); +#endif /* NECP */ + } + return (error); } @@ -5364,3 +6469,50 @@ netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data, kev_post_msg(&ev_msg); } + +void +socket_post_kev_msg(uint32_t ev_code, + struct kev_socket_event_data *ev_data, + uint32_t ev_datalen) +{ + struct kev_msg ev_msg; + + bzero(&ev_msg, sizeof(ev_msg)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS; + ev_msg.event_code = ev_code; + + ev_msg.dv[0].data_ptr = ev_data; + ev_msg.dv[0]. 
data_length = ev_datalen; + + kev_post_msg(&ev_msg); +} + +void +socket_post_kev_msg_closed(struct socket *so) +{ + struct kev_socket_closed ev; + struct sockaddr *socksa = NULL, *peersa = NULL; + int err; + bzero(&ev, sizeof(ev)); + err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa); + if (err == 0) { + err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, + &peersa); + if (err == 0) { + memcpy(&ev.ev_data.kev_sockname, socksa, + min(socksa->sa_len, + sizeof (ev.ev_data.kev_sockname))); + memcpy(&ev.ev_data.kev_peername, peersa, + min(peersa->sa_len, + sizeof (ev.ev_data.kev_peername))); + socket_post_kev_msg(KEV_SOCKET_CLOSED, + &ev.ev_data, sizeof (ev)); + } + } + if (socksa != NULL) + FREE(socksa, M_SONAME); + if (peersa != NULL) + FREE(peersa, M_SONAME); +} diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c index dae44827b..5cbf06334 100644 --- a/bsd/kern/uipc_socket2.c +++ b/bsd/kern/uipc_socket2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2013 Apple Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include @@ -97,8 +98,9 @@ #include -/* TODO: this should be in a header file somewhere */ -extern void postevent(struct socket *, struct sockbuf *, int); +#if MPTCP +#include +#endif #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4) #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5) @@ -127,7 +129,9 @@ u_int32_t sb_max = SB_MAX; /* XXX should be static */ u_int32_t high_sb_max = SB_MAX; static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */ -__private_extern__ int32_t total_sbmb_cnt = 0; +int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0; +int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0; +int64_t sbmb_limreached __attribute__((aligned(8))) = 0; /* Control whether to throttle sockets eligible to be throttled */ __private_extern__ u_int32_t net_io_policy_throttled = 0; @@ -237,6 +241,11 @@ soisdisconnected(struct socket *so) wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); + +#if CONTENT_FILTER + /* Notify content filters as soon as we cannot send/receive data */ + cfil_sock_notify_shutdown(so, SHUT_RDWR); +#endif /* CONTENT_FILTER */ } /* @@ -254,6 +263,11 @@ sodisconnectwakeup(struct socket *so) wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); + +#if CONTENT_FILTER + /* Notify content filters as soon as we cannot send/receive data */ + cfil_sock_notify_shutdown(so, SHUT_RDWR); +#endif /* CONTENT_FILTER */ } /* @@ -576,6 +590,20 @@ sowakeup(struct socket *so, struct sockbuf *sb) so->so_upcallusecount == 0) wakeup((caddr_t)&so->so_upcallusecount); } +#if CONTENT_FILTER + /* + * Trap disconnection events for content filters + */ + if ((so->so_flags & SOF_CONTENT_FILTER) != 0) { + if ((sb->sb_flags & SB_RECV)) { + if (so->so_state & (SS_CANTRCVMORE)) + cfil_sock_notify_shutdown(so, SHUT_RD); + } else { + if (so->so_state & (SS_CANTSENDMORE)) + cfil_sock_notify_shutdown(so, SHUT_WR); + } + } +#endif /* CONTENT_FILTER */ } /* @@ -719,14 +747,22 @@ sbappend(struct sockbuf *sb, struct mbuf *m) if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR)) return (sbappendrecord(sb, m)); - if (sb->sb_flags & SB_RECV) { + if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) { int error = sflt_data_in(so, NULL, &m, NULL, 0); SBLASTRECORDCHK(sb, "sbappend 2"); + +#if CONTENT_FILTER + if (error == 0) + error = cfil_sock_data_in(so, NULL, m, NULL, 0); 
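+		/*
+		 * As with sflt_data_in() above, EJUSTRETURN from the
+		 * content filter means it took ownership of the chain:
+		 * nothing is appended here and the mbufs must not be
+		 * freed by this function.
+		 */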
+#endif /* CONTENT_FILTER */ + if (error != 0) { if (error != EJUSTRETURN) m_freem(m); return (0); } + } else if (m) { + m->m_flags &= ~M_SKIPCFIL; } /* If this is the first record, it's also the last record */ @@ -760,14 +796,22 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m) SBLASTMBUFCHK(sb, __func__); - if (sb->sb_flags & SB_RECV) { + if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) { int error = sflt_data_in(so, NULL, &m, NULL, 0); SBLASTRECORDCHK(sb, "sbappendstream 1"); + +#if CONTENT_FILTER + if (error == 0) + error = cfil_sock_data_in(so, NULL, m, NULL, 0); +#endif /* CONTENT_FILTER */ + if (error != 0) { if (error != EJUSTRETURN) m_freem(m); return (0); } + } else if (m) { + m->m_flags &= ~M_SKIPCFIL; } sbcompress(sb, m, sb->sb_mbtail); @@ -821,11 +865,14 @@ sblastrecordchk(struct sockbuf *sb, const char *where) m = m->m_nextpkt; if (m != sb->sb_lastrecord) { - printf("sblastrecordchk: mb %p lastrecord %p last %p\n", - sb->sb_mb, sb->sb_lastrecord, m); + printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx " + "last 0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb), + (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord), + (uint64_t)VM_KERNEL_ADDRPERM(m)); printf("packet chain:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) - printf("\t%p\n", m); + printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m)); panic("sblastrecordchk from %s", where); } } @@ -843,13 +890,15 @@ sblastmbufchk(struct sockbuf *sb, const char *where) m = m->m_next; if (m != sb->sb_mbtail) { - printf("sblastmbufchk: mb %p mbtail %p last %p\n", - sb->sb_mb, sb->sb_mbtail, m); + printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb), + (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail), + (uint64_t)VM_KERNEL_ADDRPERM(m)); printf("packet tree:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) - printf("%p ", n); + printf("0x%llx ", (uint64_t)VM_KERNEL_ADDRPERM(n)); printf("\n"); } panic("sblastmbufchk from %s", where); @@ -879,15 +928,23 @@ sbappendrecord(struct sockbuf *sb, struct mbuf *m0) return (0); } - if (sb->sb_flags & SB_RECV) { + if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) { int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record); + +#if CONTENT_FILTER + if (error == 0) + error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0); +#endif /* CONTENT_FILTER */ + if (error != 0) { SBLASTRECORDCHK(sb, "sbappendrecord 1"); if (error != EJUSTRETURN) m_freem(m0); return (0); } + } else if (m0) { + m0->m_flags &= ~M_SKIPCFIL; } /* @@ -930,17 +987,25 @@ sbinsertoob(struct sockbuf *sb, struct mbuf *m0) SBLASTRECORDCHK(sb, "sbinsertoob 1"); - if ((sb->sb_flags & SB_RECV) != 0) { + if ((sb->sb_flags & SB_RECV && !(m0->m_flags & M_SKIPCFIL)) != 0) { int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_oob); SBLASTRECORDCHK(sb, "sbinsertoob 2"); + +#if CONTENT_FILTER + if (error == 0) + error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0); +#endif /* CONTENT_FILTER */ + if (error) { if (error != EJUSTRETURN) { m_freem(m0); } return (0); } + } else if (m0) { + m0->m_flags &= ~M_SKIPCFIL; } for (mp = &sb->sb_mb; *mp; mp = &((*mp)->m_nextpkt)) { @@ -1075,10 +1140,16 @@ sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, } /* Call socket data in filters */ - if ((sb->sb_flags & SB_RECV) != 0) { + if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) { int error; error = sflt_data_in(sb->sb_so, asa, &m0, 
&control, 0); SBLASTRECORDCHK(sb, __func__); + +#if CONTENT_FILTER + if (error == 0) + error = cfil_sock_data_in(sb->sb_so, asa, m0, control, 0); +#endif /* CONTENT_FILTER */ + if (error) { if (error != EJUSTRETURN) { if (m0) @@ -1090,6 +1161,8 @@ sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, } return (0); } + } else if (m0) { + m0->m_flags &= ~M_SKIPCFIL; } result = sbappendaddr_internal(sb, asa, m0, control); @@ -1168,11 +1241,17 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, return (0); } - if (sb->sb_flags & SB_RECV) { + if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) { int error; error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0); SBLASTRECORDCHK(sb, __func__); + +#if CONTENT_FILTER + if (error == 0) + error = cfil_sock_data_in(sb->sb_so, NULL, m0, control, 0); +#endif /* CONTENT_FILTER */ + if (error) { if (error != EJUSTRETURN) { if (m0) @@ -1184,6 +1263,8 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, } return (0); } + } else if (m0) { + m0->m_flags &= ~M_SKIPCFIL; } result = sbappendcontrol_internal(sb, m0, control); @@ -1232,10 +1313,9 @@ sbappendmsgstream_rcv(struct sockbuf *sb, struct mbuf *m, uint32_t seqnum, m_eor->m_flags |= M_UNORDERED_DATA; data_len += m_eor->m_len; so->so_msg_state->msg_uno_bytes += m_eor->m_len; - } else { + } else { m_eor->m_flags &= ~M_UNORDERED_DATA; } - if (m_eor->m_next == NULL) break; } @@ -1253,7 +1333,15 @@ sbappendmsgstream_rcv(struct sockbuf *sb, struct mbuf *m, uint32_t seqnum, __func__); } - ret = sbappendrecord(sb, m); + if (!unordered && (sb->sb_mbtail != NULL) && + !(sb->sb_mbtail->m_flags & M_UNORDERED_DATA)) { + sb->sb_mbtail->m_flags &= ~M_EOR; + sbcompress(sb, m, sb->sb_mbtail); + ret = 1; + } else { + ret = sbappendrecord(sb, m); + } + VERIFY(sb->sb_mbtail->m_flags & M_EOR); return (ret); } @@ -1314,7 +1402,8 @@ sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m) SBLASTMBUFCHK(sb, __func__); - mptcp_adj_rmap(so, m); + if (mptcp_adj_rmap(so, m) != 0) + return (0); /* No filter support (SB_RECV) on mptcp subflow sockets */ @@ -1719,7 +1808,8 @@ sbdrop(struct sockbuf *sb, int len) (!(sb->sb_flags & SB_RECV)) && ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) || ((SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH)) && - (SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))))) { + (SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP)))) && + (!(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC))) { mptcp_preproc_sbdrop(m, (unsigned int)len); } #endif /* MPTCP */ @@ -1804,6 +1894,10 @@ sbdrop(struct sockbuf *sb, int len) sb->sb_lastrecord = m; } +#if CONTENT_FILTER + cfil_sock_buf_update(sb); +#endif /* CONTENT_FILTER */ + postevent(0, sb, EV_RWBYTES); KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0); @@ -2018,6 +2112,14 @@ pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, return (EOPNOTSUPP); } +int +pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, struct proc *p) +{ +#pragma unused(so, flags, m, addr, control, p) + return (EOPNOTSUPP); +} + /* * This isn't really a ``null'' operation, but it's the default one * and doesn't do anything destructive. 
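These _notsupp stubs pair with the pru_sanitize() hunk further below, which backfills them into any protocol switch that predates the list operations. A reduced sketch of the effect (the protocol and handler names are invented for illustration):

	/* A hypothetical protocol that only provides the classic send. */
	static int
	example_send(struct socket *so, int flags, struct mbuf *m,
	    struct sockaddr *addr, struct mbuf *control, struct proc *p)
	{
	#pragma unused(so, flags, m, addr, control, p)
		return (0);	/* pretend the data was queued */
	}

	static struct pr_usrreqs example_usrreqs = {
		.pru_send =	example_send,
		/* pru_send_list / pru_sosend_list left unset on purpose */
	};

	/*
	 * After pru_sanitize(&example_usrreqs), the unset pointers refer
	 * to the stubs above, so sendmsg_x()/recvmsg_x() against this
	 * protocol fail cleanly with EOPNOTSUPP instead of jumping
	 * through a NULL pointer.
	 */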
@@ -2049,6 +2151,14 @@ pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, return (EOPNOTSUPP); } +int +pru_sosend_list_notsupp(struct socket *so, struct sockaddr *addr, struct uio **uio, + u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags) +{ +#pragma unused(so, addr, uio, uiocnt, top, control, flags) + return (EOPNOTSUPP); +} + int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) @@ -2057,6 +2167,14 @@ pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, return (EOPNOTSUPP); } +int +pru_soreceive_list_notsupp(struct socket *so, struct sockaddr **paddr, + struct uio **uio, u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +{ +#pragma unused(so, paddr, uio, uiocnt, mp0, controlp, flagsp) + return (EOPNOTSUPP); +} + int pru_shutdown_notsupp(struct socket *so) { @@ -2109,12 +2227,15 @@ pru_sanitize(struct pr_usrreqs *pru) DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp); DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp); DEFAULT(pru->pru_send, pru_send_notsupp); + DEFAULT(pru->pru_send_list, pru_send_list_notsupp); DEFAULT(pru->pru_sense, pru_sense_null); DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp); DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp); DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp); DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp); + DEFAULT(pru->pru_soreceive_list, pru_soreceive_list_notsupp); DEFAULT(pru->pru_sosend, pru_sosend_notsupp); + DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp); DEFAULT(pru->pru_socheckopt, pru_socheckopt_null); #undef DEFAULT } @@ -2143,11 +2264,21 @@ sb_notify(struct sockbuf *sb) int sbspace(struct sockbuf *sb) { + int pending = 0; int space = imin((int)(sb->sb_hiwat - sb->sb_cc), (int)(sb->sb_mbmax - sb->sb_mbcnt)); if (space < 0) space = 0; + /* Compensate for data being processed by content filters */ +#if CONTENT_FILTER + pending = cfil_sock_data_space(sb); +#endif /* CONTENT_FILTER */ + if (pending > space) + space = 0; + else + space -= pending; + return (space); } @@ -2160,11 +2291,15 @@ msgq_sbspace(struct socket *so, struct mbuf *control) { int space = 0, error; u_int32_t msgpri; - VERIFY(so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP && - control != NULL); - error = tcp_get_msg_priority(control, &msgpri); - if (error) - return (0); + VERIFY(so->so_type == SOCK_STREAM && + SOCK_PROTO(so) == IPPROTO_TCP); + if (control != NULL) { + error = tcp_get_msg_priority(control, &msgpri); + if (error) + return (0); + } else { + msgpri = MSG_PRI_0; + } space = (so->so_snd.sb_idealsize / MSG_PRI_COUNT) - so->so_msg_state->msg_priq[msgpri].msgq_bytes; if (space < 0) @@ -2184,7 +2319,11 @@ int soreadable(struct socket *so) { return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat || - (so->so_state & SS_CANTRCVMORE) || + ((so->so_state & SS_CANTRCVMORE) +#if CONTENT_FILTER + && cfil_sock_data_pending(&so->so_rcv) == 0 +#endif /* CONTENT_FILTER */ + ) || so->so_comp.tqh_first || so->so_error); } @@ -2193,12 +2332,34 @@ soreadable(struct socket *so) int sowriteable(struct socket *so) { - return ((!so_wait_for_if_feedback(so) && + if ((so->so_state & SS_CANTSENDMORE) || + so->so_error > 0) + return (1); + + if (!so_wait_for_if_feedback(so) && sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && ((so->so_state & SS_ISCONNECTED) || - (so->so_proto->pr_flags & PR_CONNREQUIRED) == 0)) || - (so->so_state & SS_CANTSENDMORE) || - so->so_error); + !(so->so_proto->pr_flags & PR_CONNREQUIRED))) { + if 
(so->so_flags & SOF_NOTSENT_LOWAT) {
+			if ((SOCK_DOM(so) == PF_INET6
+			    || SOCK_DOM(so) == PF_INET)
+			    && so->so_type == SOCK_STREAM) {
+				return (tcp_notsent_lowat_check(so));
+			}
+#if MPTCP
+			else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
+			    (SOCK_PROTO(so) == IPPROTO_TCP)) {
+				return (mptcp_notsent_lowat_check(so));
+			}
+#endif
+			else {
+				return (1);
+			}
+		} else {
+			return (1);
+		}
+	}
+	return (0);
 }
 
 /* adjust counters in sb reflecting allocation of m */
@@ -2219,6 +2380,8 @@ sballoc(struct sockbuf *sb, struct mbuf *m)
 	}
 	OSAddAtomic(cnt, &total_sbmb_cnt);
 	VERIFY(total_sbmb_cnt > 0);
+	if (total_sbmb_cnt > total_sbmb_cnt_peak)
+		total_sbmb_cnt_peak = total_sbmb_cnt;
 }
 
 /* adjust counters in sb reflecting freeing of m */
@@ -2253,6 +2416,7 @@ sblock(struct sockbuf *sb, uint32_t flags)
 	struct socket *so = sb->sb_so;
 	void * wchan;
 	int error = 0;
+	thread_t tp = current_thread();
 
 	VERIFY((flags & SBL_VALID) == flags);
 
@@ -2268,9 +2432,24 @@ sblock(struct sockbuf *sb, uint32_t flags)
 		/* NOTREACHED */
 	}
 
+	/*
+	 * The content filter thread must hold the sockbuf lock
+	 */
+	if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
+		/*
+		 * Don't panic if we are defunct because SB_LOCK has
+		 * been cleared by sodefunct()
+		 */
+		if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
+			panic("%s: SB_LOCK not held for %p\n",
+			    __func__, sb);
+
+		/* Keep the sockbuf locked */
+		return (0);
+	}
+
 	if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT))
 		return (EWOULDBLOCK);
-
 	/*
 	 * We may get here from sorflush(), in which case "sb" may not
 	 * point to the real socket buffer. Use the actual socket buffer
@@ -2279,7 +2458,13 @@ sblock(struct sockbuf *sb, uint32_t flags)
 	wchan = (sb->sb_flags & SB_RECV) ?
 	    &so->so_rcv.sb_flags : &so->so_snd.sb_flags;
 
-	while (sb->sb_flags & SB_LOCK) {
+	/*
+	 * A content filter thread has exclusive access to the sockbuf
+	 * until it clears the sb_cfil_thread field
+	 */
+	while ((sb->sb_flags & SB_LOCK) ||
+	    ((so->so_flags & SOF_CONTENT_FILTER) &&
+	    sb->sb_cfil_thread != NULL)) {
 		lck_mtx_t *mutex_held;
 
 		/*
@@ -2329,6 +2514,7 @@ sbunlock(struct sockbuf *sb, boolean_t keeplocked)
 {
 	void *lr_saved = __builtin_return_address(0);
 	struct socket *so = sb->sb_so;
+	thread_t tp = current_thread();
 
 	/* so_usecount may be 0 if we get here from sofreelastref() */
 	if (so == NULL) {
@@ -2342,17 +2528,38 @@ sbunlock(struct sockbuf *sb, boolean_t keeplocked)
 		/* NOTREACHED */
 	}
 
-	VERIFY(sb->sb_flags & SB_LOCK);
-	sb->sb_flags &= ~SB_LOCK;
-
-	if (sb->sb_wantlock > 0) {
+	/*
+	 * The content filter thread must hold the sockbuf lock
+	 */
+	if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
 		/*
-		 * We may get here from sorflush(), in which case "sb" may not
-		 * point to the real socket buffer. Use the actual socket
-		 * buffer address from the socket instead.
+		 * Don't panic if we are defunct because SB_LOCK has
+		 * been cleared by sodefunct()
 		 */
-		wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
-		    &so->so_snd.sb_flags);
+		if (!(so->so_flags & SOF_DEFUNCT) &&
+		    !(sb->sb_flags & SB_LOCK) &&
+		    !(so->so_state & SS_DEFUNCT) &&
+		    !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
+			panic("%s: SB_LOCK not held for %p\n",
+			    __func__, sb);
+		}
+		/* Keep the sockbuf locked and proceed */
+	} else {
+		VERIFY((sb->sb_flags & SB_LOCK) ||
+		    (so->so_state & SS_DEFUNCT) ||
+		    (so->so_flags1 & SOF1_DEFUNCTINPROG));
+
+		sb->sb_flags &= ~SB_LOCK;
+
+		if (sb->sb_wantlock > 0) {
+			/*
+			 * We may get here from sorflush(), in which case "sb" may not
+			 * point to the real socket buffer.
Use the actual socket + * buffer address from the socket instead. + */ + wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags : + &so->so_snd.sb_flags); + } } if (!keeplocked) { /* unlock on exit */ @@ -2395,17 +2602,21 @@ soevent(struct socket *so, long hint) soevupcall(so, hint); - /* Don't post an event if this a subflow socket */ - if ((hint & SO_FILT_HINT_IFDENIED) && !(so->so_flags & SOF_MP_SUBFLOW)) + /* + * Don't post an event if this a subflow socket or + * the app has opted out of using cellular interface + */ + if ((hint & SO_FILT_HINT_IFDENIED) && + !(so->so_flags & SOF_MP_SUBFLOW) && + !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) && + !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) soevent_ifdenied(so); } void soevupcall(struct socket *so, u_int32_t hint) { - void (*so_event)(struct socket *, void *, uint32_t); - - if ((so_event = so->so_event) != NULL) { + if (so->so_event != NULL) { caddr_t so_eventarg = so->so_eventarg; hint &= so->so_eventmask; @@ -2606,6 +2817,18 @@ soissrcbackground(struct socket *so) IS_SO_TC_BACKGROUND(so->so_traffic_class)); } +int +soissrcrealtime(struct socket *so) +{ + return (so->so_traffic_class >= SO_TC_AV); +} + +void +sonullevent(struct socket *so, void *arg, uint32_t hint) +{ +#pragma unused(so, arg, hint) +} + /* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB. @@ -2658,10 +2881,6 @@ SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size"); -SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, - CTLFLAG_RD | CTLFLAG_LOCKED, &maxsockets, 0, - "Maximum number of sockets avaliable"); - SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, ""); diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index 09f0e4f4a..106e11dc2 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,7 +78,7 @@ #include #include #include -#include +#include #include #include #include @@ -110,7 +110,6 @@ #define f_offset f_fglob->fg_offset #define f_data f_fglob->fg_data - #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0) #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2) #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1) @@ -125,6 +124,8 @@ #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1)) #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2)) #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3)) +#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8)) +#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8)) /* TODO: should be in header file */ @@ -153,6 +154,14 @@ static int disconnectx_nocancel(struct proc *, struct disconnectx_args *, int *); static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int); +static int internalize_user_msghdr_array(const void *, int, int, u_int, + struct user_msghdr_x *, struct uio **); +static u_int externalize_user_msghdr_array(void *, int, int, u_int, + const struct user_msghdr_x *, struct uio **); + +static void free_uio_array(struct uio **, u_int); +static int uio_array_is_valid(struct uio **, u_int); + /* * System call interface to the socket abstraction. 
*/ @@ -520,11 +529,31 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, fp->f_flag = fflag; fp->f_ops = &socketops; fp->f_data = (caddr_t)so; + socket_lock(head, 0); if (dosocklock) socket_lock(so, 1); + so->so_state &= ~SS_COMP; so->so_head = NULL; + + /* Sync socket non-blocking/async state with file flags */ + if (fp->f_flag & FNONBLOCK) { + so->so_state |= SS_NBIO; + } else { + so->so_state &= ~SS_NBIO; + } + + if (fp->f_flag & FASYNC) { + so->so_state |= SS_ASYNC; + so->so_rcv.sb_flags |= SB_ASYNC; + so->so_snd.sb_flags |= SB_ASYNC; + } else { + so->so_state &= ~SS_ASYNC; + so->so_rcv.sb_flags &= ~SB_ASYNC; + so->so_snd.sb_flags &= ~SB_ASYNC; + } + (void) soacceptlock(so, &sa, 0); socket_unlock(head, 1); if (sa == NULL) { @@ -679,6 +708,12 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) goto out; } + /* + * XXX Workaround to ensure connectx does not fail because + * of unreaped so_error. + */ + so->so_error = 0; + /* * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET * if this is a datagram socket; translate for other types. @@ -1325,6 +1360,174 @@ done: return (error); } +int +sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) +{ + int error = 0; + struct user_msghdr_x *user_msg = NULL; + struct uio **uiop = NULL; + struct socket *so; + u_int i; + struct sockaddr *to = NULL; + struct mbuf *control = NULL; + user_ssize_t len_before = 0, len_after; + int need_drop = 0; + size_t size_of_msghdr; + void *umsgp = NULL; + u_int uiocnt; + + KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0); + + error = file_socket(uap->s, &so); + if (error) { + goto out; + } + need_drop = 1; + if (so == NULL) { + error = EBADF; + goto out; + } + if (so->so_proto->pr_usrreqs->pru_sosend_list == NULL) { + printf("%s no pru_sosend_list\n", __func__); + error = EOPNOTSUPP; + goto out; + } + + /* + * Input parameter range check + */ + if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) { + error = EINVAL; + goto out; + } + user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + M_TEMP, M_WAITOK | M_ZERO); + if (user_msg == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + uiop = _MALLOC(uap->cnt * sizeof(struct uio *), + M_TEMP, M_WAITOK | M_ZERO); + if (uiop == NULL) { + printf("%s _MALLOC() uiop failed\n", __func__); + error = ENOMEM; + goto out; + } + + size_of_msghdr = IS_64BIT_PROCESS(p) ? + sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); + + umsgp = _MALLOC(uap->cnt * size_of_msghdr, + M_TEMP, M_WAITOK | M_ZERO); + if (umsgp == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); + if (error) { + printf("%s copyin() failed\n", __func__); + goto out; + } + error = internalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_WRITE, uap->cnt, user_msg, uiop); + if (error) { + printf("%s copyin_user_msghdr_array() failed\n", __func__); + goto out; + } + /* + * Make sure the size of each message iovec and + * the aggregate size of all the iovec is valid + */ + if (uio_array_is_valid(uiop, uap->cnt) == 0) { + error = EINVAL; + goto out; + } + + /* + * Sanity check on passed arguments + */ + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = &user_msg[i]; + + /* + * No flags on send message + */ + if (mp->msg_flags != 0) { + error = EINVAL; + goto out; + } + /* + * No support for address or ancillary data (yet) + */ + if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { + error = EINVAL; + goto out; + } + if (mp->msg_control != USER_ADDR_NULL || + mp->msg_controllen != 0) { + error = EINVAL; + goto out; + } +#if CONFIG_MACF_SOCKET_SUBSET + /* + * We check the state without holding the socket lock; + * if a race condition occurs, it would simply result + * in an extra call to the MAC check function. + * + * Note: The following check is never true taken with the + * current limitation that we do not accept to pass an address, + * this is effectively placeholder code. If we add support for addresses, + * we will have to check every address. + */ + if ( to != NULL && + !(so->so_state & SS_DEFUNCT) && + (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) + goto out; +#endif /* MAC_SOCKET_SUBSET */ + } + + len_before = uio_array_resid(uiop, uap->cnt); + + error = so->so_proto->pr_usrreqs->pru_sosend_list(so, to, uiop, + uap->cnt, 0, control, uap->flags); + + len_after = uio_array_resid(uiop, uap->cnt); + + if (error != 0) { + if (len_after != len_before && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + /* Generation of SIGPIPE can be controlled per socket */ + if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) + psignal(p, SIGPIPE); + } + if (error == 0) { + uiocnt = externalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_WRITE, uap->cnt, user_msg, uiop); + + *retval = (int)(uiocnt); + } +out: + if (need_drop) + file_drop(uap->s); + if (umsgp != NULL) + _FREE(umsgp, M_TEMP); + if (uiop != NULL) { + free_uio_array(uiop, uap->cnt); + _FREE(uiop, M_TEMP); + } + if (user_msg != NULL) + _FREE(user_msg, M_TEMP); + + KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); + + return (error); +} + /* * Returns: 0 Success * ENOTSOCK @@ -1737,6 +1940,166 @@ done: return (error); } +int +recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) +{ + int error = EOPNOTSUPP; + struct user_msghdr_x *user_msg = NULL; + struct uio **uiop = NULL; + struct socket *so; + user_ssize_t len_before = 0, len_after; + int need_drop = 0; + size_t size_of_msghdr; + void *umsgp = NULL; + u_int i; + u_int uiocnt; + + KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0); + + error = file_socket(uap->s, &so); + if (error) { + goto out; + } + need_drop = 1; + if (so == NULL) { + error = EBADF; + goto out; + } + if (so->so_proto->pr_usrreqs->pru_soreceive_list == NULL) { + printf("%s no pru_soreceive_list\n", __func__); + error = EOPNOTSUPP; + goto out; + } + + /* + * Input parameter range check + */ + if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) { + error = EINVAL; + goto out; + } + user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + M_TEMP, M_WAITOK | M_ZERO); + if (user_msg == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + uiop = _MALLOC(uap->cnt * sizeof(struct uio *), + M_TEMP, M_WAITOK | M_ZERO); + if (uiop == NULL) { + printf("%s _MALLOC() uiop failed\n", __func__); + error = ENOMEM; + goto out; + } + + size_of_msghdr = IS_64BIT_PROCESS(p) ? + sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); + + umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO); + if (umsgp == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); + if (error) { + printf("%s copyin() failed\n", __func__); + goto out; + } + error = internalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_READ, uap->cnt, user_msg, uiop); + if (error) { + printf("%s copyin_user_msghdr_array() failed\n", __func__); + goto out; + } + /* + * Make sure the size of each message iovec and + * the aggregate size of all the iovec is valid + */ + if (uio_array_is_valid(uiop, uap->cnt) == 0) { + error = EINVAL; + goto out; + } + + /* + * Sanity check on passed arguments + */ + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = &user_msg[i]; + + if (mp->msg_flags != 0) { + error = EINVAL; + goto out; + } + /* + * No support for address or ancillary data (yet) + */ + if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { + error = EINVAL; + goto out; + } + if (mp->msg_control != USER_ADDR_NULL || + mp->msg_controllen != 0) { + error = EINVAL; + goto out; + } + } +#if CONFIG_MACF_SOCKET_SUBSET + /* + * We check the state without holding the socket lock; + * if a race condition occurs, it would simply result + * in an extra call to the MAC check function. 
+ */ + if (!(so->so_state & SS_DEFUNCT) && + !(so->so_state & SS_ISCONNECTED) && + !(so->so_proto->pr_flags & PR_CONNREQUIRED) && + (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) + goto out; +#endif /* MAC_SOCKET_SUBSET */ + + len_before = uio_array_resid(uiop, uap->cnt); + + error = so->so_proto->pr_usrreqs->pru_soreceive_list(so, NULL, uiop, + uap->cnt, (struct mbuf **)0, NULL, NULL); + + len_after = uio_array_resid(uiop, uap->cnt); + + if (error) { + if (len_after != len_before && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } + if (error == 0) { + uiocnt = externalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_READ, uap->cnt, user_msg, uiop); + + error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr); + if (error) { + printf("%s copyout() failed\n", __func__); + goto out; + } + *retval = (int)(uiocnt); + } +out: + if (need_drop) + file_drop(uap->s); + if (umsgp != NULL) + _FREE(umsgp, M_TEMP); + if (uiop != NULL) { + free_uio_array(uiop, uap->cnt); + _FREE(uiop, M_TEMP); + } + if (user_msg != NULL) + _FREE(user_msg, M_TEMP); + + KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); + + return (error); +} + /* * Returns: 0 Success * EBADF @@ -2258,6 +2621,174 @@ getsockaddrlist(struct socket *so, struct sockaddr_list **slp, return (error); } +int +internalize_user_msghdr_array(const void *src, int spacetype, int direction, + u_int count, struct user_msghdr_x *dst, struct uio **uiop) +{ + int error = 0; + u_int i; + + for (i = 0; i < count; i++) { + uio_t auio; + struct user_iovec *iovp; + struct user_msghdr_x *user_msg = &dst[i]; + + if (spacetype == UIO_USERSPACE64) { + struct user64_msghdr_x *msghdr64; + + msghdr64 = ((struct user64_msghdr_x *)src) + i; + + user_msg->msg_name = msghdr64->msg_name; + user_msg->msg_namelen = msghdr64->msg_namelen; + user_msg->msg_iov = msghdr64->msg_iov; + user_msg->msg_iovlen = msghdr64->msg_iovlen; + user_msg->msg_control = msghdr64->msg_control; + user_msg->msg_controllen = msghdr64->msg_controllen; + user_msg->msg_flags = msghdr64->msg_flags; + user_msg->msg_datalen = msghdr64->msg_datalen; + } else { + struct user32_msghdr_x *msghdr32; + + msghdr32 = ((struct user32_msghdr_x *)src) + i; + + user_msg->msg_name = msghdr32->msg_name; + user_msg->msg_namelen = msghdr32->msg_namelen; + user_msg->msg_iov = msghdr32->msg_iov; + user_msg->msg_iovlen = msghdr32->msg_iovlen; + user_msg->msg_control = msghdr32->msg_control; + user_msg->msg_controllen = msghdr32->msg_controllen; + user_msg->msg_flags = msghdr32->msg_flags; + user_msg->msg_datalen = msghdr32->msg_datalen; + } + + if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) { + error = EMSGSIZE; + goto done; + } + auio = uio_create(user_msg->msg_iovlen, 0, spacetype, direction); + if (auio == NULL) { + error = ENOMEM; + goto done; + } + uiop[i] = auio; + + if (user_msg->msg_iovlen) { + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto done; + } + error = copyin_user_iovec_array(user_msg->msg_iov, + spacetype, user_msg->msg_iovlen, iovp); + if (error) + goto done; + user_msg->msg_iov = CAST_USER_ADDR_T(iovp); + + error = uio_calculateresid(auio); + if (error) + goto done; + user_msg->msg_datalen = uio_resid(auio); + } else { + user_msg->msg_datalen = 0; + } + } +done: + return (error); +} + +u_int +externalize_user_msghdr_array(void *dst, int spacetype, int direction, + u_int count, const struct user_msghdr_x *src, struct uio **uiop) +{ +#pragma 
unused(direction) + u_int i; + int seenlast = 0; + u_int retcnt = 0; + + for (i = 0; i < count; i++) { + const struct user_msghdr_x *user_msg = &src[i]; + uio_t auio = uiop[i]; + user_ssize_t len = user_msg->msg_datalen - uio_resid(auio); + + if (user_msg->msg_datalen != 0 && len == 0) + seenlast = 1; + + if (seenlast == 0) + retcnt ++; + + if (spacetype == UIO_USERSPACE64) { + struct user64_msghdr_x *msghdr64; + + msghdr64 = ((struct user64_msghdr_x *)dst) + i; + + msghdr64->msg_flags = user_msg->msg_flags; + msghdr64->msg_datalen = len; + + } else { + struct user32_msghdr_x *msghdr32; + + msghdr32 = ((struct user32_msghdr_x *)dst) + i; + + msghdr32->msg_flags = user_msg->msg_flags; + msghdr32->msg_datalen = len; + } + } + return (retcnt); +} + +void +free_uio_array(struct uio **uiop, u_int count) +{ + u_int i; + + for (i = 0; i < count; i++) { + if (uiop[i] != NULL) + uio_free(uiop[i]); + } +} + +__private_extern__ user_ssize_t +uio_array_resid(struct uio **uiop, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct uio *auio = uiop[i]; + + if (auio!= NULL) + len += uio_resid(auio); + } + return (len); +} + +int +uio_array_is_valid(struct uio **uiop, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct uio *auio = uiop[i]; + + if (auio != NULL) { + user_ssize_t resid = uio_resid(auio); + + /* + * Sanity check on the validity of the iovec: + * no point of going over sb_max + */ + if (resid < 0 || (u_int32_t)resid > sb_max) + return (0); + + len += resid; + if (len < 0 || (u_int32_t)len > sb_max) + return (0); + } + } + return (1); +} + #if SENDFILE #define SFUIOBUFS 64 @@ -2273,20 +2804,6 @@ getsockaddrlist(struct socket *so, struct sockaddr_list **slp, #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES) #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES) -size_t mbuf_pkt_maxlen(mbuf_t m); - -__private_extern__ size_t -mbuf_pkt_maxlen(mbuf_t m) -{ - size_t maxlen = 0; - - while (m) { - maxlen += mbuf_maxlen(m); - m = mbuf_next(m); - } - return (maxlen); -} - static void alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks, struct mbuf **m, boolean_t jumbocl) @@ -2554,7 +3071,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) socket_unlock(so, 0); alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl); - pktlen = mbuf_pkt_maxlen(m0); + pktlen = mbuf_pkthdr_maxlen(m0); if (pktlen < (size_t)xfsize) xfsize = pktlen; diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index a01ac5eb2..71c4fce53 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
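The batch syscalls added above, sendmsg_x() and recvmsg_x(), move an array of messages through a single trap: the handler copies in an array of user msghdr_x structures, internalizes each into a uio (capping every message and the running aggregate at sb_max via uio_array_is_valid()), and hands the whole array to pru_sosend_list()/pru_soreceive_list(). A user-space sketch of the send side is below; these calls have no public wrapper in this release, so the prototype and the struct layout are assumptions mirroring the user_msghdr_x fields internalized above, not a documented API.

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/*
 * Assumed layout, mirroring the fields copied by
 * internalize_user_msghdr_array(); treat as hypothetical.
 */
struct msghdr_x {
	void		*msg_name;	/* must be NULL (not supported yet) */
	socklen_t	msg_namelen;	/* must be 0 */
	struct iovec	*msg_iov;	/* scatter/gather array */
	int		msg_iovlen;	/* # elements in msg_iov */
	void		*msg_control;	/* must be NULL (not supported yet) */
	socklen_t	msg_controllen;	/* must be 0 */
	int		msg_flags;	/* must be 0 on send */
	size_t		msg_datalen;	/* byte length of the iovec payload */
};

/* Assumed stub matching the syscalls.master entry; no public wrapper. */
extern ssize_t sendmsg_x(int s, const struct msghdr_x *msgp,
    unsigned int cnt, int flags);

ssize_t
send_two(int s, char *p1, size_t n1, char *p2, size_t n2)
{
	struct iovec iov[2] = {
		{ .iov_base = p1, .iov_len = n1 },
		{ .iov_base = p2, .iov_len = n2 },
	};
	struct msghdr_x msgs[2];

	/*
	 * Zeroed fields matter: the per-message sanity checks above
	 * reject non-zero msg_name, msg_control, or msg_flags.
	 */
	memset(msgs, 0, sizeof(msgs));
	msgs[0].msg_iov = &iov[0];
	msgs[0].msg_iovlen = 1;
	msgs[1].msg_iov = &iov[1];
	msgs[1].msg_iovlen = 1;

	/*
	 * Per externalize_user_msghdr_array(), the return value counts
	 * the leading messages the kernel consumed data from, not bytes.
	 */
	return (sendmsg_x(s, msgs, 2, 0));
}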
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -553,7 +553,12 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; - snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; + if ((int32_t)snd->sb_hiwat >= + (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) { + snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; + } else { + snd->sb_hiwat = 0; + } unp->unp_conn->unp_cc = rcv->sb_cc; if (didreceive) { control = NULL; @@ -1671,10 +1676,12 @@ unp_pcblist SYSCTL_HANDLER_ARGS return (error); } -SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", "List of active local datagram sockets"); -SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); @@ -1817,10 +1824,12 @@ unp_pcblist64 SYSCTL_HANDLER_ARGS return (error); } -SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64", "List of active local datagram sockets 64 bit"); -SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64", "List of active local stream sockets 64 bit"); @@ -2116,10 +2125,6 @@ unp_gc(void) continue; } #ifdef notdef - /* - * if this code is enabled need to run - * under network funnel - */ if (so->so_rcv.sb_flags & SB_LOCK) { /* * This is problematical; it's not clear @@ -2277,7 +2282,6 @@ unp_listen(struct unpcb *unp, proc_t p) return (0); } -/* should run under kernel funnel */ static void unp_scan(struct mbuf *m0, void (*op)(struct fileglob *)) { @@ -2306,7 +2310,6 @@ unp_scan(struct mbuf *m0, void (*op)(struct fileglob *)) } } -/* should run under kernel funnel */ static void unp_mark(struct fileglob *fg) { @@ -2323,7 +2326,6 @@ unp_mark(struct fileglob *fg) unp_defer++; } -/* should run under kernel funnel */ static void unp_discard(struct fileglob *fg) { diff --git a/bsd/kern/vm_pressure.c b/bsd/kern/vm_pressure.c index dfb50afbd..27c1aed10 100644 --- a/bsd/kern/vm_pressure.c +++ b/bsd/kern/vm_pressure.c @@ -54,6 +54,7 @@ * This value is the threshold that a process must meet to be considered for scavenging. 
*/ #define VM_PRESSURE_MINIMUM_RSIZE 10 /* MB */ + #define VM_PRESSURE_NOTIFY_WAIT_PERIOD 10000 /* milliseconds */ void vm_pressure_klist_lock(void); @@ -62,8 +63,8 @@ void vm_pressure_klist_unlock(void); static void vm_dispatch_memory_pressure(void); void vm_reset_active_list(void); -#if !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM) -static kern_return_t vm_try_pressure_candidates(void); +#if CONFIG_MEMORYSTATUS +static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process); #endif static lck_mtx_t vm_pressure_klist_mutex; @@ -170,17 +171,13 @@ void consider_vm_pressure_events(void) vm_dispatch_memory_pressure(); } -#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM - -static void vm_dispatch_memory_pressure(void) -{ - /* Update the pressure level and target the foreground or next-largest process as appropriate */ - memorystatus_update_vm_pressure(FALSE); -} +#if CONFIG_MEMORYSTATUS /* Jetsam aware version. Called with lock held */ -static struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) { +struct knote *vm_find_knote_from_pid(pid_t, struct klist *); + +struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) { struct knote *kn = NULL; SLIST_FOREACH(kn, list, kn_selnext) { @@ -219,8 +216,9 @@ int vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked) { ret = 0; } else { kn = vm_find_knote_from_pid(pid, &vm_pressure_klist_dormant); - if (!kn) { + if (kn) { KNOTE(&vm_pressure_klist_dormant, pid); + ret = 0; } } @@ -380,17 +378,17 @@ void vm_find_pressure_candidate(void) exit: vm_pressure_klist_unlock(); } +#endif /* CONFIG_MEMORYSTATUS */ -#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ struct knote * -vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level); +vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process); -kern_return_t vm_pressure_notification_without_levels(void); -kern_return_t vm_pressure_notify_dispatch_vm_clients(void); +kern_return_t vm_pressure_notification_without_levels(boolean_t target_foreground_process); +kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process); kern_return_t -vm_pressure_notify_dispatch_vm_clients(void) +vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process) { vm_pressure_klist_lock(); @@ -402,7 +400,7 @@ vm_pressure_notify_dispatch_vm_clients(void) VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n"); - if (KERN_SUCCESS == vm_try_pressure_candidates()) { + if (KERN_SUCCESS == vm_try_pressure_candidates(target_foreground_process)) { vm_pressure_klist_unlock(); return KERN_SUCCESS; } @@ -424,11 +422,10 @@ extern vm_pressure_level_t convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t); struct knote * -vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level) +vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process) { struct knote *kn = NULL, *kn_max = NULL; unsigned int resident_max = 0; - kern_return_t kr = KERN_SUCCESS; struct timeval curr_tstamp = {0, 0}; int elapsed_msecs = 0; int selected_task_importance = 0; @@ -473,8 +470,6 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int SLIST_FOREACH(kn, candidate_list, kn_selnext) { - struct mach_task_basic_info basic_info; - mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT; unsigned int resident_size = 0; proc_t p = PROC_NULL; struct task* t 
= TASK_NULL; @@ -490,11 +485,21 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int } proc_list_unlock(); +#if CONFIG_MEMORYSTATUS + if (target_foreground_process == TRUE && !memorystatus_is_foreground_locked(p)) { + /* + * Skip process not marked foreground. + */ + proc_rele(p); + continue; + } +#endif /* CONFIG_MEMORYSTATUS */ + t = (struct task *)(p->task); timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp); elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000; - + if ((level == -1) && (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD)) { proc_rele(p); continue; @@ -506,26 +511,28 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int * registered for the current level. */ vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level); - + if ((kn->kn_sfflags & dispatch_level) == 0) { proc_rele(p); continue; } } - - if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) { - VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr); + +#if CONFIG_MEMORYSTATUS + if (target_foreground_process == FALSE && !memorystatus_bg_pressure_eligible(p)) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid); proc_rele(p); - continue; - } + continue; + } +#endif /* CONFIG_MEMORYSTATUS */ curr_task_importance = task_importance_estimate(t); /* - * We don't want a small process to block large processes from - * being notified again. - */ - resident_size = (basic_info.resident_size)/(MB); + * We don't want a small process to block large processes from + * being notified again. + */ + resident_size = (get_task_phys_footprint(t))/(1024*1024ULL); //(MB); if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) { @@ -534,13 +541,31 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int * Warning or Critical Pressure. */ if (pressure_increase) { - if ((curr_task_importance <= selected_task_importance) && (resident_size > resident_max)) { + if ((curr_task_importance < selected_task_importance) || + ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { + + /* + * We have found a candidate process which is: + * a) at a lower importance than the current selected process + * OR + * b) has importance equal to that of the current selected process but is larger + */ + if (task_has_been_notified(t, level) == FALSE) { consider_knote = TRUE; } } } else { - if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) { + if ((curr_task_importance > selected_task_importance) || + ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { + + /* + * We have found a candidate process which is: + * a) at a higher importance than the current selected process + * OR + * b) has importance equal to that of the current selected process but is larger + */ + if (task_has_been_notified(t, level) == FALSE) { consider_knote = TRUE; } @@ -550,7 +575,8 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int /* * Pressure back to normal. 
*/ - if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) { + if ((curr_task_importance > selected_task_importance) || + ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) { consider_knote = TRUE; @@ -590,51 +616,67 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int /* * vm_pressure_klist_lock is held for this routine. */ -kern_return_t vm_pressure_notification_without_levels(void) +kern_return_t vm_pressure_notification_without_levels(boolean_t target_foreground_process) { struct knote *kn_max = NULL; pid_t target_pid = -1; struct klist dispatch_klist = { NULL }; proc_t target_proc = PROC_NULL; + struct klist *candidate_list = NULL; - kn_max = vm_pressure_select_optimal_candidate_to_notify(&vm_pressure_klist, -1); + candidate_list = &vm_pressure_klist; + + kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process); if (kn_max == NULL) { - return KERN_FAILURE; + if (target_foreground_process) { + /* + * Doesn't matter if the process had been notified earlier on. + * This is a very specific request. Deliver it. + */ + candidate_list = &vm_pressure_klist_dormant; + kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process); + } + + if (kn_max == NULL) { + return KERN_FAILURE; + } } target_proc = kn_max->kn_kq->kq_p; - KNOTE_DETACH(&vm_pressure_klist, kn_max); + KNOTE_DETACH(candidate_list, kn_max); if (target_proc != PROC_NULL) { target_pid = target_proc->p_pid; - + memoryshot(VM_PRESSURE_EVENT, DBG_FUNC_NONE); KNOTE_ATTACH(&dispatch_klist, kn_max); KNOTE(&dispatch_klist, target_pid); KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max); +#if CONFIG_MEMORYSTATUS + memorystatus_send_pressure_note(target_pid); +#endif /* CONFIG_MEMORYSTATUS */ + microuptime(&target_proc->vm_pressure_last_notify_tstamp); } return KERN_SUCCESS; } -static kern_return_t vm_try_pressure_candidates(void) +static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process) { /* * This takes care of candidates that use NOTE_VM_PRESSURE. * It's a notification without indication of the level * of memory pressure. */ - return (vm_pressure_notification_without_levels()); + return (vm_pressure_notification_without_levels(target_foreground_process)); } -#endif /* !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM) */ - /* * Remove all elements from the dormant list and place them on the active list. * Called with klist lock held. 
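The reworked vm_pressure_select_optimal_candidate_to_notify() above drops the MACH_TASK_BASIC_INFO query in favor of the physical footprint, skips tasks already notified at the current level (and, when targeting the foreground, tasks not marked foreground), and makes the ordering strict: rising pressure prefers the lowest-importance task, easing pressure the highest, with footprint only breaking importance ties. A compact sketch of that selection policy follows; the types and the helper are hypothetical stand-ins, not kernel interfaces.

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical stand-ins for the kernel state consulted above. */
struct candidate {
	int		importance;	/* cf. task_importance_estimate() */
	unsigned int	resident_mb;	/* cf. get_task_phys_footprint() / (1024*1024) */
	bool		notified;	/* cf. task_has_been_notified() */
};

#define MINIMUM_RSIZE_MB	10	/* cf. VM_PRESSURE_MINIMUM_RSIZE */

static struct candidate *
pick_candidate(struct candidate *c, size_t n, bool pressure_increase)
{
	struct candidate *best = NULL;

	for (size_t i = 0; i < n; i++) {
		if (c[i].resident_mb < MINIMUM_RSIZE_MB)
			continue;	/* too small to be worth scavenging */
		if (c[i].notified)
			continue;	/* already told about this level */
		if (best == NULL) {
			best = &c[i];
			continue;
		}
		/*
		 * Rising pressure walks down the importance ladder,
		 * easing pressure walks back up; footprint breaks ties.
		 */
		int cmp = pressure_increase ?
		    best->importance - c[i].importance :
		    c[i].importance - best->importance;
		if (cmp > 0 ||
		    (cmp == 0 && c[i].resident_mb > best->resident_mb))
			best = &c[i];
	}
	return (best);
}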
diff --git a/bsd/kern/vm_pressure.h b/bsd/kern/vm_pressure.h index 5386ddd1a..402283583 100644 --- a/bsd/kern/vm_pressure.h +++ b/bsd/kern/vm_pressure.h @@ -40,10 +40,10 @@ void vm_knote_unregister(struct knote *); void consider_vm_pressure_events(void); void vm_pressure_proc_cleanup(proc_t); -#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM +#if VM_PRESSURE_EVENTS void vm_find_pressure_foreground_candidates(void); void vm_find_pressure_candidate(void); boolean_t vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked); -#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ +#endif /* VM_PRESSURE_EVENTS */ #endif /* VM_PRESSURE_H */ diff --git a/bsd/machine/Makefile b/bsd/machine/Makefile index 929228907..b7c7225dc 100644 --- a/bsd/machine/Makefile +++ b/bsd/machine/Makefile @@ -11,8 +11,8 @@ include $(MakeInc_def) DATAFILES = \ byte_order.h endian.h fasttrap_isa.h \ limits.h param.h profile.h \ - setjmp.h signal.h types.h\ - vmparam.h _structs.h _types.h _limits.h _param.h \ + signal.h types.h \ + vmparam.h _types.h _limits.h _param.h \ _mcontext.h KERNELFILES = \ @@ -20,7 +20,7 @@ KERNELFILES = \ byte_order.h endian.h \ limits.h param.h profile.h \ signal.h spl.h types.h \ - vmparam.h _structs.h _types.h _limits.h _param.h \ + vmparam.h _types.h _limits.h _param.h \ _mcontext.h diff --git a/bsd/man/Makefile b/bsd/man/Makefile index dfdb1b572..45da1d7e2 100644 --- a/bsd/man/Makefile +++ b/bsd/man/Makefile @@ -7,7 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTMAN_SUBDIRS = \ +INSTTEXTFILES_SUBDIRS = \ man2 \ man3 \ man4 \ diff --git a/bsd/man/man2/Makefile b/bsd/man/man2/Makefile index 93d247a32..5c906991d 100644 --- a/bsd/man/man2/Makefile +++ b/bsd/man/man2/Makefile @@ -39,10 +39,13 @@ DATAFILES = \ dup2.2 \ execve.2 \ exchangedata.2 \ + faccessat.2 \ fchdir.2 \ fchflags.2 \ fchmod.2 \ + fchmodat.2 \ fchown.2 \ + fchownat.2 \ fcntl.2 \ fgetattrlist.2 \ fsetattrlist.2 \ @@ -56,12 +59,15 @@ DATAFILES = \ fsetxattr.2 \ fstat.2 \ fstat64.2 \ + fstatat.2 \ fstatfs.2 \ fstatfs64.2 \ fsync.2 \ ftruncate.2 \ futimes.2 \ getattrlist.2 \ + getattrlistat.2 \ + getattrlistbulk.2 \ getaudit.2 \ getaudit_addr.2 \ getauid.2 \ @@ -102,6 +108,7 @@ DATAFILES = \ kqueue.2 \ lchown.2 \ link.2 \ + linkat.2 \ listen.2 \ listxattr.2 \ lseek.2 \ @@ -111,6 +118,7 @@ DATAFILES = \ mincore.2 \ minherit.2 \ mkdir.2 \ + mkdirat.2 \ mkfifo.2 \ mknod.2 \ mlock.2 \ @@ -123,6 +131,7 @@ DATAFILES = \ nfsclnt.2 \ nfssvc.2 \ open.2 \ + openat.2 \ pathconf.2 \ pipe.2 \ poll.2 \ @@ -135,12 +144,14 @@ DATAFILES = \ quotactl.2 \ read.2 \ readlink.2 \ + readlinkat.2 \ readv.2 \ reboot.2 \ recv.2 \ recvfrom.2 \ recvmsg.2 \ rename.2 \ + renameat.2 \ removexattr.2 \ revoke.2 \ rmdir.2 \ @@ -200,12 +211,14 @@ DATAFILES = \ statfs.2 \ statfs64.2 \ symlink.2 \ + symlinkat.2 \ sync.2 \ syscall.2 \ truncate.2 \ umask.2 \ undelete.2 \ unlink.2 \ + unlinkat.2 \ unmount.2 \ utimes.2 \ vfork.2 \ diff --git a/bsd/man/man2/access.2 b/bsd/man/man2/access.2 index d1114f5e9..2c89f7a11 100644 --- a/bsd/man/man2/access.2 +++ b/bsd/man/man2/access.2 @@ -37,7 +37,8 @@ .Dt ACCESS 2 .Os BSD 4 .Sh NAME -.Nm access +.Nm access , +.Nm faccessat .Nd check access permissions of a file or pathname .Sh SYNOPSIS .Fd #include @@ -46,6 +47,8 @@ .Fa "const char *path" .Fa "int amode" .Fc +.Ft int +.Fn faccessat "int fd" "const char *path" "int mode" "int flag" .Sh DESCRIPTION The .Fn access @@ -75,6 +78,38 @@ and the real group access list (including the real group ID) are used in place of the effective 
ID for verifying permission. .Pp +The +.Fn faccessat +system call is equivalent to +.Fn access +except in the case where +.Fa path +specifies a relative path. +In this case the file whose accessibility is to be determined is +located relative to the directory associated with the file descriptor +.Fa fd +instead of the current working directory. +If +.Fn faccessat +is passed the special value +.Dv AT_FDCWD +in the +.Fa fd +parameter, the current working directory is used and the behavior is +identical to a call to +.Fn access . +Values for +.Fa flag +are constructed by a bitwise-inclusive OR of flags from the following +list, defined in +.In fcntl.h : +.Bl -tag -width indent +.It Dv AT_EACCESS +The checks for accessibility are performed using the effective user and group +IDs instead of the real user and group ID as required in a call to +.Fn access . +.El +.Pp Even if a process has appropriate privileges and indicates success for .Dv X_OK , the file may not actually have execute permission bits set. @@ -140,6 +175,33 @@ Write access is requested for a file on a read-only file system. Write access is requested for a pure procedure (shared text) file that is presently being executed. .El +.Pp +Also, the +.Fn faccessat +system call may fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa path +argument does not specify an absolute path and the +.Fa fd +argument is +neither +.Dv AT_FDCWD +nor a valid file descriptor. +.It Bq Er EINVAL +The value of the +.Fa flag +argument is not valid. +.It Bq Er ENOTDIR +The +.Fa path +argument is not an absolute path and +.Fa fd +is neither +.Dv AT_FDCWD +nor a file descriptor associated with a directory. +.El .Sh SEE ALSO .Xr chmod 2 , .Xr stat 2 @@ -148,7 +210,10 @@ The .Fn access function conforms to .St -p1003.1-90 . +The +.Fn faccessat +system call is expected to conform to POSIX.1-2008 . .Sh CAVEAT -.Fn Access +.Fn access is a potential security hole and should never be used. diff --git a/bsd/man/man2/bind.2 b/bsd/man/man2/bind.2 index 694ad472e..1e968edaa 100644 --- a/bsd/man/man2/bind.2 +++ b/bsd/man/man2/bind.2 @@ -131,6 +131,11 @@ The following errors are specific to binding names in the UNIX domain. A component of the path prefix does not allow searching or the node's parent directory denies write permission. .\" ========== +.It Bq Er EEXIST +A file already exists at the pathname. +.Xr unlink 2 +it first. +.\" ========== .It Bq Er EIO An I/O error occurred while making the directory entry or allocating the inode. diff --git a/bsd/man/man2/chmod.2 b/bsd/man/man2/chmod.2 index 69002be40..b49c28075 100644 --- a/bsd/man/man2/chmod.2 +++ b/bsd/man/man2/chmod.2 @@ -38,7 +38,8 @@ .Os BSD 4 .Sh NAME .Nm chmod , -.Nm fchmod +.Nm fchmod , +.Nm fchmodat .Nd change mode of file .Sh SYNOPSIS .Fd #include @@ -53,6 +54,8 @@ .Fa "int fildes" .Fa "mode_t mode" .Fc +.Ft int +.Fn fchmodat "int fd" "const char *path" "mode_t mode" "int flag" .Sh DESCRIPTION The function .Fn chmod @@ -62,11 +65,11 @@ specified by the pathname .Fa path to .Fa mode . -.Fn Fchmod +.Fn fchmod sets the permission bits of the specified file descriptor .Fa fildes . -.Fn Chmod +.Fn chmod verifies that the process owner (user) either owns the file specified by .Fa path @@ -74,6 +77,41 @@ the file specified by .Fa fildes ) , or is the super-user. +.Pp +The +.Fn fchmodat +is equivalent to +.Fn chmod +except in the case where +.Fa path +specifies a relative path. 
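All of the *at additions in these man pages share one resolution rule: a relative path is resolved against the directory open at fd, AT_FDCWD falls back to the current working directory, and a flag word selects variants such as AT_EACCESS above (or AT_SYMLINK_NOFOLLOW, as the fchmodat text goes on to describe). A minimal illustrative use of faccessat(2), with a hypothetical path and error handling elided:

#include <fcntl.h>
#include <unistd.h>

/* Returns 1 if "logs/app.log" under dirfd is writable, else 0. */
int
can_write_log(int dirfd)
{
	/*
	 * The path resolves under dirfd, not the current directory;
	 * AT_EACCESS checks against the effective IDs, per the text above.
	 */
	return (faccessat(dirfd, "logs/app.log", W_OK, AT_EACCESS) == 0);
}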
+In this case the file to be changed is determined relative to the directory +associated with the file descriptor +.Fa fd +instead of the current working directory. +The values for the +.Fa flag +are constructed by a bitwise-inclusive OR of flags from the following list, defined +in +.In fcntl.h : +.Bl -tag -width indent +.It Dv AT_SYMLINK_NOFOLLOW +If +.Fa path +names a symbolic link, then the mode of the symbolic link is changed. +.El +.Pp +If +.Fn fchmodat +is passed the special value +.Dv AT_FDCWD +in the +.Fa fd +parameter, the current working directory is used. +If also +.Fa flag +is zero, the behavior is identical to a call to +.Fn chmod . A mode is created from .Em or'd permission bit masks @@ -204,6 +242,34 @@ the effective user ID is not the super-user. .It Bq Er EROFS The file resides on a read-only file system. .El +.Pp +In addition to the +.Fn chmod +errors, +.Fn fchmodat +fails if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa path +argument does not specify an absolute path and the +.Fa fd +argument is neither +.Fa AT_FDCWD +nor a valid file descriptor open for searching. +.It Bq Er EINVAL +The value of the +.Fa flag +argument is not valid. +.It Bq Er ENOTDIR +The +.Fa path +argument is not an absolute path and +.Fa fd +is neither +.Dv AT_FDCWD +nor a file descriptor associated with a directory. +.El .Sh LEGACY SYNOPSIS .Fd #include .Fd #include @@ -223,9 +289,15 @@ The .Fn chmod function is expected to conform to .St -p1003.1-88 . +The +.Fn fchmodat +function is expected to conform to POSIX.1-2008 . .Sh HISTORY The .Fn fchmod function call appeared in .Bx 4.2 . +The +.Fn fchmodat +system call appeared in OS X 10.10 diff --git a/bsd/man/man2/chown.2 b/bsd/man/man2/chown.2 index 7179b362e..6734cf17d 100644 --- a/bsd/man/man2/chown.2 +++ b/bsd/man/man2/chown.2 @@ -37,7 +37,8 @@ .Sh NAME .Nm chown , .Nm fchown , -.Nm lchown +.Nm lchown , +.Nm fchownat .Nd change owner and group of a file .Sh SYNOPSIS .In unistd.h @@ -59,6 +60,8 @@ .Fa "uid_t owner" .Fa "gid_t group" .Fc +.Ft int +.Fn fchownat "int fd" "const char *path" "uid_t owner" "gid_t group" "int flag" .Sh DESCRIPTION The owner ID and group ID of the file named by @@ -104,6 +107,49 @@ system call is similar to .Fn chown but does not follow symbolic links. .Pp +The +.Fn fchownat +system call is equivalent to the +.Fn chown +and +.Fn lchown +except in the case where +.Fa path +specifies a relative path. +In this case the file to be changed is determined relative to the directory +associated with the file descriptor +.Fa fd +instead of the current working directory. +.Pp +Values for +.Fa flag +are constructed by a bitwise-inclusive OR of flags from the following +list, defined in +.In fcntl.h : +.Bl -tag -width indent +.It Dv AT_SYMLINK_NOFOLLOW +If +.Fa path +names a symbolic link, ownership of the symbolic link is changed. +.El +.Pp +If +.Fn fchownat +is passed the special value +.Dv AT_FDCWD +in the +.Fa fd +parameter, the current working directory is used and the behavior is identical +to a call to +.Fn chown +or +.Fn lchown +respectively, depending on whether or not the +.Dv AT_SYMLINK_NOFOLLOW +bit is set in the +.Fa flag +argument. +.Pp One of the owner or group id's may be left unchanged by specifying it as -1. .Sh RETURN VALUES @@ -176,6 +222,36 @@ and the calling process does not have appropriate (i.e., root) privileges. .It Bq Er EROFS The named file resides on a read-only file system. 
.El +.Pp +In addition to the errors specified for +.Fn chown +and +.Fn lchown , +the +.Fn fchownat +system call may fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa path +argument does not specify an absolute path and the +.Fa fd +argument is neither +.Dv AT_FDCWD +nor a valid file descriptor open for searching. +.It Bq Er EINVAL +The value of the +.Fa flag +argument is not valid. +.It Bq Er ENOTDIR +The +.Fa path +argument is not an absolute path and +.Fa fd +is neither +.Dv AT_FDCWD +nor a file descriptor associated with a directory. +.El .Sh SEE ALSO .Xr chgrp 1 , .Xr chmod 2 , @@ -186,6 +262,9 @@ The .Fn chown system call is expected to conform to .St -p1003.1-90 . +The +.Fn fchownat +system call is expected to conform to POSIX.1-2008 . .Sh HISTORY The .Fn chown @@ -207,3 +286,7 @@ The system call was added in .Fx 3.0 to compensate for the loss of functionality. +.Pp +The +.Fn fchownat +system call appeared in OS X 10.10 diff --git a/bsd/man/man2/faccessat.2 b/bsd/man/man2/faccessat.2 new file mode 100644 index 000000000..9d4f76e5b --- /dev/null +++ b/bsd/man/man2/faccessat.2 @@ -0,0 +1 @@ +.so man2/access.2 diff --git a/bsd/man/man2/fchmodat.2 b/bsd/man/man2/fchmodat.2 new file mode 100644 index 000000000..92647d2e3 --- /dev/null +++ b/bsd/man/man2/fchmodat.2 @@ -0,0 +1 @@ +.so man2/chmod.2 diff --git a/bsd/man/man2/fchownat.2 b/bsd/man/man2/fchownat.2 new file mode 100644 index 000000000..f0a5635ae --- /dev/null +++ b/bsd/man/man2/fchownat.2 @@ -0,0 +1 @@ +.so man2/chown.2 diff --git a/bsd/man/man2/fstatat.2 b/bsd/man/man2/fstatat.2 new file mode 100644 index 000000000..b1a86c195 --- /dev/null +++ b/bsd/man/man2/fstatat.2 @@ -0,0 +1 @@ +.so man2/stat.2 diff --git a/bsd/man/man2/getattrlist.2 b/bsd/man/man2/getattrlist.2 index df7e6ac1b..124e695c9 100644 --- a/bsd/man/man2/getattrlist.2 +++ b/bsd/man/man2/getattrlist.2 @@ -16,12 +16,13 @@ .\" .\" @(#)getattrlist.2 . -.Dd October 14, 2004 +.Dd February 25, 2014 .Dt GETATTRLIST 2 .Os Darwin .Sh NAME .Nm getattrlist , -.Nm fgetattrlist +.Nm fgetattrlist , +.Nm getattrlistat .Nd get file system attributes .Sh SYNOPSIS .Fd #include @@ -31,6 +32,11 @@ . .Ft int .Fn fgetattrlist "int fd" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long options" +.Ft int +.Fo getattrlistat +.Fa "int fd" "const char *path" "struct attrlist * attrList" "void * attrBuf" +.Fa "size_t attrBufSize" "unsigned long options" +.Fc .Sh DESCRIPTION The .Fn getattrlist @@ -42,6 +48,28 @@ while .Fn fgetattrlist works on the provided file descriptor .Fa fd . +.Pp +The +.Fn getattrlistat +system call is equivalent to +.Fn getattrlist +except in the case where +.Fa path +specifies a relative path. +In this case the attributes are returned for the file system object named by +path relative to the directory associated with the file descriptor +.Fa fd +instead of the current working directory. +If +.Fn getattrlistat +is passed the special value +.Dv AT_FDCWD +in the +.Fa fd +parameter, the current working directory is used and the behavior is +identical to a call to +.Fn getattrlist . +.Pp You can think of .Fn getattrlist as a seriously enhanced version of @@ -59,18 +87,6 @@ The parameter lets you control specific aspects of the function's behavior. .Pp . -The -.Fn getattrlist -and -.Fn fgetattrlist -functions are only supported by certain volume format implementations. -For maximum compatibility, client programs should use high-level APIs -(such as the Carbon File Manager) to access file system attributes. 
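Rounding out the chown.2 additions a few hunks back, fchownat(2) composes the same fd-relative lookup with AT_SYMLINK_NOFOLLOW to operate on a symlink itself rather than its target. A brief sketch, with a hypothetical link name:

#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>

/* Change ownership of the symlink "current" itself, relative to dirfd. */
int
chown_link(int dirfd, uid_t owner, gid_t group)
{
	/*
	 * Without AT_SYMLINK_NOFOLLOW this would follow the link and
	 * behave like a plain fd-relative chown(2).
	 */
	return (fchownat(dirfd, "current", owner, group,
	    AT_SYMLINK_NOFOLLOW));
}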
-These high-level APIs include logic to emulate file system attributes -on volumes that don't support -the calls. -.Pp -. Not all volumes support all attributes. See the discussion of .Dv ATTR_VOL_ATTRIBUTES @@ -219,6 +235,14 @@ will be used for the invalid ones. Requires that .Dv ATTR_CMN_RETURNED_ATTRS be requested. . +.It FSOPT_ATTR_CMN_EXTENDED +If this bit is set, then +.Dv ATTR_CMN_GEN_COUNT +and +.Dv ATTR_CMN_DOCUMENT_ID +can be requested. When this option is used, callers must not reference +forkattrs anywhere. +. .El . .Sh ATTRIBUTE BUFFER @@ -343,10 +367,10 @@ structure which is used to report which of the requested attributes were actually returned. This attribute, when requested, will always be the first attribute returned. By default, unsupported attributes will be skipped (i.e. not packed into the output buffer). This behavior -can be over-ridden using the FSOPT_PACK_INVAL_ATTRS option flag. Only -.Xr getattrlist 2 supports this attribute ( -.Xr getdirentriesattr 2 and -.Xr searchfs 2 do not support it ). +can be over-ridden using the FSOPT_PACK_INVAL_ATTRS option flag. Both +.Xr getattrlist 2 and +.Xr getattrlistbulk 2 support this attribute while +.Xr searchfs 2 does not. . .It ATTR_CMN_NAME An @@ -385,15 +409,6 @@ field of the structure returned by .Xr statfs 2 . . -.Pp -This value is not related to the file system ID from traditional Mac OS (for example, -the -.Fa filesystemID -field of the -.Vt FSVolumeInfo -structure returned by Carbon's FSGetVolumeInfo() function). -On current versions of Mac OS X that value is synthesised by the Carbon File Manager. -. .It ATTR_CMN_OBJTYPE An .Vt fsobj_type_t structure that identifies the type of file system object. The values are taken from .Vt enum vtype in @@ -415,48 +430,45 @@ in .It ATTR_CMN_OBJID An .Vt fsobj_id_t -structure that uniquely identifies the file system object -within its volume. -The fid_generation field of this structure will be zero for all non-root callers -(effective UID not 0). -This identifier need not be persistent across an unmount/mount sequence. +structure that uniquely identifies the file system object within a mounted +volume for the duration of its mount; this identifier is not guaranteed to be +persistent for the volume and may change every time the volume is mounted. .Pp -. -Some volume formats use well known values for the -.Fa fid_objno -field for the root directory (2) and the parent of root directory (1). -This is not a required behaviour of this attribute. +On HFS+ volumes, the ATTR_CMN_OBJID of a file system object is distinct from +the ATTR_CMN_OBJID of any hard link to that file system object. Although the +ATTR_CMN_OBJID of a file system object may appear similar (in whole +or in part) to its ATTR_CMN_FILEID (see description of ATTR_CMN_FILEID below), +\fBno relation between the two attributes should ever be implied.\fP . .It ATTR_CMN_OBJPERMANENTID An .Vt fsobj_id_t -structure that uniquely identifies the file system object -within its volume. -The fid_generation field of this structure will be zero for all non-root callers -(effective UID not 0). -This identifier should be persistent across an unmount/mount sequence. -.Pp -Some file systems (for example, original HFS) may need to modify the on-disk -structure to return a persistent identifier. -If such a file system is mounted read-only, an attempt to get this attribute -will fail with the error -.Dv EROFS . +structure that uniquely and persistently identifies the file system object +within its volume; persistence implies that this attribute is unaffected by +mount/unmount operations on the volume. 
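The rewritten ATTR_CMN_OBJID text above pins down its semantics: unique only for the duration of a mount, distinct per hard link on HFS+, and never to be conflated with ATTR_CMN_FILEID. A minimal sketch of fetching it, following the packing rules this page describes (ATTR_CMN_RETURNED_ATTRS first, then attributes in bit order); the reply struct is this example's own, not an API type:

#include <sys/types.h>
#include <sys/attr.h>
#include <unistd.h>
#include <string.h>

struct objid_reply {
	u_int32_t	length;		/* total size of this reply */
	attribute_set_t	returned;	/* which attributes are present */
	fsobj_id_t	objid;		/* valid only for this mount */
} __attribute__((aligned(4), packed));

int
get_objid(const char *path, fsobj_id_t *out)
{
	struct attrlist al;
	struct objid_reply reply;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_OBJID;

	if (getattrlist(path, &al, &reply, sizeof(reply), 0) != 0)
		return (-1);
	/* Unsupported attributes are skipped, so check the bitmap. */
	if (!(reply.returned.commonattr & ATTR_CMN_OBJID))
		return (-1);
	*out = reply.objid;
	return (0);
}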
+.Pp +Some file systems can not return this attribute when the volume is mounted +read-only and will fail the request with error +.Dv EROFS. +.br +(e.g. original HFS modifies on disk structures to generate persistent +identifiers, and hence cannot do so if the volume is mounted read only.) . .It ATTR_CMN_PAROBJID An .Vt fsobj_id_t -structure that identifies the parent directory of the file system object. -The fid_generation field of this structure will be zero for all non-root callers -(effective UID not 0). -Equivalent to the ATTR_CMN_OBJID attribute of the parent directory. -This identifier need not be persistent across an unmount/mount sequence. +structure that uniquely identifies the parent directory of the file system +object within a mounted volume, for the duration of the volume mount; this +identifier is not guaranteed to be persistent for the volume and may change +every time the volume is mounted. .Pp . -On a volume that supports hard links, a multiply linked file has no unique parent. -This attribute will return an unspecified parent. -.Pp +If a file system object is hard linked from multiple directories, the parent +directory returned for this attribute is non deterministic; it can be any one +of the parent directories of this object. . -For some volume formats this attribute is very expensive to calculate. +For some volume formats the computing cost for this attribute is significant; +developers are advised to request this attribute sparingly. . .It ATTR_CMN_SCRIPT (read/write) A @@ -465,11 +477,8 @@ containing a text encoding hint for the file system object's name. It is included to facilitate the lossless round trip conversion of names between Unicode and traditional Mac OS script encodings. -The values are defined in -.Aq Pa CarbonCore/TextCommon.h . File systems that do not have an appropriate text encoding value should return kTextEncodingMacUnicode. -See DTS Q&A 1173 "File Manager Text Encoding Hints". . .It ATTR_CMN_CRTIME (read/write) A @@ -533,8 +542,6 @@ structure structure and an .Vt ExtendedFolderInfo structure). -These structures are defined in -.Aq Pa CarbonCore/Finder.h . .Pp This attribute is not byte swapped by the file system. The value of multibyte fields on disk is always big endian. @@ -579,18 +586,6 @@ are valid; other bits should be ignored, e.g., by masking with .Dv ~S_IFMT . . -.It ATTR_CMN_NAMEDATTRCOUNT -A -.Vt u_int32_t -containing the number of named attributes of the file system object. -. -.It ATTR_CMN_NAMEDATTRLIST -An -.Vt attrreference -structure containing a list of named attributes of the file system object. -No built-in file systems on Mac OS X currently support named attributes. -Because of this, the structure of this attribute's value is not yet defined. -. .It ATTR_CMN_FLAGS (read/write) A .Vt u_int32_t @@ -603,19 +598,38 @@ structure returned by .Xr stat 2 . For more information about these flags, see .Xr chflags 2 . +. +.It ATTR_CMN_GEN_COUNT +A +.Vt u_int32_t +containing a non zero monotonically increasing generation +count for this file system object. The generation count tracks +the number of times the data in a file system object has been +modified. No meaning can be implied from its value. The +value of the generation count for a file system object can +be compared against a previous value of the same file system +object for equality; i.e. an unchanged generation +count indicates identical data. Requesting this attribute requires the +FSOPT_ATTR_CMN_EXTENDED option flag. .Pp . 
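Since ATTR_CMN_GEN_COUNT only gains meaning by comparison, a caller might poll it to detect data changes, passing the FSOPT_ATTR_CMN_EXTENDED option the text above requires and, per the notes that follow, treating 0 as invalid. The sketch below follows the same packed-reply pattern as the ATTR_CMN_OBJID example earlier:

#include <sys/types.h>
#include <sys/attr.h>
#include <unistd.h>
#include <string.h>

/* Returns 1 if the data changed since *last_gen, 0 if not, -1 on error. */
int
data_changed(const char *path, u_int32_t *last_gen)
{
	struct attrlist al;
	struct {
		u_int32_t	length;
		attribute_set_t	returned;
		u_int32_t	gen_count;
	} __attribute__((aligned(4), packed)) reply;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_GEN_COUNT;

	if (getattrlist(path, &al, &reply, sizeof(reply),
	    FSOPT_ATTR_CMN_EXTENDED) != 0 ||
	    !(reply.returned.commonattr & ATTR_CMN_GEN_COUNT))
		return (-1);
	if (reply.gen_count == 0)
		return (-1);	/* 0 is invalid, e.g. while mmap'ed */
	if (*last_gen != 0 && reply.gen_count == *last_gen)
		return (0);	/* unchanged count: identical data */
	*last_gen = reply.gen_count;
	return (1);
}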
-The order that attributes are placed into the attribute buffer -almost invariably matches the order of the attribute mask bit values. -The exception is -.Dv ATTR_CMN_FLAGS . -If its order was based on its bit position, it would be before -the -.Dv ATTR_CMN_NAMEDATTRCOUNT -/ -.Dv ATTR_CMN_NAMEDATTRLIST -pair, however, -it is placed in the buffer after them. +A generation count value of 0 is invalid and cannot be used to +determine data change. +.Pp +The generation count is invalid while a file is mmap'ed. An invalid +generation count value of 0 will be returned for mmap'ed files. +. +.It ATTR_CMN_DOCUMENT_ID +A +.Vt u_int32_t +containing the document id. The document id is a value assigned +by the kernel to a document (which can be a file or directory) +and is used to track the data regardless of where it gets moved. +The document id survives safe saves; i.e it is sticky to the path it +was assigned to. Requesting this attribute requires the +FSOPT_ATTR_CMN_EXTENDED option flag. +.Pp +A document id of 0 is invalid. . .It ATTR_CMN_USERACCESS A @@ -655,7 +669,13 @@ Analoguous to .It ATTR_CMN_FILEID A .Vt u_int64_t -that uniquely identifies the file system object within its volume. +that uniquely identifies the file system object within it's mounted volume. +Equivalent to +.Fa st_ino +field of the +.Vt stat +structure returned by +.Xr stat 2 . . .It ATTR_CMN_PARENTID A @@ -680,6 +700,11 @@ A that contains the time that the file system object was created or renamed into its containing directory. Note that inconsistent behavior may be observed when this attribute is requested on hard-linked items. +. +.It ATTR_CMN_DATA_PROTECT_FLAGS +A +.Vt u_int32_t +that contains the file or directory's data protection class. .Pp . .El @@ -717,9 +742,6 @@ A containing the volume signature word. This value is unique within a given file system type and lets you distinguish between different volume formats handled by the same file system. -See -.Aq Pa CarbonCore/Files.h -for more details. . .It ATTR_VOL_SIZE An @@ -1248,7 +1270,7 @@ must also set Introduced with Darwin 7.0 (Mac OS X version 10.3). . .It VOL_CAP_FMT_FAST_STATFS -This bit is used as a hint to upper layers (specifically the Carbon File Manager) to +This bit is used as a hint to upper layers to indicate that .Xr statfs 2 is fast enough that its results need not be cached by the caller. @@ -1588,6 +1610,30 @@ The volume is read-only but must be modified in order to return this attribute. An I/O error occurred while reading from or writing to the file system. .El .Pp +In addition to the errors returned by the +.Fn getattrlist , +the +.Fn getattrlistat +function may fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa path +argument does not specify an absolute path and the +.Fa fd +argument is neither +.Dv AT_FDCWD +nor a valid file descriptor open for searching. +.It Bq Er ENOTDIR +The +.Fa path +argument is not an absolute path and +.Fa fd +is neither +.Dv AT_FDCWD +nor a file descriptor associated with a directory. +.El +.Pp . .Sh CAVEATS . @@ -1605,15 +1651,6 @@ For example, (0x00000001) comes before .Dv ATTR_CMN_DEVID (0x00000002) because its value is smaller. -However, you can not rely on this ordering because there is one key exception: -.Dv ATTR_CMN_FLAGS -is placed after the -.Dv ATTR_CMN_NAMEDATTRCOUNT -/ -.Dv ATTR_CMN_NAMEDATTRLIST -pair, even though its bit position indicates that it should come before. -This is due to a bug in an early version of Mac OS X that can't be fixed for -binary compatibility reasons. 
When ordering attributes, you should always use the order in which they are described above. .Pp @@ -1923,7 +1960,7 @@ main(int argc, char **argv) .Xr chflags 2 , .Xr exchangedata 2 , .Xr fcntl 2 , -.Xr getdirentriesattr 2 , +.Xr getattrlistbulk 2 , .Xr mount 2 , .Xr searchfs 2 , .Xr setattrlist 2 , @@ -1934,4 +1971,6 @@ main(int argc, char **argv) A .Fn getattrlist function call appeared in Darwin 1.3.1 (Mac OS X version 10.0). -. +The +.Fn getattrlistat +function call appeared in OS X 10.10 . diff --git a/bsd/man/man2/getattrlistat.2 b/bsd/man/man2/getattrlistat.2 new file mode 100644 index 000000000..7c9a2ee01 --- /dev/null +++ b/bsd/man/man2/getattrlistat.2 @@ -0,0 +1 @@ +.so man2/getattrlist.2 diff --git a/bsd/man/man2/getattrlistbulk.2 b/bsd/man/man2/getattrlistbulk.2 new file mode 100644 index 000000000..f67795e96 --- /dev/null +++ b/bsd/man/man2/getattrlistbulk.2 @@ -0,0 +1,376 @@ +.\" Copyright (c) 2013 Apple Computer, Inc. All rights reserved. +.\" +.\" The contents of this file constitute Original Code as defined in and +.\" are subject to the Apple Public Source License Version 1.1 (the +.\" "License"). You may not use this file except in compliance with the +.\" License. Please obtain a copy of the License at +.\" http://www.apple.com/publicsource and read it before using this file. +.\" +.\" This Original Code and all software distributed under the License are +.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the +.\" License for the specific language governing rights and limitations +.\" under the License. +.\" +.\" @(#)getattrlistbulk.2 +. +.Dd November 15, 2013 +.Dt GETATTRLISTBULK 2 +.Os Darwin +.Sh NAME +.Nm getattrlistbulk +.Nd get file system attributes for multiple directory entries +.Sh SYNOPSIS +.Fd #include +.Fd #include +.Pp +.Ft int +.Fn getattrlistbulk "int dirfd" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "uint64_t options" +. +. +.Sh DESCRIPTION +The +.Fn getattrlistbulk +function iterates over the items in a directory and returns information about +each directory entry like +.Xr getattrlist 2 . +Note: when +.Fn getattrlistbulk +returns information about a symbolic link, the information returned is about the link itself, not the target of the link. +.Pp +The function reads directory entries from the directory referenced by the file +descriptor +.Fa dirfd . +The +.Fa attrList +parameter determines what attributes are returned for each entry. +Attributes of those directory entries are placed into the buffer specified by +.Fa attrBuf +and +.Fa attrBufSize . +The +.Fa options +parameter allows you to modify the behaviour of the call. +.Pp +. +.Pp +. +.\" dirfd parameter +. +The +.Fa dirfd +parameter must be a file descriptor that references a directory that you have opened for reading. +.Pp +. +.\" attrList parameter +. +The +.Fa attrList +parameter is a pointer to an +.Vt attrlist +structure. +All fields of this structure must be filled before calling the function. +See the discussion of the +.Xr getattrlist 2 +function for a detailed description of this structure. +To get an attribute, the corresponding bit in the appropriate +.Vt attrgroup_t +field of the +.Vt attrlist +structure must be set. +Volume attributes cannot be requested but all other supported getattrlist attributes can be used. 
+For this function,
+.Dv ATTR_CMN_NAME
+and
+.Dv ATTR_CMN_RETURNED_ATTRS
+are required and the absence of these attributes in the attrList parameter results in an error.
+.Pp
+.
+.\" attrBuf and attrBufSize parameters
+.
+The
+.Fa attrBuf
+and
+.Fa attrBufSize
+parameters specify a buffer into which the function places attribute values.
+The attributes for any given directory entry are grouped together and
+packed in exactly the same way as they are returned from
+.Xr getattrlist 2
+and are subject to exactly the same alignment specifications
+and restrictions. These groups are then placed into the buffer, one after another.
+.Xr getattrlist 2 should be consulted on details of the attributes that can be
+requested and returned. The name of the entry itself is provided by the
+.Dv ATTR_CMN_NAME
+attribute. Each group starts with a leading
+.Vt uint32_t
+, which will always be 8-byte aligned and contains the overall length of the group.
+You can step from one group to the next by simply adding this length to your pointer.
+The sample code (below) shows how to do this.
+The initial contents of this buffer are ignored.
+.Pp
+.
+.\" options parameter
+.
+The
+.Fa options
+parameter is a bit set that controls the behaviour of
+.Fn getattrlistbulk .
+The following option bits are defined.
+.
+.Bl -tag -width FSOPT_PACK_INVAL_ATTRS
+.
+.It FSOPT_PACK_INVAL_ATTRS
+If this bit is set, then all requested attributes,
+even ones that are not supported by the object or file
+system, will be returned in the attrBuf. The attributes
+actually returned can be determined by looking at the
+attribute_set_t structure returned for the
+.Dv ATTR_CMN_RETURNED_ATTRS
+attribute. Default values will be returned for invalid
+attributes and should be ignored.
+.Pp
+Please see the discussion of this flag in
+.Xr getattrlist 2
+.
+.El
+.Pp
+If
+.Dv ATTR_CMN_ERROR
+has been requested and an error specific to a directory entry occurs,
+an error will be reported. The
+.Dv ATTR_CMN_ERROR
+attribute is a uint32_t which, if non-zero, specifies the error code
+that was encountered during the processing of that directory entry. The
+.Dv ATTR_CMN_ERROR
+attribute will be after the
+.Dv ATTR_CMN_RETURNED_ATTRS
+attribute in the returned buffer.
+.Pp
+It is typical to ask for a combination of common, file, and directory
+attributes and then use the value of the
+.Dv ATTR_CMN_OBJTYPE
+attribute to parse the resulting attribute buffer.
+.
+.Sh RETURN VALUES
+Upon successful completion the number of entries successfully read
+is returned. A value of 0 indicates there are no more entries. On error,
+a value of -1 is returned and
+.Va errno
+is set to indicate the error.
+.Pp
+When iterating all entries in a directory,
+.Fn getattrlistbulk
+is called repeatedly until a 0 is returned. In such a case, if
+.Fn readdir
+and
+.Fn getattrlistbulk
+calls on the same fd are mixed, the behavior is undefined.
+
+.Pp
+.Sh ERRORS
+.Fn getattrlistbulk
+will fail if:
+.Bl -tag -width Er
+.
+.It Bq Er EBADF
+.Fa dirfd
+is not a valid file descriptor for a directory open for reading.
+.
+.It Bq Er ENOTDIR
+The file descriptor
+.Fa dirfd
+is not a directory.
+.
+.It Bq Er EACCES
+Search permission is denied on the directory whose descriptor is given
+as input.
+.
+.It Bq Er EFAULT
+.Fa attrList
+or
+.Fa attrBuf
+points to an invalid address.
+.
+.It Bq Er ERANGE
+The buffer was too small.
+.
+.It Bq Er EINVAL
+The
+.Fa bitmapcount
+field of
+.Fa attrList
+is not
+.Dv ATTR_BIT_MAP_COUNT .
+.
+.It Bq Er EINVAL
+An invalid attribute was requested.
+.
+.It Bq Er EINVAL
+Volume attributes were requested.
+.
+.It Bq Er EINVAL
+.Dv ATTR_CMN_NAME
+or
+.Dv ATTR_CMN_RETURNED_ATTRS
+was not requested in the attrList parameter.
+.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to the file system.
+.El
+.Pp
+.
+.Sh EXAMPLES
+.
+The following code lists the contents of a directory using
+.Fn getattrlistbulk .
+The listing includes the file type.
+.
+.Bd -literal
+#include <sys/attr.h>
+#include <sys/errno.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+typedef struct val_attrs {
+    uint32_t        length;
+    attribute_set_t returned;
+    uint32_t        error;
+    attrreference_t name_info;
+    char            *name;
+    fsobj_type_t    obj_type;
+} val_attrs_t;
+
+
+void demo(const char *dirpath)
+{
+    int error;
+    int dirfd;
+    struct attrlist attrList;
+    char *entry_start;
+    char attrBuf[256];
+
+    memset(&attrList, 0, sizeof(attrList));
+    attrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+    attrList.commonattr  = ATTR_CMN_RETURNED_ATTRS |
+                           ATTR_CMN_NAME |
+                           ATTR_CMN_ERROR |
+                           ATTR_CMN_OBJTYPE;
+
+    error = 0;
+    dirfd = open(dirpath, O_RDONLY, 0);
+    if (dirfd < 0) {
+        error = errno;
+        printf("Could not open directory %s", dirpath);
+        perror("Error was ");
+    } else {
+        for (;;) {
+            int retcount;
+
+            retcount = getattrlistbulk(dirfd, &attrList, &attrBuf[0],
+                sizeof(attrBuf), 0);
+            printf("\engetattrlistbulk returned %d", retcount);
+            if (retcount == -1) {
+                error = errno;
+                perror("Error returned : ");
+                printf("\en");
+                break;
+            } else if (retcount == 0) {
+                /* No more entries in directory */
+                error = 0;
+                break;
+            } else {
+                int index;
+                uint32_t total_length;
+                char *field;
+
+                entry_start = &attrBuf[0];
+                total_length = 0;
+                printf(" -> entries returned");
+                for (index = 0; index < retcount; index++) {
+                    val_attrs_t attrs = {0};
+
+                    printf("\en Entry %d", index);
+                    printf(" -- ");
+                    field = entry_start;
+                    attrs.length = *(uint32_t *)field;
+                    printf(" Length %d ", attrs.length);
+                    total_length += attrs.length;
+                    printf(" Total Length %d ", total_length);
+                    field += sizeof(uint32_t);
+                    printf(" -- ");
+
+                    /* set starting point for next entry */
+                    entry_start += attrs.length;
+
+                    attrs.returned = *(attribute_set_t *)field;
+                    field += sizeof(attribute_set_t);
+
+                    if (attrs.returned.commonattr & ATTR_CMN_ERROR) {
+                        attrs.error = *(uint32_t *)field;
+                        field += sizeof(uint32_t);
+                    }
+
+                    if (attrs.returned.commonattr & ATTR_CMN_NAME) {
+                        attrs.name = field;
+                        attrs.name_info = *(attrreference_t *)field;
+                        field += sizeof(attrreference_t);
+                        printf(" %s ", (attrs.name +
+                            attrs.name_info.attr_dataoffset));
+                    }
+
+                    /* Check for error for this entry */
+                    if (attrs.error) {
+                        /*
+                         * Print error and move on to next
+                         * entry
+                         */
+                        printf("Error in reading attributes for directory entry %d", attrs.error);
+                        continue;
+                    }
+
+                    printf(" -- ");
+                    if (attrs.returned.commonattr & ATTR_CMN_OBJTYPE) {
+                        attrs.obj_type = *(fsobj_type_t *)field;
+                        field += sizeof(fsobj_type_t);
+
+                        switch (attrs.obj_type) {
+                        case VREG:
+                            printf("file ");
+                            break;
+                        case VDIR:
+                            printf("directory ");
+                            break;
+                        default:
+                            printf("obj_type = %-2d ", attrs.obj_type);
+                            break;
+                        }
+                    }
+                    printf(" -- ");
+                }
+            }
+        }
+        (void)close(dirfd);
+    }
+}
+.Ed
+.Pp
+.
+.Sh SEE ALSO
+.
+.Xr getattrlist 2 ,
+.Xr lseek 2
+.
+.Sh HISTORY
+A
+.Fn getattrlistbulk
+function call appeared in OS X version 10.10
+.
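The EXAMPLES section above defines demo() but no entry point. A hypothetical driver (an editorial addition, not in the man page) that lists each directory named on the command line would be:

/* Drive the demo() function from the EXAMPLES section above. */
int main(int argc, char **argv)
{
    int i;

    for (i = 1; i < argc; i++)
        demo(argv[i]);
    return 0;
}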
diff --git a/bsd/man/man2/getdirentriesattr.2 b/bsd/man/man2/getdirentriesattr.2
index 78a839766..6be39ee87 100644
--- a/bsd/man/man2/getdirentriesattr.2
+++ b/bsd/man/man2/getdirentriesattr.2
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+.\" Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 .\"
 .\" The contents of this file constitute Original Code as defined in and
 .\" are subject to the Apple Public Source License Version 1.1 (the
@@ -20,7 +20,7 @@
 .Dt GETDIRENTRIESATTR 2
 .Os Darwin
 .Sh NAME
-.Nm getdirentriesattr
+.Nm getdirentriesattr(NOW DEPRECATED)
 .Nd get file system attributes for multiple directory entries
 .Sh SYNOPSIS
 .Fd #include <sys/attr.h>
@@ -207,7 +207,14 @@ It is typical to ask for a combination of common, file, and directory
 attributes and then use the value of the
 .Dv ATTR_CMN_OBJTYPE
 attribute to parse the resulting attribute buffer.
-.
+.Sh NOTES
+As of Mac OS X 10.10,
+.Fn getdirentriesattr
+is deprecated. It is replaced by
+.Xr getattrlistbulk 2 .
+Continued use of
+.Fn getdirentriesattr
+is strongly discouraged as comprehensive results are not guaranteed.
 .Sh RETURN VALUES
 Upon successful completion a value of 0 or 1 is returned.
 The value 0 indicates that the routine completed successfully.
diff --git a/bsd/man/man2/link.2 b/bsd/man/man2/link.2
index 470164057..35010cab7 100644
--- a/bsd/man/man2/link.2
+++ b/bsd/man/man2/link.2
@@ -37,7 +37,8 @@
 .Dt LINK 2
 .Os BSD 4
 .Sh NAME
-.Nm link
+.Nm link ,
+.Nm linkat
 .Nd make a hard file link
 .Sh SYNOPSIS
 .Fd #include <unistd.h>
@@ -46,6 +47,10 @@
 .Fa "const char *path1"
 .Fa "const char *path2"
 .Fc
+.Ft int
+.Fo linkat
+.Fa "int fd1" "const char *name1" "int fd2" "const char *name2" "int flag"
+.Fc
 .Sh DESCRIPTION
 The
 .Fn link
@@ -96,6 +101,69 @@ will point the hard link, to the underlying object pointed to by
 .Fa path1 ,
 not to the symbolic link itself.
+.Pp
+The
+.Fn linkat
+system call is equivalent to
+.Fn link
+except in the case where either
+.Fa name1
+or
+.Fa name2
+or both are relative paths.
+In this case a relative path
+.Fa name1
+is interpreted relative to
+the directory associated with the file descriptor
+.Fa fd1
+instead of the current working directory and similarly for
+.Fa name2
+and the file descriptor
+.Fa fd2 .
+.Pp
+Values for
+.Fa flag
+are constructed by a bitwise-inclusive OR of flags from the following
+list, defined in
+.In fcntl.h :
+.Bl -tag -width indent
+.It Dv AT_SYMLINK_FOLLOW
+If
+.Fa name1
+names a symbolic link, a new link for the target of the symbolic link is
+created.
+.El
+.Pp
+If
+.Fn linkat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fd1
+or
+.Fa fd2
+parameter, the current working directory is used for the respective
+.Fa name
+argument.
+If both
+.Fa fd1
+and
+.Fa fd2
+have value
+.Dv AT_FDCWD ,
+the behavior is identical to a call to
+.Fn link .
+Unless
+.Fa flag
+contains the
+.Dv AT_SYMLINK_FOLLOW
+flag, if
+.Fa name1
+names a symbolic link, a new link is created for the symbolic link
+.Fa name1
+and not its target. On OS X, not assigning AT_SYMLINK_FOLLOW to
+.Fa flag
+may result in some filesystems returning an error.
 .Sh RETURN VALUES
 Upon successful completion, a value of 0 is returned.
 Otherwise,
 a value of -1 is returned and
@@ -181,6 +249,46 @@ and the file named by
 .Fa path1
 are on different file systems.
 .El
+.Pp
+In addition to the errors returned by the
+.Fn link ,
+the
+.Fn linkat
+system call may fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa name1
+or
+.Fa name2
+argument does not specify an absolute path and the
+.Fa fd1
+or
+.Fa fd2
+argument, respectively, is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er EINVAL
+The value of the
+.Fa flag
+argument is not valid.
+.It Bq Er ENOTSUP
+.Fa flag
+was not set to
+.Dv AT_SYMLINK_FOLLOW (some filesystems only).
+.It Bq Er ENOTDIR
+The
+.Fa name1
+or
+.Fa name2
+argument is not an absolute path and
+.Fa fd1
+or
+.Fa fd2 ,
+respectively, is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
+.El
 .Sh SEE ALSO
 .Xr symlink 2 ,
 .Xr unlink 2
@@ -189,3 +297,6 @@ The
 .Fn link
 function is expected to conform to
 .St -p1003.1-88 .
+The
+.Fn linkat
+system call is expected to conform to POSIX.1-2008 .
diff --git a/bsd/man/man2/linkat.2 b/bsd/man/man2/linkat.2
new file mode 100644
index 000000000..a7d6da568
--- /dev/null
+++ b/bsd/man/man2/linkat.2
@@ -0,0 +1 @@
+.so man2/link.2
diff --git a/bsd/man/man2/mkdir.2 b/bsd/man/man2/mkdir.2
index 5fba3bf5a..11f934240 100644
--- a/bsd/man/man2/mkdir.2
+++ b/bsd/man/man2/mkdir.2
@@ -37,7 +37,8 @@
 .Dt MKDIR 2
 .Os BSD 4.2
 .Sh NAME
-.Nm mkdir
+.Nm mkdir ,
+.Nm mkdirat
 .Nd make a directory file
 .Sh SYNOPSIS
 .Fd #include <sys/stat.h>
@@ -46,6 +47,8 @@
 .Fa "const char *path"
 .Fa "mode_t mode"
 .Fc
+.Ft int
+.Fn mkdirat "int fd" "const char *path" "mode_t mode"
 .Sh DESCRIPTION
 The directory
 .Fa path
@@ -69,6 +72,27 @@ is undefined when mode bits other than the low 9 bits are used.
 Use
 .Xr chmod 2
 after
 .Fn mkdir
 to explicitly set the other bits (See example below).
+.Pp
+The
+.Fn mkdirat
+system call is equivalent to
+.Fn mkdir
+except in the case where
+.Fa path
+specifies a relative path.
+In this case the directory is created relative to the
+directory associated with the file descriptor
+.Fa fd
+instead of the current working directory.
+If
+.Fn mkdirat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fd
+parameter, the current working directory is used and the behavior is
+identical to a call to
+.Fn mkdir .
 .Sh RETURN VALUES
 A 0 return value indicates success. A -1 return value
 indicates an error, and an error code is stored in
@@ -138,6 +162,30 @@ A component of the path prefix is not a directory.
 .It Bq Er EROFS
 The parent directory resides on a read-only file system.
 .El
+.Pp
+In addition to the errors returned by the
+.Fn mkdir ,
+the
+.Fn mkdirat
+function may fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa path
+argument does not specify an absolute path and the
+.Fa fd
+argument is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er ENOTDIR
+The
+.Fa path
+argument is not an absolute path and
+.Fa fd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
+.El
 .Sh EXAMPLE
 .Bd -literal -offset indent
@@ -170,3 +218,10 @@ The
 .Fn mkdir
 function conforms to
 .St -p1003.1-88 .
+The
+.Fn mkdirat
+system call is expected to conform to POSIX.1-2008 .
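The dirfd/AT_FDCWD convention documented above for linkat and mkdirat is shared by the whole *at family. A minimal sketch (an editorial addition; the /var/tmp path and directory names are purely illustrative) using mkdirat:

#include <fcntl.h>    /* open, AT_FDCWD */
#include <stdio.h>
#include <sys/stat.h> /* mkdirat */
#include <unistd.h>

int make_log_dirs(void)
{
    int dirfd = open("/var/tmp", O_RDONLY);   /* descriptor for the base directory */
    if (dirfd < 0) {
        perror("open /var/tmp");
        return -1;
    }
    /* created relative to dirfd: /var/tmp/logs */
    if (mkdirat(dirfd, "logs", 0755) < 0)
        perror("mkdirat(dirfd)");
    /* AT_FDCWD: relative to the current working directory, same as mkdir() */
    if (mkdirat(AT_FDCWD, "logs", 0755) < 0)
        perror("mkdirat(AT_FDCWD)");
    (void)close(dirfd);
    return 0;
}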
+.Sh HISTORY
+The
+.Fn mkdirat
+system call appeared in OS X 10.10 .
diff --git a/bsd/man/man2/mkdirat.2 b/bsd/man/man2/mkdirat.2
new file mode 100644
index 000000000..467b98a8b
--- /dev/null
+++ b/bsd/man/man2/mkdirat.2
@@ -0,0 +1 @@
+.so man2/mkdir.2
diff --git a/bsd/man/man2/open.2 b/bsd/man/man2/open.2
index 80c293626..40a94d7d1 100644
--- a/bsd/man/man2/open.2
+++ b/bsd/man/man2/open.2
@@ -60,7 +60,7 @@
 .Dt OPEN 2
 .Os BSD 4
 .Sh NAME
-.Nm open
+.Nm open , openat
 .Nd open or create a file for reading or writing
 .Sh SYNOPSIS
 .\" OH???
 .Fd #include <fcntl.h>
@@ -71,6 +71,8 @@
 .Fa "int oflag"
 .Fa "..."
 .Fc
+.Ft int
+.Fn openat "int fd" "const char *path" "int oflag" "..."
 .Sh DESCRIPTION
 The file name specified by
 .Fa path
@@ -86,8 +88,10 @@ argument may indicate that the file is to be created if it does not exist (by
 specifying the
 .Dv O_CREAT
 flag). In this case,
-.Nm
-requires a third argument
+.Fn open
+and
+.Fn openat
+require an additional argument
 .Fa "mode_t mode" ;
 the file is created with mode
 .Fa mode
@@ -96,7 +100,36 @@ as described in
 and modified by the process' umask value (see
 .Xr umask 2 ) .
 .Pp
-The flags specified are formed by
+The
+.Fn openat
+function is equivalent to the
+.Fn open
+function except in the case where the
+.Fa path
+specifies a relative path.
+In this case the file to be opened is determined relative to the directory
+associated with the file descriptor
+.Fa fd
+instead of the current working directory.
+The
+.Fa oflag
+argument and the optional fourth argument correspond exactly to
+the arguments for
+.Fn open .
+If
+.Fn openat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fd
+argument, the current working directory is used
+and the behavior is identical to a call to
+.Fn open .
+.Pp
+The flags specified
+for the
+.Fa oflag
+argument are formed by
 .Em or Ns 'ing
 the following values:
 .Pp
@@ -371,6 +404,23 @@ The file is a pure procedure (shared text) file that is being
 executed and the
 .Fn open
 call requests write access.
+.It Bq Er EBADF
+The
+.Fa path
+argument does not specify an absolute path and the
+.Fa fd
+argument is
+neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er ENOTDIR
+The
+.Fa path
+argument is not an absolute path and
+.Fa fd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
 .El
 .Sh COMPATIBILITY
 .Fn open
@@ -392,3 +442,6 @@ An
 .Fn open
 function call appeared in
 .At v6 .
+The
+.Fn openat
+function was introduced in OS X 10.10 .
diff --git a/bsd/man/man2/openat.2 b/bsd/man/man2/openat.2
new file mode 100644
index 000000000..604e1213a
--- /dev/null
+++ b/bsd/man/man2/openat.2
@@ -0,0 +1 @@
+.so man2/open.2
diff --git a/bsd/man/man2/readlink.2 b/bsd/man/man2/readlink.2
index acd80233f..8d02c1e0d 100644
--- a/bsd/man/man2/readlink.2
+++ b/bsd/man/man2/readlink.2
@@ -37,7 +37,8 @@
 .Dt READLINK 2
 .Os BSD 4.2
 .Sh NAME
-.Nm readlink
+.Nm readlink ,
+.Nm readlinkat
 .Nd read value of a symbolic link
 .Sh SYNOPSIS
 .Fd #include <unistd.h>
@@ -47,6 +48,10 @@
 .Fa "char *restrict buf"
 .Fa "size_t bufsize"
 .Fc
+.Ft ssize_t
+.Fo readlinkat
+.Fa "int fd" "const char *restrict path" "char *restrict buf" "size_t bufsize"
+.Fc
 .Sh DESCRIPTION
 .Fn Readlink
 places the contents of the symbolic link
@@ -60,6 +65,26 @@ does not append a
 .Dv NUL
 character to
 .Fa buf .
+.Pp
+The
+.Fn readlinkat
+system call is equivalent to
+.Fn readlink
+except in the case where
+.Fa path
+specifies a relative path.
+In this case the symbolic link whose content is read is located
+relative to the directory associated with the file descriptor
+.Fa fd
+instead of the current working directory.
+If
+.Fn readlinkat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fd
+parameter, the current working directory is used and the behavior is
+identical to a call to
+.Fn readlink .
 .Sh RETURN VALUES
 The call returns the count of characters placed in the buffer
 if it succeeds, or a -1 if an error occurs, placing the error
@@ -101,6 +126,30 @@ The named file does not exist.
 .It Bq Er ENOTDIR
 A component of the path prefix is not a directory.
 .El
+.Pp
+In addition to the errors returned by the
+.Fn readlink ,
+the
+.Fn readlinkat
+system call may fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa path
+argument does not specify an absolute path and the
+.Fa fd
+argument is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er ENOTDIR
+The
+.Fa path
+argument is not an absolute path and
+.Fa fd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
+.El
 .Sh LEGACY SYNOPSIS
 .Fd #include <unistd.h>
 .Pp
@@ -121,8 +170,15 @@ have changed.
 .Xr symlink 2 ,
 .Xr compat 5 ,
 .Xr symlink 7
+.Sh STANDARDS
+The
+.Fn readlinkat
+system call is expected to conform to POSIX.1-2008 .
 .Sh HISTORY
 The
 .Fn readlink
 function call appeared in
 .Bx 4.2 .
+The
+.Fn readlinkat
+system call appeared in OS X 10.10 .
diff --git a/bsd/man/man2/readlinkat.2 b/bsd/man/man2/readlinkat.2
new file mode 100644
index 000000000..b29d1b541
--- /dev/null
+++ b/bsd/man/man2/readlinkat.2
@@ -0,0 +1 @@
+.so man2/readlink.2
diff --git a/bsd/man/man2/rename.2 b/bsd/man/man2/rename.2
index cefb57810..f07b1ab3b 100644
--- a/bsd/man/man2/rename.2
+++ b/bsd/man/man2/rename.2
@@ -37,7 +37,8 @@
 .Dt RENAME 2
 .Os BSD 4.2
 .Sh NAME
-.Nm rename
+.Nm rename ,
+.Nm renameat
 .Nd change the name of a file
 .Sh SYNOPSIS
 .Fd #include <stdio.h>
@@ -46,6 +47,8 @@
 .Fa "const char *old"
 .Fa "const char *new"
 .Fc
+.Ft int
+.Fn renameat "int fromfd" "const char *from" "int tofd" "const char *to"
 .Sh DESCRIPTION
 The
 .Fn rename
@@ -76,6 +79,37 @@ If the final component of
 .Fa old
 is a symbolic link,
 the symbolic link is renamed,
 not the file or directory to which it points.
+.Pp
+The
+.Fn renameat
+system call is equivalent to
+.Fn rename
+except in the case where either
+.Fa from
+or
+.Fa to
+specifies a relative path.
+If
+.Fa from
+is a relative path, the file to be renamed is located
+relative to the directory associated with the file descriptor
+.Fa fromfd
+instead of the current working directory.
+If
+.Fa to
+is a relative path, the same happens, but relative to the directory associated
+with
+.Fa tofd .
+If
+.Fn renameat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fromfd
+or
+.Fa tofd
+parameter, the current working directory is used in the determination
+of the file for the respective path parameter.
 .Sh CAVEATS
 The system can deadlock if a loop is present in the file system graph.
 This loop takes the form of an entry in directory
@@ -232,6 +266,41 @@ are on different logical devices (file systems).
 Note that this error code will not be returned if the implementation
 permits cross-device links.
 .El
+.Pp
+In addition to the errors returned by the
+.Fn rename ,
+the
+.Fn renameat
+system call may fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa from
+argument does not specify an absolute path and the
+.Fa fromfd
+argument is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching, or the
+.Fa to
+argument does not specify an absolute path and the
+.Fa tofd
+argument is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er ENOTDIR
+The
+.Fa from
+argument is not an absolute path and
+.Fa fromfd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory, or the
+.Fa to
+argument is not an absolute path and
+.Fa tofd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
+.El
 .Sh CONFORMANCE
 The restriction on renaming a directory whose permissions disallow writing
 is based on the fact that UFS directories contain a ".." entry.
@@ -250,3 +319,6 @@ The
 .Fn rename
 function conforms to
 .St -p1003.1-88 .
+The
+.Fn renameat
+system call is expected to conform to POSIX.1-2008 .
diff --git a/bsd/man/man2/renameat.2 b/bsd/man/man2/renameat.2
new file mode 100644
index 000000000..9b74442c8
--- /dev/null
+++ b/bsd/man/man2/renameat.2
@@ -0,0 +1 @@
+.so man2/rename.2
diff --git a/bsd/man/man2/sem_close.2 b/bsd/man/man2/sem_close.2
index cdff87c7c..f32c62da1 100644
--- a/bsd/man/man2/sem_close.2
+++ b/bsd/man/man2/sem_close.2
@@ -49,7 +49,6 @@ succeeds unless:
 is not a valid semaphore descriptor.
 .El
 .Sh SEE ALSO
-.Xr sem_init 2 ,
 .Xr sem_open 2 ,
 .Xr sem_unlink 2 ,
 .Xr semctl 2 ,
diff --git a/bsd/man/man2/stat.2 b/bsd/man/man2/stat.2
index 02de79c72..e9acf874f 100644
--- a/bsd/man/man2/stat.2
+++ b/bsd/man/man2/stat.2
@@ -42,7 +42,8 @@
 .Nm lstat ,
 .Nm lstat64 ,
 .Nm stat ,
-.Nm stat64
+.Nm stat64 ,
+.Nm fstatat
 .Nd get file status
 .Sh SYNOPSIS
 .Fd #include <sys/stat.h>
@@ -61,6 +62,8 @@
 .Fa "const char *restrict path"
 .Fa "struct stat *restrict buf"
 .Fc
+.Ft int
+.Fn fstatat "int fd" "const char *path" "struct stat *buf" "int flag"
 .Sh TRANSITIONAL SYNOPSIS (NOW DEPRECATED)
 .Ft int
 .br
@@ -118,6 +121,48 @@ known by the file descriptor
 .Fa fildes .
 .Pp
 The
+.Fn fstatat
+system call is equivalent to
+.Fn stat
+and
+.Fn lstat
+except in the case where the
+.Fa path
+specifies a relative path.
+In this case the status is retrieved from a file relative to
+the directory associated with the file descriptor
+.Fa fd
+instead of the current working directory.
+.Pp
+The values for the
+.Fa flag
+are constructed by a bitwise-inclusive OR of flags from the following list,
+defined in
+.In fcntl.h :
+.Bl -tag -width indent
+.It Dv AT_SYMLINK_NOFOLLOW
+If
+.Fa path
+names a symbolic link, the status of the symbolic link is returned.
+.El
+.Pp
+If
+.Fn fstatat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fd
+parameter, the current working directory is used and the behavior is
+identical to a call to
+.Fn stat
+or
+.Fn lstat
+respectively, depending on whether or not the
+.Dv AT_SYMLINK_NOFOLLOW
+bit is set in
+.Fa flag .
+.Pp
+The
 .Fa buf
 argument is a pointer to a
 .Fa stat
@@ -483,6 +528,35 @@ or the file serial number cannot be represented correctly in the
 structure pointed to by
 .Fa buf .
 .El
+.Pp
+In addition to the errors returned by the
+.Fn stat
+and
+.Fn lstat ,
+.Fn fstatat
+may fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa path
+argument does not specify an absolute path and the
+.Fa fd
+argument is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er EINVAL
+The value of the
+.Fa flag
+argument is not valid.
+.It Bq Er ENOTDIR
+The
+.Fa path
+argument is not an absolute path and
+.Fa fd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
+.El
 .Sh CAVEATS
 The file generation number,
 .Fa st_gen ,
@@ -538,6 +612,9 @@ and
 .Fn fstat
 function calls are expected to conform to
 .St -p1003.1-88 .
+The
+.Fn fstatat
+system call is expected to conform to POSIX.1-2008 .
 .Sh HISTORY
 An
 .Fn lstat
@@ -550,3 +627,6 @@ and
 .Fn lstat64
 system calls first appeared in Mac OS X 10.5 (Leopard) and are now deprecated
 in favor of the corresponding symbol variants.
+The
+.Fn fstatat
+system call appeared in OS X 10.10 .
diff --git a/bsd/man/man2/symlink.2 b/bsd/man/man2/symlink.2
index ff2d106de..ae6f2ad6b 100644
--- a/bsd/man/man2/symlink.2
+++ b/bsd/man/man2/symlink.2
@@ -37,7 +37,8 @@
 .Dt SYMLINK 2
 .Os BSD 4.2
 .Sh NAME
-.Nm symlink
+.Nm symlink ,
+.Nm symlinkat
 .Nd make symbolic link to a file
 .Sh SYNOPSIS
 .Fd #include <unistd.h>
@@ -46,6 +47,8 @@
 .Fa "const char *path1"
 .Fa "const char *path2"
 .Fc
+.Ft int
+.Fn symlinkat "const char *name1" "int fd" "const char *name2"
 .Sh DESCRIPTION
 A symbolic link
 .Fa path2
@@ -59,6 +62,27 @@ is the string used in creating the symbolic link).
 Either name may be an arbitrary path name; the files need not be on
 the same file system.
+.Pp
+The
+.Fn symlinkat
+system call is equivalent to
+.Fn symlink
+except in the case where
+.Fa name2
+specifies a relative path.
+In this case the symbolic link is created relative to the directory
+associated with the file descriptor
+.Fa fd
+instead of the current working directory.
+If
+.Fn symlinkat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fd
+parameter, the current working directory is used and the behavior is
+identical to a call to
+.Fn symlink .
 .Sh RETURN VALUES
 Upon successful completion, a zero value is returned.
 If an error occurs, the error code is stored in
@@ -156,13 +180,44 @@ The file
 .Fa path2
 would reside on a read-only file system.
 .El
+.Pp
+In addition to the errors returned by the
+.Fn symlink ,
+the
+.Fn symlinkat
+system call may fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa name2
+argument does not specify an absolute path and the
+.Fa fd
+argument is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er ENOTDIR
+The
+.Fa name2
+argument is not an absolute path and
+.Fa fd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
+.El
 .Sh SEE ALSO
 .Xr ln 1 ,
 .Xr link 2 ,
 .Xr unlink 2 ,
 .Xr symlink 7
+.Sh STANDARDS
+The
+.Fn symlinkat
+system call is expected to conform to POSIX.1-2008 .
 .Sh HISTORY
 The
 .Fn symlink
 function call appeared in
 .Bx 4.2 .
+The
+.Fn symlinkat
+system call appeared in OS X 10.10 .
diff --git a/bsd/man/man2/unlink.2 b/bsd/man/man2/unlink.2
index 3418f53fa..260a9ac80 100644
--- a/bsd/man/man2/unlink.2
+++ b/bsd/man/man2/unlink.2
@@ -37,7 +37,8 @@
 .Dt UNLINK 2
 .Os BSD 4
 .Sh NAME
-.Nm unlink
+.Nm unlink ,
+.Nm unlinkat
 .Nd remove directory entry
 .Sh SYNOPSIS
 .Fd #include <unistd.h>
@@ -45,6 +46,8 @@
 .Fo unlink
 .Fa "const char *path"
 .Fc
+.Ft int
+.Fn unlinkat "int fd" "const char *path" "int flag"
 .Sh DESCRIPTION
 The
 .Fn unlink
@@ -60,6 +63,49 @@ all resources associated with the file are reclaimed.
 If one or more processes have the file open when the last link is removed,
 the link is removed, but the removal of the file is delayed until
 all references to it have been closed.
+.Pp
+The
+.Fn unlinkat
+system call is equivalent to
+.Fn unlink
+or
+.Fn rmdir
+except in the case where
+.Fa path
+specifies a relative path.
+In this case the directory entry to be removed is determined
+relative to the directory associated with the file descriptor
+.Fa fd
+instead of the current working directory.
+.Pp
+The values for
+.Fa flag
+are constructed by a bitwise-inclusive OR of flags from the following list,
+defined in
+.In fcntl.h :
+.Bl -tag -width indent
+.It Dv AT_REMOVEDIR
+Remove the directory entry specified by
+.Fa fd
+and
+.Fa path
+as a directory, not a normal file.
+.El
+.Pp
+If
+.Fn unlinkat
+is passed the special value
+.Dv AT_FDCWD
+in the
+.Fa fd
+parameter, the current working directory is used and the behavior is
+identical to a call to
+.Fn unlink
+or
+.Fn rmdir
+respectively, depending on whether or not the
+.Dv AT_REMOVEDIR
+bit is set in
+.Fa flag .
 .Sh RETURN VALUES
 Upon successful completion, a value of 0 is returned.
 Otherwise, a value of -1 is returned and
@@ -125,13 +171,66 @@ are owned by the effective user ID.
 .It Bq Er EROFS
 The named file resides on a read-only file system.
 .El
+.Pp
+In addition to the errors returned by the
+.Fn unlink ,
+the
+.Fn unlinkat
+system call may fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa path
+argument does not specify an absolute path and the
+.Fa fd
+argument is neither
+.Dv AT_FDCWD
+nor a valid file descriptor open for searching.
+.It Bq Er ENOTEMPTY
+The
+.Fa flag
+parameter has the
+.Dv AT_REMOVEDIR
+bit set and the
+.Fa path
+argument names a directory that is not an empty directory,
+or there are hard links to the directory other than dot or
+a single entry in dot-dot.
+.It Bq Er ENOTDIR
+The
+.Fa flag
+parameter has the
+.Dv AT_REMOVEDIR
+bit set and
+.Fa path
+does not name a directory.
+.It Bq Er EINVAL
+The value of the
+.Fa flag
+argument is not valid.
+.It Bq Er ENOTDIR
+The
+.Fa path
+argument is not an absolute path and
+.Fa fd
+is neither
+.Dv AT_FDCWD
+nor a file descriptor associated with a directory.
+.El
 .Sh SEE ALSO
 .Xr close 2 ,
 .Xr link 2 ,
 .Xr rmdir 2 ,
 .Xr symlink 7
+.Sh STANDARDS
+The
+.Fn unlinkat
+system call is expected to conform to POSIX.1-2008 .
 .Sh HISTORY
 An
 .Fn unlink
 function call appeared in
 .At v6 .
+The
+.Fn unlinkat
+system call appeared in OS X 10.10 .
diff --git a/bsd/man/man2/unlinkat.2 b/bsd/man/man2/unlinkat.2
new file mode 100644
index 000000000..4921f7313
--- /dev/null
+++ b/bsd/man/man2/unlinkat.2
@@ -0,0 +1 @@
+.so man2/unlink.2
diff --git a/bsd/man/man4/tcp.4 b/bsd/man/man4/tcp.4
index 8d5b79c4f..05584e255 100644
--- a/bsd/man/man4/tcp.4
+++ b/bsd/man/man4/tcp.4
@@ -33,7 +33,7 @@
 .\"
 .\" @(#)tcp.4 8.1 (Berkeley) 6/5/93
 .\"
-.Dd February 28, 2007
+.Dd April 16, 2014
 .Dt TCP 4
 .Os BSD 4.2
 .Sh NAME
@@ -185,6 +185,30 @@ connections.
 The default value is specified by the
 .Tn MIB
 variable
 .Va net.inet.tcp.keepinit .
+.It Dv TCP_KEEPINTVL
+When keepalive probes are enabled, this option will set the amount of time in seconds between successive keepalives sent to probe an unresponsive peer.
+.It Dv TCP_KEEPCNT
+When keepalive probes are enabled, this option will set the number of times a keepalive probe should be repeated if the peer is not responding. After this many probes, the connection will be closed.
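A minimal sketch of applying the two keepalive options just described, before the option list continues below. This is an editorial addition, not part of the man page; the idle-time option TCP_KEEPALIVE and all of the values are illustrative assumptions:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

int tune_keepalive(int sock)
{
    int on = 1, idle = 60, intvl = 10, cnt = 5;

    /* SO_KEEPALIVE must be enabled for the TCP_KEEP* options to matter */
    if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) < 0)
        return -1;
    /* seconds of idle time before the first probe (TCP_KEEPALIVE) */
    if (setsockopt(sock, IPPROTO_TCP, TCP_KEEPALIVE, &idle, sizeof(idle)) < 0)
        return -1;
    /* seconds between successive probes (TCP_KEEPINTVL, above) */
    if (setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) < 0)
        return -1;
    /* unanswered probes before the connection is closed (TCP_KEEPCNT, above) */
    if (setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) < 0)
        return -1;
    return 0;
}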
+.It Dv TCP_SENDMOREACKS
+When a stream of
+.Tn TCP
+data packets are received, OS X uses an algorithm to reduce the number of acknowledgements by generating a
+.Tn TCP
+acknowledgement for every 8 data packets instead of acknowledging every other data packet. When this socket option is enabled, the connection will always send a
+.Tn TCP
+acknowledgement for every other data packet.
+.It Dv TCP_ENABLE_ECN
+Using Explicit Congestion Notification (ECN) on
+.Tn TCP
+allows end-to-end notification of congestion without dropping packets. Conventionally TCP/IP networks signal congestion by dropping packets. When ECN is successfully negotiated, an ECN-aware router may set a mark in the IP header instead of dropping a packet in order to signal impending congestion. The
+.Tn TCP
+receiver of the packet echoes congestion indication to the
+.Tn TCP
+sender, which reduces its transmission rate as if it detected a dropped packet. This will avoid unnecessary retransmissions and will improve latency by saving the time required for recovering a lost packet.
+.It Dv TCP_NOTSENT_LOWAT
+The send socket buffer of a
+.Tn TCP sender holds both unsent and unacknowledged data. This option allows a
+.Tn TCP sender to limit the amount of unsent data kept in the send socket buffer. The value of the option should be the maximum amount of unsent data in bytes. Kevent, poll and select will generate a write notification when the unsent data falls below the amount given by this option. This will allow an application to generate just-in-time fresh updates for real-time communication.
 .El
 .Pp
 The option level for the
diff --git a/bsd/man/man5/dir.5 b/bsd/man/man5/dir.5
index c9e37b3b5..fa91bdcb8 100644
--- a/bsd/man/man5/dir.5
+++ b/bsd/man/man5/dir.5
@@ -131,7 +131,7 @@ structure is defined as:
 struct dirent { /* when _DARWIN_FEATURE_64_BIT_INODE is defined */
 	ino_t      d_fileno;  /* file number of entry */
-	__uint16_t d_seekoff; /* seek offset (optional, used by servers) */
+	__uint64_t d_seekoff; /* seek offset (optional, used by servers) */
 	__uint16_t d_reclen;  /* length of this record */
 	__uint16_t d_namlen;  /* length of string in d_name */
 	__uint8_t  d_type;    /* file type, see below */
diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c
index daf8c8ace..45d529e8d 100644
--- a/bsd/miscfs/devfs/devfs_tree.c
+++ b/bsd/miscfs/devfs/devfs_tree.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -1129,6 +1129,7 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p)
 	struct vnode_fsparam vfsp;
 	enum vtype vtype = 0;
 	int markroot = 0;
+	int nretries = 0;
 	int n_minor = DEVFS_CLONE_ALLOC; /* new minor number for clone device */
 
 	/*
@@ -1179,6 +1180,26 @@ retry:
 	 * vnode. Therefore, ENOENT is a valid result.
 	 */
 	error = ENOENT;
+	} else if (error && (nretries < DEV_MAX_VNODE_RETRY)) {
+		/*
+		 * If we got an error from vnode_getwithvid, it means
+		 * we raced with a recycle and lost, i.e., we asked for
+		 * an iocount only after vnode_drain had completed on
+		 * the vnode and returned with an error only after
+		 * devfs_reclaim was called on the vnode.
+		 * devfs_reclaim sets dn_vn to NULL, but while we were
+		 * waiting to reacquire DEVFS_LOCK, another vnode might
+		 * have gotten associated with the dnp.
In either case, + * we need to retry otherwise we will end up returning + * an ENOENT for this lookup but the next lookup will + * succeed because it creates a new vnode (or a racing + * lookup created a new vnode already). + * + * We cap the number of retries at 8. + */ + error = 0; + nretries++; + goto retry; } if ( !error) *vn_pp = vn_p; diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c index b40778e88..6ad899939 100644 --- a/bsd/miscfs/devfs/devfs_vfsops.c +++ b/bsd/miscfs/devfs/devfs_vfsops.c @@ -93,9 +93,8 @@ static int devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, vfs_context_t ctx); static int devfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx); -#if !defined(SECURE_KERNEL) -extern int setup_kmem; -__private_extern__ void devfs_setup_kmem(void); +#if CONFIG_DEV_KMEM +extern boolean_t dev_kmem_enabled; #endif /*- @@ -109,43 +108,33 @@ __private_extern__ void devfs_setup_kmem(void); static int devfs_init(__unused struct vfsconf *vfsp) { - if (devfs_sinit()) - return (ENOTSUP); - devfs_make_node(makedev(0, 0), DEVFS_CHAR, - UID_ROOT, GID_WHEEL, 0622, "console"); - devfs_make_node(makedev(2, 0), DEVFS_CHAR, - UID_ROOT, GID_WHEEL, 0666, "tty"); -#if !defined(SECURE_KERNEL) - if (setup_kmem) { - devfs_setup_kmem(); - } + if (devfs_sinit()) + return (ENOTSUP); + devfs_make_node(makedev(0, 0), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0622, "console"); + devfs_make_node(makedev(2, 0), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0666, "tty"); +#if CONFIG_DEV_KMEM + if (dev_kmem_enabled) { + /* (3,0) reserved for /dev/mem physical memory */ + devfs_make_node(makedev(3, 1), DEVFS_CHAR, + UID_ROOT, GID_KMEM, 0640, "kmem"); + } #endif - devfs_make_node(makedev(3, 2), DEVFS_CHAR, - UID_ROOT, GID_WHEEL, 0666, "null"); - devfs_make_node(makedev(3, 3), DEVFS_CHAR, - UID_ROOT, GID_WHEEL, 0666, "zero"); - devfs_make_node(makedev(6, 0), DEVFS_CHAR, - UID_ROOT, GID_WHEEL, 0600, "klog"); + devfs_make_node(makedev(3, 2), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0666, "null"); + devfs_make_node(makedev(3, 3), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0666, "zero"); + devfs_make_node(makedev(6, 0), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0600, "klog"); #if FDESC - devfs_fdesc_init(); + devfs_fdesc_init(); #endif return 0; } -#if !defined(SECURE_KERNEL) -__private_extern__ void -devfs_setup_kmem(void) -{ - devfs_make_node(makedev(3, 0), DEVFS_CHAR, - UID_ROOT, GID_KMEM, 0640, "mem"); - devfs_make_node(makedev(3, 1), DEVFS_CHAR, - UID_ROOT, GID_KMEM, 0640, "kmem"); -} -#endif - - /*- * mp - pointer to 'mount' structure * path - addr in user space of mount point (ie /usr or whatever) @@ -481,109 +470,16 @@ devfs_sysctl(__unused int *name, __unused u_int namelen, __unused user_addr_t ol int devfs_kernel_mount(char * mntname) { - struct mount *mp; int error; - struct nameidata nd; - struct vnode * vp; vfs_context_t ctx = vfs_context_kernel(); - struct vfstable *vfsp; - - /* Find our vfstable entry */ - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (!strncmp(vfsp->vfc_name, "devfs", sizeof(vfsp->vfc_name))) - break; - - if (!vfsp) { - panic("Could not find entry in vfsconf for devfs.\n"); - } - - /* - * Get vnode to be covered - */ - NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | LOCKLEAF, UIO_SYSSPACE, - CAST_USER_ADDR_T(mntname), ctx); - if ((error = namei(&nd))) { - printf("devfs_kernel_mount: failed to find directory '%s', %d\n", - mntname, error); - return (error); - } - nameidone(&nd); - vp = nd.ni_vp; - - if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) { - printf("devfs_kernel_mount: 
vnop_fsync failed: %d\n", error); - vnode_put(vp); - return (error); - } - if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) { - printf("devfs_kernel_mount: buf_invalidateblks failed: %d\n", error); - vnode_put(vp); - return (error); - } - if (vnode_isdir(vp) == 0) { - printf("devfs_kernel_mount: '%s' is not a directory\n", mntname); - vnode_put(vp); - return (ENOTDIR); - } - if ((vnode_mountedhere(vp))) { - vnode_put(vp); - return (EBUSY); - } - - /* - * Allocate and initialize the filesystem. - */ - MALLOC_ZONE(mp, struct mount *, sizeof(struct mount), - M_MOUNT, M_WAITOK); - bzero((char *)mp, sizeof(struct mount)); - - /* Initialize the default IO constraints */ - mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; - mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; - mp->mnt_ioflags = 0; - mp->mnt_realrootvp = NULLVP; - mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; - - mount_lock_init(mp); - TAILQ_INIT(&mp->mnt_vnodelist); - TAILQ_INIT(&mp->mnt_workerqueue); - TAILQ_INIT(&mp->mnt_newvnodes); - - (void)vfs_busy(mp, LK_NOWAIT); - mp->mnt_op = &devfs_vfsops; - mp->mnt_vtable = vfsp; - mp->mnt_flag = 0; - mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; - strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); - vp->v_mountedhere = mp; - mp->mnt_vnodecovered = vp; - mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get()); - (void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0); -#if CONFIG_MACF - mac_mount_label_init(mp); - mac_mount_label_associate(ctx, mp); -#endif - - error = devfs_mount(mp, NULL, USER_ADDR_NULL, ctx); + char fsname[] = "devfs"; + error = kernel_mount(fsname, NULLVP, NULLVP, mntname, NULL, 0, MNT_DONTBROWSE, KERNEL_MOUNT_NOAUTH, ctx); if (error) { - printf("devfs_kernel_mount: mount %s failed: %d\n", mntname, error); - mp->mnt_vtable->vfc_refcount--; - - vfs_unbusy(mp); - - mount_lock_destroy(mp); -#if CONFIG_MACF - mac_mount_label_destroy(mp); -#endif - FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); - vnode_put(vp); - return (error); + printf("devfs_kernel_mount: kernel_mount failed: %d\n", error); + return (error); } - vnode_ref(vp); - vnode_put(vp); - vfs_unbusy(mp); - mount_list_add(mp); + return (0); } diff --git a/bsd/miscfs/devfs/devfsdefs.h b/bsd/miscfs/devfs/devfsdefs.h index e8b12000a..79e99f512 100644 --- a/bsd/miscfs/devfs/devfsdefs.h +++ b/bsd/miscfs/devfs/devfsdefs.h @@ -119,6 +119,8 @@ union devnode_type { }Slnk; }; +#define DEV_MAX_VNODE_RETRY 8 /* Max number of retries when we try to + get a vnode for the devnode */ struct devnode { devfstype_t dn_type; diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index 49d021787..0fd816243 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -91,7 +91,7 @@ /* XXX following three prototypes should be in a header file somewhere */ extern dev_t chrtoblk(dev_t dev); -extern int iskmemdev(dev_t dev); +extern boolean_t iskmemdev(dev_t dev); extern int bpfkqfilter(dev_t dev, struct knote *kn); extern int ptsd_kqfilter(dev_t dev, struct knote *kn); @@ -217,6 +217,7 @@ struct _throttle_io_info_t { int32_t throttle_refcnt; int32_t throttle_alloc; + int32_t throttle_disabled; }; struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV]; @@ -307,17 +308,18 @@ spec_open(struct vnop_open_args *ap) */ if (securelevel >= 2 && isdisk(dev, VCHR)) return (EPERM); + + /* Never allow writing to /dev/mem or /dev/kmem */ + if (iskmemdev(dev)) + return (EPERM); /* - * When running in secure mode, do not allow opens - * for 
writing of /dev/mem, /dev/kmem, or character - * devices whose corresponding block devices are - * currently mounted. + * When running in secure mode, do not allow opens for + * writing of character devices whose corresponding block + * devices are currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error)) return (error); - if (iskmemdev(dev)) - return (EPERM); } } @@ -464,7 +466,6 @@ spec_read(struct vnop_read_args *ap) struct _throttle_io_info_t *throttle_info; throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit]; - throttle_info_update_internal(throttle_info, NULL, 0, vp->v_un.vu_specinfo->si_isssd); } error = (*cdevsw[major(vp->v_rdev)].d_read) @@ -1233,13 +1234,20 @@ throttle_init_throttle_period(struct _throttle_io_info_t *info, boolean_t isssd) } +#if CONFIG_IOSCHED +extern void vm_io_reprioritize_init(void); +int iosched_enabled = 1; +#endif + void throttle_init(void) { struct _throttle_io_info_t *info; int i; int level; - +#if CONFIG_IOSCHED + int iosched; +#endif /* * allocate lock group attribute and group */ @@ -1265,7 +1273,17 @@ throttle_init(void) info->throttle_last_IO_pid[level] = 0; } info->throttle_next_wake_level = THROTTLE_LEVEL_END; + info->throttle_disabled = 0; + } +#if CONFIG_IOSCHED + if (PE_parse_boot_argn("iosched", &iosched, sizeof(iosched))) { + iosched_enabled = iosched; + } + if (iosched_enabled) { + /* Initialize I/O Reprioritization mechanism */ + vm_io_reprioritize_init(); } +#endif } void @@ -1273,6 +1291,7 @@ sys_override_io_throttle(int flag) { if (flag == THROTTLE_IO_ENABLE) lowpri_throttle_enabled = 1; + if (flag == THROTTLE_IO_DISABLE) lowpri_throttle_enabled = 0; } @@ -1579,7 +1598,7 @@ throttle_io_will_be_throttled_internal(void * throttle_info, int * mylevel, int int throttle_io_will_be_throttled(__unused int lowpri_window_msecs, mount_t mp) { - void *info; + struct _throttle_io_info_t *info; /* * Should we just return zero if no mount point @@ -1591,7 +1610,10 @@ throttle_io_will_be_throttled(__unused int lowpri_window_msecs, mount_t mp) else info = mp->mnt_throttle_info; - return throttle_io_will_be_throttled_internal(info, NULL, NULL); + if (info->throttle_disabled) + return (THROTTLE_DISENGAGED); + else + return throttle_io_will_be_throttled_internal(info, NULL, NULL); } /* @@ -1599,18 +1621,18 @@ throttle_io_will_be_throttled(__unused int lowpri_window_msecs, mount_t mp) */ static void -throttle_update_proc_stats(pid_t throttling_pid) +throttle_update_proc_stats(pid_t throttling_pid, int count) { proc_t throttling_proc; proc_t throttled_proc = current_proc(); /* The throttled_proc is always the current proc; so we are not concerned with refs */ - OSAddAtomic64(1, &(throttled_proc->was_throttled)); + OSAddAtomic64(count, &(throttled_proc->was_throttled)); /* The throttling pid might have exited by now */ throttling_proc = proc_find(throttling_pid); if (throttling_proc != PROC_NULL) { - OSAddAtomic64(1, &(throttling_proc->did_throttle)); + OSAddAtomic64(count, &(throttling_proc->did_throttle)); proc_rele(throttling_proc); } } @@ -1670,7 +1692,6 @@ throttle_lowpri_io(int sleep_amount) goto done; } assert(throttling_level >= THROTTLE_LEVEL_START && throttling_level <= THROTTLE_LEVEL_END); - throttle_update_proc_stats(info->throttle_last_IO_pid[throttling_level]); KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_THROTTLE, PROCESS_THROTTLED)) | DBG_FUNC_NONE, info->throttle_last_IO_pid[throttling_level], throttling_level, proc_selfpid(), mylevel, 0); @@ -1703,6 +1724,13 @@ 
done: if (sleep_cnt) { KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END, throttle_windows_msecs[mylevel], info->throttle_io_periods[mylevel], info->throttle_io_count, 0, 0); + /* + * We update the stats for the last pid which opened a throttle window for the throttled thread. + * This might not be completely accurate since the multiple throttles seen by the lower tier pid + * might have been caused by various higher prio pids. However, updating these stats accurately + * means doing a proc_find while holding the throttle lock which leads to deadlock. + */ + throttle_update_proc_stats(info->throttle_last_IO_pid[throttling_level], sleep_cnt); } throttle_info_rel(info); @@ -1730,7 +1758,6 @@ void throttle_set_thread_io_policy(int policy) } -static void throttle_info_reset_window(uthread_t ut) { struct _throttle_io_info_t *info; @@ -1747,7 +1774,7 @@ void throttle_info_reset_window(uthread_t ut) static void throttle_info_set_initial_window(uthread_t ut, struct _throttle_io_info_t *info, boolean_t BC_throttle, boolean_t isssd) { - if (lowpri_throttle_enabled == 0) + if (lowpri_throttle_enabled == 0 || info->throttle_disabled) return; if (info->throttle_io_periods == 0) { @@ -1770,7 +1797,7 @@ void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t u { int thread_throttle_level; - if (lowpri_throttle_enabled == 0) + if (lowpri_throttle_enabled == 0 || info->throttle_disabled) return; if (ut == NULL) @@ -1858,6 +1885,25 @@ void throttle_info_update_by_mask(void *throttle_info_handle, int flags) */ throttle_info_update(throttle_info, flags); } +/* + * KPI routine + * + * This routine marks the throttle info as disabled. Used for mount points which + * support I/O scheduling. + */ + +void throttle_info_disable_throttle(int devno) +{ + struct _throttle_io_info_t *info; + + if (devno < 0 || devno >= LOWPRI_MAX_NUM_DEV) + panic("Illegal devno (%d) passed into throttle_info_disable_throttle()", devno); + + info = &_throttle_io_info[devno]; + info->throttle_disabled = 1; + return; +} + /* * KPI routine (private) @@ -1922,6 +1968,8 @@ spec_strategy(struct vnop_strategy_args *ap) int strategy_ret; struct _throttle_io_info_t *throttle_info; boolean_t isssd = FALSE; + int code = 0; + proc_t curproc = current_proc(); bp = ap->a_bp; @@ -1935,10 +1983,34 @@ spec_strategy(struct vnop_strategy_args *ap) if (bp->b_flags & B_META) bap->ba_flags |= BA_META; +#if CONFIG_IOSCHED + /* + * For I/O Scheduling, we currently do not have a way to track and expedite metadata I/Os. 
+ * To ensure we dont get into priority inversions due to metadata I/Os, we use the following rules: + * For metadata reads, ceil all I/Os to IOSCHED_METADATA_TIER & mark them passive if the I/O tier was upgraded + * For metadata writes, unconditionally mark them as IOSCHED_METADATA_TIER and passive + */ + if (bap->ba_flags & BA_META) { + if (mp && (mp->mnt_ioflags & MNT_IOFLAGS_IOSCHED_SUPPORTED)) { + if (bp->b_flags & B_READ) { + if (io_tier > IOSCHED_METADATA_TIER) { + io_tier = IOSCHED_METADATA_TIER; + passive = 1; + } + } else { + io_tier = IOSCHED_METADATA_TIER; + passive = 1; + } + } + } +#endif /* CONFIG_IOSCHED */ + SET_BUFATTR_IO_TIER(bap, io_tier); - if (passive) + if (passive) { bp->b_flags |= B_PASSIVE; + bap->ba_flags |= BA_PASSIVE; + } if ((curproc != NULL) && ((curproc->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)) bap->ba_flags |= BA_DELAYIDLESLEEP; @@ -1948,38 +2020,38 @@ spec_strategy(struct vnop_strategy_args *ap) if (((bflags & B_READ) == 0) && ((bflags & B_ASYNC) == 0)) bufattr_markquickcomplete(bap); - if (kdebug_enable) { - int code = 0; - - if (bflags & B_READ) - code |= DKIO_READ; - if (bflags & B_ASYNC) - code |= DKIO_ASYNC; + if (bflags & B_READ) + code |= DKIO_READ; + if (bflags & B_ASYNC) + code |= DKIO_ASYNC; + if (bflags & B_META) + code |= DKIO_META; + else if (bflags & B_PAGEIO) + code |= DKIO_PAGING; - if (bflags & B_META) - code |= DKIO_META; - else if (bflags & B_PAGEIO) - code |= DKIO_PAGING; + if (io_tier != 0) + code |= DKIO_THROTTLE; - if (io_tier != 0) - code |= DKIO_THROTTLE; + code |= ((io_tier << DKIO_TIER_SHIFT) & DKIO_TIER_MASK); - code |= ((io_tier << DKIO_TIER_SHIFT) & DKIO_TIER_MASK); + if (bflags & B_PASSIVE) + code |= DKIO_PASSIVE; - if (bflags & B_PASSIVE) - code |= DKIO_PASSIVE; - - if (bap->ba_flags & BA_NOCACHE) - code |= DKIO_NOCACHE; + if (bap->ba_flags & BA_NOCACHE) + code |= DKIO_NOCACHE; + if (kdebug_enable) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON, FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, buf_kernel_addrperm_addr(bp), bdev, (int)buf_blkno(bp), buf_count(bp), 0); } + + thread_update_io_stats(current_thread(), buf_count(bp), code); + if (mp != NULL) { if ((mp->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd) isssd = TRUE; throttle_info = &_throttle_io_info[mp->mnt_devbsdunit]; - } else + } else throttle_info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1]; throttle_info_update_internal(throttle_info, ut, bflags, isssd); @@ -2060,7 +2132,6 @@ spec_close(struct vnop_close_args *ap) int flags = ap->a_fflag; struct proc *p = vfs_context_proc(ap->a_context); struct session *sessp; - int do_rele = 0; switch (vp->v_type) { @@ -2078,7 +2149,7 @@ spec_close(struct vnop_close_args *ap) devsw_lock(dev, S_IFCHR); if (sessp != SESSION_NULL) { if (vp == sessp->s_ttyvp && vcount(vp) == 1) { - struct tty *tp; + struct tty *tp = TTY_NULL; devsw_unlock(dev, S_IFCHR); session_lock(sessp); @@ -2088,14 +2159,20 @@ spec_close(struct vnop_close_args *ap) sessp->s_ttyvid = 0; sessp->s_ttyp = TTY_NULL; sessp->s_ttypgrpid = NO_PID; - do_rele = 1; } session_unlock(sessp); - if (do_rele) { - vnode_rele(vp); - if (NULL != tp) - ttyfree(tp); + if (tp != TTY_NULL) { + /* + * We may have won a race with a proc_exit + * of the session leader, the winner + * clears the flag (even if not set) + */ + tty_lock(tp); + ttyclrpgrphup(tp); + tty_unlock(tp); + + ttyfree(tp); } devsw_lock(dev, S_IFCHR); } diff --git a/bsd/net/Makefile b/bsd/net/Makefile index fba2de704..bdb100c29 100644 --- a/bsd/net/Makefile +++ b/bsd/net/Makefile @@ -29,7 +29,7 @@ PRIVATE_DATAFILES = \ 
netsrc.h raw_cb.h etherdefs.h if_pflog.h pfvar.h \ if_bridgevar.h ntstat.h iptap.h if_llreach.h \ if_utun_crypto.h if_utun_crypto_ipsec.h if_utun_crypto_dtls.h \ - pktap.h if_ipsec.h \ + pktap.h if_ipsec.h necp.h content_filter.h packet_mangler.h PRIVATE_KERNELFILES = $(filter-out radix.h,${KERNELFILES}) \ bpfdesc.h ppp_comp.h \ diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c index 207ab752e..1383cbb80 100644 --- a/bsd/net/bpf.c +++ b/bsd/net/bpf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -122,6 +122,7 @@ #include #include +#include #include #include @@ -150,6 +151,14 @@ SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED, static unsigned int bpf_maxdevices = 256; SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED, &bpf_maxdevices, 0, ""); +/* + * bpf_wantpktap controls the defaul visibility of DLT_PKTAP + * For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP + * explicitly to be able to use DLT_PKTAP. + */ +static unsigned int bpf_wantpktap = 0; +SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED, + &bpf_wantpktap, 0, ""); /* * bpf_iflist is the list of interfaces; each corresponds to an ifnet @@ -480,12 +489,21 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp) if (first) { /* Find the default bpf entry for this ifp */ if (bp->bif_ifp->if_bpf == NULL) { - struct bpf_if *primary; + struct bpf_if *tmp, *primary = NULL; - for (primary = bpf_iflist; primary && primary->bif_ifp != bp->bif_ifp; - primary = primary->bif_next) - ; - + for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) { + if (tmp->bif_ifp != bp->bif_ifp) + continue; + primary = tmp; + /* + * Make DLT_PKTAP only if process knows how + * to deal with it, otherwise find another one + */ + if (tmp->bif_dlt == DLT_PKTAP && + !(d->bd_flags & BPF_WANT_PKTAP)) + continue; + break; + } bp->bif_ifp->if_bpf = primary; } @@ -497,6 +515,12 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp) error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT); } + if (bp->bif_ifp->if_bpf != NULL && + bp->bif_ifp->if_bpf->bif_dlt == DLT_PKTAP) + d->bd_flags |= BPF_FINALIZE_PKTAP; + else + d->bd_flags &= ~BPF_FINALIZE_PKTAP; + return error; } @@ -677,6 +701,10 @@ bpfopen(dev_t dev, int flags, __unused int fmt, d->bd_state = BPF_IDLE; d->bd_thread_call = thread_call_allocate(bpf_timed_out, d); d->bd_traffic_class = SO_TC_BE; + if (bpf_wantpktap) + d->bd_flags |= BPF_WANT_PKTAP; + else + d->bd_flags &= ~BPF_WANT_PKTAP; if (d->bd_thread_call == NULL) { printf("bpfopen: malloc thread call failed\n"); @@ -829,6 +857,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) caddr_t hbuf; int timed_out, hbuf_len; int error; + int flags; lck_mtx_lock(bpf_mlock); @@ -915,8 +944,16 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) lck_mtx_unlock(bpf_mlock); return (ENXIO); } - + if (error == EINTR || error == ERESTART) { + if (d->bd_hbuf) { + /* + * Because we msleep, the hold buffer might + * be filled when we wake up. Avoid rotating + * in this case. + */ + break; + } if (d->bd_slen) { /* * Sometimes we may be interrupted often and @@ -957,16 +994,28 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) * At this point, we know we have something in the hold slot. */ + /* + * Set the hold buffer read. So we do not + * rotate the buffers until the hold buffer + * read is complete. 
Also to avoid issues resulting + * from page faults during disk sleep (). + */ + d->bd_hbuf_read = 1; + hbuf = d->bd_hbuf; + hbuf_len = d->bd_hlen; + flags = d->bd_flags; + lck_mtx_unlock(bpf_mlock); + #ifdef __APPLE__ /* * Before we move data to userland, we fill out the extended * header fields. */ - if (d->bd_extendedhdr) { + if (flags & BPF_EXTENDED_HDR) { char *p; - p = d->bd_hbuf; - while (p < d->bd_hbuf + d->bd_hlen) { + p = hbuf; + while (p < hbuf + hbuf_len) { struct bpf_hdr_ext *ehp; uint32_t flowid; struct so_procinfo soprocinfo; @@ -980,26 +1029,56 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) else if (ehp->bh_proto == IPPROTO_UDP) found = inp_findinpcb_procinfo(&udbinfo, flowid, &soprocinfo); - if (found != 0) { + if (found == 1) { ehp->bh_pid = soprocinfo.spi_pid; proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN); } ehp->bh_flowid = 0; } + if (flags & BPF_FINALIZE_PKTAP) { + struct pktap_header *pktaphdr; + + pktaphdr = (struct pktap_header *)(void *) + (p + BPF_WORDALIGN(ehp->bh_hdrlen)); + + if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) + pktap_finalize_proc_info(pktaphdr); + + if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) { + ehp->bh_tstamp.tv_sec = + pktaphdr->pth_tstamp.tv_sec; + ehp->bh_tstamp.tv_usec = + pktaphdr->pth_tstamp.tv_usec; + } + } p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen); } + } else if (flags & BPF_FINALIZE_PKTAP) { + char *p; + + p = hbuf; + while (p < hbuf + hbuf_len) { + struct bpf_hdr *hp; + struct pktap_header *pktaphdr; + + hp = (struct bpf_hdr *)(void *)p; + pktaphdr = (struct pktap_header *)(void *) + (p + BPF_WORDALIGN(hp->bh_hdrlen)); + + if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) + pktap_finalize_proc_info(pktaphdr); + + if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) { + hp->bh_tstamp.tv_sec = + pktaphdr->pth_tstamp.tv_sec; + hp->bh_tstamp.tv_usec = + pktaphdr->pth_tstamp.tv_usec; + } + + p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen); + } } #endif - /* - * Set the hold buffer read. So we do not - * rotate the buffers until the hold buffer - * read is complete. Also to avoid issues resulting - * from page faults during disk sleep (). - */ - d->bd_hbuf_read = 1; - hbuf = d->bd_hbuf; - hbuf_len = d->bd_hlen; - lck_mtx_unlock(bpf_mlock); /* * Move data from hold buffer into user space. @@ -1234,7 +1313,8 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, struct proc *p) { struct bpf_d *d; - int error = 0, int_arg; + int error = 0; + u_int int_arg; struct ifreq ifr; lck_mtx_lock(bpf_mlock); @@ -1609,8 +1689,12 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, bcopy(&d->bd_sig, addr, sizeof (u_int)); break; #ifdef __APPLE__ - case BIOCSEXTHDR: - bcopy(addr, &d->bd_extendedhdr, sizeof (u_int)); + case BIOCSEXTHDR: /* u_int */ + bcopy(addr, &int_arg, sizeof (int_arg)); + if (int_arg) + d->bd_flags |= BPF_EXTENDED_HDR; + else + d->bd_flags &= ~BPF_EXTENDED_HDR; break; case BIOCGIFATTACHCOUNT: { /* struct ifreq */ @@ -1637,6 +1721,18 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, bcopy(&ifr, addr, sizeof (ifr)); break; } + case BIOCGWANTPKTAP: /* u_int */ + int_arg = d->bd_flags & BPF_WANT_PKTAP ? 
1 : 0; + bcopy(&int_arg, addr, sizeof (int_arg)); + break; + + case BIOCSWANTPKTAP: /* u_int */ + bcopy(addr, &int_arg, sizeof (int_arg)); + if (int_arg) + d->bd_flags |= BPF_WANT_PKTAP; + else + d->bd_flags &= ~BPF_WANT_PKTAP; + break; #endif } @@ -1724,6 +1820,13 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev) if (ifp == 0 || ifp != theywant || (dlt != 0 && dlt != bp->bif_dlt)) continue; + /* + * If the process knows how to deal with DLT_PKTAP, use it + * by default + */ + if (dlt == 0 && bp->bif_dlt == DLT_PKTAP && + !(d->bd_flags & BPF_WANT_PKTAP)) + continue; /* * We found the requested interface. * Allocate the packet buffers if we need to. @@ -1778,9 +1881,15 @@ bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p) ifp = d->bd_bif->bif_ifp; n = 0; error = 0; + for (bp = bpf_iflist; bp; bp = bp->bif_next) { if (bp->bif_ifp != ifp) continue; + /* + * Return DLT_PKTAP only to processes that know how to handle it + */ + if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) + continue; if (dlist != USER_ADDR_NULL) { if (n >= bfl.bfl_len) { return (ENOMEM); @@ -1818,7 +1927,7 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev) d = bpf_dtab[minor(dev)]; if (d == 0 || d == (void *)1) return (ENXIO); - + ifp = d->bd_bif->bif_ifp; for (bp = bpf_iflist; bp; bp = bp->bif_next) { if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) @@ -2216,7 +2325,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, struct m_tag *mt = NULL; struct bpf_mtag *bt = NULL; - hdrlen = d->bd_extendedhdr ? d->bd_bif->bif_exthdrlen : + hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen : d->bd_bif->bif_hdrlen; /* * Figure out how many bytes to move. If the packet is @@ -2262,7 +2371,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, * Append the bpf header. 
 */
	microtime(&tv);
-	if (d->bd_extendedhdr) {
+	if (d->bd_flags & BPF_EXTENDED_HDR) {
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = tv.tv_sec;
@@ -2460,42 +2569,56 @@ void
 bpfdetach(struct ifnet *ifp)
 {
	struct bpf_if	*bp, *bp_prev, *bp_next;
-	struct bpf_if	*bp_free = NULL;
+	struct bpf_if	*bp_free_list = NULL;
	struct bpf_d	*d;

	lck_mtx_lock(bpf_mlock);

-	/* Locate BPF interface information */
+	/*
+	 * Build the list of devices attached to that interface
+	 * that we need to free while keeping the lock to maintain
+	 * the integrity of the interface list
+	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		bp_next = bp->bif_next;
+
		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
-
+		/* Unlink from the interface list */
+		if (bp_prev)
+			bp_prev->bif_next = bp->bif_next;
+		else
+			bpf_iflist = bp->bif_next;
+
+		/* Add to the list to be freed */
+		bp->bif_next = bp_free_list;
+		bp_free_list = bp;
+	}
+
+	/*
+	 * Detach the bpf devices attached to the interface.
+	 * At this point we no longer care if we lose the
+	 * bpf_mlock in bpf_detachd
+	 */
+	for (bp = bp_free_list; bp != NULL; bp = bp->bif_next) {
		while ((d = bp->bif_dlist) != NULL) {
			bpf_detachd(d);
			bpf_wakeup(d);
		}
-
-		if (bp_prev) {
-			bp_prev->bif_next = bp->bif_next;
-		} else {
-			bpf_iflist = bp->bif_next;
-		}
-
-		bp->bif_next = bp_free;
-		bp_free = bp;
-
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);

-	FREE(bp, M_DEVBUF);
-
+	/*
+	 * Free the list
+	 */
+	while ((bp = bp_free_list) != NULL) {
+		bp_free_list = bp->bif_next;
+		FREE(bp, M_DEVBUF);
+	}
 }

 void
diff --git a/bsd/net/bpf.h b/bsd/net/bpf.h
index 00e3ac5d8..003f631d1 100644
--- a/bsd/net/bpf.h
+++ b/bsd/net/bpf.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -206,7 +206,10 @@ struct bpf_version {
 #define BIOCSETFNR64	_IOW('B',126, struct bpf_program64)
 #define BIOCSETFNR32	_IOW('B',126, struct bpf_program32)
 #endif /* KERNEL_PRIVATE */
-
+#ifdef PRIVATE
+#define BIOCGWANTPKTAP	_IOR('B', 127, u_int)
+#define BIOCSWANTPKTAP	_IOWR('B', 127, u_int)
+#endif /* PRIVATE */
 /*
  * Structure prepended to each packet.
  */
diff --git a/bsd/net/bpfdesc.h b/bsd/net/bpfdesc.h
index 8cf632799..4145bf405 100644
--- a/bsd/net/bpfdesc.h
+++ b/bsd/net/bpfdesc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -134,7 +134,7 @@ struct bpf_d {
	struct label *	bd_label;	/* MAC label for descriptor */
 #endif
	int		bd_traffic_class; /* traffic service class */
-	int		bd_extendedhdr;	/* process req. the extended header */
+	int		bd_flags;	/* flags */
 };

 /* Values for bd_state */
@@ -149,6 +149,11 @@ struct bpf_d {
	 (bd)->bd_slen != 0))

+/* Values for bd_flags */
+#define	BPF_EXTENDED_HDR	0x01	/* process requested the extended header */
+#define	BPF_WANT_PKTAP		0x02	/* process knows how to keep DLT_PKTAP private */
+#define	BPF_FINALIZE_PKTAP	0x04	/* finalize pktap header on read */
+
 /*
 * Descriptor associated with each attached hardware interface.
 */
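Editorial aside (not part of the patch): from user space, a process opts into DLT_PKTAP with the new BIOCSWANTPKTAP ioctl before binding the descriptor to an interface; without it, bpf_setif() skips DLT_PKTAP when choosing the default data link type and bpf_getdltlist() hides it. A minimal sketch, assuming the private net/bpf.h definitions above are visible; the "/dev/bpf0" device node and error handling are illustrative (real programs typically probe /dev/bpf0..N):

	/* Sketch: opt a BPF descriptor into DLT_PKTAP */
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/bpf.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	int
	open_pktap_bpf(const char *ifname)
	{
		struct ifreq ifr;
		u_int want = 1;
		int fd = open("/dev/bpf0", O_RDONLY);	/* illustrative node */

		if (fd < 0)
			return (-1);
		/*
		 * Declare that this process knows how to keep DLT_PKTAP
		 * private; must be done before BIOCSETIF so that the
		 * default DLT selection can pick DLT_PKTAP
		 */
		if (ioctl(fd, BIOCSWANTPKTAP, &want) < 0)
			goto fail;
		memset(&ifr, 0, sizeof (ifr));
		strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
		if (ioctl(fd, BIOCSETIF, &ifr) < 0)
			goto fail;
		return (fd);
	fail:
		close(fd);
		return (-1);
	}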
diff --git a/bsd/net/classq/classq.h b/bsd/net/classq/classq.h
index b9705acb7..e6edbac70 100644
--- a/bsd/net/classq/classq.h
+++ b/bsd/net/classq/classq.h
@@ -113,8 +113,8 @@ struct pktcntr {
 typedef struct _class_queue_ {
	MBUFQ_HEAD(mq_head) mbufq;	/* Packet queue */
	u_int32_t	qlen;	/* Queue length (in number of packets) */
-	u_int32_t	qsize;	/* Approx. queue size (in number of bytes) */
	u_int32_t	qlim;	/* Queue limit (in number of packets*) */
+	u_int64_t	qsize;	/* Approx. queue size (in number of bytes) */
	classq_type_t	qtype;	/* Queue type */
	classq_state_t	qstate;	/* Queue state */
 } class_queue_t;
diff --git a/bsd/net/classq/classq_sfb.c b/bsd/net/classq/classq_sfb.c
index 014870ac7..4f705e0f1 100644
--- a/bsd/net/classq/classq_sfb.c
+++ b/bsd/net/classq/classq_sfb.c
@@ -130,6 +130,24 @@
 #define	PBOXTIME_MIN	(30ULL * 1000 * 1000)	/* 30ms */
 #define	PBOXTIME_MAX	(300ULL * 1000 * 1000)	/* 300ms */

+/*
+ * Target queueing delay is the amount of extra delay that can be added
+ * to accommodate variations in the link bandwidth. The queue should be
+ * large enough to induce this much delay and nothing more than that.
+ */
+#define	TARGET_QDELAY_BASE	(10ULL * 1000 * 1000)	/* 10ms */
+#define	TARGET_QDELAY_MIN	(10ULL * 1000)	/* 10us */
+#define	TARGET_QDELAY_MAX	(20ULL * 1000 * 1000 * 1000)	/* 20s */
+
+/*
+ * Update interval for checking the extra delay added by the queue. This
+ * should be the 90th to 95th percentile of the RTT experienced by any TCP
+ * connection, so that it takes care of burst traffic.
+ */
+#define	UPDATE_INTERVAL_BASE	(100ULL * 1000 * 1000)	/* 100ms */
+#define	UPDATE_INTERVAL_MIN	(100ULL * 1000 * 1000)	/* 100ms */
+#define	UPDATE_INTERVAL_MAX	(10ULL * 1000 * 1000 * 1000)	/* 10s */
+
 #define	SFB_RANDOM(sp, tmin, tmax)	((sfb_random(sp) % (tmax)) + (tmin))

 #define	SFB_PKT_PBOX	0x1	/* in penalty box */
@@ -167,6 +185,19 @@
	}							\
 } while (0)

+/* Minimum number of bytes in queue to get flow controlled */
+#define	SFB_MIN_FC_THRESHOLD_BYTES	7500
+
+#define	SFB_SET_DELAY_HIGH(_sp_, _q_) do {				\
+	(_sp_)->sfb_flags |= SFBF_DELAYHIGH;				\
+	(_sp_)->sfb_fc_threshold = max(SFB_MIN_FC_THRESHOLD_BYTES,	\
+	(qsize((_q_)) >> 3));						\
+} while (0)
+
+#define	SFB_QUEUE_DELAYBASED(_sp_) ((_sp_)->sfb_flags & SFBF_DELAYBASED)
+#define	SFB_IS_DELAYHIGH(_sp_) ((_sp_)->sfb_flags & SFBF_DELAYHIGH)
+#define	SFB_QUEUE_DELAYBASED_MAXSIZE	2048	/* max pkts */
+
 #define	HINTERVAL_MIN	(10)	/* 10 seconds */
 #define	HINTERVAL_MAX	(20)	/* 20 seconds */
 #define	SFB_HINTERVAL(sp) ((sfb_random(sp) % HINTERVAL_MAX) + HINTERVAL_MIN)
@@ -212,6 +243,8 @@ static void sfb_resetq(struct sfb *, cqev_t);
 static void sfb_calc_holdtime(struct sfb *, u_int64_t);
 static void sfb_calc_pboxtime(struct sfb *, u_int64_t);
 static void sfb_calc_hinterval(struct sfb *, u_int64_t *);
+static void sfb_calc_target_qdelay(struct sfb *, u_int64_t);
+static void sfb_calc_update_interval(struct sfb *, u_int64_t);
 static void sfb_swap_bins(struct sfb *, u_int32_t);
 static inline int sfb_pcheck(struct sfb *, struct pkthdr *);
 static int sfb_penalize(struct sfb *, struct pkthdr *, struct timespec *);
@@ -222,13 +255,16 @@ static void sfb_decrement_bin(struct sfb *, struct sfbbinstats *,
 static void sfb_increment_bin(struct sfb *, struct sfbbinstats *,
     struct timespec *, struct timespec *);
 static inline void sfb_dq_update_bins(struct sfb *, struct pkthdr *,
-    struct timespec *);
+    struct timespec *, u_int32_t qsize);
 static inline void sfb_eq_update_bins(struct sfb *, struct pkthdr *);
 static int sfb_drop_early(struct sfb *, struct pkthdr *,
    u_int16_t *, struct timespec *);
 static boolean_t sfb_bin_addfcentry(struct sfb *, struct pkthdr *);
 static void sfb_fclist_append(struct sfb *, struct sfb_fcl *);
 static void sfb_fclists_clean(struct sfb *sp);
+static int sfb_bin_mark_or_drop(struct sfb *sp, struct sfbbinstats *bin);
+static void sfb_detect_dequeue_stall(struct sfb *sp, class_queue_t *,
+    struct timespec *);

 SYSCTL_NODE(_net_classq, OID_AUTO, sfb, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "SFB");

@@ -244,6 +280,14 @@ static u_int64_t sfb_hinterval;
 SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, hinterval, CTLFLAG_RW|CTLFLAG_LOCKED,
    &sfb_hinterval, "SFB hash interval in nanoseconds");

+static u_int64_t sfb_target_qdelay;
+SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, target_qdelay, CTLFLAG_RW|CTLFLAG_LOCKED,
+    &sfb_target_qdelay, "SFB target queue delay in nanoseconds");
+
+static u_int64_t sfb_update_interval;
+SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, update_interval,
+    CTLFLAG_RW|CTLFLAG_LOCKED, &sfb_update_interval, "SFB update interval");
+
 static u_int32_t sfb_increment = SFB_INCREMENT;
 SYSCTL_UINT(_net_classq_sfb, OID_AUTO, increment, CTLFLAG_RW|CTLFLAG_LOCKED,
	&sfb_increment, SFB_INCREMENT, "SFB increment [d1]");
@@ -260,8 +304,9 @@ static u_int32_t sfb_ratelimit = 0;
 SYSCTL_UINT(_net_classq_sfb, OID_AUTO, ratelimit, CTLFLAG_RW|CTLFLAG_LOCKED,
	&sfb_ratelimit, 0, "SFB rate limit");

-#define	MBPS	(1ULL * 1000 * 1000)
-#define	GBPS	(MBPS * 1000)
+#define	KBPS	(1ULL * 1000)		/* 1 Kbits per second */
+#define	MBPS	(1ULL * 1000 * 1000)	/* 1 Mbits per second */
+#define	GBPS	(MBPS * 1000)		/* 1 Gbits per second */

 struct sfb_time_tbl {
	u_int64_t	speed;	/* uplink speed */
@@ -394,6 +439,47 @@ sfb_calc_hinterval(struct sfb *sp, u_int64_t *t)
	net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset);
 }

+static void
+sfb_calc_target_qdelay(struct sfb *sp, u_int64_t out_bw)
+{
+#pragma unused(out_bw)
+	u_int64_t target_qdelay = 0;
+	struct ifnet *ifp = sp->sfb_ifp;
+
+	target_qdelay = IFCQ_TARGET_QDELAY(&ifp->if_snd);
+
+	/* If the system-level override is set, use it */
+	if (sfb_target_qdelay != 0)
+		target_qdelay = sfb_target_qdelay;
+
+	/*
+	 * If no target delay is configured, use the default
+	 * target queue delay.
+	 */
+	if (target_qdelay == 0)
+		target_qdelay = IFQ_TARGET_DELAY;
+
+	sp->sfb_target_qdelay = target_qdelay;
+}
+
+static void
+sfb_calc_update_interval(struct sfb *sp, u_int64_t out_bw)
+{
+#pragma unused(out_bw)
+	u_int64_t update_interval = 0;
+
+	/* If the system-level override is set, use it */
+	if (sfb_update_interval != 0)
+		update_interval = sfb_update_interval;
+
+	/*
+	 * If no override is set, use the default
+	 * update interval.
+	 */
+	if (update_interval == 0)
+		update_interval = IFQ_UPDATE_INTERVAL;
+
+	net_nsectimer(&update_interval, &sp->sfb_update_interval);
+}
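Editorial aside (not part of the patch): the target queueing delay above relates to queue depth by the simple identity depth = rate x delay, which is what "large enough to induce this much delay and nothing more" means in bytes. A stand-alone sketch of that arithmetic, with illustrative rates:

	/* Stand-alone sketch: queue depth needed to induce a target delay */
	#include <stdio.h>
	#include <stdint.h>

	#define NSEC_PER_SEC	(1000ULL * 1000 * 1000)

	static uint64_t
	qdepth_bytes(uint64_t rate_bps, uint64_t target_qdelay_ns)
	{
		/* rate in bits per second, delay in nanoseconds */
		return ((rate_bps / 8) * target_qdelay_ns) / NSEC_PER_SEC;
	}

	int
	main(void)
	{
		/*
		 * TARGET_QDELAY_BASE is 10 ms: that is ~12.5 KB of queue
		 * at 10 Mbit/s and ~1.25 MB at 1 Gbit/s
		 */
		printf("%llu\n", (unsigned long long)
		    qdepth_bytes(10ULL * 1000 * 1000, 10ULL * 1000 * 1000));
		printf("%llu\n", (unsigned long long)
		    qdepth_bytes(1000ULL * 1000 * 1000, 10ULL * 1000 * 1000));
		return (0);
	}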

 /*
  * sfb support routines
  */
@@ -514,6 +600,8 @@ sfb_resetq(struct sfb *sp, cqev_t ev)
	sfb_calc_holdtime(sp, eff_rate);
	sfb_calc_pboxtime(sp, eff_rate);
	sfb_calc_hinterval(sp, NULL);
+	sfb_calc_target_qdelay(sp, eff_rate);
+	sfb_calc_update_interval(sp, eff_rate);

	if (ev == CLASSQ_EV_LINK_DOWN ||
	    ev == CLASSQ_EV_LINK_UP)
@@ -527,12 +615,16 @@ sfb_resetq(struct sfb *sp, cqev_t ev)

	log(LOG_DEBUG, "%s: SFB qid=%d, holdtime=%llu nsec, "
	    "pboxtime=%llu nsec, allocation=%d, drop_thresh=%d, "
-	    "hinterval=%d sec, sfb_bins=%d bytes, eff_rate=%llu bps\n",
+	    "hinterval=%d sec, sfb_bins=%d bytes, eff_rate=%llu bps, "
+	    "target_qdelay=%llu nsec, "
+	    "update_interval=%llu sec %llu nsec flags=0x%x\n",
	    if_name(ifp), sp->sfb_qid, (u_int64_t)sp->sfb_holdtime.tv_nsec,
	    (u_int64_t)sp->sfb_pboxtime.tv_nsec,
	    (u_int32_t)sp->sfb_allocation, (u_int32_t)sp->sfb_drop_thresh,
	    (int)sp->sfb_hinterval.tv_sec, (int)sizeof (*sp->sfb_bins),
-	    eff_rate);
+	    eff_rate, (u_int64_t)sp->sfb_target_qdelay,
+	    (u_int64_t)sp->sfb_update_interval.tv_sec,
+	    (u_int64_t)sp->sfb_update_interval.tv_nsec, sp->sfb_flags);
 }

 void
@@ -542,10 +634,15 @@ sfb_getstats(struct sfb *sp, struct sfb_stats *sps)
	sps->dropthresh = sp->sfb_drop_thresh;
	sps->clearpkts = sp->sfb_clearpkts;
	sps->current = sp->sfb_current;
+	sps->target_qdelay = sp->sfb_target_qdelay;
+	sps->min_estdelay = sp->sfb_min_qdelay;
+	sps->delay_fcthreshold = sp->sfb_fc_threshold;
+	sps->flags = sp->sfb_flags;

	net_timernsec(&sp->sfb_holdtime, &sp->sfb_stats.hold_time);
	net_timernsec(&sp->sfb_pboxtime, &sp->sfb_stats.pbox_time);
	net_timernsec(&sp->sfb_hinterval, &sp->sfb_stats.rehash_intval);
+	net_timernsec(&sp->sfb_update_interval, &sps->update_interval);

	*(&(sps->sfbstats)) = *(&(sp->sfb_stats));

	_CASSERT(sizeof ((*sp->sfb_bins)[0].stats) ==
@@ -597,6 +694,7 @@ sfb_swap_bins(struct sfb *sp, u_int32_t len)
			wbin = SFB_BINST(sp, j, i, s ^ 1);	/* warm-up */

			cbin->pkts = 0;
+			cbin->bytes = 0;
			if (cbin->pmark > SFB_MAX_PMARK)
				cbin->pmark = SFB_MAX_PMARK;
			if (cbin->pmark < 0)
@@ -750,7 +848,8 @@ sfb_increment_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
 }

 static inline void
-sfb_dq_update_bins(struct sfb *sp, struct pkthdr *pkt, struct timespec *now)
+sfb_dq_update_bins(struct sfb *sp, struct pkthdr *pkt,
+    struct timespec *now, u_int32_t qsize)
 {
 #if SFB_LEVELS != 2 || SFB_FC_LEVEL != 0
	int i;
@@ -770,23 +869,35 @@ sfb_dq_update_bins(struct sfb *sp, struct pkthdr *pkt, struct timespec *now)
	n = SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]);
	bin = SFB_BINST(sp, 0, n, s);

-	VERIFY(bin->pkts > 0);
-	if (--bin->pkts == 0) {
+	VERIFY(bin->pkts > 0 && bin->bytes >= (u_int32_t)pkt->len);
+	bin->pkts--;
+	bin->bytes -= pkt->len;
+
+	if (bin->pkts == 0)
		sfb_decrement_bin(sp, bin, SFB_BINFT(sp, 0, n, s), now);
+
+	/* Deliver flow control feedback to the sockets */
+	if (SFB_QUEUE_DELAYBASED(sp)) {
+		if (!(SFB_IS_DELAYHIGH(sp)) ||
+		    bin->bytes <= sp->sfb_fc_threshold ||
+		    bin->pkts == 0 || qsize == 0)
+			fcl = SFB_FC_LIST(sp, n);
+	} else if (bin->pkts <= (sp->sfb_allocation >> 2)) {
+		fcl = SFB_FC_LIST(sp, n);
	}
-	if (bin->pkts <= (sp->sfb_allocation >> 2)) {
-		/* deliver flow control feedback to the sockets */
-		fcl = SFB_FC_LIST(sp, n);
-		if (!STAILQ_EMPTY(&fcl->fclist))
-			sfb_fclist_append(sp, fcl);
-	}
+
+	if (fcl != NULL && !STAILQ_EMPTY(&fcl->fclist))
+		sfb_fclist_append(sp, fcl);
+	fcl = NULL;

	/* Level 1: bin index at [1] for set 0;
[3] for set 1 */ n = SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]); bin = SFB_BINST(sp, 1, n, s); - VERIFY(bin->pkts > 0); - if (--bin->pkts == 0) + VERIFY(bin->pkts > 0 && bin->bytes >= (u_int64_t)pkt->len); + bin->pkts--; + bin->bytes -= pkt->len; + if (bin->pkts == 0) sfb_decrement_bin(sp, bin, SFB_BINFT(sp, 1, n, s), now); #else /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */ for (i = 0; i < SFB_LEVELS; i++) { @@ -797,19 +908,24 @@ sfb_dq_update_bins(struct sfb *sp, struct pkthdr *pkt, struct timespec *now) bin = SFB_BINST(sp, i, n, s); - VERIFY(bin->pkts > 0); - if (--bin->pkts == 0) { + VERIFY(bin->pkts > 0 && bin->bytes >= pkt->len); + bin->pkts--; + bin->bytes -= pkt->len; + if (bin->pkts == 0) sfb_decrement_bin(sp, bin, SFB_BINFT(sp, i, n, s), now); - } - if (bin->pkts <= (sp->sfb_allocation >> 2)) { - /* deliver flow control feedback to the sockets */ - if (i == SFB_FC_LEVEL) { + if (i != SFB_FC_LEVEL) + continue; + if (SFB_QUEUE_DELAYBASED(sp)) { + if (!(SFB_IS_DELAYHIGH(sp)) || + bin->bytes <= sp->sfb_fc_threshold) fcl = SFB_FC_LIST(sp, n); - if (!STAILQ_EMPTY(&fcl->fclist)) - sfb_fclist_append(sp, fcl); - } + } else if (bin->pkts <= (sp->sfb_allocation >> 2)) { + fcl = SFB_FC_LIST(sp, n); } + if (fcl != NULL && !STAILQ_EMPTY(&fcl->fclist)) + sfb_fclist_append(sp, fcl); + fcl = NULL; } #endif /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */ } @@ -821,7 +937,7 @@ sfb_eq_update_bins(struct sfb *sp, struct pkthdr *pkt) int i, n; #endif /* SFB_LEVELS != 2 */ int s; - + struct sfbbinstats *bin; s = sp->sfb_current; VERIFY((s + (s ^ 1)) == 1); @@ -830,12 +946,17 @@ sfb_eq_update_bins(struct sfb *sp, struct pkthdr *pkt) */ #if SFB_LEVELS == 2 /* Level 0: bin index at [0] for set 0; [2] for set 1 */ - SFB_BINST(sp, 0, - SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]), s)->pkts++; + bin = SFB_BINST(sp, 0, + SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]), s); + bin->pkts++; + bin->bytes += pkt->len; /* Level 1: bin index at [1] for set 0; [3] for set 1 */ - SFB_BINST(sp, 1, - SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]), s)->pkts++; + bin = SFB_BINST(sp, 1, + SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]), s); + bin->pkts++; + bin->bytes += pkt->len; + #else /* SFB_LEVELS != 2 */ for (i = 0; i < SFB_LEVELS; i++) { if (s == 0) /* set 0, bin index [0,1] */ @@ -843,7 +964,9 @@ sfb_eq_update_bins(struct sfb *sp, struct pkthdr *pkt) else /* set 1, bin index [2,3] */ n = SFB_BINMASK(pkt->pkt_sfb_hash8[i + 2]); - SFB_BINST(sp, i, n, s)->pkts++; + bin = SFB_BINST(sp, i, n, s); + bin->pkts++; + bin->bytes += pkt->len; } #endif /* SFB_LEVELS != 2 */ } @@ -893,6 +1016,30 @@ sfb_bin_addfcentry(struct sfb *sp, struct pkthdr *pkt) return (fce != NULL); } +/* + * check if this flow needs to be flow-controlled or if this + * packet needs to be dropped. + */ +static int +sfb_bin_mark_or_drop(struct sfb *sp, struct sfbbinstats *bin) +{ + int ret = 0; + if (SFB_QUEUE_DELAYBASED(sp)) { + /* + * Mark or drop if this bin has more + * bytes than the flowcontrol threshold. 
+ */ + if (SFB_IS_DELAYHIGH(sp) && + bin->bytes >= (sp->sfb_fc_threshold << 1)) + ret = 1; + } else { + if (bin->pkts >= sp->sfb_allocation && + bin->pkts >= sp->sfb_drop_thresh) + ret = 1; /* drop or mark */ + } + return (ret); +} + /* * early-drop probability is kept in pmark of each bin of the flow */ @@ -921,11 +1068,12 @@ sfb_drop_early(struct sfb *sp, struct pkthdr *pkt, u_int16_t *pmin, if (*pmin > (u_int16_t)bin->pmark) *pmin = (u_int16_t)bin->pmark; - if (bin->pkts >= sp->sfb_allocation) { - if (bin->pkts >= sp->sfb_drop_thresh) - ret = 1; /* drop or mark */ + + /* Update SFB probability */ + if (bin->pkts >= sp->sfb_allocation) sfb_increment_bin(sp, bin, SFB_BINFT(sp, 0, n, s), now); - } + + ret = sfb_bin_mark_or_drop(sp, bin); /* Level 1: bin index at [1] for set 0; [3] for set 1 */ n = SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]); @@ -933,11 +1081,8 @@ sfb_drop_early(struct sfb *sp, struct pkthdr *pkt, u_int16_t *pmin, if (*pmin > (u_int16_t)bin->pmark) *pmin = (u_int16_t)bin->pmark; - if (bin->pkts >= sp->sfb_allocation) { - if (bin->pkts >= sp->sfb_drop_thresh) - ret = 1; /* drop or mark */ + if (bin->pkts >= sp->sfb_allocation) sfb_increment_bin(sp, bin, SFB_BINFT(sp, 1, n, s), now); - } #else /* SFB_LEVELS != 2 */ for (i = 0; i < SFB_LEVELS; i++) { if (s == 0) /* set 0, bin index [0,1] */ @@ -949,12 +1094,11 @@ sfb_drop_early(struct sfb *sp, struct pkthdr *pkt, u_int16_t *pmin, if (*pmin > (u_int16_t)bin->pmark) *pmin = (u_int16_t)bin->pmark; - if (bin->pkts >= sp->sfb_allocation) { - if (bin->pkts >= sp->sfb_drop_thresh) - ret = 1; /* drop or mark */ + if (bin->pkts >= sp->sfb_allocation) sfb_increment_bin(sp, bin, SFB_BINFT(sp, i, n, s), now); - } + if (i == SFB_FC_LEVEL) + ret = sfb_bin_mark_or_drop(sp, bin); } #endif /* SFB_LEVELS != 2 */ @@ -964,6 +1108,29 @@ sfb_drop_early(struct sfb *sp, struct pkthdr *pkt, u_int16_t *pmin, return (ret); } +void +sfb_detect_dequeue_stall(struct sfb *sp, class_queue_t *q, + struct timespec *now) +{ + struct timespec max_getqtime; + + if (!SFB_QUEUE_DELAYBASED(sp) || SFB_IS_DELAYHIGH(sp) || + qsize(q) <= SFB_MIN_FC_THRESHOLD_BYTES || + !net_timerisset(&sp->sfb_getqtime)) + return; + + net_timeradd(&sp->sfb_getqtime, &sp->sfb_update_interval, + &max_getqtime); + if (net_timercmp(now, &max_getqtime, >)) { + /* + * No packets have been dequeued in an update interval + * worth of time. It means that the queue is stalled + */ + SFB_SET_DELAY_HIGH(sp, q); + sp->sfb_stats.dequeue_stall++; + } +} + #define DTYPE_NODROP 0 /* no drop */ #define DTYPE_FORCED 1 /* a "forced" drop */ #define DTYPE_EARLY 2 /* an "unforced" (early) drop */ @@ -986,6 +1153,10 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) s = sp->sfb_current; VERIFY((s + (s ^ 1)) == 1); + /* See comments in */ + VERIFY(!(pkt->pkt_flags & PKTF_PRIV_GUARDED)); + pkt->pkt_flags |= PKTF_PRIV_GUARDED; + /* time to swap the bins? 
*/ if (net_timercmp(&now, &sp->sfb_nextreset, >=)) { net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset); @@ -994,6 +1165,11 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) VERIFY((s + (s ^ 1)) == 1); } + if (!net_timerisset(&sp->sfb_update_time)) { + net_timeradd(&now, &sp->sfb_update_interval, + &sp->sfb_update_time); + } + pkt->pkt_sfb_flags = 0; pkt->pkt_sfb_hash16[s] = (SFB_HASH(&pkt->pkt_flowid, sizeof (pkt->pkt_flowid), @@ -1002,6 +1178,9 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) (SFB_HASH(&pkt->pkt_flowid, sizeof (pkt->pkt_flowid), (*sp->sfb_bins)[s ^ 1].fudge) & SFB_HASHMASK); + /* check if the queue has been stalled */ + sfb_detect_dequeue_stall(sp, q, &now); + /* see if we drop early */ droptype = DTYPE_NODROP; if (sfb_drop_early(sp, pkt, &pmin, &now)) { @@ -1039,8 +1218,23 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) sp->sfb_stats.drop_pbox++; } - /* if the queue length hits the hard limit, it's a forced drop */ - if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) { + /* + * if max queue size is static, make it a forced drop + * when the queue length hits the queue limit + */ + if (!(SFB_QUEUE_DELAYBASED(sp)) && + droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) { + droptype = DTYPE_FORCED; + sp->sfb_stats.drop_queue++; + } + + /* + * delay based queues have a larger maximum size to + * allow for bursts + */ + if (SFB_QUEUE_DELAYBASED(sp) && + droptype == DTYPE_NODROP && + qlen(q) >= SFB_QUEUE_DELAYBASED_MAXSIZE) { droptype = DTYPE_FORCED; sp->sfb_stats.drop_queue++; } @@ -1059,9 +1253,9 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t) ret = CLASSQEQ_DROPPED_FC; } } - /* if successful enqueue this packet, else drop it */ if (droptype == DTYPE_NODROP) { + net_timernsec(&now, &pkt->pkt_enqueue_ts); _addq(q, m); } else { IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd); @@ -1100,13 +1294,13 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge) VERIFY(m->m_flags & M_PKTHDR); pkt = &m->m_pkthdr; + VERIFY(pkt->pkt_flags & PKTF_PRIV_GUARDED); if (!purge) { /* calculate EWMA of dequeues */ if (net_timerisset(&sp->sfb_getqtime)) { struct timespec delta; u_int64_t avg, new; - net_timersub(&now, &sp->sfb_getqtime, &delta); net_timernsec(&delta, &new); avg = sp->sfb_stats.dequeue_avg; @@ -1115,7 +1309,7 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge) /* * If the time since last dequeue is * significantly greater than the current - * average, weight the average more against + * average, weigh the average more against * the old value. 
*/ if (DEQUEUE_SPIKE(new, avg)) @@ -1129,6 +1323,30 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge) *(&sp->sfb_getqtime) = *(&now); } + if (!purge && SFB_QUEUE_DELAYBASED(sp)) { + u_int64_t dequeue_ns, queue_delay = 0; + net_timernsec(&now, &dequeue_ns); + if (dequeue_ns > pkt->pkt_enqueue_ts) + queue_delay = dequeue_ns - pkt->pkt_enqueue_ts; + + if (sp->sfb_min_qdelay == 0 || + (queue_delay > 0 && queue_delay < sp->sfb_min_qdelay)) + sp->sfb_min_qdelay = queue_delay; + if (net_timercmp(&now, &sp->sfb_update_time, >=)) { + if (sp->sfb_min_qdelay > sp->sfb_target_qdelay) { + if (!SFB_IS_DELAYHIGH(sp)) + SFB_SET_DELAY_HIGH(sp, q); + } else { + sp->sfb_flags &= ~(SFBF_DELAYHIGH); + sp->sfb_fc_threshold = 0; + + } + net_timeradd(&now, &sp->sfb_update_interval, + &sp->sfb_update_time); + sp->sfb_min_qdelay = 0; + } + } + /* * Clearpkts are the ones which were in the queue when the hash * function was perturbed. Since the perturbation value (fudge), @@ -1145,7 +1363,21 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge) } else if (sp->sfb_clearpkts > 0) { sp->sfb_clearpkts--; } else { - sfb_dq_update_bins(sp, pkt, &now); + sfb_dq_update_bins(sp, pkt, &now, qsize(q)); + } + + /* See comments in */ + pkt->pkt_flags &= ~PKTF_PRIV_GUARDED; + + /* + * If the queue becomes empty before the update interval, reset + * the flow control threshold + */ + if (qsize(q) == 0) { + sp->sfb_flags &= ~SFBF_DELAYHIGH; + sp->sfb_min_qdelay = 0; + sp->sfb_fc_threshold = 0; + net_timerclear(&sp->sfb_update_time); } return (m); @@ -1200,6 +1432,8 @@ sfb_updateq(struct sfb *sp, cqev_t ev) } sfb_calc_holdtime(sp, eff_rate); sfb_calc_pboxtime(sp, eff_rate); + sfb_calc_target_qdelay(sp, eff_rate); + sfb_calc_update_interval(sp, eff_rate); break; } diff --git a/bsd/net/classq/classq_sfb.h b/bsd/net/classq/classq_sfb.h index 6fc8726a7..f401b0eb5 100644 --- a/bsd/net/classq/classq_sfb.h +++ b/bsd/net/classq/classq_sfb.h @@ -60,11 +60,13 @@ struct sfbstats { u_int64_t null_flowid; u_int64_t flow_controlled; u_int64_t flow_feedback; + u_int64_t dequeue_stall; }; struct sfbbinstats { int16_t pmark; /* marking probability in Q format */ u_int16_t pkts; /* number of packets */ + u_int32_t bytes; /* number of bytes */ }; struct sfb_stats { @@ -72,6 +74,11 @@ struct sfb_stats { u_int32_t dropthresh; u_int32_t clearpkts; u_int32_t current; + u_int64_t target_qdelay; + u_int64_t update_interval; + u_int64_t min_estdelay; + u_int32_t delay_fcthreshold; + u_int32_t flags; struct sfbstats sfbstats; struct sfbbins { struct sfbbinstats stats[SFB_LEVELS][SFB_BINS]; @@ -95,10 +102,12 @@ struct sfb_fcl { #define SFBF_ECN6 0x02 /* use packet marking for IPv6 packets */ #define SFBF_ECN (SFBF_ECN4 | SFBF_ECN6) #define SFBF_FLOWCTL 0x04 /* enable flow control advisories */ +#define SFBF_DELAYBASED 0x08 /* queueing is delay based */ +#define SFBF_DELAYHIGH 0x10 /* Estimated delay is greater than target */ #define SFBF_SUSPENDED 0x1000 /* queue is suspended */ #define SFBF_USERFLAGS \ - (SFBF_ECN4 | SFBF_ECN6 | SFBF_FLOWCTL) + (SFBF_ECN4 | SFBF_ECN6 | SFBF_FLOWCTL | SFBF_DELAYBASED) typedef struct sfb { /* variables for internal use */ @@ -109,10 +118,19 @@ typedef struct sfb { u_int16_t sfb_drop_thresh; u_int32_t sfb_clearpkts; u_int64_t sfb_eff_rate; /* last known effective rate */ - struct timespec sfb_getqtime; /* last dequeue timestamp */ + struct timespec sfb_getqtime; /* last dequeue timestamp */ struct timespec sfb_holdtime; /* random holdtime in nsec */ struct ifnet 
*sfb_ifp;	/* back pointer to ifnet */

+	/* target queue delay and interval for queue sizing */
+	u_int64_t	sfb_target_qdelay;
+	struct timespec	sfb_update_interval;
+	u_int64_t	sfb_fc_threshold; /* for flow control feedback */
+
+	/* variables for computing estimated delay of the queue */
+	u_int64_t	sfb_min_qdelay;
+	struct timespec	sfb_update_time;
+
	/* moving hash function */
	struct timespec	sfb_hinterval;	/* random reset interval in sec */
	struct timespec	sfb_nextreset;	/* reset deadline */
diff --git a/bsd/net/classq/classq_subr.c b/bsd/net/classq/classq_subr.c
index d62a1337b..109cae586 100644
--- a/bsd/net/classq/classq_subr.c
+++ b/bsd/net/classq/classq_subr.c
@@ -115,6 +115,14 @@ ifclassq_setup(struct ifnet *ifp, u_int32_t sflags, boolean_t reuse)
		maxlen = if_sndq_maxlen;
	IFCQ_SET_MAXLEN(ifq, maxlen);

+	if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
+	    IFCQ_TARGET_QDELAY(ifq) == 0) {
+		/*
+		 * Use a static queue limit because the interface
+		 * has its own maximum queue size set
+		 */
+		sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
+	}
	ifq->ifcq_sflags = sflags;
	err = ifclassq_pktsched_setup(ifq);
	if (err == 0)
diff --git a/bsd/net/classq/classq_util.c b/bsd/net/classq/classq_util.c
index 9cc141265..56aa56eea 100644
--- a/bsd/net/classq/classq_util.c
+++ b/bsd/net/classq/classq_util.c
@@ -259,14 +259,17 @@ mark_ecn(struct mbuf *m, struct pf_mtag *t, int flags)
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
-			 * update checksum (from RFC1624)
+			 * update the checksum (from RFC 1624) in software
+			 * only if the IP header checksum is not offloaded:
			 *	HC' = ~(~HC + ~m + m')
			 */
-			sum = ~ntohs(ip->ip_sum) & 0xffff;
-			sum += (~otos & 0xffff) + ip->ip_tos;
-			sum = (sum >> 16) + (sum & 0xffff);
-			sum += (sum >> 16);  /* add carry */
-			ip->ip_sum = htons(~sum & 0xffff);
+			if (!(m->m_pkthdr.csum_flags & CSUM_DELAY_IP)) {
+				sum = ~ntohs(ip->ip_sum) & 0xffff;
+				sum += (~otos & 0xffff) + ip->ip_tos;
+				sum = (sum >> 16) + (sum & 0xffff);
+				sum += (sum >> 16);  /* add carry */
+				ip->ip_sum = htons(~sum & 0xffff);
+			}
			return (1);
		}
		break;
diff --git a/bsd/net/classq/if_classq.h b/bsd/net/classq/if_classq.h
index 2c0da5f5e..cb60c5464 100644
--- a/bsd/net/classq/if_classq.h
+++ b/bsd/net/classq/if_classq.h
@@ -128,6 +128,7 @@ struct ifclassq {
	u_int32_t	ifcq_type;	/* scheduler type */
	u_int32_t	ifcq_flags;	/* flags */
	u_int32_t	ifcq_sflags;	/* scheduler flags */
+	u_int32_t	ifcq_target_qdelay;	/* target queue delay */
	void		*ifcq_disc;	/* for scheduler-specific use */
	/*
	 * ifcq_disc_slots[] represents the leaf classes configured for the
@@ -340,6 +341,7 @@ struct if_ifclassq_stats {
 #define	IFCQ_DEC_LEN(_ifcq)	(IFCQ_LEN(_ifcq)--)
 #define	IFCQ_MAXLEN(_ifcq)	((_ifcq)->ifcq_maxlen)
 #define	IFCQ_SET_MAXLEN(_ifcq, _len)	((_ifcq)->ifcq_maxlen = (_len))
+#define	IFCQ_TARGET_QDELAY(_ifcq)	((_ifcq)->ifcq_target_qdelay)

 #define	IFCQ_XMIT_ADD(_ifcq, _pkt, _len) do {			\
	PKTCNTR_ADD(&(_ifcq)->ifcq_xmitcnt, _pkt, _len);	\
diff --git a/bsd/net/content_filter.c b/bsd/net/content_filter.c
new file mode 100644
index 000000000..58bea9bbb
--- /dev/null
+++ b/bsd/net/content_filter.c
@@ -0,0 +1,3942 @@
+/*
+ * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * THEORY OF OPERATION
+ *
+ * The socket content filter subsystem provides a way for user space agents to
+ * make filtering decisions based on the content of the data being sent and
+ * received by TCP/IP sockets.
+ *
+ * A content filter user space agent gets a copy of the data, and the data is
+ * also kept in a kernel buffer until the user space agent makes a pass or
+ * drop decision. This unidirectional flow of content avoids unnecessary data
+ * copies back to the kernel.
+ *
+ * A user space filter agent opens a kernel control socket with the name
+ * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
+ * When connected, a "struct content_filter" is created and set as the
+ * "unitinfo" of the corresponding kernel control socket instance.
+ *
+ * The socket content filter subsystem exchanges messages with the user space
+ * filter agent until an ultimate pass or drop decision is made by the
+ * user space filter agent.
+ *
+ * It should be noted that messages about many TCP/IP sockets can be multiplexed
+ * over a single kernel control socket.
+ *
+ * Notes:
+ * - The current implementation is limited to TCP sockets.
+ * - The current implementation supports up to two simultaneous content filters
+ *   for the sake of simplicity of the implementation.
+ *
+ *
+ * NECP FILTER CONTROL UNIT
+ *
+ * A user space filter agent uses the Network Extension Control Policy (NECP)
+ * database to specify which TCP/IP sockets need to be filtered. The NECP
+ * criteria may be based on a variety of properties like user ID or proc UUID.
+ *
+ * The NECP "filter control unit" is used by the socket content filter subsystem
+ * to deliver the relevant TCP/IP content information to the appropriate
+ * user space filter agent via its kernel control socket instance.
+ * This works as follows:
+ *
+ * 1) The user space filter agent specifies an NECP filter control unit when
+ *    it adds its filtering rules to the NECP database.
+ *
+ * 2) The user space filter agent also sets its NECP filter control unit on the
+ *    content filter kernel control socket via the socket option
+ *    CFIL_OPT_NECP_CONTROL_UNIT.
+ *
+ * 3) The NECP database is consulted to find out if a given TCP/IP socket
+ *    needs to be subjected to content filtering and returns the corresponding
+ *    NECP filter control unit -- the NECP filter control unit is actually
+ *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
+ *
+ * 4) The NECP filter control unit is then used to find the corresponding
+ *    kernel control socket instance.
+ *
+ * Note: NECP currently supports a single filter control unit per TCP/IP socket
+ * but this restriction may soon be lifted.
+ *
+ *
+ * THE MESSAGING PROTOCOL
+ *
+ * The socket content filter subsystem and a user space filter agent
+ * communicate over the kernel control socket via an asynchronous
+ * messaging protocol (this is not a request-response protocol).
+ * The socket content filter subsystem sends event messages to the user
+ * space filter agent about the TCP/IP sockets it is interested in filtering.
+ * The user space filter agent sends action messages to either allow
+ * data to pass or to disallow the data flow (and drop the connection).
+ *
+ * All messages over a content filter kernel control socket share the same
+ * common header of type "struct cfil_msg_hdr". The message type tells if
+ * it's an event message "CFM_TYPE_EVENT" or an action message
+ * "CFM_TYPE_ACTION". The message header field "cfm_sock_id" identifies a
+ * given TCP/IP socket. Note the message header length field may be padded
+ * for alignment and can be larger than the actual content of the message.
+ * The field "cfm_op" describes the kind of event or action.
+ *
+ * Here are the kinds of content filter events:
+ * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
+ * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
+ * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
+ * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
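+ *
+ * To make the framing concrete, here is a sketch of the dispatch loop a
+ * filter agent runs on its kernel control socket (an editorial illustration,
+ * not part of the original comment; the authoritative message layouts live
+ * in the private net/content_filter.h header):
+ *
+ *	struct cfil_msg_hdr hdr;
+ *
+ *	recv(kcfd, &hdr, sizeof (hdr), MSG_PEEK);
+ *	switch (hdr.cfm_op) {
+ *	case CFM_OP_SOCKET_ATTACHED:	// start tracking hdr.cfm_sock_id
+ *	case CFM_OP_DATA_OUT:		// outgoing span to inspect
+ *	case CFM_OP_DATA_IN:		// incoming span to inspect
+ *	case CFM_OP_SOCKET_CLOSED:	// forget hdr.cfm_sock_id
+ *		break;
+ *	}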
+ *
+ *
+ * EVENT MESSAGES
+ *
+ * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
+ * data that is being sent or received. The position of this span of data
+ * in the data flow is described by a set of start and end offsets. These
+ * are absolute 64 bit offsets. The first byte sent (or received) starts
+ * at offset 0 and ends at offset 1. The length of the content data
+ * is given by the difference between the end offset and the start offset.
+ *
+ * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
+ * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
+ * action message is sent by the user space filter agent.
+ *
+ * Note: absolute 64 bit offsets should be large enough for the foreseeable
+ * future. A 64 bit counter wraps only after 468 years at 10 Gbit/sec:
+ *	2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
+ *
+ * There are two kinds of content filter actions:
+ * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
+ * - CFM_OP_DROP: to shutdown the socket and disallow further data flow
+ *
+ *
+ * ACTION MESSAGES
+ *
+ * The CFM_OP_DATA_UPDATE action messages let the user space filter
+ * agent allow data to flow up to the specified pass offset -- there
+ * is a pass offset for outgoing data and a pass offset for incoming data.
+ * When a new TCP/IP socket is attached to the content filter, each pass offset
+ * is initially set to 0 so no data is allowed to pass by default.
+ * When a pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
+ * then the data flow becomes unrestricted.
+ *
+ * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
+ * with a pass offset smaller than the pass offset of a previous
+ * CFM_OP_DATA_UPDATE message is silently ignored.
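+ *
+ * A short worked example of the pass offset (with illustrative numbers):
+ * suppose 1000 bytes have been sent on a socket, so the outstanding span
+ * covers offsets 0 to 1000. A CFM_OP_DATA_UPDATE action with an outgoing
+ * pass offset of 600 releases bytes 0 to 600 to the network while bytes
+ * 600 to 1000 stay held; a later update with a pass offset of 400 is
+ * silently ignored, and an update with CFM_MAX_OFFSET releases everything,
+ * now and in the future.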
+ *
+ * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
+ * to tell the kernel how much data it wants to see by using the peek offsets.
+ * Just like pass offsets, there is a peek offset for each direction.
+ * When a new TCP/IP socket is attached to the content filter, each peek offset
+ * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
+ * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
+ * with a greater than 0 peek offset is sent by the user space filter agent.
+ * When a peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
+ * then the flow of update data events becomes unrestricted.
+ *
+ * Note that peek offsets cannot be smaller than the corresponding pass offset.
+ * Also, a peek offset cannot be smaller than the corresponding end offset
+ * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
+ * to set too small a peek value is silently ignored.
+ *
+ *
+ * PER SOCKET "struct cfil_info"
+ *
+ * As soon as a TCP/IP socket gets attached to a content filter, a
+ * "struct cfil_info" is created to hold the content filtering state for this
+ * socket.
+ *
+ * The content filtering state is made of the following information
+ * for each direction:
+ * - The current pass offset;
+ * - The first and last offsets of the data pending, waiting for a filtering
+ *   decision;
+ * - The inject queue for data that passed the filters and that needs
+ *   to be re-injected;
+ * - A content filter specific state in a set of "struct cfil_entry"
+ *
+ *
+ * CONTENT FILTER STATE "struct cfil_entry"
+ *
+ * The "struct cfil_entry" maintains the information most relevant to the
+ * message handling over a kernel control socket with a user space filter agent.
+ *
+ * The "struct cfil_entry" holds the NECP filter control unit of the kernel
+ * control socket it corresponds to, and also a pointer to the corresponding
+ * "struct content_filter".
+ *
+ * For each direction, "struct cfil_entry" maintains the following information:
+ * - The pass offset
+ * - The peek offset
+ * - The offset of the last data peeked at by the filter
+ * - A queue of data that's waiting to be delivered to the user space filter
+ *   agent on the kernel control socket
+ * - A queue of data for which event messages have been sent on the kernel
+ *   control socket and are pending for a filtering decision.
+ *
+ *
+ * CONTENT FILTER QUEUES
+ *
+ * Data that is being filtered is steered away from the TCP/IP socket buffer
+ * and instead will sit in one of three content filter queues until the data
+ * can be re-injected into the TCP/IP socket buffer.
+ *
+ * A content filter queue is represented by "struct cfil_queue" that contains
+ * a list of mbufs and the start and end offset of the data span of
+ * the list of mbufs.
+ *
+ * The data moves into the three content filter queues according to this
+ * sequence:
+ * a) The "cfe_ctl_q" of "struct cfil_entry"
+ * b) The "cfe_pending_q" of "struct cfil_entry"
+ * c) The "cfi_inject_q" of "struct cfil_info"
+ *
+ * Note: the sequence (a),(b) may be repeated several times if there is more
+ * than one content filter attached to the TCP/IP socket.
+ *
+ * The "cfe_ctl_q" queue holds data that cannot be delivered to the
+ * kernel control socket for two reasons:
+ * - The peek offset is less than the end offset of the mbuf data
+ * - The kernel control socket is flow controlled
+ *
+ * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
+ * CFM_OP_DATA_IN events have been successfully dispatched to the kernel
+ * control socket and that is waiting for a pass action message from the user
+ * space filter agent. An mbuf's length must be fully allowed to pass for it
+ * to be removed from the cfe_pending_q.
+ *
+ * The "cfi_inject_q" queue holds data that has been fully allowed to pass
+ * by the user space filter agent and that needs to be re-injected into the
+ * TCP/IP socket.
+ *
+ *
+ * IMPACT ON FLOW CONTROL
+ *
+ * An essential aspect of the content filter subsystem is to minimize the
+ * impact on flow control of the TCP/IP sockets being filtered.
+ *
+ * The processing overhead of the content filtering may have an effect on
+ * flow control by adding noticeable delays and cannot be eliminated --
+ * care must be taken by the user space filter agent to minimize the
+ * processing delays.
+ *
+ * The amount of data being filtered is kept in buffers while waiting for
+ * a decision by the user space filter agent. This amount of pending data
+ * needs to be subtracted from the amount of data available in the
+ * corresponding TCP/IP socket buffer. This is done by modifying
+ * sbspace() and tcp_sbspace() to account for the amount of data pending
+ * in the content filter.
+ *
+ *
+ * LOCKING STRATEGY
+ *
+ * The global state of the content filter subsystem is protected by a single
+ * read-write lock, "cfil_lck_rw". The data flow can be done with the
+ * cfil read-write lock held as shared so it can be re-entered from multiple
+ * threads.
+ *
+ * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
+ * protected by the socket lock.
+ *
+ * A TCP/IP socket lock cannot be taken while the cfil read-write lock
+ * is held. That's why we have some sequences where we drop the cfil read-write
+ * lock before taking the TCP/IP lock.
+ *
+ * It is also important to lock the TCP/IP socket buffer while the content
+ * filter is modifying the amount of pending data. Otherwise the calculations
+ * in sbspace() and tcp_sbspace() could be wrong.
+ *
+ * The "cfil_lck_rw" protects "struct content_filter" and also the fields
+ * "cfe_link" and "cfe_filter" of "struct cfil_entry".
+ *
+ * In fact, "cfe_link" and "cfe_filter" are protected both by
+ * "cfil_lck_rw" and the socket lock: they may be modified only when
+ * "cfil_lck_rw" is held exclusive and the socket is locked.
+ *
+ * To read the other fields of "struct content_filter" we have to take
+ * "cfil_lck_rw" in shared mode.
+ *
+ *
+ * LIMITATIONS
+ *
+ * - For TCP sockets only
+ *
+ * - Does not support TCP unordered messages
+ */
+
+/*
+ * TO DO LIST
+ *
+ * SOONER:
+ *
+ * Deal with OOB
+ *
+ * LATER:
+ *
+ * If datagrams are supported, enqueue control and address mbufs as well
+ */
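Editorial aside (not part of the patch): a user space filter agent attaches to this subsystem through the standard Darwin kernel control socket dance, then registers its NECP filter control unit. The sketch below uses only public kernel-control APIs plus the CONTENT_FILTER_CONTROL_NAME and CFIL_OPT_NECP_CONTROL_UNIT definitions this file introduces; the kcunit value 1 and the control unit argument are arbitrary examples:

	/* Sketch: connect an agent to the content filter subsystem */
	#include <sys/socket.h>
	#include <sys/ioctl.h>
	#include <sys/sys_domain.h>
	#include <sys/kern_control.h>
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>

	int
	cfil_agent_connect(uint32_t necp_control_unit)
	{
		struct ctl_info info;
		struct sockaddr_ctl addr;
		int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

		if (fd < 0)
			return (-1);
		/* Resolve the control name to a control id */
		memset(&info, 0, sizeof (info));
		strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
		    sizeof (info.ctl_name));
		if (ioctl(fd, CTLIOCGINFO, &info) < 0)
			goto fail;
		memset(&addr, 0, sizeof (addr));
		addr.sc_len = sizeof (addr);
		addr.sc_family = AF_SYSTEM;
		addr.ss_sysaddr = AF_SYS_CONTROL;
		addr.sc_id = info.ctl_id;
		addr.sc_unit = 1;	/* kcunit 1..MAX_CONTENT_FILTER */
		if (connect(fd, (struct sockaddr *)&addr, sizeof (addr)) < 0)
			goto fail;
		/* Tell the kernel which NECP filter control unit we service */
		if (setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
		    &necp_control_unit, sizeof (necp_control_unit)) < 0)
			goto fail;
		return (fd);
	fail:
		close(fd);
		return (-1);
	}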
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+
+#define	MAX_CONTENT_FILTER 2
+
+struct cfil_entry;
+
+/*
+ * The structure content_filter represents a user space content filter.
+ * It is created and associated with a kernel control socket instance.
+ */
+struct content_filter {
+	kern_ctl_ref		cf_kcref;
+	u_int32_t		cf_kcunit;
+	u_int32_t		cf_flags;
+
+	uint32_t		cf_necp_control_unit;
+
+	uint32_t		cf_sock_count;
+	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
+};
+
+#define	CFF_ACTIVE		0x01
+#define	CFF_DETACHING		0x02
+#define	CFF_FLOW_CONTROLLED	0x04
+
+struct content_filter **content_filters = NULL;
+uint32_t cfil_active_count = 0;	/* Number of active content filters */
+uint32_t cfil_sock_attached_count = 0;	/* Number of socket attachments */
+uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
+
+static kern_ctl_ref cfil_kctlref = NULL;
+
+static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
+static lck_attr_t *cfil_lck_attr = NULL;
+static lck_grp_t *cfil_lck_grp = NULL;
+decl_lck_rw_data(static, cfil_lck_rw);
+
+#define	CFIL_RW_LCK_MAX	8
+
+int cfil_rw_nxt_lck = 0;
+void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
+
+int cfil_rw_nxt_unlck = 0;
+void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
+
+#define	CONTENT_FILTER_ZONE_NAME	"content_filter"
+#define	CONTENT_FILTER_ZONE_MAX		10
+static struct zone *content_filter_zone = NULL;	/* zone for content_filter */
+
+
+#define	CFIL_INFO_ZONE_NAME	"cfil_info"
+#define	CFIL_INFO_ZONE_MAX	1024
+static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */
+
+MBUFQ_HEAD(cfil_mqhead);
+
+struct cfil_queue {
+	uint64_t		q_start; /* offset of first byte in queue */
+	uint64_t		q_end; /* offset of last byte in queue */
+	struct cfil_mqhead	q_mq;
+};
+
+/*
+ * struct cfil_entry
+ *
+ * There is one entry per content filter
+ */
+struct cfil_entry {
+	TAILQ_ENTRY(cfil_entry) cfe_link;
+	struct content_filter	*cfe_filter;
+
+	struct cfil_info	*cfe_cfil_info;
+	uint32_t		cfe_flags;
+	uint32_t		cfe_necp_control_unit;
+	struct timeval		cfe_last_event; /* To user space */
+	struct timeval		cfe_last_action; /* From user space */
+
+	struct cfe_buf {
+		/*
+		 * cfe_pending_q holds data that has been delivered to
+		 * the filter and for which we are waiting for an action
+		 */
+		struct cfil_queue	cfe_pending_q;
+		/*
+		 * This queue is for data that has not been delivered to
+		 * the content filter (new data, pass peek or flow control)
+		 */
+		struct cfil_queue	cfe_ctl_q;
+
+		uint64_t		cfe_pass_offset;
+		uint64_t		cfe_peek_offset;
+		uint64_t		cfe_peeked;
+	} cfe_snd, cfe_rcv;
+};
+
+#define	CFEF_CFIL_ATTACHED		0x0001	/* was attached to filter */
+#define	CFEF_SENT_SOCK_ATTACHED		0x0002	/* sock attach event was sent */
+#define	CFEF_DATA_START			0x0004	/* can send data event */
+#define	CFEF_FLOW_CONTROLLED		0x0008	/* wait for flow control lift */
+#define	CFEF_SENT_DISCONNECT_IN		0x0010	/* event was sent */
+#define	CFEF_SENT_DISCONNECT_OUT	0x0020	/* event was sent */
+#define	CFEF_SENT_SOCK_CLOSED		0x0040	/* closed event was sent */
+#define	CFEF_CFIL_DETACHED		0x0080	/* filter was detached */
+
+/*
+ * struct cfil_info
+ *
+ * There is a struct cfil_info per socket
+ */
+struct cfil_info {
+	TAILQ_ENTRY(cfil_info)
cfi_link; + struct socket *cfi_so; + uint64_t cfi_flags; + uint64_t cfi_sock_id; + + struct cfi_buf { + /* + * cfi_pending_first and cfi_pending_last describe the total + * amount of data outstanding for all the filters on + * this socket and data in the flow queue + * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used" + */ + uint64_t cfi_pending_first; + uint64_t cfi_pending_last; + int cfi_pending_mbcnt; + /* + * cfi_pass_offset is the minimum of all the filters + */ + uint64_t cfi_pass_offset; + /* + * cfi_inject_q holds data that needs to be re-injected + * into the socket after filtering and that can + * be queued because of flow control + */ + struct cfil_queue cfi_inject_q; + } cfi_snd, cfi_rcv; + + struct cfil_entry cfi_entries[MAX_CONTENT_FILTER]; +}; + +#define CFIF_DROP 0x0001 /* drop action applied */ +#define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */ +#define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */ +#define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */ +#define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */ +#define CFIF_SHUT_WR 0x0040 /* shutdown write */ +#define CFIF_SHUT_RD 0x0080 /* shutdown read */ + +#define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */ +#define CFI_SHIFT_GENCNT 32 +#define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */ +#define CFI_SHIFT_FLOWHASH 0 + +TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head; + +#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x) +#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x) + +/* + * Statistics + */ + +struct cfil_stats cfil_stats; + +/* + * For troubleshooting + */ +int cfil_log_level = LOG_ERR; +int cfil_debug = 1; + +/* + * Sysctls for logs and statistics + */ +static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int, + struct sysctl_req *); +static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int, + struct sysctl_req *); + +SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil"); + +SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED, + &cfil_log_level, 0, ""); + +SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED, + &cfil_debug, 0, ""); + +SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED, + &cfil_sock_attached_count, 0, ""); + +SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED, + &cfil_active_count, 0, ""); + +SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED, + &cfil_close_wait_timeout, 0, ""); + +static int cfil_sbtrim = 1; +SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED, + &cfil_sbtrim, 0, ""); + +SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED, + 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", ""); + +SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED, + 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", ""); + +SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED, + &cfil_stats, cfil_stats, ""); + +/* + * Forward declaration to appease the compiler + */ +static int cfil_action_data_pass(struct socket *, uint32_t, int, + uint64_t, uint64_t); +static int cfil_action_drop(struct socket *, uint32_t); +static int cfil_dispatch_closed_event(struct socket *, int); +static int cfil_data_common(struct socket *, int, struct sockaddr *, + struct mbuf *, struct mbuf *, uint32_t); +static int cfil_data_filter(struct socket *, uint32_t, int, + struct mbuf *, uint64_t); +static void fill_ip_sockaddr_4_6(union 
sockaddr_in_4_6 *, + struct in_addr, u_int16_t); +static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *, + struct in6_addr *, u_int16_t); +static int cfil_dispatch_attach_event(struct socket *, uint32_t); +static void cfil_info_free(struct socket *, struct cfil_info *); +static struct cfil_info * cfil_info_alloc(struct socket *); +static int cfil_info_attach_unit(struct socket *, uint32_t); +static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t); +static int cfil_service_pending_queue(struct socket *, uint32_t, int); +static int cfil_data_service_ctl_q(struct socket *, uint32_t, int); +static void cfil_info_verify(struct cfil_info *); +static int cfil_update_data_offsets(struct socket *, uint32_t, int, + uint64_t, uint64_t); +static int cfil_acquire_sockbuf(struct socket *, int); +static void cfil_release_sockbuf(struct socket *, int); +static int cfil_filters_attached(struct socket *); + +static void cfil_rw_lock_exclusive(lck_rw_t *); +static void cfil_rw_unlock_exclusive(lck_rw_t *); +static void cfil_rw_lock_shared(lck_rw_t *); +static void cfil_rw_unlock_shared(lck_rw_t *); +static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *); +static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *); + +static unsigned int cfil_data_length(struct mbuf *, int *); + +/* + * Content filter global read write lock + */ + +static void +cfil_rw_lock_exclusive(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_lock_exclusive(lck); + + cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved; + cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX; +} + +static void +cfil_rw_unlock_exclusive(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_unlock_exclusive(lck); + + cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved; + cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX; +} + +static void +cfil_rw_lock_shared(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_lock_shared(lck); + + cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved; + cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX; +} + +static void +cfil_rw_unlock_shared(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_unlock_shared(lck); + + cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved; + cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX; +} + +static boolean_t +cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck) +{ + void *lr_saved; + boolean_t upgraded; + + lr_saved = __builtin_return_address(0); + + upgraded = lck_rw_lock_shared_to_exclusive(lck); + if (upgraded) { + cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved; + cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX; + } + return (upgraded); +} + +static void +cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_lock_exclusive_to_shared(lck); + + cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved; + cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX; +} + +static void +cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive) +{ + lck_rw_assert(lck, + exclusive ? 
LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
+}
+
+static void
+socket_lock_assert_owned(struct socket *so)
+{
+	lck_mtx_t *mutex_held;
+
+	if (so->so_proto->pr_getlock != NULL)
+		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
+	else
+		mutex_held = so->so_proto->pr_domain->dom_mtx;
+
+	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
+}
+
+/*
+ * Return the number of bytes in the mbuf chain using the same
+ * method as m_length() or sballoc()
+ */
+static unsigned int
+cfil_data_length(struct mbuf *m, int *retmbcnt)
+{
+	struct mbuf *m0;
+	unsigned int pktlen;
+	int mbcnt;
+
+	if (retmbcnt == NULL)
+		return (m_length(m));
+
+	pktlen = 0;
+	mbcnt = 0;
+	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
+		pktlen += m0->m_len;
+		mbcnt += MSIZE;
+		if (m0->m_flags & M_EXT)
+			mbcnt += m0->m_ext.ext_size;
+	}
+	*retmbcnt = mbcnt;
+	return (pktlen);
+}
+
+/*
+ * Common mbuf queue utilities
+ */
+
+static inline void
+cfil_queue_init(struct cfil_queue *cfq)
+{
+	cfq->q_start = 0;
+	cfq->q_end = 0;
+	MBUFQ_INIT(&cfq->q_mq);
+}
+
+static inline uint64_t
+cfil_queue_drain(struct cfil_queue *cfq)
+{
+	uint64_t drained = cfq->q_end - cfq->q_start;
+	cfq->q_start = 0;
+	cfq->q_end = 0;
+	MBUFQ_DRAIN(&cfq->q_mq);
+
+	return (drained);
+}
+
+/* Return 1 when empty, 0 otherwise */
+static inline int
+cfil_queue_empty(struct cfil_queue *cfq)
+{
+	return (MBUFQ_EMPTY(&cfq->q_mq));
+}
+
+static inline uint64_t
+cfil_queue_offset_first(struct cfil_queue *cfq)
+{
+	return (cfq->q_start);
+}
+
+static inline uint64_t
+cfil_queue_offset_last(struct cfil_queue *cfq)
+{
+	return (cfq->q_end);
+}
+
+static inline uint64_t
+cfil_queue_len(struct cfil_queue *cfq)
+{
+	return (cfq->q_end - cfq->q_start);
+}
+
+/*
+ * Routines to verify some fundamental assumptions
+ */
+
+static void
+cfil_queue_verify(struct cfil_queue *cfq)
+{
+	mbuf_t m;
+	mbuf_t n;
+	uint64_t queuesize = 0;
+
+	/* Verify the offsets are ordered */
+	VERIFY(cfq->q_start <= cfq->q_end);
+
+	/*
+	 * When the queue is empty the offsets are equal; otherwise
+	 * the offsets are different
+	 */
+	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
+	    (!MBUFQ_EMPTY(&cfq->q_mq) &&
+	    cfq->q_start != cfq->q_end));
+
+	MBUFQ_FOREACH(m, &cfq->q_mq) {
+		size_t chainsize = 0;
+		unsigned int mlen = m_length(m);
+
+		if (m == (void *)M_TAG_FREE_PATTERN ||
+		    m->m_next == (void *)M_TAG_FREE_PATTERN ||
+		    m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
+			panic("%s - mq %p is free at %p", __func__,
+			    &cfq->q_mq, m);
+		for (n = m; n != NULL; n = n->m_next) {
+			if (n->m_type != MT_DATA &&
+			    n->m_type != MT_HEADER &&
+			    n->m_type != MT_OOBDATA)
+				panic("%s - %p unsupported type %u", __func__,
+				    n, n->m_type);
+			chainsize += n->m_len;
+		}
+		if (mlen != chainsize)
+			panic("%s - %p m_length() %u != chainsize %lu",
+			    __func__, m, mlen, chainsize);
+		queuesize += chainsize;
+	}
+	if (queuesize != cfq->q_end - cfq->q_start)
+		panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
+		    m, queuesize, cfq->q_end - cfq->q_start);
+}
+
+static void
+cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
+{
+	CFIL_QUEUE_VERIFY(cfq);
+
+	MBUFQ_ENQUEUE(&cfq->q_mq, m);
+	cfq->q_end += len;
+
+	CFIL_QUEUE_VERIFY(cfq);
+}
+
+static void
+cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
+{
+	CFIL_QUEUE_VERIFY(cfq);
+
+	VERIFY(m_length(m) == len);
+
+	MBUFQ_REMOVE(&cfq->q_mq, m);
+	MBUFQ_NEXT(m) = NULL;
+	cfq->q_start += len;
+
+	CFIL_QUEUE_VERIFY(cfq);
+}
+
+static mbuf_t
+cfil_queue_first(struct cfil_queue *cfq)
+{
+	return (MBUFQ_FIRST(&cfq->q_mq));
+}
+
+static
mbuf_t +cfil_queue_next(struct cfil_queue *cfq, mbuf_t m) +{ +#pragma unused(cfq) + return (MBUFQ_NEXT(m)); +} + +static void +cfil_entry_buf_verify(struct cfe_buf *cfe_buf) +{ + CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q); + CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q); + + /* Verify the queues are ordered so that pending is before ctl */ + VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end); + + /* The peek offset cannot be less than the pass offset */ + VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset); + + /* Make sure we've updated the offset we peeked at */ + VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked); +} + +static void +cfil_entry_verify(struct cfil_entry *entry) +{ + cfil_entry_buf_verify(&entry->cfe_snd); + cfil_entry_buf_verify(&entry->cfe_rcv); +} + +static void +cfil_info_buf_verify(struct cfi_buf *cfi_buf) +{ + CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q); + + VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last); + VERIFY(cfi_buf->cfi_pending_mbcnt >= 0); +} + +static void +cfil_info_verify(struct cfil_info *cfil_info) +{ + int i; + + if (cfil_info == NULL) + return; + + cfil_info_buf_verify(&cfil_info->cfi_snd); + cfil_info_buf_verify(&cfil_info->cfi_rcv); + + for (i = 0; i < MAX_CONTENT_FILTER; i++) + cfil_entry_verify(&cfil_info->cfi_entries[i]); +} + +static void +verify_content_filter(struct content_filter *cfc) +{ + struct cfil_entry *entry; + uint32_t count = 0; + + VERIFY(cfc->cf_sock_count >= 0); + + TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) { + count++; + VERIFY(cfc == entry->cfe_filter); + } + VERIFY(count == cfc->cf_sock_count); +} + +/* + * Kernel control socket callbacks + */ +static errno_t +cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, + void **unitinfo) +{ + errno_t error = 0; + struct content_filter *cfc = NULL; + + CFIL_LOG(LOG_NOTICE, ""); + + cfc = zalloc(content_filter_zone); + if (cfc == NULL) { + CFIL_LOG(LOG_ERR, "zalloc failed"); + error = ENOMEM; + goto done; + } + bzero(cfc, sizeof(struct content_filter)); + + cfil_rw_lock_exclusive(&cfil_lck_rw); + if (content_filters == NULL) { + struct content_filter **tmp; + + cfil_rw_unlock_exclusive(&cfil_lck_rw); + + MALLOC(tmp, + struct content_filter **, + MAX_CONTENT_FILTER * sizeof(struct content_filter *), + M_TEMP, + M_WAITOK | M_ZERO); + + cfil_rw_lock_exclusive(&cfil_lck_rw); + + if (tmp == NULL && content_filters == NULL) { + error = ENOMEM; + cfil_rw_unlock_exclusive(&cfil_lck_rw); + goto done; + } + /* Another thread may have won the race */ + if (content_filters != NULL) + FREE(tmp, M_TEMP); + else + content_filters = tmp; + } + + if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) { + CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit); + error = EINVAL; + } else if (content_filters[sac->sc_unit - 1] != NULL) { + CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit); + error = EADDRINUSE; + } else { + /* + * kernel control socket kcunit numbers start at 1 + */ + content_filters[sac->sc_unit - 1] = cfc; + + cfc->cf_kcref = kctlref; + cfc->cf_kcunit = sac->sc_unit; + TAILQ_INIT(&cfc->cf_sock_entries); + + *unitinfo = cfc; + cfil_active_count++; + } + cfil_rw_unlock_exclusive(&cfil_lck_rw); +done: + if (error != 0 && cfc != NULL) + zfree(content_filter_zone, cfc); + + if (error == 0) + OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok); + else + OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail); + + CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u", + error, cfil_active_count, sac->sc_unit); + + return 
(error); +} + +static errno_t +cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo) +{ +#pragma unused(kctlref) + errno_t error = 0; + struct content_filter *cfc; + struct cfil_entry *entry; + + CFIL_LOG(LOG_NOTICE, ""); + + if (content_filters == NULL) { + CFIL_LOG(LOG_ERR, "no content filter"); + error = EINVAL; + goto done; + } + if (kcunit > MAX_CONTENT_FILTER) { + CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", + kcunit, MAX_CONTENT_FILTER); + error = EINVAL; + goto done; + } + + cfc = (struct content_filter *)unitinfo; + if (cfc == NULL) + goto done; + + cfil_rw_lock_exclusive(&cfil_lck_rw); + if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) { + CFIL_LOG(LOG_ERR, "bad unit info %u", + kcunit); + cfil_rw_unlock_exclusive(&cfil_lck_rw); + goto done; + } + cfc->cf_flags |= CFF_DETACHING; + /* + * Remove all sockets from the filter + */ + while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) { + cfil_rw_lock_assert_held(&cfil_lck_rw, 1); + + verify_content_filter(cfc); + /* + * Accept all outstanding data by pushing to next filter + * or back to socket + * + * TBD: Actually we should make sure all data has been pushed + * back to socket + */ + if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) { + struct cfil_info *cfil_info = entry->cfe_cfil_info; + struct socket *so = cfil_info->cfi_so; + + /* Need to let data flow immediately */ + entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED | + CFEF_DATA_START; + + /* + * Respect locking hierarchy + */ + cfil_rw_unlock_exclusive(&cfil_lck_rw); + + socket_lock(so, 1); + + /* + * When cfe_filter is NULL the filter is detached + * and the entry has been removed from cf_sock_entries + */ + if (so->so_cfil == NULL || entry->cfe_filter == NULL) { + cfil_rw_lock_exclusive(&cfil_lck_rw); + goto release; + } + (void) cfil_action_data_pass(so, kcunit, 1, + CFM_MAX_OFFSET, + CFM_MAX_OFFSET); + + (void) cfil_action_data_pass(so, kcunit, 0, + CFM_MAX_OFFSET, + CFM_MAX_OFFSET); + + cfil_rw_lock_exclusive(&cfil_lck_rw); + + /* + * Check again as the socket may have been unlocked + * when calling cfil_acquire_sockbuf() + */ + if (so->so_cfil == NULL || entry->cfe_filter == NULL) + goto release; + + /* The filter is now detached */ + entry->cfe_flags |= CFEF_CFIL_DETACHED; + CFIL_LOG(LOG_NOTICE, "so %llx detached %u", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); + + if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) && + cfil_filters_attached(so) == 0) { + CFIL_LOG(LOG_NOTICE, "so %llx waking", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + wakeup((caddr_t)&so->so_cfil); + } + + /* + * Remove the filter entry from the content filter + * but leave the rest of the state intact as the queues + * may not be empty yet + */ + entry->cfe_filter = NULL; + entry->cfe_necp_control_unit = 0; + + TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link); + cfc->cf_sock_count--; +release: + socket_unlock(so, 1); + } + } + verify_content_filter(cfc); + + VERIFY(cfc->cf_sock_count == 0); + + /* + * Make filter inactive + */ + content_filters[kcunit - 1] = NULL; + cfil_active_count--; + cfil_rw_unlock_exclusive(&cfil_lck_rw); + + zfree(content_filter_zone, cfc); +done: + if (error == 0) + OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok); + else + OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail); + + CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u", + error, cfil_active_count, kcunit); + + return (error); +} + +/* + * cfil_acquire_sockbuf() + * + * Prevent any other thread from acquiring the sockbuf + * We 
use sb_cfil_thread as a semaphore to prevent other threads from + * messing with the sockbuf -- see sblock() + * Note: We do not set SB_LOCK here because the thread may check or modify + * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently + * sblock(), sbunlock() or sodefunct() + */ +static int +cfil_acquire_sockbuf(struct socket *so, int outgoing) +{ + thread_t tp = current_thread(); + struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv; + lck_mtx_t *mutex_held; + int error = 0; + + /* + * Wait until no thread is holding the sockbuf and other content + * filter threads have released the sockbuf + */ + while ((sb->sb_flags & SB_LOCK) || + (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) { + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + + sb->sb_wantlock++; + VERIFY(sb->sb_wantlock != 0); + + msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf", + NULL); + + VERIFY(sb->sb_wantlock != 0); + sb->sb_wantlock--; + } + /* + * Use reference count for repetitive calls on same thread + */ + if (sb->sb_cfil_refs == 0) { + VERIFY(sb->sb_cfil_thread == NULL); + VERIFY((sb->sb_flags & SB_LOCK) == 0); + + sb->sb_cfil_thread = tp; + sb->sb_flags |= SB_LOCK; + } + sb->sb_cfil_refs++; + + /* We acquire the socket buffer when we need to cleanup */ + if (so->so_cfil == NULL) { + CFIL_LOG(LOG_ERR, "so %llx cfil detached", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = 0; + } else if (so->so_cfil->cfi_flags & CFIF_DROP) { + CFIL_LOG(LOG_ERR, "so %llx drop set", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = EPIPE; + } + + return (error); +} + +static void +cfil_release_sockbuf(struct socket *so, int outgoing) +{ + struct sockbuf *sb = outgoing ? 
&so->so_snd : &so->so_rcv; + thread_t tp = current_thread(); + + socket_lock_assert_owned(so); + + if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) + panic("%s sb_cfil_thread %p not current %p", __func__, + sb->sb_cfil_thread, tp); + /* + * Don't panic if we are defunct because SB_LOCK has + * been cleared by sodefunct() + */ + if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) + panic("%s SB_LOCK not set on %p", __func__, + sb); + /* + * We can unlock when the thread unwinds to the last reference + */ + sb->sb_cfil_refs--; + if (sb->sb_cfil_refs == 0) { + sb->sb_cfil_thread = NULL; + sb->sb_flags &= ~SB_LOCK; + + if (sb->sb_wantlock > 0) + wakeup(&sb->sb_flags); + } +} + +cfil_sock_id_t +cfil_sock_id_from_socket(struct socket *so) +{ + if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) + return (so->so_cfil->cfi_sock_id); + else + return (CFIL_SOCK_ID_NONE); +} + +static struct socket * +cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id) +{ + struct socket *so = NULL; + u_int64_t gencnt = cfil_sock_id >> 32; + u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff); + struct inpcb *inp = NULL; + struct inpcbinfo *pcbinfo = &tcbinfo; + + lck_rw_lock_shared(pcbinfo->ipi_lock); + LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { + if (inp->inp_state != INPCB_STATE_DEAD && + inp->inp_socket != NULL && + inp->inp_flowhash == flowhash && + (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt && + inp->inp_socket->so_cfil != NULL) { + so = inp->inp_socket; + break; + } + } + lck_rw_done(pcbinfo->ipi_lock); + + if (so == NULL) { + OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found); + CFIL_LOG(LOG_DEBUG, + "no socket for sock_id %llx gencnt %llx flowhash %x", + cfil_sock_id, gencnt, flowhash); + } + + return (so); +} + +static errno_t +cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m, + int flags) +{ +#pragma unused(kctlref, flags) + errno_t error = 0; + struct cfil_msg_hdr *msghdr; + struct content_filter *cfc = (struct content_filter *)unitinfo; + struct socket *so; + struct cfil_msg_action *action_msg; + struct cfil_entry *entry; + + CFIL_LOG(LOG_INFO, ""); + + if (content_filters == NULL) { + CFIL_LOG(LOG_ERR, "no content filter"); + error = EINVAL; + goto done; + } + if (kcunit > MAX_CONTENT_FILTER) { + CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", + kcunit, MAX_CONTENT_FILTER); + error = EINVAL; + goto done; + } + + if (m_length(m) < sizeof(struct cfil_msg_hdr)) { + CFIL_LOG(LOG_ERR, "too short %u", m_length(m)); + error = EINVAL; + goto done; + } + msghdr = (struct cfil_msg_hdr *)mbuf_data(m); + if (msghdr->cfm_version != CFM_VERSION_CURRENT) { + CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version); + error = EINVAL; + goto done; + } + if (msghdr->cfm_type != CFM_TYPE_ACTION) { + CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type); + error = EINVAL; + goto done; + } + /* Validate action operation */ + switch (msghdr->cfm_op) { + case CFM_OP_DATA_UPDATE: + OSIncrementAtomic( + &cfil_stats.cfs_ctl_action_data_update); + break; + case CFM_OP_DROP: + OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop); + break; + default: + OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op); + CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op); + error = EINVAL; + goto done; + } + if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) { + OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len); + error = EINVAL; + CFIL_LOG(LOG_ERR, "bad len: %u for op %u", + msghdr->cfm_len, + msghdr->cfm_op); + goto done; + } + 
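/* + * The kcunit-to-filter mapping and the sock_id-to-socket lookup are + * both done under the shared cfil lock; the lock is dropped again + * before socket_lock() below so that cfil_lck_rw is never held while + * a socket lock is being acquired. + */ + 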
cfil_rw_lock_shared(&cfil_lck_rw); + if (cfc != (void *)content_filters[kcunit - 1]) { + CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", + kcunit); + error = EINVAL; + cfil_rw_unlock_shared(&cfil_lck_rw); + goto done; + } + + so = cfil_socket_from_sock_id(msghdr->cfm_sock_id); + if (so == NULL) { + CFIL_LOG(LOG_NOTICE, "bad sock_id %llx", + msghdr->cfm_sock_id); + error = EINVAL; + cfil_rw_unlock_shared(&cfil_lck_rw); + goto done; + } + cfil_rw_unlock_shared(&cfil_lck_rw); + + socket_lock(so, 1); + + if (so->so_cfil == NULL) { + CFIL_LOG(LOG_NOTICE, "so %llx not attached", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = EINVAL; + goto unlock; + } else if (so->so_cfil->cfi_flags & CFIF_DROP) { + CFIL_LOG(LOG_NOTICE, "so %llx drop set", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = EINVAL; + goto unlock; + } + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (entry->cfe_filter == NULL) { + CFIL_LOG(LOG_NOTICE, "so %llx no filter", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = EINVAL; + goto unlock; + } + + if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) + entry->cfe_flags |= CFEF_DATA_START; + else { + CFIL_LOG(LOG_ERR, + "so %llx attached not sent for %u", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); + error = EINVAL; + goto unlock; + } + + microuptime(&entry->cfe_last_action); + + action_msg = (struct cfil_msg_action *)msghdr; + + switch (msghdr->cfm_op) { + case CFM_OP_DATA_UPDATE: + if (action_msg->cfa_out_peek_offset != 0 || + action_msg->cfa_out_pass_offset != 0) + error = cfil_action_data_pass(so, kcunit, 1, + action_msg->cfa_out_pass_offset, + action_msg->cfa_out_peek_offset); + if (error == EJUSTRETURN) + error = 0; + if (error != 0) + break; + if (action_msg->cfa_in_peek_offset != 0 || + action_msg->cfa_in_pass_offset != 0) + error = cfil_action_data_pass(so, kcunit, 0, + action_msg->cfa_in_pass_offset, + action_msg->cfa_in_peek_offset); + if (error == EJUSTRETURN) + error = 0; + break; + + case CFM_OP_DROP: + error = cfil_action_drop(so, kcunit); + break; + + default: + error = EINVAL; + break; + } +unlock: + socket_unlock(so, 1); +done: + mbuf_freem(m); + + if (error == 0) + OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok); + else + OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad); + + return (error); +} + +static errno_t +cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, + int opt, void *data, size_t *len) +{ +#pragma unused(kctlref, opt) + errno_t error = 0; + struct content_filter *cfc = (struct content_filter *)unitinfo; + + CFIL_LOG(LOG_NOTICE, ""); + + cfil_rw_lock_shared(&cfil_lck_rw); + + if (content_filters == NULL) { + CFIL_LOG(LOG_ERR, "no content filter"); + error = EINVAL; + goto done; + } + if (kcunit > MAX_CONTENT_FILTER) { + CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", + kcunit, MAX_CONTENT_FILTER); + error = EINVAL; + goto done; + } + if (cfc != (void *)content_filters[kcunit - 1]) { + CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", + kcunit); + error = EINVAL; + goto done; + } + switch (opt) { + case CFIL_OPT_NECP_CONTROL_UNIT: + if (*len < sizeof(uint32_t)) { + CFIL_LOG(LOG_ERR, "len too small %lu", *len); + error = EINVAL; + goto done; + } + if (data != NULL) + *(uint32_t *)data = cfc->cf_necp_control_unit; + break; + default: + error = ENOPROTOOPT; + break; + } +done: + cfil_rw_unlock_shared(&cfil_lck_rw); + + return (error); +} + +static errno_t +cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, + int opt, void *data, size_t len) +{ +#pragma unused(kctlref, opt) + errno_t error = 
0; + struct content_filter *cfc = (struct content_filter *)unitinfo; + + CFIL_LOG(LOG_NOTICE, ""); + + cfil_rw_lock_exclusive(&cfil_lck_rw); + + if (content_filters == NULL) { + CFIL_LOG(LOG_ERR, "no content filter"); + error = EINVAL; + goto done; + } + if (kcunit > MAX_CONTENT_FILTER) { + CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", + kcunit, MAX_CONTENT_FILTER); + error = EINVAL; + goto done; + } + if (cfc != (void *)content_filters[kcunit - 1]) { + CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", + kcunit); + error = EINVAL; + goto done; + } + switch (opt) { + case CFIL_OPT_NECP_CONTROL_UNIT: + if (len < sizeof(uint32_t)) { + CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (cfc->cf_necp_control_unit != 0) { + CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT " + "already set %u", + cfc->cf_necp_control_unit); + error = EINVAL; + goto done; + } + cfc->cf_necp_control_unit = *(uint32_t *)data; + break; + default: + error = ENOPROTOOPT; + break; + } +done: + cfil_rw_unlock_exclusive(&cfil_lck_rw); + + return (error); +} + + +static void +cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags) +{ +#pragma unused(kctlref, flags) + struct content_filter *cfc = (struct content_filter *)unitinfo; + struct socket *so = NULL; + int error; + struct cfil_entry *entry; + + CFIL_LOG(LOG_INFO, ""); + + if (content_filters == NULL) { + CFIL_LOG(LOG_ERR, "no content filter"); + OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad); + return; + } + if (kcunit > MAX_CONTENT_FILTER) { + CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", + kcunit, MAX_CONTENT_FILTER); + OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad); + return; + } + cfil_rw_lock_shared(&cfil_lck_rw); + if (cfc != (void *)content_filters[kcunit - 1]) { + CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", + kcunit); + OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad); + goto done; + } + /* Let's assume the flow control is lifted */ + if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { + if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) + cfil_rw_lock_exclusive(&cfil_lck_rw); + + cfc->cf_flags &= ~CFF_FLOW_CONTROLLED; + + cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw); + lck_rw_assert(&cfil_lck_rw, LCK_RW_ASSERT_SHARED); + } + /* + * Flow control will be raised again as soon as an entry cannot enqueue + * to the kernel control socket + */ + while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) { + verify_content_filter(cfc); + + cfil_rw_lock_assert_held(&cfil_lck_rw, 0); + + /* Find an entry that is flow controlled */ + TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) { + if (entry->cfe_cfil_info == NULL || + entry->cfe_cfil_info->cfi_so == NULL) + continue; + if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) + continue; + break; + } + if (entry == NULL) + break; + + OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift); + + so = entry->cfe_cfil_info->cfi_so; + + cfil_rw_unlock_shared(&cfil_lck_rw); + socket_lock(so, 1); + + do { + error = cfil_acquire_sockbuf(so, 1); + if (error == 0) + error = cfil_data_service_ctl_q(so, kcunit, 1); + cfil_release_sockbuf(so, 1); + if (error != 0) + break; + + error = cfil_acquire_sockbuf(so, 0); + if (error == 0) + error = cfil_data_service_ctl_q(so, kcunit, 0); + cfil_release_sockbuf(so, 0); + } while (0); + + socket_lock_assert_owned(so); + socket_unlock(so, 1); + + cfil_rw_lock_shared(&cfil_lck_rw); + } +done: + cfil_rw_unlock_shared(&cfil_lck_rw); +}
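cfil_ctl_rcvd() closes the loop of the kernel-control protocol implemented above: connect/disconnect manage the filter slot, send carries actions from the agent, and rcvd lifts flow control once the agent drains its receive buffer. The sketch below shows the user-space side of that protocol: a filter agent attaching to the control and issuing a pass-everything CFM_OP_DATA_UPDATE. This is a minimal sketch, not part of the patch: it assumes the private <sys/content_filter.h> added by this patch is visible to user space, that struct cfil_msg_action embeds its header in a member named cfa_msghdr, and it uses a made-up cfm_sock_id; connecting also requires privileges since the control is registered with CTL_FLAG_PRIVILEGED.

    #include <sys/socket.h>
    #include <sys/ioctl.h>
    #include <sys/kern_control.h>
    #include <sys/sys_domain.h>
    #include <sys/content_filter.h>	/* private header from this patch */
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int
    main(void)
    {
    	struct ctl_info info;
    	struct sockaddr_ctl sac;
    	struct cfil_msg_action act;
    	int fd;

    	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
    	if (fd < 0) {
    		perror("socket");
    		return (1);
    	}
    	/* Resolve the dynamic kernel control id from its name */
    	memset(&info, 0, sizeof(info));
    	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
    	    sizeof(info.ctl_name));
    	if (ioctl(fd, CTLIOCGINFO, &info) == -1) {
    		perror("CTLIOCGINFO");
    		return (1);
    	}
    	memset(&sac, 0, sizeof(sac));
    	sac.sc_len = sizeof(sac);
    	sac.sc_family = AF_SYSTEM;
    	sac.ss_sysaddr = AF_SYS_CONTROL;
    	sac.sc_id = info.ctl_id;
    	sac.sc_unit = 1;	/* kcunit; must be in [1, MAX_CONTENT_FILTER] */
    	if (connect(fd, (struct sockaddr *)&sac, sizeof(sac)) == -1) {
    		perror("connect");	/* EADDRINUSE if the unit is taken */
    		return (1);
    	}
    	/*
    	 * Let all data through for one socket; the sock_id would come
    	 * from a CFM_OP_SOCKET_ATTACHED event (hypothetical value here).
    	 * cfil_ctl_send() validates every field below.
    	 */
    	memset(&act, 0, sizeof(act));
    	act.cfa_msghdr.cfm_len = sizeof(act);	/* cfa_msghdr name assumed */
    	act.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
    	act.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
    	act.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
    	act.cfa_msghdr.cfm_sock_id = 0x1234567890ULL;
    	act.cfa_out_pass_offset = CFM_MAX_OFFSET;
    	act.cfa_out_peek_offset = CFM_MAX_OFFSET;
    	act.cfa_in_pass_offset = CFM_MAX_OFFSET;
    	act.cfa_in_peek_offset = CFM_MAX_OFFSET;
    	if (send(fd, &act, sizeof(act), 0) == -1)
    		perror("send");
    	close(fd);
    	return (0);
    }

Passing CFM_MAX_OFFSET for both pass and peek in both directions is the same "let everything flow" action cfil_ctl_disconnect() applies internally when a filter detaches.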
+ +void +cfil_init(void) +{ + struct kern_ctl_reg kern_ctl; + errno_t error = 0; + vm_size_t content_filter_size = 0; /* size of content_filter */ + vm_size_t cfil_info_size = 0; /* size of cfil_info */ + + CFIL_LOG(LOG_NOTICE, ""); + + /* + * Compile time verifications + */ + _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER); + _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0); + _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0); + _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0); + + /* + * Runtime verifications + */ + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued, + sizeof(uint32_t))); + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued, + sizeof(uint32_t))); + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked, + sizeof(uint32_t))); + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked, + sizeof(uint32_t))); + + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued, + sizeof(uint32_t))); + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued, + sizeof(uint32_t))); + + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued, + sizeof(uint32_t))); + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued, + sizeof(uint32_t))); + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed, + sizeof(uint32_t))); + VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed, + sizeof(uint32_t))); + + /* + * Zone for content filters kernel control sockets + */ + content_filter_size = sizeof(struct content_filter); + content_filter_zone = zinit(content_filter_size, + CONTENT_FILTER_ZONE_MAX * content_filter_size, + 0, + CONTENT_FILTER_ZONE_NAME); + if (content_filter_zone == NULL) { + panic("%s: zinit(%s) failed", __func__, + CONTENT_FILTER_ZONE_NAME); + /* NOTREACHED */ + } + zone_change(content_filter_zone, Z_CALLERACCT, FALSE); + zone_change(content_filter_zone, Z_EXPAND, TRUE); + + /* + * Zone for per socket content filters + */ + cfil_info_size = sizeof(struct cfil_info); + cfil_info_zone = zinit(cfil_info_size, + CFIL_INFO_ZONE_MAX * cfil_info_size, + 0, + CFIL_INFO_ZONE_NAME); + if (cfil_info_zone == NULL) { + panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME); + /* NOTREACHED */ + } + zone_change(cfil_info_zone, Z_CALLERACCT, FALSE); + zone_change(cfil_info_zone, Z_EXPAND, TRUE); + + /* + * Allocate locks + */ + cfil_lck_grp_attr = lck_grp_attr_alloc_init(); + if (cfil_lck_grp_attr == NULL) { + panic("%s: lck_grp_attr_alloc_init failed", __func__); + /* NOTREACHED */ + } + cfil_lck_grp = lck_grp_alloc_init("content filter", + cfil_lck_grp_attr); + if (cfil_lck_grp == NULL) { + panic("%s: lck_grp_alloc_init failed", __func__); + /* NOTREACHED */ + } + cfil_lck_attr = lck_attr_alloc_init(); + if (cfil_lck_attr == NULL) { + panic("%s: lck_attr_alloc_init failed", __func__); + /* NOTREACHED */ + } + lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr); + + TAILQ_INIT(&cfil_sock_head); + + /* + * Register kernel control + */ + bzero(&kern_ctl, sizeof(kern_ctl)); + strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME, + sizeof(kern_ctl.ctl_name)); + kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED; + kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */ + kern_ctl.ctl_recvsize = 512 * 1024; /* enough? 
*/ + kern_ctl.ctl_connect = cfil_ctl_connect; + kern_ctl.ctl_disconnect = cfil_ctl_disconnect; + kern_ctl.ctl_send = cfil_ctl_send; + kern_ctl.ctl_getopt = cfil_ctl_getopt; + kern_ctl.ctl_setopt = cfil_ctl_setopt; + kern_ctl.ctl_rcvd = cfil_ctl_rcvd; + error = ctl_register(&kern_ctl, &cfil_kctlref); + if (error != 0) { + CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error); + return; + } +} + +struct cfil_info * +cfil_info_alloc(struct socket *so) +{ + int kcunit; + struct cfil_info *cfil_info = NULL; + struct inpcb *inp = sotoinpcb(so); + + CFIL_LOG(LOG_INFO, ""); + + socket_lock_assert_owned(so); + + cfil_info = zalloc(cfil_info_zone); + if (cfil_info == NULL) + goto done; + bzero(cfil_info, sizeof(struct cfil_info)); + + cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q); + cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q); + + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + struct cfil_entry *entry; + + entry = &cfil_info->cfi_entries[kcunit - 1]; + entry->cfe_cfil_info = cfil_info; + + /* Initialize the filter entry */ + entry->cfe_filter = NULL; + entry->cfe_flags = 0; + entry->cfe_necp_control_unit = 0; + entry->cfe_snd.cfe_pass_offset = 0; + entry->cfe_snd.cfe_peek_offset = 0; + entry->cfe_snd.cfe_peeked = 0; + entry->cfe_rcv.cfe_pass_offset = 0; + entry->cfe_rcv.cfe_peek_offset = 0; + entry->cfe_rcv.cfe_peeked = 0; + + cfil_queue_init(&entry->cfe_snd.cfe_pending_q); + cfil_queue_init(&entry->cfe_rcv.cfe_pending_q); + cfil_queue_init(&entry->cfe_snd.cfe_ctl_q); + cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q); + } + + cfil_rw_lock_exclusive(&cfil_lck_rw); + + so->so_cfil = cfil_info; + cfil_info->cfi_so = so; + /* + * Create a cfi_sock_id that's not the socket pointer! + */ + if (inp->inp_flowhash == 0) + inp->inp_flowhash = inp_calc_flowhash(inp); + cfil_info->cfi_sock_id = + ((so->so_gencnt << 32) | inp->inp_flowhash); + + TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link); + + cfil_sock_attached_count++; + + cfil_rw_unlock_exclusive(&cfil_lck_rw); + +done: + if (cfil_info != NULL) + OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok); + else + OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail); + + return (cfil_info); +} + +int +cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit) +{ + int kcunit; + struct cfil_info *cfil_info = so->so_cfil; + int attached = 0; + + CFIL_LOG(LOG_INFO, ""); + + socket_lock_assert_owned(so); + + cfil_rw_lock_exclusive(&cfil_lck_rw); + + for (kcunit = 1; + content_filters != NULL && kcunit <= MAX_CONTENT_FILTER; + kcunit++) { + struct content_filter *cfc = content_filters[kcunit - 1]; + struct cfil_entry *entry; + + if (cfc == NULL) + continue; + if (cfc->cf_necp_control_unit != filter_control_unit) + continue; + + entry = &cfil_info->cfi_entries[kcunit - 1]; + + entry->cfe_filter = cfc; + entry->cfe_necp_control_unit = filter_control_unit; + TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link); + cfc->cf_sock_count++; + verify_content_filter(cfc); + attached = 1; + entry->cfe_flags |= CFEF_CFIL_ATTACHED; + break; + } + + cfil_rw_unlock_exclusive(&cfil_lck_rw); + + return (attached); +} + +static void +cfil_info_free(struct socket *so, struct cfil_info *cfil_info) +{ + int kcunit; + uint64_t in_drain = 0; + uint64_t out_drained = 0; + + so->so_cfil = NULL; + + if (so->so_flags & SOF_CONTENT_FILTER) { + so->so_flags &= ~SOF_CONTENT_FILTER; + so->so_usecount--; + } + if (cfil_info == NULL) + return; + + CFIL_LOG(LOG_INFO, ""); + + cfil_rw_lock_exclusive(&cfil_lck_rw); + + for (kcunit = 1; + content_filters != NULL && 
kcunit <= MAX_CONTENT_FILTER; + kcunit++) { + struct cfil_entry *entry; + struct content_filter *cfc; + + entry = &cfil_info->cfi_entries[kcunit - 1]; + + /* Don't be silly and try to detach twice */ + if (entry->cfe_filter == NULL) + continue; + + cfc = content_filters[kcunit - 1]; + + VERIFY(cfc == entry->cfe_filter); + + entry->cfe_filter = NULL; + entry->cfe_necp_control_unit = 0; + TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link); + cfc->cf_sock_count--; + + verify_content_filter(cfc); + } + cfil_sock_attached_count--; + TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link); + + out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q); + in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q); + + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + struct cfil_entry *entry; + + entry = &cfil_info->cfi_entries[kcunit - 1]; + out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q); + in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q); + out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q); + in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q); + } + cfil_rw_unlock_exclusive(&cfil_lck_rw); + + if (out_drained) + OSIncrementAtomic(&cfil_stats.cfs_flush_out_free); + if (in_drain) + OSIncrementAtomic(&cfil_stats.cfs_flush_in_free); + + zfree(cfil_info_zone, cfil_info); +} + +/* + * Entry point from Sockets layer + * The socket is locked. + */ +errno_t +cfil_sock_attach(struct socket *so) +{ + errno_t error = 0; + uint32_t filter_control_unit; + + socket_lock_assert_owned(so); + + /* Limit ourselves to TCP */ + if ((so->so_proto->pr_domain->dom_family != PF_INET && + so->so_proto->pr_domain->dom_family != PF_INET6) || + so->so_proto->pr_type != SOCK_STREAM || + so->so_proto->pr_protocol != IPPROTO_TCP) + goto done; + + filter_control_unit = necp_socket_get_content_filter_control_unit(so); + if (filter_control_unit == 0) + goto done; + + if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) { + OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only); + goto done; + } + if (cfil_active_count == 0) { + OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain); + goto done; + } + if (so->so_cfil != NULL) { + OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already); + CFIL_LOG(LOG_ERR, "already attached"); + } else { + cfil_info_alloc(so); + if (so->so_cfil == NULL) { + error = ENOMEM; + OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem); + goto done; + } + } + if (cfil_info_attach_unit(so, filter_control_unit) == 0) { + CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed", + filter_control_unit); + OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed); + goto done; + } + CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockid %llx", + (uint64_t)VM_KERNEL_ADDRPERM(so), + filter_control_unit, so->so_cfil->cfi_sock_id); + + so->so_flags |= SOF_CONTENT_FILTER; + OSIncrementAtomic(&cfil_stats.cfs_sock_attached); + + /* Hold a reference on the socket */ + so->so_usecount++; + + error = cfil_dispatch_attach_event(so, filter_control_unit); + /* We can recover from flow control or out of memory errors */ + if (error == ENOBUFS || error == ENOMEM) + error = 0; + else if (error != 0) + goto done; + + CFIL_INFO_VERIFY(so->so_cfil); +done: + return (error); +} + +/* + * Entry point from Sockets layer + * The socket is locked. 
+ */ +errno_t +cfil_sock_detach(struct socket *so) +{ + if (so->so_cfil) { + cfil_info_free(so, so->so_cfil); + OSIncrementAtomic(&cfil_stats.cfs_sock_detached); + } + return (0); +} + +static int +cfil_dispatch_attach_event(struct socket *so, uint32_t filter_control_unit) +{ + errno_t error = 0; + struct cfil_entry *entry = NULL; + struct cfil_msg_sock_attached msg_attached; + uint32_t kcunit; + struct content_filter *cfc; + + socket_lock_assert_owned(so); + + cfil_rw_lock_shared(&cfil_lck_rw); + + if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) { + error = EINVAL; + goto done; + } + /* + * Find the matching filter unit + */ + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + cfc = content_filters[kcunit - 1]; + + if (cfc == NULL) + continue; + if (cfc->cf_necp_control_unit != filter_control_unit) + continue; + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (entry->cfe_filter == NULL) + continue; + + VERIFY(cfc == entry->cfe_filter); + + break; + } + + if (entry == NULL || entry->cfe_filter == NULL) + goto done; + + if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) + goto done; + + CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u", + (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit); + + /* Would be wasteful to try when flow controlled */ + if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { + error = ENOBUFS; + goto done; + } + + bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached)); + msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached); + msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT; + msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT; + msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED; + msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; + + msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family; + msg_attached.cfs_sock_type = so->so_proto->pr_type; + msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol; + msg_attached.cfs_pid = so->last_pid; + memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t)); + if (so->so_flags & SOF_DELEGATED) { + msg_attached.cfs_e_pid = so->e_pid; + memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t)); + } else { + msg_attached.cfs_e_pid = so->last_pid; + memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t)); + } + error = ctl_enqueuedata(entry->cfe_filter->cf_kcref, + entry->cfe_filter->cf_kcunit, + &msg_attached, + sizeof(struct cfil_msg_sock_attached), + CTL_DATA_EOR); + if (error != 0) { + CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error); + goto done; + } + microuptime(&entry->cfe_last_event); + entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED; + OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok); +done: + + /* We can recover from flow control */ + if (error == ENOBUFS) { + entry->cfe_flags |= CFEF_FLOW_CONTROLLED; + OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control); + + if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) + cfil_rw_lock_exclusive(&cfil_lck_rw); + + cfc->cf_flags |= CFF_FLOW_CONTROLLED; + + cfil_rw_unlock_exclusive(&cfil_lck_rw); + } else { + if (error != 0) + OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail); + + cfil_rw_unlock_shared(&cfil_lck_rw); + } + return (error); +} + +static int +cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing) +{ + errno_t error = 0; + struct mbuf *msg = NULL; + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + struct cfil_msg_hdr msg_disconnected; + struct content_filter *cfc; + + 
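/* + * Like the attach event above, this event may fail with ENOBUFS when + * the kernel control socket is full; the entry and the filter are then + * marked flow controlled and cfil_ctl_rcvd() retries the dispatch once + * the user-space agent has drained its receive buffer. + */ + 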
socket_lock_assert_owned(so); + + cfil_rw_lock_shared(&cfil_lck_rw); + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + cfc = entry->cfe_filter; + if (cfc == NULL) + goto done; + + CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); + + /* + * Send the disconnection event once + */ + if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) || + (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) { + CFIL_LOG(LOG_INFO, "so %llx disconnect already sent", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + goto done; + } + + /* + * We're not disconnected as long as some data is waiting + * to be delivered to the filter + */ + if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) { + CFIL_LOG(LOG_INFO, "so %llx control queue not empty", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = EBUSY; + goto done; + } + /* Would be wasteful to try when flow controlled */ + if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { + error = ENOBUFS; + goto done; + } + + bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr)); + msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr); + msg_disconnected.cfm_version = CFM_VERSION_CURRENT; + msg_disconnected.cfm_type = CFM_TYPE_EVENT; + msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT : + CFM_OP_DISCONNECT_IN; + msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; + error = ctl_enqueuedata(entry->cfe_filter->cf_kcref, + entry->cfe_filter->cf_kcunit, + &msg_disconnected, + sizeof(struct cfil_msg_hdr), + CTL_DATA_EOR); + if (error != 0) { + CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error); + mbuf_freem(msg); + goto done; + } + microuptime(&entry->cfe_last_event); + + /* Remember we have sent the disconnection message */ + if (outgoing) { + entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT; + OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok); + } else { + entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN; + OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok); + } +done: + if (error == ENOBUFS) { + entry->cfe_flags |= CFEF_FLOW_CONTROLLED; + OSIncrementAtomic( + &cfil_stats.cfs_disconnect_event_flow_control); + + if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) + cfil_rw_lock_exclusive(&cfil_lck_rw); + + cfc->cf_flags |= CFF_FLOW_CONTROLLED; + + cfil_rw_unlock_exclusive(&cfil_lck_rw); + } else { + if (error != 0) + OSIncrementAtomic( + &cfil_stats.cfs_disconnect_event_fail); + + cfil_rw_unlock_shared(&cfil_lck_rw); + } + return (error); +} + +int +cfil_dispatch_closed_event(struct socket *so, int kcunit) +{ + struct cfil_entry *entry; + struct cfil_msg_hdr msg_closed; + errno_t error = 0; + struct content_filter *cfc; + + socket_lock_assert_owned(so); + + cfil_rw_lock_shared(&cfil_lck_rw); + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + cfc = entry->cfe_filter; + if (cfc == NULL) + goto done; + + CFIL_LOG(LOG_INFO, "so %llx kcunit %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); + + /* Would be wasteful to try when flow controlled */ + if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { + error = ENOBUFS; + goto done; + } + /* + * Send a single closed message per filter + */ + if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) + goto done; + if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) + goto done; + + bzero(&msg_closed, sizeof(struct cfil_msg_hdr)); + msg_closed.cfm_len = sizeof(struct cfil_msg_hdr); + msg_closed.cfm_version = CFM_VERSION_CURRENT; + msg_closed.cfm_type = 
CFM_TYPE_EVENT; + msg_closed.cfm_op = CFM_OP_SOCKET_CLOSED; + msg_closed.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; + error = ctl_enqueuedata(entry->cfe_filter->cf_kcref, + entry->cfe_filter->cf_kcunit, + &msg_closed, + sizeof(struct cfil_msg_hdr), + CTL_DATA_EOR); + if (error != 0) { + CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", + error); + goto done; + } + microuptime(&entry->cfe_last_event); + entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED; + OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok); +done: + /* We can recover from flow control */ + if (error == ENOBUFS) { + entry->cfe_flags |= CFEF_FLOW_CONTROLLED; + OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control); + + if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) + cfil_rw_lock_exclusive(&cfil_lck_rw); + + cfc->cf_flags |= CFF_FLOW_CONTROLLED; + + cfil_rw_unlock_exclusive(&cfil_lck_rw); + } else { + if (error != 0) + OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail); + + cfil_rw_unlock_shared(&cfil_lck_rw); + } + + return (error); +} + +static void +fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46, + struct in6_addr *ip6, u_int16_t port) +{ + struct sockaddr_in6 *sin6 = &sin46->sin6; + + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_port = port; + sin6->sin6_addr = *ip6; + if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) { + sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); + sin6->sin6_addr.s6_addr16[1] = 0; + } +} + +static void +fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46, + struct in_addr ip, u_int16_t port) +{ + struct sockaddr_in *sin = &sin46->sin; + + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_port = port; + sin->sin_addr.s_addr = ip.s_addr; +} + +static int +cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing, + struct mbuf *data, unsigned int copyoffset, unsigned int copylen) +{ + errno_t error = 0; + struct mbuf *copy = NULL; + struct mbuf *msg = NULL; + unsigned int one = 1; + struct cfil_msg_data_event *data_req; + size_t hdrsize; + struct inpcb *inp = (struct inpcb *)so->so_pcb; + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + struct content_filter *cfc; + + cfil_rw_lock_shared(&cfil_lck_rw); + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + cfc = entry->cfe_filter; + if (cfc == NULL) + goto done; + + CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); + + socket_lock_assert_owned(so); + + /* Would be wasteful to try */ + if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { + error = ENOBUFS; + goto done; + } + + /* Make a copy of the data to pass to kernel control socket */ + copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT, + M_COPYM_NOOP_HDR); + if (copy == NULL) { + CFIL_LOG(LOG_ERR, "m_copym_mode() failed"); + error = ENOMEM; + goto done; + } + + /* We need an mbuf packet for the message header */ + hdrsize = sizeof(struct cfil_msg_data_event); + error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg); + if (error != 0) { + CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed"); + m_freem(copy); + /* + * ENOBUFS is to indicate flow control + */ + error = ENOMEM; + goto done; + } + mbuf_setlen(msg, hdrsize); + mbuf_pkthdr_setlen(msg, hdrsize + copylen); + msg->m_next = copy; + data_req = (struct cfil_msg_data_event *)mbuf_data(msg); + bzero(data_req, hdrsize); + data_req->cfd_msghdr.cfm_len = hdrsize + copylen; + data_req->cfd_msghdr.cfm_version = 1; + 
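/* + * cfd_start_offset and cfd_end_offset below are absolute offsets in + * this direction's byte stream (based on cfe_peeked), so the pass and + * peek offsets the filter sends back via CFM_OP_DATA_UPDATE are in the + * same coordinate space. + */ + 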
data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT; + data_req->cfd_msghdr.cfm_op = + outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN; + data_req->cfd_msghdr.cfm_sock_id = + entry->cfe_cfil_info->cfi_sock_id; + data_req->cfd_start_offset = entrybuf->cfe_peeked; + data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen; + + /* + * TBD: + * For non connected sockets need to copy addresses from passed + * parameters + */ + if (inp->inp_vflag & INP_IPV6) { + if (outgoing) { + fill_ip6_sockaddr_4_6(&data_req->cfc_src, + &inp->in6p_laddr, inp->inp_lport); + fill_ip6_sockaddr_4_6(&data_req->cfc_dst, + &inp->in6p_faddr, inp->inp_fport); + } else { + fill_ip6_sockaddr_4_6(&data_req->cfc_src, + &inp->in6p_faddr, inp->inp_fport); + fill_ip6_sockaddr_4_6(&data_req->cfc_dst, + &inp->in6p_laddr, inp->inp_lport); + } + } else if (inp->inp_vflag & INP_IPV4) { + if (outgoing) { + fill_ip_sockaddr_4_6(&data_req->cfc_src, + inp->inp_laddr, inp->inp_lport); + fill_ip_sockaddr_4_6(&data_req->cfc_dst, + inp->inp_faddr, inp->inp_fport); + } else { + fill_ip_sockaddr_4_6(&data_req->cfc_src, + inp->inp_faddr, inp->inp_fport); + fill_ip_sockaddr_4_6(&data_req->cfc_dst, + inp->inp_laddr, inp->inp_lport); + } + } + + /* Pass the message to the content filter */ + error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref, + entry->cfe_filter->cf_kcunit, + msg, CTL_DATA_EOR); + if (error != 0) { + CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error); + mbuf_freem(msg); + goto done; + } + entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED; + OSIncrementAtomic(&cfil_stats.cfs_data_event_ok); +done: + if (error == ENOBUFS) { + entry->cfe_flags |= CFEF_FLOW_CONTROLLED; + OSIncrementAtomic( + &cfil_stats.cfs_data_event_flow_control); + + if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) + cfil_rw_lock_exclusive(&cfil_lck_rw); + + cfc->cf_flags |= CFF_FLOW_CONTROLLED; + + cfil_rw_unlock_exclusive(&cfil_lck_rw); + } else { + if (error != 0) + OSIncrementAtomic(&cfil_stats.cfs_data_event_fail); + + cfil_rw_unlock_shared(&cfil_lck_rw); + } + return (error); +} + +/* + * Process the queue of data waiting to be delivered to content filter + */ +static int +cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing) +{ + errno_t error = 0; + struct mbuf *data, *tmp = NULL; + unsigned int datalen = 0, copylen = 0, copyoffset = 0; + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + uint64_t currentoffset = 0; + + if (so->so_cfil == NULL) + return (0); + + CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); + + socket_lock_assert_owned(so); + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + /* Send attached message if not yet done */ + if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) { + error = cfil_dispatch_attach_event(so, kcunit); + if (error != 0) { + /* We can recover from flow control */ + if (error == ENOBUFS || error == ENOMEM) + error = 0; + goto done; + } + } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) { + OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started); + goto done; + } + CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu", + entrybuf->cfe_pass_offset, + entrybuf->cfe_peeked, + entrybuf->cfe_peek_offset); + + /* Move all data that can pass */ + while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL && + entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) { + datalen = cfil_data_length(data, NULL); + tmp = data; + + if 
(entrybuf->cfe_ctl_q.q_start + datalen <= + entrybuf->cfe_pass_offset) { + /* + * The first mbuf can fully pass + */ + copylen = datalen; + } else { + /* + * The first mbuf can partially pass + */ + copylen = entrybuf->cfe_pass_offset - + entrybuf->cfe_ctl_q.q_start; + } + VERIFY(copylen <= datalen); + + CFIL_LOG(LOG_DEBUG, + "%llx first %llu peeked %llu pass %llu peek %llu " + "datalen %u copylen %u", + (uint64_t)VM_KERNEL_ADDRPERM(tmp), + entrybuf->cfe_ctl_q.q_start, + entrybuf->cfe_peeked, + entrybuf->cfe_pass_offset, + entrybuf->cfe_peek_offset, + datalen, copylen); + + /* + * Data that passes has been peeked at explicitly or + * implicitly + */ + if (entrybuf->cfe_ctl_q.q_start + copylen > + entrybuf->cfe_peeked) + entrybuf->cfe_peeked = + entrybuf->cfe_ctl_q.q_start + copylen; + /* + * Stop on partial pass + */ + if (copylen < datalen) + break; + + /* All good, move full data from ctl queue to pending queue */ + cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen); + + cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen); + if (outgoing) + OSAddAtomic64(datalen, + &cfil_stats.cfs_pending_q_out_enqueued); + else + OSAddAtomic64(datalen, + &cfil_stats.cfs_pending_q_in_enqueued); + } + CFIL_INFO_VERIFY(so->so_cfil); + if (tmp != NULL) + CFIL_LOG(LOG_DEBUG, + "%llx first %llu peeked %llu pass %llu peek %llu " + "datalen %u copylen %u", + (uint64_t)VM_KERNEL_ADDRPERM(tmp), + entrybuf->cfe_ctl_q.q_start, + entrybuf->cfe_peeked, + entrybuf->cfe_pass_offset, + entrybuf->cfe_peek_offset, + datalen, copylen); + tmp = NULL; + + /* Now deal with remaining data the filter wants to peek at */ + for (data = cfil_queue_first(&entrybuf->cfe_ctl_q), + currentoffset = entrybuf->cfe_ctl_q.q_start; + data != NULL && currentoffset < entrybuf->cfe_peek_offset; + data = cfil_queue_next(&entrybuf->cfe_ctl_q, data), + currentoffset += datalen) { + datalen = cfil_data_length(data, NULL); + tmp = data; + + /* We've already peeked at this mbuf */ + if (currentoffset + datalen <= entrybuf->cfe_peeked) + continue; + /* + * The data in the first mbuf may have been + * partially peeked at + */ + copyoffset = entrybuf->cfe_peeked - currentoffset; + VERIFY(copyoffset < datalen); + copylen = datalen - copyoffset; + VERIFY(copylen <= datalen); + /* + * Do not copy more than needed + */ + if (currentoffset + copyoffset + copylen > + entrybuf->cfe_peek_offset) { + copylen = entrybuf->cfe_peek_offset - + (currentoffset + copyoffset); + } + + CFIL_LOG(LOG_DEBUG, + "%llx current %llu peeked %llu pass %llu peek %llu " + "datalen %u copylen %u copyoffset %u", + (uint64_t)VM_KERNEL_ADDRPERM(tmp), + currentoffset, + entrybuf->cfe_peeked, + entrybuf->cfe_pass_offset, + entrybuf->cfe_peek_offset, + datalen, copylen, copyoffset); + + /* + * Stop if there is nothing more to peek at + */ + if (copylen == 0) + break; + /* + * Let the filter get a peek at this span of data + */ + error = cfil_dispatch_data_event(so, kcunit, + outgoing, data, copyoffset, copylen); + if (error != 0) { + /* On error, leave data in ctl_q */ + break; + } + entrybuf->cfe_peeked += copylen; + if (outgoing) + OSAddAtomic64(copylen, + &cfil_stats.cfs_ctl_q_out_peeked); + else + OSAddAtomic64(copylen, + &cfil_stats.cfs_ctl_q_in_peeked); + + /* Stop when data could not be fully peeked at */ + if (copylen + copyoffset < datalen) + break; + } + CFIL_INFO_VERIFY(so->so_cfil); + if (tmp != NULL) + CFIL_LOG(LOG_DEBUG, + "%llx first %llu peeked %llu pass %llu peek %llu " + "datalen %u copylen %u copyoffset %u", + (uint64_t)VM_KERNEL_ADDRPERM(tmp), + currentoffset, 
+ entrybuf->cfe_peeked, + entrybuf->cfe_pass_offset, + entrybuf->cfe_peek_offset, + datalen, copylen, copyoffset); + + /* + * Process data that has passed the filter + */ + error = cfil_service_pending_queue(so, kcunit, outgoing); + if (error != 0) { + CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d", + error); + goto done; + } + + /* + * Dispatch disconnect events that could not be sent + */ + if (so->so_cfil == NULL) + goto done; + else if (outgoing) { + if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) && + !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) + cfil_dispatch_disconnect_event(so, kcunit, 1); + } else { + if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) && + !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) + cfil_dispatch_disconnect_event(so, kcunit, 0); + } + +done: + CFIL_LOG(LOG_DEBUG, + "first %llu peeked %llu pass %llu peek %llu", + entrybuf->cfe_ctl_q.q_start, + entrybuf->cfe_peeked, + entrybuf->cfe_pass_offset, + entrybuf->cfe_peek_offset); + + CFIL_INFO_VERIFY(so->so_cfil); + return (error); +} + +/* + * cfil_data_filter() + * + * Process data for a content filter installed on a socket + */ +int +cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing, + struct mbuf *data, uint64_t datalen) +{ + errno_t error = 0; + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + + CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); + + socket_lock_assert_owned(so); + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + /* Are we attached to the filter? */ + if (entry->cfe_filter == NULL) { + error = 0; + goto done; + } + + /* Dispatch to filters */ + cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen); + if (outgoing) + OSAddAtomic64(datalen, + &cfil_stats.cfs_ctl_q_out_enqueued); + else + OSAddAtomic64(datalen, + &cfil_stats.cfs_ctl_q_in_enqueued); + + error = cfil_data_service_ctl_q(so, kcunit, outgoing); + if (error != 0) { + CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d", + error); + } + /* + * We have to return EJUSTRETURN in all cases to avoid double free + * by socket layer + */ + error = EJUSTRETURN; +done: + CFIL_INFO_VERIFY(so->so_cfil); + + CFIL_LOG(LOG_INFO, "return %d", error); + return (error); +} + +/* + * cfil_service_inject_queue() re-injects data that has passed the + * content filters + */ +static int +cfil_service_inject_queue(struct socket *so, int outgoing) +{ + mbuf_t data; + unsigned int datalen; + int mbcnt; + unsigned int copylen; + errno_t error = 0; + struct mbuf *copy = NULL; + struct cfi_buf *cfi_buf; + struct cfil_queue *inject_q; + int need_rwakeup = 0; + + if (so->so_cfil == NULL) + return (0); + + CFIL_LOG(LOG_INFO, "so %llx outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing); + + socket_lock_assert_owned(so); + + if (outgoing) { + cfi_buf = &so->so_cfil->cfi_snd; + so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT; + } else { + cfi_buf = &so->so_cfil->cfi_rcv; + so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN; + } + inject_q = &cfi_buf->cfi_inject_q; + 
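/* + * Each mbuf chain is copied before injection so that, on failure, the + * original stays on the inject queue and can be retried via the + * CFIF_RETRY_INJECT_OUT/_IN flags set below; MSG_SKIPCFIL and + * M_SKIPCFIL keep the re-injected data from going through the content + * filters a second time. + */ + 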
+ while ((data = cfil_queue_first(inject_q)) != NULL) { + datalen = cfil_data_length(data, &mbcnt); + + CFIL_LOG(LOG_INFO, "data %llx datalen %u", + (uint64_t)VM_KERNEL_ADDRPERM(data), datalen); + + /* Make a copy in case of injection error */ + copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT, + M_COPYM_COPY_HDR); + if (copy == NULL) { + CFIL_LOG(LOG_ERR, "m_copym_mode() failed"); + error = ENOMEM; + break; + } + + if ((copylen = m_length(copy)) != datalen) + panic("%s so %p copylen %d != datalen %d", + __func__, so, copylen, datalen); + + if (outgoing) { + socket_unlock(so, 0); + + /* + * Set both DONTWAIT and NBIO flags as we really + * do not want to block + */ + error = sosend(so, NULL, NULL, + copy, NULL, + MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO); + + socket_lock(so, 0); + + if (error != 0) { + CFIL_LOG(LOG_ERR, "sosend() failed %d", + error); + } + } else { + copy->m_flags |= M_SKIPCFIL; + + /* + * NOTE: + * This works only because we support plain TCP + * For UDP, RAWIP, MPTCP and message TCP we'll + * need to call the appropriate sbappendxxx() + * or fix sock_inject_data_in() + */ + if (sbappendstream(&so->so_rcv, copy)) + need_rwakeup = 1; + } + + /* Need to reassess if filter is still attached after unlock */ + if (so->so_cfil == NULL) { + CFIL_LOG(LOG_ERR, "so %llx cfil detached", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached); + error = 0; + break; + } + if (error != 0) + break; + + /* Injection successful */ + cfil_queue_remove(inject_q, data, datalen); + mbuf_freem(data); + + cfi_buf->cfi_pending_first += datalen; + cfi_buf->cfi_pending_mbcnt -= mbcnt; + cfil_info_buf_verify(cfi_buf); + + if (outgoing) + OSAddAtomic64(datalen, + &cfil_stats.cfs_inject_q_out_passed); + else + OSAddAtomic64(datalen, + &cfil_stats.cfs_inject_q_in_passed); + } + + /* A single wakeup for several packets is more efficient */ + if (need_rwakeup) + sorwakeup(so); + + if (error != 0 && so->so_cfil) { + if (error == ENOBUFS) + OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs); + if (error == ENOMEM) + OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem); + + if (outgoing) { + so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT; + OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail); + } else { + so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN; + OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail); + } + } + + /* + * Notify + */ + if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) { + cfil_sock_notify_shutdown(so, SHUT_WR); + if (cfil_sock_data_pending(&so->so_snd) == 0) + soshutdownlock_final(so, SHUT_WR); + } + if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) { + if (cfil_filters_attached(so) == 0) { + CFIL_LOG(LOG_INFO, "so %llx waking", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + wakeup((caddr_t)&so->so_cfil); + } + } + + CFIL_INFO_VERIFY(so->so_cfil); + + return (error); +} + +static int +cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing) +{ + uint64_t passlen, curlen; + mbuf_t data; + unsigned int datalen; + errno_t error = 0; + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + struct cfil_queue *pending_q; + + CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); + + socket_lock_assert_owned(so); + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + pending_q = &entrybuf->cfe_pending_q; + + passlen = entrybuf->cfe_pass_offset - pending_q->q_start; + + /* + * Locate the chunks of data that we can pass to the next filter + * A data chunk must be on mbuf boundaries + */ + curlen = 0; + while ((data = cfil_queue_first(pending_q)) != NULL) { + datalen = cfil_data_length(data, NULL); + + CFIL_LOG(LOG_INFO, + "data %llx datalen %u passlen %llu curlen %llu", + (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, + passlen, curlen); + + if (curlen + datalen > passlen) + break; + + cfil_queue_remove(pending_q, data, 
datalen); + + curlen += datalen; + + for (kcunit += 1; + kcunit <= MAX_CONTENT_FILTER; + kcunit++) { + error = cfil_data_filter(so, kcunit, outgoing, + data, datalen); + /* 0 means passed so we can continue */ + if (error != 0) + break; + } + /* When data has passed all filters, re-inject */ + if (error == 0) { + if (outgoing) { + cfil_queue_enqueue( + &so->so_cfil->cfi_snd.cfi_inject_q, + data, datalen); + OSAddAtomic64(datalen, + &cfil_stats.cfs_inject_q_out_enqueued); + } else { + cfil_queue_enqueue( + &so->so_cfil->cfi_rcv.cfi_inject_q, + data, datalen); + OSAddAtomic64(datalen, + &cfil_stats.cfs_inject_q_in_enqueued); + } + } + } + + CFIL_INFO_VERIFY(so->so_cfil); + + return (error); +} + +int +cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing, + uint64_t pass_offset, uint64_t peek_offset) +{ + errno_t error = 0; + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + int updated = 0; + + CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset); + + socket_lock_assert_owned(so); + + if (so->so_cfil == NULL) { + CFIL_LOG(LOG_ERR, "so %llx cfil detached", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = 0; + goto done; + } else if (so->so_cfil->cfi_flags & CFIF_DROP) { + CFIL_LOG(LOG_ERR, "so %llx drop set", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = EPIPE; + goto done; + } + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + /* Record updated offsets for this content filter */ + if (pass_offset > entrybuf->cfe_pass_offset) { + entrybuf->cfe_pass_offset = pass_offset; + + if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) + entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset; + updated = 1; + } else { + CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu", + pass_offset, entrybuf->cfe_pass_offset); + } + /* Filter does not want or need to see data that's allowed to pass */ + if (peek_offset > entrybuf->cfe_pass_offset && + peek_offset > entrybuf->cfe_peek_offset) { + entrybuf->cfe_peek_offset = peek_offset; + updated = 1; + } + /* Nothing to do */ + if (updated == 0) + goto done; + + /* Move data held in control queue to pending queue if needed */ + error = cfil_data_service_ctl_q(so, kcunit, outgoing); + if (error != 0) { + CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d", + error); + goto done; + } + error = EJUSTRETURN; + +done: + /* + * The filter is effectively detached when pass all from both sides + * or when the socket is closed and no more data is waiting + * to be delivered to the filter + */ + if (so->so_cfil != NULL && + ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET && + entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) || + ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) && + cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) && + cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) { + entry->cfe_flags |= CFEF_CFIL_DETACHED; + CFIL_LOG(LOG_INFO, "so %llx detached %u", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); + if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) && + cfil_filters_attached(so) == 0) { + CFIL_LOG(LOG_INFO, "so %llx waking", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + wakeup((caddr_t)&so->so_cfil); + } + } + CFIL_INFO_VERIFY(so->so_cfil); + CFIL_LOG(LOG_INFO, "return %d", error); + return (error); +} + +/* + * Update pass offset for socket when no data is pending + */ +static int +cfil_set_socket_pass_offset(struct socket *so, int outgoing) +{ + struct cfi_buf *cfi_buf; + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + 
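/* + * The socket-level pass offset is the smallest pass offset across + * all attached filters; it is only recomputed when no data is + * pending, which keeps the fast path in cfil_data_common() + * consistent with the per-entry offsets. + */ + 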
uint32_t kcunit; + uint64_t pass_offset = 0; + + if (so->so_cfil == NULL) + return (0); + + CFIL_LOG(LOG_INFO, "so %llx outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing); + + socket_lock_assert_owned(so); + + if (outgoing) + cfi_buf = &so->so_cfil->cfi_snd; + else + cfi_buf = &so->so_cfil->cfi_rcv; + + if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) { + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + + /* Are we attached to a filter? */ + if (entry->cfe_filter == NULL) + continue; + + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + if (pass_offset == 0 || + entrybuf->cfe_pass_offset < pass_offset) + pass_offset = entrybuf->cfe_pass_offset; + } + cfi_buf->cfi_pass_offset = pass_offset; + } + + return (0); +} + +int +cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing, + uint64_t pass_offset, uint64_t peek_offset) +{ + errno_t error = 0; + + CFIL_LOG(LOG_INFO, ""); + + socket_lock_assert_owned(so); + + error = cfil_acquire_sockbuf(so, outgoing); + if (error != 0) { + CFIL_LOG(LOG_INFO, "so %llx %s dropped", + (uint64_t)VM_KERNEL_ADDRPERM(so), + outgoing ? "out" : "in"); + goto release; + } + + error = cfil_update_data_offsets(so, kcunit, outgoing, + pass_offset, peek_offset); + + cfil_service_inject_queue(so, outgoing); + + cfil_set_socket_pass_offset(so, outgoing); +release: + CFIL_INFO_VERIFY(so->so_cfil); + cfil_release_sockbuf(so, outgoing); + + return (error); +} + + +static void +cfil_flush_queues(struct socket *so) +{ + struct cfil_entry *entry; + int kcunit; + uint64_t drained; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + goto done; + + socket_lock_assert_owned(so); + + /* + * Flush the output queues and ignore errors as long as + * we are attached + */ + (void) cfil_acquire_sockbuf(so, 1); + if (so->so_cfil != NULL) { + drained = 0; + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + + drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q); + drained += cfil_queue_drain( + &entry->cfe_snd.cfe_pending_q); + } + drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q); + if (drained) { + if (so->so_cfil->cfi_flags & CFIF_DROP) + OSIncrementAtomic( + &cfil_stats.cfs_flush_out_drop); + else + OSIncrementAtomic( + &cfil_stats.cfs_flush_out_close); + } + } + cfil_release_sockbuf(so, 1); + + /* + * Flush the input queues + */ + (void) cfil_acquire_sockbuf(so, 0); + if (so->so_cfil != NULL) { + drained = 0; + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + + drained += cfil_queue_drain( + &entry->cfe_rcv.cfe_ctl_q); + drained += cfil_queue_drain( + &entry->cfe_rcv.cfe_pending_q); + } + drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q); + if (drained) { + if (so->so_cfil->cfi_flags & CFIF_DROP) + OSIncrementAtomic( + &cfil_stats.cfs_flush_in_drop); + else + OSIncrementAtomic( + &cfil_stats.cfs_flush_in_close); + } + } + cfil_release_sockbuf(so, 0); +done: + CFIL_INFO_VERIFY(so->so_cfil); +} + +int +cfil_action_drop(struct socket *so, uint32_t kcunit) +{ + errno_t error = 0; + struct cfil_entry *entry; + struct proc *p; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + goto done; + + socket_lock_assert_owned(so); + + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + + /* Are we attached to the filter? 
*/ + if (entry->cfe_filter == NULL) + goto done; + + so->so_cfil->cfi_flags |= CFIF_DROP; + + p = current_proc(); + + /* Force the socket to be marked defunct */ + error = sosetdefunct(p, so, + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, 1); + + /* Flush the socket buffer and disconnect */ + if (error == 0) + error = sodefunct(p, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL); + + /* The filter is done, mark as detached */ + entry->cfe_flags |= CFEF_CFIL_DETACHED; + CFIL_LOG(LOG_INFO, "so %llx detached %u", + (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); + + /* Pending data needs to go */ + cfil_flush_queues(so); + + if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) { + if (cfil_filters_attached(so) == 0) { + CFIL_LOG(LOG_INFO, "so %llx waking", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + wakeup((caddr_t)&so->so_cfil); + } + } +done: + return (error); +} + +static int +cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen) +{ + struct cfil_entry *entry; + struct cfe_buf *entrybuf; + uint32_t kcunit; + + CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u", + (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen); + + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + + /* Are we attached to the filter? */ + if (entry->cfe_filter == NULL) + continue; + + if (outgoing) + entrybuf = &entry->cfe_snd; + else + entrybuf = &entry->cfe_rcv; + + entrybuf->cfe_ctl_q.q_start += datalen; + entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start; + entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start; + if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) + entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset; + + entrybuf->cfe_ctl_q.q_end += datalen; + + entrybuf->cfe_pending_q.q_start += datalen; + entrybuf->cfe_pending_q.q_end += datalen; + } + CFIL_INFO_VERIFY(so->so_cfil); + return (0); +} + +int +cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to, + struct mbuf *data, struct mbuf *control, uint32_t flags) +{ +#pragma unused(to, control, flags) + errno_t error = 0; + unsigned int datalen; + int mbcnt; + int kcunit; + struct cfi_buf *cfi_buf; + + if (so->so_cfil == NULL) { + CFIL_LOG(LOG_ERR, "so %llx cfil detached", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = 0; + goto done; + } else if (so->so_cfil->cfi_flags & CFIF_DROP) { + CFIL_LOG(LOG_ERR, "so %llx drop set", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + error = EPIPE; + goto done; + } + + datalen = cfil_data_length(data, &mbcnt); + + CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx", + (uint64_t)VM_KERNEL_ADDRPERM(so), + outgoing ? 
"out" : "in", + (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags, + (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt)); + + if (outgoing) + cfi_buf = &so->so_cfil->cfi_snd; + else + cfi_buf = &so->so_cfil->cfi_rcv; + + cfi_buf->cfi_pending_last += datalen; + cfi_buf->cfi_pending_mbcnt += mbcnt; + cfil_info_buf_verify(cfi_buf); + + CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu", + (uint64_t)VM_KERNEL_ADDRPERM(so), + cfi_buf->cfi_pending_last, + cfi_buf->cfi_pass_offset); + + /* Fast path when below pass offset */ + if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) { + cfil_update_entry_offsets(so, outgoing, datalen); + } else { + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + error = cfil_data_filter(so, kcunit, outgoing, data, + datalen); + /* 0 means passed so continue with next filter */ + if (error != 0) + break; + } + } + + /* Move cursor if no filter claimed the data */ + if (error == 0) { + cfi_buf->cfi_pending_first += datalen; + cfi_buf->cfi_pending_mbcnt -= mbcnt; + cfil_info_buf_verify(cfi_buf); + } +done: + CFIL_INFO_VERIFY(so->so_cfil); + + return (error); +} + +/* + * Callback from socket layer sosendxxx() + */ +int +cfil_sock_data_out(struct socket *so, struct sockaddr *to, + struct mbuf *data, struct mbuf *control, uint32_t flags) +{ + int error = 0; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + return (0); + + socket_lock_assert_owned(so); + + if (so->so_cfil->cfi_flags & CFIF_DROP) { + CFIL_LOG(LOG_ERR, "so %llx drop set", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + return (EPIPE); + } + if (control != NULL) { + CFIL_LOG(LOG_ERR, "so %llx control", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + OSIncrementAtomic(&cfil_stats.cfs_data_out_control); + } + if ((flags & MSG_OOB)) { + CFIL_LOG(LOG_ERR, "so %llx MSG_OOB", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + OSIncrementAtomic(&cfil_stats.cfs_data_out_oob); + } + if ((so->so_snd.sb_flags & SB_LOCK) == 0) + panic("so %p SB_LOCK not set", so); + + if (so->so_snd.sb_cfil_thread != NULL) + panic("%s sb_cfil_thread %p not NULL", __func__, + so->so_snd.sb_cfil_thread); + + error = cfil_data_common(so, 1, to, data, control, flags); + + return (error); +} + +/* + * Callback from socket layer sbappendxxx() + */ +int +cfil_sock_data_in(struct socket *so, struct sockaddr *from, + struct mbuf *data, struct mbuf *control, uint32_t flags) +{ + int error = 0; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + return (0); + + socket_lock_assert_owned(so); + + if (so->so_cfil->cfi_flags & CFIF_DROP) { + CFIL_LOG(LOG_ERR, "so %llx drop set", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + return (EPIPE); + } + if (control != NULL) { + CFIL_LOG(LOG_ERR, "so %llx control", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + OSIncrementAtomic(&cfil_stats.cfs_data_in_control); + } + if (data->m_type == MT_OOBDATA) { + CFIL_LOG(LOG_ERR, "so %llx MSG_OOB", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + OSIncrementAtomic(&cfil_stats.cfs_data_in_oob); + } + error = cfil_data_common(so, 0, from, data, control, flags); + + return (error); +} + +/* + * Callback from socket layer soshutdownxxx() + * + * We may delay the shutdown write if there's outgoing data in process. + * + * There is no point in delaying the shutdown read because the process + * indicated that it does not want to read anymore data. 
+ */ +int +cfil_sock_shutdown(struct socket *so, int *how) +{ + int error = 0; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + goto done; + + socket_lock_assert_owned(so); + + CFIL_LOG(LOG_INFO, "so %llx how %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), *how); + + /* + * Check the state of the socket before the content filter + */ + if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) { + /* read already shut down */ + error = ENOTCONN; + goto done; + } + if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) { + /* write already shut down */ + error = ENOTCONN; + goto done; + } + + if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) { + CFIL_LOG(LOG_ERR, "so %llx drop set", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + goto done; + } + + /* + * shutdown read: SHUT_RD or SHUT_RDWR + */ + if (*how != SHUT_WR) { + if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) { + error = ENOTCONN; + goto done; + } + so->so_cfil->cfi_flags |= CFIF_SHUT_RD; + cfil_sock_notify_shutdown(so, SHUT_RD); + } + /* + * shutdown write: SHUT_WR or SHUT_RDWR + */ + if (*how != SHUT_RD) { + if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) { + error = ENOTCONN; + goto done; + } + so->so_cfil->cfi_flags |= CFIF_SHUT_WR; + cfil_sock_notify_shutdown(so, SHUT_WR); + /* + * When outgoing data is pending, we delay the shutdown at the + * protocol level until the content filters give the final + * verdict on the pending data. + */ + if (cfil_sock_data_pending(&so->so_snd) != 0) { + /* + * When shutting down the read and write sides at once + * we can proceed to the final shutdown of the read + * side. Otherwise, we just return. + */ + if (*how == SHUT_WR) { + error = EJUSTRETURN; + } else if (*how == SHUT_RDWR) { + *how = SHUT_RD; + } + } + } +done: + return (error); +} + +/* + * This is called when the socket is closed and there is no more + * opportunity for filtering + */ +void +cfil_sock_is_closed(struct socket *so) +{ + errno_t error = 0; + int kcunit; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + return; + + CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so)); + + socket_lock_assert_owned(so); + + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + /* Let the filters know of the closing */ + error = cfil_dispatch_closed_event(so, kcunit); + } + + /* Last chance to push passed data out */ + error = cfil_acquire_sockbuf(so, 1); + if (error == 0) + cfil_service_inject_queue(so, 1); + cfil_release_sockbuf(so, 1); + + so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED; + + /* Pending data needs to go */ + cfil_flush_queues(so); + + CFIL_INFO_VERIFY(so->so_cfil); +} + +/* + * This is called when the socket is disconnected so let the filters + * know about the disconnection and that no more data will come + * + * The how parameter has the same values as soshutdown() + */ +void +cfil_sock_notify_shutdown(struct socket *so, int how) +{ + errno_t error = 0; + int kcunit; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + return; + + CFIL_LOG(LOG_INFO, "so %llx how %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), how); + + socket_lock_assert_owned(so); + + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + /* Disconnect incoming side */ + if (how != SHUT_WR) + error = cfil_dispatch_disconnect_event(so, kcunit, 0); + /* Disconnect outgoing side */ + if (how != SHUT_RD) + error = cfil_dispatch_disconnect_event(so, kcunit, 1); + } +} + +static int +cfil_filters_attached(struct socket *so) +{ + struct cfil_entry *entry; + uint32_t kcunit; + int attached = 
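/* an entry counts as attached only when a filter is bound to the unit, the attached event was sent (CFEF_SENT_SOCK_ATTACHED) and the entry is not marked CFEF_CFIL_DETACHED; the loop below checks exactly these three conditions */ 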
0; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + return (0); + + socket_lock_assert_owned(so); + + for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { + entry = &so->so_cfil->cfi_entries[kcunit - 1]; + + /* Are we attached to the filter? */ + if (entry->cfe_filter == NULL) + continue; + if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) + continue; + if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) + continue; + attached = 1; + break; + } + + return (attached); +} + +/* + * This is called when the socket is closed and we are waiting for + * the filters to give the final pass or drop + */ +void +cfil_sock_close_wait(struct socket *so) +{ + lck_mtx_t *mutex_held; + struct timespec ts; + int error; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + return; + + CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so)); + + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + + while (cfil_filters_attached(so)) { + /* + * Notify the filters we are going away so they can detach + */ + cfil_sock_notify_shutdown(so, SHUT_RDWR); + + /* + * Make sure we still need to wait after the filters are + * notified of the disconnection + */ + if (cfil_filters_attached(so) == 0) + break; + + CFIL_LOG(LOG_INFO, "so %llx waiting", + (uint64_t)VM_KERNEL_ADDRPERM(so)); + + ts.tv_sec = cfil_close_wait_timeout / 1000; + ts.tv_nsec = (cfil_close_wait_timeout % 1000) * + NSEC_PER_USEC * 1000; + + OSIncrementAtomic(&cfil_stats.cfs_close_wait); + so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT; + error = msleep((caddr_t)&so->so_cfil, mutex_held, + PSOCK | PCATCH, "cfil_sock_close_wait", &ts); + so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT; + + CFIL_LOG(LOG_NOTICE, "so %llx timed out %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0)); + + /* + * Force close in case of timeout + */ + if (error != 0) { + OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout); + break; + } + } + +} + +/* + * Returns the size of the data held by the content filter + */ +int32_t +cfil_sock_data_pending(struct sockbuf *sb) +{ + struct socket *so = sb->sb_so; + uint64_t pending = 0; + + if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) { + struct cfi_buf *cfi_buf; + + socket_lock_assert_owned(so); + + if ((sb->sb_flags & SB_RECV) == 0) + cfi_buf = &so->so_cfil->cfi_snd; + else + cfi_buf = &so->so_cfil->cfi_rcv; + + pending = cfi_buf->cfi_pending_last - + cfi_buf->cfi_pending_first; + + /* + * If we are limited by the "chars of mbufs used" roughly + * adjust so we won't overcommit + */ + if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) + pending = cfi_buf->cfi_pending_mbcnt; + } + + VERIFY(pending < INT32_MAX); + + return (int32_t)(pending); +} + +/* + * Return the socket buffer space used by data being held by content filters + * so processes won't clog the socket buffer + */ +int32_t +cfil_sock_data_space(struct sockbuf *sb) +{ + struct socket *so = sb->sb_so; + uint64_t pending = 0; + + if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL && + so->so_snd.sb_cfil_thread != current_thread()) { + struct cfi_buf *cfi_buf; + + socket_lock_assert_owned(so); + + if ((sb->sb_flags & SB_RECV) == 0) + cfi_buf = &so->so_cfil->cfi_snd; + else + cfi_buf = &so->so_cfil->cfi_rcv; + + pending = cfi_buf->cfi_pending_last - + cfi_buf->cfi_pending_first; + + /* + * If we are limited by the "chars of mbufs used" 
roughly + * adjust so we won't overcommit + */ + if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) + pending = cfi_buf->cfi_pending_mbcnt; + } + + VERIFY(pending < INT32_MAX); + + return (int32_t)(pending); +} + +/* + * A callback from the socket and protocol layer when data becomes + * available in the socket buffer to give a chance for the content filter + * to re-inject data that was held back + */ +void +cfil_sock_buf_update(struct sockbuf *sb) +{ + int outgoing; + int error; + struct socket *so = sb->sb_so; + + if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) + return; + + if (!cfil_sbtrim) + return; + + socket_lock_assert_owned(so); + + if ((sb->sb_flags & SB_RECV) == 0) { + if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) + return; + outgoing = 1; + OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry); + } else { + if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) + return; + outgoing = 0; + OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry); + } + + CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d", + (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing); + + error = cfil_acquire_sockbuf(so, outgoing); + if (error == 0) + cfil_service_inject_queue(so, outgoing); + cfil_release_sockbuf(so, outgoing); +} + +int +sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2, + struct sysctl_req *req) +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + size_t len = 0; + u_int32_t i; + + /* Read only */ + if (req->newptr != USER_ADDR_NULL) + return (EPERM); + + cfil_rw_lock_shared(&cfil_lck_rw); + + for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) { + struct cfil_filter_stat filter_stat; + struct content_filter *cfc = content_filters[i]; + + if (cfc == NULL) + continue; + + /* If just asking for the size */ + if (req->oldptr == USER_ADDR_NULL) { + len += sizeof(struct cfil_filter_stat); + continue; + } + + bzero(&filter_stat, sizeof(struct cfil_filter_stat)); + filter_stat.cfs_len = sizeof(struct cfil_filter_stat); + filter_stat.cfs_filter_id = cfc->cf_kcunit; + filter_stat.cfs_flags = cfc->cf_flags; + filter_stat.cfs_sock_count = cfc->cf_sock_count; + filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit; + + error = SYSCTL_OUT(req, &filter_stat, + sizeof (struct cfil_filter_stat)); + if (error != 0) + break; + } + /* If just asking for the size */ + if (req->oldptr == USER_ADDR_NULL) + req->oldidx = len; + + cfil_rw_unlock_shared(&cfil_lck_rw); + + return (error); +} + +static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2, + struct sysctl_req *req) +{ +#pragma unused(oidp, arg1, arg2) + int error = 0; + u_int32_t i; + struct cfil_info *cfi; + + /* Read only */ + if (req->newptr != USER_ADDR_NULL) + return (EPERM); + + cfil_rw_lock_shared(&cfil_lck_rw); + + /* + * If just asking for the size + */ + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = cfil_sock_attached_count * + sizeof(struct cfil_sock_stat); + /* Bump the length in case new sockets get attached */ + req->oldidx += req->oldidx >> 3; + goto done; + } + + TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) { + struct cfil_entry *entry; + struct cfil_sock_stat stat; + struct socket *so = cfi->cfi_so; + + bzero(&stat, sizeof(struct cfil_sock_stat)); + stat.cfs_len = sizeof(struct cfil_sock_stat); + stat.cfs_sock_id = cfi->cfi_sock_id; + stat.cfs_flags = cfi->cfi_flags; + + if (so != NULL) { + stat.cfs_pid = so->last_pid; + memcpy(stat.cfs_uuid, so->last_uuid, + sizeof(uuid_t)); + if (so->so_flags & SOF_DELEGATED) { + stat.cfs_e_pid = 
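/* a delegated socket reports the effective pid/uuid of the delegate; otherwise the effective fields fall back to the last process that used the socket, as in the else branch below */ 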
so->e_pid; + memcpy(stat.cfs_e_uuid, so->e_uuid, + sizeof(uuid_t)); + } else { + stat.cfs_e_pid = so->last_pid; + memcpy(stat.cfs_e_uuid, so->last_uuid, + sizeof(uuid_t)); + } + } + + stat.cfs_snd.cbs_pending_first = + cfi->cfi_snd.cfi_pending_first; + stat.cfs_snd.cbs_pending_last = + cfi->cfi_snd.cfi_pending_last; + stat.cfs_snd.cbs_inject_q_len = + cfil_queue_len(&cfi->cfi_snd.cfi_inject_q); + stat.cfs_snd.cbs_pass_offset = + cfi->cfi_snd.cfi_pass_offset; + + stat.cfs_rcv.cbs_pending_first = + cfi->cfi_rcv.cfi_pending_first; + stat.cfs_rcv.cbs_pending_last = + cfi->cfi_rcv.cfi_pending_last; + stat.cfs_rcv.cbs_inject_q_len = + cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q); + stat.cfs_rcv.cbs_pass_offset = + cfi->cfi_rcv.cfi_pass_offset; + + for (i = 0; i < MAX_CONTENT_FILTER; i++) { + struct cfil_entry_stat *estat; + struct cfe_buf *ebuf; + struct cfe_buf_stat *sbuf; + + entry = &cfi->cfi_entries[i]; + + estat = &stat.ces_entries[i]; + + estat->ces_len = sizeof(struct cfil_entry_stat); + estat->ces_filter_id = entry->cfe_filter ? + entry->cfe_filter->cf_kcunit : 0; + estat->ces_flags = entry->cfe_flags; + estat->ces_necp_control_unit = + entry->cfe_necp_control_unit; + + estat->ces_last_event.tv_sec = + (int64_t)entry->cfe_last_event.tv_sec; + estat->ces_last_event.tv_usec = + (int64_t)entry->cfe_last_event.tv_usec; + + estat->ces_last_action.tv_sec = + (int64_t)entry->cfe_last_action.tv_sec; + estat->ces_last_action.tv_usec = + (int64_t)entry->cfe_last_action.tv_usec; + + ebuf = &entry->cfe_snd; + sbuf = &estat->ces_snd; + sbuf->cbs_pending_first = + cfil_queue_offset_first(&ebuf->cfe_pending_q); + sbuf->cbs_pending_last = + cfil_queue_offset_last(&ebuf->cfe_pending_q); + sbuf->cbs_ctl_first = + cfil_queue_offset_first(&ebuf->cfe_ctl_q); + sbuf->cbs_ctl_last = + cfil_queue_offset_last(&ebuf->cfe_ctl_q); + sbuf->cbs_pass_offset = ebuf->cfe_pass_offset; + sbuf->cbs_peek_offset = ebuf->cfe_peek_offset; + sbuf->cbs_peeked = ebuf->cfe_peeked; + + ebuf = &entry->cfe_rcv; + sbuf = &estat->ces_rcv; + sbuf->cbs_pending_first = + cfil_queue_offset_first(&ebuf->cfe_pending_q); + sbuf->cbs_pending_last = + cfil_queue_offset_last(&ebuf->cfe_pending_q); + sbuf->cbs_ctl_first = + cfil_queue_offset_first(&ebuf->cfe_ctl_q); + sbuf->cbs_ctl_last = + cfil_queue_offset_last(&ebuf->cfe_ctl_q); + sbuf->cbs_pass_offset = ebuf->cfe_pass_offset; + sbuf->cbs_peek_offset = ebuf->cfe_peek_offset; + sbuf->cbs_peeked = ebuf->cfe_peeked; + } + error = SYSCTL_OUT(req, &stat, + sizeof (struct cfil_sock_stat)); + if (error != 0) + break; + } +done: + cfil_rw_unlock_shared(&cfil_lck_rw); + + return (error); +} diff --git a/bsd/net/content_filter.h b/bsd/net/content_filter.h new file mode 100644 index 000000000..2e4facaef --- /dev/null +++ b/bsd/net/content_filter.h @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef __CONTENT_FILTER_H__ +#define __CONTENT_FILTER_H__ + +#include +#include +#include +#include +#include +#include + +#ifdef BSD_KERNEL_PRIVATE +#include +#include +#endif /* BSD_KERNEL_PRIVATE */ + +__BEGIN_DECLS + +#ifdef PRIVATE + +/* + * Kernel control name for an instance of a Content Filter + * Use CTLIOCGINFO to find out the corresponding kernel control id + * to be set in the sc_id field of sockaddr_ctl for connect(2) + * Note: the sc_unit is ephemeral + */ +#define CONTENT_FILTER_CONTROL_NAME "com.apple.content-filter" + +/* + * CFIL_OPT_NECP_CONTROL_UNIT + * To set or get the NECP filter control unit for the kernel control socket + * The option level is SYSPROTO_CONTROL + */ +#define CFIL_OPT_NECP_CONTROL_UNIT 1 /* uint32_t */ + +/* + * How many filters may be active simultaneously + */ +#define CFIL_MAX_FILTER_COUNT 2 + +/* + * Types of messages + * + * Event messages flow from kernel to user space while action + * messages flow in the reverse direction. + * A message is entirely represented by a packet sent or received + * on a Content Filter kernel control socket. + */ +#define CFM_TYPE_EVENT 1 /* message from kernel */ +#define CFM_TYPE_ACTION 2 /* message to kernel */ + +/* + * Operations associated with events from kernel + */ +#define CFM_OP_SOCKET_ATTACHED 1 /* a socket has been attached */ +#define CFM_OP_SOCKET_CLOSED 2 /* a socket is being closed */ +#define CFM_OP_DATA_OUT 3 /* data being sent */ +#define CFM_OP_DATA_IN 4 /* data being received */ +#define CFM_OP_DISCONNECT_OUT 5 /* no more outgoing data */ +#define CFM_OP_DISCONNECT_IN 6 /* no more incoming data */ + +/* + * Operations associated with actions from filter to kernel + */ +#define CFM_OP_DATA_UPDATE 16 /* update pass or peek offsets */ +#define CFM_OP_DROP 17 /* shutdown socket, no more data */ + +/* + * Opaque socket identifier + */ +typedef uint64_t cfil_sock_id_t; + +#define CFIL_SOCK_ID_NONE UINT64_MAX + +/* + * Invariant timeval structure definition across architectures + */ +struct timeval64 { + int64_t tv_sec; + int64_t tv_usec; +}; + +/* + * struct cfil_msg_hdr + * + * Header common to all messages + */ +struct cfil_msg_hdr { + uint32_t cfm_len; /* total length */ + uint32_t cfm_version; + uint32_t cfm_type; + uint32_t cfm_op; + cfil_sock_id_t cfm_sock_id; +}; + +#define CFM_VERSION_CURRENT 1 + +/* + * struct cfil_msg_sock_attached + * + * Information about a new socket being attached to the content filter + * + * Action: No reply is expected as this does not block the creation of the + * TCP/IP connection, but timely action must be taken to avoid user-noticeable + * delays. + * + * Valid Types: CFM_TYPE_EVENT + * + * Valid Op: CFM_OP_SOCKET_ATTACHED + */ +struct cfil_msg_sock_attached { + struct cfil_msg_hdr cfs_msghdr; + int cfs_sock_family; /* e.g. PF_INET */ + int cfs_sock_type; /* e.g. SOCK_STREAM */ + int cfs_sock_protocol; /* e.g. 
IPPROTO_TCP */ + int cfs_unused; /* padding */ + pid_t cfs_pid; + pid_t cfs_e_pid; + uuid_t cfs_uuid; + uuid_t cfs_e_uuid; +}; + +/* + * struct cfil_msg_data_event + * + * Event for the content filter to act on a span of data + * A data span is described by a pair of offsets over the cumulative + * number of bytes sent or received on the socket. + * + * Action: The event must be acted upon but the filter may buffer + * data spans until it has enough content to make a decision. + * The action must be timely to avoid user-noticeable delays. + * + * Valid Type: CFM_TYPE_EVENT + * + * Valid Ops: CFM_OP_DATA_OUT, CFM_OP_DATA_IN + */ +struct cfil_msg_data_event { + struct cfil_msg_hdr cfd_msghdr; + union sockaddr_in_4_6 cfc_src; + union sockaddr_in_4_6 cfc_dst; + uint64_t cfd_start_offset; + uint64_t cfd_end_offset; + /* Actual content data immediately follows */ +}; + +/* + * struct cfil_msg_action + * + * Valid Type: CFM_TYPE_ACTION + * + * Valid Ops: CFM_OP_DATA_UPDATE, CFM_OP_DROP + * + * For CFM_OP_DATA_UPDATE: + * + * cfa_in_pass_offset and cfa_out_pass_offset indicate how much data is + * allowed to pass. A zero value does not modify the corresponding pass offset. + * + * cfa_in_peek_offset and cfa_out_peek_offset let the filter specify how much + * data it needs to make a decision: the kernel will deliver data up to that + * offset (if less than the corresponding pass offset it is ignored). Use + * CFM_MAX_OFFSET if you don't want the corresponding peek offset to be updated. + */ +struct cfil_msg_action { + struct cfil_msg_hdr cfa_msghdr; + uint64_t cfa_in_pass_offset; + uint64_t cfa_in_peek_offset; + uint64_t cfa_out_pass_offset; + uint64_t cfa_out_peek_offset; +}; + +#define CFM_MAX_OFFSET UINT64_MAX + +/* + * Statistics retrieved via sysctl(3) + */ +struct cfil_filter_stat { + uint32_t cfs_len; + uint32_t cfs_filter_id; + uint32_t cfs_flags; + uint32_t cfs_sock_count; + uint32_t cfs_necp_control_unit; +}; + +struct cfil_entry_stat { + uint32_t ces_len; + uint32_t ces_filter_id; + uint32_t ces_flags; + uint32_t ces_necp_control_unit; + struct timeval64 ces_last_event; + struct timeval64 ces_last_action; + struct cfe_buf_stat { + uint64_t cbs_pending_first; + uint64_t cbs_pending_last; + uint64_t cbs_ctl_first; + uint64_t cbs_ctl_last; + uint64_t cbs_pass_offset; + uint64_t cbs_peek_offset; + uint64_t cbs_peeked; + } ces_snd, ces_rcv; +}; + +struct cfil_sock_stat { + uint32_t cfs_len; + int cfs_sock_family; + int cfs_sock_type; + int cfs_sock_protocol; + cfil_sock_id_t cfs_sock_id; + uint64_t cfs_flags; + pid_t cfs_pid; + pid_t cfs_e_pid; + uuid_t cfs_uuid; + uuid_t cfs_e_uuid; + struct cfi_buf_stat { + uint64_t cbs_pending_first; + uint64_t cbs_pending_last; + uint64_t cbs_pass_offset; + uint64_t cbs_inject_q_len; + } cfs_snd, cfs_rcv; + struct cfil_entry_stat ces_entries[CFIL_MAX_FILTER_COUNT]; +}; + +/* + * Global statistics + */ +struct cfil_stats { + int32_t cfs_ctl_connect_ok; + int32_t cfs_ctl_connect_fail; + int32_t cfs_ctl_disconnect_ok; + int32_t cfs_ctl_disconnect_fail; + int32_t cfs_ctl_send_ok; + int32_t cfs_ctl_send_bad; + int32_t cfs_ctl_rcvd_ok; + int32_t cfs_ctl_rcvd_bad; + int32_t cfs_ctl_rcvd_flow_lift; + int32_t cfs_ctl_action_data_update; + int32_t cfs_ctl_action_drop; + int32_t cfs_ctl_action_bad_op; + int32_t cfs_ctl_action_bad_len; + + int32_t cfs_sock_id_not_found; + + int32_t cfs_cfi_alloc_ok; + int32_t cfs_cfi_alloc_fail; + + int32_t cfs_sock_userspace_only; + int32_t cfs_sock_attach_in_vain; + int32_t cfs_sock_attach_already; + int32_t cfs_sock_attach_no_mem; + int32_t 
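/* the 32-bit counters are bumped with OSIncrementAtomic() while the 8-byte aligned 64-bit counters at the end of the structure accumulate byte counts via OSAddAtomic64() */ 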
cfs_sock_attach_failed; + int32_t cfs_sock_attached; + int32_t cfs_sock_detached; + + int32_t cfs_attach_event_ok; + int32_t cfs_attach_event_flow_control; + int32_t cfs_attach_event_fail; + + int32_t cfs_closed_event_ok; + int32_t cfs_closed_event_flow_control; + int32_t cfs_closed_event_fail; + + int32_t cfs_data_event_ok; + int32_t cfs_data_event_flow_control; + int32_t cfs_data_event_fail; + + int32_t cfs_disconnect_in_event_ok; + int32_t cfs_disconnect_out_event_ok; + int32_t cfs_disconnect_event_flow_control; + int32_t cfs_disconnect_event_fail; + + int32_t cfs_ctl_q_not_started; + + int32_t cfs_close_wait; + int32_t cfs_close_wait_timeout; + + int32_t cfs_flush_in_drop; + int32_t cfs_flush_out_drop; + int32_t cfs_flush_in_close; + int32_t cfs_flush_out_close; + int32_t cfs_flush_in_free; + int32_t cfs_flush_out_free; + + int32_t cfs_inject_q_nomem; + int32_t cfs_inject_q_nobufs; + int32_t cfs_inject_q_detached; + int32_t cfs_inject_q_in_fail; + int32_t cfs_inject_q_out_fail; + + int32_t cfs_inject_q_in_retry; + int32_t cfs_inject_q_out_retry; + + int32_t cfs_data_in_control; + int32_t cfs_data_in_oob; + int32_t cfs_data_out_control; + int32_t cfs_data_out_oob; + + int64_t cfs_ctl_q_in_enqueued __attribute__((aligned(8))); + int64_t cfs_ctl_q_out_enqueued __attribute__((aligned(8))); + int64_t cfs_ctl_q_in_peeked __attribute__((aligned(8))); + int64_t cfs_ctl_q_out_peeked __attribute__((aligned(8))); + + int64_t cfs_pending_q_in_enqueued __attribute__((aligned(8))); + int64_t cfs_pending_q_out_enqueued __attribute__((aligned(8))); + + int64_t cfs_inject_q_in_enqueued __attribute__((aligned(8))); + int64_t cfs_inject_q_out_enqueued __attribute__((aligned(8))); + int64_t cfs_inject_q_in_passed __attribute__((aligned(8))); + int64_t cfs_inject_q_out_passed __attribute__((aligned(8))); + +}; +#endif /* PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE + +#define M_SKIPCFIL M_PROTO5 + +extern int cfil_log_level; + +#define CFIL_LOG(level, fmt, ...) \ +do { \ + if (cfil_log_level >= level) \ + printf("%s:%d " fmt "\n",\ + __FUNCTION__, __LINE__, ##__VA_ARGS__); \ +} while (0) + + +extern void cfil_init(void); + +extern errno_t cfil_sock_attach(struct socket *so); +extern errno_t cfil_sock_detach(struct socket *so); + +extern int cfil_sock_data_out(struct socket *so, struct sockaddr *to, + struct mbuf *data, struct mbuf *control, + uint32_t flags); +extern int cfil_sock_data_in(struct socket *so, struct sockaddr *from, + struct mbuf *data, struct mbuf *control, + uint32_t flags); + +extern int cfil_sock_shutdown(struct socket *so, int *how); +extern void cfil_sock_is_closed(struct socket *so); +extern void cfil_sock_notify_shutdown(struct socket *so, int how); +extern void cfil_sock_close_wait(struct socket *so); + +extern int32_t cfil_sock_data_pending(struct sockbuf *sb); +extern int32_t cfil_sock_data_space(struct sockbuf *sb); +extern void cfil_sock_buf_update(struct sockbuf *sb); + +extern cfil_sock_id_t cfil_sock_id_from_socket(struct socket *so); + +__END_DECLS + +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* __CONTENT_FILTER_H__ */ diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index ff2d31cf0..28d307d13 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2013 Apple Inc. All rights reserved. + * Copyright (c) 1999-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -606,6 +606,11 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory, CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1, "enable flow-advisory mechanism"); +static u_int32_t if_delaybased_queue = 1; +SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue, + CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1, + "enable delay based dynamic queue sizing"); + static uint64_t hwcksum_in_invalidated = 0; SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED, @@ -973,7 +978,7 @@ dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, event_data_len = sizeof(struct net_event_data); } - strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); + strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); event_data->if_family = ifp->if_family; event_data->if_unit = (u_int32_t) ifp->if_unit; @@ -1332,6 +1337,7 @@ dlil_init(void) _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH); _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI); _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT); + _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED); _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN); _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN); @@ -2575,8 +2581,6 @@ ifnet_start_thread_fn(void *v, wait_result_t w) } /* NOTREACHED */ - lck_mtx_unlock(&ifp->if_start_lock); - VERIFY(0); /* we should never get here */ } void @@ -2738,8 +2742,6 @@ ifnet_poll_thread_fn(void *v, wait_result_t w) } /* NOTREACHED */ - lck_mtx_unlock(&ifp->if_poll_lock); - VERIFY(0); /* we should never get here */ } void @@ -2953,39 +2955,54 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m) errno_t ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp) { + errno_t rc; if (ifp == NULL || mp == NULL) return (EINVAL); else if (!(ifp->if_eflags & IFEF_TXSTART) || (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL)) return (ENXIO); + if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL); + ifnet_decr_iorefcnt(ifp); - return (ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL)); + return (rc); } errno_t ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc, struct mbuf **mp) { + errno_t rc; if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) return (EINVAL); else if (!(ifp->if_eflags & IFEF_TXSTART) || (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)) return (ENXIO); - - return (ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL)); + if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + + rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL); + ifnet_decr_iorefcnt(ifp); + return (rc); } errno_t ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len) { + errno_t rc; if (ifp == NULL || head == NULL || limit < 1) return (EINVAL); else if (!(ifp->if_eflags & IFEF_TXSTART) || (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL)) return (ENXIO); - - return (ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len)); + if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + + rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len); + ifnet_decr_iorefcnt(ifp); + return (rc); } errno_t @@ -2993,15 +3010,18 @@ ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc, u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len) { - + errno_t rc; if (ifp == 
NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc)) return (EINVAL); else if (!(ifp->if_eflags & IFEF_TXSTART) || (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)) return (ENXIO); - - return (ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head, - tail, cnt, len)); + if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head, + tail, cnt, len); + ifnet_decr_iorefcnt(ifp); + return (rc); } errno_t @@ -4982,6 +5002,9 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) if (if_flowadv) sflags |= PKTSCHEDF_QALG_FLOWCTL; + if (if_delaybased_queue) + sflags |= PKTSCHEDF_QALG_DELAYBASED; + /* Initialize transmit queue(s) */ err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE)); if (err != 0) { @@ -5112,6 +5135,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) VERIFY(ifp->if_delegated.type == 0); VERIFY(ifp->if_delegated.family == 0); VERIFY(ifp->if_delegated.subfamily == 0); + VERIFY(ifp->if_delegated.expensive == 0); ifnet_lock_done(ifp); ifnet_head_done(); @@ -5413,9 +5437,6 @@ ifnet_detach(ifnet_t ifp) /* Mark the interface as DOWN */ if_down(ifp); - /* Drain send queue */ - ifclassq_teardown(ifp); - /* Disable forwarding cached route */ lck_mtx_lock(&ifp->if_cached_route_lock); ifp->if_fwd_cacheok = 0; @@ -5548,6 +5569,9 @@ ifnet_detach_final(struct ifnet *ifp) } lck_mtx_unlock(&ifp->if_ref_lock); + /* Drain and destroy send queue */ + ifclassq_teardown(ifp); + /* Detach interface filters */ lck_mtx_lock(&ifp->if_flt_lock); if_flt_monitor_enter(ifp); @@ -5719,6 +5743,7 @@ ifnet_detach_final(struct ifnet *ifp) VERIFY(ifp->if_delegated.type == 0); VERIFY(ifp->if_delegated.family == 0); VERIFY(ifp->if_delegated.subfamily == 0); + VERIFY(ifp->if_delegated.expensive == 0); ifnet_lock_done(ifp); @@ -6036,7 +6061,7 @@ dlil_if_release(ifnet_t ifp) ifnet_lock_exclusive(ifp); lck_mtx_lock(&dlifp->dl_if_lock); dlifp->dl_if_flags &= ~DLIF_INUSE; - strncpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ); + strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ); ifp->if_name = dlifp->dl_if_namestorage; /* Reset external name (name + unit) */ ifp->if_xname = dlifp->dl_if_xnamestorage; @@ -6228,7 +6253,9 @@ if_lqm_update(struct ifnet *ifp, int lqm) VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX); /* Normalize to edge */ - if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_POOR) + if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_BAD) + lqm = IFNET_LQM_THRESH_BAD; + else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR) lqm = IFNET_LQM_THRESH_POOR; else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD) lqm = IFNET_LQM_THRESH_GOOD; @@ -6497,10 +6524,7 @@ dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep, if (dlil_lladdr_ckreq) { switch (sdl->sdl_type) { case IFT_ETHER: - case IFT_BRIDGE: case IFT_IEEE1394: - case IFT_IEEE8023ADLAG: - case IFT_L2VLAN: break; default: credp = NULL; @@ -6514,9 +6538,6 @@ dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep, switch (sdl->sdl_type) { case IFT_ETHER: - case IFT_BRIDGE: - case IFT_IEEE8023ADLAG: - case IFT_L2VLAN: VERIFY(size == ETHER_ADDR_LEN); bytes = unspec; break; diff --git a/bsd/net/ether_inet6_pr_module.c b/bsd/net/ether_inet6_pr_module.c index 02a408921..ee014a3dd 100644 --- a/bsd/net/ether_inet6_pr_module.c +++ b/bsd/net/ether_inet6_pr_module.c @@ -68,7 +68,6 @@ #include #include #include -#include #include #include diff --git a/bsd/net/ether_inet_pr_module.c 
b/bsd/net/ether_inet_pr_module.c index 94768329b..b47c9dfba 100644 --- a/bsd/net/ether_inet_pr_module.c +++ b/bsd/net/ether_inet_pr_module.c @@ -74,7 +74,6 @@ #include #include #include -#include #include #include diff --git a/bsd/net/if.c b/bsd/net/if.c index 280b6d49a..c9bb74aa4 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -82,6 +82,8 @@ #include #include #include +#include +#include #include #include @@ -1786,7 +1788,11 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCGIFLINKQUALITYMETRIC: /* struct ifreq */ case SIOCSIFLOG: /* struct ifreq */ case SIOCGIFLOG: /* struct ifreq */ - case SIOCGIFDELEGATE: { /* struct ifreq */ + case SIOCGIFDELEGATE: /* struct ifreq */ + case SIOCGIFEXPENSIVE: /* struct ifreq */ + case SIOCSIFEXPENSIVE: /* struct ifreq */ + case SIOCSIF2KCL: /* struct ifreq */ + case SIOCGIF2KCL: { /* struct ifreq */ struct ifreq ifr; bcopy(data, &ifr, sizeof (ifr)); ifr.ifr_name[IFNAMSIZ - 1] = '\0'; @@ -1823,13 +1829,6 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) break; #endif /* INET6 */ - case SIOCSLIFPHYADDR: /* struct if_laddrreq */ - case SIOCGLIFPHYADDR: /* struct if_laddrreq */ - bcopy(((struct if_laddrreq *)(void *)data)->iflr_name, - ifname, IFNAMSIZ); - ifp = ifunit(ifname); - break; - case SIOCGIFSTATUS: /* struct ifstat */ ifs = _MALLOC(sizeof (*ifs), M_DEVBUF, M_WAITOK); if (ifs == NULL) { @@ -1905,7 +1904,6 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCSIFPHYADDR_IN6_32: /* struct in6_aliasreq_32 */ case SIOCSIFPHYADDR_IN6_64: /* struct in6_aliasreq_64 */ #endif /* INET6 */ - case SIOCSLIFPHYADDR: /* struct if_laddrreq */ error = proc_suser(p); if (error != 0) break; @@ -1926,7 +1924,6 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) bcopy(ifs, data, sizeof (*ifs)); break; - case SIOCGLIFPHYADDR: /* struct if_laddrreq */ case SIOCGIFMEDIA32: /* struct ifmediareq32 */ case SIOCGIFMEDIA64: /* struct ifmediareq64 */ error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, data); @@ -2381,6 +2378,67 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) ifnet_lock_done(ifp); break; + case SIOCGIFEXPENSIVE: + ifnet_lock_shared(ifp); + if (ifp->if_eflags & IFEF_EXPENSIVE) + ifr->ifr_expensive = 1; + else + ifr->ifr_expensive = 0; + ifnet_lock_done(ifp); + break; + + case SIOCSIFEXPENSIVE: + { + struct ifnet *difp; + + if ((error = priv_check_cred(kauth_cred_get(), + PRIV_NET_INTERFACE_CONTROL, 0)) != 0) + return (error); + ifnet_lock_exclusive(ifp); + if (ifr->ifr_expensive) + ifp->if_eflags |= IFEF_EXPENSIVE; + else + ifp->if_eflags &= ~IFEF_EXPENSIVE; + ifnet_lock_done(ifp); + /* + * Update the expensive bit in the delegated interface + * structure. + */ + ifnet_head_lock_shared(); + TAILQ_FOREACH(difp, &ifnet_head, if_link) { + ifnet_lock_exclusive(difp); + if (difp->if_delegated.ifp == ifp) { + difp->if_delegated.expensive = + ifp->if_eflags & IFEF_EXPENSIVE ? 
1 : 0; + + } + ifnet_lock_done(difp); + } + ifnet_head_done(); + break; + } + + case SIOCGIF2KCL: + ifnet_lock_shared(ifp); + if (ifp->if_eflags & IFEF_2KCL) + ifr->ifr_2kcl = 1; + else + ifr->ifr_2kcl = 0; + ifnet_lock_done(ifp); + break; + + case SIOCSIF2KCL: + if ((error = priv_check_cred(kauth_cred_get(), + PRIV_NET_INTERFACE_CONTROL, 0)) != 0) + return (error); + ifnet_lock_exclusive(ifp); + if (ifr->ifr_2kcl) + ifp->if_eflags |= IFEF_2KCL; + else + ifp->if_eflags &= ~IFEF_2KCL; + ifnet_lock_done(ifp); + break; + case SIOCSIFDSTADDR: case SIOCSIFADDR: case SIOCSIFBRDADDR: @@ -3880,9 +3938,6 @@ ifioctl_cassert(void) case SIOCSIFMETRIC: case SIOCDIFADDR: case SIOCAIFADDR: - case SIOCALIFADDR: - case SIOCGLIFADDR: - case SIOCDLIFADDR: case SIOCGIFADDR: case SIOCGIFDSTADDR: case SIOCGIFBRDADDR: @@ -3910,8 +3965,6 @@ ifioctl_cassert(void) case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCDIFPHYADDR: - case SIOCSLIFPHYADDR: - case SIOCGLIFPHYADDR: case SIOCGIFDEVMTU: case SIOCSIFALTMTU: case SIOCGIFALTMTU: diff --git a/bsd/net/if.h b/bsd/net/if.h index 0de578479..fd7800d8a 100644 --- a/bsd/net/if.h +++ b/bsd/net/if.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -102,6 +102,8 @@ #define KEV_DL_MASTER_ELECTED 23 #define KEV_DL_ISSUES 24 #define KEV_DL_IFDELEGATE_CHANGED 25 +#define KEV_DL_AWDL_RESTRICTED 26 +#define KEV_DL_AWDL_UNRESTRICTED 27 #include #include @@ -155,24 +157,27 @@ struct if_clonereq32 { #ifdef PRIVATE /* extended flags definitions: (all bits reserved for internal/future use) */ -#define IFEF_AUTOCONFIGURING 0x1 /* allow BOOTP/DHCP replies to enter */ -#define IFEF_IPV6_DISABLED 0x20 /* coupled to ND6_IFF_IFDISABLED */ -#define IFEF_ACCEPT_RTADV 0x40 /* accepts IPv6 RA on the interface */ -#define IFEF_TXSTART 0x80 /* has start callback */ -#define IFEF_RXPOLL 0x100 /* supports opportunistic input poll */ -#define IFEF_VLAN 0x200 /* interface has one or more vlans */ -#define IFEF_BOND 0x400 /* interface is part of bond */ -#define IFEF_ARPLL 0x800 /* ARP for IPv4LL addresses */ -#define IFEF_NOWINDOWSCALE 0x1000 /* Don't scale TCP window on iface */ -#define IFEF_NOAUTOIPV6LL 0x2000 /* Need explicit IPv6 LL address */ -#define IFEF_IPV4_ROUTER 0x8000 /* interior when in IPv4 router mode */ -#define IFEF_IPV6_ROUTER 0x10000 /* interior when in IPv6 router mode */ -#define IFEF_LOCALNET_PRIVATE 0x20000 /* local private network */ +#define IFEF_AUTOCONFIGURING 0x00000001 /* allow BOOTP/DHCP replies to enter */ +#define IFEF_IPV6_DISABLED 0x00000020 /* coupled to ND6_IFF_IFDISABLED */ +#define IFEF_ACCEPT_RTADV 0x00000040 /* accepts IPv6 RA on the interface */ +#define IFEF_TXSTART 0x00000080 /* has start callback */ +#define IFEF_RXPOLL 0x00000100 /* supports opportunistic input poll */ +#define IFEF_VLAN 0x00000200 /* interface has one or more vlans */ +#define IFEF_BOND 0x00000400 /* interface is part of bond */ +#define IFEF_ARPLL 0x00000800 /* ARP for IPv4LL addresses */ +#define IFEF_NOWINDOWSCALE 0x00001000 /* Don't scale TCP window on iface */ +#define IFEF_NOAUTOIPV6LL 0x00002000 /* Need explicit IPv6 LL address */ +#define IFEF_EXPENSIVE 0x00004000 /* Data access has a cost */ +#define IFEF_IPV4_ROUTER 0x00008000 /* interior when in IPv4 router mode */ +#define IFEF_IPV6_ROUTER 0x00010000 /* interior when in IPv6 router mode */ +#define IFEF_LOCALNET_PRIVATE 0x00020000 /* local private network */ #define 
IFEF_SERVICE_TRIGGERED IFEF_LOCALNET_PRIVATE -#define IFEF_IPV6_ND6ALT 0x40000 /* alternative. KPI for ND6 */ -#define IFEF_RESTRICTED_RECV 0x80000 /* interface restricts inbound pkts */ -#define IFEF_AWDL 0x100000 /* Apple Wireless Direct Link */ -#define IFEF_NOACKPRI 0x200000 /* No TCP ACK prioritization */ +#define IFEF_IPV6_ND6ALT 0x00040000 /* alternative. KPI for ND6 */ +#define IFEF_RESTRICTED_RECV 0x00080000 /* interface restricts inbound pkts */ +#define IFEF_AWDL 0x00100000 /* Apple Wireless Direct Link */ +#define IFEF_NOACKPRI 0x00200000 /* No TCP ACK prioritization */ +#define IFEF_AWDL_RESTRICTED 0x00400000 /* Restricted AWDL mode */ +#define IFEF_2KCL 0x00800000 /* prefers 2K cluster (socket based tunnel) */ #define IFEF_SENDLIST 0x10000000 /* Supports tx packet lists */ #define IFEF_DIRECTLINK 0x20000000 /* point-to-point topology */ #define _IFEF_INUSE 0x40000000 /* deprecated */ @@ -244,7 +249,9 @@ struct if_clonereq32 { IFCAP_VLAN_HWTAGGING | IFCAP_JUMBO_MTU | IFCAP_AV | IFCAP_TXSTATUS) #define IFQ_MAXLEN 128 -#define IFNET_SLOWHZ 1 /* granularity is 1 second */ +#define IFNET_SLOWHZ 1 /* granularity is 1 second */ +#define IFQ_TARGET_DELAY (10ULL * 1000 * 1000) /* 10 ms */ +#define IFQ_UPDATE_INTERVAL (100ULL * 1000 * 1000) /* 100 ms */ /* * Message format for use in obtaining information about interfaces @@ -457,7 +464,11 @@ struct ifreq { #define IFRTYPE_SUBFAMILY_BLUETOOTH 2 #define IFRTYPE_SUBFAMILY_WIFI 3 #define IFRTYPE_SUBFAMILY_THUNDERBOLT 4 +#define IFRTYPE_SUBFAMILY_RESERVED 5 } ifru_type; + u_int32_t ifru_expensive; + u_int32_t ifru_awdl_restricted; + u_int32_t ifru_2kcl; #endif /* PRIVATE */ } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ @@ -492,7 +503,10 @@ struct ifreq { #define ifr_eflags ifr_ifru.ifru_eflags /* extended flags */ #define ifr_log ifr_ifru.ifru_log /* logging level/flags */ #define ifr_delegated ifr_ifru.ifru_delegated /* delegated interface index */ +#define ifr_expensive ifr_ifru.ifru_expensive #define ifr_type ifr_ifru.ifru_type /* interface type */ +#define ifr_awdl_restricted ifr_ifru.ifru_awdl_restricted +#define ifr_2kcl ifr_ifru.ifru_2kcl #endif /* PRIVATE */ }; @@ -641,30 +655,20 @@ struct kev_dl_proto_data { u_int32_t proto_remaining_count; }; -/* - * Structure for SIOC[AGD]LIFADDR - */ -struct if_laddrreq { - char iflr_name[IFNAMSIZ]; - unsigned int flags; -#define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */ - unsigned int prefixlen; /* in/out */ - struct sockaddr_storage addr; /* in/out */ - struct sockaddr_storage dstaddr; /* out */ -}; - #ifdef PRIVATE /* * Link Quality Metrics * * IFNET_LQM_THRESH_OFF Metric is not available; device is off. * IFNET_LQM_THRESH_UNKNOWN Metric is not (yet) known. + * IFNET_LQM_THRESH_BAD Link quality is considered bad by driver. * IFNET_LQM_THRESH_POOR Link quality is considered poor by driver. * IFNET_LQM_THRESH_GOOD Link quality is considered good by driver. */ enum { IFNET_LQM_THRESH_OFF = (-2), IFNET_LQM_THRESH_UNKNOWN = (-1), + IFNET_LQM_THRESH_BAD = 10, IFNET_LQM_THRESH_POOR = 50, IFNET_LQM_THRESH_GOOD = 100 }; diff --git a/bsd/net/if_bond.c b/bsd/net/if_bond.c index 271a07668..2bb5113f1 100644 --- a/bsd/net/if_bond.c +++ b/bsd/net/if_bond.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2013 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -302,26 +302,6 @@ struct bondport_s { static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap); -static __inline__ int -ifbond_flags_promisc(ifbond_ref ifb) -{ - return ((ifb->ifb_flags & IFBF_PROMISC) != 0); -} - -static __inline__ void -ifbond_flags_set_promisc(ifbond_ref ifb) -{ - ifb->ifb_flags |= IFBF_PROMISC; - return; -} - -static __inline__ void -ifbond_flags_clear_promisc(ifbond_ref ifb) -{ - ifb->ifb_flags &= ~IFBF_PROMISC; - return; -} - static __inline__ int ifbond_flags_if_detaching(ifbond_ref ifb) { @@ -341,20 +321,6 @@ ifbond_flags_lladdr(ifbond_ref ifb) return ((ifb->ifb_flags & IFBF_LLADDR) != 0); } -static __inline__ void -ifbond_flags_set_lladdr(ifbond_ref ifb) -{ - ifb->ifb_flags |= IFBF_LLADDR; - return; -} - -static __inline__ void -ifbond_flags_clear_lladdr(ifbond_ref ifb) -{ - ifb->ifb_flags &= ~IFBF_LLADDR; - return; -} - static __inline__ int ifbond_flags_change_in_progress(ifbond_ref ifb) { @@ -2539,7 +2505,7 @@ bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap) break; } bzero(&ibs, sizeof(ibs)); - strncpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name)); + strlcpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name)); ibs.ibs_port_priority = port->po_priority; if (ifb->ifb_mode == IF_BOND_MODE_LACP) { ibs.ibs_state = port->po_actor_state; @@ -3073,7 +3039,7 @@ interface_link_event(struct ifnet * ifp, u_int32_t event_code) event.header.event_code = event_code; event.header.event_data[0] = ifnet_family(ifp); event.unit = (u_int32_t) ifnet_unit(ifp); - strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); + strlcpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); ifnet_event(ifp, &event.header); return; } diff --git a/bsd/net/if_bridge.c b/bsd/net/if_bridge.c index bb2ced374..37bea9581 100644 --- a/bsd/net/if_bridge.c +++ b/bsd/net/if_bridge.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2013 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -143,6 +143,7 @@ #include /* for struct arpcom */ #include #include +#define _IP_VHL #include #include #if INET6 @@ -171,6 +172,9 @@ #endif /* PFIL_HOOKS */ #include +#include +#include + #if BRIDGE_DEBUG #define BR_DBGF_LIFECYCLE 0x0001 #define BR_DBGF_INPUT 0x0002 @@ -180,6 +184,7 @@ #define BR_DBGF_IOCTL 0x0020 #define BR_DBGF_MBUF 0x0040 #define BR_DBGF_MCAST 0x0080 +#define BR_DBGF_HOSTFILTER 0x0100 #endif /* BRIDGE_DEBUG */ #define _BRIDGE_LOCK(_sc) lck_mtx_lock(&(_sc)->sc_mtx) @@ -304,12 +309,18 @@ struct bridge_iflist { interface_filter_t bif_iff_ref; struct bridge_softc *bif_sc; uint32_t bif_flags; + + struct in_addr bif_hf_ipsrc; + uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN]; }; #define BIFF_PROMISC 0x01 /* promiscuous mode set */ #define BIFF_PROTO_ATTACHED 0x02 /* protocol attached */ #define BIFF_FILTER_ATTACHED 0x04 /* interface filter attached */ #define BIFF_MEDIA_ACTIVE 0x08 /* interface media active */ +#define BIFF_HOST_FILTER 0x10 /* host filter enabled */ +#define BIFF_HF_HWSRC 0x20 /* host filter source MAC is set */ +#define BIFF_HF_IPSRC 0x40 /* host filter source IP is set */ /* * Bridge route node. 
@@ -336,6 +347,7 @@ struct bridge_delayed_call { bridge_delayed_func_t bdc_func; /* Function to call */ struct timespec bdc_ts; /* Time to call */ u_int32_t bdc_flags; + thread_call_t bdc_thread_call; }; #define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */ @@ -391,6 +403,8 @@ struct bridge_softc { #define SCF_RESIZING 0x02 #define SCF_MEDIA_ACTIVE 0x04 +struct bridge_hostfilter_stats bridge_hostfilter_stats; + decl_lck_mtx_data(static, bridge_list_mtx); static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; @@ -514,6 +528,8 @@ static int bridge_ioctl_stxhc(struct bridge_softc *, void *); static int bridge_ioctl_purge(struct bridge_softc *sc, void *); static int bridge_ioctl_gfilt(struct bridge_softc *, void *); static int bridge_ioctl_sfilt(struct bridge_softc *, void *); +static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *); +static int bridge_ioctl_shostfilter(struct bridge_softc *, void *); #ifdef PFIL_HOOKS static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); @@ -536,6 +552,8 @@ static u_int32_t bridge_updatelinkstatus(struct bridge_softc *); static int interface_media_active(struct ifnet *); static void bridge_schedule_delayed_call(struct bridge_delayed_call *); static void bridge_cancel_delayed_call(struct bridge_delayed_call *); +static void bridge_cleanup_delayed_call(struct bridge_delayed_call *); +static int bridge_host_filter(struct bridge_iflist *, struct mbuf *); #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how) @@ -545,6 +563,9 @@ static void bridge_cancel_delayed_call(struct bridge_delayed_call *); u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +static u_int8_t ethernulladdr[ETHER_ADDR_LEN] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + #if BRIDGESTP static struct bstp_cb_ops bridge_ops = { .bcb_state = bridge_state_change, @@ -581,6 +602,10 @@ SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay, "Delay before calling delayed function"); #endif +SYSCTL_STRUCT(_net_link_bridge, OID_AUTO, + hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED, + &bridge_hostfilter_stats, bridge_hostfilter_stats, ""); + #if defined(PFIL_HOOKS) static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */ static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */ @@ -619,7 +644,7 @@ struct bridge_control { #define BC_F_SUSER 0x04 /* do super-user check */ static const struct bridge_control bridge_control_table32[] = { - { bridge_ioctl_add, sizeof (struct ifbreq), + { bridge_ioctl_add, sizeof (struct ifbreq), /* 0 */ BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_del, sizeof (struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, @@ -644,7 +669,7 @@ static const struct bridge_control bridge_control_table32[] = { { bridge_ioctl_sto, sizeof (struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, - { bridge_ioctl_gto, sizeof (struct ifbrparam), + { bridge_ioctl_gto, sizeof (struct ifbrparam), /* 10 */ BC_F_COPYOUT }, { bridge_ioctl_daddr32, sizeof (struct ifbareq32), @@ -670,7 +695,7 @@ static const struct bridge_control bridge_control_table32[] = { { bridge_ioctl_gma, sizeof (struct ifbrparam), BC_F_COPYOUT }, - { bridge_ioctl_sma, sizeof (struct ifbrparam), + { bridge_ioctl_sma, sizeof (struct ifbrparam), /* 20 */ BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifprio, sizeof (struct ifbreq), @@ -698,7 +723,7 @@ static const struct bridge_control bridge_control_table32[] = { { bridge_ioctl_grte, sizeof (struct ifbrparam), BC_F_COPYOUT }, - { bridge_ioctl_gifsstp32, sizeof (struct 
ifbpstpconf32), + { bridge_ioctl_gifsstp32, sizeof (struct ifbpstpconf32), /* 30 */ BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sproto, sizeof (struct ifbrparam), @@ -709,10 +734,15 @@ static const struct bridge_control bridge_control_table32[] = { { bridge_ioctl_sifmaxaddr, sizeof (struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_ghostfilter, sizeof (struct ifbrhostfilter), + BC_F_COPYIN|BC_F_COPYOUT }, + { bridge_ioctl_shostfilter, sizeof (struct ifbrhostfilter), + BC_F_COPYIN|BC_F_SUSER }, }; static const struct bridge_control bridge_control_table64[] = { - { bridge_ioctl_add, sizeof (struct ifbreq), + { bridge_ioctl_add, sizeof (struct ifbreq), /* 0 */ BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_del, sizeof (struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, @@ -737,7 +767,7 @@ static const struct bridge_control bridge_control_table64[] = { { bridge_ioctl_sto, sizeof (struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, - { bridge_ioctl_gto, sizeof (struct ifbrparam), + { bridge_ioctl_gto, sizeof (struct ifbrparam), /* 10 */ BC_F_COPYOUT }, { bridge_ioctl_daddr64, sizeof (struct ifbareq64), @@ -763,7 +793,7 @@ static const struct bridge_control bridge_control_table64[] = { { bridge_ioctl_gma, sizeof (struct ifbrparam), BC_F_COPYOUT }, - { bridge_ioctl_sma, sizeof (struct ifbrparam), + { bridge_ioctl_sma, sizeof (struct ifbrparam), /* 20 */ BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifprio, sizeof (struct ifbreq), @@ -791,7 +821,7 @@ static const struct bridge_control bridge_control_table64[] = { { bridge_ioctl_grte, sizeof (struct ifbrparam), BC_F_COPYOUT }, - { bridge_ioctl_gifsstp64, sizeof (struct ifbpstpconf64), + { bridge_ioctl_gifsstp64, sizeof (struct ifbpstpconf64), /* 30 */ BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sproto, sizeof (struct ifbrparam), @@ -802,6 +832,11 @@ static const struct bridge_control bridge_control_table64[] = { { bridge_ioctl_sifmaxaddr, sizeof (struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_ghostfilter, sizeof (struct ifbrhostfilter), + BC_F_COPYIN|BC_F_COPYOUT }, + { bridge_ioctl_shostfilter, sizeof (struct ifbrhostfilter), + BC_F_COPYIN|BC_F_SUSER }, }; static const unsigned int bridge_control_table_size = @@ -980,7 +1015,7 @@ printf_mbuf_data(mbuf_t m, size_t offset, size_t len) if (offset > pktlen) return; - maxlen = (pktlen - offset > len) ? len : pktlen; + maxlen = (pktlen - offset > len) ? 
len : pktlen - offset; n = m; mlen = mbuf_len(n); ptr = mbuf_data(n); @@ -1250,7 +1285,7 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params) */ sc->sc_defaddr[4] = (((sc->sc_defaddr[4] & 0x0f) << 4) | - ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^ + ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^ sc->sc_defaddr[5]; sc->sc_defaddr[5] = ifp->if_unit & 0xff; } @@ -1349,6 +1384,9 @@ bridge_clone_destroy(struct ifnet *ifp) bridge_cancel_delayed_call(&sc->sc_resize_call); + bridge_cleanup_delayed_call(&sc->sc_resize_call); + bridge_cleanup_delayed_call(&sc->sc_aging_timer); + error = ifnet_set_flags(ifp, 0, IFF_UP); if (error != 0) { printf("%s: ifnet_set_flags failed %d\n", __func__, error); @@ -3112,6 +3150,67 @@ bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg) #endif /* !BRIDGESTP */ } + +static int +bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg) +{ + struct ifbrhostfilter *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->ifbrhf_ifsname); + if (bif == NULL) + return (ENOENT); + + bzero(req, sizeof(struct ifbrhostfilter)); + if (bif->bif_flags & BIFF_HOST_FILTER) { + req->ifbrhf_flags |= IFBRHF_ENABLED; + bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca, + ETHER_ADDR_LEN); + req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr; + } + return (0); +} + +static int +bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg) +{ + struct ifbrhostfilter *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->ifbrhf_ifsname); + if (bif == NULL) + return (ENOENT); + + if (req->ifbrhf_flags & IFBRHF_ENABLED) { + bif->bif_flags |= BIFF_HOST_FILTER; + + if (req->ifbrhf_flags & IFBRHF_HWSRC) { + bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc, + ETHER_ADDR_LEN); + if (bcmp(req->ifbrhf_hwsrca, ethernulladdr, + ETHER_ADDR_LEN) != 0) + bif->bif_flags |= BIFF_HF_HWSRC; + else + bif->bif_flags &= ~BIFF_HF_HWSRC; + } + if (req->ifbrhf_flags & IFBRHF_IPSRC) { + bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc; + if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) + bif->bif_flags |= BIFF_HF_IPSRC; + else + bif->bif_flags &= ~BIFF_HF_IPSRC; + } + } else { + bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC | + BIFF_HF_IPSRC); + bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN); + bif->bif_hf_ipsrc.s_addr = INADDR_ANY; + } + + return (0); +} + + /* * bridge_ifdetach: * @@ -3274,14 +3373,14 @@ bridge_delayed_callback(void *param) BRIDGE_LOCK(sc); -#if BRIDGE_DEBUG +#if BRIDGE_DEBUG_DELAYED_CALLBACK if (if_bridge_debug & BR_DBGF_DELAYED_CALL) printf("%s: %s call 0x%llx flags 0x%x\n", __func__, sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call), call->bdc_flags); -#endif /* BRIDGE_DEBUG */ +#endif /* BRIDGE_DEBUG_DELAYED_CALLBACK */ - if (call->bdc_flags & BDCF_CANCELLING) { + if (call->bdc_flags & BDCF_CANCELLING) { wakeup(call); } else { if ((sc->sc_flags & SCF_DETACHING) == 0) @@ -3318,15 +3417,24 @@ bridge_schedule_delayed_call(struct bridge_delayed_call *call) call->bdc_flags = BDCF_OUTSTANDING; -#if BRIDGE_DEBUG +#if BRIDGE_DEBUG_DELAYED_CALLBACK if (if_bridge_debug & BR_DBGF_DELAYED_CALL) printf("%s: %s call 0x%llx flags 0x%x\n", __func__, sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call), call->bdc_flags); -#endif /* BRIDGE_DEBUG */ +#endif /* BRIDGE_DEBUG_DELAYED_CALLBACK */ - thread_call_func_delayed((thread_call_func_t)bridge_delayed_callback, - call, deadline); + if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) + thread_call_func_delayed( + (thread_call_func_t)bridge_delayed_callback, + call, deadline); + else { + if (call->bdc_thread_call == NULL) + 
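/* + * A zero timeout means call back as soon as possible: the thread + * call is allocated once on first use and reused afterwards; it is + * released by bridge_cleanup_delayed_call() + */ + 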
call->bdc_thread_call = thread_call_allocate( + (thread_call_func_t)bridge_delayed_callback, + call); + thread_call_enter(call->bdc_thread_call); + } } /* @@ -3377,6 +3485,38 @@ bridge_cancel_delayed_call(struct bridge_delayed_call *call) call->bdc_flags &= ~BDCF_CANCELLING; } +/* + * bridge_cleanup_delayed_call: + * + * Dispose of the resources allocated for a delayed call. + * Assumes the delayed call is not queued or running. + */ +static void +bridge_cleanup_delayed_call(struct bridge_delayed_call *call) +{ + boolean_t result; + struct bridge_softc *sc = call->bdc_sc; + + /* + * The call was never scheduled + */ + if (sc == NULL) + return; + + BRIDGE_LOCK_ASSERT_HELD(sc); + + VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0); + VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0); + + if (call->bdc_thread_call != NULL) { + result = thread_call_free(call->bdc_thread_call); + if (result == FALSE) + panic("%s thread_call_free() failed for call %p", + __func__, call); + call->bdc_thread_call = NULL; + } +} + /* * bridge_init: * @@ -4066,6 +4206,17 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header) return (0); } + if (bif->bif_flags & BIFF_HOST_FILTER) { + error = bridge_host_filter(bif, m); + if (error != 0) { + if (if_bridge_debug & BR_DBGF_INPUT) + printf("%s: %s bridge_host_filter failed\n", + __func__, bif->bif_ifp->if_xname); + BRIDGE_UNLOCK(sc); + return (EJUSTRETURN); + } + } + eh = mtod(m, struct ether_header *); bridge_span(sc, m); @@ -4382,6 +4533,10 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, if ((dst_if->if_flags & IFF_RUNNING) == 0) continue; + if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) { + continue; + } + if (TAILQ_NEXT(dbif, bif_next) == NULL) { mc = m; used = 1; @@ -5745,6 +5900,286 @@ bridge_link_event(struct ifnet *ifp, u_int32_t event_code) event.header.event_code = event_code; event.header.event_data[0] = ifnet_family(ifp); event.unit = (u_int32_t)ifnet_unit(ifp); - strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); + strlcpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); ifnet_event(ifp, &event.header); } + +#define BRIDGE_HF_DROP(reason, func, line) { \ + bridge_hostfilter_stats.reason++; \ + if (if_bridge_debug & BR_DBGF_HOSTFILTER) \ + printf("%s.%d" #reason, func, line); \ + error = EINVAL; \ +} + +/* + * Make sure this is a DHCP or Bootp request that matches the host filter + */ +static int +bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset) +{ + int error = EINVAL; + struct dhcp dhcp; + + /* + * Note: We use the dhcp structure because bootp structure definition + * is larger and some vendors do not pad the request + */ + error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp); + if (error != 0) { + BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__); + goto done; + } + if (dhcp.dp_op != BOOTREQUEST) { + BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__); + goto done; + } + /* + * The hardware address must be an exact match + */ + if (dhcp.dp_htype != ARPHRD_ETHER) { + BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__); + goto done; + } + if (dhcp.dp_hlen != ETHER_ADDR_LEN) { + BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__); + goto done; + } + if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc, + ETHER_ADDR_LEN) != 0) { + BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__); + goto done; + } + /* + * Client address must match the host address or be unspecified + */ + if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr && + dhcp.dp_ciaddr.s_addr != INADDR_ANY) { +
BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__); + goto done; + } + error = 0; +done: + return (error); +} + +static int +bridge_host_filter(struct bridge_iflist *bif, struct mbuf *m) +{ + int error = EINVAL; + struct ether_header *eh; + static struct in_addr inaddr_any = { .s_addr = INADDR_ANY }; + + /* + * Check that the Ethernet header is large enough + */ + if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) { + BRIDGE_HF_DROP(brhf_ether_too_small, __func__, __LINE__); + goto done; + } + if (mbuf_len(m) < sizeof(struct ether_header) && + mbuf_pullup(&m, sizeof(struct ether_header)) != 0) { + BRIDGE_HF_DROP(brhf_ether_pullup_failed, __func__, __LINE__); + goto done; + } + eh = mtod(m, struct ether_header *); + + /* + * Restrict the source hardware address + */ + if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 || + bcmp(eh->ether_shost, bif->bif_hf_hwsrc, + ETHER_ADDR_LEN) != 0) { + BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__); + goto done; + } + + /* + * Restrict Ethernet protocols to ARP and IP + */ + if (eh->ether_type == htons(ETHERTYPE_ARP)) { + struct ether_arp *ea; + size_t minlen = sizeof(struct ether_header) + + sizeof(struct ether_arp); + + /* + * Make the Ethernet and ARP headers contiguous + */ + if (mbuf_pkthdr_len(m) < minlen) { + BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__); + goto done; + } + if (mbuf_len(m) < minlen && mbuf_pullup(&m, minlen) != 0) { + BRIDGE_HF_DROP(brhf_arp_pullup_failed, + __func__, __LINE__); + goto done; + } + /* + * Verify this is an Ethernet/IP ARP + */ + eh = mtod(m, struct ether_header *); + ea = (struct ether_arp *)(eh + 1); + if (ea->arp_hrd != htons(ARPHRD_ETHER)) { + BRIDGE_HF_DROP(brhf_arp_bad_hw_type, + __func__, __LINE__); + goto done; + } + if (ea->arp_pro != htons(ETHERTYPE_IP)) { + BRIDGE_HF_DROP(brhf_arp_bad_pro_type, + __func__, __LINE__); + goto done; + } + /* + * Verify the address lengths are correct + */ + if (ea->arp_hln != ETHER_ADDR_LEN) { + BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__); + goto done; + } + if (ea->arp_pln != sizeof(struct in_addr)) { + BRIDGE_HF_DROP(brhf_arp_bad_pro_len, + __func__, __LINE__); + goto done; + } + + /* + * Allow only ARP request or ARP reply + */ + if (ea->arp_op != htons(ARPOP_REQUEST) && + ea->arp_op != htons(ARPOP_REPLY)) { + BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__); + goto done; + } + /* + * Verify source hardware address matches + */ + if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc, + ETHER_ADDR_LEN) != 0) { + BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__); + goto done; + } + /* + * Verify source protocol address: + * May be null for an ARP probe + */ + if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr, + sizeof(struct in_addr)) != 0 && + bcmp(ea->arp_spa, &inaddr_any, + sizeof(struct in_addr)) != 0) { + BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__); + goto done; + } + /* + * The ARP packet passed all of the host filter checks + */ + bridge_hostfilter_stats.brhf_arp_ok += 1; + error = 0; + } else if (eh->ether_type == htons(ETHERTYPE_IP)) { + size_t minlen = sizeof(struct ether_header) + sizeof(struct ip); + struct ip iphdr; + size_t offset; + + /* + * Make the Ethernet and IP headers contiguous + */ + if (mbuf_pkthdr_len(m) < minlen) { + BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__); + goto done; + } + offset = sizeof(struct ether_header); + error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr); + if (error != 0) { + BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__); + goto done; + } + /* + * Verify the source IP address + */ + if (iphdr.ip_p == IPPROTO_UDP) { + struct
udphdr udp; + + minlen += sizeof(struct udphdr); + if (mbuf_pkthdr_len(m) < minlen) { + BRIDGE_HF_DROP(brhf_ip_too_small, + __func__, __LINE__); + goto done; + } + + /* + * Allow all zero addresses for DHCP requests + */ + if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr && + iphdr.ip_src.s_addr != INADDR_ANY) { + BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, + __func__, __LINE__); + goto done; + } + offset = sizeof(struct ether_header) + + (IP_VHL_HL(iphdr.ip_vhl) << 2); + error = mbuf_copydata(m, offset, + sizeof(struct udphdr), &udp); + if (error != 0) { + BRIDGE_HF_DROP(brhf_ip_too_small, + __func__, __LINE__); + goto done; + } + /* + * Either it's a Bootp/DHCP packet that we like or + * it's a UDP packet with the host IP as the source address + */ + if (udp.uh_sport == htons(IPPORT_BOOTPC) && + udp.uh_dport == htons(IPPORT_BOOTPS)) { + minlen += sizeof(struct dhcp); + if (mbuf_pkthdr_len(m) < minlen) { + BRIDGE_HF_DROP(brhf_ip_too_small, + __func__, __LINE__); + goto done; + } + offset += sizeof(struct udphdr); + error = bridge_dhcp_filter(bif, m, offset); + if (error != 0) + goto done; + } else if (iphdr.ip_src.s_addr == INADDR_ANY) { + BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, + __func__, __LINE__); + goto done; + } + } else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr || + bif->bif_hf_ipsrc.s_addr == INADDR_ANY) { + + BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__); + goto done; + } + /* + * Allow only boring IP protocols + */ + if (iphdr.ip_p != IPPROTO_TCP && + iphdr.ip_p != IPPROTO_UDP && + iphdr.ip_p != IPPROTO_ICMP && + iphdr.ip_p != IPPROTO_ESP && + iphdr.ip_p != IPPROTO_AH && + iphdr.ip_p != IPPROTO_GRE) { + BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__); + goto done; + } + bridge_hostfilter_stats.brhf_ip_ok += 1; + error = 0; + } else { + BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__); + goto done; + } +done: + if (error != 0) { + if (if_bridge_debug & BR_DBGF_HOSTFILTER) { + if (m) { + printf_mbuf_data(m, 0, + sizeof(struct ether_header) + + sizeof(struct ip)); + } + printf("\n"); + } + + if (m != NULL) + m_freem(m); + } + return (error); +} diff --git a/bsd/net/if_bridgevar.h b/bsd/net/if_bridgevar.h index f3774c5aa..8b7188c86 100644 --- a/bsd/net/if_bridgevar.h +++ b/bsd/net/if_bridgevar.h @@ -1,6 +1,5 @@ -/* $NetBSD: if_bridgevar.h,v 1.4 2003/07/08 07:13:50 itojun Exp $ */ /* - * Copyright (c) 2004-2010 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -139,8 +138,8 @@ #define BRDGSMA 20 /* set max age (ifbrparam) */ #define BRDGSIFPRIO 21 /* set if priority (ifbreq) */ #define BRDGSIFCOST 22 /* set if path cost (ifbreq) */ -#define BRDGGFILT 23 /* get filter flags (ifbrparam) */ -#define BRDGSFILT 24 /* set filter flags (ifbrparam) */ +#define BRDGGFILT 23 /* get filter flags (ifbrparam) */ +#define BRDGSFILT 24 /* set filter flags (ifbrparam) */ #define BRDGPURGE 25 /* purge address cache for a particular interface (ifbreq) */ #define BRDGADDS 26 /* add bridge span member (ifbreq) */ #define BRDGDELS 27 /* delete bridge span member (ifbreq) */ @@ -150,6 +149,8 @@ #define BRDGSPROTO 31 /* set protocol (ifbrparam) */ #define BRDGSTXHC 32 /* set tx hold count (ifbrparam) */ #define BRDGSIFAMAX 33 /* set max interface addrs (ifbreq) */ +#define BRDGGHOSTFILTER 34 /* get host filter (ifbrhostfilter) */ +#define BRDGSHOSTFILTER 35 /* set host filter (ifbrhostfilter) */ /* * Generic bridge control request.
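[Editor's usage note] Like every other entry in the bridge_control tables, the new BRDGGHOSTFILTER/BRDGSHOSTFILTER commands are reached from userland through the SIOCGDRVSPEC/SIOCSDRVSPEC ioctls with a struct ifdrv wrapper, the way ifconfig drives the other BRDG* commands. A minimal sketch, assuming access to the private if_bridgevar.h definitions; the bridge, member, MAC, and IPv4 values are illustrative:

    #include <string.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/sockio.h>
    #include <arpa/inet.h>
    #include <net/if.h>
    #include <net/ethernet.h>
    #include <net/if_bridgevar.h>   /* private: ifbrhostfilter, BRDG* */

    /* Pin bridge0 member en2 to a single source MAC and IPv4 address. */
    static int
    bridge_set_host_filter(int s)   /* s: any AF_INET datagram socket */
    {
            struct ifbrhostfilter req;
            struct ifdrv ifd;
            const uint8_t mac[ETHER_ADDR_LEN] =
                { 0xa8, 0x20, 0x66, 0x01, 0x02, 0x03 };

            memset(&req, 0, sizeof(req));
            req.ifbrhf_flags = IFBRHF_ENABLED | IFBRHF_HWSRC | IFBRHF_IPSRC;
            strlcpy(req.ifbrhf_ifsname, "en2", sizeof(req.ifbrhf_ifsname));
            memcpy(req.ifbrhf_hwsrca, mac, ETHER_ADDR_LEN);
            inet_pton(AF_INET, "192.168.2.20", &req.ifbrhf_ipsrc);

            memset(&ifd, 0, sizeof(ifd));
            strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
            ifd.ifd_cmd = BRDGSHOSTFILTER;  /* index 35 in the tables above */
            ifd.ifd_len = sizeof(req);
            ifd.ifd_data = &req;

            /* BC_F_SUSER: the set side requires super-user privileges */
            return (ioctl(s, SIOCSDRVSPEC, &ifd));
    }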
@@ -200,13 +201,13 @@ struct ifbreq { #define IFBF_FLUSHALL 0x01 /* flush all addresses */ /* BRDGSFILT */ -#define IFBF_FILT_USEIPF 0x00000001 /* run pfil hooks on the bridge +#define IFBF_FILT_USEIPF 0x00000001 /* run pfil hooks on the bridge interface */ -#define IFBF_FILT_MEMBER 0x00000002 /* run pfil hooks on the member +#define IFBF_FILT_MEMBER 0x00000002 /* run pfil hooks on the member interfaces */ -#define IFBF_FILT_ONLYIP 0x00000004 /* only pass IP[46] packets when +#define IFBF_FILT_ONLYIP 0x00000004 /* only pass IP[46] packets when pfil is enabled */ -#define IFBF_FILT_MASK 0x00000007 /* mask of valid values */ +#define IFBF_FILT_MASK 0x00000007 /* mask of valid values */ /* APPLE MODIFICATION : Default is to pass non-IP packets. */ @@ -489,6 +490,59 @@ struct ifbpstpconf64 { #pragma pack() +/* + * Bridge member host filter. + */ + +#define IFBRHF_ENABLED 0x01 +#define IFBRHF_HWSRC 0x02 /* Valid with enabled flags */ +#define IFBRHF_IPSRC 0x04 /* Valid with enabled flags */ + +#pragma pack(4) + +struct ifbrhostfilter { + uint32_t ifbrhf_flags; /* flags */ + char ifbrhf_ifsname[IFNAMSIZ]; /* member if name */ + uint8_t ifbrhf_hwsrca[ETHER_ADDR_LEN]; + uint32_t ifbrhf_ipsrc; +}; + +#pragma pack() + +/* + * sysctl net.link.bridge.hostfilterstats + */ +struct bridge_hostfilter_stats { + uint64_t brhf_bad_ether_type; + uint64_t brhf_bad_ether_srchw_addr; + + uint64_t brhf_ether_too_small; + uint64_t brhf_ether_pullup_failed; + + uint64_t brhf_arp_ok; + uint64_t brhf_arp_too_small; + uint64_t brhf_arp_pullup_failed; + uint64_t brhf_arp_bad_hw_type; + uint64_t brhf_arp_bad_pro_type; + uint64_t brhf_arp_bad_hw_len; + uint64_t brhf_arp_bad_pro_len; + uint64_t brhf_arp_bad_op; + uint64_t brhf_arp_bad_sha; + uint64_t brhf_arp_bad_spa; + + uint64_t brhf_ip_ok; + uint64_t brhf_ip_too_small; + uint64_t brhf_ip_pullup_failed; + uint64_t brhf_ip_bad_srcaddr; + uint64_t brhf_ip_bad_proto; + + uint64_t brhf_dhcp_too_small; + uint64_t brhf_dhcp_bad_op; + uint64_t brhf_dhcp_bad_htype; + uint64_t brhf_dhcp_bad_hlen; + uint64_t brhf_dhcp_bad_chaddr; + uint64_t brhf_dhcp_bad_ciaddr; +}; #ifdef XNU_KERNEL_PRIVATE diff --git a/bsd/net/if_gif.c b/bsd/net/if_gif.c index feb736273..c33ef2f04 100644 --- a/bsd/net/if_gif.c +++ b/bsd/net/if_gif.c @@ -684,7 +684,6 @@ gif_ioctl( case SIOCSIFPHYADDR_IN6_32: case SIOCSIFPHYADDR_IN6_64: #endif /* INET6 */ - case SIOCSLIFPHYADDR: switch (cmd) { #if INET case SIOCSIFPHYADDR: @@ -713,11 +712,6 @@ gif_ioctl( break; } #endif - case SIOCSLIFPHYADDR: - src = (struct sockaddr *) - &(((struct if_laddrreq *)data)->addr); - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->dstaddr); } /* sa_family must be equal */ @@ -771,9 +765,6 @@ gif_ioctl( break; return (EAFNOSUPPORT); #endif /* INET6 */ - case SIOCSLIFPHYADDR: - /* checks done in the above */ - break; } #define GIF_ORDERED_LOCK(sc, sc2) \ @@ -975,38 +966,6 @@ gif_ioctl( GIF_UNLOCK(sc); break; - case SIOCGLIFPHYADDR: - GIF_LOCK(sc); - if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) { - GIF_UNLOCK(sc); - error = EADDRNOTAVAIL; - goto bad; - } - - /* copy src */ - src = sc->gif_psrc; - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->addr); - size = sizeof (((struct if_laddrreq *)data)->addr); - if (src->sa_len > size) { - GIF_UNLOCK(sc); - return (EINVAL); - } - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); - - /* copy dst */ - src = sc->gif_pdst; - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->dstaddr); - size = sizeof (((struct if_laddrreq *)data)->dstaddr); - if (src->sa_len > 
size) { - GIF_UNLOCK(sc); - return (EINVAL); - } - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); - GIF_UNLOCK(sc); - break; - case SIOCSIFFLAGS: /* if_ioctl() takes care of it */ break; diff --git a/bsd/net/if_ipsec.c b/bsd/net/if_ipsec.c index e1aac8335..460fa731c 100644 --- a/bsd/net/if_ipsec.c +++ b/bsd/net/if_ipsec.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include /* Kernel Control functions */ static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, @@ -127,7 +129,7 @@ ipsec_register_control(void) } bzero(&kern_ctl, sizeof(kern_ctl)); - strncpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); + strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */ kern_ctl.ctl_sendsize = 64 * 1024; @@ -209,6 +211,7 @@ ipsec_ctl_connect(kern_ctl_ref kctlref, *unitinfo = pcb; pcb->ipsec_ctlref = kctlref; pcb->ipsec_unit = sac->sc_unit; + pcb->ipsec_output_service_class = MBUF_SC_OAM; printf("ipsec_ctl_connect: creating interface ipsec%d\n", pcb->ipsec_unit - 1); @@ -217,8 +220,7 @@ ipsec_ctl_connect(kern_ctl_ref kctlref, ipsec_init.ver = IFNET_INIT_CURRENT_VERSION; ipsec_init.len = sizeof (ipsec_init); ipsec_init.name = "ipsec"; - ipsec_init.start = ipsec_start; - ipsec_init.sndq_maxlen = IPSECQ_MAXLEN; + ipsec_init.start = ipsec_start; ipsec_init.unit = pcb->ipsec_unit - 1; ipsec_init.family = ipsec_family; ipsec_init.type = IFT_OTHER; @@ -476,6 +478,7 @@ ipsec_ctl_setopt(__unused kern_ctl_ref kctlref, case IPSEC_OPT_FLAGS: case IPSEC_OPT_EXT_IFDATA_STATS: case IPSEC_OPT_SET_DELEGATE_INTERFACE: + case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: if (kauth_cred_issuser(kauth_cred_get()) == 0) { return EPERM; } @@ -540,6 +543,20 @@ ipsec_ctl_setopt(__unused kern_ctl_ref kctlref, break; } + case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: { + if (len != sizeof(int)) { + result = EMSGSIZE; + break; + } + mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data); + if (output_service_class == MBUF_SC_UNSPEC) { + pcb->ipsec_output_service_class = MBUF_SC_OAM; + } else { + pcb->ipsec_output_service_class = output_service_class; + } + break; + } + default: result = ENOPROTOOPT; break; @@ -578,6 +595,14 @@ ipsec_ctl_getopt(__unused kern_ctl_ref kctlref, *len = snprintf(data, *len, "%s%d", ifnet_name(pcb->ipsec_ifp), ifnet_unit(pcb->ipsec_ifp)) + 1; break; + case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: { + if (*len != sizeof(int)) { + result = EMSGSIZE; + break; + } + *(int *)data = so_svc2tc(pcb->ipsec_output_service_class); + break; + } default: result = ENOPROTOOPT; break; @@ -598,36 +623,26 @@ ipsec_output(ifnet_t interface, int length; struct ip *ip; struct ip6_hdr *ip6; - struct secpolicy *sp = NULL; struct ip_out_args ipoa; struct ip6_out_args ip6oa; int error = 0; u_int ip_version = 0; uint32_t af; - int flags = 0;; - int out_interface_index = 0; + int flags = 0; struct flowadv *adv = NULL; - uint32_t policy_id = 0; - - /* Find policy using ID in mbuf */ - policy_id = data->m_pkthdr.ipsec_policy; - sp = key_getspbyid(policy_id); + // Make sure this packet isn't looping through the interface + if (necp_get_last_interface_index_from_packet(data) == interface->if_index) { + error = -1; + goto 
ipsec_output_err; + } + + // Mark the interface so NECP can evaluate tunnel policy + necp_mark_packet_from_interface(data, interface); - if (sp == NULL) { - printf("ipsec_output: No policy specified, dropping packet.\n"); - goto ipsec_output_err; - } - - /* Validate policy */ - if (sp->ipsec_if != pcb->ipsec_ifp) { - printf("ipsec_output: Selected policy does not match %s interface.\n", pcb->ipsec_ifp->if_xname); - goto ipsec_output_err; - } - ip = mtod(data, struct ip *); ip_version = ip->ip_v; - + switch (ip_version) { case 4: /* Tap */ @@ -637,18 +652,18 @@ ipsec_output(ifnet_t interface, /* Apply encryption */ bzero(&ipsec_state, sizeof(ipsec_state)); ipsec_state.m = data; - ipsec_state.dst = (struct sockaddr *)&sp->spidx.dst; + ipsec_state.dst = (struct sockaddr *)&ip->ip_dst; bzero(&ipsec_state.ro, sizeof(ipsec_state.ro)); - error = ipsec4_output(&ipsec_state, sp, 0); + error = ipsec4_interface_output(&ipsec_state, interface); data = ipsec_state.m; if (error || data == NULL) { - printf("ipsec_output: ipsec4_output error.\n"); + printf("ipsec_output: ipsec4_output error %d.\n", error); goto ipsec_output_err; } - /* Set traffic class to OAM, set flow */ - m_set_service_class(data, MBUF_SC_OAM); + /* Set traffic class, set flow */ + m_set_service_class(data, pcb->ipsec_output_service_class); data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET; data->m_pkthdr.pkt_flowid = interface->if_flowhash; data->m_pkthdr.pkt_proto = ip->ip_p; @@ -666,18 +681,14 @@ ipsec_output(ifnet_t interface, /* Send to ip_output */ bzero(&ro, sizeof(ro)); - flags = IP_OUTARGS | /* Passing out args to specify interface */ - IP_NOIPSEC; /* To ensure the packet doesn't go through ipsec twice */ - - if (sp->outgoing_if != NULL) { - out_interface_index = sp->outgoing_if->if_index; - } + flags = IP_OUTARGS | /* Passing out args to specify interface */ + IP_NOIPSEC; /* To ensure the packet doesn't go through ipsec twice */ bzero(&ipoa, sizeof(ipoa)); ipoa.ipoa_flowadv.code = 0; ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR; - if (out_interface_index) { - ipoa.ipoa_boundif = out_interface_index; + if (ipsec_state.outgoing_if) { + ipoa.ipoa_boundif = ipsec_state.outgoing_if; ipoa.ipoa_flags |= IPOAF_BOUND_IF; } @@ -696,29 +707,28 @@ ipsec_output(ifnet_t interface, af = AF_INET6; bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af)); + data = ipsec6_splithdr(data); ip6 = mtod(data, struct ip6_hdr *); + + bzero(&ipsec_state, sizeof(ipsec_state)); + ipsec_state.m = data; + ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst; + bzero(&ipsec_state.ro, sizeof(ipsec_state.ro)); - u_char *nexthdrp = &ip6->ip6_nxt; - struct mbuf *mprev = data; - - int needipsectun = 0; - error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, sp, flags, &needipsectun); - if (needipsectun) { - error = ipsec6_output_tunnel(&ipsec_state, sp, flags); - if (ipsec_state.tunneled == 4) /* tunneled in IPv4 - packet is gone */ - goto done; - } + error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m); + if (error == 0 && ipsec_state.tunneled == 4) /* tunneled in IPv4 - packet is gone */ + goto done; data = ipsec_state.m; if (error || data == NULL) { - printf("ipsec_output: ipsec6_output error.\n"); + printf("ipsec_output: ipsec6_output error %d.\n", error); goto ipsec_output_err; } - /* Set traffic class to OAM, set flow */ - m_set_service_class(data, MBUF_SC_OAM); + /* Set traffic class, set flow */ + m_set_service_class(data, pcb->ipsec_output_service_class); data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET; 
data->m_pkthdr.pkt_flowid = interface->if_flowhash; - data->m_pkthdr.pkt_proto = ip->ip_p; + data->m_pkthdr.pkt_proto = ip6->ip6_nxt; data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC); /* Increment statistics */ @@ -730,15 +740,11 @@ ipsec_output(ifnet_t interface, flags = IPV6_OUTARGS; - if (sp->outgoing_if != NULL) { - out_interface_index = sp->outgoing_if->if_index; - } - bzero(&ip6oa, sizeof(ip6oa)); ip6oa.ip6oa_flowadv.code = 0; ip6oa.ip6oa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR; - if (out_interface_index) { - ip6oa.ip6oa_boundif = out_interface_index; + if (ipsec_state.outgoing_if) { + ip6oa.ip6oa_boundif = ipsec_state.outgoing_if; ip6oa.ip6oa_flags |= IPOAF_BOUND_IF; } @@ -760,9 +766,6 @@ ipsec_output(ifnet_t interface, } done: - if (sp != NULL) { - key_freesp(sp, KEY_SADB_UNLOCKED); - } return error; ipsec_output_err: @@ -774,13 +777,14 @@ ipsec_output_err: static void ipsec_start(ifnet_t interface) { - mbuf_t data; - - for (;;) { - if (ifnet_dequeue(interface, &data) != 0) - break; - (void) ipsec_output(interface, data); - } + mbuf_t data; + + for (;;) { + if (ifnet_dequeue(interface, &data) != 0) + break; + if (ipsec_output(interface, data) != 0) + break; + } } /* Network Interface functions */ @@ -880,11 +884,22 @@ ipsec_detached( /* Protocol Handlers */ static errno_t -ipsec_proto_input(__unused ifnet_t interface, +ipsec_proto_input(ifnet_t interface, protocol_family_t protocol, - mbuf_t m, - __unused char *frame_header) + mbuf_t m, + __unused char *frame_header) { + struct ip *ip; + uint32_t af = 0; + ip = mtod(m, struct ip *); + if (ip->ip_v == 4) + af = AF_INET; + else if (ip->ip_v == 6) + af = AF_INET6; + + mbuf_pkthdr_setrcvif(m, interface); + bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af)); + if (proto_input(protocol, m) != 0) m_freem(m); @@ -924,3 +939,38 @@ ipsec_attach_proto(ifnet_t interface, return result; } + +errno_t +ipsec_inject_inbound_packet(ifnet_t interface, + mbuf_t packet) +{ + errno_t error; + protocol_family_t protocol; + if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) { + return error; + } + + return ipsec_proto_input(interface, protocol, packet, NULL); +} + +void +ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family) +{ + if (packet != NULL && interface != NULL) { + struct ipsec_pcb *pcb = ifnet_softc(interface); + if (pcb != NULL) { + /* Set traffic class, set flow */ + m_set_service_class(packet, pcb->ipsec_output_service_class); + packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET; + packet->m_pkthdr.pkt_flowid = interface->if_flowhash; + if (family == AF_INET) { + struct ip *ip = mtod(packet, struct ip *); + packet->m_pkthdr.pkt_proto = ip->ip_p; + } else if (family == AF_INET6) { + struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *); + packet->m_pkthdr.pkt_proto = ip6->ip6_nxt; + } + packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC); + } + } +} diff --git a/bsd/net/if_ipsec.h b/bsd/net/if_ipsec.h index 670e01f23..e665f5b21 100644 --- a/bsd/net/if_ipsec.h +++ b/bsd/net/if_ipsec.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
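[Editor's usage note] With the IPSEC_OPT_OUTPUT_TRAFFIC_CLASS plumbing above, a control client can replace the old hard-coded MBUF_SC_OAM marking with any socket traffic class; the kernel converts the SO_TC_* value with so_tc2msc() and falls back to MBUF_SC_OAM for MBUF_SC_UNSPEC. A minimal sketch of a client opting in, assuming the private kern_control and if_ipsec.h headers; error handling omitted:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/sys_domain.h>
    #include <sys/kern_control.h>
    #include <net/if_ipsec.h>   /* private: IPSEC_CONTROL_NAME, IPSEC_OPT_* */

    /* Create an ipsec interface whose output carries traffic class tc. */
    static int
    ipsec_ctl_connect_with_class(int tc)   /* tc: an SO_TC_* value */
    {
            struct ctl_info info;
            struct sockaddr_ctl addr;
            int fd;

            fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

            memset(&info, 0, sizeof(info));
            strlcpy(info.ctl_name, IPSEC_CONTROL_NAME, sizeof(info.ctl_name));
            ioctl(fd, CTLIOCGINFO, &info);          /* name -> ctl_id */

            memset(&addr, 0, sizeof(addr));
            addr.sc_len = sizeof(addr);
            addr.sc_family = AF_SYSTEM;
            addr.ss_sysaddr = AF_SYS_CONTROL;
            addr.sc_id = info.ctl_id;
            addr.sc_unit = 0;                       /* next free ipsec unit */
            connect(fd, (struct sockaddr *)&addr, sizeof(addr));

            setsockopt(fd, SYSPROTO_CONTROL, IPSEC_OPT_OUTPUT_TRAFFIC_CLASS,
                &tc, sizeof(tc));                   /* len must be sizeof(int) */
            return (fd);
    }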
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,11 +37,12 @@ /* Control block allocated for each kernel control connection */ struct ipsec_pcb { - kern_ctl_ref ipsec_ctlref; - ifnet_t ipsec_ifp; - u_int32_t ipsec_unit; - u_int32_t ipsec_flags; - int ipsec_ext_ifdata_stats; + kern_ctl_ref ipsec_ctlref; + ifnet_t ipsec_ifp; + u_int32_t ipsec_unit; + u_int32_t ipsec_flags; + int ipsec_ext_ifdata_stats; + mbuf_svc_class_t ipsec_output_service_class; }; errno_t ipsec_register_control(void); @@ -49,6 +50,10 @@ errno_t ipsec_register_control(void); /* Helpers */ int ipsec_interface_isvalid (ifnet_t interface); +errno_t ipsec_inject_inbound_packet(ifnet_t interface, mbuf_t packet); + +void ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family); + #endif /* @@ -65,6 +70,7 @@ int ipsec_interface_isvalid (ifnet_t interface); #define IPSEC_OPT_INC_IFDATA_STATS_IN 4 /* set to increment stat counters (type struct ipsec_stats_param) */ #define IPSEC_OPT_INC_IFDATA_STATS_OUT 5 /* set to increment stat counters (type struct ipsec_stats_param) */ #define IPSEC_OPT_SET_DELEGATE_INTERFACE 6 /* set the delegate interface (char[]) */ +#define IPSEC_OPT_OUTPUT_TRAFFIC_CLASS 7 /* set the traffic class for packets leaving the interface, see sys/socket.h */ /* * ipsec stats parameter structure */ diff --git a/bsd/net/if_llreach.h b/bsd/net/if_llreach.h index b36612ce1..e2ce299a8 100644 --- a/bsd/net/if_llreach.h +++ b/bsd/net/if_llreach.h @@ -58,7 +58,7 @@ struct if_llreach_info { #ifdef XNU_KERNEL_PRIVATE #include -#include +#include #include #include #if INET6 diff --git a/bsd/net/if_utun.c b/bsd/net/if_utun.c index c4fe86099..5570c8578 100644 --- a/bsd/net/if_utun.c +++ b/bsd/net/if_utun.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Apple Inc. All rights reserved. + * Copyright (c) 2008-2014 Apple Inc. All rights reserved. 
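[Editor's usage note] The if_utun.c hunks below make a similar move to the new-model output path and add a back-pressure knob: UTUN_OPT_MAX_PENDING_PACKETS bounds how many packets may sit unread on the control socket before utun_start() disables interface output (utun_ctl_rcvd() re-enables it as the client drains). A slow reader can widen that window; a minimal sketch against an already-connected utun kernel-control socket:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/sys_domain.h>
    #include <sys/kern_control.h>
    #include <net/if_utun.h>    /* private: UTUN_OPT_MAX_PENDING_PACKETS */

    /* fd: a connected com.apple.net.utun_control socket */
    static int
    utun_set_queue_depth(int fd, u_int32_t depth)
    {
            /* 0 is rejected with EINVAL; the default after connect is 1 */
            return (setsockopt(fd, SYSPROTO_CONTROL,
                UTUN_OPT_MAX_PENDING_PACKETS, &depth, sizeof(depth)));
    }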
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -66,8 +66,11 @@ static errno_t utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *uniti int opt, void *data, size_t *len); static errno_t utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t len); +static void utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int flags); /* Network Interface functions */ +static void utun_start(ifnet_t interface); static errno_t utun_output(ifnet_t interface, mbuf_t data); static errno_t utun_demux(ifnet_t interface, mbuf_t data, char *frame_header, protocol_family_t *protocol); @@ -134,9 +137,9 @@ utun_register_control(void) } bzero(&kern_ctl, sizeof(kern_ctl)); - strncpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); + strlcpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; - kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */ + kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED; /* Require root */ kern_ctl.ctl_sendsize = 512 * 1024; kern_ctl.ctl_recvsize = 512 * 1024; kern_ctl.ctl_connect = utun_ctl_connect; @@ -144,6 +147,7 @@ utun_register_control(void) kern_ctl.ctl_send = utun_ctl_send; kern_ctl.ctl_setopt = utun_ctl_setopt; kern_ctl.ctl_getopt = utun_ctl_getopt; + kern_ctl.ctl_rcvd = utun_ctl_rcvd; utun_ctl_init_crypto(); @@ -198,6 +202,8 @@ utun_ctl_connect( *unitinfo = pcb; pcb->utun_ctlref = kctlref; pcb->utun_unit = sac->sc_unit; + pcb->utun_pending_packets = 0; + pcb->utun_max_pending_packets = 1; printf("utun_ctl_connect: creating interface utun%d\n", pcb->utun_unit - 1); @@ -205,12 +211,11 @@ utun_ctl_connect( bzero(&utun_init, sizeof(utun_init)); utun_init.ver = IFNET_INIT_CURRENT_VERSION; utun_init.len = sizeof (utun_init); - utun_init.flags = IFNET_INIT_LEGACY; utun_init.name = "utun"; + utun_init.start = utun_start; utun_init.unit = pcb->utun_unit - 1; utun_init.family = utun_family; utun_init.type = IFT_OTHER; - utun_init.output = utun_output; utun_init.demux = utun_demux; utun_init.framer_extended = utun_framer; utun_init.add_proto = utun_add_proto; @@ -557,8 +562,7 @@ utun_ctl_setopt( utsp->utsp_bytes, utsp->utsp_errors); break; } - - case UTUN_OPT_SET_DELEGATE_INTERFACE: { + case UTUN_OPT_SET_DELEGATE_INTERFACE: { ifnet_t del_ifp = NULL; char name[IFNAMSIZ]; @@ -578,10 +582,24 @@ utun_ctl_setopt( } break; } - - default: + case UTUN_OPT_MAX_PENDING_PACKETS: { + u_int32_t max_pending_packets = 0; + if (len != sizeof(u_int32_t)) { + result = EMSGSIZE; + break; + } + max_pending_packets = *(u_int32_t *)data; + if (max_pending_packets == 0) { + result = EINVAL; + break; + } + pcb->utun_max_pending_packets = max_pending_packets; + break; + } + default: { result = ENOPROTOOPT; break; + } } return result; @@ -621,7 +639,11 @@ utun_ctl_getopt( case UTUN_OPT_GENERATE_CRYPTO_KEYS_IDX: result = utun_ctl_generate_crypto_keys_idx(kctlref, unit, unitinfo, opt, data, len); break; - + case UTUN_OPT_MAX_PENDING_PACKETS: { + *len = sizeof(u_int32_t); + *((u_int32_t *)data) = pcb->utun_max_pending_packets; + break; + } default: result = ENOPROTOOPT; break; @@ -630,7 +652,68 @@ utun_ctl_getopt( return result; } +static void +utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags) +{ +#pragma unused(kctlref, unit, flags) + bool reenable_output = false; + struct utun_pcb *pcb = unitinfo; + if (pcb == NULL) { + return; + } + ifnet_lock_exclusive(pcb->utun_ifp); + if (pcb->utun_pending_packets > 0) { + 
pcb->utun_pending_packets--; + if (pcb->utun_pending_packets < pcb->utun_max_pending_packets) { + reenable_output = true; + } + } + + if (reenable_output) { + errno_t error = ifnet_enable_output(pcb->utun_ifp); + if (error != 0) { + printf("utun_ctl_rcvd: ifnet_enable_output returned error %d\n", error); + } + } + ifnet_lock_done(pcb->utun_ifp); +} + /* Network Interface functions */ +static void +utun_start(ifnet_t interface) +{ + mbuf_t data; + struct utun_pcb*pcb = ifnet_softc(interface); + for (;;) { + bool can_accept_packets = true; + ifnet_lock_shared(pcb->utun_ifp); + can_accept_packets = (pcb->utun_pending_packets < pcb->utun_max_pending_packets); + if (!can_accept_packets && pcb->utun_ctlref) { + u_int32_t difference = 0; + if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) { + if (difference > 0) { + // If the low-water mark has not yet been reached, we still need to enqueue data + // into the buffer + can_accept_packets = true; + } + } + } + if (!can_accept_packets) { + errno_t error = ifnet_disable_output(interface); + if (error != 0) { + printf("utun_start: ifnet_disable_output returned error %d\n", error); + } + ifnet_lock_done(pcb->utun_ifp); + break; + } + ifnet_lock_done(pcb->utun_ifp); + if (ifnet_dequeue(interface, &data) != 0) + break; + if (utun_output(interface, data) != 0) + break; + } +} + static errno_t utun_output( ifnet_t interface, @@ -667,8 +750,16 @@ utun_output( *(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data)); length = mbuf_pkthdr_len(data); + // Increment packet count optimistically + ifnet_lock_exclusive(pcb->utun_ifp); + pcb->utun_pending_packets++; + ifnet_lock_done(pcb->utun_ifp); result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR); if (result != 0) { + // Decrement packet count if errored + ifnet_lock_exclusive(pcb->utun_ifp); + pcb->utun_pending_packets--; + ifnet_lock_done(pcb->utun_ifp); mbuf_freem(data); printf("utun_output - ctl_enqueuembuf failed: %d\n", result); @@ -685,7 +776,6 @@ utun_output( return 0; } -/* Network Interface functions */ static errno_t utun_demux( __unused ifnet_t interface, diff --git a/bsd/net/if_utun.h b/bsd/net/if_utun.h index 51122c980..cc74d87b3 100644 --- a/bsd/net/if_utun.h +++ b/bsd/net/if_utun.h @@ -45,6 +45,8 @@ struct utun_pcb { u_int32_t utun_unit; u_int32_t utun_flags; int utun_ext_ifdata_stats; + u_int32_t utun_max_pending_packets; + u_int32_t utun_pending_packets; utun_crypto_ctx_t utun_crypto_ctx[UTUN_CRYPTO_CTX_NUM_DIRS]; }; @@ -81,6 +83,8 @@ errno_t utun_register_control(void); #endif /* PRIVATE */ #define UTUN_OPT_SET_DELEGATE_INTERFACE 15 /* set the delegate interface (char[]) */ +#define UTUN_OPT_MAX_PENDING_PACKETS 16 /* the number of packets that can be waiting to be read + from the control socket at a time */ /* * Flags for by UTUN_OPT_FLAGS diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index 7b82a0fbe..efa21e8b8 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -683,6 +683,7 @@ struct ifnet { u_int32_t type; /* delegated i/f type */ u_int32_t family; /* delegated i/f family */ u_int32_t subfamily; /* delegated i/f sub-family */ + uint32_t expensive:1; /* delegated i/f expensive? 
*/ } if_delegated; u_int64_t if_data_threshold; @@ -989,6 +990,42 @@ struct ifmultiaddr { ((_ifp)->if_subfamily == IFNET_SUBFAMILY_WIFI || \ (_ifp)->if_delegated.subfamily == IFNET_SUBFAMILY_WIFI) +/* + * Indicate whether or not the immediate interface, or the interface delegated + * by it, is a Wired interface (several families). Delegated interface + * family is set/cleared along with the delegated ifp; we cache the family + * for performance to avoid dereferencing delegated ifp each time. + * + * Note that this is meant to be used only for accounting and policy purposes; + * certain places need to explicitly know the immediate interface type, and + * this macro should not be used there. + */ +#define IFNET_IS_WIRED(_ifp) \ + ((_ifp)->if_family == IFNET_FAMILY_ETHERNET || \ + (_ifp)->if_delegated.family == IFNET_FAMILY_ETHERNET || \ + (_ifp)->if_family == IFNET_FAMILY_FIREWIRE || \ + (_ifp)->if_delegated.family == IFNET_FAMILY_FIREWIRE) + +/* + * Indicate whether or not the immediate interface, or the interface delegated + * by it, is marked as expensive. The delegated interface is set/cleared + * along with the delegated ifp; we cache the flag for performance to avoid + * dereferencing delegated ifp each time. + * + * Note that this is meant to be used only for policy purposes. + */ +#define IFNET_IS_EXPENSIVE(_ifp) \ + ((_ifp)->if_eflags & IFEF_EXPENSIVE || \ + (_ifp)->if_delegated.expensive) + +/* + * We don't support AWDL interface delegation. + */ +#define IFNET_IS_AWDL_RESTRICTED(_ifp) \ + (((_ifp)->if_eflags & (IFEF_AWDL|IFEF_AWDL_RESTRICTED)) == \ + (IFEF_AWDL|IFEF_AWDL_RESTRICTED)) + + extern struct ifnethead ifnet_head; extern struct ifnet **ifindex2ifnet; extern u_int32_t if_sndq_maxlen; diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c index 1f98f6ac0..1a45bf4d8 100644 --- a/bsd/net/if_vlan.c +++ b/bsd/net/if_vlan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2013 Apple Inc. All rights reserved. + * Copyright (c) 2003-2014 Apple Inc. All rights reserved. 
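[Editor's usage note] The IFNET_IS_WIRED/IFNET_IS_EXPENSIVE macros above read only fields cached on the immediate ifnet, so accounting and policy paths never chase the delegated pointer. A hypothetical consumer sketch; IFNET_IS_CELLULAR is the pre-existing sibling macro, and the bucket constants are invented for this illustration:

    /* Classify an interface for accounting using the cached delegate state. */
    enum {
            MY_BUCKET_CELL,         /* illustrative constants, not in this patch */
            MY_BUCKET_WIRED,
            MY_BUCKET_METERED,
            MY_BUCKET_OTHER
    };

    static int
    my_traffic_bucket(struct ifnet *ifp)
    {
            if (IFNET_IS_CELLULAR(ifp))
                    return (MY_BUCKET_CELL);
            if (IFNET_IS_WIRED(ifp))
                    return (MY_BUCKET_WIRED);       /* Ethernet or FireWire */
            if (IFNET_IS_EXPENSIVE(ifp))
                    return (MY_BUCKET_METERED);     /* IFEF_EXPENSIVE or delegate */
            return (MY_BUCKET_OTHER);
    }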
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -196,13 +196,15 @@ typedef struct vlan_parent { vlan_parent_entry vlp_parent_list;/* list of parents */ struct ifnet * vlp_ifp; /* interface */ struct ifvlan_list vlp_vlan_list; /* list of VLAN's */ -#define VLPF_SUPPORTS_VLAN_MTU 0x1 -#define VLPF_CHANGE_IN_PROGRESS 0x2 -#define VLPF_DETACHING 0x4 +#define VLPF_SUPPORTS_VLAN_MTU 0x00000001 +#define VLPF_CHANGE_IN_PROGRESS 0x00000002 +#define VLPF_DETACHING 0x00000004 +#define VLPF_LINK_EVENT_REQUIRED 0x00000008 u_int32_t vlp_flags; + u_int32_t vlp_event_code; struct ifdevmtu vlp_devmtu; - SInt32 vlp_retain_count; - UInt32 vlp_signature; /* VLP_SIGNATURE */ + int32_t vlp_retain_count; + u_int32_t vlp_signature; /* VLP_SIGNATURE */ } vlan_parent, * vlan_parent_ref; #define IFV_SIGNATURE 0xbeefbeef @@ -224,8 +226,8 @@ struct ifvlan { u_int32_t ifv_flags; bpf_packet_func ifv_bpf_input; bpf_packet_func ifv_bpf_output; - SInt32 ifv_retain_count; - UInt32 ifv_signature; /* IFV_SIGNATURE */ + int32_t ifv_retain_count; + u_int32_t ifv_signature; /* IFV_SIGNATURE */ }; typedef struct ifvlan * ifvlan_ref; @@ -263,13 +265,6 @@ vlan_parent_flags_set_supports_vlan_mtu(vlan_parent_ref vlp) return; } -static __inline__ void -vlan_parent_flags_clear_supports_vlan_mtu(vlan_parent_ref vlp) -{ - vlp->vlp_flags &= ~VLPF_SUPPORTS_VLAN_MTU; - return; -} - static __inline__ int vlan_parent_flags_change_in_progress(vlan_parent_ref vlp) { @@ -303,6 +298,26 @@ vlan_parent_flags_set_detaching(struct vlan_parent * vlp) return; } +static __inline__ int +vlan_parent_flags_link_event_required(vlan_parent_ref vlp) +{ + return ((vlp->vlp_flags & VLPF_LINK_EVENT_REQUIRED) != 0); +} + +static __inline__ void +vlan_parent_flags_set_link_event_required(vlan_parent_ref vlp) +{ + vlp->vlp_flags |= VLPF_LINK_EVENT_REQUIRED; + return; +} + +static __inline__ void +vlan_parent_flags_clear_link_event_required(vlan_parent_ref vlp) +{ + vlp->vlp_flags &= ~VLPF_LINK_EVENT_REQUIRED; + return; +} + /** ** ifvlan_flags in-lines routines @@ -340,13 +355,6 @@ ifvlan_flags_set_ready(ifvlan_ref ifv) return; } -static __inline__ void -ifvlan_flags_clear_ready(ifvlan_ref ifv) -{ - ifv->ifv_flags &= ~IFVF_READY; - return; -} - static __inline__ int ifvlan_flags_detaching(ifvlan_ref ifv) { @@ -413,7 +421,7 @@ ifvlan_retain(ifvlan_ref ifv) static void ifvlan_release(ifvlan_ref ifv) { - UInt32 old_retain_count; + u_int32_t old_retain_count; if (ifv->ifv_signature != IFV_SIGNATURE) { panic("ifvlan_release: bad signature\n"); @@ -609,7 +617,7 @@ vlan_parent_retain(vlan_parent_ref vlp) static void vlan_parent_release(vlan_parent_ref vlp) { - UInt32 old_retain_count; + u_int32_t old_retain_count; if (vlp->vlp_signature != VLP_SIGNATURE) { panic("vlan_parent_release: signature is bad\n"); @@ -684,12 +692,34 @@ vlan_parent_wait(vlan_parent_ref vlp, const char * msg) static void vlan_parent_signal(vlan_parent_ref vlp, const char * msg) { + struct ifnet * vlp_ifp = vlp->vlp_ifp; + + if (vlan_parent_flags_link_event_required(vlp)) { + vlan_parent_flags_clear_link_event_required(vlp); + if (!vlan_parent_flags_detaching(vlp)) { + u_int32_t event_code = vlp->vlp_event_code; + ifvlan_ref ifv; + + vlan_unlock(); + + /* we can safely walk the list unlocked */ + LIST_FOREACH(ifv, &vlp->vlp_vlan_list, ifv_vlan_list) { + struct ifnet * ifp = ifv->ifv_ifp; + + interface_link_event(ifp, event_code); + } + if (g_vlan->verbose) { + printf("%s%d: propagated link event to vlans\n", + ifnet_name(vlp_ifp), ifnet_unit(vlp_ifp)); + } + vlan_lock(); + } + } 
vlan_parent_flags_clear_change_in_progress(vlp); wakeup((caddr_t)vlp); if (g_vlan->verbose) { - struct ifnet * ifp = vlp->vlp_ifp; - - printf("%s%d: %s wakeup\n", ifnet_name(ifp), ifnet_unit(ifp), msg); + printf("%s%d: %s wakeup\n", + ifnet_name(vlp_ifp), ifnet_unit(vlp_ifp), msg); } return; } @@ -2028,7 +2058,7 @@ interface_link_event(struct ifnet * ifp, u_int32_t event_code) event.header.event_code = event_code; event.header.event_data[0] = ifnet_family(ifp); event.unit = (u_int32_t) ifnet_unit(ifp); - strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); + strlcpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); ifnet_event(ifp, &event.header); return; } @@ -2036,7 +2066,6 @@ interface_link_event(struct ifnet * ifp, u_int32_t event_code) static void vlan_parent_link_event(struct ifnet * p, u_int32_t event_code) { - ifvlan_ref ifv; vlan_parent_ref vlp; vlan_lock(); @@ -2051,25 +2080,15 @@ vlan_parent_link_event(struct ifnet * p, u_int32_t event_code) vlan_unlock(); return; } - + vlan_parent_flags_set_link_event_required(vlp); + vlp->vlp_event_code = event_code; + if (vlan_parent_flags_change_in_progress(vlp)) { + /* don't block waiting to generate an event */ + vlan_unlock(); + return; + } vlan_parent_retain(vlp); vlan_parent_wait(vlp, "vlan_parent_link_event"); - if (vlan_parent_flags_detaching(vlp)) { - goto signal_done; - } - - vlan_unlock(); - - /* vlan_parent_wait() gives us exclusive access to the list */ - LIST_FOREACH(ifv, &vlp->vlp_vlan_list, ifv_vlan_list) { - struct ifnet * ifp = ifv->ifv_ifp; - - interface_link_event(ifp, event_code); - } - - vlan_lock(); - - signal_done: vlan_parent_signal(vlp, "vlan_parent_link_event"); vlan_unlock(); vlan_parent_release(vlp); diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c index 7b64e747b..22b18df05 100644 --- a/bsd/net/kpi_interface.c +++ b/bsd/net/kpi_interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2013 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,6 +62,7 @@ #include #include #include +#include #ifdef INET #include #endif @@ -204,7 +205,7 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0, * to point to storage of at least IFNAMSIZ bytes. It is safe * to write to this. */ - strncpy(__DECONST(char *, ifp->if_name), einit.name, IFNAMSIZ); + strlcpy(__DECONST(char *, ifp->if_name), einit.name, IFNAMSIZ); ifp->if_type = einit.type; ifp->if_family = einit.family; ifp->if_subfamily = einit.subfamily; @@ -345,6 +346,8 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0, bzero(&ifp->if_broadcast, sizeof (ifp->if_broadcast)); } + IFCQ_TARGET_QDELAY(&ifp->if_snd) = + einit.output_target_qdelay; IFCQ_MAXLEN(&ifp->if_snd) = einit.sndq_maxlen; if (error == 0) { @@ -481,6 +484,9 @@ ifnet_flags(ifnet_t interface) * If IFEF_AWDL has been set on the interface and the caller attempts * to clear one or more of the associated flags in IFEF_AWDL_MASK, * return failure. + * + * If IFEF_AWDL_RESTRICTED is set by the caller, make sure IFEF_AWDL is set + * on the interface. * * All other flags not associated with AWDL are not affected. 
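[Editor's usage note] Concretely, the AWDL checks in the next hunks enforce three outcomes; a sketch of hypothetical ifnet_set_eflags() calls and the results they would produce once the check returns EINVAL instead of 1:

    /* Hypothetical, independent scenarios against an interface ifp: */
    static void
    awdl_eflags_examples(ifnet_t ifp)
    {
            errno_t err;

            /* Non-AWDL interface: setting IFEF_AWDL expands to the whole mask */
            err = ifnet_set_eflags(ifp, IFEF_AWDL, IFEF_AWDL);      /* 0 */

            /* Setting IFEF_AWDL_RESTRICTED without IFEF_AWDL is rejected */
            err = ifnet_set_eflags(ifp, IFEF_AWDL_RESTRICTED,
                IFEF_AWDL_RESTRICTED);                              /* EINVAL */

            /* AWDL interface: clearing IFEF_AWDL clears the whole mask */
            err = ifnet_set_eflags(ifp, 0, IFEF_AWDL);              /* 0 */
    }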
* @@ -498,7 +504,7 @@ ifnet_awdl_check_eflags(ifnet_t ifp, u_int32_t *new_eflags, u_int32_t *mask) if (ifp->if_eflags & IFEF_AWDL) { if (eflags & IFEF_AWDL) { if ((eflags & IFEF_AWDL_MASK) != IFEF_AWDL_MASK) - return (1); + return (EINVAL); } else { *new_eflags &= ~IFEF_AWDL_MASK; *mask |= IFEF_AWDL_MASK; @@ -506,7 +512,9 @@ ifnet_awdl_check_eflags(ifnet_t ifp, u_int32_t *new_eflags, u_int32_t *mask) } else if (eflags & IFEF_AWDL) { *new_eflags |= IFEF_AWDL_MASK; *mask |= IFEF_AWDL_MASK; - } + } else if (eflags & IFEF_AWDL_RESTRICTED && + !(ifp->if_eflags & IFEF_AWDL)) + return (EINVAL); return (0); } @@ -514,9 +522,14 @@ ifnet_awdl_check_eflags(ifnet_t ifp, u_int32_t *new_eflags, u_int32_t *mask) errno_t ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask) { + uint32_t oeflags; + struct kev_msg ev_msg; + struct net_event_data ev_data; + if (interface == NULL) return (EINVAL); + bzero(&ev_msg, sizeof(ev_msg)); ifnet_lock_exclusive(interface); /* * Sanity checks for IFEF_AWDL and its related flags. @@ -525,9 +538,39 @@ ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask) ifnet_lock_done(interface); return (EINVAL); } + oeflags = interface->if_eflags; interface->if_eflags = (new_flags & mask) | (interface->if_eflags & ~mask); ifnet_lock_done(interface); + if (interface->if_eflags & IFEF_AWDL_RESTRICTED && + !(oeflags & IFEF_AWDL_RESTRICTED)) { + ev_msg.event_code = KEV_DL_AWDL_RESTRICTED; + /* + * The interface is now restricted to applications that have + * the entitlement. + * The check for the entitlement will be done in the data + * path, so we don't have to do anything here. + */ + } else if (oeflags & IFEF_AWDL_RESTRICTED && + !(interface->if_eflags & IFEF_AWDL_RESTRICTED)) + ev_msg.event_code = KEV_DL_AWDL_UNRESTRICTED; + /* + * Notify configd so that it has a chance to perform better + * reachability detection. + */ + if (ev_msg.event_code) { + bzero(&ev_data, sizeof(ev_data)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_DL_SUBCLASS; + strlcpy(ev_data.if_name, interface->if_name, IFNAMSIZ); + ev_data.if_family = interface->if_family; + ev_data.if_unit = interface->if_unit; + ev_msg.dv[0].data_length = sizeof(struct net_event_data); + ev_msg.dv[0].data_ptr = &ev_data; + ev_msg.dv[1].data_length = 0; + kev_post_msg(&ev_msg); + } return (0); } @@ -2443,9 +2486,15 @@ fail: errno_t ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol, - u_int32_t wildcardok, u_int8_t *bitfield) + u_int32_t flags, u_int8_t *bitfield) { u_int32_t ifindex; + u_int32_t inp_flags = 0; + + inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) ? + INPCB_GET_PORTS_USED_WILDCARDOK : 0); + inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) ? + INPCB_GET_PORTS_USED_NOWAKEUPOK : 0); if (bitfield == NULL) return (EINVAL); @@ -2464,8 +2513,11 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol, ifindex = (ifp != NULL) ? 
ifp->if_index : 0; - udp_get_ports_used(ifindex, protocol, wildcardok, bitfield); - tcp_get_ports_used(ifindex, protocol, wildcardok, bitfield); + if (!(flags & IFNET_GET_LOCAL_PORTS_TCPONLY)) + udp_get_ports_used(ifindex, protocol, inp_flags, bitfield); + + if (!(flags & IFNET_GET_LOCAL_PORTS_UDPONLY)) + tcp_get_ports_used(ifindex, protocol, inp_flags, bitfield); return (0); } @@ -2473,7 +2525,9 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol, errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield) { - return (ifnet_get_local_ports_extended(ifp, PF_UNSPEC, 1, bitfield)); + u_int32_t flags = IFNET_GET_LOCAL_PORTS_WILDCARDOK; + return (ifnet_get_local_ports_extended(ifp, PF_UNSPEC, flags, + bitfield)); } errno_t @@ -2558,6 +2612,8 @@ ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp) ifp->if_delegated.type = delegated_ifp->if_type; ifp->if_delegated.family = delegated_ifp->if_family; ifp->if_delegated.subfamily = delegated_ifp->if_subfamily; + ifp->if_delegated.expensive = + delegated_ifp->if_eflags & IFEF_EXPENSIVE ? 1 : 0; printf("%s: is now delegating %s (type 0x%x, family %u, " "sub-family %u)\n", ifp->if_xname, delegated_ifp->if_xname, delegated_ifp->if_type, delegated_ifp->if_family, @@ -2602,3 +2658,29 @@ ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp) return (0); } + +extern u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, + struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count, + size_t frame_data_offset); + +extern errno_t +ifnet_get_ipsec_offload_frames(ifnet_t ifp, + struct ipsec_offload_frame *frames_array, + u_int32_t frames_array_count, + size_t frame_data_offset, + u_int32_t *used_frames_count) +{ + if (frames_array == NULL || used_frames_count == NULL) { + return (EINVAL); + } + + *used_frames_count = 0; + + if (frames_array_count == 0) { + return (0); + } + + *used_frames_count = key_fill_offload_frames_for_savs(ifp, + frames_array, frames_array_count, frame_data_offset); + return (0); +} diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h index 10551a82c..1f4a41534 100644 --- a/bsd/net/kpi_interface.h +++ b/bsd/net/kpi_interface.h @@ -50,12 +50,8 @@ #ifdef XNU_KERNEL_PRIVATE #define KPI_INTERFACE_EMBEDDED 0 #else -#if TARGET_OS_EMBEDDED -#define KPI_INTERFACE_EMBEDDED 1 -#else #define KPI_INTERFACE_EMBEDDED 0 #endif -#endif struct timeval; struct sockaddr; @@ -67,6 +63,7 @@ struct ifnet_demux_desc; /*! @enum Interface Families @abstract Constants defining interface families. + @discussion @constant IFNET_FAMILY_ANY Match interface of any family type. @constant IFNET_FAMILY_LOOPBACK A software loopback interface. @constant IFNET_FAMILY_ETHERNET An Ethernet interface. @@ -124,6 +121,7 @@ enum { IFNET_SUBFAMILY_BLUETOOTH = 2, IFNET_SUBFAMILY_WIFI = 3, IFNET_SUBFAMILY_THUNDERBOLT = 4, + IFNET_SUBFAMILY_RESERVED = 5, }; /* @@ -138,6 +136,7 @@ typedef u_int32_t ifnet_subfamily_t; /*! @enum BPF tap mode @abstract Constants defining BPF tap modes. + @discussion @constant BPF_MODE_DISABLED Disable bpf. @constant BPF_MODE_INPUT Enable input only. @constant BPF_MODE_OUTPUT Enable output only. @@ -166,6 +165,7 @@ typedef u_int32_t protocol_family_t; /*! @enum Interface Abilities @abstract Constants defining interface offload support. + @discussion @constant IFNET_CSUM_IP Hardware will calculate IPv4 checksums. @constant IFNET_CSUM_TCP Hardware will calculate TCP checksums. @constant IFNET_CSUM_UDP Hardware will calculate UDP checksums.
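[Editor's usage note] The flag translation above maps the public IFNET_GET_LOCAL_PORTS_* bits onto INPCB_GET_PORTS_USED_* before walking the TCP and UDP PCB lists. A kernel-client sketch; the driver function name is hypothetical, and the bit test assumes the bitstring(3) layout used by the PCB walker:

    /* Collect in-use local TCP ports, e.g. to program a wake filter. */
    static errno_t
    mydrv_collect_wake_ports(ifnet_t ifp, u_int8_t bitfield[8192])
    {
            bzero(bitfield, 8192);
            return (ifnet_get_local_ports_extended(ifp, PF_INET,
                IFNET_GET_LOCAL_PORTS_WILDCARDOK |
                IFNET_GET_LOCAL_PORTS_TCPONLY, bitfield));
    }

    /* One bit per port number: */
    #define MYDRV_PORT_USED(bf, port) \
            ((bf)[(port) >> 3] & (1 << ((port) & 7)))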
@@ -744,6 +744,7 @@ typedef void (*ifnet_input_poll_func)(ifnet_t interface, u_int32_t flags, /* @enum Interface control commands @abstract Constants defining control commands. + @discussion @constant IFNET_CTL_SET_INPUT_MODEL Set input model. @constant IFNET_CTL_GET_INPUT_MODEL Get input model. @constant IFNET_CTL_SET_LOG Set logging level. @@ -766,6 +767,7 @@ typedef u_int32_t ifnet_ctl_cmd_t; /* @enum Interface model sub-commands @abstract Constants defining model sub-commands. + @discussion @constant IFNET_MODEL_INPUT_POLL_OFF Polling is inactive. When set, the network stack will no longer invoke the input_poll callback until the next time polling is turned on; the driver should @@ -810,6 +812,7 @@ struct ifnet_model_params { @abstract Constants defining logging levels/priorities. A level includes all other levels below it. It is expected that verbosity increases along with the level. + @discussion @constant IFNET_LOG_DEFAULT Revert to default logging level. @constant IFNET_LOG_ALERT Log actions that must be taken immediately. @constant IFNET_LOG_CRITICAL Log critical conditions. @@ -845,6 +848,7 @@ typedef int32_t ifnet_log_level_t; @enum Interface logging facilities @abstract Constants defining the logging facilities which are to be configured with the specified logging level. + @discussion @constant IFNET_LOGF_DLIL The DLIL layer. @constant IFNET_LOGF_FAMILY The networking family layer. @constant IFNET_LOGF_DRIVER The device driver layer. @@ -876,6 +880,7 @@ typedef u_int32_t ifnet_log_flags_t; /* @enum Interface logging category @abstract Constants defining categories for issues experienced. + @discussion @constant IFNET_LOGCAT_CONNECTIVITY Connectivity related issues. @constant IFNET_LOGCAT_QUALITY Quality/fidelity related issues. @constant IFNET_LOGCAT_PERFORMANCE Performance related issues. @@ -968,12 +973,15 @@ typedef errno_t (*ifnet_ctl_func)(ifnet_t interface, ifnet_ctl_cmd_t cmd, through this function. @field pre_enqueue The pre_enqueue function for the interface, valid only if IFNET_INIT_LEGACY is not set, and optional if it is set. - @field start The start function for the interface, valid only if - IFNET_INIT_LEGACY is not set, and required if it is set. + @field start The start function for the interface, valid and required + only if IFNET_INIT_LEGACY is not set. @field output_ctl The output control function for the interface, valid only if IFNET_INIT_LEGACY is not set. @field output_sched_model The IFNET_SCHED_MODEL value for the output queue, as defined in net/if.h + @field output_target_qdelay The target queue delay is used for + dynamically sizing the output queue, valid only if + IFNET_INIT_LEGACY is not set. @field output_bw The effective output bandwidth (in bits per second.) @field output_bw_max The maximum theoretical output bandwidth (in bits per second.) @@ -1037,7 +1045,7 @@ struct ifnet_init_eparams { ifnet_start_func start; /* required only for new model */ ifnet_ctl_func output_ctl; /* optional, only for new model */ u_int32_t output_sched_model; /* optional, only for new model */ - u_int32_t reserved; /* for future use */ + u_int32_t output_target_qdelay; /* optional, only for new model */ u_int64_t output_bw; /* optional */ u_int64_t output_bw_max; /* optional */ u_int64_t output_lt; /* optional */ @@ -2045,6 +2053,7 @@ extern errno_t ifnet_get_tso_mtu(ifnet_t interface, sa_family_t family, /*! @enum Interface wake properties @abstract Constants defining Interface wake properties. + @discussion @constant IFNET_WAKE_ON_MAGIC_PACKET Wake on Magic Packet.
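[Editor's usage note] Since output_target_qdelay takes over the old reserved slot, existing zero-filled initializers keep their behavior and new-model drivers can opt in explicitly. A sketch of the relevant part of an ifnet_allocate_extended() setup; the driver name, start routine, and delay value are illustrative, and the units are whatever the output scheduler expects:

    static void foo_start(ifnet_t interface);   /* hypothetical dequeue loop */

    struct ifnet_init_eparams einit;
    ifnet_t ifp;
    errno_t error;

    bzero(&einit, sizeof (einit));
    einit.ver = IFNET_INIT_CURRENT_VERSION;
    einit.len = sizeof (einit);
    einit.name = "foo";                     /* hypothetical driver */
    einit.unit = 0;
    einit.family = IFNET_FAMILY_ETHERNET;
    einit.type = IFT_ETHER;
    einit.start = foo_start;                /* new model: start is required */
    einit.sndq_maxlen = 128;
    einit.output_target_qdelay = 10;        /* was "reserved"; illustrative */
    /* ... demux, add_proto, del_proto, etc., then: */
    error = ifnet_allocate_extended(&einit, &ifp);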
*/ enum { @@ -3109,6 +3118,10 @@ extern errno_t ifnet_clone_detach(if_clone_t ifcloner); */ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield); +#define IFNET_GET_LOCAL_PORTS_WILDCARDOK 0x1 +#define IFNET_GET_LOCAL_PORTS_NOWAKEUPOK 0x2 +#define IFNET_GET_LOCAL_PORTS_TCPONLY 0x4 +#define IFNET_GET_LOCAL_PORTS_UDPONLY 0x8 /* @function ifnet_get_local_ports_extended @discussion Returns a bitfield indicating which local ports of the @@ -3123,14 +3136,25 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield); all interfaces. @param protocol The protocol family of the sockets. PF_UNSPEC (0) means all protocols, otherwise PF_INET or PF_INET6. - @param wildcardok A boolean value (0 or 1) indicating whether or not - the list of local ports should include those that are used - by sockets that aren't bound to any local address. + @param flags A bitwise combination of the following flags: + IFNET_GET_LOCAL_PORTS_WILDCARDOK: When bit is set, + the list of local ports should include those that are + used by sockets that aren't bound to any local address. + IFNET_GET_LOCAL_PORTS_NOWAKEUPOK: When bit is + set, the list of local ports should return all sockets + including the ones that do not need a wakeup from sleep. + Sockets that do not want to wake from sleep are marked + with a socket option. + IFNET_GET_LOCAL_PORTS_TCPONLY: When bit is set, the list + of local ports should return the ports used by TCP sockets. + IFNET_GET_LOCAL_PORTS_UDPONLY: When bit is set, the list + of local ports should return the ports used by UDP sockets. @param bitfield A pointer to 8192 bytes. @result Returns 0 on success. */ extern errno_t ifnet_get_local_ports_extended(ifnet_t ifp, - protocol_family_t protocol, u_int32_t wildcardok, u_int8_t *bitfield); + protocol_family_t protocol, u_int32_t flags, u_int8_t *bitfield); /******************************************************************************/ /* for reporting issues */ /******************************************************************************/ @@ -3164,6 +3188,7 @@ extern errno_t ifnet_report_issues(ifnet_t ifp, u_int8_t modid[IFNET_MODIDLEN], @enum Per packet phy level transmit completion status values @abstract Constants defining possible completion status values. A driver may support all or some of these values. + @discussion @constant IFNET_TX_COMPL_SUCCESS link transmission succeeded @constant IFNET_TX_COMPL_FAIL link transmission failed @constant IFNET_TX_COMPL_ABORTED link transmission aborted, may retry @@ -3277,6 +3302,38 @@ ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp); */ extern errno_t ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp); + +/******************************************************************************/ +/* for interface IPSec keepalive offload */ +/******************************************************************************/ + +#define IPSEC_OFFLOAD_FRAME_DATA_SIZE 128 +struct ipsec_offload_frame { + u_int8_t data[IPSEC_OFFLOAD_FRAME_DATA_SIZE]; /* Frame bytes */ + u_int16_t length; /* Number of valid bytes in data, including offset */ + u_int16_t interval; /* Interval in seconds */ +}; + +/* + @function ifnet_get_ipsec_offload_frames + @discussion Fills out frames_array with IP packets to send at periodic + intervals on behalf of IPSec. + @param ifp The interface to send the frames out on. This is used to + select which IPSec SAs should generate the packets. + @param frames_array An array of ipsec_offload_frame structs. This is + allocated by the caller, and has frames_array_count frames of valid + memory.
+ @param frames_array_count The number of valid frames allocated in + frames_array. + @param frame_data_offset The offset in bytes into each frame data at + which IPSec should write the IP header and payload. + @param used_frames_count The returned number of frames that were filled + out with valid information. + @result Returns 0 on success, error number otherwise. + */ +extern errno_t ifnet_get_ipsec_offload_frames(ifnet_t ifp, + struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count, + size_t frame_data_offset, u_int32_t *used_frames_count); #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/net/ndrv.c b/bsd/net/ndrv.c index 468cee375..34d7504b9 100644 --- a/bsd/net/ndrv.c +++ b/bsd/net/ndrv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997-2012 Apple Inc. All rights reserved. + * Copyright (c) 1997-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -614,7 +614,7 @@ sprint_d(u_int n, char *buf, int buflen) *cp = "0123456789"[n % 10]; n /= 10; } while (n != 0 && buflen > 0); - strncpy(buf, cp, IFNAMSIZ-buflen); + strlcpy(buf, cp, IFNAMSIZ-buflen); return; } @@ -629,7 +629,7 @@ static int name_cmp(struct ifnet *ifp, char *q) r = buf; len = strlen(ifnet_name(ifp)); - strncpy(r, ifnet_name(ifp), IFNAMSIZ); + strlcpy(r, ifnet_name(ifp), IFNAMSIZ); r += len; sprint_d(ifnet_unit(ifp), r, IFNAMSIZ-(r-buf)); #if NDRV_DEBUG @@ -865,7 +865,6 @@ ndrv_handle_ifp_detach(u_int32_t family, short unit) so = np->nd_socket; /* Make sure sending returns an error */ - /* Is this safe? Will we drop the funnel? */ lck_mtx_assert(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); socantsendmore(so); socantrcvmore(so); diff --git a/bsd/net/necp.c b/bsd/net/necp.c new file mode 100644 index 000000000..6c589c221 --- /dev/null +++ b/bsd/net/necp.c @@ -0,0 +1,5788 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
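[Editor's usage note] The keepalive-offload KPI documented above is pull-based: a driver asks the stack to render one frame per eligible IPSec SA (via key_fill_offload_frames_for_savs()) and then replays them from hardware while the host sleeps. A driver-side sketch; mydrv_hw_add_keepalive() is a hypothetical hardware hook:

    #define MYDRV_MAX_KEEPALIVES    8

    static void
    mydrv_program_keepalives(ifnet_t ifp, size_t frame_data_offset)
    {
            struct ipsec_offload_frame frames[MYDRV_MAX_KEEPALIVES];
            u_int32_t used = 0, i;

            if (ifnet_get_ipsec_offload_frames(ifp, frames,
                MYDRV_MAX_KEEPALIVES, frame_data_offset, &used) != 0)
                    return;

            for (i = 0; i < used; i++) {
                    /*
                     * frames[i].data holds frames[i].length valid bytes
                     * (including the offset); resend every
                     * frames[i].interval seconds.
                     */
                    mydrv_hw_add_keepalive(frames[i].data, frames[i].length,
                        frames[i].interval);
            }
    }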
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * NECP - Network Extension Control Policy database + * ------------------------------------------------ + * The goal of this module is to allow clients connecting via a + * kernel control socket to create high-level policy sessions, which + * are ingested into low-level kernel policies that control and tag + * traffic at the application, socket, and IP layers. + * + * ------------------------------------------------ + * Sessions + * ------------------------------------------------ + * Each session owns a list of session policies, each of which can + * specify any combination of conditions and a single result. Each + * session also has a priority level (such as High, Default, or Low) + * which is requested by the client. Based on the requested level, + * a session order value is assigned to the session, which will be used + * to sort kernel policies generated by the session. The session client + * can specify the sub-order for each policy it creates which will be + * used to further sort the kernel policies. + * + * Kernel Control Socket --> 1 necp_session --> list of necp_session_policy structs + * + * ------------------------------------------------ + * Kernel Policies + * ------------------------------------------------ + * Whenever a session sends the Apply command, its policies are ingested + * and generate kernel policies. There are two phases of kernel policy + * ingestion. + * + * 1. The session policy is parsed to create kernel policies at the socket + * and IP layers, when applicable. For example, a policy that requires + * all traffic from App1 to Pass will generate a socket kernel policy to + * match App1 and mark packets with ID1, and also an IP policy to match + * ID1 and let the packet pass. This is handled in necp_apply_policy. The + * resulting kernel policies are added to the global socket and IP layer + * policy lists. + * necp_session_policy --> necp_kernel_socket_policy and necp_kernel_ip_output_policy + * || || + * \/ \/ + * necp_kernel_socket_policies necp_kernel_ip_output_policies + * + * 2. Once the global lists of kernel policies have been filled out, each + * list is traversed to create optimized sub-lists ("Maps") which are used during + * data-path evaluation. IP policies are sent into necp_kernel_ip_output_policies_map, + * which hashes incoming packets based on marked socket-layer policies, and removes + * duplicate or overlapping policies. Socket policies are sent into two maps, + * necp_kernel_socket_policies_map and necp_kernel_socket_policies_app_layer_map. + * The app layer map is used for policy checks coming in from user space, and is one + * list with duplicate and overlapping policies removed. The socket map hashes based + * on app UUID, and removes duplicate and overlapping policies.
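+ *
+ * As an illustrative sketch only (assuming the standard kernel control
+ * APIs from <sys/kern_control.h>; none of this is part of this file), a
+ * user-space client would open a session roughly as follows. The control
+ * is registered CTL_FLAG_PRIVILEGED below, so root is required:
+ *
+ *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
+ *	struct ctl_info info = { .ctl_id = 0, .ctl_name = NECP_CONTROL_NAME };
+ *	ioctl(fd, CTLIOCGINFO, &info);	// look up the dynamic control ID
+ *	struct sockaddr_ctl addr = { .sc_len = sizeof(addr),
+ *	    .sc_family = AF_SYSTEM, .ss_sysaddr = AF_SYS_CONTROL,
+ *	    .sc_id = info.ctl_id, .sc_unit = 0 };	// unit 0 = next free unit
+ *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));	// necp_ctl_connect creates the session
+ *
+ * The resulting kernel-policy flow is summarized as: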
+ * necp_kernel_socket_policy --> necp_kernel_socket_policies_app_layer_map + * |-> necp_kernel_socket_policies_map + * + * necp_kernel_ip_output_policies --> necp_kernel_ip_output_policies_map + * + * ------------------------------------------------ + * Drop All Level + * ------------------------------------------------ + * The Drop All Level is a sysctl that controls the level at which policies are allowed + * to override a global drop rule. If the value is 0, no drop rule is applied. If the value + * is 1, all traffic is dropped. If the value is greater than 1, kernel policies created + * by a session with a priority level better than (numerically less than) the + * Drop All Level can still allow matching traffic to pass; all other traffic is dropped. The Drop All Level is + * dynamically interpreted into necp_drop_all_order, which specifies the equivalent assigned + * session orders to be dropped. + */ + +u_int32_t necp_drop_all_order = 0; +u_int32_t necp_drop_all_level = 0; + +u_int32_t necp_pass_loopback = 1; // 0=Off, 1=On +u_int32_t necp_pass_keepalives = 1; // 0=Off, 1=On + +u_int32_t necp_debug = 0; // 0=None, 1=Basic, 2=EveryMatch + +static int sysctl_handle_necp_level SYSCTL_HANDLER_ARGS; + +SYSCTL_NODE(_net, OID_AUTO, necp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "NECP"); +SYSCTL_INT(_net_necp, NECPCTL_PASS_LOOPBACK, pass_loopback, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_loopback, 0, ""); +SYSCTL_INT(_net_necp, NECPCTL_PASS_KEEPALIVES, pass_keepalives, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_keepalives, 0, ""); +SYSCTL_INT(_net_necp, NECPCTL_DEBUG, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_debug, 0, ""); +SYSCTL_PROC(_net_necp, NECPCTL_DROP_ALL_LEVEL, drop_all_level, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, &necp_drop_all_level, 0, &sysctl_handle_necp_level, "IU", ""); + +#define NECPLOG(level, format, ...) do { \ + log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s: " format "\n", __FUNCTION__, __VA_ARGS__); \ +} while (0) + +#define NECPLOG0(level, msg) do { \ + log((level > LOG_NOTICE ?
LOG_NOTICE : level), "%s: %s\n", __FUNCTION__, msg); \ +} while (0) + +#define LIST_INSERT_SORTED_ASCENDING(head, elm, field, sortfield, tmpelm) do { \ + if (LIST_EMPTY((head)) || (LIST_FIRST(head)->sortfield >= (elm)->sortfield)) { \ + LIST_INSERT_HEAD((head), elm, field); \ + } else { \ + LIST_FOREACH(tmpelm, head, field) { \ + if (LIST_NEXT(tmpelm, field) == NULL || LIST_NEXT(tmpelm, field)->sortfield >= (elm)->sortfield) { \ + LIST_INSERT_AFTER(tmpelm, elm, field); \ + break; \ + } \ + } \ + } \ +} while (0) + +#define LIST_INSERT_SORTED_TWICE_ASCENDING(head, elm, field, firstsortfield, secondsortfield, tmpelm) do { \ + if (LIST_EMPTY((head)) || (LIST_FIRST(head)->firstsortfield > (elm)->firstsortfield) || ((LIST_FIRST(head)->firstsortfield == (elm)->firstsortfield) && (LIST_FIRST(head)->secondsortfield >= (elm)->secondsortfield))) { \ + LIST_INSERT_HEAD((head), elm, field); \ + } else { \ + LIST_FOREACH(tmpelm, head, field) { \ + if (LIST_NEXT(tmpelm, field) == NULL || (LIST_NEXT(tmpelm, field)->firstsortfield > (elm)->firstsortfield) || ((LIST_NEXT(tmpelm, field)->firstsortfield == (elm)->firstsortfield) && (LIST_NEXT(tmpelm, field)->secondsortfield >= (elm)->secondsortfield))) { \ + LIST_INSERT_AFTER(tmpelm, elm, field); \ + break; \ + } \ + } \ + } \ +} while (0) + +#define LIST_INSERT_SORTED_THRICE_ASCENDING(head, elm, field, firstsortfield, secondsortfield, thirdsortfield, tmpelm) do { \ + if (LIST_EMPTY((head)) || (LIST_FIRST(head)->firstsortfield > (elm)->firstsortfield) || ((LIST_FIRST(head)->firstsortfield == (elm)->firstsortfield) && (LIST_FIRST(head)->secondsortfield >= (elm)->secondsortfield)) || ((LIST_FIRST(head)->firstsortfield == (elm)->firstsortfield) && (LIST_FIRST(head)->secondsortfield == (elm)->secondsortfield) && (LIST_FIRST(head)->thirdsortfield >= (elm)->thirdsortfield))) { \ + LIST_INSERT_HEAD((head), elm, field); \ + } else { \ + LIST_FOREACH(tmpelm, head, field) { \ + if (LIST_NEXT(tmpelm, field) == NULL || (LIST_NEXT(tmpelm, field)->firstsortfield > (elm)->firstsortfield) || ((LIST_NEXT(tmpelm, field)->firstsortfield == (elm)->firstsortfield) && (LIST_NEXT(tmpelm, field)->secondsortfield >= (elm)->secondsortfield)) || ((LIST_NEXT(tmpelm, field)->firstsortfield == (elm)->firstsortfield) && (LIST_NEXT(tmpelm, field)->secondsortfield == (elm)->secondsortfield) && (LIST_NEXT(tmpelm, field)->thirdsortfield >= (elm)->thirdsortfield))) { \ + LIST_INSERT_AFTER(tmpelm, elm, field); \ + break; \ + } \ + } \ + } \ +} while (0) + +#define NECP_KERNEL_CONDITION_ALL_INTERFACES 0x00001 +#define NECP_KERNEL_CONDITION_BOUND_INTERFACE 0x00002 +#define NECP_KERNEL_CONDITION_PROTOCOL 0x00004 +#define NECP_KERNEL_CONDITION_LOCAL_START 0x00008 +#define NECP_KERNEL_CONDITION_LOCAL_END 0x00010 +#define NECP_KERNEL_CONDITION_LOCAL_PREFIX 0x00020 +#define NECP_KERNEL_CONDITION_REMOTE_START 0x00040 +#define NECP_KERNEL_CONDITION_REMOTE_END 0x00080 +#define NECP_KERNEL_CONDITION_REMOTE_PREFIX 0x00100 +#define NECP_KERNEL_CONDITION_APP_ID 0x00200 +#define NECP_KERNEL_CONDITION_REAL_APP_ID 0x00400 +#define NECP_KERNEL_CONDITION_DOMAIN 0x00800 +#define NECP_KERNEL_CONDITION_ACCOUNT_ID 0x01000 +#define NECP_KERNEL_CONDITION_POLICY_ID 0x02000 +#define NECP_KERNEL_CONDITION_PID 0x04000 +#define NECP_KERNEL_CONDITION_UID 0x08000 +#define NECP_KERNEL_CONDITION_LAST_INTERFACE 0x10000 // Only set from packets looping between interfaces +#define NECP_KERNEL_CONDITION_TRAFFIC_CLASS 0x20000 +#define NECP_KERNEL_CONDITION_ENTITLEMENT 0x40000 + +struct necp_service_registration { + 
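// Each registered service is linked on two lists at once: its owning
+ // session's list (session_chain) and the global
+ // necp_registered_service_list (kernel_chain), so teardown can proceed
+ // from either the session side or the kernel side.
+ 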
LIST_ENTRY(necp_service_registration) session_chain; + LIST_ENTRY(necp_service_registration) kernel_chain; + u_int32_t service_id; +}; + +struct necp_session { + u_int32_t control_unit; + u_int32_t session_priority; // Descriptive priority rating + u_int32_t session_order; + + bool proc_locked; // Messages must come from proc_uuid + uuid_t proc_uuid; + + bool dirty; + LIST_HEAD(_policies, necp_session_policy) policies; + + LIST_HEAD(_services, necp_service_registration) services; +}; + +struct necp_socket_info { + pid_t pid; + uid_t uid; + union necp_sockaddr_union local_addr; + union necp_sockaddr_union remote_addr; + u_int32_t bound_interface_index; + u_int32_t traffic_class; + u_int16_t protocol; + u_int32_t application_id; + u_int32_t real_application_id; + u_int32_t account_id; + char *domain; + errno_t cred_result; +}; + +static kern_ctl_ref necp_kctlref; +static u_int32_t necp_family; +static OSMallocTag necp_malloc_tag; +static lck_grp_attr_t *necp_kernel_policy_grp_attr = NULL; +static lck_attr_t *necp_kernel_policy_mtx_attr = NULL; +static lck_grp_t *necp_kernel_policy_mtx_grp = NULL; +decl_lck_rw_data(static, necp_kernel_policy_lock); + +static necp_policy_id necp_last_policy_id = 0; +static necp_kernel_policy_id necp_last_kernel_policy_id = 0; +static u_int32_t necp_last_uuid_id = 0; +static u_int32_t necp_last_string_id = 0; + +/* + * On modification, invalidate cached lookups by bumping the generation count. + * Other calls will need to take the slowpath of taking + * the subsystem lock. + */ +static volatile int32_t necp_kernel_socket_policies_gencount; +#define BUMP_KERNEL_SOCKET_POLICIES_GENERATION_COUNT() do { \ + if (OSIncrementAtomic(&necp_kernel_socket_policies_gencount) == (INT32_MAX - 1)) { \ + necp_kernel_socket_policies_gencount = 1; \ + } \ +} while (0) + +static u_int32_t necp_kernel_application_policies_condition_mask; +static size_t necp_kernel_application_policies_count; +static u_int32_t necp_kernel_socket_policies_condition_mask; +static size_t necp_kernel_socket_policies_count; +static size_t necp_kernel_socket_policies_non_app_count; +static LIST_HEAD(_necpkernelsocketconnectpolicies, necp_kernel_socket_policy) necp_kernel_socket_policies; +#define NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS 5 +#define NECP_SOCKET_MAP_APP_ID_TO_BUCKET(appid) (appid ? (appid%(NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS - 1) + 1) : 0) +static struct necp_kernel_socket_policy **necp_kernel_socket_policies_map[NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS]; +static struct necp_kernel_socket_policy **necp_kernel_socket_policies_app_layer_map; +/* + * A note on policy 'maps': these are used for boosting efficiency when matching policies. For each dimension of the map, + * such as an ID, the 0 bucket is reserved for sockets/packets that do not have this parameter, while the other + * buckets lead to an array of policy pointers that form the list applicable when the (parameter%(NUM_BUCKETS - 1) + 1) == bucket_index. + * + * For example, a packet with policy ID of 7, when there are 4 ID buckets, will map to bucket (7%3 + 1) = 2. + */ + +static u_int32_t necp_kernel_ip_output_policies_condition_mask; +static size_t necp_kernel_ip_output_policies_count; +static size_t necp_kernel_ip_output_policies_non_id_count; +static LIST_HEAD(_necpkernelipoutputpolicies, necp_kernel_ip_output_policy) necp_kernel_ip_output_policies; +#define NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS 5 +#define NECP_IP_OUTPUT_MAP_ID_TO_BUCKET(id) (id ? 
(id%(NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS - 1) + 1) : 0) +static struct necp_kernel_ip_output_policy **necp_kernel_ip_output_policies_map[NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS]; + +static struct necp_session *necp_create_session(u_int32_t control_unit); +static void necp_delete_session(struct necp_session *session); + +static void necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_policy_get(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_policy_delete(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_policy_apply_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_policy_list_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_policy_delete_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_set_session_priority(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_lock_session_to_proc(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_register_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); +static void necp_handle_unregister_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset); + +static struct necp_session_policy *necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, size_t conditions_array_size, u_int8_t *result, size_t result_size); +static struct necp_session_policy *necp_policy_find(struct necp_session *session, necp_policy_id policy_id); +static bool necp_policy_mark_for_deletion(struct necp_session *session, struct necp_session_policy *policy); +static bool necp_policy_mark_all_for_deletion(struct necp_session *session); +static bool necp_policy_delete(struct necp_session *session, struct necp_session_policy *policy); +static void necp_policy_apply_all(struct necp_session *session); + +static necp_kernel_policy_id necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter); +static bool necp_kernel_socket_policy_delete(necp_kernel_policy_id policy_id); +static bool necp_kernel_socket_policies_reprocess(void); +static bool necp_kernel_socket_policies_update_uuid_table(void); +static inline struct necp_kernel_socket_policy *necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service); + +static 
necp_kernel_policy_id necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter); +static bool necp_kernel_ip_output_policy_delete(necp_kernel_policy_id policy_id); +static bool necp_kernel_ip_output_policies_reprocess(void); + +static bool necp_is_addr_in_range(struct sockaddr *addr, struct sockaddr *range_start, struct sockaddr *range_end); +static bool necp_is_range_in_range(struct sockaddr *inner_range_start, struct sockaddr *inner_range_end, struct sockaddr *range_start, struct sockaddr *range_end); +static bool necp_is_addr_in_subnet(struct sockaddr *addr, struct sockaddr *subnet_addr, u_int8_t subnet_prefix); +static int necp_addr_compare(struct sockaddr *sa1, struct sockaddr *sa2, int check_port); +static bool necp_buffer_compare_with_bit_prefix(u_int8_t *p1, u_int8_t *p2, u_int32_t bits); +static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet); + +struct necp_uuid_id_mapping { + LIST_ENTRY(necp_uuid_id_mapping) chain; + uuid_t uuid; + u_int32_t id; + u_int32_t refcount; + u_int32_t table_refcount; // Number of references that add this to the UUID policy table +}; +static size_t necp_num_uuid_app_id_mappings; +static bool necp_uuid_app_id_mappings_dirty; +#define NECP_UUID_APP_ID_HASH_SIZE 64 +static u_long necp_uuid_app_id_hash_mask; +static u_long necp_uuid_app_id_hash_num_buckets; +static LIST_HEAD(necp_uuid_id_mapping_head, necp_uuid_id_mapping) *necp_uuid_app_id_hashtbl, necp_uuid_service_id_list; // App map is a real hash table, service map is just a mapping +#define APPUUIDHASH(uuid) (&necp_uuid_app_id_hashtbl[uuid[0] & necp_uuid_app_id_hash_mask]) // Assume the first byte of UUIDs is evenly distributed +static u_int32_t necp_create_uuid_app_id_mapping(uuid_t uuid, bool *allocated_mapping, bool uuid_policy_table); +static bool necp_remove_uuid_app_id_mapping(uuid_t uuid, bool *removed_mapping, bool uuid_policy_table); + +static struct necp_uuid_id_mapping *necp_uuid_lookup_service_id_locked(uuid_t uuid); +static struct necp_uuid_id_mapping *necp_uuid_lookup_uuid_with_service_id_locked(u_int32_t local_id); +static u_int32_t necp_create_uuid_service_id_mapping(uuid_t uuid); +static bool necp_remove_uuid_service_id_mapping(uuid_t uuid); + +struct necp_string_id_mapping { + LIST_ENTRY(necp_string_id_mapping) chain; + char *string; + necp_app_id id; + u_int32_t refcount; +}; +static LIST_HEAD(necp_string_id_mapping_list, necp_string_id_mapping) necp_account_id_list; +static u_int32_t necp_create_string_to_id_mapping(struct necp_string_id_mapping_list *list, char *domain); +static bool necp_remove_string_to_id_mapping(struct necp_string_id_mapping_list *list, char *domain); + +static LIST_HEAD(_necp_kernel_service_list, necp_service_registration) necp_registered_service_list; + +static char *necp_create_trimmed_domain(char *string, size_t length); +static inline int necp_count_dots(char *string, size_t length); + +// Session order allocation +static
u_int32_t +necp_allocate_new_session_order(u_int32_t priority, u_int32_t control_unit) +{ + u_int32_t new_order = 0; + + // For now, just allocate 1000 orders for each priority + if (priority == NECP_SESSION_PRIORITY_UNKNOWN || priority > NECP_SESSION_NUM_PRIORITIES) { + priority = NECP_SESSION_PRIORITY_DEFAULT; + } + + // Use the control unit to decide the offset into the priority list + new_order = (control_unit) + ((priority - 1) * 1000); + + return (new_order); +} + +static inline u_int32_t +necp_get_first_order_for_priority(u_int32_t priority) +{ + return (((priority - 1) * 1000) + 1); +} + +// Sysctl handler +static int +sysctl_handle_necp_level SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + if (necp_drop_all_level == 0) { + necp_drop_all_order = 0; + } else { + necp_drop_all_order = necp_get_first_order_for_priority(necp_drop_all_level); + } + return (error); +} + + +// Kernel Control functions +static errno_t necp_register_control(void); +static errno_t necp_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo); +static errno_t necp_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo); +static errno_t necp_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t m, int flags); +static void necp_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags); +static errno_t necp_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t *len); +static errno_t necp_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t len); + +static bool necp_send_ctl_data(struct necp_session *session, u_int8_t *buffer, size_t buffer_size); + +errno_t +necp_init(void) +{ + errno_t result = 0; + + result = necp_register_control(); + if (result != 0) { + goto done; + } + + necp_kernel_policy_grp_attr = lck_grp_attr_alloc_init(); + if (necp_kernel_policy_grp_attr == NULL) { + NECPLOG0(LOG_ERR, "lck_grp_attr_alloc_init failed"); + result = ENOMEM; + goto done; + } + + necp_kernel_policy_mtx_grp = lck_grp_alloc_init(NECP_CONTROL_NAME, necp_kernel_policy_grp_attr); + if (necp_kernel_policy_mtx_grp == NULL) { + NECPLOG0(LOG_ERR, "lck_grp_alloc_init failed"); + result = ENOMEM; + goto done; + } + + necp_kernel_policy_mtx_attr = lck_attr_alloc_init(); + if (necp_kernel_policy_mtx_attr == NULL) { + NECPLOG0(LOG_ERR, "lck_attr_alloc_init failed"); + result = ENOMEM; + goto done; + } + + lck_rw_init(&necp_kernel_policy_lock, necp_kernel_policy_mtx_grp, necp_kernel_policy_mtx_attr); + + LIST_INIT(&necp_kernel_socket_policies); + LIST_INIT(&necp_kernel_ip_output_policies); + + LIST_INIT(&necp_account_id_list); + + LIST_INIT(&necp_uuid_service_id_list); + + LIST_INIT(&necp_registered_service_list); + + necp_uuid_app_id_hashtbl = hashinit(NECP_UUID_APP_ID_HASH_SIZE, M_NECP, &necp_uuid_app_id_hash_mask); + necp_uuid_app_id_hash_num_buckets = necp_uuid_app_id_hash_mask + 1; + necp_num_uuid_app_id_mappings = 0; + necp_uuid_app_id_mappings_dirty = FALSE; + + necp_kernel_application_policies_condition_mask = 0; + necp_kernel_socket_policies_condition_mask = 0; + necp_kernel_ip_output_policies_condition_mask = 0; + + necp_kernel_application_policies_count = 0; + necp_kernel_socket_policies_count = 0; + necp_kernel_socket_policies_non_app_count = 0; + necp_kernel_ip_output_policies_count = 0; + necp_kernel_ip_output_policies_non_id_count = 0; + + necp_last_policy_id = 0; + necp_last_kernel_policy_id = 0; + + 
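// The generation count is seeded to 1, and the bump macro above wraps it
+ // back to 1 rather than 0, keeping 0 free as a "never valid" value for
+ // cached lookup results.
+ 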
necp_kernel_socket_policies_gencount = 1; + + memset(&necp_kernel_socket_policies_map, 0, sizeof(necp_kernel_socket_policies_map)); + memset(&necp_kernel_ip_output_policies_map, 0, sizeof(necp_kernel_ip_output_policies_map)); + necp_kernel_socket_policies_app_layer_map = NULL; + +done: + if (result != 0) { + if (necp_kernel_policy_mtx_attr != NULL) { + lck_attr_free(necp_kernel_policy_mtx_attr); + necp_kernel_policy_mtx_attr = NULL; + } + if (necp_kernel_policy_mtx_grp != NULL) { + lck_grp_free(necp_kernel_policy_mtx_grp); + necp_kernel_policy_mtx_grp = NULL; + } + if (necp_kernel_policy_grp_attr != NULL) { + lck_grp_attr_free(necp_kernel_policy_grp_attr); + necp_kernel_policy_grp_attr = NULL; + } + if (necp_kctlref != NULL) { + ctl_deregister(necp_kctlref); + necp_kctlref = NULL; + } + } + return (result); +} + +static errno_t +necp_register_control(void) +{ + struct kern_ctl_reg kern_ctl; + errno_t result = 0; + + // Create a tag to allocate memory + necp_malloc_tag = OSMalloc_Tagalloc(NECP_CONTROL_NAME, OSMT_DEFAULT); + + // Find a unique value for our interface family + result = mbuf_tag_id_find(NECP_CONTROL_NAME, &necp_family); + if (result != 0) { + NECPLOG(LOG_ERR, "mbuf_tag_id_find failed: %d", result); + return (result); + } + + bzero(&kern_ctl, sizeof(kern_ctl)); + strlcpy(kern_ctl.ctl_name, NECP_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); + kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; + kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; // Require root + kern_ctl.ctl_sendsize = 64 * 1024; + kern_ctl.ctl_recvsize = 64 * 1024; + kern_ctl.ctl_connect = necp_ctl_connect; + kern_ctl.ctl_disconnect = necp_ctl_disconnect; + kern_ctl.ctl_send = necp_ctl_send; + kern_ctl.ctl_rcvd = necp_ctl_rcvd; + kern_ctl.ctl_setopt = necp_ctl_setopt; + kern_ctl.ctl_getopt = necp_ctl_getopt; + + result = ctl_register(&kern_ctl, &necp_kctlref); + if (result != 0) { + NECPLOG(LOG_ERR, "ctl_register failed: %d", result); + return (result); + } + + return (0); +} + +static errno_t +necp_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo) +{ +#pragma unused(kctlref) + *unitinfo = necp_create_session(sac->sc_unit); + if (*unitinfo == NULL) { + // Could not allocate session + return (ENOBUFS); + } + + return (0); +} + +static errno_t +necp_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo) +{ +#pragma unused(kctlref, unit) + struct necp_session *session = (struct necp_session *)unitinfo; + if (session != NULL) { + necp_policy_mark_all_for_deletion(session); + necp_policy_apply_all(session); + necp_delete_session((struct necp_session *)unitinfo); + } + + return (0); +} + + +// Message handling +static int +necp_packet_find_tlv(mbuf_t packet, int offset, u_int8_t type, int *err, int next) +{ + size_t cursor = offset; + int error = 0; + size_t curr_length; + u_int8_t curr_type; + + *err = 0; + + do { + if (!next) { + error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type); + if (error) { + *err = ENOENT; + return (-1); + } + } else { + next = 0; + curr_type = NECP_TLV_NIL; + } + + if (curr_type != type) { + cursor += sizeof(curr_type); + error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length); + if (error) { + *err = error; + return (-1); + } + cursor += (sizeof(curr_length) + curr_length); + } + } while (curr_type != type); + + return (cursor); +} + +static int +necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, size_t buff_len, void *buff, size_t *value_size) +{ + int error = 0; + size_t length; + + if (tlv_offset < 0) {
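+		// A negative offset means the requested TLV was not found; error is
+		// still 0 here, so callers must also sanity-check the returned value
+		// size (as necp_handle_policy_add does with policy_result_size).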
+ return (error); + } + + error = mbuf_copydata(packet, tlv_offset + sizeof(u_int8_t), sizeof(length), &length); + if (error) { + return (error); + } + + if (value_size != NULL) { + *value_size = length; + } + + if (buff != NULL && buff_len > 0) { + size_t to_copy = (length < buff_len) ? length : buff_len; + error = mbuf_copydata(packet, tlv_offset + sizeof(u_int8_t) + sizeof(length), to_copy, buff); + if (error) { + return (error); + } + } + + return (0); +} + +static int +necp_packet_get_tlv(mbuf_t packet, int offset, u_int8_t type, size_t buff_len, void *buff, size_t *value_size) +{ + int error = 0; + int tlv_offset; + + tlv_offset = necp_packet_find_tlv(packet, offset, type, &error, 0); + if (tlv_offset < 0) { + return (error); + } + + return (necp_packet_get_tlv_at_offset(packet, tlv_offset, buff_len, buff, value_size)); +} + +static u_int8_t * +necp_buffer_write_packet_header(u_int8_t *buffer, u_int8_t packet_type, u_int8_t flags, u_int32_t message_id) +{ + ((struct necp_packet_header *)(void *)buffer)->packet_type = packet_type; + ((struct necp_packet_header *)(void *)buffer)->flags = flags; + ((struct necp_packet_header *)(void *)buffer)->message_id = message_id; + return (buffer + sizeof(struct necp_packet_header)); +} + +static u_int8_t * +necp_buffer_write_tlv(u_int8_t *buffer, u_int8_t type, size_t length, const void *value) +{ + *(u_int8_t *)(buffer) = type; + *(size_t *)(void *)(buffer + sizeof(type)) = length; + if (length > 0) { + memcpy((u_int8_t *)(buffer + sizeof(type) + sizeof(length)), value, length); + } + + return ((u_int8_t *)(buffer + sizeof(type) + sizeof(length) + length)); +} + +static u_int8_t +necp_buffer_get_tlv_type(u_int8_t *buffer, int tlv_offset) +{ + u_int8_t *type = NULL; + + if (buffer == NULL) { + return (0); + } + + type = (u_int8_t *)((u_int8_t *)buffer + tlv_offset); + return (type ? *type : 0); +} + +static size_t +necp_buffer_get_tlv_length(u_int8_t *buffer, int tlv_offset) +{ + size_t *length = NULL; + + if (buffer == NULL) { + return (0); + } + + length = (size_t *)(void *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t)); + return (length ? 
*length : 0); +} + +static u_int8_t * +necp_buffer_get_tlv_value(u_int8_t *buffer, int tlv_offset, size_t *value_size) +{ + u_int8_t *value = NULL; + size_t length = necp_buffer_get_tlv_length(buffer, tlv_offset); + if (length == 0) { + return (value); + } + + if (value_size) { + *value_size = length; + } + + value = (u_int8_t *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t) + sizeof(size_t)); + return (value); +} + +static int +necp_buffer_find_tlv(u_int8_t *buffer, size_t buffer_length, int offset, u_int8_t type, int next) +{ + size_t cursor = offset; + size_t curr_length; + u_int8_t curr_type; + + do { + if (cursor >= buffer_length) { + return (-1); + } + if (!next) { + curr_type = necp_buffer_get_tlv_type(buffer, cursor); + } else { + next = 0; + curr_type = NECP_TLV_NIL; + } + if (curr_type != type) { + curr_length = necp_buffer_get_tlv_length(buffer, cursor); + cursor += (sizeof(curr_type) + sizeof(curr_length) + curr_length); + } + } while (curr_type != type); + + return (cursor); +} + +static bool +necp_send_ctl_data(struct necp_session *session, u_int8_t *buffer, size_t buffer_size) +{ + int error; + + if (necp_kctlref == NULL || session == NULL || buffer == NULL || buffer_size == 0) { + return (FALSE); + } + + error = ctl_enqueuedata(necp_kctlref, session->control_unit, buffer, buffer_size, CTL_DATA_EOR); + + return (error == 0); +} + +static bool +necp_send_success_response(struct necp_session *session, u_int8_t packet_type, u_int32_t message_id) +{ + bool success = TRUE; + u_int8_t *response = NULL; + u_int8_t *cursor = NULL; + size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t); + MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); + if (response == NULL) { + return (FALSE); + } + cursor = response; + cursor = necp_buffer_write_packet_header(cursor, packet_type, NECP_PACKET_FLAGS_RESPONSE, message_id); + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_NIL, 0, NULL); + + if (!(success = necp_send_ctl_data(session, (u_int8_t *)response, response_size))) { + NECPLOG0(LOG_ERR, "Failed to send response"); + } + + FREE(response, M_NECP); + return (success); +} + +static bool +necp_send_error_response(struct necp_session *session, u_int8_t packet_type, u_int32_t message_id, u_int32_t error) +{ + bool success = TRUE; + u_int8_t *response = NULL; + u_int8_t *cursor = NULL; + size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t); + MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); + if (response == NULL) { + return (FALSE); + } + cursor = response; + cursor = necp_buffer_write_packet_header(cursor, packet_type, NECP_PACKET_FLAGS_RESPONSE, message_id); + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_ERROR, sizeof(error), &error); + + if (!(success = necp_send_ctl_data(session, (u_int8_t *)response, response_size))) { + NECPLOG0(LOG_ERR, "Failed to send response"); + } + + FREE(response, M_NECP); + return (success); +} + +static bool +necp_send_policy_id_response(struct necp_session *session, u_int8_t packet_type, u_int32_t message_id, necp_policy_id policy_id) +{ + bool success = TRUE; + u_int8_t *response = NULL; + u_int8_t *cursor = NULL; + size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t); + MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); + if (response == NULL) { + return (FALSE); + } + cursor = response; + cursor = necp_buffer_write_packet_header(cursor, packet_type, 
NECP_PACKET_FLAGS_RESPONSE, message_id); + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_POLICY_ID, sizeof(policy_id), &policy_id); + + if (!(success = necp_send_ctl_data(session, (u_int8_t *)response, response_size))) { + NECPLOG0(LOG_ERR, "Failed to send response"); + } + + FREE(response, M_NECP); + return (success); +} + +static errno_t +necp_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t packet, int flags) +{ +#pragma unused(kctlref, unit, flags) + struct necp_session *session = (struct necp_session *)unitinfo; + struct necp_packet_header header; + int error = 0; + + if (session == NULL) { + NECPLOG0(LOG_ERR, "Got a NULL session"); + error = EINVAL; + goto done; + } + + if (mbuf_pkthdr_len(packet) < sizeof(header)) { + NECPLOG(LOG_ERR, "Got a bad packet, length (%lu) < sizeof header (%lu)", mbuf_pkthdr_len(packet), sizeof(header)); + error = EINVAL; + goto done; + } + + error = mbuf_copydata(packet, 0, sizeof(header), &header); + if (error) { + NECPLOG(LOG_ERR, "mbuf_copydata failed for the header: %d", error); + error = ENOBUFS; + goto done; + } + + if (session->proc_locked) { + // Verify that the calling process is allowed to send messages + uuid_t proc_uuid; + proc_getexecutableuuid(current_proc(), proc_uuid, sizeof(proc_uuid)); + if (uuid_compare(proc_uuid, session->proc_uuid) != 0) { + necp_send_error_response(session, header.packet_type, header.message_id, NECP_ERROR_INVALID_PROCESS); + goto done; + } + } + + switch (header.packet_type) { + case NECP_PACKET_TYPE_POLICY_ADD: { + necp_handle_policy_add(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_POLICY_GET: { + necp_handle_policy_get(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_POLICY_DELETE: { + necp_handle_policy_delete(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_POLICY_APPLY_ALL: { + necp_handle_policy_apply_all(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_POLICY_LIST_ALL: { + necp_handle_policy_list_all(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_POLICY_DELETE_ALL: { + necp_handle_policy_delete_all(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_SET_SESSION_PRIORITY: { + necp_handle_set_session_priority(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_LOCK_SESSION_TO_PROC: { + necp_handle_lock_session_to_proc(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_REGISTER_SERVICE: { + necp_handle_register_service(session, header.message_id, packet, sizeof(header)); + break; + } + case NECP_PACKET_TYPE_UNREGISTER_SERVICE: { + necp_handle_unregister_service(session, header.message_id, packet, sizeof(header)); + break; + } + default: { + NECPLOG(LOG_ERR, "Received unknown message type %d", header.packet_type); + necp_send_error_response(session, header.packet_type, header.message_id, NECP_ERROR_UNKNOWN_PACKET_TYPE); + break; + } + } + +done: + mbuf_freem(packet); + return (error); +} + +static void +necp_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags) +{ +#pragma unused(kctlref, unit, unitinfo, flags) + return; +} + +static errno_t +necp_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t *len) +{ +#pragma unused(kctlref, unit, unitinfo, opt, data, len) + return (0); +} + +static errno_t 
+necp_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t len) +{ +#pragma unused(kctlref, unit, unitinfo, opt, data, len) + return (0); +} + +// Session Management +static struct necp_session * +necp_create_session(u_int32_t control_unit) +{ + struct necp_session *new_session = NULL; + + MALLOC(new_session, struct necp_session *, sizeof(*new_session), M_NECP, M_WAITOK); + if (new_session == NULL) { + goto done; + } + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Create NECP session, control unit %d", control_unit); + } + memset(new_session, 0, sizeof(*new_session)); + new_session->session_priority = NECP_SESSION_PRIORITY_UNKNOWN; + new_session->session_order = necp_allocate_new_session_order(new_session->session_priority, control_unit); + new_session->control_unit = control_unit; + new_session->dirty = FALSE; + LIST_INIT(&new_session->policies); + +done: + return (new_session); +} + +static void +necp_delete_session(struct necp_session *session) +{ + if (session != NULL) { + struct necp_service_registration *service = NULL; + struct necp_service_registration *temp_service = NULL; + LIST_FOREACH_SAFE(service, &session->services, session_chain, temp_service) { + LIST_REMOVE(service, session_chain); + lck_rw_lock_exclusive(&necp_kernel_policy_lock); + LIST_REMOVE(service, kernel_chain); + lck_rw_done(&necp_kernel_policy_lock); + FREE(service, M_NECP); + } + if (necp_debug) { + NECPLOG0(LOG_DEBUG, "Deleted NECP session"); + } + FREE(session, M_NECP); + } +} + +// Session Policy Management +static inline u_int8_t +necp_policy_result_get_type_from_buffer(u_int8_t *buffer, size_t length) +{ + return ((buffer && length >= sizeof(u_int8_t)) ? buffer[0] : 0); +} + +static inline size_t +necp_policy_result_get_parameter_length_from_buffer(u_int8_t *buffer, size_t length) +{ + return ((buffer && length > sizeof(u_int8_t)) ? (length - sizeof(u_int8_t)) : 0); +} + +static inline u_int8_t * +necp_policy_result_get_parameter_pointer_from_buffer(u_int8_t *buffer, size_t length) +{ + return ((buffer && length > sizeof(u_int8_t)) ? 
(buffer + sizeof(u_int8_t)) : NULL); +} + +static bool +necp_policy_result_is_valid(u_int8_t *buffer, size_t length) +{ + bool validated = FALSE; + u_int8_t type = necp_policy_result_get_type_from_buffer(buffer, length); + size_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(buffer, length); + switch (type) { + case NECP_POLICY_RESULT_PASS: { + validated = TRUE; + break; + } + case NECP_POLICY_RESULT_SKIP: { + if (parameter_length >= sizeof(u_int32_t)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_RESULT_DROP: { + validated = TRUE; + break; + } + case NECP_POLICY_RESULT_SOCKET_DIVERT: { + if (parameter_length >= sizeof(u_int32_t)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_RESULT_SOCKET_SCOPED: { + if (parameter_length > 0) { + validated = TRUE; + } + break; + } + case NECP_POLICY_RESULT_IP_TUNNEL: { + if (parameter_length > sizeof(u_int32_t)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_RESULT_SOCKET_FILTER: { + if (parameter_length >= sizeof(u_int32_t)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_RESULT_TRIGGER: + case NECP_POLICY_RESULT_TRIGGER_IF_NEEDED: + case NECP_POLICY_RESULT_TRIGGER_SCOPED: + case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: { + if (parameter_length >= sizeof(uuid_t)) { + validated = TRUE; + } + break; + } + default: { + validated = FALSE; + break; + } + } + + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Policy result type %d, valid %d", type, validated); + } + + return (validated); +} + +static inline u_int8_t +necp_policy_condition_get_type_from_buffer(u_int8_t *buffer, size_t length) +{ + return ((buffer && length >= sizeof(u_int8_t)) ? buffer[0] : 0); +} + +static inline u_int8_t +necp_policy_condition_get_flags_from_buffer(u_int8_t *buffer, size_t length) +{ + return ((buffer && length >= (2 * sizeof(u_int8_t))) ? buffer[1] : 0); +} + +static inline size_t +necp_policy_condition_get_value_length_from_buffer(u_int8_t *buffer, size_t length) +{ + return ((buffer && length >= (2 * sizeof(u_int8_t))) ? (length - (2 * sizeof(u_int8_t))) : 0); +} + +static inline u_int8_t * +necp_policy_condition_get_value_pointer_from_buffer(u_int8_t *buffer, size_t length) +{ + return ((buffer && length > (2 * sizeof(u_int8_t))) ? 
(buffer + (2 * sizeof(u_int8_t))) : NULL); +} + +static inline bool +necp_policy_condition_is_default(u_int8_t *buffer, size_t length) +{ + return (necp_policy_condition_get_type_from_buffer(buffer, length) == NECP_POLICY_CONDITION_DEFAULT); +} + +static inline bool +necp_policy_condition_is_application(u_int8_t *buffer, size_t length) +{ + return (necp_policy_condition_get_type_from_buffer(buffer, length) == NECP_POLICY_CONDITION_APPLICATION); +} + +static inline bool +necp_policy_condition_requires_application(u_int8_t *buffer, size_t length) +{ + u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length); + return (type == NECP_POLICY_CONDITION_REAL_APPLICATION || + type == NECP_POLICY_CONDITION_ENTITLEMENT); +} + +static bool +necp_policy_condition_is_valid(u_int8_t *buffer, size_t length, u_int8_t policy_result_type) +{ + bool validated = FALSE; + bool result_cannot_have_ip_layer = (policy_result_type == NECP_POLICY_RESULT_SOCKET_DIVERT || + policy_result_type == NECP_POLICY_RESULT_SOCKET_FILTER || + policy_result_type == NECP_POLICY_RESULT_TRIGGER || + policy_result_type == NECP_POLICY_RESULT_TRIGGER_IF_NEEDED || + policy_result_type == NECP_POLICY_RESULT_TRIGGER_SCOPED || + policy_result_type == NECP_POLICY_RESULT_NO_TRIGGER_SCOPED || + policy_result_type == NECP_POLICY_RESULT_SOCKET_SCOPED) ? TRUE : FALSE; + size_t condition_length = necp_policy_condition_get_value_length_from_buffer(buffer, length); + u_int8_t *condition_value = necp_policy_condition_get_value_pointer_from_buffer(buffer, length); + u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length); + u_int8_t flags = necp_policy_condition_get_flags_from_buffer(buffer, length); + switch (type) { + case NECP_POLICY_CONDITION_APPLICATION: + case NECP_POLICY_CONDITION_REAL_APPLICATION: { + if (!(flags & NECP_POLICY_CONDITION_FLAGS_NEGATIVE) && + condition_length >= sizeof(uuid_t) && + condition_value != NULL && + !uuid_is_null(condition_value)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_DOMAIN: + case NECP_POLICY_CONDITION_ACCOUNT: + case NECP_POLICY_CONDITION_BOUND_INTERFACE: { + if (condition_length > 0) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_TRAFFIC_CLASS: { + if (condition_length >= sizeof(struct necp_policy_condition_tc_range)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_DEFAULT: + case NECP_POLICY_CONDITION_ALL_INTERFACES: + case NECP_POLICY_CONDITION_ENTITLEMENT: { + if (!(flags & NECP_POLICY_CONDITION_FLAGS_NEGATIVE)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_IP_PROTOCOL: { + if (condition_length >= sizeof(u_int16_t)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_PID: { + if (condition_length >= sizeof(pid_t) && + condition_value != NULL && + *((pid_t *)(void *)condition_value) != 0) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_UID: { + if (condition_length >= sizeof(uid_t)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_LOCAL_ADDR: + case NECP_POLICY_CONDITION_REMOTE_ADDR: { + if (!result_cannot_have_ip_layer && condition_length >= sizeof(struct necp_policy_condition_addr)) { + validated = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_LOCAL_ADDR_RANGE: + case NECP_POLICY_CONDITION_REMOTE_ADDR_RANGE: { + if (!result_cannot_have_ip_layer && condition_length >= sizeof(struct necp_policy_condition_addr_range)) { + validated = TRUE; + } + break; + } + default: { + validated = FALSE; + break; + } + } + + if 
(necp_debug) { + NECPLOG(LOG_DEBUG, "Policy condition type %d, valid %d", type, validated); + } + + return (validated); +} + +static void +necp_handle_set_session_priority(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ + int error; + struct necp_session_policy *policy = NULL; + struct necp_session_policy *temp_policy = NULL; + u_int32_t response_error = NECP_ERROR_INTERNAL; + u_int32_t requested_session_priority = NECP_SESSION_PRIORITY_UNKNOWN; + + // Read requested session priority + error = necp_packet_get_tlv(packet, offset, NECP_TLV_SESSION_PRIORITY, sizeof(requested_session_priority), &requested_session_priority, NULL); + if (error) { + NECPLOG(LOG_ERR, "Failed to get session priority: %d", error); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + + if (session == NULL) { + NECPLOG0(LOG_ERR, "Failed to find session"); + response_error = NECP_ERROR_INTERNAL; + goto fail; + } + + // Enforce special session priorities with entitlements + if (requested_session_priority == NECP_SESSION_PRIORITY_CONTROL || + requested_session_priority == NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL) { + errno_t cred_result = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_NECP_POLICIES, 0); + if (cred_result != 0) { + NECPLOG(LOG_ERR, "Session does not hold necessary entitlement to claim priority level %d", requested_session_priority); + goto fail; + } + } + + if (session->session_priority != requested_session_priority) { + session->session_priority = requested_session_priority; + session->session_order = necp_allocate_new_session_order(session->session_priority, session->control_unit); + session->dirty = TRUE; + + // Mark all policies as needing updates + LIST_FOREACH_SAFE(policy, &session->policies, chain, temp_policy) { + policy->pending_update = TRUE; + } + } + + necp_send_success_response(session, NECP_PACKET_TYPE_SET_SESSION_PRIORITY, message_id); + return; + +fail: + necp_send_error_response(session, NECP_PACKET_TYPE_SET_SESSION_PRIORITY, message_id, response_error); +} + +static void +necp_handle_lock_session_to_proc(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ +#pragma unused(packet, offset) + proc_getexecutableuuid(current_proc(), session->proc_uuid, sizeof(session->proc_uuid)); + session->proc_locked = TRUE; + necp_send_success_response(session, NECP_PACKET_TYPE_LOCK_SESSION_TO_PROC, message_id); +} + +static void +necp_handle_register_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ + int error; + struct necp_service_registration *new_service = NULL; + u_int32_t response_error = NECP_ERROR_INTERNAL; + uuid_t service_uuid; + uuid_clear(service_uuid); + + if (session == NULL) { + NECPLOG0(LOG_ERR, "Failed to find session"); + response_error = NECP_ERROR_INTERNAL; + goto fail; + } + + // Enforce entitlements + errno_t cred_result = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_NECP_POLICIES, 0); + if (cred_result != 0) { + NECPLOG0(LOG_ERR, "Session does not hold necessary entitlement to register service"); + goto fail; + } + + // Read service uuid + error = necp_packet_get_tlv(packet, offset, NECP_TLV_SERVICE_UUID, sizeof(uuid_t), service_uuid, NULL); + if (error) { + NECPLOG(LOG_ERR, "Failed to get service UUID: %d", error); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + + MALLOC(new_service, struct necp_service_registration *, sizeof(*new_service), M_NECP, M_WAITOK); + if (new_service == NULL) { + NECPLOG0(LOG_ERR, "Failed to allocate service registration"); + 
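// Allocation failed; report an internal error to the client rather
+ // than dropping the request silently.
+ 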
response_error = NECP_ERROR_INTERNAL; + goto fail; + } + + lck_rw_lock_exclusive(&necp_kernel_policy_lock); + memset(new_service, 0, sizeof(*new_service)); + new_service->service_id = necp_create_uuid_service_id_mapping(service_uuid); + LIST_INSERT_HEAD(&session->services, new_service, session_chain); + LIST_INSERT_HEAD(&necp_registered_service_list, new_service, kernel_chain); + lck_rw_done(&necp_kernel_policy_lock); + + necp_send_success_response(session, NECP_PACKET_TYPE_REGISTER_SERVICE, message_id); + return; +fail: + necp_send_error_response(session, NECP_PACKET_TYPE_REGISTER_SERVICE, message_id, response_error); +} + +static void +necp_handle_unregister_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ + int error; + struct necp_service_registration *service = NULL; + struct necp_service_registration *temp_service = NULL; + u_int32_t response_error = NECP_ERROR_INTERNAL; + struct necp_uuid_id_mapping *mapping = NULL; + uuid_t service_uuid; + uuid_clear(service_uuid); + + if (session == NULL) { + NECPLOG0(LOG_ERR, "Failed to find session"); + response_error = NECP_ERROR_INTERNAL; + goto fail; + } + + // Read service uuid + error = necp_packet_get_tlv(packet, offset, NECP_TLV_SERVICE_UUID, sizeof(uuid_t), service_uuid, NULL); + if (error) { + NECPLOG(LOG_ERR, "Failed to get service UUID: %d", error); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + + // Remove all matching services for this session + lck_rw_lock_exclusive(&necp_kernel_policy_lock); + mapping = necp_uuid_lookup_service_id_locked(service_uuid); + if (mapping != NULL) { + LIST_FOREACH_SAFE(service, &session->services, session_chain, temp_service) { + if (service->service_id == mapping->id) { + LIST_REMOVE(service, session_chain); + LIST_REMOVE(service, kernel_chain); + FREE(service, M_NECP); + } + } + necp_remove_uuid_service_id_mapping(service_uuid); + } + lck_rw_done(&necp_kernel_policy_lock); + + necp_send_success_response(session, NECP_PACKET_TYPE_UNREGISTER_SERVICE, message_id); + return; +fail: + necp_send_error_response(session, NECP_PACKET_TYPE_UNREGISTER_SERVICE, message_id, response_error); +} + +static void +necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ + bool has_default_condition = FALSE; + bool has_non_default_condition = FALSE; + bool has_application_condition = FALSE; + bool requires_application_condition = FALSE; + u_int8_t *conditions_array = NULL; + size_t conditions_array_size = 0; + int conditions_array_cursor; + + int cursor; + int error = 0; + u_int32_t response_error = NECP_ERROR_INTERNAL; + + necp_policy_order order = 0; + struct necp_session_policy *policy = NULL; + u_int8_t *policy_result = NULL; + size_t policy_result_size = 0; + + // Read policy order + error = necp_packet_get_tlv(packet, offset, NECP_TLV_POLICY_ORDER, sizeof(order), &order, NULL); + if (error) { + NECPLOG(LOG_ERR, "Failed to get policy order: %d", error); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + + // Read policy result + cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_POLICY_RESULT, &error, 0); + error = necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &policy_result_size); + if (error || policy_result_size == 0) { + NECPLOG(LOG_ERR, "Failed to get policy result length: %d", error); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + MALLOC(policy_result, u_int8_t *, policy_result_size, M_NECP, M_WAITOK); + if (policy_result == NULL) { + NECPLOG(LOG_ERR, "Failed to
allocate a policy result buffer (size %lu)", policy_result_size); + response_error = NECP_ERROR_INTERNAL; + goto fail; + } + error = necp_packet_get_tlv_at_offset(packet, cursor, policy_result_size, policy_result, NULL); + if (error) { + NECPLOG(LOG_ERR, "Failed to get policy result: %d", error); + response_error = NECP_ERROR_POLICY_RESULT_INVALID; + goto fail; + } + if (!necp_policy_result_is_valid(policy_result, policy_result_size)) { + NECPLOG0(LOG_ERR, "Failed to validate policy result"); + response_error = NECP_ERROR_POLICY_RESULT_INVALID; + goto fail; + } + + // Read policy conditions + for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_POLICY_CONDITION, &error, 0); + cursor >= 0; + cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_POLICY_CONDITION, &error, 1)) { + size_t condition_size = 0; + necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &condition_size); + + if (condition_size > 0) { + conditions_array_size += (sizeof(u_int8_t) + sizeof(size_t) + condition_size); + } + } + + if (conditions_array_size == 0) { + NECPLOG0(LOG_ERR, "Failed to get policy conditions"); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + MALLOC(conditions_array, u_int8_t *, conditions_array_size, M_NECP, M_WAITOK); + if (conditions_array == NULL) { + NECPLOG(LOG_ERR, "Failed to allocate a policy conditions array (size %lu)", conditions_array_size); + response_error = NECP_ERROR_INTERNAL; + goto fail; + } + + conditions_array_cursor = 0; + for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_POLICY_CONDITION, &error, 0); + cursor >= 0; + cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_POLICY_CONDITION, &error, 1)) { + u_int8_t condition_type = NECP_TLV_POLICY_CONDITION; + size_t condition_size = 0; + necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &condition_size); + if (condition_size > 0 && condition_size <= (conditions_array_size - conditions_array_cursor)) { + // Add type + memcpy((conditions_array + conditions_array_cursor), &condition_type, sizeof(condition_type)); + conditions_array_cursor += sizeof(condition_type); + + // Add length + memcpy((conditions_array + conditions_array_cursor), &condition_size, sizeof(condition_size)); + conditions_array_cursor += sizeof(condition_size); + + // Add value + necp_packet_get_tlv_at_offset(packet, cursor, condition_size, (conditions_array + conditions_array_cursor), NULL); + if (!necp_policy_condition_is_valid((conditions_array + conditions_array_cursor), condition_size, necp_policy_result_get_type_from_buffer(policy_result, policy_result_size))) { + NECPLOG0(LOG_ERR, "Failed to validate policy condition"); + response_error = NECP_ERROR_POLICY_CONDITIONS_INVALID; + goto fail; + } + + if (necp_policy_condition_is_default((conditions_array + conditions_array_cursor), condition_size)) { + has_default_condition = TRUE; + } else { + has_non_default_condition = TRUE; + } + if (has_default_condition && has_non_default_condition) { + NECPLOG0(LOG_ERR, "Failed to validate conditions; contained default and non-default conditions"); + response_error = NECP_ERROR_POLICY_CONDITIONS_INVALID; + goto fail; + } + + if (necp_policy_condition_is_application((conditions_array + conditions_array_cursor), condition_size)) { + has_application_condition = TRUE; + } + + if (necp_policy_condition_requires_application((conditions_array + conditions_array_cursor), condition_size)) { + requires_application_condition = TRUE; + } + + conditions_array_cursor += condition_size; + } + } + + if (requires_application_condition &&
!has_application_condition) { + NECPLOG0(LOG_ERR, "Failed to validate conditions; did not contain application condition"); + response_error = NECP_ERROR_POLICY_CONDITIONS_INVALID; + goto fail; + } + + if ((policy = necp_policy_create(session, order, conditions_array, conditions_array_size, policy_result, policy_result_size)) == NULL) { + response_error = NECP_ERROR_INTERNAL; + goto fail; + } + + necp_send_policy_id_response(session, NECP_PACKET_TYPE_POLICY_ADD, message_id, policy->id); + return; + +fail: + if (policy_result != NULL) { + FREE(policy_result, M_NECP); + } + if (conditions_array != NULL) { + FREE(conditions_array, M_NECP); + } + + necp_send_error_response(session, NECP_PACKET_TYPE_POLICY_ADD, message_id, response_error); +} + +static void +necp_handle_policy_get(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ +#pragma unused(offset) + int error; + u_int8_t *response = NULL; + u_int8_t *cursor = NULL; + u_int32_t response_error = NECP_ERROR_INTERNAL; + necp_policy_id policy_id = 0; + size_t order_tlv_size = 0; + size_t result_tlv_size = 0; + size_t response_size = 0; + + struct necp_session_policy *policy = NULL; + + // Read policy id + error = necp_packet_get_tlv(packet, offset, NECP_TLV_POLICY_ID, sizeof(policy_id), &policy_id, NULL); + if (error) { + NECPLOG(LOG_ERR, "Failed to get policy id: %d", error); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + + policy = necp_policy_find(session, policy_id); + if (policy == NULL || policy->pending_deletion) { + NECPLOG(LOG_ERR, "Failed to find policy with id %d", policy_id); + response_error = NECP_ERROR_POLICY_ID_NOT_FOUND; + goto fail; + } + + order_tlv_size = sizeof(u_int8_t) + sizeof(size_t) + sizeof(necp_policy_order); + result_tlv_size = (policy->result_size ? 
(sizeof(u_int8_t) + sizeof(size_t) + policy->result_size) : 0); + response_size = sizeof(struct necp_packet_header) + order_tlv_size + result_tlv_size + policy->conditions_size; + MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); + if (response == NULL) { + necp_send_error_response(session, NECP_PACKET_TYPE_POLICY_GET, message_id, NECP_ERROR_INTERNAL); + return; + } + + cursor = response; + cursor = necp_buffer_write_packet_header(cursor, NECP_PACKET_TYPE_POLICY_GET, NECP_PACKET_FLAGS_RESPONSE, message_id); + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_POLICY_ORDER, sizeof(necp_policy_order), &policy->order); + + if (result_tlv_size) { + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_POLICY_RESULT, policy->result_size, policy->result); + } + if (policy->conditions_size) { + memcpy(((u_int8_t *)(void *)(cursor)), policy->conditions, policy->conditions_size); + } + + if (!necp_send_ctl_data(session, (u_int8_t *)response, response_size)) { + NECPLOG0(LOG_ERR, "Failed to send response"); + } + + FREE(response, M_NECP); + return; + +fail: + necp_send_error_response(session, NECP_PACKET_TYPE_POLICY_GET, message_id, response_error); +} + +static void +necp_handle_policy_delete(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ + int error; + u_int32_t response_error = NECP_ERROR_INTERNAL; + necp_policy_id policy_id = 0; + + struct necp_session_policy *policy = NULL; + + // Read policy id + error = necp_packet_get_tlv(packet, offset, NECP_TLV_POLICY_ID, sizeof(policy_id), &policy_id, NULL); + if (error) { + NECPLOG(LOG_ERR, "Failed to get policy id: %d", error); + response_error = NECP_ERROR_INVALID_TLV; + goto fail; + } + + policy = necp_policy_find(session, policy_id); + if (policy == NULL || policy->pending_deletion) { + NECPLOG(LOG_ERR, "Failed to find policy with id %d", policy_id); + response_error = NECP_ERROR_POLICY_ID_NOT_FOUND; + goto fail; + } + + necp_policy_mark_for_deletion(session, policy); + + necp_send_success_response(session, NECP_PACKET_TYPE_POLICY_DELETE, message_id); + return; + +fail: + necp_send_error_response(session, NECP_PACKET_TYPE_POLICY_DELETE, message_id, response_error); +} + +static void +necp_handle_policy_apply_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ +#pragma unused(packet, offset) + necp_policy_apply_all(session); + necp_send_success_response(session, NECP_PACKET_TYPE_POLICY_APPLY_ALL, message_id); +} + +static void +necp_handle_policy_list_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ +#pragma unused(packet, offset) + size_t tlv_size = (sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t)); + size_t response_size = 0; + u_int8_t *response = NULL; + u_int8_t *cursor = NULL; + int num_policies = 0; + int cur_policy_index = 0; + struct necp_session_policy *policy; + + LIST_FOREACH(policy, &session->policies, chain) { + if (!policy->pending_deletion) { + num_policies++; + } + } + + // Create a response with one Policy ID TLV for each policy + response_size = sizeof(struct necp_packet_header) + num_policies * tlv_size; + MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK); + if (response == NULL) { + necp_send_error_response(session, NECP_PACKET_TYPE_POLICY_LIST_ALL, message_id, NECP_ERROR_INTERNAL); + return; + } + + cursor = response; + cursor = necp_buffer_write_packet_header(cursor, NECP_PACKET_TYPE_POLICY_LIST_ALL, NECP_PACKET_FLAGS_RESPONSE, message_id); + + LIST_FOREACH(policy, &session->policies, chain) { + 
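// Emit one Policy ID TLV per policy not pending deletion; the
+ // cur_policy_index bound keeps writes within the response buffer that
+ // was sized from the count taken above.
+ 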
if (!policy->pending_deletion && cur_policy_index < num_policies) { + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_POLICY_ID, sizeof(u_int32_t), &policy->id); + cur_policy_index++; + } + } + + if (!necp_send_ctl_data(session, (u_int8_t *)response, response_size)) { + NECPLOG0(LOG_ERR, "Failed to send response"); + } + + FREE(response, M_NECP); +} + +static void +necp_handle_policy_delete_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset) +{ +#pragma unused(packet, offset) + necp_policy_mark_all_for_deletion(session); + necp_send_success_response(session, NECP_PACKET_TYPE_POLICY_DELETE_ALL, message_id); +} + +static necp_policy_id +necp_policy_get_new_id(void) +{ + necp_policy_id newid = 0; + + lck_rw_lock_exclusive(&necp_kernel_policy_lock); + + necp_last_policy_id++; + if (necp_last_policy_id < 1) { + necp_last_policy_id = 1; + } + + newid = necp_last_policy_id; + lck_rw_done(&necp_kernel_policy_lock); + + if (newid == 0) { + NECPLOG0(LOG_DEBUG, "Allocate policy id failed.\n"); + return (0); + } + + return (newid); +} + +static struct necp_session_policy * +necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, size_t conditions_array_size, u_int8_t *result, size_t result_size) +{ + struct necp_session_policy *new_policy = NULL; + struct necp_session_policy *tmp_policy = NULL; + + if (session == NULL || conditions_array == NULL || result == NULL || result_size == 0) { + goto done; + } + + MALLOC_ZONE(new_policy, struct necp_session_policy *, sizeof(*new_policy), M_NECP_SESSION_POLICY, M_WAITOK); + if (new_policy == NULL) { + goto done; + } + + memset(new_policy, 0, sizeof(*new_policy)); + new_policy->applied = FALSE; + new_policy->pending_deletion = FALSE; + new_policy->pending_update = FALSE; + new_policy->order = order; + new_policy->conditions = conditions_array; + new_policy->conditions_size = conditions_array_size; + new_policy->result = result; + new_policy->result_size = result_size; + new_policy->id = necp_policy_get_new_id(); + + LIST_INSERT_SORTED_ASCENDING(&session->policies, new_policy, chain, order, tmp_policy); + + session->dirty = TRUE; + + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Created NECP policy, order %d", order); + } +done: + return (new_policy); +} + +static struct necp_session_policy * +necp_policy_find(struct necp_session *session, necp_policy_id policy_id) +{ + struct necp_session_policy *policy = NULL; + if (policy_id == 0) { + return (NULL); + } + + LIST_FOREACH(policy, &session->policies, chain) { + if (policy->id == policy_id) { + return (policy); + } + } + + return (NULL); +} + +static inline u_int8_t +necp_policy_get_result_type(struct necp_session_policy *policy) +{ + return (policy ? necp_policy_result_get_type_from_buffer(policy->result, policy->result_size) : 0); +} + +static inline size_t +necp_policy_get_result_parameter_length(struct necp_session_policy *policy) +{ + return (policy ? 
necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size) : 0); +} + +static bool +necp_policy_get_result_parameter(struct necp_session_policy *policy, u_int8_t *parameter_buffer, size_t parameter_buffer_length) +{ + if (policy) { + size_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size); + if (parameter_buffer_length >= parameter_length) { + u_int8_t *parameter = necp_policy_result_get_parameter_pointer_from_buffer(policy->result, policy->result_size); + if (parameter && parameter_buffer) { + memcpy(parameter_buffer, parameter, parameter_length); + return (TRUE); + } + } + } + + return (FALSE); +} + +static bool +necp_policy_mark_for_deletion(struct necp_session *session, struct necp_session_policy *policy) +{ + if (session == NULL || policy == NULL) { + return (FALSE); + } + + policy->pending_deletion = TRUE; + session->dirty = TRUE; + + if (necp_debug) { + NECPLOG0(LOG_DEBUG, "Marked NECP policy for removal"); + } + return (TRUE); +} + +static bool +necp_policy_mark_all_for_deletion(struct necp_session *session) +{ + struct necp_session_policy *policy = NULL; + struct necp_session_policy *temp_policy = NULL; + + LIST_FOREACH_SAFE(policy, &session->policies, chain, temp_policy) { + necp_policy_mark_for_deletion(session, policy); + } + + return (TRUE); +} + +static bool +necp_policy_delete(struct necp_session *session, struct necp_session_policy *policy) +{ + if (session == NULL || policy == NULL) { + return (FALSE); + } + + LIST_REMOVE(policy, chain); + + if (policy->result) { + FREE(policy->result, M_NECP); + policy->result = NULL; + } + + if (policy->conditions) { + FREE(policy->conditions, M_NECP); + policy->conditions = NULL; + } + + FREE_ZONE(policy, sizeof(*policy), M_NECP_SESSION_POLICY); + + if (necp_debug) { + NECPLOG0(LOG_DEBUG, "Removed NECP policy"); + } + return (TRUE); +} + +static bool +necp_policy_unapply(struct necp_session_policy *policy) +{ + int i = 0; + if (policy == NULL) { + return (FALSE); + } + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + // Release local uuid mappings + if (!uuid_is_null(policy->applied_app_uuid)) { + bool removed_mapping = FALSE; + if (necp_remove_uuid_app_id_mapping(policy->applied_app_uuid, &removed_mapping, TRUE) && removed_mapping) { + necp_uuid_app_id_mappings_dirty = TRUE; + necp_num_uuid_app_id_mappings--; + } + uuid_clear(policy->applied_app_uuid); + } + if (!uuid_is_null(policy->applied_real_app_uuid)) { + necp_remove_uuid_app_id_mapping(policy->applied_real_app_uuid, NULL, FALSE); + uuid_clear(policy->applied_real_app_uuid); + } + if (!uuid_is_null(policy->applied_service_uuid)) { + necp_remove_uuid_service_id_mapping(policy->applied_service_uuid); + uuid_clear(policy->applied_service_uuid); + } + + // Release string mappings + if (policy->applied_account != NULL) { + necp_remove_string_to_id_mapping(&necp_account_id_list, policy->applied_account); + FREE(policy->applied_account, M_NECP); + policy->applied_account = NULL; + } + + // Remove socket policies + for (i = 0; i < MAX_KERNEL_SOCKET_POLICIES; i++) { + if (policy->kernel_socket_policies[i] != 0) { + necp_kernel_socket_policy_delete(policy->kernel_socket_policies[i]); + policy->kernel_socket_policies[i] = 0; + } + } + + // Remove IP output policies + for (i = 0; i < MAX_KERNEL_IP_OUTPUT_POLICIES; i++) { + if (policy->kernel_ip_output_policies[i] != 0) { + necp_kernel_ip_output_policy_delete(policy->kernel_ip_output_policies[i]); + 
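+			// Clear the slot so a later unapply pass does not try to delete this id again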
policy->kernel_ip_output_policies[i] = 0; + } + } + + policy->applied = FALSE; + + return (TRUE); +} + +#define NECP_KERNEL_POLICY_SUBORDER_ID_TUNNEL_CONDITION 0 +#define NECP_KERNEL_POLICY_SUBORDER_NON_ID_TUNNEL_CONDITION 1 +#define NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION 2 +#define NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS 3 +struct necp_policy_result_ip_tunnel { + u_int32_t secondary_result; + char interface_name[IFXNAMSIZ]; +} __attribute__((__packed__)); + +struct necp_policy_result_service { + uuid_t identifier; + u_int32_t data; +} __attribute__((__packed__)); + +static bool +necp_policy_apply(struct necp_session *session, struct necp_session_policy *policy) +{ + bool socket_only_conditions = FALSE; + bool socket_ip_conditions = FALSE; + + bool socket_layer_non_id_conditions = FALSE; + bool ip_output_layer_non_id_conditions = FALSE; + bool ip_output_layer_id_condition = FALSE; + bool ip_output_layer_tunnel_condition_from_id = FALSE; + bool ip_output_layer_tunnel_condition_from_non_id = FALSE; + necp_kernel_policy_id cond_ip_output_layer_id = NECP_KERNEL_POLICY_ID_NONE; + + u_int32_t master_condition_mask = 0; + u_int32_t master_condition_negated_mask = 0; + ifnet_t cond_bound_interface = NULL; + u_int32_t cond_account_id = 0; + char *cond_domain = NULL; + pid_t cond_pid = 0; + uid_t cond_uid = 0; + necp_app_id cond_app_id = 0; + necp_app_id cond_real_app_id = 0; + struct necp_policy_condition_tc_range cond_traffic_class; + cond_traffic_class.start_tc = 0; + cond_traffic_class.end_tc = 0; + u_int16_t cond_protocol = 0; + union necp_sockaddr_union cond_local_start; + union necp_sockaddr_union cond_local_end; + u_int8_t cond_local_prefix = 0; + union necp_sockaddr_union cond_remote_start; + union necp_sockaddr_union cond_remote_end; + u_int8_t cond_remote_prefix = 0; + size_t offset = 0; + u_int8_t ultimate_result = 0; + u_int32_t secondary_result = 0; + necp_kernel_policy_result_parameter secondary_result_parameter; + memset(&secondary_result_parameter, 0, sizeof(secondary_result_parameter)); + u_int32_t cond_last_interface_index = 0; + necp_kernel_policy_result_parameter ultimate_result_parameter; + memset(&ultimate_result_parameter, 0, sizeof(ultimate_result_parameter)); + + if (policy == NULL) { + return (FALSE); + } + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + // Process conditions + while (offset < policy->conditions_size) { + size_t length = 0; + u_int8_t *value = necp_buffer_get_tlv_value(policy->conditions, offset, &length); + + u_int8_t condition_type = necp_policy_condition_get_type_from_buffer(value, length); + u_int8_t condition_flags = necp_policy_condition_get_flags_from_buffer(value, length); + bool condition_is_negative = condition_flags & NECP_POLICY_CONDITION_FLAGS_NEGATIVE; + size_t condition_length = necp_policy_condition_get_value_length_from_buffer(value, length); + u_int8_t *condition_value = necp_policy_condition_get_value_pointer_from_buffer(value, length); + switch (condition_type) { + case NECP_POLICY_CONDITION_DEFAULT: { + socket_ip_conditions = TRUE; + break; + } + case NECP_POLICY_CONDITION_ALL_INTERFACES: { + master_condition_mask |= NECP_KERNEL_CONDITION_ALL_INTERFACES; + socket_ip_conditions = TRUE; + break; + } + case NECP_POLICY_CONDITION_ENTITLEMENT: { + master_condition_mask |= NECP_KERNEL_CONDITION_ENTITLEMENT; + socket_only_conditions = TRUE; + break; + } + case NECP_POLICY_CONDITION_DOMAIN: { + // Make sure there is only one such rule + if (condition_length > 0 && cond_domain == NULL) { + cond_domain = 
necp_create_trimmed_domain((char *)condition_value, condition_length); + if (cond_domain != NULL) { + master_condition_mask |= NECP_KERNEL_CONDITION_DOMAIN; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_DOMAIN; + } + socket_only_conditions = TRUE; + } + } + break; + } + case NECP_POLICY_CONDITION_ACCOUNT: { + // Make sure there is only one such rule + if (condition_length > 0 && cond_account_id == 0 && policy->applied_account == NULL) { + char *string = NULL; + MALLOC(string, char *, condition_length + 1, M_NECP, M_WAITOK); + if (string != NULL) { + memcpy(string, condition_value, condition_length); + string[condition_length] = 0; + cond_account_id = necp_create_string_to_id_mapping(&necp_account_id_list, string); + if (cond_account_id != 0) { + policy->applied_account = string; // Save the string in parent policy + master_condition_mask |= NECP_KERNEL_CONDITION_ACCOUNT_ID; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_ACCOUNT_ID; + } + socket_only_conditions = TRUE; + } else { + FREE(string, M_NECP); + } + } + } + break; + } + case NECP_POLICY_CONDITION_APPLICATION: { + // Make sure there is only one such rule, because we save the uuid in the policy + if (condition_length >= sizeof(uuid_t) && cond_app_id == 0) { + bool allocated_mapping = FALSE; + uuid_t application_uuid; + memcpy(application_uuid, condition_value, sizeof(uuid_t)); + cond_app_id = necp_create_uuid_app_id_mapping(application_uuid, &allocated_mapping, TRUE); + if (cond_app_id != 0) { + if (allocated_mapping) { + necp_uuid_app_id_mappings_dirty = TRUE; + necp_num_uuid_app_id_mappings++; + } + uuid_copy(policy->applied_app_uuid, application_uuid); + master_condition_mask |= NECP_KERNEL_CONDITION_APP_ID; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_APP_ID; + } + socket_only_conditions = TRUE; + } + } + break; + } + case NECP_POLICY_CONDITION_REAL_APPLICATION: { + // Make sure there is only one such rule, because we save the uuid in the policy + if (condition_length >= sizeof(uuid_t) && cond_real_app_id == 0) { + uuid_t real_application_uuid; + memcpy(real_application_uuid, condition_value, sizeof(uuid_t)); + cond_real_app_id = necp_create_uuid_app_id_mapping(real_application_uuid, NULL, FALSE); + if (cond_real_app_id != 0) { + uuid_copy(policy->applied_real_app_uuid, real_application_uuid); + master_condition_mask |= NECP_KERNEL_CONDITION_REAL_APP_ID; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_REAL_APP_ID; + } + socket_only_conditions = TRUE; + } + } + break; + } + case NECP_POLICY_CONDITION_PID: { + if (condition_length >= sizeof(pid_t)) { + master_condition_mask |= NECP_KERNEL_CONDITION_PID; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_PID; + } + memcpy(&cond_pid, condition_value, sizeof(cond_pid)); + socket_only_conditions = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_UID: { + if (condition_length >= sizeof(uid_t)) { + master_condition_mask |= NECP_KERNEL_CONDITION_UID; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_UID; + } + memcpy(&cond_uid, condition_value, sizeof(cond_uid)); + socket_only_conditions = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_TRAFFIC_CLASS: { + if (condition_length >= sizeof(struct necp_policy_condition_tc_range)) { + master_condition_mask |= NECP_KERNEL_CONDITION_TRAFFIC_CLASS; + if (condition_is_negative) { + 
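+				// A negated condition keeps its bit in the main mask and also sets
+				// it in the negated mask, which inverts the sense of the match when
+				// policies are evaluated.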
master_condition_negated_mask |= NECP_KERNEL_CONDITION_TRAFFIC_CLASS; + } + memcpy(&cond_traffic_class, condition_value, sizeof(cond_traffic_class)); + socket_only_conditions = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_BOUND_INTERFACE: { + if (condition_length <= IFXNAMSIZ && condition_length > 0) { + char interface_name[IFXNAMSIZ]; + memcpy(interface_name, condition_value, condition_length); + interface_name[condition_length - 1] = 0; // Make sure the string is NULL terminated + if (ifnet_find_by_name(interface_name, &cond_bound_interface) == 0) { + master_condition_mask |= NECP_KERNEL_CONDITION_BOUND_INTERFACE; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_BOUND_INTERFACE; + } + } + socket_ip_conditions = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_IP_PROTOCOL: { + if (condition_length >= sizeof(u_int16_t)) { + master_condition_mask |= NECP_KERNEL_CONDITION_PROTOCOL; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_PROTOCOL; + } + memcpy(&cond_protocol, condition_value, sizeof(cond_protocol)); + socket_ip_conditions = TRUE; + } + break; + } + case NECP_POLICY_CONDITION_LOCAL_ADDR: { + struct necp_policy_condition_addr *address_struct = (struct necp_policy_condition_addr *)(void *)condition_value; + cond_local_prefix = address_struct->prefix; + memcpy(&cond_local_start, &address_struct->address, sizeof(address_struct->address)); + master_condition_mask |= NECP_KERNEL_CONDITION_LOCAL_START; + master_condition_mask |= NECP_KERNEL_CONDITION_LOCAL_PREFIX; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_LOCAL_START; + master_condition_negated_mask |= NECP_KERNEL_CONDITION_LOCAL_PREFIX; + } + socket_ip_conditions = TRUE; + break; + } + case NECP_POLICY_CONDITION_REMOTE_ADDR: { + struct necp_policy_condition_addr *address_struct = (struct necp_policy_condition_addr *)(void *)condition_value; + cond_remote_prefix = address_struct->prefix; + memcpy(&cond_remote_start, &address_struct->address, sizeof(address_struct->address)); + master_condition_mask |= NECP_KERNEL_CONDITION_REMOTE_START; + master_condition_mask |= NECP_KERNEL_CONDITION_REMOTE_PREFIX; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_REMOTE_START; + master_condition_negated_mask |= NECP_KERNEL_CONDITION_REMOTE_PREFIX; + } + socket_ip_conditions = TRUE; + break; + } + case NECP_POLICY_CONDITION_LOCAL_ADDR_RANGE: { + struct necp_policy_condition_addr_range *address_struct = (struct necp_policy_condition_addr_range *)(void *)condition_value; + memcpy(&cond_local_start, &address_struct->start_address, sizeof(address_struct->start_address)); + memcpy(&cond_local_end, &address_struct->end_address, sizeof(address_struct->end_address)); + master_condition_mask |= NECP_KERNEL_CONDITION_LOCAL_START; + master_condition_mask |= NECP_KERNEL_CONDITION_LOCAL_END; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_LOCAL_START; + master_condition_negated_mask |= NECP_KERNEL_CONDITION_LOCAL_END; + } + socket_ip_conditions = TRUE; + break; + } + case NECP_POLICY_CONDITION_REMOTE_ADDR_RANGE: { + struct necp_policy_condition_addr_range *address_struct = (struct necp_policy_condition_addr_range *)(void *)condition_value; + memcpy(&cond_remote_start, &address_struct->start_address, sizeof(address_struct->start_address)); + memcpy(&cond_remote_end, &address_struct->end_address, sizeof(address_struct->end_address)); + master_condition_mask |= 
NECP_KERNEL_CONDITION_REMOTE_START; + master_condition_mask |= NECP_KERNEL_CONDITION_REMOTE_END; + if (condition_is_negative) { + master_condition_negated_mask |= NECP_KERNEL_CONDITION_REMOTE_START; + master_condition_negated_mask |= NECP_KERNEL_CONDITION_REMOTE_END; + } + socket_ip_conditions = TRUE; + break; + } + default: { + break; + } + } + + offset += sizeof(u_int8_t) + sizeof(size_t) + length; + } + + // Process result + ultimate_result = necp_policy_get_result_type(policy); + switch (ultimate_result) { + case NECP_POLICY_RESULT_PASS: { + if (socket_only_conditions) { // socket_ip_conditions can be TRUE or FALSE + socket_layer_non_id_conditions = TRUE; + ip_output_layer_id_condition = TRUE; + } else if (socket_ip_conditions) { + socket_layer_non_id_conditions = TRUE; + ip_output_layer_id_condition = TRUE; + ip_output_layer_non_id_conditions = TRUE; + } + break; + } + case NECP_POLICY_RESULT_DROP: { + if (socket_only_conditions) { // socket_ip_conditions can be TRUE or FALSE + socket_layer_non_id_conditions = TRUE; + } else if (socket_ip_conditions) { + socket_layer_non_id_conditions = TRUE; + ip_output_layer_non_id_conditions = TRUE; + } + break; + } + case NECP_POLICY_RESULT_SKIP: { + u_int32_t skip_policy_order = 0; + if (necp_policy_get_result_parameter(policy, (u_int8_t *)&skip_policy_order, sizeof(skip_policy_order))) { + ultimate_result_parameter.skip_policy_order = skip_policy_order; + } + + if (socket_only_conditions) { // socket_ip_conditions can be TRUE or FALSE + socket_layer_non_id_conditions = TRUE; + ip_output_layer_id_condition = TRUE; + } else if (socket_ip_conditions) { + socket_layer_non_id_conditions = TRUE; + ip_output_layer_non_id_conditions = TRUE; + } + break; + } + case NECP_POLICY_RESULT_SOCKET_DIVERT: + case NECP_POLICY_RESULT_SOCKET_FILTER: { + u_int32_t control_unit = 0; + if (necp_policy_get_result_parameter(policy, (u_int8_t *)&control_unit, sizeof(control_unit))) { + ultimate_result_parameter.flow_divert_control_unit = control_unit; + } + socket_layer_non_id_conditions = TRUE; + break; + } + case NECP_POLICY_RESULT_IP_TUNNEL: { + struct necp_policy_result_ip_tunnel tunnel_parameters; + size_t tunnel_parameters_length = necp_policy_get_result_parameter_length(policy); + if (tunnel_parameters_length > sizeof(u_int32_t) && + tunnel_parameters_length <= sizeof(struct necp_policy_result_ip_tunnel) && + necp_policy_get_result_parameter(policy, (u_int8_t *)&tunnel_parameters, sizeof(tunnel_parameters))) { + ifnet_t tunnel_interface = NULL; + tunnel_parameters.interface_name[tunnel_parameters_length - sizeof(u_int32_t) - 1] = 0; // Make sure the string is NULL terminated + if (ifnet_find_by_name(tunnel_parameters.interface_name, &tunnel_interface) == 0) { + ultimate_result_parameter.tunnel_interface_index = tunnel_interface->if_index; + } + + secondary_result = tunnel_parameters.secondary_result; + if (secondary_result) { + cond_last_interface_index = ultimate_result_parameter.tunnel_interface_index; + } + } + + if (socket_only_conditions) { // socket_ip_conditions can be TRUE or FALSE + socket_layer_non_id_conditions = TRUE; + ip_output_layer_id_condition = TRUE; + if (secondary_result) { + ip_output_layer_tunnel_condition_from_id = TRUE; + } + } else if (socket_ip_conditions) { + socket_layer_non_id_conditions = TRUE; + ip_output_layer_id_condition = TRUE; + ip_output_layer_non_id_conditions = TRUE; + if (secondary_result) { + ip_output_layer_tunnel_condition_from_id = TRUE; + ip_output_layer_tunnel_condition_from_non_id = TRUE; + } + } + break; + } + case 
NECP_POLICY_RESULT_TRIGGER: + case NECP_POLICY_RESULT_TRIGGER_IF_NEEDED: + case NECP_POLICY_RESULT_TRIGGER_SCOPED: + case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: { + struct necp_policy_result_service service_parameters; + size_t service_result_length = necp_policy_get_result_parameter_length(policy); + bool has_extra_service_data = FALSE; + if (service_result_length >= (sizeof(service_parameters))) { + has_extra_service_data = TRUE; + } + if (necp_policy_get_result_parameter(policy, (u_int8_t *)&service_parameters, sizeof(service_parameters))) { + ultimate_result_parameter.service.identifier = necp_create_uuid_service_id_mapping(service_parameters.identifier); + if (ultimate_result_parameter.service.identifier != 0) { + uuid_copy(policy->applied_service_uuid, service_parameters.identifier); + socket_layer_non_id_conditions = TRUE; + if (has_extra_service_data) { + ultimate_result_parameter.service.data = service_parameters.data; + } else { + ultimate_result_parameter.service.data = 0; + } + } + } + break; + } + case NECP_POLICY_RESULT_SOCKET_SCOPED: { + size_t interface_name_length = necp_policy_get_result_parameter_length(policy); + if (interface_name_length <= IFXNAMSIZ && interface_name_length > 0) { + char interface_name[IFXNAMSIZ]; + ifnet_t scope_interface = NULL; + necp_policy_get_result_parameter(policy, (u_int8_t *)interface_name, interface_name_length); + interface_name[interface_name_length - 1] = 0; // Make sure the string is NULL terminated + if (ifnet_find_by_name(interface_name, &scope_interface) == 0) { + ultimate_result_parameter.scoped_interface_index = scope_interface->if_index; + socket_layer_non_id_conditions = TRUE; + } + } + } + default: { + break; + } + } + + if (socket_layer_non_id_conditions) { + necp_kernel_policy_id policy_id = necp_kernel_socket_policy_add(policy->id, policy->order, session->session_order, master_condition_mask, master_condition_negated_mask, cond_app_id, cond_real_app_id, cond_account_id, cond_domain, cond_pid, cond_uid, cond_bound_interface, cond_traffic_class, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter); + + if (policy_id == 0) { + NECPLOG0(LOG_DEBUG, "Error applying socket kernel policy"); + goto fail; + } + + cond_ip_output_layer_id = policy_id; + policy->kernel_socket_policies[0] = policy_id; + } + + if (ip_output_layer_non_id_conditions) { + necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS, session->session_order, master_condition_mask, master_condition_negated_mask, NECP_KERNEL_POLICY_ID_NONE, cond_bound_interface, 0, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter); + + if (policy_id == 0) { + NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy"); + goto fail; + } + + policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS] = policy_id; + } + + if (ip_output_layer_id_condition) { + necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, cond_ip_output_layer_id, NULL, 0, 0, NULL, NULL, 0, NULL, NULL, 0, ultimate_result, ultimate_result_parameter); + + if (policy_id == 0) { + NECPLOG0(LOG_DEBUG, 
"Error applying IP output kernel policy"); + goto fail; + } + + policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION] = policy_id; + } + + // Extra policies for IP Output tunnels for when packets loop back + if (ip_output_layer_tunnel_condition_from_id) { + necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_TUNNEL_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter); + + if (policy_id == 0) { + NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy"); + goto fail; + } + + policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_NON_ID_TUNNEL_CONDITION] = policy_id; + } + + if (ip_output_layer_tunnel_condition_from_id) { + necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_TUNNEL_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter); + + if (policy_id == 0) { + NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy"); + goto fail; + } + + policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_ID_TUNNEL_CONDITION] = policy_id; + } + + policy->applied = TRUE; + policy->pending_update = FALSE; + return (TRUE); + +fail: + return (FALSE); +} + +static void +necp_policy_apply_all(struct necp_session *session) +{ + struct necp_session_policy *policy = NULL; + struct necp_session_policy *temp_policy = NULL; + + lck_rw_lock_exclusive(&necp_kernel_policy_lock); + + // Remove exisiting applied policies + if (session->dirty) { + LIST_FOREACH_SAFE(policy, &session->policies, chain, temp_policy) { + if (policy->pending_deletion) { + if (policy->applied) { + necp_policy_unapply(policy); + } + // Delete the policy + necp_policy_delete(session, policy); + } else if (!policy->applied) { + necp_policy_apply(session, policy); + } else if (policy->pending_update) { + // Must have been applied, but needs an update. Remove and re-add. 
+ necp_policy_unapply(policy); + necp_policy_apply(session, policy); + } + } + + necp_kernel_socket_policies_update_uuid_table(); + necp_kernel_socket_policies_reprocess(); + necp_kernel_ip_output_policies_reprocess(); + + // Clear dirty bit flags + session->dirty = FALSE; + } + + lck_rw_done(&necp_kernel_policy_lock); + + if (necp_debug) { + NECPLOG0(LOG_DEBUG, "Applied NECP policies"); + } +} + +// Kernel Policy Management +// --------------------- +// Kernel policies are derived from session policies +static necp_kernel_policy_id +necp_kernel_policy_get_new_id(void) +{ + necp_kernel_policy_id newid = NECP_KERNEL_POLICY_ID_NONE; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + necp_last_kernel_policy_id++; + if (necp_last_kernel_policy_id < NECP_KERNEL_POLICY_ID_FIRST_VALID) { + necp_last_kernel_policy_id = NECP_KERNEL_POLICY_ID_FIRST_VALID; + } + + newid = necp_last_kernel_policy_id; + if (newid == NECP_KERNEL_POLICY_ID_NONE) { + NECPLOG0(LOG_DEBUG, "Allocate kernel policy id failed.\n"); + return (0); + } + + return (newid); +} + +#define NECP_KERNEL_VALID_SOCKET_CONDITIONS (NECP_KERNEL_CONDITION_APP_ID | NECP_KERNEL_CONDITION_REAL_APP_ID | NECP_KERNEL_CONDITION_DOMAIN | NECP_KERNEL_CONDITION_ACCOUNT_ID | NECP_KERNEL_CONDITION_PID | NECP_KERNEL_CONDITION_UID | NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_TRAFFIC_CLASS | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_ENTITLEMENT) +static necp_kernel_policy_id +necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *cond_domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter) +{ + struct necp_kernel_socket_policy *new_kernel_policy = NULL; + struct necp_kernel_socket_policy *tmp_kernel_policy = NULL; + + MALLOC_ZONE(new_kernel_policy, struct necp_kernel_socket_policy *, sizeof(*new_kernel_policy), M_NECP_SOCKET_POLICY, M_WAITOK); + if (new_kernel_policy == NULL) { + goto done; + } + + memset(new_kernel_policy, 0, sizeof(*new_kernel_policy)); + new_kernel_policy->parent_policy_id = parent_policy_id; + new_kernel_policy->id = necp_kernel_policy_get_new_id(); + new_kernel_policy->order = order; + new_kernel_policy->session_order = session_order; + + // Sanitize condition mask + new_kernel_policy->condition_mask = (condition_mask & NECP_KERNEL_VALID_SOCKET_CONDITIONS); + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES) && (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE)) { + new_kernel_policy->condition_mask &= ~NECP_KERNEL_CONDITION_BOUND_INTERFACE; + } + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID) && !(new_kernel_policy->condition_mask & 
NECP_KERNEL_CONDITION_APP_ID)) { + new_kernel_policy->condition_mask &= ~NECP_KERNEL_CONDITION_REAL_APP_ID; + } + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ENTITLEMENT) && !(new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_APP_ID)) { + new_kernel_policy->condition_mask &= ~NECP_KERNEL_CONDITION_ENTITLEMENT; + } + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) && (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX)) { + new_kernel_policy->condition_mask &= ~NECP_KERNEL_CONDITION_LOCAL_PREFIX; + } + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) && (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX)) { + new_kernel_policy->condition_mask &= ~NECP_KERNEL_CONDITION_REMOTE_PREFIX; + } + new_kernel_policy->condition_negated_mask = condition_negated_mask & new_kernel_policy->condition_mask; + + // Set condition values + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_APP_ID) { + new_kernel_policy->cond_app_id = cond_app_id; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID) { + new_kernel_policy->cond_real_app_id = cond_real_app_id; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID) { + new_kernel_policy->cond_account_id = cond_account_id; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_DOMAIN) { + new_kernel_policy->cond_domain = cond_domain; + new_kernel_policy->cond_domain_dot_count = necp_count_dots(cond_domain, strlen(cond_domain)); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_PID) { + new_kernel_policy->cond_pid = cond_pid; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_UID) { + new_kernel_policy->cond_uid = cond_uid; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE) { + if (cond_bound_interface) { + ifnet_reference(cond_bound_interface); + } + new_kernel_policy->cond_bound_interface = cond_bound_interface; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_TRAFFIC_CLASS) { + new_kernel_policy->cond_traffic_class = cond_traffic_class; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_PROTOCOL) { + new_kernel_policy->cond_protocol = cond_protocol; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_START) { + memcpy(&new_kernel_policy->cond_local_start, cond_local_start, cond_local_start->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) { + memcpy(&new_kernel_policy->cond_local_end, cond_local_end, cond_local_end->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) { + new_kernel_policy->cond_local_prefix = cond_local_prefix; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_START) { + memcpy(&new_kernel_policy->cond_remote_start, cond_remote_start, cond_remote_start->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + memcpy(&new_kernel_policy->cond_remote_end, cond_remote_end, cond_remote_end->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + new_kernel_policy->cond_remote_prefix = cond_remote_prefix; + } + + new_kernel_policy->result = result; + memcpy(&new_kernel_policy->result_parameter, &result_parameter, sizeof(result_parameter)); + + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Added kernel policy: socket, id=%d, mask=%x\n", 
new_kernel_policy->id, new_kernel_policy->condition_mask);
+	}
+	LIST_INSERT_SORTED_TWICE_ASCENDING(&necp_kernel_socket_policies, new_kernel_policy, chain, session_order, order, tmp_kernel_policy);
+done:
+	return (new_kernel_policy ? new_kernel_policy->id : 0);
+}
+
+static struct necp_kernel_socket_policy *
+necp_kernel_socket_policy_find(necp_kernel_policy_id policy_id)
+{
+	struct necp_kernel_socket_policy *kernel_policy = NULL;
+	struct necp_kernel_socket_policy *tmp_kernel_policy = NULL;
+
+	if (policy_id == 0) {
+		return (NULL);
+	}
+
+	LIST_FOREACH_SAFE(kernel_policy, &necp_kernel_socket_policies, chain, tmp_kernel_policy) {
+		if (kernel_policy->id == policy_id) {
+			return (kernel_policy);
+		}
+	}
+
+	return (NULL);
+}
+
+static bool
+necp_kernel_socket_policy_delete(necp_kernel_policy_id policy_id)
+{
+	struct necp_kernel_socket_policy *policy = NULL;
+
+	lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+	policy = necp_kernel_socket_policy_find(policy_id);
+	if (policy) {
+		LIST_REMOVE(policy, chain);
+
+		if (policy->cond_bound_interface) {
+			ifnet_release(policy->cond_bound_interface);
+			policy->cond_bound_interface = NULL;
+		}
+
+		if (policy->cond_domain) {
+			FREE(policy->cond_domain, M_NECP);
+			policy->cond_domain = NULL;
+		}
+
+		FREE_ZONE(policy, sizeof(*policy), M_NECP_SOCKET_POLICY);
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+static void
+necp_kernel_socket_policies_dump_all(void)
+{
+	struct necp_kernel_socket_policy *policy = NULL;
+	int policy_i;
+	int app_i;
+
+	if (necp_debug) {
+		NECPLOG0(LOG_DEBUG, "NECP Application Policies:\n");
+		NECPLOG0(LOG_DEBUG, "-----------\n");
+		for (policy_i = 0; necp_kernel_socket_policies_app_layer_map != NULL && necp_kernel_socket_policies_app_layer_map[policy_i] != NULL; policy_i++) {
+			policy = necp_kernel_socket_policies_app_layer_map[policy_i];
+			NECPLOG(LOG_DEBUG, "\t%d. Policy ID: %d, Order: %d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->condition_mask, policy->result, policy->result_parameter);
+		}
+		if (necp_kernel_socket_policies_app_layer_map != NULL && necp_kernel_socket_policies_app_layer_map[0] != NULL) {
+			NECPLOG0(LOG_DEBUG, "-----------\n");
+		}
+
+		NECPLOG0(LOG_DEBUG, "NECP Socket Policies:\n");
+		NECPLOG0(LOG_DEBUG, "-----------\n");
+		for (app_i = 0; app_i < NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS; app_i++) {
+			NECPLOG(LOG_DEBUG, "\tApp Bucket: %d\n", app_i);
+			for (policy_i = 0; necp_kernel_socket_policies_map[app_i] != NULL && (necp_kernel_socket_policies_map[app_i])[policy_i] != NULL; policy_i++) {
+				policy = (necp_kernel_socket_policies_map[app_i])[policy_i];
+				NECPLOG(LOG_DEBUG, "\t%d. Policy ID: %d, Order: %d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->condition_mask, policy->result, policy->result_parameter);
+			}
+			NECPLOG0(LOG_DEBUG, "-----------\n");
+		}
+	}
+}
+
+static inline bool
+necp_kernel_socket_result_is_service_type(struct necp_kernel_socket_policy *kernel_policy)
+{
+	return (kernel_policy->result >= NECP_KERNEL_POLICY_RESULT_TRIGGER && kernel_policy->result <= NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED);
+}
+
+static inline bool
+necp_kernel_socket_policy_results_overlap(struct necp_kernel_socket_policy *upper_policy, struct necp_kernel_socket_policy *lower_policy)
+{
+	if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_DROP) {
+		// Drop always cancels out lower policies
+		return (TRUE);
+	} else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER) {
+		// Filters never cancel out lower policies
+		return (FALSE);
+	} else if (necp_kernel_socket_result_is_service_type(upper_policy)) {
+		// Trigger/Scoping policies can overlap one another, but not other results
+		return (necp_kernel_socket_result_is_service_type(lower_policy));
+	} else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) {
+		if (upper_policy->session_order != lower_policy->session_order) {
+			// A skip cannot override a policy of a different session
+			return (FALSE);
+		} else {
+			if (upper_policy->result_parameter.skip_policy_order == 0 ||
+				lower_policy->order >= upper_policy->result_parameter.skip_policy_order) {
+				// This policy is beyond the skip
+				return (FALSE);
+			} else {
+				// This policy is inside the skip
+				return (TRUE);
+			}
+		}
+	}
+
+	// A hard pass, flow divert, or tunnel will currently block out lower policies
+	return (TRUE);
+}
+
+static bool
+necp_kernel_socket_policy_is_unnecessary(struct necp_kernel_socket_policy *policy, struct necp_kernel_socket_policy **policy_array, int valid_indices)
+{
+	bool can_skip = FALSE;
+	u_int32_t highest_skip_session_order = 0;
+	u_int32_t highest_skip_order = 0;
+	int i;
+	for (i = 0; i < valid_indices; i++) {
+		struct necp_kernel_socket_policy *compared_policy = policy_array[i];
+
+		// For policies in a skip window, we can't mark conflicting policies as unnecessary
+		if (can_skip) {
+			if (highest_skip_session_order != compared_policy->session_order ||
+				(highest_skip_order != 0 && compared_policy->order >= highest_skip_order)) {
+				// If we've moved on to the next session, or passed the skip window
+				highest_skip_session_order = 0;
+				highest_skip_order = 0;
+				can_skip = FALSE;
+			} else {
+				// If this policy is also a skip, it can increase the skip window
+				if (compared_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) {
+					if (compared_policy->result_parameter.skip_policy_order > highest_skip_order) {
+						highest_skip_order = compared_policy->result_parameter.skip_policy_order;
+					}
+				}
+				continue;
+			}
+		}
+
+		if (compared_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) {
+			// This policy is a skip. Set the skip window accordingly
+			can_skip = TRUE;
+			highest_skip_session_order = compared_policy->session_order;
+			highest_skip_order = compared_policy->result_parameter.skip_policy_order;
+		}
+
+		// The result of the compared policy must be able to block out this policy result
+		if (!necp_kernel_socket_policy_results_overlap(compared_policy, policy)) {
+			continue;
+		}
+
+		// If new policy matches All Interfaces, compared policy must also
+		if ((policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES) && !(compared_policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES)) {
+			continue;
+		}
+
+		// Default always makes lower policies unnecessary
+		if (compared_policy->condition_mask == 0) {
+			return (TRUE);
+		}
+
+		// Compared must be more general than policy, and include only conditions within policy
+		if ((policy->condition_mask & compared_policy->condition_mask) != compared_policy->condition_mask) {
+			continue;
+		}
+
+		// Negative conditions must match for the overlapping conditions
+		if ((policy->condition_negated_mask & compared_policy->condition_mask) != (compared_policy->condition_negated_mask & compared_policy->condition_mask)) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_DOMAIN &&
+			strcmp(compared_policy->cond_domain, policy->cond_domain) != 0) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID &&
+			compared_policy->cond_account_id != policy->cond_account_id) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_POLICY_ID &&
+			compared_policy->cond_policy_id != policy->cond_policy_id) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_APP_ID &&
+			compared_policy->cond_app_id != policy->cond_app_id) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID &&
+			compared_policy->cond_real_app_id != policy->cond_real_app_id) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_PID &&
+			compared_policy->cond_pid != policy->cond_pid) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_UID &&
+			compared_policy->cond_uid != policy->cond_uid) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE &&
+			compared_policy->cond_bound_interface != policy->cond_bound_interface) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_PROTOCOL &&
+			compared_policy->cond_protocol != policy->cond_protocol) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_TRAFFIC_CLASS &&
+			!(compared_policy->cond_traffic_class.start_tc <= policy->cond_traffic_class.start_tc &&
+			compared_policy->cond_traffic_class.end_tc >= policy->cond_traffic_class.end_tc)) {
+			continue;
+		}
+
+		if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_START) {
+			if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) {
+				if (!necp_is_range_in_range((struct sockaddr *)&policy->cond_local_start, (struct sockaddr *)&policy->cond_local_end, (struct sockaddr *)&compared_policy->cond_local_start, (struct sockaddr *)&compared_policy->cond_local_end)) {
+					continue;
+				}
+			} else if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) {
+				if (compared_policy->cond_local_prefix > policy->cond_local_prefix ||
+					!necp_is_addr_in_subnet((struct sockaddr *)&policy->cond_local_start, (struct sockaddr *)&compared_policy->cond_local_start,
compared_policy->cond_local_prefix)) { + continue; + } + } + } + + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_START) { + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + if (!necp_is_range_in_range((struct sockaddr *)&policy->cond_remote_start, (struct sockaddr *)&policy->cond_remote_end, (struct sockaddr *)&compared_policy->cond_remote_start, (struct sockaddr *)&compared_policy->cond_remote_end)) { + continue; + } + } else if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + if (compared_policy->cond_remote_prefix > policy->cond_remote_prefix || + !necp_is_addr_in_subnet((struct sockaddr *)&policy->cond_remote_start, (struct sockaddr *)&compared_policy->cond_remote_start, compared_policy->cond_remote_prefix)) { + continue; + } + } + } + + return (TRUE); + } + + return (FALSE); +} + +static bool +necp_kernel_socket_policies_reprocess(void) +{ + int app_i; + int bucket_allocation_counts[NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS]; + int bucket_current_free_index[NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS]; + int app_layer_allocation_count = 0; + int app_layer_current_free_index = 0; + struct necp_kernel_socket_policy *kernel_policy = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + // Reset mask to 0 + necp_kernel_application_policies_condition_mask = 0; + necp_kernel_socket_policies_condition_mask = 0; + necp_kernel_application_policies_count = 0; + necp_kernel_socket_policies_count = 0; + necp_kernel_socket_policies_non_app_count = 0; + + // Reset all maps to NULL + for (app_i = 0; app_i < NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS; app_i++) { + if (necp_kernel_socket_policies_map[app_i] != NULL) { + FREE(necp_kernel_socket_policies_map[app_i], M_NECP); + necp_kernel_socket_policies_map[app_i] = NULL; + } + + // Init counts + bucket_allocation_counts[app_i] = 0; + } + if (necp_kernel_socket_policies_app_layer_map != NULL) { + FREE(necp_kernel_socket_policies_app_layer_map, M_NECP); + necp_kernel_socket_policies_app_layer_map = NULL; + } + + // Create masks and counts + LIST_FOREACH(kernel_policy, &necp_kernel_socket_policies, chain) { + // App layer mask/count + necp_kernel_application_policies_condition_mask |= kernel_policy->condition_mask; + necp_kernel_application_policies_count++; + app_layer_allocation_count++; + + // Update socket layer bucket mask/counts + necp_kernel_socket_policies_condition_mask |= kernel_policy->condition_mask; + necp_kernel_socket_policies_count++; + + if (!(kernel_policy->condition_mask & NECP_KERNEL_CONDITION_APP_ID) || + kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_APP_ID) { + necp_kernel_socket_policies_non_app_count++; + for (app_i = 0; app_i < NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS; app_i++) { + bucket_allocation_counts[app_i]++; + } + } else { + bucket_allocation_counts[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(kernel_policy->cond_app_id)]++; + } + } + + // Allocate maps + for (app_i = 0; app_i < NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS; app_i++) { + if (bucket_allocation_counts[app_i] > 0) { + // Allocate a NULL-terminated array of policy pointers for each bucket + MALLOC(necp_kernel_socket_policies_map[app_i], struct necp_kernel_socket_policy **, sizeof(struct necp_kernel_socket_policy *) * (bucket_allocation_counts[app_i] + 1), M_NECP, M_WAITOK); + if (necp_kernel_socket_policies_map[app_i] == NULL) { + goto fail; + } + + // Initialize the first entry to NULL + 
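+			// (Each bucket stays NULL-terminated as entries are appended, so
+			// readers can walk it without a separate count.)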
(necp_kernel_socket_policies_map[app_i])[0] = NULL; + } + bucket_current_free_index[app_i] = 0; + } + MALLOC(necp_kernel_socket_policies_app_layer_map, struct necp_kernel_socket_policy **, sizeof(struct necp_kernel_socket_policy *) * (app_layer_allocation_count + 1), M_NECP, M_WAITOK); + if (necp_kernel_socket_policies_app_layer_map == NULL) { + goto fail; + } + necp_kernel_socket_policies_app_layer_map[0] = NULL; + + // Fill out maps + LIST_FOREACH(kernel_policy, &necp_kernel_socket_policies, chain) { + // Insert pointers into map + if (!(kernel_policy->condition_mask & NECP_KERNEL_CONDITION_APP_ID) || + kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_APP_ID) { + for (app_i = 0; app_i < NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS; app_i++) { + if (!necp_kernel_socket_policy_is_unnecessary(kernel_policy, necp_kernel_socket_policies_map[app_i], bucket_current_free_index[app_i])) { + (necp_kernel_socket_policies_map[app_i])[(bucket_current_free_index[app_i])] = kernel_policy; + bucket_current_free_index[app_i]++; + (necp_kernel_socket_policies_map[app_i])[(bucket_current_free_index[app_i])] = NULL; + } + } + } else { + app_i = NECP_SOCKET_MAP_APP_ID_TO_BUCKET(kernel_policy->cond_app_id); + if (!necp_kernel_socket_policy_is_unnecessary(kernel_policy, necp_kernel_socket_policies_map[app_i], bucket_current_free_index[app_i])) { + (necp_kernel_socket_policies_map[app_i])[(bucket_current_free_index[app_i])] = kernel_policy; + bucket_current_free_index[app_i]++; + (necp_kernel_socket_policies_map[app_i])[(bucket_current_free_index[app_i])] = NULL; + } + } + + if (!necp_kernel_socket_policy_is_unnecessary(kernel_policy, necp_kernel_socket_policies_app_layer_map, app_layer_current_free_index)) { + necp_kernel_socket_policies_app_layer_map[app_layer_current_free_index] = kernel_policy; + app_layer_current_free_index++; + necp_kernel_socket_policies_app_layer_map[app_layer_current_free_index] = NULL; + } + } + necp_kernel_socket_policies_dump_all(); + BUMP_KERNEL_SOCKET_POLICIES_GENERATION_COUNT(); + return (TRUE); + +fail: + // Free memory, reset masks to 0 + necp_kernel_application_policies_condition_mask = 0; + necp_kernel_socket_policies_condition_mask = 0; + necp_kernel_application_policies_count = 0; + necp_kernel_socket_policies_count = 0; + necp_kernel_socket_policies_non_app_count = 0; + for (app_i = 0; app_i < NECP_KERNEL_SOCKET_POLICIES_MAP_NUM_APP_ID_BUCKETS; app_i++) { + if (necp_kernel_socket_policies_map[app_i] != NULL) { + FREE(necp_kernel_socket_policies_map[app_i], M_NECP); + necp_kernel_socket_policies_map[app_i] = NULL; + } + } + if (necp_kernel_socket_policies_app_layer_map != NULL) { + FREE(necp_kernel_socket_policies_app_layer_map, M_NECP); + necp_kernel_socket_policies_app_layer_map = NULL; + } + return (FALSE); +} + +static u_int32_t +necp_get_new_string_id(void) +{ + u_int32_t newid = 0; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + necp_last_string_id++; + if (necp_last_string_id < 1) { + necp_last_string_id = 1; + } + + newid = necp_last_string_id; + if (newid == 0) { + NECPLOG0(LOG_DEBUG, "Allocate string id failed.\n"); + return (0); + } + + return (newid); +} + +static struct necp_string_id_mapping * +necp_lookup_string_to_id_locked(struct necp_string_id_mapping_list *list, char *string) +{ + struct necp_string_id_mapping *searchentry = NULL; + struct necp_string_id_mapping *foundentry = NULL; + + LIST_FOREACH(searchentry, list, chain) { + if (strcmp(searchentry->string, string) == 0) { + foundentry = searchentry; + break; 
+ } + } + + return (foundentry); +} + +static u_int32_t +necp_create_string_to_id_mapping(struct necp_string_id_mapping_list *list, char *string) +{ + u_int32_t string_id = 0; + struct necp_string_id_mapping *existing_mapping = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + existing_mapping = necp_lookup_string_to_id_locked(list, string); + if (existing_mapping != NULL) { + string_id = existing_mapping->id; + existing_mapping->refcount++; + } else { + struct necp_string_id_mapping *new_mapping = NULL; + MALLOC(new_mapping, struct necp_string_id_mapping *, sizeof(struct necp_string_id_mapping), M_NECP, M_WAITOK); + if (new_mapping != NULL) { + memset(new_mapping, 0, sizeof(struct necp_string_id_mapping)); + + size_t length = strlen(string) + 1; + MALLOC(new_mapping->string, char *, length, M_NECP, M_WAITOK); + if (new_mapping->string != NULL) { + memcpy(new_mapping->string, string, length); + new_mapping->id = necp_get_new_string_id(); + new_mapping->refcount = 1; + LIST_INSERT_HEAD(list, new_mapping, chain); + string_id = new_mapping->id; + } else { + FREE(new_mapping, M_NECP); + new_mapping = NULL; + } + } + } + return (string_id); +} + +static bool +necp_remove_string_to_id_mapping(struct necp_string_id_mapping_list *list, char *string) +{ + struct necp_string_id_mapping *existing_mapping = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + existing_mapping = necp_lookup_string_to_id_locked(list, string); + if (existing_mapping != NULL) { + if (--existing_mapping->refcount == 0) { + LIST_REMOVE(existing_mapping, chain); + FREE(existing_mapping->string, M_NECP); + FREE(existing_mapping, M_NECP); + } + return (TRUE); + } + + return (FALSE); +} + +#define NECP_NULL_SERVICE_ID 1 +static u_int32_t +necp_get_new_uuid_id(void) +{ + u_int32_t newid = 0; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + necp_last_uuid_id++; + if (necp_last_uuid_id < (NECP_NULL_SERVICE_ID + 1)) { + necp_last_uuid_id = (NECP_NULL_SERVICE_ID + 1); + } + + newid = necp_last_uuid_id; + if (newid == 0) { + NECPLOG0(LOG_DEBUG, "Allocate uuid id failed.\n"); + return (0); + } + + return (newid); +} + +static struct necp_uuid_id_mapping * +necp_uuid_lookup_app_id_locked(uuid_t uuid) +{ + struct necp_uuid_id_mapping *searchentry = NULL; + struct necp_uuid_id_mapping *foundentry = NULL; + + LIST_FOREACH(searchentry, APPUUIDHASH(uuid), chain) { + if (uuid_compare(searchentry->uuid, uuid) == 0) { + foundentry = searchentry; + break; + } + } + + return (foundentry); +} + +static u_int32_t +necp_create_uuid_app_id_mapping(uuid_t uuid, bool *allocated_mapping, bool uuid_policy_table) +{ + u_int32_t local_id = 0; + struct necp_uuid_id_mapping *existing_mapping = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + if (allocated_mapping) { + *allocated_mapping = FALSE; + } + + existing_mapping = necp_uuid_lookup_app_id_locked(uuid); + if (existing_mapping != NULL) { + local_id = existing_mapping->id; + existing_mapping->refcount++; + if (uuid_policy_table) { + existing_mapping->table_refcount++; + } + } else { + struct necp_uuid_id_mapping *new_mapping = NULL; + MALLOC(new_mapping, struct necp_uuid_id_mapping *, sizeof(*new_mapping), M_NECP, M_WAITOK); + if (new_mapping != NULL) { + uuid_copy(new_mapping->uuid, uuid); + new_mapping->id = necp_get_new_uuid_id(); + new_mapping->refcount = 1; + if (uuid_policy_table) { + new_mapping->table_refcount = 1; + } else { + new_mapping->table_refcount = 0; + } + + 
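+			// Publish the new mapping in its uuid hash bucket. refcount tracks
+			// policy references; table_refcount tracks references that also
+			// belong in the proc_uuid_policy table.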
LIST_INSERT_HEAD(APPUUIDHASH(uuid), new_mapping, chain);
+
+			if (allocated_mapping) {
+				*allocated_mapping = TRUE;
+			}
+
+			local_id = new_mapping->id;
+		}
+	}
+
+	return (local_id);
+}
+
+static bool
+necp_remove_uuid_app_id_mapping(uuid_t uuid, bool *removed_mapping, bool uuid_policy_table)
+{
+	struct necp_uuid_id_mapping *existing_mapping = NULL;
+
+	lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+	if (removed_mapping) {
+		*removed_mapping = FALSE;
+	}
+
+	existing_mapping = necp_uuid_lookup_app_id_locked(uuid);
+	if (existing_mapping != NULL) {
+		if (uuid_policy_table) {
+			existing_mapping->table_refcount--;
+		}
+		if (--existing_mapping->refcount == 0) {
+			LIST_REMOVE(existing_mapping, chain);
+			FREE(existing_mapping, M_NECP);
+			if (removed_mapping) {
+				*removed_mapping = TRUE;
+			}
+		}
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+static struct necp_uuid_id_mapping *
+necp_uuid_get_null_service_id_mapping(void)
+{
+	static struct necp_uuid_id_mapping null_mapping;
+	uuid_clear(null_mapping.uuid);
+	null_mapping.id = NECP_NULL_SERVICE_ID;
+
+	return (&null_mapping);
+}
+
+static struct necp_uuid_id_mapping *
+necp_uuid_lookup_service_id_locked(uuid_t uuid)
+{
+	struct necp_uuid_id_mapping *searchentry = NULL;
+	struct necp_uuid_id_mapping *foundentry = NULL;
+
+	if (uuid_is_null(uuid)) {
+		return necp_uuid_get_null_service_id_mapping();
+	}
+
+	LIST_FOREACH(searchentry, &necp_uuid_service_id_list, chain) {
+		if (uuid_compare(searchentry->uuid, uuid) == 0) {
+			foundentry = searchentry;
+			break;
+		}
+	}
+
+	return (foundentry);
+}
+
+static struct necp_uuid_id_mapping *
+necp_uuid_lookup_uuid_with_service_id_locked(u_int32_t local_id)
+{
+	struct necp_uuid_id_mapping *searchentry = NULL;
+	struct necp_uuid_id_mapping *foundentry = NULL;
+
+	if (local_id == NECP_NULL_SERVICE_ID) {
+		return necp_uuid_get_null_service_id_mapping();
+	}
+
+	LIST_FOREACH(searchentry, &necp_uuid_service_id_list, chain) {
+		if (searchentry->id == local_id) {
+			foundentry = searchentry;
+			break;
+		}
+	}
+
+	return (foundentry);
+}
+
+static u_int32_t
+necp_create_uuid_service_id_mapping(uuid_t uuid)
+{
+	u_int32_t local_id = 0;
+	struct necp_uuid_id_mapping *existing_mapping = NULL;
+
+	if (uuid_is_null(uuid)) {
+		return (NECP_NULL_SERVICE_ID);
+	}
+
+	lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+	existing_mapping = necp_uuid_lookup_service_id_locked(uuid);
+	if (existing_mapping != NULL) {
+		local_id = existing_mapping->id;
+		existing_mapping->refcount++;
+	} else {
+		struct necp_uuid_id_mapping *new_mapping = NULL;
+		MALLOC(new_mapping, struct necp_uuid_id_mapping *, sizeof(*new_mapping), M_NECP, M_WAITOK);
+		if (new_mapping != NULL) {
+			uuid_copy(new_mapping->uuid, uuid);
+			new_mapping->id = necp_get_new_uuid_id();
+			new_mapping->refcount = 1;
+
+			LIST_INSERT_HEAD(&necp_uuid_service_id_list, new_mapping, chain);
+
+			local_id = new_mapping->id;
+		}
+	}
+
+	return (local_id);
+}
+
+static bool
+necp_remove_uuid_service_id_mapping(uuid_t uuid)
+{
+	struct necp_uuid_id_mapping *existing_mapping = NULL;
+
+	if (uuid_is_null(uuid)) {
+		return (TRUE);
+	}
+
+	lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+	existing_mapping = necp_uuid_lookup_service_id_locked(uuid);
+	if (existing_mapping != NULL) {
+		if (--existing_mapping->refcount == 0) {
+			LIST_REMOVE(existing_mapping, chain);
+			FREE(existing_mapping, M_NECP);
+		}
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+
+static bool
+necp_kernel_socket_policies_update_uuid_table(void)
+{
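+	// Mirror the uuid-to-app-id mappings into the proc_uuid_policy table when
+	// they are dirty: clear all NECP-owned entries first, then re-add an entry
+	// for every mapping the policy uuid table still references.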
lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + if (necp_uuid_app_id_mappings_dirty) { + if (proc_uuid_policy_kernel(PROC_UUID_POLICY_OPERATION_CLEAR, NULL, PROC_UUID_NECP_APP_POLICY) < 0) { + NECPLOG0(LOG_DEBUG, "Error clearing uuids from policy table\n"); + return (FALSE); + } + + if (necp_num_uuid_app_id_mappings > 0) { + struct necp_uuid_id_mapping_head *uuid_list_head = NULL; + for (uuid_list_head = &necp_uuid_app_id_hashtbl[necp_uuid_app_id_hash_num_buckets - 1]; uuid_list_head >= necp_uuid_app_id_hashtbl; uuid_list_head--) { + struct necp_uuid_id_mapping *mapping = NULL; + LIST_FOREACH(mapping, uuid_list_head, chain) { + if (mapping->table_refcount > 0 && + proc_uuid_policy_kernel(PROC_UUID_POLICY_OPERATION_ADD, mapping->uuid, PROC_UUID_NECP_APP_POLICY) < 0) { + NECPLOG0(LOG_DEBUG, "Error adding uuid to policy table\n"); + } + } + } + } + + necp_uuid_app_id_mappings_dirty = FALSE; + } + + return (TRUE); +} + +#define NECP_KERNEL_VALID_IP_OUTPUT_CONDITIONS (NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE) +static necp_kernel_policy_id +necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter) +{ + struct necp_kernel_ip_output_policy *new_kernel_policy = NULL; + struct necp_kernel_ip_output_policy *tmp_kernel_policy = NULL; + + MALLOC_ZONE(new_kernel_policy, struct necp_kernel_ip_output_policy *, sizeof(*new_kernel_policy), M_NECP_IP_POLICY, M_WAITOK); + if (new_kernel_policy == NULL) { + goto done; + } + + memset(new_kernel_policy, 0, sizeof(*new_kernel_policy)); + new_kernel_policy->parent_policy_id = parent_policy_id; + new_kernel_policy->id = necp_kernel_policy_get_new_id(); + new_kernel_policy->suborder = suborder; + new_kernel_policy->order = order; + new_kernel_policy->session_order = session_order; + + // Sanitize condition mask + new_kernel_policy->condition_mask = (condition_mask & NECP_KERNEL_VALID_IP_OUTPUT_CONDITIONS); + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES) && (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE)) { + new_kernel_policy->condition_mask &= ~NECP_KERNEL_CONDITION_BOUND_INTERFACE; + } + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) && (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX)) { + new_kernel_policy->condition_mask &= ~NECP_KERNEL_CONDITION_LOCAL_PREFIX; + } + if ((new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) && (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX)) { + new_kernel_policy->condition_mask &= 
~NECP_KERNEL_CONDITION_REMOTE_PREFIX; + } + new_kernel_policy->condition_negated_mask = condition_negated_mask & new_kernel_policy->condition_mask; + + // Set condition values + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_POLICY_ID) { + new_kernel_policy->cond_policy_id = cond_policy_id; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE) { + if (cond_bound_interface) { + ifnet_reference(cond_bound_interface); + } + new_kernel_policy->cond_bound_interface = cond_bound_interface; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LAST_INTERFACE) { + new_kernel_policy->cond_last_interface_index = cond_last_interface_index; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_PROTOCOL) { + new_kernel_policy->cond_protocol = cond_protocol; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_START) { + memcpy(&new_kernel_policy->cond_local_start, cond_local_start, cond_local_start->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) { + memcpy(&new_kernel_policy->cond_local_end, cond_local_end, cond_local_end->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) { + new_kernel_policy->cond_local_prefix = cond_local_prefix; + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_START) { + memcpy(&new_kernel_policy->cond_remote_start, cond_remote_start, cond_remote_start->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + memcpy(&new_kernel_policy->cond_remote_end, cond_remote_end, cond_remote_end->sa.sa_len); + } + if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + new_kernel_policy->cond_remote_prefix = cond_remote_prefix; + } + + new_kernel_policy->result = result; + memcpy(&new_kernel_policy->result_parameter, &result_parameter, sizeof(result_parameter)); + + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Added kernel policy: ip output, id=%d, mask=%x\n", new_kernel_policy->id, new_kernel_policy->condition_mask); + } + LIST_INSERT_SORTED_THRICE_ASCENDING(&necp_kernel_ip_output_policies, new_kernel_policy, chain, session_order, order, suborder, tmp_kernel_policy); +done: + return (new_kernel_policy ? 
new_kernel_policy->id : 0); +} + +static struct necp_kernel_ip_output_policy * +necp_kernel_ip_output_policy_find(necp_kernel_policy_id policy_id) +{ + struct necp_kernel_ip_output_policy *kernel_policy = NULL; + struct necp_kernel_ip_output_policy *tmp_kernel_policy = NULL; + + if (policy_id == 0) { + return (NULL); + } + + LIST_FOREACH_SAFE(kernel_policy, &necp_kernel_ip_output_policies, chain, tmp_kernel_policy) { + if (kernel_policy->id == policy_id) { + return (kernel_policy); + } + } + + return (NULL); +} + +static bool +necp_kernel_ip_output_policy_delete(necp_kernel_policy_id policy_id) +{ + struct necp_kernel_ip_output_policy *policy = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + policy = necp_kernel_ip_output_policy_find(policy_id); + if (policy) { + LIST_REMOVE(policy, chain); + + if (policy->cond_bound_interface) { + ifnet_release(policy->cond_bound_interface); + policy->cond_bound_interface = NULL; + } + + FREE_ZONE(policy, sizeof(*policy), M_NECP_IP_POLICY); + return (TRUE); + } + + return (FALSE); +} + +static void +necp_kernel_ip_output_policies_dump_all(void) +{ + struct necp_kernel_ip_output_policy *policy = NULL; + int policy_i; + int id_i; + + if (necp_debug) { + NECPLOG0(LOG_DEBUG, "NECP IP Output Policies:\n"); + NECPLOG0(LOG_DEBUG, "-----------\n"); + for (id_i = 0; id_i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; id_i++) { + NECPLOG(LOG_DEBUG, " ID Bucket: %d\n", id_i); + for (policy_i = 0; necp_kernel_ip_output_policies_map[id_i] != NULL && (necp_kernel_ip_output_policies_map[id_i])[policy_i] != NULL; policy_i++) { + policy = (necp_kernel_ip_output_policies_map[id_i])[policy_i]; + NECPLOG(LOG_DEBUG, "\t%d. Policy ID: %d, Order: %d.%d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->suborder, policy->condition_mask, policy->result, policy->result_parameter); + } + NECPLOG0(LOG_DEBUG, "-----------\n"); + } + } +} + +static inline bool +necp_kernel_ip_output_policy_results_overlap(struct necp_kernel_ip_output_policy *upper_policy, struct necp_kernel_ip_output_policy *lower_policy) +{ + if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) { + if (upper_policy->session_order != lower_policy->session_order) { + // A skip cannot override a policy of a different session + return (FALSE); + } else { + if (upper_policy->result_parameter.skip_policy_order == 0 || + lower_policy->order >= upper_policy->result_parameter.skip_policy_order) { + // This policy is beyond the skip + return (FALSE); + } else { + // This policy is inside the skip + return (TRUE); + } + } + } + + // All other IP Output policy results (drop, tunnel, hard pass) currently overlap + return (TRUE); +} + +static bool +necp_kernel_ip_output_policy_is_unnecessary(struct necp_kernel_ip_output_policy *policy, struct necp_kernel_ip_output_policy **policy_array, int valid_indices) +{ + bool can_skip = FALSE; + u_int32_t highest_skip_session_order = 0; + u_int32_t highest_skip_order = 0; + int i; + for (i = 0; i < valid_indices; i++) { + struct necp_kernel_ip_output_policy *compared_policy = policy_array[i]; + + // For policies in a skip window, we can't mark conflicting policies as unnecessary + if (can_skip) { + if (highest_skip_session_order != compared_policy->session_order || + (highest_skip_order != 0 && compared_policy->order >= highest_skip_order)) { + // If we've moved on to the next session, or passed the skip window + highest_skip_session_order = 0; + highest_skip_order = 0; + can_skip = 
FALSE; + } else { + // If this policy is also a skip, it can increase the skip window + if (compared_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) { + if (compared_policy->result_parameter.skip_policy_order > highest_skip_order) { + highest_skip_order = compared_policy->result_parameter.skip_policy_order; + } + } + continue; + } + } + + if (compared_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) { + // This policy is a skip. Set the skip window accordingly + can_skip = TRUE; + highest_skip_session_order = compared_policy->session_order; + highest_skip_order = compared_policy->result_parameter.skip_policy_order; + } + + // The result of the compared policy must be able to block out this policy result + if (!necp_kernel_ip_output_policy_results_overlap(compared_policy, policy)) { + continue; + } + + // If new policy matches All Interfaces, compared policy must also + if ((policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES) && !(compared_policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES)) { + continue; + } + + // A default policy always makes lower policies unnecessary + if (compared_policy->condition_mask == 0) { + return (TRUE); + } + + // Compared must be more general than policy, and include only conditions within policy + if ((policy->condition_mask & compared_policy->condition_mask) != compared_policy->condition_mask) { + continue; + } + + // Negative conditions must match for the overlapping conditions + if ((policy->condition_negated_mask & compared_policy->condition_mask) != (compared_policy->condition_negated_mask & compared_policy->condition_mask)) { + continue; + } + + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_POLICY_ID && + compared_policy->cond_policy_id != policy->cond_policy_id) { + continue; + } + + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE && + compared_policy->cond_bound_interface != policy->cond_bound_interface) { + continue; + } + + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_PROTOCOL && + compared_policy->cond_protocol != policy->cond_protocol) { + continue; + } + + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_START) { + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) { + if (!necp_is_range_in_range((struct sockaddr *)&policy->cond_local_start, (struct sockaddr *)&policy->cond_local_end, (struct sockaddr *)&compared_policy->cond_local_start, (struct sockaddr *)&compared_policy->cond_local_end)) { + continue; + } + } else if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) { + if (compared_policy->cond_local_prefix > policy->cond_local_prefix || + !necp_is_addr_in_subnet((struct sockaddr *)&policy->cond_local_start, (struct sockaddr *)&compared_policy->cond_local_start, compared_policy->cond_local_prefix)) { + continue; + } + } + } + + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_START) { + if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + if (!necp_is_range_in_range((struct sockaddr *)&policy->cond_remote_start, (struct sockaddr *)&policy->cond_remote_end, (struct sockaddr *)&compared_policy->cond_remote_start, (struct sockaddr *)&compared_policy->cond_remote_end)) { + continue; + } + } else if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + if (compared_policy->cond_remote_prefix > policy->cond_remote_prefix || + !necp_is_addr_in_subnet((struct sockaddr *)&policy->cond_remote_start, (struct sockaddr *)&compared_policy->cond_remote_start, 
compared_policy->cond_remote_prefix)) { + continue; + } + } + } + + return (TRUE); + } + + return (FALSE); +} + +static bool +necp_kernel_ip_output_policies_reprocess(void) +{ + int i; + int bucket_allocation_counts[NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS]; + int bucket_current_free_index[NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS]; + struct necp_kernel_ip_output_policy *kernel_policy = NULL; + + lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE); + + // Reset mask to 0 + necp_kernel_ip_output_policies_condition_mask = 0; + necp_kernel_ip_output_policies_count = 0; + necp_kernel_ip_output_policies_non_id_count = 0; + + for (i = 0; i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; i++) { + if (necp_kernel_ip_output_policies_map[i] != NULL) { + FREE(necp_kernel_ip_output_policies_map[i], M_NECP); + necp_kernel_ip_output_policies_map[i] = NULL; + } + + // Init counts + bucket_allocation_counts[i] = 0; + } + + LIST_FOREACH(kernel_policy, &necp_kernel_ip_output_policies, chain) { + // Update mask + necp_kernel_ip_output_policies_condition_mask |= kernel_policy->condition_mask; + necp_kernel_ip_output_policies_count++; + + // Update bucket counts + if (!(kernel_policy->condition_mask & NECP_KERNEL_CONDITION_POLICY_ID)) { + necp_kernel_ip_output_policies_non_id_count++; + for (i = 0; i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; i++) { + bucket_allocation_counts[i]++; + } + } else { + bucket_allocation_counts[NECP_IP_OUTPUT_MAP_ID_TO_BUCKET(kernel_policy->cond_policy_id)]++; + } + } + + for (i = 0; i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; i++) { + if (bucket_allocation_counts[i] > 0) { + // Allocate a NULL-terminated array of policy pointers for each bucket + MALLOC(necp_kernel_ip_output_policies_map[i], struct necp_kernel_ip_output_policy **, sizeof(struct necp_kernel_ip_output_policy *) * (bucket_allocation_counts[i] + 1), M_NECP, M_WAITOK); + if (necp_kernel_ip_output_policies_map[i] == NULL) { + goto fail; + } + + // Initialize the first entry to NULL + (necp_kernel_ip_output_policies_map[i])[0] = NULL; + } + bucket_current_free_index[i] = 0; + } + + LIST_FOREACH(kernel_policy, &necp_kernel_ip_output_policies, chain) { + // Insert pointers into map + if (!(kernel_policy->condition_mask & NECP_KERNEL_CONDITION_POLICY_ID)) { + for (i = 0; i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; i++) { + if (!necp_kernel_ip_output_policy_is_unnecessary(kernel_policy, necp_kernel_ip_output_policies_map[i], bucket_current_free_index[i])) { + (necp_kernel_ip_output_policies_map[i])[(bucket_current_free_index[i])] = kernel_policy; + bucket_current_free_index[i]++; + (necp_kernel_ip_output_policies_map[i])[(bucket_current_free_index[i])] = NULL; + } + } + } else { + i = NECP_IP_OUTPUT_MAP_ID_TO_BUCKET(kernel_policy->cond_policy_id); + if (!necp_kernel_ip_output_policy_is_unnecessary(kernel_policy, necp_kernel_ip_output_policies_map[i], bucket_current_free_index[i])) { + (necp_kernel_ip_output_policies_map[i])[(bucket_current_free_index[i])] = kernel_policy; + bucket_current_free_index[i]++; + (necp_kernel_ip_output_policies_map[i])[(bucket_current_free_index[i])] = NULL; + } + } + } + necp_kernel_ip_output_policies_dump_all(); + return (TRUE); + +fail: + // Free memory, reset mask to 0 + necp_kernel_ip_output_policies_condition_mask = 0; + necp_kernel_ip_output_policies_count = 0; + necp_kernel_ip_output_policies_non_id_count = 0; + for (i = 0; i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; i++) { + if 
(necp_kernel_ip_output_policies_map[i] != NULL) { + FREE(necp_kernel_ip_output_policies_map[i], M_NECP); + necp_kernel_ip_output_policies_map[i] = NULL; + } + } + return (FALSE); +} + +// Outbound Policy Matching +// --------------------- +struct substring { + char *string; + size_t length; +}; + +static struct substring +necp_trim_dots_and_stars(char *string, size_t length) +{ + struct substring sub; + sub.string = string; + sub.length = string ? length : 0; + + while (sub.length && (sub.string[0] == '.' || sub.string[0] == '*')) { + sub.string++; + sub.length--; + } + + while (sub.length && (sub.string[sub.length - 1] == '.' || sub.string[sub.length - 1] == '*')) { + sub.length--; + } + + return (sub); +} + +static char * +necp_create_trimmed_domain(char *string, size_t length) +{ + char *trimmed_domain = NULL; + struct substring sub = necp_trim_dots_and_stars(string, length); + + MALLOC(trimmed_domain, char *, sub.length + 1, M_NECP, M_WAITOK); + if (trimmed_domain == NULL) { + return (NULL); + } + + memcpy(trimmed_domain, sub.string, sub.length); + trimmed_domain[sub.length] = 0; + + return (trimmed_domain); +} + +static inline int +necp_count_dots(char *string, size_t length) +{ + int dot_count = 0; + size_t i = 0; + + for (i = 0; i < length; i++) { + if (string[i] == '.') { + dot_count++; + } + } + + return (dot_count); +} + +static bool +necp_check_suffix(struct substring parent, struct substring suffix, bool require_dot_before_suffix) +{ + if (parent.length <= suffix.length) { + return (FALSE); + } + + size_t length_difference = (parent.length - suffix.length); + + if (require_dot_before_suffix) { + if (((char *)(parent.string + length_difference - 1))[0] != '.') { + return (FALSE); + } + } + + return (memcmp(parent.string + length_difference, suffix.string, suffix.length) == 0); +} + +static bool +necp_hostname_matches_domain(struct substring hostname_substring, u_int8_t hostname_dot_count, char *domain, u_int8_t domain_dot_count) +{ + if (hostname_substring.string == NULL || domain == NULL) { + return (hostname_substring.string == domain); + } + + struct substring domain_substring; + domain_substring.string = domain; + domain_substring.length = strlen(domain); + + if (hostname_dot_count == domain_dot_count) { + if (hostname_substring.length == domain_substring.length && + memcmp(hostname_substring.string, domain_substring.string, hostname_substring.length) == 0) { + return (TRUE); + } + } else if (domain_dot_count > 0 && domain_dot_count < hostname_dot_count) { + if (necp_check_suffix(hostname_substring, domain_substring, TRUE)) { + return (TRUE); + } + } + + return (FALSE); +} + +static void +necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, struct necp_socket_info *info) +{ + memset(info, 0, sizeof(struct necp_socket_info)); + + info->pid = pid; + info->uid = uid; + info->protocol = protocol; + info->bound_interface_index = bound_interface_index; + info->traffic_class = traffic_class; + info->cred_result = 0; // Don't check the entitlement here, only in the socket layer + + if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_APP_ID && !uuid_is_null(application_uuid)) { + struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(application_uuid); + if (existing_mapping) { + info->application_id = existing_mapping->id; + } + } + + if 
(necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID && !uuid_is_null(real_application_uuid)) { + if (uuid_compare(application_uuid, real_application_uuid) == 0) { + info->real_application_id = info->application_id; + } else { + struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(real_application_uuid); + if (existing_mapping) { + info->real_application_id = existing_mapping->id; + } + } + } + + if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID && account != NULL) { + struct necp_string_id_mapping *existing_mapping = necp_lookup_string_to_id_locked(&necp_account_id_list, account); + if (existing_mapping) { + info->account_id = existing_mapping->id; + } + } + + if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_DOMAIN) { + info->domain = domain; + } +} + +static int +necp_application_find_policy_match_internal(u_int8_t *parameters, size_t parameters_size, struct necp_aggregate_result *returned_result) +{ + int error = 0; + size_t offset = 0; + + struct necp_kernel_socket_policy *matched_policy = NULL; + struct necp_socket_info info; + necp_kernel_policy_filter filter_control_unit = 0; + necp_kernel_policy_result service_action = 0; + necp_kernel_policy_service service = { 0, 0 }; + + pid_t pid = 0; + uid_t uid = 0; + u_int16_t protocol = 0; + u_int32_t bound_interface_index = 0; + u_int32_t traffic_class = 0; + + uuid_t application_uuid; + uuid_clear(application_uuid); + uuid_t real_application_uuid; + uuid_clear(real_application_uuid); + char *domain = NULL; + char *account = NULL; + + if (returned_result == NULL) { + return (EINVAL); + } + + memset(returned_result, 0, sizeof(struct necp_aggregate_result)); + + lck_rw_lock_shared(&necp_kernel_policy_lock); + if (necp_kernel_application_policies_count == 0) { + if (necp_drop_all_order > 0) { + returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_DROP; + lck_rw_done(&necp_kernel_policy_lock); + return (0); + } + } + lck_rw_done(&necp_kernel_policy_lock); + + while (offset < parameters_size) { + u_int8_t type = necp_buffer_get_tlv_type(parameters, offset); + size_t length = necp_buffer_get_tlv_length(parameters, offset); + + if (length > 0 && (offset + sizeof(u_int8_t) + sizeof(size_t) + length) <= parameters_size) { + u_int8_t *value = necp_buffer_get_tlv_value(parameters, offset, NULL); + if (value != NULL) { + switch (type) { + case NECP_POLICY_CONDITION_APPLICATION: { + if (length >= sizeof(uuid_t)) { + uuid_copy(application_uuid, value); + } + break; + } + case NECP_POLICY_CONDITION_REAL_APPLICATION: { + if (length >= sizeof(uuid_t)) { + uuid_copy(real_application_uuid, value); + } + break; + } + case NECP_POLICY_CONDITION_DOMAIN: { + domain = (char *)value; + domain[length - 1] = 0; + break; + } + case NECP_POLICY_CONDITION_ACCOUNT: { + account = (char *)value; + account[length - 1] = 0; + break; + } + case NECP_POLICY_CONDITION_TRAFFIC_CLASS: { + if (length >= sizeof(u_int32_t)) { + memcpy(&traffic_class, value, sizeof(u_int32_t)); + } + break; + } + case NECP_POLICY_CONDITION_PID: { + if (length >= sizeof(pid_t)) { + memcpy(&pid, value, sizeof(pid_t)); + } + break; + } + case NECP_POLICY_CONDITION_UID: { + if (length >= sizeof(uid_t)) { + memcpy(&uid, value, sizeof(uid_t)); + } + break; + } + case NECP_POLICY_CONDITION_IP_PROTOCOL: { + if (length >= sizeof(u_int16_t)) { + memcpy(&protocol, value, sizeof(u_int16_t)); + } + break; + } + case NECP_POLICY_CONDITION_BOUND_INTERFACE: { + if (length <= IFXNAMSIZ && 
length > 0) { + ifnet_t bound_interface = NULL; + char interface_name[IFXNAMSIZ]; + memcpy(interface_name, value, length); + interface_name[length - 1] = 0; // Make sure the string is NULL terminated + if (ifnet_find_by_name(interface_name, &bound_interface) == 0) { + bound_interface_index = bound_interface->if_index; + } + } + break; + } + default: { + break; + } + } + } + } + + offset += sizeof(u_int8_t) + sizeof(size_t) + length; + } + + // Lock + lck_rw_lock_shared(&necp_kernel_policy_lock); + + necp_application_fillout_info_locked(application_uuid, real_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &info); + matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, &service_action, &service); + if (matched_policy) { + returned_result->routing_result = matched_policy->result; + memcpy(&returned_result->routing_result_parameter, &matched_policy->result_parameter, sizeof(returned_result->routing_result_parameter)); + } else { + returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_NONE; + } + returned_result->filter_control_unit = filter_control_unit; + returned_result->service_action = service_action; + + if (service.identifier != 0) { + struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(service.identifier); + if (mapping != NULL) { + struct necp_service_registration *service_registration = NULL; + uuid_copy(returned_result->service_uuid, mapping->uuid); + returned_result->service_data = service.data; + if (service.identifier == NECP_NULL_SERVICE_ID) { + // NULL service is always 'registered' + returned_result->service_flags |= NECP_SERVICE_FLAGS_REGISTERED; + } else { + LIST_FOREACH(service_registration, &necp_registered_service_list, kernel_chain) { + if (service.identifier == service_registration->service_id) { + returned_result->service_flags |= NECP_SERVICE_FLAGS_REGISTERED; + break; + } + } + } + } + } + + // Unlock + lck_rw_done(&necp_kernel_policy_lock); + + return (error); +} + +#define NECP_MAX_MATCH_POLICY_PARAMETER_SIZE 1024 + +int +necp_match_policy(struct proc *p, struct necp_match_policy_args *uap, int32_t *retval) +{ +#pragma unused(p, retval) + u_int8_t *parameters = NULL; + struct necp_aggregate_result returned_result; + int error = 0; + + if (uap == NULL) { + error = EINVAL; + goto done; + } + + if (uap->parameters == 0 || uap->parameters_size == 0 || uap->parameters_size > NECP_MAX_MATCH_POLICY_PARAMETER_SIZE || uap->returned_result == 0) { + error = EINVAL; + goto done; + } + + MALLOC(parameters, u_int8_t *, uap->parameters_size, M_NECP, M_WAITOK); + if (parameters == NULL) { + error = ENOMEM; + goto done; + } + // Copy parameters in, failing if the user buffer is unreadable + error = copyin(uap->parameters, parameters, uap->parameters_size); + if (error) { + goto done; + } + + error = necp_application_find_policy_match_internal(parameters, uap->parameters_size, &returned_result); + if (error) { + goto done; + } + + // Copy return value back, reporting any copyout failure + error = copyout(&returned_result, uap->returned_result, sizeof(struct necp_aggregate_result)); +done: + if (parameters != NULL) { + FREE(parameters, M_NECP); + } + return (error); +} + +static bool +necp_socket_check_policy(struct necp_kernel_socket_policy *kernel_policy, necp_app_id app_id, necp_app_id real_app_id, errno_t cred_result, u_int32_t account_id, struct substring domain, u_int8_t domain_dot_count, pid_t pid, uid_t uid, u_int32_t bound_interface_index, u_int32_t traffic_class, u_int16_t protocol, union necp_sockaddr_union *local, union 
necp_sockaddr_union *remote) +{ + if (!(kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES)) { + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE) { + u_int32_t cond_bound_interface_index = kernel_policy->cond_bound_interface ? kernel_policy->cond_bound_interface->if_index : 0; + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE) { + if (bound_interface_index == cond_bound_interface_index) { + // No match, matches forbidden interface + return (FALSE); + } + } else { + if (bound_interface_index != cond_bound_interface_index) { + // No match, does not match required interface + return (FALSE); + } + } + } else { + if (bound_interface_index != 0) { + // No match, requires a non-bound packet + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask == 0) { + return (TRUE); + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_APP_ID) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_APP_ID) { + if (app_id == kernel_policy->cond_app_id) { + // No match, matches forbidden application + return (FALSE); + } + } else { + if (app_id != kernel_policy->cond_app_id) { + // No match, does not match required application + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_REAL_APP_ID) { + if (real_app_id == kernel_policy->cond_real_app_id) { + // No match, matches forbidden application + return (FALSE); + } + } else { + if (real_app_id != kernel_policy->cond_real_app_id) { + // No match, does not match required application + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ENTITLEMENT) { + if (cred_result != 0) { + // Process is missing entitlement + return (FALSE); + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_DOMAIN) { + bool domain_matches = necp_hostname_matches_domain(domain, domain_dot_count, kernel_policy->cond_domain, kernel_policy->cond_domain_dot_count); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_DOMAIN) { + if (domain_matches) { + // No match, matches forbidden domain + return (FALSE); + } + } else { + if (!domain_matches) { + // No match, does not match required domain + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID) { + if (account_id == kernel_policy->cond_account_id) { + // No match, matches forbidden account + return (FALSE); + } + } else { + if (account_id != kernel_policy->cond_account_id) { + // No match, does not match required account + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_PID) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_PID) { + if (pid == kernel_policy->cond_pid) { + // No match, matches forbidden pid + return (FALSE); + } + } else { + if (pid != kernel_policy->cond_pid) { + // No match, does not match required pid + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_UID) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_UID) { + if (uid == kernel_policy->cond_uid) { + // No match, matches forbidden uid + return (FALSE); + } + } else { + if (uid != kernel_policy->cond_uid) { + // No match, does not match required uid + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & 
NECP_KERNEL_CONDITION_TRAFFIC_CLASS) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_TRAFFIC_CLASS) { + if (traffic_class >= kernel_policy->cond_traffic_class.start_tc && + traffic_class <= kernel_policy->cond_traffic_class.end_tc) { + // No match, matches forbidden traffic class + return (FALSE); + } + } else { + if (traffic_class < kernel_policy->cond_traffic_class.start_tc || + traffic_class > kernel_policy->cond_traffic_class.end_tc) { + // No match, does not match required traffic class + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_PROTOCOL) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_PROTOCOL) { + if (protocol == kernel_policy->cond_protocol) { + // No match, matches forbidden protocol + return (FALSE); + } + } else { + if (protocol != kernel_policy->cond_protocol) { + // No match, does not match required protocol + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_START) { + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) { + bool inRange = necp_is_addr_in_range((struct sockaddr *)local, (struct sockaddr *)&kernel_policy->cond_local_start, (struct sockaddr *)&kernel_policy->cond_local_end); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_LOCAL_END) { + if (inRange) { + return (FALSE); + } + } else { + if (!inRange) { + return (FALSE); + } + } + } else if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) { + bool inSubnet = necp_is_addr_in_subnet((struct sockaddr *)local, (struct sockaddr *)&kernel_policy->cond_local_start, kernel_policy->cond_local_prefix); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) { + if (inSubnet) { + return (FALSE); + } + } else { + if (!inSubnet) { + return (FALSE); + } + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_START) { + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + bool inRange = necp_is_addr_in_range((struct sockaddr *)remote, (struct sockaddr *)&kernel_policy->cond_remote_start, (struct sockaddr *)&kernel_policy->cond_remote_end); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + if (inRange) { + return (FALSE); + } + } else { + if (!inRange) { + return (FALSE); + } + } + } else if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + bool inSubnet = necp_is_addr_in_subnet((struct sockaddr *)remote, (struct sockaddr *)&kernel_policy->cond_remote_start, kernel_policy->cond_remote_prefix); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + if (inSubnet) { + return (FALSE); + } + } else { + if (!inSubnet) { + return (FALSE); + } + } + } + } + + return (TRUE); +} + +static inline u_int32_t +necp_socket_calc_flowhash_locked(struct necp_socket_info *info) +{ + return (net_flowhash(info, sizeof(*info), necp_kernel_socket_policies_gencount)); +} + +#define NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX) +static void +necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface, struct necp_socket_info *info) +{ + struct socket *so = NULL; + + memset(info, 0, sizeof(struct 
necp_socket_info)); + + so = inp->inp_socket; + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_PID) { + info->pid = ((so->so_flags & SOF_DELEGATED) ? so->e_pid : so->last_pid); + } + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_UID) { + info->uid = kauth_cred_getuid(so->so_cred); + } + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_TRAFFIC_CLASS) { + info->traffic_class = so->so_traffic_class; + } + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_PROTOCOL) { + if (inp->inp_ip_p) { + info->protocol = inp->inp_ip_p; + } else { + info->protocol = SOCK_PROTO(so); + } + } + + if (inp->inp_flags2 & INP2_WANT_APP_POLICY && necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_APP_ID) { + struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid)); + if (existing_mapping) { + info->application_id = existing_mapping->id; + } + + if (!(so->so_flags & SOF_DELEGATED)) { + info->real_application_id = info->application_id; + } else if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID) { + struct necp_uuid_id_mapping *real_existing_mapping = necp_uuid_lookup_app_id_locked(so->last_uuid); + if (real_existing_mapping) { + info->real_application_id = real_existing_mapping->id; + } + } + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_ENTITLEMENT) { + info->cred_result = priv_check_cred(so->so_cred, PRIV_NET_PRIVILEGED_NECP_MATCH, 0); + } + } + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID && inp->inp_necp_attributes.inp_account != NULL) { + struct necp_string_id_mapping *existing_mapping = necp_lookup_string_to_id_locked(&necp_account_id_list, inp->inp_necp_attributes.inp_account); + if (existing_mapping) { + info->account_id = existing_mapping->id; + } + } + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_DOMAIN) { + info->domain = inp->inp_necp_attributes.inp_domain; + } + + if (override_bound_interface) { + info->bound_interface_index = override_bound_interface; + } else { + if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp) { + info->bound_interface_index = inp->inp_boundifp->if_index; + } + } + + if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_ADDRESS_TYPE_CONDITIONS) { + if (inp->inp_vflag & INP_IPV4) { + if (override_local_addr) { + memcpy(&info->local_addr, override_local_addr, override_local_addr->sa_len); + } else { + ((struct sockaddr_in *)&info->local_addr)->sin_family = AF_INET; + ((struct sockaddr_in *)&info->local_addr)->sin_len = sizeof(struct sockaddr_in); + ((struct sockaddr_in *)&info->local_addr)->sin_port = inp->inp_lport; + memcpy(&((struct sockaddr_in *)&info->local_addr)->sin_addr, &inp->inp_laddr, sizeof(struct in_addr)); + } + + if (override_remote_addr) { + memcpy(&info->remote_addr, override_remote_addr, override_remote_addr->sa_len); + } else { + ((struct sockaddr_in *)&info->remote_addr)->sin_family = AF_INET; + ((struct sockaddr_in *)&info->remote_addr)->sin_len = sizeof(struct sockaddr_in); + ((struct sockaddr_in *)&info->remote_addr)->sin_port = inp->inp_fport; + memcpy(&((struct sockaddr_in *)&info->remote_addr)->sin_addr, &inp->inp_faddr, sizeof(struct in_addr)); + } + } else if (inp->inp_vflag & INP_IPV6) { + if (override_local_addr) { + memcpy(&info->local_addr, override_local_addr, override_local_addr->sa_len); + } else { + ((struct 
sockaddr_in6 *)&info->local_addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *)&info->local_addr)->sin6_len = sizeof(struct sockaddr_in6); + ((struct sockaddr_in6 *)&info->local_addr)->sin6_port = inp->inp_lport; + memcpy(&((struct sockaddr_in6 *)&info->local_addr)->sin6_addr, &inp->in6p_laddr, sizeof(struct in6_addr)); + } + + if (override_remote_addr) { + memcpy(&info->remote_addr, override_remote_addr, override_remote_addr->sa_len); + } else { + ((struct sockaddr_in6 *)&info->remote_addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *)&info->remote_addr)->sin6_len = sizeof(struct sockaddr_in6); + ((struct sockaddr_in6 *)&info->remote_addr)->sin6_port = inp->inp_fport; + memcpy(&((struct sockaddr_in6 *)&info->remote_addr)->sin6_addr, &inp->in6p_faddr, sizeof(struct in6_addr)); + } + } + } +} + +static inline struct necp_kernel_socket_policy * +necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service) +{ + struct necp_kernel_socket_policy *matched_policy = NULL; + u_int32_t skip_order = 0; + u_int32_t skip_session_order = 0; + int i; + + // Pre-process domain for quick matching + struct substring domain_substring = necp_trim_dots_and_stars(info->domain, info->domain ? strlen(info->domain) : 0); + u_int8_t domain_dot_count = necp_count_dots(domain_substring.string, domain_substring.length); + + if (return_filter) { + *return_filter = 0; + } + + if (return_service_action) { + *return_service_action = 0; + } + + if (return_service) { + return_service->identifier = 0; + return_service->data = 0; + } + + if (policy_search_array != NULL) { + for (i = 0; policy_search_array[i] != NULL; i++) { + if (necp_drop_all_order != 0 && policy_search_array[i]->session_order >= necp_drop_all_order) { + // We've hit a drop all rule + break; + } + if (skip_session_order && policy_search_array[i]->session_order >= skip_session_order) { + // Done skipping + skip_order = 0; + skip_session_order = 0; + } + if (skip_order) { + if (policy_search_array[i]->order < skip_order) { + // Skip this policy + continue; + } else { + // Done skipping + skip_order = 0; + skip_session_order = 0; + } + } else if (skip_session_order) { + // Skip this policy + continue; + } + if (necp_socket_check_policy(policy_search_array[i], info->application_id, info->real_application_id, info->cred_result, info->account_id, domain_substring, domain_dot_count, info->pid, info->uid, info->bound_interface_index, info->traffic_class, info->protocol, &info->local_addr, &info->remote_addr)) { + if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER) { + if (return_filter && *return_filter == 0) { + *return_filter = policy_search_array[i]->result_parameter.filter_control_unit; + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (Application %d Real Application %d BoundInterface %d Proto %d) Filter %d", info->application_id, info->real_application_id, info->bound_interface_index, info->protocol, policy_search_array[i]->result_parameter.filter_control_unit); + } + } + continue; + } else if (necp_kernel_socket_result_is_service_type(policy_search_array[i])) { + if (return_service_action && *return_service_action == 0) { + *return_service_action = policy_search_array[i]->result; + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (Application %d Real Application %d BoundInterface %d Proto %d) 
Service Action %d", info->application_id, info->real_application_id, info->bound_interface_index, info->protocol, policy_search_array[i]->result); + } + } + if (return_service && return_service->identifier == 0) { + return_service->identifier = policy_search_array[i]->result_parameter.service.identifier; + return_service->data = policy_search_array[i]->result_parameter.service.data; + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (Application %d Real Application %d BoundInterface %d Proto %d) Service ID %d Data %d", info->application_id, info->real_application_id, info->bound_interface_index, info->protocol, policy_search_array[i]->result_parameter.service.identifier, policy_search_array[i]->result_parameter.service.data); + } + } + continue; + } + + // Passed all tests, found a match + matched_policy = policy_search_array[i]; + if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_SKIP) { + skip_order = policy_search_array[i]->result_parameter.skip_policy_order; + skip_session_order = policy_search_array[i]->session_order + 1; + continue; + } + break; + } + } + } + + return (matched_policy); +} + +static bool +necp_socket_uses_interface(struct inpcb *inp, u_int32_t interface_index) +{ + bool found_match = FALSE; + errno_t result = 0; + ifaddr_t *addresses = NULL; + union necp_sockaddr_union address_storage; + int i; + int family = AF_INET; + ifnet_t interface = ifindex2ifnet[interface_index]; + + if (inp == NULL || interface == NULL) { + return (FALSE); + } + + if (inp->inp_vflag & INP_IPV4) { + family = AF_INET; + } else if (inp->inp_vflag & INP_IPV6) { + family = AF_INET6; + } + + result = ifnet_get_address_list_family(interface, &addresses, family); + if (result != 0) { + NECPLOG(LOG_ERR, "Failed to get address list for %s%d", ifnet_name(interface), ifnet_unit(interface)); + return (FALSE); + } + + for (i = 0; addresses[i] != NULL; i++) { + if (ifaddr_address(addresses[i], &address_storage.sa, sizeof(address_storage)) == 0) { + if (family == AF_INET) { + if (memcmp(&address_storage.sin.sin_addr, &inp->inp_laddr, sizeof(inp->inp_laddr)) == 0) { + found_match = TRUE; + goto done; + } + } else if (family == AF_INET6) { + if (memcmp(&address_storage.sin6.sin6_addr, &inp->in6p_laddr, sizeof(inp->in6p_laddr)) == 0) { + found_match = TRUE; + goto done; + } + } + } + } + +done: + ifnet_free_address_list(addresses); + addresses = NULL; + return (found_match); +} + +static inline bool +necp_socket_is_connected(struct inpcb *inp) +{ + return (inp->inp_socket->so_state & (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING)); +} + +necp_kernel_policy_id +necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface) +{ + struct socket *so = NULL; + necp_kernel_policy_filter filter_control_unit = 0; + struct necp_kernel_socket_policy *matched_policy = NULL; + necp_kernel_policy_id matched_policy_id = NECP_KERNEL_POLICY_ID_NONE; + necp_kernel_policy_result service_action = 0; + necp_kernel_policy_service service = { 0, 0 }; + + struct necp_socket_info info; + + if (inp == NULL) { + return (NECP_KERNEL_POLICY_ID_NONE); + } + + so = inp->inp_socket; + + // Don't lock. Possible race condition, but we don't want the performance hit. 
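The matching loop above walks an array sorted by (session_order, order, suborder) and implements NECP_KERNEL_POLICY_RESULT_SKIP as a "skip window": once a skip policy matches, later policies of the same session are suppressed until skip_policy_order is reached, and the window closes automatically when the walk leaves that session. A simplified standalone model of just that traversal (hypothetical model_* types; the real condition checks are collapsed into a single matches flag, and the drop-all cutoff is omitted):

    #include <sys/types.h>

    /* Simplified model of the skip-window traversal (hypothetical types). */
    enum model_result { MODEL_RESULT_PASS, MODEL_RESULT_DROP, MODEL_RESULT_SKIP };

    struct model_policy {
        u_int32_t session_order;
        u_int32_t order;
        enum model_result result;
        u_int32_t skip_policy_order;  /* used only when result is SKIP */
        int matches;                  /* stand-in for the condition checks */
    };

    static const struct model_policy *
    model_find_match(const struct model_policy *array, int count)
    {
        u_int32_t skip_order = 0;
        u_int32_t skip_session_order = 0;
        int i;

        for (i = 0; i < count; i++) {
            const struct model_policy *p = &array[i];
            if (skip_session_order && p->session_order >= skip_session_order) {
                // Left the session that issued the skip; stop skipping
                skip_order = 0;
                skip_session_order = 0;
            }
            if (skip_order) {
                if (p->order < skip_order) {
                    continue;  // Still inside the skip window
                }
                skip_order = 0;
                skip_session_order = 0;
            } else if (skip_session_order) {
                continue;  // Skip the rest of this session
            }
            if (!p->matches) {
                continue;
            }
            if (p->result == MODEL_RESULT_SKIP) {
                // Open a window over this session up to skip_policy_order
                skip_order = p->skip_policy_order;
                skip_session_order = p->session_order + 1;
                continue;
            }
            return (p);  // First non-skip match wins
        }
        return (NULL);
    }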
+ if (necp_kernel_socket_policies_count == 0 || + (!(inp->inp_flags2 & INP2_WANT_APP_POLICY) && necp_kernel_socket_policies_non_app_count == 0)) { + if (necp_drop_all_order > 0) { + inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + inp->inp_policyresult.policy_gencount = 0; + inp->inp_policyresult.flowhash = 0; + inp->inp_policyresult.results.filter_control_unit = 0; + if (necp_pass_loopback > 0 && + necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_PASS; + } else { + inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP; + } + } + return (NECP_KERNEL_POLICY_ID_NONE); + } + + // Check for loopback exception + if (necp_pass_loopback > 0 && + necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + // Mark socket as a pass + inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + inp->inp_policyresult.policy_gencount = 0; + inp->inp_policyresult.flowhash = 0; + inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_PASS; + return (NECP_KERNEL_POLICY_ID_NONE); + } + + // Lock + lck_rw_lock_shared(&necp_kernel_policy_lock); + + necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, override_bound_interface, &info); + + // Check info + u_int32_t flowhash = necp_socket_calc_flowhash_locked(&info); + if (inp->inp_policyresult.policy_id != NECP_KERNEL_POLICY_ID_NONE && + inp->inp_policyresult.policy_gencount == necp_kernel_socket_policies_gencount && + inp->inp_policyresult.flowhash == flowhash) { + // If already matched this socket on this generation of table, skip + + // Unlock + lck_rw_done(&necp_kernel_policy_lock); + + return (inp->inp_policyresult.policy_id); + } + + // Match socket to policy + matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, &filter_control_unit, &service_action, &service); + // If the socket matched a scoped service policy, mark as Drop if not registered. + // This covers the cases in which a service is required (on demand) but hasn't started yet. 
+ if ((service_action == NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED || + service_action == NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED) && + service.identifier != 0 && + service.identifier != NECP_NULL_SERVICE_ID) { + bool service_is_registered = FALSE; + struct necp_service_registration *service_registration = NULL; + LIST_FOREACH(service_registration, &necp_registered_service_list, kernel_chain) { + if (service.identifier == service_registration->service_id) { + service_is_registered = TRUE; + break; + } + } + if (!service_is_registered) { + // Mark socket as a drop if service is not registered + inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; + inp->inp_policyresult.flowhash = flowhash; + inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP; + + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Dropping packet because service is not registered", info.bound_interface_index, info.protocol); + } + + // Unlock + lck_rw_done(&necp_kernel_policy_lock); + return (NECP_KERNEL_POLICY_ID_NONE); + } + } + if (matched_policy) { + matched_policy_id = matched_policy->id; + inp->inp_policyresult.policy_id = matched_policy->id; + inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; + inp->inp_policyresult.flowhash = flowhash; + inp->inp_policyresult.results.filter_control_unit = filter_control_unit; + inp->inp_policyresult.results.result = matched_policy->result; + memcpy(&inp->inp_policyresult.results.result_parameter, &matched_policy->result_parameter, sizeof(matched_policy->result_parameter)); + + if (necp_socket_is_connected(inp) && + (matched_policy->result == NECP_KERNEL_POLICY_RESULT_DROP || + (matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && !necp_socket_uses_interface(inp, matched_policy->result_parameter.tunnel_interface_index)))) { + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Marking socket in state %d as defunct", so->so_state); + } + sosetdefunct(current_proc(), so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE); + } + + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Policy %d Result %d Parameter %d", info.bound_interface_index, info.protocol, matched_policy->id, matched_policy->result, matched_policy->result_parameter.tunnel_interface_index); + } + } else if (necp_drop_all_order > 0) { + // Mark socket as a drop if set + inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; + inp->inp_policyresult.flowhash = flowhash; + inp->inp_policyresult.results.filter_control_unit = 0; + inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP; + } else { + // Mark non-matching socket so we don't re-check it + inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount; + inp->inp_policyresult.flowhash = flowhash; + inp->inp_policyresult.results.filter_control_unit = filter_control_unit; // We may have matched a filter, so mark it! 
+ inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_NONE; + } + + // Unlock + lck_rw_done(&necp_kernel_policy_lock); + + return (matched_policy_id); +} + +static bool +necp_ip_output_check_policy(struct necp_kernel_ip_output_policy *kernel_policy, necp_kernel_policy_id socket_policy_id, u_int32_t bound_interface_index, u_int32_t last_interface_index, u_int16_t protocol, union necp_sockaddr_union *local, union necp_sockaddr_union *remote) +{ + if (!(kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES)) { + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE) { + u_int32_t cond_bound_interface_index = kernel_policy->cond_bound_interface ? kernel_policy->cond_bound_interface->if_index : 0; + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE) { + if (bound_interface_index == cond_bound_interface_index) { + // No match, matches forbidden interface + return (FALSE); + } + } else { + if (bound_interface_index != cond_bound_interface_index) { + // No match, does not match required interface + return (FALSE); + } + } + } else { + if (bound_interface_index != 0) { + // No match, requires a non-bound packet + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask == 0) { + return (TRUE); + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_POLICY_ID) { + if (socket_policy_id != kernel_policy->cond_policy_id) { + // No match, does not match required id + return (FALSE); + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LAST_INTERFACE) { + if (last_interface_index != kernel_policy->cond_last_interface_index) { + return (FALSE); + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_PROTOCOL) { + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_PROTOCOL) { + if (protocol == kernel_policy->cond_protocol) { + // No match, matches forbidden protocol + return (FALSE); + } + } else { + if (protocol != kernel_policy->cond_protocol) { + // No match, does not match required protocol + return (FALSE); + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_START) { + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_END) { + bool inRange = necp_is_addr_in_range((struct sockaddr *)local, (struct sockaddr *)&kernel_policy->cond_local_start, (struct sockaddr *)&kernel_policy->cond_local_end); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_LOCAL_END) { + if (inRange) { + return (FALSE); + } + } else { + if (!inRange) { + return (FALSE); + } + } + } else if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) { + bool inSubnet = necp_is_addr_in_subnet((struct sockaddr *)local, (struct sockaddr *)&kernel_policy->cond_local_start, kernel_policy->cond_local_prefix); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_LOCAL_PREFIX) { + if (inSubnet) { + return (FALSE); + } + } else { + if (!inSubnet) { + return (FALSE); + } + } + } + } + + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_START) { + if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + bool inRange = necp_is_addr_in_range((struct sockaddr *)remote, (struct sockaddr *)&kernel_policy->cond_remote_start, (struct sockaddr *)&kernel_policy->cond_remote_end); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_REMOTE_END) { + if (inRange) { + return (FALSE); + } + } else { + if (!inRange) { + return (FALSE); + } + } + } else if (kernel_policy->condition_mask & 
NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + bool inSubnet = necp_is_addr_in_subnet((struct sockaddr *)remote, (struct sockaddr *)&kernel_policy->cond_remote_start, kernel_policy->cond_remote_prefix); + if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_REMOTE_PREFIX) { + if (inSubnet) { + return (FALSE); + } + } else { + if (!inSubnet) { + return (FALSE); + } + } + } + } + + return (TRUE); +} + +static inline struct necp_kernel_ip_output_policy * +necp_ip_output_find_policy_match_locked(necp_kernel_policy_id socket_policy_id, u_int32_t bound_interface_index, u_int32_t last_interface_index, u_int16_t protocol, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr) +{ + u_int32_t skip_order = 0; + u_int32_t skip_session_order = 0; + int i; + struct necp_kernel_ip_output_policy *matched_policy = NULL; + struct necp_kernel_ip_output_policy **policy_search_array = necp_kernel_ip_output_policies_map[NECP_IP_OUTPUT_MAP_ID_TO_BUCKET(socket_policy_id)]; + if (policy_search_array != NULL) { + for (i = 0; policy_search_array[i] != NULL; i++) { + if (necp_drop_all_order != 0 && policy_search_array[i]->session_order >= necp_drop_all_order) { + // We've hit a drop all rule + break; + } + if (skip_session_order && policy_search_array[i]->session_order >= skip_session_order) { + // Done skipping + skip_order = 0; + skip_session_order = 0; + } + if (skip_order) { + if (policy_search_array[i]->order < skip_order) { + // Skip this policy + continue; + } else { + // Done skipping + skip_order = 0; + skip_session_order = 0; + } + } else if (skip_session_order) { + // Skip this policy + continue; + } + if (necp_ip_output_check_policy(policy_search_array[i], socket_policy_id, bound_interface_index, last_interface_index, protocol, local_addr, remote_addr)) { + // Passed all tests, found a match + matched_policy = policy_search_array[i]; + + if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_SKIP) { + skip_order = policy_search_array[i]->result_parameter.skip_policy_order; + skip_session_order = policy_search_array[i]->session_order + 1; + continue; + } + + break; + } + } + } + + return (matched_policy); +} + +necp_kernel_policy_id +necp_ip_output_find_policy_match(struct mbuf *packet, int flags, struct ip_out_args *ipoa, necp_kernel_policy_result *result, necp_kernel_policy_result_parameter *result_parameter) +{ + struct ip *ip = NULL; + int hlen = sizeof(struct ip); + necp_kernel_policy_id socket_policy_id = NECP_KERNEL_POLICY_ID_NONE; + necp_kernel_policy_id matched_policy_id = NECP_KERNEL_POLICY_ID_NONE; + struct necp_kernel_ip_output_policy *matched_policy = NULL; + u_int16_t protocol = 0; + u_int32_t bound_interface_index = 0; + u_int32_t last_interface_index = 0; + union necp_sockaddr_union local_addr; + union necp_sockaddr_union remote_addr; + + if (result) { + *result = 0; + } + + if (result_parameter) { + memset(result_parameter, 0, sizeof(*result_parameter)); + } + + if (packet == NULL) { + return (NECP_KERNEL_POLICY_ID_NONE); + } + + socket_policy_id = necp_get_policy_id_from_packet(packet); + + // Exit early for an empty list + // Don't lock. Possible race condition, but we don't want the performance hit. 
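The IP-layer matcher that follows recovers its match keys straight from the outgoing packet: the IPv4 header length, the protocol, and, for TCP and UDP, the transport ports, which are left in network byte order. A rough userland approximation over a flat buffer, with an illustrative extract_ports helper standing in for the mbuf-based parsing (mtod/m_copydata):

    #include <sys/types.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <netinet/tcp.h>
    #include <netinet/udp.h>
    #include <string.h>

    /*
     * Userland sketch: pull protocol and TCP/UDP ports out of a raw IPv4
     * packet held in a flat buffer. Ports stay in network byte order.
     */
    static int
    extract_ports(const u_int8_t *buf, size_t len, u_int8_t *proto,
        u_int16_t *sport, u_int16_t *dport)
    {
        struct ip iphdr;
        size_t hlen;

        if (len < sizeof(iphdr)) {
            return (-1);
        }
        memcpy(&iphdr, buf, sizeof(iphdr));
        hlen = (size_t)iphdr.ip_hl << 2;  // Header length is in 32-bit words
        if (hlen < sizeof(iphdr) || len < hlen) {
            return (-1);
        }

        *proto = iphdr.ip_p;
        *sport = 0;
        *dport = 0;

        if (*proto == IPPROTO_TCP && len >= hlen + sizeof(struct tcphdr)) {
            struct tcphdr th;
            memcpy(&th, buf + hlen, sizeof(th));
            *sport = th.th_sport;
            *dport = th.th_dport;
        } else if (*proto == IPPROTO_UDP && len >= hlen + sizeof(struct udphdr)) {
            struct udphdr uh;
            memcpy(&uh, buf + hlen, sizeof(uh));
            *sport = uh.uh_sport;
            *dport = uh.uh_dport;
        }
        return (0);
    }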
+ if (necp_kernel_ip_output_policies_count == 0 || + ((socket_policy_id == NECP_KERNEL_POLICY_ID_NONE) && necp_kernel_ip_output_policies_non_id_count == 0)) { + if (necp_drop_all_order > 0) { + matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + if (result) { + if ((necp_pass_loopback > 0 && + necp_is_loopback(NULL, NULL, NULL, packet)) || + (necp_pass_keepalives > 0 && + necp_get_is_keepalive_from_packet(packet))) { + *result = NECP_KERNEL_POLICY_RESULT_PASS; + } else { + *result = NECP_KERNEL_POLICY_RESULT_DROP; + } + } + } + + return (matched_policy_id); + } + + // Check for loopback exception + if ((necp_pass_loopback > 0 && + necp_is_loopback(NULL, NULL, NULL, packet)) || + (necp_pass_keepalives > 0 && + necp_get_is_keepalive_from_packet(packet))) { + matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + if (result) { + *result = NECP_KERNEL_POLICY_RESULT_PASS; + } + return (matched_policy_id); + } + + last_interface_index = necp_get_last_interface_index_from_packet(packet); + + // Process packet to get relevant fields + ip = mtod(packet, struct ip *); +#ifdef _IP_VHL + hlen = _IP_VHL_HL(ip->ip_vhl) << 2; +#else + hlen = ip->ip_hl << 2; +#endif + + protocol = ip->ip_p; + + if ((flags & IP_OUTARGS) && (ipoa != NULL) && + (ipoa->ipoa_flags & IPOAF_BOUND_IF) && + ipoa->ipoa_boundif != IFSCOPE_NONE) { + bound_interface_index = ipoa->ipoa_boundif; + } + + local_addr.sin.sin_family = AF_INET; + local_addr.sin.sin_len = sizeof(struct sockaddr_in); + memcpy(&local_addr.sin.sin_addr, &ip->ip_src, sizeof(ip->ip_src)); + + remote_addr.sin.sin_family = AF_INET; + remote_addr.sin.sin_len = sizeof(struct sockaddr_in); + memcpy(&((struct sockaddr_in *)&remote_addr)->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); + + switch (protocol) { + case IPPROTO_TCP: { + struct tcphdr th; + if ((int)(hlen + sizeof(th)) <= packet->m_pkthdr.len) { + m_copydata(packet, hlen, sizeof(th), (u_int8_t *)&th); + ((struct sockaddr_in *)&local_addr)->sin_port = th.th_sport; + ((struct sockaddr_in *)&remote_addr)->sin_port = th.th_dport; + } + break; + } + case IPPROTO_UDP: { + struct udphdr uh; + if ((int)(hlen + sizeof(uh)) <= packet->m_pkthdr.len) { + m_copydata(packet, hlen, sizeof(uh), (u_int8_t *)&uh); + ((struct sockaddr_in *)&local_addr)->sin_port = uh.uh_sport; + ((struct sockaddr_in *)&remote_addr)->sin_port = uh.uh_dport; + } + break; + } + default: { + ((struct sockaddr_in *)&local_addr)->sin_port = 0; + ((struct sockaddr_in *)&remote_addr)->sin_port = 0; + break; + } + } + + // Match packet to policy + lck_rw_lock_shared(&necp_kernel_policy_lock); + matched_policy = necp_ip_output_find_policy_match_locked(socket_policy_id, bound_interface_index, last_interface_index, protocol, &local_addr, &remote_addr); + if (matched_policy) { + matched_policy_id = matched_policy->id; + if (result) { + *result = matched_policy->result; + } + + if (result_parameter) { + memcpy(result_parameter, &matched_policy->result_parameter, sizeof(matched_policy->result_parameter)); + } + + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "IP Output: (ID %d BoundInterface %d LastInterface %d Proto %d) Policy %d Result %d Parameter %d", socket_policy_id, bound_interface_index, last_interface_index, protocol, matched_policy->id, matched_policy->result, matched_policy->result_parameter.tunnel_interface_index); + } + } else if (necp_drop_all_order > 0) { + matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + if (result) { + *result = NECP_KERNEL_POLICY_RESULT_DROP; + } + } + + lck_rw_done(&necp_kernel_policy_lock); + + return (matched_policy_id); 
+} + +necp_kernel_policy_id +necp_ip6_output_find_policy_match(struct mbuf *packet, int flags, struct ip6_out_args *ip6oa, necp_kernel_policy_result *result, necp_kernel_policy_result_parameter *result_parameter) +{ + struct ip6_hdr *ip6 = NULL; + int next = -1; + int offset = 0; + necp_kernel_policy_id socket_policy_id = NECP_KERNEL_POLICY_ID_NONE; + necp_kernel_policy_id matched_policy_id = NECP_KERNEL_POLICY_ID_NONE; + struct necp_kernel_ip_output_policy *matched_policy = NULL; + u_int16_t protocol = 0; + u_int32_t bound_interface_index = 0; + u_int32_t last_interface_index = 0; + union necp_sockaddr_union local_addr; + union necp_sockaddr_union remote_addr; + + if (result) { + *result = 0; + } + + if (result_parameter) { + memset(result_parameter, 0, sizeof(*result_parameter)); + } + + if (packet == NULL) { + return (NECP_KERNEL_POLICY_ID_NONE); + } + + socket_policy_id = necp_get_policy_id_from_packet(packet); + + // Exit early for an empty list + // Don't lock. Possible race condition, but we don't want the performance hit. + if (necp_kernel_ip_output_policies_count == 0 || + ((socket_policy_id == NECP_KERNEL_POLICY_ID_NONE) && necp_kernel_ip_output_policies_non_id_count == 0)) { + if (necp_drop_all_order > 0) { + matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + if (result) { + if ((necp_pass_loopback > 0 && + necp_is_loopback(NULL, NULL, NULL, packet)) || + (necp_pass_keepalives > 0 && + necp_get_is_keepalive_from_packet(packet))) { + *result = NECP_KERNEL_POLICY_RESULT_PASS; + } else { + *result = NECP_KERNEL_POLICY_RESULT_DROP; + } + } + } + + return (matched_policy_id); + } + + // Check for loopback exception + if ((necp_pass_loopback > 0 && + necp_is_loopback(NULL, NULL, NULL, packet)) || + (necp_pass_keepalives > 0 && + necp_get_is_keepalive_from_packet(packet))) { + matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + if (result) { + *result = NECP_KERNEL_POLICY_RESULT_PASS; + } + return (matched_policy_id); + } + + last_interface_index = necp_get_last_interface_index_from_packet(packet); + + // Process packet to get relevant fields + ip6 = mtod(packet, struct ip6_hdr *); + + if ((flags & IPV6_OUTARGS) && (ip6oa != NULL) && + (ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) && + ip6oa->ip6oa_boundif != IFSCOPE_NONE) { + bound_interface_index = ip6oa->ip6oa_boundif; + } + + ((struct sockaddr_in6 *)&local_addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *)&local_addr)->sin6_len = sizeof(struct sockaddr_in6); + memcpy(&((struct sockaddr_in6 *)&local_addr)->sin6_addr, &ip6->ip6_src, sizeof(ip6->ip6_src)); + + ((struct sockaddr_in6 *)&remote_addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *)&remote_addr)->sin6_len = sizeof(struct sockaddr_in6); + memcpy(&((struct sockaddr_in6 *)&remote_addr)->sin6_addr, &ip6->ip6_dst, sizeof(ip6->ip6_dst)); + + offset = ip6_lasthdr(packet, 0, IPPROTO_IPV6, &next); + if (offset >= 0 && packet->m_pkthdr.len >= offset) { + protocol = next; + switch (protocol) { + case IPPROTO_TCP: { + struct tcphdr th; + if ((int)(offset + sizeof(th)) <= packet->m_pkthdr.len) { + m_copydata(packet, offset, sizeof(th), (u_int8_t *)&th); + ((struct sockaddr_in6 *)&local_addr)->sin6_port = th.th_sport; + ((struct sockaddr_in6 *)&remote_addr)->sin6_port = th.th_dport; + } + break; + } + case IPPROTO_UDP: { + struct udphdr uh; + if ((int)(offset + sizeof(uh)) <= packet->m_pkthdr.len) { + m_copydata(packet, offset, sizeof(uh), (u_int8_t *)&uh); + ((struct sockaddr_in6 *)&local_addr)->sin6_port = uh.uh_sport; + ((struct sockaddr_in6 *)&remote_addr)->sin6_port = 
uh.uh_dport; + } + break; + } + default: { + ((struct sockaddr_in6 *)&local_addr)->sin6_port = 0; + ((struct sockaddr_in6 *)&remote_addr)->sin6_port = 0; + break; + } + } + } + + // Match packet to policy + lck_rw_lock_shared(&necp_kernel_policy_lock); + matched_policy = necp_ip_output_find_policy_match_locked(socket_policy_id, bound_interface_index, last_interface_index, protocol, &local_addr, &remote_addr); + if (matched_policy) { + matched_policy_id = matched_policy->id; + if (result) { + *result = matched_policy->result; + } + + if (result_parameter) { + memcpy(result_parameter, &matched_policy->result_parameter, sizeof(matched_policy->result_parameter)); + } + + if (necp_debug > 1) { + NECPLOG(LOG_DEBUG, "IP6 Output: (ID %d BoundInterface %d LastInterface %d Proto %d) Policy %d Result %d Parameter %d", socket_policy_id, bound_interface_index, last_interface_index, protocol, matched_policy->id, matched_policy->result, matched_policy->result_parameter.tunnel_interface_index); + } + } else if (necp_drop_all_order > 0) { + matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + if (result) { + *result = NECP_KERNEL_POLICY_RESULT_DROP; + } + } + + lck_rw_done(&necp_kernel_policy_lock); + + return (matched_policy_id); +} + +// Utilities +static bool +necp_is_addr_in_range(struct sockaddr *addr, struct sockaddr *range_start, struct sockaddr *range_end) +{ + int cmp = 0; + + if (addr == NULL || range_start == NULL || range_end == NULL) { + return (FALSE); + } + + /* Must be greater than or equal to start */ + cmp = necp_addr_compare(addr, range_start, 1); + if (cmp != 0 && cmp != 1) { + return (FALSE); + } + + /* Must be less than or equal to end */ + cmp = necp_addr_compare(addr, range_end, 1); + if (cmp != 0 && cmp != -1) { + return (FALSE); + } + + return (TRUE); +} + +static bool +necp_is_range_in_range(struct sockaddr *inner_range_start, struct sockaddr *inner_range_end, struct sockaddr *range_start, struct sockaddr *range_end) +{ + int cmp = 0; + + if (inner_range_start == NULL || inner_range_end == NULL || range_start == NULL || range_end == NULL) { + return (FALSE); + } + + /* Must be greater than or equal to start */ + cmp = necp_addr_compare(inner_range_start, range_start, 1); + if (cmp != 0 && cmp != 1) { + return (FALSE); + } + + /* Must be less than or equal to end */ + cmp = necp_addr_compare(inner_range_end, range_end, 1); + if (cmp != 0 && cmp != -1) { + return (FALSE); + } + + return (TRUE); +} + +static bool +necp_is_addr_in_subnet(struct sockaddr *addr, struct sockaddr *subnet_addr, u_int8_t subnet_prefix) +{ + if (addr == NULL || subnet_addr == NULL) { + return (FALSE); + } + + if (addr->sa_family != subnet_addr->sa_family || addr->sa_len != subnet_addr->sa_len) { + return (FALSE); + } + + switch (addr->sa_family) { + case AF_INET: { + if (satosin(subnet_addr)->sin_port != 0 && + satosin(addr)->sin_port != satosin(subnet_addr)->sin_port) { + return (FALSE); + } + return (necp_buffer_compare_with_bit_prefix((u_int8_t *)&satosin(addr)->sin_addr, (u_int8_t *)&satosin(subnet_addr)->sin_addr, subnet_prefix)); + } + case AF_INET6: { + if (satosin6(subnet_addr)->sin6_port != 0 && + satosin6(addr)->sin6_port != satosin6(subnet_addr)->sin6_port) { + return (FALSE); + } + if (satosin6(addr)->sin6_scope_id && + satosin6(subnet_addr)->sin6_scope_id && + satosin6(addr)->sin6_scope_id != satosin6(subnet_addr)->sin6_scope_id) { + return (FALSE); + } + return (necp_buffer_compare_with_bit_prefix((u_int8_t *)&satosin6(addr)->sin6_addr, (u_int8_t *)&satosin6(subnet_addr)->sin6_addr, 
subnet_prefix)); + } + default: { + return (FALSE); + } + } + + return (FALSE); +} + +/* + * Return values: + * -1: sa1 < sa2 + * 0: sa1 == sa2 + * 1: sa1 > sa2 + * 2: Not comparable or error + */ +static int +necp_addr_compare(struct sockaddr *sa1, struct sockaddr *sa2, int check_port) +{ + int result = 0; + int port_result = 0; + + if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) { + return (2); + } + + if (sa1->sa_len == 0) { + return (0); + } + + switch (sa1->sa_family) { + case AF_INET: { + if (sa1->sa_len != sizeof(struct sockaddr_in)) { + return (2); + } + + result = memcmp(&satosin(sa1)->sin_addr.s_addr, &satosin(sa2)->sin_addr.s_addr, sizeof(satosin(sa1)->sin_addr.s_addr)); + + if (check_port) { + if (satosin(sa1)->sin_port < satosin(sa2)->sin_port) { + port_result = -1; + } else if (satosin(sa1)->sin_port > satosin(sa2)->sin_port) { + port_result = 1; + } + + if (result == 0) { + result = port_result; + } else if ((result > 0 && port_result < 0) || (result < 0 && port_result > 0)) { + return (2); + } + } + + break; + } + case AF_INET6: { + if (sa1->sa_len != sizeof(struct sockaddr_in6)) { + return (2); + } + + if (satosin6(sa1)->sin6_scope_id != satosin6(sa2)->sin6_scope_id) { + return (2); + } + + result = memcmp(&satosin6(sa1)->sin6_addr.s6_addr[0], &satosin6(sa2)->sin6_addr.s6_addr[0], sizeof(struct in6_addr)); + + if (check_port) { + if (satosin6(sa1)->sin6_port < satosin6(sa2)->sin6_port) { + port_result = -1; + } else if (satosin6(sa1)->sin6_port > satosin6(sa2)->sin6_port) { + port_result = 1; + } + + if (result == 0) { + result = port_result; + } else if ((result > 0 && port_result < 0) || (result < 0 && port_result > 0)) { + return (2); + } + } + + break; + } + default: { + result = memcmp(sa1, sa2, sa1->sa_len); + break; + } + } + + if (result < 0) { + result = (-1); + } else if (result > 0) { + result = (1); + } + + return (result); +} + +static bool +necp_buffer_compare_with_bit_prefix(u_int8_t *p1, u_int8_t *p2, u_int32_t bits) +{ + u_int8_t mask; + + /* Handle null pointers */ + if (p1 == NULL || p2 == NULL) { + return (p1 == p2); + } + + while (bits >= 8) { + if (*p1++ != *p2++) { + return (FALSE); + } + bits -= 8; + } + + if (bits > 0) { + mask = ~((1<<(8-bits))-1); + if ((*p1 & mask) != (*p2 & mask)) { + return (FALSE); + } + } + return (TRUE); +} + +// Socket operations +#define NECP_MAX_SOCKET_ATTRIBUTE_STRING_LENGTH 253 + +static bool +necp_set_socket_attribute(u_int8_t *buffer, size_t buffer_length, u_int8_t type, char **buffer_p) +{ + int error = 0; + int cursor = 0; + size_t string_size = 0; + char *local_string = NULL; + u_int8_t *value = NULL; + + cursor = necp_buffer_find_tlv(buffer, buffer_length, 0, type, 0); + if (cursor < 0) { + // This will clear out the parameter + goto done; + } + + string_size = necp_buffer_get_tlv_length(buffer, cursor); + if (string_size == 0 || string_size > NECP_MAX_SOCKET_ATTRIBUTE_STRING_LENGTH) { + // This will clear out the parameter + goto done; + } + + MALLOC(local_string, char *, string_size + 1, M_NECP, M_WAITOK); + if (local_string == NULL) { + NECPLOG(LOG_ERR, "Failed to allocate a socket attribute buffer (size %d)", string_size); + goto fail; + } + + value = necp_buffer_get_tlv_value(buffer, cursor, NULL); + if (value == NULL) { + NECPLOG0(LOG_ERR, "Failed to get socket attribute"); + goto fail; + } + + memcpy(local_string, value, string_size); + local_string[string_size] = 0; + +done: + if (*buffer_p != NULL) { + FREE(*buffer_p, M_NECP); + *buffer_p = NULL; + } + + *buffer_p = local_string; + 
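+	// local_string is NULL when no usable TLV was found above, so falling through from done clears the attribute rather than leaving a stale value.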
return (0); +fail: + if (local_string != NULL) { + FREE(local_string, M_NECP); + } + return (error); +} + +errno_t +necp_set_socket_attributes(struct socket *so, struct sockopt *sopt) +{ + int error = 0; + u_int8_t *buffer = NULL; + struct inpcb *inp = sotoinpcb(so); + + size_t valsize = sopt->sopt_valsize; + if (valsize == 0 || + valsize > ((sizeof(u_int8_t) + sizeof(size_t) + NECP_MAX_SOCKET_ATTRIBUTE_STRING_LENGTH) * 2)) { + goto done; + } + + MALLOC(buffer, u_int8_t *, valsize, M_NECP, M_WAITOK); + if (buffer == NULL) { + goto done; + } + + error = sooptcopyin(sopt, buffer, valsize, 0); + if (error) { + goto done; + } + + error = necp_set_socket_attribute(buffer, valsize, NECP_TLV_ATTRIBUTE_DOMAIN, &inp->inp_necp_attributes.inp_domain); + if (error) { + NECPLOG0(LOG_ERR, "Could not set domain TLV for socket attributes"); + goto done; + } + + error = necp_set_socket_attribute(buffer, valsize, NECP_TLV_ATTRIBUTE_ACCOUNT, &inp->inp_necp_attributes.inp_account); + if (error) { + NECPLOG0(LOG_ERR, "Could not set account TLV for socket attributes"); + goto done; + } + + if (necp_debug) { + NECPLOG(LOG_DEBUG, "Set on socket: Domain %s, Account %s", inp->inp_necp_attributes.inp_domain, inp->inp_necp_attributes.inp_account); + } +done: + if (buffer != NULL) { + FREE(buffer, M_NECP); + } + + return (error); +} + +errno_t +necp_get_socket_attributes(struct socket *so, struct sockopt *sopt) +{ + int error = 0; + u_int8_t *buffer = NULL; + u_int8_t *cursor = NULL; + size_t valsize = 0; + struct inpcb *inp = sotoinpcb(so); + + if (inp->inp_necp_attributes.inp_domain != NULL) { + valsize += sizeof(u_int8_t) + sizeof(size_t) + strlen(inp->inp_necp_attributes.inp_domain); + } + if (inp->inp_necp_attributes.inp_account != NULL) { + valsize += sizeof(u_int8_t) + sizeof(size_t) + strlen(inp->inp_necp_attributes.inp_account); + } + if (valsize == 0) { + goto done; + } + + MALLOC(buffer, u_int8_t *, valsize, M_NECP, M_WAITOK); + if (buffer == NULL) { + goto done; + } + + cursor = buffer; + if (inp->inp_necp_attributes.inp_domain != NULL) { + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_ATTRIBUTE_DOMAIN, strlen(inp->inp_necp_attributes.inp_domain), inp->inp_necp_attributes.inp_domain); + } + + if (inp->inp_necp_attributes.inp_account != NULL) { + cursor = necp_buffer_write_tlv(cursor, NECP_TLV_ATTRIBUTE_ACCOUNT, strlen(inp->inp_necp_attributes.inp_account), inp->inp_necp_attributes.inp_account); + } + + error = sooptcopyout(sopt, buffer, valsize); + if (error) { + goto done; + } +done: + if (buffer != NULL) { + FREE(buffer, M_NECP); + } + + return (error); +} + +static bool +necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id) +{ + u_int32_t verifyifindex = interface ? interface->if_index : 0; + bool allowed_to_receive = TRUE; + struct necp_socket_info info; + u_int32_t flowhash = 0; + necp_kernel_policy_result service_action = 0; + necp_kernel_policy_service service = { 0, 0 }; + + if (return_policy_id) { + *return_policy_id = NECP_KERNEL_POLICY_ID_NONE; + } + + if (inp == NULL) { + goto done; + } + + // Don't lock. Possible race condition, but we don't want the performance hit. 
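The reuse logic just below caches a socket's last policy verdict keyed by the global policy generation count and a flow hash over the addresses and ports that fed the evaluation; when neither has changed, the cached inp_policyresult is honored without another policy walk. A minimal model of that cache key, with toy_* names that are illustrative and not xnu's:

#include <stdbool.h>
#include <stdint.h>

// Cached verdict from a socket's last policy evaluation.
struct toy_policy_cache {
	uint32_t gencount;   // policy-table generation when the verdict was cached
	uint32_t flowhash;   // hash of local/remote addresses and ports
	bool     allowed;    // cached allow/deny verdict
};

// Reuse the verdict only if no policy changed (gencount) and the flow's
// addressing is identical (flowhash); otherwise re-run the policy match.
static bool
toy_cache_lookup(const struct toy_policy_cache *cache, uint32_t cur_gencount,
    uint32_t cur_flowhash, bool *allowed_out)
{
	if (cache->gencount == cur_gencount && cache->flowhash == cur_flowhash) {
		*allowed_out = cache->allowed;
		return true;     // hit: skip the policy walk
	}
	return false;        // miss: evaluate policies under the lock
}

Any verdict cached under an older generation count is re-evaluated, so a single counter bump invalidates every socket's cached result at once.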
+ if (necp_kernel_socket_policies_count == 0 || + (!(inp->inp_flags2 & INP2_WANT_APP_POLICY) && necp_kernel_socket_policies_non_app_count == 0)) { + if (necp_drop_all_order > 0) { + if (necp_pass_loopback > 0 && + necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + allowed_to_receive = TRUE; + } else { + allowed_to_receive = FALSE; + } + } + goto done; + } + + // If this socket is connected, or we are not taking addresses into account, try to reuse last result + if ((necp_socket_is_connected(inp) || (override_local_addr == NULL && override_remote_addr == NULL)) && inp->inp_policyresult.policy_id != NECP_KERNEL_POLICY_ID_NONE) { + bool policies_have_changed = FALSE; + lck_rw_lock_shared(&necp_kernel_policy_lock); + if (inp->inp_policyresult.policy_gencount != necp_kernel_socket_policies_gencount) { + policies_have_changed = TRUE; + } + lck_rw_done(&necp_kernel_policy_lock); + + if (!policies_have_changed) { + if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP || + inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT || + (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface && + inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex)) { + allowed_to_receive = FALSE; + } else if (return_policy_id) { + *return_policy_id = inp->inp_policyresult.policy_id; + } + goto done; + } + } + + // Check for loopback exception + if (necp_pass_loopback > 0 && + necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + allowed_to_receive = TRUE; + goto done; + } + + // Actually calculate policy result + lck_rw_lock_shared(&necp_kernel_policy_lock); + necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, 0, &info); + + flowhash = necp_socket_calc_flowhash_locked(&info); + if (inp->inp_policyresult.policy_id != NECP_KERNEL_POLICY_ID_NONE && + inp->inp_policyresult.policy_gencount == necp_kernel_socket_policies_gencount && + inp->inp_policyresult.flowhash == flowhash) { + if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP || + inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT || + (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface && + inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex)) { + allowed_to_receive = FALSE; + } else if (return_policy_id) { + *return_policy_id = inp->inp_policyresult.policy_id; + } + lck_rw_done(&necp_kernel_policy_lock); + goto done; + } + + struct necp_kernel_socket_policy *matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, NULL, &service_action, &service); + if (matched_policy != NULL) { + if (matched_policy->result == NECP_KERNEL_POLICY_RESULT_DROP || + matched_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT || + (matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface && + matched_policy->result_parameter.tunnel_interface_index != verifyifindex) || + ((service_action == NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED || + service_action == NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED) && + service.identifier != 0 && service.identifier != NECP_NULL_SERVICE_ID)) { + allowed_to_receive = FALSE; + } else if (return_policy_id) { + *return_policy_id = matched_policy->id; + } + lck_rw_done(&necp_kernel_policy_lock); + + if (necp_debug > 1 && 
matched_policy->id != inp->inp_policyresult.policy_id) { + NECPLOG(LOG_DEBUG, "Socket Send/Recv Policy: Policy %d Allowed %d", return_policy_id ? *return_policy_id : 0, allowed_to_receive); + } + goto done; + } else if (necp_drop_all_order > 0) { + allowed_to_receive = FALSE; + } + + lck_rw_done(&necp_kernel_policy_lock); + +done: + return (allowed_to_receive); +} + +bool +necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id) +{ + struct sockaddr_in local; + struct sockaddr_in remote; + local.sin_family = remote.sin_family = AF_INET; + local.sin_len = remote.sin_len = sizeof(struct sockaddr_in); + local.sin_port = local_port; + remote.sin_port = remote_port; + memcpy(&local.sin_addr, local_addr, sizeof(local.sin_addr)); + memcpy(&remote.sin_addr, remote_addr, sizeof(remote.sin_addr)); + + return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id)); +} + +bool +necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id) +{ + struct sockaddr_in6 local; + struct sockaddr_in6 remote; + local.sin6_family = remote.sin6_family = AF_INET6; + local.sin6_len = remote.sin6_len = sizeof(struct sockaddr_in6); + local.sin6_port = local_port; + remote.sin6_port = remote_port; + memcpy(&local.sin6_addr, local_addr, sizeof(local.sin6_addr)); + memcpy(&remote.sin6_addr, remote_addr, sizeof(remote.sin6_addr)); + + return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id)); +} + +bool +necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id) +{ + return (necp_socket_is_allowed_to_send_recv_internal(inp, NULL, NULL, NULL, return_policy_id)); +} + +int +necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id) +{ + if (packet == NULL || inp == NULL) { + return (EINVAL); + } + + // Mark ID for Pass and IP Tunnel + if (policy_id != NECP_KERNEL_POLICY_ID_NONE) { + packet->m_pkthdr.necp_mtag.necp_policy_id = policy_id; + } else if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_PASS || + inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL) { + packet->m_pkthdr.necp_mtag.necp_policy_id = inp->inp_policyresult.policy_id; + } else { + packet->m_pkthdr.necp_mtag.necp_policy_id = NECP_KERNEL_POLICY_ID_NONE; + } + packet->m_pkthdr.necp_mtag.necp_last_interface_index = 0; + + return (0); +} + +int +necp_mark_packet_from_ip(struct mbuf *packet, necp_kernel_policy_id policy_id) +{ + if (packet == NULL) { + return (EINVAL); + } + + // Mark ID for Pass and IP Tunnel + if (policy_id != NECP_KERNEL_POLICY_ID_NONE) { + packet->m_pkthdr.necp_mtag.necp_policy_id = policy_id; + } else { + packet->m_pkthdr.necp_mtag.necp_policy_id = NECP_KERNEL_POLICY_ID_NONE; + } + + return (0); +} + +int +necp_mark_packet_from_interface(struct mbuf *packet, ifnet_t interface) +{ + if (packet == NULL) { + return (EINVAL); + } + + // Mark ID for Pass and IP Tunnel + if (interface != NULL) { + packet->m_pkthdr.necp_mtag.necp_last_interface_index = interface->if_index; + } + + return (0); +} + +int +necp_mark_packet_as_keepalive(struct mbuf 
*packet, bool is_keepalive) +{ + if (packet == NULL) { + return (EINVAL); + } + + if (is_keepalive) { + packet->m_pkthdr.pkt_flags |= PKTF_KEEPALIVE; + } else { + packet->m_pkthdr.pkt_flags &= ~PKTF_KEEPALIVE; + } + + return (0); +} + +necp_kernel_policy_id +necp_get_policy_id_from_packet(struct mbuf *packet) +{ + if (packet == NULL) { + return (NECP_KERNEL_POLICY_ID_NONE); + } + + return (packet->m_pkthdr.necp_mtag.necp_policy_id); +} + +u_int32_t +necp_get_last_interface_index_from_packet(struct mbuf *packet) +{ + if (packet == NULL) { + return (0); + } + + return (packet->m_pkthdr.necp_mtag.necp_last_interface_index); +} + +bool +necp_get_is_keepalive_from_packet(struct mbuf *packet) +{ + if (packet == NULL) { + return (FALSE); + } + + return (packet->m_pkthdr.pkt_flags & PKTF_KEEPALIVE); +} + +u_int32_t +necp_socket_get_content_filter_control_unit(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + if (inp == NULL) { + return (0); + } + return (inp->inp_policyresult.results.filter_control_unit); +} + +bool +necp_socket_should_use_flow_divert(struct inpcb *inp) +{ + if (inp == NULL) { + return (FALSE); + } + + return (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT); +} + +u_int32_t +necp_socket_get_flow_divert_control_unit(struct inpcb *inp) +{ + if (inp == NULL) { + return (0); + } + + if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) { + return (inp->inp_policyresult.results.result_parameter.flow_divert_control_unit); + } + + return (0); +} + +bool +necp_socket_should_rescope(struct inpcb *inp) +{ + if (inp == NULL) { + return (FALSE); + } + + return (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED); +} + +u_int +necp_socket_get_rescope_if_index(struct inpcb *inp) +{ + if (inp == NULL) { + return (0); + } + + if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED) { + return (inp->inp_policyresult.results.result_parameter.scoped_interface_index); + } + + return (0); +} + +ifnet_t +necp_get_ifnet_from_result_parameter(necp_kernel_policy_result_parameter *result_parameter) +{ + if (result_parameter == NULL) { + return (NULL); + } + + return (ifindex2ifnet[result_parameter->tunnel_interface_index]); +} + +bool +necp_packet_can_rebind_to_ifnet(struct mbuf *packet, struct ifnet *interface, struct route *new_route, int family) +{ + bool found_match = FALSE; + errno_t result = 0; + ifaddr_t *addresses = NULL; + union necp_sockaddr_union address_storage; + int i; + + if (packet == NULL || interface == NULL || new_route == NULL || (family != AF_INET && family != AF_INET6)) { + return (FALSE); + } + + result = ifnet_get_address_list_family(interface, &addresses, family); + if (result != 0) { + NECPLOG(LOG_ERR, "Failed to get address list for %s%d", ifnet_name(interface), ifnet_unit(interface)); + return (FALSE); + } + + for (i = 0; addresses[i] != NULL; i++) { + ROUTE_RELEASE(new_route); + if (ifaddr_address(addresses[i], &address_storage.sa, sizeof(address_storage)) == 0) { + if (family == AF_INET) { + struct ip *ip = mtod(packet, struct ip *); + if (memcmp(&address_storage.sin.sin_addr, &ip->ip_src, sizeof(ip->ip_src)) == 0) { + struct sockaddr_in *dst4 = (struct sockaddr_in *)(void *)&new_route->ro_dst; + dst4->sin_family = AF_INET; + dst4->sin_len = sizeof(struct sockaddr_in); + dst4->sin_addr = ip->ip_dst; + rtalloc_scoped(new_route, interface->if_index); + if (!ROUTE_UNUSABLE(new_route)) { + found_match = TRUE; + goto done; + } + } + } else if (family 
== AF_INET6) { + struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *); + if (memcmp(&address_storage.sin6.sin6_addr, &ip6->ip6_src, sizeof(ip6->ip6_src)) == 0) { + struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)(void *)&new_route->ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(struct sockaddr_in6); + dst6->sin6_addr = ip6->ip6_dst; + rtalloc_scoped(new_route, interface->if_index); + if (!ROUTE_UNUSABLE(new_route)) { + found_match = TRUE; + goto done; + } + } + } + } + } + +done: + ifnet_free_address_list(addresses); + addresses = NULL; + return (found_match); +} + +static bool +necp_addr_is_loopback(struct sockaddr *address) +{ + if (address == NULL) { + return (FALSE); + } + + if (address->sa_family == AF_INET) { + return (ntohl(((struct sockaddr_in *)(void *)address)->sin_addr.s_addr) == INADDR_LOOPBACK); + } else if (address->sa_family == AF_INET6) { + return IN6_IS_ADDR_LOOPBACK(&((struct sockaddr_in6 *)(void *)address)->sin6_addr); + } + + return (FALSE); +} + +static bool +necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet) +{ + // Note: This function only checks for the loopback addresses. + // In the future, we may want to expand to also allow any traffic + // going through the loopback interface, but until then, this + // check is cheaper. + + if (local_addr != NULL && necp_addr_is_loopback(local_addr)) { + return (TRUE); + } + + if (remote_addr != NULL && necp_addr_is_loopback(remote_addr)) { + return (TRUE); + } + + if (inp != NULL) { + if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp && (inp->inp_boundifp->if_flags & IFF_LOOPBACK)) { + return (TRUE); + } + if (inp->inp_vflag & INP_IPV4) { + if (ntohl(inp->inp_laddr.s_addr) == INADDR_LOOPBACK || + ntohl(inp->inp_faddr.s_addr) == INADDR_LOOPBACK) { + return (TRUE); + } + } else if (inp->inp_vflag & INP_IPV6) { + if (IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr) || + IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr)) { + return (TRUE); + } + } + } + + if (packet != NULL) { + struct ip *ip = mtod(packet, struct ip *); + if (ip->ip_v == 4) { + if (ntohl(ip->ip_src.s_addr) == INADDR_LOOPBACK) { + return (TRUE); + } + if (ntohl(ip->ip_dst.s_addr) == INADDR_LOOPBACK) { + return (TRUE); + } + } else if (ip->ip_v == 6) { + struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *); + if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src)) { + return (TRUE); + } + if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) { + return (TRUE); + } + } + } + + return (FALSE); +} diff --git a/bsd/net/necp.h b/bsd/net/necp.h new file mode 100644 index 000000000..519995b85 --- /dev/null +++ b/bsd/net/necp.h @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2013, 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _NET_NECP_H_ +#define _NET_NECP_H_ + +#include +#include + +/* + * Name registered by the ipsec kernel control + */ +#define NECP_CONTROL_NAME "com.apple.net.necp_control" + +struct necp_packet_header { + u_int8_t packet_type; + u_int8_t flags; + u_int32_t message_id; +}; +#define NECP_PACKET_TYPE_POLICY_ADD 1 +#define NECP_PACKET_TYPE_POLICY_GET 2 +#define NECP_PACKET_TYPE_POLICY_DELETE 3 +#define NECP_PACKET_TYPE_POLICY_APPLY_ALL 4 +#define NECP_PACKET_TYPE_POLICY_LIST_ALL 5 +#define NECP_PACKET_TYPE_POLICY_DELETE_ALL 6 +#define NECP_PACKET_TYPE_SET_SESSION_PRIORITY 7 +#define NECP_PACKET_TYPE_LOCK_SESSION_TO_PROC 8 +#define NECP_PACKET_TYPE_REGISTER_SERVICE 9 +#define NECP_PACKET_TYPE_UNREGISTER_SERVICE 10 + +#define NECP_PACKET_FLAGS_RESPONSE 0x01 // Used for acks, errors, and query responses + +#define NECP_TLV_NIL 0 +#define NECP_TLV_ERROR 1 // u_int32_t +#define NECP_TLV_POLICY_ORDER 2 // u_int32_t +#define NECP_TLV_POLICY_CONDITION 3 +#define NECP_TLV_POLICY_RESULT 4 +#define NECP_TLV_POLICY_ID 5 // u_int32_t +#define NECP_TLV_SESSION_PRIORITY 6 // u_int32_t +#define NECP_TLV_ATTRIBUTE_DOMAIN 7 // char[] +#define NECP_TLV_ATTRIBUTE_ACCOUNT 8 // char[] +#define NECP_TLV_SERVICE_UUID 9 // uuid_t + +#define NECP_POLICY_CONDITION_FLAGS_NEGATIVE 0x01 // Negative + +// Conditions +#define NECP_POLICY_CONDITION_DEFAULT 0 // N/A, not valid with any other conditions +// Socket/Application conditions +#define NECP_POLICY_CONDITION_APPLICATION 1 // uuid_t, uses effective UUID when possible +#define NECP_POLICY_CONDITION_REAL_APPLICATION 2 // uuid_t, never uses effective UUID. Only valid with NECP_POLICY_CONDITION_APPLICATION +// Application-only Conditions +#define NECP_POLICY_CONDITION_DOMAIN 3 // String, such as apple.com +#define NECP_POLICY_CONDITION_ACCOUNT 4 // String +// Socket/Application condition +#define NECP_POLICY_CONDITION_ENTITLEMENT 5 // String +#define NECP_POLICY_CONDITION_PID 6 // pid_t +#define NECP_POLICY_CONDITION_UID 7 // uid_t +#define NECP_POLICY_CONDITION_ALL_INTERFACES 8 // N/A +#define NECP_POLICY_CONDITION_BOUND_INTERFACE 9 // String +#define NECP_POLICY_CONDITION_TRAFFIC_CLASS 10 // necp_policy_condition_tc_range +// Socket/IP conditions +#define NECP_POLICY_CONDITION_IP_PROTOCOL 11 // u_int8_t +#define NECP_POLICY_CONDITION_LOCAL_ADDR 12 // necp_policy_condition_addr +#define NECP_POLICY_CONDITION_REMOTE_ADDR 13 // necp_policy_condition_addr +#define NECP_POLICY_CONDITION_LOCAL_ADDR_RANGE 14 // necp_policy_condition_addr_range +#define NECP_POLICY_CONDITION_REMOTE_ADDR_RANGE 15 // necp_policy_condition_addr_range + +// Results +#define NECP_POLICY_RESULT_PASS 1 // N/A +#define NECP_POLICY_RESULT_SKIP 2 // u_int32_t, policy order to skip to. 0 to skip all session policies. 
+#define NECP_POLICY_RESULT_DROP 3 // N/A +#define NECP_POLICY_RESULT_SOCKET_DIVERT 4 // u_int32_t, flow divert control unit +#define NECP_POLICY_RESULT_SOCKET_FILTER 5 // u_int32_t, filter control unit +#define NECP_POLICY_RESULT_IP_TUNNEL 6 // String, interface name +#define NECP_POLICY_RESULT_IP_FILTER 7 // ? +#define NECP_POLICY_RESULT_TRIGGER 8 // service uuid_t +#define NECP_POLICY_RESULT_TRIGGER_IF_NEEDED 9 // service uuid_t +#define NECP_POLICY_RESULT_TRIGGER_SCOPED 10 // service uuid_t +#define NECP_POLICY_RESULT_NO_TRIGGER_SCOPED 11 // service uuid_t +#define NECP_POLICY_RESULT_SOCKET_SCOPED 12 // String, interface name + +#define NECP_POLICY_RESULT_MAX NECP_POLICY_RESULT_SOCKET_SCOPED + +// Errors +#define NECP_ERROR_INTERNAL 0 +#define NECP_ERROR_UNKNOWN_PACKET_TYPE 1 +#define NECP_ERROR_INVALID_TLV 2 +#define NECP_ERROR_POLICY_RESULT_INVALID 3 +#define NECP_ERROR_POLICY_CONDITIONS_INVALID 4 +#define NECP_ERROR_POLICY_ID_NOT_FOUND 5 +#define NECP_ERROR_INVALID_PROCESS 6 + +// Modifiers +#define NECP_MASK_USERSPACE_ONLY 0x80000000 // on filter_control_unit value + +struct necp_policy_condition_tc_range { + u_int32_t start_tc; + u_int32_t end_tc; +} __attribute__((__packed__)); + +struct necp_policy_condition_addr { + u_int8_t prefix; + union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } address; +} __attribute__((__packed__)); + +struct necp_policy_condition_addr_range { + union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } start_address; + union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } end_address; +} __attribute__((__packed__)); + +#define NECP_SESSION_PRIORITY_UNKNOWN 0 +#define NECP_SESSION_PRIORITY_CONTROL 1 +#define NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL 2 +#define NECP_SESSION_PRIORITY_HIGH 3 +#define NECP_SESSION_PRIORITY_DEFAULT 4 +#define NECP_SESSION_PRIORITY_LOW 5 + +#define NECP_SESSION_NUM_PRIORITIES NECP_SESSION_PRIORITY_LOW + +typedef u_int32_t necp_policy_id; +typedef u_int32_t necp_policy_order; + +typedef u_int32_t necp_kernel_policy_result; +typedef u_int32_t necp_kernel_policy_filter; + +typedef union { + u_int tunnel_interface_index; + u_int scoped_interface_index; + u_int32_t flow_divert_control_unit; + u_int32_t filter_control_unit; +} necp_kernel_policy_routing_result_parameter; + +#define NECP_SERVICE_FLAGS_REGISTERED 0x01 +struct necp_aggregate_result { + necp_kernel_policy_result routing_result; + necp_kernel_policy_routing_result_parameter routing_result_parameter; + necp_kernel_policy_filter filter_control_unit; + necp_kernel_policy_result service_action; + uuid_t service_uuid; + u_int32_t service_flags; + u_int32_t service_data; +}; + +#ifdef BSD_KERNEL_PRIVATE +#include +#include +#include +#include +#include + +#define NECPCTL_DROP_ALL_LEVEL 1 /* Drop all packets if no policy matches above this level */ +#define NECPCTL_DEBUG 2 /* Log all kernel policy matches */ +#define NECPCTL_PASS_LOOPBACK 3 /* Pass all loopback traffic */ +#define NECPCTL_PASS_KEEPALIVES 4 /* Pass all kernel-generated keepalive traffic */ + +#define NECPCTL_NAMES { \ + { 0, 0 }, \ + { "drop_all_level", CTLTYPE_INT }, \ + { "debug", CTLTYPE_INT }, \ + { "pass_loopback", CTLTYPE_INT }, \ + { "pass_keepalives", CTLTYPE_INT }, \ +} + +typedef u_int32_t necp_kernel_policy_id; +#define NECP_KERNEL_POLICY_ID_NONE 0 +#define NECP_KERNEL_POLICY_ID_NO_MATCH 1 +#define NECP_KERNEL_POLICY_ID_FIRST_VALID 2 + +typedef u_int32_t necp_app_id; + +#define 
NECP_KERNEL_POLICY_RESULT_NONE 0 +#define NECP_KERNEL_POLICY_RESULT_PASS NECP_POLICY_RESULT_PASS +#define NECP_KERNEL_POLICY_RESULT_SKIP NECP_POLICY_RESULT_SKIP +#define NECP_KERNEL_POLICY_RESULT_DROP NECP_POLICY_RESULT_DROP +#define NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT NECP_POLICY_RESULT_SOCKET_DIVERT +#define NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER NECP_POLICY_RESULT_SOCKET_FILTER +#define NECP_KERNEL_POLICY_RESULT_IP_TUNNEL NECP_POLICY_RESULT_IP_TUNNEL +#define NECP_KERNEL_POLICY_RESULT_IP_FILTER NECP_POLICY_RESULT_IP_FILTER +#define NECP_KERNEL_POLICY_RESULT_TRIGGER NECP_POLICY_RESULT_TRIGGER +#define NECP_KERNEL_POLICY_RESULT_TRIGGER_IF_NEEDED NECP_POLICY_RESULT_TRIGGER_IF_NEEDED +#define NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED NECP_POLICY_RESULT_TRIGGER_SCOPED +#define NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED NECP_POLICY_RESULT_NO_TRIGGER_SCOPED +#define NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED NECP_POLICY_RESULT_SOCKET_SCOPED + +typedef struct { + u_int32_t identifier; + u_int32_t data; +} necp_kernel_policy_service; + +typedef union { + u_int tunnel_interface_index; + u_int scoped_interface_index; + u_int32_t flow_divert_control_unit; + u_int32_t filter_control_unit; + u_int32_t skip_policy_order; + necp_kernel_policy_service service; +} necp_kernel_policy_result_parameter; + +union necp_sockaddr_union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; + +struct necp_kernel_socket_policy { + LIST_ENTRY(necp_kernel_socket_policy) chain; + necp_policy_id parent_policy_id; + necp_kernel_policy_id id; + necp_policy_order order; + u_int32_t session_order; + + u_int32_t condition_mask; + u_int32_t condition_negated_mask; + necp_kernel_policy_id cond_policy_id; + u_int32_t cond_app_id; // Locally assigned ID value stored + u_int32_t cond_real_app_id; // Locally assigned ID value stored + u_int32_t cond_account_id; // Locally assigned ID value stored + char *cond_domain; // String + u_int8_t cond_domain_dot_count; // Number of dots in cond_domain + pid_t cond_pid; + uid_t cond_uid; + ifnet_t cond_bound_interface; // Matches specific binding only + struct necp_policy_condition_tc_range cond_traffic_class; // Matches traffic class in range + u_int16_t cond_protocol; // Matches IP protocol number + union necp_sockaddr_union cond_local_start; // Matches local IP address (or start) + union necp_sockaddr_union cond_local_end; // Matches IP address range + u_int8_t cond_local_prefix; // Defines subnet + union necp_sockaddr_union cond_remote_start; // Matches remote IP address (or start) + union necp_sockaddr_union cond_remote_end; // Matches IP address range + u_int8_t cond_remote_prefix; // Defines subnet + + necp_kernel_policy_result result; + necp_kernel_policy_result_parameter result_parameter; +}; + +struct necp_kernel_ip_output_policy { + LIST_ENTRY(necp_kernel_ip_output_policy) chain; + necp_policy_id parent_policy_id; + necp_kernel_policy_id id; + necp_policy_order suborder; + necp_policy_order order; + u_int32_t session_order; + + u_int32_t condition_mask; + u_int32_t condition_negated_mask; + necp_kernel_policy_id cond_policy_id; + ifnet_t cond_bound_interface; // Matches specific binding only + u_int16_t cond_protocol; // Matches IP protocol number + union necp_sockaddr_union cond_local_start; // Matches local IP address (or start) + union necp_sockaddr_union cond_local_end; // Matches IP address range + u_int8_t cond_local_prefix; // Defines subnet + union necp_sockaddr_union cond_remote_start; // Matches remote IP address (or start) + union 
necp_sockaddr_union cond_remote_end; // Matches IP address range + u_int8_t cond_remote_prefix; // Defines subnet + u_int32_t cond_last_interface_index; + + necp_kernel_policy_result result; + necp_kernel_policy_result_parameter result_parameter; +}; + +#define MAX_KERNEL_SOCKET_POLICIES 1 +#define MAX_KERNEL_IP_OUTPUT_POLICIES 4 +struct necp_session_policy { + LIST_ENTRY(necp_session_policy) chain; + bool applied; // Applied into the kernel table + bool pending_deletion; // Waiting to be removed from kernel table + bool pending_update; // Policy has been modified since creation/last application + necp_policy_id id; + necp_policy_order order; + u_int8_t *result; + size_t result_size; + u_int8_t *conditions; // Array of conditions, each with a size_t length at start + size_t conditions_size; + + uuid_t applied_app_uuid; + uuid_t applied_real_app_uuid; + char *applied_domain; + char *applied_account; + + uuid_t applied_service_uuid; + + necp_kernel_policy_id kernel_socket_policies[MAX_KERNEL_SOCKET_POLICIES]; + necp_kernel_policy_id kernel_ip_output_policies[MAX_KERNEL_IP_OUTPUT_POLICIES]; +}; + +struct necp_aggregate_socket_result { + necp_kernel_policy_result result; + necp_kernel_policy_result_parameter result_parameter; + necp_kernel_policy_filter filter_control_unit; +}; + +struct necp_inpcb_result { + char *application_layer_domain; + u_int32_t application_layer_account_id; + necp_kernel_policy_id policy_id; + int32_t policy_gencount; + u_int32_t flowhash; + struct necp_aggregate_socket_result results; +}; + +errno_t necp_init(void); + +errno_t necp_set_socket_attributes(struct socket *so, struct sockopt *sopt); +errno_t necp_get_socket_attributes(struct socket *so, struct sockopt *sopt); + +u_int32_t necp_socket_get_content_filter_control_unit(struct socket *so); + +bool necp_socket_should_use_flow_divert(struct inpcb *inp); +u_int32_t necp_socket_get_flow_divert_control_unit(struct inpcb *inp); + +bool necp_socket_should_rescope(struct inpcb *inp); +u_int necp_socket_get_rescope_if_index(struct inpcb *inp); + +bool necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id); +bool necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id); +bool necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id); +int necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id); +necp_kernel_policy_id necp_get_policy_id_from_packet(struct mbuf *packet); +u_int32_t necp_get_last_interface_index_from_packet(struct mbuf *packet); + +necp_kernel_policy_id necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface); +necp_kernel_policy_id necp_ip_output_find_policy_match(struct mbuf *packet, int flags, struct ip_out_args *ipoa, necp_kernel_policy_result *result, necp_kernel_policy_result_parameter *result_parameter); +necp_kernel_policy_id necp_ip6_output_find_policy_match(struct mbuf *packet, int flags, struct ip6_out_args *ip6oa, necp_kernel_policy_result *result, necp_kernel_policy_result_parameter *result_parameter); + +int necp_mark_packet_from_ip(struct mbuf *packet, necp_kernel_policy_id 
policy_id); +int necp_mark_packet_from_interface(struct mbuf *packet, ifnet_t interface); + +ifnet_t necp_get_ifnet_from_result_parameter(necp_kernel_policy_result_parameter *result_parameter); +bool necp_packet_can_rebind_to_ifnet(struct mbuf *packet, struct ifnet *interface, struct route *new_route, int family); + +int necp_mark_packet_as_keepalive(struct mbuf *packet, bool is_keepalive); +bool necp_get_is_keepalive_from_packet(struct mbuf *packet); + +#endif /* BSD_KERNEL_PRIVATE */ +#ifndef KERNEL +int necp_match_policy(const uint8_t *parameters, size_t parameters_size, struct necp_aggregate_result *returned_result); +#endif /* !KERNEL */ + +#endif diff --git a/bsd/net/net_stubs.c b/bsd/net/net_stubs.c index 0fe96fe37..54211edde 100644 --- a/bsd/net/net_stubs.c +++ b/bsd/net/net_stubs.c @@ -1,2692 +1,441 @@ +/* + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + #include #if !NETWORKING -int bpf_attach(void); -int bpf_attach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int bpf_tap_in(void); -int bpf_tap_in(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int bpf_tap_out(void); -int bpf_tap_out(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int bpfattach(void); -int bpfattach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ctl_deregister(void); -int ctl_deregister(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ctl_enqueuedata(void); -int ctl_enqueuedata(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ctl_enqueuembuf(void); -int ctl_enqueuembuf(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ctl_getenqueuespace(void); -int ctl_getenqueuespace(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ctl_register(void); -int ctl_register(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ether_add_proto(void); -int ether_add_proto(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ether_check_multi(void); -int ether_check_multi(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ether_del_proto(void); -int ether_del_proto(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ether_demux(void); -int ether_demux(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ether_frameout(void); -int ether_frameout(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ether_ioctl(void); -int ether_ioctl(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_advlock(void); -int fifo_advlock(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_close(void); -int fifo_close(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_inactive(void); -int fifo_inactive(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_ioctl(void); -int fifo_ioctl(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_lookup(void); -int fifo_lookup(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_open(void); -int fifo_open(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_pathconf(void); -int fifo_pathconf(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_read(void); -int fifo_read(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_select(void); -int fifo_select(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int fifo_write(void); -int fifo_write(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_address(void); -int ifaddr_address(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_address_family(void); -int ifaddr_address_family(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_dstaddress(void); -int ifaddr_dstaddress(void) -{ - panic("stub called in a 
config with no networking"); - return 0; -} - -int ifaddr_findbestforaddr(void); -int ifaddr_findbestforaddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_ifnet(void); -int ifaddr_ifnet(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_netmask(void); -int ifaddr_netmask(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_reference(void); -int ifaddr_reference(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_release(void); -int ifaddr_release(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_withaddr(void); -int ifaddr_withaddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_withdstaddr(void); -int ifaddr_withdstaddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_withnet(void); -int ifaddr_withnet(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifaddr_withroute(void); -int ifaddr_withroute(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int iflt_attach(void); -int iflt_attach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int iflt_detach(void); -int iflt_detach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifmaddr_address(void); -int ifmaddr_address(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifmaddr_ifnet(void); -int ifmaddr_ifnet(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifmaddr_lladdress(void); -int ifmaddr_lladdress(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifmaddr_reference(void); -int ifmaddr_reference(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifmaddr_release(void); -int ifmaddr_release(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_add_multicast(void); -int ifnet_add_multicast(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_addrlen(void); -int ifnet_addrlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_allocate(void); -int ifnet_allocate(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_attach(void); -int ifnet_attach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_attach_protocol(void); -int ifnet_attach_protocol(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_baudrate(void); -int ifnet_baudrate(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_capabilities_enabled(void); -int ifnet_capabilities_enabled(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_capabilities_supported(void); -int ifnet_capabilities_supported(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_detach(void); -int ifnet_detach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_detach_protocol(void); -int ifnet_detach_protocol(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_eflags(void); -int ifnet_eflags(void) -{ - panic("stub 
called in a config with no networking"); - return 0; -} - -int ifnet_event(void); -int ifnet_event(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_family(void); -int ifnet_family(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_subfamily(void); -int ifnet_subfamily(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_find_by_name(void); -int ifnet_find_by_name(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_flags(void); -int ifnet_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_free_address_list(void); -int ifnet_free_address_list(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_free_multicast_list(void); -int ifnet_free_multicast_list(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_address_list(void); -int ifnet_get_address_list(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_address_list_family(void); -int ifnet_get_address_list_family(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_link_mib_data(void); -int ifnet_get_link_mib_data(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_link_mib_data_length(void); -int ifnet_get_link_mib_data_length(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_multicast_list(void); -int ifnet_get_multicast_list(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_service_class_sndq_len(void); -int ifnet_get_service_class_sndq_len(void) +#define STUB(name) \ + int name(void); \ + int name(void) \ + { \ + panic("stub called in a config with no networking"); \ + return 0; \ + } + +STUB(bpf_attach); +STUB(bpf_tap_in); +STUB(bpf_tap_out); +STUB(bpfattach); +STUB(ctl_deregister); +STUB(ctl_enqueuedata); +STUB(ctl_enqueuembuf); +STUB(ctl_enqueuembuf_list); +STUB(ctl_getenqueuespace); +STUB(ctl_register); +STUB(ether_add_proto); +STUB(ether_check_multi); +STUB(ether_del_proto); +STUB(ether_demux); +STUB(ether_frameout); +STUB(ether_ioctl); +STUB(fifo_advlock); +STUB(fifo_close); +STUB(fifo_inactive); +STUB(fifo_ioctl); +STUB(fifo_lookup); +STUB(fifo_open); +STUB(fifo_pathconf); +STUB(fifo_read); +STUB(fifo_select); +STUB(fifo_write); +STUB(ifaddr_address); +STUB(ifaddr_address_family); +STUB(ifaddr_dstaddress); +STUB(ifaddr_findbestforaddr); +STUB(ifaddr_ifnet); +STUB(ifaddr_netmask); +STUB(ifaddr_reference); +STUB(ifaddr_release); +STUB(ifaddr_withaddr); +STUB(ifaddr_withdstaddr); +STUB(ifaddr_withnet); +STUB(ifaddr_withroute); +STUB(iflt_attach); +STUB(iflt_detach); +STUB(ifmaddr_address); +STUB(ifmaddr_ifnet); +STUB(ifmaddr_lladdress); +STUB(ifmaddr_reference); +STUB(ifmaddr_release); +STUB(ifnet_add_multicast); +STUB(ifnet_addrlen); +STUB(ifnet_allocate); +STUB(ifnet_attach); +STUB(ifnet_attach_protocol); +STUB(ifnet_baudrate); +STUB(ifnet_capabilities_enabled); +STUB(ifnet_capabilities_supported); +STUB(ifnet_detach); +STUB(ifnet_detach_protocol); +STUB(ifnet_eflags); +STUB(ifnet_event); +STUB(ifnet_family); +STUB(ifnet_subfamily); +STUB(ifnet_find_by_name); +STUB(ifnet_flags); +STUB(ifnet_free_address_list); +STUB(ifnet_free_multicast_list); +STUB(ifnet_get_address_list); +STUB(ifnet_get_address_list_family); 
+STUB(ifnet_get_link_mib_data); +STUB(ifnet_get_link_mib_data_length); +STUB(ifnet_get_multicast_list); +STUB(ifnet_get_service_class_sndq_len); +STUB(ifnet_get_tso_mtu); +STUB(ifnet_get_wake_flags); +STUB(ifnet_hdrlen); +STUB(ifnet_index); +STUB(ifnet_input); +STUB(ifnet_interface_family_find); +STUB(ifnet_ioctl); +STUB(ifnet_lastchange); +STUB(ifnet_list_free); +STUB(ifnet_list_get); +STUB(ifnet_lladdr); +STUB(ifnet_lladdr_copy_bytes); +STUB(ifnet_llbroadcast_copy_bytes); +STUB(ifnet_metric); +STUB(ifnet_mtu); +STUB(ifnet_name); +STUB(ifnet_offload); +STUB(ifnet_output); +STUB(ifnet_output_raw); +STUB(ifnet_reference); +STUB(ifnet_release); +STUB(ifnet_remove_multicast); +STUB(ifnet_resolve_multicast); +STUB(ifnet_set_addrlen); +STUB(ifnet_set_baudrate); +STUB(ifnet_set_capabilities_enabled); +STUB(ifnet_set_capabilities_supported); +STUB(ifnet_set_delegate); +STUB(ifnet_set_eflags); +STUB(ifnet_set_flags); +STUB(ifnet_set_hdrlen); +STUB(ifnet_set_link_mib_data); +STUB(ifnet_set_lladdr); +STUB(ifnet_set_metric); +STUB(ifnet_set_mtu); +STUB(ifnet_set_offload); +STUB(ifnet_set_promiscuous); +STUB(ifnet_set_stat); +STUB(ifnet_set_tso_mtu); +STUB(ifnet_set_wake_flags); +STUB(ifnet_softc); +STUB(ifnet_stat); +STUB(ifnet_stat_increment); +STUB(ifnet_stat_increment_in); +STUB(ifnet_stat_increment_out); +STUB(ifnet_touch_lastchange); +STUB(ifnet_type); +STUB(ifnet_unit); +STUB(in_cksum); +STUB(inet_arp_handle_input); +STUB(inet_arp_init_ifaddr); +STUB(inet_arp_lookup); +STUB(ipf_addv4); +STUB(ipf_addv6); +STUB(ipf_inject_input); +STUB(ipf_inject_output); +STUB(ipf_remove); +STUB(kev_msg_post); +STUB(kev_vendor_code_find); +STUB(mbuf_adj); +STUB(mbuf_adjustlen); +STUB(mbuf_align_32); +STUB(mbuf_alloccluster); +STUB(mbuf_allocpacket); +STUB(mbuf_allocpacket_list); +STUB(mbuf_attachcluster); +STUB(mbuf_clear_csum_performed); +STUB(mbuf_clear_csum_requested); +STUB(mbuf_clear_vlan_tag); +STUB(mbuf_concatenate); +STUB(mbuf_copy_pkthdr); +STUB(mbuf_copyback); +STUB(mbuf_copydata); +STUB(mbuf_copym); +STUB(mbuf_data); +STUB(mbuf_data_to_physical); +STUB(mbuf_datastart); +STUB(mbuf_dup); +STUB(mbuf_flags); +STUB(mbuf_free); +STUB(mbuf_freecluster); +STUB(mbuf_freem); +STUB(mbuf_freem_list); +STUB(mbuf_get); +STUB(mbuf_get_csum_performed); +STUB(mbuf_get_csum_requested); +STUB(mbuf_get_mhlen); +STUB(mbuf_get_minclsize); +STUB(mbuf_get_mlen); +STUB(mbuf_get_traffic_class); +STUB(mbuf_get_tso_requested); +STUB(mbuf_get_vlan_tag); +STUB(mbuf_getcluster); +STUB(mbuf_gethdr); +STUB(mbuf_getpacket); +STUB(mbuf_inbound_modified); +STUB(mbuf_inet_cksum); +STUB(mbuf_is_traffic_class_privileged); +STUB(mbuf_leadingspace); +STUB(mbuf_len); +STUB(mbuf_maxlen); +STUB(mbuf_mclget); +STUB(mbuf_mclhasreference); +STUB(mbuf_next); +STUB(mbuf_nextpkt); +STUB(mbuf_outbound_finalize); +STUB(mbuf_pkthdr_adjustlen); +STUB(mbuf_pkthdr_header); +STUB(mbuf_pkthdr_len); +STUB(mbuf_pkthdr_rcvif); +STUB(mbuf_pkthdr_setheader); +STUB(mbuf_pkthdr_setlen); +STUB(mbuf_pkthdr_setrcvif); +STUB(mbuf_prepend); +STUB(mbuf_pulldown); +STUB(mbuf_pullup); +STUB(mbuf_set_csum_performed); +STUB(mbuf_set_csum_requested); +STUB(mbuf_set_traffic_class); +STUB(mbuf_set_vlan_tag); +STUB(mbuf_setdata); +STUB(mbuf_setflags); +STUB(mbuf_setflags_mask); +STUB(mbuf_setlen); +STUB(mbuf_setnext); +STUB(mbuf_setnextpkt); +STUB(mbuf_settype); +STUB(mbuf_split); +STUB(mbuf_stats); +STUB(mbuf_tag_allocate); +STUB(mbuf_tag_find); +STUB(mbuf_tag_free); +STUB(mbuf_tag_id_find); +STUB(mbuf_add_drvaux); +STUB(mbuf_find_drvaux); +STUB(mbuf_del_drvaux); 
+STUB(mbuf_trailingspace); +STUB(mbuf_type); +STUB(net_init_add); +STUB(proto_inject); +STUB(proto_input); +STUB(proto_register_plumber); +STUB(proto_unregister_plumber); +STUB(sflt_attach); +STUB(sflt_detach); +STUB(sflt_register); +STUB(sflt_unregister); +STUB(sock_accept); +STUB(sock_bind); +STUB(sock_close); +STUB(sock_connect); +STUB(sock_connectwait); +STUB(sock_getpeername); +STUB(sock_getsockname); +STUB(sock_getsockopt); +STUB(sock_gettype); +STUB(sock_inject_data_in); +STUB(sock_inject_data_out); +STUB(sock_ioctl); +STUB(sock_isconnected); +STUB(sock_isnonblocking); +STUB(sock_listen); +STUB(sock_nointerrupt); +STUB(sock_receive); +STUB(sock_receivembuf); +STUB(sock_send); +STUB(sock_sendmbuf); +STUB(sock_setpriv); +STUB(sock_setsockopt); +STUB(sock_shutdown); +STUB(sock_socket); +STUB(sockopt_copyin); +STUB(sockopt_copyout); +STUB(sockopt_direction); +STUB(sockopt_level); +STUB(sockopt_name); +STUB(sockopt_valsize); +STUB(kev_post_msg); +STUB(ctl_id_by_name); +STUB(ctl_name_by_id); +STUB(ifnet_allocate_extended); +STUB(ifnet_bandwidths); +STUB(ifnet_clone_attach); +STUB(ifnet_clone_detach); +STUB(ifnet_dequeue); +STUB(ifnet_dequeue_multi); +STUB(ifnet_dequeue_service_class); +STUB(ifnet_dequeue_service_class_multi); +STUB(ifnet_enqueue); +STUB(ifnet_get_delegate); +STUB(ifnet_get_inuse_address_list); +STUB(ifnet_get_local_ports); +STUB(ifnet_get_local_ports_extended); +STUB(ifnet_get_rcvq_maxlen); +STUB(ifnet_get_sndq_len); +STUB(ifnet_get_sndq_maxlen); +STUB(ifnet_idle_flags); +STUB(ifnet_inet6_defrouter_llreachinfo); +STUB(ifnet_inet_defrouter_llreachinfo); +STUB(ifnet_input_extended); +STUB(ifnet_latencies); +STUB(ifnet_link_quality); +STUB(ifnet_notice_master_elected); +STUB(ifnet_notice_node_absence); +STUB(ifnet_notice_node_presence); +STUB(ifnet_poll_params); +STUB(ifnet_purge); +STUB(ifnet_report_issues); +STUB(ifnet_set_bandwidths); +STUB(ifnet_set_idle_flags); +STUB(ifnet_set_latencies); +STUB(ifnet_set_link_quality); +STUB(ifnet_set_output_sched_model); +STUB(ifnet_set_poll_params); +STUB(ifnet_set_rcvq_maxlen); +STUB(ifnet_set_sndq_maxlen); +STUB(ifnet_start); +STUB(ifnet_transmit_burst_end); +STUB(ifnet_transmit_burst_start); +STUB(ifnet_tx_compl_status); +STUB(ifnet_flowid); +STUB(ifnet_enable_output); +STUB(ifnet_disable_output); +STUB(ifnet_get_ipsec_offload_frames); +STUB(in6_localaddr); +STUB(in_localaddr); +STUB(in6addr_local); +STUB(inaddr_local); +STUB(inp_clear_INP_INADDR_ANY); +STUB(ip_gre_output); +STUB(m_cat); +STUB(m_free); +STUB(m_freem); +STUB(m_get); +STUB(m_gethdr); +STUB(m_mtod); +STUB(m_prepend_2); +STUB(m_pullup); +STUB(m_split); +STUB(m_trailingspace); +STUB(mbuf_get_driver_scratch); +STUB(mbuf_get_priority); +STUB(mbuf_get_service_class); +STUB(mbuf_get_service_class_index); +STUB(mbuf_get_service_class_max_count); +STUB(mbuf_get_traffic_class_index); +STUB(mbuf_get_traffic_class_max_count); +STUB(mbuf_is_service_class_privileged); +STUB(mbuf_pkthdr_aux_flags); +STUB(mcl_to_paddr); +STUB(net_add_domain); +STUB(net_add_domain_old); +STUB(net_add_proto); +STUB(net_add_proto_old); +STUB(net_del_domain); +STUB(net_del_domain_old); +STUB(net_del_proto); +STUB(net_del_proto_old); +STUB(pffinddomain); +STUB(pffinddomain_old); +STUB(pffindproto); +STUB(pffindproto_old); +STUB(pru_abort_notsupp); +STUB(pru_accept_notsupp); +STUB(pru_bind_notsupp); +STUB(pru_connect2_notsupp); +STUB(pru_connect_notsupp); +STUB(pru_disconnect_notsupp); +STUB(pru_listen_notsupp); +STUB(pru_peeraddr_notsupp); +STUB(pru_rcvd_notsupp); +STUB(pru_rcvoob_notsupp); 
+STUB(pru_send_notsupp); +STUB(pru_send_list_notsupp); +STUB(pru_sense_null); +STUB(pru_shutdown_notsupp); +STUB(pru_sockaddr_notsupp); +STUB(pru_sopoll_notsupp); +STUB(sbappendaddr); +STUB(sbappendrecord); +STUB(sbflush); +STUB(sbspace); +STUB(soabort); +STUB(socantrcvmore); +STUB(socantsendmore); +STUB(sock_getlistener); +STUB(sock_gettclassopt); +STUB(sock_release); +STUB(sock_retain); +STUB(sock_settclassopt); +STUB(sock_catchevents); +STUB(sock_setupcall); +STUB(sock_setupcalls); +STUB(sodisconnect); +STUB(sofree); +STUB(sofreelastref); +STUB(soisconnected); +STUB(soisconnecting); +STUB(soisdisconnected); +STUB(soisdisconnecting); +STUB(sonewconn); +STUB(sooptcopyin); +STUB(sooptcopyout); +STUB(sopoll); +STUB(soreceive); +STUB(soreceive_list); +STUB(soreserve); +STUB(sorwakeup); +STUB(sosend); +STUB(sosend_list); +STUB(utun_ctl_disable_crypto_dtls); +STUB(utun_ctl_register_dtls); +STUB(utun_pkt_dtls_input); +STUB(dlil_resolve_multi); +STUB(inet_cksum_simple); +STUB(arp_ip_handle_input); +STUB(arp_ifinit); +STUB(arp_lookup_ip); +STUB(ip_gre_register_input); +STUB(sock_iskernel); +#undef STUB + +/* + * Called from vm_pageout.c. Nothing to be done when there's no networking. + */ +void m_drain(void); +void m_drain(void) { - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_tso_mtu(void); -int ifnet_get_tso_mtu(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_wake_flags(void); -int ifnet_get_wake_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_hdrlen(void); -int ifnet_hdrlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_index(void); -int ifnet_index(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_input(void); -int ifnet_input(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_interface_family_find(void); -int ifnet_interface_family_find(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_ioctl(void); -int ifnet_ioctl(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_lastchange(void); -int ifnet_lastchange(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_list_free(void); -int ifnet_list_free(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_list_get(void); -int ifnet_list_get(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_lladdr(void); -int ifnet_lladdr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_lladdr_copy_bytes(void); -int ifnet_lladdr_copy_bytes(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_llbroadcast_copy_bytes(void); -int ifnet_llbroadcast_copy_bytes(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_metric(void); -int ifnet_metric(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_mtu(void); -int ifnet_mtu(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_name(void); -int ifnet_name(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_offload(void); -int ifnet_offload(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int 
ifnet_output(void); -int ifnet_output(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_output_raw(void); -int ifnet_output_raw(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_reference(void); -int ifnet_reference(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_release(void); -int ifnet_release(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_remove_multicast(void); -int ifnet_remove_multicast(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_resolve_multicast(void); -int ifnet_resolve_multicast(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_addrlen(void); -int ifnet_set_addrlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_baudrate(void); -int ifnet_set_baudrate(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_capabilities_enabled(void); -int ifnet_set_capabilities_enabled(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_capabilities_supported(void); -int ifnet_set_capabilities_supported(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_delegate(void); -int ifnet_set_delegate(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_eflags(void); -int ifnet_set_eflags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_flags(void); -int ifnet_set_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_hdrlen(void); -int ifnet_set_hdrlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_link_mib_data(void); -int ifnet_set_link_mib_data(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_lladdr(void); -int ifnet_set_lladdr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_metric(void); -int ifnet_set_metric(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_mtu(void); -int ifnet_set_mtu(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_offload(void); -int ifnet_set_offload(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_promiscuous(void); -int ifnet_set_promiscuous(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_stat(void); -int ifnet_set_stat(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_tso_mtu(void); -int ifnet_set_tso_mtu(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_wake_flags(void); -int ifnet_set_wake_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_softc(void); -int ifnet_softc(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_stat(void); -int ifnet_stat(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_stat_increment(void); -int ifnet_stat_increment(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_stat_increment_in(void); 
-int ifnet_stat_increment_in(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_stat_increment_out(void); -int ifnet_stat_increment_out(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_touch_lastchange(void); -int ifnet_touch_lastchange(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_type(void); -int ifnet_type(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_unit(void); -int ifnet_unit(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int in_cksum(void); -int in_cksum(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int inet_arp_handle_input(void); -int inet_arp_handle_input(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int inet_arp_init_ifaddr(void); -int inet_arp_init_ifaddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int inet_arp_lookup(void); -int inet_arp_lookup(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ipf_addv4(void); -int ipf_addv4(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ipf_addv6(void); -int ipf_addv6(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ipf_inject_input(void); -int ipf_inject_input(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ipf_inject_output(void); -int ipf_inject_output(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ipf_remove(void); -int ipf_remove(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int kev_msg_post(void); -int kev_msg_post(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int kev_vendor_code_find(void); -int kev_vendor_code_find(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_adj(void); -int mbuf_adj(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_adjustlen(void); -int mbuf_adjustlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_align_32(void); -int mbuf_align_32(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_alloccluster(void); -int mbuf_alloccluster(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_allocpacket(void); -int mbuf_allocpacket(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_allocpacket_list(void); -int mbuf_allocpacket_list(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_attachcluster(void); -int mbuf_attachcluster(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_clear_csum_performed(void); -int mbuf_clear_csum_performed(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_clear_csum_requested(void); -int mbuf_clear_csum_requested(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_clear_vlan_tag(void); -int mbuf_clear_vlan_tag(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_concatenate(void); -int mbuf_concatenate(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int 
mbuf_copy_pkthdr(void); -int mbuf_copy_pkthdr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_copyback(void); -int mbuf_copyback(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_copydata(void); -int mbuf_copydata(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_copym(void); -int mbuf_copym(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_data(void); -int mbuf_data(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_data_to_physical(void); -int mbuf_data_to_physical(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_datastart(void); -int mbuf_datastart(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_dup(void); -int mbuf_dup(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_flags(void); -int mbuf_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_free(void); -int mbuf_free(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_freecluster(void); -int mbuf_freecluster(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_freem(void); -int mbuf_freem(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_freem_list(void); -int mbuf_freem_list(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get(void); -int mbuf_get(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_csum_performed(void); -int mbuf_get_csum_performed(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_csum_requested(void); -int mbuf_get_csum_requested(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_mhlen(void); -int mbuf_get_mhlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_minclsize(void); -int mbuf_get_minclsize(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_mlen(void); -int mbuf_get_mlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_traffic_class(void); -int mbuf_get_traffic_class(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_tso_requested(void); -int mbuf_get_tso_requested(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_vlan_tag(void); -int mbuf_get_vlan_tag(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_getcluster(void); -int mbuf_getcluster(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_gethdr(void); -int mbuf_gethdr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_getpacket(void); -int mbuf_getpacket(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_inbound_modified(void); -int mbuf_inbound_modified(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_inet_cksum(void); -int mbuf_inet_cksum(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_is_traffic_class_privileged(void); -int 
mbuf_is_traffic_class_privileged(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_leadingspace(void); -int mbuf_leadingspace(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_len(void); -int mbuf_len(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_maxlen(void); -int mbuf_maxlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_mclget(void); -int mbuf_mclget(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_mclhasreference(void); -int mbuf_mclhasreference(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_next(void); -int mbuf_next(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_nextpkt(void); -int mbuf_nextpkt(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_outbound_finalize(void); -int mbuf_outbound_finalize(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_adjustlen(void); -int mbuf_pkthdr_adjustlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_header(void); -int mbuf_pkthdr_header(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_len(void); -int mbuf_pkthdr_len(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_rcvif(void); -int mbuf_pkthdr_rcvif(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_setheader(void); -int mbuf_pkthdr_setheader(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_setlen(void); -int mbuf_pkthdr_setlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_setrcvif(void); -int mbuf_pkthdr_setrcvif(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_prepend(void); -int mbuf_prepend(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pulldown(void); -int mbuf_pulldown(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pullup(void); -int mbuf_pullup(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_set_csum_performed(void); -int mbuf_set_csum_performed(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_set_csum_requested(void); -int mbuf_set_csum_requested(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_set_traffic_class(void); -int mbuf_set_traffic_class(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_set_vlan_tag(void); -int mbuf_set_vlan_tag(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_setdata(void); -int mbuf_setdata(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_setflags(void); -int mbuf_setflags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_setflags_mask(void); -int mbuf_setflags_mask(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_setlen(void); -int mbuf_setlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int 
mbuf_setnext(void); -int mbuf_setnext(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_setnextpkt(void); -int mbuf_setnextpkt(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_settype(void); -int mbuf_settype(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_split(void); -int mbuf_split(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_stats(void); -int mbuf_stats(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_tag_allocate(void); -int mbuf_tag_allocate(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_tag_find(void); -int mbuf_tag_find(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_tag_free(void); -int mbuf_tag_free(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_tag_id_find(void); -int mbuf_tag_id_find(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_add_drvaux(void); -int mbuf_add_drvaux(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_find_drvaux(void); -int mbuf_find_drvaux(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_del_drvaux(void); -int mbuf_del_drvaux(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_trailingspace(void); -int mbuf_trailingspace(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_type(void); -int mbuf_type(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_init_add(void); -int net_init_add(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int proto_inject(void); -int proto_inject(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int proto_input(void); -int proto_input(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int proto_register_plumber(void); -int proto_register_plumber(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int proto_unregister_plumber(void); -int proto_unregister_plumber(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sflt_attach(void); -int sflt_attach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sflt_detach(void); -int sflt_detach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sflt_register(void); -int sflt_register(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sflt_unregister(void); -int sflt_unregister(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_accept(void); -int sock_accept(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_bind(void); -int sock_bind(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_close(void); -int sock_close(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_connect(void); -int sock_connect(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_connectwait(void); -int sock_connectwait(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - 
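
Every hand-rolled stub deleted in this hunk, such as sock_connectwait() just above, has the same prototype-plus-panic shape, which is exactly what the STUB() macro introduced at the top of the hunk generates. A minimal sketch of one expansion, assuming only the kernel's panic() declaration (from <kern/debug.h>) is in scope:

	/* What `STUB(sock_connectwait);` expands to, in effect, after preprocessing. */
	int sock_connectwait(void);
	int sock_connectwait(void)
	{
		panic("stub called in a config with no networking");
		return 0;
	}

Since the expansion is byte-for-byte the pattern being deleted, the macro change is purely mechanical: one STUB() line per exported symbol, with no behavioral difference in a no-networking config.
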
-int sock_getpeername(void); -int sock_getpeername(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_getsockname(void); -int sock_getsockname(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_getsockopt(void); -int sock_getsockopt(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_gettype(void); -int sock_gettype(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_inject_data_in(void); -int sock_inject_data_in(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_inject_data_out(void); -int sock_inject_data_out(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_ioctl(void); -int sock_ioctl(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_isconnected(void); -int sock_isconnected(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_isnonblocking(void); -int sock_isnonblocking(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_listen(void); -int sock_listen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_nointerrupt(void); -int sock_nointerrupt(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_receive(void); -int sock_receive(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_receivembuf(void); -int sock_receivembuf(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_send(void); -int sock_send(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_sendmbuf(void); -int sock_sendmbuf(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_setpriv(void); -int sock_setpriv(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_setsockopt(void); -int sock_setsockopt(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_shutdown(void); -int sock_shutdown(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_socket(void); -int sock_socket(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sockopt_copyin(void); -int sockopt_copyin(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sockopt_copyout(void); -int sockopt_copyout(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sockopt_direction(void); -int sockopt_direction(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sockopt_level(void); -int sockopt_level(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sockopt_name(void); -int sockopt_name(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sockopt_valsize(void); -int sockopt_valsize(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int kev_post_msg(void); -int kev_post_msg(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ctl_id_by_name(void); -int ctl_id_by_name(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ctl_name_by_id(void); -int ctl_name_by_id(void) -{ - panic("stub called in a config 
with no networking"); - return 0; -} - -int ifnet_allocate_extended(void); -int ifnet_allocate_extended(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_bandwidths(void); -int ifnet_bandwidths(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_clone_attach(void); -int ifnet_clone_attach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_clone_detach(void); -int ifnet_clone_detach(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_dequeue(void); -int ifnet_dequeue(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_dequeue_multi(void); -int ifnet_dequeue_multi(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_dequeue_service_class(void); -int ifnet_dequeue_service_class(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_dequeue_service_class_multi(void); -int ifnet_dequeue_service_class_multi(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_enqueue(void); -int ifnet_enqueue(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_delegate(void); -int ifnet_get_delegate(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_inuse_address_list(void); -int ifnet_get_inuse_address_list(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_local_ports(void); -int ifnet_get_local_ports(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_local_ports_extended(void); -int ifnet_get_local_ports_extended(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_rcvq_maxlen(void); -int ifnet_get_rcvq_maxlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_sndq_len(void); -int ifnet_get_sndq_len(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_get_sndq_maxlen(void); -int ifnet_get_sndq_maxlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_idle_flags(void); -int ifnet_idle_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_inet6_defrouter_llreachinfo(void); -int ifnet_inet6_defrouter_llreachinfo(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_inet_defrouter_llreachinfo(void); -int ifnet_inet_defrouter_llreachinfo(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_input_extended(void); -int ifnet_input_extended(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_latencies(void); -int ifnet_latencies(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_link_quality(void); -int ifnet_link_quality(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_notice_master_elected(void); -int ifnet_notice_master_elected(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_notice_node_absence(void); -int ifnet_notice_node_absence(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_notice_node_presence(void); -int 
ifnet_notice_node_presence(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_poll_params(void); -int ifnet_poll_params(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_purge(void); -int ifnet_purge(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_report_issues(void); -int ifnet_report_issues(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_bandwidths(void); -int ifnet_set_bandwidths(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_idle_flags(void); -int ifnet_set_idle_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_latencies(void); -int ifnet_set_latencies(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_link_quality(void); -int ifnet_set_link_quality(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_output_sched_model(void); -int ifnet_set_output_sched_model(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_poll_params(void); -int ifnet_set_poll_params(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_rcvq_maxlen(void); -int ifnet_set_rcvq_maxlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_set_sndq_maxlen(void); -int ifnet_set_sndq_maxlen(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_start(void); -int ifnet_start(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_transmit_burst_end(void); -int ifnet_transmit_burst_end(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_transmit_burst_start(void); -int ifnet_transmit_burst_start(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_tx_compl_status(void); -int ifnet_tx_compl_status(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_flowid(void); -int ifnet_flowid(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_enable_output(void); -int ifnet_enable_output(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ifnet_disable_output(void); -int ifnet_disable_output(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int in6_localaddr(void); -int in6_localaddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int in_localaddr(void); -int in_localaddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int in6addr_local(void); -int in6addr_local(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int inaddr_local(void); -int inaddr_local(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int inp_clear_INP_INADDR_ANY(void); -int inp_clear_INP_INADDR_ANY(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ip_gre_output(void); -int ip_gre_output(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_cat(void); -int m_cat(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_free(void); -int m_free(void) -{ - panic("stub 
called in a config with no networking"); - return 0; -} - -int m_freem(void); -int m_freem(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_get(void); -int m_get(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_gethdr(void); -int m_gethdr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_mtod(void); -int m_mtod(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_prepend_2(void); -int m_prepend_2(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_pullup(void); -int m_pullup(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_split(void); -int m_split(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int m_trailingspace(void); -int m_trailingspace(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_driver_scratch(void); -int mbuf_get_driver_scratch(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_priority(void); -int mbuf_get_priority(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_service_class(void); -int mbuf_get_service_class(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_service_class_index(void); -int mbuf_get_service_class_index(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_service_class_max_count(void); -int mbuf_get_service_class_max_count(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_traffic_class_index(void); -int mbuf_get_traffic_class_index(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_get_traffic_class_max_count(void); -int mbuf_get_traffic_class_max_count(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_is_service_class_privileged(void); -int mbuf_is_service_class_privileged(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mbuf_pkthdr_aux_flags(void); -int mbuf_pkthdr_aux_flags(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int mcl_to_paddr(void); -int mcl_to_paddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_add_domain(void); -int net_add_domain(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_add_domain_old(void); -int net_add_domain_old(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_add_proto(void); -int net_add_proto(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_add_proto_old(void); -int net_add_proto_old(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_del_domain(void); -int net_del_domain(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_del_domain_old(void); -int net_del_domain_old(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_del_proto(void); -int net_del_proto(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int net_del_proto_old(void); -int net_del_proto_old(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int 
pffinddomain(void); -int pffinddomain(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pffinddomain_old(void); -int pffinddomain_old(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pffindproto(void); -int pffindproto(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pffindproto_old(void); -int pffindproto_old(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_abort_notsupp(void); -int pru_abort_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_accept_notsupp(void); -int pru_accept_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_bind_notsupp(void); -int pru_bind_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_connect2_notsupp(void); -int pru_connect2_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_connect_notsupp(void); -int pru_connect_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_disconnect_notsupp(void); -int pru_disconnect_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_listen_notsupp(void); -int pru_listen_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_peeraddr_notsupp(void); -int pru_peeraddr_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_rcvd_notsupp(void); -int pru_rcvd_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_rcvoob_notsupp(void); -int pru_rcvoob_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_send_notsupp(void); -int pru_send_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_sense_null(void); -int pru_sense_null(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_shutdown_notsupp(void); -int pru_shutdown_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_sockaddr_notsupp(void); -int pru_sockaddr_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int pru_sopoll_notsupp(void); -int pru_sopoll_notsupp(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sbappendaddr(void); -int sbappendaddr(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sbappendrecord(void); -int sbappendrecord(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sbflush(void); -int sbflush(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sbspace(void); -int sbspace(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int soabort(void); -int soabort(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int socantrcvmore(void); -int socantrcvmore(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int socantsendmore(void); -int socantsendmore(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_getlistener(void); -int sock_getlistener(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int 
sock_gettclassopt(void); -int sock_gettclassopt(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_release(void); -int sock_release(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_retain(void); -int sock_retain(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_settclassopt(void); -int sock_settclassopt(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_catchevents(void); -int sock_catchevents(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_setupcall(void); -int sock_setupcall(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sock_setupcalls(void); -int sock_setupcalls(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sodisconnect(void); -int sodisconnect(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sofree(void); -int sofree(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sofreelastref(void); -int sofreelastref(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int soisconnected(void); -int soisconnected(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int soisconnecting(void); -int soisconnecting(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int soisdisconnected(void); -int soisdisconnected(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int soisdisconnecting(void); -int soisdisconnecting(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sonewconn(void); -int sonewconn(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sooptcopyin(void); -int sooptcopyin(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sooptcopyout(void); -int sooptcopyout(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sopoll(void); -int sopoll(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int soreceive(void); -int soreceive(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int soreserve(void); -int soreserve(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sorwakeup(void); -int sorwakeup(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int sosend(void); -int sosend(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - - - -int utun_ctl_disable_crypto_dtls(void); -int utun_ctl_disable_crypto_dtls(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int utun_ctl_register_dtls(void); -int utun_ctl_register_dtls(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int utun_pkt_dtls_input(void); -int utun_pkt_dtls_input(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - - -int dlil_resolve_multi(void); -int dlil_resolve_multi(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - - -int inet_cksum_simple(void); -int inet_cksum_simple(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - - -int arp_ip_handle_input(void); -int arp_ip_handle_input(void) -{ - panic("stub called in a config with no 
networking"); - return 0; -} - -int arp_ifinit(void); -int arp_ifinit(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int arp_lookup_ip(void); -int arp_lookup_ip(void) -{ - panic("stub called in a config with no networking"); - return 0; -} - -int ip_gre_register_input(void); -int ip_gre_register_input(void) -{ - panic("stub called in a config with no networking"); - return 0; - + return; } -#endif +#endif /* !NETWORKING */ diff --git a/bsd/net/ntstat.c b/bsd/net/ntstat.c index 15ad48f34..49380d884 100644 --- a/bsd/net/ntstat.c +++ b/bsd/net/ntstat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -57,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +72,54 @@ static int nstat_privcheck = 0; SYSCTL_INT(_net, OID_AUTO, statistics_privcheck, CTLFLAG_RW | CTLFLAG_LOCKED, &nstat_privcheck, 0, "Entitlement check"); +SYSCTL_NODE(_net, OID_AUTO, stats, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "network statistics"); + +static int nstat_debug = 0; +SYSCTL_INT(_net_stats, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, + &nstat_debug, 0, ""); + +static int nstat_sendspace = 2048; +SYSCTL_INT(_net_stats, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED, + &nstat_sendspace, 0, ""); + +static int nstat_recvspace = 8192; +SYSCTL_INT(_net_stats, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED, + &nstat_recvspace, 0, ""); + +static int nstat_successmsgfailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, successmsgfailures, CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_successmsgfailures, 0, ""); + +static int nstat_sendountfailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, sendountfailures, CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_sendountfailures, 0, ""); + +static int nstat_sysinfofailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, sysinfofalures, CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_sysinfofailures, 0, ""); + +static int nstat_srccountfailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, srccountfailures, CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_srccountfailures, 0, ""); + +static int nstat_descriptionfailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, descriptionfailures, CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_descriptionfailures, 0, ""); + +static int nstat_msgremovedfailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, msgremovedfailures , CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_msgremovedfailures, 0, ""); + +static int nstat_srcaddedfailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, srcaddedfailures , CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_srcaddedfailures, 0, ""); + +static int nstat_msgerrorfailures = 0; +SYSCTL_INT(_net_stats, OID_AUTO, msgerrorfailures , CTLFLAG_RD| CTLFLAG_LOCKED, + &nstat_msgerrorfailures, 0, ""); + + enum { NSTAT_FLAG_CLEANUP = (1 << 0), @@ -125,11 +175,22 @@ static u_int32_t nstat_tcp_watchers = 0; static void nstat_control_register(void); +/* + * The lock order is as follows: + * + * socket_lock (inpcb) + * nstat_mtx + * state->mtx + */ static volatile OSMallocTag nstat_malloc_tag = NULL; static nstat_control_state *nstat_controls = NULL; static uint64_t nstat_idle_time = 0; static decl_lck_mtx_data(, nstat_mtx); +/* some extern definitions */ +extern void mbuf_report_peak_usage(void); +extern void tcp_report_stats(void); + static void nstat_copy_sa_out( const struct sockaddr *src, @@ -146,8 +207,8 @@ nstat_copy_sa_out( if 
(IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) { if (sin6->sin6_scope_id == 0) - sin6->sin6_scope_id = ntohs(sin6->sin6_addr.__u6_addr.__u6_addr16[1]); - sin6->sin6_addr.__u6_addr.__u6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); + sin6->sin6_addr.s6_addr16[1] = 0; } } } @@ -184,8 +245,8 @@ nstat_ip6_to_sockaddr( sin6->sin6_addr = *ip6; if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) { - sin6->sin6_scope_id = ntohs(sin6->sin6_addr.__u6_addr.__u6_addr16[1]); - sin6->sin6_addr.__u6_addr.__u6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); + sin6->sin6_addr.s6_addr16[1] = 0; } } @@ -230,6 +291,7 @@ static void nstat_init_route_provider(void); static void nstat_init_tcp_provider(void); static void nstat_init_udp_provider(void); static void nstat_init_ifnet_provider(void); +static void nstat_init_sysinfo_provider(void); __private_extern__ void nstat_init(void) @@ -249,6 +311,7 @@ nstat_init(void) nstat_init_tcp_provider(); nstat_init_udp_provider(); nstat_init_ifnet_provider(); + nstat_init_sysinfo_provider(); nstat_control_register(); } } @@ -330,8 +393,14 @@ nstat_route_lookup( { return EINVAL; } + if ((param->dst.v4.sin_family == AF_INET && + param->dst.v4.sin_len < sizeof(struct sockaddr_in)) || + (param->dst.v6.sin6_family == AF_INET6 && + param->dst.v6.sin6_len < sizeof(struct sockaddr_in6))) + { + return EINVAL; + } - // TBD: Need to validate length of sockaddr for different families? dst.const_sa = (const struct sockaddr*)¶m->dst; mask.const_sa = param->mask.v4.sin_family ? (const struct sockaddr*)¶m->mask : NULL; @@ -511,9 +580,9 @@ nstat_route_copy_descriptor( bzero(desc, sizeof(*desc)); struct rtentry *rt = (struct rtentry*)cookie; - desc->id = (uintptr_t)rt; - desc->parent_id = (uintptr_t)rt->rt_parent; - desc->gateway_id = (uintptr_t)rt->rt_gwroute; + desc->id = (uint64_t)VM_KERNEL_ADDRPERM(rt); + desc->parent_id = (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_parent); + desc->gateway_id = (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_gwroute); // key/dest @@ -749,23 +818,43 @@ nstat_route_rtt( * might be gone by the time we get the PCB detach notification), * we need to cache the process name. Without this, proc_name() would * return null and the process name would never be sent to userland. + * + * For UDP sockets, we also store the cached the connection tuples along with + * the interface index. This is necessary because when UDP sockets are + * disconnected, the connection tuples are forever lost from the inpcb, thus + * we need to keep track of the last call to connect() in ntstat. 
*/ -struct nstat_tcpudp_cookie { +struct nstat_tucookie { struct inpcb *inp; char pname[MAXCOMLEN+1]; + bool cached; + union + { + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } local; + union + { + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } remote; + unsigned int if_index; }; -static struct nstat_tcpudp_cookie * -nstat_tcpudp_cookie_alloc( +static struct nstat_tucookie * +nstat_tucookie_alloc_internal( struct inpcb *inp, - bool ref) + bool ref, + bool locked) { - struct nstat_tcpudp_cookie *cookie; + struct nstat_tucookie *cookie; cookie = OSMalloc(sizeof(*cookie), nstat_malloc_tag); if (cookie == NULL) return NULL; - if (ref && in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + if (!locked) + lck_mtx_assert(&nstat_mtx, LCK_MTX_ASSERT_NOTOWNED); + if (ref && in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) { OSFree(cookie, sizeof(*cookie), nstat_malloc_tag); return NULL; @@ -774,19 +863,63 @@ nstat_tcpudp_cookie_alloc( cookie->inp = inp; proc_name(inp->inp_socket->last_pid, cookie->pname, sizeof(cookie->pname)); + /* + * We only increment the reference count for UDP sockets because we + * only cache UDP socket tuples. + */ + if (SOCK_PROTO(inp->inp_socket) == IPPROTO_UDP) + OSIncrementAtomic(&inp->inp_nstat_refcnt); return cookie; } +static struct nstat_tucookie * +nstat_tucookie_alloc( + struct inpcb *inp) +{ + return nstat_tucookie_alloc_internal(inp, false, false); +} + +static struct nstat_tucookie * +nstat_tucookie_alloc_ref( + struct inpcb *inp) +{ + return nstat_tucookie_alloc_internal(inp, true, false); +} + +static struct nstat_tucookie * +nstat_tucookie_alloc_ref_locked( + struct inpcb *inp) +{ + return nstat_tucookie_alloc_internal(inp, true, true); +} + static void -nstat_tcpudp_cookie_release( - struct nstat_tcpudp_cookie *cookie, +nstat_tucookie_release_internal( + struct nstat_tucookie *cookie, int inplock) { + if (SOCK_PROTO(cookie->inp->inp_socket) == IPPROTO_UDP) + OSDecrementAtomic(&cookie->inp->inp_nstat_refcnt); in_pcb_checkstate(cookie->inp, WNT_RELEASE, inplock); OSFree(cookie, sizeof(*cookie), nstat_malloc_tag); } +static void +nstat_tucookie_release( + struct nstat_tucookie *cookie) +{ + nstat_tucookie_release_internal(cookie, false); +} + +static void +nstat_tucookie_release_locked( + struct nstat_tucookie *cookie) +{ + nstat_tucookie_release_internal(cookie, true); +} + + static nstat_provider nstat_tcp_provider; static errno_t @@ -862,7 +995,7 @@ nstat_tcpudp_lookup( return ENOENT; // At this point we have a ref to the inpcb - *out_cookie = nstat_tcpudp_cookie_alloc(inp, false); + *out_cookie = nstat_tucookie_alloc(inp); if (*out_cookie == NULL) in_pcb_checkstate(inp, WNT_RELEASE, 0); @@ -882,15 +1015,14 @@ static int nstat_tcp_gone( nstat_provider_cookie_t cookie) { - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; struct inpcb *inp; struct tcpcb *tp; return (!(inp = tucookie->inp) || - !(tp = intotcpcb(inp)) || - inp->inp_state == INPCB_STATE_DEAD || - tp->t_state == TCPS_TIME_WAIT) ? 1 : 0; + !(tp = intotcpcb(inp)) || + inp->inp_state == INPCB_STATE_DEAD) ? 
1 : 0; } static errno_t @@ -899,8 +1031,8 @@ nstat_tcp_counts( struct nstat_counts *out_counts, int *out_gone) { - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; struct inpcb *inp; bzero(out_counts, sizeof(*out_counts)); @@ -935,6 +1067,8 @@ nstat_tcp_counts( atomic_get_64(out_counts->nstat_cell_txbytes, &inp->inp_cstat->txbytes); atomic_get_64(out_counts->nstat_wifi_rxbytes, &inp->inp_wstat->rxbytes); atomic_get_64(out_counts->nstat_wifi_txbytes, &inp->inp_wstat->txbytes); + atomic_get_64(out_counts->nstat_wired_rxbytes, &inp->inp_Wstat->rxbytes); + atomic_get_64(out_counts->nstat_wired_txbytes, &inp->inp_Wstat->txbytes); return 0; } @@ -944,10 +1078,10 @@ nstat_tcp_release( nstat_provider_cookie_t cookie, int locked) { - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; - nstat_tcpudp_cookie_release(tucookie, locked); + nstat_tucookie_release_internal(tucookie, locked); } static errno_t @@ -960,16 +1094,16 @@ nstat_tcp_add_watcher( // Add all current tcp inpcbs. Ignore those in timewait struct inpcb *inp; - struct nstat_tcpudp_cookie *cookie; - for (inp = LIST_FIRST(tcbinfo.ipi_listhead); inp; inp = LIST_NEXT(inp, inp_list)) + struct nstat_tucookie *cookie; + LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { - cookie = nstat_tcpudp_cookie_alloc(inp, true); + cookie = nstat_tucookie_alloc_ref(inp); if (cookie == NULL) continue; if (nstat_control_source_add(0, state, &nstat_tcp_provider, cookie) != 0) { - nstat_tcpudp_cookie_release(cookie, false); + nstat_tucookie_release(cookie); break; } } @@ -990,11 +1124,12 @@ __private_extern__ void nstat_tcp_new_pcb( struct inpcb *inp) { - struct nstat_tcpudp_cookie *cookie; + struct nstat_tucookie *cookie; if (nstat_tcp_watchers == 0) return; - + + socket_lock(inp->inp_socket, 0); lck_mtx_lock(&nstat_mtx); nstat_control_state *state; for (state = nstat_controls; state; state = state->ncs_next) @@ -1003,19 +1138,20 @@ nstat_tcp_new_pcb( { // this client is watching tcp // acquire a reference for it - cookie = nstat_tcpudp_cookie_alloc(inp, true); + cookie = nstat_tucookie_alloc_ref_locked(inp); if (cookie == NULL) continue; // add the source, if that fails, release the reference if (nstat_control_source_add(0, state, &nstat_tcp_provider, cookie) != 0) { - nstat_tcpudp_cookie_release(cookie, false); + nstat_tucookie_release_locked(cookie); break; } } } lck_mtx_unlock(&nstat_mtx); + socket_unlock(inp->inp_socket, 0); } __private_extern__ void @@ -1024,7 +1160,8 @@ nstat_pcb_detach(struct inpcb *inp) nstat_control_state *state; nstat_src *src, *prevsrc; nstat_src *dead_list = NULL; - struct nstat_tcpudp_cookie *tucookie; + struct nstat_tucookie *tucookie; + errno_t result; if (inp == NULL || (nstat_tcp_watchers == 0 && nstat_udp_watchers == 0)) return; @@ -1035,20 +1172,29 @@ nstat_pcb_detach(struct inpcb *inp) for (prevsrc = NULL, src = state->ncs_srcs; src; prevsrc = src, src = src->next) { - tucookie = (struct nstat_tcpudp_cookie *)src->cookie; + tucookie = (struct nstat_tucookie *)src->cookie; if (tucookie->inp == inp) break; } if (src) { // send one last counts notification - nstat_control_send_counts(state, src, 0, NULL); + result = nstat_control_send_counts(state, src, 0, NULL); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_counts() %d\n", + __func__, result); // send a last description - 
nstat_control_send_description(state, src, 0); + result = nstat_control_send_description(state, src, 0); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_description() %d\n", + __func__, result); // send the source removed notification - nstat_control_send_removed(state, src); + result = nstat_control_send_removed(state, src); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_removed() %d\n", + __func__, result); if (prevsrc) prevsrc->next = src->next; @@ -1070,6 +1216,87 @@ nstat_pcb_detach(struct inpcb *inp) } } +__private_extern__ void +nstat_pcb_cache(struct inpcb *inp) +{ + nstat_control_state *state; + nstat_src *src; + struct nstat_tucookie *tucookie; + + if (inp == NULL || nstat_udp_watchers == 0 || + inp->inp_nstat_refcnt == 0) + return; + VERIFY(SOCK_PROTO(inp->inp_socket) == IPPROTO_UDP); + lck_mtx_lock(&nstat_mtx); + for (state = nstat_controls; state; state = state->ncs_next) { + lck_mtx_lock(&state->mtx); + for (src = state->ncs_srcs; src; src = src->next) + { + tucookie = (struct nstat_tucookie *)src->cookie; + if (tucookie->inp == inp) + { + if (inp->inp_vflag & INP_IPV6) + { + nstat_ip6_to_sockaddr(&inp->in6p_laddr, + inp->inp_lport, + &tucookie->local.v6, + sizeof(tucookie->local)); + nstat_ip6_to_sockaddr(&inp->in6p_faddr, + inp->inp_fport, + &tucookie->remote.v6, + sizeof(tucookie->remote)); + } + else if (inp->inp_vflag & INP_IPV4) + { + nstat_ip_to_sockaddr(&inp->inp_laddr, + inp->inp_lport, + &tucookie->local.v4, + sizeof(tucookie->local)); + nstat_ip_to_sockaddr(&inp->inp_faddr, + inp->inp_fport, + &tucookie->remote.v4, + sizeof(tucookie->remote)); + } + if (inp->inp_last_outifp) + tucookie->if_index = + inp->inp_last_outifp->if_index; + tucookie->cached = true; + break; + } + } + lck_mtx_unlock(&state->mtx); + } + lck_mtx_unlock(&nstat_mtx); +} + +__private_extern__ void +nstat_pcb_invalidate_cache(struct inpcb *inp) +{ + nstat_control_state *state; + nstat_src *src; + struct nstat_tucookie *tucookie; + + if (inp == NULL || nstat_udp_watchers == 0 || + inp->inp_nstat_refcnt == 0) + return; + VERIFY(SOCK_PROTO(inp->inp_socket) == IPPROTO_UDP); + lck_mtx_lock(&nstat_mtx); + for (state = nstat_controls; state; state = state->ncs_next) { + lck_mtx_lock(&state->mtx); + for (src = state->ncs_srcs; src; src = src->next) + { + tucookie = (struct nstat_tucookie *)src->cookie; + if (tucookie->inp == inp) + { + tucookie->cached = false; + break; + } + } + lck_mtx_unlock(&state->mtx); + } + lck_mtx_unlock(&nstat_mtx); +} + static errno_t nstat_tcp_copy_descriptor( nstat_provider_cookie_t cookie, @@ -1085,8 +1312,8 @@ nstat_tcp_copy_descriptor( return EINVAL; nstat_tcp_descriptor *desc = (nstat_tcp_descriptor*)data; - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; struct inpcb *inp = tucookie->inp; struct tcpcb *tp = intotcpcb(inp); bzero(desc, sizeof(*desc)); @@ -1114,6 +1341,11 @@ nstat_tcp_copy_descriptor( desc->txunacked = tp->snd_max - tp->snd_una; desc->txwindow = tp->snd_wnd; desc->txcwindow = tp->snd_cwnd; + + if (CC_ALGO(tp)->name != NULL) { + strlcpy(desc->cc_algo, CC_ALGO(tp)->name, + sizeof(desc->cc_algo)); + } struct socket *so = inp->inp_socket; if (so) @@ -1123,6 +1355,7 @@ nstat_tcp_copy_descriptor( desc->upid = so->last_upid; desc->pid = so->last_pid; desc->traffic_class = so->so_traffic_class; + desc->traffic_mgt_flags = so->so_traffic_mgt_flags; proc_name(desc->pid, desc->pname, sizeof(desc->pname)); if (desc->pname == 
NULL || desc->pname[0] == 0) { @@ -1136,6 +1369,7 @@ nstat_tcp_copy_descriptor( sizeof(tucookie->pname)); } memcpy(desc->uuid, so->last_uuid, sizeof(so->last_uuid)); + memcpy(desc->vuuid, so->so_vuuid, sizeof(so->so_vuuid)); if (so->so_flags & SOF_DELEGATED) { desc->eupid = so->e_upid; desc->epid = so->e_pid; @@ -1188,8 +1422,8 @@ static int nstat_udp_gone( nstat_provider_cookie_t cookie) { - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; struct inpcb *inp; return (!(inp = tucookie->inp) || @@ -1202,8 +1436,8 @@ nstat_udp_counts( struct nstat_counts *out_counts, int *out_gone) { - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; *out_gone = 0; @@ -1224,6 +1458,8 @@ nstat_udp_counts( atomic_get_64(out_counts->nstat_cell_txbytes, &inp->inp_cstat->txbytes); atomic_get_64(out_counts->nstat_wifi_rxbytes, &inp->inp_wstat->rxbytes); atomic_get_64(out_counts->nstat_wifi_txbytes, &inp->inp_wstat->txbytes); + atomic_get_64(out_counts->nstat_wired_rxbytes, &inp->inp_Wstat->rxbytes); + atomic_get_64(out_counts->nstat_wired_txbytes, &inp->inp_Wstat->txbytes); return 0; } @@ -1233,10 +1469,10 @@ nstat_udp_release( nstat_provider_cookie_t cookie, int locked) { - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; - nstat_tcpudp_cookie_release(tucookie, locked); + nstat_tucookie_release_internal(tucookie, locked); } static errno_t @@ -1244,21 +1480,21 @@ nstat_udp_add_watcher( nstat_control_state *state) { struct inpcb *inp; - struct nstat_tcpudp_cookie *cookie; + struct nstat_tucookie *cookie; OSIncrementAtomic(&nstat_udp_watchers); lck_rw_lock_shared(udbinfo.ipi_lock); // Add all current UDP inpcbs. 
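+	// Each watcher gets its own cookie per inpcb. The cookie holds
+	// an inpcb reference and, for UDP, bumps inp_nstat_refcnt so the
+	// address tuple can be cached at detach time. If a source cannot
+	// be added, release the reference and stop walking the list.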
- for (inp = LIST_FIRST(udbinfo.ipi_listhead); inp; inp = LIST_NEXT(inp, inp_list)) + LIST_FOREACH(inp, udbinfo.ipi_listhead, inp_list) { - cookie = nstat_tcpudp_cookie_alloc(inp, true); + cookie = nstat_tucookie_alloc_ref(inp); if (cookie == NULL) continue; if (nstat_control_source_add(0, state, &nstat_udp_provider, cookie) != 0) { - nstat_tcpudp_cookie_release(cookie, false); + nstat_tucookie_release(cookie); break; } } @@ -1279,11 +1515,12 @@ __private_extern__ void nstat_udp_new_pcb( struct inpcb *inp) { - struct nstat_tcpudp_cookie *cookie; + struct nstat_tucookie *cookie; if (nstat_udp_watchers == 0) return; + socket_lock(inp->inp_socket, 0); lck_mtx_lock(&nstat_mtx); nstat_control_state *state; for (state = nstat_controls; state; state = state->ncs_next) { @@ -1292,19 +1529,20 @@ nstat_udp_new_pcb( { // this client is watching udp // acquire a reference for it - cookie = nstat_tcpudp_cookie_alloc(inp, true); + cookie = nstat_tucookie_alloc_ref_locked(inp); if (cookie == NULL) continue; // add the source, if that fails, release the reference if (nstat_control_source_add(0, state, &nstat_udp_provider, cookie) != 0) { - nstat_tcpudp_cookie_release(cookie, false); + nstat_tucookie_release_locked(cookie); break; } } } lck_mtx_unlock(&nstat_mtx); + socket_unlock(inp->inp_socket, 0); } static errno_t @@ -1321,30 +1559,51 @@ nstat_udp_copy_descriptor( nstat_provider_cookie_t cookie, void *data, u_int32_t len) { if (nstat_udp_gone(cookie)) return EINVAL; - struct nstat_tcpudp_cookie *tucookie = - (struct nstat_tcpudp_cookie *)cookie; + struct nstat_tucookie *tucookie = + (struct nstat_tucookie *)cookie; nstat_udp_descriptor *desc = (nstat_udp_descriptor*)data; struct inpcb *inp = tucookie->inp; bzero(desc, sizeof(*desc)); - if (inp->inp_vflag & INP_IPV6) - { - nstat_ip6_to_sockaddr(&inp->in6p_laddr, inp->inp_lport, - &desc->local.v6, sizeof(desc->local)); - nstat_ip6_to_sockaddr(&inp->in6p_faddr, inp->inp_fport, - &desc->remote.v6, sizeof(desc->remote)); + if (tucookie->cached == false) { + if (inp->inp_vflag & INP_IPV6) + { + nstat_ip6_to_sockaddr(&inp->in6p_laddr, inp->inp_lport, + &desc->local.v6, sizeof(desc->local)); + nstat_ip6_to_sockaddr(&inp->in6p_faddr, inp->inp_fport, + &desc->remote.v6, sizeof(desc->remote)); + } + else if (inp->inp_vflag & INP_IPV4) + { + nstat_ip_to_sockaddr(&inp->inp_laddr, inp->inp_lport, + &desc->local.v4, sizeof(desc->local)); + nstat_ip_to_sockaddr(&inp->inp_faddr, inp->inp_fport, + &desc->remote.v4, sizeof(desc->remote)); + } } - else if (inp->inp_vflag & INP_IPV4) + else { - nstat_ip_to_sockaddr(&inp->inp_laddr, inp->inp_lport, - &desc->local.v4, sizeof(desc->local)); - nstat_ip_to_sockaddr(&inp->inp_faddr, inp->inp_fport, - &desc->remote.v4, sizeof(desc->remote)); + if (inp->inp_vflag & INP_IPV6) + { + memcpy(&desc->local.v6, &tucookie->local.v6, + sizeof(desc->local)); + memcpy(&desc->remote.v6, &tucookie->remote.v6, + sizeof(desc->remote)); + } + else if (inp->inp_vflag & INP_IPV4) + { + memcpy(&desc->local.v4, &tucookie->local.v4, + sizeof(desc->local)); + memcpy(&desc->remote.v4, &tucookie->remote.v4, + sizeof(desc->remote)); + } } - desc->ifindex = (inp->inp_last_outifp == NULL) ?
0 : - inp->inp_last_outifp->if_index; + if (inp->inp_last_outifp) + desc->ifindex = inp->inp_last_outifp->if_index; + else + desc->ifindex = tucookie->if_index; struct socket *so = inp->inp_socket; if (so) @@ -1366,6 +1625,7 @@ nstat_udp_copy_descriptor( sizeof(tucookie->pname)); } memcpy(desc->uuid, so->last_uuid, sizeof(so->last_uuid)); + memcpy(desc->vuuid, so->so_vuuid, sizeof(so->so_vuuid)); if (so->so_flags & SOF_DELEGATED) { desc->eupid = so->e_upid; desc->epid = so->e_pid; @@ -1664,6 +1924,258 @@ nstat_ifnet_threshold_reached(unsigned int ifindex) lck_mtx_unlock(&nstat_mtx); } +#pragma mark -- Sysinfo Provider -- + +static nstat_provider nstat_sysinfo_provider; + +/* We store the flags requested by the client */ +typedef struct nstat_sysinfo_cookie +{ + u_int32_t flags; +} nstat_sysinfo_cookie; + +static errno_t +nstat_sysinfo_lookup( + const void *data, + u_int32_t length, + nstat_provider_cookie_t *out_cookie) +{ + const nstat_sysinfo_add_param *param = (nstat_sysinfo_add_param *)data; + nstat_sysinfo_cookie *cookie; + + if (length < sizeof(*param)) + return (EINVAL); + + if (nstat_privcheck != 0) { + errno_t result = priv_check_cred(kauth_cred_get(), + PRIV_NET_PRIVILEGED_NETWORK_STATISTICS, 0); + if (result != 0) + return (result); + } + + cookie = OSMalloc(sizeof(*cookie), nstat_malloc_tag); + if (cookie == NULL) + return (ENOMEM); + cookie->flags = param->flags; + *out_cookie = cookie; + return (0); +} + +static int +nstat_sysinfo_gone( + __unused nstat_provider_cookie_t cookie) +{ + /* Sysinfo always exists */ + return (0); +} + +static errno_t +nstat_sysinfo_copy_descriptor( + nstat_provider_cookie_t cookie, + void *data, + u_int32_t len) +{ + nstat_sysinfo_descriptor *desc = (nstat_sysinfo_descriptor *)data; + struct nstat_sysinfo_cookie *syscookie = + (struct nstat_sysinfo_cookie *)cookie; + + if (len < sizeof(nstat_sysinfo_descriptor)) + return (EINVAL); + desc->flags = syscookie->flags; + return (0); +} + +static void +nstat_sysinfo_release( + nstat_provider_cookie_t cookie, + __unused boolean_t locked) +{ + struct nstat_sysinfo_cookie *syscookie = + (struct nstat_sysinfo_cookie *)cookie; + OSFree(syscookie, sizeof(*syscookie), nstat_malloc_tag); +} + +static errno_t +nstat_enqueue_success( + uint64_t context, + nstat_control_state *state) +{ + nstat_msg_hdr success; + errno_t result; + + bzero(&success, sizeof(success)); + success.context = context; + success.type = NSTAT_MSG_TYPE_SUCCESS; + result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, + sizeof(success), CTL_DATA_EOR | CTL_DATA_CRIT); + if (result != 0) { + printf("%s: could not enqueue success message %d\n", + __func__, result); + nstat_successmsgfailures += 1; + } + return result; +} + +static void +nstat_init_sysinfo_provider(void) +{ + bzero(&nstat_sysinfo_provider, sizeof(nstat_sysinfo_provider)); + nstat_sysinfo_provider.nstat_provider_id = NSTAT_PROVIDER_SYSINFO; + nstat_sysinfo_provider.nstat_descriptor_length = sizeof(nstat_sysinfo_descriptor); + nstat_sysinfo_provider.nstat_lookup = nstat_sysinfo_lookup; + nstat_sysinfo_provider.nstat_gone = nstat_sysinfo_gone; + nstat_sysinfo_provider.nstat_counts = NULL; + nstat_sysinfo_provider.nstat_watcher_add = NULL; + nstat_sysinfo_provider.nstat_watcher_remove = NULL; + nstat_sysinfo_provider.nstat_copy_descriptor = nstat_sysinfo_copy_descriptor; + nstat_sysinfo_provider.nstat_release = nstat_sysinfo_release; + nstat_sysinfo_provider.next = nstat_providers; + nstat_providers = &nstat_sysinfo_provider; +} + +static void 
+nstat_sysinfo_send_data_internal( + nstat_control_state *control, + nstat_src *src, + nstat_sysinfo_data *data) +{ + nstat_msg_sysinfo_counts *syscnt = NULL; + size_t allocsize = 0, countsize = 0, nkeyvals = 0; + nstat_sysinfo_keyval *kv; + errno_t result = 0; + + allocsize = offsetof(nstat_msg_sysinfo_counts, counts); + countsize = offsetof(nstat_sysinfo_counts, nstat_sysinfo_keyvals); + + /* get number of key-vals for each kind of stat */ + switch (data->flags) + { + case NSTAT_SYSINFO_MBUF_STATS: + nkeyvals = 5; + break; + case NSTAT_SYSINFO_TCP_STATS: + nkeyvals = 6; + break; + default: + return; + } + countsize += sizeof(nstat_sysinfo_keyval) * nkeyvals; + allocsize += countsize; + + syscnt = OSMalloc(allocsize, nstat_malloc_tag); + if (syscnt == NULL) + return; + bzero(syscnt, allocsize); + + syscnt->hdr.type = NSTAT_MSG_TYPE_SYSINFO_COUNTS; + syscnt->counts.nstat_sysinfo_len = countsize; + syscnt->srcref = src->srcref; + + kv = (nstat_sysinfo_keyval *) &syscnt->counts.nstat_sysinfo_keyvals; + switch (data->flags) + { + case NSTAT_SYSINFO_MBUF_STATS: + { + kv[0].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_256B_TOTAL; + kv[0].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[0].u.nstat_sysinfo_scalar = data->u.mb_stats.total_256b; + + kv[1].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_2KB_TOTAL; + kv[1].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[1].u.nstat_sysinfo_scalar = data->u.mb_stats.total_2kb; + + kv[2].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_4KB_TOTAL; + kv[2].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[2].u.nstat_sysinfo_scalar = data->u.mb_stats.total_4kb; + + kv[3].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SOCK_MBCNT; + kv[3].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[3].u.nstat_sysinfo_scalar = data->u.mb_stats.sbmb_total; + + + kv[4].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SOCK_ATMBLIMIT; + kv[4].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[4].u.nstat_sysinfo_scalar = data->u.mb_stats.sb_atmbuflimit; + break; + } + case NSTAT_SYSINFO_TCP_STATS: + { + kv[0].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_IPV4_AVGRTT; + kv[0].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[0].u.nstat_sysinfo_scalar = data->u.tcp_stats.ipv4_avgrtt; + + kv[1].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_IPV6_AVGRTT; + kv[1].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[1].u.nstat_sysinfo_scalar = data->u.tcp_stats.ipv6_avgrtt; + + kv[2].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_PLR; + kv[2].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[2].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_plr; + + kv[3].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_RECV_PLR; + kv[3].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[3].u.nstat_sysinfo_scalar = data->u.tcp_stats.recv_plr; + + kv[4].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_TLRTO; + kv[4].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[4].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_tlrto_rate; + + kv[5].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_REORDERRATE; + kv[5].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR; + kv[5].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_reorder_rate; + break; + } + } + + if (syscnt != NULL) + { + result = ctl_enqueuedata(control->ncs_kctl, + control->ncs_unit, syscnt, allocsize, CTL_DATA_EOR); + if (result != 0) + nstat_sysinfofailures += 1; + OSFree(syscnt, allocsize, nstat_malloc_tag); + } + return; +} + +__private_extern__ void +nstat_sysinfo_send_data( + nstat_sysinfo_data *data) +{ + nstat_control_state *control; + + lck_mtx_lock(&nstat_mtx); + for (control = 
nstat_controls; control; control = control->ncs_next) + { + lck_mtx_lock(&control->mtx); + nstat_src *src; + for (src = control->ncs_srcs; src; src = src->next) + { + if (src->provider->nstat_provider_id == + NSTAT_PROVIDER_SYSINFO) + { + struct nstat_sysinfo_cookie *syscookie; + syscookie = (struct nstat_sysinfo_cookie *) src->cookie; + if (syscookie->flags & data->flags) + { + nstat_sysinfo_send_data_internal(control, + src, data); + } + } + } + lck_mtx_unlock(&control->mtx); + } + lck_mtx_unlock(&nstat_mtx); + +} + +static void +nstat_sysinfo_generate_report(void) +{ + mbuf_report_peak_usage(); + tcp_report_stats(); +} + #pragma mark -- Kernel Control Socket -- static kern_ctl_ref nstat_ctlref = NULL; @@ -1674,7 +2186,6 @@ static errno_t nstat_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void static errno_t nstat_control_send(kern_ctl_ref kctl, u_int32_t unit, void *uinfo, mbuf_t m, int flags); - static void* nstat_idle_check( __unused thread_call_param_t p0, @@ -1698,19 +2209,30 @@ nstat_idle_check( { if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie)) { + errno_t result; + // Pull it off the list dead = *srcpp; *srcpp = (*srcpp)->next; // send one last counts notification - nstat_control_send_counts(control, dead, + result = nstat_control_send_counts(control, dead, 0, NULL); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_counts() %d\n", + __func__, result); // send a last description - nstat_control_send_description(control, dead, 0); + result = nstat_control_send_description(control, dead, 0); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_description() %d\n", + __func__, result); // send the source removed notification - nstat_control_send_removed(control, dead); + result = nstat_control_send_removed(control, dead); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_removed() %d\n", + __func__, result); // Put this on the list to release later dead->next = dead_list; @@ -1725,7 +2247,7 @@ nstat_idle_check( control->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS; lck_mtx_unlock(&control->mtx); } - + if (nstat_controls) { clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time); @@ -1734,6 +2256,9 @@ nstat_idle_check( lck_mtx_unlock(&nstat_mtx); + /* Generate any system level reports, if needed */ + nstat_sysinfo_generate_report(); + // Release the sources now that we aren't holding lots of locks while (dead_list) { @@ -1761,6 +2286,9 @@ nstat_control_register(void) struct kern_ctl_reg nstat_control; bzero(&nstat_control, sizeof(nstat_control)); strlcpy(nstat_control.ctl_name, NET_STAT_CONTROL_NAME, sizeof(nstat_control.ctl_name)); + nstat_control.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_CRIT; + nstat_control.ctl_sendsize = nstat_sendspace; + nstat_control.ctl_recvsize = nstat_recvspace; nstat_control.ctl_connect = nstat_control_connect; nstat_control.ctl_disconnect = nstat_control_disconnect; nstat_control.ctl_send = nstat_control_send; @@ -1774,9 +2302,14 @@ nstat_control_cleanup_source( struct nstat_src *src, boolean_t locked) { - if (state) - nstat_control_send_removed(state, src); + errno_t result; + if (state) { + result = nstat_control_send_removed(state, src); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_removed() %d\n", + __func__, result); + } // Cleanup the source if we found it. 
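+	// The locked flag is passed through to the provider's release
+	// callback so a TCP/UDP cookie can drop its inpcb reference via
+	// in_pcb_checkstate() with the appropriate locking mode.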
src->provider->nstat_release(src->cookie, locked); OSFree(src, sizeof(*src), nstat_malloc_tag); @@ -1914,20 +2447,28 @@ nstat_control_send_counts( int localgone = 0; errno_t result = 0; + /* Some providers may not have any counts to send */ + if (src->provider->nstat_counts == NULL) + return (0); + + bzero(&counts, sizeof(counts)); counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS; counts.hdr.context = context; counts.srcref = src->srcref; - bzero(&counts.counts, sizeof(counts.counts)); + if (src->provider->nstat_counts(src->cookie, &counts.counts, &localgone) == 0) { if ((src->filter & NSTAT_FILTER_NOZEROBYTES) && counts.counts.nstat_rxbytes == 0 && - counts.counts.nstat_txbytes == 0) + counts.counts.nstat_txbytes == 0) { result = EAGAIN; - else + } else { result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &counts, sizeof(counts), CTL_DATA_EOR); + if (result != 0) + nstat_srccountfailures += 1; + } } if (gone) *gone = localgone; @@ -1946,40 +2487,42 @@ nstat_control_send_description( { return EOPNOTSUPP; } - + // Allocate storage for the descriptor message mbuf_t msg; unsigned int one = 1; u_int32_t size = offsetof(nstat_msg_src_description, data) + src->provider->nstat_descriptor_length; - if (mbuf_allocpacket(MBUF_WAITOK, size, &one, &msg) != 0) + if (mbuf_allocpacket(MBUF_DONTWAIT, size, &one, &msg) != 0) { return ENOMEM; } - + nstat_msg_src_description *desc = (nstat_msg_src_description*)mbuf_data(msg); + bzero(desc, size); mbuf_setlen(msg, size); mbuf_pkthdr_setlen(msg, mbuf_len(msg)); - + // Query the provider for the provider specific bits errno_t result = src->provider->nstat_copy_descriptor(src->cookie, desc->data, src->provider->nstat_descriptor_length); - + if (result != 0) { mbuf_freem(msg); return result; } - + desc->hdr.context = context; desc->hdr.type = NSTAT_MSG_TYPE_SRC_DESC; desc->srcref = src->srcref; desc->provider = src->provider->nstat_provider_id; - + result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR); if (result != 0) { + nstat_descriptionfailures += 1; mbuf_freem(msg); } - + return result; } @@ -1991,11 +2534,14 @@ nstat_control_send_removed( nstat_msg_src_removed removed; errno_t result; + bzero(&removed, sizeof(removed)); removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED; removed.hdr.context = 0; removed.srcref = src->srcref; result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &removed, - sizeof(removed), CTL_DATA_EOR); + sizeof(removed), CTL_DATA_EOR | CTL_DATA_CRIT); + if (result != 0) + nstat_msgremovedfailures += 1; return result; } @@ -2092,16 +2638,8 @@ nstat_control_handle_add_all( state->ncs_watching &= ~(1 << provider->nstat_provider_id); lck_mtx_unlock(&state->mtx); } - if (result == 0) - { - // Notify the client - nstat_msg_hdr success; - success.context = req->hdr.context; - success.type = NSTAT_MSG_TYPE_SUCCESS; - success.pad = 0; - ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, sizeof(success), CTL_DATA_EOR); - } + nstat_enqueue_success(req->hdr.context, state); return result; } @@ -2117,7 +2655,8 @@ nstat_control_source_add( mbuf_t msg = NULL; unsigned int one = 1; - if (mbuf_allocpacket(MBUF_WAITOK, sizeof(nstat_msg_src_added), &one, &msg) != 0) + if (mbuf_allocpacket(MBUF_DONTWAIT, sizeof(nstat_msg_src_added), &one, + &msg) != 0) return ENOMEM; mbuf_setlen(msg, sizeof(nstat_msg_src_added)); @@ -2152,9 +2691,11 @@ nstat_control_source_add( src->filter = 0; // send the source added message - errno_t result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR); + errno_t result = 
ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, + CTL_DATA_EOR); if (result != 0) { + nstat_srcaddedfailures += 1; lck_mtx_unlock(&state->mtx); OSFree(src, sizeof(*src), nstat_malloc_tag); mbuf_freem(msg); @@ -2224,6 +2765,7 @@ nstat_control_handle_query_request( nstat_src *dead_srcs = NULL; errno_t result = ENOENT; nstat_msg_query_src_req req; + if (mbuf_copydata(m, 0, sizeof(req), &req) != 0) { return EINVAL; @@ -2236,12 +2778,14 @@ nstat_control_handle_query_request( while (*srcpp != NULL) { int gone; + gone = 0; - // XXX ignore IFACE types? if (req.srcref == NSTAT_SRC_REF_ALL || - (*srcpp)->srcref == req.srcref) + (*srcpp)->srcref == req.srcref) { + gone = 0; + result = nstat_control_send_counts(state, *srcpp, req.hdr.context, &gone); @@ -2253,10 +2797,17 @@ nstat_control_handle_query_request( if (gone) { // send one last descriptor message so client may see last state + // If we can't send the notification now, it + // will be sent in the idle cleanup. + result = nstat_control_send_description(state, *srcpp, 0); + if (result != 0 && nstat_debug) + printf("%s - nstat_control_send_description() %d\n", + __func__, result); + if (result != 0) { + state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS; + break; + } - nstat_control_send_description(state, *srcpp, - 0); - // pull src out of the list nstat_src *src = *srcpp; *srcpp = src->next; @@ -2274,6 +2825,12 @@ nstat_control_handle_query_request( } lck_mtx_unlock(&state->mtx); + if (req.srcref == NSTAT_SRC_REF_ALL) + { + nstat_enqueue_success(req.hdr.context, state); + result = 0; + } + while (dead_srcs) { nstat_src *src; @@ -2285,16 +2842,6 @@ nstat_control_handle_query_request( nstat_control_cleanup_source(state, src, FALSE); } - if (req.srcref == NSTAT_SRC_REF_ALL) - { - nstat_msg_hdr success; - success.context = req.hdr.context; - success.type = NSTAT_MSG_TYPE_SUCCESS; - success.pad = 0; - ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, sizeof(success), CTL_DATA_EOR); - result = 0; - } - return result; } @@ -2304,7 +2851,7 @@ nstat_control_handle_get_src_description( mbuf_t m) { nstat_msg_get_src_description req; - errno_t result; + errno_t result = 0; nstat_src *src; if (mbuf_copydata(m, 0, sizeof(req), &req) != 0) @@ -2331,11 +2878,7 @@ nstat_control_handle_get_src_description( result = ENOENT; else if (req.srcref == NSTAT_SRC_REF_ALL) { - nstat_msg_hdr success; - success.context = req.hdr.context; - success.type = NSTAT_MSG_TYPE_SUCCESS; - success.pad = 0; - ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, sizeof(success), CTL_DATA_EOR); + nstat_enqueue_success(req.hdr.context, state); result = 0; } @@ -2436,11 +2979,15 @@ nstat_control_send( { struct nstat_msg_error err; + bzero(&err, sizeof(err)); err.hdr.type = NSTAT_MSG_TYPE_ERROR; err.hdr.context = hdr->context; err.error = result; - result = ctl_enqueuedata(kctl, unit, &err, sizeof(err), CTL_DATA_EOR); + result = ctl_enqueuedata(kctl, unit, &err, sizeof(err), + CTL_DATA_EOR | CTL_DATA_CRIT); + if (result != 0) + nstat_descriptionfailures += 1; } mbuf_freem(m); diff --git a/bsd/net/ntstat.h b/bsd/net/ntstat.h index 6acef925b..2aad07b65 100644 --- a/bsd/net/ntstat.h +++ b/bsd/net/ntstat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -35,7 +35,7 @@ #pragma pack(push, 4) #pragma mark -- Common Data Structures -- -#define __NSTAT_REVISION__ 4 +#define __NSTAT_REVISION__ 6 typedef u_int32_t nstat_provider_id_t; typedef u_int32_t nstat_src_ref_t; @@ -63,8 +63,47 @@ typedef struct nstat_counts u_int64_t nstat_cell_txbytes __attribute__((aligned(8))); u_int64_t nstat_wifi_rxbytes __attribute__((aligned(8))); u_int64_t nstat_wifi_txbytes __attribute__((aligned(8))); + u_int64_t nstat_wired_rxbytes __attribute__((aligned(8))); + u_int64_t nstat_wired_txbytes __attribute__((aligned(8))); } nstat_counts; +typedef struct nstat_sysinfo_keyval +{ + u_int32_t nstat_sysinfo_key; + u_int32_t nstat_sysinfo_flags; + union { + int64_t nstat_sysinfo_scalar; + double nstat_sysinfo_distribution; + } u; +} nstat_sysinfo_keyval; + +#define NSTAT_SYSINFO_FLAG_SCALAR 0x0001 +#define NSTAT_SYSINFO_FLAG_DISTRIBUTION 0x0002 + +typedef struct nstat_sysinfo_counts +{ + /* Counters */ + u_int32_t nstat_sysinfo_len; + u_int32_t pad; + u_int8_t nstat_sysinfo_keyvals[]; +} nstat_sysinfo_counts; + +enum +{ + NSTAT_SYSINFO_KEY_MBUF_256B_TOTAL = 1 + ,NSTAT_SYSINFO_KEY_MBUF_2KB_TOTAL = 2 + ,NSTAT_SYSINFO_KEY_MBUF_4KB_TOTAL = 3 + ,NSTAT_SYSINFO_KEY_SOCK_MBCNT = 4 + ,NSTAT_SYSINFO_KEY_SOCK_ATMBLIMIT = 5 + ,NSTAT_SYSINFO_KEY_IPV4_AVGRTT = 6 + ,NSTAT_SYSINFO_KEY_IPV6_AVGRTT = 7 + ,NSTAT_SYSINFO_KEY_SEND_PLR = 8 + ,NSTAT_SYSINFO_KEY_RECV_PLR = 9 + ,NSTAT_SYSINFO_KEY_SEND_TLRTO = 10 + ,NSTAT_SYSINFO_KEY_SEND_REORDERRATE = 11 + +}; + #pragma mark -- Network Statistics Providers -- enum @@ -73,6 +112,7 @@ enum ,NSTAT_PROVIDER_TCP = 2 ,NSTAT_PROVIDER_UDP = 3 ,NSTAT_PROVIDER_IFNET = 4 + ,NSTAT_PROVIDER_SYSINFO = 5 }; typedef struct nstat_route_add_param @@ -130,6 +170,8 @@ typedef struct nstat_tcp_descriptor u_int32_t txwindow; u_int32_t txcwindow; u_int32_t traffic_class; + u_int32_t traffic_mgt_flags; + char cc_algo[16]; u_int64_t upid; u_int32_t pid; @@ -139,6 +181,7 @@ typedef struct nstat_tcp_descriptor uint8_t uuid[16]; uint8_t euuid[16]; + uint8_t vuuid[16]; } nstat_tcp_descriptor; typedef struct nstat_tcp_add_param nstat_udp_add_param; @@ -171,6 +214,7 @@ typedef struct nstat_udp_descriptor uint8_t uuid[16]; uint8_t euuid[16]; + uint8_t vuuid[16]; } nstat_udp_descriptor; typedef struct nstat_route_descriptor @@ -223,6 +267,20 @@ typedef struct nstat_ifnet_descriptor char description[IF_DESCSIZE]; } nstat_ifnet_descriptor; +typedef struct nstat_sysinfo_descriptor +{ + u_int32_t flags; +} nstat_sysinfo_descriptor; + +typedef struct nstat_sysinfo_add_param +{ + /* To indicate which system level information should be collected */ + u_int32_t flags; +} nstat_sysinfo_add_param; + +#define NSTAT_SYSINFO_MBUF_STATS 0x0001 +#define NSTAT_SYSINFO_TCP_STATS 0x0002 + #pragma mark -- Network Statistics User Client -- #define NET_STAT_CONTROL_NAME "com.apple.network.statistics" @@ -246,6 +304,7 @@ enum ,NSTAT_MSG_TYPE_SRC_REMOVED = 10002 ,NSTAT_MSG_TYPE_SRC_DESC = 10003 ,NSTAT_MSG_TYPE_SRC_COUNTS = 10004 + ,NSTAT_MSG_TYPE_SYSINFO_COUNTS = 10005 }; enum @@ -338,6 +397,43 @@ typedef struct nstat_msg_src_removed nstat_src_ref_t srcref; } nstat_msg_src_removed; +typedef struct nstat_msg_sysinfo_counts +{ + nstat_msg_hdr hdr; + nstat_src_ref_t srcref; + nstat_sysinfo_counts counts; +} nstat_msg_sysinfo_counts; + +typedef struct nstat_sysinfo_mbuf_stats +{ + u_int32_t total_256b; + u_int32_t total_2kb; + u_int32_t total_4kb; + u_int32_t sbmb_total; + u_int32_t sb_atmbuflimit; + u_int32_t draincnt; + u_int32_t memreleased; +} 
nstat_sysinfo_mbuf_stats; + +typedef struct nstat_sysinfo_tcp_stats +{ + u_int32_t ipv4_avgrtt; + u_int32_t ipv6_avgrtt; + u_int32_t send_plr; + u_int32_t recv_plr; + u_int32_t send_tlrto_rate; + u_int32_t send_reorder_rate; +} nstat_sysinfo_tcp_stats; + +typedef struct nstat_sysinfo_data +{ + u_int32_t flags; + union { + nstat_sysinfo_mbuf_stats mb_stats; + nstat_sysinfo_tcp_stats tcp_stats; + } u; +} nstat_sysinfo_data; + #pragma pack(pop) #endif /* PRIVATE */ @@ -382,10 +478,14 @@ void nstat_tcp_new_pcb(struct inpcb *inp); void nstat_udp_new_pcb(struct inpcb *inp); void nstat_route_new_entry(struct rtentry *rt); void nstat_pcb_detach(struct inpcb *inp); +void nstat_pcb_cache(struct inpcb *inp); +void nstat_pcb_invalidate_cache(struct inpcb *inp); void nstat_ifnet_threshold_reached(unsigned int ifindex); +void nstat_sysinfo_send_data(struct nstat_sysinfo_data *); + // locked_add_64 uses atomic operations on 32bit so the 64bit // value can be properly read. The values are only ever incremented // while under the socket lock, so on 64bit we don't actually need diff --git a/bsd/net/packet_mangler.c b/bsd/net/packet_mangler.c new file mode 100644 index 000000000..b2666ee19 --- /dev/null +++ b/bsd/net/packet_mangler.c @@ -0,0 +1,1037 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * THEORY OF OPERATION + * + * The packet mangler subsystem provides a limited way for user space + * applications to apply certain actions on certain flows. + * + * A user space application opens a kernel control socket with the name + * PACKET_MANGLER_CONTROL_NAME to attach to the packet mangler subsystem. + * When connected, a "struct packet_mangler" is created and set as the + * "unitinfo" of the corresponding kernel control socket instance. + * The connect call for the packet mangler's kernel control socket also + * registers IP filters with the cookie set to the packet_mangler instance. + * The IP filters are removed when the control socket is disconnected.
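+ *
+ * A minimal user space sketch of attaching and activating a mangler
+ * (illustrative only -- it assumes the standard kernel control calls
+ * declared in <sys/kern_control.h> and <sys/sys_domain.h>, and omits
+ * all error handling):
+ *
+ *	struct ctl_info info;
+ *	struct sockaddr_ctl addr;
+ *	uint8_t one = 1;
+ *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
+ *
+ *	bzero(&info, sizeof(info));
+ *	strlcpy(info.ctl_name, PACKET_MANGLER_CONTROL_NAME,
+ *	    sizeof(info.ctl_name));
+ *	ioctl(fd, CTLIOCGINFO, &info);
+ *
+ *	bzero(&addr, sizeof(addr));
+ *	addr.sc_len = sizeof(addr);
+ *	addr.sc_family = AF_SYSTEM;
+ *	addr.ss_sysaddr = AF_SYS_CONTROL;
+ *	addr.sc_id = info.ctl_id;
+ *	addr.sc_unit = 1;		-- kcunit numbers start at 1
+ *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+ *
+ *	setsockopt(fd, SYSPROTO_CONTROL, PKT_MNGLR_OPT_ACTIVATE,
+ *	    &one, sizeof(one));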
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#define MAX_PACKET_MANGLER 1 + +#define PKT_MNGLR_FLG_IPFILTER_ATTACHED 0x00000001 + +SYSCTL_NODE(_net, OID_AUTO, pktmnglr, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "pktmnglr"); +SYSCTL_INT(_net_pktmnglr, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED, + &pkt_mnglr_log_level, 0, ""); +/* + * The structure packet_mangler represents a user space packet filter. + * It's created and associated with a kernel control socket instance + */ +struct packet_mangler { + kern_ctl_ref pkt_mnglr_kcref; + uint32_t pkt_mnglr_kcunit; + uint32_t pkt_mnglr_flags; + /* IP filter related params */ + ipfilter_t pkt_mnglr_ipfref; + ipfilter_t pkt_mnglr_ipfrefv6; + struct ipf_filter pkt_mnglr_ipfilter; + + /* Options */ + uint8_t activate; + Pkt_Mnglr_Flow dir; + struct sockaddr_storage lsaddr; + struct sockaddr_storage rsaddr; + struct sockaddr_storage swap_lsaddr; + struct sockaddr_storage swap_rsaddr; + uint32_t ip_action_mask; + uint16_t lport; + uint16_t rport; + uint32_t proto; + uint32_t proto_action_mask; +}; + +/* Array of all the packet mangler instances */ +struct packet_mangler **packet_manglers = NULL; + +uint32_t pkt_mnglr_active_count = 0; /* Number of active packet filters */ +uint32_t pkt_mnglr_close_wait_timeout = 1000; /* in milliseconds */ + +static kern_ctl_ref pkt_mnglr_kctlref = NULL; + +static lck_grp_attr_t *pkt_mnglr_lck_grp_attr = NULL; +static lck_attr_t *pkt_mnglr_lck_attr = NULL; +static lck_grp_t *pkt_mnglr_lck_grp = NULL; + +/* The lock below protects packet_manglers DS, packet_mangler DS */ +decl_lck_rw_data(static, pkt_mnglr_lck_rw); + +#define PKT_MNGLR_RW_LCK_MAX 8 + +int pkt_mnglr_rw_nxt_lck = 0; +void* pkt_mnglr_rw_lock_history[PKT_MNGLR_RW_LCK_MAX]; + +int pkt_mnglr_rw_nxt_unlck = 0; +void* pkt_mnglr_rw_unlock_history[PKT_MNGLR_RW_LCK_MAX]; + + +#define PACKET_MANGLER_ZONE_NAME "packet_mangler" +#define PACKET_MANGLER_ZONE_MAX 10 +static struct zone *packet_mangler_zone = NULL; /* zone for packet_mangler */ + +/* + * For troubleshooting + */ +int pkt_mnglr_log_level = LOG_ERR; +int pkt_mnglr_debug = 1; + +/* + * Forward declaration to appease the compiler + */ +static void pkt_mnglr_rw_lock_exclusive(lck_rw_t *); +static void pkt_mnglr_rw_unlock_exclusive(lck_rw_t *); +static void pkt_mnglr_rw_lock_shared(lck_rw_t *); +static void pkt_mnglr_rw_unlock_shared(lck_rw_t *); + +static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, + ipf_pktopts_t options); +static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, + int offset, u_int8_t protocol); +static void pktmnglr_ipfilter_detach(void *cookie); + +static void chksm_update(mbuf_t data); + +/* + * packet filter global read write lock + */ + +static void +pkt_mnglr_rw_lock_exclusive(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_lock_exclusive(lck); + + pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved; + pkt_mnglr_rw_nxt_lck = + (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX; +} + +static void +pkt_mnglr_rw_unlock_exclusive(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_unlock_exclusive(lck); + + pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] = + lr_saved; + pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX; +} + +static void +pkt_mnglr_rw_lock_shared(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); +
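+	/*
+	 * Take the lock first, then record the caller in a small ring
+	 * buffer (pkt_mnglr_rw_lock_history) to help debug lock usage.
+	 */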
+ lck_rw_lock_shared(lck); + + pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved; + pkt_mnglr_rw_nxt_lck = (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX; +} + +static void +pkt_mnglr_rw_unlock_shared(lck_rw_t *lck) +{ + void *lr_saved; + + lr_saved = __builtin_return_address(0); + + lck_rw_unlock_shared(lck); + + pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] = lr_saved; + pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX; +} + +/* + * Packet Mangler's Kernel control socket callbacks + */ +static errno_t +pkt_mnglr_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, + void **unitinfo) +{ + errno_t error = 0; + struct packet_mangler *p_pkt_mnglr = NULL; + + PKT_MNGLR_LOG(LOG_NOTICE, "Connecting packet mangler filter."); + + p_pkt_mnglr = zalloc(packet_mangler_zone); + if (p_pkt_mnglr == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "zalloc failed"); + error = ENOMEM; + goto done; + } + + bzero(p_pkt_mnglr, sizeof(struct packet_mangler)); + + pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw); + if (packet_manglers == NULL) { + struct packet_mangler **tmp; + + pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw); + + MALLOC(tmp, + struct packet_mangler **, + MAX_PACKET_MANGLER * sizeof(struct packet_mangler *), + M_TEMP, + M_WAITOK | M_ZERO); + + pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw); + + if (tmp == NULL && packet_manglers == NULL) { + error = ENOMEM; + pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw); + goto done; + } + /* Another thread may have won the race */ + if (packet_manglers != NULL) + FREE(tmp, M_TEMP); + else + packet_manglers = tmp; + } + + if (sac->sc_unit == 0 || sac->sc_unit > MAX_PACKET_MANGLER) { + PKT_MNGLR_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit); + error = EINVAL; + } else if (packet_manglers[sac->sc_unit - 1] != NULL) { + PKT_MNGLR_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit); + error = EADDRINUSE; + } else { + /* + * kernel control socket kcunit numbers start at 1 + */ + packet_manglers[sac->sc_unit - 1] = p_pkt_mnglr; + + p_pkt_mnglr->pkt_mnglr_kcref = kctlref; + p_pkt_mnglr->pkt_mnglr_kcunit = sac->sc_unit; + + *unitinfo = p_pkt_mnglr; + pkt_mnglr_active_count++; + } + + p_pkt_mnglr->pkt_mnglr_ipfilter.cookie = p_pkt_mnglr; + p_pkt_mnglr->pkt_mnglr_ipfilter.name = "com.apple.pktmnglripfilter"; + p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_input = pktmnglr_ipfilter_input; + p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_output = pktmnglr_ipfilter_output; + p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_detach = pktmnglr_ipfilter_detach; + error = ipf_addv4(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfref)); + if (error) { + PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv4 Filter"); + goto done; + } + error = ipf_addv6(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfrefv6)); + if (error) { + ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref); + PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv6 Filter"); + goto done; + } + + PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler's IP Filters"); + p_pkt_mnglr->pkt_mnglr_flags |= PKT_MNGLR_FLG_IPFILTER_ATTACHED; + pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw); + +done: + if (error != 0 && p_pkt_mnglr != NULL) + zfree(packet_mangler_zone, p_pkt_mnglr); + + PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u", + error, pkt_mnglr_active_count, sac->sc_unit); + + return (error); +} + +static errno_t +pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo) +{ +#pragma unused(kctlref) + errno_t error = 0; + 
struct packet_mangler *p_pkt_mnglr; + + PKT_MNGLR_LOG(LOG_INFO, "Disconnecting packet mangler kernel control"); + + if (packet_manglers == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "no packet filter"); + error = EINVAL; + goto done; + } + if (kcunit > MAX_PACKET_MANGLER) { + PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)", + kcunit, MAX_PACKET_MANGLER); + error = EINVAL; + goto done; + } + + p_pkt_mnglr = (struct packet_mangler *)unitinfo; + if (p_pkt_mnglr == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "Unit info is NULL"); + goto done; + } + + pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw); + if (packet_manglers[kcunit - 1] != p_pkt_mnglr || p_pkt_mnglr->pkt_mnglr_kcunit != kcunit) { + PKT_MNGLR_LOG(LOG_ERR, "bad unit info %u)", + kcunit); + pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw); + goto done; + } + + /* + * Make filter inactive + */ + packet_manglers[kcunit - 1] = NULL; + pkt_mnglr_active_count--; + if (p_pkt_mnglr->pkt_mnglr_flags & PKT_MNGLR_FLG_IPFILTER_ATTACHED) { + (void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref); + (void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfrefv6); + } + pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw); + zfree(packet_mangler_zone, p_pkt_mnglr); +done: + PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u", + error, pkt_mnglr_active_count, kcunit); + + return (error); +} + +static errno_t +pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, + int opt, void *data, size_t *len) +{ +#pragma unused(kctlref, opt) + errno_t error = 0; + struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo; + + PKT_MNGLR_LOG(LOG_NOTICE, ""); + + pkt_mnglr_rw_lock_shared(&pkt_mnglr_lck_rw); + + if (packet_manglers == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "no packet filter"); + error = EINVAL; + goto done; + } + if (kcunit > MAX_PACKET_MANGLER) { + PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)", + kcunit, MAX_PACKET_MANGLER); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) { + PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", + kcunit); + error = EINVAL; + goto done; + } + switch (opt) { + case PKT_MNGLR_OPT_PROTO_ACT_MASK: + if (*len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + + if (data != NULL) { + *(uint32_t *)data = p_pkt_mnglr->proto_action_mask; + } + break; + case PKT_MNGLR_OPT_IP_ACT_MASK: + if (*len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + + if (data != NULL) { + *(uint32_t *)data = p_pkt_mnglr->ip_action_mask; + } + break; + case PKT_MNGLR_OPT_LOCAL_IP: + if (*len < sizeof(struct sockaddr_storage)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + + if (data != NULL) { + *(struct sockaddr_storage *)data = p_pkt_mnglr->lsaddr; + } + break; + case PKT_MNGLR_OPT_REMOTE_IP: + if (*len < sizeof(struct sockaddr_storage)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + + if (data != NULL) { + *(struct sockaddr_storage *)data = p_pkt_mnglr->rsaddr; + } + break; + case PKT_MNGLR_OPT_LOCAL_PORT: + if (*len < sizeof(uint16_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + + if (data != NULL) { + *(uint16_t *)data = p_pkt_mnglr->lport; + } + 
break; + case PKT_MNGLR_OPT_REMOTE_PORT: + if (*len < sizeof(uint16_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + + if (data != NULL) { + *(uint16_t *)data = p_pkt_mnglr->rport; + } + break; + case PKT_MNGLR_OPT_DIRECTION: + if (*len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + if (data != NULL) { + *(uint32_t *)data = p_pkt_mnglr->dir; + } + break; + case PKT_MNGLR_OPT_PROTOCOL: + if (*len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + if (data != NULL) { + *(uint32_t *)data = p_pkt_mnglr->proto; + } + break; + case PKT_MNGLR_OPT_ACTIVATE: + if (*len < sizeof(uint8_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE " + "len too small %lu", *len); + error = EINVAL; + goto done; + } + + if (data != NULL) { + *(uint8_t *)data = p_pkt_mnglr->activate; + } + break; + default: + error = ENOPROTOOPT; + break; + } +done: + pkt_mnglr_rw_unlock_shared(&pkt_mnglr_lck_rw); + + return (error); +} + +static errno_t +pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, + int opt, void *data, size_t len) +{ +#pragma unused(kctlref, opt) + errno_t error = 0; + struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo; + + PKT_MNGLR_LOG(LOG_NOTICE, ""); + + pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw); + + if (packet_manglers == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "no packet filter"); + error = EINVAL; + goto done; + } + if (kcunit > MAX_PACKET_MANGLER) { + PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)", + kcunit, MAX_PACKET_MANGLER); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) { + PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", + kcunit); + error = EINVAL; + goto done; + } + switch (opt) { + case PKT_MNGLR_OPT_PROTO_ACT_MASK: + if (len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->proto_action_mask != 0) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK " + "already set %u", + p_pkt_mnglr->proto_action_mask); + error = EINVAL; + goto done; + } + p_pkt_mnglr->proto_action_mask = *(uint32_t *)data; + PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr->proto_action_mask set to :%d", p_pkt_mnglr->proto_action_mask); + break; + case PKT_MNGLR_OPT_IP_ACT_MASK: + if (len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->ip_action_mask != 0) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK " + "already set %u", + p_pkt_mnglr->ip_action_mask); + error = EINVAL; + goto done; + } + p_pkt_mnglr->ip_action_mask = *(uint32_t *)data; + break; + case PKT_MNGLR_OPT_LOCAL_IP: + if (len < sizeof(struct sockaddr_storage)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->lsaddr.ss_family) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP " + "already set"); + error = EINVAL; + goto done; + } + p_pkt_mnglr->lsaddr = *(struct sockaddr_storage *)data; + break; + case PKT_MNGLR_OPT_REMOTE_IP: + if (len < sizeof(struct sockaddr_storage)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP " + "len too small %lu", len); + error = EINVAL; + goto 
done; + } + if (p_pkt_mnglr->rsaddr.ss_family) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP " + "already set"); + error = EINVAL; + goto done; + } + + p_pkt_mnglr->rsaddr = *(struct sockaddr_storage *)data; + PKT_MNGLR_LOG(LOG_INFO, + "Remote IP registered for address family: %d", + p_pkt_mnglr->rsaddr.ss_family); + break; + case PKT_MNGLR_OPT_LOCAL_PORT: + if (len < sizeof(uint16_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->lport != 0) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT " + "already set %d", + p_pkt_mnglr->lport); + error = EINVAL; + goto done; + } + p_pkt_mnglr->lport = *(uint16_t *)data; + break; + case PKT_MNGLR_OPT_REMOTE_PORT: + if (len < sizeof(uint16_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->rport != 0) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT " + "already set %d", + p_pkt_mnglr->rport); + error = EINVAL; + goto done; + } + p_pkt_mnglr->rport = *(uint16_t *)data; + break; + case PKT_MNGLR_OPT_DIRECTION: + if (len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->dir != 0) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION " + "already set %u", + p_pkt_mnglr->dir); + error = EINVAL; + goto done; + } + p_pkt_mnglr->dir = *(uint32_t *)data; + break; + case PKT_MNGLR_OPT_PROTOCOL: + if (len < sizeof(uint32_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->proto != 0) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL " + "already set %u", + p_pkt_mnglr->proto); + error = EINVAL; + goto done; + } + p_pkt_mnglr->proto = *(uint32_t *)data; + break; + case PKT_MNGLR_OPT_ACTIVATE: + if (len < sizeof(uint8_t)) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE " + "len too small %lu", len); + error = EINVAL; + goto done; + } + if (p_pkt_mnglr->activate != 0) { + PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE " + "already set %u", + p_pkt_mnglr->activate); + error = EINVAL; + goto done; + } + p_pkt_mnglr->activate = *(uint8_t *)data; + PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr->activate set to :%d", + p_pkt_mnglr->activate); + break; + default: + error = ENOPROTOOPT; + break; + } +done: + pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw); + + return (error); +} + +void +pkt_mnglr_init(void) +{ + struct kern_ctl_reg kern_ctl; + errno_t error = 0; + vm_size_t pkt_mnglr_size = 0; + + PKT_MNGLR_LOG(LOG_NOTICE, ""); + + /* + * Compile time verifications + */ + _CASSERT(PKT_MNGLR_MAX_FILTER_COUNT == MAX_PACKET_MANGLER); + + /* + * Zone for packet mangler kernel control sockets + */ + pkt_mnglr_size = sizeof(struct packet_mangler); + packet_mangler_zone = zinit(pkt_mnglr_size, + PACKET_MANGLER_ZONE_MAX * pkt_mnglr_size, + 0, + PACKET_MANGLER_ZONE_NAME); + + if (packet_mangler_zone == NULL) { + panic("%s: zinit(%s) failed", __func__, + PACKET_MANGLER_ZONE_NAME); + /* NOTREACHED */ + } + zone_change(packet_mangler_zone, Z_CALLERACCT, FALSE); + zone_change(packet_mangler_zone, Z_EXPAND, TRUE); + + /* + * Allocate locks + */ + pkt_mnglr_lck_grp_attr = lck_grp_attr_alloc_init(); + if (pkt_mnglr_lck_grp_attr == NULL) { + panic("%s: lck_grp_attr_alloc_init failed", __func__); + /* NOTREACHED */ + } + pkt_mnglr_lck_grp = lck_grp_alloc_init("packet mangler", + pkt_mnglr_lck_grp_attr); +
if (pkt_mnglr_lck_grp == NULL) { + panic("%s: lck_grp_alloc_init failed", __func__); + /* NOTREACHED */ + } + pkt_mnglr_lck_attr = lck_attr_alloc_init(); + if (pkt_mnglr_lck_attr == NULL) { + panic("%s: lck_attr_alloc_init failed", __func__); + /* NOTREACHED */ + } + lck_rw_init(&pkt_mnglr_lck_rw, pkt_mnglr_lck_grp, pkt_mnglr_lck_attr); + + /* + * Register kernel control + */ + bzero(&kern_ctl, sizeof(kern_ctl)); + strlcpy(kern_ctl.ctl_name, PACKET_MANGLER_CONTROL_NAME, + sizeof(kern_ctl.ctl_name)); + kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED; + kern_ctl.ctl_connect = pkt_mnglr_ctl_connect; + kern_ctl.ctl_disconnect = pkt_mnglr_ctl_disconnect; + kern_ctl.ctl_getopt = pkt_mnglr_ctl_getopt; + kern_ctl.ctl_setopt = pkt_mnglr_ctl_setopt; + error = ctl_register(&kern_ctl, &pkt_mnglr_kctlref); + if (error != 0) { + PKT_MNGLR_LOG(LOG_ERR, "ctl_register failed: %d", error); + } else { + PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler kernel control."); + } +} + +static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, ipf_pktopts_t options) +{ + struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie; + unsigned char *ptr = (unsigned char *)mbuf_data(*data); + struct ip *ip = (struct ip *)(void *)ptr; + struct tcphdr *tcp; + int optlen = 0; + +#pragma unused(tcp, optlen, options) + + if (p_pkt_mnglr == NULL) { + return 0; + } + + if (!p_pkt_mnglr->activate) { + return 0; + } + + if (data == NULL) { + PKT_MNGLR_LOG(LOG_INFO, "%s:%d Data pointer is NULL\n", __FILE__, __LINE__); + return 0; + } + + if (p_pkt_mnglr->dir == IN) { + return 0; + } + + if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip->ip_v == 4)) { + return 0; + } + + if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip->ip_v == 6)) { + return 0; + } + + if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) { + struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr)); + if (ip->ip_src.s_addr != laddr.sin_addr.s_addr) { + return 0; + } + } + + if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) { + struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr)); + if (ip->ip_dst.s_addr != raddr.sin_addr.s_addr) { + return 0; + } + } + + if (ip->ip_v != 4) { + PKT_MNGLR_LOG(LOG_INFO, "%s:%d Not handling IP version %d\n", __FILE__, __LINE__, ip->ip_v); + return 0; + } + + /* Not handling output flow */ + return 0; +} + +#define TCP_MAX_OPTLEN 40 + +static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u_int8_t protocol) +{ + struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie; + struct ip ip; + struct tcphdr tcp; + char tcp_opt_buf[TCP_MAX_OPTLEN] = {0}; + int orig_tcp_optlen; + int tcp_optlen = 0; + errno_t error = 0; + + if (p_pkt_mnglr == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr is NULL"); + goto input_done; + } + + if (p_pkt_mnglr->activate == 0) { + PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr not yet activated"); + goto input_done; + } + + if (data == NULL) { + PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL"); + goto input_done; + } + + if (p_pkt_mnglr->dir == OUT) { + goto input_done; + } + + /* Check for IP filter options */ + error = mbuf_copydata(*data, 0, sizeof(ip), &ip); + if (error) { + PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy"); + goto input_done; + } + + if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) { + PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family of packet is IPv4 but local " + "address is set to IPv6"); + goto input_done; + } + + if 
((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) { + PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family " + "of packet is IPv6 but local address is set to IPv4"); + goto input_done; + } + + if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) { + struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr)); + if (ip.ip_dst.s_addr != laddr.sin_addr.s_addr) { + goto input_done; + } + } + + if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) { + struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr)); + if (ip.ip_src.s_addr != raddr.sin_addr.s_addr) { + goto input_done; + } + PKT_MNGLR_LOG(LOG_INFO, "Remote IP: %x Source IP: %x in input path", + raddr.sin_addr.s_addr, + ip.ip_src.s_addr); + } + + if (ip.ip_v != 4) { + goto input_done; + } + + if (protocol != p_pkt_mnglr->proto) { + PKT_MNGLR_LOG(LOG_INFO, "Skip: Protocol mismatch"); + goto input_done; + } + + switch (protocol) { + case IPPROTO_TCP: + error = mbuf_copydata(*data, offset, sizeof(tcp), &tcp); + if (error) { + PKT_MNGLR_LOG(LOG_ERR, "Could not make local TCP header copy"); + goto input_done; + } + + if (p_pkt_mnglr->lport && (p_pkt_mnglr->lport != tcp.th_dport)) { + PKT_MNGLR_LOG(LOG_INFO, "Local port and IP dest port do not match"); + goto input_done; + } + + if (p_pkt_mnglr->rport && (p_pkt_mnglr->rport != tcp.th_sport)) { + PKT_MNGLR_LOG(LOG_INFO, "Remote port and IP src port do not match"); + goto input_done; + } + break; + case IPPROTO_UDP: + goto input_done; + break; + case IPPROTO_ICMP: + goto input_done; + break; + case IPPROTO_ICMPV6: + goto input_done; + break; + default: + goto input_done; + break; + } + + /* XXX Do IP actions here */ + PKT_MNGLR_LOG(LOG_INFO, "Proceeding with packet mangler actions on the packet"); + + /* Protocol actions */ + switch (protocol) { + case IPPROTO_TCP: + if (p_pkt_mnglr->proto_action_mask & PKT_MNGLR_TCP_ACT_NOP_MPTCP) { + int i = 0; + tcp_optlen = (tcp.th_off << 2)-sizeof(struct tcphdr); + PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n"); + PKT_MNGLR_LOG(LOG_INFO, "Optlen: %d\n", tcp_optlen); + orig_tcp_optlen = tcp_optlen; + if (orig_tcp_optlen) { + error = mbuf_copydata(*data, offset+sizeof(struct tcphdr), orig_tcp_optlen, tcp_opt_buf); + if (error) { + PKT_MNGLR_LOG(LOG_ERR, "Failed to copy tcp options"); + goto input_done; + } + } + + while (tcp_optlen) { + if (tcp_opt_buf[i] == 0x1) { + PKT_MNGLR_LOG(LOG_INFO, "Skipping NOP\n"); + tcp_optlen--; + i++; + continue; + } else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != 0x1e)) { + PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]); + tcp_optlen -= tcp_opt_buf[i+1]; + i += tcp_opt_buf[i+1]; + continue; + } else if (tcp_opt_buf[i] == 0x1e) { + int j = 0; + int mptcpoptlen = tcp_opt_buf[i+1]; + PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]); + PKT_MNGLR_LOG(LOG_INFO, "Overwriting with NOP\n"); + for (; j < mptcpoptlen; j++) { + tcp_opt_buf[i+j] = 0x1; + } + tcp_optlen -= mptcpoptlen; + i += mptcpoptlen; + } else { + tcp_optlen--; + i++; + } + } + error = mbuf_copyback(*data, + offset+sizeof(struct tcphdr), + orig_tcp_optlen, tcp_opt_buf, MBUF_WAITOK); + + if (error) { + PKT_MNGLR_LOG(LOG_ERR, + "Failed to copy tcp options"); + goto input_done; + } + } + break; + case IPPROTO_UDP: + /* Don't handle UDP */ + break; + case IPPROTO_ICMP: + break; + case IPPROTO_ICMPV6: + break; + default: + break; + } + chksm_update(*data); +input_done: + return 0; +} + +static void pktmnglr_ipfilter_detach(void *cookie) +{ +#pragma unused(cookie) + return; +} + +/* XXX Still need
+/* XXX Still need to modify this to use mbuf_copy* macros */ +static void chksm_update(mbuf_t data) +{ + u_int16_t ip_sum; + u_int16_t tsum; + struct tcphdr *tcp; + + unsigned char *ptr = (unsigned char *)mbuf_data(data); + struct ip *ip = (struct ip *)(void *)ptr; + if (ip->ip_v != 4) { + return; + } + + ip->ip_sum = 0; + mbuf_inet_cksum(data, 0, 0, ip->ip_hl << 2, &ip_sum); // recompute the IP header checksum + + ip->ip_sum = ip_sum; + switch (ip->ip_p) { + case IPPROTO_TCP: + tcp = (struct tcphdr *)(void *)(ptr + (ip->ip_hl << 2)); + tcp->th_sum = 0; + mbuf_inet_cksum(data, IPPROTO_TCP, ip->ip_hl << 2, + ntohs(ip->ip_len) - (ip->ip_hl << 2), &tsum); + tcp->th_sum = tsum; + break; + case IPPROTO_UDP: + /* Don't handle UDP */ + break; + case IPPROTO_ICMP: + break; + case IPPROTO_ICMPV6: + break; + default: + break; + } + + /* The packet was rewritten: invalidate any cached checksum-offload state */ + mbuf_clear_csum_performed(data); + return; +} diff --git a/bsd/net/packet_mangler.h b/bsd/net/packet_mangler.h new file mode 100644 index 000000000..7042fe0c4 --- /dev/null +++ b/bsd/net/packet_mangler.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef __PACKET_MANGLER_H__ +#define __PACKET_MANGLER_H__ + +#include +#include +#include +#include +#include +#include + +#ifdef BSD_KERNEL_PRIVATE +#include +#include +#endif /* BSD_KERNEL_PRIVATE */ + +__BEGIN_DECLS + +#ifdef PRIVATE + +typedef enum { + INOUT, + IN, + OUT +} Pkt_Mnglr_Flow; + +/* + * Kernel control name for an instance of a packet mangler.
+ * Use CTLIOCGINFO to find out the corresponding kernel control id + * to be set in the sc_id field of sockaddr_ctl for connect(2). + * Note: the sc_unit is ephemeral + */ +#define PACKET_MANGLER_CONTROL_NAME "com.apple.packet-mangler" + +#define PKT_MNGLR_OPT_PROTO_ACT_MASK 1 +#define PKT_MNGLR_OPT_IP_ACT_MASK 2 +#define PKT_MNGLR_OPT_LOCAL_IP 3 +#define PKT_MNGLR_OPT_REMOTE_IP 4 +#define PKT_MNGLR_OPT_LOCAL_PORT 5 +#define PKT_MNGLR_OPT_REMOTE_PORT 6 +#define PKT_MNGLR_OPT_DIRECTION 7 +#define PKT_MNGLR_OPT_PROTOCOL 8 +#define PKT_MNGLR_OPT_ACTIVATE 0xFFFFFFFF + +/* Packet mangler action masks */ +/* Packet Mangler TCP action mask */ +#define PKT_MNGLR_TCP_ACT_NOP_MPTCP 0x00000001 +#define PKT_MNGLR_TCP_ACT_SWAP_L_PORT 0x00000002 +#define PKT_MNGLR_TCP_ACT_SWAP_R_PORT 0x00000004 +#define PKT_MNGLR_TCP_ACT_CHK_EXTENDED 0x80000000 + +/* Packet Mangler IP action mask */ +#define PKT_MNGLR_IP_ACT_FLT_L_IP 0x00000001 +#define PKT_MNGLR_IP_ACT_FLT_R_IP 0x00000002 +#define PKT_MNGLR_IP_ACT_SWAP_L_IP 0x00000004 +#define PKT_MNGLR_IP_ACT_SWAP_R_IP 0x00000008 +#define PKT_MNGLR_IP_ACT_DROP_PACKET 0x00000010 +#define PKT_MNGLR_IP_ACT_CHK_EXTENDED 0x80000000 + +/* + * How many filters may be active simultaneously + */ +#define PKT_MNGLR_MAX_FILTER_COUNT 1 + +#define PKT_MNGLR_VERSION_CURRENT 1 + +#endif /* PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE + +extern int pkt_mnglr_log_level; + +#define PKT_MNGLR_LOG(level, fmt, ...) \ +do { \ + if (pkt_mnglr_log_level >= level) \ + printf("%s:%d " fmt "\n",\ + __FUNCTION__, __LINE__, ##__VA_ARGS__); \ +} while (0) + + +extern void pkt_mnglr_init(void); + +__END_DECLS + +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* __PACKET_MANGLER_H__ */ diff --git a/bsd/net/pf.c b/bsd/net/pf.c index 55e1c27ef..0a74fe5d2 100644 --- a/bsd/net/pf.c +++ b/bsd/net/pf.c @@ -1567,8 +1567,7 @@ pf_state_expires(const struct pf_state *state) /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) return (pf_time_second()); - if (state->timeout == PFTM_UNTIL_PACKET) - return (0); + VERIFY(state->timeout != PFTM_UNLINKED); VERIFY(state->timeout < PFTM_MAX); t = state->rule.ptr->timeout[state->timeout]; diff --git a/bsd/net/pf_ioctl.c b/bsd/net/pf_ioctl.c index 1346210be..bfad2e2ae 100644 --- a/bsd/net/pf_ioctl.c +++ b/bsd/net/pf_ioctl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Apple Inc. All rights reserved. + * Copyright (c) 2007-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -3552,7 +3552,7 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p break; if (rule->anchor != NULL) - strncpy(rule->anchor->owner, rule->owner, + strlcpy(rule->anchor->owner, rule->owner, PF_OWNER_NAME_SIZE); if (r) { @@ -3614,17 +3614,42 @@ pfioctl_ioc_state_kill(u_long cmd, struct pfioc_state_kill *psk, struct proc *p) #pragma unused(p) int error = 0; + psk->psk_ifname[sizeof (psk->psk_ifname) - 1] = '\0'; + psk->psk_ownername[sizeof(psk->psk_ownername) - 1] = '\0'; + + bool ifname_matched = true; + bool owner_matched = true; + switch (cmd) { case DIOCCLRSTATES: { struct pf_state *s, *nexts; int killed = 0; - psk->psk_ifname[sizeof (psk->psk_ifname) - 1] = '\0'; for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); + /* + * Purge all states only when neither ifname + * nor owner is provided. If either is provided, + * we purge only the states whose metadata matches. + */
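The owner matching added here means DIOCCLRSTATES can now be scoped to the states created by one rule owner rather than flushing everything. A hedged userland sketch follows; the /dev/pf handling and the "my_agent" owner string are illustrative, while psk_ifname, psk_ownername, and DIOCCLRSTATES are the interface this hunk implements:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/pfvar.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Kill only the states on en0 whose creating rule is owned by "my_agent" */
static int
clear_owned_states(void)
{
	struct pfioc_state_kill psk;
	int dev;

	if ((dev = open("/dev/pf", O_RDWR)) < 0)
		return (-1);
	memset(&psk, 0, sizeof(psk));
	strlcpy(psk.psk_ifname, "en0", sizeof(psk.psk_ifname));
	strlcpy(psk.psk_ownername, "my_agent", sizeof(psk.psk_ownername));
	/* Leaving both fields empty would purge every state instead */
	if (ioctl(dev, DIOCCLRSTATES, &psk) == -1) {
		close(dev);
		return (-1);
	}
	close(dev);
	return (0);
}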
+ bool unlink_state = false; + ifname_matched = true; + owner_matched = true; + + if (psk->psk_ifname[0] && + strcmp(psk->psk_ifname, s->kif->pfik_name)) { + ifname_matched = false; + } + + if (psk->psk_ownername[0] && + ((NULL == s->rule.ptr) || + strcmp(psk->psk_ownername, s->rule.ptr->owner))) { + owner_matched = false; + } - if (!psk->psk_ifname[0] || strcmp(psk->psk_ifname, - s->kif->pfik_name) == 0) { + unlink_state = ifname_matched && owner_matched; + + if (unlink_state) { #if NPFSYNC /* don't send out individual delete messages */ s->sync_flags = PFSTATE_NOSYNC; @@ -3650,6 +3675,19 @@ pfioctl_ioc_state_kill(u_long cmd, struct pfioc_state_kill *psk, struct proc *p) s = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); sk = s->state_key; + ifname_matched = true; + owner_matched = true; + + if (psk->psk_ifname[0] && + strcmp(psk->psk_ifname, s->kif->pfik_name)) { + ifname_matched = false; + } + + if (psk->psk_ownername[0] && + ((NULL == s->rule.ptr) || + strcmp(psk->psk_ownername, s->rule.ptr->owner))) { + owner_matched = false; + } if (sk->direction == PF_OUT) { src = &sk->lan; @@ -3674,8 +3712,8 @@ pfioctl_ioc_state_kill(u_long cmd, struct pfioc_state_kill *psk, struct proc *p) (pf_match_xport(psk->psk_proto, psk->psk_proto_variant, &psk->psk_dst.xport, &dst->xport)) && - (!psk->psk_ifname[0] || strcmp(psk->psk_ifname, - s->kif->pfik_name) == 0)) { + ifname_matched && + owner_matched) { #if NPFSYNC /* send immediate delete of state */ pfsync_delete_state(s); @@ -3710,8 +3748,7 @@ pfioctl_ioc_state(u_long cmd, struct pfioc_state *ps, struct proc *p) struct pf_state_key *sk; struct pfi_kif *kif; - if (sp->timeout >= PFTM_MAX && - sp->timeout != PFTM_UNTIL_PACKET) { + if (sp->timeout >= PFTM_MAX) { error = EINVAL; break; } @@ -4795,6 +4832,7 @@ pf_af_hook(struct ifnet *ifp, struct mbuf **mppn, struct mbuf **mp, int error = 0; struct mbuf *nextpkt; net_thread_marks_t marks; + struct ifnet * pf_ifp = ifp; marks = net_thread_marks_push(NET_THREAD_HELD_PF); @@ -4810,16 +4848,32 @@ if ((nextpkt = (*mp)->m_nextpkt) != NULL) (*mp)->m_nextpkt = NULL; + /* + * For packets destined to a locally hosted IP address, + * ip_output_list sets the mbuf packet header's rcvif to + * the interface hosting the IP address. + * While the ifp passed to pf_af_hook on the output path + * of such local communication is the loopback interface, + * the input path derives ifp from the mbuf packet header's + * rcvif. + * This asymmetry causes issues with PF. + * To handle that case, we have a limited change here to + * pass the interface as loopback if packets are looped in.
+ */ + if (input && ((*mp)->m_pkthdr.pkt_flags & PKTF_LOOP)) { + pf_ifp = lo_ifp; + } + switch (af) { #if INET case AF_INET: { - error = pf_inet_hook(ifp, mp, input, fwa); + error = pf_inet_hook(pf_ifp, mp, input, fwa); break; } #endif /* INET */ #if INET6 case AF_INET6: - error = pf_inet6_hook(ifp, mp, input, fwa); + error = pf_inet6_hook(pf_ifp, mp, input, fwa); break; #endif /* INET6 */ default: diff --git a/bsd/net/pfkeyv2.h b/bsd/net/pfkeyv2.h index 1de6752ef..880eb1ecb 100644 --- a/bsd/net/pfkeyv2.h +++ b/bsd/net/pfkeyv2.h @@ -139,7 +139,10 @@ struct sadb_sa { struct sadb_sa_2 { struct sadb_sa sa; u_int16_t sadb_sa_natt_port; - u_int16_t sadb_reserved0; + union { + u_int16_t sadb_reserved0; + u_int16_t sadb_sa_natt_interval; + }; u_int32_t sadb_reserved1; }; #endif /* PRIVATE */ @@ -257,7 +260,12 @@ struct sadb_x_sa2 { u_int8_t sadb_x_sa2_alwaysexpire; #endif }; - u_int16_t sadb_x_sa2_reserved2; + union { + u_int16_t sadb_x_sa2_reserved2; +#ifdef PRIVATE + u_int16_t sadb_x_sa2_flags; +#endif + }; u_int32_t sadb_x_sa2_sequence; u_int32_t sadb_x_sa2_reqid; }; @@ -457,13 +465,18 @@ struct sadb_sastat { #define SADB_X_EXT_NATT_DETECTED_PEER 0x1000 #define SADB_X_EXT_ESP_KEEPALIVE 0x2000 #define SADB_X_EXT_PUNT_RX_KEEPALIVE 0x4000 +#define SADB_X_EXT_NATT_KEEPALIVE_OFFLOAD 0x8000 #endif /* PRIVATE */ #if 1 #define SADB_X_EXT_RAWCPI 0x0080 /* use well known CPI (IPComp) */ #endif -#define SADB_KEY_FLAGS_MAX 0x0fff +#define SADB_KEY_FLAGS_MAX 0x7fff + +#ifdef PRIVATE +#define SADB_X_EXT_SA2_DELETE_ON_DETACH 0x0001 +#endif /* SPI size for PF_KEYv2 */ #define PFKEY_SPI_SIZE sizeof(u_int32_t) diff --git a/bsd/net/pfvar.h b/bsd/net/pfvar.h index 171e1c750..29a7716ab 100644 --- a/bsd/net/pfvar.h +++ b/bsd/net/pfvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Apple Inc. All rights reserved. + * Copyright (c) 2007-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,7 +86,7 @@ extern "C" { #ifdef KERNEL #include #include -#include +#include #include #include @@ -180,8 +180,7 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, PFTM_ESP_ESTABLISHED, PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, - PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED, - PFTM_UNTIL_PACKET }; + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED }; /* PFTM default values */ #define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ @@ -1794,6 +1793,7 @@ struct pfioc_state_kill { struct pfioc_state_addr_kill psk_src; struct pfioc_state_addr_kill psk_dst; char psk_ifname[IFNAMSIZ]; + char psk_ownername[PF_OWNER_NAME_SIZE]; }; struct pfioc_states { diff --git a/bsd/net/pktap.c b/bsd/net/pktap.c index 5c1be2939..81d0c35fb 100644 --- a/bsd/net/pktap.c +++ b/bsd/net/pktap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include @@ -47,7 +47,7 @@ #include #include #include -#define _IP_VHL +#define _IP_VHL #include #include #include @@ -76,42 +76,42 @@ struct pktap_softc { }; #ifndef PKTAP_DEBUG -#define PKTAP_DEBUG 1 +#define PKTAP_DEBUG 1 #endif /* PKTAP_DEBUG */ -#define PKTAP_FILTER_OK 0 /* Packet passes filter checks */ -#define PKTAP_FILTER_SKIP 1 /* Do not tap this packet */ +#define PKTAP_FILTER_OK 0 /* Packet passes filter checks */ +#define PKTAP_FILTER_SKIP 1 /* Do not tap this packet */ static int pktap_inited = 0; SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_PKTAP, pktap, CTLFLAG_RW|CTLFLAG_LOCKED, 0, - "pktap virtual interface"); +SYSCTL_NODE(_net_link, IFT_PKTAP, pktap, + CTLFLAG_RW |CTLFLAG_LOCKED, 0, "pktap virtual interface"); -static int pktap_total_tap_count = 0; -SYSCTL_INT(_net_link_pktap, OID_AUTO, total_tap_count, CTLFLAG_RD | CTLFLAG_LOCKED, - &pktap_total_tap_count, 0, ""); +static int pktap_total_tap_count = 0; +SYSCTL_INT(_net_link_pktap, OID_AUTO, total_tap_count, + CTLFLAG_RD | CTLFLAG_LOCKED, &pktap_total_tap_count, 0, ""); static u_int64_t pktap_count_unknown_if_type = 0; -SYSCTL_QUAD(_net_link_pktap, OID_AUTO, count_unknown_if_type, CTLFLAG_RD | CTLFLAG_LOCKED, - &pktap_count_unknown_if_type, ""); +SYSCTL_QUAD(_net_link_pktap, OID_AUTO, count_unknown_if_type, + CTLFLAG_RD | CTLFLAG_LOCKED, &pktap_count_unknown_if_type, ""); static int pktap_log = 0; -SYSCTL_INT(_net_link_pktap, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED, - &pktap_log, 0, ""); +SYSCTL_INT(_net_link_pktap, OID_AUTO, log, + CTLFLAG_RW | CTLFLAG_LOCKED, &pktap_log, 0, ""); -#define PKTAP_LOG(mask, fmt, ...) \ +#define PKTAP_LOG(mask, fmt, ...) \ do { \ - if ((pktap_log & mask)) \ - printf("%s:%d " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ -} while(false) + if ((pktap_log & mask)) \ + printf("%s:%d " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ +} while (false) -#define PKTP_LOG_FUNC 0x01 -#define PKTP_LOG_FILTER 0x02 -#define PKTP_LOG_INPUT 0x04 -#define PKTP_LOG_OUTPUT 0x08 -#define PKTP_LOG_ERROR 0x10 -#define PKTP_LOG_NOPCB 0x20 +#define PKTP_LOG_FUNC 0x01 +#define PKTP_LOG_FILTER 0x02 +#define PKTP_LOG_INPUT 0x04 +#define PKTP_LOG_OUTPUT 0x08 +#define PKTP_LOG_ERROR 0x10 +#define PKTP_LOG_NOPCB 0x20 /* * pktap_lck_rw protects the global list of pktap interfaces @@ -121,59 +121,58 @@ static lck_rw_t *pktap_lck_rw = &pktap_lck_rw_data; static lck_grp_t *pktap_lck_grp = NULL; static lck_attr_t *pktap_lck_attr = NULL; -static LIST_HEAD(pktap_list, pktap_softc) pktap_list = LIST_HEAD_INITIALIZER(pktap_list); +static LIST_HEAD(pktap_list, pktap_softc) pktap_list = + LIST_HEAD_INITIALIZER(pktap_list); int pktap_clone_create(struct if_clone *, u_int32_t, void *); int pktap_clone_destroy(struct ifnet *); -static struct if_clone pktap_cloner = - IF_CLONE_INITIALIZER(PKTAP_IFNAME, - pktap_clone_create, +static struct if_clone pktap_cloner = + IF_CLONE_INITIALIZER(PKTAP_IFNAME, + pktap_clone_create, pktap_clone_destroy, - 0, + 0, IF_MAXUNIT); errno_t pktap_if_output(ifnet_t, mbuf_t); -errno_t pktap_demux(ifnet_t , mbuf_t, char *, protocol_family_t *); -errno_t pktap_add_proto(ifnet_t, protocol_family_t, const struct ifnet_demux_desc *, - u_int32_t); +errno_t pktap_demux(ifnet_t, mbuf_t, char *, protocol_family_t *); +errno_t pktap_add_proto(ifnet_t, protocol_family_t, + const struct ifnet_demux_desc *, u_int32_t); errno_t pktap_del_proto(ifnet_t, protocol_family_t); errno_t 
pktap_getdrvspec(ifnet_t, struct ifdrv64 *); errno_t pktap_setdrvspec(ifnet_t, struct ifdrv64 *); errno_t pktap_ioctl(ifnet_t, unsigned long, void *); void pktap_detach(ifnet_t); int pktap_filter_evaluate(struct pktap_softc *, struct ifnet *); -void pktap_bpf_tap(struct ifnet *, protocol_family_t , struct mbuf *, - u_int32_t , u_int32_t , int ); -errno_t pktap_tap_callback(ifnet_t , u_int32_t , bpf_tap_mode ); +void pktap_bpf_tap(struct ifnet *, protocol_family_t, struct mbuf *, + u_int32_t, u_int32_t, int); +errno_t pktap_tap_callback(ifnet_t, u_int32_t, bpf_tap_mode); static void pktap_hexdump(int mask, void *addr, size_t len) { unsigned char *buf = addr; size_t i; - + if (!(pktap_log & mask)) return; - + for (i = 0; i < len; i++) { unsigned char h = (buf[i] & 0xf0) >> 4; unsigned char l = buf[i] & 0x0f; - + if (i != 0) { - if (i % 32 == 0) + if (i % 32 == 0) printf("\n"); - else if (i % 4 == 0) + else if (i % 4 == 0) printf(" "); } - printf("%c%c", + printf("%c%c", h < 10 ? h + '0' : h - 10 + 'a', l < 10 ? l + '0' : l - 10 + 'a'); } if (i % 32 != 0) printf("\n"); - - return; } __private_extern__ void @@ -181,12 +180,12 @@ pktap_init(void) { int error = 0; lck_grp_attr_t *lck_grp_attr = NULL; - + /* Make sure we're called only once */ VERIFY(pktap_inited == 0); pktap_inited = 1; - + lck_grp_attr = lck_grp_attr_alloc_init(); pktap_lck_grp = lck_grp_alloc_init("pktap", lck_grp_attr); pktap_lck_attr = lck_attr_alloc_init(); @@ -197,10 +196,11 @@ pktap_init(void) lck_grp_attr_free(lck_grp_attr); LIST_INIT(&pktap_list); - + error = if_clone_attach(&pktap_cloner); if (error != 0) - panic("%s: if_clone_attach() failed, error %d\n", __func__, error); + panic("%s: if_clone_attach() failed, error %d\n", + __func__, error); } __private_extern__ int @@ -211,8 +211,9 @@ pktap_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params) struct ifnet_init_params if_init; PKTAP_LOG(PKTP_LOG_FUNC, "unit %u\n", unit); - - pktap = _MALLOC(sizeof(struct pktap_softc), M_DEVBUF, M_WAITOK | M_ZERO); + + pktap = _MALLOC(sizeof(struct pktap_softc), M_DEVBUF, + M_WAITOK | M_ZERO); if (pktap == NULL) { printf("%s: _MALLOC failed\n", __func__); error = ENOMEM; @@ -231,7 +232,7 @@ pktap_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params) pktap->pktp_filters[1].filter_param = PKTAP_FILTER_PARAM_IF_TYPE; pktap->pktp_filters[1].filter_param_if_type = IFT_IEEE1394; /* - * We do not use a set_bpf_tap() function as we rather rely on the more + * We do not use a set_bpf_tap() function as we rather rely on the more * accurate callback passed to bpf_attach() */ bzero(&if_init, sizeof(struct ifnet_init_params)); @@ -249,24 +250,25 @@ pktap_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params) error = ifnet_allocate(&if_init, &pktap->pktp_ifp); if (error != 0) { - printf("%s: ifnet_allocate failed, error %d\n", __func__, error); + printf("%s: ifnet_allocate failed, error %d\n", + __func__, error); goto done; } - + ifnet_set_flags(pktap->pktp_ifp, IFF_UP, IFF_UP); - + error = ifnet_attach(pktap->pktp_ifp, NULL); if (error != 0) { printf("%s: ifnet_attach failed - error %d\n", __func__, error); ifnet_release(pktap->pktp_ifp); goto done; } - + /* Attach DLT_PKTAP as the default DLT */ - bpf_attach(pktap->pktp_ifp, DLT_PKTAP, sizeof(struct pktap_header), NULL, - pktap_tap_callback); + bpf_attach(pktap->pktp_ifp, DLT_PKTAP, sizeof(struct pktap_header), + NULL, pktap_tap_callback); bpf_attach(pktap->pktp_ifp, DLT_RAW, 0, NULL, pktap_tap_callback); - + /* Take a reference and add 
to the global list */ ifnet_reference(pktap->pktp_ifp); lck_rw_lock_exclusive(pktap_lck_rw); @@ -288,13 +290,14 @@ pktap_clone_destroy(struct ifnet *ifp) PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname); (void) ifnet_detach(ifp); - + return (error); } /* * This function is called whenever a DLT is set on the interface: - * - When interface is attached to a BPF device via BIOCSETIF for the default DLT + * - When interface is attached to a BPF device via BIOCSETIF for the + * default DLT * - Whenever a new DLT is selected via BIOCSDLT * - When the interface is detached from a BPF device (direction is zero) */ @@ -335,8 +338,8 @@ break; } done: - /* - * Attachements count must be positive and we're in trouble + /* + * Attachment count must be positive and we're in trouble * if we have more than 2**31 attachments */ VERIFY(pktap_total_tap_count >= 0); @@ -353,7 +356,7 @@ pktap_if_output(ifnet_t ifp, mbuf_t m) } __private_extern__ errno_t -pktap_demux(ifnet_t ifp, __unused mbuf_t m, __unused char *header, +pktap_demux(ifnet_t ifp, __unused mbuf_t m, __unused char *header, __unused protocol_family_t *ppf) { PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname); @@ -383,7 +386,7 @@ pktap_getdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) int i; PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname); - + pktap = ifp->if_softc; if (pktap == NULL) { error = ENOENT; @@ -394,11 +397,11 @@ switch (ifd->ifd_cmd) { case PKTP_CMD_FILTER_GET: { struct x_pktap_filter x_filters[PKTAP_MAX_FILTERS]; - + bzero(&x_filters, sizeof(x_filters)); if (ifd->ifd_len < PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter)) { - printf("%s: PKTP_CMD_FILTER_GET ifd_len %llu too small - error %d\n", + printf("%s: PKTP_CMD_FILTER_GET ifd_len %llu too small - error %d\n", __func__, ifd->ifd_len, error); error = EINVAL; break; @@ -406,10 +409,10 @@ for (i = 0; i < PKTAP_MAX_FILTERS; i++) { struct pktap_filter *pktap_filter = pktap->pktp_filters + i; struct x_pktap_filter *x_filter = x_filters + i; - + x_filter->filter_op = pktap_filter->filter_op; x_filter->filter_param = pktap_filter->filter_param; - + if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE) x_filter->filter_param_if_type = pktap_filter->filter_param_if_type; else if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) @@ -417,7 +420,7 @@ pktap_filter->filter_param_if_name, sizeof(x_filter->filter_param_if_name)); } - error = copyout(x_filters, ifd->ifd_data, + error = copyout(x_filters, ifd->ifd_data, PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter)); if (error) { printf("%s: PKTP_CMD_FILTER_GET copyout - error %d\n", __func__, error); @@ -427,9 +430,9 @@ } case PKTP_CMD_TAP_COUNT: { uint32_t tap_count = pktap->pktp_dlt_raw_count + pktap->pktp_dlt_pkttap_count; - + if (ifd->ifd_len < sizeof(tap_count)) { - printf("%s: PKTP_CMD_TAP_COUNT ifd_len %llu too small - error %d\n", + printf("%s: PKTP_CMD_TAP_COUNT ifd_len %llu too small - error %d\n", __func__, ifd->ifd_len, error); error = EINVAL; break; @@ -457,7 +460,7 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) struct pktap_softc *pktap; PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname); - + pktap = ifp->if_softc; if (pktap == NULL) { error = ENOENT; @@ -470,9 +473,9 @@ case PKTP_CMD_FILTER_SET: { struct x_pktap_filter
user_filters[PKTAP_MAX_FILTERS]; int i; int got_op_none = 0; - + if (ifd->ifd_len != PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter)) { - printf("%s: PKTP_CMD_FILTER_SET bad ifd_len %llu - error %d\n", + printf("%s: PKTP_CMD_FILTER_SET bad ifd_len %llu - error %d\n", __func__, ifd->ifd_len, error); error = EINVAL; break; @@ -487,7 +490,7 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) */ for (i = 0; i < PKTAP_MAX_FILTERS; i++) { struct x_pktap_filter *x_filter = user_filters + i; - + switch (x_filter->filter_op) { case PKTAP_FILTER_OP_NONE: /* Following entries must be PKTAP_FILTER_OP_NONE */ @@ -507,7 +510,7 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) } if (error != 0) break; - + switch (x_filter->filter_param) { case PKTAP_FILTER_OP_NONE: if (x_filter->filter_op != PKTAP_FILTER_OP_NONE) { @@ -515,7 +518,7 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) break; } break; - + /* * Do not allow to tap a pktap from a pktap */ @@ -523,23 +526,23 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) if (x_filter->filter_param_if_type == IFT_PKTAP || x_filter->filter_param_if_type > 0xff) { error = EINVAL; - break; + break; } break; case PKTAP_FILTER_PARAM_IF_NAME: if (x_filter->filter_param_if_name == 0 || - strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME, + strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME, strlen(PKTAP_IFNAME)) == 0) { error = EINVAL; - break; + break; } break; default: error = EINVAL; break; - } + } if (error != 0) break; } @@ -548,20 +551,20 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) for (i = 0; i < PKTAP_MAX_FILTERS; i++) { struct pktap_filter *pktap_filter = pktap->pktp_filters + i; struct x_pktap_filter *x_filter = user_filters + i; - + pktap_filter->filter_op = x_filter->filter_op; pktap_filter->filter_param = x_filter->filter_param; - + if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE) pktap_filter->filter_param_if_type = x_filter->filter_param_if_type; else if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) { size_t len; - + strlcpy(pktap_filter->filter_param_if_name, x_filter->filter_param_if_name, sizeof(pktap_filter->filter_param_if_name)); /* - * If name does not end with a number then it's a "wildcard" match + * If name does not end with a number then it's a "wildcard" match * where we compare the prefix of the interface name */ len = strlen(pktap_filter->filter_param_if_name); @@ -591,30 +594,30 @@ pktap_ioctl(ifnet_t ifp, unsigned long cmd, void *data) if ((cmd & IOC_IN)) { error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER); if (error) { - PKTAP_LOG(PKTP_LOG_ERROR, - "%s: kauth_authorize_generic(KAUTH_GENERIC_ISSUSER) - error %d\n", + PKTAP_LOG(PKTP_LOG_ERROR, + "%s: kauth_authorize_generic(KAUTH_GENERIC_ISSUSER) - error %d\n", __func__, error); goto done; } } - + switch (cmd) { case SIOCGDRVSPEC32: { struct ifdrv64 ifd; struct ifdrv32 *ifd32 = (struct ifdrv32 *)data; - + memcpy(ifd.ifd_name, ifd32->ifd_name, sizeof(ifd.ifd_name)); ifd.ifd_cmd = ifd32->ifd_cmd; ifd.ifd_len = ifd32->ifd_len; ifd.ifd_data = ifd32->ifd_data; - + error = pktap_getdrvspec(ifp, &ifd); - + break; } case SIOCGDRVSPEC64: { struct ifdrv64 *ifd64 = (struct ifdrv64 *)data; - + error = pktap_getdrvspec(ifp, ifd64); break; @@ -652,7 +655,7 @@ pktap_detach(ifnet_t ifp) struct pktap_softc *pktap; PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname); - + lck_rw_lock_exclusive(pktap_lck_rw); pktap = ifp->if_softc; @@ -663,7 +666,7 @@ pktap_detach(ifnet_t ifp) /* Drop reference as it's no more on the global list */ 
ifnet_release(ifp); - + _FREE(pktap, M_DEVBUF); /* This is for the reference taken by ifnet_attach() */ @@ -676,12 +679,12 @@ pktap_filter_evaluate(struct pktap_softc *pktap, struct ifnet *ifp) int i; int result = PKTAP_FILTER_SKIP; /* Need positive matching rule to pass */ int match = 0; - + for (i = 0; i < PKTAP_MAX_FILTERS; i++) { struct pktap_filter *pktap_filter = pktap->pktp_filters + i; size_t len = pktap_filter->filter_ifname_prefix_len != 0 ? pktap_filter->filter_ifname_prefix_len : PKTAP_IFXNAMESIZE; - + switch (pktap_filter->filter_op) { case PKTAP_FILTER_OP_NONE: match = 1; @@ -693,7 +696,7 @@ pktap_filter_evaluate(struct pktap_softc *pktap, struct ifnet *ifp) ifp->if_type == pktap_filter->filter_param_if_type) { result = PKTAP_FILTER_OK; match = 1; - PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match type %u\n", + PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match type %u\n", ifp->if_xname, pktap_filter->filter_param_if_type); break; } @@ -703,7 +706,7 @@ pktap_filter_evaluate(struct pktap_softc *pktap, struct ifnet *ifp) len) == 0) { result = PKTAP_FILTER_OK; match = 1; - PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match name %s\n", + PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match name %s\n", ifp->if_xname, pktap_filter->filter_param_if_name); break; } @@ -716,17 +719,17 @@ pktap_filter_evaluate(struct pktap_softc *pktap, struct ifnet *ifp) ifp->if_type == pktap_filter->filter_param_if_type) { result = PKTAP_FILTER_SKIP; match = 1; - PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match type %u\n", + PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match type %u\n", ifp->if_xname, pktap_filter->filter_param_if_type); break; } } if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) { - if (strncmp(ifp->if_xname, pktap_filter->filter_param_if_name, + if (strncmp(ifp->if_xname, pktap_filter->filter_param_if_name, len) == 0) { result = PKTAP_FILTER_SKIP; match = 1; - PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match name %s\n", + PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match name %s\n", ifp->if_xname, pktap_filter->filter_param_if_name); break; } @@ -738,36 +741,94 @@ pktap_filter_evaluate(struct pktap_softc *pktap, struct ifnet *ifp) } if (match == 0) { - PKTAP_LOG(PKTP_LOG_FILTER, "%s no match\n", + PKTAP_LOG(PKTP_LOG_FILTER, "%s no match\n", ifp->if_xname); } return (result); } +static void +pktap_set_procinfo(struct pktap_header *hdr, struct so_procinfo *soprocinfo) +{ + hdr->pth_pid = soprocinfo->spi_pid; + proc_name(soprocinfo->spi_pid, hdr->pth_comm, MAXCOMLEN); + if (soprocinfo->spi_pid != 0) + uuid_copy(hdr->pth_uuid, soprocinfo->spi_uuid); + + /* + * When not delegated, the effective pid is the same as the real pid + */ + if (soprocinfo->spi_epid != soprocinfo->spi_pid) { + hdr->pth_flags |= PTH_FLAG_PROC_DELEGATED; + hdr->pth_epid = soprocinfo->spi_epid; + proc_name(soprocinfo->spi_epid, hdr->pth_ecomm, MAXCOMLEN); + if (soprocinfo->spi_epid != 0) + uuid_copy(hdr->pth_uuid, soprocinfo->spi_euuid); + } +} + __private_extern__ void -pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto, +pktap_finalize_proc_info(struct pktap_header *hdr) +{ + int found; + struct so_procinfo soprocinfo; + + if (!(hdr->pth_flags & PTH_FLAG_DELAY_PKTAP)) + return; + + /* + * Clear the flag as it's internal + */ + hdr->pth_flags &= ~PTH_FLAG_DELAY_PKTAP; + + if (hdr->pth_ipproto == IPPROTO_TCP) + found = inp_findinpcb_procinfo(&tcbinfo, hdr->pth_flowid, + &soprocinfo); + else if (hdr->pth_ipproto == IPPROTO_UDP) + found = inp_findinpcb_procinfo(&udbinfo, hdr->pth_flowid, + &soprocinfo); + else + found = 
inp_findinpcb_procinfo(&ripcbinfo, hdr->pth_flowid, + &soprocinfo); + + if (found == 1) + pktap_set_procinfo(hdr, &soprocinfo); +} + +__private_extern__ void +pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto, struct mbuf *m, u_int32_t pre, int outgoing, struct ifnet *ifp) { int found = 0; struct so_procinfo soprocinfo; - + /* * Getting the pid and procname is expensive - * For outgoing, do the lookup only if there's an + * For outgoing, do the lookup only if there's an * associated socket as indicated by the flowhash */ if (outgoing != 0 && (m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC)) == (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) && m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { - if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) - found = inp_findinpcb_procinfo(&ripcbinfo, m->m_pkthdr.pkt_flowid, &soprocinfo); - else if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) - found = inp_findinpcb_procinfo(&tcbinfo, m->m_pkthdr.pkt_flowid, &soprocinfo); - else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) - found = inp_findinpcb_procinfo(&udbinfo, m->m_pkthdr.pkt_flowid, &soprocinfo); + /* + * To avoid lock ordering issues we delay the process lookup + * to the BPF read as we cannot + * assume the socket lock is unlocked on output + */ + if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) || + m->m_pkthdr.pkt_proto == IPPROTO_TCP || + m->m_pkthdr.pkt_proto == IPPROTO_UDP) { + found = 0; + hdr->pth_flags |= PTH_FLAG_DELAY_PKTAP; + hdr->pth_flowid = m->m_pkthdr.pkt_flowid; + if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) + hdr->pth_ipproto = IPPROTO_RAW; + else + hdr->pth_ipproto = m->m_pkthdr.pkt_proto; + } } else if (outgoing == 0) { struct inpcb *inp = NULL; - + if (proto == PF_INET) { struct ip ip; errno_t error; @@ -776,57 +837,60 @@ pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto, u_short fport, lport; struct inpcbinfo *pcbinfo = NULL; int wildcard = 0; - + error = mbuf_copydata(m, pre, sizeof(struct ip), &ip); if (error != 0) { - PKTAP_LOG(PKTP_LOG_ERROR, "mbuf_copydata tcp v4 failed for %s\n", - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_ERROR, + "mbuf_copydata tcp v4 failed for %s\n", + hdr->pth_ifname); goto done; } hlen = IP_VHL_HL(ip.ip_vhl) << 2; - + faddr = ip.ip_src; laddr = ip.ip_dst; if (ip.ip_p == IPPROTO_TCP) { struct tcphdr th; - - error = mbuf_copydata(m, pre + hlen, + + error = mbuf_copydata(m, pre + hlen, sizeof(struct tcphdr), &th); if (error != 0) goto done; - + fport = th.th_sport; lport = th.th_dport; pcbinfo = &tcbinfo; } else if (ip.ip_p == IPPROTO_UDP) { struct udphdr uh; - - error = mbuf_copydata(m, pre + hlen, + + error = mbuf_copydata(m, pre + hlen, sizeof(struct udphdr), &uh); if (error != 0) { - PKTAP_LOG(PKTP_LOG_ERROR, "mbuf_copydata udp v4 failed for %s\n", - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_ERROR, + "mbuf_copydata udp v4 failed for %s\n", + hdr->pth_ifname); goto done; } fport = uh.uh_sport; lport = uh.uh_dport; - + pcbinfo = &udbinfo; wildcard = 1; } if (pcbinfo != NULL) { inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, wildcard, outgoing ? 
NULL : ifp); - + if (inp == NULL && hdr->pth_iftype != IFT_LOOP) - PKTAP_LOG(PKTP_LOG_NOPCB, "in_pcblookup_hash no pcb %s\n", - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_NOPCB, + "in_pcblookup_hash no pcb %s\n", + hdr->pth_ifname); } else { - PKTAP_LOG(PKTP_LOG_NOPCB, "unknown ip_p %u on %s\n", - ip.ip_p, - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_NOPCB, + "unknown ip_p %u on %s\n", + ip.ip_p, hdr->pth_ifname); pktap_hexdump(PKTP_LOG_NOPCB, &ip, sizeof(struct ip)); } } else if (proto == PF_INET6) { @@ -837,57 +901,60 @@ pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto, u_short fport, lport; struct inpcbinfo *pcbinfo = NULL; int wildcard = 0; - + error = mbuf_copydata(m, pre, sizeof(struct ip6_hdr), &ip6); if (error != 0) goto done; - + faddr = &ip6.ip6_src; laddr = &ip6.ip6_dst; - + if (ip6.ip6_nxt == IPPROTO_TCP) { struct tcphdr th; - - error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr), + + error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr), sizeof(struct tcphdr), &th); if (error != 0) { - PKTAP_LOG(PKTP_LOG_ERROR, "mbuf_copydata tcp v6 failed for %s\n", - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_ERROR, + "mbuf_copydata tcp v6 failed for %s\n", + hdr->pth_ifname); goto done; } - + fport = th.th_sport; lport = th.th_dport; - + pcbinfo = &tcbinfo; } else if (ip6.ip6_nxt == IPPROTO_UDP) { struct udphdr uh; - - error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr), + + error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr), sizeof(struct udphdr), &uh); if (error != 0) { - PKTAP_LOG(PKTP_LOG_ERROR, "mbuf_copydata udp v6 failed for %s\n", - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_ERROR, + "mbuf_copydata udp v6 failed for %s\n", + hdr->pth_ifname); goto done; } - + fport = uh.uh_sport; lport = uh.uh_dport; - + pcbinfo = &udbinfo; wildcard = 1; } if (pcbinfo != NULL) { inp = in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, wildcard, outgoing ? 
NULL : ifp); - + if (inp == NULL && hdr->pth_iftype != IFT_LOOP) - PKTAP_LOG(PKTP_LOG_NOPCB, "in6_pcblookup_hash no pcb %s\n", - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_NOPCB, + "in6_pcblookup_hash no pcb %s\n", + hdr->pth_ifname); } else { - PKTAP_LOG(PKTP_LOG_NOPCB, "unknown ip6.ip6_nxt %u on %s\n", - ip6.ip6_nxt, - hdr->pth_ifname); + PKTAP_LOG(PKTP_LOG_NOPCB, + "unknown ip6.ip6_nxt %u on %s\n", + ip6.ip6_nxt, hdr->pth_ifname); pktap_hexdump(PKTP_LOG_NOPCB, &ip6, sizeof(struct ip6_hdr)); } } @@ -899,32 +966,14 @@ pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto, in_pcb_checkstate(inp, WNT_RELEASE, 0); } } +done: /* * -1 means PID not found */ hdr->pth_pid = -1; hdr->pth_epid = -1; - if (found != 0) { - hdr->pth_pid = soprocinfo.spi_pid; - if (soprocinfo.spi_pid == 0) - strlcpy(hdr->pth_comm, "mach_kernel", sizeof(hdr->pth_comm)); - else - proc_name(soprocinfo.spi_pid, hdr->pth_comm, MAXCOMLEN); - - /* - * When not delegated, the effective pid is the same as the real pid - */ - if (soprocinfo.spi_epid != soprocinfo.spi_pid) { - hdr->pth_flags |= PTH_FLAG_PROC_DELEGATED; - hdr->pth_epid = soprocinfo.spi_epid; - if (soprocinfo.spi_epid == 0) - strlcpy(hdr->pth_ecomm, "mach_kernel", sizeof(hdr->pth_ecomm)); - else - proc_name(soprocinfo.spi_epid, hdr->pth_ecomm, MAXCOMLEN); - } - } -done: - return; + if (found != 0) + pktap_set_procinfo(hdr, &soprocinfo); } __private_extern__ void @@ -932,18 +981,18 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, u_int32_t pre, u_int32_t post, int outgoing) { struct pktap_softc *pktap; - void (*bpf_tap_func)(ifnet_t , u_int32_t , mbuf_t , void * , size_t ) = + void (*bpf_tap_func)(ifnet_t, u_int32_t, mbuf_t, void *, size_t) = outgoing ? bpf_tap_out : bpf_tap_in; lck_rw_lock_shared(pktap_lck_rw); /* - * No need to take the ifnet_lock as the struct ifnet field if_bpf is + * No need to take the ifnet_lock as the struct ifnet field if_bpf is * protected by the BPF subsystem */ LIST_FOREACH(pktap, &pktap_list, pktp_link) { int filter_result; - + filter_result = pktap_filter_evaluate(pktap, ifp); if (filter_result == PKTAP_FILTER_SKIP) continue; @@ -953,7 +1002,7 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, if ((proto == AF_INET ||proto == AF_INET6) && !(m->m_pkthdr.pkt_flags & PKTF_INET_RESOLVE)) { /* - * We can play just with the length of the first mbuf in the + * We can play just with the length of the first mbuf in the * chain because bpf_tap_imp() disregard the packet length * of the mbuf packet header. */ @@ -963,7 +1012,7 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, } } } - + if (pktap->pktp_dlt_pkttap_count > 0) { struct { struct pktap_header hdr; @@ -974,10 +1023,10 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, int unknown_if_type = 0; size_t data_adjust = 0; u_int32_t pre_adjust = 0; - - /* Verify the structure is packed */ + + /* Verify the structure is packed */ _CASSERT(sizeof(hdr_buffer) == sizeof(struct pktap_header) + sizeof(u_int32_t)); - + bzero(&hdr_buffer, sizeof(hdr_buffer)); hdr->pth_length = sizeof(struct pktap_header); hdr->pth_type_next = PTH_TYPE_PACKET; @@ -991,9 +1040,9 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, case IFT_STF: case IFT_CELLULAR: /* - * Packets from pdp interfaces have no loopback + * Packets from pdp interfaces have no loopback * header that contain the protocol number. 
- * As BPF just concatenate the header and the + * As BPF just concatenate the header and the * packet content in a single buffer, * stash the protocol after the pktap header * and adjust the size of the header accordingly @@ -1035,7 +1084,7 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, /* * Skip the protocol in the mbuf as it's in network order */ - pre = 4; + pre = 4; data_adjust = 4; hdr->pth_dlt = DLT_NULL; hdr_buffer.proto = proto; @@ -1050,11 +1099,12 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, break; } if (unknown_if_type) { - PKTAP_LOG(PKTP_LOG_FUNC, "unknown if_type %u for %s\n", - ifp->if_type,ifp->if_xname); + PKTAP_LOG(PKTP_LOG_FUNC, + "unknown if_type %u for %s\n", + ifp->if_type, ifp->if_xname); pktap_count_unknown_if_type += 1; } else { - snprintf(hdr->pth_ifname, sizeof(hdr->pth_ifname), "%s", + snprintf(hdr->pth_ifname, sizeof(hdr->pth_ifname), "%s", ifp->if_xname); hdr->pth_flags |= outgoing ? PTH_FLAG_DIR_OUT : PTH_FLAG_DIR_IN; hdr->pth_protocol_family = proto; @@ -1062,16 +1112,16 @@ pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, hdr->pth_frame_post_length = post; hdr->pth_iftype = ifp->if_type; hdr->pth_ifunit = ifp->if_unit; - + pktap_fill_proc_info(hdr, proto, m, pre, outgoing, ifp); - + hdr->pth_svc = so_svc2tc(m->m_pkthdr.pkt_svc); - + if (data_adjust == 0) { bpf_tap_func(pktap->pktp_ifp, DLT_PKTAP, m, hdr, hdr_size); } else { /* - * We can play just with the length of the first mbuf in the + * We can play just with the length of the first mbuf in the * chain because bpf_tap_imp() disregard the packet length * of the mbuf packet header. */ @@ -1102,16 +1152,16 @@ pktap_input(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, if (frame_header != NULL && frame_header >= start && frame_header <= hdr) { size_t o_len = m->m_len; u_int32_t pre = hdr - frame_header; - + if (mbuf_setdata(m, frame_header, o_len + pre) == 0) { - PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n", + PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n", ifp->if_xname, proto, pre, 0); pktap_bpf_tap(ifp, proto, m, pre, 0, 0); mbuf_setdata(m, hdr, o_len); } } else { - PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n", + PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n", ifp->if_xname, proto, 0, 0); pktap_bpf_tap(ifp, proto, m, 0, 0, 0); @@ -1120,13 +1170,13 @@ pktap_input(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, __private_extern__ void pktap_output(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, - u_int32_t pre, u_int32_t post) + u_int32_t pre, u_int32_t post) { /* Fast path */ if (pktap_total_tap_count == 0) return; - PKTAP_LOG(PKTP_LOG_OUTPUT, "ifp %s proto %u pre %u post %u\n", + PKTAP_LOG(PKTP_LOG_OUTPUT, "ifp %s proto %u pre %u post %u\n", ifp->if_xname, proto, pre, post); pktap_bpf_tap(ifp, proto, m, pre, post, 1); diff --git a/bsd/net/pktap.h b/bsd/net/pktap.h index cecb5cb94..d3406bce1 100644 --- a/bsd/net/pktap.h +++ b/bsd/net/pktap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -29,8 +29,10 @@ #ifndef _NET_PKTAP_H_ #define _NET_PKTAP_H_ +#include #include #include +#include #ifdef PRIVATE @@ -98,21 +100,26 @@ struct pktap_filter { * In theory, there could be several types of blocks in a chain before the actual packet */ struct pktap_header { - uint32_t pth_length; /* length of this header */ - uint32_t pth_type_next; /* type of data following */ - uint32_t pth_dlt; /* DLT of packet */ - char pth_ifname[PKTAP_IFXNAMESIZE]; /* interface name */ - uint32_t pth_flags; /* flags */ - uint32_t pth_protocol_family; - uint32_t pth_frame_pre_length; - uint32_t pth_frame_post_length; - pid_t pth_pid; /* process ID */ - char pth_comm[MAXCOMLEN+1]; /* process command name */ - uint32_t pth_svc; /* service class */ - uint16_t pth_iftype; - uint16_t pth_ifunit; - pid_t pth_epid; /* effective process ID */ - char pth_ecomm[MAXCOMLEN+1]; /* effective command name */ + uint32_t pth_length; /* length of this header */ + uint32_t pth_type_next; /* type of data following */ + uint32_t pth_dlt; /* DLT of packet */ + char pth_ifname[PKTAP_IFXNAMESIZE]; /* interface name */ + uint32_t pth_flags; /* flags */ + uint32_t pth_protocol_family; + uint32_t pth_frame_pre_length; + uint32_t pth_frame_post_length; + pid_t pth_pid; /* process ID */ + char pth_comm[MAXCOMLEN+1]; /* process name */ + uint32_t pth_svc; /* service class */ + uint16_t pth_iftype; + uint16_t pth_ifunit; + pid_t pth_epid; /* effective process ID */ + char pth_ecomm[MAXCOMLEN+1]; /* effective command name */ + uint32_t pth_flowid; + uint32_t pth_ipproto; + struct timeval32 pth_tstamp; + uuid_t pth_uuid; + uuid_t pth_euuid; }; /* @@ -121,10 +128,14 @@ struct pktap_header { #define PTH_TYPE_NONE 0 /* No more data following */ #define PTH_TYPE_PACKET 1 /* Actual captured packet data */ -#define PTH_FLAG_DIR_IN 0x0001 /* Outgoing packet */ -#define PTH_FLAG_DIR_OUT 0x0002 /* Incoming packet */ -#define PTH_FLAG_PROC_DELEGATED 0x0004 /* Process delegated */ -#define PTH_FLAG_IF_DELEGATED 0x0008 /* Interface delegated */ +#define PTH_FLAG_DIR_IN 0x0001 /* Outgoing packet */ +#define PTH_FLAG_DIR_OUT 0x0002 /* Incoming packet */ +#define PTH_FLAG_PROC_DELEGATED 0x0004 /* Process delegated */ +#define PTH_FLAG_IF_DELEGATED 0x0008 /* Interface delegated */ +#ifdef BSD_KERNEL_PRIVATE +#define PTH_FLAG_DELAY_PKTAP 0x1000 /* Finalize pktap header on read */ +#endif /* BSD_KERNEL_PRIVATE */ +#define PTH_FLAG_TSTAMP 0x2000 /* Has time stamp */ #ifdef BSD_KERNEL_PRIVATE @@ -134,6 +145,7 @@ extern void pktap_output(struct ifnet *, protocol_family_t, struct mbuf *, u_int32_t, u_int32_t); extern void pktap_fill_proc_info(struct pktap_header *, protocol_family_t , struct mbuf *, u_int32_t , int , struct ifnet *); +extern void pktap_finalize_proc_info(struct pktap_header *); #endif /* BSD_KERNEL_PRIVATE */ #endif /* PRIVATE */ diff --git a/bsd/net/pktsched/pktsched.h b/bsd/net/pktsched/pktsched.h index aa3361b37..a1ecb25db 100644 --- a/bsd/net/pktsched/pktsched.h +++ b/bsd/net/pktsched/pktsched.h @@ -56,6 +56,7 @@ extern "C" { #define PKTSCHEDF_QALG_SFB 0x8 /* use SFB */ #define PKTSCHEDF_QALG_ECN 0x10 /* enable ECN */ #define PKTSCHEDF_QALG_FLOWCTL 0x20 /* enable flow control advisories */ +#define PKTSCHEDF_QALG_DELAYBASED 0x40 /* Delay based queueing */ /* macro for timeout/untimeout */ /* use old-style timeout/untimeout */ diff --git a/bsd/net/pktsched/pktsched_qfq.c b/bsd/net/pktsched/pktsched_qfq.c index cc696f730..2e0428233 100644 --- a/bsd/net/pktsched/pktsched_qfq.c +++ 
b/bsd/net/pktsched/pktsched_qfq.c @@ -582,6 +582,10 @@ qfq_class_create(struct qfq_if *qif, u_int32_t weight, u_int32_t qlimit, if (flags & QFCF_SFB) cl->cl_qflags |= SFBF_FLOWCTL; } + if (flags & QFCF_DELAYBASED) { + if (flags & QFCF_SFB) + cl->cl_qflags |= SFBF_DELAYBASED; + } if (flags & QFCF_CLEARDSCP) { if (flags & QFCF_RIO) cl->cl_qflags |= RIOF_CLEARDSCP; @@ -1814,6 +1818,8 @@ qfq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags) qflags |= QFCF_ECN; if (flags & PKTSCHEDF_QALG_FLOWCTL) qflags |= QFCF_FLOWCTL; + if (flags & PKTSCHEDF_QALG_DELAYBASED) + qflags |= QFCF_DELAYBASED; qif = qfq_alloc(ifp, M_WAITOK, FALSE); if (qif == NULL) diff --git a/bsd/net/pktsched/pktsched_qfq.h b/bsd/net/pktsched/pktsched_qfq.h index 825cc9215..ca3a2c4c3 100644 --- a/bsd/net/pktsched/pktsched_qfq.h +++ b/bsd/net/pktsched/pktsched_qfq.h @@ -76,6 +76,7 @@ extern "C" { #define QFCF_SFB 0x0200 /* use SFB */ #define QFCF_FLOWCTL 0x0400 /* enable flow control advisories */ #define QFCF_DEFAULTCLASS 0x1000 /* default class */ +#define QFCF_DELAYBASED 0x2000 /* queue sizing is delay based */ #ifdef BSD_KERNEL_PRIVATE #define QFCF_LAZY 0x10000000 /* on-demand resource allocation */ #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/net/pktsched/pktsched_tcq.c b/bsd/net/pktsched/pktsched_tcq.c index 308199d3e..ecadb8bbd 100644 --- a/bsd/net/pktsched/pktsched_tcq.c +++ b/bsd/net/pktsched/pktsched_tcq.c @@ -398,6 +398,10 @@ tcq_class_create(struct tcq_if *tif, int pri, u_int32_t qlimit, if (flags & TQCF_SFB) cl->cl_qflags |= SFBF_FLOWCTL; } + if (flags & TQCF_DELAYBASED) { + if (flags & TQCF_SFB) + cl->cl_qflags |= SFBF_DELAYBASED; + } if (flags & TQCF_CLEARDSCP) { if (flags & TQCF_RIO) cl->cl_qflags |= RIOF_CLEARDSCP; @@ -1012,6 +1016,8 @@ tcq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags) qflags |= TQCF_ECN; if (flags & PKTSCHEDF_QALG_FLOWCTL) qflags |= TQCF_FLOWCTL; + if (flags & PKTSCHEDF_QALG_DELAYBASED) + qflags |= TQCF_DELAYBASED; tif = tcq_alloc(ifp, M_WAITOK, FALSE); if (tif == NULL) diff --git a/bsd/net/pktsched/pktsched_tcq.h b/bsd/net/pktsched/pktsched_tcq.h index 8b85caace..57bb9fea0 100644 --- a/bsd/net/pktsched/pktsched_tcq.h +++ b/bsd/net/pktsched/pktsched_tcq.h @@ -52,6 +52,7 @@ extern "C" { #define TQCF_SFB 0x0200 /* use SFB */ #define TQCF_FLOWCTL 0x0400 /* enable flow control advisories */ #define TQCF_DEFAULTCLASS 0x1000 /* default class */ +#define TQCF_DELAYBASED 0x2000 /* queue sizing is delay based */ #ifdef BSD_KERNEL_PRIVATE #define TQCF_LAZY 0x10000000 /* on-demand resource allocation */ #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/net/route.c b/bsd/net/route.c index 03c66249f..8f077340b 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include #include @@ -83,7 +83,6 @@ #include #include -#include #include #include @@ -336,7 +335,7 @@ uint32_t route_genid_inet6 = 0; #define ASSERT_SIN6IFSCOPE(sa) { \ if ((sa)->sa_family != AF_INET6 || \ (sa)->sa_len < sizeof (struct sockaddr_in6)) \ - panic("%s: bad sockaddr_in %p\n", __func__, sa); \ + panic("%s: bad sockaddr_in6 %p\n", __func__, sa); \ } /* @@ -1458,14 +1457,8 @@ out: int rtioctl(unsigned long req, caddr_t data, struct proc *p) { -#pragma unused(p) -#if INET && MROUTING - return (mrt_ioctl(req, data)); -#else -#pragma unused(req) -#pragma unused(data) +#pragma unused(p, req, data) return (ENXIO); -#endif } struct ifaddr * @@ -1879,6 +1872,19 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, case RTM_RESOLVE: if (ret_nrt == NULL || (rt = 
*ret_nrt) == NULL) senderr(EINVAL); + /* + * According to the UNIX conformance tests, we need to return + * ENETUNREACH when the parent route is RTF_REJECT. + * However, there isn't any point in cloning RTF_REJECT + * routes, so we immediately return an error. + */ + if (rt->rt_flags & RTF_REJECT) { + if (rt->rt_flags & RTF_HOST) { + senderr(EHOSTUNREACH); + } else { + senderr(ENETUNREACH); + } + } /* * If cloning, we have the parent route given by the caller * and will use its rt_gateway, rt_rmx as part of the cloning @@ -2342,9 +2348,14 @@ int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len); - struct radix_node_head *rnh = rt_tables[dst->sa_family]; + struct radix_node_head *rnh = NULL; boolean_t loop = FALSE; + if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) { + return (EINVAL); + } + + rnh = rt_tables[dst->sa_family]; lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); @@ -2352,8 +2363,9 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) * If this is for a route that is on its way of being removed, * or is temporarily frozen, reject the modification request. */ - if (rt->rt_flags & RTF_CONDEMNED) + if (rt->rt_flags & RTF_CONDEMNED) { return (EBUSY); + } /* Add an extra ref for ourselves */ RT_ADDREF_LOCKED(rt); @@ -3787,7 +3799,6 @@ bad: void rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt) { - VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)); VERIFY(gwrt != NULL); RT_LOCK_SPIN(rt); @@ -3796,6 +3807,7 @@ rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt) rt_key(gwrt)->sa_family && (rt->rt_gwroute == NULL || !(rt->rt_gwroute->rt_flags & RTF_UP))) { boolean_t isequal; + VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)); if (rt->rt_gateway->sa_family == AF_INET || rt->rt_gateway->sa_family == AF_INET6) { diff --git a/bsd/net/route.h b/bsd/net/route.h index 458c28f66..d382013da 100644 --- a/bsd/net/route.h +++ b/bsd/net/route.h @@ -92,12 +92,7 @@ struct rt_metrics { #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #ifdef PRIVATE -#ifndef KERNEL -/* Private declaration for user-space (needed by ip_mroute.h) */ -struct route { -#else /* KERNEL */ struct route_old { -#endif /* KERNEL */ void *ro_rt; uint32_t ro_flags; struct sockaddr ro_dst; diff --git a/bsd/net/rtsock.c b/bsd/net/rtsock.c index 84db38b7a..2c2ae2dcd 100644 --- a/bsd/net/rtsock.c +++ b/bsd/net/rtsock.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include diff --git a/bsd/netinet/Makefile b/bsd/netinet/Makefile index dcd60c164..03bb36728 100644 --- a/bsd/netinet/Makefile +++ b/bsd/netinet/Makefile @@ -11,7 +11,7 @@ DATAFILES = \ bootp.h icmp6.h if_ether.h icmp_var.h \ igmp.h igmp_var.h in.h in_pcb.h \ in_systm.h in_var.h ip.h ip6.h \ - ip_icmp.h ip_mroute.h ip_var.h tcp.h \ + ip_icmp.h ip_var.h tcp.h \ tcp_fsm.h tcp_seq.h tcp_timer.h tcp_var.h \ tcpip.h udp.h udp_var.h @@ -24,11 +24,8 @@ PRIVATE_DATAFILES = \ ip_fw.h ip_fw2.h \ tcp_debug.h \ in_gif.h ip_compat.h \ - flow_divert_proto.h - -ifeq ($(PLATFORM),iPhoneOS) -PRIVATE_DATAFILES += mptcp_var.h -endif + flow_divert_proto.h \ + mptcp_var.h PRIVATE_KERNELFILES = ${KERNELFILES} \ ip_ecn.h ip_encap.h @@ -47,4 +44,3 @@ INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) - diff --git a/bsd/netinet/cbrtf.c b/bsd/netinet/cbrtf.c new file mode 100644 index 000000000..568535a68 
--- /dev/null +++ b/bsd/netinet/cbrtf.c @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include + +/* Function to efficiently compute cube root */ + +float cbrtf(float x); + +struct cbrt_table_entry { + double x; + double cbrt_x; + double recip_cbrt_x; + double recip_x; +}; + +static const struct cbrt_table_entry cbrt_table[] = { + /* mantissa = 0x1.00... */ + {0x1.0000000000000p+0, 0x1.0000000000000p+0, + 0x1.0000000000000p+0, 0x1.0000000000000p+0}, /* exponent = 0 */ + {0x1.037e200000000p+1, 0x1.4400000000000p+0, + 0x1.948b0fcd6e9e0p-1, 0x1.f91bd1b62b9cfp-2}, /* exponent = 1 */ + {0x1.0315800000000p+2, 0x1.9800000000000p+0, + 0x1.4141414141414p-1, 0x1.f9e7cba5753afp-3}, /* exponent = 2 */ + + /* mantissa = 0x1.04... 
*/ + {0x1.060c080000000p+0, 0x1.0200000000000p+0, + 0x1.fc07f01fc07f0p-1, 0x1.f42f61dacddc6p-1}, /* exponent = 0 */ + {0x1.05ff4c356ff40p+1, 0x1.450a000000000p+0, + 0x1.933fff9b30002p-1, 0x1.f447b132ca3acp-2}, /* exponent = 1 */ + {0x1.06e9aa0000000p+2, 0x1.9a00000000000p+0, + 0x1.3fb013fb013fbp-1, 0x1.f289bb31fd41cp-3}, /* exponent = 2 */ + + /* mantissa = 0x1.08...*/ + {0x1.09fe97c0b2e80p+0, 0x1.034a000000000p+0, + 0x1.f9815c85b04a3p-1, 0x1.ecc3168ac46e4p-1}, // exponent = 0 + {0x1.0853ec0000000p+1, 0x1.4600000000000p+0, 0x1.920fb49d0e229p-1, 0x1.efde7dcdacefdp-2}, // exponent = 1 + {0x1.0ac7700000000p+2, 0x1.9c00000000000p+0, 0x1.3e22cbce4a902p-1, 0x1.eb501ca81bb3ep-3}, // exponent = 2 + + /* mantissa = 0x1.0c...*/ + {0x1.0c30400000000p+0, 0x1.0400000000000p+0, 0x1.f81f81f81f820p-1, 0x1.e8bb1d5b6e585p-1}, // exponent = 0 + {0x1.0d39000000000p+1, 0x1.4800000000000p+0, 0x1.8f9c18f9c18fap-1, 0x1.e6da80ced1523p-2}, // exponent = 1 + {0x1.0eaede0000000p+2, 0x1.9e00000000000p+0, 0x1.3c995a47babe7p-1, 0x1.e43a0fc24fe4bp-3}, // exponent = 2 + + /* mantissa = 0x1.10...*/ + {0x1.126cd80000000p+0, 0x1.0600000000000p+0, 0x1.f44659e4a4271p-1, 0x1.dd9fb30af3365p-1}, // exponent = 0 + {0x1.122d740000000p+1, 0x1.4a00000000000p+0, 0x1.8d3018d3018d3p-1, 0x1.de0e209af882ep-2}, // exponent = 1 + {0x1.12a0000000000p+2, 0x1.a000000000000p+0, 0x1.3b13b13b13b14p-1, 0x1.dd46baab49c24p-3}, // exponent = 2 + + /* mantissa = 0x1.14...*/ + {0x1.15f9b5b480000p+0, 0x1.0720000000000p+0, 0x1.f222c82dba316p-1, 0x1.d786108fd7a9fp-1}, // exponent = 0 + {0x1.1731600000000p+1, 0x1.4c00000000000p+0, 0x1.8acb90f6bf3aap-1, 0x1.d577b2f5c6f87p-2}, // exponent = 1 + {0x1.169ae20000000p+2, 0x1.a200000000000p+0, 0x1.3991c2c187f63p-1, 0x1.d67549c6f9b67p-3}, // exponent = 2 + + /* mantissa = 0x1.18...*/ + {0x1.18c2000000000p+0, 0x1.0800000000000p+0, 0x1.f07c1f07c1f08p-1, 0x1.d2d9cbd756afdp-1}, // exponent = 0 + {0x1.19fb2ce620540p+1, 0x1.4d1a000000000p+0, 0x1.897d564f5cf98p-1, 0x1.d0d34ccd78141p-2}, // exponent = 1 + {0x1.1a9f900000000p+2, 0x1.a400000000000p+0, 0x1.3813813813814p-1, 0x1.cfc4ef7db5bffp-3}, // exponent = 2 + + /* mantissa = 0x1.1c...*/ + {0x1.1f2fe80000000p+0, 0x1.0a00000000000p+0, 0x1.ecc07b301ecc0p-1, 0x1.c86636f753a66p-1}, // exponent = 0 + {0x1.1c44dc0000000p+1, 0x1.4e00000000000p+0, 0x1.886e5f0abb04ap-1, 0x1.cd159cdbba714p-2}, // exponent = 1 + {0x1.1eae160000000p+2, 0x1.a600000000000p+0, 0x1.3698df3de0748p-1, 0x1.c934e4095d202p-3}, // exponent = 2 + + /* mantissa = 0x1.20...*/ + {0x1.21fac7ca59c00p+0, 0x1.0adc000000000p+0, 0x1.eb2a412496abdp-1, 0x1.c40112c606d3ep-1}, // exponent = 0 + {0x1.2168000000000p+1, 0x1.5000000000000p+0, 0x1.8618618618618p-1, 0x1.c4e651e0c37d7p-2}, // exponent = 1 + {0x1.22c6800000000p+2, 0x1.a800000000000p+0, 0x1.3521cfb2b78c1p-1, 0x1.c2c46544650c1p-3}, // exponent = 2 + + /* mantissa = 0x1.24...*/ + {0x1.25b6c00000000p+0, 0x1.0c00000000000p+0, 0x1.e9131abf0b767p-1, 0x1.be41e7ee3f7edp-1}, // exponent = 0 + {0x1.269ae40000000p+1, 0x1.5200000000000p+0, 0x1.83c977ab2beddp-1, 0x1.bce853967753cp-2}, // exponent = 1 + {0x1.26e8da0000000p+2, 0x1.aa00000000000p+0, 0x1.33ae45b57bcb2p-1, 0x1.bc72b67ab9ce7p-3}, // exponent = 2 + + /* mantissa = 0x1.28...*/ + {0x1.29ff9aaaa2c00p+0, 0x1.0d4c000000000p+0, 0x1.e6b8275501adbp-1, 0x1.b7d7596e80007p-1}, // exponent = 0 + {0x1.2bdda00000000p+1, 0x1.5400000000000p+0, 0x1.8181818181818p-1, 0x1.b51a30f9739f8p-2}, // exponent = 1 + {0x1.2b15300000000p+2, 0x1.ac00000000000p+0, 0x1.323e34a2b10bfp-1, 0x1.b63f203c60c07p-3}, // exponent = 2 + + /* mantissa = 
0x1.2c...*/ + {0x1.2c56b80000000p+0, 0x1.0e00000000000p+0, 0x1.e573ac901e574p-1, 0x1.b469f4adc7794p-1}, // exponent = 0 + {0x1.2dfff74f29dc0p+1, 0x1.54ce000000000p+0, 0x1.80987c755886ap-1, 0x1.b203708429799p-2}, // exponent = 1 + {0x1.2f4b8e0000000p+2, 0x1.ae00000000000p+0, 0x1.30d190130d190p-1, 0x1.b028f031c8644p-3}, // exponent = 2 + + /* mantissa = 0x1.30...*/ + {0x1.3310000000000p+0, 0x1.1000000000000p+0, 0x1.e1e1e1e1e1e1ep-1, 0x1.aadb93d39ae9cp-1}, // exponent = 0 + {0x1.31304c0000000p+1, 0x1.5600000000000p+0, 0x1.7f405fd017f40p-1, 0x1.ad7a85e593e54p-2}, // exponent = 1 + {0x1.338c000000000p+2, 0x1.b000000000000p+0, 0x1.2f684bda12f68p-1, 0x1.aa2f78f1b4cc6p-3}, // exponent = 2 + + /* mantissa = 0x1.34... */ + {0x1.35fb6f4579c00p+0, 0x1.10dc000000000p+0, 0x1.e05d5a24448c5p-1, 0x1.a6d6548fa984dp-1}, // exponent = 0 + {0x1.3693000000000p+1, 0x1.5800000000000p+0, 0x1.7d05f417d05f4p-1, 0x1.a607fa909db1fp-2}, // exponent = 1 + {0x1.37d6920000000p+2, 0x1.b200000000000p+0, 0x1.2e025c04b8097p-1, 0x1.a45211d8b748ap-3}, // exponent = 2 + +/* mantissa = 0x1.38... */ + {0x1.39e2c80000000p+0, 0x1.1200000000000p+0, 0x1.de5d6e3f8868ap-1, 0x1.a1941b013022dp-1}, // exponent = 0 + {0x1.39fe541ac7840p+1, 0x1.5942000000000p+0, 0x1.7ba298eae8947p-1, 0x1.a16f787114257p-2}, // exponent = 1 + {0x1.39ffaac000000p+2, 0x1.b300000000000p+0, 0x1.2d50a012d50a0p-1, 0x1.a16db0ec408b2p-3}, // exponent = 2 + + /* mantissa = 0x1.3c... */ + {0x1.3dfc1312b0000p+0, 0x1.1330000000000p+0, 0x1.dc4cfaf10eb5cp-1, 0x1.9c322b87f17e8p-1}, // exponent = 0 + {0x1.3c05d40000000p+1, 0x1.5a00000000000p+0, 0x1.7ad2208e0ecc3p-1, 0x1.9ec1430b0dfc7p-2}, // exponent = 1 + {0x1.3c2b500000000p+2, 0x1.b400000000000p+0, 0x1.2c9fb4d812ca0p-1, 0x1.9e9016e2211b6p-3}, // exponent = 2 + + /* mantissa = 0x1.40... */ + {0x1.40cf400000000p+0, 0x1.1400000000000p+0, 0x1.dae6076b981dbp-1, 0x1.9890fd4bf368fp-1}, // exponent = 0 + {0x1.4188e00000000p+1, 0x1.5c00000000000p+0, 0x1.78a4c8178a4c8p-1, 0x1.97a51ec6b707ep-2}, // exponent = 1 + {0x1.408a460000000p+2, 0x1.b600000000000p+0, 0x1.2b404ad012b40p-1, 0x1.98e8e88261b62p-3}, // exponent = 2 + + /* mantissa = 0x1.44... */ + {0x1.47d5980000000p+0, 0x1.1600000000000p+0, 0x1.d77b654b82c34p-1, 0x1.8fcfc9c44e2f4p-1}, // exponent = 0 + {0x1.471c3c0000000p+1, 0x1.5e00000000000p+0, 0x1.767dce434a9b1p-1, 0x1.90b25822e2a9fp-2}, // exponent = 1 + {0x1.44f3800000000p+2, 0x1.b800000000000p+0, 0x1.29e4129e4129ep-1, 0x1.935beb82c1ae7p-3}, // exponent = 2 + + /* mantissa = 0x1.48... */ + {0x1.49feb2bc0dc00p+0, 0x1.169c000000000p+0, 0x1.d67366d6ddfd0p-1, 0x1.8d31a9f2d47fbp-1}, // exponent = 0 + {0x1.49fcfb130a6c0p+1, 0x1.5f06000000000p+0, 0x1.75664a1a72c8dp-1, 0x1.8d33bb2686480p-2}, // exponent = 1 + {0x1.49670a0000000p+2, 0x1.ba00000000000p+0, 0x1.288b01288b013p-1, 0x1.8de888de6c48fp-3}, // exponent = 2 + + /* mantissa = 0x1.4c... */ + {0x1.4ef6000000000p+0, 0x1.1800000000000p+0, 0x1.d41d41d41d41dp-1, 0x1.874e2a121159fp-1}, // exponent = 0 + {0x1.4cc0000000000p+1, 0x1.6000000000000p+0, 0x1.745d1745d1746p-1, 0x1.89e7c3fdb1246p-2}, // exponent = 1 + {0x1.4de4f00000000p+2, 0x1.bc00000000000p+0, 0x1.27350b8812735p-1, 0x1.888e2da0ba19dp-3}, // exponent = 2 + + /* mantissa = 0x1.50... 
*/ + {0x1.51ff889bc6000p+0, 0x1.18d8000000000p+0, 0x1.d2b539aeee152p-1, 0x1.83ca00a5a8f32p-1}, // exponent = 0 + {0x1.5274440000000p+1, 0x1.6200000000000p+0, 0x1.724287f46debcp-1, 0x1.8344414a70cbdp-2}, // exponent = 1 + {0x1.526d3e0000000p+2, 0x1.be00000000000p+0, 0x1.25e22708092f1p-1, 0x1.834c4ac4afd3bp-3}, // exponent = 2 + + /* mantissa = 0x1.54... */ + {0x1.5630a80000000p+0, 0x1.1a00000000000p+0, 0x1.d0cb58f6ec074p-1, 0x1.7f09e124e78b8p-1}, // exponent = 0 + {0x1.55fc05a5df140p+1, 0x1.633a000000000p+0, 0x1.70fb3e12b41c4p-1, 0x1.7f44d50c76c8ep-2}, // exponent = 1 + {0x1.5700000000000p+2, 0x1.c000000000000p+0, 0x1.2492492492492p-1, 0x1.7e225515a4f1dp-3}, // exponent = 2 + + /* mantissa = 0x1.58... */ + {0x1.59fc8db9a7e80p+0, 0x1.1b0a000000000p+0, 0x1.cf1688b3b4e6ap-1, 0x1.7ad5e68ed5f8cp-1}, // exponent = 0 + {0x1.5839200000000p+1, 0x1.6400000000000p+0, 0x1.702e05c0b8170p-1, 0x1.7cc6b8acae7cbp-2}, // exponent = 1 + {0x1.5b9d420000000p+2, 0x1.c200000000000p+0, 0x1.23456789abcdfp-1, 0x1.790fc51106751p-3}, // exponent = 2 + + /* mantissa = 0x1.5c... */ + {0x1.5d85c00000000p+0, 0x1.1c00000000000p+0, 0x1.cd85689039b0bp-1, 0x1.7700c9f78cc63p-1}, // exponent = 0 + {0x1.5e0eac0000000p+1, 0x1.6600000000000p+0, 0x1.6e1f76b4337c7p-1, 0x1.766e1c17c26ecp-2}, // exponent = 1 + {0x1.5dfdce5811360p+2, 0x1.c306000000000p+0, 0x1.229c346a04441p-1, 0x1.7680273c586edp-3}, // exponent = 2 + + /* mantissa = 0x1.60... */ + {0x1.61fbc0c515400p+0, 0x1.1d34000000000p+0, 0x1.cb92ff3a86d65p-1, 0x1.7246f92d40d4cp-1}, // exponent = 0 + {0x1.63f5000000000p+1, 0x1.6800000000000p+0, 0x1.6c16c16c16c17p-1, 0x1.70396672a04e5p-2}, // exponent = 1 + {0x1.6045100000000p+2, 0x1.c400000000000p+0, 0x1.21fb78121fb78p-1, 0x1.741416c92a70bp-3}, // exponent = 2 + + /* mantissa = 0x1.64... */ + {0x1.64f5780000000p+0, 0x1.1e00000000000p+0, 0x1.ca4b3055ee191p-1, 0x1.6f30d6649f11bp-1}, // exponent = 0 + {0x1.65fa1cdfa11c0p+1, 0x1.68ae000000000p+0, 0x1.6b671c62a2d0ap-1, 0x1.6e257c2026aefp-2}, // exponent = 1 + {0x1.64f7760000000p+2, 0x1.c600000000000p+0, 0x1.20b470c67c0d9p-1, 0x1.6f2ec9c929a29p-3}, // exponent = 2 + + /* mantissa = 0x1.68... */ + {0x1.69fc04b688980p+0, 0x1.1f56000000000p+0, 0x1.c829b51036037p-1, 0x1.6a17c8a1a662ep-1}, // exponent = 0 + {0x1.69ec340000000p+1, 0x1.6a00000000000p+0, 0x1.6a13cd1537290p-1, 0x1.6a279b3fb4a4ep-2}, // exponent = 1 + {0x1.69b4800000000p+2, 0x1.c800000000000p+0, 0x1.1f7047dc11f70p-1, 0x1.6a5f60f9b4c97p-3}, // exponent = 2 + + /* mantissa = 0x1.6c... */ + {0x1.6c80000000000p+0, 0x1.2000000000000p+0, 0x1.c71c71c71c71cp-1, 0x1.67980e0bf08c7p-1}, // exponent = 0 + {0x1.6ff4600000000p+1, 0x1.6c00000000000p+0, 0x1.6816816816817p-1, 0x1.6437c6489c8e0p-2}, // exponent = 1 + {0x1.6e7c3a0000000p+2, 0x1.ca00000000000p+0, 0x1.1e2ef3b3fb874p-1, 0x1.65a56286dbe08p-3}, // exponent = 2 + + /* mantissa = 0x1.70... */ + {0x1.71fc3c5870000p+0, 0x1.2170000000000p+0, 0x1.c4d9cd40d7cfdp-1, 0x1.6243421ae7a84p-1}, // exponent = 0 + {0x1.71fef1bff2600p+1, 0x1.6cac000000000p+0, 0x1.676caae4b2e0fp-1, 0x1.6240aa2fa0dfdp-2}, // exponent = 1 + {0x1.734eb00000000p+2, 0x1.cc00000000000p+0, 0x1.1cf06ada2811dp-1, 0x1.610057c6bdd38p-3}, // exponent = 2 + + /* mantissa = 0x1.74... */ + {0x1.7425880000000p+0, 0x1.2200000000000p+0, 0x1.c3f8f01c3f8f0p-1, 0x1.60348d4756756p-1}, // exponent = 0 + {0x1.760d9c0000000p+1, 0x1.6e00000000000p+0, 0x1.661ec6a5122f9p-1, 0x1.5e68fb4d877a7p-2}, // exponent = 1 + {0x1.75fb34f0902a0p+2, 0x1.cd1a000000000p+0, 0x1.1c4227955e4f1p-1, 0x1.5e7a396f89f71p-3}, // exponent = 2 + + /* mantissa = 0x1.78... 
*/ + {0x1.7be6400000000p+0, 0x1.2400000000000p+0, 0x1.c0e070381c0e0p-1, 0x1.5904842e0271bp-1}, // exponent = 0 + {0x1.79fec8fa79000p+1, 0x1.6f48000000000p+0, 0x1.64def50b37b22p-1, 0x1.5ac1740057116p-2}, // exponent = 1 + {0x1.782bee0000000p+2, 0x1.ce00000000000p+0, 0x1.1bb4a4046ed29p-1, 0x1.5c6fcd2117a65p-3}, // exponent = 2 + + /* mantissa = 0x1.7c... */ + {0x1.7dfa08e162000p+0, 0x1.2488000000000p+0, 0x1.c00fc08dc4fbfp-1, 0x1.57242f8b50298p-1}, // exponent = 0 + {0x1.7c38000000000p+1, 0x1.7000000000000p+0, 0x1.642c8590b2164p-1, 0x1.58ba55b815609p-2}, // exponent = 1 + {0x1.7d14000000000p+2, 0x1.d000000000000p+0, 0x1.1a7b9611a7b96p-1, 0x1.57f351f7aa6eap-3}, // exponent = 2 + + /* mantissa = 0x1.80... */ + {0x1.83c2580000000p+0, 0x1.2600000000000p+0, 0x1.bdd2b899406f7p-1, 0x1.520635a583b96p-1}, // exponent = 0 + {0x1.8273a40000000p+1, 0x1.7200000000000p+0, 0x1.623fa77016240p-1, 0x1.532af851862acp-2}, // exponent = 1 + {0x1.8206f20000000p+2, 0x1.d200000000000p+0, 0x1.19453808ca29cp-1, 0x1.538a788f6fdd6p-3}, // exponent = 2 + + /* mantissa = 0x1.84... */ + {0x1.85fd33ff90000p+0, 0x1.2690000000000p+0, 0x1.bcf8c69606a07p-1, 0x1.50176a58004f0p-1}, // exponent = 0 + {0x1.85fccde240000p+1, 0x1.7320000000000p+0, 0x1.612cc01b977f0p-1, 0x1.5017c2589970ep-2}, // exponent = 1 + {0x1.8704d00000000p+2, 0x1.d400000000000p+0, 0x1.1811811811812p-1, 0x1.4f34d5fa956d6p-3}, // exponent = 2 + + /* mantissa = 0x1.88... */ + {0x1.8bba000000000p+0, 0x1.2800000000000p+0, 0x1.bacf914c1bad0p-1, 0x1.4b37f67f9d05cp-1}, // exponent = 0 + {0x1.88c0a00000000p+1, 0x1.7400000000000p+0, 0x1.6058160581606p-1, 0x1.4dba0cfc11861p-2}, // exponent = 1 + {0x1.89fbb1ca4e0e0p+2, 0x1.d52e000000000p+0, 0x1.175d3b160af03p-1, 0x1.4caf2b205f9ddp-3}, // exponent = 2 + + /* mantissa = 0x1.8c... */ + {0x1.8dfca52590000p+0, 0x1.2890000000000p+0, 0x1.b9f88e001b9f9p-1, 0x1.495664ea7f47dp-1}, // exponent = 0 + {0x1.8f1f0c0000000p+1, 0x1.7600000000000p+0, 0x1.5e75bb8d015e7p-1, 0x1.4866c46f405dbp-2}, // exponent = 1 + {0x1.8c0da60000000p+2, 0x1.d600000000000p+0, 0x1.16e0689427379p-1, 0x1.4af2020336a59p-3}, // exponent = 2 + + /* mantissa = 0x1.90... */ + {0x1.93cd680000000p+0, 0x1.2a00000000000p+0, 0x1.b7d6c3dda338bp-1, 0x1.44982ca42a2ebp-1}, // exponent = 0 + {0x1.91fabaf07d200p+1, 0x1.76e4000000000p+0, 0x1.5da09741396f7p-1, 0x1.461102bc1cb8fp-2}, // exponent = 1 + {0x1.9121800000000p+2, 0x1.d800000000000p+0, 0x1.15b1e5f75270dp-1, 0x1.46c19716cf2c0p-3}, // exponent = 2 + + /* mantissa = 0x1.94... */ + {0x1.95ff68a951e80p+0, 0x1.2a8a000000000p+0, 0x1.b70b72f76e7ddp-1, 0x1.42d6dab45c848p-1}, // exponent = 0 + {0x1.958f000000000p+1, 0x1.7800000000000p+0, 0x1.5c9882b931057p-1, 0x1.433055f7235dbp-2}, // exponent = 1 + {0x1.96406a0000000p+2, 0x1.da00000000000p+0, 0x1.1485f0e0acd3bp-1, 0x1.42a332325db6bp-3}, // exponent = 2 + + /* mantissa = 0x1.98... */ + {0x1.9bfcc00000000p+0, 0x1.2c00000000000p+0, 0x1.b4e81b4e81b4fp-1, 0x1.3e254e465d72cp-1}, // exponent = 0 + {0x1.99ffaac1ec3c0p+1, 0x1.795e000000000p+0, 0x1.5b55320eae3fdp-1, 0x1.3fb056724ebb2p-2}, // exponent = 1 + {0x1.9b6a700000000p+2, 0x1.dc00000000000p+0, 0x1.135c81135c811p-1, 0x1.3e9672cf3131dp-3}, // exponent = 2 + + /* mantissa = 0x1.9c... */ + {0x1.9dfc708557c00p+0, 0x1.2c7c000000000p+0, 0x1.b433cf4756912p-1, 0x1.3c9c1357411b6p-1}, // exponent = 0 + {0x1.9c10940000000p+1, 0x1.7a00000000000p+0, 0x1.5ac056b015ac0p-1, 0x1.3e15ff3643c49p-2}, // exponent = 1 + {0x1.9dfe6c1816fe0p+2, 0x1.dcfe000000000p+0, 0x1.12c9df926137bp-1, 0x1.3c9a8f2a1f8a5p-3}, // exponent = 2 + + /* mantissa = 0x1.a0... 
*/ + {0x1.a1f8756df7480p+0, 0x1.2d72000000000p+0, 0x1.b2cfd6b4a2ec0p-1, 0x1.39976b1b376fbp-1}, // exponent = 0 + {0x1.a2a3e00000000p+1, 0x1.7c00000000000p+0, 0x1.58ed2308158edp-1, 0x1.391703ea2d9b9p-2}, // exponent = 1 + {0x1.a09f9e0000000p+2, 0x1.de00000000000p+0, 0x1.12358e75d3033p-1, 0x1.3a9afad059b87p-3}, // exponent = 2 + + /* mantissa = 0x1.a4... */ + {0x1.a448380000000p+0, 0x1.2e00000000000p+0, 0x1.b2036406c80d9p-1, 0x1.37dde124a87f2p-1}, // exponent = 0 + {0x1.a5fad7a3ee040p+1, 0x1.7d02000000000p+0, 0x1.580391c97b3f3p-1, 0x1.369cab16c4bb8p-2}, // exponent = 1 + {0x1.a5e0000000000p+2, 0x1.e000000000000p+0, 0x1.1111111111111p-1, 0x1.36b06e70b7421p-3}, // exponent = 2 + + /* mantissa = 0x1.a8... */ + {0x1.a9fbaa05b1c00p+0, 0x1.2f5c000000000p+0, 0x1.b01182b5ac1cep-1, 0x1.33b1676d97a5bp-1}, // exponent = 0 + {0x1.a948fc0000000p+1, 0x1.7e00000000000p+0, 0x1.571ed3c506b3ap-1, 0x1.3432adb274266p-2}, // exponent = 1 + {0x1.ab2ba20000000p+2, 0x1.e200000000000p+0, 0x1.0fef010fef011p-1, 0x1.32d67431a0280p-3}, // exponent = 2 + + /* mantissa = 0x1.ac... */ + {0x1.acb0000000000p+0, 0x1.3000000000000p+0, 0x1.af286bca1af28p-1, 0x1.31c079d2b089fp-1}, // exponent = 0 + {0x1.adffcaf535000p+1, 0x1.7f68000000000p+0, 0x1.55dca75792aa1p-1, 0x1.30d1b5accf7d2p-2}, // exponent = 1 + {0x1.adfb1053dbae0p+2, 0x1.e30e000000000p+0, 0x1.0f57023f898dcp-1, 0x1.30d50fe844fd2p-3}, // exponent = 2 + + /* mantissa = 0x1.b0... */ + {0x1.b1ff52f400000p+0, 0x1.3140000000000p+0, 0x1.ad646ddd321c2p-1, 0x1.2e02d4701d501p-1}, // exponent = 0 + {0x1.b000000000000p+1, 0x1.8000000000000p+0, 0x1.5555555555555p-1, 0x1.2f684bda12f68p-2}, // exponent = 1 + {0x1.b082900000000p+2, 0x1.e400000000000p+0, 0x1.0ecf56be69c90p-1, 0x1.2f0cb4ca19e1ep-3}, // exponent = 2 + + /* mantissa = 0x1.b4... */ + {0x1.b534480000000p+0, 0x1.3200000000000p+0, 0x1.ac5701ac5701bp-1, 0x1.2bcbbb0cb73f6p-1}, // exponent = 0 + {0x1.b6c9040000000p+1, 0x1.8200000000000p+0, 0x1.5390948f40febp-1, 0x1.2ab733230f96fp-2}, // exponent = 1 + {0x1.b5e4d60000000p+2, 0x1.e600000000000p+0, 0x1.0db20a88f4696p-1, 0x1.2b52db169e95ep-3}, // exponent = 2 + + /* mantissa = 0x1.b8... */ + {0x1.b9fa0378e5c00p+0, 0x1.331c000000000p+0, 0x1.aacae5fd5e77dp-1, 0x1.288f0567537ffp-1}, // exponent = 0 + {0x1.b9fd76ec78000p+1, 0x1.82f0000000000p+0, 0x1.52bdf6a7a2620p-1, 0x1.288cb4a41a9b5p-2}, // exponent = 1 + {0x1.bb52800000000p+2, 0x1.e800000000000p+0, 0x1.0c9714fbcda3bp-1, 0x1.27a894096a4f5p-3}, // exponent = 2 + + /* mantissa = 0x1.bc... */ + {0x1.bdd5400000000p+0, 0x1.3400000000000p+0, 0x1.a98ef606a63bep-1, 0x1.25fe5513ebf45p-1}, // exponent = 0 + {0x1.bda4200000000p+1, 0x1.8400000000000p+0, 0x1.51d07eae2f815p-1, 0x1.261ebd944131ep-2}, // exponent = 1 + {0x1.bdfd332712ca0p+2, 0x1.e8fa000000000p+0, 0x1.0c0dc264ce74bp-1, 0x1.25e3ff656ec87p-3}, // exponent = 2 + + /* mantissa = 0x1.c0... */ + {0x1.c1fc1c0569400p+0, 0x1.34f4000000000p+0, 0x1.a83eded1251e7p-1, 0x1.2347ec39d66b0p-1}, // exponent = 0 + {0x1.c1fd3bf5cf840p+1, 0x1.8542000000000p+0, 0x1.50b90cb22a299p-1, 0x1.234731d751cccp-2}, // exponent = 1 + {0x1.c0cb9a0000000p+2, 0x1.ea00000000000p+0, 0x1.0b7e6ec259dc8p-1, 0x1.240d8e9b4ae5dp-3}, // exponent = 2 + + /* mantissa = 0x1.c4... */ + {0x1.c693180000000p+0, 0x1.3600000000000p+0, 0x1.a6d01a6d01a6dp-1, 0x1.2057051321929p-1}, // exponent = 0 + {0x1.c4916c0000000p+1, 0x1.8600000000000p+0, 0x1.5015015015015p-1, 0x1.219e4a4924f1fp-2}, // exponent = 1 + {0x1.c650300000000p+2, 0x1.ec00000000000p+0, 0x1.0a6810a6810a7p-1, 0x1.20817bbcedd1fp-3}, // exponent = 2 + + /* mantissa = 0x1.c8... 
*/ + {0x1.c9fc4ad339d80p+0, 0x1.36c6000000000p+0, 0x1.a5c2b87b4e25ap-1, 0x1.1e3144d16fd97p-1}, // exponent = 0 + {0x1.cb91000000000p+1, 0x1.8800000000000p+0, 0x1.4e5e0a72f0539p-1, 0x1.1d353d43a7247p-2}, // exponent = 1 + {0x1.cbe04e0000000p+2, 0x1.ee00000000000p+0, 0x1.0953f39010954p-1, 0x1.1d040e48a75cdp-3}, // exponent = 2 + + /* mantissa = 0x1.cc... */ + {0x1.cf6e000000000p+0, 0x1.3800000000000p+0, 0x1.a41a41a41a41ap-1, 0x1.1ad4948b6e145p-1}, // exponent = 0 + {0x1.cdfd181598000p+1, 0x1.88b0000000000p+0, 0x1.4dc82df5d0542p-1, 0x1.1bb66cda74540p-2}, // exponent = 1 + {0x1.cdfeef0724420p+2, 0x1.eec2000000000p+0, 0x1.08ebe9d4e24aep-1, 0x1.1bb54ba55bb8ep-3}, // exponent = 2 + + /* mantissa = 0x1.d0... */ + {0x1.d1f9c6201cc80p+0, 0x1.3892000000000p+0, 0x1.a35607552f1cdp-1, 0x1.1948fa1f5ff30p-1}, // exponent = 0 + {0x1.d2a2f40000000p+1, 0x1.8a00000000000p+0, 0x1.4cab88725af6ep-1, 0x1.18e2ff3fca5acp-2}, // exponent = 1 + {0x1.d17c000000000p+2, 0x1.f000000000000p+0, 0x1.0842108421084p-1, 0x1.1994faf4aec92p-3}, // exponent = 2 + + /* mantissa = 0x1.d4... */ + {0x1.d5f8615bde180p+0, 0x1.3976000000000p+0, 0x1.a22504db000b7p-1, 0x1.16e4ee12da718p-1}, // exponent = 0 + {0x1.d5f9b87878000p+1, 0x1.8af0000000000p+0, 0x1.4be15f5393e98p-1, 0x1.16e4227697dbfp-2}, // exponent = 1 + {0x1.d723520000000p+2, 0x1.f200000000000p+0, 0x1.073260a47f7c6p-1, 0x1.1633f845cb3dep-3}, // exponent = 2 + + /* mantissa = 0x1.d8... */ + {0x1.d866280000000p+0, 0x1.3a00000000000p+0, 0x1.a16d3f97a4b02p-1, 0x1.1575d8c8402f4p-1}, // exponent = 0 + {0x1.d9c7600000000p+1, 0x1.8c00000000000p+0, 0x1.4afd6a052bf5bp-1, 0x1.14a6fd8916ecfp-2}, // exponent = 1 + {0x1.d9fb5ac000000p+2, 0x1.f300000000000p+0, 0x1.06ab59c7912fbp-1, 0x1.1488a6b10c148p-3}, // exponent = 2 + + /* mantissa = 0x1.dc... */ + {0x1.ddfdfe805bc00p+0, 0x1.3b3c000000000p+0, 0x1.9fcacece0b241p-1, 0x1.1236b509d4023p-1}, // exponent = 0 + {0x1.ddff55aa1e600p+1, 0x1.8d2c000000000p+0, 0x1.4a036770fd266p-1, 0x1.1235f02ce295ap-2}, // exponent = 1 + {0x1.dcd6500000000p+2, 0x1.f400000000000p+0, 0x1.0624dd2f1a9fcp-1, 0x1.12e0be826d695p-3}, // exponent = 2 + + /* mantissa = 0x1.e0... */ + {0x1.e17bc00000000p+0, 0x1.3c00000000000p+0, 0x1.9ec8e951033d9p-1, 0x1.1039b25a7f122p-1}, // exponent = 0 + {0x1.e0fe5c0000000p+1, 0x1.8e00000000000p+0, 0x1.49539e3b2d067p-1, 0x1.1080a9d1be542p-2}, // exponent = 1 + {0x1.e295060000000p+2, 0x1.f600000000000p+0, 0x1.05197f7d73404p-1, 0x1.0f9b07a631f92p-3}, // exponent = 2 + + /* mantissa = 0x1.e4... */ + {0x1.e5ff3ecf6fc00p+0, 0x1.3cfc000000000p+0, 0x1.9d7f292cef9bap-1, 0x1.0db275be001a6p-1}, // exponent = 0 + {0x1.e5fefa40c0000p+1, 0x1.8f60000000000p+0, 0x1.48315b6c3fc79p-1, 0x1.0db29bc986108p-2}, // exponent = 1 + {0x1.e5fe06d9140e0p+2, 0x1.f72e000000000p+0, 0x1.047cca585fbe4p-1, 0x1.0db322dce8431p-3}, // exponent = 2 + + /* mantissa = 0x1.e8... */ + {0x1.eaaef80000000p+0, 0x1.3e00000000000p+0, 0x1.9c2d14ee4a102p-1, 0x1.0b1f0c9a4ed7cp-1}, // exponent = 0 + {0x1.e848000000000p+1, 0x1.9000000000000p+0, 0x1.47ae147ae147bp-1, 0x1.0c6f7a0b5ed8dp-2}, // exponent = 1 + {0x1.e85f800000000p+2, 0x1.f800000000000p+0, 0x1.0410410410410p-1, 0x1.0c628f55c92dep-3}, // exponent = 2 + + /* mantissa = 0x1.ec... */ + {0x1.edfb5912a5180p+0, 0x1.3eb6000000000p+0, 0x1.9b41b55ca11fcp-1, 0x1.0956733c0be03p-1}, // exponent = 0 + {0x1.efa4640000000p+1, 0x1.9200000000000p+0, 0x1.460cbc7f5cf9ap-1, 0x1.0872e8415508dp-2}, // exponent = 1 + {0x1.ee35ca0000000p+2, 0x1.fa00000000000p+0, 0x1.03091b51f5e1ap-1, 0x1.093712d33ff42p-3}, // exponent = 2 + + /* mantissa = 0x1.f0... 
*/ + {0x1.f1fd112ab0c80p+0, 0x1.3f92000000000p+0, 0x1.9a2696dd75ba1p-1, 0x1.0733ed7907e73p-1}, // exponent = 0 + {0x1.f1fc8b255bc40p+1, 0x1.92a2000000000p+0, 0x1.45898cb57730cp-1, 0x1.0734344eaebefp-2}, // exponent = 1 + {0x1.f1ff2ff2d4ba0p+2, 0x1.fb4a000000000p+0, 0x1.02609989a73cfp-1, 0x1.0732ce999c3d1p-3}, // exponent = 2 + + /* mantissa = 0x1.f4... */ + {0x1.f400000000000p+0, 0x1.4000000000000p+0, 0x1.999999999999ap-1, 0x1.0624dd2f1a9fcp-1}, // exponent = 0 + {0x1.f713a00000000p+1, 0x1.9400000000000p+0, 0x1.446f86562d9fbp-1, 0x1.048a727489527p-2}, // exponent = 1 + {0x1.f417f00000000p+2, 0x1.fc00000000000p+0, 0x1.0204081020408p-1, 0x1.061850f2a7123p-3}, // exponent = 2 + + /* mantissa = 0x1.f8... */ + {0x1.f9fe36d7a7d80p+0, 0x1.4146000000000p+0, 0x1.97f9f956c92fdp-1, 0x1.030a055aebeddp-1}, // exponent = 0 + {0x1.f9f8b6ce70ec0p+1, 0x1.94c6000000000p+0, 0x1.43d0d2af8e146p-1, 0x1.030cd637fd65ep-2}, // exponent = 1 + {0x1.fa05fe0000000p+2, 0x1.fe00000000000p+0, 0x1.0101010101010p-1, 0x1.03060a0f151c2p-3}, // exponent = 2 + + /* mantissa = 0x1.fc... */ + {0x1.fd6f080000000p+0, 0x1.4200000000000p+0, 0x1.970e4f80cb872p-1, 0x1.014a239d8b1a9p-1}, // exponent = 0 + {0x1.fe95cc0000000p+1, 0x1.9600000000000p+0, 0x1.42d6625d51f87p-1, 0x1.00b59a78a8ffcp-2}, // exponent = 1 + {0x1.0000000000000p+3, 0x1.0000000000000p+1, 0x1.0000000000000p-1, 0x1.0000000000000p-3}, // exponent = 2 +}; + +union floatdata { float f; int32_t x; }; + +float cbrtf(float x) { + union floatdata xabs, result; + int32_t mantissa_key; + double r; + const struct cbrt_table_entry *table; + + if (x != x) return x + x; + + /* Reset the sign bit to get the absolute value */ + xabs.x = *(int32_t *)&x & 0x7fffffff; + if (xabs.f == __builtin_inff()) return (x); + + if (xabs.f < 0x1.0p-126f) { // denormal path + if (xabs.f == 0.0f) return x; + xabs.f *= 0x1.0p45f; + + result.x = ((xabs.x & 0x7f800000U) >> 23) - 1; + mantissa_key = ((xabs.x & 0x007e0000U) >> 17) * 3; + + table = cbrt_table + mantissa_key + result.x%3; + + xabs.x = (xabs.x & 0x007fffffU) | ((result.x%3 + 127) << 23); + r = ((double)xabs.f - table->x)*(table->recip_x); + result.x = (result.x / 3 + 70) << 23; + result.x = (result.x & 0x7fffffff) + | (*(int32_t *) &x & 0x80000000); + } else { + result.x = ((xabs.x & 0x7f800000U) >> 23) - 1; + mantissa_key = ((xabs.x & 0x007e0000U) >> 17) * 3; + + table = cbrt_table + mantissa_key + result.x%3; + + xabs.x = (xabs.x & 0x007fffffU) | ((result.x%3 + 127) << 23); + r = ((double)xabs.f - table->x)*(table->recip_x); + result.x = (result.x / 3 + 85) << 23; + result.x = (result.x & 0x7fffffff) + | (*(int32_t *) &x & 0x80000000); + } + + /* Bigger polynomial for correctly rounded cbrt. */ + double poly = 1.0 + (.333333333333341976693463092094589 + (-.111111111111154331658603135046499 + (0.617283944244925372967204212785709e-1 + (-0.411522622533364699898800342654033e-1 + (0.301852863186459692668300411679515e-1 - 0.234797653033909108182788624401527e-1*r)*r)*r)*r)*r)*r; + + poly *= table->cbrt_x; + result.f *= (float)poly; + return(result.f); +}
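The routine above is table-driven: it splits |x| into a power of two and a 6-bit mantissa key, selects the cbrt_table row matching that key and the exponent residue mod 3, and refines the seeded estimate with the degree-6 polynomial in the residual r. On the normal path the bias arithmetic (result.x / 3 + 85) << 23 supplies the cube root of the power-of-two factor; the denormal path pre-scales by 2^45 and compensates with +70 (that is, 85 - 45/3). A small user-space harness along the following lines (an editorial sketch, not part of this patch; it assumes the routine above is compiled into the test with its symbol renamed to xnu_cbrtf so it does not collide with libm's cbrtf) can compare it against the libm reference:

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

float xnu_cbrtf(float x);	/* the table-driven routine above, renamed */

int
main(void)
{
	float max_rel_err = 0.0f;
	int i;

	srand48(1);
	for (i = 0; i < 1000000; i++) {
		/* magnitudes from 2^-151 (denormal path) up to ~2^98 */
		float x = (float)((drand48() - 0.5) *
		    ldexp(1.0, (i % 250) - 150));
		float got = xnu_cbrtf(x);
		float ref = cbrtf(x);	/* libm reference */

		if (ref != 0.0f) {
			float rel = fabsf((got - ref) / ref);
			if (rel > max_rel_err)
				max_rel_err = rel;
		}
	}
	printf("max relative error vs libm: %g\n", max_rel_err);
	return (0);
}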
diff --git a/bsd/netinet/flow_divert.c b/bsd/netinet/flow_divert.c index 8fbe88a80..697016c49 100644 --- a/bsd/netinet/flow_divert.c +++ b/bsd/netinet/flow_divert.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * @@ -560,6 +560,7 @@ flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, int data_len, Boo struct ifnet *ifp = NULL; Boolean cell = FALSE; Boolean wifi = FALSE; + Boolean wired = FALSE; inp = sotoinpcb(fd_cb->so); if (inp == NULL) { @@ -570,14 +571,15 @@ flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, int data_len, Boo if (ifp != NULL) { cell = IFNET_IS_CELLULAR(ifp); wifi = (!cell && IFNET_IS_WIFI(ifp)); + wired = (!wifi && IFNET_IS_WIRED(ifp)); } if (send) { - INP_ADD_STAT(inp, cell, wifi, txpackets, 1); - INP_ADD_STAT(inp, cell, wifi, txbytes, data_len); + INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, txbytes, data_len); } else { - INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); - INP_ADD_STAT(inp, cell, wifi, rxbytes, data_len); + INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, data_len); } } @@ -585,17 +587,24 @@ static errno_t flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb) { struct inpcb *inp = NULL; - struct ifnet *ifp = NULL; inp = sotoinpcb(fd_cb->so); - if ((inp != NULL) && (inp->inp_flags & INP_NO_IFT_CELLULAR)) { - ifp = inp->inp_last_outifp; - if (ifp != NULL) { - if (IFNET_IS_CELLULAR(ifp)) { - return EHOSTUNREACH; - } - } - } + if (inp && INP_NO_CELLULAR(inp) && inp->inp_last_outifp && + IFNET_IS_CELLULAR(inp->inp_last_outifp)) + return EHOSTUNREACH; + + return 0; +} + +static errno_t +flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb) +{ + struct inpcb *inp = NULL; + + inp = sotoinpcb(fd_cb->so); + if (inp && INP_NO_EXPENSIVE(inp) && inp->inp_last_outifp && + IFNET_IS_EXPENSIVE(inp->inp_last_outifp)) + return EHOSTUNREACH; return 0; } @@ -861,16 +870,10 @@ flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean en } static int -flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, proc_t p) +flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, mbuf_t connect_packet) { - mbuf_t connect_packet = NULL; int error = 0; - error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet); - if (error) { - goto done; - } - error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(fd_cb->so->so_traffic_class), @@ -921,7 +924,6 @@ flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, pro } else { uint32_t ctl_unit = htonl(fd_cb->control_group_unit); int port; - int release_proc; error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit); if (error) { @@ -946,30 +948,6 @@ flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, pro if (error) { goto done; } - - release_proc = flow_divert_get_src_proc(fd_cb->so, &p, FALSE); - if (p != PROC_NULL) { - proc_lock(p); - if (p->p_csflags & CS_VALID) { - const char *signing_id = cs_identity_get(p); - if (signing_id != NULL) { - error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_SIGNING_ID, strlen(signing_id), signing_id); - } - - if (error == 0) { - unsigned char cdhash[SHA1_RESULTLEN]; - error = proc_getcdhash(p, cdhash); - if (error == 0) { - error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CDHASH, sizeof(cdhash), cdhash); - } - } - } - proc_unlock(p); - - if (release_proc) { - proc_rele(p); - } - } } error = flow_divert_send_packet(fd_cb, connect_packet, TRUE); @@ -978,10 +956,6 @@
flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, pro } done: - if (error && connect_packet != NULL) { - mbuf_free(connect_packet); - } - return error; } @@ -1586,7 +1560,8 @@ flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t off FDLOCK(fd_cb); if (fd_cb->so != NULL) { socket_lock(fd_cb->so, 0); - if (flow_divert_check_no_cellular(fd_cb)) { + if (flow_divert_check_no_cellular(fd_cb) || + flow_divert_check_no_expensive(fd_cb)) { flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE); flow_divert_send_close(fd_cb, SHUT_RDWR); soisdisconnected(fd_cb->so); @@ -1833,7 +1808,7 @@ flow_divert_handle_app_map_create(mbuf_t packet, int offset) new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size); if (new_node_idx != NULL_TRIE_IDX) { if (is_dns) { - FDLOG(LOG_NOTICE, &nil_pcb, "Setting group unit for %s to %d", FLOW_DIVERT_DNS_SERVICE_SIGNING_ID, DNS_SERVICE_GROUP_UNIT); + FDLOG(LOG_INFO, &nil_pcb, "Setting group unit for %s to %d", FLOW_DIVERT_DNS_SERVICE_SIGNING_ID, DNS_SERVICE_GROUP_UNIT); TRIE_NODE(&new_trie, new_node_idx).group_unit = DNS_SERVICE_GROUP_UNIT; } } else { @@ -2225,6 +2200,9 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) int error = 0; struct inpcb *inp = sotoinpcb(so); struct sockaddr_in *sinp; + mbuf_t connect_packet = NULL; + char *signing_id = NULL; + int free_signing_id = 0; VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); @@ -2266,9 +2244,102 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) } } + error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet); + if (error) { + goto done; + } + + error = EPERM; + + if (fd_cb->connect_token != NULL) { + size_t sid_size = 0; + int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); + if (find_error == 0 && sid_size > 0) { + MALLOC(signing_id, char *, sid_size + 1, M_TEMP, M_WAITOK | M_ZERO); + if (signing_id != NULL) { + flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL); + FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id); + free_signing_id = 1; + } + } + } + + socket_unlock(so, 0); + if (g_signing_id_trie.root != NULL_TRIE_IDX) { + proc_t src_proc = p; + int release_proc = 0; + + if (signing_id == NULL) { + release_proc = flow_divert_get_src_proc(so, &src_proc, FALSE); + if (src_proc != PROC_NULL) { + proc_lock(src_proc); + if (src_proc->p_csflags & CS_VALID) { + signing_id = (char *)cs_identity_get(src_proc); + } else { + FDLOG0(LOG_WARNING, fd_cb, "Signature is invalid"); + } + } else { + FDLOG0(LOG_WARNING, fd_cb, "Failed to determine the current proc"); + } + } else { + src_proc = PROC_NULL; + } + + if (signing_id != NULL) { + uint16_t result = NULL_TRIE_IDX; + lck_rw_lock_shared(&g_flow_divert_group_lck); + result = flow_divert_trie_search(&g_signing_id_trie, (const uint8_t *)signing_id); + lck_rw_done(&g_flow_divert_group_lck); + if (result != NULL_TRIE_IDX) { + error = 0; + FDLOG(LOG_INFO, fd_cb, "%s matched", signing_id); + + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_SIGNING_ID, strlen(signing_id), signing_id); + if (error == 0) { + if (src_proc != PROC_NULL) { + unsigned char cdhash[SHA1_RESULTLEN]; + error = proc_getcdhash(src_proc, cdhash); + if (error == 0) { + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CDHASH, sizeof(cdhash), cdhash); + if (error) { + 
FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error); + } + } else { + FDLOG(LOG_ERR, fd_cb, "failed to get the cdhash: %d", error); + } + } + } else { + FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error); + } + } else { + FDLOG(LOG_WARNING, fd_cb, "%s did not match", signing_id); + } + } else { + FDLOG0(LOG_WARNING, fd_cb, "Failed to get the code signing identity"); + } + + if (src_proc != PROC_NULL) { + proc_unlock(src_proc); + if (release_proc) { + proc_rele(src_proc); + } + } + } else { + FDLOG0(LOG_WARNING, fd_cb, "The signing ID trie is empty"); + } + socket_lock(so, 0); + + if (free_signing_id) { + FREE(signing_id, M_TEMP); + } + + if (error) { + goto done; + } + FDLOG0(LOG_INFO, fd_cb, "Connecting"); - error = flow_divert_send_connect(fd_cb, to, p); + error = flow_divert_send_connect(fd_cb, to, connect_packet); if (error) { goto done; } @@ -2278,6 +2349,9 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) soisconnecting(so); done: + if (error && connect_packet != NULL) { + mbuf_free(connect_packet); + } return error; } @@ -2513,7 +2587,8 @@ flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr goto done; /* We don't support OOB data */ } - error = flow_divert_check_no_cellular(fd_cb); + error = flow_divert_check_no_cellular(fd_cb) || + flow_divert_check_no_expensive(fd_cb); if (error) { goto done; } @@ -2551,68 +2626,6 @@ done: return error; } -boolean_t -flow_divert_is_dns_service(struct socket *so) -{ - uint32_t ctl_unit = 0; - flow_divert_check_policy(so, NULL, TRUE, &ctl_unit); - FDLOG(LOG_INFO, &nil_pcb, "Check for DNS resulted in %u", ctl_unit); - return (ctl_unit == DNS_SERVICE_GROUP_UNIT); -} - -errno_t -flow_divert_check_policy(struct socket *so, proc_t p, boolean_t match_delegate, uint32_t *ctl_unit) -{ - int error = EPROTOTYPE; - - if (ctl_unit != NULL) { - *ctl_unit = 0; - } - - if (SOCK_DOM(so) != PF_INET -#if INET6 - && SOCK_DOM(so) != PF_INET6 -#endif - ) - { - return error; - } - - if (g_signing_id_trie.root != NULL_TRIE_IDX) { - int release_proc = flow_divert_get_src_proc(so, &p, match_delegate); - if (p != PROC_NULL) { - proc_lock(p); - if (p->p_csflags & CS_VALID) { - const char *signing_id = cs_identity_get(p); - if (signing_id != NULL) { - uint16_t result = NULL_TRIE_IDX; - lck_rw_lock_shared(&g_flow_divert_group_lck); - result = flow_divert_trie_search(&g_signing_id_trie, (const uint8_t *)signing_id); - if (result != NULL_TRIE_IDX) { - uint32_t unit = TRIE_NODE(&g_signing_id_trie, result).group_unit; - - error = 0; - - FDLOG(LOG_INFO, &nil_pcb, "%s matched, ctl_unit = %u", signing_id, unit); - - if (ctl_unit != NULL) { - *ctl_unit = unit; - } - } - lck_rw_done(&g_flow_divert_group_lck); - } - } - proc_unlock(p); - - if (release_proc) { - proc_rele(p); - } - } - } - - return error; -} - static void flow_divert_set_protosw(struct socket *so) { @@ -3122,7 +3135,7 @@ flow_divert_kctl_init(void) memset(&ctl_reg, 0, sizeof(ctl_reg)); - strncpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name)); + strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name)); ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name)-1] = '\0'; ctl_reg.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED; ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE; @@ -3147,7 +3160,7 @@ void flow_divert_init(void) { memset(&nil_pcb, 0, sizeof(nil_pcb)); - nil_pcb.log_level = LOG_INFO; + nil_pcb.log_level = LOG_NOTICE; g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
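The connect path above now validates the signing identifier itself: it takes the ID from the connect token when one was supplied, otherwise from the process's code signature, and probes the kernel-resident prefix trie under the shared group lock before appending the FLOW_DIVERT_TLV_SIGNING_ID and FLOW_DIVERT_TLV_CDHASH TLVs. Reduced to its core, the lookup pattern is the following (an editorial sketch reusing the g_signing_id_trie, g_flow_divert_group_lck, NULL_TRIE_IDX, and flow_divert_trie_search symbols from this file; it is not code from the patch):

/*
 * Editorial sketch, kernel context: does the prefix trie built from
 * flow_divert_handle_app_map_create() know this signing identifier?
 */
static boolean_t
flow_divert_signing_id_is_known(const char *signing_id)
{
	uint16_t node = NULL_TRIE_IDX;

	if (signing_id == NULL)
		return (FALSE);

	/* readers share the lock; map updates take it exclusively */
	lck_rw_lock_shared(&g_flow_divert_group_lck);
	node = flow_divert_trie_search(&g_signing_id_trie,
	    (const uint8_t *)signing_id);
	lck_rw_done(&g_flow_divert_group_lck);

	return (node != NULL_TRIE_IDX);
}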
diff --git a/bsd/netinet/flow_divert.h b/bsd/netinet/flow_divert.h index 522deb241..0d1f6255b 100644 --- a/bsd/netinet/flow_divert.h +++ b/bsd/netinet/flow_divert.h @@ -72,9 +72,6 @@ void flow_divert_detach(struct socket *so); errno_t flow_divert_token_set(struct socket *so, struct sockopt *sopt); errno_t flow_divert_token_get(struct socket *so, struct sockopt *sopt); errno_t flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit); -errno_t flow_divert_check_policy(struct socket *so, proc_t p, boolean_t match_delegate, uint32_t *ctl_unit); errno_t flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p); -void flow_divert_so_init(struct socket *so, proc_t p); -boolean_t flow_divert_is_dns_service(struct socket *so); #endif /* __FLOW_DIVERT_H__ */ diff --git a/bsd/netinet/flow_divert_proto.h b/bsd/netinet/flow_divert_proto.h index 06c79d3e7..d3bf02c4f 100644 --- a/bsd/netinet/flow_divert_proto.h +++ b/bsd/netinet/flow_divert_proto.h @@ -65,6 +65,7 @@ #define FLOW_DIVERT_TLV_PID 26 #define FLOW_DIVERT_TLV_UUID 27 #define FLOW_DIVERT_TLV_PREFIX_COUNT 28 +#define FLOW_DIVERT_TLV_FLAGS 29 #define FLOW_DIVERT_CHUNK_SIZE 4096 @@ -72,6 +73,8 @@ #define FLOW_DIVERT_DNS_SERVICE_SIGNING_ID "com.apple.mDNSResponder" +#define FLOW_DIVERT_TOKEN_FLAG_VALIDATED 0x0000001 + struct flow_divert_packet_header { uint8_t packet_type; uint32_t conn_id; diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c index ceacb9611..572a083ec 100644 --- a/bsd/netinet/igmp.c +++ b/bsd/netinet/igmp.c @@ -1887,6 +1887,10 @@ igmp_timeout(void *arg) interface_timers_running = 0; LIST_FOREACH(igi, &igi_head, igi_link) { IGI_LOCK(igi); + if (igi->igi_version != IGMP_VERSION_3) { + IGI_UNLOCK(igi); + continue; + } if (igi->igi_v3_timer == 0) { /* Do nothing. */ } else if (--igi->igi_v3_timer == 0) { @@ -3812,11 +3816,7 @@ igmp_sendpkt(struct mbuf *m) imo->imo_multicast_ttl = 1; imo->imo_multicast_vif = -1; -#if MROUTING - imo->imo_multicast_loop = (ip_mrouter != NULL); -#else imo->imo_multicast_loop = 0; -#endif /* * If the user requested that IGMP traffic be explicitly diff --git a/bsd/netinet/in.c b/bsd/netinet/in.c index 6229060c5..83d2a024a 100644 --- a/bsd/netinet/in.c +++ b/bsd/netinet/in.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -104,7 +104,6 @@ static int inctl_arpipll(struct ifnet *, struct ifreq *); static int inctl_setrouter(struct ifnet *, struct ifreq *); static int inctl_ifaddr(struct ifnet *, struct in_ifaddr *, u_long, struct ifreq *); -static int inctl_lifaddr(struct ifnet *, u_long, struct if_laddrreq *); static int inctl_ifdstaddr(struct ifnet *, struct in_ifaddr *, u_long, struct ifreq *); static int inctl_ifbrdaddr(struct ifnet *, struct in_ifaddr *, u_long, @@ -112,8 +111,6 @@ static int inctl_ifbrdaddr(struct ifnet *, struct in_ifaddr *, u_long, static int inctl_ifnetmask(struct ifnet *, struct in_ifaddr *, u_long, struct ifreq *); -static int in_mask2len(struct in_addr *); -static void in_len2mask(struct in_addr *, int); static void in_socktrim(struct sockaddr_in *); static int in_ifinit(struct ifnet *, struct in_ifaddr *, struct sockaddr_in *, int); @@ -310,41 +307,6 @@ in_socktrim(struct sockaddr_in *ap) } } -static int -in_mask2len(struct in_addr *mask) -{ - size_t x, y; - u_char *p; - - p = (u_char *)mask; - for (x = 0; x < sizeof (*mask); x++) { - if (p[x] != 0xff) - break; - } - y = 0; - if (x < sizeof (*mask)) { - for (y = 0; y < 8; y++) { - if ((p[x] & (0x80 >> y)) == 0) - break; - } - } - return (x * 8 + y); -} - -static void -in_len2mask(struct in_addr *mask, int len) -{ - int i; - u_char *p; - - p = (u_char *)mask; - bzero(mask, sizeof(*mask)); - for (i = 0; i < len / 8; i++) - p[i] = 0xff; - if (len % 8) - p[i] = (0xff00 >> (len % 8)) & 0xff; -} - static int in_interfaces; /* number of external internet interfaces */ static int @@ -713,7 +675,7 @@ inctl_ifaddr(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, in_event_data.ia_subnetmask = ia->ia_subnetmask; in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); - (void) strncpy(&in_event_data.link_data.if_name[0], + (void) strlcpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; in_event_data.link_data.if_unit = ifp->if_unit; @@ -758,7 +720,7 @@ inctl_ifaddr(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, in_event_data.ia_subnetmask = ia->ia_subnetmask; in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); - (void) strncpy(&in_event_data.link_data.if_name[0], + (void) strlcpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; @@ -934,7 +896,7 @@ inctl_ifdstaddr(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, in_event_data.ia_subnetmask = ia->ia_subnetmask; in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); - (void) strncpy(&in_event_data.link_data.if_name[0], + (void) strlcpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; @@ -1029,7 +991,7 @@ inctl_ifbrdaddr(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, in_event_data.ia_subnetmask = ia->ia_subnetmask; in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); - (void) strncpy(&in_event_data.link_data.if_name[0], + (void) strlcpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; @@ -1107,7 +1069,7 @@ inctl_ifnetmask(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, in_event_data.ia_subnetmask = 
ia->ia_subnetmask; in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); - (void) strncpy(&in_event_data.link_data.if_name[0], + (void) strlcpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; @@ -1231,21 +1193,6 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, ifnet_lock_done(ifp); return ((ifa == NULL) ? proto_unplumb(PF_INET, ifp) : EBUSY); /* NOTREACHED */ - - case SIOCALIFADDR: /* struct if_laddrreq */ - case SIOCDLIFADDR: /* struct if_laddrreq */ - if (!privileged) - return (EPERM); - /* FALLTHRU */ - case SIOCGLIFADDR: { /* struct if_laddrreq */ - struct if_laddrreq iflr; - - bcopy(data, &iflr, sizeof (iflr)); - error = inctl_lifaddr(ifp, cmd, &iflr); - bcopy(&iflr, data, sizeof (iflr)); - return (error); - /* NOTREACHED */ - } } /* @@ -1488,180 +1435,6 @@ done: return (error); } -/* - * SIOC[GAD]LIFADDR. - * SIOCGLIFADDR: get first address. (?!?) - * SIOCGLIFADDR with IFLR_PREFIX: - * get first address that matches the specified prefix. - * SIOCALIFADDR: add the specified address. - * SIOCALIFADDR with IFLR_PREFIX: - * EINVAL since we can't deduce hostid part of the address. - * SIOCDLIFADDR: delete the specified address. - * SIOCDLIFADDR with IFLR_PREFIX: - * delete the first address that matches the specified prefix. - * return values: - * EINVAL on invalid parameters - * EADDRNOTAVAIL on prefix match failed/specified address not found - * other values may be returned from in_ioctl() - */ -static __attribute__((noinline)) int -inctl_lifaddr(struct ifnet *ifp, u_long cmd, struct if_laddrreq *iflr) -{ - struct ifaddr *ifa; - - VERIFY(ifp != NULL); - - switch (cmd) { - case SIOCGLIFADDR: - /* address must be specified on GET with IFLR_PREFIX */ - if (!(iflr->flags & IFLR_PREFIX)) - break; - /* FALLTHROUGH */ - case SIOCALIFADDR: - case SIOCDLIFADDR: - /* address must be specified on ADD and DELETE */ - if (iflr->addr.ss_family != AF_INET) - return (EINVAL); - if (iflr->addr.ss_len != sizeof (struct sockaddr_in)) - return (EINVAL); - /* XXX need improvement */ - if (iflr->dstaddr.ss_family && - iflr->dstaddr.ss_family != AF_INET) - return (EINVAL); - if (iflr->dstaddr.ss_family && - iflr->dstaddr.ss_len != sizeof (struct sockaddr_in)) - return (EINVAL); - break; - default: - /* shouldn't happen */ - VERIFY(0); - /* NOTREACHED */ - } - if (sizeof (struct in_addr) * 8 < iflr->prefixlen) - return (EINVAL); - - switch (cmd) { - case SIOCALIFADDR: { - struct in_aliasreq ifra; - - if (iflr->flags & IFLR_PREFIX) - return (EINVAL); - - /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */ - bzero(&ifra, sizeof (ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, sizeof (ifra.ifra_name)); - bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len); - if (iflr->dstaddr.ss_family) { /* XXX */ - bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, - sizeof (struct sockaddr_in)); - } - ifra.ifra_mask.sin_family = AF_INET; - ifra.ifra_mask.sin_len = sizeof (struct sockaddr_in); - in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen); - - return (in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, - ifp, kernproc)); - } - - case SIOCGLIFADDR: - case SIOCDLIFADDR: { - struct in_ifaddr *ia; - struct in_addr mask, candidate; - struct in_addr match = { 0 }; - struct sockaddr_in *sin; - int cmp; - - bzero(&mask, sizeof(mask)); - if (iflr->flags & IFLR_PREFIX) { - /* lookup a prefix rather than address. 
*/ - in_len2mask(&mask, iflr->prefixlen); - - sin = (struct sockaddr_in *)&iflr->addr; - match.s_addr = sin->sin_addr.s_addr; - match.s_addr &= mask.s_addr; - - /* if you set extra bits, that's wrong */ - if (match.s_addr != sin->sin_addr.s_addr) - return (EINVAL); - - cmp = 1; - } else { - if (cmd == SIOCGLIFADDR) { - /* on getting an address, take the 1st match */ - cmp = 0; /* XXX */ - } else { - /* on deleting an address, do exact match */ - in_len2mask(&mask, 32); - sin = (struct sockaddr_in *)&iflr->addr; - match.s_addr = sin->sin_addr.s_addr; - - cmp = 1; - } - } - - ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family != AF_INET6) { - IFA_UNLOCK(ifa); - continue; - } - if (!cmp) { - IFA_UNLOCK(ifa); - break; - } - candidate.s_addr = SIN(&ifa->ifa_addr)->sin_addr.s_addr; - candidate.s_addr &= mask.s_addr; - IFA_UNLOCK(ifa); - if (candidate.s_addr == match.s_addr) - break; - } - if (ifa != NULL) - IFA_ADDREF(ifa); - ifnet_lock_done(ifp); - if (!ifa) - return (EADDRNOTAVAIL); - ia = (struct in_ifaddr *)ifa; - - if (cmd == SIOCGLIFADDR) { - IFA_LOCK(ifa); - /* fill in the if_laddrreq structure */ - bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len); - - if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { - bcopy(&ia->ia_dstaddr, &iflr->dstaddr, - ia->ia_dstaddr.sin_len); - } else { - bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); - } - iflr->prefixlen = - in_mask2len(&ia->ia_sockmask.sin_addr); - iflr->flags = 0; /* XXX */ - - IFA_UNLOCK(ifa); - IFA_REMREF(ifa); - return (0); - } else { - struct ifreq ifr; - - /* fill ifreq and do ioctl(SIOCDIFADDR) */ - bzero(&ifr, sizeof (ifr)); - bcopy(iflr->iflr_name, ifr.ifr_name, - sizeof (ifr.ifr_name)); - IFA_LOCK(ifa); - bcopy(&ia->ia_addr, &ifr.ifr_addr, - sizeof (struct sockaddr_in)); - IFA_UNLOCK(ifa); - IFA_REMREF(ifa); - return (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, - ifp, kernproc)); - } - } - - return (EOPNOTSUPP); /* just for safety */ -} - /* * Delete any existing route for an interface. */ diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h index a3f565e57..53c436517 100644 --- a/bsd/netinet/in.h +++ b/bsd/netinet/in.h @@ -791,6 +791,18 @@ struct in_pktinfo { #include #undef __KAME_NETINET_IN_H_INCLUDED_ +#ifdef PRIVATE +/* + * Minimal sized structure to hold an IPv4 or IPv6 socket address + * as sockaddr_storage can waste memory + */ +union sockaddr_in_4_6 { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; +#endif /* PRIVATE */ + #ifdef KERNEL #ifdef BSD_KERNEL_PRIVATE #include
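The union added to in.h exists purely to save space: struct sockaddr_storage is 128 bytes, while the largest member of the union, struct sockaddr_in6, is 28 bytes on typical BSD layouts, so a per-flow cache of addresses shrinks by roughly 100 bytes per entry. A quick user-space check (an editorial sketch, not part of the patch):

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

/* Same shape as the union the patch adds under PRIVATE. */
union sockaddr_in_4_6 {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
};

int
main(void)
{
	/* expect 128 vs 28 on typical BSD/Darwin layouts */
	printf("sockaddr_storage: %zu bytes\n",
	    sizeof (struct sockaddr_storage));
	printf("sockaddr_in_4_6:  %zu bytes\n",
	    sizeof (union sockaddr_in_4_6));
	return (0);
}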
diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c index b8dbd6038..418f2d26c 100644 --- a/bsd/netinet/in_arp.c +++ b/bsd/netinet/in_arp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2013 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * @@ -104,7 +104,7 @@ static const size_t MAX_HW_LEN = 10; * * - Routing lock (rnh_lock) * - * la_hold, la_asked, la_llreach, la_lastused + * la_hold, la_asked, la_llreach, la_lastused, la_flags * * - Routing entry lock (rt_lock) * @@ -127,6 +127,8 @@ struct llinfo_arp { u_int64_t la_lastused; /* last used timestamp */ u_int32_t la_asked; /* # of requests sent */ u_int32_t la_maxtries; /* retry limit */ + uint32_t la_flags; +#define LLINFO_RTRFAIL_EVTSENT 0x1 /* sent an ARP event */ }; static LIST_HEAD(, llinfo_arp) llinfo_arp; @@ -220,7 +222,8 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED, &arp_verbose, 0, ""); struct arpstat arpstat; -SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, stats, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, arp_getstat, "S,arpstat", "ARP statistics (struct arpstat, net/if_arp.h)"); @@ -998,6 +1001,11 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, struct llinfo_arp *llinfo = NULL; uint64_t timenow; int unreachable = 0; + struct if_llreach *lr; + struct ifaddr *rt_ifa; + struct sockaddr *sa; + uint32_t rtflags; + struct sockaddr_dl sdl; if (net_dest->sin_family != AF_INET) return (EAFNOSUPPORT); @@ -1099,6 +1107,38 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len)); result = 0; arp_llreach_use(llinfo); /* Mark use timestamp */ + /* + * Start the unicast probe right before the entry expires. + */ + lr = llinfo->la_llreach; + if (lr == NULL) + goto release; + rt_ifa = route->rt_ifa; + /* Become a regular mutex, just in case */ + RT_CONVERT_LOCK(route); + IFLR_LOCK_SPIN(lr); + if (route->rt_expire <= timenow + arp_unicast_lim && + ifp->if_addrlen == IF_LLREACH_MAXLEN && + lr->lr_probes <= arp_unicast_lim) { + lr->lr_probes++; + bzero(&sdl, sizeof (sdl)); + sdl.sdl_alen = ifp->if_addrlen; + bcopy(&lr->lr_key.addr, LLADDR(&sdl), + ifp->if_addrlen); + IFLR_UNLOCK(lr); + IFA_LOCK_SPIN(rt_ifa); + IFA_ADDREF_LOCKED(rt_ifa); + sa = rt_ifa->ifa_addr; + IFA_UNLOCK(rt_ifa); + rtflags = route->rt_flags; + RT_UNLOCK(route); + dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, + (const struct sockaddr_dl *)&sdl, + (const struct sockaddr *)net_dest, rtflags); + IFA_REMREF(rt_ifa); + RT_LOCK(route); + } else + IFLR_UNLOCK(lr); goto release; } else if (unreachable) { /* @@ -1128,30 +1168,26 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, if (llinfo->la_asked == 0 || route->rt_expire != timenow) { rt_setexpire(route, timenow); if (llinfo->la_asked++ < llinfo->la_maxtries) { - struct if_llreach *lr = llinfo->la_llreach; - struct ifaddr *rt_ifa = route->rt_ifa; - struct sockaddr_dl *hw_dest = NULL, sdl; - struct sockaddr *sa; - u_int32_t rtflags, alen; + struct kev_msg ev_msg; + struct kev_in_arpfailure in_arpfailure; + boolean_t sendkev = FALSE; + rt_ifa = route->rt_ifa; + lr = llinfo->la_llreach; /* Become a regular mutex, just in case */ RT_CONVERT_LOCK(route); /* Update probe count, if applicable */ if (lr != NULL) { IFLR_LOCK_SPIN(lr); lr->lr_probes++; - alen = ifp->if_addrlen; - /* Ethernet only for now */ - if (alen == IF_LLREACH_MAXLEN && - lr->lr_probes <= arp_unicast_lim) { - bzero(&sdl, sizeof (sdl)); - sdl.sdl_alen = alen; - bcopy(&lr->lr_key.addr, - LLADDR(&sdl), alen); - hw_dest = &sdl; - } IFLR_UNLOCK(lr); } + if (ifp->if_addrlen == IF_LLREACH_MAXLEN && + route->rt_flags & RTF_ROUTER && +
llinfo->la_asked > 1) { + sendkev = TRUE; + llinfo->la_flags |= LLINFO_RTRFAIL_EVTSENT; + } IFA_LOCK_SPIN(rt_ifa); IFA_ADDREF_LOCKED(rt_ifa); sa = rt_ifa->ifa_addr; @@ -1160,11 +1196,32 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, rtflags = route->rt_flags; RT_UNLOCK(route); dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, - (const struct sockaddr_dl *)hw_dest, - (const struct sockaddr *)net_dest, rtflags); + NULL, (const struct sockaddr *)net_dest, + rtflags); IFA_REMREF(rt_ifa); - RT_LOCK(route); + if (sendkev) { + bzero(&ev_msg, sizeof(ev_msg)); + bzero(&in_arpfailure, + sizeof(in_arpfailure)); + in_arpfailure.link_data.if_family = + ifp->if_family; + in_arpfailure.link_data.if_unit = + ifp->if_unit; + strlcpy(in_arpfailure.link_data.if_name, + ifp->if_name, IFNAMSIZ); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; + ev_msg.event_code = + KEV_INET_ARPRTRFAILURE; + ev_msg.dv[0].data_ptr = &in_arpfailure; + ev_msg.dv[0].data_length = + sizeof(struct + kev_in_arpfailure); + kev_post_msg(&ev_msg); + } result = EJUSTRETURN; + RT_LOCK(route); goto release; } else { route->rt_flags |= RTF_REJECT; @@ -1354,7 +1411,7 @@ match: /* Send a kernel event so anyone can learn of the conflict */ in_collision->link_data.if_family = ifp->if_family; in_collision->link_data.if_unit = ifp->if_unit; - strncpy(&in_collision->link_data.if_name[0], + strlcpy(&in_collision->link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_collision->ia_ipaddr = sender_ip->sin_addr; in_collision->hw_len = (sender_hw->sdl_alen < MAX_HW_LEN) ? @@ -1632,8 +1689,31 @@ match: arp_llreach_alloc(route, ifp, LLADDR(gateway), gateway->sdl_alen, (arpop == ARPOP_REPLY)); - /* update the llinfo, send a queued packet if there is one */ llinfo = route->rt_llinfo; + /* send a notification that the route is back up */ + if (ifp->if_addrlen == IF_LLREACH_MAXLEN && + route->rt_flags & RTF_ROUTER && + llinfo->la_flags & LLINFO_RTRFAIL_EVTSENT) { + struct kev_msg ev_msg; + struct kev_in_arpfailure in_arpalive; + + llinfo->la_flags &= ~LLINFO_RTRFAIL_EVTSENT; + RT_UNLOCK(route); + bzero(&ev_msg, sizeof(ev_msg)); + bzero(&in_arpalive, sizeof(in_arpalive)); + in_arpalive.link_data.if_family = ifp->if_family; + in_arpalive.link_data.if_unit = ifp->if_unit; + strlcpy(in_arpalive.link_data.if_name, ifp->if_name, IFNAMSIZ); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; + ev_msg.event_code = KEV_INET_ARPRTRALIVE; + ev_msg.dv[0].data_ptr = &in_arpalive; + ev_msg.dv[0].data_length = sizeof(struct kev_in_arpalive); + kev_post_msg(&ev_msg); + RT_LOCK(route); + } + /* update the llinfo, send a queued packet if there is one */ llinfo->la_asked = 0; if (llinfo->la_hold) { struct mbuf *m0 = llinfo->la_hold; @@ -1645,6 +1725,7 @@ match: route = NULL; } + respond: if (route != NULL) { /* Mark use timestamp if we're going to send a reply */
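The router-failure and router-alive notifications added above travel through the standard kernel event channel, so a user-space daemon can watch default-router ARP reachability without polling. A minimal listener might look like this (an editorial sketch, not part of the patch; it assumes KEV_INET_ARPRTRFAILURE and KEV_INET_ARPRTRALIVE are exported to user space alongside the other KEV_INET_* codes, e.g. via <netinet/in_var.h>):

#include <sys/ioctl.h>
#include <sys/kern_event.h>
#include <sys/socket.h>
#include <netinet/in_var.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct kev_request req;
	char buf[1024];
	int fd;

	fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
	if (fd < 0)
		return (1);

	/* only KEV_NETWORK_CLASS / KEV_INET_SUBCLASS events */
	memset(&req, 0, sizeof (req));
	req.vendor_code = KEV_VENDOR_APPLE;
	req.kev_class = KEV_NETWORK_CLASS;
	req.kev_subclass = KEV_INET_SUBCLASS;
	if (ioctl(fd, SIOCSKEVFILT, &req) < 0)
		return (1);

	for (;;) {
		/* one message per read is enough for a sketch */
		struct kern_event_msg *ev = (struct kern_event_msg *)buf;
		ssize_t n = recv(fd, buf, sizeof (buf), 0);

		if (n <= 0)
			break;
		if (ev->event_code == KEV_INET_ARPRTRFAILURE)
			printf("router stopped answering ARP\n");
		else if (ev->event_code == KEV_INET_ARPRTRALIVE)
			printf("router answering ARP again\n");
	}
	close(fd);
	return (0);
}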
diff --git a/bsd/netinet/in_dhcp.c b/bsd/netinet/in_dhcp.c index 001e1058f..e7f8ab6a8 100644 --- a/bsd/netinet/in_dhcp.c +++ b/bsd/netinet/in_dhcp.c @@ -211,10 +211,10 @@ make_dhcp_request(struct dhcp * request, int request_size, uint8_t cid[ETHER_ADDR_LEN + 1]; uint8_t rfc_magic[RFC_MAGIC_SIZE] = RFC_OPTIONS_MAGIC; - if (hwlen > (int)sizeof(cid)) { + if (hwlen >= (int)sizeof(cid)) { printf("dhcp: hwlen is %d (> %d), truncating\n", hwlen, (int)sizeof(cid)); - hwlen = sizeof(cid); + hwlen = sizeof(cid) - 1; } bzero(request, request_size); request->dp_op = BOOTREQUEST; diff --git a/bsd/netinet/in_gif.c b/bsd/netinet/in_gif.c index cc700ae37..66bf01c30 100644 --- a/bsd/netinet/in_gif.c +++ b/bsd/netinet/in_gif.c @@ -85,10 +85,6 @@ #include #endif -#if MROUTING -#include -#endif /* MROUTING */ - #include #include diff --git a/bsd/netinet/in_mcast.c b/bsd/netinet/in_mcast.c index 13f0b9aa9..893665ada 100644 --- a/bsd/netinet/in_mcast.c +++ b/bsd/netinet/in_mcast.c @@ -1894,18 +1894,6 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) error = 0; switch (sopt->sopt_name) { -#ifdef MROUTING - case IP_MULTICAST_VIF: - if (imo != NULL) { - IMO_LOCK(imo); - optval = imo->imo_multicast_vif; - IMO_UNLOCK(imo); - } else - optval = -1; - error = sooptcopyout(sopt, &optval, sizeof(int)); - break; -#endif /* MROUTING */ - case IP_MULTICAST_IF: memset(&mreqn, 0, sizeof(struct ip_mreqn)); if (imo != NULL) { @@ -2919,9 +2907,6 @@ out_imo_locked: * it is not possible to merge the duplicate code, because the idempotence * of the IPv4 multicast part of the BSD Sockets API must be preserved; * the effects of these options must be treated as separate and distinct. - * - * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING - * is refactored to no longer use vifs. */ int inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) @@ -2943,36 +2928,6 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) return (EOPNOTSUPP); switch (sopt->sopt_name) { -#if MROUTING - case IP_MULTICAST_VIF: { - int vifi; - /* - * Select a multicast VIF for transmission. - * Only useful if multicast forwarding is active. - */ - if (legal_vif_num == NULL) { - error = EOPNOTSUPP; - break; - } - error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); - if (error) - break; - if (!legal_vif_num(vifi) && (vifi != -1)) { - error = EINVAL; - break; - } - imo = inp_findmoptions(inp); - if (imo == NULL) { - error = ENOMEM; - break; - } - IMO_LOCK(imo); - imo->imo_multicast_vif = vifi; - IMO_UNLOCK(imo); - IMO_REMREF(imo); /* from inp_findmoptions() */ - break; - } -#endif case IP_MULTICAST_IF: error = inp_set_multicast_if(inp, sopt); break; diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index 973abc9ac..e74dccbc3 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * @@ -77,6 +77,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,7 @@ #include #include #include +#include #include #include @@ -100,19 +102,14 @@ #include #endif /* INET6 */ -#if IPSEC -#include -#include -#endif /* IPSEC */ - #include #include #include #include -#if FLOW_DIVERT -#include +#if NECP +#include #endif static lck_grp_t *inpcb_lock_grp; @@ -127,6 +124,16 @@ static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ static boolean_t inpcb_fast_timer_on = FALSE; + +/* + * If the total number of gc reqs is above a threshold, schedule + * the garbage collect timer sooner + */ +static boolean_t inpcb_toomany_gcreq = FALSE; + +#define INPCB_GCREQ_THRESHOLD 50000 +#define INPCB_TOOMANY_GCREQ_TIMER (hz/10) /* 10 times a second */ + static void inpcb_sched_timeout(struct timeval *); static void inpcb_timeout(void *); int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ @@ -134,15 +141,11 @@ extern int tvtohz(struct timeval *); #if CONFIG_PROC_UUID_POLICY static void inp_update_cellular_policy(struct inpcb *, boolean_t); -#if FLOW_DIVERT -static void inp_update_flow_divert_policy(struct inpcb *, boolean_t); -#endif /* FLOW_DIVERT */ +#if NECP +static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t); +#endif /* NECP */ #endif /* !CONFIG_PROC_UUID_POLICY */ -#if IPSEC -extern int ipsec_bypass; -#endif /* IPSEC */ - #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) @@ -291,6 +294,12 @@ inpcb_timeout(void *arg) boolean_t t, gc; struct intimercount gccnt, tmcnt; struct timeval leeway; + boolean_t toomany_gc = FALSE; + + if (arg != NULL) { + VERIFY(arg == &inpcb_toomany_gcreq); + toomany_gc = *(boolean_t *)arg; + } /* * Update coarse-grained networking timestamp (in sec.); the idea */ net_update_uptime(); + bzero(&gccnt, sizeof(gccnt)); + bzero(&tmcnt, sizeof(tmcnt)); + lck_mtx_lock_spin(&inpcb_timeout_lock); gc = inpcb_garbage_collecting; inpcb_garbage_collecting = FALSE; - bzero(&gccnt, sizeof(gccnt)); - bzero(&tmcnt, sizeof(tmcnt)); t = inpcb_ticking; inpcb_ticking = FALSE; @@ -351,8 +361,12 @@ inpcb_timeout(void *arg) inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); /* re-arm the timer if there's work to do */ - inpcb_timeout_run--; - VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); + if (toomany_gc) { + inpcb_toomany_gcreq = FALSE; + } else { + inpcb_timeout_run--; + VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); + } bzero(&leeway, sizeof(leeway)); leeway.tv_sec = inpcb_timeout_lazy; @@ -402,8 +416,26 @@ void inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) { struct timeval leeway; + u_int32_t gccnt; lck_mtx_lock_spin(&inpcb_timeout_lock); inpcb_garbage_collecting = TRUE; + + gccnt = ipi->ipi_gc_req.intimer_nodelay + + ipi->ipi_gc_req.intimer_fast; + + if (gccnt > INPCB_GCREQ_THRESHOLD && !inpcb_toomany_gcreq) { + inpcb_toomany_gcreq = TRUE; + + /* + * There are too many pcbs waiting to be garbage collected; + * schedule a much faster timeout in addition to + * the caller's request + */ + lck_mtx_convert_spin(&inpcb_timeout_lock); + timeout(inpcb_timeout, (void *)&inpcb_toomany_gcreq, + INPCB_TOOMANY_GCREQ_TIMER); + } + switch (type) { case INPCB_TIMER_NODELAY:
atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); @@ -491,7 +523,6 @@ in_pcbinfo_detach(struct inpcbinfo *ipi) * Returns: 0 Success * ENOBUFS * ENOMEM - * ipsec_init_policy:??? [IPSEC] */ int in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) @@ -554,6 +585,15 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) /* NOTREACHED */ } + /* make sure inp_Wstat is always 64-bit aligned */ + inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store, + sizeof (u_int64_t)); + if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) + + sizeof (*inp->inp_Wstat) > sizeof (inp->inp_Wstat_store)) { + panic("%s: insufficient space to align inp_Wstat", __func__); + /* NOTREACHED */ + } + so->so_pcb = (caddr_t)inp; if (so->so_proto->pr_flags & PR_PCBLOCK) { @@ -561,7 +601,6 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) pcbinfo->ipi_lock_attr); } - #if INET6 if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) inp->inp_flags |= IN6P_IPV6_V6ONLY; @@ -664,7 +703,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) u_short lport = 0, rand_port = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, randomport, conflict = 0; + boolean_t anonport = FALSE; kauth_cred_t cred; + struct in_addr laddr; + struct ifnet *outif = NULL; if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); @@ -674,8 +716,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) wild = 1; socket_unlock(so, 0); /* keep reference on socket */ lck_rw_lock_exclusive(pcbinfo->ipi_lock); + + bzero(&laddr, sizeof(laddr)); + if (nam != NULL) { - struct ifnet *outif = NULL; if (nam->sa_len != sizeof (struct sockaddr_in)) { lck_rw_done(pcbinfo->ipi_lock); @@ -739,7 +783,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) struct inpcb *t; uid_t u; - /* GROSS */ if (ntohs(lport) < IPPORT_RESERVED) { cred = kauth_cred_proc_ref(p); error = priv_check_cred(cred, @@ -802,8 +845,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) } } } - inp->inp_laddr = SIN(nam)->sin_addr; - inp->inp_last_outifp = outif; + laddr = SIN(nam)->sin_addr; } if (lport == 0) { u_short first, last; @@ -814,15 +856,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) udp_use_randomport); /* - * TODO: - * - * The following should be moved into its own routine and - * thus can be shared with in6_pcbsetport(); the latter - * currently duplicates the logic. + * Even though this looks similar to the code in + * in6_pcbsetport, the v6 vs v4 checks are different. */ - - inp->inp_flags |= INP_ANONPORT; - + anonport = TRUE; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; @@ -871,8 +908,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) if (count-- < 0) { /* completely used? */ lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - inp->inp_laddr.s_addr = INADDR_ANY; - inp->inp_last_outifp = NULL; return (EADDRNOTAVAIL); } --*lastport; @@ -880,7 +915,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local_and_cleanup(pcbinfo, - inp->inp_laddr, lport, wild)); + ((laddr.s_addr != INADDR_ANY) ? laddr : + inp->inp_laddr), lport, wild)); } else { /* * counting up @@ -896,8 +932,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) if (count-- < 0) { /* completely used? 
*/ lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - inp->inp_laddr.s_addr = INADDR_ANY; - inp->inp_last_outifp = NULL; return (EADDRNOTAVAIL); } ++*lastport; @@ -905,15 +939,31 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local_and_cleanup(pcbinfo, - inp->inp_laddr, lport, wild)); + ((laddr.s_addr != INADDR_ANY) ? laddr : + inp->inp_laddr), lport, wild)); } } socket_lock(so, 0); + if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) { + lck_rw_done(pcbinfo->ipi_lock); + return (EINVAL); + } + + if (laddr.s_addr != INADDR_ANY) { + inp->inp_laddr = laddr; + inp->inp_last_outifp = outif; + } inp->inp_lport = lport; + if (anonport) + inp->inp_flags |= INP_ANONPORT; + if (in_pcbinshash(inp, 1) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; - inp->inp_lport = 0; inp->inp_last_outifp = NULL; + + inp->inp_lport = 0; + if (anonport) + inp->inp_flags &= ~INP_ANONPORT; lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } @@ -946,11 +996,11 @@ int in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, unsigned int ifscope, struct ifnet **outif) { - boolean_t nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR); struct route *ro = &inp->inp_route; struct in_ifaddr *ia = NULL; struct sockaddr_in sin; int error = 0; + boolean_t restricted = FALSE; if (outif != NULL) *outif = NULL; @@ -1059,11 +1109,13 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, * If the route points to a cellular interface and the * caller forbids our using interfaces of such type, * pretend that there is no route. + * Apply the same logic for expensive interfaces. */ - if (nocell && IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) { + if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) { RT_UNLOCK(ro->ro_rt); ROUTE_RELEASE(ro); error = EHOSTUNREACH; + restricted = TRUE; } else { /* Become a regular mutex */ RT_CONVERT_LOCK(ro->ro_rt); @@ -1140,11 +1192,13 @@ done: * If the source address belongs to a cellular interface * and the socket forbids our using interfaces of such * type, pretend that there is no source address. + * Apply the same logic for expensive interfaces. */ IFA_LOCK_SPIN(&ia->ia_ifa); - if (nocell && IFNET_IS_CELLULAR(ia->ia_ifa.ifa_ifp)) { + if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) { IFA_UNLOCK(&ia->ia_ifa); error = EHOSTUNREACH; + restricted = TRUE; } else if (error == 0) { *laddr = ia->ia_addr.sin_addr; if (outif != NULL) { @@ -1170,7 +1224,7 @@ done: ia = NULL; } - if (nocell && error == EHOSTUNREACH) { + if (restricted && error == EHOSTUNREACH) { soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); } @@ -1196,6 +1250,7 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; struct inpcb *pcb; int error; + struct socket *so = inp->inp_socket; /* * Call inner routine, to assign local interface address. @@ -1203,18 +1258,18 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0) return (error); - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr ? inp->inp_laddr : laddr, inp->inp_lport, 0, NULL); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); /* * Check if the socket is still in a valid state. 
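/*
 * The in_pcbbind() port-assignment loops above scan an ephemeral range
 * with wraparound until a free port is found or the range is exhausted.
 * A compact standalone sketch of the "counting up" case, assuming a
 * caller-supplied in_use() predicate as a hypothetical stand-in for
 * in_pcblookup_local_and_cleanup():
 */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

int
alloc_ephemeral_port(uint16_t first, uint16_t last, uint16_t *lastport,
    bool (*in_use)(uint16_t))
{
	uint32_t count = (uint32_t)(last - first) + 1;  /* ports in range */

	do {
		if (count-- == 0)               /* completely used? */
			return (EADDRNOTAVAIL);
		if (++*lastport < first || *lastport > last)
			*lastport = first;      /* wrap back to the start */
	} while (in_use(*lastport));

	return (0);
}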
When we unlock this * embryonic socket, it can get aborted if another thread is closing * the listener (radar 7947600). */ - if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) + if ((so->so_flags & SOF_ABORTED) != 0) return (ECONNREFUSED); if (pcb != NULL) { @@ -1232,9 +1287,9 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, * Lock inversion issue, mostly with udp * multicast packets. */ - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); } inp->inp_laddr = laddr; /* no reference needed */ @@ -1246,13 +1301,15 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, * Lock inversion issue, mostly with udp * multicast packets. */ - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); } } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_invalidate_cache(inp); in_pcbrehash(inp); lck_rw_done(inp->inp_pcbinfo->ipi_lock); return (0); @@ -1263,6 +1320,9 @@ in_pcbdisconnect(struct inpcb *inp) { struct socket *so = inp->inp_socket; + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_cache(inp); + inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; @@ -1295,13 +1355,20 @@ in_pcbdetach(struct inpcb *inp) inp, so, SOCK_PROTO(so)); /* NOTREACHED */ } - + #if IPSEC if (inp->inp_sp != NULL) { (void) ipsec4_delete_pcbpolicy(inp); } #endif /* IPSEC */ - + + /* + * Let NetworkStatistics know this PCB is going away + * before we detach it. + */ + if (nstat_collect && + (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) + nstat_pcb_detach(inp); /* mark socket state as dead */ if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", @@ -1450,8 +1517,11 @@ in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) sin->sin_family = AF_INET; sin->sin_len = sizeof (*sin); - if ((inp = sotoinpcb(so)) == NULL || - (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); sin->sin_port = inp->inp_lport; @@ -1498,8 +1568,11 @@ in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) sin->sin_family = AF_INET; sin->sin_len = sizeof (*sin); - if ((inp = sotoinpcb(so)) == NULL || - (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { return (inp == NULL ? 
EINVAL : EPROTOTYPE); } @@ -1746,11 +1819,7 @@ in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == faddr.s_addr && @@ -1786,11 +1855,7 @@ in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == INADDR_ANY && @@ -1873,11 +1938,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == faddr.s_addr && @@ -1914,11 +1975,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (!(inp->inp_vflag & INP_IPV4)) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (inp->inp_faddr.s_addr == INADDR_ANY && @@ -2043,11 +2100,21 @@ in_pcbinshash(struct inpcb *inp, int locked) LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } + + VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); + inp->inp_flags2 |= INP2_INHASHLIST; + if (!locked) lck_rw_done(pcbinfo->ipi_lock); + +#if NECP + // This call catches the original setting of the local address + inp_update_necp_policy(inp, NULL, NULL, 0); +#endif /* NECP */ + return (0); } @@ -2074,8 +2141,19 @@ in_pcbrehash(struct inpcb *inp) inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; - LIST_REMOVE(inp, inp_hash); + if (inp->inp_flags2 & INP2_INHASHLIST) { + LIST_REMOVE(inp, inp_hash); + inp->inp_flags2 &= ~INP2_INHASHLIST; + } + + VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); LIST_INSERT_HEAD(head, inp, inp_hash); + inp->inp_flags2 |= INP2_INHASHLIST; + +#if NECP + // This call catches updates to the remote addresses + inp_update_necp_policy(inp, NULL, NULL, 0); +#endif /* NECP */ } /* @@ -2087,16 +2165,31 @@ in_pcbremlists(struct inpcb *inp) { inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; - if (inp->inp_lport) { + /* + * Check if it's in hashlist -- an inp is placed in hashlist when + * it's local port gets assigned. So it should also be present + * in the port list. 
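/*
 * Sketch of the INP2_INHASHLIST discipline introduced above: list
 * membership is tracked by a flag, inserts assert non-membership, and
 * removal is a no-op unless the flag says the element is linked. A
 * standalone doubly-linked-list version with illustrative names:
 */
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node  *next;
	struct node **prevp;
	bool          in_list;      /* mirrors INP2_INHASHLIST */
};

void
flagged_insert_head(struct node **head, struct node *n)
{
	/* mirrors VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)) */
	if (n->in_list)
		return;                 /* would be a panic in the kernel */
	n->next = *head;
	if (*head != NULL)
		(*head)->prevp = &n->next;
	n->prevp = head;
	*head = n;
	n->in_list = true;
}

void
flagged_remove(struct node *n)
{
	if (!n->in_list)                /* tolerate double removal */
		return;
	*n->prevp = n->next;
	if (n->next != NULL)
		n->next->prevp = n->prevp;
	n->next = NULL;                 /* poison, as in_pcbremlists does */
	n->prevp = NULL;
	n->in_list = false;
}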
+ */ + if (inp->inp_flags2 & INP2_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; + VERIFY(phd != NULL && inp->inp_lport > 0); + LIST_REMOVE(inp, inp_hash); + inp->inp_hash.le_next = NULL; + inp->inp_hash.le_prev = NULL; + LIST_REMOVE(inp, inp_portlist); - if (phd != NULL && (LIST_FIRST(&phd->phd_pcblist) == NULL)) { + inp->inp_portlist.le_next = NULL; + inp->inp_portlist.le_prev = NULL; + if (LIST_EMPTY(&phd->phd_pcblist)) { LIST_REMOVE(phd, phd_hash); FREE(phd, M_PCB); } + inp->inp_phd = NULL; + inp->inp_flags2 &= ~INP2_INHASHLIST; } + VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST)); if (inp->inp_flags2 & INP2_TIMEWAIT) { /* Remove from time-wait queue */ @@ -2414,25 +2507,58 @@ inp_clear_nocellular(struct inpcb *inp) } } -#if FLOW_DIVERT +void +inp_set_noexpensive(struct inpcb *inp) +{ + inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +void +inp_set_awdl_unrestricted(struct inpcb *inp) +{ + inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +boolean_t +inp_get_awdl_unrestricted(struct inpcb *inp) +{ + return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE; +} + +void +inp_clear_awdl_unrestricted(struct inpcb *inp) +{ + inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); +} + +#if NECP /* - * Called when PROC_UUID_FLOW_DIVERT is set. + * Called when PROC_UUID_NECP_APP_POLICY is set. */ void -inp_set_flow_divert(struct inpcb *inp) +inp_set_want_app_policy(struct inpcb *inp) { - inp->inp_flags2 |= INP2_WANT_FLOW_DIVERT; + inp->inp_flags2 |= INP2_WANT_APP_POLICY; } /* - * Called when PROC_UUID_FLOW_DIVERT is cleared. + * Called when PROC_UUID_NECP_APP_POLICY is cleared. */ void -inp_clear_flow_divert(struct inpcb *inp) +inp_clear_want_app_policy(struct inpcb *inp) { - inp->inp_flags2 &= ~INP2_WANT_FLOW_DIVERT; + inp->inp_flags2 &= ~INP2_WANT_APP_POLICY; } -#endif /* FLOW_DIVERT */ +#endif /* NECP */ /* * Calculate flow hash for an inp, used by an interface to identify a @@ -2561,6 +2687,9 @@ inp_fc_feedback(struct inpcb *inp) return; } + if (inp->inp_sndinprog_cnt > 0) + inp->inp_flags |= INP_FC_FEEDBACK; + /* * Return if the connection is not in flow-controlled state. 
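/*
 * inp_set_noexpensive() and the AWDL setters above share one pattern:
 * flip a policy bit, then drop any cached route so the next send
 * re-evaluates interface selection under the new policy. A sketch with
 * a hypothetical cached-route holder in place of struct route:
 */
#include <stdbool.h>

struct cached_route { bool valid; /* ...resolved next hop... */ };

struct conn {
	unsigned            flags;
	struct cached_route route;
};
#define CONN_NO_EXPENSIVE 0x1

static void route_release(struct cached_route *r) { r->valid = false; }

void
conn_set_noexpensive(struct conn *c)
{
	c->flags |= CONN_NO_EXPENSIVE;
	/* Blow away the cached route so it is re-looked-up under policy. */
	route_release(&c->route);
}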
* This can happen if the connection experienced @@ -2592,9 +2721,6 @@ inp_reset_fc_state(struct inpcb *inp) soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); } - if (inp->inp_sndinprog_cnt > 0) - inp->inp_flags |= INP_FC_FEEDBACK; - /* Give a write wakeup to unblock the socket */ if (needwakeup) sowwakeup(so); @@ -2690,13 +2816,18 @@ inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) struct socket *so = inp->inp_socket; soprocinfo->spi_pid = so->last_pid; + if (so->last_pid != 0) + uuid_copy(soprocinfo->spi_uuid, so->last_uuid); /* * When not delegated, the effective pid is the same as the real pid */ - if (so->so_flags & SOF_DELEGATED) + if (so->so_flags & SOF_DELEGATED) { soprocinfo->spi_epid = so->e_pid; - else + if (so->e_pid != 0) + uuid_copy(soprocinfo->spi_euuid, so->e_uuid); + } else { soprocinfo->spi_epid = so->last_pid; + } } int @@ -2736,13 +2867,13 @@ inp_update_cellular_policy(struct inpcb *inp, boolean_t set) VERIFY(so != NULL); VERIFY(inp->inp_state != INPCB_STATE_DEAD); - before = (inp->inp_flags & INP_NO_IFT_CELLULAR); + before = INP_NO_CELLULAR(inp); if (set) { inp_set_nocellular(inp); } else { inp_clear_nocellular(inp); } - after = (inp->inp_flags & INP_NO_IFT_CELLULAR); + after = INP_NO_CELLULAR(inp); if (net_io_policy_log && (before != after)) { static const char *ok = "OK"; static const char *nok = "NOACCESS"; @@ -2771,9 +2902,9 @@ inp_update_cellular_policy(struct inpcb *inp, boolean_t set) } } -#if FLOW_DIVERT +#if NECP static void -inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) +inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set) { struct socket *so = inp->inp_socket; int before, after; @@ -2781,17 +2912,13 @@ inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) VERIFY(so != NULL); VERIFY(inp->inp_state != INPCB_STATE_DEAD); - if (set && !(inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { - set = !flow_divert_is_dns_service(so); - } - - before = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); + before = (inp->inp_flags2 & INP2_WANT_APP_POLICY); if (set) { - inp_set_flow_divert(inp); + inp_set_want_app_policy(inp); } else { - inp_clear_flow_divert(inp); + inp_clear_want_app_policy(inp); } - after = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); + after = (inp->inp_flags2 & INP2_WANT_APP_POLICY); if (net_io_policy_log && (before != after)) { static const char *wanted = "WANTED"; static const char *unwanted = "UNWANTED"; @@ -2816,9 +2943,24 @@ inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) ((before < after) ? 
wanted : unwanted)); } } -#endif /* FLOW_DIVERT */ +#endif /* NECP */ #endif /* !CONFIG_PROC_UUID_POLICY */ +#if NECP +void +inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface) +{ + necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface); + if (necp_socket_should_rescope(inp) && + inp->inp_lport == 0 && + inp->inp_laddr.s_addr == INADDR_ANY && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + // If we should rescope, and the socket is not yet bound + inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL); + } +} +#endif /* NECP */ + int inp_update_policy(struct inpcb *inp) { @@ -2863,14 +3005,14 @@ inp_update_policy(struct inpcb *inp) } else if (!(pflags & PROC_UUID_NO_CELLULAR)) { inp_update_cellular_policy(inp, FALSE); } -#if FLOW_DIVERT - /* update flow divert policy for this socket */ - if (err == 0 && (pflags & PROC_UUID_FLOW_DIVERT)) { - inp_update_flow_divert_policy(inp, TRUE); - } else if (!(pflags & PROC_UUID_FLOW_DIVERT)) { - inp_update_flow_divert_policy(inp, FALSE); +#if NECP + /* update necp want app policy for this socket */ + if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) { + inp_update_necp_want_app_policy(inp, TRUE); + } else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) { + inp_update_necp_want_app_policy(inp, FALSE); } -#endif /* FLOW_DIVERT */ +#endif /* NECP */ } return ((err == ENOENT) ? 0 : err); @@ -2879,16 +3021,35 @@ inp_update_policy(struct inpcb *inp) return (0); #endif /* !CONFIG_PROC_UUID_POLICY */ } - +/* + * Called when we need to enforce policy restrictions in the input path. + * + * Returns TRUE if we're not allowed to receive data, otherwise FALSE. + */ boolean_t -inp_restricted(struct inpcb *inp, struct ifnet *ifp) +inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) { VERIFY(inp != NULL); + /* + * Inbound restrictions. + */ if (!sorestrictrecv) return (FALSE); - if (ifp == NULL || !(ifp->if_eflags & IFEF_RESTRICTED_RECV)) + if (ifp == NULL) + return (FALSE); + + if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) + return (TRUE); + + if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) + return (TRUE); + + if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) + return (TRUE); + + if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) return (FALSE); if (inp->inp_flags & INP_RECV_ANYIF) @@ -2899,3 +3060,34 @@ inp_restricted(struct inpcb *inp, struct ifnet *ifp) return (TRUE); } + +/* + * Called when we need to enforce policy restrictions in the output path. + * + * Returns TRUE if we're not allowed to send data out, otherwise FALSE. + */ +boolean_t +inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) +{ + VERIFY(inp != NULL); + + /* + * Outbound restrictions. + */ + if (!sorestrictsend) + return (FALSE); + + if (ifp == NULL) + return (FALSE); + + if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) + return (TRUE); + + if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) + return (TRUE); + + if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) + return (TRUE); + + return (FALSE); +} diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h index a43b45a17..c86c03c6c 100644 --- a/bsd/netinet/in_pcb.h +++ b/bsd/netinet/in_pcb.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
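/*
 * The send/recv restriction predicates above all reduce to one shape:
 * a global kill switch, then per-interface-class checks against
 * per-socket opt-outs. A condensed standalone sketch (names are
 * illustrative, not the kernel's):
 */
#include <stdbool.h>
#include <stddef.h>

struct iface { bool cellular, expensive, awdl; };
struct sock_policy { bool no_cellular, no_expensive, awdl_ok; };

bool global_restrict_send = true;       /* mirrors sorestrictsend */

bool
restricted_send(const struct sock_policy *p, const struct iface *ifp)
{
	if (!global_restrict_send || ifp == NULL)
		return false;
	if (ifp->cellular && p->no_cellular)
		return true;
	if (ifp->expensive && p->no_expensive)
		return true;
	if (ifp->awdl && !p->awdl_ok)
		return true;
	return false;
}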
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,6 +81,13 @@ #endif /* BSD_KERNEL_PRIVATE */ #include /* for IPSEC */ +#if NECP +#include +#endif + +#if IPSEC +#include /* for IPSEC */ +#endif #ifdef BSD_KERNEL_PRIVATE /* @@ -206,20 +213,33 @@ struct inpcb { #if IPSEC struct inpcbpolicy *inp_sp; /* for IPSec */ #endif /* IPSEC */ +#if NECP + struct { + char *inp_domain; + char *inp_account; + } inp_necp_attributes; + struct necp_inpcb_result inp_policyresult; +#endif struct inp_stat *inp_stat; struct inp_stat *inp_cstat; /* cellular data */ struct inp_stat *inp_wstat; /* Wi-Fi data */ + struct inp_stat *inp_Wstat; /* Wired data */ u_int8_t inp_stat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)]; u_int8_t inp_cstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)]; u_int8_t inp_wstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)]; + u_int8_t inp_Wstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)]; + uint32_t inp_nstat_refcnt __attribute__((aligned(4))); }; -#define INP_ADD_STAT(_inp, _cnt_cellular, _cnt_wifi, _a, _n) do { \ +#define INP_ADD_STAT(_inp, _cnt_cellular, _cnt_wifi, _cnt_wired, _a, _n)\ +do { \ locked_add_64(&((_inp)->inp_stat->_a), (_n)); \ if (_cnt_cellular) \ locked_add_64(&((_inp)->inp_cstat->_a), (_n)); \ if (_cnt_wifi) \ locked_add_64(&((_inp)->inp_wstat->_a), (_n)); \ + if (_cnt_wired) \ + locked_add_64(&((_inp)->inp_Wstat->_a), (_n)); \ } while (0); #endif /* BSD_KERNEL_PRIVATE */ @@ -422,6 +442,7 @@ struct xinpcb_n { short inp6_hops; } inp_depend6; u_int32_t inp_flowhash; + u_int32_t inp_flags2; }; #endif /* PRIVATE */ @@ -575,6 +596,13 @@ struct inpcbinfo { #define INP_WAIT_FOR_IF_FEEDBACK(_inp_) \ (((_inp_)->inp_flags & (INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED)) != 0) +#define INP_NO_CELLULAR(_inp) \ + ((_inp)->inp_flags & INP_NO_IFT_CELLULAR) +#define INP_NO_EXPENSIVE(_inp) \ + ((_inp)->inp_flags2 & INP2_NO_IFF_EXPENSIVE) +#define INP_AWDL_UNRESTRICTED(_inp) \ + ((_inp)->inp_flags2 & INP2_AWDL_UNRESTRICTED) + #endif /* BSD_KERNEL_PRIVATE */ /* @@ -646,7 +674,10 @@ struct inpcbinfo { */ #define INP2_TIMEWAIT 0x00000001 /* in TIMEWAIT */ #define INP2_IN_FCTREE 0x00000002 /* in inp_fc_tree */ -#define INP2_WANT_FLOW_DIVERT 0x00000004 /* flow divert is desired */ +#define INP2_WANT_APP_POLICY 0x00000004 /* necp app policy check is desired */ +#define INP2_NO_IFF_EXPENSIVE 0x00000008 /* do not use expensive interface */ +#define INP2_INHASHLIST 0x00000010 /* pcb is in inp_hash list */ +#define INP2_AWDL_UNRESTRICTED 0x00000020 /* AWDL restricted mode allowed */ /* * Flags passed to in_pcblookup*() functions. 
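/*
 * INP_ADD_STAT above fans one sample out to the total bucket plus any
 * matching per-class buckets (cellular, Wi-Fi, and now wired). A
 * standalone sketch of the same fan-out, with plain 64-bit adds in
 * place of the kernel's locked_add_64():
 */
#include <stdbool.h>
#include <stdint.h>

struct stat_bucket { uint64_t rxbytes, txbytes; };

struct conn_stats {
	struct stat_bucket total, cell, wifi, wired;
};

#define ADD_STAT(cs, is_cell, is_wifi, is_wired, field, n) do {	\
	(cs)->total.field += (n);				\
	if (is_cell)						\
		(cs)->cell.field += (n);			\
	if (is_wifi)						\
		(cs)->wifi.field += (n);			\
	if (is_wired)						\
		(cs)->wired.field += (n);			\
} while (0)

/* usage: ADD_STAT(&cs, false, true, false, rxbytes, 1500); */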
@@ -724,6 +755,8 @@ extern void in_pcbremlists(struct inpcb *); extern void inpcb_to_compat(struct inpcb *, struct inpcb_compat *); extern void inpcb_to_xinpcb64(struct inpcb *, struct xinpcb64 *); extern int get_pcblist_n(short, struct sysctl_req *, struct inpcbinfo *); +#define INPCB_GET_PORTS_USED_WILDCARDOK 0x1 +#define INPCB_GET_PORTS_USED_NOWAKEUPOK 0x2 extern void inpcb_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *, struct inpcbinfo *); #define INPCB_OPPORTUNISTIC_THROTTLEON 0x0001 @@ -736,10 +769,15 @@ extern void inp_route_copyin(struct inpcb *, struct route *); extern int inp_bindif(struct inpcb *, unsigned int, struct ifnet **); extern void inp_set_nocellular(struct inpcb *); extern void inp_clear_nocellular(struct inpcb *); -#if FLOW_DIVERT -extern void inp_set_flow_divert(struct inpcb *); -extern void inp_clear_flow_divert(struct inpcb *); -#endif /* FLOW_DIVERT */ +extern void inp_set_noexpensive(struct inpcb *); +extern void inp_set_awdl_unrestricted(struct inpcb *); +extern boolean_t inp_get_awdl_unrestricted(struct inpcb *); +extern void inp_clear_awdl_unrestricted(struct inpcb *); +#if NECP +extern void inp_update_necp_policy(struct inpcb *, struct sockaddr *, struct sockaddr *, u_int); +extern void inp_set_want_app_policy(struct inpcb *); +extern void inp_clear_want_app_policy(struct inpcb *); +#endif /* NECP */ extern u_int32_t inp_calc_flowhash(struct inpcb *); extern void inp_reset_fc_state(struct inpcb *); extern int inp_set_fc_state(struct inpcb *, int advcode); @@ -749,7 +787,8 @@ extern int inp_flush(struct inpcb *, int); extern int inp_findinpcb_procinfo(struct inpcbinfo *, uint32_t, struct so_procinfo *); extern void inp_get_soprocinfo(struct inpcb *, struct so_procinfo *); extern int inp_update_policy(struct inpcb *); -extern boolean_t inp_restricted(struct inpcb *, struct ifnet *); +extern boolean_t inp_restricted_recv(struct inpcb *, struct ifnet *); +extern boolean_t inp_restricted_send(struct inpcb *, struct ifnet *); #endif /* BSD_KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE /* exported for PPP */ diff --git a/bsd/netinet/in_pcblist.c b/bsd/netinet/in_pcblist.c index 09cc79674..4df416c6a 100644 --- a/bsd/netinet/in_pcblist.c +++ b/bsd/netinet/in_pcblist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. 
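/*
 * The two flag bits defined above replace separate boolean parameters
 * to inpcb_get_ports_used(): WILDCARDOK admits wildcard-bound sockets
 * into the port bitmap, and NOWAKEUPOK admits sockets that opted out
 * of wake-from-sleep (SO_NOWAKEFROMSLEEP). A sketch of that filter,
 * with a hypothetical entry type standing in for the pcb walk:
 */
#include <stdbool.h>
#include <stdint.h>

#define GPU_WILDCARDOK 0x1
#define GPU_NOWAKEUPOK 0x2

struct port_entry { bool wildcard, nowake; uint16_t port; };

bool
port_entry_admitted(const struct port_entry *e, uint32_t flags)
{
	if (e->wildcard && !(flags & GPU_WILDCARDOK))
		return false;
	if (e->nowake && !(flags & GPU_NOWAKEUPOK))
		return false;
	return true;
}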
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,25 +95,22 @@ #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n)) #endif -static void sotoxsocket_n(struct socket *, struct xsocket_n *); -static void sbtoxsockbuf_n(struct sockbuf *, struct xsockbuf_n *); -static void sbtoxsockstat_n(struct socket *, struct xsockstat_n *); static void inpcb_to_xinpcb_n(struct inpcb *, struct xinpcb_n *); static void tcpcb_to_xtcpcb_n(struct tcpcb *, struct xtcpcb_n *); -static void +__private_extern__ void sotoxsocket_n(struct socket *so, struct xsocket_n *xso) { xso->xso_len = sizeof (struct xsocket_n); xso->xso_kind = XSO_SOCKET; if (so != NULL) { - xso->xso_so = (u_int64_t)(uintptr_t)so; + xso->xso_so = (uint64_t)VM_KERNEL_ADDRPERM(so); xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; - xso->so_pcb = (u_int64_t)(uintptr_t)so->so_pcb; + xso->so_pcb = (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb); if (so->so_proto) { xso->xso_protocol = SOCK_PROTO(so); xso->xso_family = SOCK_DOM(so); @@ -128,10 +125,12 @@ sotoxsocket_n(struct socket *so, struct xsocket_n *xso) xso->so_pgid = so->so_pgid; xso->so_oobmark = so->so_oobmark; xso->so_uid = kauth_cred_getuid(so->so_cred); + xso->so_last_pid = so->last_pid; + xso->so_e_pid = so->e_pid; } } -static void +__private_extern__ void sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb) { xsb->xsb_len = sizeof (struct xsockbuf_n); @@ -151,7 +150,7 @@ sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb) } } -static void +__private_extern__ void sbtoxsockstat_n(struct socket *so, struct xsockstat_n *xst) { int i; @@ -172,10 +171,10 @@ inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp) { xinp->xi_len = sizeof (struct xinpcb_n); xinp->xi_kind = XSO_INPCB; - xinp->xi_inpp = (u_int64_t)(uintptr_t)inp; + xinp->xi_inpp = (uint64_t)VM_KERNEL_ADDRPERM(inp); xinp->inp_fport = inp->inp_fport; xinp->inp_lport = inp->inp_lport; - xinp->inp_ppcb = (u_int64_t)(uintptr_t)inp->inp_ppcb; + xinp->inp_ppcb = (uint64_t)VM_KERNEL_ADDRPERM(inp->inp_ppcb); xinp->inp_gencnt = inp->inp_gencnt; xinp->inp_flags = inp->inp_flags; xinp->inp_flow = inp->inp_flow; @@ -190,23 +189,24 @@ inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp) xinp->inp_depend6.inp6_ifindex = 0; xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; xinp->inp_flowhash = inp->inp_flowhash; + xinp->inp_flags2 = inp->inp_flags2; } __private_extern__ void tcpcb_to_xtcpcb_n(struct tcpcb *tp, struct xtcpcb_n *xt) { - int i; - xt->xt_len = sizeof (struct xtcpcb_n); xt->xt_kind = XSO_TCPCB; - xt->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first; + xt->t_segq = (uint32_t)VM_KERNEL_ADDRPERM(tp->t_segq.lh_first); xt->t_dupacks = tp->t_dupacks; - for (i = 0; i < TCPT_NTIMERS_EXT; i++) - xt->t_timer[i] = tp->t_timer[i]; + xt->t_timer[TCPT_REXMT_EXT] = tp->t_timer[TCPT_REXMT]; + xt->t_timer[TCPT_PERSIST_EXT] = tp->t_timer[TCPT_PERSIST]; + xt->t_timer[TCPT_KEEP_EXT] = tp->t_timer[TCPT_KEEP]; + xt->t_timer[TCPT_2MSL_EXT] = tp->t_timer[TCPT_2MSL]; xt->t_state = tp->t_state; xt->t_flags = tp->t_flags; - xt->t_force = tp->t_force; + xt->t_force = (tp->t_flagsext & TF_FORCE) ? 
1 : 0; xt->snd_una = tp->snd_una; xt->snd_max = tp->snd_max; xt->snd_nxt = tp->snd_nxt; @@ -394,14 +394,16 @@ done: } __private_extern__ void -inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t wildcardok, +inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, bitstr_t *bitfield, struct inpcbinfo *pcbinfo) { struct inpcb *inp; struct socket *so; inp_gen_t gencnt; - uint32_t iswildcard; + bool iswildcard, wildcardok, nowakeok; + wildcardok = ((flags & INPCB_GET_PORTS_USED_WILDCARDOK) != 0); + nowakeok = ((flags & INPCB_GET_PORTS_USED_NOWAKEUPOK) != 0); lck_rw_lock_shared(pcbinfo->ipi_lock); gencnt = pcbinfo->ipi_gencnt; for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp; @@ -429,6 +431,10 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t wildcardok, if (!wildcardok && iswildcard) continue; + if ((so->so_options & SO_NOWAKEFROMSLEEP) && + !nowakeok) + continue; + if (!iswildcard && !(ifindex == 0 || inp->inp_last_outifp == NULL || ifindex == inp->inp_last_outifp->if_index)) diff --git a/bsd/netinet/in_proto.c b/bsd/netinet/in_proto.c index 6e3507bec..321e7819e 100644 --- a/bsd/netinet/in_proto.c +++ b/bsd/netinet/in_proto.c @@ -192,17 +192,6 @@ static struct protosw inetsw[] = { .pr_usrreqs = &rip_usrreqs, .pr_unlock = rip_unlock, }, -#if MROUTING -{ - .pr_type = SOCK_RAW, - .pr_protocol = IPPROTO_RSVP, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, - .pr_input = rsvp_input, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs, - .pr_unlock = rip_unlock, -}, -#endif /* MROUTING */ { .pr_type = SOCK_RAW, .pr_protocol = IPPROTO_GRE, diff --git a/bsd/netinet/in_rmx.c b/bsd/netinet/in_rmx.c index aa43a3caa..81ebd2641 100644 --- a/bsd/netinet/in_rmx.c +++ b/bsd/netinet/in_rmx.c @@ -77,7 +77,7 @@ #include #include #include -#include +#include #include #include diff --git a/bsd/netinet/in_tclass.c b/bsd/netinet/in_tclass.c index 45d86992c..d20a07d6e 100644 --- a/bsd/netinet/in_tclass.c +++ b/bsd/netinet/in_tclass.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2013 Apple Inc. All rights reserved. + * Copyright (c) 2009-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -778,6 +778,15 @@ so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off) ((m->m_flags & M_PKTHDR) ? 
m->m_pkthdr.len : 0) + off; } +__private_extern__ void +so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes, uint32_t tc) +{ + if (tc >= SO_TC_STATS_MAX) + tc = SO_TC_BE; + + so->so_tc_stats[tc].rxpackets += pkts; + so->so_tc_stats[tc].rxbytes +=bytes; +} __private_extern__ void set_tcp_stream_priority(struct socket *so) { @@ -793,7 +802,11 @@ set_tcp_stream_priority(struct socket *so) || SOCK_CHECK_DOM(so, PF_INET6)) && SOCK_CHECK_TYPE(so, SOCK_STREAM) && SOCK_CHECK_PROTO(so, IPPROTO_TCP)); - + + /* Return if the socket is in a terminal state */ + if (inp->inp_state == INPCB_STATE_DEAD) + return; + outifp = inp->inp_last_outifp; uptime = net_uptime(); diff --git a/bsd/netinet/in_var.h b/bsd/netinet/in_var.h index 9baa7ac54..8a3e7b94c 100644 --- a/bsd/netinet/in_var.h +++ b/bsd/netinet/in_var.h @@ -126,6 +126,15 @@ struct kev_in_collision { u_char hw_addr[0]; /* variable length hardware address */ }; +struct kev_in_arpfailure { + struct net_event_data link_data; /* link where ARP is being sent */ +}; + +struct kev_in_arpalive { + struct net_event_data link_data; /* link where ARP was received */ +}; + + #ifdef __APPLE_API_PRIVATE struct kev_in_portinuse { u_int16_t port; /* conflicting port number in host order */ @@ -149,6 +158,9 @@ struct kev_in_portinuse { #ifdef __APPLE_API_PRIVATE #define KEV_INET_PORTINUSE 8 /* use ken_in_portinuse */ #endif +#define KEV_INET_ARPRTRFAILURE 9 /* ARP resolution failed for router */ +#define KEV_INET_ARPRTRALIVE 10 /* ARP resolution succeeded for + router */ #ifdef BSD_KERNEL_PRIVATE #include diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c index 16692077e..614c0c7c1 100644 --- a/bsd/netinet/ip_divert.c +++ b/bsd/netinet/ip_divert.c @@ -168,7 +168,7 @@ div_init(struct protosw *pp, struct domain *dp) */ divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &divcbinfo.ipi_hashmask); divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, &divcbinfo.ipi_porthashmask); - divcbinfo.ipi_zone = zinit(sizeof(struct inpcb),(maxsockets * sizeof(struct inpcb)), + divcbinfo.ipi_zone = zinit(sizeof(struct inpcb),(512 * sizeof(struct inpcb)), 4096, "divzone"); pcbinfo = &divcbinfo; /* diff --git a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c index be38174c0..38338ae58 100644 --- a/bsd/netinet/ip_dummynet.c +++ b/bsd/netinet/ip_dummynet.c @@ -2594,7 +2594,7 @@ ip_dn_init(void) default_rule.cmd[0].len = 1; default_rule.cmd[0].opcode = #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT - 1 ? O_ACCEPT : + (1) ? O_ACCEPT : #endif O_DENY; } diff --git a/bsd/netinet/ip_encap.c b/bsd/netinet/ip_encap.c index f0dfd14de..3c0ee4a48 100644 --- a/bsd/netinet/ip_encap.c +++ b/bsd/netinet/ip_encap.c @@ -102,9 +102,6 @@ #include #include #include -#if MROUTING -#include -#endif /* MROUTING */ #if INET6 #include @@ -262,18 +259,6 @@ encap4_input(m, off) return; } - /* for backward compatibility */ -# if MROUTING -# define COMPATFUNC ipip_input -# endif /*MROUTING*/ - -#if COMPATFUNC - if (proto == IPPROTO_IPV4) { - COMPATFUNC(m, off); - return; - } -#endif - /* last resort: inject to raw socket */ rip_input(m, off); } diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c index be17daaa5..d9520158e 100644 --- a/bsd/netinet/ip_fw2.c +++ b/bsd/netinet/ip_fw2.c @@ -4053,7 +4053,7 @@ ipfw_init(void) default_rule.cmd[0].len = 1; default_rule.cmd[0].opcode = #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT - 1 ? O_ACCEPT : + (1) ? 
O_ACCEPT : #endif O_DENY; diff --git a/bsd/netinet/ip_icmp.c b/bsd/netinet/ip_icmp.c index 160c3eaac..ba2869a50 100644 --- a/bsd/netinet/ip_icmp.c +++ b/bsd/netinet/ip_icmp.c @@ -101,6 +101,10 @@ #include #endif +#if NECP +#include +#endif /* NECP */ + /* XXX This one should go in sys/mbuf.h. It is used to avoid that * a firewall-generated packet loops forever through the firewall. */ @@ -1103,7 +1107,11 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, int icmplen; int error = EINVAL; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { if (inp != NULL) error = EPROTOTYPE; goto bad; diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index 99d474d3c..9dde8f80e 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -150,12 +150,6 @@ decl_lck_mtx_data(, sadb_stat_mutex_data); lck_mtx_t *sadb_stat_mutex = &sadb_stat_mutex_data; #endif /* IPSEC */ -#if MROUTING -int rsvp_on = 0; -static int ip_rsvp_on; -struct socket *ip_rsvpd; -#endif /* MROUTING */ - MBUFQ_HEAD(fq_head); static int frag_timeout_run; /* frag timer is scheduled to run */ @@ -279,7 +273,8 @@ static u_int32_t inaddr_hashp; /* next largest prime */ static int ip_getstat SYSCTL_HANDLER_ARGS; struct ipstat ipstat; -SYSCTL_PROC(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_inet_ip, IPCTL_STATS, stats, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, ip_getstat, "S,ipstat", "IP statistics (struct ipstat, netinet/ip_var.h)"); @@ -1019,20 +1014,6 @@ pass: return; } -#if MROUTING - /* - * greedy RSVP, snatches any PATH packet of the RSVP protocol and no - * matter if it is destined to another node, or whether it is - * a multicast one, RSVP wants it! and prevents it from being forwarded - * anywhere else. Also checks if the rsvp daemon is running before - * grabbing the packet. - */ - if (rsvp_on && ip->ip_p == IPPROTO_RSVP) { - ip_setdstifaddr_info(m, inifp->if_index, NULL); - goto ours; - } -#endif /* MROUTING */ - /* * Check our list of addresses, to see if the packet is for us. * If we don't have any addresses, assume any unicast packet @@ -1127,34 +1108,6 @@ pass: if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; -#if MROUTING - if (ip_mrouter) { - /* - * If we are acting as a multicast router, all - * incoming multicast packets are passed to the - * kernel-level multicast forwarding function. - * The packet is returned (relatively) intact; if - * ip_mforward() returns a non-zero value, the packet - * must be discarded, else it may be accepted below. - */ - if (ip_mforward && ip_mforward(ip, inifp, m, 0) != 0) { - OSAddAtomic(1, &ipstat.ips_cantforward); - m_freem(m); - return; - } - - /* - * The process-level routing daemon needs to receive - * all multicast IGMP packets, whether or not this - * host belongs to their destination groups. - */ - if (ip->ip_p == IPPROTO_IGMP) { - ip_setdstifaddr_info(m, inifp->if_index, NULL); - goto ours; - } - OSAddAtomic(1, &ipstat.ips_forward); - } -#endif /* MROUTING */ /* * See if we belong to the destination multicast group on the * arrival interface. 
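/*
 * icmp_dgram_send() above shows the guard pattern this patch applies
 * across the stack: the old INP2_WANT_FLOW_DIVERT flag test becomes a
 * call into NECP, compiled in only under #if NECP so non-NECP builds
 * keep the plain NULL check. Shape of the guard, with stubbed names:
 */
#include <errno.h>
#include <stddef.h>

struct pcb { int dummy; };

#if NECP
extern int necp_should_use_flow_divert(struct pcb *);   /* stand-in */
#endif

int
dgram_send_check(struct pcb *inp)
{
	if (inp == NULL
#if NECP
	    || necp_should_use_flow_divert(inp)
#endif
	    )
		return (inp == NULL ? EINVAL : EPROTOTYPE);
	return (0);
}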
@@ -3150,45 +3103,6 @@ no_mbufs: return (ENOBUFS); } -#if MROUTING -int -ip_rsvp_init(struct socket *so) -{ - if (so->so_type != SOCK_RAW || SOCK_PROTO(so) != IPPROTO_RSVP) - return (EOPNOTSUPP); - - if (ip_rsvpd != NULL) - return (EADDRINUSE); - - ip_rsvpd = so; - /* - * This may seem silly, but we need to be sure we don't over-increment - * the RSVP counter, in case something slips up. - */ - if (!ip_rsvp_on) { - ip_rsvp_on = 1; - rsvp_on++; - } - - return (0); -} - -int -ip_rsvp_done(void) -{ - ip_rsvpd = NULL; - /* - * This may seem silly, but we need to be sure we don't over-decrement - * the RSVP counter, in case something slips up. - */ - if (ip_rsvp_on) { - ip_rsvp_on = 0; - rsvp_on--; - } - return (0); -} -#endif /* MROUTING */ - static inline u_short ip_cksum(struct mbuf *m, int hlen) { diff --git a/bsd/netinet/ip_mroute.c b/bsd/netinet/ip_mroute.c deleted file mode 100644 index 6f9fdee99..000000000 --- a/bsd/netinet/ip_mroute.c +++ /dev/null @@ -1,2170 +0,0 @@ -/* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ -/* - * IP multicast forwarding procedures - * - * Written by David Waitzman, BBN Labs, August 1988. - * Modified by Steve Deering, Stanford, February 1989. - * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 - * Modified by Van Jacobson, LBL, January 1993 - * Modified by Ajit Thyagarajan, PARC, August 1993 - * Modified by Bill Fenner, PARC, April 1995 - * - * MROUTING Revision: 3.5 - * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.56.2.2 2001/07/19 06:37:26 kris Exp $ - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if CONFIG_MACF_NET -#include -#endif - - -#if !MROUTING -extern u_int32_t _ip_mcast_src(int vifi); -extern int _ip_mforward(struct ip *ip, struct ifnet *ifp, - struct mbuf *m, struct ip_moptions *imo); -extern int _ip_mrouter_done(void); -extern int _ip_mrouter_get(struct socket *so, struct sockopt *sopt); -extern int _ip_mrouter_set(struct socket *so, struct sockopt *sopt); -extern int _mrt_ioctl(int req, caddr_t data, struct proc *p); - -/* - * Dummy routines and globals used when multicast routing is not compiled in. - */ - -struct socket *ip_mrouter = NULL; -u_int rsvpdebug = 0; - -int -_ip_mrouter_set(__unused struct socket *so, - __unused struct sockopt *sopt) -{ - return(EOPNOTSUPP); -} - -int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set; - - -int -_ip_mrouter_get(__unused struct socket *so, - __unused sockopt *sopt) -{ - return(EOPNOTSUPP); -} - -int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get; - -int -_ip_mrouter_done(void) -{ - return(0); -} - -int (*ip_mrouter_done)(void) = _ip_mrouter_done; - -int -_ip_mforward(__unused struct ip *ip, __unused struct ifnet *ifp, - __unused struct mbuf *m, __unused ip_moptions *imo) -{ - return(0); -} - -int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, - struct ip_moptions *) = _ip_mforward; - -int -_mrt_ioctl(__unused u_long req, __unused caddr_t data, __unused struct proc *p) -{ - return EOPNOTSUPP; -} - -int (*mrt_ioctl)(u_long, caddr_t, struct proc *) = _mrt_ioctl; - -void -rsvp_input(struct mbuf *m, int iphlen) /* XXX must fixup manually */ -{ - /* Can still get packets with rsvp_on = 0 if there is a local member - * of the group to which the RSVP packet is addressed. But in this - * case we want to throw the packet away. - */ - if (!rsvp_on) { - m_freem(m); - return; - } - - if (ip_rsvpd != NULL) { - if (rsvpdebug) - printf("rsvp_input: Sending packet up old-style socket\n"); - rip_input(m, iphlen); - return; - } - /* Drop the packet */ - m_freem(m); -} - -void ipip_input(struct mbuf *m, int iphlen) { /* XXX must fixup manually */ - rip_input(m, iphlen); -} - -int (*legal_vif_num)(int) = 0; - -/* - * This should never be called, since IP_MULTICAST_VIF should fail, but - * just in case it does get called, the code a little lower in ip_output - * will assign the packet a local address. - */ -u_int32_t -_ip_mcast_src(int vifi) { return INADDR_ANY; } -u_int32_t (*ip_mcast_src)(int) = _ip_mcast_src; - -int -ip_rsvp_vif_init(so, sopt) - struct socket *so; - struct sockopt *sopt; -{ - return(EINVAL); -} - -int -ip_rsvp_vif_done(so, sopt) - struct socket *so; - struct sockopt *sopt; -{ - return(EINVAL); -} - -void -ip_rsvp_force_done(so) - struct socket *so; -{ - return; -} - -#else /* MROUTING */ - -#define M_HASCL(m) ((m)->m_flags & M_EXT) - -#define INSIZ sizeof(struct in_addr) -#define same(a1, a2) \ - (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) - - -/* - * Globals. 
All but ip_mrouter and ip_mrtproto could be static, - * except for netstat or debugging purposes. - */ -#ifndef MROUTE_LKM -struct socket *ip_mrouter = NULL; -static struct mrtstat mrtstat; -#else /* MROUTE_LKM */ -extern void X_ipip_input(struct mbuf *m, int iphlen); -extern struct mrtstat mrtstat; -static int ip_mrtproto; -#endif - -#define NO_RTE_FOUND 0x1 -#define RTE_FOUND 0x2 - -static struct mfc *mfctable[CONFIG_MFCTBLSIZ]; -static u_char nexpire[CONFIG_MFCTBLSIZ]; -static struct vif viftable[CONFIG_MAXVIFS]; -static u_int mrtdebug = 0; /* debug level */ -#define DEBUG_MFC 0x02 -#define DEBUG_FORWARD 0x04 -#define DEBUG_EXPIRE 0x08 -#define DEBUG_XMIT 0x10 -static u_int tbfdebug = 0; /* tbf debug level */ -static u_int rsvpdebug = 0; /* rsvp debug level */ - -#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ -#define UPCALL_EXPIRE 6 /* number of timeouts */ - -/* - * Define the token bucket filter structures - * tbftable -> each vif has one of these for storing info - */ - -static struct tbf tbftable[CONFIG_MAXVIFS]; -#define TBF_REPROCESS (hz / 100) /* 100x / second */ - -/* - * 'Interfaces' associated with decapsulator (so we can tell - * packets that went through it from ones that get reflected - * by a broken gateway). These interfaces are never linked into - * the system ifnet list & no routes point to them. I.e., packets - * can't be sent this way. They only exist as a placeholder for - * multicast source verification. - */ -static struct ifnet multicast_decap_if[CONFIG_MAXVIFS]; - -#define ENCAP_TTL 64 -#define ENCAP_PROTO IPPROTO_IPIP /* 4 */ - -/* prototype IP hdr for encapsulated packets */ -static struct ip multicast_encap_iphdr = { -#if BYTE_ORDER == LITTLE_ENDIAN - sizeof(struct ip) >> 2, IPVERSION, -#else - IPVERSION, sizeof(struct ip) >> 2, -#endif - 0, /* tos */ - sizeof(struct ip), /* total length */ - 0, /* id */ - 0, /* frag offset */ - ENCAP_TTL, ENCAP_PROTO, - 0, /* checksum */ - { 0 }, { 0 } -}; - -/* - * Private variables. - */ -static vifi_t numvifs = 0; -static int have_encap_tunnel = 0; - -/* - * one-back cache used by ipip_input to locate a tunnel's vif - * given a datagram's src ip address. 
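/*
 * The "one-back cache" described above is a single-entry memo: remember
 * the last source address resolved and the vif it mapped to, and check
 * that before scanning the table. Standalone sketch of the idea:
 */
#include <stddef.h>
#include <stdint.h>

struct vif_entry { uint32_t remote; /* tunnel endpoint */ };

static uint32_t          cache_src;     /* 0 means empty, as in the code */
static struct vif_entry *cache_vif;

struct vif_entry *lookup_slow(uint32_t src);    /* full table scan */

struct vif_entry *
vif_for_src(uint32_t src)
{
	if (src != 0 && src == cache_src)
		return cache_vif;               /* hit: skip the scan */
	cache_vif = lookup_slow(src);
	cache_src = (cache_vif != NULL) ? src : 0;
	return cache_vif;
}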
- */ -static u_int32_t last_encap_src; -static struct vif *last_encap_vif; - -static u_int32_t X_ip_mcast_src(int vifi); -static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); -static int X_ip_mrouter_done(void); -static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); -static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); -static int X_legal_vif_num(int vif); -static int X_mrt_ioctl(u_long cmd, caddr_t data); - -static int get_sg_cnt(struct sioc_sg_req *); -static int get_vif_cnt(struct sioc_vif_req *); -static int ip_mrouter_init(struct socket *, int); -static int add_vif(struct vifctl *); -static int del_vif(vifi_t); -static int add_mfc(struct mfcctl *); -static int del_mfc(struct mfcctl *); -static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); -static int set_assert(int); -static void expire_upcalls(void *); -static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, - vifi_t); -static void phyint_send(struct ip *, struct vif *, struct mbuf *); -static void encap_send(struct ip *, struct vif *, struct mbuf *); -static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_int32_t); -static void tbf_queue(struct vif *, struct mbuf *); -static void tbf_process_q(struct vif *); -static void tbf_reprocess_q(void *); -static int tbf_dq_sel(struct vif *, struct ip *); -static void tbf_send_packet(struct vif *, struct mbuf *); -static void tbf_update_tokens(struct vif *); -static int priority(struct vif *, struct ip *); -void multiencap_decap(struct mbuf *); - -/* - * whether or not special PIM assert processing is enabled. - */ -static int pim_assert; -/* - * Rate limit for assert notification messages, in usec - */ -#define ASSERT_MSG_TIME 3000000 - -/* - * Hash function for a source, group entry - */ -#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ - ((g) >> 20) ^ ((g) >> 10) ^ (g)) - -/* - * Find a route for a given origin IP address and Multicast group address - * Type of service parameter to be added in the future!!! - */ - -#define MFCFIND(o, g, rt) { \ - struct mfc *_rt = mfctable[MFCHASH(o,g)]; \ - rt = NULL; \ - ++mrtstat.mrts_mfc_lookups; \ - while (_rt) { \ - if ((_rt->mfc_origin.s_addr == o) && \ - (_rt->mfc_mcastgrp.s_addr == g) && \ - (_rt->mfc_stall == NULL)) { \ - rt = _rt; \ - break; \ - } \ - _rt = _rt->mfc_next; \ - } \ - if (rt == NULL) { \ - ++mrtstat.mrts_mfc_misses; \ - } \ -} - - -/* - * Macros to compute elapsed time efficiently - * Borrowed from Van Jacobson's scheduling code - */ -#define TV_DELTA(a, b, delta) { \ - int xxs; \ - \ - delta = (a).tv_usec - (b).tv_usec; \ - if ((xxs = (a).tv_sec - (b).tv_sec)) { \ - switch (xxs) { \ - case 2: \ - delta += 1000000; \ - /* fall through */ \ - case 1: \ - delta += 1000000; \ - break; \ - default: \ - delta += (1000000 * xxs); \ - } \ - } \ -} - -#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ - (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) - -#if UPCALL_TIMING -u_int32_t upcall_data[51]; -static void collate(struct timeval *); -#endif /* UPCALL_TIMING */ - - -/* - * Handle MRT setsockopt commands to modify the multicast routing tables. 
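/*
 * MFCFIND above is a macro so it can expand inline in the forwarding
 * hot path; functionally it is a hash-chain walk keyed on the
 * (origin, group) pair. The same lookup written as a function, reusing
 * the MFCHASH mixing from the code above but with an assumed
 * power-of-two table size (CONFIG_MFCTBLSIZ is config-dependent):
 */
#include <stddef.h>
#include <stdint.h>

#define MFCTBLSIZ     256               /* assumed table size */
#define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1))
#define MFCHASH(a, g) \
	MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
	    ((g) >> 20) ^ ((g) >> 10) ^ (g))

struct mfc {
	uint32_t    origin, mcastgrp;
	void       *stall;      /* non-NULL while waiting for an upcall */
	struct mfc *next;
};

struct mfc *mfctable[MFCTBLSIZ];

struct mfc *
mfc_find(uint32_t origin, uint32_t group)
{
	struct mfc *rt;

	for (rt = mfctable[MFCHASH(origin, group)]; rt; rt = rt->next)
		if (rt->origin == origin && rt->mcastgrp == group &&
		    rt->stall == NULL)
			return rt;
	return NULL;    /* caller counts this as an mrts_mfc_miss */
}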
- */ -static int -X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) -{ - int error, optval; - vifi_t vifi; - struct vifctl vifc; - struct mfcctl mfc; - - if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) - return (EPERM); - - error = 0; - switch (sopt->sopt_name) { - case MRT_INIT: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - error = ip_mrouter_init(so, optval); - break; - - case MRT_DONE: - error = ip_mrouter_done(); - break; - - case MRT_ADD_VIF: - error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); - if (error) - break; - error = add_vif(&vifc); - break; - - case MRT_DEL_VIF: - error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); - if (error) - break; - error = del_vif(vifi); - break; - - case MRT_ADD_MFC: - case MRT_DEL_MFC: - error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc); - if (error) - break; - if (sopt->sopt_name == MRT_ADD_MFC) - error = add_mfc(&mfc); - else - error = del_mfc(&mfc); - break; - - case MRT_ASSERT: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - set_assert(optval); - break; - - default: - error = EOPNOTSUPP; - break; - } - return (error); -} - -#if !defined(MROUTE_LKM) || !MROUTE_LKM -int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set; -#endif - -/* - * Handle MRT getsockopt commands - */ -static int -X_ip_mrouter_get(__unused struct socket *so, struct sockopt *sopt) -{ - int error; - static int vers = 0x0305; /* !!! why is this here? XXX */ - - switch (sopt->sopt_name) { - case MRT_VERSION: - error = sooptcopyout(sopt, &vers, sizeof vers); - break; - - case MRT_ASSERT: - error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); - break; - default: - error = EOPNOTSUPP; - break; - } - return (error); -} - -#if !defined(MROUTE_LKM) || !MROUTE_LKM -int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get; -#endif - -/* - * Handle ioctl commands to obtain information from the cache - */ -static int -X_mrt_ioctl(u_long cmd, caddr_t data) -{ - int error = 0; - - switch (cmd) { - case (SIOCGETVIFCNT): - return (get_vif_cnt((struct sioc_vif_req *)data)); - break; - case (SIOCGETSGCNT): - return (get_sg_cnt((struct sioc_sg_req *)data)); - break; - default: - return (EINVAL); - break; - } - return error; -} - -#if !defined(MROUTE_LKM) || !MROUTE_LKM -int (*mrt_ioctl)(u_long, caddr_t) = X_mrt_ioctl; -#endif - -/* - * returns the packet, byte, rpf-failure count for the source group provided - */ -static int -get_sg_cnt(struct sioc_sg_req *req) -{ - struct mfc *rt; - - MFCFIND(req->src.s_addr, req->grp.s_addr, rt); - if (rt != NULL) { - req->pktcnt = rt->mfc_pkt_cnt; - req->bytecnt = rt->mfc_byte_cnt; - req->wrong_if = rt->mfc_wrong_if; - } else - req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; - - return 0; -} - -/* - * returns the input and output packet and byte counts on the vif provided - */ -static int -get_vif_cnt(struct sioc_vif_req *req) -{ - vifi_t vifi = req->vifi; - - if (vifi >= numvifs) return EINVAL; - - req->icount = viftable[vifi].v_pkt_in; - req->ocount = viftable[vifi].v_pkt_out; - req->ibytes = viftable[vifi].v_bytes_in; - req->obytes = viftable[vifi].v_bytes_out; - - return 0; -} - -/* - * Enable multicast routing - */ -static int -ip_mrouter_init(struct socket *so, int vers) -{ - if (mrtdebug) - log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n", - so->so_type, so->so_proto->pr_protocol); - - if (so->so_type != SOCK_RAW || - so->so_proto->pr_protocol != 
IPPROTO_IGMP) return EOPNOTSUPP; - - if (vers != 1) - return ENOPROTOOPT; - - if (ip_mrouter != NULL) return EADDRINUSE; - - ip_mrouter = so; - - bzero((caddr_t)mfctable, sizeof(mfctable)); - bzero((caddr_t)nexpire, sizeof(nexpire)); - - pim_assert = 0; - - timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); - - if (mrtdebug) - log(LOG_DEBUG, "ip_mrouter_init\n"); - - return 0; -} - -/* - * Disable multicast routing - */ -static int -X_ip_mrouter_done(void) -{ - vifi_t vifi; - int i; - struct ifnet *ifp; - struct ifreq ifr; - struct mfc *rt; - struct rtdetq *rte; - - /* - * For each phyint in use, disable promiscuous reception of all IP - * multicasts. - */ - for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_lcl_addr.s_addr != 0 && - !(viftable[vifi].v_flags & VIFF_TUNNEL)) { - ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; - ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr - = INADDR_ANY; - ifp = viftable[vifi].v_ifp; - if_allmulti(ifp, 0); - } - } - bzero((caddr_t)tbftable, sizeof(tbftable)); - bzero((caddr_t)viftable, sizeof(viftable)); - numvifs = 0; - pim_assert = 0; - - untimeout(expire_upcalls, (caddr_t)NULL); - - /* - * Free all multicast forwarding cache entries. - */ - for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { - for (rt = mfctable[i]; rt != NULL; ) { - struct mfc *nr = rt->mfc_next; - - for (rte = rt->mfc_stall; rte != NULL; ) { - struct rtdetq *n = rte->next; - - m_freem(rte->m); - FREE(rte, M_MRTABLE); - rte = n; - } - FREE(rt, M_MRTABLE); - rt = nr; - } - } - - bzero((caddr_t)mfctable, sizeof(mfctable)); - - /* - * Reset de-encapsulation cache - */ - last_encap_src = 0; - last_encap_vif = NULL; - have_encap_tunnel = 0; - - ip_mrouter = NULL; - - if (mrtdebug) - log(LOG_DEBUG, "ip_mrouter_done\n"); - - return 0; -} - -#if !defined(MROUTE_LKM) || !MROUTE_LKM -int (*ip_mrouter_done)(void) = X_ip_mrouter_done; -#endif - -/* - * Set PIM assert processing global - */ -static int -set_assert(int i) -{ - if ((i != 1) && (i != 0)) - return EINVAL; - - pim_assert = i; - - return 0; -} - -/* - * Add a vif to the vif table - */ -static int -add_vif(struct vifctl *vifcp) -{ - struct vif *vifp = viftable + vifcp->vifc_vifi; - static struct sockaddr_in sin = { sizeof sin, AF_INET, - 0 , {0}, {0,0,0,0,0,0,0,0,} }; - struct ifaddr *ifa; - struct ifnet *ifp; - int error, s; - struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; - - if (vifcp->vifc_vifi >= CONFIG_MAXVIFS) return EINVAL; - if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; - - /* Find the interface with an address in AF_INET family */ - sin.sin_addr = vifcp->vifc_lcl_addr; - ifa = ifa_ifwithaddr((struct sockaddr *)&sin); - if (ifa == 0) return EADDRNOTAVAIL; - ifp = ifa->ifa_ifp; - IFA_REMREF(ifa); - ifa = NULL; - - if (vifcp->vifc_flags & VIFF_TUNNEL) { - if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { - /* - * An encapsulating tunnel is wanted. Tell ipip_input() to - * start paying attention to encapsulated packets. 
- */ - if (have_encap_tunnel == 0) { - have_encap_tunnel = 1; - for (s = 0; s < CONFIG_MAXVIFS; ++s) { - multicast_decap_if[s].if_name = "mdecap"; - multicast_decap_if[s].if_unit = s; - multicast_decap_if[s].if_family = APPLE_IF_FAM_MDECAP; - } - } - /* - * Set interface to fake encapsulator interface - */ - ifp = &multicast_decap_if[vifcp->vifc_vifi]; - } else { - log(LOG_ERR, "source routed tunnels not supported\n"); - return EOPNOTSUPP; - } - } else { - /* Make sure the interface supports multicast */ - if ((ifp->if_flags & IFF_MULTICAST) == 0) - return EOPNOTSUPP; - - /* Enable promiscuous reception of all IP multicasts from the if */ - error = if_allmulti(ifp, 1); - if (error) - return error; - } - - /* define parameters for the tbf structure */ - vifp->v_tbf = v_tbf; - GET_TIME(vifp->v_tbf->tbf_last_pkt_t); - vifp->v_tbf->tbf_n_tok = 0; - vifp->v_tbf->tbf_q_len = 0; - vifp->v_tbf->tbf_max_q_len = MAXQSIZE; - vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; - - vifp->v_flags = vifcp->vifc_flags; - vifp->v_threshold = vifcp->vifc_threshold; - vifp->v_lcl_addr = vifcp->vifc_lcl_addr; - vifp->v_rmt_addr = vifcp->vifc_rmt_addr; - vifp->v_ifp = ifp; - /* scaling up here allows division by 1024 in critical code */ - vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; - vifp->v_rsvp_on = 0; - vifp->v_rsvpd = NULL; - /* initialize per vif pkt counters */ - vifp->v_pkt_in = 0; - vifp->v_pkt_out = 0; - vifp->v_bytes_in = 0; - vifp->v_bytes_out = 0; - - /* Adjust numvifs up if the vifi is higher than numvifs */ - if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; - - if (mrtdebug) - log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", - vifcp->vifc_vifi, - (u_int32_t)ntohl(vifcp->vifc_lcl_addr.s_addr), - (vifcp->vifc_flags & VIFF_TUNNEL) ? 
"rmtaddr" : "mask", - (u_int32_t)ntohl(vifcp->vifc_rmt_addr.s_addr), - vifcp->vifc_threshold, - vifcp->vifc_rate_limit); - - return 0; -} - -/* - * Delete a vif from the vif table - */ -static int -del_vif(vifi_t vifi) -{ - struct vif *vifp = &viftable[vifi]; - struct mbuf *m; - struct ifnet *ifp; - struct ifreq ifr; - - if (vifi >= numvifs) return EINVAL; - if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; - - if (!(vifp->v_flags & VIFF_TUNNEL)) { - ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; - ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; - ifp = vifp->v_ifp; - if_allmulti(ifp, 0); - } - - if (vifp == last_encap_vif) { - last_encap_vif = 0; - last_encap_src = 0; - } - - /* - * Free packets queued at the interface - */ - while (vifp->v_tbf->tbf_q) { - m = vifp->v_tbf->tbf_q; - vifp->v_tbf->tbf_q = m->m_act; - m_freem(m); - } - - bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); - bzero((caddr_t)vifp, sizeof (*vifp)); - - if (mrtdebug) - log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); - - /* Adjust numvifs down */ - for (vifi = numvifs; vifi > 0; vifi--) - if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; - numvifs = vifi; - - return 0; -} - -/* - * Add an mfc entry - */ -static int -add_mfc(struct mfcctl *mfccp) -{ - struct mfc *rt; - u_int32_t hash; - struct rtdetq *rte; - u_short nstl; - int i; - - MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); - - /* If an entry already exists, just update the fields */ - if (rt) { - if (mrtdebug & DEBUG_MFC) - log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", - (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), - (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), - mfccp->mfcc_parent); - - rt->mfc_parent = mfccp->mfcc_parent; - for (i = 0; i < numvifs; i++) - rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; - return 0; - } - - /* - * Find the entry for which the upcall was made and update - */ - hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); - for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { - - if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && - (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && - (rt->mfc_stall != NULL)) { - - if (nstl++) - log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", - "multiple kernel entries", - (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), - (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), - mfccp->mfcc_parent, (void *)rt->mfc_stall); - - if (mrtdebug & DEBUG_MFC) - log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", - (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), - (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), - mfccp->mfcc_parent, (void *)rt->mfc_stall); - - rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; - rt->mfc_parent = mfccp->mfcc_parent; - for (i = 0; i < numvifs; i++) - rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; - /* initialize pkt counters per src-grp */ - rt->mfc_pkt_cnt = 0; - rt->mfc_byte_cnt = 0; - rt->mfc_wrong_if = 0; - rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; - - rt->mfc_expire = 0; /* Don't clean this guy up */ - nexpire[hash]--; - - /* free packets Qed at the end of this entry */ - for (rte = rt->mfc_stall; rte != NULL; ) { - struct rtdetq *n = rte->next; - - ip_mdq(rte->m, rte->ifp, rt, -1); - m_freem(rte->m); -#if UPCALL_TIMING - collate(&(rte->t)); -#endif /* UPCALL_TIMING */ - FREE(rte, M_MRTABLE); - rte = n; - } - rt->mfc_stall = NULL; - } - } - - /* - * It is possible that an entry is being inserted without an upcall - */ - if 
(nstl == 0) { - if (mrtdebug & DEBUG_MFC) - log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", - hash, (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), - (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), - mfccp->mfcc_parent); - - for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { - - if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && - (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { - - rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; - rt->mfc_parent = mfccp->mfcc_parent; - for (i = 0; i < numvifs; i++) - rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; - /* initialize pkt counters per src-grp */ - rt->mfc_pkt_cnt = 0; - rt->mfc_byte_cnt = 0; - rt->mfc_wrong_if = 0; - rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; - if (rt->mfc_expire) - nexpire[hash]--; - rt->mfc_expire = 0; - } - } - if (rt == NULL) { - /* no upcall, so make a new entry */ - rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); - if (rt == NULL) { - return ENOBUFS; - } - - /* insert new entry at head of hash chain */ - rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; - rt->mfc_parent = mfccp->mfcc_parent; - for (i = 0; i < numvifs; i++) - rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; - /* initialize pkt counters per src-grp */ - rt->mfc_pkt_cnt = 0; - rt->mfc_byte_cnt = 0; - rt->mfc_wrong_if = 0; - rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; - rt->mfc_expire = 0; - rt->mfc_stall = NULL; - - /* link into table */ - rt->mfc_next = mfctable[hash]; - mfctable[hash] = rt; - } - } - return 0; -} - -#if UPCALL_TIMING -/* - * collect delay statistics on the upcalls - */ -static void -collate(struct timeval *t) -{ - u_int32_t d; - struct timeval tp; - u_int32_t delta; - - GET_TIME(tp); - - if (TV_LT(*t, tp)) - { - TV_DELTA(tp, *t, delta); - - d = delta >> 10; - if (d > 50) - d = 50; - - ++upcall_data[d]; - } -} -#endif /* UPCALL_TIMING */ - -/* - * Delete an mfc entry - */ -static int -del_mfc(struct mfcctl *mfccp) -{ - struct in_addr origin; - struct in_addr mcastgrp; - struct mfc *rt; - struct mfc **nptr; - u_int32_t hash; - - origin = mfccp->mfcc_origin; - mcastgrp = mfccp->mfcc_mcastgrp; - hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); - - if (mrtdebug & DEBUG_MFC) - log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", - (u_int32_t)ntohl(origin.s_addr), (u_int32_t)ntohl(mcastgrp.s_addr)); - - nptr = &mfctable[hash]; - while ((rt = *nptr) != NULL) { - if (origin.s_addr == rt->mfc_origin.s_addr && - mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && - rt->mfc_stall == NULL) - break; - - nptr = &rt->mfc_next; - } - if (rt == NULL) { - return EADDRNOTAVAIL; - } - - *nptr = rt->mfc_next; - FREE(rt, M_MRTABLE); - - return 0; -} - -/* - * Send a message to mrouted on the multicast routing socket - */ -static int -socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) -{ - socket_lock(s, 1); - if (s) { - if (sbappendaddr(&s->so_rcv, - (struct sockaddr *)src, - mm, (struct mbuf *)0, NULL) != 0) { - sorwakeup(s); - socket_unlock(s, 1); - return 0; - } - } - socket_unlock(s, 1); - m_freem(mm); - return -1; -} - -/* - * IP multicast forwarding function. This function assumes that the packet - * pointed to by "ip" has arrived on (or is about to be sent to) the interface - * pointed to by "ifp", and the packet is to be relayed to other networks - * that have members of the packet's destination IP multicast group. 
- * - * The packet is returned unscathed to the caller, unless it is - * erroneous, in which case a non-zero return value tells the caller to - * discard it. - */ - -#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ -#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ - -static int -X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, - struct ip_moptions *imo) -{ - struct mfc *rt; - u_char *ipoptions; - static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET, - 0 , {0}, {0,0,0,0,0,0,0,0,} }; - static int srctun = 0; - struct mbuf *mm; - vifi_t vifi; - struct vif *vifp; - - if (mrtdebug & DEBUG_FORWARD) - log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", - (u_int32_t)ntohl(ip->ip_src.s_addr), (u_int32_t)ntohl(ip->ip_dst.s_addr), - (void *)ifp); - - if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || - (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { - /* - * Packet arrived via a physical interface or - * an encapsulated tunnel. - */ - } else { - /* - * Packet arrived through a source-route tunnel. - * Source-route tunnels are no longer supported. - */ - if ((srctun++ % 1000) == 0) - log(LOG_ERR, - "ip_mforward: received source-routed packet from %lx\n", - (u_int32_t)ntohl(ip->ip_src.s_addr)); - - return 1; - } - - if (imo != NULL) - IMO_LOCK(imo); - if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { - IMO_UNLOCK(imo); - if (ip->ip_ttl < 255) - ip->ip_ttl++; /* compensate for -1 in *_send routines */ - if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { - vifp = viftable + vifi; - printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n", - ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, - (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", - if_name(vifp->v_ifp)); - } - return (ip_mdq(m, ifp, NULL, vifi)); - } else if (imo != NULL) { - IMO_UNLOCK(imo); - } - if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { - printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", - ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); - if(!imo) - printf("In fact, no options were specified at all\n"); - } - - /* - * Don't forward a packet with time-to-live of zero or one, - * or a packet destined to a local-only group. - */ - if (ip->ip_ttl <= 1 || - ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) - return 0; - - /* - * Determine forwarding vifs from the forwarding cache table - */ - MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); - - /* Entry exists, so forward if necessary */ - if (rt != NULL) { - return (ip_mdq(m, ifp, rt, -1)); - } else { - /* - * If we don't have a route for packet's origin, - * Make a copy of the packet & - * send message to routing daemon - */ - - struct mbuf *mb0; - struct rtdetq *rte; - u_int32_t hash; - int hlen = ip->ip_hl << 2; -#if UPCALL_TIMING - struct timeval tp; - - GET_TIME(tp); -#endif - - mrtstat.mrts_no_route++; - if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) - log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", - (u_int32_t)ntohl(ip->ip_src.s_addr), - (u_int32_t)ntohl(ip->ip_dst.s_addr)); - - /* - * Allocate mbufs early so that we don't do extra work if we are - * just going to fail anyway. Make sure to pullup the header so - * that other people can't step on it. 
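A few lines further down, the code checks for an already-pending upcall by walking a hash chain keyed on the (origin, group) pair, the same pattern MFCFIND uses. MFCHASH itself is defined in a part of ip_mroute.c this hunk does not show, so the XOR-based hash in the sketch below is an assumption; only MFCHASHMOD, which appears in ip_mroute.h later in this patch, is taken from the source. A minimal user-space rendering of the chain walk:

#include <stdio.h>
#include <stdint.h>

#define MFCTBLSIZ     256                       /* CONFIG_MFCTBLSIZ default */
#define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1))   /* power-of-two fold, from the header */
#define MFCHASH(o, g) MFCHASHMOD((o) ^ (g))     /* assumed; real MFCHASH not in this patch */

struct mfc_sk {
	uint32_t origin;        /* IP origin of mcasts */
	uint32_t mcastgrp;      /* multicast group */
	int      stalled;       /* stands in for mfc_stall != NULL */
	struct mfc_sk *next;    /* next entry on the hash chain */
};

static struct mfc_sk *mfctable[MFCTBLSIZ];

/* Insert at the head of the hash chain, as add_mfc does. */
static void
mfc_link(struct mfc_sk *rt)
{
	uint32_t h = MFCHASH(rt->origin, rt->mcastgrp);
	rt->next = mfctable[h];
	mfctable[h] = rt;
}

/* Find an entry with packets queued awaiting an upcall answer. */
static struct mfc_sk *
mfc_find_stalled(uint32_t origin, uint32_t grp)
{
	struct mfc_sk *rt;

	for (rt = mfctable[MFCHASH(origin, grp)]; rt; rt = rt->next)
		if (rt->origin == origin && rt->mcastgrp == grp && rt->stalled)
			return rt;
	return NULL;
}

int
main(void)
{
	struct mfc_sk a = { 0x0a000001, 0xe0000101, 1, NULL }; /* 10.0.0.1 -> 224.0.1.1 */

	mfc_link(&a);
	printf("stalled entry %sfound\n",
	    mfc_find_stalled(0x0a000001, 0xe0000101) ? "" : "not ");
	return 0;
}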
- */ - rte = (struct rtdetq *) _MALLOC((sizeof *rte), M_MRTABLE, M_NOWAIT); - if (rte == NULL) { - return ENOBUFS; - } - mb0 = m_copy(m, 0, M_COPYALL); - if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) - mb0 = m_pullup(mb0, hlen); - if (mb0 == NULL) { - FREE(rte, M_MRTABLE); - return ENOBUFS; - } - - /* is there an upcall waiting for this packet? */ - hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); - for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { - if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && - (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && - (rt->mfc_stall != NULL)) - break; - } - - if (rt == NULL) { - int i; - struct igmpmsg *im; - - /* no upcall, so make a new entry */ - rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); - if (rt == NULL) { - FREE(rte, M_MRTABLE); - m_freem(mb0); - return ENOBUFS; - } - /* Make a copy of the header to send to the user level process */ - mm = m_copy(mb0, 0, hlen); - if (mm == NULL) { - FREE(rte, M_MRTABLE); - m_freem(mb0); - FREE(rt, M_MRTABLE); - return ENOBUFS; - } - - /* - * Send message to routing daemon to install - * a route into the kernel table - */ - k_igmpsrc.sin_addr = ip->ip_src; - - im = mtod(mm, struct igmpmsg *); - im->im_msgtype = IGMPMSG_NOCACHE; - im->im_mbz = 0; - - mrtstat.mrts_upcalls++; - - if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { - log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); - ++mrtstat.mrts_upq_sockfull; - FREE(rte, M_MRTABLE); - m_freem(mb0); - FREE(rt, M_MRTABLE); - return ENOBUFS; - } - - /* insert new entry at head of hash chain */ - rt->mfc_origin.s_addr = ip->ip_src.s_addr; - rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; - rt->mfc_expire = UPCALL_EXPIRE; - nexpire[hash]++; - for (i = 0; i < numvifs; i++) - rt->mfc_ttls[i] = 0; - rt->mfc_parent = -1; - - /* link into table */ - rt->mfc_next = mfctable[hash]; - mfctable[hash] = rt; - rt->mfc_stall = rte; - - } else { - /* determine if q has overflowed */ - int npkts = 0; - struct rtdetq **p; - - for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) - npkts++; - - if (npkts > MAX_UPQ) { - mrtstat.mrts_upq_ovflw++; - FREE(rte, M_MRTABLE); - m_freem(mb0); - return 0; - } - - /* Add this entry to the end of the queue */ - *p = rte; - } - - rte->m = mb0; - rte->ifp = ifp; -#if UPCALL_TIMING - rte->t = tp; -#endif - rte->next = NULL; - - return 0; - } -} - -#if !defined(MROUTE_LKM) || !MROUTE_LKM -int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, - struct ip_moptions *) = X_ip_mforward; -#endif - -/* - * Clean up the cache entry if upcall is not serviced - */ -static void -expire_upcalls(__unused void *unused) -{ - struct rtdetq *rte; - struct mfc *mfc, **nptr; - int i; - - for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { - if (nexpire[i] == 0) - continue; - nptr = &mfctable[i]; - for (mfc = *nptr; mfc != NULL; mfc = *nptr) { - /* - * Skip real cache entries - * Make sure it wasn't marked to not expire (shouldn't happen) - * If it expires now - */ - if (mfc->mfc_stall != NULL && - mfc->mfc_expire != 0 && - --mfc->mfc_expire == 0) { - if (mrtdebug & DEBUG_EXPIRE) - log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", - (u_int32_t)ntohl(mfc->mfc_origin.s_addr), - (u_int32_t)ntohl(mfc->mfc_mcastgrp.s_addr)); - /* - * drop all the packets - * free the mbuf with the pkt, if, timing info - */ - for (rte = mfc->mfc_stall; rte; ) { - struct rtdetq *n = rte->next; - - m_freem(rte->m); - FREE(rte, M_MRTABLE); - rte = n; - } - ++mrtstat.mrts_cache_cleanups; - nexpire[i]--; - - *nptr = mfc->mfc_next; - FREE(mfc, M_MRTABLE); - 
} else { - nptr = &mfc->mfc_next; - } - } - } - timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); -} - -/* - * Packet forwarding routine once entry in the cache is made - */ -static int -ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, - vifi_t xmt_vif) -{ - struct ip *ip = mtod(m, struct ip *); - vifi_t vifi; - struct vif *vifp; - int plen = ip->ip_len; - -/* - * Macro to send packet on vif. Since RSVP packets don't get counted on - * input, they shouldn't get counted on output, so statistics keeping is - * seperate. - */ -#define MC_SEND(ip,vifp,m) { \ - if ((vifp)->v_flags & VIFF_TUNNEL) \ - encap_send((ip), (vifp), (m)); \ - else \ - phyint_send((ip), (vifp), (m)); \ -} - - /* - * If xmt_vif is not -1, send on only the requested vif. - * - * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) - */ - if (xmt_vif < numvifs) { - MC_SEND(ip, viftable + xmt_vif, m); - return 1; - } - - /* - * Don't forward if it didn't arrive from the parent vif for its origin. - */ - vifi = rt->mfc_parent; - if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { - /* came in the wrong interface */ - if (mrtdebug & DEBUG_FORWARD) - log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", - (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); - ++mrtstat.mrts_wrong_if; - ++rt->mfc_wrong_if; - /* - * If we are doing PIM assert processing, and we are forwarding - * packets on this interface, and it is a broadcast medium - * interface (and not a tunnel), send a message to the routing daemon. - */ - if (pim_assert && rt->mfc_ttls[vifi] && - (ifp->if_flags & IFF_BROADCAST) && - !(viftable[vifi].v_flags & VIFF_TUNNEL)) { - struct sockaddr_in k_igmpsrc; - struct mbuf *mm; - struct igmpmsg *im; - int hlen = ip->ip_hl << 2; - struct timeval now; - u_int32_t delta; - - GET_TIME(now); - - TV_DELTA(rt->mfc_last_assert, now, delta); - - if (delta > ASSERT_MSG_TIME) { - mm = m_copy(m, 0, hlen); - if (mm && (M_HASCL(mm) || mm->m_len < hlen)) - mm = m_pullup(mm, hlen); - if (mm == NULL) { - return ENOBUFS; - } - - rt->mfc_last_assert = now; - - im = mtod(mm, struct igmpmsg *); - im->im_msgtype = IGMPMSG_WRONGVIF; - im->im_mbz = 0; - im->im_vif = vifi; - - k_igmpsrc.sin_addr = im->im_src; - - socket_send(ip_mrouter, mm, &k_igmpsrc); - } - } - return 0; - } - - /* If I sourced this packet, it counts as output, else it was input. */ - if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { - viftable[vifi].v_pkt_out++; - viftable[vifi].v_bytes_out += plen; - } else { - viftable[vifi].v_pkt_in++; - viftable[vifi].v_bytes_in += plen; - } - rt->mfc_pkt_cnt++; - rt->mfc_byte_cnt += plen; - - /* - * For each vif, decide if a copy of the packet should be forwarded. - * Forward if: - * - the ttl exceeds the vif's threshold - * - there are group members downstream on interface - */ - for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) - if ((rt->mfc_ttls[vifi] > 0) && - (ip->ip_ttl > rt->mfc_ttls[vifi])) { - vifp->v_pkt_out++; - vifp->v_bytes_out += plen; - MC_SEND(ip, vifp, m); - } - - return 0; -} - -/* - * check if a vif number is legal/ok. 
This is used by ip_output, to export - * numvifs there, - */ -static int -X_legal_vif_num(int vif) -{ - if (vif >= 0 && vif < numvifs) - return(1); - else - return(0); -} - -#if !defined(MROUTE_LKM) || !MROUTE_LKM -int (*legal_vif_num)(int) = X_legal_vif_num; -#endif - -/* - * Return the local address used by this vif - */ -static u_int32_t -X_ip_mcast_src(int vifi) -{ - if (vifi >= 0 && vifi < numvifs) - return viftable[vifi].v_lcl_addr.s_addr; - else - return INADDR_ANY; -} - -#if !defined(MROUTE_LKM) || !MROUTE_LKM -u_int32_t (*ip_mcast_src)(int) = X_ip_mcast_src; -#endif - -static void -phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) -{ - struct mbuf *mb_copy; - int hlen = ip->ip_hl << 2; - - /* - * Make a new reference to the packet; make sure that - * the IP header is actually copied, not just referenced, - * so that ip_output() only scribbles on the copy. - */ - mb_copy = m_copy(m, 0, M_COPYALL); - if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) - mb_copy = m_pullup(mb_copy, hlen); - if (mb_copy == NULL) - return; - - if (vifp->v_rate_limit == 0) - tbf_send_packet(vifp, mb_copy); - else - tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); -} - -static void -encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) -{ - struct mbuf *mb_copy; - struct ip *ip_copy; - int i, len = ip->ip_len; - - /* - * copy the old packet & pullup its IP header into the - * new mbuf so we can modify it. Try to fill the new - * mbuf since if we don't the ethernet driver will. - */ - MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); - if (mb_copy == NULL) - return; -#if CONFIG_MACF_NET - mac_mbuf_label_associate_multicast_encap(m, vifp->v_ifp, mb_copy); -#endif - mb_copy->m_data += max_linkhdr; - mb_copy->m_len = sizeof(multicast_encap_iphdr); - - if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { - m_freem(mb_copy); - return; - } - i = MHLEN - M_LEADINGSPACE(mb_copy); - if (i > len) - i = len; - mb_copy = m_pullup(mb_copy, i); - if (mb_copy == NULL) - return; - mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); - - /* - * fill in the encapsulating IP header. - */ - ip_copy = mtod(mb_copy, struct ip *); - *ip_copy = multicast_encap_iphdr; - ip_copy->ip_id = ip_randomid(); - ip_copy->ip_len += len; - ip_copy->ip_src = vifp->v_lcl_addr; - ip_copy->ip_dst = vifp->v_rmt_addr; - - /* - * turn the encapsulated IP header back into a valid one. - */ - ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); - --ip->ip_ttl; - -#if BYTE_ORDER != BIG_ENDIAN - HTONS(ip->ip_len); - HTONS(ip->ip_off); -#endif - - ip->ip_sum = 0; - mb_copy->m_data += sizeof(multicast_encap_iphdr); - ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); - mb_copy->m_data -= sizeof(multicast_encap_iphdr); - - if (vifp->v_rate_limit == 0) - tbf_send_packet(vifp, mb_copy); - else - tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); -} - -/* - * De-encapsulate a packet and feed it back through ip input (this - * routine is called whenever IP gets a packet with proto type - * ENCAP_PROTO and a local destination address). - */ -void -#if MROUTE_LKM -X_ipip_input(struct mbuf *m, int iphlen) -#else -ipip_input(struct mbuf *m, int iphlen) -#endif -{ - struct ifnet *ifp = m->m_pkthdr.rcvif; - struct ip *ip = mtod(m, struct ip *); - int hlen = ip->ip_hl << 2; - struct vif *vifp; - - if (!have_encap_tunnel) { - rip_input(m, iphlen); - return; - } - /* - * dump the packet if it's not to a multicast destination or if - * we don't have an encapsulating tunnel with the source. 
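Concretely, the test that follows locates the inner header hlen bytes into the packet and requires its destination to be a class-D address before any tunnel vif is matched. A self-contained sketch of that check, using raw byte offsets instead of the kernel's struct ip, and assuming ENCAP_PROTO expands to protocol 4 (IP-in-IP):

#include <stdio.h>
#include <stdint.h>

/* host-order class-D test, as IN_MULTICAST does in <netinet/in.h> */
#define IN_MULTICAST_H(a) (((uint32_t)(a) & 0xf0000000) == 0xe0000000)

int
main(void)
{
	uint8_t pkt[40] = { 0 };
	int hlen;
	uint32_t dst;

	pkt[0] = 0x45;          /* outer: version 4, ihl 5 (20 bytes, no options) */
	pkt[9] = 4;             /* outer protocol 4: IP-in-IP (assumed ENCAP_PROTO) */

	hlen = (pkt[0] & 0x0f) << 2;

	/* inner destination at offset hlen + 16: 224.1.2.3 in wire order */
	pkt[hlen + 16] = 224; pkt[hlen + 17] = 1;
	pkt[hlen + 18] = 2;   pkt[hlen + 19] = 3;

	/* assemble the host-order address from the four wire bytes */
	dst = ((uint32_t)pkt[hlen + 16] << 24) | ((uint32_t)pkt[hlen + 17] << 16) |
	      ((uint32_t)pkt[hlen + 18] << 8)  |  (uint32_t)pkt[hlen + 19];

	if (!IN_MULTICAST_H(dst))
		printf("drop: inner dst not multicast (mrts_bad_tunnel++)\n");
	else
		printf("inner dst 224.1.2.3 is multicast; proceed to vif match\n");
	return 0;
}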
- * Note: This code assumes that the remote site IP address - * uniquely identifies the tunnel (i.e., that this site has - * at most one tunnel with the remote site). - */ - if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { - ++mrtstat.mrts_bad_tunnel; - m_freem(m); - return; - } - if (ip->ip_src.s_addr != last_encap_src) { - struct vif *vife; - - vifp = viftable; - vife = vifp + numvifs; - last_encap_src = ip->ip_src.s_addr; - last_encap_vif = 0; - for ( ; vifp < vife; ++vifp) - if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { - if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) - == VIFF_TUNNEL) - last_encap_vif = vifp; - break; - } - } - if ((vifp = last_encap_vif) == 0) { - last_encap_src = 0; - mrtstat.mrts_cant_tunnel++; /*XXX*/ - m_freem(m); - if (mrtdebug) - log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n", - (u_int32_t)ntohl(ip->ip_src.s_addr)); - return; - } - ifp = vifp->v_ifp; - - if (hlen > IP_HDR_LEN) - ip_stripoptions(m, (struct mbuf *) 0); - m->m_data += IP_HDR_LEN; - m->m_len -= IP_HDR_LEN; - m->m_pkthdr.len -= IP_HDR_LEN; - m->m_pkthdr.rcvif = ifp; - - proto_inject(PF_INET, m); -} - -/* - * Token bucket filter module - */ - -static void -tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, - u_int32_t p_len) -{ - struct tbf *t = vifp->v_tbf; - - if (p_len > MAX_BKT_SIZE) { - /* drop if packet is too large */ - mrtstat.mrts_pkt2large++; - m_freem(m); - return; - } - - tbf_update_tokens(vifp); - - /* if there are enough tokens, - * and the queue is empty, - * send this packet out - */ - - if (t->tbf_q_len == 0) { - /* queue empty, send packet if enough tokens */ - if (p_len <= t->tbf_n_tok) { - t->tbf_n_tok -= p_len; - tbf_send_packet(vifp, m); - } else { - /* queue packet and timeout till later */ - tbf_queue(vifp, m); - timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); - } - } else if (t->tbf_q_len < t->tbf_max_q_len) { - /* finite queue length, so queue pkts and process queue */ - tbf_queue(vifp, m); - tbf_process_q(vifp); - } else { - /* queue length too much, try to dq and queue and process */ - if (!tbf_dq_sel(vifp, ip)) { - mrtstat.mrts_q_overflow++; - m_freem(m); - return; - } else { - tbf_queue(vifp, m); - tbf_process_q(vifp); - } - } - return; -} - -/* - * adds a packet to the queue at the interface - */ -static void -tbf_queue(struct vif *vifp, struct mbuf *m) -{ - struct tbf *t = vifp->v_tbf; - - if (t->tbf_t == NULL) { - /* Queue was empty */ - t->tbf_q = m; - } else { - /* Insert at tail */ - t->tbf_t->m_act = m; - } - - /* Set new tail pointer */ - t->tbf_t = m; - -#if DIAGNOSTIC - /* Make sure we didn't get fed a bogus mbuf */ - if (m->m_act) - panic("tbf_queue: m_act"); -#endif - m->m_act = NULL; - - t->tbf_q_len++; -} - - -/* - * processes the queue at the interface - */ -static void -tbf_process_q(struct vif *vifp) -{ - struct mbuf *m; - int len; - struct tbf *t = vifp->v_tbf; - - /* loop through the queue at the interface and send as many packets - * as possible - */ - while (t->tbf_q_len > 0) { - m = t->tbf_q; - - len = mtod(m, struct ip *)->ip_len; - - /* determine if the packet can be sent */ - if (len <= t->tbf_n_tok) { - /* if so, - * reduce no of tokens, dequeue the packet, - * send the packet. 
- */ - t->tbf_n_tok -= len; - - t->tbf_q = m->m_act; - if (--t->tbf_q_len == 0) - t->tbf_t = NULL; - - m->m_act = NULL; - tbf_send_packet(vifp, m); - - } else break; - } -} - -static void -tbf_reprocess_q(void *xvifp) -{ - struct vif *vifp = xvifp; - - if (ip_mrouter == NULL) { - return; - } - - tbf_update_tokens(vifp); - - tbf_process_q(vifp); - - if (vifp->v_tbf->tbf_q_len) - timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); -} - -/* function that will selectively discard a member of the queue - * based on the precedence value and the priority - */ -static int -tbf_dq_sel(struct vif *vifp, struct ip *ip) -{ - u_int p; - struct mbuf *m, *last; - struct mbuf **np; - struct tbf *t = vifp->v_tbf; - - p = priority(vifp, ip); - - np = &t->tbf_q; - last = NULL; - while ((m = *np) != NULL) { - if (p > priority(vifp, mtod(m, struct ip *))) { - *np = m->m_act; - /* If we're removing the last packet, fix the tail pointer */ - if (m == t->tbf_t) - t->tbf_t = last; - m_freem(m); - /* it's impossible for the queue to be empty, but - * we check anyway. */ - if (--t->tbf_q_len == 0) - t->tbf_t = NULL; - mrtstat.mrts_drop_sel++; - return(1); - } - np = &m->m_act; - last = m; - } - return(0); -} - -static void -tbf_send_packet(struct vif *vifp, struct mbuf *m) -{ - int error; - struct route ro; - - bzero(&ro, sizeof (ro)); - if (vifp->v_flags & VIFF_TUNNEL) { - /* If tunnel options */ - ip_output(m, (struct mbuf *)0, &ro, - IP_FORWARDING, (struct ip_moptions *)0, NULL); - } else { - struct ip_moptions *imo; - - imo = ip_allocmoptions(M_DONTWAIT); - if (imo == NULL) { - error = ENOMEM; - goto done; - } - - imo->imo_multicast_ifp = vifp->v_ifp; - imo->imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; - imo->imo_multicast_loop = 1; - imo->imo_multicast_vif = -1; - - /* - * Re-entrancy should not be a problem here, because - * the packets that we send out and are looped back at us - * should get rejected because they appear to come from - * the loopback interface, thus preventing looping. - */ - error = ip_output(m, (struct mbuf *)0, &ro, - IP_FORWARDING, imo, NULL); - - IMO_REMREF(imo); -done: - if (mrtdebug & DEBUG_XMIT) - log(LOG_DEBUG, "phyint_send on vif %d err %d\n", - vifp - viftable, error); - } - ROUTE_RELEASE(&ro); -} - -/* determine the current time and then - * the elapsed time (between the last time and time now) - * in milliseconds & update the no. of tokens in the bucket - */ -static void -tbf_update_tokens(struct vif *vifp) -{ - struct timeval tp; - u_int32_t tm; - struct tbf *t = vifp->v_tbf; - - GET_TIME(tp); - - TV_DELTA(tp, t->tbf_last_pkt_t, tm); - - /* - * This formula is actually - * "time in seconds" * "bytes/second". - * - * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) - * - * The (1000/1024) was introduced in add_vif to optimize - * this divide into a shift. 
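The two scalings cancel out: add_vif stored v_rate_limit as kbit/s * 1024/1000, so the credit line that follows (tm * v_rate_limit / 1024 / 8, with tm in microseconds) yields bytes. A worked check of the identity, using 64-bit intermediates to sidestep the 32-bit overflow the kernel tolerates for its rate range:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t rate_kbps = 500;                        /* vifc_rate_limit from userland */
	uint32_t v_rate_limit = rate_kbps * 1024 / 1000; /* scaled in add_vif: 512 */
	uint64_t tm = 1000000;                           /* elapsed time, microseconds */

	/* tokens credited, as in tbf_update_tokens */
	uint64_t tok = tm * v_rate_limit / 1024 / 8;

	/* exact value: seconds * (kbit/s * 1000 bits / 8 bits per byte) */
	double exact = (tm / 1e6) * (rate_kbps * 1000.0 / 8.0);

	printf("shift form: %llu bytes, exact: %.0f bytes\n",
	    (unsigned long long)tok, exact);             /* both print 62500 */
	return 0;
}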
- */ - t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; - t->tbf_last_pkt_t = tp; - - if (t->tbf_n_tok > MAX_BKT_SIZE) - t->tbf_n_tok = MAX_BKT_SIZE; -} - -static int -priority(__unused struct vif *vifp, struct ip *ip) -{ - int prio; - - /* temporary hack; may add general packet classifier some day */ - - /* - * The UDP port space is divided up into four priority ranges: - * [0, 16384) : unclassified - lowest priority - * [16384, 32768) : audio - highest priority - * [32768, 49152) : whiteboard - medium priority - * [49152, 65536) : video - low priority - */ - if (ip->ip_p == IPPROTO_UDP) { - struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); - switch (ntohs(udp->uh_dport) & 0xc000) { - case 0x4000: - prio = 70; - break; - case 0x8000: - prio = 60; - break; - case 0xc000: - prio = 55; - break; - default: - prio = 50; - break; - } - if (tbfdebug > 1) - log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio); - } else { - prio = 50; - } - return prio; -} - -/* - * End of token bucket filter modifications - */ - -int -ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt) -{ - int error, i; - - if (rsvpdebug) - printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", - so->so_type, so->so_proto->pr_protocol); - - if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) - return EOPNOTSUPP; - - /* Check mbuf. */ - error = sooptcopyin(sopt, &i, sizeof i, sizeof i); - if (error) - return (error); - - if (rsvpdebug) - printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on); - - /* Check vif. */ - if (!legal_vif_num(i)) { - return EADDRNOTAVAIL; - } - - /* Check if socket is available. */ - if (viftable[i].v_rsvpd != NULL) { - return EADDRINUSE; - } - - viftable[i].v_rsvpd = so; - /* This may seem silly, but we need to be sure we don't over-increment - * the RSVP counter, in case something slips up. - */ - if (!viftable[i].v_rsvp_on) { - viftable[i].v_rsvp_on = 1; - rsvp_on++; - } - - return 0; -} - -int -ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt) -{ - int error, i; - - if (rsvpdebug) - printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", - so->so_type, so->so_proto->pr_protocol); - - if (so->so_type != SOCK_RAW || - so->so_proto->pr_protocol != IPPROTO_RSVP) - return EOPNOTSUPP; - - error = sooptcopyin(sopt, &i, sizeof i, sizeof i); - if (error) - return (error); - - /* Check vif. */ - if (!legal_vif_num(i)) { - return EADDRNOTAVAIL; - } - - if (rsvpdebug) - printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n", - viftable[i].v_rsvpd, so); - - viftable[i].v_rsvpd = NULL; - /* - * This may seem silly, but we need to be sure we don't over-decrement - * the RSVP counter, in case something slips up. - */ - if (viftable[i].v_rsvp_on) { - viftable[i].v_rsvp_on = 0; - rsvp_on--; - } - - return 0; -} - -void -ip_rsvp_force_done(struct socket *so) -{ - int vifi; - - /* Don't bother if it is not the right type of socket. */ - if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) - return; - - /* The socket may be attached to more than one vif...this - * is perfectly legal. - */ - for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_rsvpd == so) { - viftable[vifi].v_rsvpd = NULL; - /* This may seem silly, but we need to be sure we don't - * over-decrement the RSVP counter, in case something slips up. 
- */ - if (viftable[vifi].v_rsvp_on) { - viftable[vifi].v_rsvp_on = 0; - rsvp_on--; - } - } - } - - return; -} - -void -rsvp_input(struct mbuf *m, int iphlen) -{ - int vifi; - struct ip *ip = mtod(m, struct ip *); - static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET, - 0 , {0}, {0,0,0,0,0,0,0,0,} }; - struct ifnet *ifp; - - if (rsvpdebug) - printf("rsvp_input: rsvp_on %d\n",rsvp_on); - - /* Can still get packets with rsvp_on = 0 if there is a local member - * of the group to which the RSVP packet is addressed. But in this - * case we want to throw the packet away. - */ - if (!rsvp_on) { - m_freem(m); - return; - } - - if (rsvpdebug) - printf("rsvp_input: check vifs\n"); - -#if DIAGNOSTIC - if (!(m->m_flags & M_PKTHDR)) - panic("rsvp_input no hdr"); -#endif - - ifp = m->m_pkthdr.rcvif; - /* Find which vif the packet arrived on. */ - for (vifi = 0; vifi < numvifs; vifi++) - if (viftable[vifi].v_ifp == ifp) - break; - - if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { - /* - * If the old-style non-vif-associated socket is set, - * then use it. Otherwise, drop packet since there - * is no specific socket for this vif. - */ - if (ip_rsvpd != NULL) { - if (rsvpdebug) - printf("rsvp_input: Sending packet up old-style socket\n"); - rip_input(m, iphlen); /* xxx */ - } else { - if (rsvpdebug && vifi == numvifs) - printf("rsvp_input: Can't find vif for packet.\n"); - else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) - printf("rsvp_input: No socket defined for vif %d\n",vifi); - m_freem(m); - } - return; - } - rsvp_src.sin_addr = ip->ip_src; - - if (rsvpdebug && m) - printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", - m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); - - if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { - if (rsvpdebug) - printf("rsvp_input: Failed to append to socket\n"); - } else { - if (rsvpdebug) - printf("rsvp_input: send packet up\n"); - } - -} - -#if MROUTE_LKM -#include -#include -#include -#include - -MOD_MISC("ip_mroute_mod") - -static int -ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) -{ - int i; - struct lkm_misc *args = lkmtp->private.lkm_misc; - int err = 0; - - switch(cmd) { - static int (*old_ip_mrouter_cmd)(); - static int (*old_ip_mrouter_done)(); - static int (*old_ip_mforward)(); - static int (*old_mrt_ioctl)(); - static void (*old_proto4_input)(); - static int (*old_legal_vif_num)(); - - case LKM_E_LOAD: - if(lkmexists(lkmtp) || ip_mrtproto) - return(EEXIST); - old_ip_mrouter_cmd = ip_mrouter_cmd; - ip_mrouter_cmd = X_ip_mrouter_cmd; - old_ip_mrouter_done = ip_mrouter_done; - ip_mrouter_done = X_ip_mrouter_done; - old_ip_mforward = ip_mforward; - ip_mforward = X_ip_mforward; - old_mrt_ioctl = mrt_ioctl; - mrt_ioctl = X_mrt_ioctl; - old_proto4_input = ip_protox[ENCAP_PROTO]->pr_input; - ip_protox[ENCAP_PROTO]->pr_input = X_ipip_input; - old_legal_vif_num = legal_vif_num; - legal_vif_num = X_legal_vif_num; - ip_mrtproto = IGMP_DVMRP; - - printf("\nIP multicast routing loaded\n"); - break; - - case LKM_E_UNLOAD: - if (ip_mrouter) - return EINVAL; - - ip_mrouter_cmd = old_ip_mrouter_cmd; - ip_mrouter_done = old_ip_mrouter_done; - ip_mforward = old_ip_mforward; - mrt_ioctl = old_mrt_ioctl; - ip_protox[ENCAP_PROTO]->pr_input = old_proto4_input; - legal_vif_num = old_legal_vif_num; - ip_mrtproto = 0; - break; - - default: - err = EINVAL; - break; - } - - return(err); -} - -int -ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { - DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle, - nosys); -} 
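The load/unload arms above follow the usual function-pointer patching idiom: save each global hook before installing the X_* replacement, and restore the saved value on unload. Stripped of the LKM machinery, and with illustrative names not taken from the patch, the pattern reduces to:

#include <stdio.h>

static int
default_forward(int pkt)
{
	printf("default path: %d\n", pkt);
	return 0;
}

static int
x_forward(int pkt)
{
	printf("module path: %d\n", pkt);
	return 0;
}

/* global hook, as with ip_mforward / legal_vif_num above */
static int (*forward_hook)(int) = default_forward;

static int (*old_forward)(int);   /* saved across load/unload */

static void
mod_load(void)
{
	old_forward = forward_hook;   /* save before patching */
	forward_hook = x_forward;
}

static void
mod_unload(void)
{
	forward_hook = old_forward;   /* restore exactly what was saved */
}

int
main(void)
{
	forward_hook(1);   /* default path */
	mod_load();
	forward_hook(2);   /* module path */
	mod_unload();
	forward_hook(3);   /* default path again */
	return 0;
}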
- -#endif /* MROUTE_LKM */ -#endif /* MROUTING */ diff --git a/bsd/netinet/ip_mroute.h b/bsd/netinet/ip_mroute.h deleted file mode 100644 index 41985f3fa..000000000 --- a/bsd/netinet/ip_mroute.h +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1989 Stephen Deering. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Stephen Deering of Stanford University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93 - */ - -#ifndef _NETINET_IP_MROUTE_H_ -#define _NETINET_IP_MROUTE_H_ -#include - -/* - * Definitions for IP multicast forwarding. - * - * Written by David Waitzman, BBN Labs, August 1988. - * Modified by Steve Deering, Stanford, February 1989. - * Modified by Ajit Thyagarajan, PARC, August 1993. - * Modified by Ajit Thyagarajan, PARC, August 1994. - * - * MROUTING Revision: 3.3.1.3 - */ - - -/* - * Multicast Routing set/getsockopt commands. - */ -#define MRT_INIT 100 /* initialize forwarder */ -#define MRT_DONE 101 /* shut down forwarder */ -#define MRT_ADD_VIF 102 /* create virtual interface */ -#define MRT_DEL_VIF 103 /* delete virtual interface */ -#define MRT_ADD_MFC 104 /* insert forwarding cache entry */ -#define MRT_DEL_MFC 105 /* delete forwarding cache entry */ -#define MRT_VERSION 106 /* get kernel version number */ -#define MRT_ASSERT 107 /* enable PIM assert processing */ - - -#ifdef BSD_KERNEL_PRIVATE -#define GET_TIME(t) microtime(&t) -#endif /* BSD_KERNEL_PRIVATE */ - -#ifndef CONFIG_MAXVIFS -#define CONFIG_MAXVIFS 32 /* 4635538 temp workaround */ -#endif - -#ifndef CONFIG_MFCTBLSIZ -#define CONFIG_MFCTBLSIZ 256 /* 4635538 temp workaround */ -#endif - -/* - * Types and macros for handling bitmaps with one bit per virtual interface. - */ -typedef u_int32_t vifbitmap_t; -typedef u_short vifi_t; /* type of a vif index */ -#define ALL_VIFS (vifi_t)-1 - -#define VIFM_SET(n, m) ((m) |= (1 << (n))) -#define VIFM_CLR(n, m) ((m) &= ~(1 << (n))) -#define VIFM_ISSET(n, m) ((m) & (1 << (n))) -#define VIFM_CLRALL(m) ((m) = 0x00000000) -#define VIFM_COPY(mfrom, mto) ((mto) = (mfrom)) -#define VIFM_SAME(m1, m2) ((m1) == (m2)) - - -/* - * Argument structure for MRT_ADD_VIF. - * (MRT_DEL_VIF takes a single vifi_t argument.) - */ -struct vifctl { - vifi_t vifc_vifi; /* the index of the vif to be added */ - u_char vifc_flags; /* VIFF_ flags defined below */ - u_char vifc_threshold; /* min ttl required to forward on vif */ - u_int vifc_rate_limit; /* max rate */ - struct in_addr vifc_lcl_addr; /* local interface address */ - struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */ -}; - -#define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */ -#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */ - -/* - * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC - * (mfcc_tos to be added at a future point) - */ -struct mfcctl { - struct in_addr mfcc_origin; /* ip origin of mcasts */ - struct in_addr mfcc_mcastgrp; /* multicast group associated*/ - vifi_t mfcc_parent; /* incoming vif */ - u_char mfcc_ttls[CONFIG_MAXVIFS]; /* forwarding ttls on vifs */ -}; - -/* - * The kernel's multicast routing statistics. - */ -struct mrtstat { - u_int32_t mrts_mfc_lookups; /* # forw. cache hash table hits */ - u_int32_t mrts_mfc_misses; /* # forw. 
cache hash table misses */ - u_int32_t mrts_upcalls; /* # calls to mrouted */ - u_int32_t mrts_no_route; /* no route for packet's origin */ - u_int32_t mrts_bad_tunnel; /* malformed tunnel options */ - u_int32_t mrts_cant_tunnel; /* no room for tunnel options */ - u_int32_t mrts_wrong_if; /* arrived on wrong interface */ - u_int32_t mrts_upq_ovflw; /* upcall Q overflow */ - u_int32_t mrts_cache_cleanups; /* # entries with no upcalls */ - u_int32_t mrts_drop_sel; /* pkts dropped selectively */ - u_int32_t mrts_q_overflow; /* pkts dropped - Q overflow */ - u_int32_t mrts_pkt2large; /* pkts dropped - size > BKT SIZE */ - u_int32_t mrts_upq_sockfull; /* upcalls dropped - socket full */ -}; - -/* - * Argument structure used by mrouted to get src-grp pkt counts - */ -struct sioc_sg_req { - struct in_addr src; - struct in_addr grp; - u_int32_t pktcnt; - u_int32_t bytecnt; - u_int32_t wrong_if; -}; - -/* - * Argument structure used by mrouted to get vif pkt counts - */ -struct sioc_vif_req { - vifi_t vifi; /* vif number */ - u_int32_t icount; /* Input packet count on vif */ - u_int32_t ocount; /* Output packet count on vif */ - u_int32_t ibytes; /* Input byte count on vif */ - u_int32_t obytes; /* Output byte count on vif */ -}; - -#ifdef PRIVATE -#ifndef KERNEL -/* - * The kernel's virtual-interface structure. - * - * XXX: This is unused and is currently exposed for netstat. - */ -struct tbf; -struct ifnet; -struct socket; -struct vif { - u_char v_flags; /* VIFF_ flags defined above */ - u_char v_threshold; /* min ttl required to forward on vif*/ - u_int v_rate_limit; /* max rate */ - struct tbf *v_tbf; /* token bucket structure at intf. */ - struct in_addr v_lcl_addr; /* local interface address */ - struct in_addr v_rmt_addr; /* remote address (tunnels only) */ - struct ifnet *v_ifp; /* pointer to interface */ - u_int32_t v_pkt_in; /* # pkts in on interface */ - u_int32_t v_pkt_out; /* # pkts out on interface */ - u_int32_t v_bytes_in; /* # bytes in on interface */ - u_int32_t v_bytes_out; /* # bytes out on interface */ - struct route v_route; /* cached route if this is a tunnel */ - u_int v_rsvp_on; /* RSVP listening on this vif */ - struct socket *v_rsvpd; /* RSVP daemon socket */ -}; - -/* - * The kernel's multicast forwarding cache entry structure - * (A field for the type of service (mfc_tos) is to be added - * at a future point) - * - * XXX: This is unused and is currently exposed for netstat. 
- */ -struct mfc { - struct in_addr mfc_origin; /* IP origin of mcasts */ - struct in_addr mfc_mcastgrp; /* multicast group associated*/ - vifi_t mfc_parent; /* incoming vif */ - u_char mfc_ttls[CONFIG_MAXVIFS]; /* forwarding ttls on vifs */ - u_int32_t mfc_pkt_cnt; /* pkt count for src-grp */ - u_int32_t mfc_byte_cnt; /* byte count for src-grp */ - u_int32_t mfc_wrong_if; /* wrong if for src-grp */ - int mfc_expire; /* time to clean entry up */ - struct timeval mfc_last_assert; /* last time I sent an assert*/ - struct rtdetq *mfc_stall; /* q of packets awaiting mfc */ - struct mfc *mfc_next; /* next mfc entry */ -}; -#endif /* !KERNEL */ -#endif /* PRIVATE */ - -/* - * Struct used to communicate from kernel to multicast router - * note the convenient similarity to an IP packet - */ -struct igmpmsg { - u_int32_t unused1; - u_int32_t unused2; - u_char im_msgtype; /* what type of message */ -#define IGMPMSG_NOCACHE 1 -#define IGMPMSG_WRONGVIF 2 - u_char im_mbz; /* must be zero */ - u_char im_vif; /* vif rec'd on */ - u_char unused3; - struct in_addr im_src, im_dst; -}; - -#define MFCTBLSIZ CONFIG_MFCTBLSIZ - -#ifdef BSD_KERNEL_PRIVATE -/* - * Argument structure used for pkt info. while upcall is made - */ -struct rtdetq { - struct mbuf *m; /* A copy of the packet */ - struct ifnet *ifp; /* Interface pkt came in on */ - vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */ -#if UPCALL_TIMING - struct timeval t; /* Timestamp */ -#endif /* UPCALL_TIMING */ - struct rtdetq *next; /* Next in list of packets */ -}; - -#if (CONFIG_MFCTBLSIZ & (CONFIG_MFCTBLSIZ - 1)) == 0 /* from sys:route.h */ -#define MFCHASHMOD(h) ((h) & (CONFIG_MFCTBLSIZ - 1)) -#else -#define MFCHASHMOD(h) ((h) % CONFIG_MFCTBLSIZ) -#endif - -#define MAX_UPQ 4 /* max. no of pkts in upcall Q */ - -/* - * Token Bucket filter code - */ -#define MAX_BKT_SIZE 10000 /* 10K bytes size */ -#define MAXQSIZE 10 /* max # of pkts in queue */ - -/* - * the token bucket filter at each vif - */ -struct tbf -{ - struct timeval tbf_last_pkt_t; /* arr. time of last pkt */ - u_int32_t tbf_n_tok; /* no of tokens in bucket */ - u_int32_t tbf_q_len; /* length of queue at this vif */ - u_int32_t tbf_max_q_len; /* max. queue length */ - struct mbuf *tbf_q; /* Packet queue */ - struct mbuf *tbf_t; /* tail-insertion pointer */ -}; - - -struct sockopt; - -extern int (*ip_mrouter_set)(struct socket *, struct sockopt *); -extern int (*ip_mrouter_get)(struct socket *, struct sockopt *); -extern int (*ip_mrouter_done)(void); -#if MROUTING -extern int (*mrt_ioctl)(u_long, caddr_t); -#else -extern int (*mrt_ioctl)(u_long, caddr_t, struct proc *); -#endif - -#endif /* BSD_KERNEL_PRIVATE */ -#endif /* _NETINET_IP_MROUTE_H_ */ diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index 3b0143b3c..f59d299a9 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -123,6 +123,10 @@ #endif #endif /* IPSEC */ +#if NECP +#include +#endif /* NECP */ + #if IPFIREWALL #include #if IPDIVERT @@ -261,6 +265,11 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, struct socket *so = NULL; struct secpolicy *sp = NULL; #endif /* IPSEC */ +#if NECP + necp_kernel_policy_result necp_result = 0; + necp_kernel_policy_result_parameter necp_result_parameter; + necp_kernel_policy_id necp_matched_policy_id = 0; +#endif /* NECP */ #if IPFIREWALL int ipfwoff; struct sockaddr_in *next_hop_from_ipfwd_tag = NULL; @@ -276,6 +285,9 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, #if IPSEC struct ipsec_output_state ipsec_state; #endif /* IPSEC */ +#if NECP + struct route necp_route; +#endif /* NECP */ #if IPFIREWALL || DUMMYNET struct ip_fw_args args; #endif /* IPFIREWALL || DUMMYNET */ @@ -288,6 +300,7 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, struct ipf_pktopts ipf_pktopts; } ipobz; #define ipsec_state ipobz.ipsec_state +#define necp_route ipobz.necp_route #define args ipobz.args #define sro_fwd ipobz.sro_fwd #define saved_route ipobz.saved_route @@ -299,6 +312,8 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, boolean_t nocell : 1; /* set once */ boolean_t isbroadcast : 1; boolean_t didfilter : 1; + boolean_t noexpensive : 1; /* set once */ + boolean_t awdl_unrestricted : 1; /* set once */ #if IPFIREWALL_FORWARD boolean_t fwd_rewrite_src : 1; #endif /* IPFIREWALL_FORWARD */ @@ -306,6 +321,11 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, uint32_t raw; } ipobf = { .raw = 0 }; +#define IP_CHECK_RESTRICTIONS(_ifp, _ipobf) \ + (((_ipobf).nocell && IFNET_IS_CELLULAR(_ifp)) || \ + ((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) || \ + (!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp))) + KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); VERIFY(m0->m_flags & M_PKTHDR); @@ -380,15 +400,15 @@ ipfw_tags_done: if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) { /* If packet is bound to an interface, check bound policies */ if ((flags & IP_OUTARGS) && (ipoa != NULL) && - (ipoa->ipoa_flags & IPOAF_BOUND_IF) && - ipoa->ipoa_boundif != IFSCOPE_NONE) { + (ipoa->ipoa_flags & IPOAF_BOUND_IF) && + ipoa->ipoa_boundif != IFSCOPE_NONE) { if (ipsec4_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND, - &flags, ipoa, &sp) != 0) + &flags, ipoa, &sp) != 0) goto bad; } } #endif /* IPSEC */ - + VERIFY(ro != NULL); if (ip_doscopedroute && (flags & IP_OUTARGS)) { @@ -423,16 +443,30 @@ ipfw_tags_done: } } - if ((flags & IP_OUTARGS) && (ipoa->ipoa_flags & IPOAF_NO_CELLULAR)) { - ipobf.nocell = TRUE; - ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; - } - if (flags & IP_OUTARGS) { + if (ipoa->ipoa_flags & IPOAF_NO_CELLULAR) { + ipobf.nocell = TRUE; + ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; + } + if (ipoa->ipoa_flags & IPOAF_NO_EXPENSIVE) { + ipobf.noexpensive = TRUE; + ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE; + } + if (ipoa->ipoa_flags & IPOAF_AWDL_UNRESTRICTED) + ipobf.awdl_unrestricted = TRUE; adv = &ipoa->ipoa_flowadv; adv->code = FADV_SUCCESS; ipoa->ipoa_retflags = 0; } + +#if IPSEC + if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) { + so = ipsec_getsocket(m); + if (so != NULL) { + (void) ipsec_setsocket(m, NULL); + } + } +#endif /* IPSEC */ #if DUMMYNET if (args.fwa_ipfw_rule != NULL || args.fwa_pf_rule != NULL) { @@ -450,12 +484,7 @@ ipfw_tags_done: } RT_UNLOCK(ro->ro_rt); } -#if IPSEC - if (ipsec_bypass 
== 0 && !(flags & IP_NOIPSEC)) { - so = ipsec_getsocket(m); - (void) ipsec_setsocket(m, NULL); - } -#endif /* IPSEC */ + #if IPFIREWALL if (args.fwa_ipfw_rule != NULL) goto skip_ipsec; @@ -465,13 +494,6 @@ ipfw_tags_done: } #endif /* DUMMYNET */ -#if IPSEC - if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) { - so = ipsec_getsocket(m); - (void) ipsec_setsocket(m, NULL); - } -#endif /* IPSEC */ - loopit: ipobf.isbroadcast = FALSE; ipobf.didfilter = FALSE; @@ -654,12 +676,13 @@ loopit: ia0 = in_selectsrcif(ip, ro, ifscope); /* - * If the source address belongs to a cellular interface - * and the caller forbids our using interfaces of such - * type, pretend that there is no route. + * If the source address belongs to a restricted + * interface and the caller forbids our using + * interfaces of such type, pretend that there is no + * route. */ - if (ipobf.nocell && ia0 != NULL && - IFNET_IS_CELLULAR(ia0->ifa_ifp)) { + if (ia0 != NULL && + IP_CHECK_RESTRICTIONS(ia0->ifa_ifp, ipobf)) { IFA_REMREF(ia0); ia0 = NULL; error = EHOSTUNREACH; @@ -751,13 +774,14 @@ loopit: rtalloc_scoped_ign(ro, ign, ifscope); /* - * If the route points to a cellular interface and the - * caller forbids our using interfaces of such type, + * If the route points to a cellular/expensive interface + * and the caller forbids our using interfaces of such type, * pretend that there is no route. */ - if (ipobf.nocell && ro->ro_rt != NULL) { + if (ro->ro_rt != NULL) { RT_LOCK_SPIN(ro->ro_rt); - if (IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) { + if (IP_CHECK_RESTRICTIONS(ro->ro_rt->rt_ifp, + ipobf)) { RT_UNLOCK(ro->ro_rt); ROUTE_RELEASE(ro); if (flags & IP_OUTARGS) { @@ -867,11 +891,6 @@ loopit: if (imo->imo_multicast_ifp != NULL) ifp = imo->imo_multicast_ifp; IMO_UNLOCK(imo); -#if MROUTING - if (vif != -1 && (!(flags & IP_RAWOUTPUT) || - ip->ip_src.s_addr == INADDR_ANY)) - ip->ip_src.s_addr = ip_mcast_src(vif); -#endif /* MROUTING */ } else if (!(flags & IP_RAWOUTPUT)) { vif = -1; ip->ip_ttl = ttl; @@ -977,39 +996,6 @@ loopit: } ip_mloopback(srcifp, ifp, m, dst, hlen); } -#if MROUTING - else { - /* - * If we are acting as a multicast router, perform - * multicast forwarding as if the packet had just - * arrived on the interface to which we are about - * to send. The multicast forwarding function - * recursively calls this function, using the - * IP_FORWARDING flag to prevent infinite recursion. - * - * Multicasts that are looped back by ip_mloopback(), - * above, will be forwarded by the ip_input() routine, - * if necessary. - */ - if (ip_mrouter && !(flags & IP_FORWARDING)) { - /* - * Check if rsvp daemon is running. If not, - * don't set ip_moptions. This ensures that - * the packet is multicast and not just sent - * down one link as prescribed by rsvpd. - */ - if (!rsvp_on) - imo = NULL; - if (ip_mforward(ip, ifp, m, imo) != 0) { - m_freem(m); - if (inm != NULL) - INM_REMREF(inm); - OSAddAtomic(1, &ipstat.ips_cantforward); - goto done; - } - } - } -#endif /* MROUTING */ if (inm != NULL) INM_REMREF(inm); /* @@ -1178,28 +1164,63 @@ sendit: ipf_unref(); } +#if NECP + /* Process Network Extension Policy. Will Pass, Drop, or Rebind packet. */ + necp_matched_policy_id = necp_ip_output_find_policy_match (m, + flags, (flags & IP_OUTARGS) ? 
ipoa : NULL, &necp_result, &necp_result_parameter); + if (necp_matched_policy_id) { + necp_mark_packet_from_ip(m, necp_matched_policy_id); + switch (necp_result) { + case NECP_KERNEL_POLICY_RESULT_PASS: + goto skip_ipsec; + case NECP_KERNEL_POLICY_RESULT_DROP: + case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: + /* Flow divert packets should be blocked at the IP layer */ + error = EHOSTUNREACH; + goto bad; + case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: { + /* Verify that the packet is being routed to the tunnel */ + struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter); + if (policy_ifp == ifp) { + goto skip_ipsec; + } else { + if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, &necp_route, AF_INET)) { + /* Set ifp to the tunnel interface, since it is compatible with the packet */ + ifp = policy_ifp; + ro = &necp_route; + goto skip_ipsec; + } else { + error = ENETUNREACH; + goto bad; + } + } + break; + } + default: + break; + } + } +#endif /* NECP */ + #if IPSEC - /* temporary for testing only: bypass ipsec alltogether */ - if (ipsec_bypass != 0 || (flags & IP_NOIPSEC)) goto skip_ipsec; KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); - /* May have been set above if packet was bound */ if (sp == NULL) { /* get SP for this packet */ - if (so == NULL) - sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, - flags, &error); - else + if (so != NULL) { sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, - so, &error); - + so, &error); + } else { + sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, + flags, &error); + } if (sp == NULL) { IPSEC_STAT_INCREMENT(ipsecstat.out_inval); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, - 0, 0, 0, 0, 0); + 0, 0, 0, 0, 0); goto bad; } } @@ -1236,8 +1257,6 @@ sendit: if (sp->ipsec_if) { /* Verify the redirect to ipsec interface */ if (sp->ipsec_if == ifp) { - /* Set policy for mbuf */ - m->m_pkthdr.ipsec_policy = sp->id; goto skip_ipsec; } goto bad; @@ -1697,9 +1716,8 @@ pass: ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { OSAddAtomic(1, &ipstat.ips_badaddr); - m_freem(m); error = EADDRNOTAVAIL; - goto done; + goto bad; } ip_output_checksum(ifp, m, (IP_VHL_HL(ip->ip_vhl) << 2), @@ -1741,6 +1759,10 @@ pass: error = dlil_output(ifp, PF_INET, m, ro->ro_rt, SA(dst), 0, adv); + if (dlil_verbose && error) { + printf("dlil_output error on interface %s: %d\n", + ifp->if_xname, error); + } scnt = 0; goto done; } else { @@ -1763,6 +1785,10 @@ sendchain: error = dlil_output(ifp, PF_INET, packetlist, ro->ro_rt, SA(dst), 0, adv); + if (dlil_verbose && error) { + printf("dlil_output error on interface %s: %d\n", + ifp->if_xname, error); + } pktcnt = 0; scnt = 0; bytecnt = 0; @@ -1834,6 +1860,10 @@ sendchain: } error = dlil_output(ifp, PF_INET, m, ro->ro_rt, SA(dst), 0, adv); + if (dlil_verbose && error) { + printf("dlil_output error on interface %s: %d\n", + ifp->if_xname, error); + } } else { m_freem(m); } @@ -1855,6 +1885,9 @@ done: key_freesp(sp, KEY_SADB_UNLOCKED); } #endif /* IPSEC */ +#if NECP + ROUTE_RELEASE(&necp_route); +#endif /* NECP */ #if DUMMYNET ROUTE_RELEASE(&saved_route); #endif /* DUMMYNET */ @@ -1865,7 +1898,9 @@ done: KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0); return (error); bad: - m_freem(m0); + if (pktcnt > 0) + m0 = packetlist; + m_freem_list(m0); goto done; #undef ipsec_state @@ -1873,6 +1908,7 @@ bad: #undef sro_fwd #undef saved_route #undef ipf_pktopts +#undef IP_CHECK_RESTRICTIONS } 
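The new IP_CHECK_RESTRICTIONS macro folds three per-packet interface policies into one predicate: cellular and expensive interfaces are denied when the caller opted out of them, and AWDL-restricted interfaces are denied unless the caller is explicitly unrestricted. A user-space truth table with the IFNET_IS_* tests stubbed out as plain fields:

#include <stdio.h>
#include <stdbool.h>

struct ifp_sk   { bool cellular, expensive, awdl_restricted; };
struct ipobf_sk { bool nocell, noexpensive, awdl_unrestricted; };

/* mirrors IP_CHECK_RESTRICTIONS, with stubbed interface tests */
static bool
restricted(const struct ifp_sk *ifp, const struct ipobf_sk *f)
{
	return (f->nocell && ifp->cellular) ||
	       (f->noexpensive && ifp->expensive) ||
	       (!f->awdl_unrestricted && ifp->awdl_restricted);
}

int
main(void)
{
	struct ifp_sk cell = { true, true, false };  /* cellular treated as expensive here */
	struct ifp_sk awdl = { false, false, true };

	struct ipobf_sk nocell_caller = { .nocell = true };
	struct ipobf_sk awdl_caller   = { .awdl_unrestricted = true };

	printf("nocell caller on cellular: %s\n",
	    restricted(&cell, &nocell_caller) ? "denied" : "allowed");
	printf("default caller on AWDL:    %s\n",
	    restricted(&awdl, &(struct ipobf_sk){ 0 }) ? "denied" : "allowed");
	printf("AWDL-unrestricted on AWDL: %s\n",
	    restricted(&awdl, &awdl_caller) ? "denied" : "allowed");
	return 0;
}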
int @@ -2509,7 +2545,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) int priv; struct mbuf *m; int optname; - + if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ @@ -2595,7 +2631,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) break; /* once set, it cannot be unset */ - if (!optval && (inp->inp_flags & INP_NO_IFT_CELLULAR)) { + if (!optval && INP_NO_CELLULAR(inp)) { error = EINVAL; break; } @@ -2696,19 +2732,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) #if IPSEC case IP_IPSEC_POLICY: { - struct mbuf *m = NULL; - caddr_t req = NULL; - size_t len = 0; - - if (m != NULL) { - req = mtod(m, caddr_t); - len = m->m_len; - } - error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); - if (error == 0) - error = soopt_mcopyout(sopt, m); /* XXX */ - if (error == 0) - m_freem(m); + error = 0; /* This option is no longer supported */ break; } #endif /* IPSEC */ @@ -2730,7 +2754,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) break; case IP_NO_IFT_CELLULAR: - optval = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0; + optval = INP_NO_CELLULAR(inp) ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof (optval)); break; diff --git a/bsd/netinet/ip_var.h b/bsd/netinet/ip_var.h index 700fc6431..9440d9ad3 100644 --- a/bsd/netinet/ip_var.h +++ b/bsd/netinet/ip_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -284,6 +284,9 @@ struct ip_out_args { #define IPOAF_BOUND_IF 0x00000002 /* boundif value is valid */ #define IPOAF_BOUND_SRCADDR 0x00000004 /* bound to src address */ #define IPOAF_NO_CELLULAR 0x00000010 /* skip IFT_CELLULAR */ +#define IPOAF_NO_EXPENSIVE 0x00000020 /* skip IFT_EXPENSIVE */ +#define IPOAF_AWDL_UNRESTRICTED 0x00000040 /* can send over + AWDL_RESTRICTED */ u_int32_t ipoa_retflags; /* IPOARF return flags (see below) */ #define IPOARF_IFDENIED 0x00000001 /* denied access to interface */ }; @@ -297,24 +300,6 @@ extern struct protosw *ip_protox[]; extern struct pr_usrreqs rip_usrreqs; extern int ip_doscopedroute; -#if MROUTING -extern int (*legal_vif_num)(int); -extern u_int32_t (*ip_mcast_src)(int); -extern int rsvp_on; -extern struct socket *ip_rsvpd; /* reservation protocol daemon */ -extern struct socket *ip_mrouter; /* multicast routing daemon */ - -extern void rsvp_input(struct mbuf *, int); -extern int ip_rsvp_init(struct socket *); -extern int ip_rsvp_done(void); -extern int ip_rsvp_vif_init(struct socket *, struct sockopt *); -extern int ip_rsvp_vif_done(struct socket *, struct sockopt *); -extern void ip_rsvp_force_done(struct socket *); -extern void ipip_input(struct mbuf *, int); -extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, - struct ip_moptions *); -#endif /* MROUTING */ - extern void ip_moptions_init(void); extern struct ip_moptions *ip_allocmoptions(int); extern int inp_getmoptions(struct inpcb *, struct sockopt *); diff --git a/bsd/netinet/kpi_ipfilter.c b/bsd/netinet/kpi_ipfilter.c index edf1fd729..87250037e 100644 --- a/bsd/netinet/kpi_ipfilter.c +++ b/bsd/netinet/kpi_ipfilter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2013 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -157,6 +157,30 @@ ipf_addv6( return ipf_add(filter, filter_ref, &ipv6_filters); } +static errno_t +ipf_input_detached(void *cookie, mbuf_t *data, int offset, u_int8_t protocol) +{ +#pragma unused(cookie, data, offset, protocol) + +#if DEBUG + printf("ipf_input_detached\n"); +#endif /* DEBUG */ + + return (0); +} + +static errno_t +ipf_output_detached(void *cookie, mbuf_t *data, ipf_pktopts_t options) +{ +#pragma unused(cookie, data, options) + +#if DEBUG + printf("ipf_output_detached\n"); +#endif /* DEBUG */ + + return (0); +} + errno_t ipf_remove( ipfilter_t filter_ref) @@ -181,8 +205,8 @@ ipf_remove( if (kipf_ref) { kipf_delayed_remove++; TAILQ_INSERT_TAIL(&tbr_filters, match, ipf_tbr); - match->ipf_filter.ipf_input = 0; - match->ipf_filter.ipf_output = 0; + match->ipf_filter.ipf_input = ipf_input_detached; + match->ipf_filter.ipf_output = ipf_output_detached; lck_mtx_unlock(kipf_lock); } else { TAILQ_REMOVE(head, match, ipf_link); @@ -309,6 +333,8 @@ ipf_injectv4_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options) ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; if (options->ippo_flags & IPPOF_BOUND_SRCADDR) ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR; + if (options->ippo_flags & IPPOF_NO_IFF_EXPENSIVE) + ipoa.ipoa_flags |= IPOAF_NO_EXPENSIVE; } bzero(&ro, sizeof(struct route)); @@ -383,6 +409,8 @@ ipf_injectv6_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options) ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR; if (options->ippo_flags & IPPOF_BOUND_SRCADDR) ip6oa.ip6oa_flags |= IP6OAF_BOUND_SRCADDR; + if (options->ippo_flags & IPPOF_NO_IFF_EXPENSIVE) + ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE; } bzero(&ro, sizeof(struct route_in6)); diff --git a/bsd/netinet/kpi_ipfilter.h b/bsd/netinet/kpi_ipfilter.h index 6fe3727fe..210f99f97 100644 --- a/bsd/netinet/kpi_ipfilter.h +++ b/bsd/netinet/kpi_ipfilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2012 Apple Inc. All rights reserved. + * Copyright (c) 2008-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -57,6 +57,7 @@ struct ipf_pktopts { #define IPPOF_SELECT_SRCIF 0x8 #define IPPOF_BOUND_SRCADDR 0x10 #define IPPOF_SHIFT_IFSCOPE 16 +#define IPPOF_NO_IFF_EXPENSIVE 0x20 #endif /* PRIVATE */ typedef struct ipf_pktopts *ipf_pktopts_t; diff --git a/bsd/netinet/mptcp.c b/bsd/netinet/mptcp.c index 8487a1c3f..1945ecfcf 100644 --- a/bsd/netinet/mptcp.c +++ b/bsd/netinet/mptcp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,9 +89,11 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED, /* - * MPTCP subflows have TCP keepalives set to ON + * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime + * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout. + * Some carrier networks have a timeout of 10 or 15 minutes. 
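The value set just below works out to 60*14 = 840 seconds, a 14-minute interval that stays inside even the 15-minute NAT timeouts mentioned above. Because the knob is published through SYSCTL_INT as net.inet.mptcp.keepalive, it can be read back at run time with sysctlbyname; a small sketch (the error path covers kernels built without MPTCP):

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	int keeptime = 0;
	size_t len = sizeof(keeptime);

	/* net.inet.mptcp.keepalive: seconds between subflow keepalives */
	if (sysctlbyname("net.inet.mptcp.keepalive", &keeptime, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");   /* OID absent on this kernel */
		return 1;
	}
	printf("subflow keeptime: %d s (%d min)\n", keeptime, keeptime / 60);
	return 0;
}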
*/ -int mptcp_subflow_keeptime = 60; +int mptcp_subflow_keeptime = 60*14; SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED, &mptcp_subflow_keeptime, 0, "Keepalive in seconds"); @@ -109,6 +111,25 @@ int mptcp_remaddr_enable = 1; SYSCTL_INT(_net_inet_mptcp, OID_AUTO, remaddr, CTLFLAG_RW | CTLFLAG_LOCKED, &mptcp_remaddr_enable, 0, "Enable REMOVE_ADDR option"); +/* + * FastJoin Option + */ +int mptcp_fastjoin = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fastjoin, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_fastjoin, 0, "Enable FastJoin Option"); + +int mptcp_zerortt_fastjoin = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, zerortt_fastjoin, CTLFLAG_RW | + CTLFLAG_LOCKED, &mptcp_zerortt_fastjoin, 0, + "Enable Zero RTT Fast Join"); + +/* + * R/W Notification on resume + */ +int mptcp_rwnotify = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rwnotify, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_rwnotify, 0, "Enable RW notify on resume"); + /* * MPTCP input, called when data has been read from a subflow socket. */ @@ -120,8 +141,9 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) u_int64_t mb_dsn; u_int32_t mb_datalen; int count = 0; - struct mbuf *save = NULL; + struct mbuf *save = NULL, *prev = NULL; struct mbuf *freelist = NULL, *tail = NULL; + boolean_t in_fallback = FALSE; VERIFY(m->m_flags & M_PKTHDR); @@ -139,11 +161,21 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) count = mp_so->so_rcv.sb_cc; VERIFY(m != NULL); + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + + /* Ok to check for this flag without lock as its set in this thread */ + in_fallback = (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP); + /* * In the degraded fallback case, data is accepted without DSS map */ - if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { - /* XXX need a check that this is indeed degraded */ + if (in_fallback) { +fallback: + /* + * assume degraded flow as this may be the first packet + * without DSS, and the subflow state is not updated yet. + */ if (sbappendstream(&mp_so->so_rcv, m)) sorwakeup(mp_so); DTRACE_MPTCP5(receive__degraded, struct mbuf *, m, @@ -156,13 +188,33 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) return; } - mp_tp = mpte->mpte_mptcb; - VERIFY(mp_tp != NULL); - MPT_LOCK(mp_tp); do { + /* If fallback occurs, mbufs will not have PKTF_MPTCP set */ + if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { + MPT_UNLOCK(mp_tp); + goto fallback; + } + save = m->m_next; - m->m_next = NULL; + /* + * A single TCP packet formed of multiple mbufs + * holds DSS mapping in the first mbuf of the chain. + * Other mbufs in the chain may have M_PKTHDR set + * even though they belong to the same TCP packet + * and therefore use the DSS mapping stored in the + * first mbuf of the mbuf chain. mptcp_input() can + * get an mbuf chain with multiple TCP packets. 
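
/*
 * Sketch of the chain walk that follows: an mbuf chain handed to
 * mptcp_input() may carry several TCP packets back to back, and only
 * the first mbuf of each packet has a DSS mapping (M_PKTHDR plus
 * PKTF_MPTCP). The code advances until the next mapping-bearing mbuf
 * and cuts the chain there, so one mapping covers all mbufs of one
 * packet. Simplified stand-in types below; no kernel interfaces.
 */
#include <stddef.h>
#include <stdio.h>

struct ex_mbuf {
    struct ex_mbuf *next;
    int has_dss_map;     /* stands in for M_PKTHDR + PKTF_MPTCP */
};

/* Detach and return the first packet; *rest points at the next one. */
static struct ex_mbuf *
ex_split_first_packet(struct ex_mbuf *m, struct ex_mbuf **rest)
{
    struct ex_mbuf *save = m->next, *prev = NULL;

    while (save != NULL && !save->has_dss_map) {
        prev = save;
        save = save->next;
    }
    if (prev != NULL)
        prev->next = NULL;   /* cut after the packet's last mbuf */
    else
        m->next = NULL;      /* single-mbuf packet */
    *rest = save;
    return m;
}

int
main(void)
{
    struct ex_mbuf c = { NULL, 1 }, b = { &c, 0 }, a = { &b, 1 };
    struct ex_mbuf *rest = NULL;
    struct ex_mbuf *pkt = ex_split_first_packet(&a, &rest);
    printf("first packet mbufs: %d, rest starts new map: %d\n",
        pkt->next != NULL ? 2 : 1, rest != NULL && rest->has_dss_map);
    return 0;
}
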
+ */ + while (save && (!(save->m_flags & M_PKTHDR) || + !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) { + prev = save; + save = save->m_next; + } + if (prev) + prev->m_next = NULL; + else + m->m_next = NULL; mb_dsn = m->m_pkthdr.mp_dsn; mb_datalen = m->m_pkthdr.mp_rlen; @@ -185,19 +237,20 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) } if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvatmark)) { - VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP); - VERIFY(m->m_flags & M_PKTHDR); - VERIFY(m->m_len >= (int)mb_datalen); - VERIFY(m->m_pkthdr.len >= (int)mb_datalen); if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen), mp_tp->mpt_rcvatmark)) { if (freelist == NULL) - freelist = tail = m; - else { + freelist = m; + else tail->m_next = m; + + if (prev != NULL) + tail = prev; + else tail = m; - } + m = save; + prev = save = NULL; continue; } else { m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn)); @@ -228,6 +281,7 @@ mptcp_input(struct mptses *mpte, struct mbuf *m) mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp); mp_tp->mpt_rcvatmark += count; m = save; + prev = save = NULL; count = mp_so->so_rcv.sb_cc; } while (m); MPT_UNLOCK(mp_tp); @@ -325,7 +379,7 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - MPTS_LOCK_SPIN(mpts); + MPTS_LOCK(mpts); if ((ignore) && (mpts == ignore)) { MPTS_UNLOCK(mpts); @@ -338,7 +392,12 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) break; } - if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) { + /* + * Subflows with Fastjoin allow data to be written before + * the subflow is mp capable. + */ + if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) && + !(mpts->mpts_flags & MPTSF_FASTJ_REQD)) { MPTS_UNLOCK(mpts); continue; } @@ -348,11 +407,18 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) continue; } + if ((mpts->mpts_flags & MPTSF_DISCONNECTED) || + (mpts->mpts_flags & MPTSF_DISCONNECTING)) { + MPTS_UNLOCK(mpts); + continue; + } + if (mpts->mpts_flags & MPTSF_FAILINGOVER) { so = mpts->mpts_socket; if ((so) && (!(so->so_flags & SOF_PCBCLEARING))) { socket_lock(so, 1); - if (so->so_snd.sb_cc == 0) { + if ((so->so_snd.sb_cc == 0) && + (mptcp_no_rto_spike(so))) { mpts->mpts_flags &= ~MPTSF_FAILINGOVER; so->so_flags &= ~SOF_MP_TRYFAILOVER; fallback = mpts; @@ -375,8 +441,7 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) } /* When there are no preferred flows, use first one in list */ - if (fallback == NULL) - fallback = mpts; + fallback = mpts; MPTS_UNLOCK(mpts); } @@ -391,6 +456,31 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) return (mpts); } +struct mptsub * +mptcp_get_pending_subflow(struct mptses *mpte, struct mptsub *ignore) +{ + struct mptsub *mpts = NULL; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + MPTS_LOCK(mpts); + + if ((ignore) && (mpts == ignore)) { + MPTS_UNLOCK(mpts); + continue; + } + + if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) { + MPTS_UNLOCK(mpts); + break; + } + + MPTS_UNLOCK(mpts); + } + return (mpts); +} + void mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) { @@ -406,22 +496,30 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) break; case MPTCPS_ESTABLISHED: - if (event == MPCE_CLOSE) + if (event == MPCE_CLOSE) { mp_tp->mpt_state = MPTCPS_FIN_WAIT_1; - else if (event == MPCE_RECV_DATA_FIN) + mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ + } + else if (event == MPCE_RECV_DATA_FIN) { + mp_tp->mpt_rcvnxt += 1; /* adj 
remote data FIN */ mp_tp->mpt_state = MPTCPS_CLOSE_WAIT; + } break; case MPTCPS_CLOSE_WAIT: - if (event == MPCE_CLOSE) + if (event == MPCE_CLOSE) { mp_tp->mpt_state = MPTCPS_LAST_ACK; + mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ + } break; case MPTCPS_FIN_WAIT_1: if (event == MPCE_RECV_DATA_ACK) mp_tp->mpt_state = MPTCPS_FIN_WAIT_2; - else if (event == MPCE_RECV_DATA_FIN) + else if (event == MPCE_RECV_DATA_FIN) { + mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ mp_tp->mpt_state = MPTCPS_CLOSING; + } break; case MPTCPS_CLOSING: @@ -431,22 +529,27 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) case MPTCPS_LAST_ACK: if (event == MPCE_RECV_DATA_ACK) - mp_tp->mpt_state = MPTCPS_CLOSED; + mp_tp->mpt_state = MPTCPS_TERMINATE; break; case MPTCPS_FIN_WAIT_2: - if (event == MPCE_RECV_DATA_FIN) + if (event == MPCE_RECV_DATA_FIN) { + mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ mp_tp->mpt_state = MPTCPS_TIME_WAIT; + } break; case MPTCPS_TIME_WAIT: break; case MPTCPS_FASTCLOSE_WAIT: - if (event == MPCE_CLOSE) - mp_tp->mpt_state = MPTCPS_CLOSED; + if (event == MPCE_CLOSE) { + /* no need to adjust for data FIN */ + mp_tp->mpt_state = MPTCPS_TERMINATE; + } + break; + case MPTCPS_TERMINATE: break; - default: VERIFY(0); /* NOTREACHED */ @@ -470,6 +573,16 @@ mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack) if (acked) { mp_tp->mpt_snduna += acked; + /* In degraded mode, we may get some Data ACKs */ + if ((tp->t_mpflags & TMPF_TCP_FALLBACK) && + !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) && + MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) { + /* bring back sndnxt to retransmit MPTCP data */ + mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail; + mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC; + tp->t_inpcb->inp_socket->so_flags1 |= + SOF1_POST_FALLBACK_SYNC; + } } if ((full_dack == mp_tp->mpt_sndmax) && (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1)) { diff --git a/bsd/netinet/mptcp_opt.c b/bsd/netinet/mptcp_opt.c index a4ea96ef6..173e56075 100644 --- a/bsd/netinet/mptcp_opt.c +++ b/bsd/netinet/mptcp_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. 
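
/*
 * Sketch of the Data-FIN accounting added to mptcp_close_fsm() above:
 * like a TCP FIN, an MPTCP Data FIN occupies one octet of data-level
 * sequence space, so the sender bumps sndmax when it queues its own
 * Data FIN and the receiver bumps rcvnxt when it consumes the peer's.
 * Minimal stand-alone model; states and fields are simplified.
 */
#include <stdint.h>
#include <stdio.h>

struct ex_mptcb {
    uint64_t sndmax;   /* highest data-level seq we will send, plus 1 */
    uint64_t rcvnxt;   /* next data-level seq expected from the peer */
};

static void
ex_local_close(struct ex_mptcb *mp)
{
    mp->sndmax += 1;   /* our Data FIN consumes one sequence number */
}

static void
ex_recv_data_fin(struct ex_mptcb *mp)
{
    mp->rcvnxt += 1;   /* peer's Data FIN consumes one sequence number */
}

int
main(void)
{
    struct ex_mptcb mp = { 1000, 500 };
    ex_local_close(&mp);
    ex_recv_data_fin(&mp);
    printf("sndmax=%llu rcvnxt=%llu\n",
        (unsigned long long)mp.sndmax, (unsigned long long)mp.rcvnxt);
    return 0;   /* prints sndmax=1001 rcvnxt=501 */
}
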
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -53,6 +53,15 @@ #include +/* + * SYSCTL for enforcing 64 bit dsn + */ +int32_t force_64bit_dsn = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, force_64bit_dsn, + CTLFLAG_RW|CTLFLAG_LOCKED, &force_64bit_dsn, 0, + "Force MPTCP 64bit dsn"); + + static int mptcp_validate_join_hmac(struct tcpcb *, u_char*, int); static int mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen); @@ -160,6 +169,17 @@ mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt, if ((flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { struct mptcp_mpjoin_opt_rsp mpjoin_rsp; + struct mptcb *mp_tp = tptomptp(tp); + + if (mp_tp == NULL) + return (optlen); + + MPT_LOCK(mp_tp); + if (mptcp_get_localkey(mp_tp) == 0) { + MPT_UNLOCK(mp_tp); + return (optlen); + } + MPT_UNLOCK(mp_tp); bzero(&mpjoin_rsp, sizeof (mpjoin_rsp)); mpjoin_rsp.mmjo_kind = TCPOPT_MULTIPATH; mpjoin_rsp.mmjo_len = sizeof (mpjoin_rsp); @@ -170,7 +190,7 @@ mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt, mptcp_get_rands(tp->t_local_aid, tptomptp(tp), &mpjoin_rsp.mmjo_rand, NULL); mpjoin_rsp.mmjo_mac = mptcp_get_trunced_hmac(tp->t_local_aid, - tptomptp(tp)); + mp_tp); memcpy(opt + optlen, &mpjoin_rsp, mpjoin_rsp.mmjo_len); optlen += mpjoin_rsp.mmjo_len; } else { @@ -192,6 +212,14 @@ mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt, &mpjoin_req.mmjo_rand, NULL); memcpy(opt + optlen, &mpjoin_req, mpjoin_req.mmjo_len); optlen += mpjoin_req.mmjo_len; + /* send an event up, if Fast Join is requested */ + if (mptcp_zerortt_fastjoin && + (so->so_flags & SOF_MPTCP_FASTJOIN)) { + soevent(so, + (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFASTJ)); + if (mptcp_dbg >= MP_ERR_DEBUG) + printf("%s: fast join request\n", __func__); + } } return (optlen); } @@ -216,6 +244,7 @@ mptcp_setup_join_ack_opts(struct tcpcb *tp, u_char *opt, unsigned optlen) sizeof (join_rsp2.mmjo_mac)); memcpy(opt + optlen, &join_rsp2, join_rsp2.mmjo_len); new_optlen = optlen + join_rsp2.mmjo_len; + tp->t_mpflags |= TMPF_FASTJOINBY2_SEND; return (new_optlen); } @@ -316,8 +345,7 @@ mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen) if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) { infin_opt.mdss_dsn = (u_int32_t) MPTCP_DATASEQ_LOW32(mp_tp->mpt_dsn_at_csum_fail); - error = mptcp_get_map_for_dsn(so, mp_tp->mpt_dsn_at_csum_fail, - &infin_opt.mdss_subflow_seqn); + infin_opt.mdss_subflow_seqn = mp_tp->mpt_ssn_at_csum_fail; } else { infin_opt.mdss_dsn = (u_int32_t) MPTCP_DATASEQ_LOW32(mp_tp->mpt_snduna); @@ -407,7 +435,7 @@ unsigned int mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, unsigned int optlen, int flags, int datalen, unsigned int **dss_lenp, u_int8_t **finp, u_int64_t *dss_valp, - u_int32_t **sseqp) + u_int32_t **sseqp, boolean_t *p_mptcp_acknow) { struct inpcb *inp = (struct inpcb *)tp->t_inpcb; struct socket *so = inp->inp_socket; @@ -415,22 +443,25 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, boolean_t do_csum = FALSE; boolean_t send_64bit_dsn = FALSE; boolean_t send_64bit_ack = FALSE; + u_int32_t old_mpt_flags = tp->t_mpflags & + (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL); - if (mptcp_enable == 0) { + if ((mptcp_enable == 0) || + (mp_tp == NULL) || + (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) || + (tp->t_state == TCPS_CLOSED)) { /* do nothing */ - return (optlen); - } - - if (mp_tp == NULL) { - return (optlen); + goto ret_optlen; } - if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) 
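
/*
 * Sketch of the guard added above: before emitting an MP_JOIN SYN/ACK
 * option, the code now verifies under the MPTCP lock that a local key
 * exists; if not, it returns the option length unchanged, i.e. emits
 * no option rather than one built from uninitialized state. Locking
 * and types below are simplified stand-ins.
 */
#include <stdio.h>
#include <string.h>

struct ex_mptcb { unsigned long long local_key; };

static unsigned
ex_setup_join_opt(struct ex_mptcb *mp, unsigned char *opt, unsigned optlen)
{
    unsigned char join_opt[12];

    /* the MPTCP lock would be held here; bail if no key is set yet */
    if (mp->local_key == 0)
        return optlen;              /* no option appended */

    memset(join_opt, 0, sizeof(join_opt));
    /* ... fill kind/len/subtype/token derived from the key ... */
    memcpy(opt + optlen, join_opt, sizeof(join_opt));
    return optlen + (unsigned)sizeof(join_opt);
}

int
main(void)
{
    unsigned char buf[40];
    struct ex_mptcb mp = { 0 };
    printf("no key: optlen=%u\n", ex_setup_join_opt(&mp, buf, 0));
    mp.local_key = 0xabcdef;
    printf("keyed:  optlen=%u\n", ex_setup_join_opt(&mp, buf, 0));
    return 0;
}
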
{ do_csum = TRUE; + } /* tcp_output handles the SYN path separately */ - if (flags & TH_SYN) - return (optlen); + if (flags & TH_SYN) { + goto ret_optlen; + } if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpcapable_opt_common)) { @@ -440,13 +471,13 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, "len %d\n", __func__, optlen, flags, tp->t_mpflags, datalen); } - return (optlen); + goto ret_optlen; } if (tp->t_mpflags & TMPF_FASTCLOSE) { optlen = mptcp_send_fastclose(tp, opt, optlen, flags); VERIFY(datalen == 0); - return (optlen); + goto ret_optlen; } if (tp->t_mpflags & TMPF_TCP_FALLBACK) { @@ -454,12 +485,20 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, optlen = mptcp_send_mpfail(tp, opt, optlen); else if (!(tp->t_mpflags & TMPF_INFIN_SENT)) optlen = mptcp_send_infinite_mapping(tp, opt, optlen); - return (optlen); + goto ret_optlen; } if (tp->t_mpflags & TMPF_SND_MPPRIO) { optlen = mptcp_snd_mpprio(tp, opt, optlen); - return (optlen); + goto ret_optlen; + } + + if (((tp->t_mpflags & TMPF_FASTJOINBY2_SEND) || + (tp->t_mpflags & TMPF_FASTJOIN_SEND )) && + (datalen > 0)) { + tp->t_mpflags &= ~TMPF_FASTJOINBY2_SEND; + tp->t_mpflags &= ~TMPF_FASTJOIN_SEND; + goto fastjoin_send; } if ((tp->t_mpflags & TMPF_PREESTABLISHED) && @@ -468,7 +507,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, struct mptcp_mpcapable_opt_rsp1 mptcp_opt; if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpcapable_opt_rsp1)) - return (optlen); + goto ret_optlen; bzero(&mptcp_opt, sizeof (struct mptcp_mpcapable_opt_rsp1)); mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH; mptcp_opt.mmc_common.mmco_len = @@ -498,7 +537,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, if (mptcp_dbg >= MP_ERR_DEBUG) { printf("MPTCP SUCCESS %s: established.\n", __func__); } - return (optlen); + goto ret_optlen; } else if (tp->t_mpflags & TMPF_MPTCP_TRUE) { if (tp->t_mpflags & TMPF_SND_REM_ADDR) { int rem_opt_len = sizeof (struct mptcp_remaddr_opt); @@ -506,7 +545,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, mptcp_send_remaddr_opt(tp, (struct mptcp_remaddr_opt *)(opt + optlen)); optlen += rem_opt_len; - return (optlen); + goto ret_optlen; } else { tp->t_mpflags &= ~TMPF_SND_REM_ADDR; } @@ -518,6 +557,12 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, (!(tp->t_mpflags & TMPF_RECVD_JOIN)) && (tp->t_mpflags & TMPF_SENT_JOIN) && (!(tp->t_mpflags & TMPF_MPTCP_TRUE))) { + MPT_LOCK(mp_tp); + if (mptcp_get_localkey(mp_tp) == 0) { + MPT_UNLOCK(mp_tp); + goto ret_optlen; + } + MPT_UNLOCK(mp_tp); /* Do the ACK part */ optlen = mptcp_setup_join_ack_opts(tp, opt, optlen); if (!tp->t_mpuna) { @@ -526,16 +571,19 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, /* Start a timer to retransmit the ACK */ tp->t_timer[TCPT_JACK_RXMT] = OFFSET_FROM_START(tp, tcp_jack_rxmt); - return (optlen); + goto ret_optlen; } if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) - return (optlen); - - /* From here on, all options are sent only if MPTCP_TRUE */ + goto ret_optlen; +fastjoin_send: + /* + * From here on, all options are sent only if MPTCP_TRUE + * or when data is sent early on as in Fast Join + */ MPT_LOCK(mp_tp); - if (mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) { + if ((mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) || force_64bit_dsn) { send_64bit_dsn = TRUE; } if (mp_tp->mpt_flags & MPTCPF_SND_64BITACK) { @@ -550,7 +598,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, __func__, \ len, optlen); \ } \ - return (optlen); \ + goto ret_optlen; \ 
} \ } @@ -615,7 +663,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, if ((dsn_ack_opt.mdss_data_len == 0) || (dsn_ack_opt.mdss_dsn == 0)) { - return (optlen); + goto ret_optlen; } if (tp->t_mpflags & TMPF_SEND_DFIN) { @@ -650,7 +698,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, mptcp_ntoh64(dsn_ack_opt.mdss_ack)); } tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; - return (optlen); + goto ret_optlen; } if ((tp->t_mpflags & TMPF_SEND_DSN) && @@ -680,7 +728,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, if ((dsn_opt.mdss_data_len == 0) || (dsn_opt.mdss_dsn == 0)) { - return (optlen); + goto ret_optlen; } if (tp->t_mpflags & TMPF_SEND_DFIN) { @@ -706,7 +754,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, ntohs(dsn_opt.mdss_data_len)); } tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; - return (optlen); + goto ret_optlen; } /* 32-bit Data ACK option */ @@ -735,7 +783,7 @@ do_ack32_only: optlen += len; VERIFY(optlen <= MAX_TCPOPTLEN); tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; - return (optlen); + goto ret_optlen; } /* 64-bit Data ACK option */ @@ -767,7 +815,7 @@ do_ack64_only: optlen += len; VERIFY(optlen <= MAX_TCPOPTLEN); tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; - return (optlen); + goto ret_optlen; } /* 32-bit DSS+Data ACK option */ @@ -828,7 +876,7 @@ do_ack64_only: if (optlen > MAX_TCPOPTLEN) panic("optlen too large"); tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; - return (optlen); + goto ret_optlen; } /* 32-bit DSS + 64-bit DACK option */ @@ -887,7 +935,7 @@ do_ack64_only: if (optlen > MAX_TCPOPTLEN) panic("optlen too large"); tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; - return (optlen); + goto ret_optlen; } if (tp->t_mpflags & TMPF_SEND_DFIN) { @@ -902,14 +950,18 @@ do_ack64_only: bzero(&dss_ack_opt, sizeof (dss_ack_opt)); MPT_LOCK(mp_tp); - /* Data FIN occupies one sequence space */ - if ((mp_tp->mpt_sndnxt + 1) != mp_tp->mpt_sndmax) { + /* + * Data FIN occupies one sequence space. + * Don't send it if it has been Acked. + */ + if (((mp_tp->mpt_sndnxt + 1) != mp_tp->mpt_sndmax) || + (mp_tp->mpt_snduna == mp_tp->mpt_sndmax)) { MPT_UNLOCK(mp_tp); if (mptcp_dbg == MP_VERBOSE_DEBUG_2) printf("%s: Fin state %d %llu %llu\n", __func__, mp_tp->mpt_state, mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax); - return (optlen); + goto ret_optlen; } dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; @@ -934,7 +986,34 @@ do_ack64_only: optlen += len; } - return (optlen); +ret_optlen: + if (TRUE == *p_mptcp_acknow ) { + VERIFY(old_mpt_flags != 0); + u_int32_t new_mpt_flags = tp->t_mpflags & + (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL); + + /* + * If none of the above mpflags were acted on by + * this routine, reset these flags and set p_mptcp_acknow + * to false. + * XXX The reset value of p_mptcp_acknow can be used + * to communicate tcp_output to NOT send a pure ack without any + * MPTCP options as it will be treated as a dup ack. + * Since the instances of mptcp_setup_opts not acting on + * these options are mostly corner cases and sending a dup + * ack here would only have an impact if the system + * has sent consecutive dup acks before this false one, + * we haven't modified the logic in tcp_output to avoid + * that. 
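
/*
 * Sketch of the ret_optlen epilogue here: the function snapshots the
 * "send this option" flag bits on entry and, at its single exit,
 * compares them with the current bits. If no path acted on them, the
 * flags are cleared and *acknow is reset so tcp_output() does not emit
 * a pure ACK carrying no MPTCP option (which the peer would count as a
 * duplicate ACK). Flag values below are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

#define EX_SND_MPPRIO   0x1
#define EX_SND_REMADDR  0x2
#define EX_SND_MPFAIL   0x4
#define EX_PENDING_MASK (EX_SND_MPPRIO | EX_SND_REMADDR | EX_SND_MPFAIL)

static unsigned
ex_setup_opts(unsigned *mpflags, bool *acknow, bool acted)
{
    unsigned old = *mpflags & EX_PENDING_MASK;
    unsigned optlen = 0;

    if (acted && (*mpflags & EX_SND_MPPRIO)) {
        *mpflags &= ~EX_SND_MPPRIO;  /* option actually written */
        optlen += 4;
    }
    /* single exit: reset un-acted flags so they cannot spin forever */
    if (*acknow && old == (*mpflags & EX_PENDING_MASK)) {
        *mpflags &= ~EX_PENDING_MASK;
        *acknow = false;
    }
    return optlen;
}

int
main(void)
{
    unsigned flags = EX_SND_MPPRIO;
    bool acknow = true;
    ex_setup_opts(&flags, &acknow, false);
    printf("flags=%#x acknow=%d\n", flags, (int)acknow); /* 0 0 */
    return 0;
}
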
+ */ + if (old_mpt_flags == new_mpt_flags) { + tp->t_mpflags &= ~(TMPF_SND_MPPRIO + | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL); + *p_mptcp_acknow = FALSE; + } + } + + return optlen; } /* @@ -1232,7 +1311,7 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) (tp->t_mpflags & TMPF_PREESTABLISHED)) { struct mptcp_mpjoin_opt_rsp2 *join_rsp2 = (struct mptcp_mpjoin_opt_rsp2 *)cp; - + if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp2)) { if (mptcp_dbg >= MP_ERR_DEBUG) { printf("ACK: unexpected optlen = %d mp option " @@ -1322,8 +1401,7 @@ mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, struct tcpcb *tp) if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) && MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) { mptcp_data_ack_rcvd(mp_tp, tp, full_dack); - if ((mp_tp->mpt_state == MPTCPS_CLOSED) || - (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2)) + if (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2) close_notify = 1; MPT_UNLOCK(mp_tp); mptcp_notify_mpready(tp->t_inpcb->inp_socket); @@ -1609,9 +1687,15 @@ mptcp_do_fin_opt(struct tcpcb *tp) if (!(tp->t_mpflags & TMPF_RECV_DFIN)) { if (mp_tp != NULL) { MPT_LOCK(mp_tp); - mp_tp->mpt_rcvnxt += 1; mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN); MPT_UNLOCK(mp_tp); + + if (tp->t_inpcb->inp_socket != NULL) { + soevent(tp->t_inpcb->inp_socket, + SO_FILT_HINT_LOCKED | + SO_FILT_HINT_MPCANTRCVMORE); + } + } tp->t_mpflags |= TMPF_RECV_DFIN; } @@ -1633,7 +1717,9 @@ mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) if (!mp_tp) return; - if (tp->t_mpflags & TMPF_MPTCP_TRUE) { + /* We may get Data ACKs just during fallback, so don't ignore those */ + if ((tp->t_mpflags & TMPF_MPTCP_TRUE) || + (tp->t_mpflags & TMPF_TCP_FALLBACK)) { struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp; if (dss_rsp->mdss_subtype == MPO_DSS) { @@ -1711,8 +1797,20 @@ mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th) { struct mptcb *mp_tp = NULL; struct mptcp_mpfail_opt *fail_opt = (struct mptcp_mpfail_opt *)cp; + u_int32_t mdss_subflow_seqn = 0; + int error = 0; - if ((th->th_flags != TH_ACK) || (th->th_flags != TH_RST)) + /* + * mpfail could make us more vulnerable to attacks. Hence accept + * only those that are the next expected sequence number. 
+ */ + if (th->th_seq != tp->rcv_nxt) { + tcpstat.tcps_invalid_opt++; + return; + } + + /* A packet without RST, must atleast have the ACK bit set */ + if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST)) return; if (fail_opt->mfail_len != sizeof (struct mptcp_mpfail_opt)) @@ -1721,11 +1819,15 @@ mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th) mp_tp = (struct mptcb *)tp->t_mptcb; if (mp_tp == NULL) return; - MPT_LOCK(mp_tp); mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL; mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn); MPT_UNLOCK(mp_tp); + error = mptcp_get_map_for_dsn(tp->t_inpcb->inp_socket, + mp_tp->mpt_dsn_at_csum_fail, &mdss_subflow_seqn); + if (error == 0) { + mp_tp->mpt_ssn_at_csum_fail = mdss_subflow_seqn; + } mptcp_notify_mpfail(tp->t_inpcb->inp_socket); } @@ -1816,9 +1918,9 @@ mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt) printf("%s: local id %d remove id %d \n", __func__, tp->t_local_aid, tp->t_rem_aid); - bzero(opt, sizeof (opt)); + bzero(opt, sizeof (*opt)); opt->mr_kind = TCPOPT_MULTIPATH; - opt->mr_len = sizeof (opt); + opt->mr_len = sizeof (*opt); opt->mr_subtype = MPO_REMOVE_ADDR; opt->mr_addr_id = tp->t_rem_aid; tp->t_mpflags &= ~TMPF_SND_REM_ADDR; diff --git a/bsd/netinet/mptcp_opt.h b/bsd/netinet/mptcp_opt.h index cbf8fc3f0..8c925b9b9 100644 --- a/bsd/netinet/mptcp_opt.h +++ b/bsd/netinet/mptcp_opt.h @@ -50,7 +50,7 @@ extern void mptcp_send_addaddr_opt(struct tcpcb *, struct mptcp_addaddr_opt *); extern void mptcp_send_remaddr_opt(struct tcpcb *, struct mptcp_remaddr_opt *); extern unsigned int mptcp_setup_opts(struct tcpcb *, int, u_char *, unsigned int, int, int, unsigned int **, u_int8_t **, u_int64_t *, - u_int32_t **); + u_int32_t **, boolean_t *); extern void mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *, struct tcpcb *, uint16_t); extern void mptcp_update_rcv_state_f(struct mptcp_dss_ack_opt *, diff --git a/bsd/netinet/mptcp_subr.c b/bsd/netinet/mptcp_subr.c index 83e1955f1..6537a1c5f 100644 --- a/bsd/netinet/mptcp_subr.c +++ b/bsd/netinet/mptcp_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -128,7 +128,7 @@ static void mptcp_thread_dowork(struct mptses *); static void mptcp_thread_func(void *, wait_result_t); static void mptcp_thread_destroy(struct mptses *); static void mptcp_key_pool_init(void); -static void mptcp_attach_to_subf(struct socket *, struct mptcb *, connid_t); +static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t); static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *); static void mptcp_conn_properties(struct mptcb *); static void mptcp_init_statevars(struct mptcb *); @@ -145,6 +145,7 @@ static void mptcp_subflow_input(struct mptses *, struct mptsub *); static void mptcp_subflow_wupcall(struct socket *, void *, int); static void mptcp_subflow_eupcall(struct socket *, void *, uint32_t); static void mptcp_update_last_owner(struct mptsub *, struct socket *); +static void mptcp_output_needed(struct mptses *mpte, struct mptsub *to_mpts); /* * Possible return values for subflow event handlers. 
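
/*
 * Sketch of the MP_FAIL validation above: the option is honored only
 * when it arrives in order (segment sequence equals rcv_nxt) and the
 * segment carries ACK or RST; the old test used '||', which was always
 * true and so rejected every segment. Stand-alone model below with
 * hypothetical flag values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_TH_ACK 0x10
#define EX_TH_RST 0x04

static bool
ex_mpfail_acceptable(uint32_t th_seq, uint32_t rcv_nxt, uint8_t th_flags)
{
    if (th_seq != rcv_nxt)          /* out of order: could be forged */
        return false;
    /* a segment without RST must at least have the ACK bit set */
    if (th_flags != EX_TH_ACK && th_flags != EX_TH_RST)
        return false;
    return true;
}

int
main(void)
{
    printf("%d %d %d\n",
        ex_mpfail_acceptable(100, 100, EX_TH_ACK),            /* 1 */
        ex_mpfail_acceptable(101, 100, EX_TH_ACK),            /* 0 */
        ex_mpfail_acceptable(100, 100, EX_TH_ACK | EX_TH_RST)); /* 0 */
    return 0;
}
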
Note that success @@ -175,12 +176,15 @@ static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *); static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *); static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *); static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *); +static ev_ret_t mptcp_fastjoin_ev(struct mptses *, struct mptsub *); +static ev_ret_t mptcp_deleteok_ev(struct mptses *, struct mptsub *); +static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *); + static const char *mptcp_evret2str(ev_ret_t); static mptcp_key_t *mptcp_reserve_key(void); static int mptcp_do_sha1(mptcp_key_t *, char *, int); static int mptcp_init_authparms(struct mptcb *); -static int mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts); static unsigned int mptsub_zone_size; /* size of mptsub */ static struct zone *mptsub_zone; /* zone for mptsub */ @@ -217,6 +221,21 @@ uint32_t mptcp_socket_limit = MPPCB_LIMIT; SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, sk_lim, CTLFLAG_RW|CTLFLAG_LOCKED, &mptcp_socket_limit, 0, "MPTCP socket limit"); +/* + * SYSCTL to turn on delayed cellular subflow start. + */ +uint32_t mptcp_delayed_subf_start = 0; +SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, delayed, CTLFLAG_RW|CTLFLAG_LOCKED, + &mptcp_delayed_subf_start, 0, "MPTCP Delayed Subflow start"); + +/* + * SYSCTL for RTT spike measurement threshold in msecs. + */ +int32_t mptcp_rto_spike_thresh = 3000; +SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, rto_spikethresh, + CTLFLAG_RW|CTLFLAG_LOCKED, &mptcp_rto_spike_thresh, 0, + "MPTCP RTT spike thresh"); + static struct protosw mptcp_subflow_protosw; static struct pr_usrreqs mptcp_subflow_usrreqs; #if INET6 @@ -805,7 +824,8 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING; socket_lock(so, 0); - mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpts->mpts_connid); + mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last); + /* connect the subflow socket */ error = soconnectxlocked(so, &mpts->mpts_src_sl, &mpts->mpts_dst_sl, mpts->mpts_mpcr.mpcr_proc, mpts->mpts_mpcr.mpcr_ifscope, @@ -813,6 +833,11 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts) &mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr)); socket_unlock(so, 0); + /* Allocate a unique address id per subflow */ + mpte->mpte_addrid_last++; + if (mpte->mpte_addrid_last == 0) + mpte->mpte_addrid_last++; + DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte, struct mptsub *, mpts, int, error); @@ -1027,7 +1052,7 @@ mptcp_subflow_sopeeloff(struct mptses *mpte, struct mptsub *mpts, */ so->so_flags &= ~SOF_MP_SUBFLOW; so->so_state &= ~SS_NOFDREF; - so->so_state &= ~SOF_MPTCP_TRUE; + so->so_flags &= ~SOF_MPTCP_TRUE; /* allow socket buffers to be compressed */ so->so_rcv.sb_flags &= ~SB_NOCOMPRESS; @@ -1038,8 +1063,10 @@ mptcp_subflow_sopeeloff(struct mptses *mpte, struct mptsub *mpts, * * This will increase the current 64k buffer size to whatever is best. 
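
/*
 * Sketch of the autosizing guard that follows: peeled-off sockets get
 * SB_AUTOSIZE only when the user has not pinned the buffer size
 * (SB_USRSIZE), so an explicit SO_RCVBUF/SO_SNDBUF setting survives
 * the peel-off. Flag values below are stand-ins.
 */
#include <stdio.h>

#define EX_SB_AUTOSIZE 0x1
#define EX_SB_USRSIZE  0x2

static unsigned
ex_maybe_autosize(unsigned sb_flags)
{
    if (!(sb_flags & EX_SB_USRSIZE))
        sb_flags |= EX_SB_AUTOSIZE;
    return sb_flags;
}

int
main(void)
{
    printf("default=%#x user-sized=%#x\n",
        ex_maybe_autosize(0), ex_maybe_autosize(EX_SB_USRSIZE));
    return 0;   /* default=0x1 user-sized=0x2 */
}
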
*/ - so->so_rcv.sb_flags |= SB_AUTOSIZE; - so->so_snd.sb_flags |= SB_AUTOSIZE; + if (!(so->so_rcv.sb_flags & SB_USRSIZE)) + so->so_rcv.sb_flags |= SB_AUTOSIZE; + if (!(so->so_snd.sb_flags & SB_USRSIZE)) + so->so_snd.sb_flags |= SB_AUTOSIZE; /* restore protocol-user requests */ VERIFY(mpts->mpts_oprotosw != NULL); @@ -1106,6 +1133,15 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, mp_so = mpte->mpte_mppcb->mpp_socket; mp_tp = mpte->mpte_mptcb; + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) { + /* If the remote end sends Data FIN, refuse subflow adds */ + error = ENOTCONN; + MPT_UNLOCK(mp_tp); + return (error); + } + MPT_UNLOCK(mp_tp); + MPTS_LOCK(mpts); VERIFY(!(mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED))); VERIFY(mpts->mpts_mpte == NULL); @@ -1149,13 +1185,20 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, if ((error = mptcp_subflow_socreate(mpte, mpts, af, p, &so)) != 0) goto out; + /* If fastjoin is requested, set state in mpts */ + if ((so->so_flags & SOF_MPTCP_FASTJOIN) && + (mp_tp->mpt_state == MPTCPS_ESTABLISHED) && + (mpte->mpte_nummpcapflows == 0)) { + mpts->mpts_flags |= MPTSF_FASTJ_REQD; + mpts->mpts_rel_seq = 1; + MPT_LOCK(mp_tp); + mpts->mpts_sndnxt = mp_tp->mpt_snduna; + MPT_UNLOCK(mp_tp); + } + /* - * XXX: adi@apple.com - * - * This probably needs to be made smarter, but for now simply - * increment the counter, while avoiding 0 (CONNID_ANY) and - * -1 (CONNID_ALL). Assume that an MPTCP connection will not - * live too long with (2^32)-2 subflow connection attempts. + * Increment the counter, while avoiding 0 (CONNID_ANY) and + * -1 (CONNID_ALL). */ mpte->mpte_connid_last++; if (mpte->mpte_connid_last == CONNID_ALL || @@ -1165,6 +1208,11 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, mpts->mpts_connid = mpte->mpte_connid_last; VERIFY(mpts->mpts_connid != CONNID_ANY && mpts->mpts_connid != CONNID_ALL); + + /* Allocate a unique address id per subflow */ + mpte->mpte_addrid_last++; + if (mpte->mpte_addrid_last == 0) + mpte->mpte_addrid_last++; /* bind subflow socket to the specified interface */ if (ifscope != IFSCOPE_NONE) { @@ -1261,7 +1309,8 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, SO_FILT_HINT_SUSPEND | SO_FILT_HINT_RESUME | SO_FILT_HINT_CONNECTED | SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER | SO_FILT_HINT_MPSTATUS | - SO_FILT_HINT_MUSTRST); + SO_FILT_HINT_MUSTRST | SO_FILT_HINT_MPFASTJ | + SO_FILT_HINT_DELETEOK | SO_FILT_HINT_MPCANTRCVMORE); /* sanity check */ VERIFY(!(mpts->mpts_flags & @@ -1288,6 +1337,12 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, } else { if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) mpts->mpts_flags |= MPTSF_CONNECT_PENDING; + + /* avoid starting up cellular subflow unless required */ + if ((mptcp_delayed_subf_start) && + (IFNET_IS_CELLULAR(mpts->mpts_outif))) { + mpts->mpts_flags |= MPTSF_CONNECT_PENDING; + } MPT_UNLOCK(mp_tp); mpcr.mpcr_type = MPTSUB_CONNREQ_MP_ADD; } @@ -1325,27 +1380,6 @@ out: return (error); } -static int -mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts) -{ - int ret = 1; - struct mptcb *mp_tp = NULL; - - MPTE_LOCK_ASSERT_HELD(mpte); - mp_tp = mpte->mpte_mptcb; - VERIFY(mp_tp != NULL); - MPTS_LOCK(mpts); - MPT_LOCK(mp_tp); - if ((mpts->mpts_soerror == 0) && - (mpts->mpts_flags & MPTSF_ACTIVE) && - (mp_tp->mpt_state != MPTCPS_CLOSED) && - (mp_tp->mpt_state <= MPTCPS_TIME_WAIT)) - ret = 0; - MPT_UNLOCK(mp_tp); - MPTS_UNLOCK(mpts); - return (ret); -} - /* * Delete/remove a subflow from an MPTCP. 
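
/*
 * Sketch of the identifier allocation above: both the subflow
 * connection ID and the per-subflow address ID are handed out from a
 * wrapping counter that skips reserved values (CONNID_ANY and
 * CONNID_ALL for connection IDs; 0 for address IDs, which denotes the
 * initial subflow). Generic version below with stand-in constants.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_CONNID_ANY ((uint32_t)0)
#define EX_CONNID_ALL ((uint32_t)-1)

static uint32_t
ex_next_connid(uint32_t *last)
{
    do {
        (*last)++;
    } while (*last == EX_CONNID_ANY || *last == EX_CONNID_ALL);
    return *last;
}

static uint8_t
ex_next_addrid(uint8_t *last)
{
    (*last)++;
    if (*last == 0)      /* 0 is the implicit first-flow address ID */
        (*last)++;
    return *last;
}

int
main(void)
{
    uint32_t cid = (uint32_t)-2;   /* force wrap through reserved IDs */
    uint8_t aid = 0xff;
    printf("connid=%u addrid=%u\n", ex_next_connid(&cid),
        (unsigned)ex_next_addrid(&aid));  /* connid=1 addrid=1 */
    return 0;
}
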
The underlying subflow socket * will no longer be accessible after a subflow is deleted, thus this @@ -1363,6 +1397,14 @@ mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close) MPTS_LOCK(mpts); so = mpts->mpts_socket; VERIFY(so != NULL); + + if (close && !((mpts->mpts_flags & MPTSF_DELETEOK) && + (mpts->mpts_flags & MPTSF_USER_DISCONNECT))) { + MPTS_UNLOCK(mpts); + mptcplog((LOG_DEBUG, "%s: %d %x\n", __func__, + mpts->mpts_soerror, mpts->mpts_flags)); + return; + } mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d " "[close %s] %d %x\n", __func__, @@ -1381,6 +1423,8 @@ mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close) TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry); VERIFY(mpte->mpte_numflows != 0); mpte->mpte_numflows--; + if (mpte->mpte_active_sub == mpts) + mpte->mpte_active_sub = NULL; /* * Drop references held by this subflow socket; there @@ -1388,7 +1432,9 @@ mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close) */ (void) sock_setupcalls(so, NULL, NULL, NULL, NULL); (void) sock_catchevents(so, NULL, NULL, 0); + mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so); + if (close) (void) mptcp_subflow_soclose(mpts, so); @@ -1485,7 +1531,12 @@ mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf) struct mptsub *mpts = arg; struct mptses *mpte = mpts->mpts_mpte; - VERIFY(mpte != NULL); + /* + * mpte should never be NULL, except in a race with + * mptcp_subflow_del + */ + if (mpte == NULL) + return; lck_mtx_lock(&mpte->mpte_thread_lock); mptcp_thread_signal_locked(mpte); @@ -1524,9 +1575,24 @@ mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts) MPTS_UNLOCK(mpts); mpts_alt = mptcp_get_subflow(mpte, mpts); if (mpts_alt == NULL) { - mptcplog((LOG_ERR, "%s: no alt path cid %d\n", - __func__, mpts->mpts_connid)); - mpte->mpte_mppcb->mpp_socket->so_error = error; + if (mptcp_delayed_subf_start) { + mpts_alt = mptcp_get_pending_subflow(mpte, + mpts); + if (mpts_alt) { + mptcplog((LOG_INFO,"%s: pending %d\n", + __func__, mpts_alt->mpts_connid)); + } else { + mptcplog((LOG_ERR, "%s: no pending", + "%d\n", __func__, + mpts->mpts_connid)); + mpte->mpte_mppcb->mpp_socket->so_error = + error; + } + } else { + mptcplog((LOG_ERR, "%s: no alt path cid %d\n", + __func__, mpts->mpts_connid)); + mpte->mpte_mppcb->mpp_socket->so_error = error; + } } MPTS_LOCK(mpts); } else if (error == 0) { @@ -1572,7 +1638,14 @@ mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf) struct mptsub *mpts = arg; struct mptses *mpte = mpts->mpts_mpte; - VERIFY(mpte != NULL); + /* + * mpte should never be NULL except in a race with + * mptcp_subflow_del which doesn't hold socket lock across critical + * section. This upcall is made after releasing the socket lock. + * Interleaving of socket operations becomes possible therefore. + */ + if (mpte == NULL) + return; lck_mtx_lock(&mpte->mpte_thread_lock); mptcp_thread_signal_locked(mpte); @@ -1594,7 +1667,8 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) u_int64_t mpt_dsn = 0; struct mptcb *mp_tp = mpte->mpte_mptcb; struct mbuf *mpt_mbuf = NULL; - unsigned int off = 0; + u_int64_t off = 0; + struct mbuf *head, *tail; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ MPTS_LOCK_ASSERT_HELD(mpts); @@ -1614,7 +1688,8 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) /* subflow socket is not MPTCP capable? 
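
/*
 * Sketch of the upcall race guard above: subflow read/write upcalls
 * run without the socket lock held across the critical section, so a
 * concurrent mptcp_subflow_del() may already have cleared the back
 * pointer; the upcall now returns quietly instead of asserting.
 * Generic callback shape below; types are stand-ins.
 */
#include <stddef.h>

struct ex_session { int kicks; };

struct ex_subflow { struct ex_session *owner; };

static void
ex_upcall(void *arg)
{
    struct ex_subflow *sf = arg;

    /* owner may be NULL in a race with subflow deletion: bail out */
    if (sf->owner == NULL)
        return;
    sf->owner->kicks++;   /* signal the worker thread */
}

int
main(void)
{
    struct ex_subflow dead = { NULL };
    ex_upcall(&dead);      /* safe no-op during teardown */
    return 0;
}
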
*/ if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) && - !(mpts->mpts_flags & MPTSF_MP_DEGRADED)) { + !(mpts->mpts_flags & MPTSF_MP_DEGRADED) && + !(mpts->mpts_flags & MPTSF_FASTJ_SEND)) { mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d not " "MPTCP capable\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); @@ -1664,9 +1739,9 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) MPT_LOCK(mp_tp); if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) { - int len = 0; + u_int64_t len = 0; len = mp_tp->mpt_snduna - mpt_dsn; - sbdrop(&mp_so->so_snd, len); + sbdrop(&mp_so->so_snd, (int)len); } @@ -1674,21 +1749,41 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) * In degraded mode, we don't receive data acks, so force free * mbufs less than snd_nxt */ + if (mp_so->so_snd.sb_mb == NULL) { + MPT_UNLOCK(mp_tp); + goto out; + } + mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn; if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) && + (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) && MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_sndnxt)) { - int len = 0; + u_int64_t len = 0; len = mp_tp->mpt_sndnxt - mpt_dsn; - sbdrop(&mp_so->so_snd, len); + sbdrop(&mp_so->so_snd, (int)len); mp_tp->mpt_snduna = mp_tp->mpt_sndnxt; } + if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) && + !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) { + mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC; + so->so_flags1 |= SOF1_POST_FALLBACK_SYNC; + if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) + mpts->mpts_sndnxt = mp_tp->mpt_dsn_at_csum_fail; + } + /* * Adjust the subflow's notion of next byte to send based on * the last unacknowledged byte */ if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_snduna)) { mpts->mpts_sndnxt = mp_tp->mpt_snduna; + /* + * With FastJoin, a write before the fastjoin event will use + * an uninitialized relative sequence number. 
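
/*
 * Sketch of the one-shot post-fallback resync above: after an
 * infinite-mapping fallback, the sender rewinds its data-level sndnxt
 * to the DSN where the checksum failure was detected and sets
 * POST_FALLBACK_SYNC so the rewind happens exactly once. Minimal
 * stand-alone model below.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ex_mpsnd {
    uint64_t sndnxt;
    uint64_t dsn_at_fail;   /* DSN where the checksum failed */
    bool     synced;        /* stands in for MPTCPF_POST_FALLBACK_SYNC */
};

static void
ex_fallback_resync(struct ex_mpsnd *s)
{
    if (!s->synced) {
        s->sndnxt = s->dsn_at_fail;  /* retransmit from failure point */
        s->synced = true;            /* never rewind twice */
    }
}

int
main(void)
{
    struct ex_mpsnd s = { 5000, 4200, false };
    ex_fallback_resync(&s);
    ex_fallback_resync(&s);          /* second call is a no-op */
    printf("sndnxt=%llu\n", (unsigned long long)s.sndnxt); /* 4200 */
    return 0;
}
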
+ */ + if (mpts->mpts_rel_seq == 0) + mpts->mpts_rel_seq = 1; } /* @@ -1709,7 +1804,7 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) } if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_sndmax)) { off = mpts->mpts_sndnxt - mp_tp->mpt_snduna; - sb_cc -= off; + sb_cc -= (size_t)off; } else { MPT_UNLOCK(mp_tp); goto out; @@ -1720,13 +1815,13 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn; while (mpt_mbuf && ((mpt_mbuf->m_pkthdr.mp_rlen == 0) || - (mpt_mbuf->m_pkthdr.mp_rlen <= off))) { + (mpt_mbuf->m_pkthdr.mp_rlen <= (u_int32_t)off))) { off -= mpt_mbuf->m_pkthdr.mp_rlen; mpt_mbuf = mpt_mbuf->m_next; mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn; } if ((mpts->mpts_connid == 2) || (mpts->mpts_flags & MPTSF_MP_DEGRADED)) - mptcplog((LOG_INFO, "%s: snduna = %llu off = %d id = %d" + mptcplog2((LOG_INFO, "%s: snduna = %llu off = %lld id = %d" " %llu \n", __func__, mp_tp->mpt_snduna, off, mpts->mpts_connid, @@ -1734,9 +1829,11 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) VERIFY(mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)); + head = tail = NULL; + while (tot_sent < sb_cc) { struct mbuf *m; - size_t mlen, len = 0; + size_t mlen; mlen = mpt_mbuf->m_pkthdr.mp_rlen; mlen -= off; @@ -1748,8 +1845,8 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mlen, sb_cc); } - m = m_copym_mode(mpt_mbuf, off, mlen, M_DONTWAIT, - M_COPYM_COPY_HDR); + m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT, + M_COPYM_MUST_COPY_HDR); if (m == NULL) { error = ENOBUFS; break; @@ -1757,6 +1854,7 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) /* Create a DSN mapping for the data (m_copym does it) */ mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn; + VERIFY(m->m_flags & M_PKTHDR); m->m_pkthdr.pkt_flags |= PKTF_MPTCP; m->m_pkthdr.pkt_flags &= ~PKTF_MPSO; m->m_pkthdr.mp_dsn = mpt_dsn + off; @@ -1765,6 +1863,13 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_rel_seq += mlen; m->m_pkthdr.len = mlen; + if (head == NULL) { + head = tail = m; + } else { + tail->m_next = m; + tail = m; + } + /* last contiguous mapping is stored for error cases */ if (mpts->mpts_lastmap.mptsl_dsn + mpts->mpts_lastmap.mptsl_len == mpt_dsn) { @@ -1778,18 +1883,29 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_lastmap.mptsl_len = m->m_pkthdr.mp_rlen; } - error = sock_sendmbuf(so, NULL, m, 0, &len); - DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so, + tot_sent += mlen; + off = 0; + mpt_mbuf = mpt_mbuf->m_next; + } + + if (head != NULL) { + + if (mpts->mpts_flags & MPTSF_FASTJ_SEND) { + struct tcpcb *tp = intotcpcb(sotoinpcb(so)); + tp->t_mpflags |= TMPF_FASTJOIN_SEND; + } + + error = sock_sendmbuf(so, NULL, head, 0, NULL); + + DTRACE_MPTCP7(send, struct mbuf *, head, struct socket *, so, struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd, struct mptses *, mpte, struct mptsub *, mpts, - size_t, mlen); - if (error != 0) { - mptcplog((LOG_ERR, "%s: len = %zd error = %d \n", - __func__, len, error)); - break; - } - mpts->mpts_sndnxt += mlen; + size_t, tot_sent); + } + + if (error == 0) { + mpts->mpts_sndnxt += tot_sent; MPT_LOCK(mp_tp); if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mpts->mpts_sndnxt)) { if (MPTCP_DATASEQ_HIGH32(mpts->mpts_sndnxt) > @@ -1797,29 +1913,23 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN; mp_tp->mpt_sndnxt = mpts->mpts_sndnxt; } + mptcp_cancel_timer(mp_tp, MPTT_REXMT); MPT_UNLOCK(mp_tp); - if 
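
/*
 * Sketch of the batching change above: instead of one sock_sendmbuf()
 * call per copied mbuf, the loop links the copies into a head/tail
 * chain and hands the whole chain to the subflow socket once. Generic
 * list-building version; stand-in node type, no socket calls.
 */
#include <stddef.h>
#include <stdio.h>

struct ex_node { struct ex_node *next; int len; };

struct ex_chain { struct ex_node *head, *tail; int total; };

static void
ex_chain_append(struct ex_chain *c, struct ex_node *n)
{
    n->next = NULL;
    if (c->head == NULL) {
        c->head = c->tail = n;
    } else {
        c->tail->next = n;
        c->tail = n;
    }
    c->total += n->len;
}

int
main(void)
{
    struct ex_node a = { NULL, 3 }, b = { NULL, 4 }, d = { NULL, 5 };
    struct ex_chain c = { NULL, NULL, 0 };
    int segs;
    struct ex_node *n;

    ex_chain_append(&c, &a);
    ex_chain_append(&c, &b);
    ex_chain_append(&c, &d);
    /* single "send" of the assembled chain, as with sock_sendmbuf() */
    for (segs = 0, n = c.head; n != NULL; n = n->next)
        segs++;
    printf("chain bytes=%d segs=%d\n", c.total, segs);
    return 0;
}
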
(len != mlen) { - mptcplog((LOG_ERR, "%s: cid %d wrote %d " - "(expected %d)\n", __func__, - mpts->mpts_connid, len, mlen)); + + /* Send once in SYN_SENT state to avoid sending SYN spam */ + if (mpts->mpts_flags & MPTSF_FASTJ_SEND) { + so->so_flags &= ~SOF_MPTCP_FASTJOIN; + mpts->mpts_flags &= ~MPTSF_FASTJ_SEND; } - tot_sent += mlen; - off = 0; - mpt_mbuf = mpt_mbuf->m_next; - } - if (error != 0 && error != EWOULDBLOCK) { - mptcplog((LOG_ERR, "MPTCP ERROR %s: cid %d error %d\n", - __func__, mpts->mpts_connid, error)); - } if (error == 0) { - if ((mpts->mpts_connid == 2) || + if ((mpts->mpts_connid >= 2) || (mpts->mpts_flags & MPTSF_MP_DEGRADED)) - mptcplog((LOG_DEBUG, "%s: cid %d wrote %d %d\n", - __func__, mpts->mpts_connid, tot_sent, - sb_cc)); - MPT_LOCK(mp_tp); - mptcp_cancel_timer(mp_tp, MPTT_REXMT); - MPT_UNLOCK(mp_tp); + mptcplog2((LOG_DEBUG, "%s: cid %d wrote %d %d\n", + __func__, mpts->mpts_connid, (int)tot_sent, + (int) sb_cc)); + } else { + mptcplog((LOG_ERR, "MPTCP ERROR %s: cid %d error %d len %zd\n", + __func__, mpts->mpts_connid, error, tot_sent)); } out: return (error); @@ -1858,7 +1968,7 @@ mptcp_subflow_eupcall(struct socket *so, void *arg, uint32_t events) static ev_ret_t mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) { - uint32_t events; + uint32_t events, save_events; ev_ret_t ret = MPTS_EVRET_OK; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -1876,12 +1986,19 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) events |= SO_FILT_HINT_MPFAILOVER; } + save_events = events; + DTRACE_MPTCP3(subflow__events, struct mptses *, mpte, struct mptsub *, mpts, uint32_t, events); mptcplog2((LOG_DEBUG, "%s: cid %d events=%b\n", __func__, mpts->mpts_connid, events, SO_FILT_HINT_BITS)); + if ((events & SO_FILT_HINT_MPCANTRCVMORE) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_mpcantrcvmore_ev(mpte, mpts); + events &= ~SO_FILT_HINT_MPCANTRCVMORE; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } if ((events & SO_FILT_HINT_MPFAILOVER) && (ret >= MPTS_EVRET_OK)) { ev_ret_t error = mptcp_subflow_failover_ev(mpte, mpts); events &= ~SO_FILT_HINT_MPFAILOVER; @@ -1942,11 +2059,22 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) events &= ~SO_FILT_HINT_MPSTATUS; ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); } + if ((events & SO_FILT_HINT_DELETEOK) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_deleteok_ev(mpte, mpts); + events &= ~SO_FILT_HINT_DELETEOK; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } if ((events & SO_FILT_HINT_DISCONNECTED) && (ret >= MPTS_EVRET_OK)) { ev_ret_t error = mptcp_subflow_disconnected_ev(mpte, mpts); events &= ~SO_FILT_HINT_DISCONNECTED; ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); } + if ((events & SO_FILT_HINT_MPFASTJ) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_fastjoin_ev(mpte, mpts); + events &= ~SO_FILT_HINT_MPFASTJ; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + /* * We should be getting only events specified via sock_catchevents(), * so loudly complain if we have any unprocessed one(s). 
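
/*
 * Sketch of the save_events change in mptcp_subflow_events(): the
 * handler snapshots the event bits it will service and, when done,
 * atomically clears only that snapshot. Clearing the complement of
 * the leftover bits, as before, would also wipe out events posted
 * concurrently while the handlers ran. Plain (non-atomic) model.
 */
#include <stdio.h>

#define EV_A 0x1
#define EV_B 0x2
#define EV_C 0x4

int
main(void)
{
    unsigned pending = EV_A | EV_B;    /* events at snapshot time */
    unsigned events = pending;         /* working copy for dispatch */
    unsigned save_events = events;

    events &= ~EV_A;                   /* handler consumed EV_A */
    events &= ~EV_B;                   /* handler consumed EV_B */

    pending |= EV_C;                   /* EV_C posted concurrently */

    /* clear exactly what we serviced; EV_C survives for next pass */
    pending &= ~save_events;

    printf("pending=%#x (EV_C preserved)\n", pending);
    return 0;
}
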
@@ -1960,8 +2088,8 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) } /* clear the ones we've processed */ - atomic_bitclear_32(&mpts->mpts_evctl, ~events); - + atomic_bitclear_32(&mpts->mpts_evctl, save_events); + return (ret); } @@ -1988,21 +2116,26 @@ mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts) mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, mpts->mpts_connid, (linger ? "YES" : "NO"))); - if (mpts->mpts_soerror == 0) - mpts->mpts_soerror = ECONNREFUSED; - /* * We got a TCP RST for this subflow connection. * * Right now, we simply propagate ECONNREFUSED to the MPTCP socket - * client if the MPTCP connection has not been established. Otherwise - * we close the socket. + * client if the MPTCP connection has not been established or + * if the connection has only one subflow and is a connection being + * resumed. Otherwise we close the socket. */ mptcp_subflow_disconnect(mpte, mpts, !linger); MPT_LOCK(mp_tp); if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { - mp_so->so_error = ECONNREFUSED; + mpts->mpts_soerror = mp_so->so_error = ECONNREFUSED; + } else if (mpte->mpte_nummpcapflows < 1) { + mpts->mpts_soerror = mp_so->so_error = ECONNRESET; + MPT_UNLOCK(mp_tp); + MPTS_UNLOCK(mpts); + soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET); + MPTS_LOCK(mpts); + MPT_LOCK(mp_tp); } MPT_UNLOCK(mp_tp); @@ -2169,6 +2302,42 @@ mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts) return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); } +/* + * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that + * indicates that the remote side sent a Data FIN + */ +static ev_ret_t +mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *so, *mp_so; + struct mptcb *mp_tp; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + mp_so = mpte->mpte_mppcb->mpp_socket; + so = mpts->mpts_socket; + mp_tp = mpte->mpte_mptcb; + + mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + + /* + * We got a Data FIN for the MPTCP connection. + * The FIN may arrive with data. The data is handed up to the + * mptcp socket and the user is notified so that it may close + * the socket if needed. 
+ */ + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) { + MPT_UNLOCK(mp_tp); + MPTS_UNLOCK(mpts); + soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE); + MPTS_LOCK(mpts); + MPT_LOCK(mp_tp); + } + MPT_UNLOCK(mp_tp); + return (MPTS_EVRET_OK); /* keep the subflow socket around */ +} + /* * Handle SO_FILT_HINT_MPFAILOVER subflow socket event */ @@ -2195,6 +2364,15 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) */ if (mpts_alt == NULL) { mptcplog2((LOG_WARNING, "%s: no alternate path\n", __func__)); + if (mptcp_delayed_subf_start) { + mpts_alt = mptcp_get_pending_subflow(mpte, mpts); + if (mpts_alt != NULL) { + MPTS_LOCK(mpts_alt); + (void) mptcp_subflow_soconnectx(mpte, + mpts_alt); + MPTS_UNLOCK(mpts_alt); + } + } MPTS_LOCK(mpts); goto done; } @@ -2203,8 +2381,9 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) so = mpts_alt->mpts_socket; if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) { socket_lock(so, 1); - /* All data acknowledged */ - if (so->so_snd.sb_cc == 0) { + /* All data acknowledged and no RTT spike */ + if ((so->so_snd.sb_cc == 0) && + (mptcp_no_rto_spike(so))) { so->so_flags &= ~SOF_MP_TRYFAILOVER; mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER; } else { @@ -2214,6 +2393,8 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) socket_unlock(so, 1); } if (altpath_exists) { + mptcplog2((LOG_INFO, "%s: cid = %d\n", + __func__, mpts_alt->mpts_connid)); mpts_alt->mpts_flags |= MPTSF_ACTIVE; struct mptcb *mp_tp = mpte->mpte_mptcb; /* Bring the subflow's notion of snd_nxt into the send window */ @@ -2242,12 +2423,14 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_flags |= MPTSF_FAILINGOVER; mpts->mpts_flags &= ~MPTSF_ACTIVE; } else { + mptcplog2((LOG_INFO, "%s: no alt cid = %d\n", + __func__, mpts->mpts_connid)); +done: so = mpts->mpts_socket; socket_lock(so, 1); so->so_flags &= ~SOF_MP_TRYFAILOVER; socket_unlock(so, 1); } -done: MPTS_LOCK_ASSERT_HELD(mpts); return (MPTS_EVRET_OK); } @@ -2377,6 +2560,16 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) if ((mpts->mpts_flags & MPTSF_DISCONNECTED) || (mpts->mpts_flags & MPTSF_DISCONNECTING)) { + socket_lock(so, 0); + if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) && + (so->so_state & SS_ISCONNECTED)) { + mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n", + __func__, mpts->mpts_connid)); + (void) soshutdownlock(so, SHUT_RD); + (void) soshutdownlock(so, SHUT_WR); + (void) sodisconnectlocked(so); + } + socket_unlock(so, 0); return (MPTS_EVRET_OK); } @@ -2567,6 +2760,11 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) } } else if (mpok) { MPT_UNLOCK(mp_tp); + if (mptcp_rwnotify && (mpte->mpte_nummpcapflows == 0)) { + /* Experimental code, disabled by default. 
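
/*
 * Sketch of the failover gate above: a subflow is allowed back from
 * FAILINGOVER only when its send buffer has drained and its RTT has
 * not spiked past a threshold (mptcp_rto_spike_thresh, 3000 ms by
 * default). The spike test below is only a guess at the shape of
 * mptcp_no_rto_spike(), whose body is not shown in this hunk; the
 * baseline field is hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int32_t ex_rto_spike_thresh = 3000;   /* msecs, sysctl-tunable */

struct ex_flow {
    uint32_t srtt_ms;       /* current smoothed RTT */
    uint32_t base_rtt_ms;   /* hypothetical baseline RTT */
    uint32_t sb_cc;         /* bytes still queued in send buffer */
};

static bool
ex_no_rtt_spike(const struct ex_flow *f)
{
    return f->srtt_ms <= f->base_rtt_ms + (uint32_t)ex_rto_spike_thresh;
}

static bool
ex_failover_complete(const struct ex_flow *f)
{
    return f->sb_cc == 0 && ex_no_rtt_spike(f);
}

int
main(void)
{
    struct ex_flow ok = { 80, 60, 0 }, spiky = { 4000, 60, 0 };
    printf("ok=%d spiky=%d\n", ex_failover_complete(&ok),
        ex_failover_complete(&spiky));   /* ok=1 spiky=0 */
    return 0;
}
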
*/ + sorwakeup(mp_so); + sowwakeup(mp_so); + } /* * case (b) above * In case of additional flows, the MPTCP socket is not @@ -2576,12 +2774,23 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) */ MPTS_LOCK(mpts); mpts->mpts_flags |= MPTSF_MPCAP_CTRSET; + mpts->mpts_flags &= ~MPTSF_FASTJ_REQD; mpte->mpte_nummpcapflows++; - mpts->mpts_rel_seq = 1; + /* With Fastjoin, rel sequence will be nonzero */ + if (mpts->mpts_rel_seq == 0) + mpts->mpts_rel_seq = 1; MPT_LOCK_SPIN(mp_tp); - mpts->mpts_sndnxt = mp_tp->mpt_snduna; + /* With Fastjoin, sndnxt is updated before connected_ev */ + if (mpts->mpts_sndnxt == 0) { + mpts->mpts_sndnxt = mp_tp->mpt_snduna; + } MPT_UNLOCK(mp_tp); + mptcp_output_needed(mpte, mpts); + } else { + MPT_UNLOCK(mp_tp); + MPTS_LOCK(mpts); } + MPTS_LOCK_ASSERT_HELD(mpts); return (MPTS_EVRET_OK); /* keep the subflow socket around */ @@ -2615,7 +2824,7 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) /* * Clear flags that are used by getconninfo to return state. - * Retain like MPTSF_DELETEOK, MPTSF_ACTIVE for internal purposes. + * Retain like MPTSF_DELETEOK for internal purposes. */ mpts->mpts_flags &= ~(MPTSF_CONNECTING|MPTSF_CONNECT_PENDING| MPTSF_CONNECTED|MPTSF_DISCONNECTING|MPTSF_PREFERRED| @@ -2635,6 +2844,11 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) { mpte->mpte_nummpcapflows--; + if (mpte->mpte_active_sub == mpts) { + mpte->mpte_active_sub = NULL; + mptcplog((LOG_DEBUG, "%s: resetting active subflow \n", + __func__)); + } mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET; } @@ -2717,7 +2931,6 @@ mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts) done: MPT_UNLOCK(mp_tp); socket_unlock(so, 0); - return (ret); } @@ -2745,8 +2958,6 @@ mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) if (mpts->mpts_soerror == 0) mpts->mpts_soerror = ECONNABORTED; - so->so_error = ECONNABORTED; - /* We got an invalid option or a fast close */ socket_lock(so, 0); struct tcptemp *t_template; @@ -2754,22 +2965,22 @@ mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) struct tcpcb *tp = NULL; tp = intotcpcb(inp); + so->so_error = ECONNABORTED; t_template = tcp_maketemplate(tp); if (t_template) { - unsigned int ifscope, nocell = 0; + struct tcp_respond_args tra; + bzero(&tra, sizeof(tra)); if (inp->inp_flags & INP_BOUND_IF) - ifscope = inp->inp_boundifp->if_index; + tra.ifscope = inp->inp_boundifp->if_index; else - ifscope = IFSCOPE_NONE; - - if (inp->inp_flags & INP_NO_IFT_CELLULAR) - nocell = 1; + tra.ifscope = IFSCOPE_NONE; + tra.awdl_unrestricted = 1; tcp_respond(tp, t_template->tt_ipgen, &t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt, tp->snd_una, TH_RST, ifscope, nocell); + tp->rcv_nxt, tp->snd_una, TH_RST, &tra); (void) m_free(dtom(t_template)); mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d \n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), @@ -2779,10 +2990,12 @@ mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) mptcp_subflow_disconnect(mpte, mpts, !linger); MPTS_UNLOCK(mpts); - soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); + soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED | + SO_FILT_HINT_CONNRESET); MPT_LOCK(mp_tp); - if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + if ((mp_tp->mpt_state < MPTCPS_ESTABLISHED) || + (mp_tp->mpt_state == MPTCPS_FASTCLOSE_WAIT)) { mp_so->so_error = ECONNABORTED; } MPT_UNLOCK(mp_tp); @@ -2795,6 +3008,50 @@ 
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); } +static ev_ret_t +mptcp_fastjoin_ev(struct mptses *mpte, struct mptsub *mpts) +{ + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + VERIFY(mpte->mpte_mppcb != NULL); + + if (mpte->mpte_nummpcapflows == 0) { + struct mptcb *mp_tp = mpte->mpte_mptcb; + mptcplog((LOG_DEBUG,"%s %llx %llx \n", + __func__, mp_tp->mpt_snduna, mpts->mpts_sndnxt)); + mpte->mpte_active_sub = mpts; + mpts->mpts_flags |= (MPTSF_FASTJ_SEND | MPTSF_ACTIVE); + MPT_LOCK(mp_tp); + /* + * If mptcp_subflow_output is called before fastjoin_ev + * then mpts->mpts_sndnxt is initialized to mp_tp->mpt_snduna + * and further mpts->mpts_sndnxt is incremented by len copied. + */ + if (mpts->mpts_sndnxt == 0) { + mpts->mpts_sndnxt = mp_tp->mpt_snduna; + mpts->mpts_rel_seq = 1; + } + MPT_UNLOCK(mp_tp); + } + + return (MPTS_EVRET_OK); +} + +static ev_ret_t +mptcp_deleteok_ev(struct mptses *mpte, struct mptsub *mpts) +{ + MPTE_LOCK_ASSERT_HELD(mpte); + MPTS_LOCK_ASSERT_HELD(mpts); + VERIFY(mpte->mpte_mppcb != NULL); + mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid)); + + mpts->mpts_flags |= MPTSF_DELETEOK; + if (mpts->mpts_flags & MPTSF_DISCONNECTED) + return (MPTS_EVRET_DELETE); + else + return (MPTS_EVRET_OK); +} + static const char * mptcp_evret2str(ev_ret_t ret) { @@ -3088,7 +3345,7 @@ mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno) VERIFY(mpte->mpte_mptcb == mp_tp); mp_so = mpte->mpte_mppcb->mpp_socket; - mp_tp->mpt_state = MPTCPS_CLOSED; + mp_tp->mpt_state = MPTCPS_TERMINATE; DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, uint32_t, 0 /* event */); @@ -3129,6 +3386,7 @@ mptcp_close(struct mptses *mpte, struct mptcb *mp_tp) /* Clean up all subflows */ TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { MPTS_LOCK(mpts); + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; mptcp_subflow_disconnect(mpte, mpts, TRUE); MPTS_UNLOCK(mpts); mptcp_subflow_del(mpte, mpts, TRUE); @@ -3236,9 +3494,7 @@ mptcp_thread_dowork(struct mptses *mpte) /* nothing to do */ break; case MPTS_EVRET_DELETE: - if (mptcp_delete_ok(mpte, mpts)) { - mptcp_subflow_del(mpte, mpts, TRUE); - } + mptcp_subflow_del(mpte, mpts, TRUE); break; case MPTS_EVRET_CONNECT_PENDING: connect_pending = TRUE; @@ -3306,6 +3562,17 @@ mptcp_thread_dowork(struct mptses *mpte) socket_unlock(so, 1); } else if (connect_pending) { + /* + * If delayed subflow start is set and cellular, + * delay the connect till a retransmission timeout + */ + + if ((mptcp_delayed_subf_start) && + (IFNET_IS_CELLULAR(mpts->mpts_outif))) { + MPTS_UNLOCK(mpts); + continue; + } + /* * The MPTCP connection has progressed to a state * where it supports full multipath semantics; allow @@ -3652,7 +3919,7 @@ mptcp_key_pool_init(void) static void mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, - connid_t conn_id) + uint8_t addr_id) { struct tcpcb *tp = sototcpcb(so); struct mptcp_subf_auth_entry *sauth_entry; @@ -3660,19 +3927,17 @@ mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, MPT_LOCK_SPIN(mp_tp); tp->t_mptcb = mp_tp; - MPT_UNLOCK(mp_tp); /* - * As long as the mpts_connid is unique it can be used as the - * address ID for additional subflows. * The address ID of the first flow is implicitly 0. 
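
/*
 * Sketch of the two-phase teardown above: a subflow is actually freed
 * only after both conditions hold, the DELETEOK event has been
 * delivered and the subflow has reached DISCONNECTED; whichever
 * arrives second triggers the delete. This is a simplified model of
 * the ordering only; the real code also gates on a user-initiated
 * disconnect flag in mptcp_subflow_del(). Bit values are hypothetical.
 */
#include <stdio.h>

#define EX_DELETEOK     0x1
#define EX_DISCONNECTED 0x2

enum ex_evret { EX_EVRET_OK, EX_EVRET_DELETE };

static enum ex_evret
ex_deleteok_ev(unsigned *flags)
{
    *flags |= EX_DELETEOK;
    return (*flags & EX_DISCONNECTED) ? EX_EVRET_DELETE : EX_EVRET_OK;
}

static enum ex_evret
ex_disconnected_ev(unsigned *flags)
{
    *flags |= EX_DISCONNECTED;
    return (*flags & EX_DELETEOK) ? EX_EVRET_DELETE : EX_EVRET_OK;
}

int
main(void)
{
    unsigned flags = 0;
    printf("after deleteok: %d\n", ex_deleteok_ev(&flags));       /* 0 */
    printf("after disconnect: %d\n", ex_disconnected_ev(&flags)); /* 1 */
    return 0;
}
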
*/ if (mp_tp->mpt_state == MPTCPS_CLOSED) { tp->t_local_aid = 0; } else { - tp->t_local_aid = conn_id; + tp->t_local_aid = addr_id; tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW); so->so_flags |= SOF_MP_SEC_SUBFLOW; } + MPT_UNLOCK(mp_tp); sauth_entry = zalloc(mpt_subauth_zone); sauth_entry->msae_laddr_id = tp->t_local_aid; sauth_entry->msae_raddr_id = 0; @@ -3681,18 +3946,24 @@ try_again: sauth_entry->msae_laddr_rand = RandomULong(); if (sauth_entry->msae_laddr_rand == 0) goto try_again; + MPT_LOCK_SPIN(mp_tp); LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next); + MPT_UNLOCK(mp_tp); } static void mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so) { struct mptcp_subf_auth_entry *sauth_entry; - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = NULL; int found = 0; - if (tp == NULL) + socket_lock(so, 0); + tp = sototcpcb(so); + if (tp == NULL) { + socket_unlock(so, 0); return; + } MPT_LOCK(mp_tp); LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) { @@ -3705,8 +3976,10 @@ mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so) LIST_REMOVE(sauth_entry, msae_next); zfree(mpt_subauth_zone, sauth_entry); } - tp->t_mptcb = NULL; MPT_UNLOCK(mp_tp); + + tp->t_mptcb = NULL; + socket_unlock(so, 0); } void @@ -4176,8 +4449,9 @@ mptcp_output_getm_dsnmap64(struct socket *so, int off, uint32_t datalen, __func__)); } else { /* case B */ - mptcplog((LOG_INFO, "%s: discontig %d %d \n", - __func__, datalen, contig_len)); + mptcplog((LOG_INFO, + "%s: discontig datalen %d contig_len %d cc %d \n", + __func__, datalen, contig_len, so->so_snd.sb_cc)); break; } mnext = mnext->m_next; @@ -4237,7 +4511,7 @@ mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m) } } -void +int mptcp_adj_rmap(struct socket *so, struct mbuf *m) { u_int64_t dsn; @@ -4246,7 +4520,7 @@ mptcp_adj_rmap(struct socket *so, struct mbuf *m) u_int32_t old_rcvnxt = 0; if (m_pktlen(m) == 0) - return; + return 0; if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) { VERIFY(m->m_flags & M_PKTHDR); @@ -4256,14 +4530,16 @@ mptcp_adj_rmap(struct socket *so, struct mbuf *m) datalen = m->m_pkthdr.mp_rlen; } else { /* data arrived without an DSS option mapping */ + + /* initial subflow can fallback right after SYN handshake */ mptcp_notify_mpfail(so); - return; + return 0; } /* In the common case, data is in window and in sequence */ if (m->m_pkthdr.len == (int)datalen) { mptcp_adj_rcvnxt(tp, m); - return; + return 0; } if (m->m_pkthdr.len > (int)datalen) { @@ -4277,19 +4553,21 @@ mptcp_adj_rmap(struct socket *so, struct mbuf *m) int off = old_rcvnxt - sseq; m->m_pkthdr.mp_dsn += off; m->m_pkthdr.mp_rseq += off; - m->m_pkthdr.mp_rlen -= off; + m->m_pkthdr.mp_rlen = m->m_pkthdr.len; } else if (old_rcvnxt == sseq) { /* * Data was trimmed from the right */ m->m_pkthdr.mp_rlen = m->m_pkthdr.len; } else { - /* XXX handle gracefully with reass or fallback in January */ - panic("%s: partial map %u %u", __func__, old_rcvnxt, sseq); - /* NOTREACHED */ + /* handle gracefully with reass or fallback */ + mptcp_notify_mpfail(so); + m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP; + m_freem(m); + return -1; } mptcp_adj_rcvnxt(tp, m); - + return 0; } /* @@ -4312,6 +4590,9 @@ mptcp_act_on_txfail(struct socket *so) if (tp->t_state != TCPS_ESTABLISHED) mptcplog((LOG_INFO, "%s: state = %d \n", __func__, tp->t_state)); + + mptcplog((LOG_INFO, "%s: Failover = %d \n", __func__, + (so->so_flags & SOF_MP_TRYFAILOVER) ? 
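
/*
 * Sketch of the receive-map adjustment in mptcp_adj_rmap() above: when
 * TCP trims duplicate bytes from the left of a segment, the attached
 * DSS mapping must slide by the same offset; trimming on the right
 * only shortens the mapped length. A partial overlap can no longer be
 * described by one mapping, so the code now falls back gracefully
 * (MP_FAIL notification, drop) instead of panicking. Simplified model.
 */
#include <stdint.h>
#include <stdio.h>

struct ex_map { uint64_t dsn; uint32_t sseq, rlen; };

/* Returns 0 on success, -1 when the mapping cannot be salvaged. */
static int
ex_adj_rmap(struct ex_map *map, uint32_t pktlen, uint32_t rcv_nxt)
{
    if (pktlen == map->rlen)
        return 0;                        /* in sequence, untouched */
    if (rcv_nxt > map->sseq && rcv_nxt - map->sseq <= map->rlen) {
        uint32_t off = rcv_nxt - map->sseq;   /* trimmed from left */
        map->dsn += off;
        map->sseq += off;
        map->rlen = pktlen;
        return 0;
    }
    if (rcv_nxt == map->sseq) {
        map->rlen = pktlen;              /* trimmed from right */
        return 0;
    }
    return -1;                           /* partial map: fall back */
}

int
main(void)
{
    struct ex_map m = { 1000, 50, 20 };
    int rc = ex_adj_rmap(&m, 15, 55);    /* 5 bytes trimmed from left */
    printf("rc=%d dsn=%llu sseq=%u rlen=%u\n", rc,
        (unsigned long long)m.dsn, m.sseq, m.rlen);
    return 0;   /* rc=0 dsn=1005 sseq=55 rlen=15 */
}
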
1 : 0)); if (so->so_flags & SOF_MP_TRYFAILOVER) { return; @@ -4344,6 +4625,8 @@ mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq) (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) { off = dsn_fail - dsn; *tcp_seq = m->m_pkthdr.mp_rseq + off; + mptcplog((LOG_INFO, "%s: %llu %llu \n", + __func__, dsn, dsn_fail)); return (0); } @@ -4361,6 +4644,7 @@ mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq) /* * Support for sending contiguous MPTCP bytes in subflow + * Also for preventing sending data with ACK in 3-way handshake */ int32_t mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len) @@ -4375,6 +4659,23 @@ mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len) mptcp_output_getm_dsnmap64(so, off, (u_int32_t)len, &mdss_dsn, &mdss_subflow_seq, &mdss_data_len); + /* + * Special case handling for Fast Join. We want to send data right + * after ACK of the 3-way handshake, but not piggyback the data + * with the 3rd ACK of the 3WHS. TMPF_FASTJOINBY2_SEND and + * mdss_data_len control this. + */ + struct tcpcb *tp = NULL; + tp = intotcpcb(sotoinpcb(so)); + if ((tp->t_mpflags & TMPF_JOINED_FLOW) && + (tp->t_mpflags & TMPF_PREESTABLISHED) && + (!(tp->t_mpflags & TMPF_RECVD_JOIN)) && + (tp->t_mpflags & TMPF_SENT_JOIN) && + (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) && + (!(tp->t_mpflags & TMPF_FASTJOINBY2_SEND))) { + mdss_data_len = 0; + tp->t_mpflags |= TMPF_FASTJOINBY2_SEND; + } return (mdss_data_len); } @@ -4583,7 +4884,7 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS struct mptsub *mpts; struct socket *so; conninfo_mptcp_t mptcpci; - mptcp_flow_t *flows; + mptcp_flow_t *flows = NULL; if (req->newptr != USER_ADDR_NULL) return (EPERM); @@ -4597,6 +4898,7 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS return (0); } TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) { + flows = NULL; bzero(&mptcpci, sizeof(mptcpci)); lck_mtx_lock(&mpp->mpp_lock); VERIFY(mpp->mpp_flags & MPP_ATTACHED); @@ -4604,19 +4906,25 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS VERIFY(mpte != NULL); mp_tp = mpte->mpte_mptcb; VERIFY(mp_tp != NULL); - len = sizeof(*flows) * mpte->mpte_numflows; - flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO); - if (flows == NULL) { - lck_mtx_unlock(&mpp->mpp_lock); - break; - } /* N.B. we don't take the mpt_lock just for the state. 
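 * A momentarily stale mpt_state is acceptable: the snapshot is purely
 * informational, and mpp_lock (held above) keeps the PCB alive while
 * it is copied out.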
*/ mptcpci.mptcpci_state = mp_tp->mpt_state; mptcpci.mptcpci_nflows = mpte->mpte_numflows; - mptcpci.mptcpci_len = sizeof(mptcpci) + - sizeof(*flows) * (mptcpci.mptcpci_nflows - 1); - error = SYSCTL_OUT(req, &mptcpci, - sizeof(mptcpci) - sizeof(*flows)); + len = sizeof(*flows) * mpte->mpte_numflows; + if (mpte->mpte_numflows != 0) { + flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO); + if (flows == NULL) { + lck_mtx_unlock(&mpp->mpp_lock); + break; + } + mptcpci.mptcpci_len = sizeof(mptcpci) + + sizeof(*flows) * (mptcpci.mptcpci_nflows - 1); + error = SYSCTL_OUT(req, &mptcpci, + sizeof(mptcpci) - sizeof(mptcp_flow_t)); + } else { + mptcpci.mptcpci_len = sizeof(mptcpci); + error = SYSCTL_OUT(req, &mptcpci, + sizeof(mptcpci)); + } if (error) { lck_mtx_unlock(&mpp->mpp_lock); FREE(flows, M_TEMP); @@ -4633,10 +4941,12 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS f++; } lck_mtx_unlock(&mpp->mpp_lock); - error = SYSCTL_OUT(req, flows, len); - FREE(flows, M_TEMP); - if (error) - break; + if (flows) { + error = SYSCTL_OUT(req, flows, len); + FREE(flows, M_TEMP); + if (error) + break; + } } lck_mtx_unlock(&mtcbinfo.mppi_lock); @@ -4646,3 +4956,165 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, mptcp_pcblist, "S,conninfo_mptcp_t", "List of active MPTCP connections"); + +/* + * Check the health of the other subflows and do an mptcp_output if + * there is no other active or functional subflow at the time of + * call of this function. + */ +static void +mptcp_output_needed(struct mptses *mpte, struct mptsub *to_mpts) +{ + struct mptsub *from_mpts = NULL; + + MPTE_LOCK_ASSERT_HELD(mpte); + + MPTS_UNLOCK(to_mpts); + + from_mpts = mpte->mpte_active_sub; + + if (from_mpts == NULL) + goto output_needed; + + MPTS_LOCK(from_mpts); + + if ((from_mpts->mpts_flags & MPTSF_DISCONNECTED) || + (from_mpts->mpts_flags & MPTSF_DISCONNECTING)) { + MPTS_UNLOCK(from_mpts); + goto output_needed; + } + + MPTS_UNLOCK(from_mpts); + MPTS_LOCK(to_mpts); + return; + +output_needed: + mptcp_output(mpte); + MPTS_LOCK(to_mpts); +} + + +/* + * When WiFi signal starts fading, there's more loss and RTT spikes. + * Check if there has been a large spike by comparing against + * a tolerable RTT spike threshold. 
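+ * Concretely: spike = t_rxtcur - mptcp_rto_spike_thresh; any positive
+ * spike makes this return FALSE, flagging the subflow as degraded.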
+ */ +boolean_t +mptcp_no_rto_spike(struct socket *so) +{ + struct tcpcb *tp = intotcpcb(sotoinpcb(so)); + int32_t spike = 0; + + if (tp->t_rxtcur > mptcp_rto_spike_thresh) { + spike = tp->t_rxtcur - mptcp_rto_spike_thresh; + + mptcplog2((LOG_INFO, "%s: spike = %d rto = %d", + "best = %d cur = %d\n", __func__, spike, + tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT, + tp->t_rttcur)); + + } + + if (spike > 0 ) { + return (FALSE); + } else { + return (TRUE); + } +} + +/* + * Set notsent lowat mark on the MPTCB + */ +int +mptcp_set_notsent_lowat(struct mptses *mpte, int optval) +{ + struct mptcb *mp_tp = NULL; + int error = 0; + + if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) + mp_tp = mpte->mpte_mptcb; + + if (mp_tp) + mp_tp->mpt_notsent_lowat = optval; + else + error = EINVAL; + + return error; +} + +u_int32_t +mptcp_get_notsent_lowat(struct mptses *mpte) +{ + struct mptcb *mp_tp = NULL; + + if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) + mp_tp = mpte->mpte_mptcb; + + if (mp_tp) + return mp_tp->mpt_notsent_lowat; + else + return 0; +} + +int +mptcp_notsent_lowat_check(struct socket *so) { + struct mptses *mpte; + struct mppcb *mpp; + struct mptcb *mp_tp; + struct mptsub *mpts; + + int notsent = 0; + + mpp = sotomppcb(so); + if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { + return (0); + } + + mpte = mptompte(mpp); + mp_tp = mpte->mpte_mptcb; + + MPT_LOCK(mp_tp); + notsent = so->so_snd.sb_cc; + + if ((notsent == 0) || + ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <= + mp_tp->mpt_notsent_lowat)) { + mptcplog3((LOG_INFO, "%s: lowat %d notsent %d actual %d \n", + __func__, mp_tp->mpt_notsent_lowat, notsent, + notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna))); + MPT_UNLOCK(mp_tp); + return (1); + } + MPT_UNLOCK(mp_tp); + + /* When Nagle's algorithm is not disabled, it is better + * to wakeup the client even before there is atleast one + * maxseg of data to write. + */ + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + int retval = 0; + MPTS_LOCK(mpts); + if (mpts->mpts_flags & MPTSF_ACTIVE) { + struct socket *subf_so = mpts->mpts_socket; + socket_lock(subf_so, 0); + struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so)); + + notsent = so->so_snd.sb_cc - + (tp->snd_nxt - tp->snd_una); + + if ((tp->t_flags & TF_NODELAY) == 0 && + notsent > 0 && (notsent <= (int)tp->t_maxseg)) { + retval = 1; + } + mptcplog3((LOG_INFO, "%s: lowat %d notsent %d" + " nodelay false \n", + __func__, mp_tp->mpt_notsent_lowat, notsent)); + socket_unlock(subf_so, 0); + MPTS_UNLOCK(mpts); + return (retval); + } + MPTS_UNLOCK(mpts); + } + return (0); +} + diff --git a/bsd/netinet/mptcp_usrreq.c b/bsd/netinet/mptcp_usrreq.c index d4ea19cd1..268c7284f 100644 --- a/bsd/netinet/mptcp_usrreq.c +++ b/bsd/netinet/mptcp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -70,6 +70,7 @@ static int mptcp_getconninfo(struct mptses *, connid_t *, uint32_t *, static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *, struct proc *); static int mptcp_disconnectx(struct mptses *, associd_t, connid_t); +static int mptcp_usr_disconnect(struct socket *); static int mptcp_usr_disconnectx(struct socket *, associd_t, connid_t); static struct mptses *mptcp_usrclosed(struct mptses *); static int mptcp_usr_peeloff(struct socket *, associd_t, struct socket **); @@ -93,6 +94,7 @@ struct pr_usrreqs mptcp_usrreqs = { .pru_connectx = mptcp_usr_connectx, .pru_control = mptcp_usr_control, .pru_detach = mptcp_usr_detach, + .pru_disconnect = mptcp_usr_disconnect, .pru_disconnectx = mptcp_usr_disconnectx, .pru_peeloff = mptcp_usr_peeloff, .pru_rcvd = mptcp_usr_rcvd, @@ -264,6 +266,7 @@ mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl, mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + DTRACE_MPTCP3(connectx, struct mptses *, mpte, associd_t, aid, struct socket *, mp_so); @@ -481,6 +484,8 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, goto out; } } + mptcplog2((LOG_INFO, "%s: cid %d flags %x \n", + __func__, mpts->mpts_connid, mpts->mpts_flags)); out: MPTS_UNLOCK(mpts); return (error); @@ -551,7 +556,7 @@ mptcp_setconnorder(struct mptses *mpte, connid_t cid, uint32_t rank) if (mpts1 != mpts && (mpts1->mpts_flags & MPTSF_PREFERRED)) { mpts1->mpts_flags &= ~MPTSF_PREFERRED; - if (mpte->mpte_nummpcapflows > 1) + if (mpte->mpte_nummpcapflows > 1) mptcp_connorder_helper(mpts1); } else if (mpts1 == mpts) { mpts1->mpts_rank = 1; @@ -755,8 +760,8 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) mp_so = mpte->mpte_mppcb->mpp_socket; mp_tp = mpte->mpte_mptcb; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx aid %d cid %d\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid)); + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error)); DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, associd_t, aid, connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp); @@ -798,6 +803,7 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) if (mpts->mpts_connid != cid) continue; MPTS_LOCK(mpts); + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; mptcp_subflow_disconnect(mpte, mpts, FALSE); MPTS_UNLOCK(mpts); break; @@ -822,6 +828,18 @@ out: return (error); } +/* + * Wrapper function to support disconnect on socket + */ +static int +mptcp_usr_disconnect(struct socket *mp_so) +{ + int error = 0; + + error = mptcp_usr_disconnectx(mp_so, ASSOCID_ALL, CONNID_ALL); + return (error); +} + /* * User-protocol pru_disconnectx callback. 
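 * Passing ASSOCID_ALL/CONNID_ALL (as the disconnect wrapper above
 * does) tears down the whole association; a specific cid disconnects
 * only that subflow.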
*/ @@ -868,31 +886,27 @@ mptcp_usrclosed(struct mptses *mpte) MPT_LOCK(mp_tp); mptcp_close_fsm(mp_tp, MPCE_CLOSE); - if (mp_tp->mpt_state == TCPS_CLOSED) { + if (mp_tp->mpt_state == MPTCPS_CLOSED) { mpte = mptcp_close(mpte, mp_tp); MPT_UNLOCK(mp_tp); } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) { MPT_UNLOCK(mp_tp); soisdisconnected(mp_so); + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + MPTS_LOCK(mpts); + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; + MPTS_UNLOCK(mpts); + } } else { - mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ MPT_UNLOCK(mp_tp); TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { MPTS_LOCK(mpts); + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; mptcp_subflow_disconnect(mpte, mpts, FALSE); MPTS_UNLOCK(mpts); } } - /* - * XXX: adi@apple.com - * - * Do we need to handle time wait specially here? We need to handle - * the case where MPTCP has been established, but we have not usable - * subflow to use. Do we want to wait a while before forcibly - * tearing this MPTCP down, in case we have one or more subflows - * that are flow controlled? - */ return (mpte); } @@ -1398,6 +1412,8 @@ mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt) case SO_RECV_ANYIF: /* MP + subflow */ case SO_RESTRICTIONS: /* MP + subflow */ case SO_FLUSH: /* MP + subflow */ + case SO_MPTCP_FASTJOIN: /* MP + subflow */ + case SO_NOWAKEFROMSLEEP: /* * Tell the caller that these options are to be processed; * these will also be recorded later by mptcp_setopt(). @@ -1574,6 +1590,8 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) case SO_PRIVILEGED_TRAFFIC_CLASS: case SO_RECV_ANYIF: case SO_RESTRICTIONS: + case SO_NOWAKEFROMSLEEP: + case SO_MPTCP_FASTJOIN: /* record it */ break; case SO_FLUSH: @@ -1596,6 +1614,26 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) case PERSIST_TIMEOUT: /* eligible; record it */ break; + case TCP_NOTSENT_LOWAT: + /* record at MPTCP level */ + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + goto out; + if (optval < 0) { + error = EINVAL; + goto out; + } else { + if (optval == 0) { + mp_so->so_flags &= ~SOF_NOTSENT_LOWAT; + error = mptcp_set_notsent_lowat(mpte,0); + } else { + mp_so->so_flags |= SOF_NOTSENT_LOWAT; + error = mptcp_set_notsent_lowat(mpte, + optval); + } + } + goto out; default: /* not eligible */ error = ENOPROTOOPT; @@ -1701,6 +1739,7 @@ mptcp_getopt(struct mptses *mpte, struct sockopt *sopt) case TCP_CONNECTIONTIMEOUT: case TCP_RXT_CONNDROPTIME: case PERSIST_TIMEOUT: + case TCP_NOTSENT_LOWAT: /* eligible; get the default value just in case */ error = mptcp_default_tcp_optval(mpte, sopt, &optval); break; @@ -1710,6 +1749,15 @@ mptcp_getopt(struct mptses *mpte, struct sockopt *sopt) break; } + switch (sopt->sopt_name) { + case TCP_NOTSENT_LOWAT: + if (mpte->mpte_mppcb->mpp_socket->so_flags & SOF_NOTSENT_LOWAT) + optval = mptcp_get_notsent_lowat(mpte); + else + optval = 0; + goto out; + } + /* * Search for a previously-issued TCP level socket option and * return the recorded option value. 
This assumes that the @@ -1752,6 +1800,7 @@ mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval) case TCP_KEEPCNT: case TCP_CONNECTIONTIMEOUT: case TCP_RXT_CONNDROPTIME: + case TCP_NOTSENT_LOWAT: *optval = 0; break; @@ -1922,6 +1971,12 @@ mptcp_sopt2str(int level, int optname, char *dst, int size) case SO_RECV_ANYIF: o = "SO_RECV_ANYIF"; break; + case SO_NOWAKEFROMSLEEP: + o = "SO_NOWAKEFROMSLEEP"; + break; + case SO_MPTCP_FASTJOIN: + o = "SO_MPTCP_FASTJOIN"; + break; } break; case IPPROTO_TCP: diff --git a/bsd/netinet/mptcp_var.h b/bsd/netinet/mptcp_var.h index c2ac1c018..474477b26 100644 --- a/bsd/netinet/mptcp_var.h +++ b/bsd/netinet/mptcp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -65,8 +65,9 @@ struct mptses { uint32_t mpte_thread_active; /* thread is running */ uint32_t mpte_thread_reqs; /* # of requests for thread */ struct mptsub *mpte_active_sub; /* ptr to last active subf */ - u_int8_t mpte_flags; /* per mptcp session flags */ - u_int8_t mpte_lost_aid; /* storing lost address id */ + uint8_t mpte_flags; /* per mptcp session flags */ + uint8_t mpte_lost_aid; /* storing lost address id */ + uint8_t mpte_addrid_last; /* storing address id parm */ }; /* @@ -207,12 +208,16 @@ struct mptsub { #define MPTSF_FAILINGOVER 0x20000 /* subflow not used for output */ #define MPTSF_ACTIVE 0x40000 /* subflow currently in use */ #define MPTSF_MPCAP_CTRSET 0x80000 /* mpcap counter */ +#define MPTSF_FASTJ_SEND 0x100000 /* send data after SYN in MP_JOIN */ +#define MPTSF_FASTJ_REQD 0x200000 /* fastjoin required */ +#define MPTSF_USER_DISCONNECT 0x400000 /* User triggered disconnect */ #define MPTSF_BITS \ "\020\1ATTACHED\2CONNECTING\3PENDING\4CONNECTED\5DISCONNECTING" \ "\6DISCONNECTED\7MP_CAPABLE\10MP_READY\11MP_DEGRADED\12SUSPENDED" \ "\13BOUND_IF\14BOUND_IP\15BOUND_PORT\16PREFERRED\17SOPT_OLDVAL" \ - "\20SOPT_INPROG\21NOLINGER\22FAILINGOVER\23ACTIVE\24MPCAP_CTRSET" + "\20SOPT_INPROG\21NOLINGER\22FAILINGOVER\23ACTIVE\24MPCAP_CTRSET" \ + "\25FASTJ_SEND\26FASTJ_REQD\27USER_DISCONNECT" #define MPTS_LOCK_ASSERT_HELD(_mpts) \ lck_mtx_assert(&(_mpts)->mpts_lock, LCK_MTX_ASSERT_OWNED) @@ -223,14 +228,6 @@ struct mptsub { #define MPTS_LOCK(_mpts) \ lck_mtx_lock(&(_mpts)->mpts_lock) -#define MPTS_LOCK_SPIN(_mpts) \ - lck_mtx_lock_spin(&(_mpts)->mpts_lock) - -#define MPTS_CONVERT_LOCK(_mpts) do { \ - MPTS_LOCK_ASSERT_HELD(_mpts); \ - lck_mtx_convert_spin(&(_mpts)->mpts_lock); \ -} while (0) - #define MPTS_UNLOCK(_mpts) \ lck_mtx_unlock(&(_mpts)->mpts_lock) @@ -258,6 +255,7 @@ typedef enum mptcp_state { MPTCPS_FIN_WAIT_2 = 7, /* have closed, DFIN is acked */ MPTCPS_TIME_WAIT = 8, /* in 2*MSL quiet wait after close */ MPTCPS_FASTCLOSE_WAIT = 9, /* sent MP_FASTCLOSE */ + MPTCPS_TERMINATE = 10, /* terminal state */ } mptcp_state_t; typedef u_int64_t mptcp_key_t; @@ -325,11 +323,14 @@ struct mptcb { * Fastclose */ u_int64_t mpt_dsn_at_csum_fail; /* MPFail Opt DSN */ + u_int32_t mpt_ssn_at_csum_fail; /* MPFail Subflow Seq */ /* * Zombie handling */ #define MPT_GC_TICKS (60) int32_t mpt_gc_ticks; /* Used for zombie deletion */ + + u_int32_t mpt_notsent_lowat; /* TCP_NOTSENT_LOWAT support */ }; /* valid values for mpt_flags (see also notes on mpts_flags above) */ @@ -341,10 +342,11 @@ struct mptcb { #define MPTCPF_SND_64BITDSN 0x20 /* Send full 64-bit DSN */ #define MPTCPF_SND_64BITACK 0x40 /* Send 64-bit ACK response */ #define 
MPTCPF_RCVD_64BITACK 0x80 /* Received 64-bit Data ACK */ +#define MPTCPF_POST_FALLBACK_SYNC 0x100 /* Post fallback resend data */ #define MPTCPF_BITS \ "\020\1CHECKSUM\2FALLBACK_TO_TCP\3JOIN_READY\4RECVD_MPFAIL\5PEEL_OFF" \ - "\6SND_64BITDSN\7SND_64BITACK\10RCVD_64BITACK" + "\6SND_64BITDSN\7SND_64BITACK\10RCVD_64BITACK\11POST_FALLBACK_SYNC" /* valid values for mpt_timer_vals */ #define MPTT_REXMT 0x01 /* Starting Retransmit Timer */ @@ -501,9 +503,13 @@ extern int mptcp_fail_thresh; /* Multipath failover thresh of retransmits */ extern int mptcp_subflow_keeptime; /* Multipath subflow TCP_KEEPALIVE opt */ extern int mptcp_mpprio_enable; /* MP_PRIO option enable/disable */ extern int mptcp_remaddr_enable;/* REMOVE_ADDR option enable/disable */ +extern int mptcp_fastjoin; /* Enable FastJoin */ +extern int mptcp_zerortt_fastjoin; /* Enable Data after SYN Fast Join */ +extern int mptcp_rwnotify; /* Enable RW notification on resume */ extern uint32_t mptcp_verbose; /* verbose and mptcp_dbg must be unified */ #define MPPCB_LIMIT 16 extern uint32_t mptcp_socket_limit; /* max number of mptcp sockets allowed */ +extern uint32_t mptcp_delayed_subf_start; /* delayed cellular subflow start */ extern int tcp_jack_rxmt; /* Join ACK retransmission value in msecs */ __BEGIN_DECLS @@ -575,12 +581,19 @@ extern void mptcp_output_getm_dsnmap64(struct socket *, int, uint32_t, extern void mptcp_send_dfin(struct socket *); extern void mptcp_act_on_txfail(struct socket *); extern struct mptsub *mptcp_get_subflow(struct mptses *, struct mptsub *); +extern struct mptsub *mptcp_get_pending_subflow(struct mptses *, + struct mptsub *); extern int mptcp_get_map_for_dsn(struct socket *, u_int64_t, u_int32_t *); extern int32_t mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len); extern int32_t mptcp_sbspace(struct mptcb *); extern void mptcp_notify_mpready(struct socket *); extern void mptcp_notify_mpfail(struct socket *); extern void mptcp_notify_close(struct socket *); +extern boolean_t mptcp_no_rto_spike(struct socket*); +extern int mptcp_set_notsent_lowat(struct mptses *mpte, int optval); +extern u_int32_t mptcp_get_notsent_lowat(struct mptses *mpte); +extern int mptcp_notsent_lowat_check(struct socket *so); + __END_DECLS #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index 89d1a5f3a..173b506a6 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -93,7 +93,6 @@ #include #include #include -#include #if INET6 #include @@ -120,10 +119,6 @@ int rip_disconnect(struct socket *); int rip_bind(struct socket *, struct sockaddr *, struct proc *); int rip_connect(struct socket *, struct sockaddr *, struct proc *); int rip_shutdown(struct socket *); - -#if IPSEC -extern int ipsec_bypass; -#endif struct inpcbhead ripcb; struct inpcbinfo ripcbinfo; @@ -231,29 +226,20 @@ rip_input(m, iphlen) if (inp->inp_faddr.s_addr && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; - - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; - if (last) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); skipit = 0; -#if IPSEC - /* check AH/ESP integrity. 
*/ - if (ipsec_bypass == 0 && n) { - if (ipsec4_in_reject_so(n, last->inp_socket)) { - m_freem(n); - IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); - /* do not inject data to pcb */ - skipit = 1; - } - } -#endif /*IPSEC*/ + +#if NECP + if (n && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { + m_freem(n); + /* do not inject data to pcb */ + skipit = 1; + } +#endif /* NECP */ #if CONFIG_MACF_NET if (n && skipit == 0) { if (mac_inpcb_check_deliver(last, n, AF_INET, @@ -299,18 +285,14 @@ rip_input(m, iphlen) } skipit = 0; -#if IPSEC - /* check AH/ESP integrity. */ - if (ipsec_bypass == 0 && last) { - if (ipsec4_in_reject_so(m, last->inp_socket)) { - m_freem(m); - IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); - OSAddAtomic(1, &ipstat.ips_delivered); - /* do not inject data to pcb */ - skipit = 1; - } - } -#endif /*IPSEC*/ +#if NECP + if (last && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { + m_freem(m); + OSAddAtomic(1, &ipstat.ips_delivered); + /* do not inject data to pcb */ + skipit = 1; + } +#endif /* NECP */ #if CONFIG_MACF_NET if (last && skipit == 0) { if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0) { @@ -384,7 +366,11 @@ rip_output( control = NULL; } - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { if (m != NULL) m_freem(m); VERIFY(control == NULL); @@ -397,8 +383,12 @@ rip_output( ipoa.ipoa_boundif = inp->inp_boundifp->if_index; ipoa.ipoa_flags |= IPOAF_BOUND_IF; } - if (inp->inp_flags & INP_NO_IFT_CELLULAR) + if (INP_NO_CELLULAR(inp)) ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; + if (INP_NO_EXPENSIVE(inp)) + ipoa.ipoa_flags |= IPOAF_NO_EXPENSIVE; + if (INP_AWDL_UNRESTRICTED(inp)) + ipoa.ipoa_flags |= IPOAF_AWDL_UNRESTRICTED; if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); @@ -447,9 +437,21 @@ rip_output( if (inp->inp_laddr.s_addr != INADDR_ANY) ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR; + +#if NECP + { + necp_kernel_policy_id policy_id; + if (!necp_socket_is_allowed_to_send_recv_v4(inp, 0, 0, &ip->ip_src, &ip->ip_dst, NULL, &policy_id)) { + m_freem(m); + return(EHOSTUNREACH); + } + necp_mark_packet_from_socket(m, inp, policy_id); + } +#endif /* NECP */ + #if IPSEC - if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) { + if (inp->inp_sp != NULL && ipsec_setsocket(m, so) != 0) { m_freem(m); return ENOBUFS; } @@ -511,11 +513,11 @@ rip_output( } /* - * If output interface was cellular, and this socket is denied - * access to it, generate an event. + * If output interface was cellular/expensive, and this socket is + * denied access to it, generate an event. 
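+ * Both the INP_NO_CELLULAR and the new INP_NO_EXPENSIVE restrictions
+ * funnel into the same SO_FILT_HINT_IFDENIED notification below.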
*/ if (error != 0 && (ipoa.ipoa_retflags & IPOARF_IFDENIED) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + (INP_NO_CELLULAR(inp) || INP_NO_EXPENSIVE(inp))) soevent(so, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED)); return (error); @@ -595,19 +597,6 @@ rip_ctloutput(so, sopt) break ; #endif /* DUMMYNET */ -#if MROUTING - case MRT_INIT: - case MRT_DONE: - case MRT_ADD_VIF: - case MRT_DEL_VIF: - case MRT_ADD_MFC: - case MRT_DEL_MFC: - case MRT_VERSION: - case MRT_ASSERT: - error = ip_mrouter_get(so, sopt); - break; -#endif /* MROUTING */ - default: error = ip_ctloutput(so, sopt); break; @@ -671,36 +660,6 @@ rip_ctloutput(so, sopt) break ; #endif -#if MROUTING - case IP_RSVP_ON: - error = ip_rsvp_init(so); - break; - - case IP_RSVP_OFF: - error = ip_rsvp_done(); - break; - - /* XXX - should be combined */ - case IP_RSVP_VIF_ON: - error = ip_rsvp_vif_init(so, sopt); - break; - - case IP_RSVP_VIF_OFF: - error = ip_rsvp_vif_done(so, sopt); - break; - - case MRT_INIT: - case MRT_DONE: - case MRT_ADD_VIF: - case MRT_DEL_VIF: - case MRT_ADD_MFC: - case MRT_DEL_MFC: - case MRT_VERSION: - case MRT_ASSERT: - error = ip_mrouter_set(so, sopt); - break; -#endif /* MROUTING */ - case SO_FLUSH: if ((error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval))) != 0) @@ -854,13 +813,6 @@ rip_detach(struct socket *so) inp = sotoinpcb(so); if (inp == 0) panic("rip_detach"); -#if MROUTING - if (so == ip_mrouter) - ip_mrouter_done(); - ip_rsvp_force_done(so); - if (so == ip_rsvpd) - ip_rsvp_done(); -#endif /* MROUTING */ in_pcbdetach(inp); return 0; } @@ -889,7 +841,11 @@ rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p) struct ifaddr *ifa = NULL; struct ifnet *outif = NULL; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); if (nam->sa_len != sizeof (struct sockaddr_in)) @@ -930,7 +886,11 @@ rip_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) struct inpcb *inp = sotoinpcb(so); struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); if (nam->sa_len != sizeof(*addr)) return EINVAL; @@ -961,8 +921,15 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, u_int32_t dst; int error = 0; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { - error = (inp == NULL ? 
EINVAL : EPROTOTYPE); + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp) && (error = EPROTOTYPE)) +#endif /* NECP */ + ) { + if (inp == NULL) + error = EINVAL; + else + error = EPROTOTYPE; goto bad; } @@ -1135,7 +1102,8 @@ rip_pcblist SYSCTL_HANDLER_ARGS return error; } -SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, +SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); @@ -1237,7 +1205,8 @@ rip_pcblist64 SYSCTL_HANDLER_ARGS return error; } -SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, +SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets"); @@ -1253,7 +1222,8 @@ rip_pcblist_n SYSCTL_HANDLER_ARGS return error; } -SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, +SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, rip_pcblist_n, "S,xinpcb_n", "List of active raw IP sockets"); struct pr_usrreqs rip_usrreqs = { diff --git a/bsd/netinet/tcp.h b/bsd/netinet/tcp.h index da955548f..c4ecae9d8 100644 --- a/bsd/netinet/tcp.h +++ b/bsd/netinet/tcp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -144,7 +144,14 @@ struct tcphdr { #define TCPOPT_SACK_HDR (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK<<8) /* Miscellaneous constants */ #define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at sender side */ -#define TCP_MAX_SACK 3 /* MAX # SACKs sent in any segment */ + +/* + * A SACK option that specifies n blocks will have a length of (8*n + 2) + * bytes, so the 40 bytes available for TCP options can specify a + * maximum of 4 blocks. 
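+ *
+ * Worked out: 8*4 + 2 = 34 <= 40, while a fifth block (8*5 + 2 = 42)
+ * would overflow the option space. When a 12-byte timestamp option is
+ * also present, only 28 bytes remain, so such segments still carry at
+ * most 3 blocks (8*3 + 2 = 26).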
+ */ + +#define TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */ /* @@ -209,10 +216,17 @@ struct tcphdr { #define TCP_KEEPINTVL 0x101 /* interval between keepalives */ #define TCP_KEEPCNT 0x102 /* number of keepalives before close */ #define TCP_SENDMOREACKS 0x103 /* always ack every other packet */ +#define TCP_ENABLE_ECN 0x104 /* Enable ECN on a connection */ + #ifdef PRIVATE #define TCP_INFO 0x200 /* retrieve tcp_info structure */ -#define TCP_NOTSENT_LOWAT 0x201 /* Low water mark for TCP unsent data */ #define TCP_MEASURE_SND_BW 0x202 /* Measure sender's bandwidth for this connection */ +#endif /* PRIVATE */ + + +#define TCP_NOTSENT_LOWAT 0x201 /* Low water mark for TCP unsent data */ + +#ifdef PRIVATE #define TCP_MEASURE_BW_BURST 0x203 /* Burst size to use for bandwidth measurement */ #define TCP_PEER_PID 0x204 /* Lookup pid of the process we're connected to */ #define TCP_ADAPTIVE_READ_TIMEOUT 0x205 /* Read timeout used as a multiple of RTT */ @@ -222,6 +236,8 @@ struct tcphdr { */ #define TCP_ENABLE_MSGS 0x206 #define TCP_ADAPTIVE_WRITE_TIMEOUT 0x207 /* Write timeout used as a multiple of RTT */ +#define TCP_NOTIMEWAIT 0x208 /* Avoid going into time-wait */ +#define TCP_DISABLE_BLACKHOLE_DETECTION 0x209 /* disable PMTU blackhole detection */ /* * The TCP_INFO socket option is a private API and is subject to change @@ -294,6 +310,10 @@ struct tcp_info { u_int64_t tcpi_wifi_rxbytes __attribute((aligned(8))); /* bytes received over Wi-Fi */ u_int64_t tcpi_wifi_txpackets __attribute((aligned(8))); /* packets transmitted over Wi-Fi */ u_int64_t tcpi_wifi_txbytes __attribute((aligned(8))); /* bytes transmitted over Wi-Fi */ + u_int64_t tcpi_wired_rxpackets __attribute((aligned(8))); /* packets received over Wired */ + u_int64_t tcpi_wired_rxbytes __attribute((aligned(8))); /* bytes received over Wired */ + u_int64_t tcpi_wired_txpackets __attribute((aligned(8))); /* packets transmitted over Wired */ + u_int64_t tcpi_wired_txbytes __attribute((aligned(8))); /* bytes transmitted over Wired */ }; struct tcp_measure_bw_burst { diff --git a/bsd/netinet/tcp_cc.c b/bsd/netinet/tcp_cc.c new file mode 100644 index 000000000..fe20ea9a9 --- /dev/null +++ b/bsd/netinet/tcp_cc.c @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +struct tcp_cc_debug_state { + u_int64_t ccd_tsns; + char ccd_srcaddr[INET6_ADDRSTRLEN]; + uint16_t ccd_srcport; + char ccd_destaddr[INET6_ADDRSTRLEN]; + uint16_t ccd_destport; + uint32_t ccd_snd_cwnd; + uint32_t ccd_snd_wnd; + uint32_t ccd_snd_ssthresh; + uint32_t ccd_rttcur; + uint32_t ccd_rxtcur; + uint32_t ccd_srtt; + uint32_t ccd_event; + uint32_t ccd_sndcc; + uint32_t ccd_sndhiwat; + uint32_t ccd_bytes_acked; + union { + struct { + uint32_t ccd_last_max; + uint32_t ccd_tcp_win; + uint32_t ccd_target_win; + uint32_t ccd_avg_lastmax; + uint32_t ccd_mean_deviation; + } cubic_state; + } u; +}; + +int tcp_cc_debug = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, cc_debug, CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_cc_debug, 0, "Enable debug data collection"); + +extern struct tcp_cc_algo tcp_cc_newreno; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno_sockets, + CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_cc_newreno.num_sockets, + 0, "Number of sockets using newreno"); + +extern struct tcp_cc_algo tcp_cc_ledbat; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_sockets, + CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_cc_ledbat.num_sockets, + 0, "Number of sockets using background transport"); + +extern struct tcp_cc_algo tcp_cc_cubic; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_sockets, + CTLFLAG_RD | CTLFLAG_LOCKED,&tcp_cc_cubic.num_sockets, + 0, "Number of sockets using cubic"); + +int tcp_use_newreno = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, use_newreno, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_use_newreno, 0, + "Use TCP NewReno by default"); + + #define SET_SNDSB_IDEAL_SIZE(sndsb, size) \ + sndsb->sb_idealsize = min(max(tcp_sendspace, tp->snd_ssthresh), \ + tcp_autosndbuf_max); + +/* Array containing pointers to currently implemented TCP CC algorithms */ +struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT]; +struct zone *tcp_cc_zone; + +/* Information for colelcting TCP debug information using control socket */ +#define TCP_CCDEBUG_CONTROL_NAME "com.apple.network.tcp_ccdebug" +#define TCP_CCDBG_NOUNIT 0xffffffff +static kern_ctl_ref tcp_ccdbg_ctlref = NULL; +volatile UInt32 tcp_ccdbg_unit = TCP_CCDBG_NOUNIT; + +void tcp_cc_init(void); +static void tcp_cc_control_register(void); +static errno_t tcp_ccdbg_control_connect(kern_ctl_ref kctl, + struct sockaddr_ctl *sac, void **uinfo); +static errno_t tcp_ccdbg_control_disconnect(kern_ctl_ref kctl, + u_int32_t unit, void *uinfo); +static struct tcp_cc_algo tcp_cc_algo_none; +/* + * Initialize TCP congestion control algorithms. 
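+ * Slot TCP_CC_ALGO_NONE holds a zeroed placeholder so that an
+ * uninitialized tcp_cc_index never dereferences live callbacks.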
+ */ + +void +tcp_cc_init(void) +{ + bzero(&tcp_cc_algo_list, sizeof(tcp_cc_algo_list)); + bzero(&tcp_cc_algo_none, sizeof(tcp_cc_algo_none)); + + tcp_cc_algo_list[TCP_CC_ALGO_NONE] = &tcp_cc_algo_none; + tcp_cc_algo_list[TCP_CC_ALGO_NEWRENO_INDEX] = &tcp_cc_newreno; + tcp_cc_algo_list[TCP_CC_ALGO_BACKGROUND_INDEX] = &tcp_cc_ledbat; + tcp_cc_algo_list[TCP_CC_ALGO_CUBIC_INDEX] = &tcp_cc_cubic; + + tcp_cc_control_register(); +} + +static void +tcp_cc_control_register(void) +{ + struct kern_ctl_reg ccdbg_control; + errno_t err; + + bzero(&ccdbg_control, sizeof(ccdbg_control)); + strlcpy(ccdbg_control.ctl_name, TCP_CCDEBUG_CONTROL_NAME, + sizeof(ccdbg_control.ctl_name)); + ccdbg_control.ctl_connect = tcp_ccdbg_control_connect; + ccdbg_control.ctl_disconnect = tcp_ccdbg_control_disconnect; + ccdbg_control.ctl_flags |= CTL_FLAG_PRIVILEGED; + ccdbg_control.ctl_flags |= CTL_FLAG_REG_SOCK_STREAM; + + err = ctl_register(&ccdbg_control, &tcp_ccdbg_ctlref); + if (err != 0) { + log(LOG_ERR, "failed to register tcp_cc debug control"); + } +} + +/* Allow only one socket to connect at any time for debugging */ +static errno_t +tcp_ccdbg_control_connect(kern_ctl_ref kctl, struct sockaddr_ctl *sac, + void **uinfo) +{ +#pragma unused(kctl) +#pragma unused(uinfo) + + UInt32 old_value = TCP_CCDBG_NOUNIT; + UInt32 new_value = sac->sc_unit; + + if (tcp_ccdbg_unit != old_value) + return (EALREADY); + + if (OSCompareAndSwap(old_value, new_value, &tcp_ccdbg_unit)) + return (0); + else + return (EALREADY); +} + +static errno_t +tcp_ccdbg_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void *uinfo) +{ +#pragma unused(kctl, unit, uinfo) + + if (unit == tcp_ccdbg_unit) { + UInt32 old_value = tcp_ccdbg_unit; + UInt32 new_value = TCP_CCDBG_NOUNIT; + if (tcp_ccdbg_unit == new_value) + return (0); + + if (!OSCompareAndSwap(old_value, new_value, + &tcp_ccdbg_unit)) + log(LOG_DEBUG, + "failed to disconnect tcp_cc debug control"); + } + return (0); +} + +inline void +tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event) +{ +#if !CONFIG_DTRACE +#pragma unused(th) +#endif /* !CONFIG_DTRACE */ + struct inpcb *inp = tp->t_inpcb; + + if (tcp_cc_debug && tcp_ccdbg_unit > 0) { + struct tcp_cc_debug_state dbg_state; + struct timespec tv; + + bzero(&dbg_state, sizeof(dbg_state)); + + nanotime(&tv); + /* Take time in seconds */ + dbg_state.ccd_tsns = (tv.tv_sec * 1000000000) + tv.tv_nsec; + inet_ntop(SOCK_DOM(inp->inp_socket), + ((SOCK_DOM(inp->inp_socket) == PF_INET) ? + (void *)&inp->inp_laddr.s_addr : + (void *)&inp->in6p_laddr), dbg_state.ccd_srcaddr, + sizeof(dbg_state.ccd_srcaddr)); + dbg_state.ccd_srcport = ntohs(inp->inp_lport); + inet_ntop(SOCK_DOM(inp->inp_socket), + ((SOCK_DOM(inp->inp_socket) == PF_INET) ? 
+ (void *)&inp->inp_faddr.s_addr : + (void *)&inp->in6p_faddr), dbg_state.ccd_destaddr, + sizeof(dbg_state.ccd_destaddr)); + dbg_state.ccd_destport = ntohs(inp->inp_fport); + + dbg_state.ccd_snd_cwnd = tp->snd_cwnd; + dbg_state.ccd_snd_wnd = tp->snd_wnd; + dbg_state.ccd_snd_ssthresh = tp->snd_ssthresh; + dbg_state.ccd_rttcur = tp->t_rttcur; + dbg_state.ccd_rxtcur = tp->t_rxtcur; + dbg_state.ccd_srtt = tp->t_srtt >> TCP_RTT_SHIFT; + dbg_state.ccd_event = event; + dbg_state.ccd_sndcc = inp->inp_socket->so_snd.sb_cc; + dbg_state.ccd_sndhiwat = inp->inp_socket->so_snd.sb_hiwat; + dbg_state.ccd_bytes_acked = tp->t_bytes_acked; + switch (tp->tcp_cc_index) { + case TCP_CC_ALGO_CUBIC_INDEX: + dbg_state.u.cubic_state.ccd_last_max = + tp->t_ccstate->cub_last_max; + dbg_state.u.cubic_state.ccd_tcp_win = + tp->t_ccstate->cub_tcp_win; + dbg_state.u.cubic_state.ccd_target_win = + tp->t_ccstate->cub_target_win; + dbg_state.u.cubic_state.ccd_avg_lastmax = + tp->t_ccstate->cub_avg_lastmax; + dbg_state.u.cubic_state.ccd_mean_deviation = + tp->t_ccstate->cub_mean_dev; + break; + default: + break; + } + + ctl_enqueuedata(tcp_ccdbg_ctlref, tcp_ccdbg_unit, + &dbg_state, sizeof(dbg_state), 0); + } + DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, + struct tcpcb *, tp, struct tcphdr *, th, int32_t, event); +} + +void tcp_cc_resize_sndbuf(struct tcpcb *tp) +{ + struct sockbuf *sb; + /* + * If the send socket buffer size is bigger than ssthresh, + * it is time to trim it because we do not want to hold + * too many mbufs in the socket buffer + */ + sb = &tp->t_inpcb->inp_socket->so_snd; + if (sb->sb_hiwat > tp->snd_ssthresh && + (sb->sb_flags & SB_AUTOSIZE)) { + if (sb->sb_idealsize > tp->snd_ssthresh) { + SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh); + } + sb->sb_flags |= SB_TRIM; + } +} + +void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp) +{ + struct sockbuf *sb; + sb = &tp->t_inpcb->inp_socket->so_snd; + if ((sb->sb_flags & (SB_TRIM|SB_AUTOSIZE)) == (SB_TRIM|SB_AUTOSIZE)) { + /* + * If there was a retransmission that was not necessary + * then the size of socket buffer can be restored to + * what it was before + */ + SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh); + if (sb->sb_hiwat <= sb->sb_idealsize) { + sbreserve(sb, sb->sb_idealsize); + sb->sb_flags &= ~SB_TRIM; + } + } +} + +/* + * Calculate initial cwnd according to RFC3390. + * + * Keep the old ss_fltsz sysctl for ABI compabitility issues. + * but it will be overriden if tcp_do_rfc3390 sysctl when it is set. + */ +void +tcp_cc_cwnd_init_or_reset(struct tcpcb *tp) +{ + if (tp->t_flags & TF_LOCAL) { + tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local; + } else { + /* initial congestion window according to RFC 3390 */ + if (tcp_do_rfc3390) + tp->snd_cwnd = min(4 * tp->t_maxseg, + max(2 * tp->t_maxseg, TCP_CC_CWND_INIT_BYTES)); + else + tp->snd_cwnd = tp->t_maxseg * ss_fltsz; + } +} + +/* + * Indicate whether this ack should be delayed. + * Here is the explanation for different settings of tcp_delack_enabled: + * - when set to 1, the bhavior is same as when set to 2. We kept this + * for binary compatibility. + * - when set to 2, will "ack every other packet" + * - if our last ack wasn't a 0-sized window. + * - if the peer hasn't sent us a TH_PUSH data packet (radar 3649245). + * If TH_PUSH is set, take this as a clue that we need to ACK + * with no delay. 
This helps higher level protocols who + * won't send us more data even if the window is open + * because their last "segment" hasn't been ACKed + * - when set to 3, will do "streaming detection" + * - if we receive more than "maxseg_unacked" full packets + * in the last 100ms + * - if the connection is not in slow-start or idle or + * loss/recovery states + * - if those criteria aren't met, it will ack every other packet. + */ +int +tcp_cc_delay_ack(struct tcpcb *tp, struct tcphdr *th) +{ + /* If any flags other than TH_ACK is set, set "end-of-write" bit */ + if ((th->th_flags & ~TH_ACK)) + tp->t_flagsext |= TF_STREAMEOW; + else + tp->t_flagsext &= ~(TF_STREAMEOW); + + switch (tcp_delack_enabled) { + case 1: + case 2: + if ((tp->t_flags & TF_RXWIN0SENT) == 0 && + (th->th_flags & TH_PUSH) == 0 && + (tp->t_unacksegs == 1)) + return(1); + break; + case 3: + if ((tp->t_flags & TF_RXWIN0SENT) == 0 && + (th->th_flags & TH_PUSH) == 0 && + ((tp->t_unacksegs == 1) || + ((tp->t_flags & TF_STRETCHACK) != 0 && + tp->t_unacksegs < (maxseg_unacked)))) + return(1); + break; + } + return(0); +} + +void +tcp_cc_allocate_state(struct tcpcb *tp) +{ + if (tp->tcp_cc_index == TCP_CC_ALGO_CUBIC_INDEX && + tp->t_ccstate == NULL) { + tp->t_ccstate = (struct tcp_ccstate *)zalloc(tcp_cc_zone); + + /* + * If we could not allocate memory for congestion control + * state, revert to using TCP NewReno as it does not + * require any state + */ + if (tp->t_ccstate == NULL) + tp->tcp_cc_index = TCP_CC_ALGO_NEWRENO_INDEX; + else + bzero(tp->t_ccstate, sizeof(*tp->t_ccstate)); + } +} + +/* + * If stretch ack was disabled automatically on long standing connections, + * re-evaluate the situation after 15 minutes to enable it. + */ +#define TCP_STRETCHACK_DISABLE_WIN (15 * 60 * TCP_RETRANSHZ) +void +tcp_cc_after_idle_stretchack(struct tcpcb *tp) +{ + int32_t tdiff; + + if (!(tp->t_flagsext & TF_DISABLE_STRETCHACK)) + return; + + tdiff = timer_diff(tcp_now, 0, tp->rcv_nostrack_ts, 0); + if (tdiff < 0) + tdiff = -tdiff; + + if (tdiff > TCP_STRETCHACK_DISABLE_WIN) { + tp->t_flagsext &= ~TF_DISABLE_STRETCHACK; + tp->t_stretchack_delayed = 0; + + tcp_reset_stretch_ack(tp); + } +} diff --git a/bsd/netinet/tcp_cc.h b/bsd/netinet/tcp_cc.h index cf1f0fb80..6ee5567a6 100644 --- a/bsd/netinet/tcp_cc.h +++ b/bsd/netinet/tcp_cc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,19 +62,23 @@ #define _NETINET_CC_H_ #ifdef KERNEL - +#include #include +#include -#define TCP_CC_ALGO_NEWRENO_INDEX 0 /* default congestion control algorithm */ -#define TCP_CC_ALGO_BACKGROUND_INDEX 1 /* congestion control for background transport */ -#define TCP_CC_ALGO_COUNT 2 /* Count of CC algorithms defined */ +#define TCP_CC_ALGO_NONE 0 +#define TCP_CC_ALGO_NEWRENO_INDEX 1 +#define TCP_CC_ALGO_BACKGROUND_INDEX 2 /* CC for background transport */ +#define TCP_CC_ALGO_CUBIC_INDEX 3 /* default CC algorithm */ +#define TCP_CC_ALGO_COUNT 4 /* Count of CC algorithms */ #define TCP_CA_NAME_MAX 16 /* Maximum characters in the name of a CC algorithm */ /* - * Structure to hold definition various actions defined by a congestion control - * algorithm for TCP. This can be used to change the congestion control on a - * connection based on the user settings of priority of a connection. + * Structure to hold definition various actions defined by a congestion + * control algorithm for TCP. 
This can be used to change the congestion + * control on a connection based on the user settings of priority of a + * connection. */ struct tcp_cc_algo { char name[TCP_CA_NAME_MAX]; @@ -84,14 +88,20 @@ struct tcp_cc_algo { /* init the congestion algorithm for the specified control block */ int (*init) (struct tcpcb *tp); - /* cleanup any state that is stored in the connection related to the algorithm */ + /* + * cleanup any state that is stored in the connection + * related to the algorithm + */ int (*cleanup) (struct tcpcb *tp); /* initialize cwnd at the start of a connection */ void (*cwnd_init) (struct tcpcb *tp); - /* called on the receipt of in-sequence ack during congestion avoidance phase */ - void (*inseq_ack_rcvd) (struct tcpcb *tp, struct tcphdr *th); + /* + * called on the receipt of in-sequence ack during congestion + * avoidance phase + */ + void (*congestion_avd) (struct tcpcb *tp, struct tcphdr *th); /* called on the receipt of a valid ack */ void (*ack_rcvd) (struct tcpcb *tp, struct tcphdr *th); @@ -116,12 +126,22 @@ struct tcp_cc_algo { } __attribute__((aligned(4))); +extern struct zone *tcp_cc_zone; + extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT]; #define CC_ALGO(tp) (tcp_cc_algo_list[tp->tcp_cc_index]) +#define TCP_CC_CWND_INIT_BYTES 4380 +extern void tcp_cc_init(void); extern void tcp_cc_resize_sndbuf(struct tcpcb *tp); extern void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp); +extern void tcp_cc_cwnd_init_or_reset(struct tcpcb *tp); +extern int tcp_cc_delay_ack(struct tcpcb *tp, struct tcphdr *th); +extern void tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, + int32_t event); +extern void tcp_cc_allocate_state(struct tcpcb *tp); +extern void tcp_cc_after_idle_stretchack(struct tcpcb *tp); #endif /* KERNEL */ #endif /* _NETINET_CC_H_ */ diff --git a/bsd/netinet/tcp_cubic.c b/bsd/netinet/tcp_cubic.c new file mode 100644 index 000000000..7e2d00b07 --- /dev/null +++ b/bsd/netinet/tcp_cubic.c @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if INET6 +#include +#endif /* INET6 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int tcp_cubic_init(struct tcpcb *tp); +static int tcp_cubic_cleanup(struct tcpcb *tp); +static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp); +static void tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th); +static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th); +static void tcp_cubic_pre_fr(struct tcpcb *tp); +static void tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th); +static void tcp_cubic_after_timeout(struct tcpcb *tp); +static int tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th); +static void tcp_cubic_switch_cc(struct tcpcb *tp, u_int16_t old_index); +static uint32_t tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt); +static uint32_t tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th); +static inline void tcp_cubic_clear_state(struct tcpcb *tp); + + +extern float cbrtf(float x); + +struct tcp_cc_algo tcp_cc_cubic = { + .name = "cubic", + .init = tcp_cubic_init, + .cleanup = tcp_cubic_cleanup, + .cwnd_init = tcp_cubic_cwnd_init_or_reset, + .congestion_avd = tcp_cubic_congestion_avd, + .ack_rcvd = tcp_cubic_ack_rcvd, + .pre_fr = tcp_cubic_pre_fr, + .post_fr = tcp_cubic_post_fr, + .after_idle = tcp_cubic_cwnd_init_or_reset, + .after_timeout = tcp_cubic_after_timeout, + .delay_ack = tcp_cubic_delay_ack, + .switch_to = tcp_cubic_switch_cc +}; + +const float tcp_cubic_backoff = 0.2; /* multiplicative decrease factor */ +const float tcp_cubic_coeff = 0.4; +const float tcp_cubic_fast_convergence_factor = 0.875; + +static int tcp_cubic_tcp_friendliness = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_tcp_friendliness, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_tcp_friendliness, 0, + "Enable TCP friendliness"); + +static int tcp_cubic_fast_convergence = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_fast_convergence, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_fast_convergence, 0, + "Enable fast convergence"); + +static int tcp_cubic_use_minrtt = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_use_minrtt, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_use_minrtt, 0, + "use a min of 5 sec rtt"); + +static int tcp_cubic_init(struct tcpcb *tp) +{ + OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets); + + VERIFY(tp->t_ccstate != NULL); + tcp_cubic_clear_state(tp); + return (0); +} + +static int tcp_cubic_cleanup(struct tcpcb *tp) +{ +#pragma unused(tp) + OSDecrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets); + return (0); +} + +/* + * Initialize the congestion window at the beginning of a connection or + * after idle time + */ +static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp) +{ + VERIFY(tp->t_ccstate != NULL); + + tcp_cubic_clear_state(tp); + tcp_cc_cwnd_init_or_reset(tp); + + /* + * slow start threshold could get initialized to a lower value + * when there is a cached value in the route metrics. In this case, + * the connection can enter congestion avoidance without any packet + * loss and Cubic will enter steady-state too early. It is better + * to always probe to find the initial slow-start threshold. 
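+ * (TCP_MAXWIN << TCP_MAX_WINSHIFT is the largest advertisable window,
+ * so raising ssthresh to it means "stay in slow start until the first
+ * loss".)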
+ */ + if (tp->t_inpcb->inp_stat->txbytes <= TCP_CC_CWND_INIT_BYTES + && tp->snd_ssthresh < (TCP_MAXWIN << TCP_MAX_WINSHIFT)) + tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; + + /* Initialize cubic last max to be same as ssthresh */ + tp->t_ccstate->cub_last_max = tp->snd_ssthresh; + + /* If stretch ack was auto-disabled, re-evaluate it */ + tcp_cc_after_idle_stretchack(tp); +} + +/* + * Compute the target congestion window for the next RTT according to + * cubic equation when an ack is received. + * + * W(t) = C(t-K)^3 + W(last_max) + */ +static uint32_t +tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt) +{ + float K, var; + u_int32_t elapsed_time, win; + + VERIFY(tp->t_ccstate->cub_last_max > 0); + win = min(tp->snd_cwnd, tp->snd_wnd); + if (tp->t_ccstate->cub_epoch_start == 0) { + /* + * This is the beginning of a new epoch, initialize some of + * the variables that we need to use for computing the + * congestion window later. + */ + tp->t_ccstate->cub_epoch_start = tcp_now; + if (tp->t_ccstate->cub_epoch_start == 0) + tp->t_ccstate->cub_epoch_start = 1; + if (win < tp->t_ccstate->cub_last_max) { + + VERIFY(current_task() == kernel_task); + + /* + * Compute cubic epoch period, this is the time + * period that the window will take to increase to + * last_max again after backoff due to loss. + */ + K = (tp->t_ccstate->cub_last_max - win) + / tp->t_maxseg / tcp_cubic_coeff; + K = cbrtf(K); + tp->t_ccstate->cub_epoch_period = K * TCP_RETRANSHZ; + /* Origin point */ + tp->t_ccstate->cub_origin_point = + tp->t_ccstate->cub_last_max; + } else { + tp->t_ccstate->cub_epoch_period = 0; + tp->t_ccstate->cub_origin_point = win; + } + tp->t_ccstate->cub_target_win = 0; + } + + VERIFY(tp->t_ccstate->cub_origin_point > 0); + /* + * Compute the target window for the next RTT using smoothed RTT + * as an estimate for next RTT. + */ + elapsed_time = timer_diff(tcp_now, 0, + tp->t_ccstate->cub_epoch_start, 0); + + if (tcp_cubic_use_minrtt) + elapsed_time += max(tcp_cubic_use_minrtt, rtt); + else + elapsed_time += rtt; + var = (elapsed_time - tp->t_ccstate->cub_epoch_period) / TCP_RETRANSHZ; + var = var * var * var * (tcp_cubic_coeff * tp->t_maxseg); + + tp->t_ccstate->cub_target_win = tp->t_ccstate->cub_origin_point + var; + return (tp->t_ccstate->cub_target_win); +} + +/* + * Standard TCP utilizes bandwidth well in low RTT and low BDP connections + * even when there is some packet loss. Enabling TCP mode will help Cubic + * to achieve this kind of utilization. + * + * But if there is a bottleneck link in the path with a fixed size queue + * and fixed bandwidth, TCP Cubic will help to reduce packet loss at this + * link because of the steady-state behavior. Using average and mean + * absolute deviation of W(lastmax), we try to detect if the congestion + * window is close to the bottleneck bandwidth. In that case, disabling + * TCP mode will help to minimize packet loss at this link. + * + * Disable TCP mode if the W(lastmax) (the window where previous packet + * loss happened) is within a small range from the average last max + * calculated. + */ +#define TCP_CUBIC_ENABLE_TCPMODE(_tp_) \ + ((!soissrcrealtime((_tp_)->t_inpcb->inp_socket) && \ + (_tp_)->t_ccstate->cub_mean_dev > (tp->t_maxseg << 1)) ? 1 : 0) + +/* + * Compute the window growth if standard TCP (AIMD) was used with + * a backoff of 0.5 and additive increase of 1 packet per RTT. 
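+ * (beta here is CUBIC's gentler multiplicative decrease, 1 - 0.2 =
+ * 0.8, substituted for standard TCP's 0.5.)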
+ * + * TCP window at time t can be calculated using the following equation + * with beta as 0.8 + * + * W(t) <- Wmax * beta + 3 * ((1 - beta)/(1 + beta)) * t/RTT + * + */ +static uint32_t +tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th) +{ + if (tp->t_ccstate->cub_tcp_win == 0) { + tp->t_ccstate->cub_tcp_win = min(tp->snd_cwnd, tp->snd_wnd); + tp->t_ccstate->cub_tcp_bytes_acked = 0; + } else { + tp->t_ccstate->cub_tcp_bytes_acked += + BYTES_ACKED(th, tp); + if (tp->t_ccstate->cub_tcp_bytes_acked >= + tp->t_ccstate->cub_tcp_win) { + tp->t_ccstate->cub_tcp_bytes_acked -= + tp->t_ccstate->cub_tcp_win; + tp->t_ccstate->cub_tcp_win += tp->t_maxseg; + } + } + return (tp->t_ccstate->cub_tcp_win); +} + +/* + * Handle an in-sequence ack during congestion avoidance phase. + */ +static void +tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th) +{ + u_int32_t cubic_target_win, tcp_win, rtt; + + tp->t_bytes_acked += BYTES_ACKED(th, tp); + + rtt = get_base_rtt(tp); + /* + * First compute cubic window. If cubic variables are not + * initialized (after coming out of recovery), this call will + * initialize them. + */ + cubic_target_win = tcp_cubic_update(tp, rtt); + + /* Compute TCP window if a multiplicative decrease of 0.2 is used */ + tcp_win = tcp_cubic_tcpwin(tp, th); + + if (tp->snd_cwnd < tcp_win && + (tcp_cubic_tcp_friendliness == 1 || + TCP_CUBIC_ENABLE_TCPMODE(tp))) { + /* this connection is in TCP-friendly region */ + if (tp->t_bytes_acked >= tp->snd_cwnd) { + tp->t_bytes_acked -= tp->snd_cwnd; + tp->snd_cwnd = min(tcp_win, TCP_MAXWIN << tp->snd_scale); + } + } else { + if (cubic_target_win > tp->snd_cwnd) { + /* + * The target win is computed for the next RTT. + * To reach this value, cwnd will have to be updated + * one segment at a time. Compute how many bytes + * need to be acknowledged before we can increase + * the cwnd by one segment. + */ + u_int64_t incr_win; + incr_win = tp->snd_cwnd * tp->t_maxseg; + incr_win /= (cubic_target_win - tp->snd_cwnd); + if (incr_win > 0 && + tp->t_bytes_acked >= incr_win) { + tp->t_bytes_acked -= incr_win; + tp->snd_cwnd = + min((tp->snd_cwnd + tp->t_maxseg), + TCP_MAXWIN << tp->snd_scale); + } + } + } +} + +static void +tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) +{ + if (tp->snd_cwnd >= tp->snd_ssthresh) { + /* Congestion avoidance phase */ + tcp_cubic_congestion_avd(tp, th); + } else { + /* + * Use 2*SMSS as limit on increment as suggested + * by RFC 3465 section 2.3 + */ + uint32_t acked, abc_lim, incr; + acked = BYTES_ACKED(th, tp); + abc_lim = (tcp_do_rfc3465_lim2 && + tp->snd_nxt == tp->snd_max) ? + 2 * tp->t_maxseg : tp->t_maxseg; + incr = min(acked, abc_lim); + + tp->snd_cwnd += incr; + tp->snd_cwnd = min(tp->snd_cwnd, + TCP_MAXWIN << tp->snd_scale); + } +} + +static void +tcp_cubic_pre_fr(struct tcpcb *tp) +{ + uint32_t win, avg; + int32_t dev; + tp->t_ccstate->cub_epoch_start = 0; + tp->t_ccstate->cub_tcp_win = 0; + tp->t_ccstate->cub_target_win = 0; + tp->t_ccstate->cub_tcp_bytes_acked = 0; + + win = min(tp->snd_cwnd, tp->snd_wnd); + /* + * Note the congestion window at which packet loss occurred as + * cub_last_max. + * + * If the congestion window is less than the last max window when + * loss occurred, it indicates that capacity available in the + * network has gone down. This can happen if a new flow has started + * and it is capturing some of the bandwidth. To reach convergence + * quickly, backoff a little more. Disable fast convergence to + * disable this behavior. 
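+ * For instance (illustrative numbers): with cub_last_max at 100
+ * segments and a new loss at win = 80 segments, fast convergence
+ * records last_max as 80 * 0.875 = 70, ceding room to the new flow
+ * sooner.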
+
+static void
+tcp_cubic_pre_fr(struct tcpcb *tp)
+{
+	uint32_t win, avg;
+	int32_t dev;
+	tp->t_ccstate->cub_epoch_start = 0;
+	tp->t_ccstate->cub_tcp_win = 0;
+	tp->t_ccstate->cub_target_win = 0;
+	tp->t_ccstate->cub_tcp_bytes_acked = 0;
+
+	win = min(tp->snd_cwnd, tp->snd_wnd);
+	/*
+	 * Note the congestion window at which packet loss occurred as
+	 * cub_last_max.
+	 *
+	 * If the congestion window is less than the last max window when
+	 * loss occurred, it indicates that capacity available in the
+	 * network has gone down. This can happen if a new flow has started
+	 * and it is capturing some of the bandwidth. To reach convergence
+	 * quickly, back off a little more. Disable fast convergence to
+	 * disable this behavior.
+	 */
+	if (win < tp->t_ccstate->cub_last_max &&
+	    tcp_cubic_fast_convergence == 1)
+		tp->t_ccstate->cub_last_max = win *
+		    tcp_cubic_fast_convergence_factor;
+	else
+		tp->t_ccstate->cub_last_max = win;
+
+	if (tp->t_ccstate->cub_last_max == 0) {
+		/*
+		 * If last_max is zero because snd_wnd is zero or for
+		 * any other reason, initialize it to the amount of data
+		 * in flight.
+		 */
+		tp->t_ccstate->cub_last_max = tp->snd_max - tp->snd_una;
+	}
+
+	/*
+	 * Compute the average and mean absolute deviation of the
+	 * window at which packet loss occurred.
+	 */
+	if (tp->t_ccstate->cub_avg_lastmax == 0) {
+		tp->t_ccstate->cub_avg_lastmax = tp->t_ccstate->cub_last_max;
+	} else {
+		/*
+		 * The average is computed by taking 63 parts of
+		 * history and one part of the most recent value.
+		 */
+		avg = tp->t_ccstate->cub_avg_lastmax;
+		avg = (avg << 6) - avg;
+		tp->t_ccstate->cub_avg_lastmax =
+		    (avg + tp->t_ccstate->cub_last_max) >> 6;
+	}
+
+	/* calculate the deviation from the average */
+	dev = tp->t_ccstate->cub_avg_lastmax - tp->t_ccstate->cub_last_max;
+
+	/* Take the absolute value */
+	if (dev < 0)
+		dev = -dev;
+
+	if (tp->t_ccstate->cub_mean_dev == 0) {
+		tp->t_ccstate->cub_mean_dev = dev;
+	} else {
+		dev = dev + ((tp->t_ccstate->cub_mean_dev << 4)
+		    - tp->t_ccstate->cub_mean_dev);
+		tp->t_ccstate->cub_mean_dev = dev >> 4;
+	}
+
+	/* Back off the congestion window by the tcp_cubic_backoff factor */
+	win = win - (win * tcp_cubic_backoff);
+	win = (win / tp->t_maxseg);
+	if (win < 2)
+		win = 2;
+	tp->snd_ssthresh = win * tp->t_maxseg;
+	tcp_cc_resize_sndbuf(tp);
+}
+
+static void
+tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th)
+{
+	uint32_t flight_size = 0;
+
+	if (SEQ_LEQ(th->th_ack, tp->snd_max))
+		flight_size = tp->snd_max - th->th_ack;
+	/*
+	 * Complete ack. The current window was inflated for fast recovery.
+	 * It has to be deflated post recovery.
+	 *
+	 * Window inflation should have left us with approx snd_ssthresh
+	 * outstanding data. If the flight size is zero or one segment,
+	 * make the congestion window at least as big as 2 segments to
+	 * avoid delayed acknowledgements. This is according to RFC 6582.
+	 */
+	if (flight_size < tp->snd_ssthresh)
+		tp->snd_cwnd = max(flight_size, tp->t_maxseg)
+		    + tp->t_maxseg;
+	else
+		tp->snd_cwnd = tp->snd_ssthresh;
+	tp->t_ccstate->cub_tcp_win = 0;
+	tp->t_ccstate->cub_target_win = 0;
+	tp->t_ccstate->cub_tcp_bytes_acked = 0;
+}
+
+static void
+tcp_cubic_after_timeout(struct tcpcb *tp)
+{
+	VERIFY(tp->t_ccstate != NULL);
+	if (!IN_FASTRECOVERY(tp)) {
+		tcp_cubic_clear_state(tp);
+		tcp_cubic_pre_fr(tp);
+	}
+
+	/*
+	 * Close the congestion window down to one segment as a retransmit
+	 * timeout might indicate severe congestion.
+	 */
+	tp->snd_cwnd = tp->t_maxseg;
+}
+
+static int
+tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th)
+{
+	return (tcp_cc_delay_ack(tp, th));
+}
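[Editor's aside -- illustration only, not part of the patch. tcp_cubic_pre_fr() above maintains two fixed-point moving averages of the window at which loss occurs: the average keeps 63/64 of its history, the mean absolute deviation 15/16. The sketch below isolates that bookkeeping:]

	/*
	 * Editor's sketch of the last_max statistics kept by
	 * tcp_cubic_pre_fr():
	 *   avg  <- (63 * avg + lastmax) / 64
	 *   mdev <- (15 * mdev + |avg - lastmax|) / 16
	 * both seeded with the first observation.
	 */
	#include <stdint.h>

	static void
	lastmax_stats(uint32_t *avg, uint32_t *mdev, uint32_t lastmax)
	{
		int32_t dev;

		if (*avg == 0)
			*avg = lastmax;
		else
			*avg = (((*avg << 6) - *avg) + lastmax) >> 6;

		dev = (int32_t)(*avg - lastmax);
		if (dev < 0)
			dev = -dev;	/* absolute deviation */

		if (*mdev == 0)
			*mdev = (uint32_t)dev;
		else
			*mdev = ((uint32_t)dev + ((*mdev << 4) - *mdev)) >> 4;
	}

[TCP_CUBIC_ENABLE_TCPMODE() then reads cub_mean_dev: a deviation above two segments means the loss point is unstable and the TCP-friendly mode stays available, while a small deviation suggests a fixed bottleneck queue where staying on the cubic schedule loses fewer packets.]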
+
+/*
+ * When switching from a different CC it is better for Cubic to start
+ * fresh. The state required for Cubic calculation might be stale and it
+ * might not represent the current state of the network. If it starts as
+ * a new connection it will probe and learn the existing network conditions.
+ */
+static void
+tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index)
+{
+#pragma unused(old_cc_index)
+	tcp_cubic_cwnd_init_or_reset(tp);
+	/* Start counting bytes for RFC 3465 again */
+	tp->t_bytes_acked = 0;
+
+	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
+}
+
+static inline void tcp_cubic_clear_state(struct tcpcb *tp)
+{
+	tp->t_ccstate->cub_last_max = 0;
+	tp->t_ccstate->cub_epoch_start = 0;
+	tp->t_ccstate->cub_origin_point = 0;
+	tp->t_ccstate->cub_tcp_win = 0;
+	tp->t_ccstate->cub_tcp_bytes_acked = 0;
+	tp->t_ccstate->cub_epoch_period = 0;
+	tp->t_ccstate->cub_target_win = 0;
+}
diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c
index 95b5b703e..ab5242853 100644
--- a/bsd/netinet/tcp_input.c
+++ b/bsd/netinet/tcp_input.c
@@ -151,12 +151,6 @@ struct tcphdr tcp_savetcp;
 
 tcp_cc	tcp_ccgen;
 
-#if IPSEC
-extern int ipsec_bypass;
-#endif
-
-extern int32_t total_sbmb_cnt;
-
 struct tcpstat tcpstat;
 
 static int log_in_vain = 0;
@@ -191,16 +185,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW | CTLFLAG_LOCKED,
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
	"TCP Segment Reassembly Queue");
 
-__private_extern__ int tcp_reass_maxseg = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&tcp_reass_maxseg, 0,
-	"Global maximum number of TCP Segments in Reassembly Queue");
-
-__private_extern__ int tcp_reass_qsize = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD | CTLFLAG_LOCKED,
-	&tcp_reass_qsize, 0,
-	"Global number of TCP Segments currently in Reassembly Queue");
-
 static int tcp_reass_overflows = 0;
 SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD | CTLFLAG_LOCKED,
	&tcp_reass_overflows, 0,
@@ -279,19 +263,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW | CTLFLA
	&tcp_obey_ifef_nowindowscale, 0, "");
 #endif
 
-/* This limit will determine when the receive socket buffer tuning will
- * kick in. Currently it will start when the bw*delay measured in
- * last RTT is more than half of the current hiwat on the buffer.
- */
-uint32_t tcp_rbuf_hiwat_shift = 1;
-
-/* This limit will determine when the socket buffer will be increased
- * to accommodate an application reading slowly. When the amount of
- * space left in the buffer is less than one forth of the bw*delay
- * measured in last RTT.
- */ -uint32_t tcp_rbuf_win_shift = 2; - extern int tcp_TCPTV_MIN; extern int tcp_acc_iaj_high; extern int tcp_acc_iaj_react_limit; @@ -319,7 +290,8 @@ static inline int tcp_stretch_ack_enable(struct tcpcb *tp); static inline void tcp_adaptive_rwtimo_check(struct tcpcb *, int); #if TRAFFIC_MGT -static inline void update_iaj_state(struct tcpcb *tp, uint32_t tlen, int reset_size); +static inline void update_iaj_state(struct tcpcb *tp, uint32_t tlen, + int reset_size); void compute_iaj(struct tcpcb *tp, int nlropkts, int lro_delay_factor); static void compute_iaj_meat(struct tcpcb *tp, uint32_t cur_iaj); #endif /* TRAFFIC_MGT */ @@ -337,10 +309,11 @@ static inline void tcp_sbrcv_tstmp_check(struct tcpcb *tp); static inline void tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sb, u_int32_t newsize, u_int32_t idealsize); static void tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th); -static int tcp_detect_bad_rexmt(struct tcpcb *tp, struct tcpopt *to); static void tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th); - +static void tcp_early_rexmt_check(struct tcpcb *tp, struct tcphdr *th); +static void tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, + struct tcpopt *to); /* * Constants used for resizing receive socket buffer * when timestamps are not supported @@ -355,9 +328,6 @@ static void tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, #define TCP_EARLY_REXMT_WIN (60 * TCP_RETRANSHZ) /* 60 seconds */ #define TCP_EARLY_REXMT_LIMIT 10 -extern void add_to_time_wait(struct tcpcb *, uint32_t delay); -extern void postevent(struct socket *, struct sockbuf *, int); - extern void ipfwsyslog( int level, const char *format,...); extern int fw_verbose; @@ -389,7 +359,6 @@ static int tcp_dropdropablreq(struct socket *head); static void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th); static void update_base_rtt(struct tcpcb *tp, uint32_t rtt); -uint32_t get_base_rtt(struct tcpcb *tp); void tcp_set_background_cc(struct socket *so); void tcp_set_foreground_cc(struct socket *so); static void tcp_set_new_cc(struct socket *so, uint16_t cc_index); @@ -597,8 +566,10 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, int dowakeup = 0; struct mbuf *oodata = NULL; int copy_oodata = 0; + u_int16_t qlimit; boolean_t cell = IFNET_IS_CELLULAR(ifp); boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); + boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp)); /* * Call with th==0 after become established to @@ -607,21 +578,12 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, if (th == NULL) goto present; - /* If the reassembly queue already has entries or if we are going to add - * a new one, then the connection has reached a loss state. + /* + * If the reassembly queue already has entries or if we are going + * to add a new one, then the connection has reached a loss state. * Reset the stretch-ack algorithm at this point. */ - if ((tp->t_flags & TF_STRETCHACK) != 0) - tcp_reset_stretch_ack(tp); - - /* When the connection reaches a loss state, we need to send more acks - * for a period of time so that the sender's congestion window will - * open. Wait until we see some packets on the connection before - * stretching acks again. - */ - tp->t_flagsext |= TF_RCVUNACK_WAITSS; - tp->rcv_waitforss = 0; - + tcp_reset_stretch_ack(tp); #if TRAFFIC_MGT if (tp->acc_iaj > 0) @@ -635,8 +597,10 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, * queue. 
Always keep one global queue entry spare to be able to * process the missing segment. */ + qlimit = min(max(100, so->so_rcv.sb_hiwat >> 10), + tcp_autorcvbuf_max >> 10); if (th->th_seq != tp->rcv_nxt && - tcp_reass_qsize + 1 >= tcp_reass_maxseg) { + (tp->t_reassqlen + 1) >= qlimit) { tcp_reass_overflows++; tcpstat.tcps_rcvmemdrop++; m_freem(m); @@ -645,13 +609,13 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, } /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */ - te = (struct tseg_qent *) zalloc_noblock(tcp_reass_zone); + te = (struct tseg_qent *) zalloc(tcp_reass_zone); if (te == NULL) { tcpstat.tcps_rcvmemdrop++; m_freem(m); return (0); } - tcp_reass_qsize++; + tp->t_reassqlen++; /* * Find a segment which begins after this one does. @@ -676,15 +640,19 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += *tlenp; if (nstat_collect) { - nstat_route_rx(inp->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_DUPLICATE); - INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); - INP_ADD_STAT(inp, cell, wifi, rxbytes, *tlenp); + nstat_route_rx(inp->inp_route.ro_rt, + 1, *tlenp, + NSTAT_RX_FLAG_DUPLICATE); + INP_ADD_STAT(inp, cell, wifi, wired, + rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, + rxbytes, *tlenp); tp->t_stat.rxduplicatebytes += *tlenp; } m_freem(m); zfree(tcp_reass_zone, te); te = NULL; - tcp_reass_qsize--; + tp->t_reassqlen--; /* * Try to present any queued data * at the left window edge to the user. @@ -701,9 +669,10 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, tcpstat.tcps_rcvoopack++; tcpstat.tcps_rcvoobyte += *tlenp; if (nstat_collect) { - nstat_route_rx(inp->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_OUT_OF_ORDER); - INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); - INP_ADD_STAT(inp, cell, wifi, rxbytes, *tlenp); + nstat_route_rx(inp->inp_route.ro_rt, 1, *tlenp, + NSTAT_RX_FLAG_OUT_OF_ORDER); + INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, *tlenp); tp->t_stat.rxoutoforderbytes += *tlenp; } @@ -726,7 +695,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); zfree(tcp_reass_zone, q); - tcp_reass_qsize--; + tp->t_reassqlen--; q = nq; } @@ -774,7 +743,11 @@ present: goto msg_unordered_delivery; return (0); - } + } + + /* lost packet was recovered, so ooo data can be returned */ + tcpstat.tcps_recovered_pkts++; + do { tp->rcv_nxt += q->tqe_len; flags = q->tqe_th->th_flags & TH_FIN; @@ -807,7 +780,7 @@ present: } } zfree(tcp_reass_zone, q); - tcp_reass_qsize--; + tp->t_reassqlen--; q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); @@ -871,8 +844,8 @@ tcp_reduce_congestion_window( ENTER_FASTRECOVERY(tp); tp->snd_recover = tp->snd_max; tp->t_timer[TCPT_REXMT] = 0; + tp->t_timer[TCPT_PTO] = 0; tp->t_rtttime = 0; - tp->ecn_flags |= TE_SENDCWR; tp->snd_cwnd = tp->snd_ssthresh + tp->t_maxseg * tcprexmtthresh; } @@ -967,6 +940,8 @@ tcp_cansbgrow(struct sockbuf *sb) if ((total_sbmb_cnt < mblim) && (sb->sb_hiwat < sbspacelim)) { return(1); + } else { + OSIncrementAtomic64(&sbmb_limreached); } return(0); } @@ -1012,12 +987,13 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, /* * Do not grow the receive socket buffer if * - auto resizing is disabled, globally or on this socket - * - the high water mark has already reached the maximum + * - the high water mark already reached the maximum * - the stream is in 
background and receive side is being * throttled * - if there are segments in reassembly queue indicating loss, * do not need to increase recv window during recovery as more - * data is not going to be sent. + * data is not going to be sent. A duplicate ack sent during + * recovery should not change the receive window */ if (tcp_do_autorcvbuf == 0 || (sbrcv->sb_flags & SB_AUTOSIZE) == 0 || @@ -1042,8 +1018,8 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, * Timestamp option is not supported on this connection. * If the connection reached a state to indicate that * the receive socket buffer needs to grow, increase - * the high water mark. - */ + * the high water mark. + */ if (TSTMP_GEQ(tcp_now, tp->rfbuf_ts + TCPTV_RCVNOTS_QUANTUM)) { if (tp->rfbuf_cnt >= TCP_RCVNOTS_BYTELEVEL) { @@ -1054,38 +1030,46 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, } else { tp->rfbuf_cnt += pktlen; return; - } + } } else if (to->to_tsecr != 0) { - /* If the timestamp shows that one RTT has + /* + * If the timestamp shows that one RTT has * completed, we can stop counting the * bytes. Here we consider increasing - * the socket buffer if it fits the following - * criteria: - * 1. the bandwidth measured in last rtt, is more - * than half of sb_hiwat, this will help to scale the - * buffer according to the bandwidth on the link. - * 2. the space left in sbrcv is less than - * one forth of the bandwidth measured in last rtt, this - * will help to accommodate an application reading slowly. + * the socket buffer if the bandwidth measured in + * last rtt, is more than half of sb_hiwat, this will + * help to scale the buffer according to the bandwidth + * on the link. */ if (TSTMP_GEQ(to->to_tsecr, tp->rfbuf_ts)) { - if ((tp->rfbuf_cnt > (sbrcv->sb_hiwat - - (sbrcv->sb_hiwat >> tcp_rbuf_hiwat_shift)) || - (sbrcv->sb_hiwat - sbrcv->sb_cc) < - (tp->rfbuf_cnt >> tcp_rbuf_win_shift))) { - u_int32_t rcvbuf_inc; + if (tp->rfbuf_cnt > (sbrcv->sb_hiwat - + (sbrcv->sb_hiwat >> 1))) { + int32_t rcvbuf_inc, min_incr; /* - * Increment the receive window by a multiple of - * maximum sized segments. This will prevent a - * connection from sending smaller segments on - * wire if it is limited by the receive window. + * Increment the receive window by a + * multiple of maximum sized segments. + * This will prevent a connection from + * sending smaller segments on wire if it + * is limited by the receive window. * - * Set the ideal size based on current bandwidth - * measurements. We set the ideal size on receive - * socket buffer to be twice the bandwidth delay - * product. + * Set the ideal size based on current + * bandwidth measurements. We set the + * ideal size on receive socket buffer to + * be twice the bandwidth delay product. 
+ */ + rcvbuf_inc = (tp->rfbuf_cnt << 1) + - sbrcv->sb_hiwat; + + /* + * Make the increment equal to 8 segments + * at least */ - rcvbuf_inc = tp->t_maxseg << tcp_autorcvbuf_inc_shift; + min_incr = tp->t_maxseg << tcp_autorcvbuf_inc_shift; + if (rcvbuf_inc < min_incr) + rcvbuf_inc = min_incr; + + rcvbuf_inc = + (rcvbuf_inc / tp->t_maxseg) * tp->t_maxseg; tcp_sbrcv_reserve(tp, sbrcv, sbrcv->sb_hiwat + rcvbuf_inc, (tp->rfbuf_cnt * 2)); @@ -1209,7 +1193,7 @@ tcp_sbrcv_tstmp_check(struct tcpcb *tp) { static inline int tcp_stretch_ack_enable(struct tcpcb *tp) { - if (!(tp->t_flagsext & TF_NOSTRETCHACK) && + if (!(tp->t_flagsext & (TF_NOSTRETCHACK|TF_DISABLE_STRETCHACK)) && tp->rcv_by_unackwin >= (maxseg_unacked * tp->t_maxseg) && TSTMP_GT(tp->rcv_unackwin + tcp_maxrcvidle, tcp_now) && (!(tp->t_flagsext & TF_RCVUNACK_WAITSS) || @@ -1220,7 +1204,8 @@ tcp_stretch_ack_enable(struct tcpcb *tp) return(0); } -/* Reset the state related to stretch-ack algorithm. This will make +/* + * Reset the state related to stretch-ack algorithm. This will make * the receiver generate an ack every other packet. The receiver * will start re-evaluating the rate at which packets come to decide * if it can benefit by lowering the ack traffic. @@ -1231,6 +1216,15 @@ tcp_reset_stretch_ack(struct tcpcb *tp) tp->t_flags &= ~(TF_STRETCHACK); tp->rcv_by_unackwin = 0; tp->rcv_unackwin = tcp_now + tcp_rcvunackwin; + + /* + * When there is packet loss or packet re-ordering or CWR due to + * ECN, the sender's congestion window is reduced. In these states, + * generate an ack for every other packet for some time to allow + * the sender's congestion window to grow. + */ + tp->t_flagsext |= TF_RCVUNACK_WAITSS; + tp->rcv_waitforss = 0; } /* @@ -1245,21 +1239,30 @@ tcp_reset_stretch_ack(struct tcpcb *tp) * This function will return 1 if it is a spurious retransmit, * 0 otherwise. */ -static int -tcp_detect_bad_rexmt(struct tcpcb *tp, struct tcpopt *to) +int +tcp_detect_bad_rexmt(struct tcpcb *tp, struct tcphdr *th, + struct tcpopt *to, u_int32_t rxtime) { int32_t tdiff, bad_rexmt_win; - tdiff = (int32_t)(tcp_now - tp->t_rxtstart); bad_rexmt_win = (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); - if (TSTMP_SUPPORTED(tp) && tp->t_rxtstart > 0 && - (to->to_flags & TOF_TS) != 0 && - to->to_tsecr != 0 && - TSTMP_LT(to->to_tsecr, tp->t_rxtstart)) { - return (1); - } else if (tp->t_rxtshift == 1 && - tdiff < bad_rexmt_win) { - return(1); + /* If the ack has ECN CE bit, then cwnd has to be adjusted */ + if ((tp->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON) + && (th->th_flags & TH_ECE)) + return (0); + if (TSTMP_SUPPORTED(tp)) { + if (rxtime > 0 && (to->to_flags & TOF_TS) + && to->to_tsecr != 0 + && TSTMP_LT(to->to_tsecr, rxtime)) + return (1); + } else { + if ((tp->t_rxtshift == 1 + || (tp->t_flagsext & TF_SENT_TLPROBE)) + && rxtime > 0) { + tdiff = (int32_t)(tcp_now - rxtime); + if (tdiff < bad_rexmt_win) + return(1); + } } return(0); } @@ -1295,6 +1298,7 @@ tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th) if (tp->t_flags & TF_WASFRECOVERY) ENTER_FASTRECOVERY(tp); } + tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES); tp->snd_recover = tp->snd_recover_prev; tp->snd_nxt = tp->snd_max; tp->t_rxtshift = 0; @@ -1315,6 +1319,169 @@ tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th) tp->t_rtttime = 0; } +/* + * If the previous packet was sent in retransmission timer, and it was + * not needed, then restore the congestion window to the state before that + * transmission. 
+ * + * If the last packet was sent in tail loss probe timeout, check if that + * recovered the last packet. If so, that will indicate a real loss and + * the congestion window needs to be lowered. + */ +static void +tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) +{ + if (tp->t_rxtshift > 0 && + tcp_detect_bad_rexmt(tp, th, to, tp->t_rxtstart)) { + ++tcpstat.tcps_sndrexmitbad; + tcp_bad_rexmt_restore_state(tp, th); + tcp_ccdbg_trace(tp, th, TCP_CC_BAD_REXMT_RECOVERY); + } else if ((tp->t_flagsext & TF_SENT_TLPROBE) + && tp->t_tlphighrxt > 0 + && SEQ_GEQ(th->th_ack, tp->t_tlphighrxt) + && !tcp_detect_bad_rexmt(tp, th, to, tp->t_tlpstart)) { + /* + * The tail loss probe recovered the last packet and + * we need to adjust the congestion window to take + * this loss into account. + */ + ++tcpstat.tcps_tlp_recoverlastpkt; + if (!IN_FASTRECOVERY(tp)) { + tcp_reduce_congestion_window(tp); + EXIT_FASTRECOVERY(tp); + } + tcp_ccdbg_trace(tp, th, TCP_CC_TLP_RECOVER_LASTPACKET); + } + + tp->t_flagsext &= ~(TF_SENT_TLPROBE); + tp->t_tlphighrxt = 0; + tp->t_tlpstart = 0; + + /* + * check if the latest ack was for a segment sent during PMTU + * blackhole detection. If the timestamp on the ack is before + * PMTU blackhole detection, then revert the size of the max + * segment to previous size. + */ + if (tp->t_rxtshift > 0 && (tp->t_flags & TF_BLACKHOLE) && + tp->t_pmtud_start_ts > 0 && TSTMP_SUPPORTED(tp)) { + if ((to->to_flags & TOF_TS) && to->to_tsecr != 0 + && TSTMP_LT(to->to_tsecr, tp->t_pmtud_start_ts)) { + tcp_pmtud_revert_segment_size(tp); + } + } + if (tp->t_pmtud_start_ts > 0) + tp->t_pmtud_start_ts = 0; +} + +/* + * Check if early retransmit can be attempted according to RFC 5827. + * + * If packet reordering is detected on a connection, fast recovery will + * be delayed until it is clear that the packet was lost and not reordered. + * But reordering detection is done only when SACK is enabled. + * + * On connections that do not support SACK, there is a limit on the number + * of early retransmits that can be done per minute. This limit is needed + * to make sure that too many packets are not retransmitted when there is + * packet reordering. + */ +static void +tcp_early_rexmt_check (struct tcpcb *tp, struct tcphdr *th) +{ + u_int32_t obytes, snd_off; + int32_t snd_len; + struct socket *so = tp->t_inpcb->inp_socket; + + if (early_rexmt && (SACK_ENABLED(tp) || + tp->t_early_rexmt_count < TCP_EARLY_REXMT_LIMIT) && + SEQ_GT(tp->snd_max, tp->snd_una) && + (tp->t_dupacks == 1 || + (SACK_ENABLED(tp) && + !TAILQ_EMPTY(&tp->snd_holes)))) { + /* + * If there are only a few outstanding + * segments on the connection, we might need + * to lower the retransmit threshold. This + * will allow us to do Early Retransmit as + * described in RFC 5827. + */ + if (SACK_ENABLED(tp) && + !TAILQ_EMPTY(&tp->snd_holes)) { + obytes = (tp->snd_max - tp->snd_fack) + + tp->sackhint.sack_bytes_rexmit; + } else { + obytes = (tp->snd_max - tp->snd_una); + } + + /* + * In order to lower retransmit threshold the + * following two conditions must be met. + * 1. the amount of outstanding data is less + * than 4*SMSS bytes + * 2. there is no unsent data ready for + * transmission or the advertised window + * will limit sending new segments. 
+	 */
+	snd_off = tp->snd_max - tp->snd_una;
+	snd_len = min(so->so_snd.sb_cc, tp->snd_wnd) - snd_off;
+	if (obytes < (tp->t_maxseg << 2) &&
+	    snd_len <= 0) {
+		u_int32_t osegs;
+
+		osegs = obytes / tp->t_maxseg;
+		if ((osegs * tp->t_maxseg) < obytes)
+			osegs++;
+
+		/*
+		 * Since the connection might have already
+		 * received some dupacks, we add them to
+		 * the outstanding segments count to get
+		 * the correct retransmit threshold.
+		 *
+		 * By checking for early retransmit after
+		 * receiving some duplicate acks when SACK
+		 * is supported, the connection will
+		 * enter fast recovery even if multiple
+		 * segments are lost in the same window.
+		 */
+		osegs += tp->t_dupacks;
+		if (osegs < 4) {
+			tp->t_rexmtthresh =
+			    ((osegs - 1) > 1) ? (osegs - 1) : 1;
+			tp->t_rexmtthresh =
+			    min(tp->t_rexmtthresh, tcprexmtthresh);
+			tp->t_rexmtthresh =
+			    max(tp->t_rexmtthresh, tp->t_dupacks);
+
+			if (tp->t_early_rexmt_count == 0)
+				tp->t_early_rexmt_win = tcp_now;
+
+			if (tp->t_flagsext & TF_SENT_TLPROBE) {
+				tcpstat.tcps_tlp_recovery++;
+				tcp_ccdbg_trace(tp, th,
+				    TCP_CC_TLP_RECOVERY);
+			} else {
+				tcpstat.tcps_early_rexmt++;
+				tp->t_early_rexmt_count++;
+				tcp_ccdbg_trace(tp, th,
+				    TCP_CC_EARLY_RETRANSMIT);
+			}
+		}
+	}
+
+	/*
+	 * If we ever sent a TLP probe, the acknowledgement will trigger
+	 * early retransmit because the value of snd_fack will be close
+	 * to snd_max. This will take care of adjustments to the
+	 * congestion window. So we can reset the TF_SENT_TLPROBE flag.
+	 */
+	tp->t_flagsext &= ~(TF_SENT_TLPROBE);
+	tp->t_tlphighrxt = 0;
+	tp->t_tlpstart = 0;
+}
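[Editor's aside -- illustration only, not part of the patch. The RFC 5827 threshold arithmetic in tcp_early_rexmt_check() above can be restated compactly; def_thresh stands for the normal dupack threshold (tcprexmtthresh, i.e. 3):]

	/*
	 * Editor's sketch: the early-retransmit dupack threshold chosen
	 * when fewer than four segments are outstanding. Dupacks already
	 * received count toward the outstanding-segment total.
	 */
	#include <stdint.h>

	static uint32_t
	early_rexmt_thresh(uint32_t obytes, uint32_t mss, uint32_t dupacks,
	    uint32_t def_thresh)
	{
		uint32_t osegs = obytes / mss;
		uint32_t thresh = def_thresh;

		if (osegs * mss < obytes)
			osegs++;		/* round up to whole segments */
		osegs += dupacks;
		if (osegs < 4) {
			thresh = (osegs > 2) ? (osegs - 1) : 1;
			if (thresh > def_thresh)
				thresh = def_thresh;	/* never above the default */
			if (thresh < dupacks)
				thresh = dupacks;
		}
		return (thresh);
	}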
+
 void
 tcp_input(m, off0)
	struct mbuf *m;
	int off0;
@@ -1345,7 +1512,7 @@ tcp_input(m, off0)
 #endif
	struct m_tag *fwd_tag;
	u_char ip_ecn = IPTOS_ECN_NOTECT;
-	unsigned int ifscope, nocell = 0;
+	unsigned int ifscope;
	uint8_t isconnected, isdisconnected;
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	int pktf_sw_lro_pkt = (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_PKT) ? 1 : 0;
@@ -1357,6 +1524,8 @@ tcp_input(m, off0)
 #endif /* MPTCP */
	boolean_t cell = IFNET_IS_CELLULAR(ifp);
	boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp));
+	boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp));
+	struct tcp_respond_args tra;
 
 #define TCP_INC_VAR(stat, npkts) do { \
		stat += npkts; \
@@ -1634,40 +1803,24 @@ findpcb:
	 */
	if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundifp->if_index;
-
-	/*
-	 * If the PCB is present and the socket isn't allowed to use
-	 * the cellular interface, indicate it as such for tcp_respond.
-	 */
-	if (inp != NULL && (inp->inp_flags & INP_NO_IFT_CELLULAR))
-		nocell = 1;
-
-#if IPSEC
-	if (ipsec_bypass == 0) {
+#if NECP
+	if (inp != NULL && (
 #if INET6
-		if (isipv6) {
-			if (inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) {
-				IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
-				if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING)
-					inp = NULL;	// pretend we didn't find it
-
-				IF_TCP_STATINC(ifp, badformatipsec);
-
-				goto dropnosock;
-			}
-		} else
-#endif /* INET6 */
-		if (inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) {
-			IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
-			if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING)
-				inp = NULL;	// pretend we didn't find it
-
-			IF_TCP_STATINC(ifp, badformatipsec);
-
-			goto dropnosock;
-		}
+	    isipv6 ? !necp_socket_is_allowed_to_send_recv_v6(inp,
+	    th->th_dport, th->th_sport, &ip6->ip6_dst,
+	    &ip6->ip6_src, ifp, NULL) :
+#endif
+	    !necp_socket_is_allowed_to_send_recv_v4(inp, th->th_dport,
+	    th->th_sport, &ip->ip_dst, &ip->ip_src,
+	    ifp, NULL))) {
+		if (in_pcb_checkstate(inp, WNT_RELEASE, 0)
+		    == WNT_STOPUSING) {
+			inp = NULL;	/* pretend we didn't find it */
+		}
+		IF_TCP_STATINC(ifp, badformatipsec);
+		goto dropnosock;
	}
-#endif /*IPSEC*/
+#endif /* NECP */
 
	/*
	 * If the state is CLOSED (i.e., TCB does not exist) then
@@ -1816,15 +1969,19 @@ findpcb:
		struct inpcb *oinp = sotoinpcb(so);
 #endif /* INET6 */
		struct ifnet *head_ifscope;
-		unsigned int head_nocell, head_recvanyif;
+		unsigned int head_nocell, head_recvanyif,
+		    head_noexpensive, head_awdl_unrestricted;
 
		/* Get listener's bound-to-interface, if any */
		head_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
		    inp->inp_boundifp : NULL;
		/* Get listener's no-cellular information, if any */
-		head_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+		head_nocell = INP_NO_CELLULAR(inp);
		/* Get listener's recv-any-interface, if any */
		head_recvanyif = (inp->inp_flags & INP_RECV_ANYIF);
+		/* Get listener's no-expensive information, if any */
+		head_noexpensive = INP_NO_EXPENSIVE(inp);
+		head_awdl_unrestricted = INP_AWDL_UNRESTRICTED(inp);
 
		/*
		 * If the state is LISTEN then ignore segment if it contains an RST.
@@ -1987,11 +2144,14 @@ findpcb:
				inp->inp_flags &= ~INP_BOUND_IF;
			}
			/*
-			 * Inherit INP_NO_IFT_CELLULAR from listener.
+			 * Inherit restrictions from listener.
			 */
-			if (head_nocell) {
-				inp->inp_flags |= INP_NO_IFT_CELLULAR;
-			}
+			if (head_nocell)
+				inp_set_nocellular(inp);
+			if (head_noexpensive)
+				inp_set_noexpensive(inp);
+			if (head_awdl_unrestricted)
+				inp_set_awdl_unrestricted(inp);
			/*
			 * Inherit {IN,IN6}_RECV_ANYIF from listener.
			 */
@@ -2069,7 +2229,7 @@ findpcb:
			    struct tcpcb *, tp, int32_t, TCPS_LISTEN);
			tp->t_state = TCPS_LISTEN;
			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY);
-			tp->t_flagsext |= (tp0->t_flagsext & TF_RXTFINDROP);
+			tp->t_flagsext |= (tp0->t_flagsext & (TF_RXTFINDROP|TF_NOTIMEWAIT));
			tp->t_keepinit = tp0->t_keepinit;
			tp->t_keepcnt = tp0->t_keepcnt;
			tp->t_keepintvl = tp0->t_keepintvl;
@@ -2087,7 +2247,8 @@ findpcb:
		KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN | DBG_FUNC_END,0,0,0,0,0);
		}
	}
-	lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
+	    LCK_MTX_ASSERT_OWNED);
 
	if (tp->t_state == TCPS_ESTABLISHED && tlen > 0) {
		/*
@@ -2148,11 +2309,12 @@ findpcb:
	/*
	 * If we received an explicit notification of congestion in
	 * ip tos ecn bits or by the CWR bit in TCP header flags, reset
-	 * the ack-strteching state.
+	 * the ack-stretching state. We need to handle ECN notification if
+	 * an ECN setup SYN was sent even once.
	 */
-	if (tp->t_state == TCPS_ESTABLISHED &&
-	    (ip_ecn == IPTOS_ECN_CE ||
-	    (thflags & TH_CWR)))
+	if (tp->t_state == TCPS_ESTABLISHED
+	    && (tp->ecn_flags & TE_SETUPSENT)
+	    && (ip_ecn == IPTOS_ECN_CE || (thflags & TH_CWR)))
		tcp_reset_stretch_ack(tp);
 
	/*
@@ -2330,27 +2492,16 @@ findpcb:
	    SEQ_LEQ(th->th_ack, tp->snd_max) &&
	    tp->snd_cwnd >= tp->snd_ssthresh &&
	    (!IN_FASTRECOVERY(tp) &&
-	    ((!(SACK_ENABLED(tp)) && tp->t_dupacks < tp->t_rexmtthresh) ||
-	    (SACK_ENABLED(tp) && to.to_nsacks == 0 &&
-	    TAILQ_EMPTY(&tp->snd_holes))))) {
+	    ((!(SACK_ENABLED(tp)) &&
+	    tp->t_dupacks < tp->t_rexmtthresh) ||
+	    (SACK_ENABLED(tp) && to.to_nsacks == 0 &&
+	    TAILQ_EMPTY(&tp->snd_holes))))) {
		/*
		 * this is a pure ack for outstanding data.
		 */
		++tcpstat.tcps_predack;
-		/*
-		 * "bad retransmit" recovery
-		 */
-		if (tp->t_rxtshift > 0 &&
-		    tcp_detect_bad_rexmt(tp, &to)) {
-			++tcpstat.tcps_sndrexmitbad;
-			tcp_bad_rexmt_restore_state(tp, th);
-
-			DTRACE_TCP5(cc, void, NULL,
-			    struct inpcb *, tp->t_inpcb,
-			    struct tcpcb *, tp, struct tcphdr *, th,
-			    int32_t, TCP_CC_BAD_REXMT_RECOVERY);
-		}
+		tcp_bad_rexmt_check(tp, th, &to);
 
		/* Recalculate the RTT */
		tcp_compute_rtt(tp, &to, th);
@@ -2363,13 +2514,9 @@ findpcb:
		 * avoidance phase. The calculations in this function
		 * assume that snd_una is not updated yet.
		 */
-		if (CC_ALGO(tp)->inseq_ack_rcvd != NULL)
-			CC_ALGO(tp)->inseq_ack_rcvd(tp, th);
-
-		DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
-		    struct tcpcb *, tp, struct tcphdr *, th,
-		    int32_t, TCP_CC_INSEQ_ACK_RCVD);
-
+		if (CC_ALGO(tp)->congestion_avd != NULL)
+			CC_ALGO(tp)->congestion_avd(tp, th);
+		tcp_ccdbg_trace(tp, th, TCP_CC_INSEQ_ACK_RCVD);
		sbdrop(&so->so_snd, acked);
		if (so->so_flags & SOF_ENABLE_MSGS) {
			VERIFY(acked <= so->so_msg_state->msg_serial_bytes);
@@ -2404,10 +2551,14 @@ findpcb:
		 * are ready to send, let tcp_output
		 * decide between more output or persist.
		 */
-		if (tp->snd_una == tp->snd_max)
+		if (tp->snd_una == tp->snd_max) {
			tp->t_timer[TCPT_REXMT] = 0;
-		else if (tp->t_timer[TCPT_PERSIST] == 0)
-			tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
+			tp->t_timer[TCPT_PTO] = 0;
+		} else if (tp->t_timer[TCPT_PERSIST] == 0) {
+			tp->t_timer[TCPT_REXMT] =
+			    OFFSET_FROM_START(tp,
+			    tp->t_rxtcur);
+		}
 
		if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
		    tp->t_bwmeas != NULL)
@@ -2474,12 +2625,14 @@ findpcb:
			tcpstat.tcps_rcvbyte += tlen;
			if (nstat_collect) {
				if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_PKT) {
-					INP_ADD_STAT(inp, cell, wifi, rxpackets,
-					    m->m_pkthdr.lro_npkts);
+					INP_ADD_STAT(inp, cell, wifi, wired,
+					    rxpackets, m->m_pkthdr.lro_npkts);
				} else {
-					INP_ADD_STAT(inp, cell, wifi, rxpackets, 1);
+					INP_ADD_STAT(inp, cell, wifi, wired,
+					    rxpackets, 1);
				}
-				INP_ADD_STAT(inp, cell, wifi, rxbytes, tlen);
+				INP_ADD_STAT(inp, cell, wifi, wired, rxbytes,
+				    tlen);
			}
 
			/*
@@ -2547,7 +2700,8 @@ findpcb:
	 * Receive window is amount of space in rcv queue,
	 * but not less than advertised window.
*/ - lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, + LCK_MTX_ASSERT_OWNED); win = tcp_sbspace(tp); if (win < 0) win = 0; @@ -2589,7 +2743,8 @@ findpcb: register struct sockaddr_in6 *sin6; #endif - lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, + LCK_MTX_ASSERT_OWNED); #if INET6 if (isipv6) { MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, @@ -2745,6 +2900,14 @@ findpcb: } if (thflags & TH_RST) { if ((thflags & TH_ACK) != 0) { +#if MPTCP + if ((so->so_flags & SOF_MPTCP_FASTJOIN) && + SEQ_GT(th->th_ack, tp->iss+1)) { + so->so_flags &= ~SOF_MPTCP_FASTJOIN; + /* ignore the RST and retransmit SYN */ + goto drop; + } +#endif /* MPTCP */ soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET)); @@ -2765,6 +2928,7 @@ findpcb: if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE)) { /* ECN-setup SYN-ACK */ tp->ecn_flags |= TE_SETUPRECEIVED; + tcpstat.tcps_ecn_setup++; } else { /* non-ECN-setup SYN-ACK */ @@ -2829,6 +2993,11 @@ findpcb: if ((!(tp->t_mpflags & TMPF_MPTCP_TRUE)) && (tp->t_mpflags & TMPF_SENT_JOIN)) { isconnected = FALSE; + /* Start data xmit if fastjoin */ + if (mptcp_fastjoin && (so->so_flags & SOF_MPTCP_FASTJOIN)) { + soevent(so, (SO_FILT_HINT_LOCKED | + SO_FILT_HINT_MPFASTJ)); + } } else #endif /* MPTCP */ isconnected = TRUE; @@ -3041,8 +3210,10 @@ trimthenstep6: if (nstat_collect) { nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, tlen, NSTAT_RX_FLAG_DUPLICATE); - INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); - INP_ADD_STAT(inp, cell, wifi, rxbytes, tlen); + INP_ADD_STAT(inp, cell, wifi, wired, + rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, + rxbytes, tlen); tp->t_stat.rxduplicatebytes += tlen; } if (tlen) @@ -3107,8 +3278,8 @@ trimthenstep6: if (nstat_collect) { nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, todrop, NSTAT_RX_FLAG_DUPLICATE); - INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); - INP_ADD_STAT(inp, cell, wifi, rxbytes, todrop); + INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, todrop); tp->t_stat.rxduplicatebytes += todrop; } drop_hdrlen += todrop; /* drop from the top afterwards */ @@ -3321,17 +3492,10 @@ trimthenstep6: } if (SACK_ENABLED(tp) && (to.to_nsacks > 0 || !TAILQ_EMPTY(&tp->snd_holes))) - tcp_sack_doack(tp, &to, th->th_ack, &sack_bytes_acked); + tcp_sack_doack(tp, &to, th, &sack_bytes_acked); + #if MPTCP if ((tp->t_mpuna) && (SEQ_GEQ(th->th_ack, tp->t_mpuna))) { -#if 0 - if ((tp->t_mpflags & TMPF_MPTCP_TRUE) && - !(tp->t_mpflags & TMPF_MPTCP_READY)) { - printf("%s: fallback? 
%x %x \n", __func__, - th->th_ack, tp->t_mpuna); - tp->t_mpuna = 0; - } -#endif if (tp->t_mpflags & TMPF_PREESTABLISHED) { /* MP TCP establishment succeeded */ tp->t_mpuna = 0; @@ -3344,7 +3508,7 @@ trimthenstep6: so->so_flags |= SOF_MPTCP_TRUE; if (mptcp_dbg >= MP_ERR_DEBUG) printf("MPTCP SUCCESS" - "%s \n",__func__); + " %s \n",__func__); tp->t_timer[TCPT_JACK_RXMT] = 0; tp->t_mprxtshift = 0; isconnected = TRUE; @@ -3354,7 +3518,6 @@ trimthenstep6: } else { isconnected = TRUE; tp->t_mpflags &= ~TMPF_SENT_KEYS; - } } } @@ -3389,97 +3552,46 @@ process_dupack: if (to.to_flags & TOF_MPTCP) { goto drop; } + + if ((isconnected) && (tp->t_mpflags & TMPF_JOINED_FLOW)) { + if (mptcp_dbg >= MP_ERR_DEBUG) + printf("%s: bypass ack recovery\n",__func__); + break; + } #endif /* MPTCP */ + /* + * If a duplicate acknowledgement was seen + * after ECN, it indicates packet loss in + * addition to ECN. Reset INRECOVERY flag + * so that we can process partial acks + * correctly + */ + if (tp->ecn_flags & TE_INRECOVERY) + tp->ecn_flags &= ~TE_INRECOVERY; + tcpstat.tcps_rcvdupack++; ++tp->t_dupacks; - /* - * Check if we need to reset the limit on early - * retransmit + + /* + * Check if we need to reset the limit on + * early retransmit */ - if (TSTMP_GEQ(tcp_now, - (tp->t_early_rexmt_win + TCP_EARLY_REXMT_WIN))) + if (tp->t_early_rexmt_count > 0 && + TSTMP_GEQ(tcp_now, + (tp->t_early_rexmt_win + + TCP_EARLY_REXMT_WIN))) tp->t_early_rexmt_count = 0; /* * Is early retransmit needed? We check for * this when the connection is waiting for - * more duplicate acks to enter fast recovery. + * duplicate acks to enter fast recovery. */ - if (early_rexmt && - tp->t_early_rexmt_count < TCP_EARLY_REXMT_LIMIT && - !IN_FASTRECOVERY(tp) && - SEQ_GT(tp->snd_max, tp->snd_una) && - (tp->t_dupacks == 1 || - (SACK_ENABLED(tp) && - !TAILQ_EMPTY(&tp->snd_holes)))) { - /* - * If there are only a few outstanding - * segments on the connection, we might need - * to lower the retransmit threshold. This - * will allow us to do Early Retransmit as - * described in RFC 5827. - */ - u_int32_t obytes, snd_off; - int32_t snd_len; - if (SACK_ENABLED(tp) && - !TAILQ_EMPTY(&tp->snd_holes)) { - obytes = (tp->snd_max - tp->snd_fack) + - tp->sackhint.sack_bytes_rexmit; - } else { - obytes = (tp->snd_max - tp->snd_una); - } + if (!IN_FASTRECOVERY(tp)) + tcp_early_rexmt_check(tp, th); - /* In order to lower retransmit threshold the - * following two conditions must be met. - * 1. the amount of outstanding data is less - * than 4*SMSS bytes - * 2. there is no unsent data ready for - * transmission or the advertised window - * will limit sending new segments. - */ - snd_off = tp->snd_max - tp->snd_una; - snd_len = min(so->so_snd.sb_cc, tp->snd_wnd) - snd_off; - if (obytes < (tp->t_maxseg << 2) && - snd_len <= 0) { - u_int32_t osegs; - - - osegs = obytes / tp->t_maxseg; - if ((osegs * tp->t_maxseg) < obytes) - osegs++; - - /* - * Since the connection might have already - * received some dupacks, we add them to - * to the outstanding segments count to get - * the correct retransmit threshold. - * - * By checking for early retransmit after - * receiving some duplicate acks when SACK - * is supported, the connection will be able - * to enter fast recovery even if multiple - * segments are lost in the same window. - */ - osegs += tp->t_dupacks; - if (osegs < 4) { - tcpstat.tcps_early_rexmt++; - tp->t_rexmtthresh = ((osegs - 1) > 1) ? 
- (osegs - 1) : 1; - tp->t_rexmtthresh = min(tp->t_rexmtthresh, - tcprexmtthresh); - tp->t_rexmtthresh = max(tp->t_rexmtthresh, - tp->t_dupacks); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, th, - int32_t, TCP_CC_EARLY_RETRANSMIT); - if (tp->t_early_rexmt_count == 0) - tp->t_early_rexmt_win = tcp_now; - tp->t_early_rexmt_count++; - } - } - } /* - * If we've seen exactly our rexmt threshold + * If we've seen exactly rexmt threshold * of duplicate acks, assume a packet * has been dropped and retransmit it. * Kludge snd_nxt & the congestion @@ -3499,12 +3611,32 @@ process_dupack: * network. */ if (tp->t_timer[TCPT_REXMT] == 0 || - (th->th_ack != tp->snd_una && sack_bytes_acked == 0)) { + (th->th_ack != tp->snd_una + && sack_bytes_acked == 0)) { tp->t_dupacks = 0; tp->t_rexmtthresh = tcprexmtthresh; } else if (tp->t_dupacks > tp->t_rexmtthresh || - IN_FASTRECOVERY(tp)) { - if (SACK_ENABLED(tp) && IN_FASTRECOVERY(tp)) { + IN_FASTRECOVERY(tp)) { + + /* + * If this connection was seeing packet + * reordering, then recovery might be + * delayed to disambiguate between + * reordering and loss + */ + if (SACK_ENABLED(tp) && !IN_FASTRECOVERY(tp) && + (tp->t_flagsext & + (TF_PKTS_REORDERED|TF_DELAY_RECOVERY)) == + (TF_PKTS_REORDERED|TF_DELAY_RECOVERY)) { + /* + * Since the SACK information is already + * updated, this ACK will be dropped + */ + break; + } + + if (SACK_ENABLED(tp) + && IN_FASTRECOVERY(tp)) { int awnd; /* @@ -3523,9 +3655,7 @@ process_dupack: } else tp->snd_cwnd += tp->t_maxseg; - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, th, - int32_t, TCP_CC_IN_FASTRECOVERY); + tcp_ccdbg_trace(tp, th, TCP_CC_IN_FASTRECOVERY); (void) tcp_output(tp); goto drop; @@ -3543,6 +3673,8 @@ process_dupack: if (IN_FASTRECOVERY(tp)) { tp->t_dupacks = 0; break; + } else if (tp->t_flagsext & TF_DELAY_RECOVERY) { + break; } } else { if (SEQ_LEQ(th->th_ack, @@ -3552,6 +3684,33 @@ process_dupack: } } + tp->snd_recover = tp->snd_max; + tp->t_timer[TCPT_PTO] = 0; + tp->t_rtttime = 0; + + /* + * If the connection has seen pkt + * reordering, delay recovery until + * it is clear that the packet + * was lost. + */ + if (SACK_ENABLED(tp) && + (tp->t_flagsext & + (TF_PKTS_REORDERED|TF_DELAY_RECOVERY)) + == TF_PKTS_REORDERED && + !IN_FASTRECOVERY(tp) && + tp->t_reorderwin > 0 && + tp->t_state == TCPS_ESTABLISHED) { + tp->t_timer[TCPT_DELAYFR] = + OFFSET_FROM_START(tp, + tp->t_reorderwin); + tp->t_flagsext |= TF_DELAY_RECOVERY; + tcpstat.tcps_delay_recovery++; + tcp_ccdbg_trace(tp, th, + TCP_CC_DELAY_FASTRECOVERY); + break; + } + /* * If the current tcp cc module has * defined a hook for tasks to run @@ -3560,20 +3719,27 @@ process_dupack: if (CC_ALGO(tp)->pre_fr != NULL) CC_ALGO(tp)->pre_fr(tp); ENTER_FASTRECOVERY(tp); - tp->snd_recover = tp->snd_max; tp->t_timer[TCPT_REXMT] = 0; - tp->t_rtttime = 0; - if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) { + if ((tp->ecn_flags & TE_ECN_ON) + == TE_ECN_ON) tp->ecn_flags |= TE_SENDCWR; - } + if (SACK_ENABLED(tp)) { tcpstat.tcps_sack_recovery_episode++; tp->sack_newdata = tp->snd_nxt; tp->snd_cwnd = tp->t_maxseg; - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, th, - int32_t, TCP_CC_ENTER_FASTRECOVERY); + /* + * Enable probe timeout to detect + * a tail loss in the recovery + * window. 
+					 */
+					tp->t_timer[TCPT_PTO] =
+					    OFFSET_FROM_START(tp,
+					    max(10, (tp->t_srtt >> TCP_RTT_SHIFT)));
+
+					tcp_ccdbg_trace(tp, th,
+					    TCP_CC_ENTER_FASTRECOVERY);
 
					(void) tcp_output(tp);
					goto drop;
@@ -3585,9 +3751,8 @@ process_dupack:
					    tp->t_maxseg * tp->t_dupacks;
					if (SEQ_GT(onxt, tp->snd_nxt))
						tp->snd_nxt = onxt;
-					DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
-					    struct tcpcb *, tp, struct tcphdr *, th,
-					    int32_t, TCP_CC_ENTER_FASTRECOVERY);
+					tcp_ccdbg_trace(tp, th,
+					    TCP_CC_ENTER_FASTRECOVERY);
 
					goto drop;
				} else if (limited_txmt &&
				    ALLOW_LIMITED_TRANSMIT(tp) &&
@@ -3602,9 +3767,7 @@ process_dupack:
					tcpstat.tcps_limited_txt++;
					(void) tcp_output(tp);
 
-					DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
-					    struct tcpcb *, tp, struct tcphdr *, th,
-					    int32_t, TCP_CC_LIMITED_TRANSMIT);
+					tcp_ccdbg_trace(tp, th, TCP_CC_LIMITED_TRANSMIT);
 
					/* Reset snd_cwnd back to normal */
					tp->snd_cwnd -= incr;
@@ -3621,26 +3784,50 @@ process_dupack:
		 */
		if (IN_FASTRECOVERY(tp)) {
			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+				/*
+				 * If we received an ECE and entered
+				 * recovery, the subsequent ACKs should
+				 * not be treated as partial acks.
+				 */
+				if (tp->ecn_flags & TE_INRECOVERY)
+					goto process_ACK;
+
				if (SACK_ENABLED(tp))
					tcp_sack_partialack(tp, th);
				else
					tcp_newreno_partial_ack(tp, th);
-
-				DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
-				    struct tcpcb *, tp, struct tcphdr *, th,
-				    int32_t, TCP_CC_PARTIAL_ACK);
+				tcp_ccdbg_trace(tp, th, TCP_CC_PARTIAL_ACK);
			} else {
				EXIT_FASTRECOVERY(tp);
				if (CC_ALGO(tp)->post_fr != NULL)
					CC_ALGO(tp)->post_fr(tp, th);
-				DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
-				    struct tcpcb *, tp, struct tcphdr *, th,
-				    int32_t, TCP_CC_EXIT_FASTRECOVERY);
+
+				tcp_ccdbg_trace(tp, th,
+				    TCP_CC_EXIT_FASTRECOVERY);
+			}
+		} else if ((tp->t_flagsext &
+		    (TF_PKTS_REORDERED|TF_DELAY_RECOVERY))
+		    == (TF_PKTS_REORDERED|TF_DELAY_RECOVERY)) {
+			/*
+			 * If the ack acknowledges up to snd_recover or if
+			 * it acknowledges all the snd holes, exit
+			 * recovery and cancel the timer. Otherwise,
+			 * this is a partial ack. Wait for the recovery timer
+			 * to enter recovery. The snd_holes have already
+			 * been updated.
+			 */
+			if (SEQ_GEQ(th->th_ack, tp->snd_recover) ||
+			    TAILQ_EMPTY(&tp->snd_holes)) {
+				tp->t_timer[TCPT_DELAYFR] = 0;
+				tp->t_flagsext &= ~TF_DELAY_RECOVERY;
+				EXIT_FASTRECOVERY(tp);
+				tcp_ccdbg_trace(tp, th,
+				    TCP_CC_EXIT_FASTRECOVERY);
			}
		} else {
			/*
-			 * We were not in fast recovery. Reset the duplicate ack
-			 * counter.
+			 * We were not in fast recovery. Reset the
+			 * duplicate ack counter.
			 */
			tp->t_dupacks = 0;
			tp->t_rexmtthresh = tcprexmtthresh;
@@ -3678,19 +3865,11 @@ process_ACK:
		 * If the last packet was a retransmit, make sure
		 * it was not spurious.
		 *
-		 * If the ack has ECE bit set, skip bad
-		 * retransmit recovery.
+		 * This will also take care of congestion window
+		 * adjustment if the last packet was recovered due to a
+		 * tail loss probe.
*/ - if (tp->t_rxtshift > 0 && - (thflags & TH_ECE) == 0 && - tcp_detect_bad_rexmt(tp, &to)) { - ++tcpstat.tcps_sndrexmitbad; - tcp_bad_rexmt_restore_state(tp, th); - - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, th, - int32_t, TCP_CC_BAD_REXMT_RECOVERY); - } + tcp_bad_rexmt_check(tp, th, &to); /* Recalculate the RTT */ tcp_compute_rtt(tp, &to, th); @@ -3703,28 +3882,30 @@ process_ACK: */ if (th->th_ack == tp->snd_max) { tp->t_timer[TCPT_REXMT] = 0; + tp->t_timer[TCPT_PTO] = 0; needoutput = 1; } else if (tp->t_timer[TCPT_PERSIST] == 0) - tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); + tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, + tp->t_rxtcur); /* - * If no data (only SYN) was ACK'd, - * skip rest of ACK processing. + * If no data (only SYN) was ACK'd, skip rest of ACK + * processing. */ if (acked == 0) goto step6; + if ((thflags & TH_ECE) != 0 && ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON)) { /* - * Reduce the congestion window if we haven't done so. + * Reduce the congestion window if we haven't + * done so. */ - if (!SACK_ENABLED(tp) && !IN_FASTRECOVERY(tp) && - SEQ_GEQ(th->th_ack, tp->snd_recover)) { + if (!IN_FASTRECOVERY(tp)) { tcp_reduce_congestion_window(tp); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, th, - int32_t, TCP_CC_ECN_RCVD); + tp->ecn_flags |= (TE_INRECOVERY|TE_SENDCWR); + tcp_ccdbg_trace(tp, th, TCP_CC_ECN_RCVD); } } @@ -3739,10 +3920,7 @@ process_ACK: if (!IN_FASTRECOVERY(tp)) { if (CC_ALGO(tp)->ack_rcvd != NULL) CC_ALGO(tp)->ack_rcvd(tp, th); - - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, th, - int32_t, TCP_CC_ACK_RCVD); + tcp_ccdbg_trace(tp, th, TCP_CC_ACK_RCVD); } if (acked > so->so_snd.sb_cc) { tp->snd_wnd -= so->so_snd.sb_cc; @@ -3837,7 +4015,11 @@ process_ACK: int32_t, TCPS_TIME_WAIT); tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - add_to_time_wait(tp, 2 * tcp_msl); + if (tp->t_flagsext & TF_NOTIMEWAIT) { + tp->t_flags |= TF_CLOSING; + } else { + add_to_time_wait(tp, 2 * tcp_msl); + } isconnected = FALSE; isdisconnected = TRUE; } @@ -3878,11 +4060,11 @@ process_ACK: * ack is ok. */ if (sack_ackadv == 1 && - tp->t_state == TCPS_ESTABLISHED && - SACK_ENABLED(tp) && - sack_bytes_acked > 0 && - tp->t_dupacks == 0 && - SEQ_LEQ(th->th_ack, tp->snd_una) && tlen == 0) { + tp->t_state == TCPS_ESTABLISHED && + SACK_ENABLED(tp) && sack_bytes_acked > 0 && + to.to_nsacks > 0 && tp->t_dupacks == 0 && + SEQ_LEQ(th->th_ack, tp->snd_una) && tlen == 0 && + !(tp->t_flagsext & TF_PKTS_REORDERED)) { tcpstat.tcps_sack_ackadv++; goto process_dupack; } @@ -4047,12 +4229,14 @@ dodata: tcpstat.tcps_rcvbyte += tlen; if (nstat_collect) { if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_PKT) { - INP_ADD_STAT(inp, cell, wifi, rxpackets, - m->m_pkthdr.lro_npkts); + INP_ADD_STAT(inp, cell, wifi, wired, + rxpackets, m->m_pkthdr.lro_npkts); } else { - INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, + rxpackets, 1); } - INP_ADD_STAT(inp, cell, wifi, rxbytes, tlen); + INP_ADD_STAT(inp, cell, wifi, wired, + rxbytes, tlen); } tcp_sbrcv_grow(tp, &so->so_rcv, &to, tlen); so_recv_data_stat(so, m, drop_hdrlen); @@ -4156,13 +4340,12 @@ dodata: int32_t, TCPS_TIME_WAIT); tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - if (tp->cc_recv != 0 && - ((int)(tcp_now - tp->t_starttime)) < tcp_msl) { - /* For transaction client, force ACK now. 
 */
-			tp->t_flags |= TF_ACKNOW;
-			tp->t_unacksegs = 0;
+		tp->t_flags |= TF_ACKNOW;
+		if (tp->t_flagsext & TF_NOTIMEWAIT) {
+			tp->t_flags |= TF_CLOSING;
+		} else {
+			add_to_time_wait(tp, 2 * tcp_msl);
		}
-		add_to_time_wait(tp, 2 * tcp_msl);
		soisdisconnected(so);
		break;
@@ -4267,16 +4450,19 @@ dropwithreset:
		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
		    &tcp_savetcp, 0);
 #endif
+	bzero(&tra, sizeof(tra));
+	tra.ifscope = ifscope;
+	tra.awdl_unrestricted = 1;
	if (thflags & TH_ACK)
		/* mtod() below is safe as long as hdr dropping is delayed */
		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
-		    TH_RST, ifscope, nocell);
+		    TH_RST, &tra);
	else {
		if (thflags & TH_SYN)
			tlen++;
		/* mtod() below is safe as long as hdr dropping is delayed */
		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
-		    (tcp_seq)0, TH_RST|TH_ACK, ifscope, nocell);
+		    (tcp_seq)0, TH_RST|TH_ACK, &tra);
	}
	/* destroy temporarily created socket */
	if (dropsocket) {
@@ -4513,7 +4699,7 @@ tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th)
	    TSTMP_GEQ(tcp_now, to->to_tsecr)) {
		tcp_xmit_timer(tp, tcp_now - to->to_tsecr,
		    to->to_tsecr, th->th_ack);
-	} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
+	} else if (tp->t_rtttime != 0 && SEQ_GT(th->th_ack, tp->t_rtseq)) {
		tcp_xmit_timer(tp, tcp_now - tp->t_rtttime, 0,
		    th->th_ack);
	}
@@ -4693,9 +4879,9 @@ tcp_maxmtu6(struct rtentry *rt)
 * size (usually 512 or the default IP max size, but no more than the mtu
 * of the interface), as we can't discover anything about intervening
 * gateways or networks.  We also initialize the congestion/slow start
- * window to be a single segment if the destination isn't local.
- * While looking at the routing entry, we also initialize other path-dependent
- * parameters from pre-set or cached values in the routing entry.
+ * window.  While looking at the routing entry, we also initialize
+ * other path-dependent parameters from pre-set or cached values
+ * in the routing entry.
 *
 * Also take into account the space needed for options that we
 * send regularly.  Make maxseg shorter by that amount to assure
@@ -4759,9 +4945,10 @@ tcp_mss(tp, offer, input_ifscope)
	ifp = rt->rt_ifp;
	/*
	 * Slower link window correction:
-	 * If a value is specificied for slowlink_wsize use it for PPP links
-	 * believed to be on a serial modem (speed <128Kbps). Excludes 9600bps as
-	 * it is the default value adversized by pseudo-devices over ppp.
+	 * If a value is specified for slowlink_wsize, use it for
+	 * PPP links believed to be on a serial modem (speed <128Kbps).
+	 * Excludes 9600bps as it is the default value advertised
+	 * by pseudo-devices over ppp.
	 */
	if (ifp->if_type == IFT_PPP && slowlink_wsize > 0 &&
	    ifp->if_baudrate > 9600 && ifp->if_baudrate <= 128000) {
@@ -4901,8 +5088,8 @@ tcp_mss(tp, offer, input_ifscope)
		/*
		 * There's some sort of gateway or interface
		 * buffer limit on the path.  Use this to set
-		 * the slow start threshhold, but set the
-		 * threshold to no less than 2*mss.
+		 * slow-start threshold, but set the threshold to
+		 * no less than 2*mss.
		 */
		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
		tcpstat.tcps_usedssthresh++;
@@ -4910,7 +5097,6 @@ tcp_mss(tp, offer, input_ifscope)
	} else {
		tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
	}
-
	/*
	 * Set the slow-start flight size depending on whether this
	 * is a local network or not.
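[Editor's aside -- illustration only, not part of the patch. tcp_compute_rtt(), touched in the hunk above, takes its RTT sample from the echoed timestamp option when one is available and sane, and otherwise falls back to timing one marked segment per window. A minimal restatement of that decision, with times in tcp_now ticks (TCP_RETRANSHZ per second); the function name and parameters are hypothetical:]

	/*
	 * Editor's sketch of the RTT-sample selection in tcp_compute_rtt():
	 * prefer the timestamp echo reply, else the timed segment.
	 */
	#include <stdint.h>

	static int32_t
	rtt_sample(uint32_t now, uint32_t tsecr, uint32_t rtttime,
	    int timed_seq_acked)
	{
		if (tsecr != 0 && now >= tsecr)
			return ((int32_t)(now - tsecr));	/* timestamp echo */
		if (rtttime != 0 && timed_seq_acked)
			return ((int32_t)(now - rtttime));	/* timed segment */
		return (-1);					/* no sample on this ack */
	}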
@@ -4918,8 +5104,7 @@ tcp_mss(tp, offer, input_ifscope) if (CC_ALGO(tp)->cwnd_init != NULL) CC_ALGO(tp)->cwnd_init(tp); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb, struct tcpcb *, tp, - struct tcphdr *, NULL, int32_t, TCP_CC_CWND_INIT); + tcp_ccdbg_trace(tp, NULL, TCP_CC_CWND_INIT); /* Route locked during lookup above */ RT_UNLOCK(rt); @@ -4995,6 +5180,7 @@ tcp_newreno_partial_ack(tp, th) tcp_seq onxt = tp->snd_nxt; u_int32_t ocwnd = tp->snd_cwnd; tp->t_timer[TCPT_REXMT] = 0; + tp->t_timer[TCPT_PTO] = 0; tp->t_rtttime = 0; tp->snd_nxt = th->th_ack; /* @@ -5227,7 +5413,10 @@ tcp_set_background_cc(struct socket *so) void tcp_set_foreground_cc(struct socket *so) { - tcp_set_new_cc(so, TCP_CC_ALGO_NEWRENO_INDEX); + if (tcp_use_newreno) + tcp_set_new_cc(so, TCP_CC_ALGO_NEWRENO_INDEX); + else + tcp_set_new_cc(so, TCP_CC_ALGO_CUBIC_INDEX); } static void @@ -5244,20 +5433,12 @@ tcp_set_new_cc(struct socket *so, uint16_t cc_index) CC_ALGO(tp)->cleanup(tp); tp->tcp_cc_index = cc_index; - /* Decide if the connection is just starting or if - * we have sent some packets on it. - */ - if (tp->snd_nxt > tp->iss) { - /* Already sent some packets */ - if (CC_ALGO(tp)->switch_to != NULL) - CC_ALGO(tp)->switch_to(tp, old_cc_index); - } else { - if (CC_ALGO(tp)->init != NULL) - CC_ALGO(tp)->init(tp); - } - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, NULL, - int32_t, TCP_CC_CHANGE_ALGO); + tcp_cc_allocate_state(tp); + + if (CC_ALGO(tp)->switch_to != NULL) + CC_ALGO(tp)->switch_to(tp, old_cc_index); + + tcp_ccdbg_trace(tp, NULL, TCP_CC_CHANGE_ALGO); } } @@ -5517,7 +5698,8 @@ tcp_input_checksum(int af, struct mbuf *m, struct tcphdr *th, int off, int tlen) return (0); } -SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_getstat, "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); static int diff --git a/bsd/netinet/tcp_ledbat.c b/bsd/netinet/tcp_ledbat.c index 145188839..09f594baf 100644 --- a/bsd/netinet/tcp_ledbat.c +++ b/bsd/netinet/tcp_ledbat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -58,7 +58,7 @@ int tcp_ledbat_init(struct tcpcb *tp); int tcp_ledbat_cleanup(struct tcpcb *tp); void tcp_ledbat_cwnd_init(struct tcpcb *tp); -void tcp_ledbat_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th); +void tcp_ledbat_congestion_avd(struct tcpcb *tp, struct tcphdr *th); void tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th); void tcp_ledbat_pre_fr(struct tcpcb *tp); void tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th); @@ -72,7 +72,7 @@ struct tcp_cc_algo tcp_cc_ledbat = { .init = tcp_ledbat_init, .cleanup = tcp_ledbat_cleanup, .cwnd_init = tcp_ledbat_cwnd_init, - .inseq_ack_rcvd = tcp_ledbat_inseq_ack_rcvd, + .congestion_avd = tcp_ledbat_congestion_avd, .ack_rcvd = tcp_ledbat_ack_rcvd, .pre_fr = tcp_ledbat_pre_fr, .post_fr = tcp_ledbat_post_fr, @@ -82,10 +82,6 @@ struct tcp_cc_algo tcp_cc_ledbat = { .switch_to = tcp_ledbat_switch_cc }; -extern int tcp_do_rfc3465; -extern int tcp_do_rfc3465_lim2; -extern uint32_t get_base_rtt(struct tcpcb *tp); - /* Target queuing delay in milliseconds. This includes the processing * and scheduling delay on both of the end-hosts. A LEDBAT sender tries * to keep queuing delay below this limit. 
When the queuing delay @@ -224,7 +220,7 @@ tcp_ledbat_cwnd_init(struct tcpcb *tp) { * This gets called only during congestion avoidance phase. */ void -tcp_ledbat_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { +tcp_ledbat_congestion_avd(struct tcpcb *tp, struct tcphdr *th) { int acked = 0; u_int32_t incr = 0; @@ -362,6 +358,9 @@ tcp_ledbat_after_idle(struct tcpcb *tp) { /* Reset the congestion window */ tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz; + + /* If stretch ack was auto disabled, re-evaluate the situation */ + tcp_cc_after_idle_stretchack(tp); } /* Function to change the congestion window when the retransmit diff --git a/bsd/netinet/tcp_newreno.c b/bsd/netinet/tcp_newreno.c index 158311244..a1e590a0a 100644 --- a/bsd/netinet/tcp_newreno.c +++ b/bsd/netinet/tcp_newreno.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,7 +86,7 @@ int tcp_newreno_init(struct tcpcb *tp); int tcp_newreno_cleanup(struct tcpcb *tp); void tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp); -void tcp_newreno_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th); +void tcp_newreno_congestion_avd(struct tcpcb *tp, struct tcphdr *th); void tcp_newreno_ack_rcvd(struct tcpcb *tp, struct tcphdr *th); void tcp_newreno_pre_fr(struct tcpcb *tp); void tcp_newreno_post_fr(struct tcpcb *tp, struct tcphdr *th); @@ -100,7 +100,7 @@ struct tcp_cc_algo tcp_cc_newreno = { .init = tcp_newreno_init, .cleanup = tcp_newreno_cleanup, .cwnd_init = tcp_newreno_cwnd_init_or_reset, - .inseq_ack_rcvd = tcp_newreno_inseq_ack_rcvd, + .congestion_avd = tcp_newreno_congestion_avd, .ack_rcvd = tcp_newreno_ack_rcvd, .pre_fr = tcp_newreno_pre_fr, .post_fr = tcp_newreno_post_fr, @@ -110,47 +110,6 @@ struct tcp_cc_algo tcp_cc_newreno = { .switch_to = tcp_newreno_switch_cc }; -extern int tcp_do_rfc3465; -extern int tcp_do_rfc3465_lim2; -extern int maxseg_unacked; -extern u_int32_t tcp_autosndbuf_max; - -#define SET_SNDSB_IDEAL_SIZE(sndsb, size) \ - sndsb->sb_idealsize = min(max(tcp_sendspace, tp->snd_ssthresh), \ - tcp_autosndbuf_max); - -void tcp_cc_resize_sndbuf(struct tcpcb *tp) { - struct sockbuf *sb; - /* If the send socket buffer size is bigger than ssthresh, - * it is time to trim it because we do not want to hold - * too many mbufs in the socket buffer - */ - sb = &(tp->t_inpcb->inp_socket->so_snd); - if (sb->sb_hiwat > tp->snd_ssthresh && - (sb->sb_flags & SB_AUTOSIZE) != 0) { - if (sb->sb_idealsize > tp->snd_ssthresh) { - SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh); - } - sb->sb_flags |= SB_TRIM; - } -} - -void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp) { - struct sockbuf *sb; - sb = &(tp->t_inpcb->inp_socket->so_snd); - if ((sb->sb_flags & (SB_TRIM|SB_AUTOSIZE)) == (SB_TRIM|SB_AUTOSIZE)) { - /* If there was a retransmission that was not necessary - * then the size of socket buffer can be restored to - * what it was before - */ - SET_SNDSB_IDEAL_SIZE(sb, tp->snd_ssthresh); - if (sb->sb_hiwat <= sb->sb_idealsize) { - sbreserve(sb, sb->sb_idealsize); - sb->sb_flags &= ~SB_TRIM; - } - } -} - int tcp_newreno_init(struct tcpcb *tp) { #pragma unused(tp) OSIncrementAtomic((volatile SInt32 *)&tcp_cc_newreno.num_sockets); @@ -174,22 +133,10 @@ int tcp_newreno_cleanup(struct tcpcb *tp) { */ void tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp) { - if ( tp->t_flags & TF_LOCAL ) - tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local; - else { - /* Calculate initial cwnd according to RFC3390, - * - On a standard 
link, this will result in a higher cwnd - * and improve initial transfer rate. - * - Keep the old ss_fltsz sysctl for ABI compabitility issues. - * but it will be overriden if tcp_do_rfc3390 sysctl is set. - */ + tcp_cc_cwnd_init_or_reset(tp); - if (tcp_do_rfc3390) - tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380)); - - else - tp->snd_cwnd = tp->t_maxseg * ss_fltsz; - } + /* If stretch ack was auto disabled, re-evaluate the situation */ + tcp_cc_after_idle_stretchack(tp); } @@ -197,8 +144,8 @@ tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp) { * This will get called from header prediction code. */ void -tcp_newreno_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { - int acked = 0; +tcp_newreno_congestion_avd(struct tcpcb *tp, struct tcphdr *th) { + uint32_t acked = 0; acked = BYTES_ACKED(th, tp); /* * Grow the congestion window, if the @@ -253,10 +200,10 @@ tcp_newreno_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { * * (See RFC 3465 2.3 Choosing the Limit) */ - u_int abc_lim; - + uint32_t abc_lim; abc_lim = (tcp_do_rfc3465_lim2 && - tp->snd_nxt == tp->snd_max) ? incr * 2 : incr; + tp->snd_nxt == tp->snd_max) ? incr * 2 + : incr; incr = lmin(acked, abc_lim); } @@ -375,30 +322,7 @@ tcp_newreno_after_timeout(struct tcpcb *tp) { int tcp_newreno_delay_ack(struct tcpcb *tp, struct tcphdr *th) { - /* If any flags other than TH_ACK is set, set "end-of-write" bit */ - if ((th->th_flags & ~TH_ACK)) - tp->t_flagsext |= TF_STREAMEOW; - else - tp->t_flagsext &= ~(TF_STREAMEOW); - - switch (tcp_delack_enabled) { - case 1: - case 2: - if ((tp->t_flags & TF_RXWIN0SENT) == 0 && - (th->th_flags & TH_PUSH) == 0 && - (tp->t_unacksegs == 1)) - return(1); - break; - case 3: - if ((tp->t_flags & TF_RXWIN0SENT) == 0 && - (th->th_flags & TH_PUSH) == 0 && - ((tp->t_unacksegs == 1) || - ((tp->t_flags & TF_STRETCHACK) != 0 && - tp->t_unacksegs < (maxseg_unacked)))) - return(1); - break; - } - return(0); + return (tcp_cc_delay_ack(tp, th)); } /* Switch to newreno from a different CC. 
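The delayed-ACK body deleted just above is the policy this patch centralizes: tcp_newreno_delay_ack() now simply forwards to a shared tcp_cc_delay_ack() so NewReno and LEDBAT stop carrying duplicate copies. A minimal standalone sketch of that decision follows; the toy_tcpcb struct, merged flags word, and flag values are illustrative stand-ins, and the TF_STREAMEOW end-of-write bookkeeping from the removed code is omitted for brevity.

#include <stdbool.h>
#include <stdint.h>

/* Stand-in flags modeled on the removed code; values are illustrative. */
#define TH_PUSH       0x08
#define TF_RXWIN0SENT 0x1
#define TF_STRETCHACK 0x2

struct toy_tcpcb {
	uint32_t t_flags;
	uint32_t t_unacksegs;
};

/*
 * Return true when the ACK for a received segment may be delayed.
 * Modes 1/2: delay only while exactly one segment is unacknowledged.
 * Mode 3: additionally keep delaying while stretch ACKs are active and
 * fewer than maxseg_unacked segments are outstanding.
 */
static bool
toy_delay_ack(int delack_mode, int maxseg_unacked,
    const struct toy_tcpcb *tp, uint8_t th_flags)
{
	if ((tp->t_flags & TF_RXWIN0SENT) || (th_flags & TH_PUSH))
		return false;
	switch (delack_mode) {
	case 1:
	case 2:
		return (tp->t_unacksegs == 1);
	case 3:
		return (tp->t_unacksegs == 1 ||
		    ((tp->t_flags & TF_STRETCHACK) &&
		    tp->t_unacksegs < (uint32_t)maxseg_unacked));
	}
	return false;
}

Folding this into one helper also means a congestion-control module switch cannot change delayed-ACK behavior mid-connection.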
If the connection is in @@ -417,9 +341,7 @@ tcp_newreno_switch_cc(struct tcpcb *tp, uint16_t old_index) { } else { cwnd = cwnd / 2 / tp->t_maxseg; } - if (cwnd < 1) - cwnd = 1; - tp->snd_cwnd = cwnd * tp->t_maxseg; + tp->snd_cwnd = max(TCP_CC_CWND_INIT_BYTES, cwnd * tp->t_maxseg); /* Start counting bytes for RFC 3465 again */ tp->t_bytes_acked = 0; diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c index fa6e5348c..b693e0512 100644 --- a/bsd/netinet/tcp_output.c +++ b/bsd/netinet/tcp_output.c @@ -133,76 +133,93 @@ #define DBG_FNC_TCP_OUTPUT NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1) int path_mtu_discovery = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW | CTLFLAG_LOCKED, - &path_mtu_discovery, 1, "Enable Path MTU Discovery"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, + CTLFLAG_RW | CTLFLAG_LOCKED, &path_mtu_discovery, 1, + "Enable Path MTU Discovery"); int ss_fltsz = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW | CTLFLAG_LOCKED, - &ss_fltsz, 1, "Slow start flight size"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, + CTLFLAG_RW | CTLFLAG_LOCKED,&ss_fltsz, 1, + "Slow start flight size"); int ss_fltsz_local = 8; /* starts with eight segments max */ -SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW | CTLFLAG_LOCKED, - &ss_fltsz_local, 1, "Slow start flight size for local networks"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, + CTLFLAG_RW | CTLFLAG_LOCKED, &ss_fltsz_local, 1, + "Slow start flight size for local networks"); int tcp_do_tso = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_tso, 0, "Enable TCP Segmentation Offload"); - int tcp_ecn_outbound = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_initiate_out, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_outbound, - 0, "Initiate ECN for outbound connections"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_initiate_out, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_outbound, 0, + "Initiate ECN for outbound connections"); int tcp_ecn_inbound = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_negotiate_in, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_inbound, - 0, "Allow ECN negotiation for inbound connections"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_negotiate_in, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_inbound, 0, + "Allow ECN negotiation for inbound connections"); int tcp_packet_chaining = 50; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_packet_chaining, - 0, "Enable TCP output packet chaining"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_packet_chaining, 0, + "Enable TCP output packet chaining"); int tcp_output_unlocked = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, socket_unlocked_on_output, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_output_unlocked, - 0, "Unlock TCP when sending packets down to IP"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, socket_unlocked_on_output, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_output_unlocked, 0, + "Unlock TCP when sending packets down to IP"); int tcp_do_rfc3390 = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_do_rfc3390, 1, "Calculate intial slowstart cwnd depending on MSS"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_rfc3390, 1, + "Calculate intial slowstart cwnd depending on MSS"); int tcp_min_iaj_win = MIN_IAJ_WIN; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, min_iaj_win, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_min_iaj_win, 1, "Minimum 
recv win based on inter-packet arrival jitter"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, min_iaj_win, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_min_iaj_win, 1, + "Minimum recv win based on inter-packet arrival jitter"); int tcp_acc_iaj_react_limit = ACC_IAJ_REACT_LIMIT; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_react_limit, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_acc_iaj_react_limit, 1, "Accumulated IAJ when receiver starts to react"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_react_limit, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_acc_iaj_react_limit, 1, + "Accumulated IAJ when receiver starts to react"); uint32_t tcp_do_autosendbuf = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautosndbuf, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_do_autosendbuf, 1, "Enable send socket buffer auto-tuning"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautosndbuf, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_autosendbuf, 1, + "Enable send socket buffer auto-tuning"); uint32_t tcp_autosndbuf_inc = 8 * 1024; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, autosndbufinc, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_autosndbuf_inc, 1, "Increment in send socket bufffer size"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, autosndbufinc, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_autosndbuf_inc, 1, + "Increment in send socket bufffer size"); uint32_t tcp_autosndbuf_max = 512 * 1024; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, autosndbufmax, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_autosndbuf_max, 1, "Maximum send socket buffer size"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, autosndbufmax, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_autosndbuf_max, 1, + "Maximum send socket buffer size"); uint32_t tcp_prioritize_acks = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, ack_prioritize, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_prioritize_acks, 1, "Prioritize pure acks"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, ack_prioritize, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_prioritize_acks, 1, + "Prioritize pure acks"); uint32_t tcp_use_rtt_recvbg = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_recvbg, - CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_use_rtt_recvbg, 1, "Use RTT for bg recv algorithm"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_recvbg, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_use_rtt_recvbg, 1, + "Use RTT for bg recv algorithm"); uint32_t tcp_recv_throttle_minwin = 16 * 1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recv_throttle_minwin, - CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_recv_throttle_minwin, 1, "Minimum recv win for throttling"); + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_recv_throttle_minwin, 1, + "Minimum recv win for throttling"); +int32_t tcp_enable_tlp = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, enable_tlp, + CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_enable_tlp, 1, "Enable Tail loss probe"); static int32_t packchain_newlist = 0; static int32_t packchain_looped = 0; @@ -222,12 +239,9 @@ extern int fw_bypass; /* firewall check: disable packet chaining if there is r extern u_int32_t dlil_filter_disable_tso_count; extern u_int32_t kipf_count; extern int tcp_recv_bg; -extern int maxseg_unacked; static int tcp_ip_output(struct socket *, struct tcpcb *, struct mbuf *, int, struct mbuf *, int, int, int32_t, boolean_t); - -extern uint32_t get_base_rtt(struct tcpcb *tp); static struct mbuf* tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th); static int tcp_recv_throttle(struct tcpcb *tp); @@ -264,13 +278,13 @@ tcp_output(struct tcpcb *tp) struct socket *so = inp->inp_socket; int32_t len, recwin, sendwin, off; int flags, error; - register struct mbuf *m; + struct mbuf *m; struct ip *ip = NULL; - register struct ipovly *ipov = NULL; + struct 
ipovly *ipov = NULL; #if INET6 struct ip6_hdr *ip6 = NULL; #endif /* INET6 */ - register struct tcphdr *th; + struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; int idle, sendalot, lost = 0; @@ -281,11 +295,7 @@ tcp_output(struct tcpcb *tp) #if IPSEC unsigned ipsec_optlen = 0; #endif /* IPSEC */ - int last_off = 0; - int m_off = 0; int idle_time = 0; - struct mbuf *m_lastm = NULL; - struct mbuf *m_head = NULL; struct mbuf *packetlist = NULL; struct mbuf *tp_inp_options = inp->inp_depend4.inp4_options; #if INET6 @@ -303,10 +313,12 @@ tcp_output(struct tcpcb *tp) u_int8_t *finp = NULL; u_int32_t *sseqp = NULL; u_int64_t dss_val = 0; - int mptcp_acknow = 0; + boolean_t mptcp_acknow = FALSE; + boolean_t early_data_sent = FALSE; #endif /* MPTCP */ boolean_t cell = FALSE; boolean_t wifi = FALSE; + boolean_t wired = FALSE; /* * Determine length of data that should be transmitted, @@ -323,9 +335,7 @@ tcp_output(struct tcpcb *tp) if (idle && idle_time >= TCP_IDLETIMEOUT(tp)) { if (CC_ALGO(tp)->after_idle != NULL) CC_ALGO(tp)->after_idle(tp); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, NULL, - int32_t, TCP_CC_IDLE_TIMEOUT); + tcp_ccdbg_trace(tp, NULL, TCP_CC_IDLE_TIMEOUT); } tp->t_flags &= ~TF_LASTIDLE; if (idle) { @@ -385,6 +395,8 @@ again: /* Disable TSO for the socket until we know more */ tp->t_flags &= ~TF_TSO; + soif2kcl(so, FALSE); + if (isipv6) { ia6 = ifa_foraddr6(&inp->in6p_laddr); if (ia6 != NULL) @@ -405,13 +417,14 @@ again: return(EADDRNOTAVAIL); } - /* set Retransmit timer if it wasn't set + /* Set retransmit timer if it wasn't set, * reset Persist timer and shift register as the * advertised peer window may not be valid anymore */ if (!tp->t_timer[TCPT_REXMT]) { - tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); + tp->t_timer[TCPT_REXMT] = + OFFSET_FROM_START(tp, tp->t_rxtcur); if (tp->t_timer[TCPT_PERSIST]) { tp->t_timer[TCPT_PERSIST] = 0; tp->t_rxtshift = 0; @@ -428,8 +441,7 @@ again: if (so->so_flags & SOF_NOADDRAVAIL) { tcp_drop(tp, EADDRNOTAVAIL); return(EADDRNOTAVAIL); - } - else { + } else { tcp_check_timer_state(tp); return(0); /* silently ignore, keep data in socket: address may be back */ } @@ -448,6 +460,8 @@ again: if ((ifp = rt->rt_ifp) != NULL) { somultipages(so, (ifp->if_hwassist & IFNET_MULTIPAGES)); tcp_set_tso(tp, ifp); + soif2kcl(so, + (ifp->if_eflags & IFEF_2KCL)); } if (rt->rt_flags & RTF_UP) RT_GENID_SYNC(rt); @@ -455,12 +469,13 @@ again: * See if we should do MTU discovery. Don't do it if: * 1) it is disabled via the sysctl * 2) the route isn't up - * 3) the MTU is locked (if it is, then discovery has been - * disabled) + * 3) the MTU is locked (if it is, then discovery + * has been disabled) */ if (!path_mtu_discovery || ((rt != NULL) && - (!(rt->rt_flags & RTF_UP) || (rt->rt_rmx.rmx_locks & RTV_MTU)))) + (!(rt->rt_flags & RTF_UP) || + (rt->rt_rmx.rmx_locks & RTV_MTU)))) tp->t_flags &= ~TF_PMTUD; else tp->t_flags |= TF_PMTUD; @@ -471,6 +486,7 @@ again: if (rt != NULL) { cell = IFNET_IS_CELLULAR(rt->rt_ifp); wifi = (!cell && IFNET_IS_WIFI(rt->rt_ifp)); + wired = (!wifi && IFNET_IS_WIRED(rt->rt_ifp)); } /* @@ -489,11 +505,12 @@ again: flags = tcp_outflags[tp->t_state]; /* - * Send any SACK-generated retransmissions. If we're explicitly trying - * to send out new data (when sendalot is 1), bypass this function. 
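A recurring mechanical change in this hunk is that three-line DTRACE_TCP5(cc, ...) probe invocations collapse into single tcp_ccdbg_trace() calls. The kernel's real helper is defined elsewhere in this patch; the stub below is purely hypothetical and only illustrates the shape of the refactor, with opaque struct declarations and made-up event values standing in for the kernel types.

#include <stdint.h>
#include <stdio.h>

struct tcpcb;   /* opaque stand-ins for the kernel structures */
struct tcphdr;

#define TCP_CC_IDLE_TIMEOUT 3   /* illustrative event codes */
#define TCP_CC_OUTPUT_ERROR 7

static void
tcp_ccdbg_trace_stub(struct tcpcb *tp, struct tcphdr *th, int32_t event)
{
	/*
	 * The real helper fires the dtrace cc probe and feeds the
	 * congestion-control debug channel; this stub just records
	 * the event so call sites stay one line.
	 */
	(void)tp; (void)th;
	fprintf(stderr, "tcp cc event %d\n", (int)event);
}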
- * If we retransmit in fast recovery mode, decrement snd_cwnd, since - * we're replacing a (future) new transmission with a retransmission - * now, and we previously incremented snd_cwnd in tcp_input(). + * Send any SACK-generated retransmissions. If we're explicitly + * trying to send out new data (when sendalot is 1), bypass this + * function. If we retransmit in fast recovery mode, decrement + * snd_cwnd, since we're replacing a (future) new transmission + * with a retransmission now, and we previously incremented + * snd_cwnd in tcp_input(). */ /* * Still in sack recovery , reset rxmit flag to zero. @@ -533,7 +550,7 @@ again: len = ((int32_t)min(cwin, p->end - p->rxmit)); } if (len > 0) { - off = p->rxmit - tp->snd_una; /* update off only if we really transmit SACK data */ + off = p->rxmit - tp->snd_una; sack_rxmit = 1; sendalot = 1; tcpstat.tcps_sack_rexmits++; @@ -543,9 +560,10 @@ again: nstat_route_tx(inp->inp_route.ro_rt, 1, min(len, tp->t_maxseg), NSTAT_TX_FLAG_RETRANSMIT); - INP_ADD_STAT(inp, cell, wifi, txpackets, 1); - INP_ADD_STAT(inp, cell, wifi, txbytes, - min(len, tp->t_maxseg)); + INP_ADD_STAT(inp, cell, wifi, wired, + txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, + txbytes, min(len, tp->t_maxseg)); tp->t_stat.txretransmitbytes += min(len, tp->t_maxseg); } } else { @@ -568,7 +586,7 @@ after_sack_rexmit: * and timer expired, we will send what we can * and go to transmit state. */ - if (tp->t_force) { + if (tp->t_flagsext & TF_FORCE) { if (sendwin == 0) { /* * If we still have some data to send, then @@ -646,6 +664,22 @@ after_sack_rexmit: } } +#if MPTCP + if ((tp->t_mpflags & TMPF_FASTJOIN_SEND) && + (tp->t_state == TCPS_SYN_SENT) && + (!(tp->t_flags & TF_CLOSING)) && + (so->so_snd.sb_cc != 0) && + (tp->t_rxtshift == 0)) { + flags &= ~TH_SYN; + flags |= TH_ACK; + off = 0; + len = min(so->so_snd.sb_cc, tp->t_maxseg); + early_data_sent = TRUE; + } else if (early_data_sent) { + /* for now, we allow only one data segment to be sent */ + return (0); + } +#endif /* MPTCP */ /* * Lop off SYN bit if it has already been sent. However, if this * is SYN-SENT state and if segment contains data and if we don't @@ -675,6 +709,7 @@ after_sack_rexmit: } + /* * tcp was closed while we were in ip, * resume close @@ -702,10 +737,11 @@ after_sack_rexmit: flags &= ~TH_FIN; } - /* The check here used to be (len < 0). Some times len is zero when - * the congestion window is closed and we need to check if persist timer - * has to be set in that case. But don't set persist until connection - * is established. + /* + * The check here used to be (len < 0). Some times len is zero + * when the congestion window is closed and we need to check + * if persist timer has to be set in that case. But don't set + * persist until connection is established. */ if (len <= 0 && !(flags & TH_SYN)) { /* @@ -721,16 +757,19 @@ after_sack_rexmit: len = 0; if (sendwin == 0) { tp->t_timer[TCPT_REXMT] = 0; + tp->t_timer[TCPT_PTO] = 0; tp->t_rxtshift = 0; tp->t_rxtstart = 0; tp->snd_nxt = tp->snd_una; + off = 0; if (tp->t_timer[TCPT_PERSIST] == 0) tcp_setpersist(tp); } } - /* Automatic sizing of send socket buffer. Increase the send socket buffer - * size if all of the following criteria are met + /* + * Automatic sizing of send socket buffer. Increase the send + * socket buffer size if all of the following criteria are met * 1. the receiver has enough buffer space for this data * 2. 
send buffer is filled to 7/8th with data (so we actually * have data to make use of it); @@ -743,25 +782,25 @@ after_sack_rexmit: (so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE && tcp_cansbgrow(&so->so_snd)) { if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat && - so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) && - sendwin >= (so->so_snd.sb_cc - - (tp->snd_nxt - tp->snd_una))) { + so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) && + sendwin >= (so->so_snd.sb_cc - + (tp->snd_nxt - tp->snd_una))) { /* Also increase the send buffer only if the * round-trip time is not increasing because we do - * not want to contribute to latency by filling buffers. + * not want to contribute to latency by filling + * buffers. * We also do not want to hold onto application's - * old data for too long. Interactive applications would - * rather discard old data. + * old data for too long. Interactive applications + * would rather discard old data. */ - if (tp->t_rttcur <= - (basertt + 25)) { - if (sbreserve(&so->so_snd, - min(so->so_snd.sb_hiwat + tcp_autosndbuf_inc, + if (tp->t_rttcur <= (basertt + 25)) { + if (sbreserve(&so->so_snd, + min(so->so_snd.sb_hiwat + tcp_autosndbuf_inc, tcp_autosndbuf_max)) == 1) { so->so_snd.sb_idealsize = so->so_snd.sb_hiwat; } } else { - so->so_snd.sb_idealsize = + so->so_snd.sb_idealsize = max(tcp_sendspace, so->so_snd.sb_hiwat - (2 * tcp_autosndbuf_inc)); so->so_snd.sb_flags |= SB_TRIM; @@ -794,11 +833,12 @@ after_sack_rexmit: if (ipsec_bypass == 0) ipsec_optlen = ipsec_hdrsiz_tcp(tp); #endif - if (len > tp->t_maxseg) { if ((tp->t_flags & TF_TSO) && tcp_do_tso && hwcksum_tx && - ip_use_randomid && kipf_count == 0 && dlil_filter_disable_tso_count == 0 && - tp->rcv_numsacks == 0 && sack_rxmit == 0 && sack_bytes_rxmt == 0 && + ip_use_randomid && kipf_count == 0 && + dlil_filter_disable_tso_count == 0 && + tp->rcv_numsacks == 0 && sack_rxmit == 0 && + sack_bytes_rxmt == 0 && inp->inp_options == NULL && inp->in6p_options == NULL #if IPSEC @@ -816,19 +856,30 @@ after_sack_rexmit: tso = 0; } } + + /* Send one segment or less as a tail loss probe */ + if (tp->t_flagsext & TF_SENT_TLPROBE) { + len = min(len, tp->t_maxseg); + sendalot = 0; + tso = 0; + } + #if MPTCP - if (so->so_flags & SOF_MP_SUBFLOW) { + if ((so->so_flags & SOF_MP_SUBFLOW) && + !(tp->t_mpflags & TMPF_TCP_FALLBACK)) { int newlen = len; - if ((tp->t_mpflags & TMPF_SND_MPPRIO) || + if (!(tp->t_mpflags & TMPF_PREESTABLISHED) && + (tp->t_state > TCPS_CLOSED) && + ((tp->t_mpflags & TMPF_SND_MPPRIO) || (tp->t_mpflags & TMPF_SND_REM_ADDR) || - (tp->t_mpflags & TMPF_SND_MPFAIL)) { + (tp->t_mpflags & TMPF_SND_MPFAIL))) { if (len > 0) { len = 0; } sendalot = 1; - mptcp_acknow = 1; + mptcp_acknow = TRUE; } else { - mptcp_acknow = 0; + mptcp_acknow = FALSE; } /* * The contiguous bytes in the subflow socket buffer can be @@ -844,16 +895,6 @@ after_sack_rexmit: } } #endif /* MPTCP */ - if (sack_rxmit) { - if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) - flags &= ~TH_FIN; - } else { - if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) - flags &= ~TH_FIN; - } - - recwin = tcp_sbspace(tp); - /* * If the socket is capable of doing unordered send, @@ -880,6 +921,16 @@ after_sack_rexmit: } } + if (sack_rxmit) { + if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~TH_FIN; + } else { + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~TH_FIN; + } + + recwin = tcp_sbspace(tp); + /* * Sender silly window avoidance. 
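The auto-sizing hunk above grows the send buffer only when three criteria hold (the peer's window covers about 5/4 of the current buffer, the buffer is 7/8 full, and the send window covers all unsent data) and the current RTT stays within 25 ms of the base RTT, so growth never feeds queueing delay. A standalone sketch of that gate, with a flat struct of the relevant fields as an assumption:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative inputs; names mirror the fields used by the hunk. */
struct sndbuf_state {
	uint32_t snd_wnd;    /* peer's advertised window */
	uint32_t sb_hiwat;   /* current send buffer limit */
	uint32_t sb_cc;      /* bytes queued in the buffer */
	uint32_t unsent;     /* sb_cc - (snd_nxt - snd_una) */
	uint32_t sendwin;    /* effective send window */
	uint32_t rttcur;     /* latest RTT sample, ms */
	uint32_t basertt;    /* minimum observed RTT, ms */
};

/* Grow only when the buffer is the bottleneck and queueing delay is low. */
static bool
should_grow_sndbuf(const struct sndbuf_state *s)
{
	return ((s->snd_wnd / 4 * 5) >= s->sb_hiwat &&
	    s->sb_cc >= (s->sb_hiwat / 8 * 7) &&
	    s->sendwin >= s->unsent &&
	    s->rttcur <= s->basertt + 25);  /* 25 ms slack, per the hunk */
}

When the RTT test fails, the hunk instead shrinks sb_idealsize and sets SB_TRIM, so stale application data is discarded rather than buffered.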
We transmit under the following * conditions when len is non-zero: @@ -893,7 +944,7 @@ after_sack_rexmit: * data (receiver may be limited the window size) */ if (len) { - if (tp->t_force) + if (tp->t_flagsext & TF_FORCE) goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) goto send; @@ -944,13 +995,15 @@ after_sack_rexmit: oldwin = tp->rcv_adv - tp->rcv_nxt; if (adv >= (int32_t) (2 * tp->t_maxseg)) { - /* Update only if the resulting scaled value of the window changed, or - * if there is a change in the sequence since the last ack. - * This avoids what appears as dupe ACKS (see rdar://5640997) + /* + * Update only if the resulting scaled value of + * the window changed, or if there is a change in + * the sequence since the last ack. This avoids + * what appears as dupe ACKS (see rdar://5640997) * - * If streaming is detected avoid sending too many window updates. - * We will depend on the delack timer to send a window update - * when needed. + * If streaming is detected avoid sending too many + * window updates. We will depend on the delack + * timer to send a window update when needed. */ if (!(tp->t_flags & TF_STRETCHACK) && (tp->last_ack_sent != tp->rcv_nxt || @@ -959,9 +1012,10 @@ after_sack_rexmit: goto send; } - /* Make sure that the delayed ack timer is set if we - * delayed sending a window update because of streaming - * detection. + /* + * Make sure that the delayed ack timer is set if + * we delayed sending a window update because of + * streaming detection. */ if ((tp->t_flags & TF_STRETCHACK) && !(tp->t_flags & TF_DELACK)) { @@ -975,7 +1029,7 @@ after_sack_rexmit: } /* - * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW + * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW * is also a catch-all for the retransmit timer timeout case. */ if (tp->t_flags & TF_ACKNOW) @@ -1005,7 +1059,8 @@ after_sack_rexmit: SEQ_GT(tp->snd_max, tp->snd_una) && tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { - tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); + tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, + tp->t_rxtcur); goto just_return; } /* @@ -1048,7 +1103,8 @@ just_return: packchain_sent++; TCP_PKTLIST_CLEAR(tp); - error = tcp_ip_output(so, tp, packetlist, packchain_listadd, + error = tcp_ip_output(so, tp, packetlist, + packchain_listadd, tp_inp_options, (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0)), recwin, #if INET6 @@ -1125,47 +1181,56 @@ send: } /* - RFC 3168 states that: - - If you ever sent an ECN-setup SYN/SYN-ACK you must be prepared - to handle the TCP ECE flag, even if you also later send a - non-ECN-setup SYN/SYN-ACK. - - If you ever send a non-ECN-setup SYN/SYN-ACK, you must not set - the ip ECT flag. - - It is not clear how the ECE flag would ever be set if you never - set the IP ECT flag on outbound packets. All the same, we use - the TE_SETUPSENT to indicate that we have committed to handling - the TCP ECE flag correctly. We use the TE_SENDIPECT to indicate - whether or not we should set the IP ECT flag on outbound packets. - */ - /* + * RFC 3168 states that: + * - If you ever sent an ECN-setup SYN/SYN-ACK you must be prepared + * to handle the TCP ECE flag, even if you also later send a + * non-ECN-setup SYN/SYN-ACK. + * - If you ever send a non-ECN-setup SYN/SYN-ACK, you must not set + * the ip ECT flag. + * + * It is not clear how the ECE flag would ever be set if you never + * set the IP ECT flag on outbound packets. 
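The window-advertisement hunk a little further up spells out when a pure window update is worth a segment: the window must move by at least two full segments, the scaled value or ack sequence must actually change (avoiding what looks like duplicate ACKs), and updates are suppressed while stretch ACKs are active, leaving the delayed-ACK timer to carry them. A toy predicate capturing that rule; the seq_changed flag abstracts the sequence/scaling comparison, which is an assumption of this sketch:

#include <stdbool.h>
#include <stdint.h>

struct rwin_state {
	int32_t  adv;          /* new window minus previously advertised window */
	uint32_t t_maxseg;
	bool     stretch_ack;  /* TF_STRETCHACK set on the connection */
	bool     seq_changed;  /* last_ack_sent != rcv_nxt, or scaled win moved */
};

/* Send an immediate window update only for a meaningful, non-duplicate move. */
static bool
should_send_window_update(const struct rwin_state *s)
{
	if (s->adv < (int32_t)(2 * s->t_maxseg))
		return false;
	return (!s->stretch_ack && s->seq_changed);
}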
All the same, we use + * the TE_SETUPSENT to indicate that we have committed to handling + * the TCP ECE flag correctly. We use the TE_SENDIPECT to indicate + * whether or not we should set the IP ECT flag on outbound packet + * * For a SYN-ACK, send an ECN setup SYN-ACK */ - if (tcp_ecn_inbound && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { + if ((tcp_ecn_inbound || (tp->t_flags & TF_ENABLE_ECN)) + && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { if ((tp->ecn_flags & TE_SETUPRECEIVED) != 0) { if ((tp->ecn_flags & TE_SETUPSENT) == 0) { /* Setting TH_ECE makes this an ECN-setup SYN-ACK */ flags |= TH_ECE; /* - * Record that we sent the ECN-setup and default to - * setting IP ECT. + * Record that we sent the ECN-setup and + * default to setting IP ECT. */ - tp->ecn_flags |= (TE_SETUPSENT | TE_SENDIPECT); - } - else { + tp->ecn_flags |= (TE_SETUPSENT|TE_SENDIPECT); + tcpstat.tcps_ecn_setup++; + } else { /* - * We sent an ECN-setup SYN-ACK but it was dropped. - * Fallback to non-ECN-setup SYN-ACK and clear flag - * that to indicate we should not send data with IP ECT set. + * We sent an ECN-setup SYN-ACK but it was + * dropped. Fallback to non-ECN-setup + * SYN-ACK and clear flag to indicate that + * we should not send data with IP ECT set * - * Pretend we didn't receive an ECN-setup SYN. + * Pretend we didn't receive an + * ECN-setup SYN. */ tp->ecn_flags &= ~TE_SETUPRECEIVED; + /* + * We already incremented the counter + * assuming that the ECN setup will + * succeed. Decrementing here to + * correct it. + */ + tcpstat.tcps_ecn_setup--; } } - } - else if (tcp_ecn_outbound && (flags & (TH_SYN | TH_ACK)) == TH_SYN) { + } else if ((tcp_ecn_outbound || (tp->t_flags & TF_ENABLE_ECN)) + && (flags & (TH_SYN | TH_ACK)) == TH_SYN) { if ((tp->ecn_flags & TE_SETUPSENT) == 0) { /* Setting TH_ECE and TH_CWR makes this an ECN-setup SYN */ flags |= (TH_ECE | TH_CWR); @@ -1175,8 +1240,7 @@ send: * setting IP ECT. */ tp->ecn_flags |= (TE_SETUPSENT | TE_SENDIPECT); - } - else { + } else { /* * We sent an ECN-setup SYN but it was dropped. * Fall back to no ECN and clear flag indicating @@ -1197,6 +1261,7 @@ send: !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) { flags |= TH_CWR; tp->ecn_flags &= ~TE_SENDCWR; + tcpstat.tcps_sent_cwr++; } /* @@ -1204,6 +1269,7 @@ send: */ if ((tp->ecn_flags & TE_SENDECE) != 0 && len == 0) { flags |= TH_ECE; + tcpstat.tcps_sent_ece++; } /* @@ -1261,13 +1327,14 @@ send: * still advance the subflow level ACK and therefore make it * hard for the remote end to recover in low cwnd situations. */ - if (len != 0) + if (len != 0) { tp->t_mpflags |= (TMPF_SEND_DSN | TMPF_MPTCP_ACKNOW); - else + } else { tp->t_mpflags |= TMPF_MPTCP_ACKNOW; + } optlen = mptcp_setup_opts(tp, off, &opt[0], optlen, flags, - len, &dlenp, &finp, &dss_val, &sseqp); + len, &dlenp, &finp, &dss_val, &sseqp, &mptcp_acknow); tp->t_mpflags &= ~TMPF_SEND_DSN; } #endif /* MPTCP */ @@ -1343,8 +1410,9 @@ send: if (tp_inp_options) { ipoptlen = tp_inp_options->m_len - offsetof(struct ipoption, ipopt_list); - } else + } else { ipoptlen = 0; + } } #if IPSEC ipoptlen += ipsec_optlen; @@ -1365,20 +1433,23 @@ send: */ if (len + optlen + ipoptlen > tp->t_maxopd) { /* - * If there is still more to send, don't close the connection. + * If there is still more to send, + * don't close the connection. */ flags &= ~TH_FIN; if (tso) { int32_t tso_maxlen; - tso_maxlen = tp->tso_max_segment_size ? tp->tso_max_segment_size : TCP_MAXWIN; + tso_maxlen = tp->tso_max_segment_size ? 
+ tp->tso_max_segment_size : TCP_MAXWIN; if (len > tso_maxlen - hdrlen - optlen) { len = tso_maxlen - hdrlen - optlen; len = len - (len % (tp->t_maxopd - optlen)); sendalot = 1; - } else if (tp->t_flags & TF_NEEDFIN) + } else if (tp->t_flags & TF_NEEDFIN) { sendalot = 1; + } } else { len = tp->t_maxopd - optlen - ipoptlen; sendalot = 1; @@ -1415,7 +1486,8 @@ send: (tp->t_flagsext & TF_BWMEAS_INPROGRESS) == 0 && (so->so_snd.sb_cc - (tp->snd_max - tp->snd_una)) >= tp->t_bwmeas->bw_minsize) { - tp->t_bwmeas->bw_size = min((so->so_snd.sb_cc - (tp->snd_max - tp->snd_una)), + tp->t_bwmeas->bw_size = min( + (so->so_snd.sb_cc - (tp->snd_max - tp->snd_una)), tp->t_bwmeas->bw_maxsize); tp->t_flagsext |= TF_BWMEAS_INPROGRESS; tp->t_bwmeas->bw_start = tp->snd_max; @@ -1429,7 +1501,7 @@ send: * the template for sends on this connection. */ if (len) { - if (tp->t_force && len == 1) + if ((tp->t_flagsext & TF_FORCE) && len == 1) tcpstat.tcps_sndprobe++; else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { tcpstat.tcps_sndrexmitpack++; @@ -1437,8 +1509,10 @@ send: if (nstat_collect) { nstat_route_tx(inp->inp_route.ro_rt, 1, len, NSTAT_TX_FLAG_RETRANSMIT); - INP_ADD_STAT(inp, cell, wifi, txpackets, 1); - INP_ADD_STAT(inp, cell, wifi, txbytes, len); + INP_ADD_STAT(inp, cell, wifi, wired, + txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, + txbytes, len); tp->t_stat.txretransmitbytes += len; } } else { @@ -1446,8 +1520,10 @@ send: tcpstat.tcps_sndbyte += len; if (nstat_collect) { - INP_ADD_STAT(inp, cell, wifi, txpackets, 1); - INP_ADD_STAT(inp, cell, wifi, txbytes, len); + INP_ADD_STAT(inp, cell, wifi, wired, + txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, + txbytes, len); } } #if MPTCP @@ -1517,54 +1593,28 @@ send: */ copymode = M_COPYM_MOVE_HDR; #if MPTCP - if ((tp->t_mpflags & TMPF_MPTCP_TRUE) || - (tp->t_mpflags & TMPF_TCP_FALLBACK)) { + if (so->so_flags & SOF_MP_SUBFLOW) { copymode = M_COPYM_NOOP_HDR; } #endif /* MPTCP */ if (m != NULL) { - m->m_next = m_copym_mode(so->so_snd.sb_mb, off, - (int) len, M_DONTWAIT, copymode); + m->m_next = m_copym_mode(so->so_snd.sb_mb, + off, (int)len, M_DONTWAIT, copymode); if (m->m_next == NULL) { (void) m_free(m); error = ENOBUFS; goto out; } } else { - /* - * determine whether the mbuf pointer and - * offset passed back by the 'last' call to - * m_copym_with_hdrs are still valid... if the - * head of the socket chain has changed (due - * to an incoming ACK for instance), or the - * offset into the chain we just computed is - * different from the one last returned by - * m_copym_with_hdrs (perhaps we're re- - * transmitting a packet sent earlier), then - * we can't pass the mbuf pointer and offset - * into it as valid hints for m_copym_with_hdrs - * to use (if valid, these hints allow - * m_copym_with_hdrs to avoid rescanning from - * the beginning of the socket buffer mbuf list. - * - * Setting the mbuf pointer to NULL is - * sufficient to disable the hint mechanism. - */ - if (m_head != so->so_snd.sb_mb || sack_rxmit || - last_off != off) - m_lastm = NULL; - last_off = off + len; - m_head = so->so_snd.sb_mb; - /* * make sure we still have data left * to be sent at this point */ - if (m_head == NULL) { + if (so->so_snd.sb_mb == NULL) { error = 0; /* should we return an error? */ goto out; } - + /* * m_copym_with_hdrs will always return the * last mbuf pointer and the offset into it that @@ -1572,7 +1622,7 @@ send: * whether a valid 'hint' was passed in or not. 
*/ if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, - off, len, M_DONTWAIT, &m_lastm, &m_off, + off, len, M_DONTWAIT, NULL, NULL, copymode)) == NULL) { error = ENOBUFS; goto out; @@ -1633,6 +1683,10 @@ send: ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); } svc_flags |= PKT_SCF_IPV6; +#if PF_ECN + m->m_pkthdr.pf_mtag.pftag_hdr = (void *)ip6; + m->m_pkthdr.pf_mtag.pftag_flags |= PF_TAG_HDR_INET6; +#endif /* PF_ECN */ } else #endif /* INET6 */ { @@ -1645,6 +1699,10 @@ send: !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) { ip->ip_tos = IPTOS_ECN_ECT0; } +#if PF_ECN + m->m_pkthdr.pf_mtag.pftag_hdr = (void *)ip; + m->m_pkthdr.pf_mtag.pftag_flags |= PF_TAG_HDR_INET; +#endif /* PF_ECN */ } /* @@ -1680,7 +1738,12 @@ send: } th->th_ack = htonl(tp->rcv_nxt); tp->last_ack_sent = tp->rcv_nxt; - +#if MPTCP + /* Initialize the ACK field to a value as 0 ack fields are dropped */ + if (early_data_sent) { + th->th_ack = th->th_seq + 1; + } +#endif /* MPTCP */ if (optlen) { bcopy(opt, th + 1, optlen); th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; @@ -1792,7 +1855,8 @@ send: * In transmit state, time the transmission and arrange for * the retransmit. In persist state, just set snd_max. */ - if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { + if (!(tp->t_flagsext & TF_FORCE) + || tp->t_timer[TCPT_PERSIST] == 0) { tcp_seq startseq = tp->snd_nxt; /* @@ -1826,23 +1890,68 @@ send: /* * Set retransmit timer if not currently set, * and not doing an ack or a keep-alive probe. - * Initial value for retransmit timer is smoothed - * round-trip time + 2 * round-trip time variance. - * Initialize shift counter which is used for backoff - * of retransmit time. */ timer: if (tp->t_timer[TCPT_REXMT] == 0 && ((sack_rxmit && tp->snd_nxt != tp->snd_max) || - tp->snd_nxt != tp->snd_una || - (flags & TH_FIN))) { + tp->snd_nxt != tp->snd_una || (flags & TH_FIN))) { if (tp->t_timer[TCPT_PERSIST]) { tp->t_timer[TCPT_PERSIST] = 0; tp->t_rxtshift = 0; tp->t_rxtstart = 0; tp->t_persist_stop = 0; } - tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); + tp->t_timer[TCPT_REXMT] = + OFFSET_FROM_START(tp, tp->t_rxtcur); + } + + /* + * Set tail loss probe timeout if new data is being + * transmitted. This will be supported only when + * SACK option is enabled on a connection. + * + * Every time new data is sent PTO will get reset. + */ + if (tcp_enable_tlp && tp->t_state == TCPS_ESTABLISHED && + SACK_ENABLED(tp) && !IN_FASTRECOVERY(tp) + && tp->snd_nxt == tp->snd_max + && SEQ_GT(tp->snd_nxt, tp->snd_una) + && tp->t_rxtshift == 0 + && (tp->t_flagsext & (TF_SENT_TLPROBE|TF_PKTS_REORDERED)) == 0) { + u_int32_t pto, srtt, new_rto = 0; + + /* + * Using SRTT alone to set PTO can cause spurious + * retransmissions on wireless networks where there + * is a lot of variance in RTT. Taking variance + * into account will avoid this. + */ + srtt = tp->t_srtt >> TCP_RTT_SHIFT; + pto = ((TCP_REXMTVAL(tp)) * 3) >> 1; + pto = max (2 * srtt, pto); + if ((tp->snd_max - tp->snd_una) == tp->t_maxseg) + pto = max(pto, + (((3 * pto) >> 2) + tcp_delack * 2)); + else + pto = max(10, pto); + + /* if RTO is less than PTO, choose RTO instead */ + if (tp->t_rxtcur < pto) { + /* + * Schedule PTO instead of RTO in favor of + * fast recovery. + */ + pto = tp->t_rxtcur; + + /* Reset the next RTO to be after PTO. 
*/ + TCPT_RANGESET(new_rto, + (pto + TCP_REXMTVAL(tp)), + max(tp->t_rttmin, tp->t_rttcur + 2), + TCPTV_REXMTMAX, 0); + tp->t_timer[TCPT_REXMT] = + OFFSET_FROM_START(tp, new_rto); + } + tp->t_timer[TCPT_PTO] = OFFSET_FROM_START(tp, pto); } } else { /* @@ -1922,10 +2031,23 @@ timer: #endif /* INET6 */ if (path_mtu_discovery && (tp->t_flags & TF_PMTUD)) ip->ip_off |= IP_DF; + +#if NECP + { + necp_kernel_policy_id policy_id; + if (!necp_socket_is_allowed_to_send_recv(inp, &policy_id)) { + m_freem(m); + error = EHOSTUNREACH; + goto out; + } + necp_mark_packet_from_socket(m, inp, policy_id); + } +#endif /* NECP */ + #if IPSEC - if (ipsec_bypass == 0) - ipsec_setsocket(m, so); + if (inp->inp_sp != NULL) + ipsec_setsocket(m, so); #endif /*IPSEC*/ /* @@ -2016,16 +2138,11 @@ timer: } if (sendalot == 0 || (tp->t_state != TCPS_ESTABLISHED) || - (tp->snd_cwnd <= (tp->snd_wnd / 8)) || - (tp->t_flags & (TH_PUSH | TF_ACKNOW)) || tp->t_force != 0 || - tp->t_lastchain >= tcp_packet_chaining) { + (tp->snd_cwnd <= (tp->snd_wnd / 8)) || + (tp->t_flags & (TH_PUSH | TF_ACKNOW)) || + (tp->t_flagsext & TF_FORCE) || + tp->t_lastchain >= tcp_packet_chaining) { error = 0; - - /* - * Reset the stack memory of offset as the socket - * may get unlocked - */ - m_lastm = NULL; while (inp->inp_sndinprog_cnt == 0 && tp->t_pktlist_head != NULL) { packetlist = tp->t_pktlist_head; @@ -2043,7 +2160,6 @@ timer: #else /* INET6 */ 0); #endif /* !INET6 */ - if (error) { /* * Take into account the rest of unsent @@ -2079,7 +2195,8 @@ timer: * the recent call to ip_output_list() plus the amount of * user data in the packet list for this tcp at the moment. */ - if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { + if (!(tp->t_flagsext & TF_FORCE) + || tp->t_timer[TCPT_PERSIST] == 0) { /* * No need to check for TH_FIN here because * the TF_SENTFIN flag handles that case. @@ -2113,16 +2230,12 @@ out: !tp->t_timer[TCPT_PERSIST]) tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); - tp->snd_cwnd = tp->t_maxseg; tp->t_bytes_acked = 0; - tcp_check_timer_state(tp); KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, NULL, - int32_t, TCP_CC_OUTPUT_ERROR); + tcp_ccdbg_trace(tp, NULL, TCP_CC_OUTPUT_ERROR); return (0); } if (error == EMSGSIZE) { @@ -2152,15 +2265,10 @@ out: * treat EHOSTUNREACH/ENETDOWN as a soft error. 
*/ if ((error == EHOSTUNREACH || error == ENETDOWN) && - TCPS_HAVERCVDSYN(tp->t_state) && - !((inp->inp_flags & INP_NO_IFT_CELLULAR) && - inp->inp_last_outifp != NULL && - IFNET_IS_CELLULAR(inp->inp_last_outifp))) { - tp->t_softerror = error; - tcp_check_timer_state(tp); - KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, - 0, 0, 0, 0, 0); - return (0); + TCPS_HAVERCVDSYN(tp->t_state) && + !inp_restricted_send(inp, inp->inp_last_outifp)) { + tp->t_softerror = error; + error = 0; } tcp_check_timer_state(tp); KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); @@ -2215,13 +2323,31 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, } } - if (inp->inp_flags & INP_NO_IFT_CELLULAR) { + if (INP_NO_CELLULAR(inp)) { #if INET6 if (isipv6) ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR; else #endif /* INET6 */ ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; + } + if (INP_NO_EXPENSIVE(inp)) { +#if INET6 + if (isipv6) + ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE; + else +#endif /* INET6 */ + ipoa.ipoa_flags |= IPOAF_NO_EXPENSIVE; + + } + if (INP_AWDL_UNRESTRICTED(inp)) { +#if INET6 + if (isipv6) + ip6oa.ip6oa_flags |= IP6OAF_AWDL_UNRESTRICTED; + else +#endif /* INET6 */ + ipoa.ipoa_flags |= IPOAF_AWDL_UNRESTRICTED; + } #if INET6 if (isipv6) @@ -2266,7 +2392,7 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, */ if (tcp_output_unlocked && !so->so_upcallusecount && (tp->t_state == TCPS_ESTABLISHED) && (sack_in_progress == 0) && - ((tp->t_flags & TF_FASTRECOVERY) == 0)) { + !IN_FASTRECOVERY(tp)) { unlocked = TRUE; socket_unlock(so, 0); @@ -2351,9 +2477,8 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, rc = inp_set_fc_state(inp, adv->code); if (rc == 1) - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, struct tcphdr *, NULL, - int32_t, ((adv->code == FADV_FLOW_CONTROLLED) ? + tcp_ccdbg_trace(tp, NULL, + ((adv->code == FADV_FLOW_CONTROLLED) ? TCP_CC_FLOW_CONTROL : TCP_CC_SUSPEND)); } @@ -2380,7 +2505,8 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, inp->inp_last_outifp) inp->inp_last_outifp = outif; - if (error != 0 && ifdenied && (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (error != 0 && ifdenied && + (INP_NO_CELLULAR(inp) || INP_NO_EXPENSIVE(inp))) soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED)); diff --git a/bsd/netinet/tcp_sack.c b/bsd/netinet/tcp_sack.c index 4ca79aeac..e3b339360 100644 --- a/bsd/netinet/tcp_sack.c +++ b/bsd/netinet/tcp_sack.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. 
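The tail loss probe support added to tcp_output.c above (tcp_enable_tlp and the new TCPT_PTO timer) arms a probe at roughly max(2*SRTT, 1.5*RTO), pads it when only one segment is outstanding, and falls back to firing at the RTO when that would be sooner. The standalone rendering below reproduces just that arithmetic; replacing the kernel's tick-based TCP_REXMTVAL/tcp_delack quantities with plain millisecond parameters is an assumption made for readability.

#include <stdint.h>

/*
 * Compute the probe timeout, in ms, following the new timer: hunk.
 * rexmtval_ms stands in for TCP_REXMTVAL(tp); delack_ms for tcp_delack;
 * rto_ms for tp->t_rxtcur.
 */
static uint32_t
toy_pto_ms(uint32_t srtt_ms, uint32_t rexmtval_ms, uint32_t rto_ms,
    int one_segment_outstanding, uint32_t delack_ms)
{
	uint32_t pto = (rexmtval_ms * 3) >> 1;  /* 1.5 * RTO estimate */

	if (2 * srtt_ms > pto)                  /* take variance into account */
		pto = 2 * srtt_ms;
	if (one_segment_outstanding) {
		/* Pad so the probe outlives a delayed ACK from the peer. */
		uint32_t padded = ((3 * pto) >> 2) + 2 * delack_ms;
		if (padded > pto)
			pto = padded;
	} else if (pto < 10) {
		pto = 10;                       /* 10 ms floor */
	}
	/* If the retransmit timer would fire first, probe at the RTO instead
	 * and the caller reschedules the RTO to land after the probe. */
	if (rto_ms < pto)
		pto = rto_ms;
	return pto;
}

Note the guards at the call site: the probe is armed only for established, SACK-enabled connections with new data in flight, no retransmissions, and no detected reordering.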
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -102,6 +102,8 @@ #include #endif /*IPSEC*/ +#include + int tcp_do_sack = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_sack, 0, "Enable/Disable TCP SACK support"); @@ -115,7 +117,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalmaxholes, CTLFLAG_RW | CTLFLAG_LO &tcp_sack_globalmaxholes, 0, "Global maximum number of TCP SACK holes"); -static int tcp_sack_globalholes = 0; +static SInt32 tcp_sack_globalholes = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_sack_globalholes, 0, "Global number of TCP SACK holes currently allocated"); @@ -242,7 +244,7 @@ tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end) return NULL; } - hole = (struct sackhole *)zalloc_noblock(sack_hole_zone); + hole = (struct sackhole *)zalloc(sack_hole_zone); if (hole == NULL) return NULL; @@ -251,7 +253,7 @@ tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end) hole->rxmit = start; tp->snd_numholes++; - tcp_sack_globalholes++; + OSIncrementAtomic(&tcp_sack_globalholes); return hole; } @@ -265,7 +267,7 @@ tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole) zfree(sack_hole_zone, hole); tp->snd_numholes--; - tcp_sack_globalholes--; + OSDecrementAtomic(&tcp_sack_globalholes); } /* @@ -281,7 +283,7 @@ tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end, hole = tcp_sackhole_alloc(tp, start, end); if (hole == NULL) return NULL; - + hole->rxmit_start = tcp_now; /* Insert the new SACK hole into scoreboard */ if (after != NULL) TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink); @@ -311,6 +313,75 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) /* Free this SACK hole. */ tcp_sackhole_free(tp, hole); } +/* + * When a new ack with SACK is received, check if it indicates packet + * reordering. If there is packet reordering, the socket is marked and + * the late time offset by which the packet was reordered with + * respect to its closest neighboring packets is computed. + */ +static void +tcp_sack_detect_reordering(struct tcpcb *tp, struct sackhole *s, + tcp_seq sacked_seq, tcp_seq snd_fack) +{ + int32_t rext = 0, reordered = 0; + + /* + * If the SACK hole is past snd_fack, this is from new SACK + * information, so we can ignore it. + */ + if (SEQ_GT(s->end, snd_fack)) + return; + /* + * If there has been a retransmit timeout, then the timestamp on + * the SACK segment will be newer. This might lead to a + * false-positive. Avoid re-ordering detection in this case. + */ + if (tp->t_rxtshift > 0) + return; + + /* + * Detect reordering from SACK information by checking + * if recently sacked data was never retransmitted from this hole. + */ + if (SEQ_LT(s->rxmit, sacked_seq)) { + reordered = 1; + tcpstat.tcps_avoid_rxmt++; + } + + if (reordered) { + if (!(tp->t_flagsext & TF_PKTS_REORDERED)) { + tp->t_flagsext |= TF_PKTS_REORDERED; + tcpstat.tcps_detect_reordering++; + } + + tcpstat.tcps_reordered_pkts++; + + VERIFY(SEQ_GEQ(snd_fack, s->rxmit)); + + if (s->rxmit_start > 0) { + rext = timer_diff(tcp_now, 0, s->rxmit_start, 0); + if (rext < 0) + return; + + /* + * We take the maximum reorder window to schedule + * DELAYFR timer as that will take care of jitter + * on the network path. + * + * Computing average and standard deviation seems + * to cause unnecessary retransmissions when there + * is high jitter. + * + * We set a maximum of SRTT/2 and a minimum of + * 10 ms on the reorder window. 
+ */ + tp->t_reorderwin = max(tp->t_reorderwin, rext); + tp->t_reorderwin = min(tp->t_reorderwin, + (tp->t_srtt >> (TCP_RTT_SHIFT - 1))); + tp->t_reorderwin = max(tp->t_reorderwin, 10); + } + } +} /* * Process cumulative ACK and the TCP SACK option to update the scoreboard. @@ -318,12 +389,13 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) * the sequence space). */ void -tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, +tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, u_int32_t *newbytes_acked) { struct sackhole *cur, *temp; struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; int i, j, num_sack_blks; + tcp_seq old_snd_fack = 0, th_ack = th->th_ack; num_sack_blks = 0; /* @@ -359,6 +431,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, if (num_sack_blks == 0) return; + VERIFY(num_sack_blks <= (TCP_MAX_SACK + 1)); /* * Sort the SACK blocks so we can update the scoreboard * with just one pass. The overhead of sorting upto 4+1 elements @@ -384,6 +457,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, *newbytes_acked += (tp->snd_fack - tp->snd_una); } + old_snd_fack = tp->snd_fack; /* * In the while-loop below, incoming SACK blocks (sack_blocks[]) * and SACK holes (snd_holes) are traversed from their tails with @@ -463,6 +537,9 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, if (SEQ_GEQ(sblkp->end, cur->end)) { /* Acks entire hole, so delete hole */ *newbytes_acked += (cur->end - cur->start); + + tcp_sack_detect_reordering(tp, cur, + cur->end, old_snd_fack); temp = cur; cur = TAILQ_PREV(cur, sackhole_head, scblink); tcp_sackhole_remove(tp, temp); @@ -474,6 +551,8 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, } else { /* Move start of hole forward */ *newbytes_acked += (sblkp->end - cur->start); + tcp_sack_detect_reordering(tp, cur, + sblkp->end, old_snd_fack); cur->start = sblkp->end; cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); } @@ -482,16 +561,20 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, if (SEQ_GEQ(sblkp->end, cur->end)) { /* Move end of hole backward */ *newbytes_acked += (cur->end - sblkp->start); + tcp_sack_detect_reordering(tp, cur, + cur->end, old_snd_fack); cur->end = sblkp->start; cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); } else { /* - * ACKs some data in middle of a hole; need to - * split current hole + * ACKs some data in the middle of a hole; + * need to split current hole */ *newbytes_acked += (sblkp->end - sblkp->start); + tcp_sack_detect_reordering(tp, cur, + sblkp->end, old_snd_fack); temp = tcp_sackhole_insert(tp, sblkp->end, - cur->end, cur); + cur->end, cur); if (temp != NULL) { if (SEQ_GT(cur->rxmit, temp->rxmit)) { temp->rxmit = cur->rxmit; @@ -502,6 +585,13 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, cur->end = sblkp->start; cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); + /* + * Reset the rxmit_start to that of + * the current hole as that will + * help to compute the reorder + * window correctly + */ + temp->rxmit_start = cur->rxmit_start; } } } @@ -670,3 +760,28 @@ tcp_sack_adjust(struct tcpcb *tp) tp->snd_nxt = tp->snd_fack; return; } + +/* + * This function returns true if more than (tcprexmtthresh - 1) * SMSS + * bytes with sequence numbers greater than snd_una have been SACKed. 
+ */ +boolean_t +tcp_sack_byte_islost(struct tcpcb *tp) +{ + u_int32_t unacked_bytes, sndhole_bytes = 0; + struct sackhole *sndhole; + if (!SACK_ENABLED(tp) || IN_FASTRECOVERY(tp) || + TAILQ_EMPTY(&tp->snd_holes) || + (tp->t_flagsext & TF_PKTS_REORDERED)) + return (FALSE); + + unacked_bytes = tp->snd_max - tp->snd_una; + + TAILQ_FOREACH(sndhole, &tp->snd_holes, scblink) { + sndhole_bytes += (sndhole->end - sndhole->start); + } + + VERIFY(unacked_bytes >= sndhole_bytes); + return ((unacked_bytes - sndhole_bytes) > + ((tcprexmtthresh - 1) * tp->t_maxseg)); +} diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c index c9a7a6bb7..e68e181da 100644 --- a/bsd/netinet/tcp_subr.c +++ b/bsd/netinet/tcp_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -90,6 +90,7 @@ #include #include +#include #define tcp_minmssoverload fring #define _IP_VHL @@ -134,6 +135,10 @@ #endif #endif /*IPSEC*/ +#if NECP +#include +#endif /* NECP */ + #undef tcp_minmssoverload #if CONFIG_MACF_NET @@ -150,10 +155,6 @@ extern int tcp_lq_overflow; -/* temporary: for testing */ -#if IPSEC -extern int ipsec_bypass; -#endif extern struct tcptimerlist tcp_timer_list; extern struct tcptailq tcp_tw_tailq; @@ -229,31 +230,16 @@ __private_extern__ int tcp_use_randomport = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, randomize_ports, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_use_randomport, 0, "Randomize TCP port numbers"); -extern struct tcp_cc_algo tcp_cc_newreno; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno_sockets, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcp_cc_newreno.num_sockets, 0, "Number of sockets using newreno"); - -extern struct tcp_cc_algo tcp_cc_ledbat; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_sockets, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcp_cc_ledbat.num_sockets, 0, "Number of sockets using background transport"); - __private_extern__ int tcp_win_scale = 3; SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_win_scale, 0, "Window scaling factor"); static void tcp_cleartaocache(void); static void tcp_notify(struct inpcb *, int); -static void tcp_cc_init(void); struct zone *sack_hole_zone; struct zone *tcp_reass_zone; struct zone *tcp_bwmeas_zone; -#if 0 -static unsigned int tcp_mptcp_dsnm_sz; -struct zone *tcp_mptcp_dsnm_zone; -#endif -/* The array containing pointers to currently implemented TCP CC algorithms */ -struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT]; extern int slowlink_wsize; /* window correction for slow links */ extern int path_mtu_discovery; @@ -306,13 +292,11 @@ static lck_grp_t *tcp_uptime_mtx_grp = NULL; /* mutex group definition */ static lck_grp_attr_t *tcp_uptime_mtx_grp_attr = NULL; /* mutex group attributes */ int tcp_notsent_lowat_check(struct socket *so); - int get_inpcb_str_size(void) { return sizeof(struct inpcb); } - int get_tcp_str_size(void) { return sizeof(struct tcpcb); @@ -320,16 +304,42 @@ int get_tcp_str_size(void) int tcp_freeq(struct tcpcb *tp); +static int scale_to_powerof2(int size); + /* - * Initialize TCP congestion control algorithms. - */ + * This helper routine returns one of the following scaled value of size: + * 1. Rounded down power of two value of size if the size value passed as + * argument is not a power of two and the rounded up value overflows. + * OR + * 2. 
Rounded up power of two value of size if the size value passed as + * argument is not a power of two and the rounded up value does not overflow + * OR + * 3. Same value as argument size if it is already a power of two. + */ +static int scale_to_powerof2(int size) { + /* Handle special case of size = 0 */ + int ret = size ? size : 1; + + if (!powerof2(ret)) { + while(!powerof2(size)) { + /* + * Clear out least significant + * set bit till size is left with + * its highest set bit at which point + * it is rounded down power of two. + */ + size = size & (size -1); + } -void -tcp_cc_init(void) -{ - bzero(&tcp_cc_algo_list, sizeof(tcp_cc_algo_list)); - tcp_cc_algo_list[TCP_CC_ALGO_NEWRENO_INDEX] = &tcp_cc_newreno; - tcp_cc_algo_list[TCP_CC_ALGO_BACKGROUND_INDEX] = &tcp_cc_ledbat; + /* Check for overflow when rounding up */ + if (0 == (size << 1)) { + ret = size; + } else { + ret = size << 1; + } + } + + return ret; } /* @@ -384,10 +394,24 @@ tcp_init(struct protosw *pp, struct domain *dp) /* NOTREACHED */ } + if (tcp_tcbhashsize == 0) { + /* Set to default */ + tcp_tcbhashsize = 512; + } + if (!powerof2(tcp_tcbhashsize)) { - printf("WARNING: TCB hash size not a power of 2\n"); - tcp_tcbhashsize = 512; /* safe default */ + int old_hash_size = tcp_tcbhashsize; + tcp_tcbhashsize = scale_to_powerof2(tcp_tcbhashsize); + /* Lower limit of 16 */ + if (tcp_tcbhashsize < 16) { + tcp_tcbhashsize = 16; + } + printf("WARNING: TCB hash size not a power of 2, " + "scaled from %d to %d.\n", + old_hash_size, + tcp_tcbhashsize); } + tcbinfo.ipi_hashbase = hashinit(tcp_tcbhashsize, M_PCB, &tcbinfo.ipi_hashmask); tcbinfo.ipi_porthashbase = hashinit(tcp_tcbhashsize, M_PCB, &tcbinfo.ipi_porthashmask); @@ -404,9 +428,8 @@ tcp_init(struct protosw *pp, struct domain *dp) zone_change(sack_hole_zone, Z_CALLERACCT, FALSE); zone_change(sack_hole_zone, Z_EXPAND, TRUE); - tcp_reass_maxseg = nmbclusters / 16; str_size = P2ROUNDUP(sizeof(struct tseg_qent), sizeof(u_int64_t)); - tcp_reass_zone = zinit(str_size, (tcp_reass_maxseg + 1) * str_size, + tcp_reass_zone = zinit(str_size, (nmbclusters >> 4) * str_size, 0, "tcp_reass_zone"); if (tcp_reass_zone == NULL) { panic("%s: failed allocating tcp_reass_zone", __func__); @@ -424,6 +447,11 @@ tcp_init(struct protosw *pp, struct domain *dp) zone_change(tcp_bwmeas_zone, Z_CALLERACCT, FALSE); zone_change(tcp_bwmeas_zone, Z_EXPAND, TRUE); + str_size = P2ROUNDUP(sizeof(struct tcp_ccstate), sizeof(u_int64_t)); + tcp_cc_zone = zinit(str_size, 20000 * str_size, 0, "tcp_cc_zone"); + zone_change(tcp_cc_zone, Z_CALLERACCT, FALSE); + zone_change(tcp_cc_zone, Z_EXPAND, TRUE); + #if INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ @@ -451,8 +479,6 @@ tcp_init(struct protosw *pp, struct domain *dp) if ((tcp_timer_list.mtx = lck_mtx_alloc_init(tcp_timer_list.mtx_grp, tcp_timer_list.mtx_attr)) == NULL) { panic("failed to allocate memory for tcp_timer_list.mtx\n"); }; - tcp_timer_list.fast_quantum = TCP_FASTTIMER_QUANTUM; - tcp_timer_list.slow_quantum = TCP_SLOWTIMER_QUANTUM; if ((tcp_timer_list.call = thread_call_allocate(tcp_run_timerlist, NULL)) == NULL) { panic("failed to allocate call entry 1 in tcp_init\n"); } @@ -465,11 +491,17 @@ tcp_init(struct protosw *pp, struct domain *dp) tcp_uptime_mtx_attr = lck_attr_alloc_init(); tcp_uptime_lock = lck_spin_alloc_init(tcp_uptime_mtx_grp, tcp_uptime_mtx_attr); - /* Initialize TCP congestion control algorithms list */ - tcp_cc_init(); - /* Initialize TCP LRO data structures */ tcp_lro_init(); + + /* + * If 
more than 60 MB of mbuf pool is available, increase the + * maximum allowed receive and send socket buffer size. + */ + if (nmbclusters > 30720) { + tcp_autorcvbuf_max = 1024 * 1024; + tcp_autosndbuf_max = 1024 * 1024; + } } /* @@ -568,19 +600,10 @@ tcp_maketemplate(tp) * NOTE: If m != NULL, then ti must point to *inside* the mbuf. */ void -tcp_respond( - struct tcpcb *tp, - void *ipgen, - register struct tcphdr *th, - register struct mbuf *m, - tcp_seq ack, - tcp_seq seq, - int flags, - unsigned int ifscope, - unsigned int nocell - ) +tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, + tcp_seq ack, tcp_seq seq, int flags, struct tcp_respond_args *tra) { - register int tlen; + int tlen; int win = 0; struct route *ro = 0; struct route sro; @@ -751,8 +774,14 @@ tcp_respond( if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif + +#if NECP + necp_mark_packet_from_socket(m, tp ? tp->t_inpcb : NULL, 0); +#endif /* NECP */ + #if IPSEC - if (ipsec_bypass == 0 && ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL) != 0) { + if (tp != NULL && tp->t_inpcb->inp_sp != NULL && + ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL) != 0) { m_freem(m); return; } @@ -780,13 +809,17 @@ tcp_respond( #if INET6 if (isipv6) { - struct ip6_out_args ip6oa = { ifscope, { 0 }, + struct ip6_out_args ip6oa = { tra->ifscope, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 }; - if (ifscope != IFSCOPE_NONE) + if (tra->ifscope != IFSCOPE_NONE) ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; - if (nocell) + if (tra->nocell) ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR; + if (tra->noexpensive) + ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE; + if (tra->awdl_unrestricted) + ip6oa.ip6oa_flags |= IP6OAF_AWDL_UNRESTRICTED; (void) ip6_output(m, NULL, ro6, IPV6_OUTARGS, NULL, NULL, &ip6oa); @@ -801,13 +834,17 @@ tcp_respond( } else #endif /* INET6 */ { - struct ip_out_args ipoa = { ifscope, { 0 }, + struct ip_out_args ipoa = { tra->ifscope, { 0 }, IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR, 0 }; - if (ifscope != IFSCOPE_NONE) + if (tra->ifscope != IFSCOPE_NONE) ipoa.ipoa_flags |= IPOAF_BOUND_IF; - if (nocell) + if (tra->nocell) ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; + if (tra->noexpensive) + ipoa.ipoa_flags |= IPOAF_NO_EXPENSIVE; + if (tra->awdl_unrestricted) + ipoa.ipoa_flags |= IPOAF_AWDL_UNRESTRICTED; if (ro != &sro) { /* Copy the cached route and take an extra reference */ @@ -883,15 +920,18 @@ tcp_newtcpcb(inp) tp->t_rttmin = tcp_TCPTV_MIN; tp->t_rxtcur = TCPTV_RTOBASE; - /* Initialize congestion control algorithm for this connection - * to newreno by default - */ - tp->tcp_cc_index = TCP_CC_ALGO_NEWRENO_INDEX; - if (CC_ALGO(tp)->init != NULL) { + if (tcp_use_newreno) + /* use newreno by default */ + tp->tcp_cc_index = TCP_CC_ALGO_NEWRENO_INDEX; + else + tp->tcp_cc_index = TCP_CC_ALGO_CUBIC_INDEX; + + tcp_cc_allocate_state(tp); + + if (CC_ALGO(tp)->init != NULL) CC_ALGO(tp)->init(tp); - } - tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->snd_cwnd = TCP_CC_CWND_INIT_BYTES; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh_prev = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = tcp_now; @@ -997,7 +1037,7 @@ tcp_close(tp) int dosavessthresh; /* tcp_close was called previously, bail */ - if ( inp->inp_ppcb == NULL) + if (inp->inp_ppcb == NULL) return(NULL); tcp_canceltimers(tp); @@ -1024,10 +1064,6 @@ tcp_close(tp) DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, struct tcpcb *, tp, int32_t, TCPS_CLOSED); 
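The scale_to_powerof2() helper added to tcp_subr.c above rounds the configured TCB hash size to a power of two: it rounds down by repeatedly clearing the lowest set bit, then doubles unless that would overflow. An equivalent standalone version, with a small usage example applying the same 16-bucket floor tcp_init() enforces; the function name here is a stand-in for the sketch:

#include <stdio.h>

/* Same contract as the new helper: return a power of two near `size`. */
static int
scale_to_powerof2_sketch(int size)
{
	int ret = size ? size : 1;

	if ((ret & (ret - 1)) != 0) {           /* not already a power of two */
		while ((size & (size - 1)) != 0)
			size &= size - 1;       /* round down to highest set bit */
		/* Round up, unless doubling would overflow to zero. */
		ret = ((size << 1) == 0) ? size : (size << 1);
	}
	return ret;
}

int
main(void)
{
	int hash = scale_to_powerof2_sketch(600);  /* rounds up to 1024 */

	if (hash < 16)                             /* floor applied by tcp_init */
		hash = 16;
	printf("tcb hash size: %d\n", hash);
	return 0;
}

With this in place a misconfigured tcp_tcbhashsize is scaled and logged instead of being silently reset to the 512 default.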
- if (CC_ALGO(tp)->cleanup != NULL) { - CC_ALGO(tp)->cleanup(tp); - } - #if INET6 ro = (isipv6 ? (struct route *)&inp->in6p_route : &inp->inp_route); #else @@ -1065,13 +1101,10 @@ tcp_close(tp) #endif /* INET6 */ if (ROUTE_UNUSABLE(ro) || SIN(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) { - if (tp->t_state >= TCPS_CLOSE_WAIT) { - DTRACE_TCP4(state__change, - void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, - TCPS_CLOSING); - tp->t_state = TCPS_CLOSING; - } + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, struct tcpcb *, tp, + int32_t, TCPS_CLOSED); + tp->t_state = TCPS_CLOSED; goto no_valid_rt; } @@ -1179,12 +1212,19 @@ no_valid_rt: #if MPTCP /* Clear MPTCP state */ + if ((so->so_flags & SOF_MPTCP_TRUE) || + (so->so_flags & SOF_MP_SUBFLOW)) { + soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DELETEOK)); + } tp->t_mpflags = 0; + tp->t_mptcb = NULL; #endif /* MPTCP */ if (so->cached_in_sock_layer) inp->inp_saved_ppcb = (caddr_t) tp; + tp->t_state = TCPS_CLOSED; + /* Issue a wakeup before detach so that we don't miss * a wakeup */ @@ -1194,12 +1234,31 @@ no_valid_rt: * Clean up any LRO state */ if (tp->t_flagsext & TF_LRO_OFFLOADED) { - tcp_lro_remove_state(inp->inp_laddr, inp->inp_faddr, - inp->inp_lport, - inp->inp_fport); + tcp_lro_remove_state(inp->inp_laddr, inp->inp_faddr, + inp->inp_lport, inp->inp_fport); tp->t_flagsext &= ~TF_LRO_OFFLOADED; } - tp->t_state = TCPS_CLOSED; + + /* + * If this is a socket that does not want to wakeup the device + * for it's traffic, the application might need to know that the + * socket is closed, send a notification. + */ + if ((so->so_options & SO_NOWAKEFROMSLEEP) && + inp->inp_state != INPCB_STATE_DEAD && + !(inp->inp_flags2 & INP2_TIMEWAIT)) + socket_post_kev_msg_closed(so); + + if (CC_ALGO(tp)->cleanup != NULL) { + CC_ALGO(tp)->cleanup(tp); + } + + if (tp->t_ccstate != NULL) { + zfree(tcp_cc_zone, tp->t_ccstate); + tp->t_ccstate = NULL; + } + tp->tcp_cc_index = TCP_CC_ALGO_NONE; + #if INET6 if (SOCK_CHECK_DOM(so, PF_INET6)) in6_pcbdetach(inp); @@ -1210,7 +1269,8 @@ no_valid_rt: /* Call soisdisconnected after detach because it might unlock the socket */ soisdisconnected(so); tcpstat.tcps_closed++; - KERNEL_DEBUG(DBG_FNC_TCP_CLOSE | DBG_FUNC_END, tcpstat.tcps_closed,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_TCP_CLOSE | DBG_FUNC_END, + tcpstat.tcps_closed, 0, 0, 0, 0); return(NULL); } @@ -1226,9 +1286,9 @@ tcp_freeq(tp) LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); zfree(tcp_reass_zone, q); - tcp_reass_qsize--; rv = 1; } + tp->t_reassqlen = 0; return (rv); } @@ -1341,16 +1401,16 @@ tcp_bwmeas_free(struct tcpcb* tp) static void tcpcb_to_otcpcb(struct tcpcb *tp, struct otcpcb *otp) { - int i; - - otp->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first; + otp->t_segq = (uint32_t)VM_KERNEL_ADDRPERM(tp->t_segq.lh_first); otp->t_dupacks = tp->t_dupacks; - for (i = 0; i < TCPT_NTIMERS_EXT; i++) - otp->t_timer[i] = tp->t_timer[i]; - otp->t_inpcb = (_TCPCB_PTR(struct inpcb *))(uintptr_t)tp->t_inpcb; + otp->t_timer[TCPT_REXMT_EXT] = tp->t_timer[TCPT_REXMT]; + otp->t_timer[TCPT_PERSIST_EXT] = tp->t_timer[TCPT_PERSIST]; + otp->t_timer[TCPT_KEEP_EXT] = tp->t_timer[TCPT_KEEP]; + otp->t_timer[TCPT_2MSL_EXT] = tp->t_timer[TCPT_2MSL]; + otp->t_inpcb = (_TCPCB_PTR(struct inpcb *))VM_KERNEL_ADDRPERM(tp->t_inpcb); otp->t_state = tp->t_state; otp->t_flags = tp->t_flags; - otp->t_force = tp->t_force; + otp->t_force = (tp->t_flagsext & TF_FORCE) ? 
1 : 0; otp->snd_una = tp->snd_una; otp->snd_max = tp->snd_max; otp->snd_nxt = tp->snd_nxt; @@ -1517,22 +1577,23 @@ tcp_pcblist SYSCTL_HANDLER_ARGS return error; } -SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); static void tcpcb_to_xtcpcb64(struct tcpcb *tp, struct xtcpcb64 *otp) { - int i; - - otp->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first; + otp->t_segq = (uint32_t)VM_KERNEL_ADDRPERM(tp->t_segq.lh_first); otp->t_dupacks = tp->t_dupacks; - for (i = 0; i < TCPT_NTIMERS_EXT; i++) - otp->t_timer[i] = tp->t_timer[i]; + otp->t_timer[TCPT_REXMT_EXT] = tp->t_timer[TCPT_REXMT]; + otp->t_timer[TCPT_PERSIST_EXT] = tp->t_timer[TCPT_PERSIST]; + otp->t_timer[TCPT_KEEP_EXT] = tp->t_timer[TCPT_KEEP]; + otp->t_timer[TCPT_2MSL_EXT] = tp->t_timer[TCPT_2MSL]; otp->t_state = tp->t_state; otp->t_flags = tp->t_flags; - otp->t_force = tp->t_force; + otp->t_force = (tp->t_flagsext & TF_FORCE) ? 1 : 0; otp->snd_una = tp->snd_una; otp->snd_max = tp->snd_max; otp->snd_nxt = tp->snd_nxt; @@ -1664,7 +1725,7 @@ tcp_pcblist64 SYSCTL_HANDLER_ARGS bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; inpcb_to_xinpcb64(inp, &xt.xt_inpcb); - xt.xt_inpcb.inp_ppcb = (u_int64_t)(uintptr_t)inp->inp_ppcb; + xt.xt_inpcb.inp_ppcb = (uint64_t)VM_KERNEL_ADDRPERM(inp->inp_ppcb); if (inp->inp_ppcb != NULL) tcpcb_to_xtcpcb64((struct tcpcb *)inp->inp_ppcb, &xt); if (inp->inp_socket) @@ -1692,7 +1753,8 @@ tcp_pcblist64 SYSCTL_HANDLER_ARGS return error; } -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist64, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_pcblist64, "S,xtcpcb64", "List of active TCP connections"); @@ -1708,15 +1770,17 @@ tcp_pcblist_n SYSCTL_HANDLER_ARGS } -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist_n, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_pcblist_n, "S,xtcpcb_n", "List of active TCP connections"); __private_extern__ void -tcp_get_ports_used(uint32_t ifindex, int protocol, uint32_t wildcardok, +tcp_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, bitstr_t *bitfield) { - inpcb_get_ports_used(ifindex, protocol, wildcardok, bitfield, &tcbinfo); + inpcb_get_ports_used(ifindex, protocol, flags, + bitfield, &tcbinfo); } __private_extern__ uint32_t @@ -2223,10 +2287,12 @@ tcp_rtlookup(inp, input_ifscope) somultipages(inp->inp_socket, (rt->rt_ifp->if_hwassist & IFNET_MULTIPAGES)); tcp_set_tso(tp, rt->rt_ifp); + soif2kcl(inp->inp_socket, + (rt->rt_ifp->if_eflags & IFEF_2KCL)); } /* Note if the peer is local */ - if (rt != NULL && + if (rt != NULL && !(rt->rt_ifp->if_flags & IFF_POINTOPOINT) && (rt->rt_gateway->sa_family == AF_LINK || rt->rt_ifp->if_flags & IFF_LOOPBACK || in_localaddr(inp->inp_faddr))) { @@ -2327,10 +2393,12 @@ tcp_rtlookup6(inp, input_ifscope) somultipages(inp->inp_socket, (rt->rt_ifp->if_hwassist & IFNET_MULTIPAGES)); tcp_set_tso(tp, rt->rt_ifp); + soif2kcl(inp->inp_socket, + (rt->rt_ifp->if_eflags & IFEF_2KCL)); } /* Note if the peer is local */ - if (rt != NULL && + if (rt != NULL && !(rt->rt_ifp->if_flags & IFF_POINTOPOINT) && (IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr) || IN6_IS_ADDR_LINKLOCAL(&inp->in6p_faddr) || rt->rt_gateway->sa_family == AF_LINK || @@ -2532,7 +2600,7 @@ tcp_getlock( */ static 
void tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) { - u_int32_t rcvbufinc = tp->t_maxseg << tcp_autorcvbuf_inc_shift; + u_int32_t rcvbufinc = tp->t_maxseg << 4; u_int32_t rcvbuf = sb->sb_hiwat; struct socket *so = tp->t_inpcb->inp_socket; @@ -2547,8 +2615,11 @@ tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) { tcp_cansbgrow(sb) && (tp->t_flags & TF_SLOWLINK) == 0 && (rcvbuf - sb->sb_cc) < rcvbufinc && - (rcvbuf < tcp_autorcvbuf_max)) { - sbreserve(sb, (sb->sb_hiwat + rcvbufinc)); + rcvbuf < tcp_autorcvbuf_max && + (sb->sb_idealsize > 0 && + sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) { + sbreserve(sb, + min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max)); } } @@ -2559,6 +2630,7 @@ tcp_sbspace(struct tcpcb *tp) u_int32_t rcvbuf = sb->sb_hiwat; int32_t space; struct socket *so = tp->t_inpcb->inp_socket; + int32_t pending = 0; /* * If message delivery is enabled, do not count @@ -2577,6 +2649,15 @@ tcp_sbspace(struct tcpcb *tp) if (space < 0) space = 0; +#if CONTENT_FILTER + /* Compensate for data being processed by content filters */ + pending = cfil_sock_data_space(sb); +#endif /* CONTENT_FILTER */ + if (pending > space) + space = 0; + else + space -= pending; + /* Avoid increasing window size if the current window * is already very low, we could be in "persist" mode and * we could break some apps (see rdar://5409343) @@ -2593,11 +2674,16 @@ tcp_sbspace(struct tcpcb *tp) return space; } /* - * Checks TCP Segment Offloading capability for a given connection and interface pair. + * Checks TCP Segment Offloading capability for a given connection + * and interface pair. */ void tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp) { +#if INET6 + struct inpcb *inp; + int isipv6; +#endif /* INET6 */ #if MPTCP /* * We can't use TSO if this tcpcb belongs to an MPTCP session. 
@@ -2608,8 +2694,8 @@ tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp) } #endif #if INET6 - struct inpcb *inp = tp->t_inpcb; - int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; + inp = tp->t_inpcb; + isipv6 = (inp->inp_vflag & INP_IPV6) != 0; if (isipv6) { if (ifp && (ifp->if_hwassist & IFNET_TSO_IPV6)) { @@ -2668,10 +2754,10 @@ calculate_tcp_clock() /* time to update the clock */ lck_spin_lock(tcp_uptime_lock); if (timevalcmp(&tcp_uptime, &now, >=)) { - /* clock got updated while we were waiting for the lock */ + /* clock got updated while waiting for the lock */ lck_spin_unlock(tcp_uptime_lock); return; - } + } microuptime(&now); hold_now = now; diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index b1ac3138b..aa2317164 100644 --- a/bsd/netinet/tcp_timer.c +++ b/bsd/netinet/tcp_timer.c @@ -79,6 +79,7 @@ #include #include +#include #include #include @@ -104,11 +105,6 @@ #include #include -extern void postevent(struct socket *, struct sockbuf *, - int); -#define DBG_FNC_TCP_FAST NETDBG_CODE(DBG_NETTCP, (5 << 8)) -#define DBG_FNC_TCP_SLOW NETDBG_CODE(DBG_NETTCP, (5 << 8) | 1) - #define TIMERENTRY_TO_TP(te) ((struct tcpcb *)((uintptr_t)te - offsetof(struct tcpcb, tentry.le.le_next))) #define VERIFY_NEXT_LINK(elm,field) do { \ @@ -123,16 +119,24 @@ extern void postevent(struct socket *, struct sockbuf *, panic("Bad link elm %p prev->next != elm", (elm)); \ } while(0) +#define TCP_SET_TIMER_MODE(mode, i) do { \ + if (IS_TIMER_HZ_10MS(i)) \ + (mode) |= TCP_TIMERLIST_10MS_MODE; \ + else if (IS_TIMER_HZ_100MS(i)) \ + (mode) |= TCP_TIMERLIST_100MS_MODE; \ + else \ + (mode) |= TCP_TIMERLIST_500MS_MODE; \ +} while(0) + +/* Max number of times a stretch ack can be delayed on a connection */ +#define TCP_STRETCHACK_DELAY_THRESHOLD 5 + /* tcp timer list */ struct tcptimerlist tcp_timer_list; /* List of pcbs in timewait state, protected by tcbinfo's ipi_lock */ struct tcptailq tcp_tw_tailq; -static int background_io_trigger = 5; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_trigger, CTLFLAG_RW | CTLFLAG_LOCKED, - &background_io_trigger, 0, "Background IO Trigger Setting"); - static int sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS { @@ -155,45 +159,59 @@ sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS } int tcp_keepinit; -SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); int tcp_keepidle; -SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); int tcp_keepintvl; -SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); int tcp_keepcnt; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_keepcnt, 0, "number of times to repeat keepalive"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_keepcnt, 0, "number of times to repeat keepalive"); int tcp_msl; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_msl, 0, 
sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); /* - * Avoid DoS via TCP Robustness in Persist Condition (see http://www.ietf.org/id/draft-ananth-tcpm-persist-02.txt) - * by allowing a system wide maximum persistence timeout value when in Zero Window Probe mode. - * Expressed in milliseconds to be consistent without timeout related values, the TCP socket option is in seconds. + * Avoid DoS via TCP Robustness in Persist Condition + * (see http://www.ietf.org/id/draft-ananth-tcpm-persist-02.txt) + * by allowing a system wide maximum persistence timeout value when in + * Zero Window Probe mode. + * + * Expressed in milliseconds to be consistent without timeout related + * values, the TCP socket option is in seconds. */ u_int32_t tcp_max_persist_timeout = 0; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, max_persist_timeout, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_max_persist_timeout, 0, sysctl_msec_to_ticks, "I", "Maximum persistence timout for ZWP"); +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, max_persist_timeout, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_max_persist_timeout, 0, sysctl_msec_to_ticks, "I", + "Maximum persistence timeout for ZWP"); static int always_keepalive = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW | CTLFLAG_LOCKED, +SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, + CTLFLAG_RW | CTLFLAG_LOCKED, &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); -/* This parameter determines how long the timer list will stay in fast mode even - * though all connections are idle. In fast mode, the timer will fire more frequently - * anticipating new data. +/* + * This parameter determines how long the timer list will stay in fast or + * quick mode even though all connections are idle. In this state, the + * timer will run more frequently anticipating new data. */ -int timer_fastmode_idlemax = TCP_FASTMODE_IDLEGEN_MAX; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_fastmode_idlemax, CTLFLAG_RW | CTLFLAG_LOCKED, - &timer_fastmode_idlemax, 0, "Maximum idle generations in fast mode"); +int timer_fastmode_idlemax = TCP_FASTMODE_IDLERUN_MAX; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_fastmode_idlemax, + CTLFLAG_RW | CTLFLAG_LOCKED, + &timer_fastmode_idlemax, 0, "Maximum idle generations in fast mode"); /* * See tcp_syn_backoff[] for interval values between SYN retransmits; @@ -203,42 +221,49 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_fastmode_idlemax, CTLFLAG_RW | CTLFLAG * two options. 
*/ static int tcp_broken_peer_syn_rxmit_thres = 7; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_broken_peer_syn_rxmit_thres, 0, "Number of retransmitted SYNs before " +SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, + CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_broken_peer_syn_rxmit_thres, 0, + "Number of retransmitted SYNs before " "TCP disables rfc1323 and rfc1644 during the rest of attempts"); /* A higher threshold on local connections for disabling RFC 1323 options */ static int tcp_broken_peer_syn_rxmit_thres_local = 10; SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rexmit_thres_local, - CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres_local, 0, - "Number of retransmitted SYNs before disabling RFC 1323 options on local connections"); + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres_local, 0, + "Number of retransmitted SYNs before disabling RFC 1323 " + "options on local connections"); static int tcp_timer_advanced = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_timer_advanced, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcp_timer_advanced, 0, "Number of times one of the timers was advanced"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_timer_advanced, + CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_timer_advanced, 0, + "Number of times one of the timers was advanced"); static int tcp_resched_timerlist = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_resched_timerlist, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcp_resched_timerlist, 0, +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_resched_timerlist, + CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_resched_timerlist, 0, "Number of times timer list was rescheduled as part of processing a packet"); int tcp_pmtud_black_hole_detect = 1 ; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_pmtud_black_hole_detect, 0, "Path MTU Discovery Black Hole Detection"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_pmtud_black_hole_detect, 0, + "Path MTU Discovery Black Hole Detection"); int tcp_pmtud_black_hole_mss = 1200 ; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_pmtud_black_hole_mss, 0, "Path MTU Discovery Black Hole Detection lowered MSS"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_pmtud_black_hole_mss, 0, + "Path MTU Discovery Black Hole Detection lowered MSS"); /* performed garbage collection of "used" sockets */ static boolean_t tcp_gc_done = FALSE; - /* max idle probes */ +/* max idle probes */ int tcp_maxpersistidle; -/* TCP delack timer is set to 100 ms. Since the processing of timer list in fast - * mode will happen no faster than 100 ms, the delayed ack timer will fire some where - * between 100 and 200 ms. +/* + * TCP delack timer is set to 100 ms. Since the processing of timer list + * in fast mode will happen no faster than 100 ms, the delayed ack timer + * will fire some where between 100 and 200 ms. */ int tcp_delack = TCP_RETRANSHZ / 10; @@ -249,45 +274,37 @@ int tcp_delack = TCP_RETRANSHZ / 10; int tcp_jack_rxmt = TCP_RETRANSHZ / 2; #endif /* MPTCP */ -/* The frequency of running through the TCP timer list in - * fast and slow mode can be configured. 
- */ -SYSCTL_UINT(_net_inet_tcp, OID_AUTO, timer_fastquantum, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_timer_list.fast_quantum, TCP_FASTTIMER_QUANTUM, - "Frequency of running timer list in fast mode"); - -SYSCTL_UINT(_net_inet_tcp, OID_AUTO, timer_slowquantum, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_timer_list.slow_quantum, TCP_SLOWTIMER_QUANTUM, - "Frequency of running timer list in slow mode"); - static void tcp_remove_timer(struct tcpcb *tp); static void tcp_sched_timerlist(uint32_t offset); -static uint32_t tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index); +static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode); static void tcp_sched_timers(struct tcpcb *tp); static inline void tcp_set_lotimer_index(struct tcpcb *); static void tcp_rexmt_save_state(struct tcpcb *tp); -void tcp_remove_from_time_wait(struct inpcb *inp); +__private_extern__ void tcp_remove_from_time_wait(struct inpcb *inp); +__private_extern__ void tcp_report_stats(void); -/* Macro to compare two timers. If there is a reset of the sign bit, it is - * safe to assume that the timer has wrapped around. By doing signed comparision, - * we take care of wrap around such that the value with the sign bit reset is - * actually ahead of the other. +/* + * Macro to compare two timers. If there is a reset of the sign bit, + * it is safe to assume that the timer has wrapped around. By doing + * signed comparision, we take care of wrap around such that the value + * with the sign bit reset is actually ahead of the other. */ - -static inline int32_t +inline int32_t timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2) { return (int32_t)((t1 + toff1) - (t2 + toff2)); }; +static u_int64_t tcp_last_report_time; +#define TCP_REPORT_STATS_INTERVAL 345600 /* 4 days, in seconds */ + /* Returns true if the timer is on the timer list */ #define TIMER_IS_ON_LIST(tp) ((tp)->t_flags & TF_TIMER_ONLIST) /* Run the TCP timerlist atleast once every hour */ -#define TCP_TIMERLIST_MAX_OFFSET (60 * 60 * TCP_RETRANSHZ) +#define TCP_TIMERLIST_MAX_OFFSET (60 * 60 * TCP_RETRANSHZ) -static void add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay); -void add_to_time_wait(struct tcpcb *tp, uint32_t delay) ; +static void add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay); static boolean_t tcp_garbage_collect(struct inpcb *, int); /* @@ -330,6 +347,8 @@ void add_to_time_wait(struct tcpcb *tp, uint32_t delay) { struct inpcbinfo *pcbinfo = &tcbinfo; + if (tp->t_inpcb->inp_socket->so_options & SO_NOWAKEFROMSLEEP) + socket_post_kev_msg_closed(tp->t_inpcb->inp_socket); if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { tcp_unlock(tp->t_inpcb->inp_socket, 0, 0); @@ -608,6 +627,30 @@ static void tcp_rexmt_save_state(struct tcpcb *tp) tp->t_flagsext &= ~(TF_RECOMPUTE_RTT); } +/* + * Revert to the older segment size if there is an indication that PMTU + * blackhole detection was not needed. + */ +void tcp_pmtud_revert_segment_size(struct tcpcb *tp) +{ + int32_t optlen; + + VERIFY(tp->t_pmtud_saved_maxopd > 0); + tp->t_flags |= TF_PMTUD; + tp->t_flags &= ~TF_BLACKHOLE; + optlen = tp->t_maxopd - tp->t_maxseg; + tp->t_maxopd = tp->t_pmtud_saved_maxopd; + tp->t_maxseg = tp->t_maxopd - optlen; + /* + * Reset the slow-start flight size as it + * may depend on the new MSS + */ + if (CC_ALGO(tp)->cwnd_init != NULL) + CC_ALGO(tp)->cwnd_init(tp); + tp->t_pmtud_start_ts = 0; + tcpstat.tcps_pmtudbh_reverted++; +} + /* * TCP timer processing. 
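 * tcp_timers() dispatches on the timer index passed in; if the
 * connection is dropped while the timer is handled, it returns NULL so
 * callers know to stop touching the tcpcb.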
*/ @@ -616,12 +659,9 @@ tcp_timers(tp, timer) register struct tcpcb *tp; int timer; { - register int rexmt; + int32_t rexmt, optlen = 0, idle_time = 0; struct socket *so; struct tcptemp *t_template; - int optlen = 0; - int idle_time = 0; - #if TCPDEBUG int ostate; #endif @@ -662,20 +702,24 @@ tcp_timers(tp, timer) * to a longer retransmit interval and retransmit one segment. */ case TCPT_REXMT: - /* Drop a connection in the retransmit timer - * 1. If we have retransmitted more than TCP_MAXRXTSHIFT times - * 2. If the time spent in this retransmission episode is more than - * the time limit set with TCP_RXT_CONNDROPTIME socket option - * 3. If TCP_RXT_FINDROP socket option was set and we have already - * retransmitted the FIN 3 times without receiving an ack + /* + * Drop a connection in the retransmit timer + * 1. If we have retransmitted more than TCP_MAXRXTSHIFT + * times + * 2. If the time spent in this retransmission episode is + * more than the time limit set with TCP_RXT_CONNDROPTIME + * socket option + * 3. If TCP_RXT_FINDROP socket option was set and + * we have already retransmitted the FIN 3 times without + * receiving an ack */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || - (tp->t_rxt_conndroptime > 0 && tp->t_rxtstart > 0 && - (tcp_now - tp->t_rxtstart) >= tp->t_rxt_conndroptime) || - ((tp->t_flagsext & TF_RXTFINDROP) != 0 && + (tp->t_rxt_conndroptime > 0 + && tp->t_rxtstart > 0 && + (tcp_now - tp->t_rxtstart) >= tp->t_rxt_conndroptime) + || ((tp->t_flagsext & TF_RXTFINDROP) != 0 && (tp->t_flags & TF_SENTFIN) != 0 && tp->t_rxtshift >= 4)) { - if ((tp->t_flagsext & TF_RXTFINDROP) != 0) { tcpstat.tcps_rxtfindrop++; } else { @@ -706,7 +750,7 @@ tcp_timers(tp, timer) tcp_rexmt_save_state(tp); } #if MPTCP - if ((tp->t_rxtshift == mptcp_fail_thresh) && + if ((tp->t_rxtshift >= mptcp_fail_thresh) && (tp->t_state == TCPS_ESTABLISHED) && (tp->t_mpflags & TMPF_MPTCP_TRUE)) { mptcp_act_on_txfail(so); @@ -723,12 +767,34 @@ tcp_timers(tp, timer) SO_FILT_HINT_ADAPTIVE_WTIMO)); } + /* + * If this is a retransmit timeout after PTO, the PTO + * was not effective + */ + if (tp->t_flagsext & TF_SENT_TLPROBE) { + tp->t_flagsext &= ~(TF_SENT_TLPROBE); + tcpstat.tcps_rto_after_pto++; + } + + if (tp->t_flagsext & TF_DELAY_RECOVERY) { + /* + * Retransmit timer fired before entering recovery + * on a connection with packet re-ordering. This + * suggests that the reordering metrics computed + * are not accurate. + */ + tp->t_reorderwin = 0; + tp->t_timer[TCPT_DELAYFR] = 0; + tp->t_flagsext &= ~(TF_DELAY_RECOVERY); + } + if (tp->t_state == TCPS_SYN_SENT) { rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; tp->t_stat.synrxtshift = tp->t_rxtshift; - } - else + } else { rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + } + TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX, TCP_ADD_REXMTSLOP(tp)); @@ -739,16 +805,19 @@ tcp_timers(tp, timer) tcp_free_sackholes(tp); /* - * Check for potential Path MTU Discovery Black Hole + * Check for potential Path MTU Discovery Black Hole */ - - if (tcp_pmtud_black_hole_detect && (tp->t_state == TCPS_ESTABLISHED)) { - if (((tp->t_flags & (TF_PMTUD|TF_MAXSEGSNT)) == (TF_PMTUD|TF_MAXSEGSNT)) && + if (tcp_pmtud_black_hole_detect && + !(tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) && + (tp->t_state == TCPS_ESTABLISHED)) { + if (((tp->t_flags & (TF_PMTUD|TF_MAXSEGSNT)) + == (TF_PMTUD|TF_MAXSEGSNT)) && (tp->t_rxtshift == 2)) { /* * Enter Path MTU Black-hole Detection mechanism: * - Disable Path MTU Discovery (IP "DF" bit). 
- * - Reduce MTU to lower value than what we negociated with peer. + * - Reduce MTU to lower value than what we + * negotiated with the peer. */ /* Disable Path MTU Discovery for now */ tp->t_flags &= ~TF_PMTUD; @@ -757,6 +826,9 @@ tcp_timers(tp, timer) optlen = tp->t_maxopd - tp->t_maxseg; /* Keep track of previous MSS */ tp->t_pmtud_saved_maxopd = tp->t_maxopd; + tp->t_pmtud_start_ts = tcp_now; + if (tp->t_pmtud_start_ts == 0) + tp->t_pmtud_start_ts++; /* Reduce the MSS to intermediary value */ if (tp->t_maxopd > tcp_pmtud_black_hole_mss) { tp->t_maxopd = tcp_pmtud_black_hole_mss; @@ -777,35 +849,28 @@ tcp_timers(tp, timer) CC_ALGO(tp)->cwnd_init(tp); } /* - * If further retransmissions are still unsuccessful with a lowered MTU, - * maybe this isn't a Black Hole and we restore the previous MSS and - * blackhole detection flags. + * If further retransmissions are still + * unsuccessful with a lowered MTU, maybe this + * isn't a Black Hole and we restore the previous + * MSS and blackhole detection flags. */ else { - if ((tp->t_flags & TF_BLACKHOLE) && (tp->t_rxtshift > 4)) { - tp->t_flags |= TF_PMTUD; - tp->t_flags &= ~TF_BLACKHOLE; - optlen = tp->t_maxopd - tp->t_maxseg; - tp->t_maxopd = tp->t_pmtud_saved_maxopd; - tp->t_maxseg = tp->t_maxopd - optlen; - /* - * Reset the slow-start flight size as it - * may depend on the new MSS - */ - if (CC_ALGO(tp)->cwnd_init != NULL) - CC_ALGO(tp)->cwnd_init(tp); + if ((tp->t_flags & TF_BLACKHOLE) && + (tp->t_rxtshift > 4)) { + tcp_pmtud_revert_segment_size(tp); } } } /* - * Disable rfc1323 and rfc1644 if we haven't got any response to - * our SYN (after we reach the threshold) to work-around some - * broken terminal servers (most of which have hopefully been - * retired) that have bad VJ header compression code which - * trashes TCP segments containing unknown-to-them TCP options. + * Disable rfc1323 and rfc1644 if we haven't got any + * response to our SYN (after we reach the threshold) + * to work-around some broken terminal servers (most of + * which have hopefully been retired) that have bad VJ + * header compression code which trashes TCP segments + * containing unknown-to-them TCP options. * Do this only on non-local connections. */ if (tp->t_state == TCPS_SYN_SENT && @@ -843,14 +908,16 @@ tcp_timers(tp, timer) * Force a segment to be sent. */ tp->t_flags |= TF_ACKNOW; - /* - * If timing a segment in this window, stop the timer. - */ + + /* If timing a segment in this window, stop the timer */ tp->t_rtttime = 0; - EXIT_FASTRECOVERY(tp); + if (!IN_FASTRECOVERY(tp) && tp->t_rxtshift == 1) + tcpstat.tcps_tailloss_rto++; + - /* RFC 5681 says: when a TCP sender detects segment loss + /* + * RFC 5681 says: when a TCP sender detects segment loss * using retransmit timer and the given segment has already * been retransmitted by way of the retransmission timer at * least once, the value of ssthresh is held constant @@ -859,6 +926,7 @@ tcp_timers(tp, timer) CC_ALGO(tp)->after_timeout != NULL) CC_ALGO(tp)->after_timeout(tp); + EXIT_FASTRECOVERY(tp); /* CWR notifications are to be sent on new data right after * RTOs, Fast Retransmits and ECE notification receipts. 
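/*
 * [Editor's sketch, not part of the patch] The hunks above make PMTU
 * blackhole handling symmetric: on the second RTO the stack saves
 * t_maxopd, clamps the MSS and clears TF_PMTUD; if retransmits still
 * fail (t_rxtshift > 4), tcp_pmtud_revert_segment_size() restores the
 * saved value. A condensed version follows; struct ex_conn stands in
 * for the relevant tcpcb fields and omits the TF_MAXSEGSNT check and
 * the cwnd re-initialization done by the real code.
 */
#include <stdint.h>

#define EX_TF_PMTUD	0x1
#define EX_TF_BLACKHOLE	0x2
#define EX_BH_MSS	1200	/* tcp_pmtud_black_hole_mss default */

struct ex_conn {
	uint32_t flags;
	int32_t	 maxopd;	/* MSS including options */
	int32_t	 maxseg;	/* MSS excluding options */
	int32_t	 saved_maxopd;	/* stashed before clamping */
	int	 rxtshift;	/* consecutive RTO count */
};

static void
ex_on_rto(struct ex_conn *c)
{
	int32_t optlen = c->maxopd - c->maxseg;

	if (++c->rxtshift == 2 && (c->flags & EX_TF_PMTUD) &&
	    c->maxopd > EX_BH_MSS) {
		/* Suspect a blackhole: stop setting IP "DF" and probe
		 * with a smaller segment size. */
		c->saved_maxopd = c->maxopd;
		c->flags &= ~EX_TF_PMTUD;
		c->flags |= EX_TF_BLACKHOLE;
		c->maxopd = EX_BH_MSS;
		c->maxseg = c->maxopd - optlen;
	} else if ((c->flags & EX_TF_BLACKHOLE) && c->rxtshift > 4) {
		/* The smaller MSS did not help, so this is ordinary
		 * loss; revert to the saved segment size. */
		c->flags |= EX_TF_PMTUD;
		c->flags &= ~EX_TF_BLACKHOLE;
		c->maxopd = c->saved_maxopd;
		c->maxseg = c->maxopd - optlen;
	}
}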
@@ -867,9 +935,7 @@ tcp_timers(tp, timer) tp->ecn_flags |= TE_SENDCWR; } fc_output: - DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb, - struct tcpcb *, tp, struct tcphdr *, NULL, - int32_t, TCP_CC_REXMT_TIMEOUT); + tcp_ccdbg_trace(tp, NULL, TCP_CC_REXMT_TIMEOUT); (void) tcp_output(tp); break; @@ -904,9 +970,9 @@ fc_output: break; } tcp_setpersist(tp); - tp->t_force = 1; + tp->t_flagsext |= TF_FORCE; (void) tcp_output(tp); - tp->t_force = 0; + tp->t_flagsext &= ~TF_FORCE; break; /* @@ -921,8 +987,8 @@ fc_output: * MPTCP must not also, after sending Data FINs. */ struct mptcb *mp_tp = tp->t_mptcb; - if ((tp->t_mpflags & TMPF_MPTCP_TRUE) && - (mp_tp == NULL)) { + if ((tp->t_mpflags & TMPF_MPTCP_TRUE) && + (tp->t_state > TCPS_ESTABLISHED)) { goto dropit; } else if (mp_tp != NULL) { if ((mptcp_ok_to_keepalive(mp_tp) == 0)) @@ -952,24 +1018,20 @@ fc_output: tcpstat.tcps_keepprobe++; t_template = tcp_maketemplate(tp); if (t_template) { - unsigned int ifscope, nocell = 0; + struct inpcb *inp = tp->t_inpcb; + struct tcp_respond_args tra; + bzero(&tra, sizeof(tra)); + tra.nocell = INP_NO_CELLULAR(inp); + tra.noexpensive = INP_NO_EXPENSIVE(inp); + tra.awdl_unrestricted = INP_AWDL_UNRESTRICTED(inp); if (tp->t_inpcb->inp_flags & INP_BOUND_IF) - ifscope = tp->t_inpcb->inp_boundifp->if_index; + tra.ifscope = tp->t_inpcb->inp_boundifp->if_index; else - ifscope = IFSCOPE_NONE; - - /* - * If the socket isn't allowed to use the - * cellular interface, indicate it as such. - */ - if (tp->t_inpcb->inp_flags & INP_NO_IFT_CELLULAR) - nocell = 1; - + tra.ifscope = IFSCOPE_NONE; tcp_respond(tp, t_template->tt_ipgen, &t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt, tp->snd_una - 1, 0, ifscope, - nocell); + tp->rcv_nxt, tp->snd_una - 1, 0, &tra); (void) m_free(dtom(t_template)); if (tp->t_flagsext & TF_DETECT_READSTALL) tp->t_rtimo_probes++; @@ -1006,16 +1068,39 @@ fc_output: tp->t_timer[TCPT_DELACK] = 0; tp->t_flags |= TF_ACKNOW; - /* If delayed ack timer fired while stretching acks - * go back to acking every other packet + /* + * If delayed ack timer fired while stretching + * acks, count the number of times the streaming + * detection was not correct. If this exceeds a + * threshold, disable strech ack on this + * connection + * + * Also, go back to acking every other packet. */ - if ((tp->t_flags & TF_STRETCHACK) != 0) + if ((tp->t_flags & TF_STRETCHACK)) { + if (tp->t_unacksegs > 1 && + tp->t_unacksegs < maxseg_unacked) + tp->t_stretchack_delayed++; + + if (tp->t_stretchack_delayed > + TCP_STRETCHACK_DELAY_THRESHOLD) { + tp->t_flagsext |= TF_DISABLE_STRETCHACK; + /* + * Note the time at which stretch + * ack was disabled automatically + */ + tp->rcv_nostrack_ts = tcp_now; + tcpstat.tcps_nostretchack++; + tp->t_stretchack_delayed = 0; + } tcp_reset_stretch_ack(tp); + } - /* If we are measuring inter packet arrival jitter for - * throttling a connection, this delayed ack might be - * the reason for accumulating some jitter. So let's - * restart the measurement. + /* + * If we are measuring inter packet arrival jitter + * for throttling a connection, this delayed ack + * might be the reason for accumulating some + * jitter. So let's restart the measurement. 
*/ CLEAR_IAJ_STATE(tp); @@ -1051,11 +1136,110 @@ fc_output: break; #endif /* MPTCP */ -#if TCPDEBUG - if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) - tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, - PRU_SLOWTIMO); -#endif + case TCPT_PTO: + { + tcp_seq old_snd_nxt; + int32_t snd_len; + boolean_t rescue_rxt = FALSE; + + tp->t_flagsext &= ~(TF_SENT_TLPROBE); + + /* + * Check if the connection is in the right state to + * send a probe + */ + if (tp->t_state != TCPS_ESTABLISHED || + tp->t_rxtshift > 0 || tp->snd_max == tp->snd_una || + !SACK_ENABLED(tp) || TAILQ_EMPTY(&tp->snd_holes) || + (IN_FASTRECOVERY(tp) && + (SEQ_GEQ(tp->snd_fack, tp->snd_recover) || + SEQ_GT(tp->snd_nxt, tp->sack_newdata)))) + break; + + tcpstat.tcps_pto++; + + /* If timing a segment in this window, stop the timer */ + tp->t_rtttime = 0; + + if (IN_FASTRECOVERY(tp)) { + /* + * Send a probe to detect tail loss in a + * recovery window when the connection is in + * fast_recovery. + */ + old_snd_nxt = tp->snd_nxt; + rescue_rxt = TRUE; + VERIFY(SEQ_GEQ(tp->snd_fack, tp->snd_una)); + snd_len = min((tp->snd_recover - tp->snd_fack), + tp->t_maxseg); + tp->snd_nxt = tp->snd_recover - snd_len; + tcpstat.tcps_pto_in_recovery++; + tcp_ccdbg_trace(tp, NULL, TCP_CC_TLP_IN_FASTRECOVERY); + } else { + /* + * If there is no new data to send or if the + * connection is limited by receive window then + * retransmit the last segment, otherwise send + * new data. + */ + snd_len = min(so->so_snd.sb_cc, tp->snd_wnd) + - (tp->snd_max - tp->snd_una); + if (snd_len > 0) { + tp->snd_nxt = tp->snd_max; + } else { + snd_len = min((tp->snd_max - tp->snd_una), + tp->t_maxseg); + tp->snd_nxt = tp->snd_max - snd_len; + } + } + + /* Note that tail loss probe is being sent */ + tp->t_flagsext |= TF_SENT_TLPROBE; + tp->t_tlpstart = tcp_now; + + tp->snd_cwnd += tp->t_maxseg; + (void )tcp_output(tp); + tp->snd_cwnd -= tp->t_maxseg; + + tp->t_tlphighrxt = tp->snd_nxt; + + /* + * If a tail loss probe was sent after entering recovery, + * restore the old snd_nxt value so that other packets + * will get retransmitted correctly. + */ + if (rescue_rxt) + tp->snd_nxt = old_snd_nxt; + break; + } + case TCPT_DELAYFR: + tp->t_flagsext &= ~TF_DELAY_RECOVERY; + + /* + * Don't do anything if one of the following is true: + * - the connection is already in recovery + * - sequence until snd_recover has been acknowledged. + * - retransmit timeout has fired + */ + if (IN_FASTRECOVERY(tp) || + SEQ_GEQ(tp->snd_una, tp->snd_recover) || + tp->t_rxtshift > 0) + break; + + VERIFY(SACK_ENABLED(tp)); + if (CC_ALGO(tp)->pre_fr != NULL) + CC_ALGO(tp)->pre_fr(tp); + ENTER_FASTRECOVERY(tp); + if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) + tp->ecn_flags |= TE_SENDCWR; + + tp->t_timer[TCPT_REXMT] = 0; + tcpstat.tcps_sack_recovery_episode++; + tp->sack_newdata = tp->snd_nxt; + tp->snd_cwnd = tp->t_maxseg; + tcp_ccdbg_trace(tp, NULL, TCP_CC_ENTER_FASTRECOVERY); + (void) tcp_output(tp); + break; dropit: tcpstat.tcps_keepdrops++; postevent(so, 0, EV_TIMEOUT); @@ -1064,6 +1248,11 @@ fc_output: tp = tcp_drop(tp, ETIMEDOUT); break; } +#if TCPDEBUG + if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, + PRU_SLOWTIMO); +#endif return (tp); } @@ -1098,26 +1287,25 @@ tcp_remove_timer(struct tcpcb *tp) lck_mtx_unlock(listp->mtx); } -/* Function to check if the timerlist needs to be rescheduled to run +/* + * Function to check if the timerlist needs to be rescheduled to run * the timer entry correctly. 
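 * (If the list is already due to run at, or within one quantum of,
 * this timer's deadline, the pending wakeup is close enough and no
 * reschedule is needed.)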
Basically, this is to check if we can avoid * taking the list lock. */ static boolean_t -need_to_resched_timerlist(uint32_t runtime, uint16_t index) { +need_to_resched_timerlist(u_int32_t runtime, u_int16_t mode) +{ struct tcptimerlist *listp = &tcp_timer_list; int32_t diff; - boolean_t is_fast; - - if (index == TCPT_NONE) - return FALSE; - is_fast = !(IS_TIMER_SLOW(index)); - /* If the list is being processed then the state of the list is in flux. - * In this case always acquire the lock and set the state correctly. + /* + * If the list is being processed then the state of the list is + * in flux. In this case always acquire the lock and set the state + * correctly. */ if (listp->running) - return TRUE; + return (TRUE); if (!listp->scheduled) return (TRUE); @@ -1125,17 +1313,20 @@ need_to_resched_timerlist(uint32_t runtime, uint16_t index) { diff = timer_diff(listp->runtime, 0, runtime, 0); if (diff <= 0) { /* The list is going to run before this timer */ - return FALSE; + return (FALSE); } else { - if (is_fast) { - if (diff <= listp->fast_quantum) - return FALSE; + if (mode & TCP_TIMERLIST_10MS_MODE) { + if (diff <= TCP_TIMER_10MS_QUANTUM) + return (FALSE); + } else if (mode & TCP_TIMERLIST_100MS_MODE) { + if (diff <= TCP_TIMER_100MS_QUANTUM) + return (FALSE); } else { - if (diff <= listp->slow_quantum) - return FALSE; + if (diff <= TCP_TIMER_500MS_QUANTUM) + return (FALSE); } } - return TRUE; + return (TRUE); } void @@ -1149,39 +1340,46 @@ tcp_sched_timerlist(uint32_t offset) offset = min(offset, TCP_TIMERLIST_MAX_OFFSET); listp->runtime = tcp_now + offset; - if (listp->runtime == 0) + if (listp->runtime == 0) { listp->runtime++; + offset++; + } - clock_interval_to_deadline(offset, NSEC_PER_SEC / TCP_RETRANSHZ, - &deadline); + clock_interval_to_deadline(offset, USEC_PER_SEC, &deadline); thread_call_enter_delayed(listp->call, deadline); listp->scheduled = TRUE; } -/* Function to run the timers for a connection. +/* + * Function to run the timers for a connection. * * Returns the offset of next timer to be run for this connection which * can be used to reschedule the timerlist. + * + * te_mode is an out parameter that indicates the modes of active + * timers for this connection. */ -uint32_t -tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index) { +u_int32_t +tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *te_mode) { - struct socket *so; - uint16_t i = 0, index = TCPT_NONE, lo_index = TCPT_NONE; - uint32_t timer_val, offset = 0, lo_timer = 0; + struct socket *so; + u_int16_t i = 0, index = TCPT_NONE, lo_index = TCPT_NONE; + u_int32_t timer_val, offset = 0, lo_timer = 0; int32_t diff; boolean_t needtorun[TCPT_NTIMERS]; int count = 0; - VERIFY(tp != NULL); - bzero(needtorun, sizeof(needtorun)); + VERIFY(tp != NULL); + bzero(needtorun, sizeof(needtorun)); + *te_mode = 0; - tcp_lock(tp->t_inpcb->inp_socket, 1, 0); + tcp_lock(tp->t_inpcb->inp_socket, 1, 0); - so = tp->t_inpcb->inp_socket; + so = tp->t_inpcb->inp_socket; /* Release the want count on inp */ - if (in_pcb_checkstate(tp->t_inpcb, WNT_RELEASE, 1) == WNT_STOPUSING) { + if (in_pcb_checkstate(tp->t_inpcb, WNT_RELEASE, 1) + == WNT_STOPUSING) { if (TIMER_IS_ON_LIST(tp)) { tcp_remove_timer(tp); } @@ -1192,19 +1390,21 @@ tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index) { goto done; } - /* Since the timer thread needs to wait for tcp lock, it may race - * with another thread that can cancel or reschedule the timer that is - * about to run. Check if we need to run anything. 
- */ + /* + * Since the timer thread needs to wait for tcp lock, it may race + * with another thread that can cancel or reschedule the timer + * that is about to run. Check if we need to run anything. + */ if ((index = tp->tentry.index) == TCPT_NONE) goto done; + timer_val = tp->t_timer[index]; diff = timer_diff(tp->tentry.runtime, 0, tcp_now, 0); if (diff > 0) { if (tp->tentry.index != TCPT_NONE) { offset = diff; - *(next_index) = tp->tentry.index; + *(te_mode) = tp->tentry.mode; } goto done; } @@ -1212,18 +1412,20 @@ tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index) { tp->t_timer[index] = 0; if (timer_val > 0) { tp = tcp_timers(tp, index); - if (tp == NULL) + if (tp == NULL) goto done; } - /* Check if there are any other timers that need to be run. While doing it, - * adjust the timer values wrt tcp_now. + /* + * Check if there are any other timers that need to be run. + * While doing it, adjust the timer values wrt tcp_now. */ + tp->tentry.mode = 0; for (i = 0; i < TCPT_NTIMERS; ++i) { if (tp->t_timer[i] != 0) { - diff = timer_diff(tp->tentry.timer_start, tp->t_timer[i], tcp_now, 0); + diff = timer_diff(tp->tentry.timer_start, + tp->t_timer[i], tcp_now, 0); if (diff <= 0) { - tp->t_timer[i] = 0; needtorun[i] = TRUE; count++; } else { @@ -1233,27 +1435,31 @@ tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index) { lo_timer = diff; lo_index = i; } + TCP_SET_TIMER_MODE(tp->tentry.mode, i); } } } tp->tentry.timer_start = tcp_now; tp->tentry.index = lo_index; - if (lo_index != TCPT_NONE) { - tp->tentry.runtime = tp->tentry.timer_start + tp->t_timer[lo_index]; + VERIFY(tp->tentry.index == TCPT_NONE || tp->tentry.mode > 0); + + if (tp->tentry.index != TCPT_NONE) { + tp->tentry.runtime = tp->tentry.timer_start + + tp->t_timer[tp->tentry.index]; if (tp->tentry.runtime == 0) tp->tentry.runtime++; } if (count > 0) { - /* run any other timers that are also outstanding at this time. */ + /* run any other timers outstanding at this time. 
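 * These are timers that also expired while the lowest timer above was
 * being handled; running them in the same pass avoids scheduling an
 * immediate extra wakeup for them.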
*/ for (i = 0; i < TCPT_NTIMERS; ++i) { if (needtorun[i]) { tp->t_timer[i] = 0; tp = tcp_timers(tp, i); if (tp == NULL) { offset = 0; - *(next_index) = TCPT_NONE; + *(te_mode) = 0; goto done; } } @@ -1263,7 +1469,7 @@ tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index) { if (tp->tentry.index < TCPT_NONE) { offset = tp->t_timer[tp->tentry.index]; - *(next_index) = tp->tentry.index; + *(te_mode) = tp->tentry.mode; } done: @@ -1271,23 +1477,21 @@ done: tcp_remove_timer(tp); offset = 0; } - tcp_unlock(so, 1, 0); - return offset; + + tcp_unlock(so, 1, 0); + return(offset); } void tcp_run_timerlist(void * arg1, void * arg2) { - #pragma unused(arg1, arg2) - struct tcptimerentry *te, *next_te; struct tcptimerlist *listp = &tcp_timer_list; struct tcpcb *tp; - uint32_t next_timer = 0; - uint16_t index = TCPT_NONE; - boolean_t need_fast = FALSE; + uint32_t next_timer = 0; /* offset of the next timer on the list */ + u_int16_t te_mode = 0; /* modes of all active timers in a tcpcb */ + u_int16_t list_mode = 0; /* cumulative of modes of all tcpcbs */ uint32_t active_count = 0; - uint32_t mode = TCP_TIMERLIST_FASTMODE; calculate_tcp_clock(); @@ -1303,19 +1507,24 @@ tcp_run_timerlist(void * arg1, void * arg2) { if (next_timer == 0 || offset < next_timer) { next_timer = offset; } + list_mode |= te->mode; continue; } - active_count++; tp = TIMERENTRY_TO_TP(te); - /* Acquire an inp wantcnt on the inpcb so that the socket won't get - * detached even if tcp_close is called + /* + * Acquire an inp wantcnt on the inpcb so that the socket + * won't get detached even if tcp_close is called */ - if (in_pcb_checkstate(tp->t_inpcb, WNT_ACQUIRE, 0) == WNT_STOPUSING) { - /* Some how this pcb went into dead state while on the timer list, - * just take it off the list. Since the timer list entry pointers - * are protected by the timer list lock, we can do it here + if (in_pcb_checkstate(tp->t_inpcb, WNT_ACQUIRE, 0) + == WNT_STOPUSING) { + /* + * Some how this pcb went into dead state while + * on the timer list, just take it off the list. + * Since the timer list entry pointers are + * protected by the timer list lock, we can + * do it here without the socket lock. */ if (TIMER_IS_ON_LIST(tp)) { tp->t_flags &= ~(TF_TIMER_ONLIST); @@ -1327,10 +1536,13 @@ tcp_run_timerlist(void * arg1, void * arg2) { } continue; } + active_count++; - /* Store the next timerentry pointer before releasing the list lock. - * If that entry has to be removed when we release the lock, this - * pointer will be updated to the element after that. + /* + * Store the next timerentry pointer before releasing the + * list lock. If that entry has to be removed when we + * release the lock, this pointer will be updated to the + * element after that. */ listp->next_te = next_te; @@ -1339,58 +1551,59 @@ tcp_run_timerlist(void * arg1, void * arg2) { lck_mtx_unlock(listp->mtx); - index = TCPT_NONE; - offset = tcp_run_conn_timer(tp, &index); + offset = tcp_run_conn_timer(tp, &te_mode); lck_mtx_lock(listp->mtx); next_te = listp->next_te; listp->next_te = NULL; - if (offset > 0) { - if (index < TCPT_NONE) { - /* Check if this is a fast_timer. 
*/ - if (!need_fast && !(IS_TIMER_SLOW(index))) { - need_fast = TRUE; - } + if (offset > 0 && te_mode != 0) { + list_mode |= te_mode; - if (next_timer == 0 || offset < next_timer) { - next_timer = offset; - } - } + if (next_timer == 0 || offset < next_timer) + next_timer = offset; } } if (!LIST_EMPTY(&listp->lhead)) { - if (listp->mode == TCP_TIMERLIST_FASTMODE) { - if (need_fast || active_count > 0 || - listp->pref_mode == TCP_TIMERLIST_FASTMODE) { - listp->idlegen = 0; - } else { - listp->idlegen++; - if (listp->idlegen > timer_fastmode_idlemax) { - mode = TCP_TIMERLIST_SLOWMODE; - listp->idlegen = 0; - } - } - } else { - if (!need_fast) { - mode = TCP_TIMERLIST_SLOWMODE; - } - } + u_int16_t next_mode = 0; + if ((list_mode & TCP_TIMERLIST_10MS_MODE) || + (listp->pref_mode & TCP_TIMERLIST_10MS_MODE)) + next_mode = TCP_TIMERLIST_10MS_MODE; + else if ((list_mode & TCP_TIMERLIST_100MS_MODE) || + (listp->pref_mode & TCP_TIMERLIST_100MS_MODE)) + next_mode = TCP_TIMERLIST_100MS_MODE; + else + next_mode = TCP_TIMERLIST_500MS_MODE; - if (mode == TCP_TIMERLIST_FASTMODE || - listp->pref_mode == TCP_TIMERLIST_FASTMODE) { - next_timer = listp->fast_quantum; + if (next_mode != TCP_TIMERLIST_500MS_MODE) { + listp->idleruns = 0; } else { - if (listp->pref_offset != 0 && - listp->pref_offset < next_timer) - next_timer = listp->pref_offset; - if (next_timer < listp->slow_quantum) - next_timer = listp->slow_quantum; + /* + * the next required mode is slow mode, but if + * the last one was a faster mode and we did not + * have enough idle runs, repeat the last mode. + * + * We try to keep the timer list in fast mode for + * some idle time in expectation of new data. + */ + if (listp->mode != next_mode && + listp->idleruns < timer_fastmode_idlemax) { + listp->idleruns++; + next_mode = listp->mode; + next_timer = TCP_TIMER_100MS_QUANTUM; + } else { + listp->idleruns = 0; + } } + listp->mode = next_mode; + if (listp->pref_offset != 0) + next_timer = min(listp->pref_offset, next_timer); - listp->mode = mode; + if (listp->mode == TCP_TIMERLIST_500MS_MODE) + next_timer = max(next_timer, + TCP_TIMER_500MS_QUANTUM); tcp_sched_timerlist(next_timer); } else { @@ -1408,16 +1621,19 @@ tcp_run_timerlist(void * arg1, void * arg2) { lck_mtx_unlock(listp->mtx); } -/* Function to verify if a change in timer state is required for a connection */ +/* + * Function to check if the timerlist needs to be reschduled to run this + * connection's timers correctly. + */ void tcp_sched_timers(struct tcpcb *tp) { struct tcptimerentry *te = &tp->tentry; - uint16_t index = te->index; + u_int16_t index = te->index; + u_int16_t mode = te->mode; struct tcptimerlist *listp = &tcp_timer_list; int32_t offset = 0; - boolean_t is_fast; - int list_locked = 0; + boolean_t list_locked = FALSE; if (tp->t_inpcb->inp_state == INPCB_STATE_DEAD) { /* Just return without adding the dead pcb to the list */ @@ -1428,62 +1644,65 @@ tcp_sched_timers(struct tcpcb *tp) } if (index == TCPT_NONE) { + /* Nothing to run */ tcp_remove_timer(tp); return; } - is_fast = !(IS_TIMER_SLOW(index)); + /* + * compute the offset at which the next timer for this connection + * has to run. 
+ */ offset = timer_diff(te->runtime, 0, tcp_now, 0); if (offset <= 0) { offset = 1; tcp_timer_advanced++; } - if (is_fast) - offset = listp->fast_quantum; if (!TIMER_IS_ON_LIST(tp)) { if (!list_locked) { lck_mtx_lock(listp->mtx); - list_locked = 1; + list_locked = TRUE; } LIST_INSERT_HEAD(&listp->lhead, te, le); tp->t_flags |= TF_TIMER_ONLIST; - listp->entries++; - if (listp->entries > listp->maxentries) - listp->maxentries = listp->entries; + listp->entries++; + if (listp->entries > listp->maxentries) + listp->maxentries = listp->entries; /* if the list is not scheduled, just schedule it */ if (!listp->scheduled) goto schedule; - } - /* timer entry is currently on the list */ - if (need_to_resched_timerlist(te->runtime, index)) { + /* + * Timer entry is currently on the list, check if the list needs + * to be rescheduled. + */ + if (need_to_resched_timerlist(te->runtime, mode)) { tcp_resched_timerlist++; if (!list_locked) { lck_mtx_lock(listp->mtx); - list_locked = 1; + list_locked = TRUE; } VERIFY_NEXT_LINK(te, le); VERIFY_PREV_LINK(te, le); if (listp->running) { - if (is_fast) { - listp->pref_mode = TCP_TIMERLIST_FASTMODE; - } else if (listp->pref_offset == 0 || + listp->pref_mode |= mode; + if (listp->pref_offset == 0 || offset < listp->pref_offset) { listp->pref_offset = offset; } } else { /* - * The list could have got scheduled while this - * thread was waiting for the lock + * The list could have got rescheduled while + * this thread was waiting for the lock */ if (listp->scheduled) { int32_t diff; @@ -1501,9 +1720,20 @@ tcp_sched_timers(struct tcpcb *tp) goto done; schedule: - if (is_fast) { - listp->mode = TCP_TIMERLIST_FASTMODE; - listp->idlegen = 0; + /* + * Since a connection with timers is getting scheduled, the timer + * list moves from idle to active state and that is why idlegen is + * reset + */ + if (mode & TCP_TIMERLIST_10MS_MODE) { + listp->mode = TCP_TIMERLIST_10MS_MODE; + listp->idleruns = 0; + offset = min(offset, TCP_TIMER_10MS_QUANTUM); + } else if (mode & TCP_TIMERLIST_100MS_MODE) { + if (listp->mode > TCP_TIMERLIST_100MS_MODE) + listp->mode = TCP_TIMERLIST_100MS_MODE; + listp->idleruns = 0; + offset = min(offset, TCP_TIMER_100MS_QUANTUM); } tcp_sched_timerlist(offset); @@ -1514,20 +1744,26 @@ done: return; } -void +static inline void tcp_set_lotimer_index(struct tcpcb *tp) { - uint16_t i, lo_index = TCPT_NONE; + uint16_t i, lo_index = TCPT_NONE, mode = 0; uint32_t lo_timer = 0; for (i = 0; i < TCPT_NTIMERS; ++i) { - if (tp->t_timer[i] != 0 && - (lo_timer == 0 || tp->t_timer[i] < lo_timer)) { - lo_timer = tp->t_timer[i]; - lo_index = i; + if (tp->t_timer[i] != 0) { + TCP_SET_TIMER_MODE(mode, i); + if (lo_timer == 0 || tp->t_timer[i] < lo_timer) { + lo_timer = tp->t_timer[i]; + lo_index = i; + } } } tp->tentry.index = lo_index; - if (lo_index != TCPT_NONE) { - tp->tentry.runtime = tp->tentry.timer_start + tp->t_timer[lo_index]; + tp->tentry.mode = mode; + VERIFY(tp->tentry.index == TCPT_NONE || tp->tentry.mode > 0); + + if (tp->tentry.index != TCPT_NONE) { + tp->tentry.runtime = tp->tentry.timer_start + + tp->t_timer[tp->tentry.index]; if (tp->tentry.runtime == 0) tp->tentry.runtime++; } @@ -1546,3 +1782,94 @@ tcp_check_timer_state(struct tcpcb *tp) { tcp_sched_timers(tp); return; } + +__private_extern__ void +tcp_report_stats(void) +{ + struct nstat_sysinfo_data data; + struct sockaddr_in dst; + struct sockaddr_in6 dst6; + struct rtentry *rt = NULL; + u_int64_t var, uptime; + +#define stat data.u.tcp_stats + if (((uptime = net_uptime()) - tcp_last_report_time) < 
+ TCP_REPORT_STATS_INTERVAL) + return; + + tcp_last_report_time = uptime; + + bzero(&data, sizeof(data)); + data.flags = NSTAT_SYSINFO_TCP_STATS; + + bzero(&dst, sizeof(dst)); + dst.sin_len = sizeof(dst); + dst.sin_family = AF_INET; + + /* ipv4 avg rtt */ + lck_mtx_lock(rnh_lock); + rt = rt_lookup(TRUE, (struct sockaddr *)&dst, NULL, + rt_tables[AF_INET], IFSCOPE_NONE); + lck_mtx_unlock(rnh_lock); + if (rt != NULL) { + RT_LOCK(rt); + if (rt_primary_default(rt, rt_key(rt)) && + rt->rt_stats != NULL) { + stat.ipv4_avgrtt = rt->rt_stats->nstat_avg_rtt; + } + RT_UNLOCK(rt); + rtfree(rt); + rt = NULL; + } + + /* ipv6 avg rtt */ + bzero(&dst6, sizeof(dst6)); + dst6.sin6_len = sizeof(dst6); + dst6.sin6_family = AF_INET6; + + lck_mtx_lock(rnh_lock); + rt = rt_lookup(TRUE,(struct sockaddr *)&dst6, NULL, + rt_tables[AF_INET6], IFSCOPE_NONE); + lck_mtx_unlock(rnh_lock); + if (rt != NULL) { + RT_LOCK(rt); + if (rt_primary_default(rt, rt_key(rt)) && + rt->rt_stats != NULL) { + stat.ipv6_avgrtt = rt->rt_stats->nstat_avg_rtt; + } + RT_UNLOCK(rt); + rtfree(rt); + rt = NULL; + } + + /* send packet loss rate, shift by 10 for precision */ + if (tcpstat.tcps_sndpack > 0 && tcpstat.tcps_sndrexmitpack > 0) { + var = tcpstat.tcps_sndrexmitpack << 10; + stat.send_plr = (var * 100) / tcpstat.tcps_sndpack; + } + + /* recv packet loss rate, shift by 10 for precision */ + if (tcpstat.tcps_rcvpack > 0 && tcpstat.tcps_recovered_pkts > 0) { + var = tcpstat.tcps_recovered_pkts << 10; + stat.recv_plr = (var * 100) / tcpstat.tcps_rcvpack; + } + + /* RTO after tail loss, shift by 10 for precision */ + if (tcpstat.tcps_sndrexmitpack > 0 + && tcpstat.tcps_tailloss_rto > 0) { + var = tcpstat.tcps_tailloss_rto << 10; + stat.send_tlrto_rate = + (var * 100) / tcpstat.tcps_sndrexmitpack; + } + + /* packet reordering */ + if (tcpstat.tcps_sndpack > 0 && tcpstat.tcps_reordered_pkts > 0) { + var = tcpstat.tcps_reordered_pkts << 10; + stat.send_reorder_rate = + (var * 100) / tcpstat.tcps_sndpack; + } + + nstat_sysinfo_send_data(&data); + +#undef stat +} diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h index f7b775db5..0e7a43f11 100644 --- a/bsd/netinet/tcp_timer.h +++ b/bsd/netinet/tcp_timer.h @@ -65,32 +65,23 @@ #define _NETINET_TCP_TIMER_H_ #include -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE #include -#endif /* KERNEL */ - -/* - * Definitions of the TCP timers. - */ -#define TCPT_NTIMERS (TCPT_MAX + 1) +#endif /* BSD_KERNEL_PRIVATE */ /* Keep the external definition the same for binary compatibility */ #define TCPT_NTIMERS_EXT 4 -#define TCPT_REXMT 0 /* retransmit */ -#define TCPT_PERSIST 1 /* retransmit persistence */ -#define TCPT_KEEP 2 /* keep alive */ -#define TCPT_2MSL 3 /* 2*msl quiet time timer */ -#define TCPT_DELACK 4 /* delayed ack timer */ -#if MPTCP -#define TCPT_JACK_RXMT 5 /* retransmit timer for join ack */ -#define TCPT_MAX 5 -#else /* MPTCP */ -#define TCPT_MAX 4 -#endif /* !MPTCP */ -#define TCPT_NONE (TCPT_MAX + 1) - /* + * Definitions of the TCP timers. + * + * The TCPT_PTO timer is used for probing for a tail loss in a send window. + * If this probe gets acknowledged using SACK, it will allow the connection + * to enter fast-recovery instead of hitting a retransmit timeout. A probe + * timeout will send the last unacknowledged segment to generate more acks + * with SACK information which can be used for fast-retransmiting the lost + * packets. This will fire in the order of 10ms. + * * The TCPT_REXMT timer is used to force retransmissions. 
* The TCP has the TCPT_REXMT timer set whenever segments * have been sent for which ACKs are expected but not yet @@ -101,9 +92,12 @@ * we retransmit one unacknowledged segment, and do a backoff * on the retransmit timer. * + * The TCPT_DELACK timer is used for transmitting delayed acknowledgements + * if an acknowledgement was delayed in anticipation of a new segment. + * * The TCPT_PERSIST timer is used to keep window size information * flowing even if the window goes shut. If all previous transmissions - * have been acknowledged (so that there are no retransmissions in progress), + * have been acknowledged(so that there are no retransmissions in progress), * and the window is too small to bother sending anything, then we start * the TCPT_PERSIST timer. When it expires, if the window is nonzero, * we go to transmit state. Otherwise, at intervals send a single byte @@ -114,112 +108,165 @@ * a window update from the peer. * * The TCPT_KEEP timer is used to keep connections alive. If an - * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, - * but not yet established, then we drop the connection. Once the connection - * is established, if the connection is idle for TCPTV_KEEP_IDLE time - * (and keepalives have been enabled on the socket), we begin to probe - * the connection. We force the peer to send us a segment by sending: + * connection is idle (no segments received) for TCPTV_KEEP_INIT amount + * of time, but not yet established, then we drop the connection. + * Once the connection is established, if the connection is idle for + * TCPTV_KEEP_IDLE time (and keepalives have been enabled on the socket), + * we begin to probe the connection. We force the peer to send us a + * segment by sending: * * This segment is (deliberately) outside the window, and should elicit * an ack segment in response from the peer. If, despite the TCPT_KEEP - * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE - * amount of time probing, then we drop the connection. + * initiated segments we cannot elicit a response from a peer in + * TCPT_MAXIDLE amount of time probing, then we drop the connection. + * + * The TCPT_2MSL timer is used for keeping the conenction in Time-wait state + * before fully closing it so that the connection 4-tuple can be reused. 
*/ +#ifdef BSD_KERNEL_PRIVATE + +#define TCPT_PTO 0 /* Probe timeout */ +#define TCPT_DELAYFR 1 /* Delay recovery if there is reordering */ +#define TCPT_REXMT 2 /* retransmit */ +#define TCPT_DELACK 3 /* delayed ack */ +#define TCPT_PERSIST 4 /* retransmit persistence */ +#define TCPT_KEEP 5 /* keep alive */ +#define TCPT_2MSL 6 /* 2*msl quiet time timer */ +#if MPTCP +#define TCPT_JACK_RXMT 7 /* retransmit timer for join ack */ +#define TCPT_MAX 7 +#else /* MPTCP */ +#define TCPT_MAX 6 +#endif /* !MPTCP */ + +#define TCPT_NONE (TCPT_MAX + 1) +#define TCPT_NTIMERS (TCPT_MAX + 1) + +/* External definitions */ +#define TCPT_REXMT_EXT 0 +#define TCPT_PERSIST_EXT 1 +#define TCPT_KEEP_EXT 2 +#define TCPT_2MSL_EXT 3 +#define TCPT_DELACK_EXT 4 + +#else /* !BSD_KERNEL_PRIVATE */ +#define TCPT_REXMT 0 /* retransmit */ +#define TCPT_PERSIST 1 /* retransmit persistence */ +#define TCPT_KEEP 2 /* keep alive */ +#define TCPT_2MSL 3 /* 2*msl quiet time timer */ +#define TCPT_DELACK 4 /* delayed ack timer */ +#if MPTCP +#define TCPT_JACK_RXMT 5 /* retransmit timer for join ack */ +#define TCPT_MAX 5 +#else /* MPTCP */ +#define TCPT_MAX 4 +#endif /* !MPTCP */ +#define TCPT_NONE (TCPT_MAX + 1) +#define TCPT_NTIMERS (TCPT_MAX + 1) -#ifdef PRIVATE +#endif /* BSD_KERNEL_PRIVATE */ +#ifdef BSD_KERNEL_PRIVATE /* * Time constants. */ -#define TCPTV_MSL ( 15*TCP_RETRANSHZ) /* max seg lifetime (hah!) */ -#define TCPTV_SRTTBASE 0 /* base roundtrip time; - if 0, no idea yet */ -#define TCPTV_RTOBASE ( 1*TCP_RETRANSHZ) /* assumed RTO if no info */ -#define TCPTV_SRTTDFLT ( 1*TCP_RETRANSHZ) /* assumed RTT if no info */ - -#define TCPTV_PERSMIN ( 5*TCP_RETRANSHZ) /* retransmit persistence */ -#define TCPTV_PERSMAX ( 60*TCP_RETRANSHZ) /* maximum persist interval */ - -#define TCPTV_KEEP_INIT ( 75*TCP_RETRANSHZ) /* initial connect keep alive */ -#define TCPTV_KEEP_IDLE (120*60*TCP_RETRANSHZ) /* dflt time before probing */ -#define TCPTV_KEEPINTVL ( 75*TCP_RETRANSHZ) /* default probe interval */ +#define TCPTV_MSL ( 15*TCP_RETRANSHZ) /* max seg lifetime */ +#define TCPTV_SRTTBASE 0 /* base roundtrip time; if 0, no idea yet */ +#define TCPTV_RTOBASE ( 1*TCP_RETRANSHZ) /* assumed RTO if no info */ +#define TCPTV_SRTTDFLT ( 1*TCP_RETRANSHZ) /* assumed RTT if no info */ +#define TCPTV_PERSMIN ( 5*TCP_RETRANSHZ) /* retransmit persistence */ +#define TCPTV_PERSMAX ( 60*TCP_RETRANSHZ) /* maximum persist interval */ + +#define TCPTV_KEEP_INIT ( 75*TCP_RETRANSHZ) /* connect keep alive */ +#define TCPTV_KEEP_IDLE (120*60*TCP_RETRANSHZ) /* time before probing */ +#define TCPTV_KEEPINTVL ( 75*TCP_RETRANSHZ) /* default probe interval */ #define TCPTV_KEEPCNT 8 /* max probes before drop */ -#define TCPTV_REXMTMAX ( 64*TCP_RETRANSHZ ) /* max allowable REXMT value */ +#define TCPTV_REXMTMAX ( 64*TCP_RETRANSHZ ) /* max REXMT value */ #define TCPTV_REXMTMIN ( TCP_RETRANSHZ/33 ) /* min REXMT for non-local connections */ -#define TCPTV_UNACKWIN ( TCP_RETRANSHZ/10 ) /* Window for counting rcv bytes to see if - ack-stretching can start (default 100 ms) */ -#define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 ) /* Receiver idle time, avoid ack-stretching after that*/ -#define TCPTV_RCVBUFIDLE (TCP_RETRANSHZ/2) /* Receiver idle time, for rcv socket buffer resizing */ -/* No ack stretching during slow-start, until we see some packets. 
+/* + * Window for counting received bytes to see if ack-stretching + * can start (default 100 ms) + */ +#define TCPTV_UNACKWIN ( TCP_RETRANSHZ/10 ) + +/* Receiver idle time, avoid ack-stretching after this idle time */ +#define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 ) + +/* + * No ack stretching during slow-start, until we see some packets. * By the time the receiver gets 512 packets, the senders cwnd * should open by a few hundred packets consdering the * slow-start progression. */ #define TCP_RCV_SS_PKTCOUNT 512 -#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ +/* Receiver idle time, for rcv socket buffer resizing */ +#define TCPTV_RCVBUFIDLE (TCP_RETRANSHZ/2) +#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ -#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ -#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ #ifdef TCPTIMERS static char *tcptimers[] = { "REXMT", "PERSIST", "KEEP", "2MSL" , "DELACK"}; -#endif - -#ifdef KERNEL +#endif /* TCPTIMERS */ -/* We consider persist, keep and 2msl as slow timers which can be coalesced - * at a higher granularity (500 ms). Rexmt and delayed ack are considered fast - * timers which fire in the order of 100ms. +/* + * Persist, keep, 2msl and MPTCP's join-ack timer as slow timers which can + * be coalesced at a higher granularity (500 ms). * - * The following conditional is to check if a timer is one of the slow timers. This - * is fast and works well for now. If we add more slow timers for any reason, - * we may need to change this. + * Rexmt and delayed ack timers are considered as fast timers which run + * in the order of 100ms. + * + * Probe timeout is a quick timer which will run in the order of 10ms. 
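 * The IS_TIMER_HZ_* checks below derive the class directly from the
 * new index order, replacing the old IS_TIMER_SLOW() bit test.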
*/ -#define IS_TIMER_SLOW(ind) ((ind & 0x3) != 0) +#define IS_TIMER_HZ_500MS(i) ((i) >= TCPT_PERSIST) +#define IS_TIMER_HZ_100MS(i) ((i) >= TCPT_REXMT && (i) < TCPT_PERSIST) +#define IS_TIMER_HZ_10MS(i) ((i) < TCPT_REXMT) struct tcptimerlist; struct tcptimerentry { - LIST_ENTRY(tcptimerentry) le; /* links for timer list */ - uint32_t timer_start; /* tcp clock when the timer was started */ - uint16_t index; /* index of lowest timer that needs to run first */ - uint32_t runtime; /* deadline at which the first timer has to fire */ + LIST_ENTRY(tcptimerentry) le; /* links for timer list */ + uint32_t timer_start; /* tcp clock when the timer was started */ + uint16_t index; /* index of lowest timer that needs to run first */ + uint16_t mode; /* Bit-wise OR of timers that are active */ + uint32_t runtime; /* deadline at which the first timer has to fire */ }; LIST_HEAD(timerlisthead, tcptimerentry); struct tcptimerlist { - struct timerlisthead lhead; /* head of the list of timer entries */ - lck_mtx_t *mtx; /* lock to protect the list */ - lck_attr_t *mtx_attr; /* mutex attributes */ - lck_grp_t *mtx_grp; /* mutex group definition */ + struct timerlisthead lhead; /* head of the list */ + lck_mtx_t *mtx; /* lock to protect the list */ + lck_attr_t *mtx_attr; /* mutex attributes */ + lck_grp_t *mtx_grp; /* mutex group definition */ lck_grp_attr_t *mtx_grp_attr; /* mutex group attributes */ - uint32_t fast_quantum; /* minimum time quantum to coalesce fast timers */ - uint32_t slow_quantum; /* minimum time quantum to coalesce slow timers */ - thread_call_t call; /* call entry */ - uint32_t runtime; /* time at which this list is going to run */ - uint32_t entries; /* Number of entries on the list */ - uint32_t maxentries; /* Max number of entries at any time */ + thread_call_t call; /* call entry */ + uint32_t runtime; /* time at which this list is going to run */ + uint32_t entries; /* Number of entries on the list */ + uint32_t maxentries; /* Max number of entries at any time */ /* Set desired mode when timer list running */ - boolean_t running; /* Set when timer list is being processed */ - boolean_t scheduled; /* Set when timer is scheduled */ -#define TCP_TIMERLIST_FASTMODE 0x1 -#define TCP_TIMERLIST_SLOWMODE 0x2 - uint32_t mode; /* Current mode, fast or slow */ - uint32_t pref_mode; /* Preferred mode set by a connection, fast or slow */ - uint32_t pref_offset; /* Preferred offset set by a connection */ - uint32_t idlegen; /* Number of times the list has been idle in fast mode */ - struct tcptimerentry *next_te; /* Store the next timer entry pointer to process */ + boolean_t running; /* Set when timer list is being processed */ + boolean_t scheduled; /* set when the timer is scheduled */ +#define TCP_TIMERLIST_10MS_MODE 0x1 +#define TCP_TIMERLIST_100MS_MODE 0x2 +#define TCP_TIMERLIST_500MS_MODE 0x4 + uint32_t mode; /* Current mode of the timer */ + uint32_t pref_mode; /* Preferred mode set by a connection */ + uint32_t pref_offset; /* Preferred offset set by a connection */ + uint32_t idleruns; /* Number of times the list has been idle in fast mode */ + struct tcptimerentry *next_te; /* next timer entry pointer to process */ }; -#define TCP_FASTMODE_IDLEGEN_MAX 20 /* Approximately 2 seconds */ +/* number of idle runs allowed for TCP timer list in fast or quick modes */ +#define TCP_FASTMODE_IDLERUN_MAX 10 /* * Minimum retransmit timeout is set to 30ms. We add a slop of @@ -236,7 +283,7 @@ struct tcptimerlist { * the initial value is high (1s) and delayed ack is not a problem in * that case. 
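The kernel-private index order above is what makes the IS_TIMER_HZ_* range checks work: timers are laid out by firing granularity (quick, then fast, then slow), so a single comparison classifies each index. A compilable sketch, with the indices and macros copied from the definitions above:

/*
 * Sketch (userland, not kernel code): one range check per timer class.
 */
#include <stdio.h>

#define TCPT_PTO        0
#define TCPT_DELAYFR    1
#define TCPT_REXMT      2
#define TCPT_DELACK     3
#define TCPT_PERSIST    4
#define TCPT_KEEP       5
#define TCPT_2MSL       6

#define IS_TIMER_HZ_500MS(i)    ((i) >= TCPT_PERSIST)
#define IS_TIMER_HZ_100MS(i)    ((i) >= TCPT_REXMT && (i) < TCPT_PERSIST)
#define IS_TIMER_HZ_10MS(i)     ((i) < TCPT_REXMT)

int main(void)
{
    /* Prints: 0-1 quick (10ms), 2-3 fast (100ms), 4-6 slow (500ms). */
    for (int i = TCPT_PTO; i <= TCPT_2MSL; i++)
        printf("timer %d -> %s\n", i,
            IS_TIMER_HZ_10MS(i) ? "10ms (quick)" :
            IS_TIMER_HZ_100MS(i) ? "100ms (fast)" : "500ms (slow)");
    return 0;
}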
*/ -#define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 ) /* rexmt slop allowed (200 ms) */ +#define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 ) /* extra 200 ms slop */ /* macro to decide when retransmit slop (described above) should be added */ #define TCP_ADD_REXMTSLOP(tp) (tp->t_state >= TCPS_ESTABLISHED) @@ -262,30 +309,25 @@ struct tcptimerlist { #define TCP_CONN_MAXIDLE(tp) \ (TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp)) -/* Since we did not add rexmt slop for local connections, we should add - * it to idle timeout. Otherwise local connections will reach idle state - * quickly - */ #define TCP_IDLETIMEOUT(tp) \ (((TCP_ADD_REXMTSLOP(tp)) ? 0 : tcp_rexmt_slop) + tp->t_rxtcur) TAILQ_HEAD(tcptailq, tcpcb); -extern int tcp_keepinit; /* time to establish connection */ -extern int tcp_keepidle; /* time before keepalive probes begin */ -extern int tcp_keepintvl; /* time between keepalive probes */ -extern int tcp_keepcnt; /* number of keepalives */ -extern int tcp_delack; /* delayed ack timer */ +extern int tcp_keepinit; /* time to establish connection */ +extern int tcp_keepidle; /* time before keepalive probes begin */ +extern int tcp_keepintvl; /* time between keepalive probes */ +extern int tcp_keepcnt; /* number of keepalives */ +extern int tcp_delack; /* delayed ack timer */ extern int tcp_maxpersistidle; extern int tcp_msl; -extern int tcp_ttl; /* time to live for TCP segs */ +extern int tcp_ttl; /* time to live for TCP segs */ extern int tcp_backoff[]; extern int tcp_rexmt_slop; extern u_int32_t tcp_max_persist_timeout; /* Maximum persistence for Zero Window Probes */ #define OFFSET_FROM_START(tp, off) ((tcp_now + (off)) - (tp)->tentry.timer_start) -#endif /* KERNEL */ -#endif /* PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_NETINET_TCP_TIMER_H_ */ diff --git a/bsd/netinet/tcp_usrreq.c b/bsd/netinet/tcp_usrreq.c index 6484f92b6..350884ae1 100644 --- a/bsd/netinet/tcp_usrreq.c +++ b/bsd/netinet/tcp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,6 +79,7 @@ #include #include #include +#include #include #include @@ -226,16 +227,27 @@ out: return error; } +#if NECP #define COMMON_START() TCPDEBUG0; \ do { \ if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \ return (EINVAL); \ - if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) \ + if (necp_socket_should_use_flow_divert(inp)) \ return (EPROTOTYPE); \ tp = intotcpcb(inp); \ TCPDEBUG1(); \ calculate_tcp_clock(); \ } while (0) +#else /* NECP */ +#define COMMON_START() TCPDEBUG0; \ +do { \ + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \ + return (EINVAL); \ + tp = intotcpcb(inp); \ + TCPDEBUG1(); \ + calculate_tcp_clock(); \ +} while (0) +#endif /* !NECP */ #define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out @@ -409,23 +421,27 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) } else return EINVAL; } +#if NECP #if FLOW_DIVERT - else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) { - uint32_t fd_ctl_unit = 0; - error = flow_divert_check_policy(so, p, FALSE, &fd_ctl_unit); - if (error == 0) { - if (fd_ctl_unit > 0) { - error = flow_divert_pcb_init(so, fd_ctl_unit); - if (error == 0) { - error = flow_divert_connect_out(so, nam, p); - } - } else { - error = ENETDOWN; + else if (necp_socket_should_use_flow_divert(inp)) { + uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp); + if (fd_ctl_unit > 0) { + error = flow_divert_pcb_init(so, fd_ctl_unit); + if (error == 0) { + error = flow_divert_connect_out(so, nam, p); } + } else { + error = ENETDOWN; } return error; } #endif /* FLOW_DIVERT */ +#if CONTENT_FILTER + error = cfil_sock_attach(so); + if (error != 0) + return error; +#endif /* CONTENT_FILTER */ +#endif /* NECP */ tp = intotcpcb(inp); TCPDEBUG1(); @@ -480,6 +496,10 @@ tcp_usr_connectx_common(struct socket *so, int af, VERIFY(dst_se->se_addr->sa_family == af); VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); +#if NECP + inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? 
dst_se->se_addr : NULL, ifscope); +#endif /* NECP */ + /* * We get here for 2 cases: * @@ -575,23 +595,28 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) } else return EINVAL; } +#if NECP #if FLOW_DIVERT - else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) { - uint32_t fd_ctl_unit = 0; - error = flow_divert_check_policy(so, p, FALSE, &fd_ctl_unit); - if (error == 0) { - if (fd_ctl_unit > 0) { - error = flow_divert_pcb_init(so, fd_ctl_unit); - if (error == 0) { - error = flow_divert_connect_out(so, nam, p); - } - } else { - error = ENETDOWN; + else if (necp_socket_should_use_flow_divert(inp)) { + uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp); + if (fd_ctl_unit > 0) { + error = flow_divert_pcb_init(so, fd_ctl_unit); + if (error == 0) { + error = flow_divert_connect_out(so, nam, p); } + } else { + error = ENETDOWN; } return error; } #endif /* FLOW_DIVERT */ +#if CONTENT_FILTER + error = cfil_sock_attach(so); + if (error != 0) + return error; +#endif /* CONTENT_FILTER */ +#endif /* NECP */ + tp = intotcpcb(inp); TCPDEBUG1(); @@ -709,8 +734,15 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam) } if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) return (EINVAL); - else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) +#if NECP + else if (necp_socket_should_use_flow_divert(inp)) return (EPROTOTYPE); +#if CONTENT_FILTER + error = cfil_sock_attach(so); + if (error != 0) + return (error); +#endif /* CONTENT_FILTER */ +#endif /* NECP */ tp = intotcpcb(inp); TCPDEBUG1(); @@ -735,8 +767,15 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam) } if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) return (EINVAL); - else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) +#if NECP + else if (necp_socket_should_use_flow_divert(inp)) return (EPROTOTYPE); +#if CONTENT_FILTER + error = cfil_sock_attach(so); + if (error != 0) + return (error); +#endif /* CONTENT_FILTER */ +#endif /* NECP */ tp = intotcpcb(inp); TCPDEBUG1(); @@ -784,7 +823,12 @@ tcp_usr_shutdown(struct socket *so) */ tp = intotcpcb(inp); TCPDEBUG1(); - if (tp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + + if (tp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { if (tp != NULL) error = EPROTOTYPE; goto out; @@ -800,6 +844,12 @@ tcp_usr_shutdown(struct socket *so) goto out; } #endif +#if CONTENT_FILTER + /* Don't send a FIN yet */ + if (tp && !(so->so_state & SS_ISDISCONNECTED) && + cfil_sock_data_pending(&so->so_snd)) + goto out; +#endif /* CONTENT_FILTER */ if (tp) error = tcp_output(tp); COMMON_END(PRU_SHUTDOWN); @@ -822,6 +872,11 @@ tcp_usr_rcvd(struct socket *so, __unused int flags) tcp_sbrcv_trim(tp, &so->so_rcv); tcp_output(tp); + +#if CONTENT_FILTER + cfil_sock_buf_update(&so->so_rcv); +#endif /* CONTENT_FILTER */ + COMMON_END(PRU_RCVD); } @@ -869,8 +924,11 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, #endif TCPDEBUG0; - if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD || - (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { /* * OOPS! 
we lost a race, the TCP session got reset after * we checked SS_CANTSENDMORE, eg: while doing uiomove or a @@ -882,10 +940,11 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, m_freem(control); control = NULL; } - if (inp != NULL && (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) - error = EPROTOTYPE; - else + + if (inp == NULL) error = ECONNRESET; /* XXX EPIPE? */ + else + error = EPROTOTYPE; tp = NULL; TCPDEBUG1(); goto out; @@ -1013,9 +1072,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, tcp_mss(tp, -1, IFSCOPE_NONE); } tp->snd_up = tp->snd_una + so->so_snd.sb_cc; - tp->t_force = 1; + tp->t_flagsext |= TF_FORCE; error = tcp_output(tp); - tp->t_force = 0; + tp->t_flagsext &= ~TF_FORCE; } COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); @@ -1471,6 +1530,11 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes; ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets; ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes; + + ti->tcpi_wired_rxpackets = inp->inp_Wstat->rxpackets; + ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes; + ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets; + ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes; } } @@ -1688,6 +1752,7 @@ tcp_ctloutput(so, sopt) case TCP_NODELAY: case TCP_NOOPT: case TCP_NOPUSH: + case TCP_ENABLE_ECN: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) @@ -1703,6 +1768,9 @@ tcp_ctloutput(so, sopt) case TCP_NOPUSH: opt = TF_NOPUSH; break; + case TCP_ENABLE_ECN: + opt = TF_ENABLE_ECN; + break; default: opt = 0; /* dead code to fool gcc */ break; @@ -1714,11 +1782,22 @@ tcp_ctloutput(so, sopt) tp->t_flags &= ~opt; break; case TCP_RXT_FINDROP: + case TCP_NOTIMEWAIT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; - opt = TF_RXTFINDROP; + switch (sopt->sopt_name) { + case TCP_RXT_FINDROP: + opt = TF_RXTFINDROP; + break; + case TCP_NOTIMEWAIT: + opt = TF_NOTIMEWAIT; + break; + default: + opt = 0; + break; + } if (optval) tp->t_flagsext |= opt; else @@ -1981,6 +2060,22 @@ tcp_ctloutput(so, sopt) tp->t_flagsext |= TF_NOSTRETCHACK; } break; + case TCP_DISABLE_BLACKHOLE_DETECTION: + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + if (optval < 0 || optval > 1) { + error = EINVAL; + } else if (optval == 0) { + tp->t_flagsext &= ~TF_NOBLACKHOLE_DETECTION; + } else { + tp->t_flagsext |= TF_NOBLACKHOLE_DETECTION; + if ((tp->t_flags & TF_BLACKHOLE) && + tp->t_pmtud_saved_maxopd > 0) + tcp_pmtud_revert_segment_size(tp); + } + break; case SO_FLUSH: if ((error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval))) != 0) @@ -2032,6 +2127,9 @@ tcp_ctloutput(so, sopt) case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; break; + case TCP_ENABLE_ECN: + optval = (tp->t_flags & TF_ENABLE_ECN) ? 1 : 0; + break; case TCP_CONNECTIONTIMEOUT: optval = tp->t_keepinit / TCP_RETRANSHZ; break; @@ -2044,6 +2142,9 @@ tcp_ctloutput(so, sopt) case TCP_RXT_FINDROP: optval = tp->t_flagsext & TF_RXTFINDROP; break; + case TCP_NOTIMEWAIT: + optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 
1 : 0; + break; case TCP_MEASURE_SND_BW: optval = tp->t_flagsext & TF_MEASURESNDBW; break; @@ -2088,6 +2189,12 @@ tcp_ctloutput(so, sopt) else optval = 0; break; + case TCP_DISABLE_BLACKHOLE_DETECTION: + if (tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) + optval = 1; + else + optval = 0; + break; case TCP_PEER_PID: { pid_t pid; error = tcp_lookup_peer_pid_locked(so, &pid); @@ -2226,9 +2333,8 @@ tcp_attach(so, p) so->so_state |= nofd; return (ENOBUFS); } - if (nstat_collect) { + if (nstat_collect) nstat_tcp_new_pcb(inp); - } tp->t_state = TCPS_CLOSED; return (0); } diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index f92c3e618..bf9fb3f20 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,16 +79,16 @@ struct name { \ #define _TCPCB_LIST_HEAD(name, type) LIST_HEAD(name, type) #endif -#define TCP_RETRANSHZ 1000 /* granularity of TCP timestamps, 1ms */ -#define TCP_TIMERHZ 100 /* frequency of TCP fast timer, 100 ms */ +#ifdef KERNEL_PRIVATE +#define TCP_RETRANSHZ 1000 /* granularity of TCP timestamps, 1ms */ /* Minimum time quantum within which the timers are coalesced */ -#define TCP_FASTTIMER_QUANTUM TCP_TIMERHZ /* fast mode, once every 100ms */ -#define TCP_SLOWTIMER_QUANTUM (TCP_RETRANSHZ/2) /* slow mode, once every 500ms */ +#define TCP_TIMER_10MS_QUANTUM (TCP_RETRANSHZ/100) /* every 10ms */ +#define TCP_TIMER_100MS_QUANTUM (TCP_RETRANSHZ/10) /* every 100ms */ +#define TCP_TIMER_500MS_QUANTUM (TCP_RETRANSHZ/2) /* every 500ms */ #define TCP_RETRANSHZ_TO_USEC 1000 -#ifdef KERNEL_PRIVATE #define N_TIME_WAIT_SLOTS 128 /* must be power of 2 */ /* Base RTT is stored for N_MIN_RTT_HISTORY slots. This is used to @@ -159,11 +159,6 @@ struct tseg_qent { struct mbuf *tqe_m; /* mbuf contains packet */ }; LIST_HEAD(tsegqe_head, tseg_qent); -extern int tcp_reass_maxseg; -extern int tcp_reass_qsize; -#ifdef MALLOC_DECLARE -MALLOC_DECLARE(M_TSEGQ); -#endif struct sackblk { tcp_seq start; /* start seq no. of sack block */ @@ -174,6 +169,7 @@ struct sackhole { tcp_seq start; /* start seq no. of hole */ tcp_seq end; /* end seq no. */ tcp_seq rxmit; /* next seq. 
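For reference, the boolean options handled by tcp_ctloutput() above follow the usual setsockopt() pattern from userland. A hedged sketch: TCP_ENABLE_ECN and TCP_NOTIMEWAIT are Darwin-private option names at this point, so the guards assume a <netinet/tcp.h> that exposes them.

/*
 * Userland sketch: exercising the boolean options handled above.
 * Error handling is intentionally minimal.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

int main(void)
{
    int s = socket(AF_INET, SOCK_STREAM, 0);
    int on = 1;

    if (s < 0)
        return 1;
#ifdef TCP_ENABLE_ECN
    /* Sets TF_ENABLE_ECN: negotiate ECN on this connection. */
    if (setsockopt(s, IPPROTO_TCP, TCP_ENABLE_ECN, &on, sizeof(on)) < 0)
        perror("TCP_ENABLE_ECN");
#endif
#ifdef TCP_NOTIMEWAIT
    /* Sets TF_NOTIMEWAIT: avoid the TIME_WAIT state on close. */
    if (setsockopt(s, IPPROTO_TCP, TCP_NOTIMEWAIT, &on, sizeof(on)) < 0)
        perror("TCP_NOTIMEWAIT");
#endif
    return 0;
}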
no in hole to be retransmitted */ + u_int32_t rxmit_start; /* timestamp of first retransmission */ TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */ }; @@ -207,6 +203,31 @@ struct mpt_dsn_map { }; #define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ +struct tcp_ccstate { + union { + struct tcp_cubic_state { + u_int32_t tc_last_max; /* cwnd at last loss */ + u_int32_t tc_epoch_start; /* TS of last loss */ + u_int32_t tc_origin_point; /* window at the start of an epoch */ + u_int32_t tc_tcp_win; /* computed tcp win */ + u_int32_t tc_tcp_bytes_acked; /* bytes acked */ + u_int32_t tc_target_win; /* cubic target win */ + u_int32_t tc_avg_lastmax; /* Average of last max */ + u_int32_t tc_mean_deviation; /* Mean absolute deviation */ + float tc_epoch_period; /* K parameter */ + } _cubic_state_; +#define cub_last_max __u__._cubic_state_.tc_last_max +#define cub_epoch_start __u__._cubic_state_.tc_epoch_start +#define cub_origin_point __u__._cubic_state_.tc_origin_point +#define cub_tcp_win __u__._cubic_state_.tc_tcp_win +#define cub_tcp_bytes_acked __u__._cubic_state_.tc_tcp_bytes_acked +#define cub_epoch_period __u__._cubic_state_.tc_epoch_period +#define cub_target_win __u__._cubic_state_.tc_target_win +#define cub_avg_lastmax __u__._cubic_state_.tc_avg_lastmax +#define cub_mean_dev __u__._cubic_state_.tc_mean_deviation + } __u__; +}; + /* * Tcp control block, one per tcp; fields: * Organized for 16 byte cacheline efficiency. @@ -245,6 +266,7 @@ struct tcpcb { #define TF_WASFRECOVERY 0x400000 /* was in NewReno Fast Recovery */ #define TF_SIGNATURE 0x800000 /* require MD5 digests (RFC2385) */ #define TF_MAXSEGSNT 0x1000000 /* last segment sent was a full segment */ +#define TF_ENABLE_ECN 0x2000000 /* Enable ECN */ #define TF_PMTUD 0x4000000 /* Perform Path MTU Discovery for this connection */ #define TF_CLOSING 0x8000000 /* pending tcp close */ #define TF_TSO 0x10000000 /* TCP Segment Offloading is enable on this connection */ @@ -252,8 +274,6 @@ struct tcpcb { #define TF_TIMER_ONLIST 0x40000000 /* pcb is on tcp_timer_list */ #define TF_STRETCHACK 0x80000000 /* receiver is going to delay acks */ - int t_force; /* 1 if forcing out a byte */ - tcp_seq snd_una; /* send unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; * used to recognize retransmits @@ -294,7 +314,8 @@ struct tcpcb { int t_srtt; /* smoothed round-trip time */ int t_rttvar; /* variance in round-trip time */ - int t_rxtshift; /* log(2) of rexmt exp. backoff */ + u_int16_t t_reassqlen; /* length of reassembly queue */ + u_int16_t t_rxtshift; /* log(2) of rexmt exp. 
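The per-connection CUBIC state added above (tc_last_max, tc_epoch_start, tc_epoch_period, and friends) is the bookkeeping a standard CUBIC window computation needs. As a rough illustration of the curve that state supports, here is a standalone sketch in the RFC 8312 form; the field mapping noted in the comments is an informed guess, not something this patch states:

/*
 * Illustrative CUBIC window curve, compiled standalone (build with -lm).
 * Guessed mapping: tc_last_max plays W_max, tc_epoch_period plays K.
 * Constants are the usual published defaults, not values from this patch.
 */
#include <math.h>
#include <stdio.h>

#define CUBIC_C     0.4     /* cubic scaling constant */
#define CUBIC_BETA  0.7     /* multiplicative decrease factor */

/* cwnd (in MSS) t seconds after the current loss epoch started. */
static double
cubic_win(double t, double w_max)
{
    /* K: time needed to grow back to w_max if no further loss occurs. */
    double k = cbrt(w_max * (1.0 - CUBIC_BETA) / CUBIC_C);
    return (CUBIC_C * pow(t - k, 3.0) + w_max);
}

int main(void)
{
    /* Starts at beta*w_max (70), returns to w_max (100) near t = K. */
    for (double t = 0.0; t <= 4.0; t += 1.0)
        printf("t=%.0fs cwnd=%.1f MSS\n", t, cubic_win(t, 100.0));
    return 0;
}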
backoff */ u_int t_rttmin; /* minimum rtt allowed */ u_int t_rttbest; /* best rtt we've seen */ u_int t_rttcur; /* most recent value of rtt */ @@ -310,13 +331,19 @@ struct tcpcb { #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 /* RFC 1323 variables */ - u_int8_t snd_scale; /* window scaling for send window */ + u_int8_t snd_scale; /* window scaling for send window */ u_int8_t rcv_scale; /* window scaling for recv window */ u_int8_t request_r_scale; /* pending window scaling */ u_int8_t requested_s_scale; u_int8_t tcp_cc_index; /* index of congestion control algorithm */ u_int8_t t_adaptive_rtimo; /* Read timeout used as a multiple of RTT */ u_int8_t t_adaptive_wtimo; /* Write timeout used as a multiple of RTT */ + u_int8_t t_stretchack_delayed; /* stretch ack delayed */ + +/* State for limiting early retransmits when SACK is not enabled */ + u_int16_t t_early_rexmt_count; /* count of early rexmts */ + u_int32_t t_early_rexmt_win; /* window for limiting early rexmts */ + u_int32_t ts_recent; /* timestamp echo data */ u_int32_t ts_recent_age; /* when last updated */ @@ -338,6 +365,7 @@ struct tcpcb { /* Receiver state for stretch-ack algorithm */ u_int32_t rcv_unackwin; /* to measure win for stretching acks */ u_int32_t rcv_by_unackwin; /* bytes seen during the last ack-stretching win */ + u_int32_t rcv_nostrack_ts; /* timestamp when stretch ack was disabled automatically */ u_int16_t rcv_waitforss; /* wait for packets during slow-start */ u_int16_t ecn_flags; #define TE_SETUPSENT 0x01 /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */ @@ -345,19 +373,19 @@ struct tcpcb { #define TE_SENDIPECT 0x04 /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */ #define TE_SENDCWR 0x08 /* Indicate that the next non-retransmit should have the TCP CWR flag set */ #define TE_SENDECE 0x10 /* Indicate that the next packet should have the TCP ECE flag set */ +#define TE_INRECOVERY 0x20 /* connection entered recovery after receiving ECE */ #define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection */ /* state for bad retransmit recovery */ u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ - u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ + u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ int t_srtt_prev; /* srtt prior to retransmit */ int t_rttvar_prev; /* rttvar prior to retransmit */ - u_int32_t t_badrexmt_time; /* bad rexmt detection time */ + u_int32_t t_badrexmt_time; /* bad rexmt detection time */ -/* state to limit the number of early retransmits */ - u_int32_t t_early_rexmt_win; /* window for limiting early retransmits */ - u_int16_t t_early_rexmt_count; /* number of early rexmts seen in past window */ +/* Packet reordering metric */ + u_int16_t t_reorderwin; /* Reordering late time offset */ /* SACK related state */ int16_t snd_numholes; /* number of holes seen by sender */ @@ -380,8 +408,9 @@ struct tcpcb { u_int32_t t_keepintvl; /* interval between keepalives */ u_int32_t t_keepcnt; /* number of keepalives before close */ - u_int32_t tso_max_segment_size; /* TCP Segment Offloading maximum segment unit for NIC */ - u_int t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ + u_int32_t tso_max_segment_size; /* TSO maximum segment unit for NIC */ + u_int32_t t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ + u_int32_t t_pmtud_start_ts; /* Time of PMTUD
blackhole detection */ struct { @@ -409,6 +438,14 @@ struct tcpcb { #define TF_RECV_THROTTLE 0x100 /* Input throttling active */ #define TF_NOSTRETCHACK 0x200 /* ack every other packet */ #define TF_STREAMEOW 0x400 /* Last packet was small indicating end of write */ +#define TF_NOTIMEWAIT 0x800 /* Avoid going into time-wait */ +#define TF_SENT_TLPROBE 0x1000 /* Sent data in PTO */ +#define TF_PKTS_REORDERED 0x2000 /* Detected reordering */ +#define TF_DELAY_RECOVERY 0x4000 /* delay fast recovery */ +#define TF_FORCE 0x8000 /* force 1 byte out */ +#define TF_DISABLE_STRETCHACK 0x10000 /* auto-disable stretch ack */ +#define TF_NOBLACKHOLE_DETECTION 0x20000 /* Disable PMTU blackhole detection */ + #if TRAFFIC_MGT /* Inter-arrival jitter related state */ uint32_t iaj_rcv_ts; /* tcp clock when the first packet was received */ @@ -424,6 +461,10 @@ struct tcpcb { uint32_t t_lropktlen; /* Bytes in a LRO frame */ tcp_seq t_idleat; /* rcv_nxt at idle time */ TAILQ_ENTRY(tcpcb) t_twentry; /* link for time wait queue */ + struct tcp_ccstate *t_ccstate; /* congestion control related state */ +/* Tail loss probe related state */ + tcp_seq t_tlphighrxt; /* snd_nxt after PTO */ + u_int32_t t_tlpstart; /* timestamp at PTO */ #if MPTCP u_int32_t t_mpflags; /* flags for multipath TCP */ @@ -449,6 +490,9 @@ struct tcpcb { #define TMPF_MPTCP_READY 0x00080000 /* Can send DSS options on data */ #define TMPF_INFIN_SENT 0x00100000 /* Sent infinite mapping */ #define TMPF_SND_MPFAIL 0x00200000 /* Received mapping csum failure */ +#define TMPF_FASTJOIN_SEND 0x00400000 /* Fast join early data send */ +#define TMPF_FASTJOINBY2_SEND 0x00800000 /* Fast join send after 3 WHS */ + void *t_mptcb; /* pointer to MPTCP TCB */ tcp_seq t_mpuna; /* unacknowledged sequence */ struct mpt_dsn_map t_rcv_map; /* Receive mapping list */ @@ -479,6 +523,8 @@ struct tcpcb { (_tp_)->t_dupacks = 0; \ (_tp_)->t_rexmtthresh = tcprexmtthresh; \ (_tp_)->t_bytes_acked = 0; \ + (_tp_)->ecn_flags &= ~TE_INRECOVERY; \ + (_tp_)->t_timer[TCPT_PTO] = 0; \ } while(0) /* @@ -488,7 +534,9 @@ struct tcpcb { extern int tcprexmtthresh; #define ALLOW_LIMITED_TRANSMIT(_tp_) \ ((_tp_)->t_dupacks > 0 && \ - (_tp_)->t_dupacks < (_tp_)->t_rexmtthresh) + (_tp_)->t_dupacks < (_tp_)->t_rexmtthresh && \ + ((_tp_)->t_flagsext & (TF_PKTS_REORDERED|TF_DELAY_RECOVERY)) \ + != (TF_PKTS_REORDERED|TF_DELAY_RECOVERY)) /* * This condition is true if timestamp option is supported @@ -504,7 +552,6 @@ extern int tcprexmtthresh; #define BYTES_ACKED(_th_, _tp_) \ ((_th_)->th_ack - (_tp_)->snd_una) -#if CONFIG_DTRACE enum tcp_cc_event { TCP_CC_CWND_INIT, TCP_CC_INSEQ_ACK_RCVD, @@ -522,9 +569,12 @@ enum tcp_cc_event { TCP_CC_FLOW_CONTROL, TCP_CC_SUSPEND, TCP_CC_LIMITED_TRANSMIT, - TCP_CC_EARLY_RETRANSMIT + TCP_CC_EARLY_RETRANSMIT, + TCP_CC_TLP_RECOVERY, + TCP_CC_TLP_RECOVER_LASTPACKET, + TCP_CC_DELAY_FASTRECOVERY, + TCP_CC_TLP_IN_FASTRECOVERY }; -#endif /* CONFIG_DTRACE */ /* * Structure to hold TCP options that are only used during segment @@ -867,6 +917,24 @@ struct tcpstat { u_int32_t tcps_mp_sndpacks; /* number of data packs sent */ u_int32_t tcps_mp_sndbytes; /* number of bytes sent */ u_int32_t tcps_join_rxmts; /* join ack retransmits */ + u_int32_t tcps_tailloss_rto; /* RTO due to tail loss */ + u_int32_t tcps_reordered_pkts; /* packets reordered */ + u_int32_t tcps_recovered_pkts; /* recovered after loss */ + u_int32_t tcps_pto; /* probe timeout */ + u_int32_t tcps_rto_after_pto; /* RTO after a probe */ + u_int32_t tcps_tlp_recovery; /* TLP induced fast recovery */ +
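The reworked ALLOW_LIMITED_TRANSMIT() above now suppresses limited transmit only when reordering has been detected and fast recovery is being delayed at the same time. A small self-contained check of that truth table, with the macro and flag values copied from this header:

/*
 * Sketch: limited transmit stays allowed unless BOTH flags are set.
 */
#include <stdio.h>

#define TF_PKTS_REORDERED   0x2000
#define TF_DELAY_RECOVERY   0x4000

struct tcpcb_sketch {
    int t_dupacks;
    int t_rexmtthresh;
    unsigned int t_flagsext;
};

#define ALLOW_LIMITED_TRANSMIT(_tp_) \
    ((_tp_)->t_dupacks > 0 && \
    (_tp_)->t_dupacks < (_tp_)->t_rexmtthresh && \
    ((_tp_)->t_flagsext & (TF_PKTS_REORDERED|TF_DELAY_RECOVERY)) \
    != (TF_PKTS_REORDERED|TF_DELAY_RECOVERY))

int main(void)
{
    struct tcpcb_sketch a = { 1, 3, 0 };
    struct tcpcb_sketch b = { 1, 3, TF_PKTS_REORDERED };
    struct tcpcb_sketch c = { 1, 3, TF_PKTS_REORDERED|TF_DELAY_RECOVERY };

    /* Prints "1 1 0": only the combined-flags case is suppressed. */
    printf("%d %d %d\n", ALLOW_LIMITED_TRANSMIT(&a),
        ALLOW_LIMITED_TRANSMIT(&b), ALLOW_LIMITED_TRANSMIT(&c));
    return 0;
}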
u_int32_t tcps_tlp_recoverlastpkt; /* TLP recovered last pkt */ + u_int32_t tcps_ecn_setup; /* connection negotiated ECN */ + u_int32_t tcps_sent_cwr; /* Sent CWR, ECE received */ + u_int32_t tcps_sent_ece; /* Sent ECE notification */ + u_int32_t tcps_detect_reordering; /* Detect pkt reordering */ + u_int32_t tcps_delay_recovery; /* Delay fast recovery */ + u_int32_t tcps_avoid_rxmt; /* Retransmission was avoided */ + u_int32_t tcps_unnecessary_rxmt; /* Retransmission was not needed */ + u_int32_t tcps_nostretchack; /* disabled stretch ack algorithm on a connection */ + u_int32_t tcps_rescue_rxmt; /* SACK rescue retransmit */ + u_int32_t tcps_pto_in_recovery; /* PTO during fast recovery */ + u_int32_t tcps_pmtudbh_reverted; /* PMTU Blackhole detection, segment size reverted */ }; struct tcpstat_local { @@ -1136,15 +1204,16 @@ extern int ss_fltsz; extern int ss_fltsz_local; extern int tcp_do_rfc3390; /* Calculate ss_fltsz according to RFC 3390 */ extern int target_qdelay; -#ifdef __APPLE__ extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ extern struct timeval tcp_uptime; extern lck_spin_t *tcp_uptime_lock; - extern int tcp_delack_enabled; -#endif /* __APPLE__ */ - extern int tcp_do_sack; /* SACK enabled/disabled */ +extern int tcp_do_rfc3465; +extern int tcp_do_rfc3465_lim2; +extern int maxseg_unacked; +extern int tcp_use_newreno; + #if CONFIG_IFEF_NOWINDOWSCALE extern int tcp_obey_ifef_nowindowscale; @@ -1153,6 +1222,13 @@ extern int tcp_obey_ifef_nowindowscale; struct protosw; struct domain; +struct tcp_respond_args { + unsigned int ifscope; + unsigned int nocell:1, + noexpensive:1, + awdl_unrestricted:1; +}; + void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); @@ -1173,23 +1249,24 @@ void tcp_mtudisc(struct inpcb *, int); struct tcpcb * tcp_newtcpcb(struct inpcb *); int tcp_output(struct tcpcb *); -void tcp_respond(struct tcpcb *, void *, - struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int, - unsigned int, unsigned int); -struct rtentry *tcp_rtlookup(struct inpcb *, unsigned int); +void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, + tcp_seq, tcp_seq, int, struct tcp_respond_args *); +struct rtentry * + tcp_rtlookup(struct inpcb *, unsigned int); void tcp_setpersist(struct tcpcb *); -void tcp_gc(struct inpcbinfo *); +void tcp_gc(struct inpcbinfo *); void tcp_check_timer_state(struct tcpcb *tp); void tcp_run_timerlist(void *arg1, void *arg2); -struct tcptemp * - tcp_maketemplate(struct tcpcb *); +struct tcptemp *tcp_maketemplate(struct tcpcb *); void tcp_fillheaders(struct tcpcb *, void *, void *); -struct tcpcb * - tcp_timers(struct tcpcb *, int); +struct tcpcb *tcp_timers(struct tcpcb *, int); void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int); -void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq, u_int32_t *); +void tcp_sack_doack(struct tcpcb *, struct tcpopt *, struct tcphdr *, + u_int32_t *); +int tcp_detect_bad_rexmt(struct tcpcb *, struct tcphdr *, struct tcpopt *, + u_int32_t rxtime); void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); void tcp_clean_sackreport(struct tcpcb *tp); void tcp_sack_adjust(struct tcpcb *tp); @@ -1205,11 +1282,13 @@ uint32_t tcp_find_anypcb_byaddr(struct ifaddr *ifa); void tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so); struct bwmeas* tcp_bwmeas_alloc(struct tcpcb *tp); void tcp_bwmeas_free(struct tcpcb *tp); +extern int32_t timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2); extern void
tcp_set_background_cc(struct socket *); extern void tcp_set_foreground_cc(struct socket *); extern void tcp_set_recv_bg(struct socket *); extern void tcp_clear_recv_bg(struct socket *); +extern boolean_t tcp_sack_byte_islost(struct tcpcb *tp); #define IS_TCP_RECV_BG(_so) \ ((_so)->so_traffic_mgt_flags & TRAFFIC_MGT_TCP_RECVBG) @@ -1222,8 +1301,8 @@ int tcp_lock (struct socket *, int, void *); int tcp_unlock (struct socket *, int, void *); void calculate_tcp_clock(void); -extern void mptcp_insert_rmap(struct tcpcb *, struct mbuf *); extern void tcp_keepalive_reset(struct tcpcb *); +extern uint32_t get_base_rtt(struct tcpcb *tp); #ifdef _KERN_LOCKS_H_ lck_mtx_t * tcp_getlock (struct socket *, int); @@ -1239,11 +1318,14 @@ tcp_seq tcp_new_isn(struct tcpcb *); extern int tcp_input_checksum(int, struct mbuf *, struct tcphdr *, int, int); extern void tcp_getconninfo(struct socket *, struct conninfo_tcp *); +extern void add_to_time_wait(struct tcpcb *, uint32_t delay); +extern void tcp_pmtud_revert_segment_size(struct tcpcb *tp); #if MPTCP extern uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int); extern void mptcp_output_csum(struct tcpcb *, struct mbuf *, int32_t, unsigned, u_int64_t, u_int32_t *); extern int mptcp_adj_mss(struct tcpcb *, boolean_t); +extern void mptcp_insert_rmap(struct tcpcb *, struct mbuf *); #endif #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index 1b2bd8b1f..af468dc31 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -110,6 +110,10 @@ extern int ipsec_bypass; extern int esp_udp_encap_port; #endif /* IPSEC */ +#if NECP +#include +#endif /* NECP */ + #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETUDP, 0) #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETUDP, 2) #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETUDP, 1) @@ -169,7 +173,8 @@ extern void ipfw_stealth_stats_incr_udp(void); static int udp_getstat SYSCTL_HANDLER_ARGS; struct udpstat udpstat; /* from udp_var.h */ -SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_getstat, "S,udpstat", "UDP statistics (struct udpstat, netinet/udp_var.h)"); @@ -296,6 +301,7 @@ udp_input(struct mbuf *m, int iphlen) struct ifnet *ifp = m->m_pkthdr.rcvif; boolean_t cell = IFNET_IS_CELLULAR(ifp); boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); + boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp)); bzero(&udp_in, sizeof (udp_in)); udp_in.sin_len = sizeof (struct sockaddr_in); @@ -425,11 +431,7 @@ udp_input(struct mbuf *m, int iphlen) if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif /* INET6 */ - if (inp_restricted(inp, ifp)) - continue; - - if (IFNET_IS_CELLULAR(ifp) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if ((inp->inp_moptions == NULL) && @@ -505,17 +507,14 @@ udp_input(struct mbuf *m, int iphlen) reuse_sock = (inp->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)); -#if IPSEC +#if NECP skipit = 0; - /* check AH/ESP integrity.
*/ - if (ipsec_bypass == 0 && - ipsec4_in_reject_so(m, inp->inp_socket)) { - IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); + if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { /* do not inject data to pcb */ skipit = 1; } if (skipit == 0) -#endif /*IPSEC*/ +#endif /* NECP */ { struct mbuf *n = NULL; @@ -683,16 +682,13 @@ udp_input(struct mbuf *m, int iphlen) IF_UDP_STATINC(ifp, cleanup); goto bad; } -#if IPSEC - if (ipsec_bypass == 0 && inp != NULL) { - if (ipsec4_in_reject_so(m, inp->inp_socket)) { - IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); - udp_unlock(inp->inp_socket, 1, 0); - IF_UDP_STATINC(ifp, badipsec); - goto bad; - } +#if NECP + if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) { + udp_unlock(inp->inp_socket, 1, 0); + IF_UDP_STATINC(ifp, badipsec); + goto bad; } -#endif /* IPSEC */ +#endif /* NECP */ /* * Construct sockaddr format source address. @@ -737,8 +733,8 @@ udp_input(struct mbuf *m, int iphlen) append_sa = (struct sockaddr *)&udp_in; } if (nstat_collect) { - INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); - INP_ADD_STAT(inp, cell, wifi, rxbytes, m->m_pkthdr.len); + INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, m->m_pkthdr.len); } so_recv_data_stat(inp->inp_socket, m, 0); if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, @@ -795,6 +791,7 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, struct mbuf *opts = 0; boolean_t cell = IFNET_IS_CELLULAR(ifp); boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); + boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp)); int ret = 0; #if CONFIG_MACF_NET @@ -842,8 +839,9 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, #endif /* INET6 */ append_sa = (struct sockaddr *)pudp_in; if (nstat_collect) { - INP_ADD_STAT(last, cell, wifi, rxpackets, 1); - INP_ADD_STAT(last, cell, wifi, rxbytes, n->m_pkthdr.len); + INP_ADD_STAT(last, cell, wifi, wired, rxpackets, 1); + INP_ADD_STAT(last, cell, wifi, wired, rxbytes, + n->m_pkthdr.len); } so_recv_data_stat(last->inp_socket, n, 0); m_adj(n, off); @@ -1080,7 +1078,7 @@ udp_pcblist SYSCTL_HANDLER_ARGS } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, - CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); @@ -1185,7 +1183,7 @@ udp_pcblist64 SYSCTL_HANDLER_ARGS } SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64, - CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist64, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist64, "S,xinpcb64", "List of active UDP sockets"); @@ -1197,14 +1195,14 @@ udp_pcblist_n SYSCTL_HANDLER_ARGS } SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist_n, - CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist_n, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist_n, "S,xinpcb_n", "List of active UDP sockets"); __private_extern__ void -udp_get_ports_used(uint32_t ifindex, int protocol, uint32_t wildcardok, +udp_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, bitstr_t *bitfield) { - inpcb_get_ports_used(ifindex, protocol, wildcardok, bitfield, &udbinfo); + inpcb_get_ports_used(ifindex, protocol, flags, bitfield, &udbinfo); } __private_extern__ uint32_t @@ -1313,7 +1311,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct ifnet *outif = NULL; struct flowadv *adv = &ipoa.ipoa_flowadv; mbuf_svc_class_t msc = 
MBUF_SC_UNSPEC; - struct ifnet *origoutifp; + struct ifnet *origoutifp = NULL; int flowadv = 0; /* Enable flow advisory only when connected */ @@ -1367,8 +1365,12 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, outif = inp->inp_boundifp; ipoa.ipoa_boundif = outif->if_index; } - if (inp->inp_flags & INP_NO_IFT_CELLULAR) + if (INP_NO_CELLULAR(inp)) ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; + if (INP_NO_EXPENSIVE(inp)) + ipoa.ipoa_flags |= IPOAF_NO_EXPENSIVE; + if (INP_AWDL_UNRESTRICTED(inp)) + ipoa.ipoa_flags |= IPOAF_AWDL_UNRESTRICTED; soopts |= IP_OUTARGS; /* @@ -1407,8 +1409,6 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, IFA_REMREF(&ia->ia_ifa); } - origoutifp = inp->inp_last_outifp; - /* * IP_PKTINFO option check. If a temporary scope or src address * is provided, use it for this packet only and make sure we forget @@ -1553,9 +1553,21 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport, ui->ui_src.s_addr, ui->ui_dst.s_addr, ui->ui_ulen); + +#if NECP + { + necp_kernel_policy_id policy_id; + if (!necp_socket_is_allowed_to_send_recv_v4(inp, lport, fport, &laddr, &faddr, NULL, &policy_id)) { + error = EHOSTUNREACH; + goto abort; + } + necp_mark_packet_from_socket(m, inp, policy_id); + } +#endif /* NECP */ + #if IPSEC - if (ipsec_bypass == 0 && ipsec_setsocket(m, inp->inp_socket) != 0) { + if (inp->inp_sp != NULL && ipsec_setsocket(m, inp->inp_socket) != 0) { error = ENOBUFS; goto abort; } @@ -1602,16 +1614,17 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, IMO_REMREF(mopts); if (error == 0 && nstat_collect) { - boolean_t cell, wifi; + boolean_t cell, wifi, wired; if (ro.ro_rt != NULL) { cell = IFNET_IS_CELLULAR(ro.ro_rt->rt_ifp); wifi = (!cell && IFNET_IS_WIFI(ro.ro_rt->rt_ifp)); + wired = (!wifi && IFNET_IS_WIRED(ro.ro_rt->rt_ifp)); } else { - cell = wifi = FALSE; + cell = wifi = wired = FALSE; } - INP_ADD_STAT(inp, cell, wifi, txpackets, 1); - INP_ADD_STAT(inp, cell, wifi, txbytes, len); + INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, wired, txbytes, len); } if (flowadv && (adv->code == FADV_FLOW_CONTROLLED || @@ -1661,11 +1674,11 @@ abort: } /* - * If output interface was cellular, and this socket is denied - * access to it, generate an event. + * If output interface was cellular/expensive, and this socket is + * denied access to it, generate an event. */ if (error != 0 && (ipoa.ipoa_retflags & IPOARF_IFDENIED) && - (inp->inp_flags & INP_NO_IFT_CELLULAR)) + (INP_NO_CELLULAR(inp) || INP_NO_EXPENSIVE(inp))) soevent(so, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED)); release: @@ -1781,7 +1794,11 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p) return (EAFNOSUPPORT); inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); error = in_pcbbind(inp, nam, p); return (error); @@ -1794,7 +1811,11 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p) int error; inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? 
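The cell/wifi/wired triage used for the nstat counters above is deliberately mutually exclusive: each class is tested only after the previous one has been ruled out. A standalone sketch of the same logic, with plain booleans standing in for the IFNET_IS_* predicates:

/*
 * Sketch of the interface triage; not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct ifsketch {
    bool is_cell, is_wifi, is_wired;
};

static const char *
classify(const struct ifsketch *ifp)
{
    /* Each class is only considered if the previous one was ruled out. */
    bool cell = ifp->is_cell;
    bool wifi = !cell && ifp->is_wifi;
    bool wired = !wifi && ifp->is_wired;

    return (cell ? "cell" : wifi ? "wifi" : wired ? "wired" : "other");
}

int main(void)
{
    struct ifsketch eth = { false, false, true };
    struct ifsketch wlan = { false, true, false };

    printf("%s %s\n", classify(&eth), classify(&wlan)); /* wired wifi */
    return 0;
}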
EINVAL : EPROTOTYPE); if (inp->inp_faddr.s_addr != INADDR_ANY) return (EISCONN); @@ -1833,6 +1854,10 @@ udp_connectx_common(struct socket *so, int af, VERIFY(dst_se->se_addr->sa_family == af); VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); +#if NECP + inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? dst_se->se_addr : NULL, ifscope); +#endif /* NECP */ + /* bind socket to the specified interface, if requested */ if (ifscope != IFSCOPE_NONE && (error = inp_bindif(inp, ifscope, NULL)) != 0) @@ -1886,6 +1911,16 @@ udp_detach(struct socket *so) panic("%s: so=%p null inp\n", __func__, so); /* NOTREACHED */ } + + /* + * If this is a socket that does not want to wake up the device + * for its traffic, the application might be waiting for + * close to complete before going to sleep. Send a notification + * for this kind of socket + */ + if (so->so_options & SO_NOWAKEFROMSLEEP) + socket_post_kev_msg_closed(so); + in_pcbdetach(inp); inp->inp_state = INPCB_STATE_DEAD; return (0); @@ -1897,7 +1932,11 @@ udp_disconnect(struct socket *so) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); if (inp->inp_faddr.s_addr == INADDR_ANY) return (ENOTCONN); @@ -1931,7 +1970,11 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct inpcb *inp; inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { if (m != NULL) m_freem(m); if (control != NULL) diff --git a/bsd/netinet6/Makefile b/bsd/netinet6/Makefile index 8d3721e43..ae89bdbda 100644 --- a/bsd/netinet6/Makefile +++ b/bsd/netinet6/Makefile @@ -8,12 +8,12 @@ include $(MakeInc_cmd) include $(MakeInc_def) DATAFILES = \ - ah.h ipsec.h pim6.h \ + ah.h ipsec.h \ esp.h in6.h ipcomp.h raw_ip6.h \ - in6_var.h ip6_mroute.h nd6.h + in6_var.h nd6.h PRIVATE_DATAFILES = \ - in6_pcb.h ip6_var.h pim6_var.h mld6_var.h ip6_fw.h + in6_pcb.h ip6_var.h mld6_var.h ip6_fw.h PRIVATE_KERNELFILES = \ ah6.h esp6.h esp_rijndael.h in6_gif.h in6_ifattach.h \ diff --git a/bsd/netinet6/ah_core.c b/bsd/netinet6/ah_core.c index a471825e9..417c67360 100644 --- a/bsd/netinet6/ah_core.c +++ b/bsd/netinet6/ah_core.c @@ -136,7 +136,7 @@ static int ah_hmac_sha1_mature(struct secasvar *); static int ah_hmac_sha1_init(struct ah_algorithm_state *, struct secasvar *); static void ah_hmac_sha1_loop(struct ah_algorithm_state *, caddr_t, size_t); static void ah_hmac_sha1_result(struct ah_algorithm_state *, caddr_t, size_t); -#if ALLCRYPTO +#if AH_ALL_CRYPTO static int ah_sumsiz_sha2_256(struct secasvar *); static int ah_hmac_sha2_256_mature(struct secasvar *); static int ah_hmac_sha2_256_init(struct ah_algorithm_state *, @@ -155,7 +155,7 @@ static int ah_hmac_sha2_512_init(struct ah_algorithm_state *, struct secasvar *); static void ah_hmac_sha2_512_loop(struct ah_algorithm_state *, caddr_t, size_t); static void ah_hmac_sha2_512_result(struct ah_algorithm_state *, caddr_t, size_t); -#endif /* ALLCRYPTO */ +#endif /* AH_ALL_CRYPTO */ static void ah_update_mbuf(struct mbuf *, int, int, const struct ah_algorithm *, struct ah_algorithm_state *); @@ -184,7 +184,7 @@ ah_algorithm_lookup(idx) static struct ah_algorithm ah_none = { ah_sumsiz_zero, ah_none_mature, 0, 2048, "none", ah_none_init, ah_none_loop, ah_none_result, }; -#if ALLCRYPTO +#if
AH_ALL_CRYPTO static struct ah_algorithm hmac_sha2_256 = { ah_sumsiz_sha2_256, ah_hmac_sha2_256_mature, 256, 256, "hmac-sha2-256", @@ -200,7 +200,7 @@ ah_algorithm_lookup(idx) "hmac-sha2-512", ah_hmac_sha2_512_init, ah_hmac_sha2_512_loop, ah_hmac_sha2_512_result, }; -#endif /* ALLCRYPTO */ +#endif /* AH_ALL_CRYPTO */ switch (idx) { case SADB_AALG_MD5HMAC: @@ -213,14 +213,14 @@ ah_algorithm_lookup(idx) return &keyed_sha1; case SADB_X_AALG_NULL: return &ah_none; -#if ALLCRYPTO +#if AH_ALL_CRYPTO case SADB_X_AALG_SHA2_256: return &hmac_sha2_256; case SADB_X_AALG_SHA2_384: return &hmac_sha2_384; case SADB_X_AALG_SHA2_512: return &hmac_sha2_512; -#endif /* ALLCRYPTO */ +#endif /* AH_ALL_CRYPTO */ default: return NULL; } @@ -760,7 +760,7 @@ ah_hmac_sha1_result(state, addr, l) FREE(state->foo, M_TEMP); } -#if ALLCRYPTO +#if AH_ALL_CRYPTO static int ah_sumsiz_sha2_256(sav) struct secasvar *sav; @@ -1188,7 +1188,7 @@ ah_hmac_sha2_512_result(state, addr, l) FREE(state->foo, M_TEMP); } -#endif /* ALLCRYPTO */ +#endif /* AH_ALL_CRYPTO */ /*------------------------------------------------------------*/ diff --git a/bsd/netinet6/ah_input.c b/bsd/netinet6/ah_input.c index 8d13ca5e5..00967821b 100644 --- a/bsd/netinet6/ah_input.c +++ b/bsd/netinet6/ah_input.c @@ -76,6 +76,7 @@ #include #include +#include #include #include #include @@ -140,7 +141,6 @@ ah4_input(struct mbuf *m, int off) size_t stripsiz = 0; sa_family_t ifamily; -#ifndef PULLDOWN_TEST if (m->m_len < off + sizeof(struct newah)) { m = m_pullup(m, off + sizeof(struct newah)); if (!m) { @@ -156,19 +156,6 @@ ah4_input(struct mbuf *m, int off) ip = mtod(m, struct ip *); ah = (struct ah *)(void *)(((caddr_t)ip) + off); -#else - /* Expect 32-bit aligned data pointer on strict-align platforms */ - MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - - ip = mtod(m, struct ip *); - IP6_EXTHDR_GET(ah, struct ah *, m, off, sizeof(struct newah)); - if (ah == NULL) { - ipseclog((LOG_DEBUG, "IPv4 AH input: can't pullup;" - "dropping the packet for simplicity\n")); - IPSEC_STAT_INCREMENT(ipsecstat.in_inval); - goto fail; - } -#endif nxt = ah->ah_nxt; #ifdef _IP_VHL hlen = IP_VHL_HL(ip->ip_vhl) << 2; @@ -258,7 +245,6 @@ ah4_input(struct mbuf *m, int off) goto fail; } -#ifndef PULLDOWN_TEST if (m->m_len < off + sizeof(struct ah) + sizoff + siz1) { m = m_pullup(m, off + sizeof(struct ah) + sizoff + siz1); if (!m) { @@ -272,15 +258,6 @@ ah4_input(struct mbuf *m, int off) ip = mtod(m, struct ip *); ah = (struct ah *)(void *)(((caddr_t)ip) + off); } -#else - IP6_EXTHDR_GET(ah, struct ah *, m, off, - sizeof(struct ah) + sizoff + siz1); - if (ah == NULL) { - ipseclog((LOG_DEBUG, "IPv4 AH input: can't pullup\n")); - IPSEC_STAT_INCREMENT(ipsecstat.in_inval); - goto fail; - } -#endif } /* @@ -510,6 +487,17 @@ ah4_input(struct mbuf *m, int off) IFA_REMREF(ifa); } } + + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + goto done; + } else { + goto fail; + } + } + if (proto_input(PF_INET, m) != 0) goto fail; nxt = IPPROTO_DONE; @@ -519,7 +507,6 @@ ah4_input(struct mbuf *m, int off) */ ip = mtod(m, struct ip *); -#ifndef PULLDOWN_TEST /* * We do deep-copy since KAME requires that * the packet is placed in a single external mbuf. @@ -528,34 +515,6 @@ ah4_input(struct mbuf *m, int off) m->m_data += stripsiz; m->m_len -= stripsiz; m->m_pkthdr.len -= stripsiz; -#else - /* - * even in m_pulldown case, we need to strip off AH so that - * we can compute checksum for multiple AH correctly. 
- */ - if (m->m_len >= stripsiz + off) { - ovbcopy((caddr_t)ip, ((caddr_t)ip) + stripsiz, off); - m->m_data += stripsiz; - m->m_len -= stripsiz; - m->m_pkthdr.len -= stripsiz; - } else { - /* - * this comes with no copy if the boundary is on - * cluster - */ - struct mbuf *n; - - n = m_split(m, off, M_DONTWAIT); - if (n == NULL) { - /* m is retained by m_split */ - goto fail; - } - m_adj(n, stripsiz); - /* m_cat does not update m_pkthdr.len */ - m->m_pkthdr.len += n->m_pkthdr.len; - m_cat(m, n); - } -#endif if (m->m_len < sizeof(*ip)) { m = m_pullup(m, sizeof(*ip)); @@ -584,6 +543,20 @@ ah4_input(struct mbuf *m, int off) struct ip *, ip, struct ip6_hdr *, NULL); if (nxt != IPPROTO_DONE) { + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + ip->ip_len = htons(ip->ip_len + hlen); + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = ip_cksum_hdr_in(m, hlen); + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + goto done; + } else { + goto fail; + } + } + if ((ip_protox[nxt]->pr_flags & PR_LASTHDR) != 0 && ipsec4_in_reject(m, NULL)) { IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); @@ -594,7 +567,7 @@ ah4_input(struct mbuf *m, int off) m_freem(m); m = NULL; } - +done: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ah4_input call free SA:0x%llx\n", @@ -636,17 +609,8 @@ ah6_input(struct mbuf **mp, int *offp, int proto) size_t stripsiz = 0; -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct ah), {return IPPROTO_DONE;}); ah = (struct ah *)(void *)(mtod(m, caddr_t) + off); -#else - IP6_EXTHDR_GET(ah, struct ah *, m, off, sizeof(struct newah)); - if (ah == NULL) { - ipseclog((LOG_DEBUG, "IPv6 AH input: can't pullup\n")); - ipsec6stat.in_inval++; - return IPPROTO_DONE; - } -#endif /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); @@ -724,19 +688,8 @@ ah6_input(struct mbuf **mp, int *offp, int proto) IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); goto fail; } -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct ah) + sizoff + siz1, {return IPPROTO_DONE;}); -#else - IP6_EXTHDR_GET(ah, struct ah *, m, off, - sizeof(struct ah) + sizoff + siz1); - if (ah == NULL) { - ipseclog((LOG_NOTICE, "couldn't pullup gather IPv6 AH checksum part")); - IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); - m = NULL; - goto fail; - } -#endif } /* @@ -935,6 +888,17 @@ ah6_input(struct mbuf **mp, int *offp, int proto) } } + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + nxt = IPPROTO_DONE; + goto done; + } else { + goto fail; + } + } + if (proto_input(PF_INET6, m) != 0) goto fail; nxt = IPPROTO_DONE; @@ -953,7 +917,6 @@ ah6_input(struct mbuf **mp, int *offp, int proto) *prvnxtp = nxt; ip6 = mtod(m, struct ip6_hdr *); -#ifndef PULLDOWN_TEST /* * We do deep-copy since KAME requires that * the packet is placed in a single mbuf. @@ -962,34 +925,6 @@ ah6_input(struct mbuf **mp, int *offp, int proto) m->m_data += stripsiz; m->m_len -= stripsiz; m->m_pkthdr.len -= stripsiz; -#else - /* - * even in m_pulldown case, we need to strip off AH so that - * we can compute checksum for multiple AH correctly. 
- */ - if (m->m_len >= stripsiz + off) { - ovbcopy((caddr_t)ip6, ((caddr_t)ip6) + stripsiz, off); - m->m_data += stripsiz; - m->m_len -= stripsiz; - m->m_pkthdr.len -= stripsiz; - } else { - /* - * this comes with no copy if the boundary is on - * cluster - */ - struct mbuf *n; - - n = m_split(m, off, M_DONTWAIT); - if (n == NULL) { - /* m is retained by m_split */ - goto fail; - } - m_adj(n, stripsiz); - /* m_cat does not update m_pkthdr.len */ - m->m_pkthdr.len += n->m_pkthdr.len; - m_cat(m, n); - } -#endif ip6 = mtod(m, struct ip6_hdr *); /* XXX jumbogram */ ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - stripsiz); @@ -999,11 +934,22 @@ ah6_input(struct mbuf **mp, int *offp, int proto) IPSEC_STAT_INCREMENT(ipsec6stat.in_nomem); goto fail; } + + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + nxt = IPPROTO_DONE; + goto done; + } else { + goto fail; + } + } } +done: *offp = off; *mp = m; - if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ah6_input call free SA:0x%llx\n", diff --git a/bsd/netinet6/dest6.c b/bsd/netinet6/dest6.c index 993ee1a91..4feef21cb 100644 --- a/bsd/netinet6/dest6.c +++ b/bsd/netinet6/dest6.c @@ -63,24 +63,12 @@ dest6_input(struct mbuf **mp, int *offp, int proto) u_int8_t *opt; /* validation of the length of the header */ -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*dstopts), return IPPROTO_DONE); dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off); -#else - IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, sizeof(*dstopts)); - if (dstopts == NULL) - return IPPROTO_DONE; -#endif dstoptlen = (dstopts->ip6d_len + 1) << 3; -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, dstoptlen, return IPPROTO_DONE); dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off); -#else - IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, dstoptlen); - if (dstopts == NULL) - return IPPROTO_DONE; -#endif off += dstoptlen; dstoptlen -= sizeof(struct ip6_dest); opt = (u_int8_t *)dstopts + sizeof(struct ip6_dest); diff --git a/bsd/netinet6/esp_core.c b/bsd/netinet6/esp_core.c index 8a1d06c0a..b5236fdd7 100644 --- a/bsd/netinet6/esp_core.c +++ b/bsd/netinet6/esp_core.c @@ -431,7 +431,7 @@ esp_cbc_mature(sav) algo = esp_algorithm_lookup(sav->alg_enc); if (!algo) { ipseclog((LOG_ERR, - "esp_cbc_mature %s: unsupported algorithm.\n", algo->name)); + "esp_cbc_mature: unsupported algorithm.\n")); return 1; } @@ -748,7 +748,7 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen) /* just in case */ bzero(iv, sizeof(iv)); - bzero(sbuf, sizeof(sbuf)); + bzero(sbuf, blocklen); end: if (sbuf != NULL) FREE(sbuf, M_SECA); @@ -977,7 +977,7 @@ esp_cbc_encrypt( /* just in case */ bzero(iv, sizeof(iv)); - bzero(sbuf, sizeof(sbuf)); + bzero(sbuf, blocklen); key_sa_stir_iv(sav); end: diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index 9d0e549f8..c8a809490 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -76,6 +76,7 @@ #include #include +#include #include #include #include @@ -511,14 +512,10 @@ noreplaycheck: } else if (ifamily == AF_INET6) { struct sockaddr_in6 *ip6addr; -#ifndef PULLDOWN_TEST /* * m_pullup is prohibited in KAME IPv6 input processing * but there's no other way! 
*/ -#else - /* okay to pullup in m_pulldown style */ -#endif if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); if (!m) { @@ -581,6 +578,16 @@ noreplaycheck: /* Clear the csum flags, they can't be valid for the inner headers */ m->m_pkthdr.csum_flags = 0; + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + goto done; + } else { + goto bad; + } + } + if (sav->utun_in_fn) { if (!(sav->utun_in_fn(sav->utun_pcb, &m, ifamily == AF_INET ? PF_INET : PF_INET6))) { m = NULL; @@ -681,6 +688,20 @@ noreplaycheck: struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif, struct ip *, ip, struct ip6_hdr *, NULL); + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + ip->ip_len = htons(ip->ip_len + hlen); + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = ip_cksum_hdr_in(m, hlen); + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + goto done; + } else { + goto bad; + } + } + if (sav->utun_in_fn) { if (!(sav->utun_in_fn(sav->utun_pcb, &m, PF_INET))) { m = NULL; @@ -695,6 +716,7 @@ noreplaycheck: m = NULL; } +done: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp4_input call free SA:0x%llx\n", @@ -1047,6 +1069,17 @@ noreplaycheck: } } + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + nxt = IPPROTO_DONE; + goto done; + } else { + goto bad; + } + } + if (sav->utun_in_fn) { if (!(sav->utun_in_fn(sav->utun_pcb, &m, PF_INET6))) { m = NULL; @@ -1154,6 +1187,17 @@ noreplaycheck: goto bad; } + // Input via IPSec interface + if (sav->sah->ipsec_if != NULL) { + if (ipsec_inject_inbound_packet(sav->sah->ipsec_if, m) == 0) { + m = NULL; + nxt = IPPROTO_DONE; + goto done; + } else { + goto bad; + } + } + if (sav->utun_in_fn) { if (!(sav->utun_in_fn(sav->utun_pcb, &m, PF_INET6))) { m = NULL; @@ -1163,9 +1207,9 @@ noreplaycheck: } } +done: *offp = off; *mp = m; - if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp6_input call free SA:0x%llx\n", diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index 6a92d7380..abe6b2e21 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -298,14 +298,8 @@ frag6_input(struct mbuf **mp, int *offp, int proto) MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ip6 = mtod(m, struct ip6_hdr *); -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); -#else - IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); - if (ip6f == NULL) - goto done; -#endif #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. */ diff --git a/bsd/netinet6/icmp6.c b/bsd/netinet6/icmp6.c index c146a8995..99d92784c 100644 --- a/bsd/netinet6/icmp6.c +++ b/bsd/netinet6/icmp6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -131,12 +131,14 @@ #if IPSEC #include #include - -extern int ipsec_bypass; #endif #include +#if NECP +#include +#endif + extern struct ip6protosw *ip6_protox[]; extern uint32_t rip_sendspace; @@ -146,8 +148,11 @@ struct icmp6stat icmp6stat; extern struct inpcbhead ripcb; extern int icmp6errppslim; +extern int icmp6rappslim; static int icmp6errpps_count = 0; +static int icmp6rapps_count = 0; static struct timeval icmp6errppslim_last; +static struct timeval icmp6rappslim_last; extern int icmp6_nodeinfo; extern struct inpcbinfo ripcbinfo; @@ -2057,11 +2062,7 @@ icmp6_rip6_input(mp, off) in6p->in6p_icmp6filt)) continue; - if (inp_restricted(in6p, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (in6p->in6p_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(in6p, ifp)) continue; if (last) { @@ -2318,11 +2319,6 @@ icmp6_reflect(m, off) */ m->m_flags &= ~(M_BCAST|M_MCAST); -#if IPSEC - /* Don't lookup socket */ - if (ipsec_bypass == 0) - (void)ipsec_setsocket(m, NULL); -#endif /*IPSEC*/ if (outif != NULL) { ifnet_release(outif); @@ -2852,12 +2848,6 @@ noredhdropt:; = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); /* send the packet to outside... */ -#if IPSEC - /* Don't lookup socket */ - if (ipsec_bypass == 0) - (void)ipsec_setsocket(m, NULL); -#endif /*IPSEC*/ - ip6oa.ip6oa_boundif = ifp->if_index; ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; @@ -3033,7 +3023,11 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr_in6 *dst = (struct sockaddr_in6 *)(void *)nam; struct icmp6_hdr *icmp6; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { error = (inp == NULL ? EINVAL : EPROTOTYPE); goto bad; } @@ -3149,20 +3143,24 @@ icmp6_dgram_attach(struct socket *so, int proto, struct proc *p) * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * - * XXX per-destination/type check necessary? + * XXX per-destination check necessary? */ static int icmp6_ratelimit( __unused const struct in6_addr *dst, /* not used at this moment */ - __unused const int type, /* not used at this moment */ - __unused const int code) /* not used at this moment */ + const int type, + __unused const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ - if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count, + if (type == ND_ROUTER_ADVERT) { + if (!ppsratecheck(&icmp6rappslim_last, &icmp6rapps_count, + icmp6rappslim)) + ret++; + } else if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count, icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; diff --git a/bsd/netinet6/in6.c b/bsd/netinet6/in6.c index 261fdc420..e0db9a422 100644 --- a/bsd/netinet6/in6.c +++ b/bsd/netinet6/in6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2013 Apple Inc. All rights reserved. + * Copyright (c) 2003-2014 Apple Inc. All rights reserved. 
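icmp6_ratelimit() now gives router advertisements their own packets-per-second budget (icmp6rappslim) instead of charging them against the error limit. A simplified userland model of a ppsratecheck()-style gate; this mimics the semantics relied on above, not the kernel implementation:

/*
 * Allow at most maxpps events per one-second window.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool
pps_ok(time_t *last, int *count, int maxpps)
{
    time_t now = time(NULL);

    if (now != *last) {     /* new one-second window, reset the budget */
        *last = now;
        *count = 0;
    }
    if (maxpps < 0)         /* negative limit means "no limit" */
        return (true);
    return (++*count <= maxpps);
}

int main(void)
{
    time_t last = 0;
    int count = 0;

    /* With a budget of 10/s, packets 11 and 12 get rate-limited. */
    for (int i = 1; i <= 12; i++)
        printf("pkt %2d: %s\n", i,
            pps_ok(&last, &count, 10) ? "send" : "rate-limited");
    return 0;
}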
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -134,7 +134,6 @@ #include #include #include -#include #include #include #include @@ -172,8 +171,6 @@ const struct sockaddr_in6 sa6_any = { sizeof (sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 }; -static int in6ctl_lifaddr(struct ifnet *, u_long, struct if_laddrreq *, - boolean_t); static int in6ctl_associd(struct socket *, u_long, caddr_t); static int in6ctl_connid(struct socket *, u_long, caddr_t); static int in6ctl_conninfo(struct socket *, u_long, caddr_t); @@ -762,6 +759,7 @@ static __attribute__((noinline)) int in6ctl_llstop(struct ifnet *ifp) { struct in6_ifaddr *ia; + struct nd_prefix pr0, *pr; VERIFY(ifp != NULL); @@ -794,6 +792,23 @@ in6ctl_llstop(struct ifnet *ifp) ia = ia->ia_next; } lck_rw_done(&in6_ifaddr_rwlock); + + /* Delete the link local prefix */ + bzero(&pr0, sizeof(pr0)); + pr0.ndpr_plen = 64; + pr0.ndpr_ifp = ifp; + pr0.ndpr_prefix.sin6_addr.s6_addr16[0] = IPV6_ADDR_INT16_ULL; + in6_setscope(&pr0.ndpr_prefix.sin6_addr, ifp, NULL); + pr = nd6_prefix_lookup(&pr0); + if (pr) { + lck_mtx_lock(nd6_mutex); + NDPR_LOCK(pr); + prelist_remove(pr); + NDPR_UNLOCK(pr); + NDPR_REMREF(pr); /* Drop the reference from lookup */ + lck_mtx_unlock(nd6_mutex); + } + return (0); } @@ -1090,14 +1105,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * ioctls which don't require ifp, may require socket. */ switch (cmd) { -#if MROUTING - case SIOCGETSGCNT_IN6: /* struct sioc_sg_req6 */ - case SIOCGETMIFCNT_IN6_32: /* struct sioc_mif_req6_32 */ - case SIOCGETMIFCNT_IN6_64: /* struct sioc_mif_req6_64 */ - return (mrt6_ioctl(cmd, data)); - /* NOTREACHED */ -#endif /* MROUTING */ - case SIOCAADDRCTL_POLICY: /* struct in6_addrpolicy */ case SIOCDADDRCTL_POLICY: /* struct in6_addrpolicy */ if (!privileged) @@ -1235,22 +1242,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, case SIOCGSCOPE6DEF: /* struct in6_ifreq (deprecated) */ return (EOPNOTSUPP); /* NOTREACHED */ - - case SIOCALIFADDR: /* struct if_laddrreq */ - case SIOCDLIFADDR: /* struct if_laddrreq */ - if (!privileged) - return (EPERM); - /* FALLTHRU */ - case SIOCGLIFADDR: { /* struct if_laddrreq */ - struct if_laddrreq iflr; - - bcopy(data, &iflr, sizeof (iflr)); - error = in6ctl_lifaddr(ifp, cmd, &iflr, p64); - bcopy(&iflr, data, sizeof (iflr)); - return (error); - /* NOTREACHED */ - } - + case SIOCLL_CGASTART_32: /* struct in6_llstartreq_32 */ case SIOCLL_CGASTART_64: /* struct in6_llstartreq_64 */ if (!privileged) @@ -1803,6 +1795,13 @@ in6_ifaupdate_aux(struct in6_ifaddr *ia, struct ifnet *ifp, int ifaupflags) ifa = &ia->ia_ifa; in6m_sol = NULL; + nd6log2((LOG_DEBUG, "%s - %s ifp %s ia6_flags 0x%x ifaupflags 0x%x\n", + __func__, + ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ia->ia_ifp), + ia->ia6_flags, + ifaupflags)); + /* * Mark the address as tentative before joining multicast addresses, * so that corresponding MLD responses would not have a tentative @@ -2011,7 +2010,13 @@ in6_ifaupdate_aux(struct in6_ifaddr *ia, struct ifnet *ifp, int ifaupflags) IFA_UNLOCK(ifa); delayptr = NULL; - if ((ifaupflags & IN6_IFAUPDATE_DADDELAY)) { + /* + * Avoid the DAD delay if the caller wants us to skip it. + * This is not compliant with RFC 2461, but it's only being + * used for signalling and not for actual DAD. + */ + if ((ifaupflags & IN6_IFAUPDATE_DADDELAY) && + !(ia->ia6_flags & IN6_IFF_SWIFTDAD)) { /* * We need to impose a delay before sending an NS * for DAD. 
Check if we also needed a delay for the @@ -2383,7 +2388,7 @@ in6_purgeaddr(struct ifaddr *ifa) /* in6_unlink_ifa() will need exclusive access */ in6_unlink_ifa(ia, ifp); - in6_post_msg(ifp, KEV_INET6_ADDR_DELETED, ia); + in6_post_msg(ifp, KEV_INET6_ADDR_DELETED, ia, NULL); (void) ifnet_notify_address(ifp, AF_INET6); } @@ -2508,341 +2513,6 @@ in6_purgeif(struct ifnet *ifp) in6_ifdetach(ifp); } -/* - * SIOC[GAD]LIFADDR. - * SIOCGLIFADDR: get first address. (?) - * SIOCGLIFADDR with IFLR_PREFIX: - * get first address that matches the specified prefix. - * SIOCALIFADDR: add the specified address. - * SIOCALIFADDR with IFLR_PREFIX: - * add the specified prefix, filling hostaddr part from - * the first link-local address. prefixlen must be <= 64. - * SIOCDLIFADDR: delete the specified address. - * SIOCDLIFADDR with IFLR_PREFIX: - * delete the first address that matches the specified prefix. - * return values: - * EINVAL on invalid parameters - * EADDRNOTAVAIL on prefix match failed/specified address not found - * other values may be returned from in6_ioctl() - * - * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64. - * this is to accomodate address naming scheme other than RFC2374, - * in the future. - * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 - * address encoding scheme. (see figure on page 8) - */ -static __attribute__((noinline)) int -in6ctl_lifaddr(struct ifnet *ifp, u_long cmd, struct if_laddrreq *iflr, - boolean_t p64) -{ - struct in6_aliasreq ifra; - struct ifaddr *ifa; - struct sockaddr *sa; - - VERIFY(ifp != NULL); - - switch (cmd) { - case SIOCGLIFADDR: - /* address must be specified on GET with IFLR_PREFIX */ - if (!(iflr->flags & IFLR_PREFIX)) - break; - /* FALLTHROUGH */ - case SIOCALIFADDR: - case SIOCDLIFADDR: - /* address must be specified on ADD and DELETE */ - sa = (struct sockaddr *)&iflr->addr; - if (sa->sa_family != AF_INET6) - return (EINVAL); - if (sa->sa_len != sizeof (struct sockaddr_in6)) - return (EINVAL); - /* XXX need improvement */ - sa = (struct sockaddr *)&iflr->dstaddr; - if (sa->sa_family && sa->sa_family != AF_INET6) - return (EINVAL); - if (sa->sa_len && sa->sa_len != sizeof (struct sockaddr_in6)) - return (EINVAL); - break; - default: - /* shouldn't happen */ - VERIFY(0); - /* NOTREACHED */ - } - if (sizeof (struct in6_addr) * 8 < iflr->prefixlen) - return (EINVAL); - - switch (cmd) { - case SIOCALIFADDR: { - struct in6_addr hostaddr; - int prefixlen; - int hostid_found = 0; - - if ((iflr->flags & IFLR_PREFIX) != 0) { - struct sockaddr_in6 *sin6; - - /* - * hostaddr is to fill in the hostaddr part of the - * address. hostaddr points to the first link-local - * address attached to the interface. - */ - ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); - if (!ifa) - return (EADDRNOTAVAIL); - IFA_LOCK_SPIN(ifa); - hostaddr = *IFA_IN6(ifa); - IFA_UNLOCK(ifa); - hostid_found = 1; - IFA_REMREF(ifa); - ifa = NULL; - - /* prefixlen must be <= 64. */ - if (64 < iflr->prefixlen) - return (EINVAL); - prefixlen = iflr->prefixlen; - - /* hostid part must be zero. */ - sin6 = (struct sockaddr_in6 *)&iflr->addr; - if (sin6->sin6_addr.s6_addr32[2] != 0 || - sin6->sin6_addr.s6_addr32[3] != 0) { - return (EINVAL); - } - } else { - prefixlen = iflr->prefixlen; - } - /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). 
*/ - bzero(&ifra, sizeof (ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, sizeof (ifra.ifra_name)); - - bcopy(&iflr->addr, &ifra.ifra_addr, - ((struct sockaddr *)&iflr->addr)->sa_len); - if (hostid_found) { - /* fill in hostaddr part */ - ifra.ifra_addr.sin6_addr.s6_addr32[2] = - hostaddr.s6_addr32[2]; - ifra.ifra_addr.sin6_addr.s6_addr32[3] = - hostaddr.s6_addr32[3]; - } - - if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */ - bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, - ((struct sockaddr *)&iflr->dstaddr)->sa_len); - if (hostid_found) { - ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = - hostaddr.s6_addr32[2]; - ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = - hostaddr.s6_addr32[3]; - } - } - - ifra.ifra_prefixmask.sin6_len = sizeof (struct sockaddr_in6); - in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); - - ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; - if (!p64) { -#if defined(__LP64__) - struct in6_aliasreq_32 ifra_32; - /* - * Use 32-bit ioctl and structure for 32-bit process. - */ - in6_aliasreq_64_to_32((struct in6_aliasreq_64 *)&ifra, - &ifra_32); - return (in6_control(NULL, SIOCAIFADDR_IN6_32, - (caddr_t)&ifra_32, ifp, kernproc)); -#else - return (in6_control(NULL, SIOCAIFADDR_IN6, - (caddr_t)&ifra, ifp, kernproc)); -#endif /* __LP64__ */ - } else { -#if defined(__LP64__) - return (in6_control(NULL, SIOCAIFADDR_IN6, - (caddr_t)&ifra, ifp, kernproc)); -#else - struct in6_aliasreq_64 ifra_64; - /* - * Use 64-bit ioctl and structure for 64-bit process. - */ - in6_aliasreq_32_to_64((struct in6_aliasreq_32 *)&ifra, - &ifra_64); - return (in6_control(NULL, SIOCAIFADDR_IN6_64, - (caddr_t)&ifra_64, ifp, kernproc)); -#endif /* __LP64__ */ - } - /* NOTREACHED */ - } - - case SIOCGLIFADDR: - case SIOCDLIFADDR: { - struct in6_ifaddr *ia; - struct in6_addr mask, candidate, match; - struct sockaddr_in6 *sin6; - int cmp; - - bzero(&mask, sizeof (mask)); - if (iflr->flags & IFLR_PREFIX) { - /* lookup a prefix rather than address. */ - in6_prefixlen2mask(&mask, iflr->prefixlen); - - sin6 = (struct sockaddr_in6 *)&iflr->addr; - bcopy(&sin6->sin6_addr, &match, sizeof (match)); - match.s6_addr32[0] &= mask.s6_addr32[0]; - match.s6_addr32[1] &= mask.s6_addr32[1]; - match.s6_addr32[2] &= mask.s6_addr32[2]; - match.s6_addr32[3] &= mask.s6_addr32[3]; - - /* if you set extra bits, that's wrong */ - if (bcmp(&match, &sin6->sin6_addr, sizeof (match))) - return (EINVAL); - - cmp = 1; - } else { - if (cmd == SIOCGLIFADDR) { - /* on getting an address, take the 1st match */ - cmp = 0; /* XXX */ - } else { - /* on deleting an address, do exact match */ - in6_prefixlen2mask(&mask, 128); - sin6 = (struct sockaddr_in6 *)&iflr->addr; - bcopy(&sin6->sin6_addr, &match, sizeof (match)); - - cmp = 1; - } - } - - ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family != AF_INET6) { - IFA_UNLOCK(ifa); - continue; - } - if (!cmp) { - IFA_UNLOCK(ifa); - break; - } - - bcopy(IFA_IN6(ifa), &candidate, sizeof (candidate)); - IFA_UNLOCK(ifa); - /* - * XXX: this is adhoc, but is necessary to allow - * a user to specify fe80::/64 (not /10) for a - * link-local address. 
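For context on the ad-hoc scope handling this deleted block refers to: KAME-derived stacks keep the zone of a link-local address embedded in the address itself while it is inside the kernel, in s6_addr16[1] (the interface index), whereas userland always sees that hextet as zero. Any masked comparison between a kernel-internal candidate and a user-supplied fe80::/64 prefix therefore has to clear the embedded word first. Roughly, per the deleted loop:

	struct in6_addr candidate;
	int i;

	bcopy(IFA_IN6(ifa), &candidate, sizeof (candidate));
	if (IN6_IS_ADDR_LINKLOCAL(&candidate))
		candidate.s6_addr16[1] = 0;	/* strip embedded ifindex */
	for (i = 0; i < 4; i++)			/* then apply the prefix mask */
		candidate.s6_addr32[i] &= mask.s6_addr32[i];
	/* candidate is now comparable against the user-supplied prefix */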
- */ - if (IN6_IS_ADDR_LINKLOCAL(&candidate)) - candidate.s6_addr16[1] = 0; - candidate.s6_addr32[0] &= mask.s6_addr32[0]; - candidate.s6_addr32[1] &= mask.s6_addr32[1]; - candidate.s6_addr32[2] &= mask.s6_addr32[2]; - candidate.s6_addr32[3] &= mask.s6_addr32[3]; - if (IN6_ARE_ADDR_EQUAL(&candidate, &match)) - break; - } - if (ifa != NULL) - IFA_ADDREF(ifa); - ifnet_lock_done(ifp); - if (!ifa) - return (EADDRNOTAVAIL); - ia = ifa2ia6(ifa); - - if (cmd == SIOCGLIFADDR) { - struct sockaddr_in6 *s6; - - IFA_LOCK(ifa); - /* fill in the if_laddrreq structure */ - bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len); - s6 = (struct sockaddr_in6 *)&iflr->addr; - if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) { - s6->sin6_addr.s6_addr16[1] = 0; - s6->sin6_scope_id = - in6_addr2scopeid(ifp, &s6->sin6_addr); - } - if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { - bcopy(&ia->ia_dstaddr, &iflr->dstaddr, - ia->ia_dstaddr.sin6_len); - s6 = (struct sockaddr_in6 *)&iflr->dstaddr; - if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) { - s6->sin6_addr.s6_addr16[1] = 0; - s6->sin6_scope_id = - in6_addr2scopeid(ifp, - &s6->sin6_addr); - } - } else - bzero(&iflr->dstaddr, sizeof (iflr->dstaddr)); - - iflr->prefixlen = - in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); - - iflr->flags = ia->ia6_flags; /* XXX */ - IFA_UNLOCK(ifa); - IFA_REMREF(ifa); - return (0); - } else { - /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ - bzero(&ifra, sizeof (ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, - sizeof (ifra.ifra_name)); - - IFA_LOCK(ifa); - bcopy(&ia->ia_addr, &ifra.ifra_addr, - ia->ia_addr.sin6_len); - if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { - bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, - ia->ia_dstaddr.sin6_len); - } else { - bzero(&ifra.ifra_dstaddr, - sizeof (ifra.ifra_dstaddr)); - } - bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr, - ia->ia_prefixmask.sin6_len); - - ifra.ifra_flags = ia->ia6_flags; - IFA_UNLOCK(ifa); - IFA_REMREF(ifa); - if (!p64) { -#if defined(__LP64__) - struct in6_aliasreq_32 ifra_32; - /* - * Use 32-bit structure for 32-bit process. - * SIOCDIFADDR_IN6 is encoded with in6_ifreq, - * so it stays the same since the size does - * not change. The data part of the ioctl, - * however, is of a different structure, i.e. - * in6_aliasreq. - */ - in6_aliasreq_64_to_32( - (struct in6_aliasreq_64 *)&ifra, &ifra_32); - return (in6_control(NULL, SIOCDIFADDR_IN6, - (caddr_t)&ifra_32, ifp, kernproc)); -#else - return (in6_control(NULL, SIOCDIFADDR_IN6, - (caddr_t)&ifra, ifp, kernproc)); -#endif /* __LP64__ */ - } else { -#if defined(__LP64__) - return (in6_control(NULL, SIOCDIFADDR_IN6, - (caddr_t)&ifra, ifp, kernproc)); -#else - struct in6_aliasreq_64 ifra_64; - /* - * Use 64-bit structure for 64-bit process. - * SIOCDIFADDR_IN6 is encoded with in6_ifreq, - * so it stays the same since the size does - * not change. The data part of the ioctl, - * however, is of a different structure, i.e. - * in6_aliasreq. - */ - in6_aliasreq_32_to_64( - (struct in6_aliasreq_32 *)&ifra, &ifra_64); - return (in6_control(NULL, SIOCDIFADDR_IN6, - (caddr_t)&ifra_64, ifp, kernproc)); -#endif /* __LP64__ */ - } - /* NOTREACHED */ - } - } - } - - return (EOPNOTSUPP); /* just for safety */ -} - /* * Initialize an interface's internet6 address and routing table entry. */ @@ -3765,7 +3435,8 @@ in6if_do_dad( * for now, even when not marked as using the alternative * interface. This is for historical reasons. 
*/ - if (ifp->if_eflags & (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE)) + if (ifp->if_eflags & + (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE|IFEF_DIRECTLINK)) return (0); switch (ifp->if_type) { @@ -3970,7 +3641,8 @@ in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) * are large enough to span 68 years. */ void -in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa) +in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa, + uint8_t *mac) { struct kev_msg ev_msg; struct kev_in6_data in6_event_data; @@ -4000,12 +3672,16 @@ in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa) IFA_UNLOCK(&ifa->ia_ifa); if (ifp != NULL) { - (void) strncpy(&in6_event_data.link_data.if_name[0], + (void) strlcpy(&in6_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in6_event_data.link_data.if_family = ifp->if_family; in6_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; } + if (mac != NULL) + memcpy(&in6_event_data.ia_mac, mac, + sizeof(in6_event_data.ia_mac)); + ev_msg.dv[0].data_ptr = &in6_event_data; ev_msg.dv[0].data_length = sizeof (in6_event_data); ev_msg.dv[1].data_length = 0; @@ -4170,10 +3846,29 @@ in6_ifaddr_trace(struct ifaddr *ifa, int refhold) static void in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia) { + struct ifnet* ifp = ia->ia_ifp; uint32_t flags = IN6_IFF_TENTATIVE; uint32_t optdad = nd6_optimistic_dad; - if (optdad && (ia->ia_ifp->if_eflags & IFEF_IPV6_ROUTER) == 0) { + if (optdad) { + if ((ifp->if_eflags & IFEF_IPV6_ROUTER) != 0) { + optdad = 0; + } else { + struct nd_ifinfo *ndi; + + lck_rw_lock_shared(nd_if_rwlock); + ndi = ND_IFINFO(ifp); + VERIFY (ndi != NULL && ndi->initialized); + lck_mtx_lock(&ndi->lock); + if ((ndi->flags & ND6_IFF_REPLICATED) != 0) { + optdad = 0; + } + lck_mtx_unlock(&ndi->lock); + lck_rw_done(nd_if_rwlock); + } + } + + if (optdad) { if ((optdad & ND6_OPTIMISTIC_DAD_LINKLOCAL) && IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) flags = IN6_IFF_OPTIMISTIC; @@ -4194,11 +3889,28 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia) } else { flags = IN6_IFF_OPTIMISTIC; } + } else if ((optdad & ND6_OPTIMISTIC_DAD_MANUAL) && + (ia->ia6_flags & IN6_IFF_OPTIMISTIC)) { + /* + * rdar://17483438 + * Bypass tentative for address assignments + * not covered above (e.g. 
manual) upon request + */ + if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr) && + !(ia->ia6_flags & IN6_IFF_AUTOCONF) && + !(ia->ia6_flags & IN6_IFF_DYNAMIC)) + flags = IN6_IFF_OPTIMISTIC; } } ia->ia6_flags &= ~(IN6_IFF_DUPLICATED | IN6_IFF_DADPROGRESS); ia->ia6_flags |= flags; + + nd6log2((LOG_DEBUG, "%s - %s ifp %s ia6_flags 0x%x\n", + __func__, + ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ia->ia_ifp), + ia->ia6_flags)); } /* @@ -4302,7 +4014,7 @@ in6_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, /* source address and port */ sin6.sin6_port = in6p->in6p_lport; - bcopy(&in6p->in6p_laddr, &sin6.sin6_addr, sizeof (struct in6_addr)); + in6_recoverscope(&sin6, &in6p->in6p_laddr, NULL); if (*src_len == 0) { *src_len = sin6.sin6_len; } else { @@ -4317,7 +4029,7 @@ in6_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, /* destination address and port */ sin6.sin6_port = in6p->in6p_fport; - bcopy(&in6p->in6p_faddr, &sin6.sin6_addr, sizeof (struct in6_addr)); + in6_recoverscope(&sin6, &in6p->in6p_faddr, NULL); if (*dst_len == 0) { *dst_len = sin6.sin6_len; } else { @@ -4377,9 +4089,6 @@ in6ioctl_cassert(void) case 0: /* bsd/netinet6/in6_var.h */ - case SIOCGETSGCNT_IN6: - case SIOCGETMIFCNT_IN6_32: - case SIOCGETMIFCNT_IN6_64: case SIOCAADDRCTL_POLICY: case SIOCDADDRCTL_POLICY: case SIOCDRADD_IN6_32: diff --git a/bsd/netinet6/in6.h b/bsd/netinet6/in6.h index 863d53935..d8d71fe3e 100644 --- a/bsd/netinet6/in6.h +++ b/bsd/netinet6/in6.h @@ -457,12 +457,7 @@ extern const struct in6_addr in6addr_linklocal_allv2routers; #endif /* KERNEL */ #ifdef PRIVATE -#ifndef KERNEL -/* Private declaration for user-space (needed by ip6_mroute.h) */ -struct route_in6 { -#else /* KERNEL */ struct route_in6_old { -#endif /* KERNEL */ void *ro_rt; uint32_t ro_flags; struct sockaddr_in6 ro_dst; @@ -838,7 +833,7 @@ struct ip6_mtuinfo { { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ - { "pim6", CTLTYPE_NODE }, \ + { 0, 0 }, \ } /* * Redefinition of mbuf flags diff --git a/bsd/netinet6/in6_ifattach.c b/bsd/netinet6/in6_ifattach.c index 06e82afb5..e2a232a90 100644 --- a/bsd/netinet6/in6_ifattach.c +++ b/bsd/netinet6/in6_ifattach.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2013 Apple Inc. All rights reserved. + * Copyright (c) 2003-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include #include @@ -181,7 +181,7 @@ in6_generate_tmp_iid( /* XXX assumption on the size of IFID */ bcopy(seed1, &seed[8], 8); - if (0) { /* for debugging purposes only */ + if ((0)) { /* for debugging purposes only */ int i; printf("%s: new randomized ID from: ", __func__); @@ -226,7 +226,7 @@ in6_generate_tmp_iid( */ bcopy(&digest[8], seed0, 8); - if (0) { /* for debugging purposes only */ + if ((0)) { /* for debugging purposes only */ int i; printf("to: "); @@ -526,7 +526,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct in6_aliasreq *ifra) } } - in6_post_msg(ifp, KEV_INET6_NEW_LL_ADDR, ia); + in6_post_msg(ifp, KEV_INET6_NEW_LL_ADDR, ia, NULL); IFA_REMREF(&ia->ia_ifa); /* Drop use count held above during lookup/add */ @@ -551,7 +551,7 @@ in6_ifattach_loopback( * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. 
*/ - strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); + strlcpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); ifra.ifra_prefixmask.sin6_len = sizeof (struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; @@ -576,9 +576,6 @@ in6_ifattach_loopback( /* we don't need to perform DAD on loopback interfaces. */ ifra.ifra_flags |= IN6_IFF_NODAD; - /* skip registration to the prefix list. XXX should be temporary. */ - ifra.ifra_flags |= IN6_IFF_NOPFX; - /* add the new interface address */ error = in6_update_ifa(ifp, &ifra, 0, &ia); if (error != 0) { @@ -623,7 +620,7 @@ in6_nigroup( if (p - name > sizeof (n) - 1) return (-1); /* label too long */ l = p - name; - strncpy(n, name, l); + strlcpy(n, name, l); n[(int)l] = '\0'; for (q = (u_char *) n; *q; q++) { if ('A' <= *q && *q <= 'Z') @@ -801,7 +798,7 @@ in6_ifattach_aliasreq(struct ifnet *ifp, struct ifnet *altifp, * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. */ - strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); + strlcpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); /* Initialize the IPv6 interface address in our in6_aliasreq block */ if ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0 && ifra0 != NULL) { @@ -909,7 +906,7 @@ in6_ifattach_llstartreq(struct ifnet *ifp, struct in6_llstartreq *llsr) } bzero(&ifra, sizeof (ifra)); - strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); + strlcpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_len = sizeof (struct sockaddr_in6); diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c index e340b968a..63beb9a91 100644 --- a/bsd/netinet6/in6_pcb.c +++ b/bsd/netinet6/in6_pcb.c @@ -108,6 +108,7 @@ #include #include #include +#include #include #include @@ -136,6 +137,10 @@ #include #endif /* IPSEC */ +#if NECP +#include +#endif /* NECP */ + /* * in6_pcblookup_local_and_cleanup does everything * in6_pcblookup_local does but it checks for a socket @@ -184,6 +189,8 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); + struct ifnet *outif = NULL; + struct sockaddr_in6 sin6; int error; kauth_cred_t cred; @@ -195,10 +202,9 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) wild = 1; socket_unlock(so, 0); /* keep reference */ lck_rw_lock_exclusive(pcbinfo->ipi_lock); - if (nam != NULL) { - struct ifnet *outif = NULL; - struct sockaddr_in6 sin6; + bzero(&sin6, sizeof (sin6)); + if (nam != NULL) { if (nam->sa_len != sizeof (struct sockaddr_in6)) { lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); @@ -214,7 +220,6 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) } lport = SIN6(nam)->sin6_port; - bzero(&sin6, sizeof (sin6)); *(&sin6) = *SIN6(nam); /* KAME hack: embed scopeid */ @@ -359,13 +364,26 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) } } } + } + + socket_lock(so, 0); + /* check if the socket got bound when the lock was released */ + if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + lck_rw_done(pcbinfo->ipi_lock); + return (EINVAL); + } + + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { inp->in6p_laddr = sin6.sin6_addr; inp->in6p_last_outifp = outif; } - socket_lock(so, 0); + if (lport == 0) { int e; if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p, 1)) != 0) { + /* Undo any address bind from above. 
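The in6_pcbbind() hunks above also show the standard discipline for a lock-order workaround: the socket lock has to be dropped before ipi_lock can be taken exclusively, which opens a window for another thread to bind the same socket, so once the socket lock is retaken every precondition is revalidated before the local address is committed. The shape, with the names used in the diff:

	socket_unlock(so, 0);			/* keep reference */
	lck_rw_lock_exclusive(pcbinfo->ipi_lock);
	/* ... validate and choose the address without the socket lock ... */
	socket_lock(so, 0);

	/* did the socket get bound while the lock was released? */
	if (inp->inp_lport != 0 ||
	    !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		lck_rw_done(pcbinfo->ipi_lock);
		return (EINVAL);
	}
	/* safe to commit in6p_laddr/outif and assign the port now */

in6_pcbsetport(), patched the same way later in this file, handles the identical race but treats a concurrently assigned port as success rather than EINVAL, since a port having been allocated by another thread is not an error for an implicit bind.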
*/ + inp->in6p_laddr = in6addr_any; + inp->in6p_last_outifp = NULL; lck_rw_done(pcbinfo->ipi_lock); return (e); } @@ -461,8 +479,7 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, if (addr6 == NULL) { if (outif != NULL && (*outif) != NULL && - (inp->inp_flags & INP_NO_IFT_CELLULAR) && - IFNET_IS_CELLULAR(*outif)) { + inp_restricted_send(inp, *outif)) { soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); error = EHOSTUNREACH; @@ -495,6 +512,7 @@ in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p) struct inpcb *pcb; int error = 0; struct ifnet *outif = NULL; + struct socket *so = inp->inp_socket; /* * Call inner routine, to assign local interface address. @@ -505,17 +523,16 @@ in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p) * whenever it's non-NULL. */ if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0) { - if ((inp->inp_flags & INP_NO_IFT_CELLULAR) && outif != NULL && - IFNET_IS_CELLULAR(outif)) - soevent(inp->inp_socket, + if (outif != NULL && inp_restricted_send(inp, outif)) + soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); goto done; } - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? &addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); if (pcb != NULL) { in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0); error = EADDRINUSE; @@ -533,13 +550,14 @@ in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p) } if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { /* lock inversion issue, mostly with udp multicast packets */ - socket_unlock(inp->inp_socket, 0); + socket_unlock(so, 0); lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); - socket_lock(inp->inp_socket, 0); + socket_lock(so, 0); } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; - + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_invalidate_cache(inp); in_pcbrehash(inp); lck_rw_done(inp->inp_pcbinfo->ipi_lock); @@ -561,6 +579,8 @@ in6_pcbdisconnect(struct inpcb *inp) lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); socket_lock(so, 0); } + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_cache(inp); bzero((caddr_t)&inp->in6p_faddr, sizeof (inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - RFC 6437 */ @@ -587,13 +607,20 @@ in6_pcbdetach(struct inpcb *inp) inp, so, SOCK_PROTO(so)); /* NOTREACHED */ } - + #if IPSEC if (inp->in6p_sp != NULL) { (void) ipsec6_delete_pcbpolicy(inp); } #endif /* IPSEC */ + /* + * Let NetworkStatistics know this PCB is going away + * before we detach it. + */ + if (nstat_collect && + (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) + nstat_pcb_detach(inp); /* mark socket state as dead */ if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", @@ -714,8 +741,11 @@ in6_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) VERIFY(ss != NULL); bzero(ss, sizeof (*ss)); - if ((inp = sotoinpcb(so)) == NULL || - (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? 
EINVAL : EPROTOTYPE); port = inp->inp_lport; @@ -754,8 +784,11 @@ in6_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) VERIFY(ss != NULL); bzero(ss, sizeof (*ss)); - if ((inp = sotoinpcb(so)) == NULL || - (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); port = inp->inp_fport; @@ -1081,11 +1114,7 @@ in6_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, if (!(inp->inp_vflag & INP_IPV6)) continue; - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->in6p_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && @@ -1114,11 +1143,7 @@ in6_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, if (!(inp->inp_vflag & INP_IPV6)) continue; - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->in6p_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && @@ -1182,11 +1207,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, if (!(inp->inp_vflag & INP_IPV6)) continue; - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->in6p_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && @@ -1216,11 +1237,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, if (!(inp->inp_vflag & INP_IPV6)) continue; - if (inp_restricted(inp, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (inp->in6p_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(inp, ifp)) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c index a87c6e662..ee974e44e 100644 --- a/bsd/netinet6/in6_proto.c +++ b/bsd/netinet6/in6_proto.c @@ -124,12 +124,9 @@ #include #include #include -#include #include #include -#include - #if IPSEC #include #if INET6 @@ -319,18 +316,6 @@ struct ip6protosw inet6sw[] = { .pr_usrreqs = &rip6_usrreqs, .pr_unlock = rip_unlock, }, -#if MROUTING -{ - .pr_type = SOCK_RAW, - .pr_protocol = IPPROTO_PIM, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, - .pr_input = pim6_input, - .pr_output = rip6_pr_output, - .pr_ctloutput = rip6_ctloutput, - .pr_usrreqs = &rip6_usrreqs, - .pr_unlock = rip_unlock, -}, -#endif /* MROUTING */ /* raw wildcard */ { .pr_type = SOCK_RAW, @@ -494,6 +479,7 @@ u_int32_t rip6_recvspace = RIPV6RCVQ; int icmp6_rediraccept = 1; /* accept and process redirects */ int icmp6_redirtimeout = 10 * 60; /* 10 minutes */ int icmp6errppslim = 500; /* 500 packets per second */ +int icmp6rappslim = 10; /* 10 packets per second */ int icmp6_nodeinfo = 3; /* enable/disable NI response */ /* UDP on IP6 parameters */ @@ -579,7 +565,8 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_sendredirects, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_defhlim, 0, ""); -SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_STATS, stats, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, ip6_getstat, "S,ip6stat", ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_RD | 
CTLFLAG_LOCKED, @@ -624,10 +611,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_use_defzone, 0,""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_mcast_pmtu, 0, ""); -#if MROUTING -SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD | CTLFLAG_LOCKED, - &mrt6stat, mrt6stat, ""); -#endif SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NEIGHBORGCTHRESH, neighborgcthresh, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_neighborgcthresh, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFPREFIXES, @@ -665,6 +648,8 @@ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6_nodeinfo, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6errppslim, 0, ""); +SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, + rappslimit, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6rappslim, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_debug, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, diff --git a/bsd/netinet6/in6_rmx.c b/bsd/netinet6/in6_rmx.c index e2fe40208..cdcbf6252 100644 --- a/bsd/netinet6/in6_rmx.c +++ b/bsd/netinet6/in6_rmx.c @@ -109,7 +109,7 @@ #include #include #include -#include +#include #include #include diff --git a/bsd/netinet6/in6_src.c b/bsd/netinet6/in6_src.c index 522f0d556..86b703bf5 100644 --- a/bsd/netinet6/in6_src.c +++ b/bsd/netinet6/in6_src.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -104,7 +104,7 @@ #include #include #include -#include +#include #include #include @@ -220,8 +220,13 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (inp != NULL) { mopts = inp->in6p_moptions; - if (inp->inp_flags & INP_NO_IFT_CELLULAR) + if (INP_NO_CELLULAR(inp)) ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR; + if (INP_NO_EXPENSIVE(inp)) + ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE; + if (INP_AWDL_UNRESTRICTED(inp)) + ip6oa.ip6oa_flags |= IP6OAF_AWDL_UNRESTRICTED; + } else { mopts = NULL; } @@ -274,8 +279,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } IFA_LOCK_SPIN(&ia6->ia_ifa); if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) || - ((ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) && - IFNET_IS_CELLULAR(ia6->ia_ifa.ifa_ifp))) { + (inp && inp_restricted_send(inp, ia6->ia_ifa.ifa_ifp))) { IFA_UNLOCK(&ia6->ia_ifa); IFA_REMREF(&ia6->ia_ifa); *errorp = EHOSTUNREACH; @@ -354,10 +358,10 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, /* Rule 1: Prefer same address */ if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) - BREAK(1); /* there should be no better candidate */ + BREAK(IP6S_SRCRULE_1); /* there should be no better candidate */ if (ia_best == NULL) - REPLACE(0); + REPLACE(IP6S_SRCRULE_0); /* Rule 2: Prefer appropriate scope */ if (dst_scope < 0) @@ -365,12 +369,12 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) - REPLACE(2); - NEXTSRC(2); + REPLACE(IP6S_SRCRULE_2); + NEXTSRC(IP6S_SRCRULE_2); } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) - NEXTSRC(2); - REPLACE(2); + NEXTSRC(IP6S_SRCRULE_2); + 
REPLACE(IP6S_SRCRULE_2); } /* @@ -379,10 +383,10 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if (!IFA6_IS_DEPRECATED(ia_best, secs) && IFA6_IS_DEPRECATED(ia, secs)) - NEXTSRC(3); + NEXTSRC(IP6S_SRCRULE_3); if (IFA6_IS_DEPRECATED(ia_best, secs) && !IFA6_IS_DEPRECATED(ia, secs)) - REPLACE(3); + REPLACE(IP6S_SRCRULE_3); /* * RFC 4429 says that optimistic addresses are equivalent to @@ -390,10 +394,10 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) == 0 && (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) - NEXTSRC(3); + NEXTSRC(IP6S_SRCRULE_3); if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) != 0 && (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0) - REPLACE(3); + REPLACE(IP6S_SRCRULE_3); /* Rule 4: Prefer home addresses */ /* @@ -403,9 +407,53 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, /* Rule 5: Prefer outgoing interface */ if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) - NEXTSRC(5); + NEXTSRC(IP6S_SRCRULE_5); if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) - REPLACE(5); + REPLACE(IP6S_SRCRULE_5); + + /* Rule 5.5: Prefer addresses in a prefix advertised by the next hop. */ + if (ro != NULL && ro->ro_rt != NULL && ia_best->ia6_ndpr != NULL && + ia->ia6_ndpr != NULL) { + struct rtentry *rta, *rtb; + int op; + + NDPR_LOCK(ia_best->ia6_ndpr); + rta = ia_best->ia6_ndpr->ndpr_rt; + if (rta != NULL) + RT_ADDREF(rta); + NDPR_UNLOCK(ia_best->ia6_ndpr); + + NDPR_LOCK(ia->ia6_ndpr); + rtb = ia->ia6_ndpr->ndpr_rt; + if (rtb != NULL) + RT_ADDREF(rtb); + NDPR_UNLOCK(ia->ia6_ndpr); + + if (rta == NULL || rtb == NULL) + op = 0; + else if (rta == ro->ro_rt && rtb != ro->ro_rt) + op = 1; + else if (rta != ro->ro_rt && rtb == ro->ro_rt) + op = 2; + else + op = 0; + + if (rta != NULL) + RT_REMREF(rta); + if (rtb != NULL) + RT_REMREF(rtb); + + switch (op) { + case 1: + NEXTSRC(IP6S_SRCRULE_5_5); + break; + case 2: + REPLACE(IP6S_SRCRULE_5_5); + break; + default: + break; + } + } /* * Rule 6: Prefer matching label @@ -417,17 +465,17 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, new_policy = in6_addrsel_lookup_policy(&ia->ia_addr); if (dst_policy->label == best_policy->label && dst_policy->label != new_policy->label) - NEXTSRC(6); + NEXTSRC(IP6S_SRCRULE_6); if (dst_policy->label != best_policy->label && dst_policy->label == new_policy->label) - REPLACE(6); + REPLACE(IP6S_SRCRULE_6); } /* - * Rule 7: Prefer public addresses. + * Rule 7: Prefer temporary addresses. * We allow users to reverse the logic by configuring - * a sysctl variable, so that privacy conscious users can - * always prefer temporary addresses. + * a sysctl variable, so that transparency conscious users can + * always prefer stable addresses. * Don't use temporary addresses for local destinations or * for multicast addresses unless we were passed in an option. */ @@ -446,44 +494,37 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && (ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) - REPLACE(7); + REPLACE(IP6S_SRCRULE_7); else - NEXTSRC(7); + NEXTSRC(IP6S_SRCRULE_7); } if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) - NEXTSRC(7); + NEXTSRC(IP6S_SRCRULE_7); else - REPLACE(7); + REPLACE(IP6S_SRCRULE_7); } /* - * Rule 8: prefer addresses on alive interfaces. + * Rule 7x: prefer addresses on alive interfaces. * This is a KAME specific rule. 
*/ if ((ia_best->ia_ifp->if_flags & IFF_UP) && !(ia->ia_ifp->if_flags & IFF_UP)) - NEXTSRC(8); + NEXTSRC(IP6S_SRCRULE_7x); if (!(ia_best->ia_ifp->if_flags & IFF_UP) && (ia->ia_ifp->if_flags & IFF_UP)) - REPLACE(8); + REPLACE(IP6S_SRCRULE_7x); /* - * Rule 14: Use longest matching prefix. - * Note: in the address selection draft, this rule is - * documented as "Rule 8". However, since it is also - * documented that this rule can be overridden, we assign - * a large number so that it is easy to assign smaller numbers - * to more preferred rules. + * Rule 8: Use longest matching prefix. */ new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); if (best_matchlen < new_matchlen) - REPLACE(14); + REPLACE(IP6S_SRCRULE_8); if (new_matchlen < best_matchlen) - NEXTSRC(14); - - /* Rule 15 is reserved. */ + NEXTSRC(IP6S_SRCRULE_8); /* * Last resort: just keep the current candidate. @@ -521,9 +562,8 @@ out: lck_rw_done(&in6_ifaddr_rwlock); - if (ia_best != NULL && - (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) && - IFNET_IS_CELLULAR(ia_best->ia_ifa.ifa_ifp)) { + if (ia_best != NULL && inp && + inp_restricted_send(inp, ia_best->ia_ifa.ifa_ifp)) { IFA_REMREF(&ia_best->ia_ifa); ia_best = NULL; *errorp = EHOSTUNREACH; @@ -1015,21 +1055,30 @@ validateroute: } done: - if (error == 0) { - if (ip6oa != NULL && - (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) && - ((ifp != NULL && IFNET_IS_CELLULAR(ifp)) || - (route != NULL && route->ro_rt != NULL && - IFNET_IS_CELLULAR(route->ro_rt->rt_ifp)))) { - if (route != NULL && route->ro_rt != NULL) { - ROUTE_RELEASE(route); - route = NULL; - } - ifp = NULL; /* ditch ifp; keep ifp0 */ - error = EHOSTUNREACH; - ip6oa->ip6oa_retflags |= IP6OARF_IFDENIED; + /* + * Check for interface restrictions. + */ +#define CHECK_RESTRICTIONS(_ip6oa, _ifp) \ + ((((_ip6oa)->ip6oa_flags & IP6OAF_NO_CELLULAR) && \ + IFNET_IS_CELLULAR(_ifp)) || \ + (((_ip6oa)->ip6oa_flags & IP6OAF_NO_EXPENSIVE) && \ + IFNET_IS_EXPENSIVE(_ifp)) || \ + (!((_ip6oa)->ip6oa_flags & IP6OAF_AWDL_UNRESTRICTED) && \ + IFNET_IS_AWDL_RESTRICTED(_ifp))) + + if (error == 0 && ip6oa != NULL && + ((ifp && CHECK_RESTRICTIONS(ip6oa, ifp)) || + (route && route->ro_rt && + CHECK_RESTRICTIONS(ip6oa, route->ro_rt->rt_ifp)))) { + if (route != NULL && route->ro_rt != NULL) { + ROUTE_RELEASE(route); + route = NULL; } + ifp = NULL; /* ditch ifp; keep ifp0 */ + error = EHOSTUNREACH; + ip6oa->ip6oa_retflags |= IP6OARF_IFDENIED; } +#undef CHECK_RESTRICTIONS /* * If the interface is disabled for IPv6, then ENETDOWN error. 
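The CHECK_RESTRICTIONS macro above generalizes what used to be a cellular-only test into three policy axes: no-cellular, no-expensive, and AWDL. The bits arrive on ip6oa from the socket earlier in this patch (in6_selectsrc() maps INP_NO_CELLULAR, INP_NO_EXPENSIVE, and INP_AWDL_UNRESTRICTED onto the corresponding IP6OAF_* flags), and both the chosen ifp and the route's rt_ifp are tested. Expanded for a single interface, the denial reads:

	if (((ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) &&
	    IFNET_IS_CELLULAR(ifp)) ||
	    ((ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE) &&
	    IFNET_IS_EXPENSIVE(ifp)) ||
	    (!(ip6oa->ip6oa_flags & IP6OAF_AWDL_UNRESTRICTED) &&
	    IFNET_IS_AWDL_RESTRICTED(ifp))) {
		error = EHOSTUNREACH;		/* route is denied */
		ip6oa->ip6oa_retflags |= IP6OARF_IFDENIED;
	}

Note the asymmetry: cellular and expensive are opt-out restrictions (the flag forbids that link class), while AWDL is opt-in (an AWDL-restricted interface is denied unless the socket is explicitly unrestricted).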
@@ -1227,14 +1276,27 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct proc *p, lck_rw_lock_exclusive(pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); } + + /* + * Check if a local port was assigned to the inp while + * this thread was waiting for the pcbinfo lock + */ + if (inp->inp_lport != 0) { + VERIFY(inp->inp_flags2 & INP2_INHASHLIST); + lck_rw_done(pcbinfo->ipi_lock); + + /* + * It is not an error if another thread allocated + * a port + */ + return (0); + } } /* XXX: this is redundant when called from in6_pcbbind */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = INPLOOKUP_WILDCARD; - inp->inp_flags |= INP_ANONPORT; - if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; @@ -1312,10 +1374,14 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct proc *p, } inp->inp_lport = lport; + inp->inp_flags |= INP_ANONPORT; + if (in_pcbinshash(inp, 1) != 0) { inp->in6p_laddr = in6addr_any; - inp->inp_lport = 0; inp->in6p_last_outifp = NULL; + + inp->inp_lport = 0; + inp->inp_flags &= ~INP_ANONPORT; if (!locked) lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); @@ -1351,11 +1417,10 @@ void addrsel_policy_init(void) { /* - * Default address selection policy based on RFC 3484 and - * draft-arifumi-6man-rfc3484-revise-03. + * Default address selection policy based on RFC 6724. */ static const struct in6_addrpolicy defaddrsel[] = { - /* localhost */ + /* Loopback -- prefix=::1/128, precedence=50, label=0 */ { .addr = { .sin6_family = AF_INET6, @@ -1367,90 +1432,91 @@ addrsel_policy_init(void) .sin6_addr = IN6MASK128, .sin6_len = sizeof (struct sockaddr_in6) }, - .preced = 60, + .preced = 50, .label = 0 }, - /* ULA */ + /* Unspecified -- prefix=::/0, precedence=40, label=1 */ { .addr = { .sin6_family = AF_INET6, - .sin6_addr = {{{ 0xfc }}}, + .sin6_addr = IN6ADDR_ANY_INIT, .sin6_len = sizeof (struct sockaddr_in6) }, .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK7, + .sin6_addr = IN6MASK0, .sin6_len = sizeof (struct sockaddr_in6) }, - .preced = 50, + .preced = 40, .label = 1 }, - /* any IPv6 src */ + /* IPv4 Mapped -- prefix=::ffff:0:0/96, precedence=35, label=4 */ { .addr = { .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_addr = IN6ADDR_V4MAPPED_INIT, .sin6_len = sizeof (struct sockaddr_in6) }, .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK0, + .sin6_addr = IN6MASK96, .sin6_len = sizeof (struct sockaddr_in6) }, - .preced = 40, - .label = 2 }, + .preced = 35, + .label = 4 + }, - /* any IPv4 src */ + /* 6to4 -- prefix=2002::/16, precedence=30, label=2 */ { .addr = { .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_V4MAPPED_INIT, + .sin6_addr = {{{ 0x20, 0x02 }}}, .sin6_len = sizeof (struct sockaddr_in6) }, .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK96, + .sin6_addr = IN6MASK16, .sin6_len = sizeof (struct sockaddr_in6) }, .preced = 30, - .label = 3 + .label = 2 }, - /* 6to4 */ + /* Teredo -- prefix=2001::/32, precedence=5, label=5 */ { .addr = { .sin6_family = AF_INET6, - .sin6_addr = {{{ 0x20, 0x02 }}}, + .sin6_addr = {{{ 0x20, 0x01 }}}, .sin6_len = sizeof (struct sockaddr_in6) }, .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK16, + .sin6_addr = IN6MASK32, .sin6_len = sizeof (struct sockaddr_in6) }, - .preced = 20, - .label = 4 + .preced = 5, + .label = 5 }, - /* Teredo */ + /* Unique Local (ULA) -- prefix=fc00::/7, precedence=3, label=13 */ { .addr = { .sin6_family = AF_INET6, - .sin6_addr = {{{ 0x20, 0x01 }}}, + 
.sin6_addr = {{{ 0xfc }}}, .sin6_len = sizeof (struct sockaddr_in6) }, .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK32, + .sin6_addr = IN6MASK7, .sin6_len = sizeof (struct sockaddr_in6) }, - .preced = 10, - .label = 5 + .preced = 3, + .label = 13 }, - /* v4 compat addresses */ + /* IPv4 Compatible -- prefix=::/96, precedence=1, label=3 */ { .addr = { .sin6_family = AF_INET6, @@ -1463,10 +1529,10 @@ addrsel_policy_init(void) .sin6_len = sizeof (struct sockaddr_in6) }, .preced = 1, - .label = 10 + .label = 3 }, - /* site-local (deprecated) */ + /* Site-local (deprecated) -- prefix=fec0::/10, precedence=1, label=11 */ { .addr = { .sin6_family = AF_INET6, @@ -1482,7 +1548,7 @@ addrsel_policy_init(void) .label = 11 }, - /* 6bone (deprecated) */ + /* 6bone (deprecated) -- prefix=3ffe::/16, precedence=1, label=12 */ { .addr = { .sin6_family = AF_INET6, @@ -1768,7 +1834,7 @@ in6_embedscope(struct in6_addr *in6, const struct sockaddr_in6 *sin6, scopeid = scope6_addr2default(in6); #endif - if (IN6_IS_SCOPE_LINKLOCAL(in6)) { + if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { struct in6_pktinfo *pi; struct ifnet *im6o_multicast_ifp = NULL; @@ -1853,7 +1919,7 @@ in6_recoverscope( */ sin6->sin6_scope_id = 0; - if (IN6_IS_SCOPE_LINKLOCAL(in6)) { + if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { /* * KAME assumption: link id == interface id */ diff --git a/bsd/netinet6/in6_var.h b/bsd/netinet6/in6_var.h index 80723feed..7157b4c04 100644 --- a/bsd/netinet6/in6_var.h +++ b/bsd/netinet6/in6_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -100,6 +100,7 @@ #include #endif /* BSD_KERNEL_PRIVATE */ #include +#include /* * pltime/vltime are just for future reference (required to implements 2 @@ -514,6 +515,7 @@ struct kev_in6_data { u_int32_t ia_plen; /* prefix length */ u_int32_t ia6_flags; /* address flags from in6_ifaddr */ struct kev_in6_addrlifetime ia_lifetime; /* address life info */ + uint8_t ia_mac[ETHER_ADDR_LEN]; }; /* @@ -530,7 +532,7 @@ struct kev_in6_data { #ifdef BSD_KERNEL_PRIVATE /* Utility function used inside netinet6 kernel code for generating events */ -void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *); +void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *, uint8_t *mac); #define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ @@ -719,23 +721,24 @@ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *); * translation between those and the publicly-defined ones below. */ #endif /* BSD_KERNEL_PRIVATE */ -#define IN6_IFF_ANYCAST 0x01 /* anycast address */ -#define IN6_IFF_TENTATIVE 0x02 /* tentative address */ -#define IN6_IFF_DUPLICATED 0x04 /* DAD detected duplicate */ -#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */ -#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */ +#define IN6_IFF_ANYCAST 0x0001 /* anycast address */ +#define IN6_IFF_TENTATIVE 0x0002 /* tentative address */ +#define IN6_IFF_DUPLICATED 0x0004 /* DAD detected duplicate */ +#define IN6_IFF_DETACHED 0x0008 /* may be detached from the link */ +#define IN6_IFF_DEPRECATED 0x0010 /* deprecated address */ /* don't perform DAD on this address (used only at first SIOC* call) */ -#define IN6_IFF_NODAD 0x20 +#define IN6_IFF_NODAD 0x0020 -#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. 
*/ -#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */ -#define IN6_IFF_DYNAMIC 0x100 /* assigned by DHCPv6 service */ -#define IN6_IFF_OPTIMISTIC 0x200 /* optimistic DAD, i.e. RFC 4429 */ -#define IN6_IFF_SECURED 0x400 /* cryptographically generated */ - -/* skip kernel prefix management. XXX: this should be temporary. */ -#define IN6_IFF_NOPFX 0x8000 +#define IN6_IFF_AUTOCONF 0x0040 /* autoconfigurable address. */ +#define IN6_IFF_TEMPORARY 0x0080 /* temporary (anonymous) address. */ +#define IN6_IFF_DYNAMIC 0x0100 /* assigned by DHCPv6 service */ +#define IN6_IFF_OPTIMISTIC 0x0200 /* optimistic DAD, i.e. RFC 4429 */ +#define IN6_IFF_SECURED 0x0400 /* cryptographically generated */ +#ifdef PRIVATE +#define IN6_IFF_SWIFTDAD 0x0800 /* DAD with no delay */ +#endif +#define IN6_IFF_NOPFX 0x8000 /* Depreciated. Don't use. */ /* Duplicate Address Detection [DAD] in progress. */ #define IN6_IFF_DADPROGRESS (IN6_IFF_TENTATIVE|IN6_IFF_OPTIMISTIC) diff --git a/bsd/netinet6/ip6_forward.c b/bsd/netinet6/ip6_forward.c index 6fdaa1069..4f3c61ee0 100644 --- a/bsd/netinet6/ip6_forward.c +++ b/bsd/netinet6/ip6_forward.c @@ -198,8 +198,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, #endif /*IPSEC*/ /* - * Do not forward packets to multicast destination (should be handled - * by ip6_mforward(). + * Do not forward packets to multicast destination. * Do not forward packets with unspecified source. It was discussed * in July 2000, on ipngwg mailing list. */ @@ -654,7 +653,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, * rthdr. (itojun) */ #if 1 - if (0) + if ((0)) #else if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0) #endif diff --git a/bsd/netinet6/ip6_fw.c b/bsd/netinet6/ip6_fw.c index 3f0e4b23f..dfa12ff11 100644 --- a/bsd/netinet6/ip6_fw.c +++ b/bsd/netinet6/ip6_fw.c @@ -571,6 +571,7 @@ ip6_fw_chk(struct ip6_hdr **pip6, u_int16_t ignport = ntohs(*cookie); #endif struct timeval timenow; + struct tcp_respond_args tra; getmicrotime(&timenow); @@ -872,8 +873,11 @@ got_match: flags = TH_RST|TH_ACK; } bcopy(&ti, ip6, sizeof(ti)); + bzero(&tra, sizeof(tra)); + tra.ifscope = IFSCOPE_NONE; + tra.awdl_unrestricted = 1; tcp_respond(NULL, ip6, (struct tcphdr *)(ip6 + 1), - *m, ack, seq, flags, IFSCOPE_NONE, 0); + *m, ack, seq, flags, &tra); *m = NULL; break; } diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index bef42ee38..266547020 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2013 Apple Inc. All rights reserved. + * Copyright (c) 2003-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -206,9 +206,6 @@ extern void addrsel_policy_init(void); static void ip6_init_delayed(void); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); -#if PULLDOWN_TEST -static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); -#endif #if NSTF extern void stfattach(void); @@ -563,7 +560,6 @@ ip6_input(struct mbuf *m) in6_ifstat_inc_na(inifp, ifs6_in_receive); ip6stat.ip6s_total++; -#ifndef PULLDOWN_TEST /* * L2 bridge code and some other code can return mbuf chain * that does not conform to KAME requirement. too bad. 
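ip6_input.c is the largest of several files in this patch (with frag6.c and icmp6.c) that retire the PULLDOWN_TEST configuration: the m_pulldown()-based IP6_EXTHDR_GET accessors and the ip6_pullexthdr() helper go away, and the code standardizes on the KAME convention in which IP6_EXTHDR_CHECK guarantees the requested span is contiguous in the mbuf data (executing the supplied failure action otherwise), after which a plain pointer cast is safe. The surviving access pattern for hop-by-hop options, as in the hunks below (note the extension-header length encoding: ip6h_len counts 8-octet units beyond the first eight bytes):

	struct ip6_hbh *hbh;
	int hbhlen;

	/* make [off, off + sizeof (*hbh)) contiguous, or fail */
	IP6_EXTHDR_CHECK(m, off, sizeof (*hbh), return (-1));
	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);

	/* full option area: (ip6h_len + 1) * 8 bytes; re-check, re-fetch */
	hbhlen = (hbh->ip6h_len + 1) << 3;
	IP6_EXTHDR_CHECK(m, off, hbhlen, return (-1));
	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);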
@@ -591,7 +587,6 @@ ip6_input(struct mbuf *m) m = n; } IP6_EXTHDR_CHECK(m, 0, sizeof (struct ip6_hdr), { goto done; }); -#endif if (m->m_len < sizeof (struct ip6_hdr)) { if ((m = m_pullup(m, sizeof (struct ip6_hdr))) == 0) { @@ -743,7 +738,7 @@ check_with_pf: ip6stat.ip6s_badscope++; goto bad; } - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) && + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst) && ip6->ip6_dst.s6_addr16[1]) { ip6stat.ip6s_badscope++; goto bad; @@ -754,13 +749,13 @@ check_with_pf: if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = htons(m->m_pkthdr.src_ifindex); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = htons(m->m_pkthdr.dst_ifindex); } else { if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = htons(inifp->if_index); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = htons(inifp->if_index); } @@ -781,11 +776,7 @@ check_with_pf: if (in6m != NULL) { IN6M_REMREF(in6m); ours = 1; - } else if (!nd6_prproxy -#if MROUTING - && !ip6_mrouter -#endif /* MROUTING */ - ) { + } else if (!nd6_prproxy) { ip6stat.ip6s_notmember++; ip6stat.ip6s_cantforward++; in6_ifstat_inc(inifp, ifs6_in_discard); @@ -972,17 +963,8 @@ hbhcheck: (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); goto done; } -#ifndef PULLDOWN_TEST /* ip6_hopopts_input() ensures that mbuf is contiguous */ hbh = (struct ip6_hbh *)(ip6 + 1); -#else - IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, - sizeof (struct ip6_hdr), sizeof (struct ip6_hbh)); - if (hbh == NULL) { - ip6stat.ip6s_tooshort++; - goto done; - } -#endif nxt = hbh->ip6h_nxt; /* @@ -1049,20 +1031,6 @@ hbhcheck: * Forward if desirable. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - /* - * If we are acting as a multicast router, all - * incoming multicast packets are passed to the - * kernel-level multicast forwarding function. - * The packet is returned (relatively) intact; if - * ip6_mforward() returns a non-zero value, the packet - * must be discarded, else it may be accepted below. 
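One detail worth calling out in the ip6_savecontrol() extension-header walk below: the headers do not share a single length encoding. AH measures ip6e_len in 4-octet units with two units implicit (RFC 4302), while the other extension headers use 8-octet units with one unit implicit (RFC 2460), which is why both the surviving code and the deleted PULLDOWN variant compute:

	/* ip6e points at the generic struct ip6_ext for header 'nxt' */
	if (nxt == IPPROTO_AH)
		elen = (ip6e->ip6e_len + 2) << 2;	/* 4-octet units */
	else
		elen = (ip6e->ip6e_len + 1) << 3;	/* 8-octet units */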
- */ -#if MROUTING - if (ip6_mrouter && ip6_mforward(ip6, inifp, m)) { - ip6stat.ip6s_cantforward++; - goto bad; - } -#endif /* MROUTING */ if (!ours && nd6_prproxy) { /* * If this isn't for us, this might be a Neighbor @@ -1316,28 +1284,12 @@ ip6_hopopts_input(uint32_t *plenp, uint32_t *rtalertp, struct mbuf **mp, u_int8_t *opt; /* validation of the length of the header */ -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof (*hbh), return (-1)); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_CHECK(m, off, hbhlen, return (-1)); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); -#else - IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof (struct ip6_hdr), - sizeof (struct ip6_hbh)); - if (hbh == NULL) { - ip6stat.ip6s_tooshort++; - return (-1); - } - hbhlen = (hbh->ip6h_len + 1) << 3; - IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof (struct ip6_hdr), - hbhlen); - if (hbh == NULL) { - ip6stat.ip6s_tooshort++; - return (-1); - } -#endif off += hbhlen; hbhlen -= sizeof (struct ip6_hbh); opt = (u_int8_t *)hbh + sizeof (struct ip6_hbh); @@ -1670,28 +1622,8 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; int hbhlen = 0; -#if PULLDOWN_TEST - struct mbuf *ext; -#endif - -#ifndef PULLDOWN_TEST hbh = (struct ip6_hbh *)(ip6 + 1); hbhlen = (hbh->ip6h_len + 1) << 3; -#else - ext = ip6_pullexthdr(m, sizeof (struct ip6_hdr), - ip6->ip6_nxt); - if (ext == NULL) { - ip6stat.ip6s_tooshort++; - return (0); - } - hbh = mtod(ext, struct ip6_hbh *); - hbhlen = (hbh->ip6h_len + 1) << 3; - if (hbhlen != ext->m_len) { - m_freem(ext); - ip6stat.ip6s_tooshort++; - return (0); - } -#endif /* * XXX: We copy the whole header even if a @@ -1704,9 +1636,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS), IPPROTO_IPV6, mp); -#if PULLDOWN_TEST - m_freem(ext); -#endif if (*mp == NULL) { goto no_mbufs; } @@ -1726,9 +1655,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) while (1) { /* is explicit loop prevention necessary? 
*/ struct ip6_ext *ip6e = NULL; int elen; -#if PULLDOWN_TEST - struct mbuf *ext = NULL; -#endif /* * if it is not an extension header, don't try to @@ -1744,7 +1670,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) goto loopend; } -#ifndef PULLDOWN_TEST if (off + sizeof (*ip6e) > m->m_len) goto loopend; ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); @@ -1754,23 +1679,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) elen = (ip6e->ip6e_len + 1) << 3; if (off + elen > m->m_len) goto loopend; -#else - ext = ip6_pullexthdr(m, off, nxt); - if (ext == NULL) { - ip6stat.ip6s_tooshort++; - return (0); - } - ip6e = mtod(ext, struct ip6_ext *); - if (nxt == IPPROTO_AH) - elen = (ip6e->ip6e_len + 2) << 2; - else - elen = (ip6e->ip6e_len + 1) << 3; - if (elen != ext->m_len) { - m_freem(ext); - ip6stat.ip6s_tooshort++; - return (0); - } -#endif switch (nxt) { case IPPROTO_DSTOPTS: @@ -1781,9 +1689,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) IS2292(in6p, IPV6_2292DSTOPTS, IPV6_DSTOPTS), IPPROTO_IPV6, mp); if (*mp == NULL) { -#if PULLDOWN_TEST - m_freem(ext); -#endif goto no_mbufs; } break; @@ -1795,9 +1700,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR), IPPROTO_IPV6, mp); if (*mp == NULL) { -#if PULLDOWN_TEST - m_freem(ext); -#endif goto no_mbufs; } break; @@ -1812,9 +1714,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) * the code just in case (nxt overwritten or * other cases). */ -#if PULLDOWN_TEST - m_freem(ext); -#endif goto loopend; } @@ -1823,10 +1722,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) off += elen; nxt = ip6e->ip6e_nxt; ip6e = NULL; -#if PULLDOWN_TEST - m_freem(ext); - ext = NULL; -#endif } loopend: ; @@ -1876,62 +1771,6 @@ ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu) } } -#if PULLDOWN_TEST -/* - * pull single extension header from mbuf chain. returns single mbuf that - * contains the result, or NULL on error. - */ -static struct mbuf * -ip6_pullexthdr(m, off, nxt) - struct mbuf *m; - size_t off; - int nxt; -{ - struct ip6_ext ip6e; - size_t elen; - struct mbuf *n; - -#if DIAGNOSTIC - switch (nxt) { - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: - case IPPROTO_AH: /* is it possible? */ - break; - default: - printf("ip6_pullexthdr: invalid nxt=%d\n", nxt); - } -#endif - - m_copydata(m, off, sizeof (ip6e), (caddr_t)&ip6e); - if (nxt == IPPROTO_AH) - elen = (ip6e.ip6e_len + 2) << 2; - else - elen = (ip6e.ip6e_len + 1) << 3; - - MGET(n, M_DONTWAIT, MT_DATA); - if (n && elen >= MLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - n = NULL; - } - } - if (!n) - return (NULL); - - n->m_len = 0; - if (elen >= M_TRAILINGSPACE(n)) { - m_free(n); - return (NULL); - } - - m_copydata(m, off, elen, mtod(n, caddr_t)); - n->m_len = elen; - return (n); -} -#endif - /* * Get pointer to the previous header followed by the header * currently processed. diff --git a/bsd/netinet6/ip6_mroute.c b/bsd/netinet6/ip6_mroute.c deleted file mode 100644 index 1870bdf28..000000000 --- a/bsd/netinet6/ip6_mroute.c +++ /dev/null @@ -1,1919 +0,0 @@ -/* - * Copyright (c) 2003-2013 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $FreeBSD: src/sys/netinet6/ip6_mroute.c,v 1.16.2.1 2002/12/18 21:39:40 suz Exp $ */ -/* $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $ */ - -/* - * Copyright (C) 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ - -/* BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp */ - -/* - * IP multicast forwarding procedures - * - * Written by David Waitzman, BBN Labs, August 1988. - * Modified by Steve Deering, Stanford, February 1989. - * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 - * Modified by Van Jacobson, LBL, January 1993 - * Modified by Ajit Thyagarajan, PARC, August 1993 - * Modified by Bill Fenenr, PARC, April 1994 - * - * MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#if CONFIG_MACF_NET -#include -#endif /* MAC_NET */ - -#ifndef __APPLE__ -static MALLOC_DEFINE(M_MRTABLE, "mf6c", "multicast forwarding cache entry"); -#endif - -#define M_HASCL(m) ((m)->m_flags & M_EXT) - -static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); -static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); - -static int set_pim6(int *); -static int socket_send(struct socket *, struct mbuf *, - struct sockaddr_in6 *); -static int register_send(struct ip6_hdr *, struct mif6 *, - struct mbuf *); - -/* - * Globals. All but ip6_mrouter, ip6_mrtproto and mrt6stat could be static, - * except for netstat or debugging purposes. - */ -struct socket *ip6_mrouter = NULL; -int ip6_mrouter_ver = 0; -int ip6_mrtproto = IPPROTO_PIM; /* for netstat only */ - -#if MROUTING - -struct mrt6stat mrt6stat; - -#define NO_RTE_FOUND 0x1 -#define RTE_FOUND 0x2 - -struct mf6c *mf6ctable[MF6CTBLSIZ]; -u_char n6expire[MF6CTBLSIZ]; -static struct mif6 mif6table[MAXMIFS]; -#if MRT6DEBUG -u_int mrt6debug = 0; /* debug level */ -#define DEBUG_MFC 0x02 -#define DEBUG_FORWARD 0x04 -#define DEBUG_EXPIRE 0x08 -#define DEBUG_XMIT 0x10 -#define DEBUG_REG 0x20 -#define DEBUG_PIM 0x40 -#endif - -static void expire_upcalls(void *); - -#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ -#define UPCALL_EXPIRE 6 /* number of timeouts */ - -#if INET -#if MROUTING -extern struct socket *ip_mrouter; -#endif -#endif - -/* - * 'Interfaces' associated with decapsulator (so we can tell - * packets that went through it from ones that get reflected - * by a broken gateway). These interfaces are never linked into - * the system ifnet list & no routes point to them. I.e., packets - * can't be sent this way. They only exist as a placeholder for - * multicast source verification. - */ -struct ifnet multicast_register_if; - -#define ENCAP_HOPS 64 - -/* - * Private variables. - */ -static mifi_t nummifs = 0; -static mifi_t reg_mif_num = (mifi_t)-1; - -static struct pim6stat pim6stat; -static int pim6; - -/* - * Hash function for a source, group entry - */ -#define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \ - (a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \ - (g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \ - (g).s6_addr32[2] ^ (g).s6_addr32[3]) - -/* - * Find a route for a given origin IPv6 address and Multicast group address. - * Quality of service parameter to be added in the future!!! 
- */ - -#define MF6CFIND(o, g, rt) do { \ - struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \ - rt = NULL; \ - mrt6stat.mrt6s_mfc_lookups++; \ - while (_rt) { \ - if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \ - IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \ - (_rt->mf6c_stall == NULL)) { \ - rt = _rt; \ - break; \ - } \ - _rt = _rt->mf6c_next; \ - } \ - if (rt == NULL) { \ - mrt6stat.mrt6s_mfc_misses++; \ - } \ -} while (0) - -/* - * Macros to compute elapsed time efficiently - * Borrowed from Van Jacobson's scheduling code - */ -#define TV_DELTA(a, b, delta) do { \ - int xxs; \ - \ - delta = (a).tv_usec - (b).tv_usec; \ - if ((xxs = (a).tv_sec - (b).tv_sec)) { \ - switch (xxs) { \ - case 2: \ - delta += 1000000; \ - /* fall through */ \ - case 1: \ - delta += 1000000; \ - break; \ - default: \ - delta += (1000000 * xxs); \ - } \ - } \ -} while (0) - -#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ - (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) - -#if UPCALL_TIMING -#define UPCALL_MAX 50 -u_int32_t upcall_data[UPCALL_MAX + 1]; -static void collate(); -#endif /* UPCALL_TIMING */ - -static int get_sg_cnt(struct sioc_sg_req6 *); -static int get_mif6_cnt(void *, int); -static int ip6_mrouter_init(struct socket *, int, int); -static int add_m6if(struct mif6ctl *); -static int del_m6if(mifi_t *); -static int add_m6fc(struct mf6cctl *); -static int del_m6fc(struct mf6cctl *); - -/* - * Handle MRT setsockopt commands to modify the multicast routing tables. - */ -int -ip6_mrouter_set(so, sopt) - struct socket *so; - struct sockopt *sopt; -{ - int error = 0; - int optval; - struct mif6ctl mifc; - struct mf6cctl mfcc; - mifi_t mifi; - - if (so != ip6_mrouter && sopt->sopt_name != MRT6_INIT) - return (EACCES); - - switch (sopt->sopt_name) { - case MRT6_INIT: -#if MRT6_OINIT - case MRT6_OINIT: -#endif - error = sooptcopyin(sopt, &optval, sizeof(optval), - sizeof(optval)); - if (error) - break; - error = ip6_mrouter_init(so, optval, sopt->sopt_name); - break; - case MRT6_DONE: - error = ip6_mrouter_done(); - break; - case MRT6_ADD_MIF: - error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc)); - if (error) - break; - error = add_m6if(&mifc); - break; - case MRT6_ADD_MFC: - error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); - if (error) - break; - error = add_m6fc(&mfcc); - break; - case MRT6_DEL_MFC: - error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); - if (error) - break; - error = del_m6fc(&mfcc); - break; - case MRT6_DEL_MIF: - error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi)); - if (error) - break; - error = del_m6if(&mifi); - break; - case MRT6_PIM: - error = sooptcopyin(sopt, &optval, sizeof(optval), - sizeof(optval)); - if (error) - break; - error = set_pim6(&optval); - break; - default: - error = EOPNOTSUPP; - break; - } - - return (error); -} - -/* - * Handle MRT getsockopt commands - */ -int -ip6_mrouter_get(so, sopt) - struct socket *so; - struct sockopt *sopt; -{ - int error = 0; - - if (so != ip6_mrouter) return EACCES; - - switch (sopt->sopt_name) { - case MRT6_PIM: - error = sooptcopyout(sopt, &pim6, sizeof(pim6)); - break; - } - return (error); -} - -/* - * Handle ioctl commands to obtain information from the cache - */ -int -mrt6_ioctl(u_long cmd, caddr_t data) -{ - int error = 0; - - switch (cmd) { - case SIOCGETSGCNT_IN6: { /* struct sioc_sg_req6 */ - struct sioc_sg_req6 req; - - bcopy(data, &req, sizeof (req)); - error = get_sg_cnt(&req); - bcopy(&req, data, sizeof (req)); - break; - } - - case 
SIOCGETMIFCNT_IN6_32: /* struct sioc_mif_req6_32 */ - case SIOCGETMIFCNT_IN6_64: /* struct sioc_mif_req6_64 */ - return (get_mif6_cnt(data, cmd == SIOCGETMIFCNT_IN6_64)); - /* NOTREACHED */ - - default: - error = EINVAL; - break; - } - return (error); -} - -/* - * returns the packet, byte, rpf-failure count for the source group provided - */ -static int -get_sg_cnt(req) - struct sioc_sg_req6 *req; -{ - struct mf6c *rt; - - MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt); - if (rt != NULL) { - req->pktcnt = rt->mf6c_pkt_cnt; - req->bytecnt = rt->mf6c_byte_cnt; - req->wrong_if = rt->mf6c_wrong_if; - } else - return(ESRCH); -#if 0 - req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; -#endif - - return 0; -} - -/* - * returns the input and output packet and byte counts on the mif provided - */ -static int -get_mif6_cnt(void *data, int p64) -{ - if (p64) { - struct sioc_mif_req6_64 *req = data; - mifi_t mifi; - - bcopy(&req->mifi, &mifi, sizeof (mifi)); - if (mifi >= nummifs) - return (EINVAL); - - bcopy(&mif6table[mifi].m6_pkt_in, &req->icount, - sizeof (req->icount)); - bcopy(&mif6table[mifi].m6_pkt_out, &req->ocount, - sizeof (req->ocount)); - bcopy(&mif6table[mifi].m6_bytes_in, &req->ibytes, - sizeof (req->ibytes)); - bcopy(&mif6table[mifi].m6_bytes_out, &req->obytes, - sizeof (req->obytes)); - } else { - struct sioc_mif_req6_32 *req = data; - mifi_t mifi; - - bcopy(&req->mifi, &mifi, sizeof (mifi)); - if (mifi >= nummifs) - return (EINVAL); - - bcopy(&mif6table[mifi].m6_pkt_in, &req->icount, - sizeof (req->icount)); - bcopy(&mif6table[mifi].m6_pkt_out, &req->ocount, - sizeof (req->ocount)); - bcopy(&mif6table[mifi].m6_bytes_in, &req->ibytes, - sizeof (req->ibytes)); - bcopy(&mif6table[mifi].m6_bytes_out, &req->obytes, - sizeof (req->obytes)); - } - return (0); -} - -static int -set_pim6(i) - int *i; -{ - if ((*i != 1) && (*i != 0)) - return EINVAL; - - pim6 = *i; - - return 0; -} - -/* - * Enable multicast routing - */ -static int -ip6_mrouter_init(so, v, cmd) - struct socket *so; - int v; - int cmd; -{ -#if MRT6DEBUG - if (mrt6debug) - log(LOG_DEBUG, - "ip6_mrouter_init: so_type = %d, pr_protocol = %d\n", - so->so_type, so->so_proto->pr_protocol); -#endif - - if (so->so_type != SOCK_RAW || - so->so_proto->pr_protocol != IPPROTO_ICMPV6) - return EOPNOTSUPP; - - if (v != 1) - return (ENOPROTOOPT); - - if (ip6_mrouter != NULL) return EADDRINUSE; - - ip6_mrouter = so; - ip6_mrouter_ver = cmd; - - bzero((caddr_t)mf6ctable, sizeof(mf6ctable)); - bzero((caddr_t)n6expire, sizeof(n6expire)); - - pim6 = 0;/* used for stubbing out/in pim stuff */ - - timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); - -#if MRT6DEBUG - if (mrt6debug) - log(LOG_DEBUG, "ip6_mrouter_init\n"); -#endif - - return 0; -} - -/* - * Disable multicast routing - */ -int -ip6_mrouter_done() -{ - mifi_t mifi; - int i; - struct mf6c *rt; - struct rtdetq *rte; - - - /* - * For each phyint in use, disable promiscuous reception of all IPv6 - * multicasts. - */ -#if INET -#if MROUTING - /* - * If there is still IPv4 multicast routing daemon, - * we remain interfaces to receive all muliticasted packets. - * XXX: there may be an interface in which the IPv4 multicast - * daemon is not interested... 
- */ - if (!ip_mrouter) -#endif -#endif - { - for (mifi = 0; mifi < nummifs; mifi++) { - if (mif6table[mifi].m6_ifp && - !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { -#ifdef __APPLE__ - if_allmulti(mif6table[mifi].m6_ifp, 0); -#else - { - struct ifnet *ifp; - struct in6_ifreq ifr; - - ifr.ifr_addr.sin6_family = AF_INET6; - ifr.ifr_addr.sin6_addr= in6addr_any; - ifp = mif6table[mifi].m6_ifp; - ifnet_ioctl(ifp, 0, SIOCDELMULTI, &ifr); - } -#endif - } - } - } - bzero((caddr_t)mif6table, sizeof(mif6table)); - nummifs = 0; - - pim6 = 0; /* used to stub out/in pim specific code */ - - untimeout(expire_upcalls, (caddr_t)NULL); - - /* - * Free all multicast forwarding cache entries. - *###LD 5/27 needs locking - */ - for (i = 0; i < MF6CTBLSIZ; i++) { - rt = mf6ctable[i]; - while (rt) { - struct mf6c *frt; - - for (rte = rt->mf6c_stall; rte != NULL; ) { - struct rtdetq *n = rte->next; - - m_free(rte->m); - FREE(rte, M_MRTABLE); - rte = n; - } - frt = rt; - rt = rt->mf6c_next; - FREE(frt, M_MRTABLE); - } - } - - bzero((caddr_t)mf6ctable, sizeof(mf6ctable)); - - /* - * Reset de-encapsulation cache - */ - reg_mif_num = -1; - - ip6_mrouter = NULL; - ip6_mrouter_ver = 0; - - -#if MRT6DEBUG - if (mrt6debug) - log(LOG_DEBUG, "ip6_mrouter_done\n"); -#endif - - return 0; -} - -static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 , - 0, 0, IN6ADDR_ANY_INIT, 0}; - -/* - * Add a mif to the mif table - */ -static int -add_m6if(mifcp) - struct mif6ctl *mifcp; -{ - struct mif6 *mifp; - struct ifnet *ifp; - int error; -#ifdef notyet - struct tbf *m_tbf = tbftable + mifcp->mif6c_mifi; -#endif - - if (mifcp->mif6c_mifi >= MAXMIFS) - return EINVAL; - mifp = mif6table + mifcp->mif6c_mifi; - if (mifp->m6_ifp) - return (EADDRINUSE); /* XXX: is it appropriate? */ - if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > if_index) - return (ENXIO); - - ifnet_head_lock_shared(); - if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > if_index) { - ifnet_head_done(); - return ENXIO; - } - ifp = ifindex2ifnet[mifcp->mif6c_pifi]; - ifnet_head_done(); - - if (ifp == NULL) { - return ENXIO; - } - if (mifcp->mif6c_flags & MIFF_REGISTER) { - if (reg_mif_num == (mifi_t)-1) { - multicast_register_if.if_name = "register_mif"; - multicast_register_if.if_flags |= IFF_LOOPBACK; - multicast_register_if.if_index = mifcp->mif6c_mifi; - reg_mif_num = mifcp->mif6c_mifi; - } - - ifp = &multicast_register_if; - - } /* if REGISTER */ - else { - /* Make sure the interface supports multicast */ - if ((ifp->if_flags & IFF_MULTICAST) == 0) - return EOPNOTSUPP; - - error = if_allmulti(ifp, 1); - if (error) - return error; - } - - mifp->m6_flags = mifcp->mif6c_flags; - mifp->m6_ifp = ifp; - - /* initialize per mif pkt counters */ - mifp->m6_pkt_in = 0; - mifp->m6_pkt_out = 0; - mifp->m6_bytes_in = 0; - mifp->m6_bytes_out = 0; - - /* Adjust nummifs up if the mifi is higher than nummifs */ - if (nummifs <= mifcp->mif6c_mifi) - nummifs = mifcp->mif6c_mifi + 1; - -#if MRT6DEBUG - if (mrt6debug) - log(LOG_DEBUG, - "add_mif #%d, phyint %s\n", - mifcp->mif6c_mifi, - if_name(ifp)); -#endif - - return 0; -} - -/* - * Delete a mif from the mif table - */ -static int -del_m6if(mifip) - mifi_t *mifip; -{ - struct mif6 *mifp = mif6table + *mifip; - mifi_t mifi; - struct ifnet *ifp; - - if (*mifip >= nummifs) - return EINVAL; - if (mifp->m6_ifp == NULL) - return EINVAL; - - - if (!(mifp->m6_flags & MIFF_REGISTER)) { - /* - * XXX: what if there is yet IPv4 multicast daemon - * using the interface? 
- */ - ifp = mifp->m6_ifp; - - if_allmulti(ifp, 0); - } - - bzero((caddr_t)mifp, sizeof(*mifp)); - - /* Adjust nummifs down */ - for (mifi = nummifs; mifi > 0; mifi--) - if (mif6table[mifi - 1].m6_ifp) - break; - nummifs = mifi; - - -#if MRT6DEBUG - if (mrt6debug) - log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs); -#endif - - return 0; -} - -/* - * Add an mfc entry - */ -static int -add_m6fc(mfccp) - struct mf6cctl *mfccp; -{ - struct mf6c *rt; - u_int32_t hash; - struct rtdetq *rte; - u_short nstl; - - MF6CFIND(mfccp->mf6cc_origin.sin6_addr, - mfccp->mf6cc_mcastgrp.sin6_addr, rt); - - /* If an entry already exists, just update the fields */ - if (rt) { -#if MRT6DEBUG - if (mrt6debug & DEBUG_MFC) - log(LOG_DEBUG, - "add_m6fc no upcall h %d o %s g %s p %x\n", - ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr), - ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr), - mfccp->mf6cc_parent); -#endif - - rt->mf6c_parent = mfccp->mf6cc_parent; - rt->mf6c_ifset = mfccp->mf6cc_ifset; - return 0; - } - - /* - * Find the entry for which the upcall was made and update - */ - hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr, - mfccp->mf6cc_mcastgrp.sin6_addr); - for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) { - if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, - &mfccp->mf6cc_origin.sin6_addr) && - IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, - &mfccp->mf6cc_mcastgrp.sin6_addr) && - (rt->mf6c_stall != NULL)) { - - if (nstl++) - log(LOG_ERR, - "add_m6fc: %s o %s g %s p %x dbx %p\n", - "multiple kernel entries", - ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr), - ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr), - mfccp->mf6cc_parent, rt->mf6c_stall); - -#if MRT6DEBUG - if (mrt6debug & DEBUG_MFC) - log(LOG_DEBUG, - "add_m6fc o %s g %s p %x dbg %x\n", - ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr), - ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr), - mfccp->mf6cc_parent, rt->mf6c_stall); -#endif - - rt->mf6c_origin = mfccp->mf6cc_origin; - rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; - rt->mf6c_parent = mfccp->mf6cc_parent; - rt->mf6c_ifset = mfccp->mf6cc_ifset; - /* initialize pkt counters per src-grp */ - rt->mf6c_pkt_cnt = 0; - rt->mf6c_byte_cnt = 0; - rt->mf6c_wrong_if = 0; - - rt->mf6c_expire = 0; /* Don't clean this guy up */ - n6expire[hash]--; - - /* free packets Qed at the end of this entry */ - for (rte = rt->mf6c_stall; rte != NULL; ) { - struct rtdetq *n = rte->next; - ip6_mdq(rte->m, rte->ifp, rt); - m_freem(rte->m); -#if UPCALL_TIMING - collate(&(rte->t)); -#endif /* UPCALL_TIMING */ - FREE(rte, M_MRTABLE); - rte = n; - } - rt->mf6c_stall = NULL; - } - } - - /* - * It is possible that an entry is being inserted without an upcall - */ - if (nstl == 0) { -#if MRT6DEBUG - if (mrt6debug & DEBUG_MFC) - log(LOG_DEBUG,"add_mfc no upcall h %d o %s g %s p %x\n", - hash, - ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr), - ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr), - mfccp->mf6cc_parent); -#endif - - for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) { - - if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, - &mfccp->mf6cc_origin.sin6_addr)&& - IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, - &mfccp->mf6cc_mcastgrp.sin6_addr)) { - - rt->mf6c_origin = mfccp->mf6cc_origin; - rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; - rt->mf6c_parent = mfccp->mf6cc_parent; - rt->mf6c_ifset = mfccp->mf6cc_ifset; - /* initialize pkt counters per src-grp */ - rt->mf6c_pkt_cnt = 0; - rt->mf6c_byte_cnt = 0; - rt->mf6c_wrong_if = 0; - - if (rt->mf6c_expire) - n6expire[hash]--; - rt->mf6c_expire = 0; - } - } - if (rt 
== NULL) { - /* no upcall, so make a new entry */ - rt = (struct mf6c *)_MALLOC(sizeof(*rt), M_MRTABLE, - M_NOWAIT); - if (rt == NULL) { - return ENOBUFS; - } - - /* insert new entry at head of hash chain */ - rt->mf6c_origin = mfccp->mf6cc_origin; - rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; - rt->mf6c_parent = mfccp->mf6cc_parent; - rt->mf6c_ifset = mfccp->mf6cc_ifset; - /* initialize pkt counters per src-grp */ - rt->mf6c_pkt_cnt = 0; - rt->mf6c_byte_cnt = 0; - rt->mf6c_wrong_if = 0; - rt->mf6c_expire = 0; - rt->mf6c_stall = NULL; - - /* link into table */ - rt->mf6c_next = mf6ctable[hash]; - mf6ctable[hash] = rt; - } - } - return 0; -} - -#if UPCALL_TIMING -/* - * collect delay statistics on the upcalls - */ -static void -collate(t) - struct timeval *t; -{ - u_int32_t d; - struct timeval tp; - u_int32_t delta; - - GET_TIME(tp); - - if (TV_LT(*t, tp)) - { - TV_DELTA(tp, *t, delta); - - d = delta >> 10; - if (d > UPCALL_MAX) - d = UPCALL_MAX; - - ++upcall_data[d]; - } -} -#endif /* UPCALL_TIMING */ - -/* - * Delete an mfc entry - */ -static int -del_m6fc(mfccp) - struct mf6cctl *mfccp; -{ - struct sockaddr_in6 origin; - struct sockaddr_in6 mcastgrp; - struct mf6c *rt; - struct mf6c **nptr; - u_int32_t hash; - - origin = mfccp->mf6cc_origin; - mcastgrp = mfccp->mf6cc_mcastgrp; - hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr); - -#if MRT6DEBUG - if (mrt6debug & DEBUG_MFC) - log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n", - ip6_sprintf(&origin.sin6_addr), - ip6_sprintf(&mcastgrp.sin6_addr)); -#endif - - - nptr = &mf6ctable[hash]; - while ((rt = *nptr) != NULL) { - if (IN6_ARE_ADDR_EQUAL(&origin.sin6_addr, - &rt->mf6c_origin.sin6_addr) && - IN6_ARE_ADDR_EQUAL(&mcastgrp.sin6_addr, - &rt->mf6c_mcastgrp.sin6_addr) && - rt->mf6c_stall == NULL) - break; - - nptr = &rt->mf6c_next; - } - if (rt == NULL) { - return EADDRNOTAVAIL; - } - - *nptr = rt->mf6c_next; - FREE(rt, M_MRTABLE); - - - return 0; -} - -static int -socket_send(s, mm, src) - struct socket *s; - struct mbuf *mm; - struct sockaddr_in6 *src; -{ -//### LD 5/27/04 needs locking! -// - if (s) { - if (sbappendaddr(&s->so_rcv, - (struct sockaddr *)src, - mm, (struct mbuf *)0, NULL) != 0) { - sorwakeup(s); - return 0; - } - } - return -1; -} - -/* - * IPv6 multicast forwarding function. This function assumes that the packet - * pointed to by "ip6" has arrived on (or is about to be sent to) the interface - * pointed to by "ifp", and the packet is to be relayed to other networks - * that have members of the packet's destination IPv6 multicast group. - * - * The packet is returned unscathed to the caller, unless it is - * erroneous, in which case a non-zero return value tells the caller to - * discard it. - */ - -int -ip6_mforward(ip6, ifp, m) - struct ip6_hdr *ip6; - struct ifnet *ifp; - struct mbuf *m; -{ - struct mf6c *rt; - struct mif6 *mifp; - struct mbuf *mm; - mifi_t mifi; - uint64_t curtime = net_uptime(); - -#if MRT6DEBUG - if (mrt6debug & DEBUG_FORWARD) - log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n", - ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), - ifp->if_index); -#endif - - /* - * Don't forward a packet with Hop limit of zero or one, - * or a packet destined to a local-only group. - */ - if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst) || - IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) - return 0; - ip6->ip6_hlim--; - - /* - * Source address check: do not forward packets with unspecified - * source. It was discussed in July 2000, on ipngwg mailing list. 
- * This is rather more serious than unicast cases, because some - * MLD packets can be sent with the unspecified source address - * (although such packets must normally set 1 to the hop limit field). - */ - if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { - ip6stat.ip6s_cantforward++; - if (ip6_log_time + ip6_log_interval < curtime) { - ip6_log_time = curtime; - log(LOG_DEBUG, - "cannot forward " - "from %s to %s nxt %d received on %s\n", - ip6_sprintf(&ip6->ip6_src), - ip6_sprintf(&ip6->ip6_dst), - ip6->ip6_nxt, - if_name(m->m_pkthdr.rcvif)); - } - return 0; - } - - /* - * Determine forwarding mifs from the forwarding cache table - */ - MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt); - - /* Entry exists, so forward if necessary */ - if (rt) { - return (ip6_mdq(m, ifp, rt)); - } else { - /* - * If we don't have a route for packet's origin, - * Make a copy of the packet & - * send message to routing daemon - */ - - struct mbuf *mb0; - struct rtdetq *rte; - u_int32_t hash; -/* int i, npkts;*/ -#if UPCALL_TIMING - struct timeval tp; - - GET_TIME(tp); -#endif /* UPCALL_TIMING */ - - mrt6stat.mrt6s_no_route++; -#if MRT6DEBUG - if (mrt6debug & (DEBUG_FORWARD | DEBUG_MFC)) - log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n", - ip6_sprintf(&ip6->ip6_src), - ip6_sprintf(&ip6->ip6_dst)); -#endif - - /* - * Allocate mbufs early so that we don't do extra work if we - * are just going to fail anyway. - */ - rte = (struct rtdetq *)_MALLOC(sizeof(*rte), M_MRTABLE, - M_NOWAIT); - if (rte == NULL) { - return ENOBUFS; - } - mb0 = m_copy(m, 0, M_COPYALL); - /* - * Pullup packet header if needed before storing it, - * as other references may modify it in the meantime. - */ - if (mb0 && - (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr))) - mb0 = m_pullup(mb0, sizeof(struct ip6_hdr)); - if (mb0 == NULL) { - FREE(rte, M_MRTABLE); - return ENOBUFS; - } - - /* is there an upcall waiting for this packet? 
*/ - hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst); - for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) { - if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, - &rt->mf6c_origin.sin6_addr) && - IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, - &rt->mf6c_mcastgrp.sin6_addr) && - (rt->mf6c_stall != NULL)) - break; - } - - if (rt == NULL) { - struct mrt6msg *im; -#if MRT6_OINIT - struct omrt6msg *oim; -#endif - - /* no upcall, so make a new entry */ - rt = (struct mf6c *)_MALLOC(sizeof(*rt), M_MRTABLE, - M_NOWAIT); - if (rt == NULL) { - FREE(rte, M_MRTABLE); - m_freem(mb0); - return ENOBUFS; - } - /* - * Make a copy of the header to send to the user - * level process - */ - mm = m_copy(mb0, 0, sizeof(struct ip6_hdr)); - - if (mm == NULL) { - FREE(rte, M_MRTABLE); - m_freem(mb0); - FREE(rt, M_MRTABLE); - return ENOBUFS; - } - - /* - * Send message to routing daemon - */ - sin6.sin6_addr = ip6->ip6_src; - - im = NULL; -#if MRT6_OINIT - oim = NULL; -#endif - switch (ip6_mrouter_ver) { -#if MRT6_OINIT - case MRT6_OINIT: - oim = mtod(mm, struct omrt6msg *); - oim->im6_msgtype = MRT6MSG_NOCACHE; - oim->im6_mbz = 0; - break; -#endif - case MRT6_INIT: - im = mtod(mm, struct mrt6msg *); - im->im6_msgtype = MRT6MSG_NOCACHE; - im->im6_mbz = 0; - break; - default: - FREE(rte, M_MRTABLE); - m_freem(mb0); - FREE(rt, M_MRTABLE); - return EINVAL; - } - -#if MRT6DEBUG - if (mrt6debug & DEBUG_FORWARD) - log(LOG_DEBUG, - "getting the iif info in the kernel\n"); -#endif - - for (mifp = mif6table, mifi = 0; - mifi < nummifs && mifp->m6_ifp != ifp; - mifp++, mifi++) - ; - - switch (ip6_mrouter_ver) { -#if MRT6_OINIT - case MRT6_OINIT: - oim->im6_mif = mifi; - break; -#endif - case MRT6_INIT: - im->im6_mif = mifi; - break; - } - - if (socket_send(ip6_mrouter, mm, &sin6) < 0) { - log(LOG_WARNING, "ip6_mforward: ip6_mrouter " - "socket queue full\n"); - mrt6stat.mrt6s_upq_sockfull++; - FREE(rte, M_MRTABLE); - m_freem(mb0); - FREE(rt, M_MRTABLE); - return ENOBUFS; - } - - mrt6stat.mrt6s_upcalls++; - - /* insert new entry at head of hash chain */ - bzero(rt, sizeof(*rt)); - rt->mf6c_origin.sin6_family = AF_INET6; - rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6); - rt->mf6c_origin.sin6_addr = ip6->ip6_src; - rt->mf6c_mcastgrp.sin6_family = AF_INET6; - rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6); - rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst; - rt->mf6c_expire = UPCALL_EXPIRE; - n6expire[hash]++; - rt->mf6c_parent = MF6C_INCOMPLETE_PARENT; - - /* link into table */ - rt->mf6c_next = mf6ctable[hash]; - mf6ctable[hash] = rt; - /* Add this entry to the end of the queue */ - rt->mf6c_stall = rte; - } else { - /* determine if q has overflowed */ - struct rtdetq **p; - int npkts = 0; - - for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next) - if (++npkts > MAX_UPQ6) { - mrt6stat.mrt6s_upq_ovflw++; - FREE(rte, M_MRTABLE); - m_freem(mb0); - return 0; - } - - /* Add this entry to the end of the queue */ - *p = rte; - } - - rte->next = NULL; - rte->m = mb0; - rte->ifp = ifp; -#if UPCALL_TIMING - rte->t = tp; -#endif /* UPCALL_TIMING */ - - - return 0; - } -} - -/* - * Clean up cache entries if upcalls are not serviced - * Call from the Slow Timeout mechanism, every half second. 
- */ -static void -expire_upcalls( - __unused void *unused) -{ - struct rtdetq *rte; - struct mf6c *mfc, **nptr; - int i; - - for (i = 0; i < MF6CTBLSIZ; i++) { - if (n6expire[i] == 0) - continue; - nptr = &mf6ctable[i]; - while ((mfc = *nptr) != NULL) { - rte = mfc->mf6c_stall; - /* - * Skip real cache entries - * Make sure it wasn't marked to not expire (shouldn't happen) - * If it expires now - */ - if (rte != NULL && - mfc->mf6c_expire != 0 && - --mfc->mf6c_expire == 0) { -#if MRT6DEBUG - if (mrt6debug & DEBUG_EXPIRE) - log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n", - ip6_sprintf(&mfc->mf6c_origin.sin6_addr), - ip6_sprintf(&mfc->mf6c_mcastgrp.sin6_addr)); -#endif - /* - * drop all the packets - * free the mbuf with the pkt, if, timing info - */ - do { - struct rtdetq *n = rte->next; - m_freem(rte->m); - FREE(rte, M_MRTABLE); - rte = n; - } while (rte != NULL); - mrt6stat.mrt6s_cache_cleanups++; - n6expire[i]--; - - *nptr = mfc->mf6c_next; - FREE(mfc, M_MRTABLE); - } else { - nptr = &mfc->mf6c_next; - } - } - } - - timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); -} - -/* - * Packet forwarding routine once entry in the cache is made - */ -static int -ip6_mdq(m, ifp, rt) - struct mbuf *m; - struct ifnet *ifp; - struct mf6c *rt; -{ - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - mifi_t mifi, iif; - struct mif6 *mifp; - int plen = m->m_pkthdr.len; - struct in6_addr src0, dst0; /* copies for local work */ - u_int32_t iszone, idzone, oszone, odzone; - int error = 0; - -/* - * Macro to send packet on mif. Since RSVP packets don't get counted on - * input, they shouldn't get counted on output, so statistics keeping is - * separate. - */ - -#define MC6_SEND(ip6, mifp, m) do { \ - if ((mifp)->m6_flags & MIFF_REGISTER) \ - register_send((ip6), (mifp), (m)); \ - else \ - phyint_send((ip6), (mifp), (m)); \ -} while (0) - - /* - * Don't forward if it didn't arrive from the parent mif - * for its origin. - */ - mifi = rt->mf6c_parent; - if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) { - /* came in the wrong interface */ -#if MRT6DEBUG - if (mrt6debug & DEBUG_FORWARD) - log(LOG_DEBUG, - "wrong if: ifid %d mifi %d mififid %x\n", - ifp->if_index, mifi, - mif6table[mifi].m6_ifp->if_index); -#endif - mrt6stat.mrt6s_wrong_if++; - rt->mf6c_wrong_if++; - /* - * If we are doing PIM processing, and we are forwarding - * packets on this interface, send a message to the - * routing daemon. - */ - /* have to make sure this is a valid mif */ - if (mifi < nummifs && mif6table[mifi].m6_ifp) - if (pim6 && (m->m_flags & M_LOOP) == 0) { - /* - * Check the M_LOOP flag to avoid an - * unnecessary PIM assert. - * XXX: M_LOOP is an ad-hoc hack... 
- */ - static struct sockaddr_in6 addr = - { sizeof(addr), AF_INET6 , 0, 0, IN6ADDR_ANY_INIT, 0}; - - struct mbuf *mm; - struct mrt6msg *im; -#if MRT6_OINIT - struct omrt6msg *oim; -#endif - - mm = m_copy(m, 0, sizeof(struct ip6_hdr)); - if (mm && - (M_HASCL(mm) || - mm->m_len < sizeof(struct ip6_hdr))) - mm = m_pullup(mm, sizeof(struct ip6_hdr)); - if (mm == NULL) - return ENOBUFS; - -#if MRT6_OINIT - oim = NULL; -#endif - im = NULL; - switch (ip6_mrouter_ver) { -#if MRT6_OINIT - case MRT6_OINIT: - oim = mtod(mm, struct omrt6msg *); - oim->im6_msgtype = MRT6MSG_WRONGMIF; - oim->im6_mbz = 0; - break; -#endif - case MRT6_INIT: - im = mtod(mm, struct mrt6msg *); - im->im6_msgtype = MRT6MSG_WRONGMIF; - im->im6_mbz = 0; - break; - default: - m_freem(mm); - return EINVAL; - } - - for (mifp = mif6table, iif = 0; - iif < nummifs && mifp && - mifp->m6_ifp != ifp; - mifp++, iif++) - ; - - switch (ip6_mrouter_ver) { -#if MRT6_OINIT - case MRT6_OINIT: - oim->im6_mif = iif; - addr.sin6_addr = oim->im6_src; - break; -#endif - case MRT6_INIT: - im->im6_mif = iif; - addr.sin6_addr = im->im6_src; - break; - } - - mrt6stat.mrt6s_upcalls++; - - if (socket_send(ip6_mrouter, mm, &addr) < 0) { -#if MRT6DEBUG - if (mrt6debug) - log(LOG_WARNING, "mdq, ip6_mrouter socket queue full\n"); -#endif - ++mrt6stat.mrt6s_upq_sockfull; - return ENOBUFS; - } /* if socket Q full */ - } /* if PIM */ - return 0; - } /* if wrong iif */ - - /* If I sourced this packet, it counts as output, else it was input. */ - if (m->m_pkthdr.rcvif == NULL) { - /* XXX: is rcvif really NULL when output?? */ - mif6table[mifi].m6_pkt_out++; - mif6table[mifi].m6_bytes_out += plen; - } else { - mif6table[mifi].m6_pkt_in++; - mif6table[mifi].m6_bytes_in += plen; - } - rt->mf6c_pkt_cnt++; - rt->mf6c_byte_cnt += plen; - - /* - * For each mif, forward a copy of the packet if there are group - * members downstream on the interface. - */ - src0 = ip6->ip6_src; - dst0 = ip6->ip6_dst; - if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 || - (error = in6_setscope(&dst0, ifp, &idzone)) != 0) { - ip6stat.ip6s_badscope++; - return (error); - } - for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) { - if (IF_ISSET(mifi, &rt->mf6c_ifset)) { - /* - * check if the outgoing packet is going to break - * a scope boundary. - * XXX For packets through PIM register tunnel - * interface, we believe a routing daemon. - */ - if (!(mif6table[rt->mf6c_parent].m6_flags & - MIFF_REGISTER) && - !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { - if (in6_setscope(&src0, mif6table[mifi].m6_ifp, - &oszone) || - in6_setscope(&dst0, mif6table[mifi].m6_ifp, - &odzone) || - iszone != oszone || - idzone != odzone) { - ip6stat.ip6s_badscope++; - continue; - } - } - - mifp->m6_pkt_out++; - mifp->m6_bytes_out += plen; - MC6_SEND(ip6, mifp, m); - } - } - return 0; -} - -static void -phyint_send(ip6, mifp, m) - struct ip6_hdr *ip6; - struct mif6 *mifp; - struct mbuf *m; -{ - struct mbuf *mb_copy; - struct ifnet *ifp = mifp->m6_ifp; - int error = 0; - static struct route_in6 ro; - struct in6_multi *in6m; - struct sockaddr_in6 *dst6; - - /* - * Make a new reference to the packet; make sure that - * the IPv6 header is actually copied, not just referenced, - * so that ip6_output() only scribbles on the copy. 
- */ - mb_copy = m_copy(m, 0, M_COPYALL); - if (mb_copy && - (M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr))) - mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr)); - if (mb_copy == NULL) { - return; - } - /* set MCAST flag to the outgoing packet */ - mb_copy->m_flags |= M_MCAST; - - /* - * If we sourced the packet, call ip6_output since we may devide - * the packet into fragments when the packet is too big for the - * outgoing interface. - * Otherwise, we can simply send the packet to the interface - * sending queue. - */ - if (m->m_pkthdr.rcvif == NULL) { - struct ip6_moptions *im6o; - - im6o = ip6_allocmoptions(M_DONTWAIT); - if (im6o == NULL) { - m_freem(mb_copy); - return; - } - - im6o->im6o_multicast_ifp = ifp; - /* XXX: ip6_output will override ip6->ip6_hlim */ - im6o->im6o_multicast_hlim = ip6->ip6_hlim; - im6o->im6o_multicast_loop = 1; - error = ip6_output(mb_copy, NULL, &ro, IPV6_FORWARDING, - im6o, NULL, NULL); - - IM6O_REMREF(im6o); -#if MRT6DEBUG - if (mrt6debug & DEBUG_XMIT) - log(LOG_DEBUG, "phyint_send on mif %d err %d\n", - mifp - mif6table, error); -#endif - return; - } - - /* - * If we belong to the destination multicast group - * on the outgoing interface, loop back a copy. - */ - dst6 = (struct sockaddr_in6 *)&ro.ro_dst; - in6_multihead_lock_shared(); - IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); - in6_multihead_lock_done(); - if (in6m != NULL) { - IN6M_REMREF(in6m); - dst6->sin6_len = sizeof(struct sockaddr_in6); - dst6->sin6_family = AF_INET6; - dst6->sin6_addr = ip6->ip6_dst; - ip6_mloopback(NULL, ifp, m, (struct sockaddr_in6 *)&ro.ro_dst, - -1, -1); - } - /* - * Put the packet into the sending queue of the outgoing interface - * if it would fit in the MTU of the interface. - */ - if (mb_copy->m_pkthdr.len <= ifp->if_mtu || ifp->if_mtu < IPV6_MMTU) { - dst6->sin6_len = sizeof(struct sockaddr_in6); - dst6->sin6_family = AF_INET6; - dst6->sin6_addr = ip6->ip6_dst; - /* - * We just call if_output instead of nd6_output here, since - * we need no ND for a multicast forwarded packet...right? 
- */ -#ifdef __APPLE__ - /* Make sure the HW checksum flags are cleaned before sending the packet */ - - mb_copy->m_pkthdr.rcvif = 0; - mb_copy->m_pkthdr.csum_data = 0; - mb_copy->m_pkthdr.csum_flags = 0; - - error = dlil_output(ifp, PF_INET6, mb_copy, - NULL, (struct sockaddr *)&ro.ro_dst, 0, NULL); -#else - error = (*ifp->if_output)(ifp, mb_copy, - (struct sockaddr *)&ro.ro_dst, - NULL); -#endif -#if MRT6DEBUG - if (mrt6debug & DEBUG_XMIT) - log(LOG_DEBUG, "phyint_send on mif %d err %d\n", - mifp - mif6table, error); -#endif - } else { - /* - * pMTU discovery is intentionally disabled by default, since - * various router may notify pMTU in multicast, which can be - * a DDoS to a router - */ - if (ip6_mcast_pmtu) - icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); -#if MRT6DEBUG - else { - if (mrt6debug & DEBUG_XMIT) { - log(LOG_DEBUG, - "phyint_send: packet too big on %s o %s " - "g %s size %d(discarded)\n", - if_name(ifp), - ip6_sprintf(&ip6->ip6_src), - ip6_sprintf(&ip6->ip6_dst), - mb_copy->m_pkthdr.len); - } - } -#endif /* MRT6DEBUG */ - m_freem(mb_copy); /* simply discard the packet */ - - } -} - -static int -register_send(ip6, mif, m) - struct ip6_hdr *ip6; - struct mif6 *mif; - struct mbuf *m; -{ - struct mbuf *mm; - int i, len = m->m_pkthdr.len; - static struct sockaddr_in6 addr = { sizeof(addr), AF_INET6 , - 0, 0, IN6ADDR_ANY_INIT, 0}; - struct mrt6msg *im6; - -#if MRT6DEBUG - if (mrt6debug) - log(LOG_DEBUG, "** IPv6 register_send **\n src %s dst %s\n", - ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst)); -#endif - ++pim6stat.pim6s_snd_registers; - - /* Make a copy of the packet to send to the user level process */ - MGETHDR(mm, M_DONTWAIT, MT_HEADER); - if (mm == NULL) - return ENOBUFS; -#ifdef notyet -#if CONFIG_MACF_NET - mac_create_mbuf_multicast_encap(m, mif->m6_ifp, mm); -#endif -#endif - mm->m_pkthdr.rcvif = NULL; - mm->m_data += max_linkhdr; - mm->m_len = sizeof(struct ip6_hdr); - - if ((mm->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { - m_freem(mm); - return ENOBUFS; - } - i = MHLEN - M_LEADINGSPACE(mm); - if (i > len) - i = len; - mm = m_pullup(mm, i); - if (mm == NULL){ - m_freem(mm); - return ENOBUFS; - } -/* TODO: check it! */ - mm->m_pkthdr.len = len + sizeof(struct ip6_hdr); - - /* - * Send message to routing daemon - */ - addr.sin6_addr = ip6->ip6_src; - - im6 = mtod(mm, struct mrt6msg *); - im6->im6_msgtype = MRT6MSG_WHOLEPKT; - im6->im6_mbz = 0; - - im6->im6_mif = mif - mif6table; - - /* iif info is not given for reg. encap.n */ - mrt6stat.mrt6s_upcalls++; - - if (socket_send(ip6_mrouter, mm, &addr) < 0) { -#if MRT6DEBUG - if (mrt6debug) - log(LOG_WARNING, - "register_send: ip6_mrouter socket queue full\n"); -#endif - ++mrt6stat.mrt6s_upq_sockfull; - return ENOBUFS; - } - return 0; -} - -/* - * PIM sparse mode hook - * Receives the pim control messages, and passes them up to the listening - * socket, using rip6_input. - * The only message processed is the REGISTER pim message; the pim header - * is stripped off, and the inner packet is passed to register_mforward. 
- */ -int -pim6_input(struct mbuf **mp, int *offp, int proto) -{ - struct pim *pim; /* pointer to a pim struct */ - struct ip6_hdr *ip6; - int pimlen; - struct mbuf *m = *mp; - int minlen; - int off = *offp; - - ++pim6stat.pim6s_rcv_total; - - /* Expect 32-bit aligned data pointer on strict-align platforms */ - MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - - ip6 = mtod(m, struct ip6_hdr *); - pimlen = m->m_pkthdr.len - *offp; - - /* - * Validate lengths - */ - if (pimlen < PIM_MINLEN) { - ++pim6stat.pim6s_rcv_tooshort; -#if MRT6DEBUG - if (mrt6debug & DEBUG_PIM) - log(LOG_DEBUG,"pim6_input: PIM packet too short\n"); -#endif - m_freem(m); - return(IPPROTO_DONE); - } - - /* - * if the packet is at least as big as a REGISTER, go ahead - * and grab the PIM REGISTER header size, to avoid another - * possible m_pullup() later. - * - * PIM_MINLEN == pimhdr + u_int32 == 8 - * PIM6_REG_MINLEN == pimhdr + reghdr + eip6hdr == 4 + 4 + 40 - */ - minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN; - - /* - * Make sure that the IP6 and PIM headers in contiguous memory, and - * possibly the PIM REGISTER header - */ -#ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, minlen, return IPPROTO_DONE); - /* adjust pointer */ - ip6 = mtod(m, struct ip6_hdr *); - - /* adjust mbuf to point to the PIM header */ - pim = (struct pim *)((caddr_t)ip6 + off); -#else - IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen); - if (pim == NULL) { - pim6stat.pim6s_rcv_tooshort++; - return IPPROTO_DONE; - } -#endif - -#define PIM6_CHECKSUM -#ifdef PIM6_CHECKSUM - { - int cksumlen; - - /* - * Validate checksum. - * If PIM REGISTER, exclude the data packet - */ - if (pim->pim_type == PIM_REGISTER) - cksumlen = PIM_MINLEN; - else - cksumlen = pimlen; - - if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) { - ++pim6stat.pim6s_rcv_badsum; -#if MRT6DEBUG - if (mrt6debug & DEBUG_PIM) - log(LOG_DEBUG, - "pim6_input: invalid checksum\n"); -#endif - m_freem(m); - return(IPPROTO_DONE); - } - } -#endif /* PIM_CHECKSUM */ - - /* PIM version check */ - if (pim->pim_ver != PIM_VERSION) { - ++pim6stat.pim6s_rcv_badversion; -#if MRT6DEBUG - log(LOG_ERR, - "pim6_input: incorrect version %d, expecting %d\n", - pim->pim_ver, PIM_VERSION); -#endif - m_freem(m); - return(IPPROTO_DONE); - } - - if (pim->pim_type == PIM_REGISTER) { - /* - * since this is a REGISTER, we'll make a copy of the register - * headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the - * routing daemon. 
- */ - static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 , - 0, 0, IN6ADDR_ANY_INIT, 0 }; - - struct mbuf *mcp; - struct ip6_hdr *eip6; - u_int32_t *reghdr; - - ++pim6stat.pim6s_rcv_registers; - - if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) { -#if MRT6DEBUG - if (mrt6debug & DEBUG_PIM) - log(LOG_DEBUG, - "pim6_input: register mif not set: %d\n", - reg_mif_num); -#endif - m_freem(m); - return(IPPROTO_DONE); - } - - reghdr = (u_int32_t *)(pim + 1); - - if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) - goto pim6_input_to_daemon; - - /* - * Validate length - */ - if (pimlen < PIM6_REG_MINLEN) { - ++pim6stat.pim6s_rcv_tooshort; - ++pim6stat.pim6s_rcv_badregisters; -#if MRT6DEBUG - log(LOG_ERR, - "pim6_input: register packet size too " - "small %d from %s\n", - pimlen, ip6_sprintf(&ip6->ip6_src)); -#endif - m_freem(m); - return(IPPROTO_DONE); - } - - eip6 = (struct ip6_hdr *) (reghdr + 1); -#if MRT6DEBUG - if (mrt6debug & DEBUG_PIM) - log(LOG_DEBUG, - "pim6_input[register], eip6: %s -> %s, " - "eip6 plen %d\n", - ip6_sprintf(&eip6->ip6_src), - ip6_sprintf(&eip6->ip6_dst), - ntohs(eip6->ip6_plen)); -#endif - - /* verify the version number of the inner packet */ - if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { - ++pim6stat.pim6s_rcv_badregisters; -#if MRT6DEBUG - log(LOG_DEBUG, "pim6_input: invalid IP version (%d) " - "of the inner packet\n", - (eip6->ip6_vfc & IPV6_VERSION)); -#endif - m_freem(m); - return(IPPROTO_NONE); - } - - /* verify the inner packet is destined to a mcast group */ - if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) { - ++pim6stat.pim6s_rcv_badregisters; -#if MRT6DEBUG - if (mrt6debug & DEBUG_PIM) - log(LOG_DEBUG, - "pim6_input: inner packet of register " - "is not multicast %s\n", - ip6_sprintf(&eip6->ip6_dst)); -#endif - m_freem(m); - return(IPPROTO_DONE); - } - - /* - * make a copy of the whole header to pass to the daemon later. - */ - mcp = m_copy(m, 0, off + PIM6_REG_MINLEN); - if (mcp == NULL) { -#if MRT6DEBUG - log(LOG_ERR, - "pim6_input: pim register: " - "could not copy register head\n"); -#endif - m_freem(m); - return(IPPROTO_DONE); - } - - /* - * forward the inner ip6 packet; point m_data at the inner ip6. - */ - m_adj(m, off + PIM_MINLEN); -#if MRT6DEBUG - if (mrt6debug & DEBUG_PIM) { - log(LOG_DEBUG, - "pim6_input: forwarding decapsulated register: " - "src %s, dst %s, mif %d\n", - ip6_sprintf(&eip6->ip6_src), - ip6_sprintf(&eip6->ip6_dst), - reg_mif_num); - } -#endif - -#ifdef __APPLE__ - - if (lo_ifp) { - dlil_output(lo_ifp, PF_INET6, m, 0, (struct sockaddr *)&dst, 0, NULL); - } - else { - printf("Warning: pim6_input call to dlil_find_dltag failed!\n"); - m_freem(m); - } -#else - (void) if_simloop(mif6table[reg_mif_num].m6_ifp, m, - dst.sin6_family, NULL); -#endif - - /* prepare the register head to send to the mrouting daemon */ - m = mcp; - } - - /* - * Pass the PIM message up to the daemon; if it is a register message - * pass the 'head' only up to the daemon. This includes the - * encapsulator ip6 header, pim header, register header and the - * encapsulated ip6 header. - */ - pim6_input_to_daemon: - rip6_input(&m, offp); - return(IPPROTO_DONE); -} -#endif diff --git a/bsd/netinet6/ip6_mroute.h b/bsd/netinet6/ip6_mroute.h deleted file mode 100644 index abdf59fd9..000000000 --- a/bsd/netinet6/ip6_mroute.h +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright (c) 2008-2013 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* $FreeBSD: src/sys/netinet6/ip6_mroute.h,v 1.2.2.2 2001/07/03 11:01:53 ume Exp $ */ -/* $KAME: ip6_mroute.h,v 1.17 2001/02/10 02:05:52 itojun Exp $ */ - -/* - * Copyright (C) 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* BSDI ip_mroute.h,v 2.5 1996/10/11 16:01:48 pjd Exp */ - -/* - * Definitions for IP multicast forwarding. - * - * Written by David Waitzman, BBN Labs, August 1988. - * Modified by Steve Deering, Stanford, February 1989. - * Modified by Ajit Thyagarajan, PARC, August 1993. - * Modified by Ajit Thyagarajan, PARC, August 1994. - * Modified by Ahmed Helmy, USC, September 1996. 
- * - * MROUTING Revision: 1.2 - */ - -#ifndef _NETINET6_IP6_MROUTE_H_ -#define _NETINET6_IP6_MROUTE_H_ -#include - -/* - * Multicast Routing set/getsockopt commands. - */ -#ifdef XNU_KERNEL_PRIVATE -#define MRT6_OINIT 100 /* initialize forwarder (omrt6msg) */ -#endif /* XNU_KERNEL_PRIVATE */ -#define MRT6_DONE 101 /* shut down forwarder */ -#define MRT6_ADD_MIF 102 /* add multicast interface */ -#define MRT6_DEL_MIF 103 /* delete multicast interface */ -#define MRT6_ADD_MFC 104 /* insert forwarding cache entry */ -#define MRT6_DEL_MFC 105 /* delete forwarding cache entry */ -#define MRT6_PIM 107 /* enable pim code */ -#define MRT6_INIT 108 /* initialize forwarder (mrt6msg) */ - -#ifdef __APPLE__ -#define GET_TIME(t) getmicrotime(&t) -#endif - -/* - * Types and macros for handling bitmaps with one bit per multicast interface. - */ -typedef u_short mifi_t; /* type of a mif index */ -#define MAXMIFS 64 - -#ifndef IF_SETSIZE -#define IF_SETSIZE 256 -#endif - -typedef u_int32_t if_mask; -#define NIFBITS (sizeof(if_mask) * NBBY) /* bits per mask */ - -#ifndef howmany -#define howmany(x, y) ((((x) % (y)) == 0) ? ((x) / (y)) : (((x) / (y)) + 1)) -#endif - -typedef struct if_set { - if_mask ifs_bits[howmany(IF_SETSIZE, NIFBITS)]; -} if_set; - -#define IF_SET(n, p) ((p)->ifs_bits[(n)/NIFBITS] |= (1 << ((n) % NIFBITS))) -#define IF_CLR(n, p) ((p)->ifs_bits[(n)/NIFBITS] &= ~(1 << ((n) % NIFBITS))) -#define IF_ISSET(n, p) ((p)->ifs_bits[(n)/NIFBITS] & (1 << ((n) % NIFBITS))) -#define IF_COPY(f, t) bcopy(f, t, sizeof(*(f))) -#define IF_ZERO(p) bzero(p, sizeof(*(p))) - -/* - * Argument structure for MRT6_ADD_IF. - */ -struct mif6ctl { - mifi_t mif6c_mifi; /* the index of the mif to be added */ - u_char mif6c_flags; /* MIFF_ flags defined below */ - u_short mif6c_pifi; /* the index of the physical IF */ -#ifdef notyet - u_int mif6c_rate_limit; /* max rate */ -#endif -}; - -#define MIFF_REGISTER 0x1 /* mif represents a register end-point */ - -/* - * Argument structure for MRT6_ADD_MFC and MRT6_DEL_MFC - */ -struct mf6cctl { - struct sockaddr_in6 mf6cc_origin; /* IPv6 origin of mcasts */ - struct sockaddr_in6 mf6cc_mcastgrp; /* multicast group associated */ - mifi_t mf6cc_parent; /* incoming ifindex */ - struct if_set mf6cc_ifset; /* set of forwarding ifs */ -}; - -/* - * The kernel's multicast routing statistics. - */ -struct mrt6stat { - u_quad_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */ - u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */ - u_quad_t mrt6s_upcalls; /* # calls to mrouted */ - u_quad_t mrt6s_no_route; /* no route for packet's origin */ - u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */ - u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */ - u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */ - u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */ - u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */ - u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */ - u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */ - u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */ - u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */ -}; - -#ifdef XNU_KERNEL_PRIVATE -#if MRT6_OINIT -/* - * Struct used to communicate from kernel to multicast router - * note the convenient similarity to an IPv6 header. - * XXX old version, superseded by mrt6msg. 
- */ -struct omrt6msg { - u_int32_t unused1; - u_char im6_msgtype; /* what type of message */ -#if 0 -#define MRT6MSG_NOCACHE 1 -#define MRT6MSG_WRONGMIF 2 -#define MRT6MSG_WHOLEPKT 3 /* used for user level encap*/ -#endif - u_char im6_mbz; /* must be zero */ - u_char im6_mif; /* mif rec'd on */ - u_char unused2; - struct in6_addr im6_src, im6_dst; -}; -#endif -#endif /* XNU_KERNEL_PRIVATE */ - -/* - * Structure used to communicate from kernel to multicast router. - * We'll overlay the structure onto an MLD header (not an IPv6 header - * like igmpmsg{} used for IPv4 implementation). This is because this - * structure will be passed via an IPv6 raw socket, on which an application - * will only receive the payload i.e. the data after the IPv6 header and all - * the extension headers. (see Section 3 of draft-ietf-ipngwg-2292bis-01) - */ -struct mrt6msg { -#define MRT6MSG_NOCACHE 1 -#define MRT6MSG_WRONGMIF 2 -#define MRT6MSG_WHOLEPKT 3 /* used for user level encap*/ - u_char im6_mbz; /* must be zero */ - u_char im6_msgtype; /* what type of message */ - u_int16_t im6_mif; /* mif rec'd on */ - u_int32_t im6_pad; /* padding for 64bit arch */ - struct in6_addr im6_src, im6_dst; -}; - -/* - * Argument structure used by multicast routing daemon to get src-grp - * packet counts - */ -struct sioc_sg_req6 { - struct sockaddr_in6 src; - struct sockaddr_in6 grp; - u_quad_t pktcnt; - u_quad_t bytecnt; - u_quad_t wrong_if; -}; - -/* - * Argument structure used by mrouted to get mif pkt counts - */ -struct sioc_mif_req6 { - mifi_t mifi; /* mif number */ - u_quad_t icount; /* Input packet count on mif */ - u_quad_t ocount; /* Output packet count on mif */ - u_quad_t ibytes; /* Input byte count on mif */ - u_quad_t obytes; /* Output byte count on mif */ -}; - -#if defined(XNU_KERNEL_PRIVATE) -struct sioc_mif_req6_32 { - mifi_t mifi; - u_quad_t icount; - u_quad_t ocount; - u_quad_t ibytes; - u_quad_t obytes; -} __attribute__((aligned(4), packed)); - -struct sioc_mif_req6_64 { - mifi_t mifi; - u_quad_t icount __attribute__((aligned(8))); - u_quad_t ocount; - u_quad_t ibytes; - u_quad_t obytes; -} __attribute__((aligned(8))); -#endif /* XNU_KERNEL_PRIVATE */ - -#ifdef PRIVATE -#ifndef KERNEL -/* - * The kernel's multicast-interface structure. - * - * XXX: This is unused and is currently exposed for netstat. - */ -struct mif6 { - u_char m6_flags; /* MIFF_ flags defined above */ - u_int m6_rate_limit; /* max rate */ -#ifdef notyet - struct tbf *m6_tbf; /* token bucket structure at intf. */ -#endif - struct in6_addr m6_lcl_addr; /* local interface address */ - struct ifnet *m6_ifp; /* pointer to interface */ - u_quad_t m6_pkt_in; /* # pkts in on interface */ - u_quad_t m6_pkt_out; /* # pkts out on interface */ - u_quad_t m6_bytes_in; /* # bytes in on interface */ - u_quad_t m6_bytes_out; /* # bytes out on interface */ - struct route_in6 m6_route;/* cached route if this is a tunnel */ -#ifdef notyet - u_int m6_rsvp_on; /* RSVP listening on this vif */ - struct socket *m6_rsvpd; /* RSVP daemon socket */ -#endif -}; - -/* - * The kernel's multicast forwarding cache entry structure - * - * XXX: This is unused and is currently exposed for netstat. 
- */ -struct mf6c { - struct sockaddr_in6 mf6c_origin; /* IPv6 origin of mcasts */ - struct sockaddr_in6 mf6c_mcastgrp; /* multicast group associated*/ - mifi_t mf6c_parent; /* incoming IF */ - struct if_set mf6c_ifset; /* set of outgoing IFs */ - - u_quad_t mf6c_pkt_cnt; /* pkt count for src-grp */ - u_quad_t mf6c_byte_cnt; /* byte count for src-grp */ - u_quad_t mf6c_wrong_if; /* wrong if for src-grp */ - int mf6c_expire; /* time to clean entry up */ - struct timeval mf6c_last_assert; /* last time I sent an assert*/ - struct rtdetq *mf6c_stall; /* pkts waiting for route */ - struct mf6c *mf6c_next; /* hash table linkage */ -}; -#endif /* !KERNEL */ - -#define MF6C_INCOMPLETE_PARENT ((mifi_t)-1) - -#define MF6CTBLSIZ 256 -#if (MF6CTBLSIZ & (MF6CTBLSIZ - 1)) == 0 /* from sys:route.h */ -#define MF6CHASHMOD(h) ((h) & (MF6CTBLSIZ - 1)) -#else -#define MF6CHASHMOD(h) ((h) % MF6CTBLSIZ) -#endif - -#define MAX_UPQ6 4 /* max. no of pkts in upcall Q */ - -#ifdef BSD_KERNEL_PRIVATE -/* - * Argument structure used for pkt info. while upcall is made - */ -#ifndef _NETINET_IP_MROUTE_H_ -struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */ - struct mbuf *m; /* A copy of the packet */ - struct ifnet *ifp; /* Interface pkt came in on */ -#if UPCALL_TIMING - struct timeval t; /* Timestamp */ -#endif /* UPCALL_TIMING */ - struct rtdetq *next; -}; -#endif /* _NETINET_IP_MROUTE_H_ */ - -extern struct mrt6stat mrt6stat; - -#if MROUTING -extern int ip6_mrouter_set(struct socket *, struct sockopt *); -extern int ip6_mrouter_get(struct socket *, struct sockopt *); -extern int ip6_mrouter_done(void); -extern int mrt6_ioctl(u_long, caddr_t); -#endif /* MROUTING */ -#endif /* BSD_KERNEL_PRIVATE */ -#endif /* PRIVATE */ -#endif /* !_NETINET6_IP6_MROUTE_H_ */ diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c index 1dc5dec38..812bf2b3a 100644 --- a/bsd/netinet6/ip6_output.c +++ b/bsd/netinet6/ip6_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -140,6 +140,10 @@ extern int ipsec_bypass; #endif /* IPSEC */ +#if NECP +#include +#endif /* NECP */ + #if CONFIG_MACF_NET #include #endif /* CONFIG_MACF_NET */ @@ -294,6 +298,11 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, struct route_in6 *ipsec_saved_route = NULL; boolean_t needipsectun = FALSE; #endif /* IPSEC */ +#if NECP + necp_kernel_policy_result necp_result = 0; + necp_kernel_policy_result_parameter necp_result_parameter; + necp_kernel_policy_id necp_matched_policy_id = 0; +#endif /* NECP */ struct { struct ipf_pktopts ipf_pktopts; struct ip6_exthdrs exthdrs; @@ -301,6 +310,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, #if IPSEC struct ipsec_output_state ipsec_state; #endif /* IPSEC */ +#if NECP + struct route_in6 necp_route; +#endif /* NECP */ #if DUMMYNET struct route_in6 saved_route; struct route_in6 saved_ro_pmtu; @@ -311,6 +323,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, #define exthdrs ip6obz.exthdrs #define ip6route ip6obz.ip6route #define ipsec_state ip6obz.ipsec_state +#define necp_route ip6obz.necp_route #define saved_route ip6obz.saved_route #define saved_ro_pmtu ip6obz.saved_ro_pmtu #define args ip6obz.args @@ -378,27 +391,27 @@ tags_done: m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO); #if IPSEC - /* for AH processing. stupid to have "socket" variable in IP layer... 
*/ if (ipsec_bypass == 0) { so = ipsec_getsocket(m); - (void) ipsec_setsocket(m, NULL); - + if (so != NULL) { + (void) ipsec_setsocket(m, NULL); + } /* If packet is bound to an interface, check bound policies */ if ((flags & IPV6_OUTARGS) && - (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) && - ip6oa->ip6oa_boundif != IFSCOPE_NONE) { + (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) && + ip6oa->ip6oa_boundif != IFSCOPE_NONE) { /* ip6obf.noipsec is a bitfield, use temp integer */ int noipsec = 0; if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND, - flags, ip6oa, &noipsec, &sp) != 0) + flags, ip6oa, &noipsec, &sp) != 0) goto bad; ip6obf.noipsec = (noipsec != 0); } } #endif /* IPSEC */ - + ip6 = mtod(m, struct ip6_hdr *); nxt0 = ip6->ip6_nxt; finaldst = ip6->ip6_dst; @@ -432,10 +445,11 @@ tags_done: } } - if ((flags & IPV6_OUTARGS) && (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR)) - ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; - if (flags & IPV6_OUTARGS) { + if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) + ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; + if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE) + ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE; adv = &ip6oa->ip6oa_flowadv; adv->code = FADV_SUCCESS; ip6oa->ip6oa_retflags = 0; @@ -484,19 +498,63 @@ tags_done: #undef MAKE_EXTHDR +#if NECP + necp_matched_policy_id = necp_ip6_output_find_policy_match (m, flags, (flags & IPV6_OUTARGS) ? ip6oa : NULL, + &necp_result, &necp_result_parameter); + if (necp_matched_policy_id) { + necp_mark_packet_from_ip(m, necp_matched_policy_id); + switch (necp_result) { + case NECP_KERNEL_POLICY_RESULT_PASS: + goto skip_ipsec; + case NECP_KERNEL_POLICY_RESULT_DROP: + case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: + /* Flow divert packets should be blocked at the IP layer */ + error = EHOSTUNREACH; + goto bad; + case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: { + /* Verify that the packet is being routed to the tunnel */ + struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter); + if (policy_ifp == ifp) { + goto skip_ipsec; + } else { + if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, (struct route *)&necp_route, AF_INET6)) { + /* Set scoped index to the tunnel interface, since it is compatible with the packet */ + /* This will only work for callers who pass IPV6_OUTARGS, but that covers all of the + clients we care about today */ + if (flags & IPV6_OUTARGS) { + ip6oa->ip6oa_boundif = policy_ifp->if_index; + ip6oa->ip6oa_flags |= IP6OAF_BOUND_IF; + } + if (opt != NULL && opt->ip6po_pktinfo != NULL) { + opt->ip6po_pktinfo->ipi6_ifindex = policy_ifp->if_index; + } + ro = &necp_route; + goto skip_ipsec; + } else { + error = ENETUNREACH; + goto bad; + } + } + break; + } + default: + break; + } + } +#endif /* NECP */ + #if IPSEC if (ipsec_bypass != 0 || ip6obf.noipsec) goto skip_ipsec; - /* May have been set above if packet was bound */ if (sp == NULL) { /* get a security policy for this packet */ - if (so == NULL) { - sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, - 0, &error); - } else { + if (so != NULL) { sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, - so, &error); + so, &error); + } else { + sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, + 0, &error); } if (sp == NULL) { IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); @@ -529,13 +587,7 @@ tags_done: goto freehdrs; } if (sp->ipsec_if) { - /* Verify the redirect to ipsec interface */ - if (sp->ipsec_if == ifp) { - /* Set policy for mbuf */ - m->m_pkthdr.ipsec_policy = sp->id; - goto skip_ipsec; - } - goto bad; + goto skip_ipsec; } else { 
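The NECP dispatch added above maps a policy verdict onto a packet disposition: PASS skips IPsec, DROP and SOCKET_DIVERT fail with EHOSTUNREACH, and IP_TUNNEL either confirms the already-chosen interface or rebinds the packet to the policy's tunnel. A reduced sketch of that shape, with hypothetical verdict and interface types standing in for the NECP ones:

#include <errno.h>

/* Hypothetical stand-ins for the NECP verdicts used in ip6_output(). */
enum verdict { V_PASS, V_DROP, V_SOCKET_DIVERT, V_IP_TUNNEL };

struct ifnet_stub;      /* opaque interface */

/* Returns 0 to continue output (IPsec skipped), or an errno to drop. */
static int
apply_verdict(enum verdict v, struct ifnet_stub *chosen,
    struct ifnet_stub *policy_ifp, int can_rebind)
{
	switch (v) {
	case V_PASS:
		return 0;               /* policy says pass: skip IPsec */
	case V_DROP:
	case V_SOCKET_DIVERT:
		return EHOSTUNREACH;    /* flow divert is blocked at the IP layer */
	case V_IP_TUNNEL:
		if (policy_ifp == chosen)
			return 0;       /* already routed to the tunnel */
		/* otherwise rebind (scoped boundif, pktinfo) or give up */
		return can_rebind ? 0 : ENETUNREACH;
	}
	return 0;
}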
ip6obf.needipsec = TRUE; } @@ -1028,7 +1080,6 @@ skip_ipsec: * then rt (for unicast) and ifp must be non-NULL valid values. */ if (!(flags & IPV6_FORWARDING)) { - /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc_na(ifp, ifs6_out_request); } if (rt != NULL) { @@ -1164,40 +1215,8 @@ routefound: * forbid loopback, loop back a copy. */ ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0); - } else { - if (im6o != NULL) - IM6O_UNLOCK(im6o); - /* - * If we are acting as a multicast router, perform - * multicast forwarding as if the packet had just - * arrived on the interface to which we are about - * to send. The multicast forwarding function - * recursively calls this function, using the - * IPV6_FORWARDING flag to prevent infinite recursion. - * - * Multicasts that are looped back by ip6_mloopback(), - * above, will be forwarded by the ip6_input() routine, - * if necessary. - */ -#if MROUTING - if (ip6_mrouter && !(flags & IPV6_FORWARDING)) { - /* - * XXX: ip6_mforward expects that rcvif is NULL - * when it is called from the originating path. - * However, it is not always the case, since - * some versions of MGETHDR() does not - * initialize the field. - */ - m->m_pkthdr.rcvif = NULL; - if (ip6_mforward(ip6, ifp, m) != 0) { - m_freem(m); - if (in6m != NULL) - IN6M_REMREF(in6m); - goto done; - } - } -#endif /* MROUTING */ - } + } else if (im6o != NULL) + IM6O_UNLOCK(im6o); if (in6m != NULL) IN6M_REMREF(in6m); /* @@ -1593,6 +1612,9 @@ done: if (sp != NULL) key_freesp(sp, KEY_SADB_UNLOCKED); #endif /* IPSEC */ +#if NECP + ROUTE_RELEASE(&necp_route); +#endif /* NECP */ #if DUMMYNET ROUTE_RELEASE(&saved_route); ROUTE_RELEASE(&saved_ro_pmtu); @@ -1986,6 +2008,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, if (ro_pmtu->ro_rt != NULL) { u_int32_t ifmtu; + if (ifp == NULL) + ifp = ro_pmtu->ro_rt->rt_ifp; lck_rw_lock_shared(nd_if_rwlock); /* Access without acquiring nd_ifinfo lock for performance */ ifmtu = IN6_LINKMTU(ifp); @@ -2404,16 +2428,16 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) caddr_t req = NULL; size_t len = 0; struct mbuf *m; - + if ((error = soopt_getm(sopt, &m)) != 0) break; if ((error = soopt_mcopyin(sopt, m)) != 0) break; - + req = mtod(m, caddr_t); len = m->m_len; error = ipsec6_set_policy(in6p, optname, req, - len, privileged); + len, privileged); m_freem(m); break; } @@ -2466,8 +2490,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) break; /* once set, it cannot be unset */ - if (!optval && - (in6p->inp_flags & INP_NO_IFT_CELLULAR)) { + if (!optval && INP_NO_CELLULAR(in6p)) { error = EINVAL; break; } @@ -2658,26 +2681,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) break; #if IPSEC case IPV6_IPSEC_POLICY: { - caddr_t req = NULL; - size_t len = 0; - struct mbuf *m = NULL; - struct mbuf *mp = NULL; - - error = soopt_getm(sopt, &m); - if (error != 0) - break; - error = soopt_mcopyin(sopt, m); - if (error != 0) - break; - - req = mtod(m, caddr_t); - len = m->m_len; - error = ipsec6_get_policy(in6p, req, len, &mp); - if (error == 0) - error = soopt_mcopyout(sopt, mp); - if (mp != NULL) - m_freem(mp); - m_freem(m); + error = 0; /* This option is no longer supported */ break; } #endif /* IPSEC */ @@ -2700,8 +2704,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) break; case IPV6_NO_IFT_CELLULAR: - optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR) - ? 1 : 0; + optval = INP_NO_CELLULAR(in6p) ? 
1 : 0; error = sooptcopyout(sopt, &optval, sizeof (optval)); break; diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h index ac02672df..30926d2dc 100644 --- a/bsd/netinet6/ip6_var.h +++ b/bsd/netinet6/ip6_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -327,6 +327,12 @@ struct ip6stat { u_quad_t ip6s_dad_collide; }; +enum ip6s_sources_rule_index { + IP6S_SRCRULE_0, IP6S_SRCRULE_1, IP6S_SRCRULE_2, IP6S_SRCRULE_3, IP6S_SRCRULE_4, + IP6S_SRCRULE_5, IP6S_SRCRULE_5_5, IP6S_SRCRULE_6, IP6S_SRCRULE_7, + IP6S_SRCRULE_7x, IP6S_SRCRULE_8 +}; + #ifdef BSD_KERNEL_PRIVATE /* * IPv6 onion peeling state. @@ -397,6 +403,8 @@ struct ip6_out_args { #define IP6OAF_BOUND_IF 0x00000002 /* boundif value is valid */ #define IP6OAF_BOUND_SRCADDR 0x00000004 /* bound to src address */ #define IP6OAF_NO_CELLULAR 0x00000010 /* skip IFT_CELLULAR */ +#define IP6OAF_NO_EXPENSIVE 0x00000020 /* skip IFEF_EXPENSIVE */ +#define IP6OAF_AWDL_UNRESTRICTED 0x00000040 /* privileged AWDL */ u_int32_t ip6oa_retflags; /* IP6OARF return flags (see below) */ #define IP6OARF_IFDENIED 0x00000001 /* denied access to interface */ }; @@ -417,9 +425,6 @@ extern int ip6_neighborgcthresh; /* Threshold # of NDP entries for GC */ extern int ip6_maxifprefixes; /* Max acceptable prefixes via RA per IF */ extern int ip6_maxifdefrouters; /* Max acceptable def routers via RA */ extern int ip6_maxdynroutes; /* Max # of routes created via redirect */ -#if MROUTING -extern struct socket *ip6_mrouter; /* multicast routing daemon */ -#endif /* MROUTING */ extern int ip6_sendredirects; /* send IP redirects when forwarding? */ extern int ip6_accept_rtadv; /* deprecated */ extern int ip6_log_interval; @@ -484,9 +489,6 @@ extern struct ip6aux *ip6_addaux(struct mbuf *); extern struct ip6aux *ip6_findaux(struct mbuf *); extern void ip6_delaux(struct mbuf *); -#if MROUTING -extern int ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); -#endif /* MROUTING */ extern int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *, u_int32_t *); extern struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *, diff --git a/bsd/netinet6/ipcomp_core.c b/bsd/netinet6/ipcomp_core.c index 55dcbf506..533bfd198 100644 --- a/bsd/netinet6/ipcomp_core.c +++ b/bsd/netinet6/ipcomp_core.c @@ -50,7 +50,7 @@ #include #include -#if ZLIB +#if IPCOMP_ZLIB #include #endif #include @@ -66,7 +66,7 @@ #include -#if ZLIB +#if IPCOMP_ZLIB static void *deflate_alloc(void *, u_int, u_int); static void deflate_free(void *, void *); static int deflate_common(struct mbuf *, struct mbuf *, size_t *, int); @@ -86,24 +86,26 @@ static int deflate_memlevel = MAX_MEM_LEVEL; static z_stream deflate_stream; static z_stream inflate_stream; -#endif /* ZLIB */ +#endif /* IPCOMP_ZLIB */ +#if IPCOMP_ZLIB static const struct ipcomp_algorithm ipcomp_algorithms[] = { -#if ZLIB { deflate_compress, deflate_decompress, 90 }, -#endif /* ZLIB */ }; +#else +static const struct ipcomp_algorithm ipcomp_algorithms[] __unused = {}; +#endif const struct ipcomp_algorithm * ipcomp_algorithm_lookup( -#if ZLIB +#if IPCOMP_ZLIB int idx #else __unused int idx #endif ) { -#if ZLIB +#if IPCOMP_ZLIB if (idx == SADB_X_CALG_DEFLATE) { /* * Avert your gaze, ugly hack follows! 
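The ipcomp_core.c change above renames the ZLIB guard to IPCOMP_ZLIB and, when the compressor is compiled out, keeps an empty __unused table so ipcomp_algorithm_lookup() can retain one signature. A reduced sketch of the same gating pattern (hypothetical WITH_ZLIB and names; the empty initializer is the same GNU extension the patch itself uses):

#include <stddef.h>

struct algo {
	int (*compress)(void);
	unsigned int minlen;
};

#if WITH_ZLIB
static int do_deflate(void) { return 0; }      /* compressor stand-in */
static const struct algo algorithms[] = {
	{ do_deflate, 90 },    /* deflate, mirroring the patch's table */
};
#else
/* compiled out: keep an empty, explicitly unused table */
static const struct algo algorithms[] __attribute__((unused)) = {};
#endif

const struct algo *
algo_lookup(int idx)
{
#if WITH_ZLIB
	if (idx == 0)
		return &algorithms[0];
#else
	(void)idx;
#endif
	return NULL;
}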
@@ -140,11 +142,11 @@ ipcomp_algorithm_lookup( return &ipcomp_algorithms[0]; } -#endif /* ZLIB */ +#endif /* IPCOMP_ZLIB */ return NULL; } -#if ZLIB +#if IPCOMP_ZLIB static void * deflate_alloc( __unused void *aux, @@ -410,4 +412,4 @@ deflate_decompress(m, md, lenp) return deflate_common(m, md, lenp, 1); } -#endif /* ZLIB */ +#endif /* IPCOMP_ZLIB */ diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c index 9be2fb12e..90920d436 100644 --- a/bsd/netinet6/ipsec.c +++ b/bsd/netinet6/ipsec.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Apple Inc. All rights reserved. + * Copyright (c) 2008-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -82,6 +82,7 @@ #include #include +#include #include #include @@ -256,8 +257,7 @@ static struct inpcbpolicy *ipsec_newpcbpolicy(void); static void ipsec_delpcbpolicy(struct inpcbpolicy *); static struct secpolicy *ipsec_deepcopy_policy(struct secpolicy *src); static int ipsec_set_policy(struct secpolicy **pcb_sp, - int optname, caddr_t request, size_t len, int priv); -static int ipsec_get_policy(struct secpolicy *pcb_sp, struct mbuf **mp); + int optname, caddr_t request, size_t len, int priv); static void vshiftl(unsigned char *, int, int); static int ipsec_in_reject(struct secpolicy *, struct mbuf *); #if INET6 @@ -267,6 +267,7 @@ static struct ipsec_tag *ipsec_addaux(struct mbuf *); static struct ipsec_tag *ipsec_findaux(struct mbuf *); static void ipsec_optaux(struct mbuf *, struct ipsec_tag *); int ipsec_send_natt_keepalive(struct secasvar *sav); +bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ipsec_offload_frame *frame, size_t frame_data_offset); static int sysctl_def_policy SYSCTL_HANDLER_ARGS @@ -302,34 +303,33 @@ sysctl_def_policy SYSCTL_HANDLER_ARGS * NOTE: IPv6 mapped adddress concern is implemented here. 
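The ipsec.c hunks that follow are largely mechanical: K&R-style definitions are rewritten as ANSI prototypes (and re-indented), so parameter types become part of the signature and are checked at every call site. The shape of that conversion, with hypothetical stand-in types:

struct mbuf;
struct socket;
struct secpolicy_stub;

/* K&R form (before):
 *   struct secpolicy *
 *   getpolicy(m, dir, so, error)
 *       struct mbuf *m;
 *       u_int dir;
 *       struct socket *so;
 *       int *error;
 *   { ... }
 */

/* ANSI form (after): types travel with the declaration */
struct secpolicy_stub *
getpolicy(struct mbuf *m, unsigned int dir, struct socket *so, int *error);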
*/ struct secpolicy * -ipsec4_getpolicybysock(m, dir, so, error) - struct mbuf *m; - u_int dir; - struct socket *so; - int *error; +ipsec4_getpolicybysock(struct mbuf *m, + u_int dir, + struct socket *so, + int *error) { struct inpcbpolicy *pcbsp = NULL; struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec4_getpolicybysock: NULL pointer was passed.\n"); - - if (so->so_pcb == NULL) { - printf("ipsec4_getpolicybysock: so->so_pcb == NULL\n"); - return ipsec4_getpolicybyaddr(m, dir, 0, error); - } + + if (so->so_pcb == NULL) { + printf("ipsec4_getpolicybysock: so->so_pcb == NULL\n"); + return ipsec4_getpolicybyaddr(m, dir, 0, error); + } switch (SOCK_DOM(so)) { - case PF_INET: - pcbsp = sotoinpcb(so)->inp_sp; - break; + case PF_INET: + pcbsp = sotoinpcb(so)->inp_sp; + break; #if INET6 - case PF_INET6: - pcbsp = sotoin6pcb(so)->in6p_sp; - break; + case PF_INET6: + pcbsp = sotoin6pcb(so)->in6p_sp; + break; #endif } @@ -337,86 +337,144 @@ ipsec4_getpolicybysock(m, dir, so, error) /* Socket has not specified an IPSEC policy */ return ipsec4_getpolicybyaddr(m, dir, 0, error); } - + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_START, 0,0,0,0,0); - + switch (SOCK_DOM(so)) { - case PF_INET: - /* set spidx in pcb */ - *error = ipsec4_setspidx_inpcb(m, sotoinpcb(so)); - break; + case PF_INET: + /* set spidx in pcb */ + *error = ipsec4_setspidx_inpcb(m, sotoinpcb(so)); + break; #if INET6 - case PF_INET6: - /* set spidx in pcb */ - *error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); - break; + case PF_INET6: + /* set spidx in pcb */ + *error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); + break; #endif - default: - panic("ipsec4_getpolicybysock: unsupported address family\n"); + default: + panic("ipsec4_getpolicybysock: unsupported address family\n"); } if (*error) { KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 1,*error,0,0,0); return NULL; } - + /* sanity check */ if (pcbsp == NULL) panic("ipsec4_getpolicybysock: pcbsp is NULL.\n"); - - switch (dir) { - case IPSEC_DIR_INBOUND: - currsp = pcbsp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - currsp = pcbsp->sp_out; - break; - default: - panic("ipsec4_getpolicybysock: illegal direction.\n"); - } - + + switch (dir) { + case IPSEC_DIR_INBOUND: + currsp = pcbsp->sp_in; + break; + case IPSEC_DIR_OUTBOUND: + currsp = pcbsp->sp_out; + break; + default: + panic("ipsec4_getpolicybysock: illegal direction.\n"); + } + /* sanity check */ if (currsp == NULL) panic("ipsec4_getpolicybysock: currsp is NULL.\n"); - + /* when privilieged socket */ - if (pcbsp->priv) { - switch (currsp->policy) { + if (pcbsp->priv) { + switch (currsp->policy) { + case IPSEC_POLICY_BYPASS: + lck_mtx_lock(sadb_mutex); + currsp->refcnt++; + lck_mtx_unlock(sadb_mutex); + *error = 0; + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 2,*error,0,0,0); + return currsp; + + case IPSEC_POLICY_ENTRUST: + /* look for a policy in SPD */ + kernsp = key_allocsp(&currsp->spidx, dir); + + /* SP found */ + if (kernsp != NULL) { + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP ipsec4_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); + *error = 0; + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 3,*error,0,0,0); + return kernsp; + } + + /* no SP found */ + lck_mtx_lock(sadb_mutex); + if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD + && ip4_def_policy.policy != 
IPSEC_POLICY_NONE) { + ipseclog((LOG_INFO, + "fixed system default policy: %d->%d\n", + ip4_def_policy.policy, IPSEC_POLICY_NONE)); + ip4_def_policy.policy = IPSEC_POLICY_NONE; + } + ip4_def_policy.refcnt++; + lck_mtx_unlock(sadb_mutex); + *error = 0; + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 4,*error,0,0,0); + return &ip4_def_policy; + + case IPSEC_POLICY_IPSEC: + lck_mtx_lock(sadb_mutex); + currsp->refcnt++; + lck_mtx_unlock(sadb_mutex); + *error = 0; + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 5,*error,0,0,0); + return currsp; + + default: + ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " + "Invalid policy for PCB %d\n", currsp->policy)); + *error = EINVAL; + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 6,*error,0,0,0); + return NULL; + } + /* NOTREACHED */ + } + + /* when non-privilieged socket */ + /* look for a policy in SPD */ + kernsp = key_allocsp(&currsp->spidx, dir); + + /* SP found */ + if (kernsp != NULL) { + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP ipsec4_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); + *error = 0; + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 7,*error,0,0,0); + return kernsp; + } + + /* no SP found */ + switch (currsp->policy) { case IPSEC_POLICY_BYPASS: - lck_mtx_lock(sadb_mutex); - currsp->refcnt++; - lck_mtx_unlock(sadb_mutex); - *error = 0; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 2,*error,0,0,0); - return currsp; - + ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " + "Illegal policy for non-priviliged defined %d\n", + currsp->policy)); + *error = EINVAL; + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 8,*error,0,0,0); + return NULL; + case IPSEC_POLICY_ENTRUST: - /* look for a policy in SPD */ - kernsp = key_allocsp(&currsp->spidx, dir); - - /* SP found */ - if (kernsp != NULL) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec4_getpolicybysock called " - "to allocate SP:0x%llx\n", - (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); - *error = 0; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 3,*error,0,0,0); - return kernsp; - } - - /* no SP found */ lck_mtx_lock(sadb_mutex); if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD - && ip4_def_policy.policy != IPSEC_POLICY_NONE) { + && ip4_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, - "fixed system default policy: %d->%d\n", - ip4_def_policy.policy, IPSEC_POLICY_NONE)); + "fixed system default policy: %d->%d\n", + ip4_def_policy.policy, IPSEC_POLICY_NONE)); ip4_def_policy.policy = IPSEC_POLICY_NONE; } ip4_def_policy.refcnt++; lck_mtx_unlock(sadb_mutex); *error = 0; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 4,*error,0,0,0); + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 9,*error,0,0,0); return &ip4_def_policy; case IPSEC_POLICY_IPSEC: @@ -424,73 +482,15 @@ ipsec4_getpolicybysock(m, dir, so, error) currsp->refcnt++; lck_mtx_unlock(sadb_mutex); *error = 0; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 5,*error,0,0,0); + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 10,*error,0,0,0); return currsp; - + default: ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " - "Invalid policy for PCB %d\n", currsp->policy)); + "Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 6,*error,0,0,0); + KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 11,*error,0,0,0); return NULL; - } - /* NOTREACHED */ - } - - /* when non-privilieged socket */ - /* look for a policy in SPD */ - kernsp = key_allocsp(&currsp->spidx, dir); - - /* SP found */ - if (kernsp 
!= NULL) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec4_getpolicybysock called " - "to allocate SP:0x%llx\n", - (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); - *error = 0; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 7,*error,0,0,0); - return kernsp; - } - - /* no SP found */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " - "Illegal policy for non-priviliged defined %d\n", - currsp->policy)); - *error = EINVAL; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 8,*error,0,0,0); - return NULL; - - case IPSEC_POLICY_ENTRUST: - lck_mtx_lock(sadb_mutex); - if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD - && ip4_def_policy.policy != IPSEC_POLICY_NONE) { - ipseclog((LOG_INFO, - "fixed system default policy: %d->%d\n", - ip4_def_policy.policy, IPSEC_POLICY_NONE)); - ip4_def_policy.policy = IPSEC_POLICY_NONE; - } - ip4_def_policy.refcnt++; - lck_mtx_unlock(sadb_mutex); - *error = 0; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 9,*error,0,0,0); - return &ip4_def_policy; - - case IPSEC_POLICY_IPSEC: - lck_mtx_lock(sadb_mutex); - currsp->refcnt++; - lck_mtx_unlock(sadb_mutex); - *error = 0; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 10,*error,0,0,0); - return currsp; - - default: - ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " - "Invalid policy for PCB %d\n", currsp->policy)); - *error = EINVAL; - KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 11,*error,0,0,0); - return NULL; } /* NOTREACHED */ } @@ -641,86 +641,138 @@ ipsec4_getpolicybyinterface(struct mbuf *m, * others: a pointer to SP */ struct secpolicy * -ipsec6_getpolicybysock(m, dir, so, error) - struct mbuf *m; - u_int dir; - struct socket *so; - int *error; +ipsec6_getpolicybysock(struct mbuf *m, + u_int dir, + struct socket *so, + int *error) { struct inpcbpolicy *pcbsp = NULL; struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec6_getpolicybysock: NULL pointer was passed.\n"); - + #if DIAGNOSTIC - if (SOCK_DOM(so) != PF_INET6) - panic("ipsec6_getpolicybysock: socket domain != inet6\n"); + if (SOCK_DOM(so) != PF_INET6) + panic("ipsec6_getpolicybysock: socket domain != inet6\n"); #endif - - pcbsp = sotoin6pcb(so)->in6p_sp; + + pcbsp = sotoin6pcb(so)->in6p_sp; + + if (!pcbsp){ + return ipsec6_getpolicybyaddr(m, dir, 0, error); + } - if (!pcbsp){ - return ipsec6_getpolicybyaddr(m, dir, 0, error); - } - /* set spidx in pcb */ ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); - + /* sanity check */ if (pcbsp == NULL) panic("ipsec6_getpolicybysock: pcbsp is NULL.\n"); - - switch (dir) { - case IPSEC_DIR_INBOUND: - currsp = pcbsp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - currsp = pcbsp->sp_out; - break; - default: - panic("ipsec6_getpolicybysock: illegal direction.\n"); - } - + + switch (dir) { + case IPSEC_DIR_INBOUND: + currsp = pcbsp->sp_in; + break; + case IPSEC_DIR_OUTBOUND: + currsp = pcbsp->sp_out; + break; + default: + panic("ipsec6_getpolicybysock: illegal direction.\n"); + } + /* sanity check */ if (currsp == NULL) panic("ipsec6_getpolicybysock: currsp is NULL.\n"); - + /* when privilieged socket */ - if (pcbsp->priv) { - switch (currsp->policy) { + if (pcbsp->priv) { + switch (currsp->policy) { + case IPSEC_POLICY_BYPASS: + lck_mtx_lock(sadb_mutex); + currsp->refcnt++; + lck_mtx_unlock(sadb_mutex); + *error = 0; + return currsp; + + case IPSEC_POLICY_ENTRUST: + /* 
look for a policy in SPD */ + kernsp = key_allocsp(&currsp->spidx, dir); + + /* SP found */ + if (kernsp != NULL) { + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP ipsec6_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); + *error = 0; + return kernsp; + } + + /* no SP found */ + lck_mtx_lock(sadb_mutex); + if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD + && ip6_def_policy.policy != IPSEC_POLICY_NONE) { + ipseclog((LOG_INFO, + "fixed system default policy: %d->%d\n", + ip6_def_policy.policy, IPSEC_POLICY_NONE)); + ip6_def_policy.policy = IPSEC_POLICY_NONE; + } + ip6_def_policy.refcnt++; + lck_mtx_unlock(sadb_mutex); + *error = 0; + return &ip6_def_policy; + + case IPSEC_POLICY_IPSEC: + lck_mtx_lock(sadb_mutex); + currsp->refcnt++; + lck_mtx_unlock(sadb_mutex); + *error = 0; + return currsp; + + default: + ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " + "Invalid policy for PCB %d\n", currsp->policy)); + *error = EINVAL; + return NULL; + } + /* NOTREACHED */ + } + + /* when non-privilieged socket */ + /* look for a policy in SPD */ + kernsp = key_allocsp(&currsp->spidx, dir); + + /* SP found */ + if (kernsp != NULL) { + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP ipsec6_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); + *error = 0; + return kernsp; + } + + /* no SP found */ + switch (currsp->policy) { case IPSEC_POLICY_BYPASS: - lck_mtx_lock(sadb_mutex); - currsp->refcnt++; - lck_mtx_unlock(sadb_mutex); - *error = 0; - return currsp; - + ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " + "Illegal policy for non-priviliged defined %d\n", + currsp->policy)); + *error = EINVAL; + return NULL; + case IPSEC_POLICY_ENTRUST: - /* look for a policy in SPD */ - kernsp = key_allocsp(&currsp->spidx, dir); - - /* SP found */ - if (kernsp != NULL) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_getpolicybysock called " - "to allocate SP:0x%llx\n", - (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); - *error = 0; - return kernsp; - } - - /* no SP found */ lck_mtx_lock(sadb_mutex); if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD - && ip6_def_policy.policy != IPSEC_POLICY_NONE) { + && ip6_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, - "fixed system default policy: %d->%d\n", - ip6_def_policy.policy, IPSEC_POLICY_NONE)); + "fixed system default policy: %d->%d\n", + ip6_def_policy.policy, IPSEC_POLICY_NONE)); ip6_def_policy.policy = IPSEC_POLICY_NONE; } ip6_def_policy.refcnt++; @@ -734,66 +786,13 @@ ipsec6_getpolicybysock(m, dir, so, error) lck_mtx_unlock(sadb_mutex); *error = 0; return currsp; - + default: - ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " - "Invalid policy for PCB %d\n", currsp->policy)); + ipseclog((LOG_ERR, + "ipsec6_policybysock: Invalid policy for PCB %d\n", + currsp->policy)); *error = EINVAL; return NULL; - } - /* NOTREACHED */ - } - - /* when non-privilieged socket */ - /* look for a policy in SPD */ - kernsp = key_allocsp(&currsp->spidx, dir); - - /* SP found */ - if (kernsp != NULL) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_getpolicybysock called " - "to allocate SP:0x%llx\n", - (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); - *error = 0; - return kernsp; - } - - /* no SP found */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " - "Illegal policy for non-priviliged defined %d\n", - currsp->policy)); - *error = EINVAL; - return NULL; - - case IPSEC_POLICY_ENTRUST: - lck_mtx_lock(sadb_mutex); - if (ip6_def_policy.policy != 
IPSEC_POLICY_DISCARD - && ip6_def_policy.policy != IPSEC_POLICY_NONE) { - ipseclog((LOG_INFO, - "fixed system default policy: %d->%d\n", - ip6_def_policy.policy, IPSEC_POLICY_NONE)); - ip6_def_policy.policy = IPSEC_POLICY_NONE; - } - ip6_def_policy.refcnt++; - lck_mtx_unlock(sadb_mutex); - *error = 0; - return &ip6_def_policy; - - case IPSEC_POLICY_IPSEC: - lck_mtx_lock(sadb_mutex); - currsp->refcnt++; - lck_mtx_unlock(sadb_mutex); - *error = 0; - return currsp; - - default: - ipseclog((LOG_ERR, - "ipsec6_policybysock: Invalid policy for PCB %d\n", - currsp->policy)); - *error = EINVAL; - return NULL; } /* NOTREACHED */ } @@ -1010,40 +1009,40 @@ bad: static int ipsec4_setspidx_inpcb(m, pcb) - struct mbuf *m; - struct inpcb *pcb; +struct mbuf *m; +struct inpcb *pcb; { struct secpolicyindex *spidx; int error; - + if (ipsec_bypass != 0) return 0; - + /* sanity check */ if (pcb == NULL) panic("ipsec4_setspidx_inpcb: no PCB found.\n"); - if (pcb->inp_sp == NULL) - panic("ipsec4_setspidx_inpcb: no inp_sp found.\n"); - if (pcb->inp_sp->sp_out == NULL || pcb->inp_sp->sp_in == NULL) - panic("ipsec4_setspidx_inpcb: no sp_in/out found.\n"); - - bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx)); - bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); - - spidx = &pcb->inp_sp->sp_in->spidx; - error = ipsec_setspidx(m, spidx, 1, 0); - if (error) - goto bad; + if (pcb->inp_sp == NULL) + panic("ipsec4_setspidx_inpcb: no inp_sp found.\n"); + if (pcb->inp_sp->sp_out == NULL || pcb->inp_sp->sp_in == NULL) + panic("ipsec4_setspidx_inpcb: no sp_in/out found.\n"); + + bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx)); + bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); + + spidx = &pcb->inp_sp->sp_in->spidx; + error = ipsec_setspidx(m, spidx, 1, 0); + if (error) + goto bad; spidx->dir = IPSEC_DIR_INBOUND; - + spidx = &pcb->inp_sp->sp_out->spidx; error = ipsec_setspidx(m, spidx, 1, 0); if (error) goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; - + return 0; - + bad: bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); @@ -1053,37 +1052,37 @@ bad: #if INET6 static int ipsec6_setspidx_in6pcb(m, pcb) - struct mbuf *m; - struct in6pcb *pcb; +struct mbuf *m; +struct in6pcb *pcb; { struct secpolicyindex *spidx; int error; - + /* sanity check */ if (pcb == NULL) panic("ipsec6_setspidx_in6pcb: no PCB found.\n"); - if (pcb->in6p_sp == NULL) - panic("ipsec6_setspidx_in6pcb: no in6p_sp found.\n"); - if (pcb->in6p_sp->sp_out == NULL || pcb->in6p_sp->sp_in == NULL) - panic("ipsec6_setspidx_in6pcb: no sp_in/out found.\n"); - - bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx)); - bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); - - spidx = &pcb->in6p_sp->sp_in->spidx; - error = ipsec_setspidx(m, spidx, 1, 0); - if (error) - goto bad; - spidx->dir = IPSEC_DIR_INBOUND; - - spidx = &pcb->in6p_sp->sp_out->spidx; + if (pcb->in6p_sp == NULL) + panic("ipsec6_setspidx_in6pcb: no in6p_sp found.\n"); + if (pcb->in6p_sp->sp_out == NULL || pcb->in6p_sp->sp_in == NULL) + panic("ipsec6_setspidx_in6pcb: no sp_in/out found.\n"); + + bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx)); + bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); + + spidx = &pcb->in6p_sp->sp_in->spidx; + error = ipsec_setspidx(m, spidx, 1, 0); + if (error) + goto bad; + spidx->dir = IPSEC_DIR_INBOUND; + + spidx = &pcb->in6p_sp->sp_out->spidx; error = ipsec_setspidx(m, spidx, 1, 0); if (error) goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; - + return 0; - + bad: bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx)); 
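Every path out of the getpolicybysock routines that returns a live policy first bumps its refcnt under sadb_mutex, and callers later drop the reference with key_freesp(). A generic sketch of that retain/release discipline (pthread mutex as a stand-in for sadb_mutex, hypothetical names):

#include <pthread.h>

struct policy {
	int refcnt;
};

static pthread_mutex_t table_mutex = PTHREAD_MUTEX_INITIALIZER;

static struct policy *
policy_retain(struct policy *p)
{
	pthread_mutex_lock(&table_mutex);    /* sadb_mutex stand-in */
	p->refcnt++;                         /* currsp->refcnt++ pattern */
	pthread_mutex_unlock(&table_mutex);
	return p;
}

static void
policy_release(struct policy *p)             /* key_freesp() stand-in */
{
	pthread_mutex_lock(&table_mutex);
	if (--p->refcnt == 0) {
		/* last reference: the kernel would reclaim the entry here */
	}
	pthread_mutex_unlock(&table_mutex);
}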
bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); @@ -1296,10 +1295,9 @@ ipsec4_setspidx_ipaddr(m, spidx) #if INET6 static void -ipsec6_get_ulp(m, spidx, needport) - struct mbuf *m; - struct secpolicyindex *spidx; - int needport; +ipsec6_get_ulp(struct mbuf *m, + struct secpolicyindex *spidx, + int needport) { int off, nxt; struct tcphdr th; @@ -1353,9 +1351,8 @@ ipsec6_get_ulp(m, spidx, needport) /* assumes that m is sane */ static int -ipsec6_setspidx_ipaddr(m, spidx) - struct mbuf *m; - struct secpolicyindex *spidx; +ipsec6_setspidx_ipaddr(struct mbuf *m, + struct secpolicyindex *spidx) { struct ip6_hdr *ip6 = NULL; struct ip6_hdr ip6buf; @@ -1398,53 +1395,51 @@ static struct inpcbpolicy * ipsec_newpcbpolicy() { struct inpcbpolicy *p; - + p = (struct inpcbpolicy *)_MALLOC(sizeof(*p), M_SECA, M_WAITOK); return p; } static void -ipsec_delpcbpolicy(p) - struct inpcbpolicy *p; +ipsec_delpcbpolicy(struct inpcbpolicy *p) { FREE(p, M_SECA); } /* initialize policy in PCB */ int -ipsec_init_policy(so, pcb_sp) - struct socket *so; - struct inpcbpolicy **pcb_sp; +ipsec_init_policy(struct socket *so, + struct inpcbpolicy **pcb_sp) { struct inpcbpolicy *new; - + /* sanity check. */ if (so == NULL || pcb_sp == NULL) panic("ipsec_init_policy: NULL pointer was passed.\n"); - - new = ipsec_newpcbpolicy(); - if (new == NULL) { - ipseclog((LOG_DEBUG, "ipsec_init_policy: No more memory.\n")); - return ENOBUFS; - } + + new = ipsec_newpcbpolicy(); + if (new == NULL) { + ipseclog((LOG_DEBUG, "ipsec_init_policy: No more memory.\n")); + return ENOBUFS; + } bzero(new, sizeof(*new)); - + #ifdef __APPLE__ if (kauth_cred_issuser(so->so_cred)) #else - if (so->so_cred != 0 && !suser(so->so_cred->pc_ucred, NULL)) + if (so->so_cred != 0 && !suser(so->so_cred->pc_ucred, NULL)) #endif - new->priv = 1; - else - new->priv = 0; - - if ((new->sp_in = key_newsp()) == NULL) { - ipsec_delpcbpolicy(new); - return ENOBUFS; - } + new->priv = 1; + else + new->priv = 0; + + if ((new->sp_in = key_newsp()) == NULL) { + ipsec_delpcbpolicy(new); + return ENOBUFS; + } new->sp_in->state = IPSEC_SPSTATE_ALIVE; new->sp_in->policy = IPSEC_POLICY_ENTRUST; - + if ((new->sp_out = key_newsp()) == NULL) { key_freesp(new->sp_in, KEY_SADB_UNLOCKED); ipsec_delpcbpolicy(new); @@ -1452,58 +1447,57 @@ ipsec_init_policy(so, pcb_sp) } new->sp_out->state = IPSEC_SPSTATE_ALIVE; new->sp_out->policy = IPSEC_POLICY_ENTRUST; - + *pcb_sp = new; - + return 0; } /* copy old ipsec policy into new */ int -ipsec_copy_policy(old, new) - struct inpcbpolicy *old, *new; +ipsec_copy_policy(struct inpcbpolicy *old, + struct inpcbpolicy *new) { struct secpolicy *sp; - + if (ipsec_bypass != 0) return 0; - + sp = ipsec_deepcopy_policy(old->sp_in); if (sp) { key_freesp(new->sp_in, KEY_SADB_UNLOCKED); new->sp_in = sp; } else return ENOBUFS; - + sp = ipsec_deepcopy_policy(old->sp_out); if (sp) { key_freesp(new->sp_out, KEY_SADB_UNLOCKED); new->sp_out = sp; } else return ENOBUFS; - + new->priv = old->priv; - + return 0; } /* deep-copy a policy in PCB */ static struct secpolicy * -ipsec_deepcopy_policy(src) - struct secpolicy *src; +ipsec_deepcopy_policy(struct secpolicy *src) { struct ipsecrequest *newchain = NULL; struct ipsecrequest *p; struct ipsecrequest **q; struct ipsecrequest *r; struct secpolicy *dst; - + if (src == NULL) return NULL; dst = key_newsp(); if (dst == NULL) return NULL; - + /* * deep-copy IPsec request chain. This is required since struct * ipsecrequest is not reference counted. 
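ipsec_deepcopy_policy() builds the copied request chain through a pointer-to-pointer tail (q = &newchain; ... q = &((*q)->next);) and unwinds the partial chain on allocation failure, since struct ipsecrequest is not reference counted. A generic sketch of that idiom:

#include <stdlib.h>
#include <string.h>

struct req {
	int         level;
	struct req *next;
};

static struct req *
deepcopy_chain(const struct req *src)
{
	struct req *head = NULL, **tail = &head;
	const struct req *p;

	for (p = src; p != NULL; p = p->next) {
		*tail = malloc(sizeof(**tail));
		if (*tail == NULL)
			goto fail;               /* chain stays NULL-terminated */
		memset(*tail, 0, sizeof(**tail));
		(*tail)->level = p->level;       /* copy payload, not links */
		tail = &(*tail)->next;           /* advance the tail slot */
	}
	return head;

fail:
	while (head != NULL) {                   /* unwind the partial copy */
		struct req *n = head->next;
		free(head);
		head = n;
	}
	return NULL;
}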
@@ -1511,32 +1505,32 @@ ipsec_deepcopy_policy(src) q = &newchain; for (p = src->req; p; p = p->next) { *q = (struct ipsecrequest *)_MALLOC(sizeof(struct ipsecrequest), - M_SECA, M_WAITOK); + M_SECA, M_WAITOK); if (*q == NULL) goto fail; bzero(*q, sizeof(**q)); (*q)->next = NULL; - + (*q)->saidx.proto = p->saidx.proto; (*q)->saidx.mode = p->saidx.mode; (*q)->level = p->level; (*q)->saidx.reqid = p->saidx.reqid; - + bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src)); bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst)); - + (*q)->sp = dst; - + q = &((*q)->next); } - + dst->req = newchain; dst->state = src->state; dst->policy = src->policy; /* do not touch the refcnt fields */ - + return dst; - + fail: for (p = newchain; p; p = r) { r = p->next; @@ -1549,100 +1543,73 @@ fail: /* set policy and ipsec request if present. */ static int -ipsec_set_policy( - struct secpolicy **pcb_sp, - __unused int optname, - caddr_t request, - size_t len, - int priv) +ipsec_set_policy(struct secpolicy **pcb_sp, + __unused int optname, + caddr_t request, + size_t len, + int priv) { struct sadb_x_policy *xpl; struct secpolicy *newsp = NULL; int error; - + /* sanity check. */ if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)(void *)request; - + KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("ipsec_set_policy: passed policy\n"); - kdebug_sadb_x_policy((struct sadb_ext *)xpl)); - + printf("ipsec_set_policy: passed policy\n"); + kdebug_sadb_x_policy((struct sadb_ext *)xpl)); + /* check policy type */ /* ipsec_set_policy() accepts IPSEC, ENTRUST and BYPASS. */ if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE) return EINVAL; - + /* check privileged socket */ if (priv == 0 && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS) return EACCES; - + /* allocation new SP entry */ if ((newsp = key_msg2sp(xpl, len, &error)) == NULL) return error; - + newsp->state = IPSEC_SPSTATE_ALIVE; - + /* clear old SP and set new SP */ key_freesp(*pcb_sp, KEY_SADB_UNLOCKED); *pcb_sp = newsp; KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("ipsec_set_policy: new policy\n"); - kdebug_secpolicy(newsp)); - - return 0; -} - -static int -ipsec_get_policy(pcb_sp, mp) - struct secpolicy *pcb_sp; - struct mbuf **mp; -{ - - - /* sanity check. */ - if (pcb_sp == NULL || mp == NULL) - return EINVAL; - - *mp = key_sp2msg(pcb_sp); - if (!*mp) { - ipseclog((LOG_DEBUG, "ipsec_get_policy: No more memory.\n")); - return ENOBUFS; - } - - m_mchtype(*mp, MT_DATA); - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("ipsec_get_policy:\n"); - kdebug_mbuf(*mp)); - + printf("ipsec_set_policy: new policy\n"); + kdebug_secpolicy(newsp)); + return 0; } int -ipsec4_set_policy(inp, optname, request, len, priv) - struct inpcb *inp; - int optname; - caddr_t request; - size_t len; - int priv; +ipsec4_set_policy(struct inpcb *inp, + int optname, + caddr_t request, + size_t len, + int priv) { struct sadb_x_policy *xpl; struct secpolicy **pcb_sp; int error = 0; struct sadb_x_policy xpl_aligned_buf; u_int8_t *xpl_unaligned; - + /* sanity check. 
*/ if (inp == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)(void *)request; - + /* This is a new mbuf allocated by soopt_getm() */ if (IPSEC_IS_P2ALIGNED(xpl)) { xpl_unaligned = NULL; @@ -1651,139 +1618,83 @@ ipsec4_set_policy(inp, optname, request, len, priv) memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf)); xpl = (__typeof__(xpl))&xpl_aligned_buf; } - + if (inp->inp_sp == NULL) { error = ipsec_init_policy(inp->inp_socket, &inp->inp_sp); if (error) return error; } - + /* select direction */ switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = &inp->inp_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = &inp->inp_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n", - xpl->sadb_x_policy_dir)); - return EINVAL; + case IPSEC_DIR_INBOUND: + pcb_sp = &inp->inp_sp->sp_in; + break; + case IPSEC_DIR_OUTBOUND: + pcb_sp = &inp->inp_sp->sp_out; + break; + default: + ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n", + xpl->sadb_x_policy_dir)); + return EINVAL; } - + /* turn bypass off */ if (ipsec_bypass != 0) ipsec_bypass = 0; - + return ipsec_set_policy(pcb_sp, optname, request, len, priv); } -int -ipsec4_get_policy(inp, request, len, mp) - struct inpcb *inp; - caddr_t request; - size_t len; - struct mbuf **mp; -{ - struct sadb_x_policy *xpl; - struct secpolicy *pcb_sp; - int error = 0; - struct sadb_x_policy xpl_aligned_buf; - u_int8_t *xpl_unaligned; - - lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); - - /* sanity check. */ - if (inp == NULL || request == NULL || mp == NULL) - return EINVAL; - if (len < sizeof(*xpl)) - return EINVAL; - xpl = (struct sadb_x_policy *)(void *)request; - - /* This is a new mbuf allocated by soopt_getm() */ - if (IPSEC_IS_P2ALIGNED(xpl)) { - xpl_unaligned = NULL; - } else { - xpl_unaligned = (__typeof__(xpl_unaligned))xpl; - memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf)); - xpl = (__typeof__(xpl))&xpl_aligned_buf; - } - - if (inp->inp_sp == NULL) { - error = ipsec_init_policy(inp->inp_socket, &inp->inp_sp); - if (error) - return error; - } - - /* select direction */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = inp->inp_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = inp->inp_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n", - xpl->sadb_x_policy_dir)); - return EINVAL; - } - - return ipsec_get_policy(pcb_sp, mp); -} - /* delete policy in PCB */ int -ipsec4_delete_pcbpolicy(inp) - struct inpcb *inp; +ipsec4_delete_pcbpolicy(struct inpcb *inp) { - + /* sanity check. 
*/ if (inp == NULL) panic("ipsec4_delete_pcbpolicy: NULL pointer was passed.\n"); - - if (inp->inp_sp == NULL) - return 0; - + + if (inp->inp_sp == NULL) + return 0; + if (inp->inp_sp->sp_in != NULL) { key_freesp(inp->inp_sp->sp_in, KEY_SADB_UNLOCKED); inp->inp_sp->sp_in = NULL; } - + if (inp->inp_sp->sp_out != NULL) { key_freesp(inp->inp_sp->sp_out, KEY_SADB_UNLOCKED); inp->inp_sp->sp_out = NULL; } - + ipsec_delpcbpolicy(inp->inp_sp); inp->inp_sp = NULL; - + return 0; } #if INET6 int -ipsec6_set_policy(in6p, optname, request, len, priv) - struct in6pcb *in6p; - int optname; - caddr_t request; - size_t len; - int priv; +ipsec6_set_policy(struct in6pcb *in6p, + int optname, + caddr_t request, + size_t len, + int priv) { struct sadb_x_policy *xpl; struct secpolicy **pcb_sp; int error = 0; struct sadb_x_policy xpl_aligned_buf; u_int8_t *xpl_unaligned; - + /* sanity check. */ if (in6p == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)(void *)request; - + /* This is a new mbuf allocated by soopt_getm() */ if (IPSEC_IS_P2ALIGNED(xpl)) { xpl_unaligned = NULL; @@ -1792,111 +1703,54 @@ ipsec6_set_policy(in6p, optname, request, len, priv) memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf)); xpl = (__typeof__(xpl))&xpl_aligned_buf; } - + if (in6p->in6p_sp == NULL) { error = ipsec_init_policy(in6p->inp_socket, &in6p->in6p_sp); if (error) return error; } - + /* select direction */ switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = &in6p->in6p_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = &in6p->in6p_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n", - xpl->sadb_x_policy_dir)); - return EINVAL; + case IPSEC_DIR_INBOUND: + pcb_sp = &in6p->in6p_sp->sp_in; + break; + case IPSEC_DIR_OUTBOUND: + pcb_sp = &in6p->in6p_sp->sp_out; + break; + default: + ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n", + xpl->sadb_x_policy_dir)); + return EINVAL; } - /* turn bypass off */ - if (ipsec_bypass != 0) - ipsec_bypass = 0; - return ipsec_set_policy(pcb_sp, optname, request, len, priv); } int -ipsec6_get_policy(in6p, request, len, mp) - struct in6pcb *in6p; - caddr_t request; - size_t len; - struct mbuf **mp; -{ - struct sadb_x_policy *xpl; - struct secpolicy *pcb_sp; - int error = 0; - struct sadb_x_policy xpl_aligned_buf; - u_int8_t *xpl_unaligned; - - /* sanity check. */ - if (in6p == NULL || request == NULL || mp == NULL) - return EINVAL; - if (len < sizeof(*xpl)) - return EINVAL; - xpl = (struct sadb_x_policy *)(void *)request; - - /* This is a new mbuf allocated by soopt_getm() */ - if (IPSEC_IS_P2ALIGNED(xpl)) { - xpl_unaligned = NULL; - } else { - xpl_unaligned = (__typeof__(xpl_unaligned))xpl; - memcpy(&xpl_aligned_buf, xpl, sizeof(xpl_aligned_buf)); - xpl = (__typeof__(xpl))&xpl_aligned_buf; - } - - if (in6p->in6p_sp == NULL) { - error = ipsec_init_policy(in6p->inp_socket, &in6p->in6p_sp); - if (error) - return error; - } - - /* select direction */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = in6p->in6p_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = in6p->in6p_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n", - xpl->sadb_x_policy_dir)); - return EINVAL; - } - - return ipsec_get_policy(pcb_sp, mp); -} - -int -ipsec6_delete_pcbpolicy(in6p) - struct in6pcb *in6p; +ipsec6_delete_pcbpolicy(struct in6pcb *in6p) { - + /* sanity check. 
*/ if (in6p == NULL) panic("ipsec6_delete_pcbpolicy: NULL pointer was passed.\n"); - - if (in6p->in6p_sp == NULL) - return 0; - + + if (in6p->in6p_sp == NULL) + return 0; + if (in6p->in6p_sp->sp_in != NULL) { key_freesp(in6p->in6p_sp->sp_in, KEY_SADB_UNLOCKED); in6p->in6p_sp->sp_in = NULL; } - + if (in6p->in6p_sp->sp_out != NULL) { key_freesp(in6p->in6p_sp->sp_out, KEY_SADB_UNLOCKED); in6p->in6p_sp->sp_out = NULL; } - + ipsec_delpcbpolicy(in6p->in6p_sp); in6p->in6p_sp = NULL; - + return 0; } #endif @@ -2126,7 +1980,7 @@ ipsec4_in_reject_so(m, so) if (so == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else - sp = ipsec4_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); + sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 0, &error); if (sp == NULL) return 0; /* XXX should be panic ? @@ -2186,7 +2040,7 @@ ipsec6_in_reject_so(m, so) if (so == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else - sp = ipsec6_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); + sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 0, &error); if (sp == NULL) return 0; /* XXX should be panic ? */ @@ -2322,7 +2176,7 @@ ipsec4_hdrsiz(m, dir, inp) if (inp == NULL) sp = ipsec4_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else - sp = ipsec4_getpolicybysock(m, dir, inp->inp_socket, &error); + sp = ipsec4_getpolicybyaddr(m, dir, 0, &error); if (sp == NULL) return 0; /* XXX should be panic ? */ @@ -2364,7 +2218,7 @@ ipsec6_hdrsiz(m, dir, in6p) if (in6p == NULL) sp = ipsec6_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else - sp = ipsec6_getpolicybysock(m, dir, in6p->in6p_socket, &error); + sp = ipsec6_getpolicybyaddr(m, dir, 0, &error); if (sp == NULL) return 0; @@ -3159,21 +3013,172 @@ ipsec_dumpmbuf(m) /* * IPsec output logic for IPv4. */ -int -ipsec4_output( - struct ipsec_output_state *state, - struct secpolicy *sp, - __unused int flags) +static int +ipsec4_output_internal(struct ipsec_output_state *state, struct secasvar *sav) { struct ip *ip = NULL; - struct ipsecrequest *isr = NULL; - struct secasindex saidx; - struct secasvar *sav = NULL; int error = 0; struct sockaddr_in *dst4; - struct sockaddr_in *sin; struct route *ro4; + /* validity check */ + if (sav == NULL || sav->sah == NULL) { + error = EINVAL; + goto bad; + } + + /* + * If there is no valid SA, we give up to process any + * more. In such a case, the SA's status is changed + * from DYING to DEAD after allocating. If a packet + * send to the receiver by dead SA, the receiver can + * not decode a packet because SA has been dead. + */ + if (sav->state != SADB_SASTATE_MATURE + && sav->state != SADB_SASTATE_DYING) { + IPSEC_STAT_INCREMENT(ipsecstat.out_nosa); + error = EINVAL; + goto bad; + } + + state->outgoing_if = sav->sah->outgoing_if; + + /* + * There may be the case that SA status will be changed when + * we are refering to one. So calling splsoftnet(). + */ + + if (sav->sah->saidx.mode == IPSEC_MODE_TUNNEL) { + /* + * build IPsec tunnel. 
+ */ + /* XXX should be processed with other familiy */ + if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) { + ipseclog((LOG_ERR, "ipsec4_output: " + "family mismatched between inner and outer spi=%u\n", + (u_int32_t)ntohl(sav->spi))); + error = EAFNOSUPPORT; + goto bad; + } + + state->m = ipsec4_splithdr(state->m); + if (!state->m) { + error = ENOMEM; + goto bad; + } + error = ipsec4_encapsulate(state->m, sav); + if (error) { + state->m = NULL; + goto bad; + } + ip = mtod(state->m, struct ip *); + + // grab sadb_mutex, before updating sah's route cache + lck_mtx_lock(sadb_mutex); + ro4= &sav->sah->sa_route; + dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst; + if (ro4->ro_rt != NULL) { + RT_LOCK(ro4->ro_rt); + } + if (ROUTE_UNUSABLE(ro4) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { + if (ro4->ro_rt != NULL) + RT_UNLOCK(ro4->ro_rt); + ROUTE_RELEASE(ro4); + } + if (ro4->ro_rt == 0) { + dst4->sin_family = AF_INET; + dst4->sin_len = sizeof(*dst4); + dst4->sin_addr = ip->ip_dst; + rtalloc(ro4); + if (ro4->ro_rt == 0) { + OSAddAtomic(1, &ipstat.ips_noroute); + error = EHOSTUNREACH; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); + goto bad; + } + RT_LOCK(ro4->ro_rt); + } + + /* + * adjust state->dst if tunnel endpoint is offlink + * + * XXX: caching rt_gateway value in the state is + * not really good, since it may point elsewhere + * when the gateway gets modified to a larger + * sockaddr via rt_setgate(). This is currently + * addressed by SA_SIZE roundup in that routine. + */ + if (ro4->ro_rt->rt_flags & RTF_GATEWAY) + dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway; + RT_UNLOCK(ro4->ro_rt); + ROUTE_RELEASE(&state->ro); + route_copyout(&state->ro, ro4, sizeof(state->ro)); + state->dst = (struct sockaddr *)dst4; + state->tunneled = 4; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); + } + + state->m = ipsec4_splithdr(state->m); + if (!state->m) { + error = ENOMEM; + goto bad; + } + switch (sav->sah->saidx.proto) { + case IPPROTO_ESP: +#if IPSEC_ESP + if ((error = esp4_output(state->m, sav)) != 0) { + state->m = NULL; + goto bad; + } + break; +#else + m_freem(state->m); + state->m = NULL; + error = EINVAL; + goto bad; +#endif + case IPPROTO_AH: + if ((error = ah4_output(state->m, sav)) != 0) { + state->m = NULL; + goto bad; + } + break; + case IPPROTO_IPCOMP: + if ((error = ipcomp4_output(state->m, sav)) != 0) { + state->m = NULL; + goto bad; + } + break; + default: + ipseclog((LOG_ERR, + "ipsec4_output: unknown ipsec protocol %d\n", + sav->sah->saidx.proto)); + m_freem(state->m); + state->m = NULL; + error = EINVAL; + goto bad; + } + + if (state->m == 0) { + error = ENOMEM; + goto bad; + } + + return 0; + +bad: + return error; +} + +int +ipsec4_interface_output(struct ipsec_output_state *state, ifnet_t interface) +{ + int error = 0; + struct secasvar *sav = NULL; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); if (!state) @@ -3182,26 +3187,56 @@ ipsec4_output( panic("state->m == NULL in ipsec4_output"); if (!state->dst) panic("state->dst == NULL in ipsec4_output"); + + sav = key_alloc_outbound_sav_for_interface(interface, AF_INET); + if (sav == NULL) { + goto bad; + } + + if ((error = ipsec4_output_internal(state, sav)) != 0) { + goto bad; + } + + KERNEL_DEBUG(DBG_FNC_IPSEC_OUT | DBG_FUNC_END, 0,0,0,0,0); + if (sav) + key_freesav(sav, KEY_SADB_UNLOCKED); + return 0; + +bad: + if (sav) + key_freesav(sav, KEY_SADB_UNLOCKED); + m_freem(state->m); + state->m = NULL; + 
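The tunnel path above reuses the SAH's cached route only while it is still usable and still aimed at the packet's destination; anything else releases the cache and re-resolves. A reduced sketch of that revalidation step, with hypothetical stand-ins for the kernel's route types (sin_len is the BSD sockaddr length field):

#include <string.h>
#include <netinet/in.h>

struct route_stub {
	int                valid;    /* !ROUTE_UNUSABLE() stand-in */
	struct sockaddr_in dst;
};

static int
lookup_route(struct route_stub *ro)          /* rtalloc() stand-in */
{
	ro->valid = 1;
	return 0;                            /* or EHOSTUNREACH on failure */
}

static int
route_revalidate(struct route_stub *ro, struct in_addr ip_dst)
{
	/* reuse only if still usable and still aimed at this destination */
	if (!ro->valid || ro->dst.sin_addr.s_addr != ip_dst.s_addr) {
		memset(ro, 0, sizeof(*ro));  /* ROUTE_RELEASE() stand-in */
		ro->dst.sin_family = AF_INET;
		ro->dst.sin_len = sizeof(ro->dst);
		ro->dst.sin_addr = ip_dst;
		return lookup_route(ro);
	}
	return 0;
}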
KERNEL_DEBUG(DBG_FNC_IPSEC_OUT | DBG_FUNC_END, error,0,0,0,0); + return error; +} +int +ipsec4_output(struct ipsec_output_state *state, struct secpolicy *sp, __unused int flags) +{ + struct ip *ip = NULL; + struct ipsecrequest *isr = NULL; + struct secasindex saidx; + struct secasvar *sav = NULL; + int error = 0; + struct sockaddr_in *sin; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + if (!state) + panic("state == NULL in ipsec4_output"); + if (!state->m) + panic("state->m == NULL in ipsec4_output"); + if (!state->dst) + panic("state->dst == NULL in ipsec4_output"); + KERNEL_DEBUG(DBG_FNC_IPSEC_OUT | DBG_FUNC_START, 0,0,0,0,0); - + KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("ipsec4_output: applyed SP\n"); - kdebug_secpolicy(sp)); - + printf("ipsec4_output: applied SP\n"); + kdebug_secpolicy(sp)); + for (isr = sp->req; isr != NULL; isr = isr->next) { - -#if 0 /* give up to check restriction of transport mode */ - /* XXX but should be checked somewhere */ - /* - * some of the IPsec operation must be performed only in - * originating case. - */ - if (isr->saidx.mode == IPSEC_MODE_TRANSPORT - && (flags & IP_FORWARDING)) - continue; -#endif - /* make SA index for search proper SA */ ip = mtod(state->m, struct ip *); bcopy(&isr->saidx, &saidx, sizeof(saidx)); @@ -3213,20 +3248,20 @@ ipsec4_output( sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; bcopy(&ip->ip_src, &sin->sin_addr, - sizeof(sin->sin_addr)); + sizeof(sin->sin_addr)); } sin = (struct sockaddr_in *)&saidx.dst; if (sin->sin_len == 0) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; - /* - * Get port from packet if upper layer is UDP and nat traversal - * is enabled and transport mode. - */ - + /* + * Get port from packet if upper layer is UDP and nat traversal + * is enabled and transport mode. + */ + if ((esp_udp_encap_port & 0xFFFF) != 0 && - isr->saidx.mode == IPSEC_MODE_TRANSPORT) { + isr->saidx.mode == IPSEC_MODE_TRANSPORT) { if (ip->ip_p == IPPROTO_UDP) { struct udphdr *udp; @@ -3239,8 +3274,7 @@ ipsec4_output( if (state->m->m_len < hlen + sizeof(struct udphdr)) { state->m = m_pullup(state->m, hlen + sizeof(struct udphdr)); if (!state->m) { - ipseclog((LOG_DEBUG, - "IPv4 output: can't pullup UDP header\n")); + ipseclog((LOG_DEBUG, "IPv4 output: can't pullup UDP header\n")); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); goto bad; } @@ -3250,11 +3284,11 @@ ipsec4_output( sin->sin_port = udp->uh_dport; } } - + bcopy(&ip->ip_dst, &sin->sin_addr, - sizeof(sin->sin_addr)); + sizeof(sin->sin_addr)); } - + if ((error = key_checkrequest(isr, &saidx, &sav)) != 0) { /* * IPsec processing is required, but no SA found. @@ -3266,163 +3300,28 @@ ipsec4_output( IPSEC_STAT_INCREMENT(ipsecstat.out_nosa); goto bad; } - + /* validity check */ if (sav == NULL) { switch (ipsec_get_reqlevel(isr)) { - case IPSEC_LEVEL_USE: - continue; - case IPSEC_LEVEL_REQUIRE: - /* must be not reached here. */ - panic("ipsec4_output: no SA found, but required."); - } - } - - /* - * If there is no valid SA, we give up to process any - * more. In such a case, the SA's status is changed - * from DYING to DEAD after allocating. If a packet - * send to the receiver by dead SA, the receiver can - * not decode a packet because SA has been dead. - */ - if (sav->state != SADB_SASTATE_MATURE - && sav->state != SADB_SASTATE_DYING) { - IPSEC_STAT_INCREMENT(ipsecstat.out_nosa); - error = EINVAL; - goto bad; - } - - /* - * There may be the case that SA status will be changed when - * we are refering to one. 
So calling splsoftnet(). - */ - - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { - /* - * build IPsec tunnel. - */ - /* XXX should be processed with other familiy */ - if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) { - ipseclog((LOG_ERR, "ipsec4_output: " - "family mismatched between inner and outer spi=%u\n", - (u_int32_t)ntohl(sav->spi))); - error = EAFNOSUPPORT; - goto bad; - } - - state->m = ipsec4_splithdr(state->m); - if (!state->m) { - error = ENOMEM; - goto bad; - } - error = ipsec4_encapsulate(state->m, sav); - if (error) { - state->m = NULL; - goto bad; - } - ip = mtod(state->m, struct ip *); - - // grab sadb_mutex, before updating sah's route cache - lck_mtx_lock(sadb_mutex); - ro4= &sav->sah->sa_route; - dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst; - if (ro4->ro_rt != NULL) { - RT_LOCK(ro4->ro_rt); - } - if (ROUTE_UNUSABLE(ro4) || - dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { - if (ro4->ro_rt != NULL) - RT_UNLOCK(ro4->ro_rt); - ROUTE_RELEASE(ro4); - } - if (ro4->ro_rt == 0) { - dst4->sin_family = AF_INET; - dst4->sin_len = sizeof(*dst4); - dst4->sin_addr = ip->ip_dst; - rtalloc(ro4); - if (ro4->ro_rt == 0) { - OSAddAtomic(1, &ipstat.ips_noroute); - error = EHOSTUNREACH; - // release sadb_mutex, after updating sah's route cache - lck_mtx_unlock(sadb_mutex); - goto bad; - } - RT_LOCK(ro4->ro_rt); - } - - /* - * adjust state->dst if tunnel endpoint is offlink - * - * XXX: caching rt_gateway value in the state is - * not really good, since it may point elsewhere - * when the gateway gets modified to a larger - * sockaddr via rt_setgate(). This is currently - * addressed by SA_SIZE roundup in that routine. - */ - if (ro4->ro_rt->rt_flags & RTF_GATEWAY) - dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway; - RT_UNLOCK(ro4->ro_rt); - ROUTE_RELEASE(&state->ro); - route_copyout(&state->ro, ro4, sizeof(state->ro)); - state->dst = (struct sockaddr *)dst4; - state->tunneled = 4; - // release sadb_mutex, after updating sah's route cache - lck_mtx_unlock(sadb_mutex); - } - - state->m = ipsec4_splithdr(state->m); - if (!state->m) { - error = ENOMEM; - goto bad; - } - switch (isr->saidx.proto) { - case IPPROTO_ESP: -#if IPSEC_ESP - if ((error = esp4_output(state->m, sav)) != 0) { - state->m = NULL; - goto bad; - } - break; -#else - m_freem(state->m); - state->m = NULL; - error = EINVAL; - goto bad; -#endif - case IPPROTO_AH: - if ((error = ah4_output(state->m, sav)) != 0) { - state->m = NULL; - goto bad; - } - break; - case IPPROTO_IPCOMP: - if ((error = ipcomp4_output(state->m, sav)) != 0) { - state->m = NULL; - goto bad; + case IPSEC_LEVEL_USE: + continue; + case IPSEC_LEVEL_REQUIRE: + /* must be not reached here. */ + panic("ipsec4_output: no SA found, but required."); } - break; - default: - ipseclog((LOG_ERR, - "ipsec4_output: unknown ipsec protocol %d\n", - isr->saidx.proto)); - m_freem(state->m); - state->m = NULL; - error = EINVAL; - goto bad; } - - if (state->m == 0) { - error = ENOMEM; + + if ((error = ipsec4_output_internal(state, sav)) != 0) { goto bad; } - ip = mtod(state->m, struct ip *); } - + KERNEL_DEBUG(DBG_FNC_IPSEC_OUT | DBG_FUNC_END, 0,0,0,0,0); if (sav) key_freesav(sav, KEY_SADB_UNLOCKED); return 0; - + bad: if (sav) key_freesav(sav, KEY_SADB_UNLOCKED); @@ -3431,29 +3330,102 @@ bad: KERNEL_DEBUG(DBG_FNC_IPSEC_OUT | DBG_FUNC_END, error,0,0,0,0); return error; } + #endif #if INET6 /* * IPsec output logic for IPv6, transport mode. 
*/ -int -ipsec6_output_trans( +static int +ipsec6_output_trans_internal( struct ipsec_output_state *state, + struct secasvar *sav, u_char *nexthdrp, - struct mbuf *mprev, - struct secpolicy *sp, - __unused int flags, - int *tun) + struct mbuf *mprev) +{ + struct ip6_hdr *ip6; + int error = 0; + int plen; + + /* validity check */ + if (sav == NULL || sav->sah == NULL) { + error = EINVAL; + goto bad; + } + + /* + * If there is no valid SA, we give up to process. + * see same place at ipsec4_output(). + */ + if (sav->state != SADB_SASTATE_MATURE + && sav->state != SADB_SASTATE_DYING) { + IPSEC_STAT_INCREMENT(ipsec6stat.out_nosa); + error = EINVAL; + goto bad; + } + + state->outgoing_if = sav->sah->outgoing_if; + + switch (sav->sah->saidx.proto) { + case IPPROTO_ESP: +#if IPSEC_ESP + error = esp6_output(state->m, nexthdrp, mprev->m_next, sav); +#else + m_freem(state->m); + error = EINVAL; +#endif + break; + case IPPROTO_AH: + error = ah6_output(state->m, nexthdrp, mprev->m_next, sav); + break; + case IPPROTO_IPCOMP: + error = ipcomp6_output(state->m, nexthdrp, mprev->m_next, sav); + break; + default: + ipseclog((LOG_ERR, "ipsec6_output_trans: " + "unknown ipsec protocol %d\n", sav->sah->saidx.proto)); + m_freem(state->m); + IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + error = EINVAL; + break; + } + if (error) { + state->m = NULL; + goto bad; + } + plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); + if (plen > IPV6_MAXPACKET) { + ipseclog((LOG_ERR, "ipsec6_output_trans: " + "IPsec with IPv6 jumbogram is not supported\n")); + IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + error = EINVAL; /*XXX*/ + goto bad; + } + ip6 = mtod(state->m, struct ip6_hdr *); + ip6->ip6_plen = htons(plen); + + return 0; +bad: + return error; +} + +int +ipsec6_output_trans( + struct ipsec_output_state *state, + u_char *nexthdrp, + struct mbuf *mprev, + struct secpolicy *sp, + __unused int flags, + int *tun) { struct ip6_hdr *ip6; struct ipsecrequest *isr = NULL; struct secasindex saidx; int error = 0; - int plen; struct sockaddr_in6 *sin6; struct secasvar *sav = NULL; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); if (!state) @@ -3468,10 +3440,10 @@ ipsec6_output_trans( panic("sp == NULL in ipsec6_output_trans"); if (!tun) panic("tun == NULL in ipsec6_output_trans"); - + KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("ipsec6_output_trans: applyed SP\n"); - kdebug_secpolicy(sp)); + printf("ipsec6_output_trans: applyed SP\n"); + kdebug_secpolicy(sp)); *tun = 0; for (isr = sp->req; isr; isr = isr->next) { @@ -3479,7 +3451,7 @@ ipsec6_output_trans( /* the rest will be handled by ipsec6_output_tunnel() */ break; } - + /* make SA index for search proper SA */ ip6 = mtod(state->m, struct ip6_hdr *); bcopy(&isr->saidx, &saidx, sizeof(saidx)); @@ -3491,7 +3463,7 @@ ipsec6_output_trans( sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_src, &sin6->sin6_addr, - sizeof(ip6->ip6_src)); + sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; @@ -3504,14 +3476,14 @@ ipsec6_output_trans( sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_dst, &sin6->sin6_addr, - sizeof(ip6->ip6_dst)); + sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } - + if (key_checkrequest(isr, &saidx, &sav) == ENOENT) { /* * IPsec processing is required, but no SA 
found. @@ -3522,7 +3494,7 @@ ipsec6_output_trans( */ IPSEC_STAT_INCREMENT(ipsec6stat.out_nosa); error = ENOENT; - + /* * Notify the fact that the packet is discarded * to ourselves. I believe this is better than @@ -3532,91 +3504,313 @@ ipsec6_output_trans( * pfctlinputs? */ icmp6_error(state->m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADMIN, 0); + ICMP6_DST_UNREACH_ADMIN, 0); state->m = NULL; /* icmp6_error freed the mbuf */ goto bad; } - + /* validity check */ if (sav == NULL) { switch (ipsec_get_reqlevel(isr)) { - case IPSEC_LEVEL_USE: - continue; - case IPSEC_LEVEL_REQUIRE: - /* must be not reached here. */ - panic("ipsec6_output_trans: no SA found, but required."); + case IPSEC_LEVEL_USE: + continue; + case IPSEC_LEVEL_REQUIRE: + /* must be not reached here. */ + panic("ipsec6_output_trans: no SA found, but required."); } } + + if ((error = ipsec6_output_trans_internal(state, sav, nexthdrp, mprev)) != 0) { + goto bad; + } + } + + /* if we have more to go, we need a tunnel mode processing */ + if (isr != NULL) + *tun = 1; + + if (sav) + key_freesav(sav, KEY_SADB_UNLOCKED); + return 0; + +bad: + if (sav) + key_freesav(sav, KEY_SADB_UNLOCKED); + m_freem(state->m); + state->m = NULL; + return error; +} +/* + * IPsec output logic for IPv6, tunnel mode. + */ +static int +ipsec6_output_tunnel_internal(struct ipsec_output_state *state, struct secasvar *sav, int *must_be_last) +{ + struct ip6_hdr *ip6; + int error = 0; + int plen; + struct sockaddr_in6* dst6; + struct route *ro6; + + /* validity check */ + if (sav == NULL || sav->sah == NULL || sav->sah->saidx.mode != IPSEC_MODE_TUNNEL) { + error = EINVAL; + goto bad; + } + + /* + * If there is no valid SA, we give up to process. + * see same place at ipsec4_output(). + */ + if (sav->state != SADB_SASTATE_MATURE + && sav->state != SADB_SASTATE_DYING) { + IPSEC_STAT_INCREMENT(ipsec6stat.out_nosa); + error = EINVAL; + goto bad; + } + + state->outgoing_if = sav->sah->outgoing_if; + + if (sav->sah->saidx.mode == IPSEC_MODE_TUNNEL) { /* - * If there is no valid SA, we give up to process. - * see same place at ipsec4_output(). + * build IPsec tunnel. 
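ipsec6_output_tunnel_internal() front-loads its validity checks: the SA must exist, carry a tunnel-mode SA header, and sit in the MATURE or DYING state before any packet surgery happens. A small predicate capturing those preconditions, with illustrative stand-in types for secashead/secasvar:

#include <stdbool.h>

/* Illustrative stand-ins for SA state and mode. */
enum sa_state { SA_LARVAL, SA_MATURE, SA_DYING, SA_DEAD };
enum sa_mode  { MODE_TRANSPORT, MODE_TUNNEL };

struct sah { enum sa_mode mode; };
struct sav { struct sah *sah; enum sa_state state; };

/* Mirrors the front-loaded checks: tunnel mode and a usable SA state. */
static bool
tunnel_sa_usable(const struct sav *sav)
{
    if (sav == NULL || sav->sah == NULL)
        return false;
    if (sav->sah->mode != MODE_TUNNEL)
        return false;
    /* DYING SAs are still usable for output; LARVAL/DEAD are not. */
    return sav->state == SA_MATURE || sav->state == SA_DYING;
}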
*/ - if (sav->state != SADB_SASTATE_MATURE - && sav->state != SADB_SASTATE_DYING) { - IPSEC_STAT_INCREMENT(ipsec6stat.out_nosa); - error = EINVAL; + state->m = ipsec6_splithdr(state->m); + if (!state->m) { + IPSEC_STAT_INCREMENT(ipsec6stat.out_nomem); + error = ENOMEM; + goto bad; + } + + if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET6) { + error = ipsec6_encapsulate(state->m, sav); + if (error) { + state->m = 0; + goto bad; + } + ip6 = mtod(state->m, struct ip6_hdr *); + } else if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET) { + + struct ip *ip; + struct sockaddr_in* dst4; + struct route *ro4 = NULL; + struct route ro4_copy; + struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, + IPOAF_SELECT_SRCIF, 0 }; + + if (must_be_last) + *must_be_last = 1; + + state->tunneled = 4; /* must not process any further in ip6_output */ + error = ipsec64_encapsulate(state->m, sav); + if (error) { + state->m = 0; + goto bad; + } + /* Now we have an IPv4 packet */ + ip = mtod(state->m, struct ip *); + + // grab sadb_mutex, to update sah's route cache and get a local copy of it + lck_mtx_lock(sadb_mutex); + ro4 = &sav->sah->sa_route; + dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst; + if (ro4->ro_rt) { + RT_LOCK(ro4->ro_rt); + } + if (ROUTE_UNUSABLE(ro4) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { + if (ro4->ro_rt != NULL) + RT_UNLOCK(ro4->ro_rt); + ROUTE_RELEASE(ro4); + } + if (ro4->ro_rt == NULL) { + dst4->sin_family = AF_INET; + dst4->sin_len = sizeof(*dst4); + dst4->sin_addr = ip->ip_dst; + } else { + RT_UNLOCK(ro4->ro_rt); + } + route_copyout(&ro4_copy, ro4, sizeof(ro4_copy)); + // release sadb_mutex, after updating sah's route cache and getting a local copy + lck_mtx_unlock(sadb_mutex); + state->m = ipsec4_splithdr(state->m); + if (!state->m) { + error = ENOMEM; + ROUTE_RELEASE(&ro4_copy); + goto bad; + } + switch (sav->sah->saidx.proto) { + case IPPROTO_ESP: +#if IPSEC_ESP + if ((error = esp4_output(state->m, sav)) != 0) { + state->m = NULL; + ROUTE_RELEASE(&ro4_copy); + goto bad; + } + break; + +#else + m_freem(state->m); + state->m = NULL; + error = EINVAL; + ROUTE_RELEASE(&ro4_copy); + goto bad; +#endif + case IPPROTO_AH: + if ((error = ah4_output(state->m, sav)) != 0) { + state->m = NULL; + ROUTE_RELEASE(&ro4_copy); + goto bad; + } + break; + case IPPROTO_IPCOMP: + if ((error = ipcomp4_output(state->m, sav)) != 0) { + state->m = NULL; + ROUTE_RELEASE(&ro4_copy); + goto bad; + } + break; + default: + ipseclog((LOG_ERR, + "ipsec4_output: unknown ipsec protocol %d\n", + sav->sah->saidx.proto)); + m_freem(state->m); + state->m = NULL; + error = EINVAL; + ROUTE_RELEASE(&ro4_copy); + goto bad; + } + + if (state->m == 0) { + error = ENOMEM; + ROUTE_RELEASE(&ro4_copy); + goto bad; + } + ipsec_set_pkthdr_for_interface(sav->sah->ipsec_if, state->m, AF_INET); + ip = mtod(state->m, struct ip *); + ip->ip_len = ntohs(ip->ip_len); /* flip len field before calling ip_output */ + error = ip_output(state->m, NULL, &ro4_copy, IP_OUTARGS, NULL, &ipoa); + state->m = NULL; + // grab sadb_mutex, to synchronize the sah's route cache with the local copy + lck_mtx_lock(sadb_mutex); + route_copyin(&ro4_copy, ro4, sizeof(ro4_copy)); + lck_mtx_unlock(sadb_mutex); + if (error != 0) + goto bad; + goto done; + } else { + ipseclog((LOG_ERR, "ipsec6_output_tunnel: " + "unsupported inner family, spi=%u\n", + (u_int32_t)ntohl(sav->spi))); + IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + error = EAFNOSUPPORT; + goto bad; + } + + // grab sadb_mutex, before updating sah's route cache + 
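The IPv6-over-IPv4 branch above cannot hold sadb_mutex across ip_output(), so it snapshots the SA header's route cache with route_copyout(), sends using the snapshot, then writes the possibly-updated route back with route_copyin(). A sketch of that copy-out/copy-in discipline, using a plain pthread mutex and an opaque route type as stand-ins; note the real route_copyout/route_copyin also move rtentry references, which memcpy() here glosses over:

#include <pthread.h>
#include <string.h>

/* Stand-ins: a cached route and its guarding lock. */
struct route_c { void *rt; /* ... cached routing state ... */ };
static pthread_mutex_t sadb_lock = PTHREAD_MUTEX_INITIALIZER;

extern int send_with_route(struct route_c *ro);   /* may sleep; updates *ro */

/* Snapshot the shared cache, use it unlocked, then sync it back. */
static int
send_via_cached_route(struct route_c *shared)
{
    struct route_c local;
    int error;

    pthread_mutex_lock(&sadb_lock);
    memcpy(&local, shared, sizeof(local));    /* route_copyout() analogue */
    pthread_mutex_unlock(&sadb_lock);

    error = send_with_route(&local);          /* ip_output() analogue */

    pthread_mutex_lock(&sadb_lock);
    memcpy(shared, &local, sizeof(local));    /* route_copyin() analogue */
    pthread_mutex_unlock(&sadb_lock);
    return error;
}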
lck_mtx_lock(sadb_mutex); + ro6 = &sav->sah->sa_route; + dst6 = (struct sockaddr_in6 *)(void *)&ro6->ro_dst; + if (ro6->ro_rt) { + RT_LOCK(ro6->ro_rt); + } + if (ROUTE_UNUSABLE(ro6) || + !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst)) { + if (ro6->ro_rt != NULL) + RT_UNLOCK(ro6->ro_rt); + ROUTE_RELEASE(ro6); + } + if (ro6->ro_rt == 0) { + bzero(dst6, sizeof(*dst6)); + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = ip6->ip6_dst; + rtalloc(ro6); + if (ro6->ro_rt) { + RT_LOCK(ro6->ro_rt); + } + } + if (ro6->ro_rt == 0) { + ip6stat.ip6s_noroute++; + IPSEC_STAT_INCREMENT(ipsec6stat.out_noroute); + error = EHOSTUNREACH; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); goto bad; } - - switch (isr->saidx.proto) { + + /* + * adjust state->dst if tunnel endpoint is offlink + * + * XXX: caching rt_gateway value in the state is + * not really good, since it may point elsewhere + * when the gateway gets modified to a larger + * sockaddr via rt_setgate(). This is currently + * addressed by SA_SIZE roundup in that routine. + */ + if (ro6->ro_rt->rt_flags & RTF_GATEWAY) + dst6 = (struct sockaddr_in6 *)(void *)ro6->ro_rt->rt_gateway; + RT_UNLOCK(ro6->ro_rt); + ROUTE_RELEASE(&state->ro); + route_copyout(&state->ro, ro6, sizeof(state->ro)); + state->dst = (struct sockaddr *)dst6; + state->tunneled = 6; + // release sadb_mutex, after updating sah's route cache + lck_mtx_unlock(sadb_mutex); + } + + state->m = ipsec6_splithdr(state->m); + if (!state->m) { + IPSEC_STAT_INCREMENT(ipsec6stat.out_nomem); + error = ENOMEM; + goto bad; + } + ip6 = mtod(state->m, struct ip6_hdr *); + switch (sav->sah->saidx.proto) { case IPPROTO_ESP: #if IPSEC_ESP - error = esp6_output(state->m, nexthdrp, mprev->m_next, sav); + error = esp6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav); #else m_freem(state->m); error = EINVAL; #endif break; case IPPROTO_AH: - error = ah6_output(state->m, nexthdrp, mprev->m_next, sav); + error = ah6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav); break; case IPPROTO_IPCOMP: - error = ipcomp6_output(state->m, nexthdrp, mprev->m_next, sav); - break; + /* XXX code should be here */ + /*FALLTHROUGH*/ default: - ipseclog((LOG_ERR, "ipsec6_output_trans: " - "unknown ipsec protocol %d\n", isr->saidx.proto)); + ipseclog((LOG_ERR, "ipsec6_output_tunnel: " + "unknown ipsec protocol %d\n", sav->sah->saidx.proto)); m_freem(state->m); IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); error = EINVAL; break; - } - if (error) { - state->m = NULL; - goto bad; - } - plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); - if (plen > IPV6_MAXPACKET) { - ipseclog((LOG_ERR, "ipsec6_output_trans: " - "IPsec with IPv6 jumbogram is not supported\n")); - IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); - error = EINVAL; /*XXX*/ - goto bad; - } - ip6 = mtod(state->m, struct ip6_hdr *); - ip6->ip6_plen = htons(plen); } - - /* if we have more to go, we need a tunnel mode processing */ - if (isr != NULL) - *tun = 1; - - if (sav) - key_freesav(sav, KEY_SADB_UNLOCKED); + if (error) { + state->m = NULL; + goto bad; + } + plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); + if (plen > IPV6_MAXPACKET) { + ipseclog((LOG_ERR, "ipsec6_output_tunnel: " + "IPsec with IPv6 jumbogram is not supported\n")); + IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + error = EINVAL; /*XXX*/ + goto bad; + } + ip6 = mtod(state->m, struct ip6_hdr *); + ip6->ip6_plen = htons(plen); +done: return 0; - + bad: - if (sav) - key_freesav(sav, KEY_SADB_UNLOCKED); - 
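Once the IPv6 route is resolved, the code above redirects state->dst to the route's gateway whenever the tunnel endpoint is off-link (RTF_GATEWAY), so the link layer resolves the next hop rather than the final destination. A compact sketch of that selection, with stand-in route types:

#include <stdbool.h>

/* Stand-ins for rtentry/sockaddr as used by the off-link check. */
struct sockaddr_s;                       /* opaque address */
struct rtentry_s {
    bool              via_gateway;       /* RTF_GATEWAY analogue */
    struct sockaddr_s *gateway;          /* rt_gateway analogue  */
};

/* Next hop is the gateway when off-link, else the destination itself. */
static struct sockaddr_s *
next_hop(struct rtentry_s *rt, struct sockaddr_s *dst)
{
    return rt->via_gateway ? rt->gateway : dst;
}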
m_freem(state->m); - state->m = NULL; return error; } -/* - * IPsec output logic for IPv6, tunnel mode. - */ int ipsec6_output_tunnel( struct ipsec_output_state *state, @@ -3628,9 +3822,6 @@ ipsec6_output_tunnel( struct secasindex saidx; struct secasvar *sav = NULL; int error = 0; - int plen; - struct sockaddr_in6* dst6; - struct route *ro6; lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); @@ -3730,248 +3921,22 @@ ipsec6_output_tunnel( error = EINVAL; goto bad; } - - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { - /* - * build IPsec tunnel. - */ - state->m = ipsec6_splithdr(state->m); - if (!state->m) { - IPSEC_STAT_INCREMENT(ipsec6stat.out_nomem); - error = ENOMEM; - goto bad; - } - - if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET6) { - error = ipsec6_encapsulate(state->m, sav); - if (error) { - state->m = 0; - goto bad; - } - ip6 = mtod(state->m, struct ip6_hdr *); - } else if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET) { - - struct ip *ip; - struct sockaddr_in* dst4; - struct route *ro4 = NULL; - struct route ro4_copy; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, - IPOAF_SELECT_SRCIF, 0 }; - - /* - * must be last isr because encapsulated IPv6 packet - * will be sent by calling ip_output - */ - if (isr->next) { - ipseclog((LOG_ERR, "ipsec6_output_tunnel: " - "IPv4 must be outer layer, spi=%u\n", - (u_int32_t)ntohl(sav->spi))); - error = EINVAL; - goto bad; - } - state->tunneled = 4; /* must not process any further in ip6_output */ - error = ipsec64_encapsulate(state->m, sav); - if (error) { - state->m = 0; - goto bad; - } - /* Now we have an IPv4 packet */ - ip = mtod(state->m, struct ip *); - - // grab sadb_mutex, to update sah's route cache and get a local copy of it - lck_mtx_lock(sadb_mutex); - ro4 = &sav->sah->sa_route; - dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst; - if (ro4->ro_rt) { - RT_LOCK(ro4->ro_rt); - } - if (ROUTE_UNUSABLE(ro4) || - dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { - if (ro4->ro_rt != NULL) - RT_UNLOCK(ro4->ro_rt); - ROUTE_RELEASE(ro4); - } - if (ro4->ro_rt == NULL) { - dst4->sin_family = AF_INET; - dst4->sin_len = sizeof(*dst4); - dst4->sin_addr = ip->ip_dst; - } else { - RT_UNLOCK(ro4->ro_rt); - } - route_copyout(&ro4_copy, ro4, sizeof(ro4_copy)); - // release sadb_mutex, after updating sah's route cache and getting a local copy - lck_mtx_unlock(sadb_mutex); - state->m = ipsec4_splithdr(state->m); - if (!state->m) { - error = ENOMEM; - ROUTE_RELEASE(&ro4_copy); - goto bad; - } - switch (isr->saidx.proto) { - case IPPROTO_ESP: -#if IPSEC_ESP - if ((error = esp4_output(state->m, sav)) != 0) { - state->m = NULL; - ROUTE_RELEASE(&ro4_copy); - goto bad; - } - break; - -#else - m_freem(state->m); - state->m = NULL; - error = EINVAL; - ROUTE_RELEASE(&ro4_copy); - goto bad; -#endif - case IPPROTO_AH: - if ((error = ah4_output(state->m, sav)) != 0) { - state->m = NULL; - ROUTE_RELEASE(&ro4_copy); - goto bad; - } - break; - case IPPROTO_IPCOMP: - if ((error = ipcomp4_output(state->m, sav)) != 0) { - state->m = NULL; - ROUTE_RELEASE(&ro4_copy); - goto bad; - } - break; - default: - ipseclog((LOG_ERR, - "ipsec4_output: unknown ipsec protocol %d\n", - isr->saidx.proto)); - m_freem(state->m); - state->m = NULL; - error = EINVAL; - ROUTE_RELEASE(&ro4_copy); - goto bad; - } - if (state->m == 0) { - error = ENOMEM; - ROUTE_RELEASE(&ro4_copy); - goto bad; - } - ip = mtod(state->m, struct ip *); - ip->ip_len = ntohs(ip->ip_len); /* flip len field before calling ip_output */ - error = ip_output(state->m, NULL, &ro4_copy, 
IP_OUTARGS, NULL, &ipoa); - state->m = NULL; - // grab sadb_mutex, to synchronize the sah's route cache with the local copy - lck_mtx_lock(sadb_mutex); - route_copyin(&ro4_copy, ro4, sizeof(ro4_copy)); - lck_mtx_unlock(sadb_mutex); - if (error != 0) - goto bad; - goto done; - } else { - ipseclog((LOG_ERR, "ipsec6_output_tunnel: " - "unsupported inner family, spi=%u\n", - (u_int32_t)ntohl(sav->spi))); - IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); - error = EAFNOSUPPORT; - goto bad; - } - - // grab sadb_mutex, before updating sah's route cache - lck_mtx_lock(sadb_mutex); - ro6 = &sav->sah->sa_route; - dst6 = (struct sockaddr_in6 *)(void *)&ro6->ro_dst; - if (ro6->ro_rt) { - RT_LOCK(ro6->ro_rt); - } - if (ROUTE_UNUSABLE(ro6) || - !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst)) { - if (ro6->ro_rt != NULL) - RT_UNLOCK(ro6->ro_rt); - ROUTE_RELEASE(ro6); - } - if (ro6->ro_rt == 0) { - bzero(dst6, sizeof(*dst6)); - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(*dst6); - dst6->sin6_addr = ip6->ip6_dst; - rtalloc(ro6); - if (ro6->ro_rt) { - RT_LOCK(ro6->ro_rt); - } - } - if (ro6->ro_rt == 0) { - ip6stat.ip6s_noroute++; - IPSEC_STAT_INCREMENT(ipsec6stat.out_noroute); - error = EHOSTUNREACH; - // release sadb_mutex, after updating sah's route cache - lck_mtx_unlock(sadb_mutex); - goto bad; - } - - /* - * adjust state->dst if tunnel endpoint is offlink - * - * XXX: caching rt_gateway value in the state is - * not really good, since it may point elsewhere - * when the gateway gets modified to a larger - * sockaddr via rt_setgate(). This is currently - * addressed by SA_SIZE roundup in that routine. - */ - if (ro6->ro_rt->rt_flags & RTF_GATEWAY) - dst6 = (struct sockaddr_in6 *)(void *)ro6->ro_rt->rt_gateway; - RT_UNLOCK(ro6->ro_rt); - ROUTE_RELEASE(&state->ro); - route_copyout(&state->ro, ro6, sizeof(state->ro)); - state->dst = (struct sockaddr *)dst6; - state->tunneled = 6; - // release sadb_mutex, after updating sah's route cache - lck_mtx_unlock(sadb_mutex); - } - - state->m = ipsec6_splithdr(state->m); - if (!state->m) { - IPSEC_STAT_INCREMENT(ipsec6stat.out_nomem); - error = ENOMEM; + int must_be_last = 0; + + if ((error = ipsec6_output_tunnel_internal(state, sav, &must_be_last)) != 0) { goto bad; } - ip6 = mtod(state->m, struct ip6_hdr *); - switch (isr->saidx.proto) { - case IPPROTO_ESP: -#if IPSEC_ESP - error = esp6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav); -#else - m_freem(state->m); - error = EINVAL; -#endif - break; - case IPPROTO_AH: - error = ah6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav); - break; - case IPPROTO_IPCOMP: - /* XXX code should be here */ - /*FALLTHROUGH*/ - default: + + if (must_be_last && isr->next) { ipseclog((LOG_ERR, "ipsec6_output_tunnel: " - "unknown ipsec protocol %d\n", isr->saidx.proto)); - m_freem(state->m); - IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + "IPv4 must be outer layer, spi=%u\n", + (u_int32_t)ntohl(sav->spi))); error = EINVAL; - break; - } - if (error) { - state->m = NULL; - goto bad; - } - plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); - if (plen > IPV6_MAXPACKET) { - ipseclog((LOG_ERR, "ipsec6_output_tunnel: " - "IPsec with IPv6 jumbogram is not supported\n")); - IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); - error = EINVAL; /*XXX*/ goto bad; } - ip6 = mtod(state->m, struct ip6_hdr *); - ip6->ip6_plen = htons(plen); } -done: + if (sav) key_freesav(sav, KEY_SADB_UNLOCKED); return 0; @@ -3984,6 +3949,51 @@ bad: state->m = NULL; return error; } + +int +ipsec6_interface_output(struct ipsec_output_state 
*state, ifnet_t interface, u_char *nexthdrp, struct mbuf *mprev) +{ + int error = 0; + struct secasvar *sav = NULL; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + if (!state) + panic("state == NULL in ipsec6_output"); + if (!state->m) + panic("state->m == NULL in ipsec6_output"); + if (!nexthdrp) + panic("nexthdrp == NULL in ipsec6_output"); + if (!mprev) + panic("mprev == NULL in ipsec6_output"); + + sav = key_alloc_outbound_sav_for_interface(interface, AF_INET6); + if (sav == NULL) { + goto bad; + } + + if (sav->sah && sav->sah->saidx.mode == IPSEC_MODE_TUNNEL) { + if ((error = ipsec6_output_tunnel_internal(state, sav, NULL)) != 0) { + goto bad; + } + } + else { + if ((error = ipsec6_output_trans_internal(state, sav, nexthdrp, mprev)) != 0) { + goto bad; + } + } + + if (sav) + key_freesav(sav, KEY_SADB_UNLOCKED); + return 0; + +bad: + if (sav) + key_freesav(sav, KEY_SADB_UNLOCKED); + m_freem(state->m); + state->m = NULL; + return error; +} #endif /*INET6*/ #if INET @@ -4115,8 +4125,9 @@ ipsec4_tunnel_validate(m, off, nxt0, sav, ifamily) if (bcmp(&oip->ip_dst, &sin->sin_addr, sizeof(oip->ip_dst)) != 0) return 0; - if (sav->utun_in_fn) { - // the utun SAs don't have a policy (yet). + if (sav->utun_in_fn || + sav->sah->ipsec_if != NULL) { + // the ipsec/utun interface SAs don't have a policies. if (nxt == IPPROTO_IPV4) { *ifamily = AF_INET; } else if (nxt == IPPROTO_IPV6) { @@ -4455,12 +4466,10 @@ ipsec_optaux( } int -ipsec_setsocket( - struct mbuf *m, - struct socket *so) +ipsec_setsocket(struct mbuf *m, struct socket *so) { struct ipsec_tag *tag; - + /* if so == NULL, don't insist on getting the aux mbuf */ if (so) { tag = ipsec_addaux(m); @@ -4476,8 +4485,7 @@ ipsec_setsocket( } struct socket * -ipsec_getsocket( - struct mbuf *m) +ipsec_getsocket(struct mbuf *m) { struct ipsec_tag *itag; @@ -4552,13 +4560,18 @@ ipsec_send_natt_keepalive( struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; struct route ro; + int keepalive_interval = natt_keepalive_interval; lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); if ((esp_udp_encap_port & 0xFFFF) == 0 || sav->remote_ike_port == 0) return FALSE; + if (sav->natt_interval != 0) { + keepalive_interval = (int)sav->natt_interval; + } + // natt timestamp may have changed... 
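The keepalive change here lets an SA override the global natt_keepalive_interval with its own natt_interval, and the idle-time test then runs against whichever interval applies. A minimal sketch of that selection, with the field names mirroring the patch but time reduced to plain integers:

#include <stdbool.h>
#include <stdint.h>

/* Global default as in the patch, reduced to a plain integer (seconds). */
static int natt_keepalive_interval = 20;

struct sa_k {
    uint32_t natt_interval;               /* 0 = use the global default */
    uint64_t natt_last_activity;          /* last traffic time, seconds */
};

/* True when enough idle time has passed to warrant a keepalive. */
static bool
keepalive_due(const struct sa_k *sav, uint64_t natt_now)
{
    int interval = natt_keepalive_interval;

    if (sav->natt_interval != 0)
        interval = (int)sav->natt_interval;   /* per-SA override wins */
    return (natt_now - sav->natt_last_activity) >= (uint64_t)interval;
}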
reverify - if ((natt_now - sav->natt_last_activity) < natt_keepalive_interval) return FALSE; + if ((natt_now - sav->natt_last_activity) < keepalive_interval) return FALSE; if (sav->flags & SADB_X_EXT_ESP_KEEPALIVE) return FALSE; // don't send these from the kernel @@ -4608,6 +4621,8 @@ ipsec_send_natt_keepalive( route_copyout(&ro, &sav->sah->sa_route, sizeof(ro)); lck_mtx_unlock(sadb_mutex); + + necp_mark_packet_as_keepalive(m, TRUE); error = ip_output(m, NULL, &ro, IP_OUTARGS | IP_NOIPSEC, NULL, &ipoa); @@ -4621,3 +4636,80 @@ ipsec_send_natt_keepalive( } return FALSE; } + +__private_extern__ bool +ipsec_fill_offload_frame(ifnet_t ifp, + struct secasvar *sav, + struct ipsec_offload_frame *frame, + size_t frame_data_offset) +{ + u_int8_t *data = NULL; + struct ip *ip = NULL; + struct udphdr *uh = NULL; + + if (sav == NULL || sav->sah == NULL || frame == NULL || + (ifp != NULL && ifp->if_index != sav->sah->outgoing_if) || + sav->sah->saidx.dst.ss_family != AF_INET || + !(sav->flags & SADB_X_EXT_NATT) || + !(sav->flags & SADB_X_EXT_NATT_KEEPALIVE) || + !(sav->flags & SADB_X_EXT_NATT_KEEPALIVE_OFFLOAD) || + sav->flags & SADB_X_EXT_ESP_KEEPALIVE || + (esp_udp_encap_port & 0xFFFF) == 0 || + sav->remote_ike_port == 0 || + (natt_keepalive_interval == 0 && sav->natt_interval == 0)) { + /* SA is not eligible for keepalive offload on this interface */ + return (FALSE); + } + + if (frame_data_offset + sizeof(struct udpiphdr) + 1 > IPSEC_OFFLOAD_FRAME_DATA_SIZE) { + /* Not enough room in this data frame */ + return (FALSE); + } + + data = frame->data; + ip = (__typeof__(ip))(void *)(data + frame_data_offset); + uh = (__typeof__(uh))(void *)(data + frame_data_offset + sizeof(*ip)); + + frame->length = frame_data_offset + sizeof(struct udpiphdr) + 1; + bzero(data, IPSEC_OFFLOAD_FRAME_DATA_SIZE); + + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(struct ip) >> 2; + ip->ip_off &= htons(~IP_OFFMASK); + ip->ip_off &= htons(~IP_MF); + switch (ip4_ipsec_dfbit) { + case 0: /* clear DF bit */ + ip->ip_off &= htons(~IP_DF); + break; + case 1: /* set DF bit */ + ip->ip_off |= htons(IP_DF); + break; + default: /* copy DF bit */ + break; + } + ip->ip_len = htons(sizeof(struct udpiphdr) + 1); + ip->ip_id = ip_randomid(); + ip->ip_ttl = ip_defttl; + ip->ip_p = IPPROTO_UDP; + ip->ip_sum = 0; + if (sav->sah->dir != IPSEC_DIR_INBOUND) { + ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr; + ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr; + } else { + ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr; + ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr; + } + ip->ip_sum = in_cksum_hdr_opt(ip); + uh->uh_sport = htons((u_short)esp_udp_encap_port); + uh->uh_dport = htons(sav->remote_ike_port); + uh->uh_ulen = htons(1 + sizeof(*uh)); + uh->uh_sum = 0; + *(u_int8_t*)(data + frame_data_offset + sizeof(*ip) + sizeof(*uh)) = 0xFF; + + if (sav->natt_interval != 0) { + frame->interval = sav->natt_interval; + } else { + frame->interval = natt_keepalive_interval; + } + return (TRUE); +} diff --git a/bsd/netinet6/ipsec.h b/bsd/netinet6/ipsec.h index 75234b206..9c452d26f 100644 --- a/bsd/netinet6/ipsec.h +++ b/bsd/netinet6/ipsec.h @@ -294,10 +294,11 @@ struct ipsecstat { #define IPSEC_GET_P2UNALIGNED_OFS(p) 0 struct ipsec_output_state { - int tunneled; + int tunneled; struct mbuf *m; struct route ro; struct sockaddr *dst; + u_int outgoing_if; }; struct ipsec_history { @@ -328,15 +329,15 @@ extern struct secpolicy *ipsec4_getpolicybyaddr(struct mbuf *, u_int, int, extern 
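The new ipsec_fill_offload_frame() above precomputes a complete NAT-T keepalive — an IPv4 header, a UDP header from the local ESP encapsulation port to the peer's IKE port, and a single 0xFF payload byte — so the NIC can replay it while the host sleeps. A user-space sketch of the frame layout, assuming BSD-style socket headers (struct ip / struct udphdr with uh_* names) and a caller-supplied checksum helper, ip_hdr_cksum(), which is an assumption here:

#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>
#include <string.h>
#include <stdint.h>

extern uint16_t ip_hdr_cksum(const struct ip *ip);   /* assumed helper */

/* Fill buf with an IPv4/UDP NAT-T keepalive: headers plus one 0xFF byte.
 * Returns total frame length; addresses/ports arrive in host order. */
static size_t
fill_natt_keepalive(uint8_t *buf, uint32_t src, uint32_t dst,
                    uint16_t sport, uint16_t dport)
{
    struct ip *ip = (struct ip *)buf;
    struct udphdr *uh = (struct udphdr *)(buf + sizeof(*ip));
    size_t len = sizeof(*ip) + sizeof(*uh) + 1;

    memset(buf, 0, len);
    ip->ip_v = 4;
    ip->ip_hl = sizeof(*ip) >> 2;
    ip->ip_len = htons((uint16_t)len);
    ip->ip_ttl = 64;
    ip->ip_p = IPPROTO_UDP;
    ip->ip_src.s_addr = htonl(src);
    ip->ip_dst.s_addr = htonl(dst);
    ip->ip_sum = ip_hdr_cksum(ip);

    uh->uh_sport = htons(sport);          /* ESP UDP encapsulation port */
    uh->uh_dport = htons(dport);          /* peer's IKE port            */
    uh->uh_ulen = htons(sizeof(*uh) + 1);
    uh->uh_sum = 0;                       /* UDP checksum optional for v4 */

    buf[len - 1] = 0xFF;                  /* NAT-T keepalive marker byte */
    return len;
}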
int ipsec4_getpolicybyinterface(struct mbuf *, u_int, int *, struct ip_out_args *, struct secpolicy **); +extern u_int ipsec_get_reqlevel(struct ipsecrequest *); + struct inpcb; extern int ipsec_init_policy(struct socket *so, struct inpcbpolicy **); extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *); extern u_int ipsec_get_reqlevel(struct ipsecrequest *); extern int ipsec4_set_policy(struct inpcb *inp, int optname, - caddr_t request, size_t len, int priv); -extern int ipsec4_get_policy(struct inpcb *inpcb, caddr_t request, - size_t len, struct mbuf **mp); + caddr_t request, size_t len, int priv); extern int ipsec4_delete_pcbpolicy(struct inpcb *); extern int ipsec4_in_reject_so(struct mbuf *, struct socket *); extern int ipsec4_in_reject(struct mbuf *, struct inpcb *); @@ -356,6 +357,7 @@ extern const char *ipsec_logsastr(struct secasvar *); extern void ipsec_dumpmbuf(struct mbuf *); +extern int ipsec4_interface_output(struct ipsec_output_state *state, ifnet_t interface); extern int ipsec4_output(struct ipsec_output_state *, struct secpolicy *, int); #if INET extern struct mbuf * ipsec4_splithdr(struct mbuf *); diff --git a/bsd/netinet6/ipsec6.h b/bsd/netinet6/ipsec6.h index 66775a8e9..b5b065526 100644 --- a/bsd/netinet6/ipsec6.h +++ b/bsd/netinet6/ipsec6.h @@ -66,8 +66,6 @@ extern int ipsec6_in_reject_so(struct mbuf *, struct socket *); extern int ipsec6_delete_pcbpolicy(struct inpcb *); extern int ipsec6_set_policy(struct inpcb *inp, int optname, caddr_t request, size_t len, int priv); -extern int ipsec6_get_policy(struct inpcb *inp, caddr_t request, size_t len, - struct mbuf **mp); extern int ipsec6_in_reject(struct mbuf *, struct inpcb *); struct tcp6cb; @@ -77,6 +75,7 @@ extern size_t ipsec6_hdrsiz(struct mbuf *, u_int, struct inpcb *); struct ip6_hdr; extern const char *ipsec6_logpacketstr(struct ip6_hdr *, u_int32_t); +extern int ipsec6_interface_output(struct ipsec_output_state *, ifnet_t, u_char *, struct mbuf *); extern int ipsec6_output_trans(struct ipsec_output_state *, u_char *, struct mbuf *, struct secpolicy *, int, int *); extern int ipsec6_output_tunnel(struct ipsec_output_state *, diff --git a/bsd/netinet6/mld6.c b/bsd/netinet6/mld6.c index 38c46dd72..228767199 100644 --- a/bsd/netinet6/mld6.c +++ b/bsd/netinet6/mld6.c @@ -1603,6 +1603,10 @@ mld_timeout(void *arg) interface_timers_running6 = 0; LIST_FOREACH(mli, &mli_head, mli_link) { MLI_LOCK(mli); + if (mli->mli_version != MLD_VERSION_2) { + MLI_UNLOCK(mli); + continue; + } if (mli->mli_v2_timer == 0) { /* Do nothing. */ } else if (--mli->mli_v2_timer == 0) { @@ -2487,7 +2491,10 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli, MLI_UNLOCK(mli); retval *= -1; goto done; + } else { + retval = 0; } + /* * If record(s) were enqueued, start the state-change * report timer for this group. @@ -3453,11 +3460,7 @@ mld_dispatch_packet(struct mbuf *m) } im6o->im6o_multicast_hlim = 1; -#if MROUTING - im6o->im6o_multicast_loop = (ip6_mrouter != NULL); -#else im6o->im6o_multicast_loop = 0; -#endif im6o->im6o_multicast_ifp = ifp; if (m->m_flags & M_MLDV1) { diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c index 3935be28d..15faf1316 100644 --- a/bsd/netinet6/nd6.c +++ b/bsd/netinet6/nd6.c @@ -132,7 +132,7 @@ int nd6_debug = 0; int nd6_optimistic_dad = (ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF| ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC| - ND6_OPTIMISTIC_DAD_SECURED); + ND6_OPTIMISTIC_DAD_SECURED|ND6_OPTIMISTIC_DAD_MANUAL); /* for debugging? 
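The mld6.c hunk above makes mld_timeout() skip an interface's state entirely when it is not running MLDv2, instead of decrementing version-2 timers that can never fire usefully. A sketch of that loop guard, with a toy per-interface record standing in for struct mld_ifinfo:

#include <stddef.h>

enum { MLD_V1 = 1, MLD_V2 = 2 };

struct mli {
    struct mli *next;
    int        version;
    unsigned   v2_timer;         /* pending general-query response timer */
};

extern void v2_dispatch(struct mli *);

/* Tick per-interface timers, ignoring interfaces not in MLDv2 mode. */
static void
mld_tick(struct mli *head)
{
    for (struct mli *m = head; m != NULL; m = m->next) {
        if (m->version != MLD_V2)
            continue;            /* the new early-out in mld_timeout() */
        if (m->v2_timer != 0 && --m->v2_timer == 0)
            v2_dispatch(m);      /* fire the deferred v2 response */
    }
}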
*/ static int nd6_inuse, nd6_allocated; @@ -260,11 +260,11 @@ static int nd6_init_done; SYSCTL_DECL(_net_inet6_icmp6); SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, - CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_drlist, "S,in6_defrouter", ""); SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, - CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_prlist, "S,in6_defrouter", ""); void @@ -502,12 +502,12 @@ nd6_ifattach(struct ifnet *ifp) ndi = &nd_ifinfo[ifp->if_index]; if (!ndi->initialized) { lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr); + ndi->flags = ND6_IFF_PERFORMNUD; ndi->initialized = TRUE; } lck_mtx_lock(&ndi->lock); - ndi->flags = ND6_IFF_PERFORMNUD; if (!(ifp->if_flags & IFF_MULTICAST)) ndi->flags |= ND6_IFF_IFDISABLED; @@ -1197,7 +1197,9 @@ addrloop: * prefix is not necessary. */ NDPR_LOCK(pr); - if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE) { + if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE || + pr->ndpr_stateflags & NDPRF_DEFUNCT) { + pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE; NDPR_UNLOCK(pr); pr = pr->ndpr_next; continue; @@ -1211,8 +1213,8 @@ addrloop: NDPR_ADDREF_LOCKED(pr); prelist_remove(pr); NDPR_UNLOCK(pr); - pfxlist_onlink_check(); NDPR_REMREF(pr); + pfxlist_onlink_check(); pr = nd_prefix.lh_first; ap->killed++; } else { @@ -1270,6 +1272,7 @@ static void nd6_timeout(void *arg) { struct nd6svc_arg sarg; + uint32_t buf; lck_mtx_lock(rnh_lock); bzero(&sarg, sizeof (sarg)); @@ -1295,7 +1298,8 @@ nd6_timeout(void *arg) atv.tv_usec = 0; atv.tv_sec = MAX(nd6_prune, lazy); ltv.tv_usec = 0; - ltv.tv_sec = MAX(random() % lazy, 1) * 2; + read_frandom(&buf, sizeof(buf)); + ltv.tv_sec = MAX(buf % lazy, 1) * 2; leeway = <v; } nd6_sched_timeout(&atv, leeway); @@ -1516,6 +1520,7 @@ nd6_purge(struct ifnet *ifp) struct llinfo_nd6 *ln; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; + boolean_t removed; /* Nuke default router list entries toward ifp */ lck_mtx_lock(nd6_mutex); @@ -1546,10 +1551,12 @@ nd6_purge(struct ifnet *ifp) } /* Nuke prefix list entries toward ifp */ + removed = FALSE; for (pr = nd_prefix.lh_first; pr; pr = npr) { - npr = pr->ndpr_next; NDPR_LOCK(pr); - if (pr->ndpr_ifp == ifp) { + npr = pr->ndpr_next; + if (pr->ndpr_ifp == ifp && + !(pr->ndpr_stateflags & NDPRF_DEFUNCT)) { /* * Because if_detach() does *not* release prefixes * while purging addresses the reference count will @@ -1569,12 +1576,15 @@ nd6_purge(struct ifnet *ifp) NDPR_ADDREF_LOCKED(pr); prelist_remove(pr); NDPR_UNLOCK(pr); - pfxlist_onlink_check(); NDPR_REMREF(pr); + removed = TRUE; + npr = nd_prefix.lh_first; } else { NDPR_UNLOCK(pr); } } + if (removed) + pfxlist_onlink_check(); lck_mtx_unlock(nd6_mutex); /* cancel default outgoing interface setting */ diff --git a/bsd/netinet6/nd6.h b/bsd/netinet6/nd6.h index 7996b090d..0b1c36bff 100644 --- a/bsd/netinet6/nd6.h +++ b/bsd/netinet6/nd6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -198,6 +198,7 @@ struct nd_ifinfo { #if defined(PRIVATE) #define ND6_IFF_INSECURE 0x80 #endif +#define ND6_IFF_REPLICATED 0x100 /* sleep proxy registered */ struct in6_nbrinfo { char ifname[IFNAMSIZ]; /* if name, e.g. 
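nd6_timeout() above replaces random() with read_frandom() when computing the timer's leeway, pulling fresh random bytes rather than using the legacy PRNG. A user-space sketch of the jitter computation, with getentropy() standing in for read_frandom(); lazy is assumed nonzero, as in the kernel path:

#include <stdint.h>
#include <unistd.h>    /* getentropy(); read_frandom() analogue */

/* Jittered leeway in seconds, as in nd6_timeout():
 * MAX(random % lazy, 1) * 2, i.e. between 2 and 2*(lazy-1). */
static long
jittered_leeway(uint32_t lazy)
{
    uint32_t buf = 0;

    (void)getentropy(&buf, sizeof(buf));
    uint32_t r = buf % lazy;
    return (long)((r > 1 ? r : 1) * 2);
}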
"en0" */ @@ -457,6 +458,7 @@ struct in6_ndifreq_64 { #ifdef BSD_KERNEL_PRIVATE #define NDPRF_PROCESSED_ONLINK 0x08000 #define NDPRF_PROCESSED_SERVICE 0x10000 +#define NDPRF_DEFUNCT 0x20000 #endif /* protocol constants */ @@ -480,7 +482,9 @@ extern lck_rw_t *nd_if_rwlock; /* * In a more readable form, we derive linkmtu based on: * - * if (ND_IFINFO(ifp) == NULL || !ND_IFINFO(ifp)->initialized) + * if (ifp == NULL) + * linkmtu = IPV6_MMTU + * else if (ND_IFINFO(ifp) == NULL || !ND_IFINFO(ifp)->initialized) * linkmtu = ifp->if_mtu; * else if (ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < ifp->if_mtu) * linkmtu = ND_IFINFO(ifp)->linkmtu; @@ -490,7 +494,8 @@ extern lck_rw_t *nd_if_rwlock; * linkmtu = ifp->if_mtu; */ #define IN6_LINKMTU(ifp) \ - ((ND_IFINFO(ifp) == NULL || !ND_IFINFO(ifp)->initialized) ? \ + (ifp == NULL ? IPV6_MMTU : \ + (ND_IFINFO(ifp) == NULL || !ND_IFINFO(ifp)->initialized) ? \ (ifp)->if_mtu : ((ND_IFINFO(ifp)->linkmtu && \ ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) ? ND_IFINFO(ifp)->linkmtu : \ ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) ? \ @@ -749,6 +754,7 @@ extern int nd6_optimistic_dad; #define ND6_OPTIMISTIC_DAD_TEMPORARY (1 << 2) #define ND6_OPTIMISTIC_DAD_DYNAMIC (1 << 3) #define ND6_OPTIMISTIC_DAD_SECURED (1 << 4) +#define ND6_OPTIMISTIC_DAD_MANUAL (1 << 5) /* nd6_rtr.c */ extern int nd6_defifindex; diff --git a/bsd/netinet6/nd6_nbr.c b/bsd/netinet6/nd6_nbr.c index fa8125a07..5de3a3b4f 100644 --- a/bsd/netinet6/nd6_nbr.c +++ b/bsd/netinet6/nd6_nbr.c @@ -95,7 +95,6 @@ #if INET6 #include #endif -extern int ipsec_bypass; #endif struct dadq; @@ -103,7 +102,7 @@ static struct dadq *nd6_dad_find(struct ifaddr *); void nd6_dad_stoptimer(struct ifaddr *); static void nd6_dad_timer(struct ifaddr *); static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); -static void nd6_dad_ns_input(struct mbuf *, struct ifaddr *); +static void nd6_dad_ns_input(struct mbuf *, struct ifaddr *, char *, int); static struct mbuf *nd6_dad_na_input(struct mbuf *, struct ifnet *, struct in6_addr *, caddr_t, int); static void dad_addref(struct dadq *, int); @@ -273,16 +272,8 @@ nd6_ns_input( /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len, return); nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); -#else - IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len); - if (nd_ns == NULL) { - icmp6stat.icp6s_tooshort++; - return; - } -#endif m->m_pkthdr.pkt_flags |= PKTF_INET6_RESOLVE; ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */ @@ -482,7 +473,7 @@ nd6_ns_input( * silently ignore it. */ if (is_dad_probe) - nd6_dad_ns_input(m, ifa); + nd6_dad_ns_input(m, ifa, lladdr, lladdrlen); goto freeit; } @@ -712,7 +703,14 @@ nd6_ns_output( IFA_REMREF(&ia->ia_ifa); ia = NULL; } - + /* + * RFC 4429 section 3.2: + * When a node has a unicast packet to send + * from an Optimistic Address to a neighbor, + * but does not know the neighbor's link-layer + * address, it MUST NOT perform Address + * Resolution. + */ ia = in6ifa_ifpwithaddr(ifp, src); if (!ia || (ia->ia6_flags & IN6_IFF_OPTIMISTIC)) { nd6log((LOG_DEBUG, @@ -774,11 +772,6 @@ nd6_ns_output( nd_ns->nd_ns_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len); -#if IPSEC - /* Don't lookup socket */ - if (ipsec_bypass == 0) - (void) ipsec_setsocket(m, NULL); -#endif flags = dad ? 
IPV6_UNSPECSRC : 0; flags |= IPV6_OUTARGS; @@ -874,16 +867,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) goto bad; } -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len, return); nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off); -#else - IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len); - if (nd_na == NULL) { - icmp6stat.icp6s_tooshort++; - return; - } -#endif m->m_pkthdr.pkt_flags |= PKTF_INET6_RESOLVE; flags = nd_na->nd_na_flags_reserved; @@ -1395,11 +1380,6 @@ nd6_na_output( nd_na->nd_na_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len); -#if IPSEC - /* Don't lookup socket */ - if (ipsec_bypass == 0) - (void) ipsec_setsocket(m, NULL); -#endif m->m_pkthdr.pkt_flags |= PKTF_INET6_RESOLVE; if (ifp->if_eflags & IFEF_TXSTART) { @@ -1469,6 +1449,7 @@ struct dadq { int dad_ns_icount; int dad_na_icount; int dad_nd_ixcount; /* Count of IFDISABLED eligible ND rx'd */ + uint8_t dad_ehsrc[ETHER_ADDR_LEN]; }; static struct dadq_head dadq; @@ -1535,6 +1516,12 @@ nd6_dad_start( struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; + nd6log2((LOG_DEBUG, "%s - %s ifp %s ia6_flags 0x%x\n", + __func__, + ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ia->ia_ifp), + ia->ia6_flags)); + /* * If we don't need DAD, don't do it. * There are several cases: @@ -1585,7 +1572,7 @@ nd6_dad_start( nd6log((LOG_DEBUG, "%s: starting %sDAD for %s\n", if_name(ifa->ifa_ifp), - (ia->ia_flags & IN6_IFF_OPTIMISTIC) ? "optimistic " : "", + (ia->ia6_flags & IN6_IFF_OPTIMISTIC) ? "optimistic " : "", ip6_sprintf(&ia->ia_addr.sin6_addr))); /* @@ -1685,7 +1672,6 @@ nd6_dad_stop(struct ifaddr *ifa) DAD_REMREF(dp); /* drop our reference */ } - static void nd6_unsol_na_output(struct ifaddr *ifa) { @@ -1718,12 +1704,20 @@ nd6_dad_timer(struct ifaddr *ifa) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp = NULL; + struct nd_ifinfo *ndi; /* Sanity check */ if (ia == NULL) { log(LOG_ERR, "nd6_dad_timer: called with null parameter\n"); goto done; } + + nd6log2((LOG_DEBUG, "%s - %s ifp %s ia6_flags 0x%x\n", + __func__, + ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ia->ia_ifp), + ia->ia6_flags)); + dp = nd6_dad_find(ifa); if (dp == NULL) { log(LOG_ERR, "nd6_dad_timer: DAD structure not found\n"); @@ -1762,7 +1756,6 @@ nd6_dad_timer(struct ifaddr *ifa) /* Need more checks? */ if (dp->dad_ns_ocount < dp->dad_count) { u_int32_t retrans; - struct nd_ifinfo *ndi; DAD_UNLOCK(dp); /* @@ -1810,6 +1803,8 @@ nd6_dad_timer(struct ifaddr *ifa) nd6_dad_duplicated(ifa); /* (*dp) will be freed in nd6_dad_duplicated() */ } else { + boolean_t txunsolna; + /* * We are done with DAD. No NA came, no NS came. * No duplicate address found. @@ -1818,17 +1813,25 @@ nd6_dad_timer(struct ifaddr *ifa) ia->ia6_flags &= ~IN6_IFF_DADPROGRESS; IFA_UNLOCK(&ia->ia_ifa); + lck_rw_lock_shared(nd_if_rwlock); + ndi = ND_IFINFO(ifa->ifa_ifp); + VERIFY(ndi != NULL && ndi->initialized); + lck_mtx_lock(&ndi->lock); + txunsolna = (ndi->flags & ND6_IFF_REPLICATED) != 0; + lck_mtx_unlock(&ndi->lock); + lck_rw_done(nd_if_rwlock); + + if (txunsolna) { + nd6_unsol_na_output(ifa); + } + nd6log((LOG_DEBUG, - "%s: DAD complete for %s - no duplicates found\n", + "%s: DAD complete for %s - no duplicates found%s\n", if_name(ifa->ifa_ifp), - ip6_sprintf(&ia->ia_addr.sin6_addr))); - /* - * Send an Unsolicited Neighbor Advertisement so that - * other machines on the network are aware of us - * (important when we are waking from sleep). 
- */ - nd6_unsol_na_output(ifa); - in6_post_msg(ia->ia_ifp, KEV_INET6_NEW_USER_ADDR, ia); + ip6_sprintf(&ia->ia_addr.sin6_addr), + txunsolna ? ", tx unsolicited NA with O=1" : ".")); + in6_post_msg(ia->ia_ifp, KEV_INET6_NEW_USER_ADDR, ia, + dp->dad_ehsrc); nd6_dad_detach(dp, ifa); } } @@ -1890,7 +1893,7 @@ nd6_dad_duplicated(struct ifaddr *ifa) * duplicate address will be notified to the user and will * be removed. */ - in6_post_msg(ifp, KEV_INET6_NEW_USER_ADDR, ia); + in6_post_msg(ifp, KEV_INET6_NEW_USER_ADDR, ia, dp->dad_ehsrc); nd6_dad_detach(dp, ifa); DAD_REMREF(dp); /* drop our reference */ } @@ -1922,22 +1925,21 @@ nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa) } static void -nd6_dad_ns_input(struct mbuf *m, struct ifaddr *ifa) +nd6_dad_ns_input(struct mbuf *m, struct ifaddr *ifa, char *lladdr, + int lladdrlen) { struct dadq *dp; struct in6_ifaddr *ia; boolean_t candisable, dadstarted; + struct ip6aux *ip6a; VERIFY(ifa != NULL); candisable = FALSE; IFA_LOCK(ifa); ia = (struct in6_ifaddr *) ifa; if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { - struct ip6aux *ip6a; - - candisable = TRUE; ip6a = ip6_findaux(m); - + candisable = TRUE; if (ip6a && (ip6a->ip6a_flags & IP6A_HASEEN) != 0) { struct in6_addr in6 = ia->ia_addr.sin6_addr; @@ -1988,6 +1990,8 @@ nd6_dad_ns_input(struct mbuf *m, struct ifaddr *ifa) ++dp->dad_nd_ixcount; if (dp->dad_ns_ocount > 0) dadstarted = TRUE; + if (lladdr && lladdrlen >= ETHER_ADDR_LEN) + memcpy(dp->dad_ehsrc, lladdr, ETHER_ADDR_LEN); DAD_UNLOCK(dp); DAD_REMREF(dp); dp = NULL; @@ -2013,27 +2017,27 @@ nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr, struct in6_ifaddr *ia; struct dadq *dp; struct nd_ifinfo *ndi; - boolean_t candisable, ignoring; + boolean_t candisable, replicated; ifa = (struct ifaddr *) in6ifa_ifpwithaddr(ifp, taddr); if (ifa == NULL) return m; candisable = FALSE; - ignoring = FALSE; + replicated = FALSE; - /* The ND6_IFF_IGNORE_NA flag is here for legacy reasons. */ + /* Get the ND6_IFF_REPLICATED flag. */ lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); if (ndi != NULL && ndi->initialized) { lck_mtx_lock(&ndi->lock); - ignoring = !!(ndi->flags & ND6_IFF_IGNORE_NA); + replicated = !!(ndi->flags & ND6_IFF_REPLICATED); lck_mtx_unlock(&ndi->lock); } lck_rw_done(nd_if_rwlock); - if (ignoring) { + if (replicated) { nd6log((LOG_INFO, "%s: ignoring duplicate NA on " - "%s [ND6_IFF_IGNORE_NA]\n", __func__, if_name(ifp))); + "replicated interface %s\n", __func__, if_name(ifp))); goto done; } @@ -2050,7 +2054,6 @@ nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr, * hardware address is not also ours, which is a transitory possibility * in the presence of network-resident sleep proxies on the local link. */ - if (!(ia->ia6_flags & IN6_IFF_DADPROGRESS)) { IFA_UNLOCK(ifa); nd6log((LOG_INFO, "%s: ignoring duplicate NA on " @@ -2064,7 +2067,7 @@ nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr, * the L2-header source address, if we have seen it, with the target * address, and ignoring the NA if they don't match. 
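The DAD changes above disambiguate sleep-proxy advertisements by comparing the frame's Ethernet source (saved in ip6aux) with the target link-layer option, ignoring the NA when they disagree, and by recording the offender's MAC in the new dad_ehsrc field for the userland event. A small sketch of that comparison, assuming 6-byte Ethernet addresses:

#include <stdbool.h>
#include <string.h>
#include <stdint.h>

#define ETHER_ADDR_LEN 6

/* True when the NA's L2 source and its target lladdr option disagree,
 * which marks a proxied advertisement rather than a genuine duplicate. */
static bool
na_is_proxied(const uint8_t eh_src[ETHER_ADDR_LEN],
              const uint8_t *tgt_lladdr, size_t tgt_len)
{
    if (tgt_lladdr == NULL || tgt_len < ETHER_ADDR_LEN)
        return false;                    /* nothing to compare against */
    return memcmp(eh_src, tgt_lladdr, ETHER_ADDR_LEN) != 0;
}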
*/ - if (lladdr != NULL && lladdrlen == ETHER_ADDR_LEN) { + if (lladdr != NULL && lladdrlen >= ETHER_ADDR_LEN) { struct ip6aux *ip6a = ip6_findaux(m); if (ip6a && (ip6a->ip6a_flags & IP6A_HASEEN) != 0 && bcmp(ip6a->ip6a_ehsrc, lladdr, ETHER_ADDR_LEN) != 0) { @@ -2146,6 +2149,8 @@ nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr, } DAD_LOCK_SPIN(dp); + if (lladdr != NULL && lladdrlen >= ETHER_ADDR_LEN) + memcpy(dp->dad_ehsrc, lladdr, ETHER_ADDR_LEN); dp->dad_na_icount++; if (candisable) dp->dad_nd_ixcount++; diff --git a/bsd/netinet6/nd6_prproxy.c b/bsd/netinet6/nd6_prproxy.c index f4935bd9a..b0898905c 100644 --- a/bsd/netinet6/nd6_prproxy.c +++ b/bsd/netinet6/nd6_prproxy.c @@ -290,13 +290,16 @@ nd6_prproxy_prelist_setroute(boolean_t enable, SLIST_FOREACH_SAFE(up, up_head, ndprl_le, ndprl_tmp) { struct rtentry *rt; - boolean_t prproxy; + boolean_t prproxy, set_allmulti = FALSE; + int allmulti_sw; + struct ifnet *ifp = NULL; SLIST_REMOVE(up_head, up, nd6_prproxy_prelist, ndprl_le); pr = up->ndprl_pr; VERIFY(up->ndprl_up == NULL); NDPR_LOCK(pr); + ifp = pr->ndpr_ifp; prproxy = (pr->ndpr_stateflags & NDPRF_PRPROXY); VERIFY(!prproxy || ((pr->ndpr_stateflags & NDPRF_ONLINK) && !(pr->ndpr_stateflags & NDPRF_IFSCOPE))); @@ -308,11 +311,13 @@ nd6_prproxy_prelist_setroute(boolean_t enable, if (enable && pr->ndpr_allmulti_cnt == 0) { nd6_prproxy++; pr->ndpr_allmulti_cnt++; - if_allmulti(pr->ndpr_ifp, TRUE); + set_allmulti = TRUE; + allmulti_sw = TRUE; } else if (!enable && pr->ndpr_allmulti_cnt > 0) { nd6_prproxy--; pr->ndpr_allmulti_cnt--; - if_allmulti(pr->ndpr_ifp, FALSE); + set_allmulti = TRUE; + allmulti_sw = FALSE; } if ((rt = pr->ndpr_rt) != NULL) { @@ -324,6 +329,12 @@ nd6_prproxy_prelist_setroute(boolean_t enable, } else { NDPR_UNLOCK(pr); } + + /* Call the following ioctl after releasing NDPR lock */ + if (set_allmulti && ifp != NULL) + if_allmulti(ifp, allmulti_sw); + + NDPR_REMREF(pr); if (rt != NULL) { rt_set_proxy(rt, enable); @@ -335,7 +346,9 @@ nd6_prproxy_prelist_setroute(boolean_t enable, SLIST_FOREACH_SAFE(down, down_head, ndprl_le, ndprl_tmp) { struct nd_prefix *pr_up; struct rtentry *rt; - boolean_t prproxy; + boolean_t prproxy, set_allmulti = FALSE; + int allmulti_sw; + struct ifnet *ifp = NULL; SLIST_REMOVE(down_head, down, nd6_prproxy_prelist, ndprl_le); pr = down->ndprl_pr; @@ -343,6 +356,7 @@ nd6_prproxy_prelist_setroute(boolean_t enable, VERIFY(pr_up != NULL); NDPR_LOCK(pr_up); + ifp = pr->ndpr_ifp; prproxy = (pr_up->ndpr_stateflags & NDPRF_PRPROXY); VERIFY(!prproxy || ((pr_up->ndpr_stateflags & NDPRF_ONLINK) && !(pr_up->ndpr_stateflags & NDPRF_IFSCOPE))); @@ -351,10 +365,12 @@ nd6_prproxy_prelist_setroute(boolean_t enable, NDPR_LOCK(pr); if (enable && pr->ndpr_allmulti_cnt == 0) { pr->ndpr_allmulti_cnt++; - if_allmulti(pr->ndpr_ifp, TRUE); + set_allmulti = TRUE; + allmulti_sw = TRUE; } else if (!enable && pr->ndpr_allmulti_cnt > 0) { pr->ndpr_allmulti_cnt--; - if_allmulti(pr->ndpr_ifp, FALSE); + set_allmulti = TRUE; + allmulti_sw = FALSE; } if ((rt = pr->ndpr_rt) != NULL) { @@ -366,6 +382,9 @@ nd6_prproxy_prelist_setroute(boolean_t enable, } else { NDPR_UNLOCK(pr); } + if (set_allmulti && ifp != NULL) + if_allmulti(ifp, allmulti_sw); + NDPR_REMREF(pr); NDPR_REMREF(pr_up); if (rt != NULL) { diff --git a/bsd/netinet6/nd6_rtr.c b/bsd/netinet6/nd6_rtr.c index b4f2cf456..ceb0f7d9c 100644 --- a/bsd/netinet6/nd6_rtr.c +++ b/bsd/netinet6/nd6_rtr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2013 Apple Inc. All rights reserved. 
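The nd6_prproxy hunks convert a call-under-lock into a deferred one: the decision to toggle all-multicast is recorded while the prefix lock is held, and if_allmulti() itself runs only after the lock is released, avoiding lock-order trouble with the ioctl path. A generic sketch of that defer pattern, with a pthread mutex standing in for the NDPR lock:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct prefix {
    pthread_mutex_t lock;
    unsigned        allmulti_cnt;
    void           *ifp;          /* interface, captured under the lock */
};

extern void if_allmulti(void *ifp, bool on);   /* may take other locks */

static void
prefix_set_proxy(struct prefix *pr, bool enable)
{
    bool do_allmulti = false, allmulti_on = false;
    void *ifp;

    pthread_mutex_lock(&pr->lock);
    ifp = pr->ifp;                          /* snapshot while protected */
    if (enable && pr->allmulti_cnt == 0) {
        pr->allmulti_cnt++;
        do_allmulti = true; allmulti_on = true;
    } else if (!enable && pr->allmulti_cnt > 0) {
        pr->allmulti_cnt--;
        do_allmulti = true; allmulti_on = false;
    }
    pthread_mutex_unlock(&pr->lock);

    if (do_allmulti && ifp != NULL)
        if_allmulti(ifp, allmulti_on);      /* side effect, lock dropped */
}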
+ * Copyright (c) 2003-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,7 +72,7 @@ #include -#include +#include #include #include @@ -316,17 +316,8 @@ nd6_rs_input( } } -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len, return); nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off); -#else - IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len); - if (nd_rs == NULL) { - icmp6stat.icp6s_tooshort++; - return; - } -#endif - icmp6len -= sizeof (*nd_rs); nd6_option_init(nd_rs + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { @@ -430,16 +421,8 @@ nd6_ra_input( goto bad; } -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len, return); nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off); -#else - IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len); - if (nd_ra == NULL) { - icmp6stat.icp6s_tooshort++; - return; - } -#endif icmp6len -= sizeof (*nd_ra); nd6_option_init(nd_ra + 1, icmp6len, &ndopts); @@ -1506,12 +1489,16 @@ defrouter_select(struct ifnet *ifp) ++update; /* - * If the installed router is no longe reachable, remove + * If the installed router is no longer reachable, remove * it and install the selected router instead. */ - if (installed_dr != NULL && selected_dr != NULL && - installed_dr != selected_dr && found_installedrt == FALSE) { - installed_dr0 = installed_dr; /* skip it below */ + if (installed_dr != NULL + && selected_dr != NULL + && installed_dr != selected_dr + && found_installedrt == FALSE + && installed_dr->ifp == selected_dr->ifp) { + /* skip it below */ + installed_dr0 = installed_dr; /* NB: we previousled referenced installed_dr */ installed_dr = NULL; selected_dr->genid = -1; @@ -2006,14 +1993,15 @@ purge_detached(struct ifnet *ifp) struct nd_prefix *pr, *pr_next; struct in6_ifaddr *ia; struct ifaddr *ifa, *ifa_next; + boolean_t removed = FALSE; lck_mtx_lock(nd6_mutex); pr = nd_prefix.lh_first; repeat: while (pr) { - pr_next = pr->ndpr_next; NDPR_LOCK(pr); + pr_next = pr->ndpr_next; if (pr->ndpr_ifp != ifp || IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) || ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && @@ -2022,11 +2010,12 @@ repeat: pr = pr_next; continue; } + NDPR_ADDREF_LOCKED(pr); NDPR_UNLOCK(pr); ifnet_lock_shared(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa_next) { - ifa_next = ifa->ifa_list.tqe_next; IFA_LOCK(ifa); + ifa_next = ifa->ifa_list.tqe_next; if (ifa->ifa_addr->sa_family != AF_INET6) { IFA_UNLOCK(ifa); continue; @@ -2046,6 +2035,7 @@ repeat: in6_purgeaddr(ifa); IFA_REMREF(ifa); /* drop ours */ lck_mtx_lock(nd6_mutex); + NDPR_REMREF(pr); pr = nd_prefix.lh_first; goto repeat; } @@ -2053,18 +2043,25 @@ repeat: } ifnet_lock_done(ifp); NDPR_LOCK(pr); - if (pr->ndpr_addrcnt == 0) { - NDPR_ADDREF_LOCKED(pr); + if (pr->ndpr_addrcnt == 0 && + !(pr->ndpr_stateflags & NDPRF_DEFUNCT)) { prelist_remove(pr); NDPR_UNLOCK(pr); - pfxlist_onlink_check(); - NDPR_REMREF(pr); + removed = TRUE; + /* + * Reset the search from the beginning because + * nd6_mutex may have been dropped in + * prelist_remove(). + */ + pr_next = nd_prefix.lh_first; } else { NDPR_UNLOCK(pr); } + NDPR_REMREF(pr); pr = pr_next; } - + if (removed) + pfxlist_onlink_check(); lck_mtx_unlock(nd6_mutex); } @@ -2203,6 +2200,18 @@ prelist_remove(struct nd_prefix *pr) lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); NDPR_LOCK_ASSERT_HELD(pr); + if (pr->ndpr_stateflags & NDPRF_DEFUNCT) + return; + + /* + * If there are no more addresses, defunct the prefix. 
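The new NDPRF_DEFUNCT flag makes prefix teardown idempotent: the first caller that sees the address count hit zero marks the prefix defunct before it must drop nd6_mutex, and any concurrent caller that observes the flag returns instead of removing the prefix twice. A compact sketch of that mark-before-unlock pattern, simplified to a lock acquired and released inside one function:

#include <pthread.h>
#include <stdbool.h>

struct pfx {
    bool     defunct;        /* set once teardown is committed */
    unsigned addrcnt;        /* addresses still derived from this prefix */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

extern void teardown_routes(struct pfx *p);   /* must run without list_lock */

/* Idempotent removal: only the thread that sets 'defunct' proceeds. */
static void
pfx_remove(struct pfx *p)
{
    pthread_mutex_lock(&list_lock);
    if (p->defunct || p->addrcnt != 0) {      /* another thread owns teardown */
        pthread_mutex_unlock(&list_lock);
        return;
    }
    p->defunct = true;                        /* commit before unlocking */
    pthread_mutex_unlock(&list_lock);

    teardown_routes(p);                       /* lock dropped, flag guards us */
}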
This is needed + * because we don't want multiple threads calling prelist_remove() for + * the same prefix and this might happen because we unlock nd6_mutex + * down below. + */ + if (pr->ndpr_addrcnt == 0) + pr->ndpr_stateflags |= NDPRF_DEFUNCT; + /* make sure to invalidate the prefix until it is really freed. */ pr->ndpr_vltime = 0; pr->ndpr_pltime = 0; @@ -2212,8 +2221,7 @@ prelist_remove(struct nd_prefix *pr) * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users * when executing "ndp -p". */ - - if ((pr->ndpr_stateflags & NDPRF_ONLINK)) { + if (pr->ndpr_stateflags & NDPRF_ONLINK) { NDPR_ADDREF_LOCKED(pr); NDPR_UNLOCK(pr); lck_mtx_unlock(nd6_mutex); @@ -2230,8 +2238,14 @@ prelist_remove(struct nd_prefix *pr) return; } - if (pr->ndpr_addrcnt > 0) - return; /* notice here? */ + if (pr->ndpr_addrcnt > 0) { + /* + * The state might have changed if we called + * nd6_prefix_offlink(). + */ + pr->ndpr_stateflags &= ~NDPRF_DEFUNCT; + return; /* notice here? */ + } /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); @@ -2298,7 +2312,6 @@ prelist_update( #endif } - if ((pr = nd6_prefix_lookup(new)) != NULL) { /* * nd6_prefix_lookup() ensures that pr and new have the same @@ -2323,6 +2336,7 @@ prelist_update( pr->ndpr_lastupdate = net_uptime(); } + NDPR_ADDREF_LOCKED(pr); if (new->ndpr_raf_onlink && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { int e; @@ -2346,6 +2360,7 @@ prelist_update( } else { NDPR_UNLOCK(pr); } + NDPR_REMREF(pr); lck_mtx_unlock(nd6_mutex); } else { struct nd_prefix *newpr = NULL; @@ -2905,6 +2920,8 @@ ndpr_getexpire(struct nd_prefix *pr) * A supplement function used in the on-link detection below; * detect if a given prefix has a (probably) reachable advertising router. * XXX: lengthy function name... + * + * Callers *must* increase the reference count of nd_prefix. */ static struct nd_pfxrouter * find_pfxlist_reachable_router(struct nd_prefix *pr) @@ -3003,8 +3020,10 @@ pfxlist_onlink_check(void) NDPR_ADDREF_LOCKED(pr); if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr) && (pr->ndpr_debug & IFD_ATTACHED)) { - NDPR_UNLOCK(pr); - NDPR_REMREF(pr); + if (NDPR_REMREF_LOCKED(pr) == NULL) + pr = NULL; + else + NDPR_UNLOCK(pr); break; } pr->ndpr_stateflags |= NDPRF_PROCESSED_ONLINK; @@ -3022,7 +3041,6 @@ pfxlist_onlink_check(void) prclear->ndpr_stateflags &= ~NDPRF_PROCESSED_ONLINK; NDPR_UNLOCK(prclear); } - /* * If we have no such prefix, check whether we still have a router * that does not advertise any prefixes. 
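pfxlist_onlink_check() now uses NDPR_REMREF_LOCKED(), which returns NULL when it dropped the last reference and freed the prefix; only a non-NULL return may still be unlocked. A sketch of that drop-last-ref-while-locked contract, with a toy refcounted object:

#include <pthread.h>
#include <stdlib.h>

struct obj {
    pthread_mutex_t lock;
    int             refcnt;       /* protected by lock */
};

/* Drop one reference with the lock held. Returns NULL if the object
 * was freed (caller must not unlock), else the still-live object. */
static struct obj *
obj_remref_locked(struct obj *o)
{
    if (--o->refcnt > 0)
        return o;                 /* caller still owns the lock */
    pthread_mutex_unlock(&o->lock);
    pthread_mutex_destroy(&o->lock);
    free(o);
    return NULL;
}

/* Usage mirroring the patch: unlock only when the object survived. */
static void
drop_ref(struct obj *o)
{
    pthread_mutex_lock(&o->lock);
    if (obj_remref_locked(o) != NULL)
        pthread_mutex_unlock(&o->lock);
}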
@@ -3109,13 +3127,6 @@ pfxlist_onlink_check(void) prclear->ndpr_stateflags &= ~NDPRF_PROCESSED_ONLINK; NDPR_UNLOCK(prclear); } - VERIFY(nd_prefix_busy); - nd_prefix_busy = FALSE; - if (nd_prefix_waiters > 0) { - nd_prefix_waiters = 0; - wakeup(nd_prefix_waitchan); - } - /* * Remove each interface route associated with a (just) detached * prefix, and reinstall the interface route for a (just) attached @@ -3130,11 +3141,15 @@ pfxlist_onlink_check(void) NDPR_LOCK(pr); if (pr->ndpr_raf_onlink == 0 || - pr->ndpr_stateflags & NDPRF_STATIC) { + pr->ndpr_stateflags & NDPRF_STATIC || + pr->ndpr_stateflags & NDPRF_PROCESSED_ONLINK || + pr->ndpr_stateflags & NDPRF_DEFUNCT) { NDPR_UNLOCK(pr); pr = pr->ndpr_next; continue; } + pr->ndpr_stateflags |= NDPRF_PROCESSED_ONLINK; + NDPR_ADDREF_LOCKED(pr); if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { NDPR_UNLOCK(pr); @@ -3147,6 +3162,7 @@ pfxlist_onlink_check(void) pr->ndpr_plen, e)); } lck_mtx_lock(nd6_mutex); + NDPR_REMREF(pr); pr = nd_prefix.lh_first; continue; } @@ -3161,11 +3177,26 @@ pfxlist_onlink_check(void) ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, e)); } + NDPR_REMREF(pr); + pr = nd_prefix.lh_first; + continue; } else { NDPR_UNLOCK(pr); } + NDPR_REMREF(pr); pr = pr->ndpr_next; } + LIST_FOREACH(prclear, &nd_prefix, ndpr_entry) { + NDPR_LOCK(prclear); + prclear->ndpr_stateflags &= ~NDPRF_PROCESSED_ONLINK; + NDPR_UNLOCK(prclear); + } + VERIFY(nd_prefix_busy); + nd_prefix_busy = FALSE; + if (nd_prefix_waiters > 0) { + nd_prefix_waiters = 0; + wakeup(nd_prefix_waitchan); + } /* * Changes on the prefix status might affect address status as well. @@ -3203,14 +3234,17 @@ pfxlist_onlink_check(void) IFA_UNLOCK(&ifa->ia_ifa); continue; } - NDPR_ADDREF(ndpr); IFA_UNLOCK(&ifa->ia_ifa); NDPR_LOCK(ndpr); + NDPR_ADDREF_LOCKED(ndpr); if (find_pfxlist_reachable_router(ndpr)) { - NDPR_UNLOCK(ndpr); - NDPR_REMREF(ndpr); - found = 1; + if (NDPR_REMREF_LOCKED(ndpr) == NULL) { + found = 0; + } else { + NDPR_UNLOCK(ndpr); + found = 1; + } break; } NDPR_UNLOCK(ndpr); @@ -3230,9 +3264,9 @@ pfxlist_onlink_check(void) IFA_UNLOCK(&ifa->ia_ifa); continue; } - NDPR_ADDREF(ndpr); IFA_UNLOCK(&ifa->ia_ifa); NDPR_LOCK(ndpr); + NDPR_ADDREF_LOCKED(ndpr); if (find_pfxlist_reachable_router(ndpr)) { NDPR_UNLOCK(ndpr); IFA_LOCK(&ifa->ia_ifa); @@ -3560,7 +3594,8 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped, * TODO: If the prefix route exists, we should really find it and * refer the prefix to it; otherwise ndpr_rt is NULL. 
*/ - if (rt != NULL || error == EEXIST) { + if (!(pr->ndpr_stateflags & NDPRF_DEFUNCT) && + (rt != NULL || error == EEXIST)) { struct nd_ifinfo *ndi; VERIFY(pr->ndpr_prproxy_sols_cnt == 0); @@ -3591,7 +3626,8 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped, lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); - } + } else if (rt != NULL && pr->ndpr_stateflags & NDPRF_DEFUNCT) + rtfree(rt); prproxy = (pr->ndpr_stateflags & NDPRF_PRPROXY); VERIFY(!prproxy || !(pr->ndpr_stateflags & NDPRF_IFSCOPE)); @@ -3720,8 +3756,8 @@ nd6_prefix_offlink(struct nd_prefix *pr) &opr->ndpr_prefix.sin6_addr, plen)) { int e; + NDPR_ADDREF_LOCKED(opr); NDPR_UNLOCK(opr); - lck_mtx_unlock(nd6_mutex); if ((e = nd6_prefix_onlink(opr)) != 0) { nd6log((LOG_ERR, "nd6_prefix_offlink: failed to " @@ -3732,7 +3768,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) opr->ndpr_plen, if_name(ifp), if_name(opr->ndpr_ifp), e)); } - lck_mtx_lock(nd6_mutex); + NDPR_REMREF(opr); opr = nd_prefix.lh_first; } else { NDPR_UNLOCK(opr); @@ -3841,7 +3877,7 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) } bzero(&ifra, sizeof (ifra)); - strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); + strlcpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_len = sizeof (struct sockaddr_in6); @@ -3951,7 +3987,7 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) } VERIFY(ia6 != NULL); - in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia6); + in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia6, NULL); goto done; unlock2: @@ -3982,7 +4018,7 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen) struct nd_prefix *ndpr; bzero(&ifra, sizeof (ifra)); - strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); + strlcpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa); ifra.ifra_addr = ia0->ia_addr; /* copy prefix mask */ @@ -4297,7 +4333,7 @@ nd6_setdefaultiface( } /* - * Our current implementation assumes one-to-one maping between + * Our current implementation assumes one-to-one mapping between * interfaces and links, so it would be natural to use the * default interface as the default link. */ diff --git a/bsd/netinet6/nd6_send.c b/bsd/netinet6/nd6_send.c index 916607bd6..04f5223d4 100644 --- a/bsd/netinet6/nd6_send.c +++ b/bsd/netinet6/nd6_send.c @@ -42,6 +42,11 @@ #include #include +#if CONFIG_MACF +#include +#include +#endif + SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. 
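Two hunks above swap strncpy() for strlcpy() when filling ifra_name: strncpy does not NUL-terminate when the source fills the buffer, while strlcpy always terminates and truncates. A portable sketch of strlcpy for platforms that lack it (BSD and macOS provide it natively):

#include <string.h>
#include <stddef.h>

/* Copy src into dst of size dstsize, always NUL-terminating.
 * Returns strlen(src) so callers can detect truncation. */
static size_t
my_strlcpy(char *dst, const char *src, size_t dstsize)
{
    size_t srclen = strlen(src);

    if (dstsize != 0) {
        size_t n = (srclen >= dstsize) ? dstsize - 1 : srclen;
        memcpy(dst, src, n);
        dst[n] = '\0';
    }
    return srclen;
}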
*/ SYSCTL_NODE(_net_inet6, OID_AUTO, send, CTLFLAG_RW | CTLFLAG_LOCKED, 0, @@ -84,6 +89,9 @@ sysctl_cga_parameters SYSCTL_HANDLER_ARGS int error; char *buffer; u_int16_t u16; +#if CONFIG_MACF + kauth_cred_t cred; +#endif namelen = arg2; if (namelen != 0) { @@ -98,6 +106,16 @@ sysctl_cga_parameters SYSCTL_HANDLER_ARGS return (EINVAL); } +#if CONFIG_MACF + cred = kauth_cred_proc_ref(current_proc()); + error = mac_system_check_info(cred, "net.inet6.send.cga_parameters"); + kauth_cred_unref(&cred); + if (error != 0) { + log(LOG_ERR, "%s: mac_system_check_info denied.\n", __func__); + return (EPERM); + } +#endif + MALLOC(buffer, char *, SYSCTL_CGA_PARAMETERS_BUFFER_SIZE, M_IP6CGA, M_WAITOK); if (buffer == NULL) { diff --git a/bsd/netinet6/pim6.h b/bsd/netinet6/pim6.h deleted file mode 100644 index 1ae3f1afb..000000000 --- a/bsd/netinet6/pim6.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD: src/sys/netinet6/pim6.h,v 1.1.2.1 2000/07/15 07:14:36 kris Exp $ */ -/* $KAME: pim6.h,v 1.3 2000/03/25 07:23:58 sumikawa Exp $ */ - -/* - * Copyright (C) 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * Protocol Independent Multicast (PIM) definitions - * - * Written by Ahmed Helmy, SGI, July 1996 - * - * MULTICAST - */ -#include - -/* - * PIM packet header - */ -#define PIM_VERSION 2 -struct pim { -#if defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN) - u_char pim_type:4, /* the PIM message type, currently they are: - * Hello, Register, Register-Stop, Join/Prune, - * Bootstrap, Assert, Graft (PIM-DM only), - * Graft-Ack (PIM-DM only), C-RP-Adv - */ - pim_ver:4; /* PIM version number; 2 for PIMv2 */ -#else - u_char pim_ver:4, /* PIM version */ - pim_type:4; /* PIM type */ -#endif - u_char pim_rsv; /* Reserved */ - u_short pim_cksum; /* IP style check sum */ -}; - -#define PIM_MINLEN 8 /* The header min. 
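sysctl_cga_parameters() now takes a process credential reference, asks the MAC framework whether the caller may read the CGA parameters, and drops the reference before acting on the verdict. The ref/check/unref shape in sketch form, with the credential and MAC primitives stubbed since their behavior is framework-defined:

#include <errno.h>

/* Stubs for the credential and MAC-check primitives used above. */
typedef struct ucred *kauth_cred_t;
extern kauth_cred_t cred_ref_current(void);   /* kauth_cred_proc_ref analogue */
extern void cred_unref(kauth_cred_t *);       /* kauth_cred_unref analogue    */
extern int mac_check_info(kauth_cred_t, const char *);

/* Gate a privileged sysctl read behind a MAC policy check. */
static int
gated_read(const char *info_type)
{
    kauth_cred_t cred = cred_ref_current();
    int error = mac_check_info(cred, info_type);

    cred_unref(&cred);                 /* always drop the ref, pass or fail */
    return (error != 0) ? EPERM : 0;   /* the patch maps any denial to EPERM */
}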
length is 8 */ -#define PIM6_REG_MINLEN (PIM_MINLEN+40) /* Register message + inner IP6 header */ - -/* - * Message types - */ -#define PIM_REGISTER 1 /* PIM Register type is 1 */ - -/* second bit in reg_head is the null bit */ -#define PIM_NULL_REGISTER 0x40000000 diff --git a/bsd/netinet6/pim6_var.h b/bsd/netinet6/pim6_var.h deleted file mode 100644 index a84637b1a..000000000 --- a/bsd/netinet6/pim6_var.h +++ /dev/null @@ -1,72 +0,0 @@ -/* $FreeBSD: src/sys/netinet6/pim6_var.h,v 1.2.2.1 2000/07/15 07:14:36 kris Exp $ */ -/* $KAME: pim6_var.h,v 1.8 2000/06/06 08:07:43 jinmei Exp $ */ - -/* - * Copyright (C) 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _NETINET6_PIM6_VAR_H_ -#define _NETINET6_PIM6_VAR_H_ -#include - -/* - * Protocol Independent Multicast (PIM), - * implementation-specific definitions. - * - * Written by George Edmond Eddy (Rusty), ISI, February 1998 - * Modified by Pavlin Ivanov Radoslavov, USC/ISI, May 1998 - */ - -struct pim6stat { - u_quad_t pim6s_rcv_total; /* total PIM messages received */ - u_quad_t pim6s_rcv_tooshort; /* received with too few bytes */ - u_quad_t pim6s_rcv_badsum; /* received with bad checksum */ - u_quad_t pim6s_rcv_badversion; /* received bad PIM version */ - u_quad_t pim6s_rcv_registers; /* received registers */ - u_quad_t pim6s_rcv_badregisters; /* received invalid registers */ - u_quad_t pim6s_snd_registers; /* sent registers */ -}; - - - -/* - * Names for PIM sysctl objects - */ -#define PIM6CTL_STATS 1 /* statistics (read-only) */ -#define PIM6CTL_MAXID 2 - -#ifdef BSD_KERNEL_PRIVATE -#define PIM6CTL_NAMES { \ - { 0, 0 }, \ - { 0, 0 }, \ -} - -int pim6_input(struct mbuf **, int*); - -#endif /* BSD_KERNEL_PRIVATE */ -#endif /* _NETINET6_PIM6_VAR_H_ */ diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c index 09728f630..3ac3106bd 100644 --- a/bsd/netinet6/raw_ip6.c +++ b/bsd/netinet6/raw_ip6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
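Note: the two headers deleted above remove the kernel's PIMv2 definitions along with the MROUTING/MRT6_* code paths that raw_ip6.c stops referencing below. The removed struct pim depended on BYTE_ORDER-specific bit-field layout to split the version/type octet; for anyone who still needs the decode outside the kernel, a minimal endianness-independent sketch (the helper names are illustrative, not from this tree):

    #include <stdint.h>

    /*
     * PIMv2 carries the version in the high nibble and the message
     * type in the low nibble of the first header octet, so shifts
     * and masks on the raw byte replace the bit-field dance.
     */
    static inline uint8_t
    pim_hdr_version(const uint8_t *hdr)
    {
            return (hdr[0] >> 4);   /* PIM_VERSION == 2 expected */
    }

    static inline uint8_t
    pim_hdr_type(const uint8_t *hdr)
    {
            return (hdr[0] & 0x0f); /* e.g. PIM_REGISTER == 1 */
    }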
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -110,7 +110,6 @@ #include #include #include -#include #include #include #include @@ -123,9 +122,12 @@ #if IPSEC #include #include -extern int ipsec_bypass; #endif /*IPSEC*/ +#if NECP +#include +#endif + /* * Raw interface to IP6 protocol. */ @@ -178,11 +180,7 @@ rip6_input( !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; - if (inp_restricted(in6p, ifp)) - continue; - - if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && - (in6p->in6p_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(in6p, ifp)) continue; if (proto == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { @@ -196,16 +194,12 @@ rip6_input( if (last) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); -#if IPSEC - /* - * Check AH/ESP integrity. - */ - if (ipsec_bypass == 0 && n && ipsec6_in_reject_so(n, last->inp_socket)) { - m_freem(n); - IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); - /* do not inject data into pcb */ +#if NECP + if (n && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) { + m_freem(n); + /* do not inject data into pcb */ } else -#endif /*IPSEC*/ +#endif /* NECP */ if (n) { if ((last->in6p_flags & INP_CONTROLOPTS) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 || @@ -233,17 +227,13 @@ rip6_input( last = in6p; } -#if IPSEC - /* - * Check AH/ESP integrity. - */ - if (ipsec_bypass == 0 && last && ipsec6_in_reject_so(m, last->inp_socket)) { - m_freem(m); - IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); - ip6stat.ip6s_delivered--; - /* do not inject data into pcb */ +#if NECP + if (last && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) { + m_freem(m); + ip6stat.ip6s_delivered--; + /* do not inject data into pcb */ } else -#endif /*IPSEC*/ +#endif /* NECP */ if (last) { if ((last->in6p_flags & INP_CONTROLOPTS) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 || @@ -359,8 +349,15 @@ rip6_output( in6p = sotoin6pcb(so); - if (in6p == NULL || (in6p->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { - error = (in6p == NULL ? EINVAL : EPROTOTYPE); + if (in6p == NULL +#if NECP + || (necp_socket_should_use_flow_divert(in6p)) +#endif /* NECP */ + ) { + if (in6p == NULL) + error = EINVAL; + else + error = EPROTOTYPE; goto bad; } if (dstsock != NULL && IN6_IS_ADDR_V4MAPPED(&dstsock->sin6_addr)) { @@ -372,8 +369,12 @@ rip6_output( ip6oa.ip6oa_boundif = in6p->inp_boundifp->if_index; ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; } - if (in6p->inp_flags & INP_NO_IFT_CELLULAR) + if (INP_NO_CELLULAR(in6p)) ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR; + if (INP_NO_EXPENSIVE(in6p)) + ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE; + if (INP_AWDL_UNRESTRICTED(in6p)) + ip6oa.ip6oa_flags |= IP6OAF_AWDL_UNRESTRICTED; dst = &dstsock->sin6_addr; if (control) { @@ -546,9 +547,21 @@ rip6_output( *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } + +#if NECP + { + necp_kernel_policy_id policy_id; + if (!necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_src, &ip6->ip6_dst, NULL, &policy_id)) { + error = EHOSTUNREACH; + goto bad; + } + necp_mark_packet_from_socket(m, in6p, policy_id); + } +#endif /* NECP */ + #if IPSEC - if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) { + if (in6p->in6p_sp != NULL && ipsec_setsocket(m, so) != 0) { error = ENOBUFS; goto bad; } @@ -607,11 +620,11 @@ rip6_output( } /* - * If output interface was cellular, and this socket is denied - * access to it, generate an event. 
+ * If output interface was cellular/expensive, and this socket is + * denied access to it, generate an event. */ if (error != 0 && (ip6oa.ip6oa_retflags & IP6OARF_IFDENIED) && - (in6p->inp_flags & INP_NO_IFT_CELLULAR)) + (INP_NO_CELLULAR(in6p) || INP_NO_EXPENSIVE(in6p))) soevent(in6p->inp_socket, (SO_FILT_HINT_LOCKED| SO_FILT_HINT_IFDENIED)); @@ -687,20 +700,6 @@ rip6_ctloutput( error = ENOPROTOOPT; break; #endif - - case MRT6_INIT: - case MRT6_DONE: - case MRT6_ADD_MIF: - case MRT6_DEL_MIF: - case MRT6_ADD_MFC: - case MRT6_DEL_MFC: - case MRT6_PIM: -#if MROUTING - error = ip6_mrouter_get(so, sopt); -#else - error = ENOPROTOOPT; -#endif /* MROUTING */ - break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; @@ -726,19 +725,6 @@ rip6_ctloutput( break; #endif - case MRT6_INIT: - case MRT6_DONE: - case MRT6_ADD_MIF: - case MRT6_DEL_MIF: - case MRT6_ADD_MFC: - case MRT6_DEL_MFC: - case MRT6_PIM: -#if MROUTING - error = ip6_mrouter_set(so, sopt); -#else - error = ENOPROTOOPT; -#endif - break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; @@ -801,10 +787,6 @@ rip6_detach(struct socket *so) if (inp == 0) panic("rip6_detach"); /* xxx: RSVP */ -#if MROUTING - if (so == ip6_mrouter) - ip6_mrouter_done(); -#endif if (inp->in6p_icmp6filt) { FREE(inp->in6p_icmp6filt, M_PCB); inp->in6p_icmp6filt = NULL; @@ -841,7 +823,11 @@ rip6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) struct ifnet *outif = NULL; int error; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); if (nam->sa_len != sizeof (struct sockaddr_in6)) @@ -896,7 +882,11 @@ rip6_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) unsigned int ifscope; struct ifnet *outif = NULL; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); if (nam->sa_len != sizeof(*addr)) return EINVAL; @@ -947,8 +937,15 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct sockaddr_in6 *dst = (struct sockaddr_in6 *)(void *)nam; int error = 0; - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { - error = (inp == NULL ? 
EINVAL : EPROTOTYPE); + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { + if (inp == NULL) + error = EINVAL; + else + error = EPROTOTYPE; goto bad; } diff --git a/bsd/netinet6/route6.c b/bsd/netinet6/route6.c index 9a8dc3cae..f2cb83006 100644 --- a/bsd/netinet6/route6.c +++ b/bsd/netinet6/route6.c @@ -95,7 +95,6 @@ route6_input(struct mbuf **mp, int *offp, int proto) } #endif /* notyet */ -#ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*rh), return IPPROTO_DONE); /* Expect 32-bit aligned data pointer on strict-align platforms */ @@ -103,17 +102,6 @@ route6_input(struct mbuf **mp, int *offp, int proto) ip6 = mtod(m, struct ip6_hdr *); rh = (struct ip6_rthdr *)((caddr_t)ip6 + off); -#else - /* Expect 32-bit aligned data pointer on strict-align platforms */ - MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - - ip6 = mtod(m, struct ip6_hdr *); - IP6_EXTHDR_GET(rh, struct ip6_rthdr *, m, off, sizeof(*rh)); - if (rh == NULL) { - ip6stat.ip6s_tooshort++; - return (IPPROTO_DONE); - } -#endif switch (rh->ip6r_type) { default: diff --git a/bsd/netinet6/udp6_output.c b/bsd/netinet6/udp6_output.c index 71d7cdffd..2d64d6b94 100644 --- a/bsd/netinet6/udp6_output.c +++ b/bsd/netinet6/udp6_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -129,11 +129,9 @@ #include #include -#if IPSEC -#include -#include -extern int ipsec_bypass; -#endif /* IPSEC */ +#if NECP +#include +#endif /* NECP */ #include @@ -179,8 +177,12 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, ip6oa.ip6oa_boundif = in6p->inp_boundifp->if_index; ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; } - if (in6p->inp_flags & INP_NO_IFT_CELLULAR) + if (INP_NO_CELLULAR(in6p)) ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR; + if (INP_NO_EXPENSIVE(in6p)) + ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE; + if (INP_AWDL_UNRESTRICTED(in6p)) + ip6oa.ip6oa_flags |= IP6OAF_AWDL_UNRESTRICTED; if (control) { msc = mbuf_service_class_from_control(control); @@ -348,12 +350,25 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, flags = IPV6_OUTARGS; udp6stat.udp6s_opackets++; + +#if NECP + { + necp_kernel_policy_id policy_id; + if (!necp_socket_is_allowed_to_send_recv_v6(in6p, in6p->in6p_lport, fport, laddr, faddr, NULL, &policy_id)) { + error = EHOSTUNREACH; + goto release; + } + + necp_mark_packet_from_socket(m, in6p, policy_id); + } +#endif /* NECP */ + #if IPSEC - if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) { + if (in6p->in6p_sp != NULL && ipsec_setsocket(m, so) != 0) { error = ENOBUFS; goto release; } -#endif /* IPSEC */ +#endif /*IPSEC*/ /* In case of IPv4-mapped address used in previous send */ if (ROUTE_UNUSABLE(&in6p->in6p_route) || @@ -395,18 +410,20 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, IM6O_REMREF(im6o); if (error == 0 && nstat_collect) { - boolean_t cell, wifi; + boolean_t cell, wifi, wired; if (in6p->in6p_route.ro_rt != NULL) { cell = IFNET_IS_CELLULAR(in6p->in6p_route. ro_rt->rt_ifp); wifi = (!cell && IFNET_IS_WIFI(in6p->in6p_route. ro_rt->rt_ifp)); + wired = (!wifi && IFNET_IS_WIRED(in6p->in6p_route. 
+ ro_rt->rt_ifp)); } else { - cell = wifi = FALSE; + cell = wifi = wired = FALSE; } - INP_ADD_STAT(in6p, cell, wifi, txpackets, 1); - INP_ADD_STAT(in6p, cell, wifi, txbytes, ulen); + INP_ADD_STAT(in6p, cell, wifi, wired, txpackets, 1); + INP_ADD_STAT(in6p, cell, wifi, wired, txbytes, ulen); } if (flowadv && (adv->code == FADV_FLOW_CONTROLLED || @@ -452,11 +469,11 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, } /* - * If output interface was cellular, and this socket is - * denied access to it, generate an event. + * If output interface was cellular/expensive, and this + * socket is denied access to it, generate an event. */ if (error != 0 && (ip6oa.ip6oa_retflags & IP6OARF_IFDENIED) && - (in6p->inp_flags & INP_NO_IFT_CELLULAR) + (INP_NO_CELLULAR(in6p) || INP_NO_EXPENSIVE(in6p))) soevent(in6p->inp_socket, (SO_FILT_HINT_LOCKED| SO_FILT_HINT_IFDENIED)); break; diff --git a/bsd/netinet6/udp6_usrreq.c b/bsd/netinet6/udp6_usrreq.c index df5a5ff73..29d037b58 100644 --- a/bsd/netinet6/udp6_usrreq.c +++ b/bsd/netinet6/udp6_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -132,9 +132,12 @@ #if IPSEC #include #include -extern int ipsec_bypass; #endif /* IPSEC */ +#if NECP +#include +#endif /* NECP */ + /* * UDP protocol implementation. * Per RFC 768, August, 1980. @@ -204,6 +207,7 @@ udp6_append(struct inpcb *last, struct ip6_hdr *ip6, int ret = 0; boolean_t cell = IFNET_IS_CELLULAR(ifp); boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); + boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp)); #if CONFIG_MACF_NET if (mac_inpcb_check_deliver(last, n, AF_INET6, SOCK_DGRAM) != 0) { @@ -223,8 +227,8 @@ } m_adj(n, off); if (nstat_collect) { - INP_ADD_STAT(last, cell, wifi, rxpackets, 1); - INP_ADD_STAT(last, cell, wifi, rxbytes, n->m_pkthdr.len); + INP_ADD_STAT(last, cell, wifi, wired, rxpackets, 1); + INP_ADD_STAT(last, cell, wifi, wired, rxbytes, n->m_pkthdr.len); } so_recv_data_stat(last->in6p_socket, n, 0); if (sbappendaddr(&last->in6p_socket->so_rcv, @@ -246,7 +250,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) struct mbuf *opts = NULL; int off = *offp; int plen, ulen, ret = 0; - boolean_t cell, wifi; + boolean_t cell, wifi, wired; struct sockaddr_in6 udp_in6; struct inpcbinfo *pcbinfo = &udbinfo; struct sockaddr_in6 fromsa; @@ -260,6 +264,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) ip6 = mtod(m, struct ip6_hdr *); cell = IFNET_IS_CELLULAR(ifp); wifi = (!cell && IFNET_IS_WIFI(ifp)); + wired = (!wifi && IFNET_IS_WIRED(ifp)); udpstat.udps_ipackets++; @@ -294,7 +299,6 @@ udp6_input(struct mbuf **mp, int *offp, int proto) if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { int reuse_sock = 0, mcast_delivered = 0; struct ip6_moptions *imo; - struct mbuf *n = NULL; /* * Deliver a multicast datagram to all sockets @@ -346,11 +350,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) if ((in6p->inp_vflag & INP_IPV6) == 0) continue; - if (inp_restricted(in6p, ifp)) - continue; - - if (IFNET_IS_CELLULAR(ifp) && - (in6p->in6p_flags & INP_NO_IFT_CELLULAR)) + if (inp_restricted_recv(in6p, ifp)) continue; if (in_pcb_checkstate(in6p, WNT_ACQUIRE, 0) == @@ -407,18 +407,18 @@ udp6_input(struct mbuf **mp, int *offp, int proto) reuse_sock = in6p->inp_socket->so_options & (SO_REUSEPORT | SO_REUSEADDR); -#if IPSEC +#if NECP skipit = 0; - /* Check AH/ESP integrity. 
*/ - if (ipsec_bypass == 0 && - ipsec6_in_reject_so(m, in6p->inp_socket)) { - IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); + if (!necp_socket_is_allowed_to_send_recv_v6(in6p, + uh->uh_dport, uh->uh_sport, &ip6->ip6_dst, + &ip6->ip6_src, ifp, NULL)) { /* do not inject data to pcb */ skipit = 1; } if (skipit == 0) -#endif /* IPSEC */ +#endif /* NECP */ { + struct mbuf *n = NULL; /* * KAME NOTE: do not * m_copy(m, offset, ...) below. @@ -431,6 +431,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) udp6_append(in6p, ip6, &udp_in6, m, off + sizeof (struct udphdr), ifp); mcast_delivered++; + m = n; } udp_unlock(in6p->in6p_socket, 1, 0); @@ -442,7 +443,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) * port. It assumes that an application will never * clear these options after setting them. */ - if (reuse_sock == 0 || ((m = n) == NULL)) + if (reuse_sock == 0 || m == NULL) break; /* @@ -471,7 +472,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto) goto bad; } - if (reuse_sock != 0) /* free the extra copy of mbuf */ + /* free the extra copy of mbuf or skipped by NECP */ + if (m != NULL) m_freem(m); return (IPPROTO_DONE); } @@ -512,19 +514,14 @@ udp6_input(struct mbuf **mp, int *offp, int proto) icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } -#if IPSEC - /* - * Check AH/ESP integrity. - */ - if (ipsec_bypass == 0) { - if (ipsec6_in_reject_so(m, in6p->in6p_socket)) { - IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); - in_pcb_checkstate(in6p, WNT_RELEASE, 0); - IF_UDP_STATINC(ifp, badipsec); - goto bad; - } +#if NECP + if (!necp_socket_is_allowed_to_send_recv_v6(in6p, uh->uh_dport, + uh->uh_sport, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) { + in_pcb_checkstate(in6p, WNT_RELEASE, 0); + IF_UDP_STATINC(ifp, badipsec); + goto bad; } -#endif /* IPSEC */ +#endif /* NECP */ /* * Construct sockaddr format source address. @@ -551,8 +548,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto) } m_adj(m, off + sizeof (struct udphdr)); if (nstat_collect) { - INP_ADD_STAT(in6p, cell, wifi, rxpackets, 1); - INP_ADD_STAT(in6p, cell, wifi, rxbytes, m->m_pkthdr.len); + INP_ADD_STAT(in6p, cell, wifi, wired, rxpackets, 1); + INP_ADD_STAT(in6p, cell, wifi, wired, rxbytes, m->m_pkthdr.len); } so_recv_data_stat(in6p->in6p_socket, m, 0); if (sbappendaddr(&in6p->in6p_socket->so_rcv, @@ -697,7 +694,11 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) int error; inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); inp->inp_vflag &= ~INP_IPV4; @@ -731,7 +732,11 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) int error; inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? EINVAL : EPROTOTYPE); if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { @@ -807,7 +812,11 @@ udp6_disconnect(struct socket *so) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) return (inp == NULL ? 
EINVAL : EPROTOTYPE); if (inp->inp_vflag & INP_IPV4) { @@ -849,8 +858,15 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, int error = 0; inp = sotoinpcb(so); - if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { - error = (inp == NULL ? EINVAL : EPROTOTYPE); + if (inp == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) { + if (inp == NULL) + error = EINVAL; + else + error = EPROTOTYPE; goto bad; } diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index f80c0c0ad..91b6ba040 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Apple Inc. All rights reserved. + * Copyright (c) 2008-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,13 +97,6 @@ #include #endif /* INET6 */ -#if INET -#include -#endif -#if INET6 -#include -#endif /* INET6 */ - #include #include #include @@ -422,7 +415,7 @@ if (d_e) bcopy((d_e), &(idx)->dst_range.end, ((struct sockaddr *)(d_e))->sa_len) * set parameters into secasindex buffer. * Must allocate secasindex buffer before calling this function. */ -#define KEY_SETSECASIDX(p, m, r, s, d, idx) \ +#define KEY_SETSECASIDX(p, m, r, s, d, ifi, idx) \ do { \ bzero((idx), sizeof(struct secasindex)); \ (idx)->proto = (p); \ @@ -430,6 +423,7 @@ bzero((idx), sizeof(struct secasindex)); \ (idx)->reqid = (r); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ +(idx)->ipsec_ifindex = (ifi); \ } while (0) /* key statistics */ @@ -473,9 +467,10 @@ static struct mbuf *key_setdumpsp(struct secpolicy *, u_int8_t, u_int32_t, u_int32_t); static u_int key_getspreqmsglen(struct secpolicy *); static int key_spdexpire(struct secpolicy *); -static struct secashead *key_newsah(struct secasindex *, u_int8_t); +static struct secashead *key_newsah(struct secasindex *, ifnet_t, u_int, u_int8_t); static struct secasvar *key_newsav(struct mbuf *, - const struct sadb_msghdr *, struct secashead *, int *); + const struct sadb_msghdr *, struct secashead *, int *, + struct socket *); static struct secashead *key_getsah(struct secasindex *); static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t); static void key_setspi __P((struct secasvar *, u_int32_t)); @@ -495,7 +490,7 @@ static struct mbuf *key_setsadbipsecif(ifnet_t, ifnet_t, ifnet_t, int); static struct mbuf *key_setsadbident(u_int16_t, u_int16_t, caddr_t, int, u_int64_t); #endif -static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); +static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t, u_int16_t); static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, u_int32_t); static void *key_newbuf(const void *, u_int); @@ -590,6 +585,8 @@ static int key_setsaval2(struct secasvar *sav, extern int ipsec_bypass; extern int esp_udp_encap_port; int ipsec_send_natt_keepalive(struct secasvar *sav); +bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ipsec_offload_frame *frame, size_t frame_data_offset); +u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count, size_t frame_data_offset); void key_init(struct protosw *, struct domain *); @@ -673,12 +670,14 @@ key_start_timehandler(void) { /* must be called while locked */ lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); -#ifndef IPSEC_DEBUG2 if (key_timehandler_running == 0) { key_timehandler_running = 1; (void)timeout((void 
*)key_timehandler, (void *)0, hz); } -#endif /*IPSEC_DEBUG2*/ + + /* Turn off the ipsec bypass */ + if (ipsec_bypass != 0) + ipsec_bypass = 0; } /* %%% IPsec policy management */ @@ -831,6 +830,59 @@ found: return sp; } +struct secasvar *key_alloc_outbound_sav_for_interface(ifnet_t interface, int family) +{ + struct secashead *sah; + struct secasvar *sav; + u_int stateidx; + u_int state; + const u_int *saorder_state_valid; + int arraysize; + struct sockaddr_in *sin; + u_int16_t dstport; + + if (interface == NULL) + return NULL; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + lck_mtx_lock(sadb_mutex); + + LIST_FOREACH(sah, &sahtree, chain) { + if (sah->ipsec_if == interface && + (family == AF_INET6 || sah->saidx.dst.ss_family == family) && /* IPv6 can go over IPv4 */ + sah->dir == IPSEC_DIR_OUTBOUND) { + /* This SAH is linked to the IPSec interface, and the right family. We found it! */ + if (key_preferred_oldsa) { + saorder_state_valid = saorder_state_valid_prefer_old; + arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); + } else { + saorder_state_valid = saorder_state_valid_prefer_new; + arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); + } + + sin = (struct sockaddr_in *)&sah->saidx.dst; + dstport = sin->sin_port; + if (sah->saidx.mode == IPSEC_MODE_TRANSPORT) + sin->sin_port = IPSEC_PORT_ANY; + + for (stateidx = 0; stateidx < arraysize; stateidx++) { + state = saorder_state_valid[stateidx]; + sav = key_do_allocsa_policy(sah, state, dstport); + if (sav != NULL) { + lck_mtx_unlock(sadb_mutex); + return sav; + } + } + + break; + } + } + + lck_mtx_unlock(sadb_mutex); + return NULL; +} + /* * allocating an SA entry for an *OUTBOUND* packet. * checking each request entries in SP, and acquire an SA if need. @@ -1441,7 +1493,6 @@ key_do_get_translated_port( /* * Must be called after calling key_allocsp(). - * For both the packet without socket and key_freeso(). */ void key_freesp( @@ -1469,102 +1520,6 @@ key_freesp( return; } -#if 0 -static void key_freesp_so(struct secpolicy **); - -/* - * Must be called after calling key_allocsp(). - * For the packet with socket. - */ -void -key_freeso( - struct socket *so) -{ - - /* sanity check */ - if (so == NULL) - panic("key_freeso: NULL pointer is passed.\n"); - - lck_mtx_lock(sadb_mutex); - switch (SOCK_DOM(so)) { -#if INET - case PF_INET: - { - struct inpcb *pcb = sotoinpcb(so); - - /* Does it have a PCB ? */ - if (pcb == NULL || pcb->inp_sp == NULL) - goto done; - key_freesp_so(&pcb->inp_sp->sp_in); - key_freesp_so(&pcb->inp_sp->sp_out); - } - break; -#endif -#if INET6 - case PF_INET6: - { -#if HAVE_NRL_INPCB - struct inpcb *pcb = sotoinpcb(so); - - /* Does it have a PCB ? */ - if (pcb == NULL || pcb->inp_sp == NULL) - goto done; - key_freesp_so(&pcb->inp_sp->sp_in); - key_freesp_so(&pcb->inp_sp->sp_out); -#else - struct in6pcb *pcb = sotoin6pcb(so); - - /* Does it have a PCB ? 
*/ - if (pcb == NULL || pcb->in6p_sp == NULL) - goto done; - key_freesp_so(&pcb->in6p_sp->sp_in); - key_freesp_so(&pcb->in6p_sp->sp_out); -#endif - } - break; -#endif /* INET6 */ - default: - ipseclog((LOG_DEBUG, "key_freeso: unknown address family=%d.\n", - SOCK_DOM(so))); - break; - } -done: - lck_mtx_unlock(sadb_mutex); - - return; -} - -static void -key_freesp_so( - struct secpolicy **sp) -{ - - /* sanity check */ - if (sp == NULL || *sp == NULL) - panic("key_freesp_so: sp == NULL\n"); - - lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - - switch ((*sp)->policy) { - case IPSEC_POLICY_IPSEC: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP freeso calls free SP:0x%llx\n", - (uint64_t)VM_KERNEL_ADDRPERM(*sp))); - key_freesp(*sp, KEY_SADB_LOCKED); - *sp = NULL; - break; - case IPSEC_POLICY_ENTRUST: - case IPSEC_POLICY_BYPASS: - return; - default: - panic("key_freesp_so: Invalid policy found %d", (*sp)->policy); - } - - return; -} - -#endif - /* * Must be called after calling key_allocsa(). * This function is called by key_freesp() to free some SA allocated @@ -2267,7 +2222,7 @@ key_spdadd( } } - /* checking the direciton. */ + /* checking the direction. */ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: @@ -2629,7 +2584,7 @@ key_spddelete( xpl0 = (struct sadb_x_policy *)(void *)mhp->ext[SADB_X_EXT_POLICY]; ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF]; - /* checking the direciton. */ + /* checking the direction. */ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: @@ -3487,9 +3442,10 @@ fail: * others : pointer to new SA head. */ static struct secashead * -key_newsah( - struct secasindex *saidx, - u_int8_t dir) +key_newsah(struct secasindex *saidx, + ifnet_t ipsec_if, + u_int outgoing_if, + u_int8_t dir) { struct secashead *newsah; @@ -3525,11 +3481,17 @@ key_newsah( break; } + newsah->outgoing_if = outgoing_if; + if (ipsec_if) { + ifnet_reference(ipsec_if); + newsah->ipsec_if = ipsec_if; + } newsah->dir = dir; /* add to saidxtree */ newsah->state = SADB_SASTATE_MATURE; LIST_INSERT_HEAD(&sahtree, newsah, chain); key_start_timehandler(); + return(newsah); } @@ -3585,6 +3547,11 @@ key_delsah( ROUTE_RELEASE(&sah->sa_route); + if (sah->ipsec_if) { + ifnet_release(sah->ipsec_if); + sah->ipsec_if = NULL; + } + if (sah->idents) { KFREE(sah->idents); } @@ -3619,7 +3586,8 @@ key_newsav( struct mbuf *m, const struct sadb_msghdr *mhp, struct secashead *sah, - int *errp) + int *errp, + struct socket *so) { struct secasvar *newsav; const struct sadb_sa *xsa; @@ -3660,7 +3628,7 @@ key_newsav( case SADB_ADD: /* sanity check */ if (mhp->ext[SADB_EXT_SA] == NULL) { - KFREE(newsav); + key_delsav(newsav); ipseclog((LOG_DEBUG, "key_newsa: invalid message is passed.\n")); *errp = EINVAL; return NULL; @@ -3670,7 +3638,7 @@ key_newsav( newsav->seq = mhp->msg->sadb_msg_seq; break; default: - KFREE(newsav); + key_delsav(newsav); *errp = EINVAL; return NULL; } @@ -3678,15 +3646,17 @@ key_newsav( if (mhp->ext[SADB_X_EXT_SA2] != NULL) { if (((struct sadb_x_sa2 *)(void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_alwaysexpire) newsav->always_expire = 1; + newsav->flags2 = ((struct sadb_x_sa2 *)(void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_flags; + if (newsav->flags2 & SADB_X_EXT_SA2_DELETE_ON_DETACH) { + newsav->so = so; + } } /* copy sav values */ if (mhp->msg->sadb_msg_type != SADB_GETSPI) { *errp = key_setsaval(newsav, m, mhp); if (*errp) { - if (newsav->spihash.le_prev || newsav->spihash.le_next) - LIST_REMOVE(newsav, 
spihash); - KFREE(newsav); + key_delsav(newsav); return NULL; } } else { @@ -3706,7 +3676,7 @@ key_newsav( lck_mtx_lock(sadb_mutex); if (newsav->lft_c == NULL) { ipseclog((LOG_DEBUG, "key_newsa: No more memory.\n")); - KFREE(newsav); + key_delsav(newsav); *errp = ENOBUFS; return NULL; } @@ -3723,14 +3693,14 @@ key_newsav( if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { ipseclog((LOG_DEBUG, "key_newsa: invalid hard lifetime ext len.\n")); - KFREE(newsav); + key_delsav(newsav); *errp = EINVAL; return NULL; } newsav->lft_h = (struct sadb_lifetime *)key_newbuf(lft0, sizeof(*lft0)); if (newsav->lft_h == NULL) { ipseclog((LOG_DEBUG, "key_newsa: No more memory.\n")); - KFREE(newsav); + key_delsav(newsav); *errp = ENOBUFS; return NULL; } @@ -3830,9 +3800,7 @@ key_newsav2(struct secashead *sah, pid, lifetime_hard, lifetime_soft)) { - if (newsav->spihash.le_prev || newsav->spihash.le_next) - LIST_REMOVE(newsav, spihash); - KFREE(newsav); + key_delsav(newsav); return NULL; } @@ -3935,8 +3903,7 @@ key_delsav( * others : found, pointer to a SA. */ static struct secashead * -key_getsah( - struct secasindex *saidx) +key_getsah(struct secasindex *saidx) { struct secashead *sah; @@ -3962,7 +3929,7 @@ key_newsah2 (struct secasindex *saidx, sah = key_getsah(saidx); if (!sah) { - return(key_newsah(saidx, dir)); + return(key_newsah(saidx, NULL, 0, dir)); } return sah; } @@ -4122,6 +4089,7 @@ key_setsaval( goto fail; } sav->remote_ike_port = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port; + sav->natt_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_interval; } /* @@ -4819,7 +4787,8 @@ key_setdumpsa( case SADB_X_EXT_SA2: m = key_setsadbxsa2(sav->sah->saidx.mode, sav->replay ? sav->replay->count : 0, - sav->sah->saidx.reqid); + sav->sah->saidx.reqid, + sav->flags2); if (!m) goto fail; break; @@ -4907,6 +4876,13 @@ key_setdumpsa( m_cat(result, tres); + if (sav->sah && (sav->sah->outgoing_if || sav->sah->ipsec_if)) { + m = key_setsadbipsecif(NULL, ifindex2ifnet[sav->sah->outgoing_if], sav->sah->ipsec_if, 0); + if (!m) + goto fail; + m_cat(result, m); + } + if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) @@ -5085,11 +5061,11 @@ key_setsadbipsecif(ifnet_t internal_if, p->sadb_x_ipsecif_exttype = SADB_X_EXT_IPSECIF; if (internal_if && internal_if->if_xname) - strncpy(p->sadb_x_ipsecif_internal_if, internal_if->if_xname, IFXNAMSIZ); + strlcpy(p->sadb_x_ipsecif_internal_if, internal_if->if_xname, IFXNAMSIZ); if (outgoing_if && outgoing_if->if_xname) - strncpy(p->sadb_x_ipsecif_outgoing_if, outgoing_if->if_xname, IFXNAMSIZ); + strlcpy(p->sadb_x_ipsecif_outgoing_if, outgoing_if->if_xname, IFXNAMSIZ); if (ipsec_if && ipsec_if->if_xname) - strncpy(p->sadb_x_ipsecif_ipsec_if, ipsec_if->if_xname, IFXNAMSIZ); + strlcpy(p->sadb_x_ipsecif_ipsec_if, ipsec_if->if_xname, IFXNAMSIZ); p->sadb_x_ipsecif_init_disabled = init_disabled; @@ -5214,7 +5190,8 @@ static struct mbuf * key_setsadbxsa2( u_int8_t mode, u_int32_t seq, - u_int32_t reqid) + u_int32_t reqid, + u_int16_t flags) { struct mbuf *m; struct sadb_x_sa2 *p; @@ -5238,6 +5215,7 @@ key_setsadbxsa2( p->sadb_x_sa2_reserved2 = 0; p->sadb_x_sa2_sequence = seq; p->sadb_x_sa2_reqid = reqid; + p->sadb_x_sa2_flags = flags; return m; } @@ -5427,6 +5405,9 @@ key_cmpsaidx( if (saidx0 == NULL || saidx1 == NULL) return 0; + if (saidx0->ipsec_ifindex != 0 && saidx0->ipsec_ifindex != saidx1->ipsec_ifindex) + return 0; + if (saidx0->proto != saidx1->proto) return 0; @@ -5976,7 +5957,7 @@ 
key_timehandler(void) */ if (savkabuf && savkacount < savbufcount) { sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]); //%%% should we check dying list if this is empty??? - if (natt_keepalive_interval && sav && + if (sav && (natt_keepalive_interval || sav->natt_interval) && (sav->flags & (SADB_X_EXT_NATT_KEEPALIVE | SADB_X_EXT_ESP_KEEPALIVE)) != 0) { sav->refcnt++; *savkaptr++ = sav; @@ -6260,14 +6241,14 @@ key_timehandler(void) KFREE(savexbuf); } -#ifndef IPSEC_DEBUG2 - if (stop_handler) + if (stop_handler) { key_timehandler_running = 0; - else { + /* Turn on the ipsec bypass */ + ipsec_bypass = 1; + } else { /* do exchange to tick time !! */ (void)timeout((void *)key_timehandler, (void *)0, hz); } -#endif /* IPSEC_DEBUG2 */ lck_mtx_unlock(sadb_mutex); return; @@ -6382,6 +6363,38 @@ key_proto2satype( /* NOTREACHED */ } +static ifnet_t +key_get_ipsec_if_from_message (const struct sadb_msghdr *mhp) +{ + struct sadb_x_ipsecif *ipsecifopts = NULL; + ifnet_t ipsec_if = NULL; + + ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF]; + if (ipsecifopts != NULL) { + if (ipsecifopts->sadb_x_ipsecif_internal_if) { + ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_ipsec_if, &ipsec_if); + } + } + + return ipsec_if; +} + +static u_int +key_get_outgoing_ifindex_from_message (const struct sadb_msghdr *mhp) +{ + struct sadb_x_ipsecif *ipsecifopts = NULL; + ifnet_t outgoing_if = NULL; + + ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF]; + if (ipsecifopts != NULL) { + if (ipsecifopts->sadb_x_ipsecif_outgoing_if) { + ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_outgoing_if, &outgoing_if); + } + } + + return outgoing_if ? outgoing_if->if_index : 0; +} + /* %%% PF_KEY */ /* * SADB_GETSPI processing is to receive @@ -6405,6 +6418,7 @@ key_getspi( struct secasindex saidx; struct secashead *newsah; struct secasvar *newsav; + ifnet_t ipsec_if = NULL; u_int8_t proto; u_int32_t spi; u_int8_t mode; @@ -6440,6 +6454,8 @@ key_getspi( src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); + ipsec_if = key_get_ipsec_if_from_message(mhp); + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_getspi: invalid satype is passed.\n")); @@ -6481,7 +6497,7 @@ key_getspi( } /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx); lck_mtx_lock(sadb_mutex); @@ -6496,7 +6512,7 @@ key_getspi( /* get a SA index */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA index: key_addspi is always used for inbound spi */ - if ((newsah = key_newsah(&saidx, IPSEC_DIR_INBOUND)) == NULL) { + if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp), IPSEC_DIR_INBOUND)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_getspi: No more memory.\n")); return key_senderror(so, m, ENOBUFS); @@ -6505,7 +6521,7 @@ key_getspi( /* get a new SA */ /* XXX rewrite */ - newsav = key_newsav(m, mhp, newsah, &error); + newsav = key_newsav(m, mhp, newsah, &error, so); if (newsav == NULL) { /* XXX don't free new SA index allocated in above. 
*/ lck_mtx_unlock(sadb_mutex); @@ -6614,7 +6630,7 @@ key_getspi2(struct sockaddr *src, lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src, dst, &saidx); + KEY_SETSECASIDX(proto, mode, reqid, src, dst, 0, &saidx); /* make sure if port number is zero. */ switch (((struct sockaddr *)&saidx.src)->sa_family) { @@ -6755,12 +6771,14 @@ key_update( { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; + ifnet_t ipsec_if = NULL; struct secasindex saidx; struct secashead *sah; struct secasvar *sav; u_int16_t proto; u_int8_t mode; u_int32_t reqid; + u_int16_t flags2; int error; lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); @@ -6800,18 +6818,21 @@ key_update( (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *) (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; + flags2 = ((struct sadb_x_sa2 *)(void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_flags; } else { mode = IPSEC_MODE_ANY; reqid = 0; + flags2 = 0; } /* XXX boundary checking for other extensions */ sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); + ipsec_if = key_get_ipsec_if_from_message(mhp); /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx); lck_mtx_lock(sadb_mutex); @@ -6883,7 +6904,12 @@ key_update( lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, error); } - + + sav->flags2 = flags2; + if (flags2 & SADB_X_EXT_SA2_DELETE_ON_DETACH) { + sav->so = so; + } + /* * Verify if SADB_X_EXT_NATT_MULTIPLEUSERS flag is set that * this SA is for transport mode - otherwise clear it. @@ -6979,6 +7005,7 @@ key_add( { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; + ifnet_t ipsec_if = NULL; struct secasindex saidx; struct secashead *newsah; struct secasvar *newsav; @@ -7033,16 +7060,17 @@ key_add( sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + ipsec_if = key_get_ipsec_if_from_message(mhp); /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, ipsec_if ? 
ipsec_if->if_index : 0, &saidx); lck_mtx_lock(sadb_mutex); /* get a SA header */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA header: key_addspi is always used for outbound spi */ - if ((newsah = key_newsah(&saidx, IPSEC_DIR_OUTBOUND)) == NULL) { + if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp), IPSEC_DIR_OUTBOUND)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_add: No more memory.\n")); return key_senderror(so, m, ENOBUFS); @@ -7064,7 +7092,7 @@ key_add( ipseclog((LOG_DEBUG, "key_add: SA already exists.\n")); return key_senderror(so, m, EEXIST); } - newsav = key_newsav(m, mhp, newsah, &error); + newsav = key_newsav(m, mhp, newsah, &error, so); if (newsav == NULL) { lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, error); @@ -7250,6 +7278,7 @@ key_delete( { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; + ifnet_t ipsec_if = NULL; struct secasindex saidx; struct secashead *sah; struct secasvar *sav = NULL; @@ -7299,9 +7328,10 @@ key_delete( sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); + ipsec_if = key_get_ipsec_if_from_message(mhp); /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx); /* get a SA header */ LIST_FOREACH(sah, &sahtree, chain) { @@ -7363,6 +7393,7 @@ key_delete_all( u_int16_t proto) { struct sadb_address *src0, *dst0; + ifnet_t ipsec_if = NULL; struct secasindex saidx; struct secashead *sah; struct secasvar *sav, *nextsav; @@ -7372,9 +7403,10 @@ key_delete_all( src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); + ipsec_if = key_get_ipsec_if_from_message(mhp); /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx); LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) @@ -7453,6 +7485,7 @@ key_get( { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; + ifnet_t ipsec_if = NULL; struct secasindex saidx; struct secashead *sah; struct secasvar *sav = NULL; @@ -7486,9 +7519,10 @@ key_get( sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + ipsec_if = key_get_ipsec_if_from_message(mhp); /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? 
ipsec_if->if_index : 0, &saidx); lck_mtx_lock(sadb_mutex); @@ -8200,6 +8234,7 @@ key_acquire2( const struct sadb_msghdr *mhp) { const struct sadb_address *src0, *dst0; + ifnet_t ipsec_if = NULL; struct secasindex saidx; struct secashead *sah; u_int16_t proto; @@ -8281,10 +8316,11 @@ key_acquire2( src0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + ipsec_if = key_get_ipsec_if_from_message(mhp); /* XXX boundary check against sa_len */ /* cast warnings */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx); /* get a SA index */ LIST_FOREACH(sah, &sahtree, chain) { @@ -8489,6 +8525,32 @@ setmsg: } } +static void +key_delete_all_for_socket (struct socket *so) +{ + struct secashead *sah, *nextsah; + struct secasvar *sav, *nextsav; + u_int stateidx; + u_int state; + + for (sah = LIST_FIRST(&sahtree); + sah != NULL; + sah = nextsah) { + nextsah = LIST_NEXT(sah, chain); + for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { + state = saorder_state_any[stateidx]; + for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { + nextsav = LIST_NEXT(sav, chain); + if (sav->flags2 & SADB_X_EXT_SA2_DELETE_ON_DETACH && + sav->so == so) { + key_sa_chgstate(sav, SADB_SASTATE_DEAD); + key_freesav(sav, KEY_SADB_LOCKED); + } + } + } + } +} + /* * free secreg entry registered. * XXX: I want to do free a socket marked done SADB_RESIGER to socket. @@ -8510,6 +8572,7 @@ key_freereg( * one socket is registered to multiple type of SA. */ lck_mtx_lock(sadb_mutex); + key_delete_all_for_socket(so); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, ®tree[i], chain) { if (reg->so == so @@ -8573,7 +8636,8 @@ key_expire( /* create SA extension */ m = key_setsadbxsa2(sav->sah->saidx.mode, sav->replay ? sav->replay->count : 0, - sav->sah->saidx.reqid); + sav->sah->saidx.reqid, + sav->flags2); if (!m) { error = ENOBUFS; goto fail; @@ -9885,6 +9949,10 @@ fail: void key_delsp_for_ipsec_if (ifnet_t ipsec_if) { + struct secashead *sah; + struct secasvar *sav, *nextsav; + u_int stateidx; + u_int state; struct secpolicy *sp, *nextsp; int dir; @@ -9914,6 +9982,58 @@ key_delsp_for_ipsec_if (ifnet_t ipsec_if) } } + LIST_FOREACH(sah, &sahtree, chain) { + if (sah->ipsec_if == ipsec_if) { + /* This SAH is linked to the IPSec interface. It now needs to close. 
*/ + ifnet_release(sah->ipsec_if); + sah->ipsec_if = NULL; + + for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { + state = saorder_state_any[stateidx]; + for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { + nextsav = LIST_NEXT(sav, chain); + + key_sa_chgstate(sav, SADB_SASTATE_DEAD); + key_freesav(sav, KEY_SADB_LOCKED); + } + } + + sah->state = SADB_SASTATE_DEAD; + } + } + lck_mtx_unlock(sadb_mutex); - +} + +__private_extern__ u_int32_t +key_fill_offload_frames_for_savs (ifnet_t ifp, + struct ipsec_offload_frame *frames_array, + u_int32_t frames_array_count, + size_t frame_data_offset) +{ + struct secashead *sah = NULL; + struct secasvar *sav = NULL; + struct ipsec_offload_frame *frame = frames_array; + u_int32_t frame_index = 0; + + if (frame == NULL || frames_array_count == 0) { + return (frame_index); + } + + lck_mtx_lock(sadb_mutex); + LIST_FOREACH(sah, &sahtree, chain) { + LIST_FOREACH(sav, &sah->savtree[SADB_SASTATE_MATURE], chain) { + if (ipsec_fill_offload_frame(ifp, sav, frame, frame_data_offset)) { + frame_index++; + if (frame_index >= frames_array_count) { + lck_mtx_unlock(sadb_mutex); + return (frame_index); + } + frame = &(frames_array[frame_index]); + } + } + } + lck_mtx_unlock(sadb_mutex); + + return (frame_index); } diff --git a/bsd/netkey/key.h b/bsd/netkey/key.h index ad5255a92..82c97c639 100644 --- a/bsd/netkey/key.h +++ b/bsd/netkey/key.h @@ -57,13 +57,13 @@ extern struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); extern struct secasvar *key_allocsa_policy(struct secasindex *); extern struct secpolicy *key_gettunnel(struct sockaddr *, struct sockaddr *, struct sockaddr *, struct sockaddr *); +extern struct secasvar *key_alloc_outbound_sav_for_interface(ifnet_t, int); extern int key_checkrequest(struct ipsecrequest *isr, struct secasindex *, struct secasvar **sav); extern struct secasvar *key_allocsa(u_int, caddr_t, caddr_t, u_int, u_int32_t); extern u_int16_t key_natt_get_translated_port(struct secasvar *); extern void key_freesp(struct secpolicy *, int); -extern void key_freeso(struct socket *); extern void key_freesav(struct secasvar *, int); extern struct secpolicy *key_newsp(void); extern struct secpolicy *key_msg2sp(struct sadb_x_policy *, size_t, int *); diff --git a/bsd/netkey/keydb.h b/bsd/netkey/keydb.h index dc1563a60..c2e463073 100644 --- a/bsd/netkey/keydb.h +++ b/bsd/netkey/keydb.h @@ -38,7 +38,7 @@ #include #include -/* Security Assocciation Index */ +/* Security Association Index */ /* NOTE: Ensure to be same address family */ struct secasindex { struct sockaddr_storage src; /* source address for SA */ @@ -47,6 +47,7 @@ struct secasindex { u_int8_t mode; /* mode of protocol, see ipsec.h */ u_int32_t reqid; /* reqid id who owned this SA */ /* see IPSEC_MANUAL_REQID_MAX. */ + u_int ipsec_ifindex; }; /* Security Association Data Base */ @@ -59,6 +60,8 @@ struct secashead { struct sadb_ident *identd; /* destination identity */ /* XXX I don't know how to use them. */ + ifnet_t ipsec_if; + u_int outgoing_if; u_int8_t dir; /* IPSEC_DIR_INBOUND or IPSEC_DIR_OUTBOUND */ u_int8_t state; /* MATURE or DEAD. 
*/ LIST_HEAD(_satree, secasvar) savtree[SADB_SASTATE_MAX+1]; @@ -82,6 +85,7 @@ struct secasvar { u_int8_t alg_enc; /* Cipher Algorithm Identifier */ u_int32_t spi; /* SPI Value, network byte order */ u_int32_t flags; /* holder for SADB_KEY_FLAGS */ + u_int16_t flags2; /* holder for SADB_SA2_KEY_FLAGS */ struct sadb_key *key_auth; /* Key for Authentication */ struct sadb_key *key_enc; /* Key for Encryption */ @@ -97,6 +101,8 @@ struct secasvar { struct sadb_lifetime *lft_h; /* HARD lifetime */ struct sadb_lifetime *lft_s; /* SOFT lifetime */ + struct socket *so; /* Associated socket */ + u_int32_t seq; /* sequence number */ pid_t pid; /* message's pid */ @@ -106,6 +112,7 @@ struct secasvar { u_int32_t natt_last_activity; u_int16_t remote_ike_port; u_int16_t natt_encapsulated_src_port; /* network byte order */ + u_int16_t natt_interval; /* Interval in seconds */ u_int8_t always_expire; /* Send expire/delete messages even if unused */ diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index 45d3ade4a..370ddaa83 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -744,7 +744,19 @@ struct nfsstats { */ #define NFSCLNT_LOCKDANS 0x200 #define NFSCLNT_LOCKDNOTIFY 0x400 - +#define NFSCLNT_TESTIDMAP 0x001 + +#include /* for guid_t below */ +#define MAXIDNAMELEN 1024 +struct nfs_testmapid { + uint32_t ntm_name2id; /* lookup name 2 id or id 2 name */ + uint32_t ntm_grpflag; /* Is this a group or user mapping */ + uint32_t ntm_id; /* id to map or return */ + uint32_t pad; + guid_t ntm_guid; /* intermediate guid used in conversion */ + char ntm_name[MAXIDNAMELEN]; /* name to map or return */ +}; + /* * fs.nfs sysctl(3) identifiers */ @@ -917,7 +929,10 @@ extern lck_grp_t *nfs_request_grp; #define R_XID32(x) ((x) & 0xffffffff) -#define NFSREQNOLIST ((struct nfsreq *)0xdeadbeef) /* sentinel value for nfsreq lists */ +#define NFSNOLIST ((void *)0x0badcafe) /* sentinel value for nfs lists */ +#define NFSREQNOLIST NFSNOLIST /* sentinel value for nfsreq lists */ +#define NFSIODCOMPLETING ((void *)0x10d) /* sentinel value for iod processing + async I/O w/callback being completed */ /* Flag values for r_flags */ #define R_TIMING 0x00000001 /* timing request (in mntp) */ @@ -936,6 +951,7 @@ extern lck_grp_t *nfs_request_grp; #define R_ASYNCWAIT 0x00002000 /* async request now being waited on */ #define R_RESENDQ 0x00004000 /* async request currently on resendq */ #define R_SENDING 0x00008000 /* request currently being sent */ +#define R_SOFT 0x00010000 /* request is soft - don't retry or reconnect */ #define R_NOINTR 0x20000000 /* request should not be interrupted by a signal */ #define R_RECOVER 0x40000000 /* a state recovery RPC - during NFSSTA_RECOVER */ @@ -954,7 +970,7 @@ extern int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des; extern int nfs_tprintf_initial_delay, nfs_tprintf_delay; extern int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes; extern int nfs_idmap_ctrl, nfs_callback_port; -extern int nfs_is_mobile; +extern int nfs_is_mobile, nfs_readlink_nocache; extern uint32_t nfs_squishy_flags; extern uint32_t nfs_debug_ctl; @@ -1143,7 +1159,12 @@ int nfs_connect(struct nfsmount *, int, int); void nfs_disconnect(struct nfsmount *); void nfs_need_reconnect(struct nfsmount *); void nfs_mount_sock_thread_wake(struct nfsmount *); -void nfs_mount_check_dead_timeout(struct nfsmount *); +int 
nfs_mount_check_dead_timeout(struct nfsmount *); +int nfs_mount_gone(struct nfsmount *); +void nfs_mount_rele(struct nfsmount *); +void nfs_mount_zombie(struct nfsmount *, int); +void nfs_mount_make_zombie(struct nfsmount *); + void nfs_rpc_record_state_init(struct nfs_rpc_record_state *); void nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *); int nfs_rpc_record_read(socket_t, struct nfs_rpc_record_state *, int, int *, mbuf_t *); @@ -1329,8 +1350,8 @@ int nfs_read_rpc(nfsnode_t, uio_t, vfs_context_t); int nfs_write_rpc(nfsnode_t, uio_t, vfs_context_t, int *, uint64_t *); int nfs_write_rpc2(nfsnode_t, uio_t, thread_t, kauth_cred_t, int *, uint64_t *); -int nfs3_access_rpc(nfsnode_t, u_int32_t *, vfs_context_t); -int nfs4_access_rpc(nfsnode_t, u_int32_t *, vfs_context_t); +int nfs3_access_rpc(nfsnode_t, u_int32_t *, int, vfs_context_t); +int nfs4_access_rpc(nfsnode_t, u_int32_t *, int, vfs_context_t); int nfs3_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, int, vfs_context_t, struct nfs_vattr *, u_int64_t *); int nfs4_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, int, vfs_context_t, struct nfs_vattr *, u_int64_t *); int nfs3_setattr_rpc(nfsnode_t, struct vnode_attr *, vfs_context_t); @@ -1427,8 +1448,9 @@ int nfsrv_symlink(struct nfsrv_descript *, struct nfsrv_sock *, vfs_context_t, m int nfsrv_write(struct nfsrv_descript *, struct nfsrv_sock *, vfs_context_t, mbuf_t *); void nfs_interval_timer_start(thread_call_t, int); +int nfs_use_cache(struct nfsmount *); void nfs_up(struct nfsmount *, thread_t, int, const char *); -void nfs_down(struct nfsmount *, thread_t, int, int, const char *); +void nfs_down(struct nfsmount *, thread_t, int, int, const char *, int); int nfs_msg(thread_t, const char *, const char *, int); int nfs_mountroot(void); @@ -1462,11 +1484,13 @@ void nfsrv_uc_dequeue(struct nfsrv_sock *); #define NFS_FAC_VNOP 0x08 #define NFS_FAC_BIO 0x10 #define NFS_FAC_GSS 0x20 +#define NFS_FAC_VFS 0x40 #define NFS_DBG(fac, lev, fmt, ...) \ if (__builtin_expect(NFS_DEBUG_LEVEL, 0)) nfs_printf(fac, lev, "%s: %d: " fmt, __func__, __LINE__, ## __VA_ARGS__) void nfs_printf(int, int, const char *, ...) 
__printflike(3,4); +int nfs_mountopts(struct nfsmount *, char *, int); __END_DECLS diff --git a/bsd/nfs/nfs4_subs.c b/bsd/nfs/nfs4_subs.c index 50c657b94..376ae3443 100644 --- a/bsd/nfs/nfs4_subs.c +++ b/bsd/nfs/nfs4_subs.c @@ -459,7 +459,7 @@ nfs4_secinfo_rpc(struct nfsmount *nmp, struct nfsreq_secinfo_args *siap, kauth_c struct nfsm_chain nmreq, nmrep; *seccountp = 0; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; np = siap->rsia_np; @@ -2458,7 +2458,7 @@ restart: break; if (!(nmp->nm_sockflags & NMSOCK_READY)) error = EPIPE; - if (nmp->nm_state & NFSSTA_FORCE) + if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) error = ENXIO; if (nmp->nm_sockflags & NMSOCK_UNMOUNT) error = ENXIO; diff --git a/bsd/nfs/nfs4_vnops.c b/bsd/nfs/nfs4_vnops.c index 85ebceb47..2682d94be 100644 --- a/bsd/nfs/nfs4_vnops.c +++ b/bsd/nfs/nfs4_vnops.c @@ -80,7 +80,7 @@ #include int -nfs4_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx) +nfs4_access_rpc(nfsnode_t np, u_int32_t *access, int rpcflags, vfs_context_t ctx) { int error = 0, lockerror = ENOENT, status, numops, slot; u_int64_t xid; @@ -115,7 +115,9 @@ nfs4_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx) nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); - error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status); + error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, + vfs_context_thread(ctx), vfs_context_ucred(ctx), + &si, rpcflags, &nmrep, &xid, &status); if ((lockerror = nfs_node_lock(np))) error = lockerror; @@ -189,7 +191,7 @@ nfs4_getattr_rpc( struct nfsm_chain nmreq, nmrep; struct nfsreq_secinfo_args si; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL); @@ -202,6 +204,9 @@ nfs4_getattr_rpc( if (flags & NGA_MONITOR) /* vnode monitor requests should be soft */ rpcflags = R_RECOVER; + if (flags & NGA_SOFT) /* Return ETIMEDOUT if server not responding */ + rpcflags |= R_SOFT; + NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -255,7 +260,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) return (EINVAL); @@ -324,7 +329,7 @@ nfs4_read_rpc_async( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -373,7 +378,7 @@ nfs4_read_rpc_async_finish( struct nfsm_chain nmrep; nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { nfs_request_async_cancel(req); return (ENXIO); } @@ -431,7 +436,7 @@ nfs4_write_rpc_async( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -491,7 +496,7 @@ nfs4_write_rpc_async_finish( struct nfsm_chain nmrep; nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { nfs_request_async_cancel(req); return (ENXIO); } @@ -503,7 +508,7 @@ nfs4_write_rpc_async_finish( if (error == EINPROGRESS) /* async request restarted */ return (error); nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) error = ENXIO; if (!error && (lockerror = nfs_node_lock(np))) error = lockerror; @@ -560,7 +565,7 @@ 
nfs4_remove_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -634,7 +639,7 @@ nfs4_rename_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(fdnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (fdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -733,7 +738,7 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx) struct nfsreq_secinfo_args si; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; nmreaddirsize = nmp->nm_readdirsize; @@ -1081,7 +1086,7 @@ nfs4_lookup_rpc_async( struct nfsreq_secinfo_args si; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -1230,7 +1235,7 @@ nfs4_commit_rpc( nmp = NFSTONMP(np); FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) return (EINVAL); @@ -1305,7 +1310,7 @@ nfs4_pathconf_rpc( struct nfs_vattr nvattr; struct nfsreq_secinfo_args si; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -1373,7 +1378,8 @@ nfs4_vnop_getattr( struct nfs_vattr nva; int error, acls, ngaflags; - if (!(nmp = VTONMP(ap->a_vp))) + nmp = VTONMP(ap->a_vp); + if (nfs_mount_gone(nmp)) return (ENXIO); acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL); @@ -1484,7 +1490,7 @@ nfs4_setattr_rpc( nfs_stateid stateid; struct nfsreq_secinfo_args si; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -1647,7 +1653,7 @@ nfs_mount_state_in_use_start(struct nfsmount *nmp, thread_t thd) struct timespec ts = { 1, 0 }; int error = 0, slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); lck_mtx_lock(&nmp->nm_lock); if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) { @@ -1678,7 +1684,7 @@ nfs_mount_state_in_use_end(struct nfsmount *nmp, int error) { int restart = nfs_mount_state_error_should_restart(error); - if (!nmp) + if (nfs_mount_gone(nmp)) return (restart); lck_mtx_lock(&nmp->nm_lock); if (restart && (error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE)) { @@ -1759,7 +1765,7 @@ nfs_open_state_set_busy(nfsnode_t np, thread_t thd) int error = 0, slpflag; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0; @@ -1902,7 +1908,7 @@ nfs_open_owner_set_busy(struct nfs_open_owner *noop, thread_t thd) int error = 0, slpflag; nmp = noop->noo_mount; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0; @@ -2083,7 +2089,7 @@ nfs_open_file_set_busy(struct nfs_open_file *nofp, thread_t thd) int error = 0, slpflag; nmp = nofp->nof_owner->noo_mount; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); slpflag = (NMFLAG(nmp, INTR) && thd) ? 
PCATCH : 0; @@ -2669,7 +2675,7 @@ nfs_vnop_mmap( struct nfs_open_file *nofp = NULL; nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!vnode_isreg(vp) || !(ap->a_fflags & (PROT_READ|PROT_WRITE))) @@ -2872,7 +2878,7 @@ nfs_vnop_mnomap( int is_mapped_flag = 0; nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfs_node_lock_force(np); @@ -3055,7 +3061,7 @@ nfs_lock_owner_set_busy(struct nfs_lock_owner *nlop, thread_t thd) int error = 0, slpflag; nmp = nlop->nlo_open_owner->noo_mount; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0; @@ -3215,7 +3221,7 @@ nfs4_setlock_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) return (EINVAL); @@ -3338,7 +3344,7 @@ nfs4_unlock_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) return (EINVAL); @@ -3415,7 +3421,7 @@ nfs4_getlock_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) return (EINVAL); @@ -3499,7 +3505,7 @@ nfs_advlock_getlock( int error = 0, answered = 0; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); restart: @@ -3575,7 +3581,7 @@ nfs_advlock_setlock( struct timespec ts = {1, 0}; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0; @@ -3987,7 +3993,7 @@ nfs_advlock_unlock( int error = 0, willsplit = 0, send_unlock_rpcs = 1; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); restart: @@ -4242,7 +4248,7 @@ nfs_vnop_advlock( #define OFF_MAX QUAD_MAX nmp = VTONMP(ap->a_vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); lck_mtx_lock(&nmp->nm_lock); if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED)) { @@ -4653,7 +4659,7 @@ nfs4_open_rpc_internal( return (EINVAL); nmp = VTONMP(dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR); @@ -4990,7 +4996,7 @@ nfs4_claim_delegated_open_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -5242,7 +5248,7 @@ nfs4_open_reclaim_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -5429,7 +5435,7 @@ nfs4_open_downgrade_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -5499,7 +5505,7 @@ nfs4_close_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -5874,7 +5880,7 @@ nfs4_delegation_return_enqueue(nfsnode_t np) struct nfsmount *nmp; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return; lck_mtx_lock(&np->n_openlock); @@ -5900,7 +5906,7 @@ nfs4_delegation_return(nfsnode_t np, int flags, thread_t thd, kauth_cred_t cred) int error; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); /* first, make sure the node's marked for delegation return */ @@ -6035,7 +6041,7 @@ nfs_vnop_read( np = VTONFS(vp); nmp = NFSTONMP(np); - if (!nmp) + if 
(nfs_mount_gone(nmp)) return (ENXIO); if (np->n_flag & NREVOKE) return (EIO); @@ -6126,7 +6132,7 @@ nfs4_vnop_create( struct nfs_open_file *newnofp = NULL, *nofp = NULL; nmp = VTONMP(dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (vap) @@ -6296,7 +6302,7 @@ nfs4_create_rpc( struct nfsreq_secinfo_args si; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR); @@ -6483,7 +6489,7 @@ nfs4_vnop_mknod( int error; nmp = VTONMP(ap->a_dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!VATTR_IS_ACTIVE(ap->a_vap, va_type)) @@ -6575,7 +6581,7 @@ nfs4_vnop_link( return (EXDEV); nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) @@ -6691,7 +6697,7 @@ nfs4_vnop_rmdir( return (EINVAL); nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR); @@ -6775,7 +6781,7 @@ nfs4_named_attr_dir_get(nfsnode_t np, int fetch, vfs_context_t ctx) struct nfsreq_secinfo_args si; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (NULL); if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) return (NULL); @@ -6943,7 +6949,7 @@ nfs4_named_attr_get( slen = sizeof(sbuf); nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); NVATTR_INIT(&nvattr); negnamecache = !NMFLAG(nmp, NONEGNAMECACHE); @@ -7628,7 +7634,7 @@ nfs4_named_attr_remove(nfsnode_t np, nfsnode_t anp, const char *name, vfs_contex int error, putanp = 0; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); bzero(&cn, sizeof(cn)); @@ -7696,7 +7702,7 @@ nfs4_vnop_getxattr( int error = 0, isrsrcfork; nmp = VTONMP(ap->a_vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)) @@ -7760,7 +7766,7 @@ nfs4_vnop_setxattr( struct vnop_write_args vwa; nmp = VTONMP(ap->a_vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)) @@ -7898,7 +7904,7 @@ nfs4_vnop_removexattr( struct nfsmount *nmp = VTONMP(ap->a_vp); int error; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)) return (ENOTSUP); @@ -7933,7 +7939,7 @@ nfs4_vnop_listxattr( struct direntry *dp; nmp = VTONMP(ap->a_vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)) @@ -8079,7 +8085,7 @@ nfs4_vnop_getnamedstream( int error = 0; nmp = VTONMP(ap->a_vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)) @@ -8126,7 +8132,7 @@ nfs4_vnop_makenamedstream( int error = 0; nmp = VTONMP(ap->a_vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)) @@ -8164,7 +8170,7 @@ nfs4_vnop_removenamedstream( nfsnode_t np = ap->a_vp ? VTONFS(ap->a_vp) : NULL; nfsnode_t anp = ap->a_svp ? VTONFS(ap->a_svp) : NULL; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); /* diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c index b9d22e786..a58e5d866 100644 --- a/bsd/nfs/nfs_bio.c +++ b/bsd/nfs/nfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -346,7 +346,7 @@ nfs_buf_page_inval(vnode_t vp, off_t offset) struct nfsbuf *bp; int error = 0; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); lck_mtx_lock(nfs_buf_mutex); @@ -658,7 +658,7 @@ nfs_buf_get( if (bufsize > NFS_MAXBSIZE) panic("nfs_buf_get: buffer larger than NFS_MAXBSIZE requested"); - if (!nmp) { + if (nfs_mount_gone(nmp)) { FSDBG_BOT(541, np, blkno, 0, ENXIO); return (ENXIO); } @@ -1291,7 +1291,7 @@ nfs_buf_check_write_verifier(nfsnode_t np, struct nfsbuf *bp) return; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return; if (!ISSET(bp->nb_flags, NB_STALEWVERF) && (bp->nb_verf == nmp->nm_verf)) return; @@ -1546,7 +1546,7 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred) struct nfsreq_cbinfo cb; nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { bp->nb_error = error = ENXIO; SET(bp->nb_flags, NB_ERROR); nfs_buf_iodone(bp); @@ -1669,7 +1669,7 @@ finish: nfs_request_ref(req, 0); nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { SET(bp->nb_flags, NB_ERROR); bp->nb_error = error = ENXIO; } @@ -1842,7 +1842,7 @@ nfs_buf_readahead(nfsnode_t np, int ioflag, daddr64_t *rabnp, daddr64_t lastrabn int error = 0; uint32_t nra; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (nmp->nm_readahead <= 0) return (0); @@ -2327,17 +2327,26 @@ nfs_buf_write(struct nfsbuf *bp) thd = async ? NULL : current_thread(); /* We need to make sure the pages are locked before doing I/O. */ - if (!ISSET(bp->nb_flags, NB_META) && UBCINFOEXISTS(NFSTOV(np))) { - if (!ISSET(bp->nb_flags, NB_PAGELIST)) { - error = nfs_buf_upl_setup(bp); - if (error) { - printf("nfs_buf_write: upl create failed %d\n", error); - SET(bp->nb_flags, NB_ERROR); - bp->nb_error = error = EIO; - nfs_buf_iodone(bp); - goto out; + if (!ISSET(bp->nb_flags, NB_META)) { + if (UBCINFOEXISTS(NFSTOV(np))) { + if (!ISSET(bp->nb_flags, NB_PAGELIST)) { + error = nfs_buf_upl_setup(bp); + if (error) { + printf("nfs_buf_write: upl create failed %d\n", error); + SET(bp->nb_flags, NB_ERROR); + bp->nb_error = error = EIO; + nfs_buf_iodone(bp); + goto out; + } + nfs_buf_upl_check(bp); } - nfs_buf_upl_check(bp); + } else { + /* We should never be in nfs_buf_write() with no UBCINFO. 
*/ + printf("nfs_buf_write: ubcinfo already gone\n"); + SET(bp->nb_flags, NB_ERROR); + bp->nb_error = error = EIO; + nfs_buf_iodone(bp); + goto out; } } @@ -2346,7 +2355,7 @@ nfs_buf_write(struct nfsbuf *bp) nfs_buf_check_write_verifier(np, bp); if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { struct nfsmount *nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { SET(bp->nb_flags, NB_ERROR); bp->nb_error = error = EIO; nfs_buf_iodone(bp); @@ -2696,7 +2705,7 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred char uio_buf [ UIO_SIZEOF(1) ]; nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { bp->nb_error = error = ENXIO; SET(bp->nb_flags, NB_ERROR); nfs_buf_iodone(bp); @@ -2825,7 +2834,7 @@ finish: nfs_request_ref(req, 0); nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { SET(bp->nb_flags, NB_ERROR); bp->nb_error = error = ENXIO; } @@ -3037,7 +3046,7 @@ nfs_flushcommits(nfsnode_t np, int nowait) LIST_INIT(&commitlist); nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { error = ENXIO; goto done; } @@ -3251,7 +3260,7 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) FSDBG_TOP(517, np, waitfor, ignore_writeerr, 0); - if (!nmp) { + if (nfs_mount_gone(nmp)) { error = ENXIO; goto out; } @@ -3620,11 +3629,21 @@ nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrf nfsnode_t np = VTONFS(vp); struct nfsmount *nmp = VTONMP(vp); int error, slpflag, slptimeo, nflags, retry = 0; + int ubcflags = UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE; struct timespec ts = { 2, 0 }; off_t size; FSDBG_TOP(554, np, flags, intrflg, 0); + /* + * If the mount is gone no sense to try and write anything. + * and hang trying to do IO. + */ + if (nfs_mount_gone(nmp)) { + flags &= ~V_SAVE; + ubcflags &= ~UBC_PUSHALL; + } + if (nmp && !NMFLAG(nmp, INTR)) intrflg = 0; if (intrflg) { @@ -3662,14 +3681,16 @@ again: /* get the pages out of vm also */ if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp))) - if ((error = ubc_msync(vp, 0, size, NULL, UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE))) { + if ((error = ubc_msync(vp, 0, size, NULL, ubcflags))) { if (error == EINVAL) panic("nfs_vinvalbuf(): ubc_msync failed!, error %d", error); - if (retry++ < 10) /* retry invalidating a few times */ + if (retry++ < 10) { /* retry invalidating a few times */ + if (retry > 1 || error == ENXIO) + ubcflags &= ~UBC_PUSHALL; goto again; + } /* give up */ - printf("nfs_vinvalbuf(): ubc_msync failed!, error %d", error); - + printf("nfs_vinvalbuf(): ubc_msync failed!, error %d\n", error); } done: lck_mtx_lock(nfs_buf_mutex); @@ -3747,8 +3768,11 @@ nfs_asyncio_finish(struct nfsreq *req) FSDBG_TOP(552, nmp, 0, 0, 0); again: - if (((nmp = req->r_nmp)) == NULL) + nmp = req->r_nmp; + + if (nmp == NULL) return; + lck_mtx_lock(nfsiod_mutex); niod = nmp->nm_niod; @@ -3783,8 +3807,9 @@ again: lck_mtx_unlock(nfsiod_mutex); wakeup(niod); } else if (nfsiod_thread_count > 0) { - /* just queue it up on nfsiod mounts queue */ - TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); + /* just queue it up on nfsiod mounts queue if needed */ + if (nmp->nm_iodlink.tqe_next == NFSNOLIST) + TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); lck_mtx_unlock(nfsiod_mutex); } else { printf("nfs_asyncio(): no nfsiods? 
%d %d (%d)\n", nfsiod_thread_count, NFSIOD_MAX, started); @@ -3808,7 +3833,7 @@ nfs_asyncio_resend(struct nfsreq *req) { struct nfsmount *nmp = req->r_nmp; - if (!nmp) + if (nfs_mount_gone(nmp)) return; nfs_gss_clnt_rpcdone(req); lck_mtx_lock(&nmp->nm_lock); @@ -3833,7 +3858,7 @@ nfs_buf_readdir(struct nfsbuf *bp, vfs_context_t ctx) struct nfsmount *nmp = NFSTONMP(np); int error = 0; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (nmp->nm_vers < NFS_VER4) diff --git a/bsd/nfs/nfs_gss.c b/bsd/nfs/nfs_gss.c index f1dd81b5f..9f98a9a50 100644 --- a/bsd/nfs/nfs_gss.c +++ b/bsd/nfs/nfs_gss.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Apple Inc. All rights reserved. + * Copyright (c) 2007-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -87,6 +87,7 @@ #include #include +#include #include #include @@ -186,13 +187,15 @@ static u_char iv0[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; // DES #if NFSCLIENT static int nfs_gss_clnt_ctx_find(struct nfsreq *); -static int nfs_gss_clnt_ctx_failover(struct nfsreq *); static int nfs_gss_clnt_ctx_init(struct nfsreq *, struct nfs_gss_clnt_ctx *); static int nfs_gss_clnt_ctx_init_retry(struct nfsreq *, struct nfs_gss_clnt_ctx *); static int nfs_gss_clnt_ctx_callserver(struct nfsreq *, struct nfs_gss_clnt_ctx *); static uint8_t *nfs_gss_clnt_svcname(struct nfsmount *, gssd_nametype *, uint32_t *); static int nfs_gss_clnt_gssd_upcall(struct nfsreq *, struct nfs_gss_clnt_ctx *); -static void nfs_gss_clnt_ctx_remove(struct nfsmount *, struct nfs_gss_clnt_ctx *); +void nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *, struct nfsmount *); +static void nfs_gss_clnt_ctx_clean(struct nfs_gss_clnt_ctx *); +static void nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *); +static void nfs_gss_clnt_log_error(struct nfsreq *, struct nfs_gss_clnt_ctx *, uint32_t, uint32_t); #endif /* NFSCLIENT */ #if NFSSERVER @@ -257,25 +260,6 @@ nfs_gss_init(void) #if NFSCLIENT -/* - * Is it OK to fall back to using AUTH_SYS? - */ -static int -nfs_gss_sysok(struct nfsreq *req) -{ - struct nfsmount *nmp = req->r_nmp; - int i; - - if (req->r_wrongsec) /* Not OK if we're trying to handle a wrongsec error */ - return (0); - if (!nmp->nm_sec.count) /* assume it's OK if we don't have a set of flavors */ - return (1); - for (i=0; i < nmp->nm_sec.count; i++) - if (nmp->nm_sec.flavors[i] == RPCAUTH_SYS) - return (1); - return (0); -} - /* * Find the context for a particular user. 
* @@ -292,6 +276,43 @@ nfs_gss_sysok(struct nfsreq *req) #define kauth_cred_getasid(cred) ((cred)->cr_audit.as_aia_p->ai_asid) #define kauth_cred_getauid(cred) ((cred)->cr_audit.as_aia_p->ai_auid) +/* + * Debugging + */ +static void +nfs_gss_clnt_ctx_dump(struct nfsmount *nmp) +{ + struct nfs_gss_clnt_ctx *cp; + + lck_mtx_lock(&nmp->nm_lock); + NFS_GSS_DBG("Enter"); + TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { + lck_mtx_lock(cp->gss_clnt_mtx); + printf("context %d/%d: refcnt = %d, flags = %x\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt, cp->gss_clnt_flags); + lck_mtx_unlock(cp->gss_clnt_mtx); + } + + TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) { + lck_mtx_lock(cp->gss_clnt_mtx); + printf("context %d/%d: refcnt = %d, flags = %x\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt, cp->gss_clnt_flags); + lck_mtx_unlock(cp->gss_clnt_mtx); + } + NFS_GSS_DBG("Exit"); + lck_mtx_unlock(&nmp->nm_lock); +} + +#define NFS_GSS_CLNT_CTX_DUMP(nmp) \ + do { \ + if (NFS_GSS_ISDBG && (NFS_DEBUG_FLAGS & 0x2)) \ + nfs_gss_clnt_ctx_dump((nmp)); \ + } while (0) + static int nfs_gss_clnt_ctx_cred_match(kauth_cred_t cred1, kauth_cred_t cred2) { @@ -307,16 +328,43 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *cp; int error = 0; - + struct timeval now; + + microuptime(&now); lck_mtx_lock(&nmp->nm_lock); TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { + lck_mtx_lock(cp->gss_clnt_mtx); + if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { + NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt); + lck_mtx_unlock(cp->gss_clnt_mtx); + continue; + } if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, req->r_cred)) { - if (cp->gss_clnt_flags & GSS_CTX_INVAL) - continue; - nfs_gss_clnt_ctx_ref(req, cp); + if (nmp->nm_gsscl.tqh_first != cp) { + TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + TAILQ_INSERT_HEAD(&nmp->nm_gsscl, cp, gss_clnt_entries); + } + if (cp->gss_clnt_flags & GSS_CTX_INVAL) { + /* + * We haven't been moved to the neg cache list + * but we're about to be, finding an entry on + * the negative cache list will result in an + * NFSERR_EAUTH for GSS_NEG_CACHE_TO so we just + * return that now. + */ + lck_mtx_unlock(cp->gss_clnt_mtx); + lck_mtx_unlock(&nmp->nm_lock); + return (NFSERR_EAUTH); + } + lck_mtx_unlock(cp->gss_clnt_mtx); lck_mtx_unlock(&nmp->nm_lock); + nfs_gss_clnt_ctx_ref(req, cp); return (0); } + lck_mtx_unlock(cp->gss_clnt_mtx); } if (kauth_cred_getuid(req->r_cred) == 0) { @@ -328,103 +376,99 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) * in case one is set up for it. 
*/ TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { - if (!(cp->gss_clnt_flags & GSS_CTX_INVAL)) { + if (!(cp->gss_clnt_flags & (GSS_CTX_INVAL|GSS_CTX_DESTROY))) { nfs_gss_clnt_ctx_ref(req, cp); lck_mtx_unlock(&nmp->nm_lock); + NFS_GSS_DBG("Root stole context %d/%d\n", + kauth_cred_getasid(cp->gss_clnt_cred), kauth_cred_getauid(cp->gss_clnt_cred)); return (0); } } } /* - * Not found - create a new context + * Check negative context cache + * If found and the cache has not expired + * return NFSERR_EAUTH, else remove + * from the cache and try to create a new context */ + TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) { + lck_mtx_lock(cp->gss_clnt_mtx); + if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { + NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), cp->gss_clnt_refcnt); + lck_mtx_unlock(cp->gss_clnt_mtx); + continue; + } + if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, req->r_cred)) { + /* + * If we're still being used and invalid or we're not expired + * just return and don't bother gssd again. + */ + if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec) { + NFS_GSS_DBG("Context %d/%d (refcnt = %d) not expired returning EAUTH nctime = %ld now = %ld\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt, cp->gss_clnt_nctime, now.tv_sec); + lck_mtx_unlock(cp->gss_clnt_mtx); + lck_mtx_unlock(&nmp->nm_lock); + return (NFSERR_EAUTH); + } + if (cp->gss_clnt_refcnt && (cp->gss_clnt_flags & GSS_CTX_INVAL)) { + NFS_GSS_DBG("Context %d/%d has expired but we still have %d references\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt); + lck_mtx_unlock(cp->gss_clnt_mtx); + lck_mtx_unlock(&nmp->nm_lock); + return (NFSERR_EAUTH); + } + TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries); + lck_mtx_unlock(cp->gss_clnt_mtx); + nmp->nm_ncentries--; + break; + } + lck_mtx_unlock(cp->gss_clnt_mtx); + } + + NFS_GSS_DBG("Context %d/%d %sfound in Neg Cache @ %ld\n", + kauth_cred_getasid(req->r_cred), + kauth_cred_getauid(req->r_cred), + cp == NULL ? "not " : "", + cp == NULL ? 0L : cp->gss_clnt_nctime); + /* - * If the thread is async, then it cannot get - * kerberos creds and set up a proper context. - * If no sec= mount option is given, attempt - * to failover to sec=sys. 
+ * Not found - create a new context */ - if (req->r_thread == NULL) { - if (nfs_gss_sysok(req)) { - error = nfs_gss_clnt_ctx_failover(req); - } else { - printf("nfs_gss_clnt_ctx_find: no context for async\n"); - error = NFSERR_EAUTH; - } - lck_mtx_unlock(&nmp->nm_lock); - return (error); - } - - MALLOC(cp, struct nfs_gss_clnt_ctx *, sizeof(*cp), M_TEMP, M_WAITOK|M_ZERO); if (cp == NULL) { - lck_mtx_unlock(&nmp->nm_lock); - return (ENOMEM); + MALLOC(cp, struct nfs_gss_clnt_ctx *, sizeof(*cp), M_TEMP, M_WAITOK|M_ZERO); + if (cp == NULL) { + lck_mtx_unlock(&nmp->nm_lock); + return (ENOMEM); + } + cp->gss_clnt_cred = req->r_cred; + kauth_cred_ref(cp->gss_clnt_cred); + cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); + cp->gss_clnt_ptime = now.tv_sec - GSS_PRINT_DELAY; + } else { + nfs_gss_clnt_ctx_clean(cp); } - - cp->gss_clnt_cred = req->r_cred; - kauth_cred_ref(cp->gss_clnt_cred); - cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); + cp->gss_clnt_thread = current_thread(); nfs_gss_clnt_ctx_ref(req, cp); - TAILQ_INSERT_TAIL(&nmp->nm_gsscl, cp, gss_clnt_entries); + TAILQ_INSERT_HEAD(&nmp->nm_gsscl, cp, gss_clnt_entries); lck_mtx_unlock(&nmp->nm_lock); error = nfs_gss_clnt_ctx_init_retry(req, cp); // Initialize new context - if (error) + if (error) nfs_gss_clnt_ctx_unref(req); - /* - * If we failed to set up a Kerberos context for this - * user and no sec= mount option was given, but the - * server indicated that it could support AUTH_SYS, then set - * up a dummy context that allows this user to attempt - * sec=sys calls. - */ - if (error && nfs_gss_sysok(req) && - (error != ENXIO) && (error != ETIMEDOUT)) { - lck_mtx_lock(&nmp->nm_lock); - error = nfs_gss_clnt_ctx_failover(req); - lck_mtx_unlock(&nmp->nm_lock); - } - return (error); } -/* - * Set up a dummy context to allow the use of sec=sys - * for this user, if the server allows sec=sys. - * The context is valid for GSS_CLNT_SYS_VALID seconds, - * so that the user will periodically attempt to fail back - * and get a real credential. - * - * Assumes context list (nm_lock) is locked - */ -static int -nfs_gss_clnt_ctx_failover(struct nfsreq *req) -{ - struct nfsmount *nmp = req->r_nmp; - struct nfs_gss_clnt_ctx *cp; - struct timeval now; - - MALLOC(cp, struct nfs_gss_clnt_ctx *, sizeof(*cp), M_TEMP, M_WAITOK|M_ZERO); - if (cp == NULL) - return (ENOMEM); - - cp->gss_clnt_service = RPCSEC_GSS_SVC_SYS; - cp->gss_clnt_cred = req->r_cred; - kauth_cred_ref(cp->gss_clnt_cred); - cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); - microuptime(&now); - cp->gss_clnt_ctime = now.tv_sec; // time stamp - nfs_gss_clnt_ctx_ref(req, cp); - TAILQ_INSERT_TAIL(&nmp->nm_gsscl, cp, gss_clnt_entries); - - return (0); -} - /* * Inserts an RPCSEC_GSS credential into an RPC header. * After the credential is inserted, the code continues @@ -444,7 +488,6 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) struct gss_seq *gsp; u_char tokbuf[KRB5_SZ_TOKMAX(MAX_DIGEST)]; u_char cksum[MAX_DIGEST]; - struct timeval now; gss_key_info *ki; slpflag = (PZERO-1); @@ -465,30 +508,6 @@ retry: } cp = req->r_gss_ctx; - /* - * If it's a dummy context for a user that's using - * a fallback to sec=sys, then just return an error - * so rpchead can encode an RPCAUTH_UNIX cred. - */ - if (cp->gss_clnt_service == RPCSEC_GSS_SVC_SYS) { - /* - * The dummy context is valid for just - * GSS_CLNT_SYS_VALID seconds. 
If the context - * is older than this, mark it invalid and try - * again to get a real one. - */ - lck_mtx_lock(cp->gss_clnt_mtx); - microuptime(&now); - if (now.tv_sec > cp->gss_clnt_ctime + GSS_CLNT_SYS_VALID) { - cp->gss_clnt_flags |= GSS_CTX_INVAL; - lck_mtx_unlock(cp->gss_clnt_mtx); - nfs_gss_clnt_ctx_unref(req); - goto retry; - } - lck_mtx_unlock(cp->gss_clnt_mtx); - return (ENEEDAUTH); - } - /* * If the context thread isn't null, then the context isn't * yet complete and is for the exclusive use of the thread @@ -719,8 +738,7 @@ nfs_gss_clnt_verf_get( if (verftype != RPCSEC_GSS) { if (verftype != RPCAUTH_NULL) return (NFSERR_EAUTH); - if (cp->gss_clnt_flags & GSS_CTX_COMPLETE && - cp->gss_clnt_service != RPCSEC_GSS_SVC_SYS) + if (cp->gss_clnt_flags & GSS_CTX_COMPLETE) return (NFSERR_EAUTH); if (verflen > 0) nfsm_chain_adv(error, nmc, nfsm_rndup(verflen)); @@ -999,15 +1017,17 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) int server_complete = 0; u_char cksum1[MAX_DIGEST], cksum2[MAX_DIGEST]; int error = 0; - struct timeval now; gss_key_info *ki = &cp->gss_clnt_kinfo; /* Initialize a new client context */ + - cp->gss_clnt_svcname = nfs_gss_clnt_svcname(nmp, &cp->gss_clnt_svcnt, &cp->gss_clnt_svcnamlen); if (cp->gss_clnt_svcname == NULL) { - error = NFSERR_EAUTH; - goto nfsmout; + cp->gss_clnt_svcname = nfs_gss_clnt_svcname(nmp, &cp->gss_clnt_svcnt, &cp->gss_clnt_svcnamlen); + if (cp->gss_clnt_svcname == NULL) { + error = NFSERR_EAUTH; + goto nfsmout; + } } cp->gss_clnt_proc = RPCSEC_GSS_INIT; @@ -1063,11 +1083,7 @@ retry: server_complete = 1; if (client_complete) break; - } else if (cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { - error = NFSERR_EAUTH; - goto nfsmout; } - cp->gss_clnt_proc = RPCSEC_GSS_CONTINUE_INIT; } @@ -1078,9 +1094,6 @@ retry: cp->gss_clnt_flags |= GSS_CTX_COMPLETE; lck_mtx_unlock(cp->gss_clnt_mtx); cp->gss_clnt_proc = RPCSEC_GSS_DATA; - microuptime(&now); - cp->gss_clnt_ctime = now.tv_sec; // time stamp - /* * Compute checksum of the server's window @@ -1166,7 +1179,7 @@ nfs_gss_clnt_ctx_init_retry(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) int retries = 0; int timeo = NFS_TRYLATERDEL; - if (nmp == NULL) { + if (nfs_mount_gone(nmp)) { error = ENXIO; goto bad; } @@ -1188,7 +1201,7 @@ nfs_gss_clnt_ctx_init_retry(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) retries++; /* If it's a soft mount just give up after a while */ - if (NMFLAG(nmp, SOFT) && (retries > nmp->nm_retry)) { + if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (retries > nmp->nm_retry)) { error = ETIMEDOUT; goto bad; } @@ -1230,9 +1243,10 @@ nfs_gss_clnt_ctx_callserver(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) { struct nfsm_chain nmreq, nmrep; int error = 0, status; + uint32_t major = cp->gss_clnt_major, minor = cp->gss_clnt_minor; int sz; - if (!req->r_nmp) + if (nfs_mount_gone(req->r_nmp)) return (ENXIO); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -1292,17 +1306,10 @@ nfs_gss_clnt_ctx_callserver(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) */ if (cp->gss_clnt_major != GSS_S_COMPLETE && cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { - char who[] = "server"; - char unknown[] = ""; - (void) mach_gss_log_error( - cp->gss_clnt_mport, - !req->r_nmp ? 
unknown : - vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname, - kauth_cred_getuid(cp->gss_clnt_cred), - who, - cp->gss_clnt_major, - cp->gss_clnt_minor); + printf("nfs_gss_clnt_ctx_callserver: gss_clnt_major = %d\n", cp->gss_clnt_major); + nfs_gss_clnt_log_error(req, cp, major, minor); + } nfsmout: @@ -1330,7 +1337,7 @@ nfs_gss_clnt_svcname(struct nfsmount *nmp, gssd_nametype *nt, uint32_t *len) char *svcname, *d, *server; int lindx, sindx; - if (!nmp) + if (nfs_mount_gone(nmp)) return (NULL); if (nmp->nm_sprinc) { @@ -1404,6 +1411,74 @@ nfs_gss_clnt_get_upcall_port(kauth_cred_t credp) return (uc_port); } + +static void +nfs_gss_clnt_log_error(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp, uint32_t major, uint32_t minor) +{ +#define GETMAJERROR(x) (((x) >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK) + struct nfsmount *nmp = req->r_nmp; + char who[] = "client"; + uint32_t gss_error = GETMAJERROR(cp->gss_clnt_major); + const char *procn = "unknown"; + proc_t proc; + pid_t pid = -1; + struct timeval now; + + if (req->r_thread) { + proc = (proc_t)get_bsdthreadtask_info(req->r_thread); + if (proc != NULL && (proc->p_fd == NULL || (proc->p_lflag & P_LVFORK))) + proc = NULL; + if (proc) { + if (*proc->p_comm) + procn = proc->p_comm; + pid = proc->p_pid; + } + } else { + procn = "kernproc"; + pid = 0; + } + + microuptime(&now); + if ((cp->gss_clnt_major != major || cp->gss_clnt_minor != minor || + cp->gss_clnt_ptime + GSS_PRINT_DELAY < now.tv_sec) && + (nmp->nm_state & NFSSTA_MOUNTED)) { + /* + * Will let gssd do some logging in hopes that it can translate + * the minor code. + */ + if (cp->gss_clnt_minor && cp->gss_clnt_minor != minor) { + (void) mach_gss_log_error( + cp->gss_clnt_mport, + vfs_statfs(nmp->nm_mountp)->f_mntfromname, + kauth_cred_getuid(cp->gss_clnt_cred), + who, + cp->gss_clnt_major, + cp->gss_clnt_minor); + } + gss_error = gss_error ? gss_error : cp->gss_clnt_major; + + /* + *%%% It would be really nice to get the terminal from the proc or auditinfo_addr struct and print that here. + */ + printf("NFS: gssd auth failure by %s on audit session %d uid %d proc %s/%d for mount %s. Error: major = %d minor = %d\n", + cp->gss_clnt_display ? cp->gss_clnt_display : who, kauth_cred_getasid(req->r_cred), kauth_cred_getuid(req->r_cred), + procn, pid, vfs_statfs(nmp->nm_mountp)->f_mntfromname, gss_error, (int32_t)cp->gss_clnt_minor); + cp->gss_clnt_ptime = now.tv_sec; + switch (gss_error) { + case 7: printf("NFS: gssd does not have credentials for session %d/%d, (kinit)?\n", + kauth_cred_getasid(req->r_cred), kauth_cred_getauid(req->r_cred)); + break; + case 11: printf("NFS: gssd has expired credentials for session %d/%d, (kinit)?\n", + kauth_cred_getasid(req->r_cred), kauth_cred_getauid(req->r_cred)); + break; + } + } else { + NFS_GSS_DBG("NFS: gssd auth failure by %s on audit session %d uid %d proc %s/%d for mount %s. Error: major = %d minor = %d\n", + cp->gss_clnt_display ? cp->gss_clnt_display : who, kauth_cred_getasid(req->r_cred), kauth_cred_getuid(req->r_cred), + procn, pid, vfs_statfs(nmp->nm_mountp)->f_mntfromname, gss_error, (int32_t)cp->gss_clnt_minor); + } +} + /* * Make an upcall to the gssd using Mach RPC * The upcall is made using a host special port. 
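The nfs_gss_clnt_log_error() routine added above rate-limits GSS failure logging: a message is printed only when the major/minor status changes or when GSS_PRINT_DELAY has elapsed since the last print (tracked in gss_clnt_ptime); repeats inside the window are demoted to NFS_GSS_DBG. Below is a minimal user-space sketch of that print-once-per-window pattern; the names err_state, log_rate_limited, and PRINT_DELAY are illustrative stand-ins, not part of the kernel sources.

#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define PRINT_DELAY (8 * 3600)	/* stand-in for GSS_PRINT_DELAY: seconds between repeats */

struct err_state {
	uint32_t last_major;	/* major status from the last printed message */
	uint32_t last_minor;	/* minor status from the last printed message */
	time_t last_time;	/* when that message was printed */
};

/* Print the error unless it repeats the last one inside the delay window. */
static void
log_rate_limited(struct err_state *st, uint32_t major, uint32_t minor)
{
	time_t now = time(NULL);

	if (major != st->last_major || minor != st->last_minor ||
	    st->last_time + PRINT_DELAY < now) {
		printf("auth failure: major = %u minor = %u\n",
		    (unsigned)major, (unsigned)minor);
		st->last_major = major;
		st->last_minor = minor;
		st->last_time = now;
	}
	/* Otherwise the same failure is still inside the window: stay quiet. */
}

int
main(void)
{
	struct err_state st = { 0, 0, 0 };

	log_rate_limited(&st, 7, 0);	/* prints: first occurrence */
	log_rate_limited(&st, 7, 0);	/* suppressed: same error, window not expired */
	log_rate_limited(&st, 11, 0);	/* prints: the error changed */
	return (0);
}

The kernel version makes the same decision off the saved pre-upcall major/minor values and cp->gss_clnt_ptime under the context mutex; the sketch only illustrates the rate-limiting logic itself.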
@@ -1431,6 +1506,7 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) uint32_t ret_flags; uint32_t nfs_1des = (cp->gss_clnt_gssd_flags & GSSD_NFS_1DES); struct nfsmount *nmp; + uint32_t major = cp->gss_clnt_major, minor = cp->gss_clnt_minor; /* * NFS currently only supports default principals or @@ -1541,21 +1617,7 @@ skip: */ if (cp->gss_clnt_major != GSS_S_COMPLETE && cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { -#define GETMAJERROR(x) (((x) >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK) - char who[] = "client"; - uint32_t gss_error = GETMAJERROR(cp->gss_clnt_major); - - (void) mach_gss_log_error( - cp->gss_clnt_mport, - vfs_statfs(nmp->nm_mountp)->f_mntfromname, - kauth_cred_getuid(cp->gss_clnt_cred), - who, - cp->gss_clnt_major, - cp->gss_clnt_minor); - gss_error = gss_error ? gss_error : cp->gss_clnt_major; - printf("NFS gssd auth failure mount %s for %s major = %d minor = %d\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, cp->gss_clnt_display ? cp->gss_clnt_display : who, - gss_error, (int32_t)cp->gss_clnt_minor); + nfs_gss_clnt_log_error(req, cp, major, minor); } if (skeylen > 0) { @@ -1690,6 +1752,9 @@ nfs_gss_clnt_ctx_unref(struct nfsreq *req) { struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *cp = req->r_gss_ctx; + int neg_cache = 0; + int on_neg_cache = 0; + int destroy = 0; if (cp == NULL) return; @@ -1697,51 +1762,149 @@ nfs_gss_clnt_ctx_unref(struct nfsreq *req) req->r_gss_ctx = NULL; lck_mtx_lock(cp->gss_clnt_mtx); - if (--cp->gss_clnt_refcnt == 0 - && cp->gss_clnt_flags & GSS_CTX_INVAL) { - lck_mtx_unlock(cp->gss_clnt_mtx); + if (--cp->gss_clnt_refcnt < 0) + panic("Over release of gss context!\n"); - if (nmp) + if (cp->gss_clnt_refcnt == 0 && (cp->gss_clnt_flags & GSS_CTX_DESTROY)) { + destroy = 1; + if (cp->gss_clnt_flags & GSS_CTX_NC) + on_neg_cache = 1; + } else if ((cp->gss_clnt_flags & (GSS_CTX_INVAL | GSS_CTX_NC)) == GSS_CTX_INVAL) { + neg_cache = 1; + } + lck_mtx_unlock(cp->gss_clnt_mtx); + if (destroy) { + if (nmp) { lck_mtx_lock(&nmp->nm_lock); - nfs_gss_clnt_ctx_remove(nmp, cp); - if (nmp) + if (cp->gss_clnt_entries.tqe_next != NFSNOLIST) { + if (on_neg_cache) + TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries); + else + TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + } lck_mtx_unlock(&nmp->nm_lock); + } + nfs_gss_clnt_ctx_destroy(cp); + } else if (neg_cache) + nfs_gss_clnt_ctx_neg_cache_enter(cp, nmp); + NFS_GSS_CLNT_CTX_DUMP(nmp); +} +/* + * Enter the gss context associated with req on to the neg context + * cache queue. 
+ */ +void +nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *cp, struct nfsmount *nmp) +{ + struct nfs_gss_clnt_ctx *nccp, *tcp; + struct timeval now; + int reaped = 0; + + if (nmp == NULL) return; - } + + microuptime(&now); + lck_mtx_lock(&nmp->nm_lock); + + lck_mtx_lock(cp->gss_clnt_mtx); + if (cp->gss_clnt_entries.tqe_next != NFSNOLIST) + TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + + cp->gss_clnt_flags |= GSS_CTX_NC; + cp->gss_clnt_nctime = now.tv_sec; lck_mtx_unlock(cp->gss_clnt_mtx); + + TAILQ_INSERT_TAIL(&nmp->nm_gssnccl, cp, gss_clnt_entries); + nmp->nm_ncentries++; + + NFS_GSS_DBG("Reaping contexts ncentries = %d\n", nmp->nm_ncentries); + /* Try and reap old, unreferenced, expired contexts */ + TAILQ_FOREACH_SAFE(nccp, &nmp->nm_gssnccl, gss_clnt_entries, tcp) { + int destroy = 0; + + /* Keep up to GSS_MAX_NEG_CACHE_ENTRIES */ + if (nmp->nm_ncentries <= GSS_MAX_NEG_CACHE_ENTRIES) + break; + /* Contexts too young */ + if (nccp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec) + break; + /* Not referenced, remove it. */ + lck_mtx_lock(nccp->gss_clnt_mtx); + if (nccp->gss_clnt_refcnt == 0) { + TAILQ_REMOVE(&nmp->nm_gssnccl, nccp, gss_clnt_entries); + reaped++; + destroy = 1; + } + lck_mtx_unlock(nccp->gss_clnt_mtx); + if (destroy) + nfs_gss_clnt_ctx_destroy(nccp); + nmp->nm_ncentries--; + } + NFS_GSS_DBG("Reaped %d contexts ncentries = %d\n", reaped, nmp->nm_ncentries); + lck_mtx_unlock(&nmp->nm_lock); +} + +/* + * Clean a context to be cached + */ +static void +nfs_gss_clnt_ctx_clean(struct nfs_gss_clnt_ctx *cp) +{ + cp->gss_clnt_flags = 0; + if (cp->gss_clnt_handle) { + FREE(cp->gss_clnt_handle, M_TEMP); + cp->gss_clnt_handle = NULL; + } + if (cp->gss_clnt_seqbits) { + FREE(cp->gss_clnt_seqbits, M_TEMP); + cp->gss_clnt_seqbits = NULL; + } + if (cp->gss_clnt_token) { + FREE(cp->gss_clnt_token, M_TEMP); + cp->gss_clnt_token = NULL; + } + if (cp->gss_clnt_svcname) { + FREE(cp->gss_clnt_svcname, M_TEMP); + cp->gss_clnt_svcname = NULL; + } + cp->gss_clnt_flags = 0; + cp->gss_clnt_seqwin = 0; + cp->gss_clnt_seqnum = 0; } /* * Remove a context */ static void -nfs_gss_clnt_ctx_remove(struct nfsmount *nmp, struct nfs_gss_clnt_ctx *cp) +nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *cp) { - /* - * If dequeueing, assume nmp->nm_lock is held - */ - if (nmp != NULL) - TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries); + NFS_GSS_DBG("Destroying context %d/%d\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred)); host_release_special_port(cp->gss_clnt_mport); - - if (cp->gss_clnt_mtx) + cp->gss_clnt_mport = IPC_PORT_NULL; + + if (cp->gss_clnt_mtx) { lck_mtx_destroy(cp->gss_clnt_mtx, nfs_gss_clnt_grp); + cp->gss_clnt_mtx = (lck_mtx_t *)NULL; + } if (IS_VALID_CRED(cp->gss_clnt_cred)) kauth_cred_unref(&cp->gss_clnt_cred); - if (cp->gss_clnt_principal) + cp->gss_clnt_entries.tqe_next = NFSNOLIST; + cp->gss_clnt_entries.tqe_prev = NFSNOLIST; + if (cp->gss_clnt_principal) { FREE(cp->gss_clnt_principal, M_TEMP); - if (cp->gss_clnt_display) + cp->gss_clnt_principal = NULL; + } + if (cp->gss_clnt_display) { FREE(cp->gss_clnt_display, M_TEMP); - if (cp->gss_clnt_handle) - FREE(cp->gss_clnt_handle, M_TEMP); - if (cp->gss_clnt_seqbits) - FREE(cp->gss_clnt_seqbits, M_TEMP); - if (cp->gss_clnt_token) - FREE(cp->gss_clnt_token, M_TEMP); - if (cp->gss_clnt_svcname) - FREE(cp->gss_clnt_svcname, M_TEMP); + cp->gss_clnt_display = NULL; + } + + nfs_gss_clnt_ctx_clean(cp); + FREE(cp, M_TEMP); } @@ -1755,10 +1918,10 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) { 
struct nfs_gss_clnt_ctx *cp = req->r_gss_ctx; struct nfsmount *nmp = req->r_nmp; + struct nfs_gss_clnt_ctx tmp; struct nfs_gss_clnt_ctx *ncp; + int error = 0; - kauth_cred_t saved_cred; - mach_port_t saved_mport; if (cp == NULL) return (0); @@ -1769,12 +1932,23 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) nfs_gss_clnt_ctx_unref(req); return (0); // already being renewed } - saved_cred = cp->gss_clnt_cred; - kauth_cred_ref(saved_cred); - saved_mport = host_copy_special_port(cp->gss_clnt_mport); - /* Remove the old context */ - cp->gss_clnt_flags |= GSS_CTX_INVAL; + bzero(&tmp, sizeof(tmp)); + tmp.gss_clnt_cred = cp->gss_clnt_cred; + kauth_cred_ref(tmp.gss_clnt_cred); + tmp.gss_clnt_mport = host_copy_special_port(cp->gss_clnt_mport); + tmp.gss_clnt_principal = cp->gss_clnt_principal; + cp->gss_clnt_principal = NULL; + tmp.gss_clnt_prinlen = cp->gss_clnt_prinlen; + tmp.gss_clnt_prinnt = cp->gss_clnt_prinnt; + tmp.gss_clnt_major = cp->gss_clnt_major; + tmp.gss_clnt_minor = cp->gss_clnt_minor; + tmp.gss_clnt_ptime = cp->gss_clnt_ptime; + + NFS_GSS_DBG("Renewing context %d/%d\n", + kauth_cred_getasid(tmp.gss_clnt_cred), + kauth_cred_getauid(tmp.gss_clnt_cred)); + cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); /* * If there's a thread waiting @@ -1796,11 +1970,10 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) goto out; } - ncp->gss_clnt_cred = saved_cred; - kauth_cred_ref(ncp->gss_clnt_cred); - ncp->gss_clnt_mport = host_copy_special_port(saved_mport); // re-use the gssd port + *ncp = tmp; ncp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); ncp->gss_clnt_thread = current_thread(); + lck_mtx_lock(&nmp->nm_lock); TAILQ_INSERT_TAIL(&nmp->nm_gsscl, ncp, gss_clnt_entries); lck_mtx_unlock(&nmp->nm_lock); @@ -1809,16 +1982,14 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) nfs_gss_clnt_ctx_unref(req); nfs_gss_clnt_ctx_ref(req, ncp); - error = nfs_gss_clnt_ctx_init_retry(req, ncp); // Initialize new context + error = nfs_gss_clnt_ctx_init_retry(req, ncp); out: - host_release_special_port(saved_mport); - kauth_cred_unref(&saved_cred); if (error) nfs_gss_clnt_ctx_unref(req); - return (error); } + /* * Destroy all the contexts associated with a mount. * The contexts are also destroyed by the server. @@ -1830,28 +2001,31 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp) struct nfsm_chain nmreq, nmrep; int error, status; struct nfsreq req; - req.r_nmp = nmp; + if (!nmp) + return; + for (;;) { lck_mtx_lock(&nmp->nm_lock); cp = TAILQ_FIRST(&nmp->nm_gsscl); - if (cp) { - lck_mtx_lock(cp->gss_clnt_mtx); - cp->gss_clnt_refcnt++; - lck_mtx_unlock(cp->gss_clnt_mtx); - req.r_gss_ctx = cp; + if (cp == NULL) { + lck_mtx_unlock(&nmp->nm_lock); + goto remove_neg_cache; } + + lck_mtx_lock(cp->gss_clnt_mtx); + cp->gss_clnt_refcnt++; + lck_mtx_unlock(cp->gss_clnt_mtx); + req.r_gss_ctx = cp; + lck_mtx_unlock(&nmp->nm_lock); - if (cp == NULL) - break; - + /* * Tell the server to destroy its context. - * But don't bother if it's a forced unmount - * or if it's a dummy sec=sys context. + * But don't bother if it's a forced unmount. */ - if (!(nmp->nm_state & NFSSTA_FORCE) && (cp->gss_clnt_service != RPCSEC_GSS_SVC_SYS)) { + if (!nfs_mount_gone(nmp)) { cp->gss_clnt_proc = RPCSEC_GSS_DESTROY; error = 0; @@ -1872,48 +2046,118 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp) * refcount is zero. 
*/ lck_mtx_lock(cp->gss_clnt_mtx); - cp->gss_clnt_flags |= GSS_CTX_INVAL; + cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); + lck_mtx_unlock(cp->gss_clnt_mtx); + nfs_gss_clnt_ctx_unref(&req); + } + + /* Now all the remaining contexts should be on the negative cache list */ +remove_neg_cache: + for (;;) { + lck_mtx_lock(&nmp->nm_lock); + cp = TAILQ_FIRST(&nmp->nm_gssnccl); + if (cp == NULL) { + lck_mtx_unlock(&nmp->nm_lock); + return; + } + req.r_gss_ctx = cp; + TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries); + cp->gss_clnt_entries.tqe_next = NFSNOLIST; + + lck_mtx_lock(cp->gss_clnt_mtx); + if (cp->gss_clnt_refcnt) + NFS_GSS_DBG("Context %d/%d found with %d references\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt); + cp->gss_clnt_refcnt++; + cp->gss_clnt_flags |= GSS_CTX_DESTROY; lck_mtx_unlock(cp->gss_clnt_mtx); + lck_mtx_unlock(&nmp->nm_lock); + nfs_gss_clnt_ctx_unref(&req); } + NFS_GSS_CLNT_CTX_DUMP(nmp); } /* - * Destroy a mounts context for a credential + * Removes a mount's context for a credential */ int -nfs_gss_clnt_ctx_destroy(struct nfsmount *nmp, kauth_cred_t cred) +nfs_gss_clnt_ctx_remove(struct nfsmount *nmp, kauth_cred_t cred) { struct nfs_gss_clnt_ctx *cp; struct nfsreq req; req.r_nmp = nmp; + NFS_GSS_DBG("Enter\n"); + NFS_GSS_CLNT_CTX_DUMP(nmp); lck_mtx_lock(&nmp->nm_lock); TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { + lck_mtx_lock(cp->gss_clnt_mtx); if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, cred)) { - if (cp->gss_clnt_flags & GSS_CTX_INVAL) + if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { + NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getauid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt); + lck_mtx_unlock(cp->gss_clnt_mtx); continue; - lck_mtx_lock(cp->gss_clnt_mtx); + } cp->gss_clnt_refcnt++; - cp->gss_clnt_flags |= GSS_CTX_INVAL; + cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); lck_mtx_unlock(cp->gss_clnt_mtx); req.r_gss_ctx = cp; - break; + lck_mtx_unlock(&nmp->nm_lock); + /* + * Drop the reference to remove it if its + * refcount is zero. + */ + NFS_GSS_DBG("Removed context %d/%d refcnt = %d\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getuid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt); + nfs_gss_clnt_ctx_unref(&req); + return (0); } + lck_mtx_unlock(cp->gss_clnt_mtx); } - lck_mtx_unlock(&nmp->nm_lock); - if (cp == NULL) - return (ENOENT); - - /* - * Drop the reference to remove it if its - * refcount is zero. - */ - nfs_gss_clnt_ctx_unref(&req); + TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) { + lck_mtx_lock(cp->gss_clnt_mtx); + if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, cred)) { + if (cp->gss_clnt_flags & GSS_CTX_DESTROY) { + NFS_GSS_DBG("Found destroyed context %d/%d refcnt = %d continuing\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getuid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt); + lck_mtx_unlock(cp->gss_clnt_mtx); + continue; + } + cp->gss_clnt_refcnt++; + cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY); + lck_mtx_unlock(cp->gss_clnt_mtx); + req.r_gss_ctx = cp; + lck_mtx_unlock(&nmp->nm_lock); + /* + * Drop the reference to remove it if its + * refcount is zero. 
+ */ + NFS_GSS_DBG("Removed context from neg cache %d/%d refcnt = %d\n", + kauth_cred_getasid(cp->gss_clnt_cred), + kauth_cred_getuid(cp->gss_clnt_cred), + cp->gss_clnt_refcnt); + nfs_gss_clnt_ctx_unref(&req); + return (0); + } + lck_mtx_unlock(cp->gss_clnt_mtx); + } - return (0); + lck_mtx_unlock(&nmp->nm_lock); + + NFS_GSS_DBG("Returning ENOENT\n"); + return (ENOENT); } @@ -2035,6 +2279,8 @@ nfs_gss_svc_ctx_timer(__unused void *param1, __unused void *param2) lck_mtx_lock(nfs_gss_svc_ctx_mutex); clock_get_uptime(&timenow); + NFS_GSS_DBG("is running\n"); + /* * Scan all the hash chains */ @@ -2052,6 +2298,7 @@ nfs_gss_svc_ctx_timer(__unused void *param1, __unused void *param2) * A stale context - remove it */ LIST_REMOVE(cp, gss_svc_entries); + NFS_GSS_DBG("Removing contex for %d\n", cp->gss_svc_uid); if (cp->gss_svc_seqbits) FREE(cp->gss_svc_seqbits, M_TEMP); lck_mtx_destroy(cp->gss_svc_mtx, nfs_gss_svc_grp); diff --git a/bsd/nfs/nfs_gss.h b/bsd/nfs/nfs_gss.h index 7c7b245dd..1588eba02 100644 --- a/bsd/nfs/nfs_gss.h +++ b/bsd/nfs/nfs_gss.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Apple Inc. All rights reserved. + * Copyright (c) 2007-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,7 +48,6 @@ enum rpcsec_gss_service { RPCSEC_GSS_SVC_NONE = 1, // sec=krb5 RPCSEC_GSS_SVC_INTEGRITY = 2, // sec=krb5i RPCSEC_GSS_SVC_PRIVACY = 3, // sec=krb5p - RPCSEC_GSS_SVC_SYS = 4 // sec=sys (fallback) }; /* encoded krb5 OID */ @@ -92,12 +91,15 @@ typedef uint32_t OM_uint32; #define GSS_SVC_MAXCONTEXTS 500000 // Max contexts supported #define GSS_SVC_SEQWINDOW 256 // Server's sequence window #define GSS_CLNT_SEQLISTMAX 32 // Max length of req seq num list -#define GSS_CLNT_SYS_VALID 300 // Valid time (sec) for failover ctx #define SKEYLEN 8 // length of DES key #define SKEYLEN3 24 // length of DES3 keyboard #define MAX_SKEYLEN SKEYLEN3 +#define GSS_MAX_NEG_CACHE_ENTRIES 16 +#define GSS_NEG_CACHE_TO 3 +#define GSS_PRINT_DELAY (8 * 3600) // Wait day before printing the same error message + typedef struct { uint32_t type; // See defines below uint32_t keybytes; // Session key length bytes; @@ -129,7 +131,7 @@ struct nfs_gss_clnt_ctx { thread_t gss_clnt_thread; // Thread creating context TAILQ_ENTRY(nfs_gss_clnt_ctx) gss_clnt_entries; uint32_t gss_clnt_flags; // Flag bits - see below - uint32_t gss_clnt_refcnt; // Reference count + int32_t gss_clnt_refcnt; // Reference count kauth_cred_t gss_clnt_cred; // Owner of this context uint8_t *gss_clnt_principal; // Principal to use for this credential uint32_t gss_clnt_prinlen; // Length of principal @@ -140,7 +142,7 @@ struct nfs_gss_clnt_ctx { uint32_t gss_clnt_service; // Indicates krb5, krb5i or krb5p uint8_t *gss_clnt_handle; // Identifies server context uint32_t gss_clnt_handle_len; // Size of server's ctx handle - time_t gss_clnt_ctime; // When context was created + time_t gss_clnt_nctime; // When context was put in the negative cache uint32_t gss_clnt_seqwin; // Server's seq num window uint32_t *gss_clnt_seqbits; // Bitmap to track seq numbers in use mach_port_t gss_clnt_mport; // Mach port for gssd upcall @@ -156,6 +158,7 @@ struct nfs_gss_clnt_ctx { uint32_t gss_clnt_gssd_flags; // Special flag bits to gssd uint32_t gss_clnt_major; // GSS major result from gssd or server uint32_t gss_clnt_minor; // GSS minor result from gssd or server + time_t gss_clnt_ptime; // When last error message was printed }; /* @@ -166,6 +169,8 @@ struct nfs_gss_clnt_ctx { #define GSS_CTX_INCOMPLETE 0x00000004 // Context needs to be 
inited #define GSS_NEEDSEQ 0x00000008 // Need a sequence number #define GSS_NEEDCTX 0x00000010 // Need the context +#define GSS_CTX_NC 0x00000020 // Context is in negative cache +#define GSS_CTX_DESTROY 0x00000040 // Context is being destroyed, don't cache /* * The server's RPCSEC_GSS context information @@ -224,7 +229,7 @@ int nfs_gss_clnt_ctx_renew(struct nfsreq *); void nfs_gss_clnt_ctx_ref(struct nfsreq *, struct nfs_gss_clnt_ctx *); void nfs_gss_clnt_ctx_unref(struct nfsreq *); void nfs_gss_clnt_ctx_unmount(struct nfsmount *); -int nfs_gss_clnt_ctx_destroy(struct nfsmount *, kauth_cred_t cred); +int nfs_gss_clnt_ctx_remove(struct nfsmount *, kauth_cred_t cred); int nfs_gss_svc_cred_get(struct nfsrv_descript *, struct nfsm_chain *); int nfs_gss_svc_verf_put(struct nfsrv_descript *, struct nfsm_chain *); int nfs_gss_svc_ctx_init(struct nfsrv_descript *, struct nfsrv_sock *, mbuf_t *); diff --git a/bsd/nfs/nfs_lock.c b/bsd/nfs/nfs_lock.c index ad6a0cb53..981779afa 100644 --- a/bsd/nfs/nfs_lock.c +++ b/bsd/nfs/nfs_lock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2010 Apple Inc. All rights reserved. + * Copyright (c) 2002-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -155,7 +155,14 @@ nfs_lockd_mount_unregister(struct nfsmount *nmp) kern_return_t kr; lck_mtx_lock(nfs_lock_mutex); + if (nmp->nm_ldlink.tqe_next == NFSNOLIST) { + lck_mtx_unlock(nfs_lock_mutex); + return; + } + TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink); + nmp->nm_ldlink.tqe_next = NFSNOLIST; + nfs_lockd_mounts--; /* send a shutdown request if there are no more lockd mounts */ @@ -602,7 +609,7 @@ wait_for_granted: ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) { lck_mtx_unlock(&nmp->nm_lock); lastmsg = now.tv_sec; - nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding"); + nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 0); wentdown = 1; } else lck_mtx_unlock(&nmp->nm_lock); @@ -751,7 +758,7 @@ nfs3_setlock_rpc( LOCKD_MSG *msg; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!nlop->nlo_open_owner) { @@ -842,7 +849,7 @@ nfs3_getlock_rpc( LOCKD_MSG *msg; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); /* set up lock message request structure */ diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index fe04bd587..c2a8867f4 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -145,7 +145,7 @@ nfs_case_insensitive(mount_t mp) int answer = 0; int skip = 0; - if (nmp == NULL) { + if (nfs_mount_gone(nmp)) { return (0); } @@ -207,7 +207,7 @@ nfs_nget( FSDBG_TOP(263, mp, dnp, flags, npp); /* Check for unmount in progress */ - if (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)) { + if (!mp || vfs_isforce(mp)) { *npp = NULL; error = ENXIO; FSDBG_BOT(263, mp, dnp, 0xd1e, error); @@ -610,7 +610,7 @@ nfs_vnop_inactive(ap) mp = vnode_mount(vp); restart: - force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)); + force = (!mp || vfs_isforce(mp)); error = 0; inuse = (nfs_mount_state_in_use_start(nmp, NULL) == 0); @@ -869,7 +869,7 @@ nfs_vnop_reclaim(ap) int force; FSDBG_TOP(265, vp, np, np->n_flag, 0); - force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)); + force = (!mp || vfs_isforce(mp) || nfs_mount_gone(nmp)); /* There shouldn't be any open or lock state at this point */ lck_mtx_lock(&np->n_openlock); diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index c163bfa04..ab4cdbe8f 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple 
Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -1636,7 +1636,7 @@ nfs_reconnect(struct nfsmount *nmp) microuptime(&now); if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) { lastmsg = now.tv_sec; - nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect"); + nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect", 0); wentdown = 1; } lck_mtx_lock(&nmp->nm_lock); @@ -1647,7 +1647,13 @@ nfs_reconnect(struct nfsmount *nmp) NFS_SOCK_DBG("Not mounted returning %d\n", error); return (error); } - nfs_mount_check_dead_timeout(nmp); + + if (nfs_mount_check_dead_timeout(nmp)) { + nfs_mount_make_zombie(nmp); + lck_mtx_unlock(&nmp->nm_lock); + return (ENXIO); + } + if ((error = nfs_sigintr(nmp, NULL, thd, 1))) { lck_mtx_unlock(&nmp->nm_lock); return (error); @@ -1771,7 +1777,6 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) int do_reconnect_sleep = 0; lck_mtx_lock(&nmp->nm_lock); - while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq) || !LIST_EMPTY(&nmp->nm_monlist) || @@ -1782,7 +1787,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) if (nmp->nm_sockflags & NMSOCK_UNMOUNT) break; /* do reconnect, if necessary */ - if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_FORCE)) { + if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) { if (nmp->nm_reconnect_start <= 0) { microuptime(&now); nmp->nm_reconnect_start = now.tv_sec; @@ -1813,14 +1818,14 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) if ((nmp->nm_sockflags & NMSOCK_READY) && (nmp->nm_state & NFSSTA_RECOVER) && !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && - !(nmp->nm_state & NFSSTA_FORCE)) { + !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) { /* perform state recovery */ lck_mtx_unlock(&nmp->nm_lock); nfs_recover(nmp); lck_mtx_lock(&nmp->nm_lock); } /* handle NFSv4 delegation returns */ - while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & NFSSTA_FORCE) && + while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) && ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) { lck_mtx_unlock(&nmp->nm_lock); @@ -1828,11 +1833,12 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) lck_mtx_lock(&nmp->nm_lock); } /* do resends, if necessary/possible */ - while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || (nmp->nm_state & NFSSTA_FORCE)) && + while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || + (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) { if (req->r_resendtime) microuptime(&now); - while (req && !(nmp->nm_state & NFSSTA_FORCE) && req->r_resendtime && (now.tv_sec < req->r_resendtime)) + while (req && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && req->r_resendtime && (now.tv_sec < req->r_resendtime)) req = TAILQ_NEXT(req, r_rchain); if (!req) break; @@ -1865,9 +1871,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) NFS_SOCK_DBG("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n", nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid, req->r_flags, req->r_rtt); - error = !req->r_nmp ? ENXIO : 0; /* unmounted? 
*/ - if (!error) - error = nfs_sigintr(nmp, req, req->r_thread, 0); + error = nfs_sigintr(nmp, req, req->r_thread, 0); if (!error) error = nfs_request_add_header(req); if (!error) @@ -1888,9 +1892,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) } NFS_SOCK_DBG("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n", req->r_procnum, req->r_xid, req->r_flags, req->r_rtt); - error = !req->r_nmp ? ENXIO : 0; /* unmounted? */ - if (!error) - error = nfs_sigintr(nmp, req, req->r_thread, 0); + error = nfs_sigintr(nmp, req, req->r_thread, 0); if (!error) { req->r_flags |= R_SENDING; lck_mtx_unlock(&req->r_mtx); @@ -1915,15 +1917,19 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) nfs_asyncio_finish(req); lck_mtx_lock(&nmp->nm_lock); } - if (nmp->nm_deadto_start) - nfs_mount_check_dead_timeout(nmp); + if (nfs_mount_check_dead_timeout(nmp)) { + nfs_mount_make_zombie(nmp); + break; + } + if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) break; /* check monitored nodes, if necessary/possible */ if (!LIST_EMPTY(&nmp->nm_monlist)) { nmp->nm_state |= NFSSTA_MONITOR_SCAN; LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) { - if (!(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE))) + if (!(nmp->nm_sockflags & NMSOCK_READY) || + (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD))) break; np->n_mflag |= NMMONSCANINPROG; lck_mtx_unlock(&nmp->nm_lock); @@ -1936,7 +1942,8 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) np->n_mflag &= ~NMMONSCANWANT; wakeup(&np->n_mflag); } - if (error || !(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE))) + if (error || !(nmp->nm_sockflags & NMSOCK_READY) || + (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD))) break; } nmp->nm_state &= ~NFSSTA_MONITOR_SCAN; @@ -1985,27 +1992,52 @@ nfs_mount_sock_thread_wake(struct nfsmount *nmp) * unresponsive mount has reached the dead timeout. * (must be called with nmp locked) */ -void +int nfs_mount_check_dead_timeout(struct nfsmount *nmp) { struct timeval now; - if (nmp->nm_deadto_start == 0) - return; if (nmp->nm_state & NFSSTA_DEAD) - return; + return 1; + if (nmp->nm_deadto_start == 0) + return 0; nfs_is_squishy(nmp); if (nmp->nm_curdeadtimeout <= 0) - return; + return 0; microuptime(&now); if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout) + return 0; + return 1; +} + +/* + * Call nfs_mount_zombie to remove most of the + * nfs state for the mount, and then ask to be forcibly unmounted. + * + * Assumes the nfs mount structure lock nm_lock is held. + */ + +void +nfs_mount_make_zombie(struct nfsmount *nmp) +{ + fsid_t fsid; + + if (!nmp) + return; + + if (nmp->nm_state & NFSSTA_DEAD) return; + printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? 
"squished " : ""); - nmp->nm_state |= NFSSTA_DEAD; - vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0); + fsid = vfs_statfs(nmp->nm_mountp)->f_fsid; + lck_mtx_unlock(&nmp->nm_lock); + nfs_mount_zombie(nmp, NFSSTA_DEAD); + vfs_event_signal(&fsid, VQ_DEAD, 0); + lck_mtx_lock(&nmp->nm_lock); } + /* * NFS callback channel socket state */ @@ -2972,11 +3004,11 @@ again: lck_mtx_lock(&nmp->nm_lock); while (!(nmp->nm_sockflags & NMSOCK_READY)) { /* don't bother waiting if the socket thread won't be reconnecting it */ - if (nmp->nm_state & NFSSTA_FORCE) { + if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) { error = EIO; break; } - if (NMFLAG(nmp, SOFT) && (nmp->nm_reconnect_start > 0)) { + if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (nmp->nm_reconnect_start > 0)) { struct timeval now; microuptime(&now); if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) { @@ -3094,11 +3126,11 @@ again: msg.msg_namelen = sendnam->sa_len; } error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen); -#ifdef NFS_SOCKET_DEBUGGING - if (error || (sentlen != req->r_mreqlen)) + if (error || (sentlen != req->r_mreqlen)) { NFS_SOCK_DBG("nfs_send: 0x%llx sent %d/%d error %d\n", - req->r_xid, (int)sentlen, (int)req->r_mreqlen, error); -#endif + req->r_xid, (int)sentlen, (int)req->r_mreqlen, error); + } + if (!error && (sentlen != req->r_mreqlen)) error = EWOULDBLOCK; needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen)); @@ -3265,6 +3297,7 @@ nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) mbuf_t m; int error = 0; int recv = 1; + int wup = 0; if (nmp->nm_sockflags & NMSOCK_CONNECTING) return; @@ -3295,19 +3328,16 @@ nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) nfs_request_match_reply(nmp, m); } - lck_mtx_lock(&nmp->nm_lock); - if (nmp->nm_nso == nso) { - /* still the same socket, so update socket's RPC parsing state */ - lck_mtx_unlock(&nmp->nm_lock); - lck_mtx_lock(&nso->nso_lock); - nso->nso_rrs = nrrs; - nso->nso_flags &= ~NSO_UPCALL; - lck_mtx_unlock(&nso->nso_lock); - if (nmp->nm_sockflags & NMSOCK_DISCONNECTING) - wakeup(&nmp->nm_sockflags); - } else { - lck_mtx_unlock(&nmp->nm_lock); - } + /* Update the sockets's rpc parsing state */ + lck_mtx_lock(&nso->nso_lock); + nso->nso_rrs = nrrs; + if (nso->nso_flags & NSO_DISCONNECTING) + wup = 1; + nso->nso_flags &= ~NSO_UPCALL; + lck_mtx_unlock(&nso->nso_lock); + if (wup) + wakeup(&nso->nso_flags); + #ifdef NFS_SOCKET_DEBUGGING if (!recv && (error != EWOULDBLOCK)) NFS_SOCK_DBG("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error); @@ -3335,6 +3365,9 @@ nfs_sock_poke(struct nfsmount *nmp) lck_mtx_lock(&nmp->nm_lock); if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) { + /* Nothing to poke */ + nmp->nm_sockflags &= ~NMSOCK_POKE; + wakeup(&nmp->nm_sockflags); lck_mtx_unlock(&nmp->nm_lock); return; } @@ -3347,6 +3380,10 @@ nfs_sock_poke(struct nfsmount *nmp) msg.msg_iovlen = 1; error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len); NFS_SOCK_DBG("nfs_sock_poke: error %d\n", error); + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_sockflags &= ~NMSOCK_POKE; + wakeup(&nmp->nm_sockflags); + lck_mtx_unlock(&nmp->nm_lock); nfs_is_dead(error, nmp); } @@ -3573,14 +3610,14 @@ nfs_request_create( req->r_flags = R_ALLOCATED; nmp = VFSTONFS(np ? 
NFSTOMP(np) : mp); - if (!nmp) { + if (nfs_mount_gone(nmp)) { if (newreq) FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ); return (ENXIO); } lck_mtx_lock(&nmp->nm_lock); - if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) == - (NFSSTA_FORCE|NFSSTA_TIMEO)) { + if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && + (nmp->nm_state & NFSSTA_TIMEO)) { lck_mtx_unlock(&nmp->nm_lock); mbuf_freem(nmrest->nmc_mhead); nmrest->nmc_mhead = NULL; @@ -3588,7 +3625,7 @@ nfs_request_create( FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ); return (ENXIO); } - + if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS)) OSAddAtomic64(1, &nfsstats.rpccnt[procnum]); if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL)) @@ -3596,6 +3633,7 @@ nfs_request_create( lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL); req->r_nmp = nmp; + nmp->nm_ref++; req->r_np = np; req->r_thread = thd; if (!thd) @@ -3642,23 +3680,30 @@ nfs_request_destroy(struct nfsreq *req) { struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; struct gss_seq *gsp, *ngsp; - struct timespec ts = { 1, 0 }; int clearjbtimeo = 0; + struct timespec ts = { 1, 0 }; if (!req || !(req->r_flags & R_INITTED)) return; req->r_flags &= ~R_INITTED; if (req->r_lflags & RL_QUEUED) nfs_reqdequeue(req); - if (req->r_achain.tqe_next != NFSREQNOLIST) { - /* still on an async I/O queue? */ + + if (req->r_achain.tqe_next != NFSREQNOLIST && + req->r_achain.tqe_next != NFSIODCOMPLETING) { + /* + * Still on an async I/O queue? + * %%% But which one, we may be on a local iod. + */ lck_mtx_lock(nfsiod_mutex); - if (nmp && (req->r_achain.tqe_next != NFSREQNOLIST)) { + if (nmp && req->r_achain.tqe_next != NFSREQNOLIST && + req->r_achain.tqe_next != NFSIODCOMPLETING) { TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain); req->r_achain.tqe_next = NFSREQNOLIST; } lck_mtx_unlock(nfsiod_mutex); } + lck_mtx_lock(&req->r_mtx); if (nmp) { lck_mtx_lock(&nmp->nm_lock); @@ -3691,9 +3736,11 @@ nfs_request_destroy(struct nfsreq *req) } lck_mtx_unlock(&nmp->nm_lock); } + /* Wait for the mount_sock_thread to finish with the resend */ while (req->r_flags & R_RESENDQ) msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts); lck_mtx_unlock(&req->r_mtx); + if (clearjbtimeo) nfs_up(nmp, req->r_thread, clearjbtimeo, NULL); if (req->r_mhead) @@ -3712,7 +3759,8 @@ nfs_request_destroy(struct nfsreq *req) nfs_gss_clnt_ctx_unref(req); if (req->r_wrongsec) FREE(req->r_wrongsec, M_TEMP); - + if (nmp) + nfs_mount_rele(nmp); lck_mtx_destroy(&req->r_mtx, nfs_request_grp); if (req->r_flags & R_ALLOCATED) FREE_ZONE(req, sizeof(*req), M_NFSREQ); @@ -3764,7 +3812,7 @@ nfs_request_add_header(struct nfsreq *req) } nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead); @@ -3773,10 +3821,10 @@ nfs_request_add_header(struct nfsreq *req) req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead); nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); lck_mtx_lock(&nmp->nm_lock); - if (NMFLAG(nmp, SOFT)) + if (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) req->r_retry = nmp->nm_retry; else req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ @@ -3802,7 +3850,7 @@ nfs_request_send(struct nfsreq *req, int wait) lck_mtx_lock(nfs_request_mutex); nmp = req->r_np ? 
NFSTONMP(req->r_np) : req->r_nmp; - if (!nmp) { + if (nfs_mount_gone(nmp)) { lck_mtx_unlock(nfs_request_mutex); return (ENXIO); } @@ -3930,8 +3978,8 @@ nfs_request_finish( */ if (!error) { if ((req->r_flags & R_TPRINTFMSG) || - (nmp && NMFLAG(nmp, SOFT) && - ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO))) + (nmp && (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && + ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE|NFSSTA_DEAD)) == NFSSTA_TIMEO))) nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again"); else nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL); @@ -4047,9 +4095,10 @@ nfs_request_finish( lck_mtx_unlock(&nmp->nm_lock); } nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO, - "resource temporarily unavailable (jukebox)"); + "resource temporarily unavailable (jukebox)", 0); } - if (NMFLAG(nmp, SOFT) && (req->r_delay == 30) && !(req->r_flags & R_NOINTR)) { + if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (req->r_delay == 30) && + !(req->r_flags & R_NOINTR)) { /* for soft mounts, just give up after a short while */ OSAddAtomic64(1, &nfsstats.rpctimeouts); nfs_softterm(req); @@ -4133,14 +4182,6 @@ nfs_request_finish( /* find the next flavor to try */ for(i=0; i < NX_MAX_SEC_FLAVORS; i++) if (req->r_wrongsec[i] != RPCAUTH_INVALID) { - if (((req->r_wrongsec[i] == RPCAUTH_KRB5P) || - (req->r_wrongsec[i] == RPCAUTH_KRB5I) || - (req->r_wrongsec[i] == RPCAUTH_KRB5)) && (req->r_gss_ctx && - (req->r_gss_ctx->gss_clnt_service == RPCSEC_GSS_SVC_SYS))) { - /* don't bother trying Kerberos if we've already got a fallback context */ - req->r_wrongsec[i] = RPCAUTH_INVALID; - continue; - } if (!srvcount) /* no server list, just try it */ break; /* check that it's in the server's list */ @@ -4291,7 +4332,7 @@ nfs_request2( if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req))) return (error); - req->r_flags |= (flags & R_OPTMASK); + req->r_flags |= (flags & (R_OPTMASK | R_SOFT)); if (si) req->r_secinfo = *si; @@ -4334,7 +4375,7 @@ nfs_request_gss( int *status) { struct nfsreq rq, *req = &rq; - int error; + int error, wait = 1; if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req))) return (error); @@ -4347,6 +4388,13 @@ nfs_request_gss( } nfs_gss_clnt_ctx_ref(req, cp); + /* + * Don't wait for a reply to a context destroy advisory + * to avoid hanging on a dead server. + */ + if (cp->gss_clnt_proc == RPCSEC_GSS_DESTROY) + wait = 0; + FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0); do { req->r_error = 0; @@ -4354,15 +4402,21 @@ nfs_request_gss( if ((error = nfs_request_add_header(req))) break; - if ((error = nfs_request_send(req, 1))) + if ((error = nfs_request_send(req, wait))) break; + if (!wait) + break; + nfs_request_wait(req); if ((error = nfs_request_finish(req, nmrepp, status))) break; } while (req->r_flags & R_RESTART); FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error); + + nfs_gss_clnt_ctx_unref(req); nfs_request_rele(req); + return (error); } @@ -4409,7 +4463,8 @@ nfs_request_async( int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? 
PCATCH : 0; struct timespec ts = { 2, 0 }; while (!(req->r_flags & R_SENT)) { - if ((req->r_flags & R_RESENDQ) && ((nmp = req->r_nmp))) { + nmp = req->r_nmp; + if ((req->r_flags & R_RESENDQ) && !nfs_mount_gone(nmp)) { lck_mtx_lock(&nmp->nm_lock); if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) { /* @@ -4440,12 +4495,14 @@ nfs_request_async( } sent = req->r_flags & R_SENT; lck_mtx_unlock(&req->r_mtx); - if (error && req->r_callback.rcb_func && !sent) + if (error && req->r_callback.rcb_func && !sent) { nfs_request_rele(req); + } } FSDBG(274, R_XID32(req->r_xid), np, procnum, error); if (error || req->r_callback.rcb_func) nfs_request_rele(req); + return (error); } @@ -4467,6 +4524,7 @@ nfs_request_async_finish( req->r_flags |= R_ASYNCWAIT; while (req->r_flags & R_RESENDQ) { /* wait until the request is off the resend queue */ struct timespec ts = { 2, 0 }; + if ((nmp = req->r_nmp)) { lck_mtx_lock(&nmp->nm_lock); if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) { @@ -4528,7 +4586,6 @@ nfs_request_async_finish( void nfs_request_async_cancel(struct nfsreq *req) { - nfs_reqdequeue(req); FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E); nfs_request_rele(req); } @@ -4542,7 +4599,7 @@ nfs_softterm(struct nfsreq *req) struct nfsmount *nmp = req->r_nmp; req->r_flags |= R_SOFTTERM; req->r_error = ETIMEDOUT; - if (!(req->r_flags & R_CWND) || !nmp) + if (!(req->r_flags & R_CWND) || nfs_mount_gone(nmp)) return; /* update congestion window */ req->r_flags &= ~R_CWND; @@ -4634,6 +4691,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) struct timeval now; TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue; +restart: lck_mtx_lock(nfs_request_mutex); req = TAILQ_FIRST(&nfs_reqq); if (req == NULL) { /* no requests - turn timer off */ @@ -4648,8 +4706,10 @@ nfs_request_timer(__unused void *param0, __unused void *param1) microuptime(&now); for ( ; req != NULL ; req = nfs_reqnext(req)) { nmp = req->r_nmp; - if (!nmp) /* unmounted */ + if (nmp == NULL) { + NFS_SOCK_DBG("Found a request without a mount!\n"); continue; + } if (req->r_error || req->r_nmrep.nmc_mhead) continue; if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) { @@ -4673,7 +4733,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) { req->r_lastmsg = now.tv_sec; nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO, - "not responding"); + "not responding", 1); req->r_flags |= R_TPRINTFMSG; lck_mtx_lock(&nmp->nm_lock); if (!(nmp->nm_state & NFSSTA_MOUNTED)) { @@ -4696,7 +4756,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) * Put a reasonable limit on the maximum timeout, * and reduce that limit when soft mounts get timeouts or are in reconnect.
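The retransmit path here grows a per-request timeout and clamps it to a ceiling (the maxtime computed above), with a smaller effective ceiling for soft-style requests. The following user-space sketch shows that clamped-backoff shape in isolation; the demo_* names and constants are hypothetical stand-ins, not the kernel's actual NFS_MINTIMEO/NFS_MAXTIMEO values, and the doubling is a simplification of the kernel's backoff table.

#include <stdio.h>

#define DEMO_MINTIMEO	1	/* timeout floor, in ticks (illustrative) */
#define DEMO_MAXTIMEO	60	/* hard ceiling (illustrative) */
#define DEMO_SOFTMAX	10	/* reduced ceiling for soft-style requests */

/* Double the timeout per retransmit, clamped to the applicable ceiling. */
static int
demo_backoff(int base, int rexmit, int soft)
{
	int maxtime = soft ? DEMO_SOFTMAX : DEMO_MAXTIMEO;
	int timeo = (base > DEMO_MINTIMEO) ? base : DEMO_MINTIMEO;

	while (rexmit-- > 0 && timeo < maxtime)
		timeo *= 2;
	return ((timeo > maxtime) ? maxtime : timeo);
}

int
main(void)
{
	int r;

	for (r = 0; r < 8; r++)
		printf("rexmit %d: hard %2d soft %2d\n",
		    r, demo_backoff(1, r, 0), demo_backoff(1, r, 1));
	return (0);
}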
*/ - if (!NMFLAG(nmp, SOFT) && !nfs_can_squish(nmp)) + if (!(NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && !nfs_can_squish(nmp)) maxtime = NFS_MAXTIMEO; else if ((req->r_flags & (R_SETUP|R_RECOVER)) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8))) @@ -4731,6 +4791,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) if (timeo > maxtime) timeo = maxtime; if (req->r_rtt <= timeo) { + NFS_SOCK_DBG("nfs timeout: req time %d and timeo is %d continue\n", req->r_rtt, timeo); lck_mtx_unlock(&nmp->nm_lock); lck_mtx_unlock(&req->r_mtx); continue; @@ -4742,7 +4803,48 @@ nfs_request_timer(__unused void *param0, __unused void *param1) (now.tv_sec - req->r_start)*NFS_HZ, maxtime); if (nmp->nm_timeouts < 8) nmp->nm_timeouts++; - nfs_mount_check_dead_timeout(nmp); + if (nfs_mount_check_dead_timeout(nmp)) { + /* Unbusy this request */ + req->r_lflags &= ~RL_BUSY; + if (req->r_lflags & RL_WAITING) { + req->r_lflags &= ~RL_WAITING; + wakeup(&req->r_lflags); + } + lck_mtx_unlock(&req->r_mtx); + + /* No need to poke this mount */ + if (nmp->nm_sockflags & NMSOCK_POKE) { + nmp->nm_sockflags &= ~NMSOCK_POKE; + TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq); + } + /* Release our lock state, so we can become a zombie */ + lck_mtx_unlock(nfs_request_mutex); + + /* + * Note nfs_mount_make_zombie(nmp) must be + * called with nm_lock held. After doing some + * work we release nm_lock in + * nfs_mount_make_zombie without acquiring any + * other locks. (Later, in nfs_mount_zombie we + * will acquire nfs_request_mutex, r_mtx, + * nm_lock in that order). So we should not be + * introducing deadlock here. We take a reference + * on the mount so that it's still there when we + * release the lock. + */ + nmp->nm_ref++; + nfs_mount_make_zombie(nmp); + lck_mtx_unlock(&nmp->nm_lock); + nfs_mount_rele(nmp); + + /* + * All the requests for this mount have now been + * removed from the request queue. Restart to + * process the remaining mounts. + */ + goto restart; + } + /* if it's been a few seconds, try poking the socket */ if ((nmp->nm_sotype == SOCK_STREAM) && ((now.tv_sec - req->r_start) >= 3) && @@ -4755,7 +4857,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) } /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout.
*/ - if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) && + if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER|R_SOFT))) && ((req->r_rexmit >= req->r_retry) || /* too many */ ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */ OSAddAtomic64(1, &nfsstats.rpctimeouts); @@ -4765,7 +4867,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) /* make sure we note the unresponsive server */ /* (maxtime may be less than tprintf delay) */ nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO, - "not responding"); + "not responding", 1); req->r_lastmsg = now.tv_sec; req->r_flags |= R_TPRINTFMSG; } else { @@ -4821,10 +4923,6 @@ nfs_request_timer(__unused void *param0, __unused void *param1) while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) { TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq); nfs_sock_poke(nmp); - lck_mtx_lock(&nmp->nm_lock); - nmp->nm_sockflags &= ~NMSOCK_POKE; - wakeup(&nmp->nm_sockflags); - lck_mtx_unlock(&nmp->nm_lock); } nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY); @@ -4855,7 +4953,7 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke proc_t p; int error = 0; - if (nmp == NULL) + if (!nmp) return (ENXIO); if (req && (req->r_flags & R_SOFTTERM)) @@ -4868,7 +4966,7 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke if (nmp->nm_state & NFSSTA_FORCE) { /* If a force unmount is in progress then fail. */ error = EIO; - } else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) { + } else if (vfs_isforce(nmp->nm_mountp)) { /* Someone is unmounting us, go soft and mark it. */ NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT); nmp->nm_state |= NFSSTA_FORCE; @@ -4928,7 +5026,7 @@ nfs_sndlock(struct nfsreq *req) int error = 0, slpflag = 0; struct timespec ts = { 0, 0 }; - if (nmp == NULL) + if (nfs_mount_gone(nmp)) return (ENXIO); lck_mtx_lock(&nmp->nm_lock); @@ -4963,7 +5061,7 @@ nfs_sndunlock(struct nfsreq *req) struct nfsmount *nmp = req->r_nmp; int *statep, wake = 0; - if (nmp == NULL) + if (!nmp) return; lck_mtx_lock(&nmp->nm_lock); statep = &nmp->nm_state; @@ -5032,9 +5130,21 @@ nfs_aux_request( } if (sotype == SOCK_STREAM) { - on = 4; /* don't wait too long for the socket to connect */ - sock_setsockopt(newso, IPPROTO_TCP, TCP_CONNECTIONTIMEOUT, &on, sizeof(on)); - error = sock_connect(newso, saddr, 0); +# define NFS_AUX_CONNECTION_TIMEOUT 4 /* 4 second timeout for connections */ + int count = 0; + + error = sock_connect(newso, saddr, MSG_DONTWAIT); + if (error == EINPROGRESS) + error = 0; + nfsmout_if(error); + + while ((error = sock_connectwait(newso, &tv)) == EINPROGRESS) { + /* After NFS_AUX_CONNECTION_TIMEOUT bail */ + if (++count >= NFS_AUX_CONNECTION_TIMEOUT) { + error = ETIMEDOUT; + break; + } + } nfsmout_if(error); } if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) || @@ -5356,7 +5466,7 @@ nfs_is_squishy(struct nfsmount *nmp) int squishy = 0; int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT; - NFS_SOCK_DBG("%s: nm_curdeadtiemout = %d, nfs_is_mobile = %d\n", + NFS_SOCK_DBG("%s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n", vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile); if (!nfs_can_squish(nmp)) @@ -5396,45 +5506,77 @@ out: * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise. 
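The nfs_aux_request change in the hunk above replaces a TCP_CONNECTIONTIMEOUT socket option with an explicit nonblocking connect followed by a bounded wait loop around sock_connectwait(). Below is a minimal user-space analogue of the same shape using poll() in one-second slices; demo_connect_timed and DEMO_CONNECT_TIMEOUT are illustrative names (the constant mirrors NFS_AUX_CONNECTION_TIMEOUT), not the kernel KPI.

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/socket.h>

#define DEMO_CONNECT_TIMEOUT	4	/* seconds, like NFS_AUX_CONNECTION_TIMEOUT */

/* Returns 0 on success, or an errno value on failure/timeout. */
static int
demo_connect_timed(int s, const struct sockaddr *sa, socklen_t salen)
{
	int count, flags = fcntl(s, F_GETFL, 0);

	fcntl(s, F_SETFL, flags | O_NONBLOCK);
	if (connect(s, sa, salen) == 0)
		return (0);			/* connected immediately */
	if (errno != EINPROGRESS)
		return (errno);			/* hard failure */

	/* wait in one-second slices; bail after DEMO_CONNECT_TIMEOUT */
	for (count = 0; count < DEMO_CONNECT_TIMEOUT; count++) {
		struct pollfd pfd = { .fd = s, .events = POLLOUT };
		int n = poll(&pfd, 1, 1000);

		if (n > 0) {
			int err = 0;
			socklen_t len = sizeof(err);

			getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &len);
			return (err);		/* 0 means the connect completed */
		}
		if (n < 0 && errno != EINTR)
			return (errno);
	}
	return (ETIMEDOUT);
}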
*/ -static int -nfs_is_dead_lock(int error, struct nfsmount *nmp) +int +nfs_is_dead(int error, struct nfsmount *nmp) { - if (nmp->nm_state & NFSSTA_DEAD) + fsid_t fsid; + + lck_mtx_lock(&nmp->nm_lock); + if (nmp->nm_state & NFSSTA_DEAD) { + lck_mtx_unlock(&nmp->nm_lock); return (1); + } if ((error != ENETUNREACH && error != EHOSTUNREACH && error != EADDRNOTAVAIL) || - !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) + !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) { + lck_mtx_unlock(&nmp->nm_lock); return (0); + } if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) { printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); - nmp->nm_state |= NFSSTA_DEAD; - vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0); + fsid = vfs_statfs(nmp->nm_mountp)->f_fsid; + lck_mtx_unlock(&nmp->nm_lock); + nfs_mount_zombie(nmp, NFSSTA_DEAD); + vfs_event_signal(&fsid, VQ_DEAD, 0); return (1); } + lck_mtx_unlock(&nmp->nm_lock); return (0); } +/* + * If we've experienced timeouts and we're not really a + * classic hard mount, then just return cached data to + * the caller instead of likely hanging on an RPC. + */ int -nfs_is_dead(int error, struct nfsmount *nmp) +nfs_use_cache(struct nfsmount *nmp) { - int is_dead; + /* + *%%% We always let mobile users go to the cache, + * perhaps we should not even require them to have + * a timeout? + */ + int cache_ok = (nfs_is_mobile || NMFLAG(nmp, SOFT) || + nfs_can_squish(nmp) || nmp->nm_deadtimeout); - lck_mtx_lock(&nmp->nm_lock); - is_dead = nfs_is_dead_lock(error, nmp); - lck_mtx_unlock(&nmp->nm_lock); + int timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; + + /* + * So if we have a timeout and we're not really a hard mount, + * return 1 so that we serve things out of the cache. + */ - return (is_dead); + return ((nmp->nm_state & timeoutmask) && cache_ok); } +/* + * Log a message that the nfs or lockd server is unresponsive. If we + * can be squished, or our dead timeout has expired and we're not + * holding state, mark the mount dead, remove our mount state, and + * ask to be unmounted. If we are holding state, we're being called + * from nfs_request_timer and will soon detect that we need to + * unmount. + */ void -nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg) +nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg, int holding_state) { int timeoutmask, wasunresponsive, unresponsive, softnobrowse; - uint32_t do_vfs_signal; + uint32_t do_vfs_signal = 0; struct timeval now; - if (nmp == NULL) + if (nfs_mount_gone(nmp)) return; lck_mtx_lock(&nmp->nm_lock); @@ -5463,17 +5605,17 @@ nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *m if (!wasunresponsive) { nmp->nm_deadto_start = now.tv_sec; nfs_mount_sock_thread_wake(nmp); - } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout) { + } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout && !holding_state) { if (!(nmp->nm_state & NFSSTA_DEAD)) printf("nfs server %s: %sdead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ?
"squished " : ""); - nmp->nm_state |= NFSSTA_DEAD; + do_vfs_signal = VQ_DEAD; } } lck_mtx_unlock(&nmp->nm_lock); - if (nmp->nm_state & NFSSTA_DEAD) - do_vfs_signal = VQ_DEAD; + if (do_vfs_signal == VQ_DEAD && !(nmp->nm_state & NFSSTA_DEAD)) + nfs_mount_zombie(nmp, NFSSTA_DEAD); else if (softnobrowse || wasunresponsive || !unresponsive) do_vfs_signal = 0; else @@ -5490,7 +5632,7 @@ nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg) int timeoutmask, wasunresponsive, unresponsive, softnobrowse; int do_vfs_signal; - if (nmp == NULL) + if (nfs_mount_gone(nmp)) return; if (msg) diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index 2ed209532..3872e7ff0 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -1624,7 +1624,8 @@ nfs_attrcachetimeout(nfsnode_t np) int isdir; uint32_t timeo; - if (!(nmp = NFSTONMP(np))) + nmp = NFSTONMP(np); + if (nfs_mount_gone(nmp)) return (0); isdir = vnode_isdir(NFSTOV(np)); @@ -1669,6 +1670,7 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags) struct nfs_vattr *nvap; struct timeval nowup; int32_t timeo; + struct nfsmount *nmp; /* Check if the attributes are valid. */ if (!NATTRVALID(np) || ((flags & NGA_ACL) && !NACLVALID(np))) { @@ -1677,18 +1679,27 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags) return (ENOENT); } - /* Verify the cached attributes haven't timed out. */ - timeo = nfs_attrcachetimeout(np); - microuptime(&nowup); - if ((nowup.tv_sec - np->n_attrstamp) >= timeo) { - FSDBG(528, np, 0, 0xffffff02, ENOENT); - OSAddAtomic64(1, &nfsstats.attrcache_misses); - return (ENOENT); - } - if ((flags & NGA_ACL) && ((nowup.tv_sec - np->n_aclstamp) >= timeo)) { - FSDBG(528, np, 0, 0xffffff02, ENOENT); - OSAddAtomic64(1, &nfsstats.attrcache_misses); - return (ENOENT); + nmp = NFSTONMP(np); + if (nfs_mount_gone(nmp)) + return (ENXIO); + /* + * Verify the cached attributes haven't timed out. + * If the server isn't responding, skip the check + * and return cached attributes. + */ + if (!nfs_use_cache(nmp)) { + timeo = nfs_attrcachetimeout(np); + microuptime(&nowup); + if ((nowup.tv_sec - np->n_attrstamp) >= timeo) { + FSDBG(528, np, 0, 0xffffff02, ENOENT); + OSAddAtomic64(1, &nfsstats.attrcache_misses); + return (ENOENT); + } + if ((flags & NGA_ACL) && ((nowup.tv_sec - np->n_aclstamp) >= timeo)) { + FSDBG(528, np, 0, 0xffffff02, ENOENT); + OSAddAtomic64(1, &nfsstats.attrcache_misses); + return (ENOENT); + } } nvap = &np->n_vattr; @@ -1997,6 +2008,40 @@ nfs_printf(int facility, int level, const char *fmt, ...) va_end(ap); } +/* Is a mount gone away? */ +int +nfs_mount_gone(struct nfsmount *nmp) +{ + return (!nmp || vfs_isforce(nmp->nm_mountp) || (nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD))); +} + +/* + * Return some of the more significant mount options + * as a string, e.g. "'ro,hard,intr,tcp,vers=3,sec=krb5,deadtimeout=0' + */ +int +nfs_mountopts(struct nfsmount *nmp, char *buf, int buflen) +{ + int c; + + c = snprintf(buf, buflen, "%s,%s,%s,%s,vers=%d,sec=%s,%sdeadtimeout=%d", + (vfs_flags(nmp->nm_mountp) & MNT_RDONLY) ? "ro" : "rw", + NMFLAG(nmp, SOFT) ? "soft" : "hard", + NMFLAG(nmp, INTR) ? "intr" : "nointr", + nmp->nm_sotype == SOCK_STREAM ? "tcp" : "udp", + nmp->nm_vers, + nmp->nm_auth == RPCAUTH_KRB5 ? "krb5" : + nmp->nm_auth == RPCAUTH_KRB5I ? "krb5i" : + nmp->nm_auth == RPCAUTH_KRB5P ? 
"krb5p" : + nmp->nm_auth == RPCAUTH_SYS ? "sys" : "none", + nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED ? "locks," : + nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED ? "nolocks," : + nmp->nm_lockmode == NFS_LOCK_MODE_LOCAL ? "locallocks," : "", + nmp->nm_deadtimeout); + + return (c > buflen ? ENOMEM : 0); +} + #endif /* NFSCLIENT */ /* diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index ceeb803ff..5fa063cea 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -169,7 +169,7 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLA SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, ""); SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, ""); - +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, ""); #endif /* NFSCLIENT */ @@ -200,6 +200,68 @@ SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | C #if NFSCLIENT +static int +mapname2id(struct nfs_testmapid *map) +{ + int error; + + error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag); + if (error) + return (error); + + if (map->ntm_grpflag) + error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id); + else + error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id); + + return (error); +} + +static int +mapid2name(struct nfs_testmapid *map) +{ + int error; + int len = sizeof(map->ntm_name); + + if (map->ntm_grpflag) + error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid); + else + error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid); + + if (error) + return (error); + + error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag); + + return (error); + +} + + +static int +nfsclnt_testidmap(proc_t p, user_addr_t argp) +{ + struct nfs_testmapid mapid; + int error, coerror; + + /* Let root make this call. */ + error = proc_suser(p); + if (error) + return (error); + + error = copyin(argp, &mapid, sizeof(mapid)); + if (error) + return (error); + if (mapid.ntm_name2id) + error = mapname2id(&mapid); + else + error = mapid2name(&mapid); + + coerror = copyout(&mapid, argp, sizeof(mapid)); + + return (error ? error : coerror); +} + int nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) { @@ -215,12 +277,16 @@ nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) case NFSCLNT_LOCKDNOTIFY: error = nfslockdnotify(p, uap->argp); break; + case NFSCLNT_TESTIDMAP: + error = nfsclnt_testidmap(p, uap->argp); + break; default: error = EINVAL; } return (error); } + /* * Asynchronous I/O threads for client NFS. * They do read-ahead and write-behind operations on the block I/O cache. @@ -337,6 +403,11 @@ nfsiod_continue(int error) worktodo: while ((nmp = niod->niod_nmp)) { + if (nmp == NULL){ + niod->niod_nmp = NULL; + break; + } + /* * Service this mount's async I/O queue. 
* @@ -355,7 +426,9 @@ worktodo: /* process the queue */ TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { TAILQ_REMOVE(&iodq, req, r_achain); - req->r_achain.tqe_next = NFSREQNOLIST; + lck_mtx_lock(nfsiod_mutex); + req->r_achain.tqe_next = NFSIODCOMPLETING; + lck_mtx_unlock(nfsiod_mutex); req->r_callback.rcb_func(req); } @@ -363,8 +436,11 @@ worktodo: lck_mtx_lock(nfsiod_mutex); morework = !TAILQ_EMPTY(&nmp->nm_iodq); if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { - /* we're going to stop working on this mount */ - if (morework) /* mount still needs more work so queue it up */ + /* + * we're going to stop working on this mount; if the + * mount still needs more work, queue it up + */ + if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); nmp->nm_niod = NULL; niod->niod_nmp = NULL; @@ -375,6 +451,7 @@ worktodo: if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); + niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST; } if (niod->niod_nmp) goto worktodo; diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index 50af3f4c3..ad7d5a271 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -119,6 +119,8 @@ #include +#define NFS_VFS_DBG(...) NFS_DBG(NFS_FAC_VFS, 7, ## __VA_ARGS__) + /* * NFS client globals */ @@ -180,6 +182,7 @@ static int nfs_mount_diskless(struct nfs_dlmount *, const char *, int, vnode_t * static int nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t); #endif /* NO_MOUNT_PRIVATE */ int nfs_mount_connect(struct nfsmount *); +void nfs_mount_drain_and_cleanup(struct nfsmount *); void nfs_mount_cleanup(struct nfsmount *); int nfs_mountinfo_assemble(struct nfsmount *, struct xdrbuf *); int nfs4_mount_update_path_with_symlink(struct nfsmount *, struct nfs_fs_path *, uint32_t, fhandle_t *, int *, fhandle_t *, vfs_context_t); @@ -373,7 +376,10 @@ nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); nfsm_chain_build_done(error, &nmreq); nfsmout_if(error); - error = nfs_request(np, NULL, &nmreq, NFSPROC_FSSTAT, ctx, NULL, &nmrep, &xid, &status); + error = nfs_request2(np, NULL, &nmreq, NFSPROC_FSSTAT, vfs_context_thread(ctx), + vfs_context_ucred(ctx), NULL, R_SOFT, &nmrep, &xid, &status); + if (error == ETIMEDOUT) + goto nfsmout; if ((lockerror = nfs_node_lock(np))) error = lockerror; if (nfsvers == NFS_VER3) @@ -461,7 +467,9 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); - error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status); + error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, + vfs_context_thread(ctx), vfs_context_ucred(ctx), + NULL, R_SOFT, &nmrep, &xid, &status); nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -502,7 +510,8 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx) uint32_t bsize; int error = 0, nfsvers; - if (!(nmp = VFSTONFS(mp))) + nmp = VFSTONFS(mp); + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -537,7 +546,7
@@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx) lck_mtx_unlock(&nmp->nm_lock); } - if (refresh) + if (refresh && !nfs_use_cache(nmp)) error = nmp->nm_funcs->nf_update_statfs(nmp, ctx); if ((error == ESTALE) || (error == ETIMEDOUT)) error = 0; @@ -1309,6 +1318,7 @@ nfs_mount_diskless_private( strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSNAMELEN-1); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; + vp = NULLVP; mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get()); (void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0); (void) copystr(ndmntp->ndm_mntfrom, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0); @@ -1424,6 +1434,7 @@ nfs_mount_diskless_private( /* do the mount */ if ((error = mountnfs(xdrbuf, mp, ctx, &vp))) { printf("nfs_mountroot: mount %s failed: %d\n", mntname, error); + vnode_put(mp->mnt_vnodecovered); mount_list_lock(); vfsp->vfc_refcount--; mount_list_unlock(); @@ -2664,6 +2675,7 @@ mountnfs( TAILQ_INIT(&nmp->nm_resendq); TAILQ_INIT(&nmp->nm_iodq); TAILQ_INIT(&nmp->nm_gsscl); + TAILQ_INIT(&nmp->nm_gssnccl); LIST_INIT(&nmp->nm_monlist); vfs_setfsprivate(mp, nmp); vfs_getnewfsid(mp); @@ -2675,6 +2687,7 @@ mountnfs( nmp->nm_args = xdrbuf; /* set up defaults */ + nmp->nm_ref = 0; nmp->nm_vers = 0; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; @@ -2697,6 +2710,7 @@ mountnfs( nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; nmp->nm_auth = RPCAUTH_SYS; + nmp->nm_iodlink.tqe_next = NFSNOLIST; nmp->nm_deadtimeout = 0; nmp->nm_curdeadtimeout = 0; NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL); @@ -3315,7 +3329,7 @@ nfs_mirror_mount_domount(vnode_t dvp, vnode_t vp, vfs_context_t ctx) xb_init(&xbnew, 0); - if (!nmp || (nmp->nm_state & NFSSTA_FORCE)) + if (!nmp || (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) return (ENXIO); /* allocate a couple path buffers we need */ @@ -4182,7 +4196,9 @@ nfs_vfs_unmount( vflush(mp, NULLVP, FORCECLOSE); - nfs_mount_cleanup(nmp); + /* Wait for all other references to be released and free the mount */ + nfs_mount_drain_and_cleanup(nmp); + return (0); } @@ -4234,11 +4250,40 @@ nfs_fs_locations_cleanup(struct nfs_fs_locations *nfslsp) nfslsp->nl_locations = NULL; } +void +nfs_mount_rele(struct nfsmount *nmp) +{ + int wup = 0; + + lck_mtx_lock(&nmp->nm_lock); + if (nmp->nm_ref < 1) + panic("nfs zombie mount underflow\n"); + nmp->nm_ref--; + if (nmp->nm_ref == 0) + wup = nmp->nm_state & NFSSTA_MOUNT_DRAIN; + lck_mtx_unlock(&nmp->nm_lock); + if (wup) + wakeup(&nmp->nm_ref); +} + +void +nfs_mount_drain_and_cleanup(struct nfsmount *nmp) +{ + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_state |= NFSSTA_MOUNT_DRAIN; + while (nmp->nm_ref > 0) { + msleep(&nmp->nm_ref, &nmp->nm_lock, PZERO-1, "nfs_mount_drain", NULL); + } + assert(nmp->nm_ref == 0); + lck_mtx_unlock(&nmp->nm_lock); + nfs_mount_cleanup(nmp); +} + /* - * cleanup/destroy an nfsmount + * nfs_mount_zombie */ void -nfs_mount_cleanup(struct nfsmount *nmp) +nfs_mount_zombie(struct nfsmount *nmp, int nm_state_flags) { struct nfsreq *req, *treq; struct nfs_reqqhead iodq; @@ -4247,6 +4292,11 @@ nfs_mount_cleanup(struct nfsmount *nmp) nfsnode_t np; int docallback; + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_state |= nm_state_flags; + nmp->nm_ref++; + lck_mtx_unlock(&nmp->nm_lock); + /* stop callbacks */ if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid) nfs4_mount_callback_shutdown(nmp); @@ -4261,23 +4311,19 @@ nfs_mount_cleanup(struct nfsmount *nmp) /* Have the socket thread send the unmount 
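The nfs_mount_rele / nfs_mount_drain_and_cleanup pair added above is a classic reference-count-and-drain teardown: releasers drop nm_ref and wake the drainer when it hits zero, while unmount sets a drain flag and sleeps until the last reference is gone. A compact user-space sketch of the same pattern follows, with pthread primitives standing in for the kernel's lck_mtx/msleep/wakeup; all demo_* names are invented for illustration.

#include <pthread.h>
#include <stdio.h>

struct demo_mount {
	pthread_mutex_t lock;
	pthread_cond_t drained;	/* plays the role of wakeup(&nmp->nm_ref) */
	int ref;
	int draining;		/* like NFSSTA_MOUNT_DRAIN */
};

static void
demo_mount_rele(struct demo_mount *mp)
{
	pthread_mutex_lock(&mp->lock);
	if (mp->ref < 1)
		printf("demo zombie mount underflow\n");	/* kernel panics here */
	if (--mp->ref == 0 && mp->draining)
		pthread_cond_signal(&mp->drained);
	pthread_mutex_unlock(&mp->lock);
}

static void
demo_mount_drain(struct demo_mount *mp)
{
	pthread_mutex_lock(&mp->lock);
	mp->draining = 1;
	while (mp->ref > 0)	/* sleep until the last reference is dropped */
		pthread_cond_wait(&mp->drained, &mp->lock);
	pthread_mutex_unlock(&mp->lock);
	/* now safe to tear down: no other thread can still hold a pointer */
}

The design point mirrored here is that requests take a reference at creation (nm_ref++ in nfs_request_create) and drop it in nfs_request_destroy, so unmount can't free the mount while any request still points at it.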
RPC, if requested/appropriate. */ if ((nmp->nm_vers < NFS_VER4) && (nmp->nm_state & NFSSTA_MOUNTED) && - !(nmp->nm_state & NFSSTA_FORCE) && NMFLAG(nmp, CALLUMNT)) + !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && NMFLAG(nmp, CALLUMNT)) nfs_mount_sock_thread_wake(nmp); /* wait for the socket thread to terminate */ - while (nmp->nm_sockthd) { + while (nmp->nm_sockthd && current_thread() != nmp->nm_sockthd) { wakeup(&nmp->nm_sockthd); msleep(&nmp->nm_sockthd, &nmp->nm_lock, PZERO-1, "nfswaitsockthd", &ts); } - lck_mtx_unlock(&nmp->nm_lock); /* tear down the socket */ nfs_disconnect(nmp); - if (nmp->nm_mountp) - vfs_setfsprivate(nmp->nm_mountp, NULL); - lck_mtx_lock(&nmp->nm_lock); if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid) { @@ -4294,10 +4340,6 @@ nfs_mount_cleanup(struct nfsmount *nmp) thread_call_free(nmp->nm_renew_timer); } - if (nmp->nm_saddr) - FREE(nmp->nm_saddr, M_SONAME); - if ((nmp->nm_vers < NFS_VER4) && nmp->nm_rqsaddr) - FREE(nmp->nm_rqsaddr, M_SONAME); lck_mtx_unlock(&nmp->nm_lock); if (nmp->nm_state & NFSSTA_MOUNTED) @@ -4307,8 +4349,10 @@ nfs_mount_cleanup(struct nfsmount *nmp) break; case NFS_LOCK_MODE_ENABLED: default: - if (nmp->nm_vers <= NFS_VER3) + if (nmp->nm_vers <= NFS_VER3) { nfs_lockd_mount_unregister(nmp); + nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED; + } break; } @@ -4330,27 +4374,14 @@ nfs_mount_cleanup(struct nfsmount *nmp) lck_mtx_lock(nfs_request_mutex); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { if (req->r_nmp == nmp) { - lck_mtx_lock(&req->r_mtx); - req->r_nmp = NULL; - lck_mtx_unlock(&req->r_mtx); - if (req->r_callback.rcb_func) { + if (req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT)) { /* async I/O RPC needs to be finished */ lck_mtx_lock(nfsiod_mutex); - if (req->r_achain.tqe_next == NFSREQNOLIST) + if (req->r_achain.tqe_next == NFSREQNOLIST) { TAILQ_INSERT_TAIL(&iodq, req, r_achain); - lck_mtx_unlock(nfsiod_mutex); - } - lck_mtx_lock(&req->r_mtx); - lck_mtx_lock(&nmp->nm_lock); - if (req->r_flags & R_RESENDQ) { - if (req->r_rchain.tqe_next != NFSREQNOLIST) { - TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); - req->r_rchain.tqe_next = NFSREQNOLIST; } - req->r_flags &= ~R_RESENDQ; + lck_mtx_unlock(nfsiod_mutex); } - lck_mtx_unlock(&nmp->nm_lock); - lck_mtx_unlock(&req->r_mtx); wakeup(req); } } @@ -4358,11 +4389,15 @@ nfs_mount_cleanup(struct nfsmount *nmp) /* finish any async I/O RPCs queued up */ lck_mtx_lock(nfsiod_mutex); + if (nmp->nm_iodlink.tqe_next != NFSNOLIST) + TAILQ_REMOVE(&nfsiodmounts, nmp, nm_iodlink); TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); lck_mtx_unlock(nfsiod_mutex); TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { TAILQ_REMOVE(&iodq, req, r_achain); - req->r_achain.tqe_next = NFSREQNOLIST; + lck_mtx_lock(nfsiod_mutex); + req->r_achain.tqe_next = NFSIODCOMPLETING; + lck_mtx_unlock(nfsiod_mutex); lck_mtx_lock(&req->r_mtx); req->r_error = ENXIO; docallback = !(req->r_flags & R_WAITSENT); @@ -4395,6 +4430,41 @@ nfs_mount_cleanup(struct nfsmount *nmp) } lck_mtx_unlock(&nmp->nm_lock); } + + nfs_mount_rele(nmp); +} + +/* + * cleanup/destroy an nfsmount + */ +void +nfs_mount_cleanup(struct nfsmount *nmp) +{ + if (!nmp) + return; + + nfs_mount_zombie(nmp, 0); + + NFS_VFS_DBG("Unmounting %s from %s\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, + vfs_statfs(nmp->nm_mountp)->f_mntonname); + NFS_VFS_DBG("nfs state = %x\n", nmp->nm_state); + NFS_VFS_DBG("nfs socket flags = %x\n", nmp->nm_sockflags); + NFS_VFS_DBG("nfs mount ref count is %d\n", nmp->nm_ref); + NFS_VFS_DBG("mount ref count is %d\n", 
nmp->nm_mountp->mnt_count); + + if (nmp->nm_mountp) + vfs_setfsprivate(nmp->nm_mountp, NULL); + + lck_mtx_lock(&nmp->nm_lock); + if (nmp->nm_ref) + panic("Someone has grabbed a ref %d\n", nmp->nm_ref); + + if (nmp->nm_saddr) + FREE(nmp->nm_saddr, M_SONAME); + if ((nmp->nm_vers < NFS_VER4) && nmp->nm_rqsaddr) + FREE(nmp->nm_rqsaddr, M_SONAME); + if (IS_VALID_CRED(nmp->nm_mcred)) kauth_cred_unref(&nmp->nm_mcred); @@ -4409,6 +4479,9 @@ nfs_mount_cleanup(struct nfsmount *nmp) if (nmp->nm_args) xb_free(nmp->nm_args); + + lck_mtx_unlock(&nmp->nm_lock); + lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp); if (nmp->nm_fh) FREE(nmp->nm_fh, M_TEMP); @@ -4665,7 +4738,8 @@ nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t c uid_t euid = kauth_cred_getuid(vfs_context_ucred(ctx)); struct dqblk *dqb = (struct dqblk*)datap; - if (!(nmp = VFSTONFS(mp))) + nmp = VFSTONFS(mp); + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -5119,15 +5193,20 @@ int nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen, vfs_context_t ctx) { - int error = 0, val, softnobrowse; + int error = 0, val; + int softnobrowse; struct sysctl_req *req = NULL; union union_vfsidctl vc; mount_t mp; struct nfsmount *nmp = NULL; struct vfsquery vq; + struct nfsreq *rq; boolean_t is_64_bit; fsid_t fsid; struct xdrbuf xb; + struct netfs_status *nsp = NULL; + int timeoutmask; + uint pos, totlen, count, numThreads; #if NFSSERVER struct nfs_exportfs *nxfs; struct nfs_export *nx; @@ -5139,7 +5218,7 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, struct nfs_user_stat_user_rec ustat_rec; struct nfs_user_stat_path_rec upath_rec; uint bytes_avail, bytes_total, recs_copied; - uint numExports, totlen, pos, numRecs, count; + uint numExports, numRecs; #endif /* NFSSERVER */ /* @@ -5153,9 +5232,13 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, /* common code for "new style" VFS_CTL sysctl, get the mount. */ switch (name[0]) { case VFS_CTL_TIMEO: - case VFS_CTL_QUERY: case VFS_CTL_NOLOCKS: + case VFS_CTL_NSTATUS: + case VFS_CTL_QUERY: req = CAST_DOWN(struct sysctl_req *, oldp); + if (req == NULL) { + return EFAULT; + } error = SYSCTL_IN(req, &vc, is_64_bit? sizeof(vc.vc64):sizeof(vc.vc32)); if (error) return (error); @@ -5163,7 +5246,7 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, if (mp == NULL) return (ENOENT); nmp = VFSTONFS(mp); - if (nmp == NULL) + if (!nmp) return (ENOENT); bzero(&vq, sizeof(vq)); req->newidx = 0; @@ -5174,6 +5257,7 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, req->newptr = CAST_USER_ADDR_T(vc.vc32.vc_ptr); req->newlen = vc.vc32.vc_len; } + break; } switch(name[0]) { @@ -5535,6 +5619,88 @@ ustat_skip: lck_mtx_unlock(&nmp->nm_lock); } break; + case VFS_CTL_NSTATUS: + /* + * Return the status of this mount. This is much more + * information than VFS_CTL_QUERY. In addition to the + * vq_flags, return the significant mount options along + * with the list of threads blocked on the mount and + * how long the threads have been waiting. + */ + + lck_mtx_lock(nfs_request_mutex); + lck_mtx_lock(&nmp->nm_lock); + + /* + * Count the number of requests waiting for a reply. + * Note: there could be multiple requests from the same thread.
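VFS_CTL_NSTATUS follows the common two-pass sysctl sizing protocol: a NULL oldptr asks only for the required size, an undersized buffer gets ERANGE, and otherwise the kernel fills a temporary buffer and copies it out. Here is a small user-space sketch of that protocol; the demo_* names and the flexible-array layout (which parallels ns_threadids) are invented for illustration.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_status {
	size_t threadcount;
	unsigned long long ids[];	/* flexible array, like ns_threadids */
};

/* Provider side: report the needed size, or fill the caller's buffer. */
static int
demo_get_status(void *buf, size_t *lenp, size_t nthreads)
{
	size_t totlen = sizeof(struct demo_status) +
	    nthreads * sizeof(unsigned long long);

	if (buf == NULL) {		/* caller is querying the buffer size */
		*lenp = totlen;
		return (0);
	}
	if (*lenp < totlen)		/* caller's buffer is too small */
		return (ERANGE);

	memset(buf, 0, totlen);
	((struct demo_status *)buf)->threadcount = nthreads;
	*lenp = totlen;
	return (0);
}

int
main(void)
{
	size_t len = 0;
	void *buf;

	demo_get_status(NULL, &len, 3);		/* pass 1: size only */
	buf = malloc(len);
	if (buf != NULL && demo_get_status(buf, &len, 3) == 0)	/* pass 2: fill */
		printf("got %zu bytes for %zu threads\n", len,
		    ((struct demo_status *)buf)->threadcount);
	free(buf);
	return (0);
}

Note that the thread count can change between the two passes, which is why the kernel code re-counts under nfs_request_mutex and bounds the fill loop by the recomputed numThreads.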
+ */ + numThreads = 0; + TAILQ_FOREACH(rq, &nfs_reqq, r_chain) { + if (rq->r_nmp == nmp) + numThreads++; + } + + /* Calculate total size of result buffer */ + totlen = sizeof(struct netfs_status) + (numThreads * sizeof(uint64_t)); + + if (req->oldptr == USER_ADDR_NULL) { // Caller is querying buffer size + lck_mtx_unlock(&nmp->nm_lock); + lck_mtx_unlock(nfs_request_mutex); + return SYSCTL_OUT(req, NULL, totlen); + } + if (req->oldlen < totlen) { // Check if caller's buffer is big enough + lck_mtx_unlock(&nmp->nm_lock); + lck_mtx_unlock(nfs_request_mutex); + return (ERANGE); + } + + MALLOC(nsp, struct netfs_status *, totlen, M_TEMP, M_WAITOK|M_ZERO); + if (nsp == NULL) { + lck_mtx_unlock(&nmp->nm_lock); + lck_mtx_unlock(nfs_request_mutex); + return (ENOMEM); + } + timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; + if (nmp->nm_state & timeoutmask) + nsp->ns_status |= VQ_NOTRESP; + if (nmp->nm_state & NFSSTA_DEAD) + nsp->ns_status |= VQ_DEAD; + + (void) nfs_mountopts(nmp, nsp->ns_mountopts, sizeof(nsp->ns_mountopts)); + nsp->ns_threadcount = numThreads; + + /* + * Get the thread ids of threads waiting for a reply + * and find the longest wait time. + */ + if (numThreads > 0) { + struct timeval now; + time_t sendtime; + + microuptime(&now); + count = 0; + sendtime = now.tv_sec; + TAILQ_FOREACH(rq, &nfs_reqq, r_chain) { + if (rq->r_nmp == nmp) { + if (rq->r_start < sendtime) + sendtime = rq->r_start; + // A thread_id of zero is used to represent an async I/O request. + nsp->ns_threadids[count] = + rq->r_thread ? thread_tid(rq->r_thread) : 0; + if (++count >= numThreads) + break; + } + } + nsp->ns_waittime = now.tv_sec - sendtime; + } + + lck_mtx_unlock(&nmp->nm_lock); + lck_mtx_unlock(nfs_request_mutex); + + error = SYSCTL_OUT(req, nsp, totlen); + FREE(nsp, M_TEMP); + break; default: return (ENOTSUP); } diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index a8c2017b4..4f155940f 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -120,6 +120,7 @@ #include #define NFS_VNOP_DBG(...) NFS_DBG(NFS_FAC_VNOP, 7, ## __VA_ARGS__) +#define DEFAULT_READLINK_NOCACHE 0 /* * NFS vnode ops @@ -445,8 +446,9 @@ struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc = { &fifo_nfsv4nodeop_p, fifo_nfsv4nodeop_entries }; #endif /* FIFO */ - int nfs_sillyrename(nfsnode_t,nfsnode_t,struct componentname *,vfs_context_t); +int nfs_getattr_internal(nfsnode_t, struct nfs_vattr *, vfs_context_t, int); +int nfs_refresh_fh(nfsnode_t, vfs_context_t); /* * Find the slot in the access cache for this UID. 
@@ -471,7 +473,7 @@ nfs_node_access_slot(nfsnode_t np, uid_t uid, int add) } int -nfs3_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx) +nfs3_access_rpc(nfsnode_t np, u_int32_t *access, int rpcflags, vfs_context_t ctx) { int error = 0, lockerror = ENOENT, status, slot; uint32_t access_result = 0; @@ -488,7 +490,9 @@ nfs3_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx) nfsm_chain_add_32(error, &nmreq, *access); nfsm_chain_build_done(error, &nmreq); nfsmout_if(error); - error = nfs_request(np, NULL, &nmreq, NFSPROC_ACCESS, ctx, NULL, &nmrep, &xid, &status); + error = nfs_request2(np, NULL, &nmreq, NFSPROC_ACCESS, + vfs_context_thread(ctx), vfs_context_ucred(ctx), + NULL, rpcflags, &nmrep, &xid, &status); if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_postop_attr_update(error, &nmrep, np, &xid); @@ -527,19 +531,6 @@ nfsmout: return (error); } -/* - * See if our mount is in trouble. Note this is inherently racey. - */ -static int -nfs_notresponding(struct nfsmount *nmp) -{ - int timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; - if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ - timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; - - return ((nmp->nm_state & timeoutmask) || !(nmp->nm_sockflags & NMSOCK_READY)); -} - /* * NFS access vnode op. * For NFS version 2, just return ok. File accesses may fail later. @@ -557,7 +548,7 @@ nfs_vnop_access( { vfs_context_t ctx = ap->a_context; vnode_t vp = ap->a_vp; - int error = 0, slot, dorpc; + int error = 0, slot, dorpc, rpcflags = 0; u_int32_t access, waccess; nfsnode_t np = VTONFS(vp); struct nfsmount *nmp; @@ -566,7 +557,7 @@ nfs_vnop_access( uid_t uid; nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -654,46 +645,35 @@ nfs_vnop_access( dorpc = 0; waccess = 0; } else if (NACCESSVALID(np, slot)) { - /* - * In addition if the kernel is checking for access, i.e., - * KAUTH_VNODE_ACCESS is not set, and the server does not seem - * to be responding just return if we have something in the - * cache even if its stale for the user. If were granted access - * by the cache and we're a kernel access, then call it good - * enough. We want to avoid having this particular request going - * over the wire causing a hang. This is because at this moment - * we do not know what the state of the server is and what ever - * we get back be it either yea or nay is going to be stale. - * Finder (Desktop services/FileURL) might hang when going over - * the wire when just asking getattrlist for the roots FSID - * since we are going to be called to see if we're authorized - * for search. - * - * N.B. This is also the strategy that SMB is using. - */ - int granted = ((np->n_access[slot] & access) == access); - - if (!(ap->a_action & KAUTH_VNODE_ACCESS)) { - if (granted || nfs_notresponding(nmp)) { - dorpc = 0; - waccess = np->n_access[slot]; - } - } else { - int stale; - microuptime(&now); - stale = (now.tv_sec >= (np->n_accessstamp[slot] + nfs_access_cache_timeout)); - if (granted && !stale) { + microuptime(&now); + if (((now.tv_sec < (np->n_accessstamp[slot] + nfs_access_cache_timeout)) && + ((np->n_access[slot] & access) == access)) || nfs_use_cache(nmp)) { /* OSAddAtomic(1, &nfsstats.accesscache_hits); */ - dorpc = 0; - waccess = np->n_access[slot]; - } + dorpc = 0; + waccess = np->n_access[slot]; } } nfs_node_unlock(np); if (dorpc) { /* Either a no, or a don't know. Go to the wire. 
*/ /* OSAddAtomic(1, &nfsstats.accesscache_misses); */ - error = nmp->nm_funcs->nf_access_rpc(np, &waccess, ctx); + + /* + * Allow an access call to time out if we have it cached + * so we won't hang if the server isn't responding. + */ + if (NACCESSVALID(np, slot)) + rpcflags |= R_SOFT; + + error = nmp->nm_funcs->nf_access_rpc(np, &waccess, rpcflags, ctx); + + /* + * If the server didn't respond, return the cached access. + */ + if ((error == ETIMEDOUT) && (rpcflags & R_SOFT)) { + error = 0; + waccess = np->n_access[slot]; + } } if (!error && ((waccess & access) != access)) error = EACCES; @@ -731,7 +711,7 @@ nfs_vnop_open( return (EINVAL); nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (np->n_flag & NREVOKE) return (EIO); @@ -1059,7 +1039,7 @@ nfs_vnop_close( */ uint32_t writers; mount_t mp = vnode_mount(vp); - int force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)); + int force = (!mp || vfs_isforce(mp)); writers = nfs_no_of_open_file_writers(np); nfs_release_open_state_for_node(np, force); @@ -1288,8 +1268,6 @@ v3close: } - - int nfs3_getattr_rpc( nfsnode_t np, @@ -1305,13 +1283,16 @@ nfs3_getattr_rpc( int error = 0, status, nfsvers, rpcflags = 0; struct nfsm_chain nmreq, nmrep; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if (flags & NGA_MONITOR) /* vnode monitor requests should be soft */ rpcflags = R_RECOVER; + if (flags & NGA_SOFT) /* Return ETIMEDOUT if server not responding */ + rpcflags |= R_SOFT; + nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -1334,9 +1315,159 @@ nfsmout: return (error); } +/* + * nfs_refresh_fh will attempt to update the file handle for the node. + * + * It only does this for symbolic links and regular files that are not currently opened. + * + * On success returns 0 and the node's file handle is updated, or ESTALE on failure. + */ +int +nfs_refresh_fh(nfsnode_t np, vfs_context_t ctx) +{ + vnode_t dvp, vp = NFSTOV(np); + nfsnode_t dnp; + const char *v_name = vnode_getname(vp); + char *name; + int namelen, fhsize, refreshed; + int error, wanted = 0; + uint8_t *fhp; + struct timespec ts = {2, 0}; + + NFS_VNOP_DBG("vnode is %d\n", vnode_vtype(vp)); + + dvp = vnode_parent(vp); + if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VLNK) || + v_name == NULL || *v_name == '\0' || dvp == NULL) { + if (v_name != NULL) + vnode_putname(v_name); + return (ESTALE); + } + dnp = VTONFS(dvp); + + namelen = strlen(v_name); + MALLOC(name, char *, namelen + 1, M_TEMP, M_WAITOK); + if (name == NULL) { + vnode_putname(v_name); + return (ESTALE); + } + bcopy(v_name, name, namelen+1); + NFS_VNOP_DBG("Trying to refresh %s : %s\n", v_name, name); + vnode_putname(v_name); + + /* Allocate the maximum size file handle */ + MALLOC(fhp, uint8_t *, NFS4_FHSIZE, M_TEMP, M_WAITOK); + if (fhp == NULL) { + FREE(name, M_TEMP); + return (ESTALE); + } + + if ((error = nfs_node_lock(np))) { + FREE(name, M_TEMP); + FREE(fhp, M_TEMP); + return (ESTALE); + } + + fhsize = np->n_fhsize; + bcopy(np->n_fhp, fhp, fhsize); + while (ISSET(np->n_flag, NREFRESH)) { + SET(np->n_flag, NREFRESHWANT); + NFS_VNOP_DBG("Waiting for refresh of %s\n", name); + msleep(np, &np->n_lock, PZERO-1, "nfsrefreshwant", &ts); + if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) + break; + } + refreshed = error ?
0 : !NFS_CMPFH(np, fhp, fhsize); + SET(np->n_flag, NREFRESH); + nfs_node_unlock(np); + + NFS_VNOP_DBG("error = %d, refreshed = %d\n", error, refreshed); + if (error || refreshed) + goto nfsmout; + + /* Check that there are no open references for this file */ + lck_mtx_lock(&np->n_openlock); + if (np->n_openrefcnt || !TAILQ_EMPTY(&np->n_opens) || !TAILQ_EMPTY(&np->n_lock_owners)) { + int cnt = 0; + struct nfs_open_file *ofp; + + TAILQ_FOREACH(ofp, &np->n_opens, nof_link) { + cnt += ofp->nof_opencnt; + } + if (cnt) { + lck_mtx_unlock(&np->n_openlock); + NFS_VNOP_DBG("Cannot refresh file handle for %s with open state\n", name); + NFS_VNOP_DBG("\topenrefcnt = %d, opens = %d lock_owners = %d\n", + np->n_openrefcnt, cnt, !TAILQ_EMPTY(&np->n_lock_owners)); + error = ESTALE; + goto nfsmout; + } + } + lck_mtx_unlock(&np->n_openlock); + /* + * Since the FH is currently stale we should not be able to + * establish any open state until the FH is refreshed. + */ + + error = nfs_node_lock(np); + nfsmout_if(error); + /* + * Symlinks should never need invalidations and are holding + * the one and only nfsbuf in an uncached acquired state + * trying to do a readlink. So we will hang if we invalidate + * in that case. Only in the VREG case do we need to + * invalidate. + */ + if (vnode_vtype(vp) == VREG) { + np->n_flag &= ~NNEEDINVALIDATE; + nfs_node_unlock(np); + error = nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ctx, 1); + if (error) + NFS_VNOP_DBG("nfs_vinvalbuf returned %d\n", error); + nfsmout_if(error); + } else { + nfs_node_unlock(np); + } + + NFS_VNOP_DBG("Looking up %s\n", name); + error = nfs_lookitup(dnp, name, namelen, ctx, &np); + if (error) + NFS_VNOP_DBG("nfs_lookitup returned %d\n", error); + +nfsmout: + nfs_node_lock_force(np); + wanted = ISSET(np->n_flag, NREFRESHWANT); + CLR(np->n_flag, NREFRESH|NREFRESHWANT); + nfs_node_unlock(np); + if (wanted) + wakeup(np); + + if (error == 0) + NFS_VNOP_DBG("%s refreshed file handle\n", name); + + FREE(name, M_TEMP); + FREE(fhp, M_TEMP); + + return (error ? ESTALE : 0); +} int nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags) +{ + int error; + +retry: + error = nfs_getattr_internal(np, nvap, ctx, flags); + if (error == ESTALE) { + error = nfs_refresh_fh(np, ctx); + if (!error) + goto retry; + } + return (error); +} + +int +nfs_getattr_internal(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags) { struct nfsmount *nmp; int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods; @@ -1346,7 +1477,9 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags) FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag); - if (!(nmp = NFSTONMP(np))) + nmp = NFSTONMP(np); + + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -1370,7 +1503,8 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags) /* * Use the cache or wait for any getattr in progress if: * - it's a cached request, or - * - we have a delegation + * - we have a delegation, or + * - the server isn't responding */ while (1) { error = nfs_getattrcache(np, nvap, flags); @@ -1406,11 +1540,19 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags) nfs_node_unlock(np); nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) error = ENXIO; if (error) goto nfsmout; + /* + * Return cached attributes if they are valid, + * if the server doesn't respond, and this is + * some softened up style of mount.
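The getattr and access paths in these hunks only soften an RPC (NGA_SOFT/R_SOFT) when a valid cached copy exists to fall back on, so a silent server degrades to serving cached data instead of hanging or erroring. Below is a simplified sketch of that decision; the demo_* types and the fail-fast stand-in for the RPC are invented, and the real hard path retries rather than returning an error.

#include <errno.h>
#include <stdio.h>

struct demo_attr {
	long size;
	long mtime;
};

struct demo_node {
	struct demo_attr cached;
	int cache_valid;
	int server_silent;	/* test knob standing in for a dead server */
};

/* Stand-in for the wire RPC; soft requests fail fast with ETIMEDOUT. */
static int
demo_getattr_rpc(struct demo_node *np, struct demo_attr *ap, int soft)
{
	if (np->server_silent && soft)
		return (ETIMEDOUT);	/* soft request gives up */
	if (np->server_silent)
		return (EIO);		/* stand-in: the real hard path keeps retrying */
	ap->size = 1234;
	ap->mtime = 5678;
	return (0);
}

static int
demo_getattr(struct demo_node *np, struct demo_attr *ap)
{
	/* only soften the request when a fallback actually exists */
	int soft = np->cache_valid;
	int error = demo_getattr_rpc(np, ap, soft);

	if ((error == ETIMEDOUT) && soft) {
		*ap = np->cached;	/* server silent: serve the cached copy */
		return (0);
	}
	if (error == 0) {		/* refresh the cache on success */
		np->cached = *ap;
		np->cache_valid = 1;
	}
	return (error);
}

int
main(void)
{
	struct demo_node node = { { 0, 0 }, 0, 0 };
	struct demo_attr attr;

	demo_getattr(&node, &attr);	/* primes the cache */
	node.server_silent = 1;
	printf("fallback getattr -> %d (size %ld)\n",
	    demo_getattr(&node, &attr), attr.size);
	return (0);
}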
+ */ + if (NATTRVALID(np) && nfs_use_cache(nmp)) + flags |= NGA_SOFT; + /* * We might want to try to get both the attributes and access info by * making an ACCESS call and seeing if it returns updated attributes. @@ -1421,7 +1563,17 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags) if (nfs_attrcachetimeout(np) > 0) { /* OSAddAtomic(1, &nfsstats.accesscache_misses); */ u_int32_t access = NFS_ACCESS_ALL; - error = nmp->nm_funcs->nf_access_rpc(np, &access, ctx); + int rpcflags = 0; + + /* Return cached attrs if server doesn't respond */ + if (flags & NGA_SOFT) + rpcflags |= R_SOFT; + + error = nmp->nm_funcs->nf_access_rpc(np, &access, rpcflags, ctx); + + if (error == ETIMEDOUT) + goto returncached; + if (error) goto nfsmout; nfs_node_lock_force(np); @@ -1435,6 +1587,7 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags) } avoidfloods = 0; + tryagain: error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, flags, ctx, nvap, &xid); if (!error) { @@ -1442,7 +1595,22 @@ tryagain: error = nfs_loadattrcache(np, nvap, &xid, 0); nfs_node_unlock(np); } + + /* + * If the server didn't respond, return cached attributes. + */ +returncached: + if ((flags & NGA_SOFT) && (error == ETIMEDOUT)) { + nfs_node_lock_force(np); + error = nfs_getattrcache(np, nvap, flags); + if (!error || (error != ENOENT)) { + nfs_node_unlock(np); + goto nfsmout; + } + nfs_node_unlock(np); + } nfsmout_if(error); + if (!xid) { /* out-of-order rpc - attributes were dropped */ FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid); if (avoidfloods++ < 20) @@ -1620,7 +1788,7 @@ nfs_vnop_setattr( struct nfs_open_file *nofp = NULL; nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR); @@ -1923,7 +2091,7 @@ nfs3_setattr_rpc( u_int64_t xid, nextxid; struct nfsm_chain nmreq, nmrep; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -2109,7 +2277,7 @@ nfs_vnop_lookup( mp = vnode_mount(dvp); nmp = VFSTONFS(mp); - if (!nmp) { + if (nfs_mount_gone(nmp)) { error = ENXIO; goto error_return; } @@ -2182,7 +2350,7 @@ nfs_vnop_lookup( /* do we know this name is too long? */ nmp = VTONMP(dvp); - if (!nmp) { + if (nfs_mount_gone(nmp)) { error = ENXIO; goto error_return; } @@ -2275,6 +2443,8 @@ error_return: return (error); } +int nfs_readlink_nocache = DEFAULT_READLINK_NOCACHE; + /* * NFS readlink call */ @@ -2294,6 +2464,8 @@ nfs_vnop_readlink( uint32_t buflen; uio_t uio = ap->a_uio; struct nfsbuf *bp = NULL; + struct timespec ts; + int timeo; if (vnode_vtype(ap->a_vp) != VLNK) return (EPERM); @@ -2304,34 +2476,66 @@ nfs_vnop_readlink( return (EINVAL); nmp = VTONMP(ap->a_vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; + /* nfs_getattr() will check changed and purge caches */ - if ((error = nfs_getattr(np, NULL, ctx, NGA_CACHED))) { + if ((error = nfs_getattr(np, NULL, ctx, nfs_readlink_nocache ? 
NGA_UNCACHED : NGA_CACHED))) { FSDBG(531, np, 0xd1e0001, 0, error); return (error); } + if (nfs_readlink_nocache) { + timeo = nfs_attrcachetimeout(np); + nanouptime(&ts); + } + +retry: OSAddAtomic64(1, &nfsstats.biocache_readlinks); - error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_READ, &bp); + error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_META, &bp); if (error) { FSDBG(531, np, 0xd1e0002, 0, error); return (error); } + + if (nfs_readlink_nocache) { + NFS_VNOP_DBG("timeo = %d ts.tv_sec = %ld need refresh = %d cached = %d\n", timeo, ts.tv_sec, + (np->n_rltim.tv_sec + timeo) < ts.tv_sec || nfs_readlink_nocache > 1, + ISSET(bp->nb_flags, NB_CACHE) == NB_CACHE); + /* n_rltim is synchronized by the associated nfs buf */ + if (ISSET(bp->nb_flags, NB_CACHE) && ((nfs_readlink_nocache > 1) || ((np->n_rltim.tv_sec + timeo) < ts.tv_sec))) { + SET(bp->nb_flags, NB_INVAL); + nfs_buf_release(bp, 0); + goto retry; + } + } if (!ISSET(bp->nb_flags, NB_CACHE)) { +readagain: OSAddAtomic64(1, &nfsstats.readlink_bios); buflen = bp->nb_bufsize; error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx); if (error) { + if (error == ESTALE) { + NFS_VNOP_DBG("Stale FH from readlink rpc\n"); + error = nfs_refresh_fh(np, ctx); + if (error == 0) + goto readagain; + } SET(bp->nb_flags, NB_ERROR); bp->nb_error = error; + NFS_VNOP_DBG("readlink failed %d\n", error); } else { bp->nb_validoff = 0; bp->nb_validend = buflen; + np->n_rltim = ts; + NFS_VNOP_DBG("readlink of %.*s\n", bp->nb_validend, (char *)bp->nb_data); } + } else { + NFS_VNOP_DBG("got cached link of %.*s\n", bp->nb_validend, (char *)bp->nb_data); } + if (!error && (bp->nb_validend > 0)) error = uiomove(bp->nb_data, bp->nb_validend, uio); FSDBG(531, np, bp->nb_validend, 0, error); @@ -2352,7 +2556,7 @@ nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) struct nfsm_chain nmreq, nmrep; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; nfsm_chain_null(&nmreq); @@ -2409,7 +2613,7 @@ nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx) FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0); nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; nmrsize = nmp->nm_rsize; @@ -2481,7 +2685,7 @@ nfs3_read_rpc_async( struct nfsm_chain nmreq; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -2519,7 +2723,7 @@ nfs3_read_rpc_async_finish( struct nfsm_chain nmrep; nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { nfs_request_async_cancel(req); return (ENXIO); } @@ -2668,9 +2872,59 @@ nfs_vnop_write( goto out; if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) { - /* it looks like we'll be extending the file, so take the data lock exclusive */ + /* + * It looks like we'll be extending the file, so take the data lock exclusive. + */ nfs_data_unlock(np); nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE); + + /* + * Also, if the write begins after the previous EOF buffer, make sure to zero + * and validate the new bytes in that buffer. 
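The relocated EOF-extension code above zeroes from the old EOF to the end of its buffer one page at a time, zeroing a page only when the cursor is at a page boundary or the page already holds valid data. The arithmetic is easy to get wrong, so here is a stand-alone sketch of the same loop; the valid[]/validset[] arrays are stand-ins for the buffer's nb_valid page bitmap (NBPGVALID/NBPGVALID_SET), and the sizes are illustrative.

#include <stdio.h>
#include <string.h>

#define DEMO_PAGE_SIZE	4096
#define DEMO_PAGE_MASK	(DEMO_PAGE_SIZE - 1)
#define DEMO_BIOSIZE	(4 * DEMO_PAGE_SIZE)

/*
 * Zero from the old EOF offset to the end of the block, page by page,
 * skipping partially-covered pages that are not currently valid.
 */
static void
demo_zero_tail(char *block, int eofoff, const int valid[], int validset[])
{
	int i = eofoff / DEMO_PAGE_SIZE;

	while (eofoff < DEMO_BIOSIZE) {
		int poff = eofoff & DEMO_PAGE_MASK;

		/* zero pages that start here or already hold valid data */
		if (!poff || valid[i]) {
			memset(block + eofoff, 0, DEMO_PAGE_SIZE - poff);
			validset[i] = 1;
		}
		eofoff += DEMO_PAGE_SIZE - poff;
		i++;
	}
}

int
main(void)
{
	static char block[DEMO_BIOSIZE];
	int valid[4] = { 1, 1, 0, 0 };		/* page 2 holds no valid data */
	int validset[4] = { 0, 0, 0, 0 };
	int i;

	demo_zero_tail(block, 6000, valid, validset);	/* old EOF inside page 1 */
	for (i = 0; i < 4; i++)
		printf("page %d: set %d\n", i, validset[i]);
	return (0);
}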
+ */ + struct nfsbuf *eofbp = NULL; + daddr64_t eofbn = np->n_size / biosize; + int eofoff = np->n_size % biosize; + lbn = uio_offset(uio) / biosize; + + if (eofoff && (eofbn < lbn)) { + if ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE|NBLK_ONLYVALID, &eofbp))) + goto out; + np->n_size += (biosize - eofoff); + nfs_node_lock_force(np); + CLR(np->n_flag, NUPDATESIZE); + np->n_flag |= NMODIFIED; + nfs_node_unlock(np); + FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); + ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */ + if (eofbp) { + /* + * For the old last page, don't zero bytes if there + * are invalid bytes in that page (i.e. the page isn't + * currently valid). + * For pages after the old last page, zero them and + * mark them as valid. + */ + char *d; + int i; + if (ioflag & IO_NOCACHE) + SET(eofbp->nb_flags, NB_NOCACHE); + NFS_BUF_MAP(eofbp); + FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e); + d = eofbp->nb_data; + i = eofoff/PAGE_SIZE; + while (eofoff < biosize) { + int poff = eofoff & PAGE_MASK; + if (!poff || NBPGVALID(eofbp,i)) { + bzero(d + eofoff, PAGE_SIZE - poff); + NBPGVALID_SET(eofbp, i); + } + eofoff += PAGE_SIZE - poff; + i++; + } + nfs_buf_release(eofbp, 1); + } + } } do { @@ -2770,17 +3024,11 @@ again: * and zero the new bytes. */ if ((uio_offset(uio) + n) > (off_t)np->n_size) { - struct nfsbuf *eofbp = NULL; daddr64_t eofbn = np->n_size / biosize; - int eofoff = np->n_size % biosize; int neweofoff = (uio_offset(uio) + n) % biosize; FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff); - if (eofoff && (eofbn < lbn) && - ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE|NBLK_ONLYVALID, &eofbp)))) - goto out; - /* if we're extending within the same last block */ /* and the block is flagged as being cached... */ if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) { @@ -2817,38 +3065,6 @@ again: nfs_node_unlock(np); FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */ - if (eofbp) { - /* - * We may need to zero any previously invalid data - * after the old EOF in the previous EOF buffer. - * - * For the old last page, don't zero bytes if there - * are invalid bytes in that page (i.e. the page isn't - * currently valid). - * For pages after the old last page, zero them and - * mark them as valid. - */ - char *d; - int i; - if (ioflag & IO_NOCACHE) - SET(eofbp->nb_flags, NB_NOCACHE); - NFS_BUF_MAP(eofbp); - FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e); - d = eofbp->nb_data; - i = eofoff/PAGE_SIZE; - while (eofoff < biosize) { - int poff = eofoff & PAGE_MASK; - if (!poff || NBPGVALID(eofbp,i)) { - bzero(d + eofoff, PAGE_SIZE - poff); - NBPGVALID_SET(eofbp, i); - } - if (bp->nb_validend == eofoff) - bp->nb_validend += PAGE_SIZE - poff; - eofoff += PAGE_SIZE - poff; - i++; - } - nfs_buf_release(eofbp, 1); - } } /* * If dirtyend exceeds file size, chop it down. 
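This hunk hoists the old-EOF zeroing out of the per-iteration write loop: when a write begins beyond the block that held the previous EOF, the tail of that block is zeroed page by page, skipping a partial first page unless its cached contents are already valid. A self-contained sketch of just that page arithmetic (PAGE_SIZE here is an illustrative stand-in for the kernel constant):

#include <string.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)

/*
 * Zero a block's tail from `eofoff` up to `biosize` in page-sized steps,
 * mirroring the loop above.  pgvalid(i) stands in for NBPGVALID(eofbp, i):
 * a partial first page is zeroed only when it is already valid, while each
 * following page-aligned chunk is zeroed (and would be marked valid)
 * unconditionally.
 */
static void
zero_block_tail(char *d, int eofoff, int biosize, int (*pgvalid)(int))
{
	int i = eofoff / PAGE_SIZE;

	while (eofoff < biosize) {
		int poff = eofoff & PAGE_MASK;	/* offset within this page */
		if (!poff || pgvalid(i))
			memset(d + eofoff, 0, PAGE_SIZE - poff);
		eofoff += PAGE_SIZE - poff;	/* advance to next page boundary */
		i++;
	}
}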
This should @@ -3166,7 +3382,7 @@ nfs_write_rpc2( #endif FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep); nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; nmwsize = nmp->nm_wsize; @@ -3198,7 +3414,7 @@ nfs_write_rpc2( if (!error) error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2); nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) error = ENXIO; if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */ @@ -3283,7 +3499,7 @@ nfs3_write_rpc_async( struct nfsm_chain nmreq; nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -3332,7 +3548,7 @@ nfs3_write_rpc_async_finish( struct nfsm_chain nmrep; nmp = NFSTONMP(np); - if (!nmp) { + if (nfs_mount_gone(nmp)) { nfs_request_async_cancel(req); return (ENXIO); } @@ -3344,7 +3560,7 @@ nfs3_write_rpc_async_finish( if (error == EINPROGRESS) /* async request restarted */ return (error); nmp = NFSTONMP(np); - if (!nmp) + if (nfs_mount_gone(nmp)) error = ENXIO; if (!error && (lockerror = nfs_node_lock(np))) error = lockerror; @@ -3429,7 +3645,7 @@ nfs3_vnop_mknod( struct nfsreq rq, *req = &rq; nmp = VTONMP(dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -3579,7 +3795,7 @@ nfs3_vnop_create( struct nfs_dulookup dul; nmp = VTONMP(dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -3758,7 +3974,7 @@ nfs_vnop_remove( /* XXX prevent removing a sillyrenamed file? */ nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR); @@ -3912,7 +4128,7 @@ int nfs_removeit(struct nfs_sillyrename *nsp) { struct nfsmount *nmp = NFSTONMP(nsp->nsr_dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); return nmp->nm_funcs->nf_remove_rpc(nsp->nsr_dnp, nsp->nsr_name, nsp->nsr_namlen, NULL, nsp->nsr_cred); } @@ -3936,7 +4152,7 @@ nfs3_remove_rpc( struct nfsm_chain nmreq, nmrep; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if ((nfsvers == NFS_VER2) && (namelen > NFS_MAXNAMLEN)) @@ -4010,7 +4226,7 @@ nfs_vnop_rename( tnp = tvp ? 
VTONFS(tvp) : NULL; nmp = NFSTONMP(fdnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -4169,7 +4385,7 @@ nfs3_rename_rpc( struct nfsm_chain nmreq, nmrep; nmp = NFSTONMP(fdnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if ((nfsvers == NFS_VER2) && @@ -4251,7 +4467,7 @@ nfs3_vnop_link( return (EXDEV); nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) @@ -4352,7 +4568,7 @@ nfs3_vnop_symlink( struct nfs_dulookup dul; nmp = VTONMP(dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -4508,7 +4724,7 @@ nfs3_vnop_mkdir( struct nfs_dulookup dul; nmp = VTONMP(dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) @@ -4653,7 +4869,7 @@ nfs3_vnop_rmdir( struct nfs_dulookup dul; nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) @@ -4776,7 +4992,7 @@ nfs_vnop_readdir( thread_t thd; nmp = VTONMP(dvp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; bigcookies = (nmp->nm_state & NFSSTA_BIGCOOKIES); @@ -5138,7 +5354,7 @@ nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp) * Let's search the directory's buffers for the cookie. */ nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); dpptc = NULL; found = 0; @@ -5309,7 +5525,8 @@ nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cn daddr64_t lbn, nextlbn; int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_'); - if (!(nmp = NFSTONMP(dnp))) + nmp = NFSTONMP(dnp); + if (nfs_mount_gone(nmp)) return (ENXIO); if (!purge) *npp = NULL; @@ -5450,7 +5667,7 @@ nfs3_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx) struct timeval now; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; nmreaddirsize = nmp->nm_readdirsize; @@ -5733,7 +5950,7 @@ nfs_sillyrename( struct nfsmount *nmp; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfs_name_cache_purge(dnp, np, cnp, ctx); @@ -5812,7 +6029,7 @@ nfs3_lookup_rpc_async( int error = 0, nfsvers; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -5911,7 +6128,7 @@ nfs_lookitup( struct nfsreq rq, *req = &rq; nmp = NFSTONMP(dnp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) && @@ -6112,7 +6329,7 @@ nfs3_commit_rpc( nmp = NFSTONMP(np); FSDBG(521, np, offset, count, nmp ? 
nmp->nm_state : 0); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) return (0); @@ -6206,7 +6423,7 @@ nfs3_pathconf_rpc( struct nfsmount *nmp = NFSTONMP(np); uint32_t val = 0; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); nfsvers = nmp->nm_vers; @@ -6228,6 +6445,7 @@ nfs3_pathconf_rpc( error = status; nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxlink); nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxname); + nfsap->nfsa_flags &= ~(NFS_FSFLAG_NO_TRUNC|NFS_FSFLAG_CHOWN_RESTRICTED|NFS_FSFLAG_CASE_INSENSITIVE|NFS_FSFLAG_CASE_PRESERVING); nfsm_chain_get_32(error, &nmrep, val); if (val) nfsap->nfsa_flags |= NFS_FSFLAG_NO_TRUNC; @@ -6258,6 +6476,7 @@ nfs3_pathconf_cache(struct nfsmount *nmp, struct nfs_fsattr *nfsap) { nmp->nm_fsattr.nfsa_maxlink = nfsap->nfsa_maxlink; nmp->nm_fsattr.nfsa_maxname = nfsap->nfsa_maxname; + nmp->nm_fsattr.nfsa_flags &= ~(NFS_FSFLAG_NO_TRUNC|NFS_FSFLAG_CHOWN_RESTRICTED|NFS_FSFLAG_CASE_INSENSITIVE|NFS_FSFLAG_CASE_PRESERVING); nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC; nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED; nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE; @@ -6297,7 +6516,7 @@ nfs_vnop_pathconf( uint nbits; nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); switch (ap->a_name) { @@ -6338,7 +6557,7 @@ nfs_vnop_pathconf( if (error) return (error); nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); lck_mtx_lock(&nmp->nm_lock); if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS) { @@ -6358,7 +6577,7 @@ nfs_vnop_pathconf( if (error) return (error); nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); lck_mtx_lock(&nmp->nm_lock); nfsap = &nfsa; @@ -6700,7 +6919,7 @@ nfs_vnop_ioctl( error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0); break; case NFS_FSCTL_DESTROY_CRED: - error = nfs_gss_clnt_ctx_destroy(mp, vfs_context_ucred(ctx)); + error = nfs_gss_clnt_ctx_remove(mp, vfs_context_ucred(ctx)); break; } @@ -6794,7 +7013,7 @@ nfs_vnop_pagein( &uio_buf, sizeof(uio_buf)); nmp = VTONMP(vp); - if (!nmp) { + if (nfs_mount_gone(nmp)) { if (!nofreeupl) ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); @@ -7497,7 +7716,7 @@ nfs_vnop_blktooff( vnode_t vp = ap->a_vp; struct nfsmount *nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); biosize = nmp->nm_biosize; @@ -7519,7 +7738,7 @@ nfs_vnop_offtoblk( vnode_t vp = ap->a_vp; struct nfsmount *nmp = VTONMP(vp); - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); biosize = nmp->nm_biosize; @@ -7546,7 +7765,7 @@ nfs_vnop_monitor( struct nfsmount *nmp = VTONMP(ap->a_vp); int error = 0; - if (!nmp) + if (nfs_mount_gone(nmp)) return (ENXIO); /* make sure that the vnode's monitoring status is up to date */ diff --git a/bsd/nfs/nfsmount.h b/bsd/nfs/nfsmount.h index 4e9e88e6c..4d28a9774 100644 --- a/bsd/nfs/nfsmount.h +++ b/bsd/nfs/nfsmount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
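Throughout the vnops above, bare !nmp NULL checks are replaced by nfs_mount_gone(), which pairs with the new nm_ref reference count and NFSSTA_MOUNT_DRAIN flag introduced below: a mount can still have a live structure while it is draining references, and callers must treat it as gone and return ENXIO. The helper's body is not part of this patch; one plausible shape, sketched against a stub structure rather than the real struct nfsmount:

#define NFSSTA_MOUNT_DRAIN	0x80000000	/* from the header below */

/* stub stand-in for struct nfsmount; hypothetical sketch only */
struct nfsmount_stub {
	unsigned int nm_state;
};

static int
nfs_mount_gone_sketch(const struct nfsmount_stub *nmp)
{
	/* "gone" is broader than NULL: a draining mount also counts */
	return (nmp == NULL || (nmp->nm_state & NFSSTA_MOUNT_DRAIN) != 0);
}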
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -214,7 +214,7 @@ struct nfs_funcs { int (*nf_mount)(struct nfsmount *, vfs_context_t, nfsnode_t *); int (*nf_update_statfs)(struct nfsmount *, vfs_context_t); int (*nf_getquota)(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *); - int (*nf_access_rpc)(nfsnode_t, u_int32_t *, vfs_context_t); + int (*nf_access_rpc)(nfsnode_t, u_int32_t *, int, vfs_context_t); int (*nf_getattr_rpc)(nfsnode_t, mount_t, u_char *, size_t, int, vfs_context_t, struct nfs_vattr *, u_int64_t *); int (*nf_setattr_rpc)(nfsnode_t, struct vnode_attr *, vfs_context_t); int (*nf_read_rpc_async)(nfsnode_t, off_t, size_t, thread_t, kauth_cred_t, struct nfsreq_cbinfo *, struct nfsreq **); @@ -258,6 +258,7 @@ struct nfsmount { char * nm_realm; /* Kerberos realm to use */ char * nm_principal; /* GSS principal to use on initial mount */ char * nm_sprinc; /* Kerberos principal of the server */ + int nm_ref; /* Reference count on this mount */ int nm_state; /* Internal state flags */ int nm_vers; /* NFS version */ struct nfs_funcs *nm_funcs; /* version-specific functions */ @@ -266,7 +267,9 @@ struct nfsmount { nfsnode_t nm_dnp; /* root directory nfsnode pointer */ struct nfs_fs_locations nm_locations; /* file system locations */ uint32_t nm_numgrps; /* Max. size of groupslist */ - TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gsscl; /* GSS user contexts */ + TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gsscl; /* GSS user contexts */ + TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gssnccl; /* GSS neg cache contexts */ + uint32_t nm_ncentries; /* GSS expired negative cache entries */ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */ int nm_retry; /* Max retries */ uint32_t nm_rsize; /* Max size of read rpc */ @@ -380,6 +383,7 @@ struct nfsmount { #define NFSSTA_RECOVER_EXPIRED 0x10000000 /* mount state expired */ #define NFSSTA_REVOKE 0x20000000 /* need to scan for revoked nodes */ #define NFSSTA_SQUISHY 0x40000000 /* we can ask to be forcibly unmounted */ +#define NFSSTA_MOUNT_DRAIN 0x80000000 /* mount is draining references */ /* flags for nm_sockflags */ #define NMSOCK_READY 0x0001 /* socket is ready for use */ diff --git a/bsd/nfs/nfsnode.h b/bsd/nfs/nfsnode.h index 94ddeef1d..de4913f33 100644 --- a/bsd/nfs/nfsnode.h +++ b/bsd/nfs/nfsnode.h @@ -195,8 +195,6 @@ struct nfsbuf { LIST_HEAD(nfsbuflists, nfsbuf); TAILQ_HEAD(nfsbuffreehead, nfsbuf); -#define NFSNOLIST ((void*)0xdeadbeef) - extern lck_mtx_t *nfs_buf_mutex; extern int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax; extern int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer; @@ -348,6 +346,7 @@ struct nfs_vattr { #define NGA_UNCACHED 0x0002 /* fetch new attributes */ #define NGA_ACL 0x0004 /* fetch ACL */ #define NGA_MONITOR 0x0008 /* vnode monitor attr update poll */ +#define NGA_SOFT 0x0010 /* use cached attributes if ETIMEOUT */ /* macros for initting/cleaning up nfs_vattr structures */ #define NVATTR_INIT(NVAP) \ @@ -583,6 +582,7 @@ struct nfsnode { int n_error; /* Save write error value */ union { struct timespec ns_atim; /* Special file times */ + struct timespec nl_rltim; /* Time of last readlink */ daddr64_t nf_lastread; /* last block# read from (for readahead) */ uint64_t nd_cookieverf; /* Cookie verifier (dir only) */ } n_un1; @@ -650,6 +650,7 @@ struct nfsnode { #define n_atim n_un1.ns_atim #define n_mtim n_un2.ns_mtim +#define n_rltim n_un1.nl_rltim #define n_lastread n_un1.nf_lastread #define n_lastrahead n_un2.nf_lastrahead #define n_sillyrename n_un3.nf_silly @@ -688,6 +689,8 @@ struct nfsnode { 
#define NISDOTZFS 0x04000 /* a ".zfs" directory */ #define NISDOTZFSCHILD 0x08000 /* a child of a ".zfs" directory */ #define NISMAPPED 0x10000 /* node is mmapped */ +#define NREFRESH 0x20000 /* node's fh needs to be refreshed */ +#define NREFRESHWANT 0x40000 /* Waiting for fh to be refreshed */ /* * Flags for n_hflag diff --git a/bsd/security/audit/audit.c b/bsd/security/audit/audit.c index 1ee6c85cd..8fffb654b 100644 --- a/bsd/security/audit/audit.c +++ b/bsd/security/audit/audit.c @@ -76,7 +76,6 @@ #include #include #include -#include #include #include @@ -487,6 +486,26 @@ audit_commit(struct kaudit_record *ar, int error, int retval) ar->k_ar.ar_arg_fflags, error); break; + case AUE_OPENAT_RWTC: + /* + * The openat syscall always writes a + * AUE_OPENAT_RWTC event; change it to the proper type of + * event based on the flags and the error value. + */ + ar->k_ar.ar_event = audit_flags_and_error_to_openatevent( + ar->k_ar.ar_arg_fflags, error); + break; + + case AUE_OPENBYID_RWT: + /* + * The openbyid syscall always writes a + * AUE_OPENBYID_RWT event; change it to the proper type of + * event based on the flags and the error value. + */ + ar->k_ar.ar_event = audit_flags_and_error_to_openbyidevent( + ar->k_ar.ar_arg_fflags, error); + break; + case AUE_SYSCTL: ar->k_ar.ar_event = audit_ctlname_to_sysctlevent( ar->k_ar.ar_arg_ctlname, ar->k_ar.ar_valid_arg); diff --git a/bsd/security/audit/audit_arg.c b/bsd/security/audit/audit_arg.c index 19ba08a57..4b16e76b6 100644 --- a/bsd/security/audit/audit_arg.c +++ b/bsd/security/audit/audit_arg.c @@ -75,7 +75,6 @@ #include #include #include -#include #include #include diff --git a/bsd/security/audit/audit_bsm.c b/bsd/security/audit/audit_bsm.c index 6f665d890..d63c131ea 100644 --- a/bsd/security/audit/audit_bsm.c +++ b/bsd/security/audit/audit_bsm.c @@ -57,8 +57,6 @@ #include #include -#include - #if CONFIG_AUDIT MALLOC_DEFINE(M_AUDITBSM, "audit_bsm", "Audit BSM data"); @@ -1022,6 +1020,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) case AUE_FUTIMES: case AUE_GETDIRENTRIES: case AUE_GETDIRENTRIESATTR: + case AUE_GETATTRLISTBULK: #if 0 /* XXXss new */ case AUE_POLL: #endif @@ -1282,22 +1281,47 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) } break; - case AUE_OPENAT_RC: - case AUE_OPENAT_RTC: - case AUE_OPENAT_RWC: - case AUE_OPENAT_RWTC: - case AUE_OPENAT_WC: - case AUE_OPENAT_WTC: + case AUE_OPEN: + case AUE_OPEN_R: + case AUE_OPEN_RT: + case AUE_OPEN_RW: + case AUE_OPEN_RWT: + case AUE_OPEN_W: + case AUE_OPEN_WT: + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_OPEN_RC: + case AUE_OPEN_RTC: + case AUE_OPEN_RWC: + case AUE_OPEN_RWTC: + case AUE_OPEN_WC: + case AUE_OPEN_WTC: if (ARG_IS_VALID(kar, ARG_MODE)) { tok = au_to_arg32(3, "mode", ar->ar_arg_mode); kau_write(rec, tok); } if (ARG_IS_VALID(kar, ARG_FFLAGS)) { - tok = au_to_arg32(3, "flags", ar->ar_arg_fflags); + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); kau_write(rec, tok); } - if (ARG_IS_VALID(kar, ARG_FD)) { - tok = au_to_arg32(1, "dir fd", ar->ar_arg_fd); + UPATH1_VNODE1_TOKENS; + break; + + case AUE_OPEN_EXTENDED: + case AUE_OPEN_EXTENDED_R: + case AUE_OPEN_EXTENDED_RT: + case AUE_OPEN_EXTENDED_RW: + case AUE_OPEN_EXTENDED_RWT: + case AUE_OPEN_EXTENDED_W: + case AUE_OPEN_EXTENDED_WT: + EXTENDED_TOKENS(3); + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); kau_write(rec, tok); } 
UPATH1_VNODE1_TOKENS; @@ -1317,23 +1341,6 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) UPATH1_VNODE1_TOKENS; break; - case AUE_OPEN_RC: - case AUE_OPEN_RTC: - case AUE_OPEN_RWC: - case AUE_OPEN_RWTC: - case AUE_OPEN_WC: - case AUE_OPEN_WTC: - if (ARG_IS_VALID(kar, ARG_MODE)) { - tok = au_to_arg32(3, "mode", ar->ar_arg_mode); - kau_write(rec, tok); - } - if (ARG_IS_VALID(kar, ARG_FFLAGS)) { - tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); - kau_write(rec, tok); - } - UPATH1_VNODE1_TOKENS; - break; - case AUE_OPENAT: case AUE_OPENAT_R: case AUE_OPENAT_RT: @@ -1352,36 +1359,59 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) UPATH1_VNODE1_TOKENS; break; - case AUE_OPEN_EXTENDED: - case AUE_OPEN_EXTENDED_R: - case AUE_OPEN_EXTENDED_RT: - case AUE_OPEN_EXTENDED_RW: - case AUE_OPEN_EXTENDED_RWT: - case AUE_OPEN_EXTENDED_W: - case AUE_OPEN_EXTENDED_WT: - EXTENDED_TOKENS(3); + case AUE_OPENAT_RC: + case AUE_OPENAT_RTC: + case AUE_OPENAT_RWC: + case AUE_OPENAT_RWTC: + case AUE_OPENAT_WC: + case AUE_OPENAT_WTC: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(4, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } if (ARG_IS_VALID(kar, ARG_FFLAGS)) { - tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + tok = au_to_arg32(3, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(1, "dir fd", ar->ar_arg_fd); kau_write(rec, tok); } UPATH1_VNODE1_TOKENS; break; - case AUE_OPEN: - case AUE_OPEN_R: - case AUE_OPEN_RT: - case AUE_OPEN_RW: - case AUE_OPEN_RWT: - case AUE_OPEN_W: - case AUE_OPEN_WT: + case AUE_OPENBYID: + case AUE_OPENBYID_R: + case AUE_OPENBYID_RT: + case AUE_OPENBYID_RW: + case AUE_OPENBYID_RWT: + case AUE_OPENBYID_W: + case AUE_OPENBYID_WT: if (ARG_IS_VALID(kar, ARG_FFLAGS)) { - tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + tok = au_to_arg32(3, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(1, "volfsid", ar->ar_arg_value32); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE64)) { + tok = au_to_arg64(2, "objid", ar->ar_arg_value64); kau_write(rec, tok); } - UPATH1_VNODE1_TOKENS; break; + case AUE_RENAMEAT: + case AUE_FACCESSAT: + case AUE_FCHMODAT: + case AUE_FCHOWNAT: + case AUE_FSTATAT: + case AUE_LINKAT: case AUE_UNLINKAT: + case AUE_READLINKAT: + case AUE_SYMLINKAT: + case AUE_MKDIRAT: + case AUE_GETATTRLISTAT: if (ARG_IS_VALID(kar, ARG_FD)) { tok = au_to_arg32(1, "dir fd", ar->ar_arg_fd); kau_write(rec, tok); diff --git a/bsd/security/audit/audit_bsm_klib.c b/bsd/security/audit/audit_bsm_klib.c index 02fd0ead5..c5588d98e 100644 --- a/bsd/security/audit/audit_bsm_klib.c +++ b/bsd/security/audit/audit_bsm_klib.c @@ -451,6 +451,169 @@ audit_flags_and_error_to_openextendedevent(int oflags, int error) return (aevent); } +/* + * Convert an open flags specifier into a specific type of open_extended event + * for auditing purposes. + */ +au_event_t +audit_flags_and_error_to_openatevent(int oflags, int error) +{ + au_event_t aevent; + + /* + * Need to check only those flags we care about. + */ + oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY); + + /* + * These checks determine what flags are on with the condition that + * ONLY that combination is on, and no other flags are on. 
+ */ + switch (oflags) { + case O_RDONLY: + aevent = AUE_OPENAT_R; + break; + + case (O_RDONLY | O_CREAT): + aevent = AUE_OPENAT_RC; + break; + + case (O_RDONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPENAT_RTC; + break; + + case (O_RDONLY | O_TRUNC): + aevent = AUE_OPENAT_RT; + break; + + case O_RDWR: + aevent = AUE_OPENAT_RW; + break; + + case (O_RDWR | O_CREAT): + aevent = AUE_OPENAT_RWC; + break; + + case (O_RDWR | O_CREAT | O_TRUNC): + aevent = AUE_OPENAT_RWTC; + break; + + case (O_RDWR | O_TRUNC): + aevent = AUE_OPENAT_RWT; + break; + + case O_WRONLY: + aevent = AUE_OPENAT_W; + break; + + case (O_WRONLY | O_CREAT): + aevent = AUE_OPENAT_WC; + break; + + case (O_WRONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPENAT_WTC; + break; + + case (O_WRONLY | O_TRUNC): + aevent = AUE_OPENAT_WT; + break; + + default: + aevent = AUE_OPENAT; + break; + } + + /* + * Convert chatty errors to better matching events. Failures to + * find a file are really just attribute events -- so recast them as + * such. + * + * XXXAUDIT: Solaris defines that AUE_OPENAT will never be returned, it + * is just a placeholder. However, in Darwin we return that in + * preference to other events. + * + * XXXRW: This behavior differs from FreeBSD, so possibly revise this + * code or this comment. + */ + switch (aevent) { + case AUE_OPENAT_R: + case AUE_OPENAT_RT: + case AUE_OPENAT_RW: + case AUE_OPENAT_RWT: + case AUE_OPENAT_W: + case AUE_OPENAT_WT: + if (error == ENOENT) + aevent = AUE_OPENAT; + } + return (aevent); +} + +/* + * Convert an open flags specifier into a specific type of openbyid event + * for auditing purposes. + */ +au_event_t +audit_flags_and_error_to_openbyidevent(int oflags, int error) +{ + au_event_t aevent; + + /* + * Need to check only those flags we care about. + */ + oflags = oflags & (O_RDONLY | O_TRUNC | O_RDWR | O_WRONLY); + + /* + * These checks determine what flags are on with the condition that + * ONLY that combination is on, and no other flags are on. + */ + switch (oflags) { + case O_RDONLY: + aevent = AUE_OPENBYID_R; + break; + + case (O_RDONLY | O_TRUNC): + aevent = AUE_OPENBYID_RT; + break; + + case O_RDWR: + aevent = AUE_OPENBYID_RW; + break; + + case (O_RDWR | O_TRUNC): + aevent = AUE_OPENBYID_RWT; + break; + + case O_WRONLY: + aevent = AUE_OPENBYID_W; + break; + + case (O_WRONLY | O_TRUNC): + aevent = AUE_OPENBYID_WT; + break; + + default: + aevent = AUE_OPENBYID; + break; + } + + /* + * Convert chatty errors to better matching events. Failures to + * find a file are really just attribute events -- so recast them as + * such. + */ + switch (aevent) { + case AUE_OPENBYID_R: + case AUE_OPENBYID_RT: + case AUE_OPENBYID_RW: + case AUE_OPENBYID_RWT: + case AUE_OPENBYID_W: + case AUE_OPENBYID_WT: + if (error == ENOENT) + aevent = AUE_OPENBYID; + } + return (aevent); +} + /* * Convert a MSGCTL command to a specific event. */ @@ -595,91 +758,9 @@ auditon_command_event(int cmd) au_event_t audit_fcntl_command_event(int cmd, int oflags, int error) { - au_event_t aevent; - switch(cmd) { case F_OPENFROM: - /* - * Need to check only those flags we care about. - */ - oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | - O_WRONLY); - - /* - * These checks determine what flags are on with the condition - * that ONLY that combination is on, and no other flags are on. 
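Both helpers above collapse the O_RDONLY/O_WRONLY/O_RDWR x O_CREAT x O_TRUNC lattice into a single BSM event by masking away every other flag and switching on the exact remaining combination. The masking step can be exercised in isolation; in this demo, strings stand in for the AUE_OPENAT_* event constants:

#include <fcntl.h>
#include <stdio.h>

static const char *
openat_event_name(int oflags)
{
	/* keep only the flags the audit mapping cares about */
	switch (oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY)) {
	case O_RDONLY:				return "AUE_OPENAT_R";
	case O_RDONLY | O_CREAT:		return "AUE_OPENAT_RC";
	case O_RDONLY | O_CREAT | O_TRUNC:	return "AUE_OPENAT_RTC";
	case O_RDONLY | O_TRUNC:		return "AUE_OPENAT_RT";
	case O_RDWR:				return "AUE_OPENAT_RW";
	case O_RDWR | O_CREAT:			return "AUE_OPENAT_RWC";
	case O_RDWR | O_CREAT | O_TRUNC:	return "AUE_OPENAT_RWTC";
	case O_RDWR | O_TRUNC:			return "AUE_OPENAT_RWT";
	case O_WRONLY:				return "AUE_OPENAT_W";
	case O_WRONLY | O_CREAT:		return "AUE_OPENAT_WC";
	case O_WRONLY | O_CREAT | O_TRUNC:	return "AUE_OPENAT_WTC";
	case O_WRONLY | O_TRUNC:		return "AUE_OPENAT_WT";
	default:				return "AUE_OPENAT";
	}
}

int
main(void)
{
	/* prints AUE_OPENAT_RWTC, the generic event audit_commit() recasts */
	printf("%s\n", openat_event_name(O_RDWR | O_CREAT | O_TRUNC));
	return 0;
}

The ENOENT post-pass then downgrades the non-create variants back to plain AUE_OPENAT, since a failed lookup is really just an attribute event.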
- */ - switch (oflags) { - case O_RDONLY: - aevent = AUE_OPENAT_R; - break; - - case (O_RDONLY | O_CREAT): - aevent = AUE_OPENAT_RC; - break; - - case (O_RDONLY | O_CREAT | O_TRUNC): - aevent = AUE_OPENAT_RTC; - break; - - case (O_RDONLY | O_TRUNC): - aevent = AUE_OPENAT_RT; - break; - - case O_RDWR: - aevent = AUE_OPENAT_RW; - break; - - case (O_RDWR | O_CREAT): - aevent = AUE_OPENAT_RWC; - break; - - case (O_RDWR | O_CREAT | O_TRUNC): - aevent = AUE_OPENAT_RWTC; - break; - - case (O_RDWR | O_TRUNC): - aevent = AUE_OPENAT_RWT; - break; - - case O_WRONLY: - aevent = AUE_OPENAT_W; - break; - - case (O_WRONLY | O_CREAT): - aevent = AUE_OPENAT_WC; - break; - - case (O_WRONLY | O_CREAT | O_TRUNC): - aevent = AUE_OPENAT_WTC; - break; - - case (O_WRONLY | O_TRUNC): - aevent = AUE_OPENAT_WT; - break; - - default: - aevent = AUE_OPENAT; - break; - } - - /* - * Convert chatty errors to better matching events. Failures to - * find a file are really just attribute events -- so recast - * them as such. - */ - switch (aevent) { - case AUE_OPENAT_R: - case AUE_OPENAT_RT: - case AUE_OPENAT_RW: - case AUE_OPENAT_RWT: - case AUE_OPENAT_W: - case AUE_OPENAT_WT: - if (error == ENOENT) - aevent = AUE_OPENAT; - } - - return (aevent); + return (audit_flags_and_error_to_openatevent(oflags, error)); case F_UNLINKFROM: return (AUE_UNLINKAT); diff --git a/bsd/security/audit/audit_mac.c b/bsd/security/audit/audit_mac.c index edddd58a7..f1f065561 100644 --- a/bsd/security/audit/audit_mac.c +++ b/bsd/security/audit/audit_mac.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include diff --git a/bsd/security/audit/audit_private.h b/bsd/security/audit/audit_private.h index aa26d7ede..40f0ee658 100644 --- a/bsd/security/audit/audit_private.h +++ b/bsd/security/audit/audit_private.h @@ -389,6 +389,10 @@ au_event_t audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg); au_event_t audit_flags_and_error_to_openevent(int oflags, int error); au_event_t audit_flags_and_error_to_openextendedevent(int oflags, int error); +au_event_t audit_flags_and_error_to_openatevent(int oflags, + int error); +au_event_t audit_flags_and_error_to_openbyidevent(int oflags, + int error); au_event_t audit_msgctl_to_event(int cmd); au_event_t audit_semctl_to_event(int cmr); int audit_canon_path(struct vnode *cwd_vp, char *path, diff --git a/bsd/security/audit/audit_syscalls.c b/bsd/security/audit/audit_syscalls.c index a99464aef..67c4bab7a 100644 --- a/bsd/security/audit/audit_syscalls.c +++ b/bsd/security/audit/audit_syscalls.c @@ -73,7 +73,6 @@ #include #include #include -#include #include #include diff --git a/bsd/security/audit/audit_worker.c b/bsd/security/audit/audit_worker.c index 5ebb842ef..9a7a99281 100644 --- a/bsd/security/audit/audit_worker.c +++ b/bsd/security/audit/audit_worker.c @@ -68,7 +68,6 @@ #include #include -#include #include #include #include diff --git a/bsd/sys/Makefile b/bsd/sys/Makefile index 30d592d47..880af7e3d 100644 --- a/bsd/sys/Makefile +++ b/bsd/sys/Makefile @@ -14,7 +14,6 @@ EXPINC_SUBDIRS = \ _types # Installs header file for user level - -# $(DSTROOT)/System/Library/Frameworks/System.framework/Headers # $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders # $(DSTROOT)/usr/include/ DATAFILES = \ @@ -24,11 +23,11 @@ DATAFILES = \ errno.h ev.h event.h fasttrap.h fasttrap_isa.h fcntl.h file.h filedesc.h filio.h gmon.h \ ioccom.h ioctl.h \ ioctl_compat.h ipc.h kernel.h kernel_types.h kern_event.h loadable_fs.h lock.h lockf.h \ - kauth.h kdebug.h kern_control.h 
kern_memorystatus.h lctx.h malloc.h \ + kauth.h kdebug.h kern_control.h lctx.h lockstat.h malloc.h \ mbuf.h mman.h mount.h msg.h msgbuf.h netport.h param.h paths.h pipe.h poll.h \ proc.h proc_info.h ptrace.h queue.h quota.h random.h reboot.h resource.h resourcevar.h \ sbuf.h posix_sem.h posix_shm.h sdt.h \ - select.h sem.h semaphore.h shm.h signal.h signalvar.h socket.h socketvar.h sockio.h stat.h \ + select.h sem.h semaphore.h shm.h signal.h signalvar.h socket.h socketvar.h sockio.h stat.h stdio.h \ sysctl.h syslimits.h syslog.h sys_domain.h termios.h time.h \ timeb.h times.h tprintf.h trace.h tty.h ttychars.h ttycom.h \ ttydefaults.h ttydev.h types.h ubc.h ucontext.h ucred.h uio.h un.h unistd.h unpcb.h \ @@ -40,8 +39,10 @@ DATAFILES = \ # Installs header file for Apple internal use in user level - # $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders PRIVATE_DATAFILES = \ + coalition.h \ codesign.h \ content_protection.h \ + csr.h \ decmpfs.h \ disklabel.h \ fileport.h \ @@ -52,7 +53,9 @@ PRIVATE_DATAFILES = \ imgsrc.h \ ipcs.h \ kas_info.h \ + kern_memorystatus.h \ kern_overrides.h \ + sfi.h \ shm_internal.h \ spawn_internal.h \ tree.h \ @@ -71,11 +74,11 @@ KERNELFILES = \ errno.h ev.h event.h fcntl.h file.h filio.h \ ioccom.h ioctl.h ipc.h \ ioctl_compat.h kernel.h kernel_types.h kern_event.h lctx.h lock.h lockf.h \ - kauth.h kdebug.h md5.h kern_control.h kern_memorystatus.h imgact.h malloc.h namei.h \ + kauth.h kdebug.h md5.h kern_control.h imgact.h malloc.h namei.h \ mman.h mbuf.h mount.h netport.h param.h paths.h \ proc.h queue.h random.h resource.h \ sbuf.h posix_sem.h posix_shm.h sem.h shm.h \ - select.h signal.h socket.h socketvar.h sockio.h stat.h \ + select.h signal.h socket.h socketvar.h sockio.h stat.h stdio.h \ sysctl.h syslimits.h syslog.h systm.h sys_domain.h time.h \ types.h ubc.h ucontext.h ucred.h uio.h un.h unistd.h unpcb.h \ utfconv.h vm.h vmparam.h vnode.h vnode_if.h xattr.h \ @@ -94,15 +97,19 @@ PRIVATE_KERNELFILES = \ codesign.h \ cprotect.h \ content_protection.h \ + csr.h \ decmpfs.h \ disktab.h \ fbt.h \ fileport.h \ fsctl.h \ fslog.h \ + kasl.h \ + kern_memorystatus.h \ mach_swapon.h \ msgbuf.h \ eventvar.h \ + proc_info.h \ pthread_shims.h \ quota.h \ sem_internal.h \ @@ -147,12 +154,17 @@ INSTALL_KF_MI_GEN_LIST = MAKESYSCALLS = $(SRCROOT)/bsd/kern/makesyscalls.sh -syscall.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) +$(OBJROOT)/cscope.genhdrs: + $(_v)mkdir -p $(OBJROOT)/cscope.genhdrs + +syscall.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) $(OBJROOT)/cscope.genhdrs @echo "Generating bsd/sys/$@ from $<"; + @echo "$(OBJPATH)/bsd/sys/$@" > $(OBJROOT)/cscope.genhdrs/$@.path $(_v)$(MAKESYSCALLS) $< header > /dev/null -sysproto.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) +sysproto.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) $(OBJROOT)/cscope.genhdrs @echo "Generating bsd/sys/$@ from $<"; + @echo "$(OBJPATH)/bsd/sys/$@" > $(OBJROOT)/cscope.genhdrs/$@.path $(_v)$(MAKESYSCALLS) $< proto > /dev/null MAKE_POSIX_AVAILABILITY = $(SRCROOT)/bsd/sys/make_posix_availability.sh diff --git a/bsd/sys/_endian.h b/bsd/sys/_endian.h index 3fb0f062f..6e00b6c0b 100644 --- a/bsd/sys/_endian.h +++ b/bsd/sys/_endian.h @@ -113,10 +113,16 @@ __END_DECLS #define htons(x) ((__uint16_t)(x)) #if defined(KERNEL) || (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) + +#define ntohll(x) ((__uint64_t)(x)) +#define htonll(x) ((__uint64_t)(x)) + #define NTOHL(x) (x) #define NTOHS(x) (x) +#define NTOHLL(x) (x) #define HTONL(x) (x) 
#define HTONS(x) (x) +#define HTONLL(x) (x) #endif /* defined(KERNEL) || (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) */ #else /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ @@ -130,10 +136,16 @@ __END_DECLS #define htonl(x) __DARWIN_OSSwapInt32(x) #if defined(KERNEL) || (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) + +#define ntohll(x) __DARWIN_OSSwapInt64(x) +#define htonll(x) __DARWIN_OSSwapInt64(x) + #define NTOHL(x) (x) = ntohl((__uint32_t)x) #define NTOHS(x) (x) = ntohs((__uint16_t)x) +#define NTOHLL(x) (x) = ntohll((__uint64_t)x) #define HTONL(x) (x) = htonl((__uint32_t)x) #define HTONS(x) (x) = htons((__uint16_t)x) +#define HTONLL(x) (x) = htonll((__uint64_t)x) #endif /* defined(KERNEL) || (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) */ #endif /* __DARWIN_BYTE_ORDER */ #endif /* !_SYS__ENDIAN_H_ */ diff --git a/bsd/sys/_structs.h b/bsd/sys/_structs.h index 99e4c3250..63a41609f 100644 --- a/bsd/sys/_structs.h +++ b/bsd/sys/_structs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,131 +28,3 @@ #include #include - -#ifdef __need_ucontext_t -#ifndef __need_struct_ucontext -#define __need_struct_ucontext -#endif /* __need_struct_ucontext */ -#endif /* __need_ucontext_t */ - -#ifdef __need_ucontext64_t -#ifndef __need_struct_ucontext64 -#define __need_struct_ucontext64 -#endif /* __need_struct_ucontext64 */ -#endif /* __need_ucontext64_t */ - -#ifdef __need_struct_ucontext -#ifndef __need_struct_mcontext -#define __need_struct_mcontext -#endif /* __need_struct_mcontext */ -#endif /* __need_struct_ucontext */ - -#ifdef __need_struct_ucontext64 -#ifndef __need_struct_mcontext64 -#define __need_struct_mcontext64 -#endif /* __need_struct_mcontext64 */ -#endif /* __need_struct_ucontext64 */ - -#if defined(__need_struct_mcontext) || defined(__need_struct_mcontext64) -#include -#endif /* __need_struct_mcontext || __need_struct_mcontext64 */ - -#if defined(__need_stack_t) || defined(__need_struct_ucontext) || defined(__need_struct_ucontext64) -#ifndef __need_struct_sigaltstack -#define __need_struct_sigaltstack -#endif /* __need_struct_sigaltstack */ -#endif /* __need_stack_t || __need_struct_ucontext || __need_struct_ucontext64 */ - -#ifdef __need_struct_sigaltstack -#undef __need_struct_sigaltstack -#include -#endif /* __need_struct_sigaltstack */ - -#ifdef __need_struct_timespec -#undef __need_struct_timespec -#include -#endif /* __need_struct_timespec */ - -#ifdef __need_struct_timeval -#undef __need_struct_timeval -#include -#endif /* __need_struct_timeval */ - -#ifdef __need_struct_timeval32 -#undef __need_struct_timeval32 -#include -#endif /* __need_struct_timeval32 */ - -#ifdef __need_struct_ucontext -#undef __need_struct_ucontext -#include -#endif /* __need_struct_ucontext */ - -#ifdef __need_struct_ucontext64 -#undef __need_struct_ucontext64 -#include -#endif /* __need_struct_ucontext64 */ - -#ifdef KERNEL -/* LP64 version of struct timespec. time_t is a long and must grow when - * we're dealing with a 64-bit process. 
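_endian.h above gains 64-bit companions to ntohl()/htonl(): identity macros on big-endian builds, __DARWIN_OSSwapInt64 on little-endian ones. Since network order is big-endian and the swap is its own inverse, one helper covers both directions; a portable user-space equivalent in plain C:

#include <stdint.h>

/*
 * Equivalent of htonll()/ntohll() without compiler builtins: detect host
 * byte order at run time, pass the value through on a big-endian host,
 * and reverse all eight bytes on a little-endian one.
 */
static uint64_t
swap64_net(uint64_t x)
{
	const union { uint16_t u16; uint8_t u8[2]; } probe = { 0x0102 };

	if (probe.u8[0] == 0x01)	/* big-endian host: already network order */
		return x;
	return ((x & 0x00000000000000ffULL) << 56) |
	       ((x & 0x000000000000ff00ULL) << 40) |
	       ((x & 0x0000000000ff0000ULL) << 24) |
	       ((x & 0x00000000ff000000ULL) <<  8) |
	       ((x & 0x000000ff00000000ULL) >>  8) |
	       ((x & 0x0000ff0000000000ULL) >> 24) |
	       ((x & 0x00ff000000000000ULL) >> 40) |
	       ((x & 0xff00000000000000ULL) >> 56);
}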
- * WARNING - keep in sync with struct timespec - */ -#ifdef __need_struct_user_timespec -#undef __need_struct_user_timespec -#include -#endif /* __need_struct_user_timespec */ - -#ifdef __need_struct_user64_timespec -#undef __need_struct_user64_timespec -#include -#endif /* __need_struct_user64_timespec */ - -#ifdef __need_struct_user32_timespec -#undef __need_struct_user32_timespec -#include -#endif /* __need_struct_user32_timespec */ - -#ifdef __need_struct_user_timeval -#undef __need_struct_user_timeval -#include -#endif /* __need_struct_user_timeval */ - -#ifdef __need_struct_user64_timeval -#undef __need_struct_user64_timeval -#include -#endif /* __need_struct_user64_timeval */ - -#ifdef __need_struct_user32_timeval -#undef __need_struct_user32_timeval -#include -#endif /* __need_struct_user32_timeval */ - -#ifdef __need_struct_user64_itimerval -#undef __need_struct_user64_itimerval -#include -#endif /* __need_struct_user64_itimerval */ - -#ifdef __need_struct_user32_itimerval -#undef __need_struct_user32_itimerval -#include -#endif /* __need_struct_user32_itimerval */ - -#endif /* KERNEL */ - -#ifdef __need_fd_set -#undef __need_fd_set -#include -#endif /* __need_fd_set */ - -#ifdef __need_stack_t -#undef __need_stack_t -#endif /* __need_stack_t */ - -#ifdef __need_ucontext_t -#undef __need_ucontext_t -#endif /* __need_ucontext_t */ - -#ifdef __need_ucontext64_t -#undef __need_ucontext64_t -#endif /* __need_ucontext64_t */ diff --git a/bsd/sys/_types.h b/bsd/sys/_types.h index 5d67dafc8..5a532d455 100644 --- a/bsd/sys/_types.h +++ b/bsd/sys/_types.h @@ -32,45 +32,6 @@ #include #include -/* pthread opaque structures */ -#if defined(__LP64__) -#define __PTHREAD_SIZE__ 1168 -#define __PTHREAD_ATTR_SIZE__ 56 -#define __PTHREAD_MUTEXATTR_SIZE__ 8 -#define __PTHREAD_MUTEX_SIZE__ 56 -#define __PTHREAD_CONDATTR_SIZE__ 8 -#define __PTHREAD_COND_SIZE__ 40 -#define __PTHREAD_ONCE_SIZE__ 8 -#define __PTHREAD_RWLOCK_SIZE__ 192 -#define __PTHREAD_RWLOCKATTR_SIZE__ 16 -#else /* __LP64__ */ -#define __PTHREAD_SIZE__ 596 -#define __PTHREAD_ATTR_SIZE__ 36 -#define __PTHREAD_MUTEXATTR_SIZE__ 8 -#define __PTHREAD_MUTEX_SIZE__ 40 -#define __PTHREAD_CONDATTR_SIZE__ 4 -#define __PTHREAD_COND_SIZE__ 24 -#define __PTHREAD_ONCE_SIZE__ 4 -#define __PTHREAD_RWLOCK_SIZE__ 124 -#define __PTHREAD_RWLOCKATTR_SIZE__ 12 -#endif /* __LP64__ */ - -struct __darwin_pthread_handler_rec -{ - void (*__routine)(void *); /* Routine to call */ - void *__arg; /* Argument to pass */ - struct __darwin_pthread_handler_rec *__next; -}; -struct _opaque_pthread_attr_t { long __sig; char __opaque[__PTHREAD_ATTR_SIZE__]; }; -struct _opaque_pthread_cond_t { long __sig; char __opaque[__PTHREAD_COND_SIZE__]; }; -struct _opaque_pthread_condattr_t { long __sig; char __opaque[__PTHREAD_CONDATTR_SIZE__]; }; -struct _opaque_pthread_mutex_t { long __sig; char __opaque[__PTHREAD_MUTEX_SIZE__]; }; -struct _opaque_pthread_mutexattr_t { long __sig; char __opaque[__PTHREAD_MUTEXATTR_SIZE__]; }; -struct _opaque_pthread_once_t { long __sig; char __opaque[__PTHREAD_ONCE_SIZE__]; }; -struct _opaque_pthread_rwlock_t { long __sig; char __opaque[__PTHREAD_RWLOCK_SIZE__]; }; -struct _opaque_pthread_rwlockattr_t { long __sig; char __opaque[__PTHREAD_RWLOCKATTR_SIZE__]; }; -struct _opaque_pthread_t { long __sig; struct __darwin_pthread_handler_rec *__cleanup_stack; char __opaque[__PTHREAD_SIZE__]; }; - /* * Type definitions; takes common type definitions that must be used * in multiple header files due to [XSI], removes them from the system @@ -109,25 +70,6 
@@ typedef __darwin_mach_port_name_t __darwin_mach_port_t; /* Used by mach */ typedef __uint16_t __darwin_mode_t; /* [???] Some file attributes */ typedef __int64_t __darwin_off_t; /* [???] Used for file sizes */ typedef __int32_t __darwin_pid_t; /* [???] process and group IDs */ -typedef struct _opaque_pthread_attr_t - __darwin_pthread_attr_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_cond_t - __darwin_pthread_cond_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_condattr_t - __darwin_pthread_condattr_t; /* [???] Used for pthreads */ -typedef unsigned long __darwin_pthread_key_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_mutex_t - __darwin_pthread_mutex_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_mutexattr_t - __darwin_pthread_mutexattr_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_once_t - __darwin_pthread_once_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_rwlock_t - __darwin_pthread_rwlock_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_rwlockattr_t - __darwin_pthread_rwlockattr_t; /* [???] Used for pthreads */ -typedef struct _opaque_pthread_t - *__darwin_pthread_t; /* [???] Used for pthreads */ typedef __uint32_t __darwin_sigset_t; /* [???] signal set */ typedef __int32_t __darwin_suseconds_t; /* [???] microseconds */ typedef __uint32_t __darwin_uid_t; /* [???] user IDs */ @@ -135,9 +77,18 @@ typedef __uint32_t __darwin_useconds_t; /* [???] microseconds */ typedef unsigned char __darwin_uuid_t[16]; typedef char __darwin_uuid_string_t[37]; +#ifndef KERNEL +#include +#endif /* KERNEL */ + +#if defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 5 || __GNUC__ > 3) +#define __offsetof(type, field) __builtin_offsetof(type, field) +#else /* !(gcc >= 3.5) */ +#define __offsetof(type, field) ((size_t)(&((type *)0)->field)) +#endif /* (gcc >= 3.5) */ + #ifdef KERNEL -#ifndef offsetof -#define offsetof(type, member) ((size_t)(&((type *)0)->member)) -#endif /* offsetof */ +#include #endif /* KERNEL */ + #endif /* _SYS__TYPES_H_ */ diff --git a/bsd/sys/_types/Makefile b/bsd/sys/_types/Makefile index e72f4a682..318b9dce8 100644 --- a/bsd/sys/_types/Makefile +++ b/bsd/sys/_types/Makefile @@ -16,7 +16,6 @@ EXPINC_SUBDIRS = # $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders # $(DSTROOT)/usr/include/ DATAFILES = \ - ___offsetof.h \ _blkcnt_t.h \ _blksize_t.h \ _clock_t.h \ @@ -33,6 +32,7 @@ DATAFILES = \ _filesec_t.h \ _fsblkcnt_t.h \ _fsfilcnt_t.h \ + _fsid_t.h \ _gid_t.h \ _guid_t.h \ _id_t.h \ @@ -55,19 +55,10 @@ DATAFILES = \ _o_dsync.h \ _o_sync.h \ _off_t.h \ + _offsetof.h \ _os_inline.h \ _pid_t.h \ _posix_vdisable.h \ - _pthread_attr_t.h \ - _pthread_cond_t.h \ - _pthread_condattr_t.h \ - _pthread_key_t.h \ - _pthread_mutex_t.h \ - _pthread_mutexattr_t.h \ - _pthread_once_t.h \ - _pthread_rwlock_t.h \ - _pthread_rwlockattr_t.h \ - _pthread_t.h \ _ptrdiff_t.h \ _rsize_t.h \ _rune_t.h \ diff --git a/bsd/sys/_types/___offsetof.h b/bsd/sys/_types/_fsid_t.h similarity index 86% rename from bsd/sys/_types/___offsetof.h rename to bsd/sys/_types/_fsid_t.h index 852dc98af..5532b5715 100644 --- a/bsd/sys/_types/___offsetof.h +++ b/bsd/sys/_types/_fsid_t.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2014 Apple Inc. All rights reserved. 
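The _types.h cleanup above replaces the hand-rolled null-pointer idiom with __builtin_offsetof on gcc 3.5 and newer, keeping the pointer-arithmetic expression only as a fallback for older compilers. A quick check that the two forms agree:

#include <stddef.h>
#include <stdio.h>

/* the fallback expression the header keeps for pre-3.5 compilers */
#define OFFSETOF_FALLBACK(type, field) ((size_t)(&((type *)0)->field))

struct sample {
	char	a;
	int	b;
	double	c;
};

int
main(void)
{
	/* both print the same offset; the builtin is also usable in
	 * constant expressions and avoids dereferencing a null pointer */
	printf("builtin: %zu  fallback: %zu\n",
	    offsetof(struct sample, c),
	    OFFSETOF_FALLBACK(struct sample, c));
	return 0;
}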
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,9 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef __offsetof -#define __offsetof(type, field) ((size_t)(&((type *)0)->field)) -#endif /* __offsetof */ +#ifndef _FSID_T +#define _FSID_T +typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ +#endif /* _FSID_T */ diff --git a/bsd/sys/_types/_pthread_t.h b/bsd/sys/_types/_offsetof.h similarity index 92% rename from bsd/sys/_types/_pthread_t.h rename to bsd/sys/_types/_offsetof.h index 97ecb7cec..16832a355 100644 --- a/bsd/sys/_types/_pthread_t.h +++ b/bsd/sys/_types/_offsetof.h @@ -25,7 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _PTHREAD_T -#define _PTHREAD_T -typedef __darwin_pthread_t pthread_t; -#endif /* _PTHREAD_T */ +#ifndef offsetof +#define offsetof(type, field) __offsetof(type, field) +#endif /* offsetof */ diff --git a/bsd/sys/_types/_pthread_cond_t.h b/bsd/sys/_types/_pthread_cond_t.h deleted file mode 100644 index ce07f92dd..000000000 --- a/bsd/sys/_types/_pthread_cond_t.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PTHREAD_COND_T -#define _PTHREAD_COND_T -typedef __darwin_pthread_cond_t pthread_cond_t; -#endif /* _PTHREAD_COND_T */ diff --git a/bsd/sys/_types/_pthread_condattr_t.h b/bsd/sys/_types/_pthread_condattr_t.h deleted file mode 100644 index 4dad3cfb7..000000000 --- a/bsd/sys/_types/_pthread_condattr_t.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PTHREAD_CONDATTR_T -#define _PTHREAD_CONDATTR_T -typedef __darwin_pthread_condattr_t pthread_condattr_t; -#endif /* _PTHREAD_CONDATTR_T */ diff --git a/bsd/sys/_types/_pthread_mutex_t.h b/bsd/sys/_types/_pthread_mutex_t.h deleted file mode 100644 index 4a580ba91..000000000 --- a/bsd/sys/_types/_pthread_mutex_t.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PTHREAD_MUTEX_T -#define _PTHREAD_MUTEX_T -typedef __darwin_pthread_mutex_t pthread_mutex_t; -#endif /*_PTHREAD_MUTEX_T */ diff --git a/bsd/sys/_types/_pthread_mutexattr_t.h b/bsd/sys/_types/_pthread_mutexattr_t.h deleted file mode 100644 index 26879560e..000000000 --- a/bsd/sys/_types/_pthread_mutexattr_t.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PTHREAD_MUTEXATTR_T -#define _PTHREAD_MUTEXATTR_T -typedef __darwin_pthread_mutexattr_t pthread_mutexattr_t; -#endif /* _PTHREAD_MUTEXATTR_T */ diff --git a/bsd/sys/_types/_pthread_once_t.h b/bsd/sys/_types/_pthread_once_t.h deleted file mode 100644 index e5bcf2925..000000000 --- a/bsd/sys/_types/_pthread_once_t.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PTHREAD_ONCE_T -#define _PTHREAD_ONCE_T -typedef __darwin_pthread_once_t pthread_once_t; -#endif /* _PTHREAD_ONCE_T */ diff --git a/bsd/sys/_types/_pthread_rwlock_t.h b/bsd/sys/_types/_pthread_rwlock_t.h deleted file mode 100644 index f6d018e7f..000000000 --- a/bsd/sys/_types/_pthread_rwlock_t.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PTHREAD_RWLOCK_T -#define _PTHREAD_RWLOCK_T -typedef __darwin_pthread_rwlock_t pthread_rwlock_t; -#endif /* _PTHREAD_RWLOCK_T */ diff --git a/bsd/sys/_types/_pthread_rwlockattr_t.h b/bsd/sys/_types/_pthread_rwlockattr_t.h deleted file mode 100644 index ab28658a2..000000000 --- a/bsd/sys/_types/_pthread_rwlockattr_t.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PTHREAD_RWLOCKATTR_T -#define _PTHREAD_RWLOCKATTR_T -typedef __darwin_pthread_rwlockattr_t pthread_rwlockattr_t; -#endif /* _PTHREAD_RWLOCKATTR_T */ diff --git a/bsd/sys/aio.h b/bsd/sys/aio.h index 71b2a8133..7a887288f 100644 --- a/bsd/sys/aio.h +++ b/bsd/sys/aio.h @@ -66,6 +66,7 @@ #include #include +#ifndef KERNEL struct aiocb { int aio_fildes; /* File descriptor */ off_t aio_offset; /* File offset */ @@ -75,6 +76,7 @@ struct aiocb { struct sigevent aio_sigevent; /* Signal number and value */ int aio_lio_opcode; /* Operation to be performed */ }; +#endif /* KERNEL */ #ifdef KERNEL diff --git a/bsd/sys/attr.h b/bsd/sys/attr.h index cd5ecec18..8dc62498d 100644 --- a/bsd/sys/attr.h +++ b/bsd/sys/attr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -53,6 +53,8 @@ #define FSOPT_EXCHANGE_DATA_ONLY 0x0000010 #endif +#define FSOPT_ATTR_CMN_EXTENDED 0x00000020 + /* we currently aren't anywhere near this amount for a valid * fssearchblock.sizeofsearchparams1 or fssearchblock.sizeofsearchparams2 * but we put a sanity check in to avoid abuse of the value passed in from @@ -79,7 +81,7 @@ typedef u_int32_t attrgroup_t; struct attrlist { u_short bitmapcount; /* number of attr. bit sets in list (should be 5) */ - u_int16_t reserved; /* (to maintain 4-byte alignment) */ + u_int16_t reserved; /* (to maintain 4-byte alignment) */ attrgroup_t commonattr; /* common attribute group */ attrgroup_t volattr; /* Volume attribute group */ attrgroup_t dirattr; /* directory attribute group */ @@ -340,8 +342,19 @@ typedef struct vol_attributes_attr { #define ATTR_CMN_GRPID 0x00010000 #define ATTR_CMN_ACCESSMASK 0x00020000 #define ATTR_CMN_FLAGS 0x00040000 -/* #define ATTR_CMN_NAMEDATTRCOUNT 0x00080000 not implemented */ -/* #define ATTR_CMN_NAMEDATTRLIST 0x00100000 not implemented */ + +/* The following were defined as: */ +/* #define ATTR_CMN_NAMEDATTRCOUNT 0x00080000 */ +/* #define ATTR_CMN_NAMEDATTRLIST 0x00100000 */ +/* These bits have been salvaged for use as: */ +/* #define ATTR_CMN_GEN_COUNT 0x00080000 */ +/* #define ATTR_CMN_DOCUMENT_ID 0x00100000 */ +/* They can only be used with the FSOPT_ATTR_CMN_EXTENDED */ +/* option flag. */ + +#define ATTR_CMN_GEN_COUNT 0x00080000 +#define ATTR_CMN_DOCUMENT_ID 0x00100000 + #define ATTR_CMN_USERACCESS 0x00200000 #define ATTR_CMN_EXTENDED_SECURITY 0x00400000 #define ATTR_CMN_UUID 0x00800000 @@ -350,14 +363,16 @@ typedef struct vol_attributes_attr { #define ATTR_CMN_PARENTID 0x04000000 #define ATTR_CMN_FULLPATH 0x08000000 #define ATTR_CMN_ADDEDTIME 0x10000000 +#define ATTR_CMN_ERROR 0x20000000 +#define ATTR_CMN_DATA_PROTECT_FLAGS 0x40000000 /* - * ATTR_CMN_RETURNED_ATTRS is only valid with getattrlist(2). - * It is always the first attribute in the return buffer. + * ATTR_CMN_RETURNED_ATTRS is only valid with getattrlist(2) and + * getattrlistbulk(2). It is always the first attribute in the return buffer. 
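attr.h above salvages the never-implemented NAMEDATTR bits as ATTR_CMN_GEN_COUNT and ATTR_CMN_DOCUMENT_ID, gated behind the new FSOPT_ATTR_CMN_EXTENDED option flag so that old binaries still setting the stale bits keep their old behavior. A user-space sketch of requesting the new bits; the reply layout shown (length word, returned-attrs set, then two 32-bit values) follows the usual packed attribute-buffer convention and is an assumption here, not something this patch specifies:

#include <sys/attr.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main(int argc, char **argv)
{
	struct attrlist al;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_GEN_COUNT |
	    ATTR_CMN_DOCUMENT_ID;

	struct {
		uint32_t	length;		/* total length of the reply */
		attribute_set_t	returned;	/* which bits were honored */
		uint32_t	gen_count;	/* assumed 32-bit payloads */
		uint32_t	document_id;
	} buf;

	/* without FSOPT_ATTR_CMN_EXTENDED the salvaged bits are rejected */
	if (getattrlist(argc > 1 ? argv[1] : ".", &al, &buf, sizeof(buf),
	    FSOPT_ATTR_CMN_EXTENDED) != 0) {
		perror("getattrlist");
		return 1;
	}
	printf("gen count %u, document id %u\n",
	    (unsigned)buf.gen_count, (unsigned)buf.document_id);
	return 0;
}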
*/ -#define ATTR_CMN_RETURNED_ATTRS 0x80000000 +#define ATTR_CMN_RETURNED_ATTRS 0x80000000 -#define ATTR_CMN_VALIDMASK 0xBFFFFFFF +#define ATTR_CMN_VALIDMASK 0xFFFFFFFF #define ATTR_CMN_SETMASK 0x01C7FF00 #define ATTR_CMN_VOLSETMASK 0x00006700 @@ -420,13 +435,16 @@ typedef struct vol_attributes_attr { #define ATTR_FORK_SETMASK 0x00000000 /* Obsolete, implemented, not supported */ -#define ATTR_CMN_NAMEDATTRCOUNT 0x00080000 /* not implemented */ -#define ATTR_CMN_NAMEDATTRLIST 0x00100000 /* not implemented */ +#define ATTR_CMN_NAMEDATTRCOUNT 0x00080000 +#define ATTR_CMN_NAMEDATTRLIST 0x00100000 #define ATTR_FILE_CLUMPSIZE 0x00000010 /* obsolete */ #define ATTR_FILE_FILETYPE 0x00000040 /* always zero */ #define ATTR_FILE_DATAEXTENTS 0x00000800 /* obsolete, HFS-specific */ #define ATTR_FILE_RSRCEXTENTS 0x00004000 /* obsolete, HFS-specific */ +/* Required attributes for getattrlistbulk(2) */ +#define ATTR_BULK_REQUIRED (ATTR_CMN_NAME | ATTR_CMN_RETURNED_ATTRS) + /* * Searchfs */ diff --git a/bsd/sys/bsdtask_info.h b/bsd/sys/bsdtask_info.h index 416e4b475..0bebc2c7f 100644 --- a/bsd/sys/bsdtask_info.h +++ b/bsd/sys/bsdtask_info.h @@ -102,10 +102,15 @@ void vm_map_region_top_walk(vm_map_entry_t entry, vm_region_top_info_t top); void vm_map_region_walk(vm_map_t map, vm_map_offset_t a, vm_map_entry_t entry, vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended); kern_return_t vnode_pager_get_object_vnode(memory_object_t mem_obj, uintptr_t * vnodeaddr, uint32_t * vid); extern uint32_t vnode_vid(void *vp); +#if CONFIG_IOSCHED +kern_return_t vnode_pager_get_object_devvp(memory_object_t mem_obj, uintptr_t *devvp); +extern struct vnode *vnode_mountdevvp(struct vnode *); +#endif #endif /* MACH_KERNEL_PRIVATE */ extern int fill_procregioninfo(task_t t, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vp, uint32_t *vid); +extern int fill_procregioninfo_onlymappedvnodes(task_t t, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vp, uint32_t *vid); void fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo); int fill_taskthreadinfo(task_t task, uint64_t thaddr, int thuniqueid, struct proc_threadinfo_internal * ptinfo, void *, int *); int fill_taskthreadlist(task_t task, void * buffer, int thcount); diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h index 93c885982..6db9a6937 100644 --- a/bsd/sys/buf.h +++ b/bsd/sys/buf.h @@ -275,7 +275,7 @@ void buf_seterror(buf_t, errno_t); /*! @function buf_setflags @abstract Set flags on a buffer. - @discussion: buffer_flags |= flags + @discussion buffer_flags |= flags @param bp Buffer whose flags to set. @param flags Flags to add to buffer's mask. B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA @return void. @@ -285,7 +285,7 @@ void buf_setflags(buf_t, int32_t); /*! @function buf_clearflags @abstract Clear flags on a buffer. - @discussion: buffer_flags &= ~flags + @discussion buffer_flags &= ~flags @param bp Buffer whose flags to clear. @param flags Flags to remove from buffer's mask. B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA @return void. @@ -1002,7 +1002,7 @@ buf_t buf_geteblk(int); /*! @function buf_clear_redundancy_flags @abstract Clear flags on a buffer. - @discussion: buffer_redundancy_flags &= ~flags + @discussion buffer_redundancy_flags &= ~flags @param bp Buffer whose flags to clear. @param flags Flags to remove from buffer's mask @return void. @@ -1020,7 +1020,7 @@ uint32_t buf_redundancy_flags(buf_t); /*! 
@function buf_setredundancyflags @abstract Set redundancy flags on a buffer. - @discussion: buffer_redundancy_flags |= flags + @discussion buffer_redundancy_flags |= flags @param bp Buffer whose flags to set. @param flags Flags to add to buffer's redundancy flags @return void. @@ -1054,8 +1054,9 @@ int buf_static(buf_t); #ifdef KERNEL_PRIVATE void buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void (**)(buf_t, void *), void **); +/* bufattr allocation/duplication/deallocation functions */ bufattr_t bufattr_alloc(void); - +bufattr_t bufattr_dup(bufattr_t bap); void bufattr_free(bufattr_t bap); /*! @@ -1104,7 +1105,7 @@ int bufattr_rawencrypted(bufattr_t bap); @function bufattr_markgreedymode @abstract Mark a buffer to use the greedy mode for writing. @param bap Buffer attributes to mark. - @discussion Greedy Mode: request improved write performance from the underlying device at the expense of storage effeciency + @discussion Greedy Mode: request improved write performance from the underlying device at the expense of storage efficiency @return void. */ void bufattr_markgreedymode(bufattr_t bap); @@ -1113,11 +1114,30 @@ int bufattr_rawencrypted(bufattr_t bap); @function bufattr_greedymode @abstract Check if a buffer is written using the Greedy Mode @param bap Buffer attributes to test. - @discussion Greedy Mode: request improved write performance from the underlying device at the expense of storage effeciency + @discussion Greedy Mode: request improved write performance from the underlying device at the expense of storage efficiency @return Nonzero if buffer uses greedy mode, 0 otherwise. */ int bufattr_greedymode(bufattr_t bap); +/*! + @function bufattr_markisochronous + @abstract Mark a buffer to use the isochronous throughput mode for writing. + @param bap Buffer attributes to mark. + @discussion Isochronous mode: request improved write performance from the underlying device at the expense of storage efficiency + @return void. + */ +void bufattr_markisochronous(bufattr_t bap); + +/*! + @function bufattr_isochronous + @abstract Check if a buffer is written using the isochronous throughput mode + @param bap Buffer attributes to test. + @discussion Isochronous mode: request improved write performance from the underlying device at the expense of storage efficiency + @return Nonzero if buffer uses isochronous mode, 0 otherwise. + */ +int bufattr_isochronous(bufattr_t bap); + + /*! @function bufattr_throttled @abstract Check if a buffer is throttled. @@ -1126,6 +1146,14 @@ int bufattr_greedymode(bufattr_t bap); */ int bufattr_throttled(bufattr_t bap); +/*! + @function bufattr_passive + @abstract Check if a buffer is marked passive. + @param bap Buffer attribute to test. + @return Nonzero if the buffer is marked passive, 0 otherwise. + */ +int bufattr_passive(bufattr_t bap); + /*! @function bufattr_nocache @abstract Check if a buffer has nocache attribute. @@ -1136,13 +1164,22 @@ int bufattr_nocache(bufattr_t bap); /*! @function bufattr_meta - @abstract Check if a buffer has meta attribute. + @abstract Check if a buffer has the bufattr meta attribute. @param bap Buffer attribute to test. @return Nonzero if the buffer has meta attribute, 0 otherwise. */ int bufattr_meta(bufattr_t bap); +/*! + @function bufattr_markmeta + @abstract Set the bufattr meta attribute. + @param bap Buffer attribute to manipulate. + @return void. + */ +void bufattr_markmeta(bufattr_t bap); + + /*! @function bufattr_delayidlesleep @abstract Check if a buffer is marked to delay idle sleep on disk IO.
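A kernel-side sketch of the new isochronous hint, assuming buf_attr() is used to reach the buffer's attribute handle as elsewhere in this KPI (the helper name and call site are invented):

#include <sys/buf.h>

/*
 * Hypothetical helper: tag a media-streaming write so the driver favors
 * sustained throughput over storage efficiency.
 */
static void
mark_av_stream_write(buf_t bp)
{
    bufattr_t bap = buf_attr(bp);    /* attribute handle for this buffer */

    if (bap != NULL && !bufattr_isochronous(bap))
        bufattr_markisochronous(bap);
}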
diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h index 6c698d044..e0674869f 100644 --- a/bsd/sys/buf_internal.h +++ b/bsd/sys/buf_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -256,13 +256,26 @@ extern vm_offset_t buf_kernel_addrperm; #define BA_DELAYIDLESLEEP 0x00000004 /* Process is marked to delay idle sleep on disk IO */ #define BA_NOCACHE 0x00000008 #define BA_META 0x00000010 -#define BA_IO_TIER_MASK 0x00000f00 -#define BA_IO_TIER_SHIFT 8 #define BA_GREEDY_MODE 0x00000020 /* High speed writes that consume more storage */ #define BA_QUICK_COMPLETE 0x00000040 /* Request quick completion at expense of storage efficiency */ +#define BA_PASSIVE 0x00000080 + +/* + * Note: IO_TIERs consume 0x0100, 0x0200, 0x0400, 0x0800 + * These are now in-use by the I/O tiering system. + */ +#define BA_IO_TIER_MASK 0x00000f00 +#define BA_IO_TIER_SHIFT 8 + +#define BA_ISOCHRONOUS 0x00001000 /* device specific isochronous throughput to media */ + #define GET_BUFATTR_IO_TIER(bap) ((bap->ba_flags & BA_IO_TIER_MASK) >> BA_IO_TIER_SHIFT) -#define SET_BUFATTR_IO_TIER(bap, tier) (bap->ba_flags |= ((tier << BA_IO_TIER_SHIFT) & BA_IO_TIER_MASK)) +#define SET_BUFATTR_IO_TIER(bap, tier) \ +do { \ + (bap)->ba_flags &= (~BA_IO_TIER_MASK); \ + (bap)->ba_flags |= (((tier) << BA_IO_TIER_SHIFT) & BA_IO_TIER_MASK); \ +} while(0) extern int niobuf_headers; /* The number of IO buffer headers for cluster IO */ extern int nbuf_headers; /* The number of buffer headers */ diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h index ede0beecd..7c729026f 100644 --- a/bsd/sys/cdefs.h +++ b/bsd/sys/cdefs.h @@ -239,6 +239,27 @@ # define __header_always_inline __header_inline #endif +/* + * Compiler-dependent macros that bracket portions of code where the + * "-Wunreachable-code" warning should be ignored. Please use sparingly. + */ +#if defined(__clang__) +# define __unreachable_ok_push \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wunreachable-code\"") +# define __unreachable_ok_pop \ + _Pragma("clang diagnostic pop") +#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +# define __unreachable_ok_push \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wunreachable-code\"") +# define __unreachable_ok_pop \ + _Pragma("GCC diagnostic pop") +#else +# define __unreachable_ok_push +# define __unreachable_ok_pop +#endif + /* * Compiler-dependent macros to declare that functions take printf-like * or scanf-like arguments. 
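The __unreachable_ok brackets defined above are used like this (an illustrative sketch; the enum and function are invented):

enum fruit { APPLE, ORANGE };

__unreachable_ok_push
static int
fruit_tag(enum fruit f)
{
    switch (f) {
    case APPLE:  return 1;
    case ORANGE: return 2;
    }
    return 0;    /* unreachable for valid enums; warning suppressed above */
}
__unreachable_ok_pop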
They are null except for versions of gcc @@ -329,8 +350,12 @@ #define __DARWIN_ONLY_64_BIT_INO_T 0 #define __DARWIN_ONLY_UNIX_CONFORMANCE 0 #define __DARWIN_ONLY_VERS_1050 0 -#define __DARWIN_SUF_DARWIN10 "_darwin10" -#define __DARWIN10_ALIAS(sym) __asm("_" __STRING(sym) __DARWIN_SUF_DARWIN10) +#if defined(__x86_64__) +#define __DARWIN_SUF_DARWIN14 "_darwin14" +#define __DARWIN14_ALIAS(sym) __asm("_" __STRING(sym) __DARWIN_SUF_DARWIN14) +#else +#define __DARWIN14_ALIAS(sym) +#endif #else /* !KERNEL */ #ifdef PLATFORM_iPhoneOS /* Platform: iPhoneOS */ @@ -344,6 +369,18 @@ #define __DARWIN_ONLY_UNIX_CONFORMANCE 1 #define __DARWIN_ONLY_VERS_1050 1 #endif /* PLATFORM_iPhoneSimulator */ +#ifdef PLATFORM_iPhoneOSNano +/* Platform: iPhoneOSNano */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_iPhoneOSNano */ +#ifdef PLATFORM_iPhoneNanoSimulator +/* Platform: iPhoneNanoSimulator */ +#define __DARWIN_ONLY_64_BIT_INO_T 1 +#define __DARWIN_ONLY_UNIX_CONFORMANCE 1 +#define __DARWIN_ONLY_VERS_1050 1 +#endif /* PLATFORM_iPhoneNanoSimulator */ #ifdef PLATFORM_MacOSX /* Platform: MacOSX */ #define __DARWIN_ONLY_64_BIT_INO_T 0 @@ -499,6 +536,7 @@ #define __DARWIN_ALIAS(sym) __asm("_" __STRING(sym) __DARWIN_SUF_UNIX03) #define __DARWIN_ALIAS_C(sym) __asm("_" __STRING(sym) __DARWIN_SUF_NON_CANCELABLE __DARWIN_SUF_UNIX03) #define __DARWIN_ALIAS_I(sym) __asm("_" __STRING(sym) __DARWIN_SUF_64_BIT_INO_T __DARWIN_SUF_UNIX03) +#define __DARWIN_NOCANCEL(sym) __asm("_" __STRING(sym) __DARWIN_SUF_NON_CANCELABLE) #define __DARWIN_INODE64(sym) __asm("_" __STRING(sym) __DARWIN_SUF_64_BIT_INO_T) #define __DARWIN_1050(sym) __asm("_" __STRING(sym) __DARWIN_SUF_1050) diff --git a/bsd/sys/coalition.h b/bsd/sys/coalition.h new file mode 100644 index 000000000..73eca26a8 --- /dev/null +++ b/bsd/sys/coalition.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_COALITION_H_ +#define _SYS_COALITION_H_ + +#include +#include +#include +#include + +__BEGIN_DECLS + +#ifndef KERNEL +/* Userspace syscall prototypes */ + +/* Syscalls */ +int coalition_create(uint64_t *cid_out, uint32_t flags); +int coalition_terminate(uint64_t cid, uint32_t flags); +int coalition_reap(uint64_t cid, uint32_t flags); + +/* This struct is also defined in osfmk/kern/coalition.h. Keep in sync. */ +struct coalition_resource_usage { + uint64_t tasks_started; + uint64_t tasks_exited; + uint64_t time_nonempty; + uint64_t cpu_time; + uint64_t interrupt_wakeups; + uint64_t platform_idle_wakeups; + uint64_t bytesread; + uint64_t byteswritten; + uint64_t gpu_time; +}; + +/* Wrappers around __coalition_info syscall (with proper struct types) */ +int coalition_info_resource_usage(uint64_t cid, struct coalition_resource_usage *cru, size_t sz); + +#endif /* !KERNEL */ + +/* Flags shared by userspace and xnu */ + +#define COALITION_CREATE_FLAG_PRIVILEGED ((uint32_t)0x1) + +#define COALITION_CREATE_FLAG_MASK ((uint32_t)0x1) + +#ifdef PRIVATE +/* Flavors shared only by xnu + Libsyscall */ + +/* Syscall flavors */ +#define COALITION_OP_CREATE 1 +#define COALITION_OP_TERMINATE 2 +#define COALITION_OP_REAP 3 + +/* coalition_info flavors */ +#define COALITION_INFO_RESOURCE_USAGE 1 + +#endif /* PRIVATE */ + +__END_DECLS + +#endif /* _SYS_COALITION_H_ */ diff --git a/bsd/machine/ucontext.h b/bsd/sys/codedir_internal.h similarity index 76% rename from bsd/machine/ucontext.h rename to bsd/sys/codedir_internal.h index 60e157643..e0070c5ca 100644 --- a/bsd/machine/ucontext.h +++ b/bsd/sys/codedir_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2007 Apple Inc. All rights reserved. + * Copyright (c) 2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,13 +25,23 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _MACHINE_UCONTEXT_H_ -#define _MACHINE_UCONTEXT_H_ +/* + * File: codedir_internal.h + * Author: Greg Kerr + * 05-Dec-2013 + * + * Header file for Code Directory Functions.
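Taken together, the coalition calls above imply a lifecycle along these lines (a hedged userspace sketch against this private interface; error handling is abbreviated, and the printf casts assume uint64_t prints as unsigned long long):

#include <stdint.h>
#include <stdio.h>
#include <sys/coalition.h>    /* private header, not in the public SDK */

int
main(void)
{
    uint64_t cid = 0;
    struct coalition_resource_usage cru;

    if (coalition_create(&cid, 0) != 0) {
        perror("coalition_create");
        return 1;
    }
    /* A launchd-style manager would spawn tasks into the coalition here. */
    if (coalition_info_resource_usage(cid, &cru, sizeof(cru)) == 0)
        printf("cpu time %llu ns, tasks started %llu\n",
            (unsigned long long)cru.cpu_time,
            (unsigned long long)cru.tasks_started);
    coalition_terminate(cid, 0);    /* refuse new members, kill current ones */
    coalition_reap(cid, 0);         /* reclaim the now-empty coalition */
    return 0;
}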
+ * + */ +#ifndef _SYS_CODEDIR_INTERNAL_H_ +#define _SYS_CODEDIR_INTERNAL_H_ -#if defined (__i386__) || defined(__x86_64__) -#include "i386/ucontext.h" -#else -#error architecture not supported -#endif +#include -#endif /* _MACHINE_UCONTEXT_H_ */ +const +CS_CodeDirectory *findCodeDirectory( + const CS_SuperBlob *embedded, + char *lower_bound, + char *upper_bound); + +#endif diff --git a/bsd/sys/codesign.h b/bsd/sys/codesign.h index 21e05670b..6461895d5 100644 --- a/bsd/sys/codesign.h +++ b/bsd/sys/codesign.h @@ -32,20 +32,27 @@ /* code signing attributes of a process */ #define CS_VALID 0x0000001 /* dynamically valid */ #define CS_ADHOC 0x0000002 /* ad hoc signed */ +#define CS_GET_TASK_ALLOW 0x0000004 /* has get-task-allow entitlement */ +#define CS_INSTALLER 0x0000008 /* has installer entitlement */ #define CS_HARD 0x0000100 /* don't load invalid pages */ #define CS_KILL 0x0000200 /* kill process if it becomes invalid */ #define CS_CHECK_EXPIRATION 0x0000400 /* force expiration checking */ #define CS_RESTRICT 0x0000800 /* tell dyld to treat restricted */ #define CS_ENFORCEMENT 0x0001000 /* require enforcement */ +#define CS_REQUIRE_LV 0x0002000 /* require library validation */ #define CS_ALLOWED_MACHO 0x00ffffe #define CS_EXEC_SET_HARD 0x0100000 /* set CS_HARD on any exec'ed process */ #define CS_EXEC_SET_KILL 0x0200000 /* set CS_KILL on any exec'ed process */ #define CS_EXEC_SET_ENFORCEMENT 0x0400000 /* set CS_ENFORCEMENT on any exec'ed process */ +#define CS_EXEC_SET_INSTALLER 0x0800000 /* set CS_INSTALLER on any exec'ed process */ #define CS_KILLED 0x1000000 /* was killed by kernel for invalidity */ +#define CS_DYLD_PLATFORM 0x2000000 /* dyld used to load this is a platform binary */ + +#define CS_ENTITLEMENT_FLAGS (CS_GET_TASK_ALLOW | CS_INSTALLER) /* csops operations */ #define CS_OPS_STATUS 0 /* return status */ @@ -89,6 +96,7 @@ enum { CSMAGIC_BLOBWRAPPER = 0xfade0b01, /* CMS Signature, among other things */ CS_SUPPORTSSCATTER = 0x20100, + CS_SUPPORTSTEAMID = 0x20200, CSSLOT_CODEDIRECTORY = 0, /* slot index for CodeDirectory */ CSSLOT_INFOSLOT = 1, @@ -128,6 +136,8 @@ typedef struct __CodeDirectory { uint32_t spare2; /* unused (must be zero) */ /* Version 0x20100 */ uint32_t scatterOffset; /* offset of optional scatter vector */ + /* Version 0x20200 */ + uint32_t teamOffset; /* offset of optional team identifier */ /* followed by dynamic content as located by offset fields above */ } CS_CodeDirectory; @@ -165,6 +175,7 @@ typedef struct __SC_Scatter { #ifndef KERNEL #include +#include __BEGIN_DECLS /* code sign operations */ @@ -175,8 +186,11 @@ __END_DECLS #else /* !KERNEL */ #include +#include struct vnode; +struct cs_blob; +struct fileglob; struct cscsr_functions { int csr_version; @@ -187,10 +201,23 @@ struct cscsr_functions { __BEGIN_DECLS int cs_enforcement(struct proc *); -int cs_entitlements_blob_get(struct proc *, void **out_start, size_t *out_length); +int cs_require_lv(struct proc *); +uint32_t cs_entitlement_flags(struct proc *p); +int cs_entitlements_blob_get(struct proc *, void **, size_t *); uint8_t * cs_get_cdhash(struct proc *); void cs_register_cscsr(struct cscsr_functions *); +const CS_GenericBlob * + cs_find_blob(struct cs_blob *, uint32_t, uint32_t); + +const char * csblob_get_teamid(struct cs_blob *); +const char * csproc_get_teamid(struct proc *); +const char * csvnode_get_teamid(struct vnode *, off_t); +int csproc_get_platform_binary(struct proc *); +const char * csfg_get_teamid(struct fileglob *); +int csfg_get_path(struct fileglob *, char *, int *); +int 
csfg_get_platform_binary(struct fileglob *); + __END_DECLS #ifdef XNU_KERNEL_PRIVATE diff --git a/bsd/sys/conf.h b/bsd/sys/conf.h index 2d7dd1d33..d03a3df43 100644 --- a/bsd/sys/conf.h +++ b/bsd/sys/conf.h @@ -257,7 +257,7 @@ struct linesw { extern struct linesw linesw[]; -extern int nlinesw; +extern const int nlinesw; int ldisc_register(int , struct linesw *); void ldisc_deregister(int); diff --git a/bsd/sys/cprotect.h b/bsd/sys/cprotect.h index 9a7c36d77..67cdd1e57 100644 --- a/bsd/sys/cprotect.h +++ b/bsd/sys/cprotect.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012 Apple Inc. All rights reserved. + * Copyright (c) 2009-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -40,8 +40,39 @@ extern "C" { #include #include +#include + +#define CP_CODE(code) FSDBG_CODE(DBG_CONTENT_PROT, code) +/* + * Class DBG_FSYSTEM == 0x03 + * Subclass DBG_CONTENT_PROT == 0xCF + * These debug codes are of the form 0x03CFzzzz + */ + +enum { + CPDBG_OFFSET_IO = CP_CODE(0), /* 0x03CF0000 */ +}; + +/* normally the debug events are no-ops */ +#define CP_DEBUG(x,a,b,c,d,e) do {} while (0); + +/* dev kernels only! */ +#if !SECURE_KERNEL + +/* KDEBUG events used by content protection subsystem */ +#if 0 +#undef CP_DEBUG +#define CP_DEBUG KERNEL_DEBUG_CONSTANT +#endif + +#endif + + + #define CP_IV_KEYSIZE 20 /* 16x8 = 128, but SHA1 pushes 20 bytes so keep space for that */ #define CP_MAX_KEYSIZE 32 /* 8x4 = 32, 32x8 = 256 */ +#define CP_MAX_CACHEBUFLEN 64 /* Maximum size of cp cache buffer/array */ + #define CP_MAX_WRAPPEDKEYSIZE 128 /* The size of the largest allowed key */ #define CP_INITIAL_WRAPPEDKEYSIZE 40 #define CP_V2_WRAPPEDKEYSIZE 40 /* Size of the wrapped key in a v2 EA */ @@ -64,6 +95,8 @@ extern "C" { #define CP_RELOCATION_INFLIGHT 0x10 /* File with offset IVs is in the process of being relocated. 
*/ #define CP_SEP_WRAPPEDKEY 0x20 /* Wrapped key delivered from keybag */ + + /* Content Protection VNOP Operation flags */ #define CP_READ_ACCESS 0x1 #define CP_WRITE_ACCESS 0x2 @@ -78,9 +111,16 @@ extern "C" { #define CP_PREV_MAJOR_VERS 2 #define CP_MINOR_VERS 0 +/* the class occupies the lowest 5 bits, so there are 32 values (0-31) */ +#define CP_EFFECTIVE_CLASSMASK 0x0000001f + +/* macros for quick access/typing to mask out the classmask */ +#define CP_CLASS(x) ((uint32_t)(CP_EFFECTIVE_CLASSMASK & (x))) + +#define CP_CRYPTO_G1 0x00000020 + typedef struct cprotect *cprotect_t; typedef struct cp_wrap_func *cp_wrap_func_t; -typedef struct cp_global_state *cp_global_state_t; typedef struct cp_xattr *cp_xattr_t; typedef struct cnode * cnode_ptr_t; @@ -94,30 +134,50 @@ typedef struct { void *iv_key; unsigned iv_key_len; uint32_t flags; -} cp_raw_key_s, *cp_raw_key_t; +} cp_raw_key_s; + +typedef cp_raw_key_s* cp_raw_key_t; typedef struct { void *key; unsigned key_len; uint32_t dp_class; -} cp_wrapped_key_s, *cp_wrapped_key_t; +} cp_wrapped_key_s; + +typedef cp_wrapped_key_s* cp_wrapped_key_t; typedef struct { ino64_t inode; uint32_t volume; pid_t pid; uid_t uid; -} cp_cred_s, *cp_cred_t; +} cp_cred_s; + +typedef cp_cred_s* cp_cred_t; /* The wrappers are invoked on the AKS kext */ typedef int unwrapper_t(cp_cred_t access, const cp_wrapped_key_t wrapped_key_in, cp_raw_key_t key_out); typedef int rewrapper_t(cp_cred_t access, uint32_t dp_class, const cp_wrapped_key_t wrapped_key_in, cp_wrapped_key_t wrapped_key_out); typedef int new_key_t(cp_cred_t access, uint32_t dp_class, cp_raw_key_t key_out, cp_wrapped_key_t wrapped_key_out); typedef int invalidater_t(cp_cred_t access); /* invalidates keys */ +typedef int backup_key_t(cp_cred_t access, const cp_wrapped_key_t wrapped_key_in, cp_wrapped_key_t wrapped_key_out); -/* Flags for Interaction between AKS / Kernel */ -#define CP_RAW_KEY_WRAPPEDKEY 0x00000001 +/* + * Flags for Interaction between AKS / Kernel + * These are twiddled via the input/output structs in the above + * wrapper/unwrapper functions. 
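In use, the class-mask macros above separate the persistent class from its modifier bits, roughly as follows (a fragment; entry is assumed to be a struct cprotect * as defined later in this header):

uint32_t raw    = entry->cp_pclass;          /* persistent, on-disk value */
uint32_t pclass = CP_CLASS(raw);             /* low 5 bits: class 0-31 */
int g1          = (raw & CP_CRYPTO_G1) != 0; /* remaining bits: modifiers */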
+ */ +#define CP_RAW_KEY_WRAPPEDKEY 0x00000001 + + +/* + * Flags for Key Generation Behavior + * + * These are passed to cp_generate_keys() and cp_new() in the + * flags arguments + */ +#define CP_KEYWRAP_DIFFCLASS 0x00000001 /* wrapping with a different class bag is OK */ /* @@ -137,26 +197,22 @@ typedef int invalidater_t(cp_cred_t access); /* invalidates keys */ */ struct cprotect { uint32_t cp_flags; - uint32_t cp_pclass; + uint32_t cp_pclass; /* persistent class stored on-disk */ aes_encrypt_ctx cp_cache_iv_ctx; uint32_t cp_cache_key_len; - uint8_t cp_cache_key[CP_MAX_KEYSIZE]; + uint8_t cp_cache_key[CP_MAX_CACHEBUFLEN]; uint32_t cp_persistent_key_len; void* cp_backing_cnode; uint8_t cp_persistent_key[]; }; +/* Structure to store pointers for AKS functions */ struct cp_wrap_func { new_key_t *new_key; unwrapper_t *unwrapper; rewrapper_t *rewrapper; invalidater_t *invalidater; -}; - -struct cp_global_state { - uint8_t wrap_functions_set; - uint8_t lock_state; - u_int16_t reserved; + backup_key_t *backup_key; }; /* @@ -232,7 +288,7 @@ struct cp_xattr_v4 { */ int cp_vnode_getclass(vnode_t, int *); int cp_vnode_setclass(vnode_t, uint32_t); -int cp_vnode_transcode(vnode_t); +int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len); int cp_key_store_action(int); int cp_register_wraps(cp_wrap_func_t); @@ -248,7 +304,8 @@ int cp_fs_protected (mount_t); int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr); int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr); int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options); -int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, struct cprotect **newentry); +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, + uint32_t flags, struct cprotect **newentry); int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppliedclass, mode_t cmode, struct cprotect **tmpentry); int cp_handle_relocate (cnode_ptr_t cp, struct hfsmount *hfsmp); @@ -256,6 +313,10 @@ int cp_handle_open(struct vnode *vp, int mode); int cp_get_root_major_vers (struct vnode *vp, uint32_t *level); int cp_get_default_level (struct vnode *vp, uint32_t *level); int cp_is_valid_class (int isdir, int32_t protectionclass); +int cp_set_trimmed(struct hfsmount *hfsmp); +int cp_set_rewrapped(struct hfsmount *hfsmp); +int cp_flop_generation (struct hfsmount *hfsmp); + #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/csr.h b/bsd/sys/csr.h new file mode 100644 index 000000000..f418eef68 --- /dev/null +++ b/bsd/sys/csr.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_CSR_H_ +#define _SYS_CSR_H_ + +#include +#include +#include + +#ifdef __APPLE_API_PRIVATE + +typedef uint32_t csr_config_t; +typedef uint32_t csr_op_t; + +/* Rootless configuration flags */ +#define CSR_ALLOW_UNTRUSTED_KEXTS (1 << 0) +#define CSR_ALLOW_UNRESTRICTED_FS (1 << 1) +#define CSR_ALLOW_TASK_FOR_PID (1 << 2) +#define CSR_ALLOW_KERNEL_DEBUGGER (1 << 3) +#define CSR_ALLOW_APPLE_INTERNAL (1 << 4) +#define CSR_ALLOW_DESTRUCTIVE_DTRACE (1 << 5) /* name deprecated */ +#define CSR_ALLOW_UNRESTRICTED_DTRACE (1 << 5) +#define CSR_ALLOW_UNRESTRICTED_NVRAM (1 << 6) + +#define CSR_VALID_FLAGS (CSR_ALLOW_UNTRUSTED_KEXTS | \ + CSR_ALLOW_UNRESTRICTED_FS | \ + CSR_ALLOW_TASK_FOR_PID | \ + CSR_ALLOW_KERNEL_DEBUGGER | \ + CSR_ALLOW_APPLE_INTERNAL | \ + CSR_ALLOW_UNRESTRICTED_DTRACE | \ + CSR_ALLOW_UNRESTRICTED_NVRAM) + +#ifdef PRIVATE +/* Private system call interface between Libsyscall and xnu */ + +/* Syscall flavors */ +#define CSR_OP_CHECK 0 +#define CSR_OP_GET_ACTIVE_CONFIG 1 +#define CSR_OP_GET_PENDING_CONFIG 2 + +#endif /* PRIVATE */ + +__BEGIN_DECLS + +#ifdef XNU_KERNEL_PRIVATE +void csr_init(void); +#endif + +#if KERNEL_PRIVATE +void csr_set_allow_all(int value); +#endif + +/* Syscalls */ +int csr_check(csr_config_t mask); +int csr_get_active_config(csr_config_t *config); +int csr_get_pending_config(csr_config_t *config); + +__END_DECLS + +#endif /* __APPLE_API_PRIVATE */ + +#endif /* _SYS_CSR_H_ */ diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 0e136a413..48c1ab9cc 100644 --- a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Computer, Inc. All rights reserved. 
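The csr interface above is queried with csr_check(); a sketch of the intended idiom, assuming the convention that a zero return means the named relaxation is in effect:

#include <sys/csr.h>

/* Hypothetical caller gating a debug feature on the CSR configuration. */
static int
kernel_debugger_allowed(void)
{
    return csr_check(CSR_ALLOW_KERNEL_DEBUGGER) == 0;
}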
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -75,6 +75,7 @@ * DKIOCGETCOMMANDPOOLSIZE get device's queue depth */ +#define DK_FEATURE_PRIORITY 0x00000004 #define DK_FEATURE_UNMAP 0x00000010 typedef struct @@ -113,13 +114,23 @@ typedef struct dk_extent_t * extents; uint32_t extentsCount; -#ifdef __LP64__ + uint32_t options; + +#ifndef __LP64__ uint8_t reserved0096[4]; /* reserved, clear to zero */ -#else /* !__LP64__ */ - uint8_t reserved0064[8]; /* reserved, clear to zero */ #endif /* !__LP64__ */ } dk_unmap_t; + +#ifdef KERNEL +#ifdef PRIVATE + +/* Definitions of option bits for dk_unmap_t */ +#define _DK_UNMAP_INITIALIZE 0x00000100 + +#endif /* PRIVATE */ +#endif /* KERNEL */ + #define DKIOCEJECT _IO('d', 21) #define DKIOCSYNCHRONIZECACHE _IO('d', 22) @@ -135,6 +146,7 @@ typedef struct #define DKIOCREQUESTIDLE _IO('d', 30) #define DKIOCUNMAP _IOW('d', 31, dk_unmap_t) +#define _DKIOCCORESTORAGE _IO('d', 32) #define DKIOCGETMAXBLOCKCOUNTREAD _IOR('d', 64, uint64_t) #define DKIOCGETMAXBLOCKCOUNTWRITE _IOR('d', 65, uint64_t) @@ -156,6 +168,16 @@ typedef struct #ifdef KERNEL #define DK_FEATURE_FORCE_UNIT_ACCESS 0x00000001 +#define DK_ENCRYPTION_TYPE_AES_CBC 1 +#define DK_ENCRYPTION_TYPE_AES_XEX 2 +#define DK_ENCRYPTION_TYPE_AES_XTS 3 + +#define DK_TIER_MASK 0xC0 +#define DK_TIER_SHIFT 6 + +#define DK_TIER_TO_PRIORITY(tier) (((tier) << DK_TIER_SHIFT) | ~DK_TIER_MASK) +#define DK_PRIORITY_TO_TIER(priority) ((priority) >> DK_TIER_SHIFT) + typedef struct { uint64_t offset; @@ -166,6 +188,20 @@ typedef struct dev_t dev; } dk_physical_extent_t; +typedef struct +{ + dk_extent_t * extents; + uint32_t extentsCount; + + uint8_t tier; + +#ifdef __LP64__ + uint8_t reserved0104[3]; /* reserved, clear to zero */ +#else /* !__LP64__ */ + uint8_t reserved0072[7]; /* reserved, clear to zero */ +#endif /* !__LP64__ */ +} dk_set_tier_t; + #define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, uint32_t) #define DKIOCSETBLOCKSIZE _IOW('d', 24, uint32_t) #define DKIOCGETBSDUNIT _IOR('d', 27, uint32_t) @@ -176,7 +212,9 @@ typedef struct #define DKIOCLOCKPHYSICALEXTENTS _IO('d', 81) #define DKIOCGETPHYSICALEXTENT _IOWR('d', 82, dk_physical_extent_t) #define DKIOCUNLOCKPHYSICALEXTENTS _IO('d', 83) -#define DKIOCGETMAXPRIORITYCOUNT _IOR('d', 84, uint32_t) +#define DKIOCSETTIER _IOW('d', 85, dk_set_tier_t) +#define DKIOCGETENCRYPTIONTYPE _IOR('d', 86, uint32_t) +#define DKIOCISLOWPOWERMODE _IOR('d', 87, uint32_t) #ifdef XNU_KERNEL_PRIVATE typedef struct @@ -196,24 +234,31 @@ typedef struct _dk_cs_pin { dk_extent_t cp_extent; int64_t cp_flags; } _dk_cs_pin_t; -#define _DKIOCSPINDISCARDDATA (1 << 0) +#define _DKIOCCSPINFORHIBERNATION (1 << 0) +#define _DKIOCCSPINDISCARDBLACKLIST (1 << 1) #define _DKIOCCSPINEXTENT _IOW('d', 199, _dk_cs_pin_t) #define _DKIOCCSUNPINEXTENT _IOW('d', 200, _dk_cs_pin_t) #define _DKIOCGETMIGRATIONUNITBYTESIZE _IOR('d', 201, uint32_t) + typedef struct _dk_cs_map { dk_extent_t cm_extent; uint64_t cm_bytes_mapped; } _dk_cs_map_t; + +typedef struct _dk_cs_unmap { + dk_extent_t *extents; + uint32_t extentsCount; + uint32_t options; +} _dk_cs_unmap_t; + #define _DKIOCCSMAP _IOWR('d', 202, _dk_cs_map_t) #define _DKIOCCSSETFSVNODE _IOW('d', 203, vnode_t) #define _DKIOCCSGETFREEBYTES _IOR('d', 204, uint64_t) +#define _DKIOCCSUNMAP _IOWR('d', 205, _dk_cs_unmap_t) #endif /* PRIVATE */ #endif /* KERNEL */ #ifdef PRIVATE -#ifdef TARGET_OS_EMBEDDED -#define _DKIOCSETSTATIC _IO('d', 84) -#endif /* TARGET_OS_EMBEDDED */ #endif /* PRIVATE */ #endif /* _SYS_DISK_H_ */ diff --git a/bsd/sys/dtrace.h b/bsd/sys/dtrace.h 
index 4d3fffb84..fa41389d4 100644 --- a/bsd/sys/dtrace.h +++ b/bsd/sys/dtrace.h @@ -89,6 +89,7 @@ extern "C" { #endif #include +#include #include #ifndef NULL @@ -335,6 +336,7 @@ typedef enum dtrace_probespec { #if defined(__APPLE__) #define DIF_VAR_PTHREAD_SELF 0x0200 /* Apple specific PTHREAD_SELF (Not currently supported!) */ #define DIF_VAR_DISPATCHQADDR 0x0201 /* Apple specific dispatch queue addr */ +#define DIF_VAR_MACHTIMESTAMP 0x0202 /* mach_absolute_timestamp() */ #endif /* __APPLE __ */ #define DIF_SUBR_RAND 0 @@ -381,13 +383,15 @@ typedef enum dtrace_probespec { #define DIF_SUBR_INET_NTOP 41 #define DIF_SUBR_INET_NTOA 42 #define DIF_SUBR_INET_NTOA6 43 +#define DIF_SUBR_TOUPPER 44 +#define DIF_SUBR_TOLOWER 45 #if !defined(__APPLE__) -#define DIF_SUBR_MAX 43 /* max subroutine value */ +#define DIF_SUBR_MAX 45 /* max subroutine value */ #else -#define DIF_SUBR_COREPROFILE 44 +#define DIF_SUBR_COREPROFILE 46 -#define DIF_SUBR_MAX 44 /* max subroutine value */ +#define DIF_SUBR_MAX 46 /* max subroutine value */ #endif /* __APPLE__ */ typedef uint32_t dif_instr_t; @@ -497,6 +501,8 @@ typedef struct dtrace_difv { #define DTRACEACT_PRINTF 3 /* printf() action */ #define DTRACEACT_PRINTA 4 /* printa() action */ #define DTRACEACT_LIBACT 5 /* library-controlled action */ +#define DTRACEACT_TRACEMEM 6 /* tracemem() action */ +#define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */ #if defined(__APPLE__) #define DTRACEACT_APPLEBINARY 50 /* Apple DT perf. tool action */ @@ -1138,11 +1144,14 @@ typedef struct dtrace_fmtdesc { #define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ #define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ #define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. key position to sort on */ +#define DTRACEOPT_AGGHIST 27 /* histogram aggregation output */ +#define DTRACEOPT_AGGPACK 28 /* packed aggregation output */ +#define DTRACEOPT_AGGZOOM 29 /* zoomed aggregation scaling */ #if !defined(__APPLE__) -#define DTRACEOPT_MAX 27 /* number of options */ +#define DTRACEOPT_MAX 30 /* number of options */ #else -#define DTRACEOPT_STACKSYMBOLS 27 /* clear to prevent stack symbolication */ -#define DTRACEOPT_MAX 28 /* number of options */ +#define DTRACEOPT_STACKSYMBOLS 30 /* clear to prevent stack symbolication */ +#define DTRACEOPT_MAX 31 /* number of options */ #endif /* __APPLE__ */ #define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ @@ -1383,11 +1392,12 @@ typedef struct dtrace_providerdesc { #define DTRACEIOC_REPLICATE (DTRACEIOC | 18) /* replicate enab */ #define DTRACEIOC_MODUUIDSLIST (DTRACEIOC | 30) /* APPLE ONLY, query for modules with missing symbols */ #define DTRACEIOC_PROVMODSYMS (DTRACEIOC | 31) /* APPLE ONLY, provide missing symbols for a given module */ - +#define DTRACEIOC_PROCWAITFOR (DTRACEIOC | 32) /* APPLE ONLY, wait for process exec */ + /* * The following structs are used to provide symbol information to the kernel from userspace. 
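A hedged sketch of driving the new DTRACEIOC_PROCWAITFOR ioctl with the dtrace_procdesc_t defined just below (the /dev/dtrace descriptor, the target name, and the assumption that p_pid is filled in on return are all illustrative):

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/dtrace.h>

/* dtfd: an already-open, privileged /dev/dtrace descriptor. */
static void
wait_for_exec(int dtfd)
{
    dtrace_procdesc_t pdesc;

    memset(&pdesc, 0, sizeof(pdesc));
    strlcpy(pdesc.p_comm, "MyApp", sizeof(pdesc.p_comm)); /* target name */
    if (ioctl(dtfd, DTRACEIOC_PROCWAITFOR, &pdesc) == 0)
        printf("exec'd as pid %d\n", pdesc.p_pid);        /* assumed out-param */
}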
*/ - + typedef struct dtrace_symbol { uint64_t dtsym_addr; /* address of the symbol */ uint64_t dtsym_size; /* size of the symbol, must be uint64_t to maintain alignment when called by 64b uproc in i386 kernel */ @@ -1399,16 +1409,21 @@ typedef struct dtrace_module_symbols { uint64_t dtmodsyms_count; dtrace_symbol_t dtmodsyms_symbols[1]; } dtrace_module_symbols_t; - + #define DTRACE_MODULE_SYMBOLS_SIZE(count) (sizeof(dtrace_module_symbols_t) + ((count - 1) * sizeof(dtrace_symbol_t))) - + typedef struct dtrace_module_uuids_list { uint64_t dtmul_count; UUID dtmul_uuid[1]; } dtrace_module_uuids_list_t; - + #define DTRACE_MODULE_UUIDS_LIST_SIZE(count) (sizeof(dtrace_module_uuids_list_t) + ((count - 1) * sizeof(UUID))) +typedef struct dtrace_procdesc { + char p_comm[MAXCOMLEN+1]; + pid_t p_pid; +} dtrace_procdesc_t; + #endif /* __APPLE__ */ /* diff --git a/bsd/sys/dtrace_glue.h b/bsd/sys/dtrace_glue.h index 71f688919..494cbfcd2 100644 --- a/bsd/sys/dtrace_glue.h +++ b/bsd/sys/dtrace_glue.h @@ -32,7 +32,6 @@ #ifdef KERNEL_BUILD #include -#include #include #include #include @@ -99,8 +98,14 @@ int suword8(user_addr_t, uint8_t value); * cpuvar */ extern lck_mtx_t cpu_lock; +extern lck_mtx_t cyc_lock; extern lck_mtx_t mod_lock; +/* + * wrap_timer_call: wrapper of timer_call for cyclic timers. + */ +struct wrap_timer_call; + /* * Per-CPU data. */ @@ -112,6 +117,9 @@ typedef struct dtrace_cpu { hrtime_t cpu_dtrace_chillmark; /* DTrace: chill mark time */ hrtime_t cpu_dtrace_chilled; /* DTrace: total chill time */ boolean_t cpu_dtrace_invop_underway; /* DTrace gaurds against invalid op re-entrancy */ + + /* Local cyclic timers on this CPU */ + LIST_HEAD(cyc_list_head, wrap_timer_call) cpu_cyc_list; } dtrace_cpu_t; extern dtrace_cpu_t *cpu_list; @@ -179,9 +187,6 @@ extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *); #define CPU_DTRACE_KPRIV 0x0080 /* DTrace fault: bad kernel access */ #define CPU_DTRACE_UPRIV 0x0100 /* DTrace fault: bad user access */ #define CPU_DTRACE_TUPOFLOW 0x0200 /* DTrace fault: tuple stack overflow */ -#if defined(__sparc) -//#define CPU_DTRACE_FAKERESTORE 0x0400 /* pid provider hint to getreg */ -#endif #define CPU_DTRACE_USTACK_FP 0x0400 /* pid provider hint to ustack() */ #define CPU_DTRACE_ENTRY 0x0800 /* pid provider hint to ustack() */ #define CPU_DTRACE_BADSTACK 0x1000 /* DTrace fault: bad stack */ @@ -208,7 +213,7 @@ typedef struct modctl { char mod_modname[KMOD_MAX_NAME]; int mod_loadcnt; char mod_loaded; - char mod_flags; // See flags below + uint16_t mod_flags; // See flags below int mod_nenabled; // # of enabled DTrace probes in module vm_address_t mod_address; // starting address (of Mach-o header blob) vm_size_t mod_size; // total size (of blob) @@ -217,13 +222,15 @@ typedef struct modctl { } modctl_t; /* Definitions for mod_flags */ -#define MODCTL_IS_MACH_KERNEL 0x01 // This module represents /mach_kernel -#define MODCTL_HAS_KERNEL_SYMBOLS 0x02 // Kernel symbols (nlist) are available -#define MODCTL_FBT_PROBES_PROVIDED 0x04 // fbt probes have been provided -#define MODCTL_FBT_INVALID 0x08 // Module is invalid for fbt probes -#define MODCTL_SDT_PROBES_PROVIDED 0x10 // sdt probes have been provided -#define MODCTL_SDT_INVALID 0x20 // Module is invalid for sdt probes -#define MODCTL_HAS_UUID 0x40 // Module has UUID +#define MODCTL_IS_MACH_KERNEL 0x01 // This module represents /mach_kernel +#define MODCTL_HAS_KERNEL_SYMBOLS 0x02 // Kernel symbols (nlist) are available +#define MODCTL_FBT_PROBES_PROVIDED 0x04 // fbt probes have been provided +#define 
MODCTL_FBT_INVALID 0x08 // Module is invalid for fbt probes +#define MODCTL_SDT_PROBES_PROVIDED 0x10 // sdt probes have been provided +#define MODCTL_SDT_INVALID 0x20 // Module is invalid for sdt probes +#define MODCTL_HAS_UUID 0x40 // Module has UUID +#define MODCTL_FBT_PRIVATE_PROBES_PROVIDED 0x80 // fbt private probes have been provided +#define MODCTL_FBT_PROVIDE_PRIVATE_PROBES 0x100 // fbt provider must provide private probes /* Simple/singular mod_flags accessors */ #define MOD_IS_MACH_KERNEL(mod) (mod->mod_flags & MODCTL_IS_MACH_KERNEL) @@ -234,9 +241,12 @@ typedef struct modctl { #define MOD_SDT_PROBES_PROVIDED(mod) (mod->mod_flags & MODCTL_SDT_PROBES_PROVIDED) #define MOD_SDT_INVALID(mod) (mod->mod_flags & MODCTL_SDT_INVALID) #define MOD_HAS_UUID(mod) (mod->mod_flags & MODCTL_HAS_UUID) +#define MOD_FBT_PRIVATE_PROBES_PROVIDED(mod) (mod->mod_flags & MODCTL_FBT_PRIVATE_PROBES_PROVIDED) +#define MOD_FBT_PROVIDE_PRIVATE_PROBES(mod) (mod->mod_flags & MODCTL_FBT_PROVIDE_PRIVATE_PROBES) /* Compound accessors */ -#define MOD_FBT_DONE(mod) (MOD_FBT_PROBES_PROVIDED(mod) || MOD_FBT_INVALID(mod)) +#define MOD_FBT_PRIVATE_PROBES_DONE(mod) (MOD_FBT_PRIVATE_PROBES_PROVIDED(mod) || !MOD_FBT_PROVIDE_PRIVATE_PROBES(mod)) +#define MOD_FBT_DONE(mod) ((MOD_FBT_PROBES_PROVIDED(mod) && MOD_FBT_PRIVATE_PROBES_DONE(mod)) || MOD_FBT_INVALID(mod)) #define MOD_SDT_DONE(mod) (MOD_SDT_PROBES_PROVIDED(mod) || MOD_SDT_INVALID(mod)) #define MOD_SYMBOLS_DONE(mod) (MOD_FBT_DONE(mod) && MOD_SDT_DONE(mod)) @@ -304,6 +314,8 @@ typedef struct cyc_omni_handler { void *cyo_arg; } cyc_omni_handler_t; +extern void dtrace_install_cpu_hooks(void); + extern cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *); extern void cyclic_remove(cyclic_id_t); diff --git a/bsd/sys/dtrace_impl.h b/bsd/sys/dtrace_impl.h index 2b02707c7..71dc020f2 100644 --- a/bsd/sys/dtrace_impl.h +++ b/bsd/sys/dtrace_impl.h @@ -46,6 +46,11 @@ extern "C" { #include +/* + * DTrace Implementation Locks + */ +extern lck_mtx_t dtrace_procwaitfor_lock; + /* * DTrace Implementation Constants and Typedefs */ @@ -892,6 +897,7 @@ typedef struct dtrace_vstate { #define DTRACE_MSTATE_WALLTIMESTAMP 0x00000100 #define DTRACE_MSTATE_USTACKDEPTH 0x00000200 #define DTRACE_MSTATE_UCALLER 0x00000400 +#define DTRACE_MSTATE_MACHTIMESTAMP 0x00000800 typedef struct dtrace_mstate { uintptr_t dtms_scratch_base; /* base of scratch space */ @@ -902,6 +908,7 @@ typedef struct dtrace_mstate { dtrace_epid_t dtms_epid; /* current EPID */ uint64_t dtms_timestamp; /* cached timestamp */ hrtime_t dtms_walltimestamp; /* cached wall timestamp */ + uint64_t dtms_machtimestamp; /* cached mach absolute timestamp */ int dtms_stackdepth; /* cached stackdepth */ int dtms_ustackdepth; /* cached ustackdepth */ struct dtrace_probe *dtms_probe; /* current probe */ @@ -978,9 +985,9 @@ typedef enum dtrace_activity { DTRACE_ACTIVITY_KILLED /* killed */ } dtrace_activity_t; -#if defined(__APPLE__) + /* - * DTrace dof modes + * APPLE NOTE: DTrace dof modes implementation * * DTrace has four "dof modes". 
They are: * @@ -1047,7 +1054,6 @@ typedef enum dtrace_activity { #define DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE 2 #define DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL 3 -#endif /* __APPLE__ */ /* * DTrace Helper Implementation @@ -1202,9 +1208,7 @@ struct dtrace_state { dtrace_optval_t dts_options[DTRACEOPT_MAX]; /* options */ dtrace_cred_t dts_cred; /* credentials */ size_t dts_nretained; /* number of retained enabs */ -#if defined(__APPLE__) uint64_t dts_arg_error_illval; -#endif /* __APPLE__ */ }; struct dtrace_provider { @@ -1215,15 +1219,15 @@ struct dtrace_provider { void *dtpv_arg; /* provider argument */ uint_t dtpv_defunct; /* boolean: defunct provider */ struct dtrace_provider *dtpv_next; /* next provider */ - uint64_t probe_count; /* no. of associated probes */ - uint64_t ecb_count; /* no. of associated enabled ECBs */ + uint64_t dtpv_probe_count; /* number of associated probes */ + uint64_t dtpv_ecb_count; /* number of associated enabled ECBs */ }; struct dtrace_meta { dtrace_mops_t dtm_mops; /* meta provider operations */ char *dtm_name; /* meta provider name */ void *dtm_arg; /* meta provider user arg */ - uint64_t dtm_count; /* no. of associated provs. */ + uint64_t dtm_count; /* number of associated providers */ }; /* @@ -1314,58 +1318,37 @@ extern int dtrace_getipl(void); extern uintptr_t dtrace_caller(int); extern uint32_t dtrace_cas32(uint32_t *, uint32_t, uint32_t); extern void *dtrace_casptr(void *, void *, void *); -#if !defined(__APPLE__) -extern void dtrace_copyin(uintptr_t, uintptr_t, size_t, volatile uint16_t *); -extern void dtrace_copyinstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); -extern void dtrace_copyout(uintptr_t, uintptr_t, size_t, volatile uint16_t *); -extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, - volatile uint16_t *); -#else extern void dtrace_copyin(user_addr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_copyinstr(user_addr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_copyout(uintptr_t, user_addr_t, size_t, volatile uint16_t *); extern void dtrace_copyoutstr(uintptr_t, user_addr_t, size_t, volatile uint16_t *); -#endif /* __APPLE__ */ extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *); -#if !defined(__APPLE__) -extern ulong_t dtrace_getreg(struct regs *, uint_t); -#else extern uint64_t dtrace_getreg(struct regs *, uint_t); -#endif /* __APPLE__ */ extern int dtrace_getstackdepth(int); extern void dtrace_getupcstack(uint64_t *, int); extern void dtrace_getufpstack(uint64_t *, uint64_t *, int); extern int dtrace_getustackdepth(void); extern uintptr_t dtrace_fulword(void *); -#if !defined(__APPLE__) -extern uint8_t dtrace_fuword8(void *); -extern uint16_t dtrace_fuword16(void *); -extern uint32_t dtrace_fuword32(void *); -extern uint64_t dtrace_fuword64(void *); -extern void dtrace_probe_error(dtrace_state_t *, dtrace_epid_t, int, int, - int, uintptr_t); -#else extern uint8_t dtrace_fuword8(user_addr_t); extern uint16_t dtrace_fuword16(user_addr_t); extern uint32_t dtrace_fuword32(user_addr_t); extern uint64_t dtrace_fuword64(user_addr_t); +extern int dtrace_proc_waitfor(dtrace_procdesc_t*); extern void dtrace_probe_error(dtrace_state_t *, dtrace_epid_t, int, int, int, uint64_t); -#endif /* __APPLE__ */ extern int dtrace_assfail(const char *, const char *, int); extern int dtrace_attached(void); extern hrtime_t dtrace_gethrestime(void); extern void dtrace_isa_init(void); -#ifdef __sparc -extern void dtrace_flush_windows(void); -extern void dtrace_flush_user_windows(void); -extern uint_t 
dtrace_getotherwin(void); -extern uint_t dtrace_getfprs(void); -#else extern void dtrace_copy(uintptr_t, uintptr_t, size_t); extern void dtrace_copystr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); -#endif + +/* + * DTrace restriction checks + */ +extern boolean_t dtrace_is_restricted(void); +extern boolean_t dtrace_can_attach_to_proc(proc_t); /* * DTrace Assertions diff --git a/bsd/sys/dtrace_ptss.h b/bsd/sys/dtrace_ptss.h index 58454dacf..26381cfa9 100644 --- a/bsd/sys/dtrace_ptss.h +++ b/bsd/sys/dtrace_ptss.h @@ -71,7 +71,10 @@ extern "C" { * things simple for now. */ -#define DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD 64 + + +#define DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD (64) + #define DTRACE_PTSS_ENTRIES_PER_PAGE (PAGE_SIZE / DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD) struct dtrace_ptss_page_entry { @@ -81,7 +84,7 @@ struct dtrace_ptss_page_entry { struct dtrace_ptss_page { struct dtrace_ptss_page* next; - struct dtrace_ptss_page_entry entries[DTRACE_PTSS_ENTRIES_PER_PAGE]; + struct dtrace_ptss_page_entry entries[PAGE_MAX_SIZE / DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD]; }; struct dtrace_ptss_page_entry* dtrace_ptss_claim_entry(struct proc* p); /* sprlock not held */ diff --git a/bsd/sys/event.h b/bsd/sys/event.h index 6e834d594..66efc61b0 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -253,7 +253,6 @@ enum { #define NOTE_EXITSTATUS 0x04000000 /* exit status to be returned, valid for child process only */ #define NOTE_EXIT_DETAIL 0x02000000 /* provide details on reasons for exit */ - #define NOTE_PDATAMASK 0x000fffff /* mask for signal & exit status */ #define NOTE_PCTRLMASK (~NOTE_PDATAMASK) @@ -278,13 +277,14 @@ enum { /* * If NOTE_EXIT_MEMORY is present, these bits indicate specific jetsam condition. */ -#define NOTE_EXIT_MEMORY_DETAIL_MASK 0xfc000000 +#define NOTE_EXIT_MEMORY_DETAIL_MASK 0xfe000000 #define NOTE_EXIT_MEMORY_VMPAGESHORTAGE 0x80000000 /* jetsam condition: lowest jetsam priority proc killed due to vm page shortage */ #define NOTE_EXIT_MEMORY_VMTHRASHING 0x40000000 /* jetsam condition: lowest jetsam priority proc killed due to vm thrashing */ #define NOTE_EXIT_MEMORY_HIWAT 0x20000000 /* jetsam condition: process reached its high water mark */ #define NOTE_EXIT_MEMORY_PID 0x10000000 /* jetsam condition: special pid kill requested */ #define NOTE_EXIT_MEMORY_IDLE 0x08000000 /* jetsam condition: idle process cleaned up */ #define NOTE_EXIT_MEMORY_VNODE 0X04000000 /* jetsam condition: virtual node kill */ +#define NOTE_EXIT_MEMORY_FCTHRASHING 0x02000000 /* jetsam condition: lowest jetsam priority proc killed due to filecache thrashing */ #endif @@ -304,6 +304,7 @@ enum { #define NOTE_MEMORYSTATUS_PRESSURE_NORMAL 0x00000001 /* system memory pressure has returned to normal */ #define NOTE_MEMORYSTATUS_PRESSURE_WARN 0x00000002 /* system memory pressure has changed to the warning state */ #define NOTE_MEMORYSTATUS_PRESSURE_CRITICAL 0x00000004 /* system memory pressure has changed to the critical state */ +#define NOTE_MEMORYSTATUS_LOW_SWAP 0x00000008 /* system is in a low-swap state */ typedef enum vm_pressure_level { kVMPressureNormal = 0, diff --git a/bsd/sys/eventvar.h b/bsd/sys/eventvar.h index b94187572..29adde75d 100644 --- a/bsd/sys/eventvar.h +++ b/bsd/sys/eventvar.h @@ -90,5 +90,6 @@ typedef void (*kqueue_continue_t)(struct kqueue *, void *, int); extern int kevent_register(struct kqueue *, struct kevent64_s *, struct proc *); extern int kqueue_scan(struct kqueue *, kevent_callback_t, kqueue_continue_t, void *, struct timeval *, struct proc *); +extern int 
kqueue_stat(struct kqueue *, void *, int, proc_t); #endif /* !_SYS_EVENTVAR_H_ */ diff --git a/bsd/sys/fasttrap_impl.h b/bsd/sys/fasttrap_impl.h index 840fcde2d..439d36864 100644 --- a/bsd/sys/fasttrap_impl.h +++ b/bsd/sys/fasttrap_impl.h @@ -171,19 +171,14 @@ typedef struct fasttrap_hash { #define fasttrap_fuword32 fuword32 #define fasttrap_suword32 suword32 -#if defined __APPLE__ /* - * xnu runs in 32 bit mode even when supporting 64 bit processes. We need - * to make size explicit. + * APPLE NOTE: xnu supports both 32bit and 64bit user processes. + * We need to make size explicit. */ #define fasttrap_fuword64 fuword64 #define fasttrap_suword64 suword64 #define fasttrap_fuword64_noerr fuword64_noerr #define fasttrap_fuword32_noerr fuword32_noerr -#else -#define fasttrap_fulword fulword -#define fasttrap_sulword sulword -#endif extern void fasttrap_sigtrap(proc_t *, uthread_t, user_addr_t); diff --git a/bsd/sys/fbt.h b/bsd/sys/fbt.h index dd07bc527..4c67eef5c 100644 --- a/bsd/sys/fbt.h +++ b/bsd/sys/fbt.h @@ -54,9 +54,6 @@ typedef struct fbt_probe { char fbtp_name[MAX_FBTP_NAME_CHARS]; struct modctl *fbtp_ctl; int fbtp_loadcnt; -#if !defined(__APPLE__) - int fbtp_symndx; -#endif struct fbt_probe *fbtp_next; } fbt_probe_t; diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h index 22d4eb8ea..4c80591ff 100644 --- a/bsd/sys/fcntl.h +++ b/bsd/sys/fcntl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -132,6 +132,22 @@ #define FDEFER 0x2000 /* defer for next gc pass */ #define FHASLOCK 0x4000 /* descriptor holds advisory lock */ #endif + +#if __DARWIN_C_LEVEL >= 200809L +/* + * Descriptor value for the current working directory + */ +#define AT_FDCWD -2 + +/* + * Flags for the at functions + */ +#define AT_EACCESS 0x0010 /* Use effective ids in access check */ +#define AT_SYMLINK_NOFOLLOW 0x0020 /* Act on the symlink itself not the target */ +#define AT_SYMLINK_FOLLOW 0x0040 /* Act on target of symlink */ +#define AT_REMOVEDIR 0x0080 /* Path refers to directory */ +#endif + #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define O_EVTONLY 0x8000 /* descriptor requested for event notifications only */ #endif @@ -291,7 +307,7 @@ #define F_SETBACKINGSTORE 70 /* Mark the file as being the backing store for another filesystem */ #define F_GETPATH_MTMINFO 71 /* return the full path of the FD, but error in specific mtmd circumstances */ -/* 72 is free. It used to be F_GETENCRYPTEDDATA, which is now removed. */ +#define F_GETCODEDIR 72 /* Returns the code directory, with associated hashes, to the caller */ #define F_SETNOSIGPIPE 73 /* No SIGPIPE generated on EPIPE */ #define F_GETNOSIGPIPE 74 /* Status of SIGPIPE for this fd */ @@ -313,6 +329,11 @@ * written should be written in greedy mode for additional speed at * the cost of storage efficiency. A nonzero value enables it, 0 disables it. */ + +#define F_SETIOTYPE 82 /* + * Use parameters to describe content being written to the FD. See + * flag definitions below for argument bits. + */ #endif @@ -344,6 +365,14 @@ #define F_ABORT 0x200 /* lock attempt aborted (force umount) */ #endif +#if PRIVATE +/* + * ISOCHRONOUS attempts to sustain a minimum platform-dependent throughput + * for the duration of the I/O delivered to the driver. 
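The AT_* constants above pair with the openat() prototype added further down in this header; minimal usage with standard POSIX behavior:

#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
    /* Resolve "notes.txt" relative to an open directory, not the CWD. */
    int dirfd = open("/var/tmp", O_RDONLY);
    int fd = openat(dirfd, "notes.txt", O_WRONLY | O_CREAT, 0644);

    /* AT_FDCWD makes openat() equivalent to plain open(). */
    int fd2 = openat(AT_FDCWD, "local.txt", O_RDONLY);

    if (fd >= 0) close(fd);
    if (fd2 >= 0) close(fd2);
    close(dirfd);
    return 0;
}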
+ */ +#define F_IOTYPE_ISOCHRONOUS 0x0001 +#endif + /* * [XSI] The values used for l_whence shall be defined as described * in @@ -413,6 +442,51 @@ struct radvisory { #pragma pack() #endif /* KERNEL */ +#ifndef KERNEL +/** Information the user passes in to get the codeblobs out of the kernel */ +typedef struct fcodeblobs { + void *f_cd_hash; + size_t f_hash_size; + void *f_cd_buffer; + size_t f_cd_size; + unsigned int *f_out_size; + int f_arch; + int __padding; +} fcodeblobs_t; +#endif /* !KERNEL */ + +#ifdef KERNEL +typedef struct user32_fcodeblobs { + user32_addr_t f_cd_hash; + user32_size_t f_hash_size; + user32_addr_t f_cd_buffer; + user32_size_t f_cd_size; + user32_addr_t f_out_size; + int f_arch; +} user32_fcodeblobs_t; + +/* LP64 version of fcodeblobs */ +typedef struct user64_fcodeblobs { + user64_addr_t f_cd_hash; + user64_size_t f_hash_size; + user64_addr_t f_cd_buffer; + user64_size_t f_cd_size; + user64_addr_t f_out_size; + int f_arch; + int __padding; +} user64_fcodeblobs_t; + +/* kernel version of fcodeblobs */ +typedef struct user_fcodeblobs { + user_addr_t f_cd_hash; + user_size_t f_hash_size; + user_addr_t f_cd_buffer; + user_size_t f_cd_size; + user_addr_t f_out_size; + int f_arch; +} user_fcodeblobs_t; +#endif /* KERNEL */ + /* * detached code signatures data type - * information passed by user to system used by F_ADDSIGS and F_ADDFILESIGS. @@ -586,6 +660,9 @@ typedef enum { __BEGIN_DECLS int open(const char *, int, ...) __DARWIN_ALIAS_C(open); +#if __DARWIN_C_LEVEL >= 200809L +int openat(int, const char *, int, ...) __DARWIN_NOCANCEL(openat) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +#endif int creat(const char *, mode_t) __DARWIN_ALIAS_C(creat); int fcntl(int, int, ...) __DARWIN_ALIAS_C(fcntl); #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) @@ -608,7 +685,10 @@ int fileport_makeport(int, fileport_t*); int fileport_makefd(fileport_t); #endif /* PRIVATE */ int openx_np(const char *, int, filesec_t); -/* data-protected non-portable open(2) */ +/* + * data-protected non-portable open(2) : + int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode) + */ int open_dprotected_np(const char *, int, int, int, ...); int flock(int, int); filesec_t filesec_init(void); diff --git a/bsd/sys/file_internal.h b/bsd/sys/file_internal.h index 52836e020..aaf248554 100644 --- a/bsd/sys/file_internal.h +++ b/bsd/sys/file_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -155,6 +155,8 @@ typedef enum { #define FG_WRMMSGQ 0x10 /* wait for the fileglob to be removed from msgqueue */ #define FG_PORTMADE 0x20 /* a port was at some point created for this fileglob */ #define FG_NOSIGPIPE 0x40 /* don't deliver SIGPIPE with EPIPE return */ +#define FG_OFF_LOCKED 0x80 /* Used as a mutex for offset changes (for vnodes) */ +#define FG_OFF_LOCKWANT 0x100 /* Somebody's waiting for the lock */ struct fileglob { LIST_ENTRY(fileglob) f_msglist;/* list of active files */ @@ -182,6 +184,7 @@ struct fileglob { } *fg_ops; off_t fg_offset; void *fg_data; /* vnode or socket or SHM or semaphore */ + void *fg_vn_data; /* Per fd vnode data, used for directories */ lck_mtx_t fg_lock; #if CONFIG_MACF struct label *fg_label; /* JMM - use the one in the cred?
*/ @@ -252,6 +255,8 @@ extern int fdgetf_noref(proc_t, int, struct fileproc **); extern struct fileproc *fileproc_alloc_init(void *crargs); extern void fileproc_free(struct fileproc *fp); extern void guarded_fileproc_free(struct fileproc *fp); +extern void fg_vn_data_free(void *fgvndata); +extern int nameiat(struct nameidata *ndp, int dirfd); __END_DECLS #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/sys/fsctl.h b/bsd/sys/fsctl.h index 789876fd8..c99d47ba7 100644 --- a/bsd/sys/fsctl.h +++ b/bsd/sys/fsctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -275,15 +275,14 @@ typedef struct package_ext_info { #define FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS _IOW('A', 11, int32_t) #define FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) -#define FSIOC_TRACKED_HANDLER_GET _IOW('A', 12, struct namespace_handler_info) -#define FSCTL_TRACKED_HANDLER_GET IOCBASECMD(FSIOC_TRACKED_HANDLER_GET) +/* 12 was used for TRACKED_HANDLER_GET which has now been removed + as it is no longer used. */ #define FSIOC_SNAPSHOT_HANDLER_GET_EXT _IOW('A', 13, struct namespace_handler_info_ext) #define FSCTL_SNAPSHOT_HANDLER_GET_EXT IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT) -#define FSIOC_NAMESPACE_HANDLER_GETDATA _IOW('A', 14, struct namespace_handler_data) -#define FSCTL_NAMESPACE_HANDLER_GETDATA IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GETDATA) - +/* 14 was used for NAMESPACE_HANDLER_GETDATA which has now been + removed as it is no longer used. */ // // IO commands 15, 16, and 17 are currently unused diff --git a/bsd/sys/fsgetpath.h b/bsd/sys/fsgetpath.h index 735553e0c..941f31c02 100644 --- a/bsd/sys/fsgetpath.h +++ b/bsd/sys/fsgetpath.h @@ -50,6 +50,24 @@ __BEGIN_DECLS #define fsgetpath(buf, bufsize, fsid, objid) \ (ssize_t)syscall(SYS_fsgetpath, buf, (size_t)bufsize, fsid, (uint64_t)objid) +/* + * openbyid_np: open a file given a file system id and a file system object id + * + * fsid : value corresponding to getattrlist ATTR_CMN_FSID attribute, or + * value of stat's st.st_dev; set fsid = {st.st_dev, 0} + * + * objid: value (link id/node id) corresponding to getattrlist ATTR_CMN_OBJID + * attribute, or + * value of stat's st.st_ino (node id); set objid = st.st_ino + * + * For hfs the value of getattrlist ATTR_CMN_OBJID is a link id which uniquely identifies a + * parent in the case of hard linked files; this allows unique path access validation. + * Not all file systems support getattrlist ATTR_CMN_OBJID (link id). + * A node id does not uniquely identify a parent in the case of hard linked files and may + * resolve to a path for which access validation can fail. + */ +int openbyid_np(fsid_t* fsid, fsobj_id_t* objid, int flags); + __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/fslog.h b/bsd/sys/fslog.h index 002455668..b0a1e94b4 100644 --- a/bsd/sys/fslog.h +++ b/bsd/sys/fslog.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -33,103 +33,15 @@ #include #include -#ifdef KERNEL -/* Log file system related error in key-value format identified by Apple - * system log (ASL) facility. The key-value pairs are string pointers - * (char *) and are provided as variable arguments list. A NULL value - * indicates end of the list.
- * - * Keys can not contain '[', ']', space, and newline. Values can not - * contain '[', ']', and newline. If any key-value contains any of the - * reserved characters, the behavior is undefined. The caller of the - * function should escape any occurrences of '[' and ']' by prefixing - * it with '\'. - * - * The function takes a message ID which can be used to logically group - * different ASL messages. Messages in same logical group have same message - * ID and have information to describe order of the message --- first, - * middle, or last. - * - * The following message IDs have special meaning - - * FSLOG_MSG_FIRST - This message is the first message in its logical - * group. This generates a unique message ID, creates two key-value - * pairs with message ID and order of the message as "First". - * FSLOG_MSG_LAST - This is really a MASK which should be logically OR'ed - * with message ID to indicate the last message for a logical group. - * This also creates two key-value pairs with message ID and order of - * message as "Last". - * FSLOG_MSG_SINGLE - This signifies that the message is the only message - * in its logical group. Therefore no extra key-values are generated - * for this option. - * For all other values of message IDs, it regards them as intermediate - * message and generates two key-value pairs with message ID and order of - * message as "Middle". - * - * Returns - - * Message ID of the ASL message printed. The caller should use - * this value to print intermediate messages or end the logical message - * group. - * For FSLOG_MSG_SINGLE option, it returns FSLOG_MSG_SINGLE. - */ -unsigned long fslog_err(unsigned long msg_id, ... ); - -/* Reserved message IDs to determine message order */ -#define FSLOG_MSG_SINGLE ULONG_MAX -#define FSLOG_MSG_FIRST 0x0 -#define FSLOG_MSG_LAST (~(ULONG_MAX >> 1)) - -#ifdef BSD_KERNEL_PRIVATE - -/* Log information about runtime file system corruption detected */ -void fslog_fs_corrupt(struct mount *mnt); - -/* Log information about IO error detected */ -void fslog_io_error(const buf_t bp); - -#endif /* BSD_KERNEL_PRIVATE */ - #ifdef XNU_KERNEL_PRIVATE - /* Log information about external modification of a target process */ void fslog_extmod_msgtracer(proc_t caller, proc_t target); - #endif /* XNU_KERNEL_PRIVATE */ -#endif /* KERNEL */ - /* Keys used by FSLog */ -#define FSLOG_KEY_FACILITY "Facility" /* Facility generating messages */ -#define FSLOG_KEY_LEVEL "Level" /* Priority level */ -#define FSLOG_KEY_MSG_ID "FSLogMsgID" /* Message ID */ -#define FSLOG_KEY_MSG_ORDER "FSLogMsgOrder" /* Message Order */ -#define FSLOG_KEY_READ_UID "ReadUID" /* Allow read access to this UID only */ - -/* Values for message order (FSLOG_KEY_MSG_ORDER) */ -#define FSLOG_VAL_ORDER_FIRST "First" -#define FSLOG_VAL_ORDER_MIDDLE "Middle" -#define FSLOG_VAL_ORDER_LAST "Last" - -/* Keys for IO/FS logging using FSLog */ -#define FSLOG_KEY_ERR_TYPE "ErrType" /* Type of problem (IO, FS Corruption) */ -#define FSLOG_KEY_ERRNO "ErrNo" /* Error number (Integer) */ -#define FSLOG_KEY_IOTYPE "IOType" /* Type of IO (Read/Write) */ -#define FSLOG_KEY_PHYS_BLKNUM "PBlkNum" /* Physical block number */ -#define FSLOG_KEY_LOG_BLKNUM "LBlkNum" /* Logical block number */ -#define FSLOG_KEY_DEVNODE "DevNode" /* Device node (f_mntfromname) */ -#define FSLOG_KEY_PATH "Path" /* File system path */ -#define FSLOG_KEY_MNTPT "MountPt" /* Mount point */ +#define FSLOG_KEY_LEVEL "Level" /* Priority level */ /* Values used by FSLog */ -#define FSLOG_VAL_FACILITY "com.apple.system.fs" /* 
Facility generating messages */ -#define FSLOG_VAL_LEVEL LOG_ERR /* Priority level */ -#define FSLOG_VAL_READ_UID 0 /* Allow read access to root only */ - -/* Values for type of error (FSLOG_KEY_ERR_TYPE) */ -#define FSLOG_VAL_ERR_TYPE_IO "IO" /* IO error */ -#define FSLOG_VAL_ERR_TYPE_FS "FS" /* FS error */ - -/* Values for type of operation (FSLOG_KEY_IOTYPE) */ -#define FSLOG_VAL_IOTYPE_READ "Read" -#define FSLOG_VAL_IOTYPE_WRITE "Write" +#define FSLOG_VAL_FACILITY "com.apple.system.fs" /* Facility generating messages */ #endif /* !_FSLOG_H_ */ diff --git a/bsd/sys/guarded.h b/bsd/sys/guarded.h index 57c9661e7..dc55c7a59 100644 --- a/bsd/sys/guarded.h +++ b/bsd/sys/guarded.h @@ -32,6 +32,7 @@ #include #include +#include #ifdef PRIVATE __BEGIN_DECLS @@ -46,10 +47,16 @@ typedef __uint64_t guardid_t; #if !defined(KERNEL) extern int guarded_open_np(const char *path, const guardid_t *guard, u_int guardflags, int flags, ...); +extern int guarded_open_dprotected_np(const char *path, + const guardid_t *guard, u_int guardflags, int flags, + int dpclass, int dpflags, ...); extern int guarded_kqueue_np(const guardid_t *guard, u_int guardflags); extern int guarded_close_np(int fd, const guardid_t *guard); extern int change_fdguard_np(int fd, const guardid_t *guard, u_int guardflags, const guardid_t *nguard, u_int nguardflags, int *fdflagsp); +extern user_ssize_t guarded_write_np(int fd, const guardid_t *guard, user_addr_t cbuf, user_size_t nbyte); +extern user_ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, user_addr_t buf, user_size_t nbyte, off_t offset); +extern user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, u_int iovcnt); #endif /* KERNEL */ /* @@ -85,6 +92,11 @@ extern int change_fdguard_np(int fd, const guardid_t *guard, u_int guardflags, */ #define GUARD_FILEPORT (1u << 3) +/* + * Forbid writes on a guarded fd + */ +#define GUARD_WRITE (1u << 4) + /* * Violating a guard results in an error (EPERM), and potentially * an exception with one or more of the following bits set. @@ -95,7 +107,8 @@ enum guard_exception_codes { kGUARD_EXC_NOCLOEXEC = 1u << 2, /* clear close-on-exec */ kGUARD_EXC_SOCKET_IPC = 1u << 3, /* sendmsg of a guarded fd */ kGUARD_EXC_FILEPORT = 1u << 4, /* fileport_makeport .. 
*/ - kGUARD_EXC_MISMATCH = 1u << 5 /* wrong guard for guarded fd */ + kGUARD_EXC_MISMATCH = 1u << 5, /* wrong guard for guarded fd */ + kGUARD_EXC_WRITE = 1u << 6 /* write on a guarded fd */ }; #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ diff --git a/bsd/sys/imgact.h b/bsd/sys/imgact.h index 83812f5b2..945da513d 100644 --- a/bsd/sys/imgact.h +++ b/bsd/sys/imgact.h @@ -113,6 +113,7 @@ struct image_params { struct label *ip_scriptlabelp; /* label of the script */ struct vnode *ip_scriptvp; /* script */ unsigned int ip_csflags; /* code signing flags */ + int ip_mac_return; /* return code from mac policy checks */ void *ip_px_sa; void *ip_px_sfa; void *ip_px_spa; @@ -130,5 +131,6 @@ struct image_params { #define IMGPF_SPAWN 0x00000010 /* spawn (without setexec) */ #define IMGPF_DISABLE_ASLR 0x00000020 /* disable ASLR */ #define IMGPF_ALLOW_DATA_EXEC 0x00000040 /* forcibly allow data execution */ +#define IMGPF_VFORK_EXEC 0x00000080 /* vfork followed by exec */ #endif /* !_SYS_IMGACT */ diff --git a/bsd/sys/kasl.h b/bsd/sys/kasl.h index 080d18e38..c9eef57a9 100644 --- a/bsd/sys/kasl.h +++ b/bsd/sys/kasl.h @@ -34,6 +34,8 @@ #define KASL_KEY_FACILITY "Facility" /* Facility generating messages */ #define KASL_KEY_LEVEL "Level" /* Priority level */ +#endif /* BSD_KERNEL_PRIVATE */ + extern int kern_asl_msg_va(int level, const char *facility, int num_pairs, va_list vargs, ...); @@ -43,6 +45,4 @@ kern_asl_msg(int level, const char *facility, int num_pairs, ...); int escape_str(char *str, int len, int buflen); -#endif /* BSD_KERNEL_PRIVATE */ - #endif /* !_SYS_KASL_H_ */ diff --git a/bsd/sys/kauth.h b/bsd/sys/kauth.h index 9539319e8..8a533524e 100644 --- a/bsd/sys/kauth.h +++ b/bsd/sys/kauth.h @@ -296,7 +296,7 @@ extern kauth_cred_t kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int g struct uthread; extern void kauth_cred_uthread_update(struct uthread *, proc_t); #ifdef CONFIG_MACF -extern int kauth_proc_label_update_execve(struct proc *p, struct vfs_context *ctx, struct vnode *vp, struct vnode *scriptvp, struct label *scriptlabel, struct label *execlabel, void *psattr); +extern void kauth_proc_label_update_execve(struct proc *p, struct vfs_context *ctx, struct vnode *vp, off_t offset, struct vnode *scriptvp, struct label *scriptlabel, struct label *execlabel, unsigned int *csflags, void *psattr, int *disjoint, int *update_return); #endif extern int kauth_cred_getgroups(kauth_cred_t _cred, gid_t *_groups, int *_groupcount); extern int kauth_cred_assume(uid_t _uid); diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 99441af70..80a04ea1f 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -152,16 +152,21 @@ extern void kernel_debug_enter( #define DBG_DRIVERS 6 #define DBG_TRACE 7 #define DBG_DLIL 8 -#define DBG_SECURITY 9 +#define DBG_WORKQUEUE 9 #define DBG_CORESTORAGE 10 #define DBG_CG 11 #define DBG_MISC 20 +#define DBG_SECURITY 30 #define DBG_DYLD 31 #define DBG_QT 32 #define DBG_APPS 33 #define DBG_LAUNCHD 34 #define DBG_PERF 37 #define DBG_IMPORTANCE 38 +#define DBG_BANK 40 +#define DBG_XPC 41 +#define DBG_ATM 42 + #define DBG_MIG 255 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */ @@ -185,9 +190,12 @@ extern void kernel_debug_enter( #define DBG_MACH_MSGID_INVALID 0x50 /* Messages - invalid */ #define DBG_MACH_LOCKS 0x60 /* new lock APIs */ #define DBG_MACH_PMAP 0x70 /* pmap */ +/* #define unused 0x80 */ #define DBG_MACH_MP 0x90 /* MP related */ #define DBG_MACH_VM_PRESSURE 0xA0 /* Memory Pressure Events */ #define DBG_MACH_STACKSHOT 0xA1 /* Stackshot/Microstackshot subsystem */ +#define DBG_MACH_SFI 0xA2 /* Selective Forced Idle (SFI) */ +#define DBG_MACH_ENERGY_PERF 0xA3 /* Energy/performance resource stats */ /* Codes for Scheduler (DBG_MACH_SCHED) */ #define MACH_SCHED 0x0 /* Scheduler */ @@ -220,10 +228,29 @@ extern void kernel_debug_enter( #define MACH_CPU_THROTTLE_DISABLE 0x1b /* Global CPU Throttle Disable */ #define MACH_RW_PROMOTE 0x1c /* promoted due to RW lock promotion */ #define MACH_RW_DEMOTE 0x1d /* promotion due to RW lock undone */ +#define MACH_SCHED_MAINTENANCE 0x1f /* periodic maintenance thread */ +#define MACH_DISPATCH 0x20 /* context switch completed */ +#define MACH_QUANTUM_HANDOFF 0x21 /* quantum handoff occurred */ +#define MACH_MULTIQ_DEQUEUE 0x22 /* Result of multiq dequeue */ +#define MACH_SCHED_THREAD_SWITCH 0x23 /* attempt direct context switch to hinted thread */ +#define MACH_SCHED_SMT_BALANCE 0x24 /* SMT load balancing ASTs */ + +/* Variants for MACH_MULTIQ_DEQUEUE */ +#define MACH_MULTIQ_BOUND 1 +#define MACH_MULTIQ_GROUP 2 +#define MACH_MULTIQ_GLOBAL 3 /* Codes for IPC (DBG_MACH_IPC) */ -#define MACH_TASK_SUSPEND 0x0 /* Suspended a task */ -#define MACH_TASK_RESUME 0x1 /* Resumed a task */ +#define MACH_TASK_SUSPEND 0x0 /* Suspended a task */ +#define MACH_TASK_RESUME 0x1 /* Resumed a task */ +#define MACH_THREAD_SET_VOUCHER 0x2 +#define MACH_IPC_MSG_SEND 0x3 /* mach msg send, uniq msg info */ +#define MACH_IPC_MSG_RECV 0x4 /* mach_msg receive */ +#define MACH_IPC_MSG_RECV_VOUCHER_REFUSED 0x5 /* mach_msg receive, voucher refused */ +#define MACH_IPC_KMSG_FREE 0x6 /* kernel free of kmsg data */ +#define MACH_IPC_VOUCHER_CREATE 0x7 /* Voucher added to global voucher hashtable */ +#define MACH_IPC_VOUCHER_CREATE_ATTR_DATA 0x8 /* Attr data for newly created voucher */ +#define MACH_IPC_VOUCHER_DESTROY 0x9 /* Voucher removed from global voucher hashtable */ /* Codes for pmap (DBG_MACH_PMAP) */ #define PMAP__CREATE 0x0 @@ -241,11 +268,24 @@ extern void kernel_debug_enter( #define PMAP__QUERY_RESIDENT 0xc #define PMAP__FLUSH_KERN_TLBS 0xd #define PMAP__FLUSH_DELAYED_TLBS 0xe +#define PMAP__FLUSH_TLBS_TO 0xf /* Codes for Stackshot/Microstackshot (DBG_MACH_STACKSHOT) */ #define MICROSTACKSHOT_RECORD 0x0 #define MICROSTACKSHOT_GATHER 0x1 +/* Codes for Selective Forced Idle (DBG_MACH_SFI) */ +#define SFI_SET_WINDOW 0x0 +#define SFI_CANCEL_WINDOW 0x1 +#define SFI_SET_CLASS_OFFTIME 0x2 +#define SFI_CANCEL_CLASS_OFFTIME 0x3 +#define SFI_THREAD_DEFER 0x4 +#define SFI_OFF_TIMER 0x5 +#define SFI_ON_TIMER 0x6 +#define SFI_WAIT_CANCELED 0x7 +#define SFI_PID_SET_MANAGED 0x8 +#define 
SFI_PID_CLEAR_MANAGED 0x9 + /* **** The Kernel Debug Sub Classes for Network (DBG_NETWORK) **** */ #define DBG_NETIP 1 /* Internet Protocol */ #define DBG_NETARP 2 /* Address Resolution Protocol */ @@ -270,7 +310,9 @@ extern void kernel_debug_enter( #define DBG_NETAFP 107 /* Apple Filing Protocol */ #define DBG_NETRTMP 108 /* Routing Table Maintenance Protocol */ #define DBG_NETAURP 109 /* Apple Update Routing Protocol */ + #define DBG_NETIPSEC 128 /* IPsec Protocol */ +#define DBG_NETVMNET 129 /* VMNet */ /* **** The Kernel Debug Sub Classes for IOKIT (DBG_IOKIT) **** */ #define DBG_IOINTC 0 /* Interrupt controller */ @@ -359,6 +401,7 @@ extern void kernel_debug_enter( #define DBG_MSDOS 0xF /* FAT-specific events; see the msdosfs project */ #define DBG_ACFS 0x10 /* Xsan-specific events; see the XsanFS project */ #define DBG_THROTTLE 0x11 /* I/O Throttling events */ +#define DBG_CONTENT_PROT 0xCF /* Content Protection Events: see bsd/sys/cprotect.h */ /* The Kernel Debug Sub Classes for BSD */ #define DBG_BSD_PROC 0x01 /* process/signals related */ @@ -382,15 +425,31 @@ extern void kernel_debug_enter( #define BSD_MEMSTAT_UPDATE 6 /* priority update */ #define BSD_MEMSTAT_IDLE_DEMOTE 7 /* idle demotion fired */ #define BSD_MEMSTAT_CLEAR_ERRORS 8 /* reset termination error state */ +#define BSD_MEMSTAT_DIRTY_TRACK 9 /* track the process state */ +#define BSD_MEMSTAT_DIRTY_SET 10 /* set the process state */ +#define BSD_MEMSTAT_DIRTY_CLEAR 11 /* clear the process state */ +#ifdef PRIVATE +#define BSD_MEMSTAT_GRP_SET_PROP 12 /* set group properties */ +#define BSD_MEMSTAT_DO_KILL 13 /* memorystatus kills */ +#endif /* PRIVATE */ /* The Kernel Debug Sub Classes for DBG_TRACE */ #define DBG_TRACE_DATA 0 #define DBG_TRACE_STRING 1 #define DBG_TRACE_INFO 2 +/* + * TRACE_DATA_NEWTHREAD 0x1 + * TRACE_DATA_EXEC 0x2 + */ +#define TRACE_DATA_THREAD_TERMINATE 0x3 /* thread has been queued for deallocation and can no longer run */ + /* The Kernel Debug Sub Classes for DBG_CORESTORAGE */ #define DBG_CS_IO 0 +/* The Kernel Debug Sub Classes for DBG_SECURITY */ +#define DBG_SEC_KERNEL 0 /* raw entropy collected by the kernel */ + /* Sub-class codes for CoreGraphics (DBG_CG) are defined in its component. 
*/ /* The Kernel Debug Sub Classes for DBG_MISC */ @@ -432,6 +491,9 @@ extern void kernel_debug_enter( #define IMP_TASK_SUPPRESSION 0x17 /* Task changed suppression behaviors */ #define IMP_TASK_APPTYPE 0x18 /* Task launched with apptype */ #define IMP_UPDATE 0x19 /* Requested -> effective calculation */ +#define IMP_USYNCH_QOS_OVERRIDE 0x1A /* Userspace synchronization applied QoS override to resource owning thread */ +#define IMP_DONOR_CHANGE 0x1B /* The iit_donor bit changed */ +#define IMP_MAIN_THREAD_QOS 0x1C /* The task's main thread QoS was set */ /* DBG_IMPORTANCE subclasses 0x20 - 0x3F reserved for task policy flavors */ /* Codes for IMP_ASSERTION */ @@ -450,6 +512,38 @@ extern void kernel_debug_enter( /* Codes for IMP_UPDATE */ #define IMP_UPDATE_TASK_CREATE 0x1 +/* Codes for IMP_USYNCH_QOS_OVERRIDE */ +#define IMP_USYNCH_ADD_OVERRIDE 0x0 /* add override for a contended resource */ +#define IMP_USYNCH_REMOVE_OVERRIDE 0x1 /* remove override for a contended resource */ + +/* Codes for IMP_DONOR_CHANGE */ +#define IMP_DONOR_UPDATE_LIVE_DONOR_STATE 0x0 +#define IMP_DONOR_INIT_DONOR_STATE 0x1 + +/* Subclasses for MACH Bank Voucher Attribute Manager (DBG_BANK) */ +#define BANK_ACCOUNT_INFO 0x10 /* Trace points related to bank account struct */ +#define BANK_TASK_INFO 0x11 /* Trace points related to bank task struct */ + +/* Subclasses for MACH ATM Voucher Attribute Manager (ATM) */ +#define ATM_SUBAID_INFO 0x10 +#define ATM_GETVALUE_INFO 0x20 +#define ATM_UNREGISTER_INFO 0x30 + +/* Codes for BANK_ACCOUNT_INFO */ +#define BANK_SETTLE_CPU_TIME 0x1 /* Bank ledger(chit) rolled up to tasks. */ + +/* Codes for ATM_SUBAID_INFO */ +#define ATM_MIN_CALLED 0x1 +#define ATM_MIN_LINK_LIST 0x2 + +/* Codes for ATM_GETVALUE_INFO */ +#define ATM_VALUE_REPLACED 0x1 +#define ATM_VALUE_ADDED 0x2 + +/* Codes for ATM_UNREGISTER_INFO */ +#define ATM_VALUE_UNREGISTERED 0x1 +#define ATM_VALUE_DIFF_MAILBOX 0x2 + /**********************************************************************/ #define KDBG_CODE(Class, SubClass, code) (((Class & 0xff) << 24) | ((SubClass & 0xff) << 16) | ((code & 0x3fff) << 2)) @@ -484,6 +578,8 @@ extern void kernel_debug_enter( #define IMPORTANCE_CODE(SubClass, code) KDBG_CODE(DBG_IMPORTANCE, (SubClass), (code)) +#define BANK_CODE(SubClass, code) KDBG_CODE(DBG_BANK, (SubClass), (code)) +#define ATM_CODE(SubClass, code) KDBG_CODE(DBG_ATM, (SubClass), (code)) /* Usage: * kernel_debug((KDBG_CODE(DBG_NETWORK, DNET_PROTOCOL, 51) | DBG_FUNC_START), @@ -514,7 +610,7 @@ extern void kernel_debug_enter( extern unsigned int kdebug_enable; #define KDEBUG_ENABLE_TRACE 0x1 -#define KDEBUG_ENABLE_ENTROPY 0x2 +#define KDEBUG_ENABLE_ENTROPY 0x2 /* Obsolescent */ #define KDEBUG_ENABLE_CHUD 0x4 #define KDEBUG_ENABLE_PPT 0x8 @@ -553,6 +649,12 @@ do { \ kernel_debug1(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c, \ (uintptr_t)d,(uintptr_t)e); \ } while(0) + +#define KERNEL_DEBUG_EARLY(x,a,b,c,d) \ +do { \ + kernel_debug_early((uint32_t)x, (uintptr_t)a, (uintptr_t)b, \ + (uintptr_t)c, (uintptr_t)d); \ +} while(0) #else /* XNU_KERNEL_PRIVATE */ #define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e) \ do { \ @@ -567,18 +669,25 @@ do { \ kernel_debug1(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c, \ (uintptr_t)d,(uintptr_t)e); \ } while(0) + +#define KERNEL_DEBUG_EARLY(x,a,b,c,d) \ +do { \ + kernel_debug_early((uint32_t)x, (uintptr_t)a, (uintptr_t)b, \ + (uintptr_t)c, (uintptr_t)d); \ +} while(0) #endif /* XNU_KERNEL_PRIVATE */ #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */ #define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e) do { } 
while(0) #define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e) do { } while(0) +#define KERNEL_DEBUG_EARLY(x,a,b,c,d) do { } while(0) #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */ /* * Specify KDEBUG_PPT to indicate that the event belongs to the * limited PPT set. */ -#define KDEBUG_COMMON (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_ENTROPY|KDEBUG_ENABLE_CHUD|KDEBUG_ENABLE_PPT) -#define KDEBUG_TRACE (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_ENTROPY|KDEBUG_ENABLE_CHUD) +#define KDEBUG_COMMON (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_CHUD|KDEBUG_ENABLE_PPT) +#define KDEBUG_TRACE (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_CHUD) #define KDEBUG_PPT (KDEBUG_ENABLE_PPT) /* @@ -625,6 +734,15 @@ extern void kernel_debug1( uintptr_t arg4, uintptr_t arg5); +extern void kernel_debug_early( + uint32_t debugid, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4); + +extern void kernel_debug_string( + const char *message); #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) #ifdef XNU_KERNEL_PRIVATE @@ -679,6 +797,7 @@ extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *a extern void kdbg_dump_trace_to_file(const char *); void start_kern_tracing(unsigned int, boolean_t); +void start_kern_tracing_with_typefilter(unsigned int, boolean_t, unsigned int); struct task; extern void kdbg_get_task_name(char*, int, struct task *task); void disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags); diff --git a/bsd/sys/kern_control.h b/bsd/sys/kern_control.h index 7b51d55e8..6ac130dd7 100644 --- a/bsd/sys/kern_control.h +++ b/bsd/sys/kern_control.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004, 2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2004, 2012-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -135,14 +135,63 @@ struct ctl_info { @field sc_reserved Reserved, must be set to zero. 
*/ struct sockaddr_ctl { - u_char sc_len; /* depends on size of bundle ID string */ - u_char sc_family; /* AF_SYSTEM */ + u_char sc_len; /* depends on size of bundle ID string */ + u_char sc_family; /* AF_SYSTEM */ u_int16_t ss_sysaddr; /* AF_SYS_KERNCONTROL */ u_int32_t sc_id; /* Controller unique identifier */ u_int32_t sc_unit; /* Developer private unit number */ u_int32_t sc_reserved[5]; }; +#ifdef PRIVATE + +struct xkctl_reg { + u_int32_t xkr_len; + u_int32_t xkr_kind; + u_int32_t xkr_id; + u_int32_t xkr_reg_unit; + u_int32_t xkr_flags; + u_int64_t xkr_kctlref; + u_int32_t xkr_recvbufsize; + u_int32_t xkr_sendbufsize; + u_int32_t xkr_lastunit; + u_int32_t xkr_pcbcount; + u_int64_t xkr_connect; + u_int64_t xkr_disconnect; + u_int64_t xkr_send; + u_int64_t xkr_send_list; + u_int64_t xkr_setopt; + u_int64_t xkr_getopt; + u_int64_t xkr_rcvd; + char xkr_name[MAX_KCTL_NAME]; +}; + +struct xkctlpcb { + u_int32_t xkp_len; + u_int32_t xkp_kind; + u_int64_t xkp_kctpcb; + u_int32_t xkp_unit; + u_int32_t xkp_kctlid; + u_int64_t xkp_kctlref; + char xkp_kctlname[MAX_KCTL_NAME]; +}; + +struct kctlstat { + u_int64_t kcs_reg_total __attribute__((aligned(8))); + u_int64_t kcs_reg_count __attribute__((aligned(8))); + u_int64_t kcs_pcbcount __attribute__((aligned(8))); + u_int64_t kcs_gencnt __attribute__((aligned(8))); + u_int64_t kcs_connections __attribute__((aligned(8))); + u_int64_t kcs_conn_fail __attribute__((aligned(8))); + u_int64_t kcs_send_fail __attribute__((aligned(8))); + u_int64_t kcs_send_list_fail __attribute__((aligned(8))); + u_int64_t kcs_enqueue_fail __attribute__((aligned(8))); + u_int64_t kcs_enqueue_fullsock __attribute__((aligned(8))); + +}; + +#endif /* PRIVATE */ + #ifdef KERNEL #include @@ -189,6 +238,13 @@ typedef void * kern_ctl_ref; the extended fields within the kern_ctl_reg structure. */ #define CTL_FLAG_REG_EXTENDED 0x8 + +/*! + @defined CTL_FLAG_REG_CRIT + @discussion This flag indicates that this kernel control utilizes + the extended fields within the kern_ctl_reg structure. +*/ +#define CTL_FLAG_REG_CRIT 0x10 #endif /* KERNEL_PRIVATE */ /* Data flags for controllers */ @@ -201,6 +257,7 @@ typedef void * kern_ctl_ref; the client after all of the data has been enqueued. */ #define CTL_DATA_NOWAKEUP 0x1 + /*! @defined CTL_DATA_EOR @discussion The CTL_DATA_EOR flag can be used for the enqueue @@ -208,6 +265,16 @@ typedef void * kern_ctl_ref; */ #define CTL_DATA_EOR 0x2 +#ifdef KERNEL_PRIVATE +/*! + @defined CTL_DATA_CRIT + @discussion This flag indicates the data is critical to the client + and that it needs to be forced into the socket buffer + by resizing it if needed. +*/ +#define CTL_DATA_CRIT 0x4 +#endif /* KERNEL_PRIVATE */ + __BEGIN_DECLS /*! @@ -325,7 +392,25 @@ typedef errno_t (*ctl_getopt_func)(kern_ctl_ref kctlref, u_int32_t unit, void *u @param flags The recv flags. See the recv(2) man page. */ typedef void (*ctl_rcvd_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, - int flags); + int flags); + +/*! + @typedef ctl_send_list_func + @discussion The ctl_send_list_func is used to receive data sent from + the client to the kernel control. + @param kctlref The control ref of the kernel control. + @param unit The unit number of the kernel control instance the client has + connected to. + @param unitinfo The user-defined private data initialized by the + ctl_connect_func callback. + @param m The data sent by the client to the kernel control in an + mbuf packet chain. Your function is responsible for releasing + the mbuf packet chain.
+ @param flags The flags specified by the client when calling + send/sendto/sendmsg (MSG_OOB/MSG_DONTROUTE). + */ +typedef errno_t (*ctl_send_list_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + mbuf_t m, int flags); #endif /* KERNEL_PRIVATE */ /*! @@ -382,6 +467,7 @@ struct kern_ctl_reg ctl_getopt_func ctl_getopt; #ifdef KERNEL_PRIVATE ctl_rcvd_func ctl_rcvd; /* Only valid if CTL_FLAG_REG_EXTENDED is set */ + ctl_send_list_func ctl_send_list; /* Only valid if CTL_FLAG_REG_EXTENDED is set */ #endif /* KERNEL_PRIVATE */ }; @@ -452,6 +538,30 @@ ctl_enqueuedata(kern_ctl_ref kctlref, u_int32_t unit, void *data, size_t len, u_ errno_t ctl_enqueuembuf(kern_ctl_ref kctlref, u_int32_t unit, mbuf_t m, u_int32_t flags); +#ifdef PRIVATE +/*! + @function ctl_enqueuembuf_list + @discussion Send data stored in an mbuf packet chain from the kernel + control to the client. The caller is responsible for freeing + the mbuf chain if ctl_enqueuembuf_list returns an error. + Not valid if ctl_flags contains CTL_FLAG_REG_SOCK_STREAM. + @param kctlref The control reference of the kernel control. + @param unit The unit number of the kernel control instance. + @param m_list An mbuf chain containing the data to send to the client. + @param flags Send flags. CTL_DATA_NOWAKEUP is + the only supported flag. + @param m_remain A pointer to the list of mbuf packets in the chain that + could not be enqueued. + @result 0 - Data was enqueued to be read by the client. + EINVAL - Invalid parameters. + ENOBUFS - The queue is full. + */ +errno_t +ctl_enqueuembuf_list(kern_ctl_ref kctlref, u_int32_t unit, mbuf_t m_list, + u_int32_t flags, mbuf_t *m_remain); + + +#endif /*! @function ctl_getenqueuespace @@ -466,7 +576,69 @@ ctl_enqueuembuf(kern_ctl_ref kctlref, u_int32_t unit, mbuf_t m, u_int32_t flags) errno_t ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space); +/*! + @function ctl_getenqueuereadable + @discussion Retrieve the difference between enqueued bytes and + the low-water mark for the socket receive buffer. + @param kctlref The control reference of the kernel control. + @param unit The unit number of the kernel control instance. + @param difference The address at which to return the current difference + between the low-water mark for the socket and the number of bytes + enqueued. 0 indicates that the socket is readable by the client + (the number of bytes in the buffer is above the low-water mark). + @result 0 - Success; the difference is returned to caller. + EINVAL - Invalid parameters.
+ */ +errno_t +ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *difference); + #ifdef KERNEL_PRIVATE + +#include +#include + +/* + * internal structure maintained for each registered controller + */ +struct ctl_cb; +struct socket; + +struct kctl { + TAILQ_ENTRY(kctl) next; /* controller chain */ + + /* controller information provided when registering */ + char name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ + u_int32_t id; + u_int32_t reg_unit; + + /* misc communication information */ + u_int32_t flags; /* support flags */ + u_int32_t recvbufsize; /* request more than the default buffer size */ + u_int32_t sendbufsize; /* request more than the default buffer size */ + + /* Dispatch functions */ + ctl_connect_func connect; /* Make contact */ + ctl_disconnect_func disconnect; /* Break contact */ + ctl_send_func send; /* Send data to nke */ + ctl_send_list_func send_list; /* Send list of packets */ + ctl_setopt_func setopt; /* set kctl configuration */ + ctl_getopt_func getopt; /* get kctl configuration */ + ctl_rcvd_func rcvd; /* Notify nke when client reads data */ + + TAILQ_HEAD(, ctl_cb) kcb_head; + u_int32_t lastunit; +}; + +struct ctl_cb { + TAILQ_ENTRY(ctl_cb) next; /* controller chain */ + lck_mtx_t *mtx; + struct socket *so; /* controlling socket */ + struct kctl *kctl; /* back pointer to controller */ + void *userdata; + u_int32_t unit; + u_int32_t usecount; +}; + u_int32_t ctl_id_by_name(const char *name); errno_t ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize); #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/kern_event.h b/bsd/sys/kern_event.h index 03b30f16f..bd65869e5 100644 --- a/bsd/sys/kern_event.h +++ b/bsd/sys/kern_event.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -218,6 +218,28 @@ struct kev_vendor_code { */ #define SIOCGKEVVENDOR _IOWR('e', 4, struct kev_vendor_code) +#ifdef PRIVATE +struct xkevtpcb { + u_int32_t kep_len; + u_int32_t kep_kind; + u_int64_t kep_evtpcb; + u_int32_t kep_vendor_code_filter; + u_int32_t kep_class_filter; + u_int32_t kep_subclass_filter; +}; + +struct kevtstat { + u_int64_t kes_pcbcount __attribute__((aligned(8))); + u_int64_t kes_gencnt __attribute__((aligned(8))); + u_int64_t kes_badvendor __attribute__((aligned(8))); + u_int64_t kes_toobig __attribute__((aligned(8))); + u_int64_t kes_nomem __attribute__((aligned(8))); + u_int64_t kes_fullsock __attribute__((aligned(8))); + u_int64_t kes_posted __attribute__((aligned(8))); + +}; +#endif /* PRIVATE */ + #ifdef KERNEL /*!
@define N_KEV_VECTORS diff --git a/bsd/sys/kern_memorystatus.h b/bsd/sys/kern_memorystatus.h index bacc7588d..4e8d01c3e 100644 --- a/bsd/sys/kern_memorystatus.h +++ b/bsd/sys/kern_memorystatus.h @@ -36,6 +36,8 @@ #define JETSAM_PRIORITY_REVISION 2 +#define JETSAM_PRIORITY_IDLE_HEAD -2 +/* The value -1 is an alias to JETSAM_PRIORITY_DEFAULT */ #define JETSAM_PRIORITY_IDLE 0 #define JETSAM_PRIORITY_IDLE_DEFERRED 1 #define JETSAM_PRIORITY_BACKGROUND_OPPORTUNISTIC 2 @@ -120,6 +122,9 @@ typedef struct jetsam_snapshot_entry { uint64_t user_data; uint8_t uuid[16]; uint32_t fds; + uint32_t max_pages_lifetime; + uint32_t purgeable_pages; + struct timeval cpu_time; } memorystatus_jetsam_snapshot_entry_t; typedef struct jetsam_snapshot { @@ -154,6 +159,7 @@ enum { kMemorystatusKilledVnodes, kMemorystatusKilledVMPageShortage, kMemorystatusKilledVMThrashing, + kMemorystatusKilledFCThrashing, kMemorystatusKilledPerProcessLimit, kMemorystatusKilledDiagnostic, kMemorystatusKilledIdleExit @@ -178,7 +184,11 @@ int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, void *bu #define MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES 2 #define MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT 3 #define MEMORYSTATUS_CMD_GET_PRESSURE_STATUS 4 -#define MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK 5 /* TODO: deprecate */ +#define MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK 5 +#define MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT 6 + +/* Group Commands */ +#define MEMORYSTATUS_CMD_GRP_SET_PROPERTIES 7 #if PRIVATE /* Test commands */ @@ -215,18 +225,20 @@ typedef struct memorystatus_priority_properties { #define P_MEMSTAT_FOREGROUND 0x00000100 #define P_MEMSTAT_DIAG_SUSPENDED 0x00000200 #define P_MEMSTAT_PRIOR_THAW 0x00000400 -#define P_MEMSTAT_MEMLIMIT_BACKGROUND 0x00000800 +#define P_MEMSTAT_MEMLIMIT_BACKGROUND 0x00000800 /* Task has a memory limit for when it's in the background. Used for a process' "high water mark".*/ #define P_MEMSTAT_INTERNAL 0x00001000 +#define P_MEMSTAT_FATAL_MEMLIMIT 0x00002000 /* cross this limit and the process is killed. Types: system-wide default task memory limit and per-task custom memory limit. 
*/ extern void memorystatus_init(void) __attribute__((section("__TEXT, initcode"))); extern int memorystatus_add(proc_t p, boolean_t locked); -extern int memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background); +extern int memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit); extern int memorystatus_remove(proc_t p, boolean_t locked); extern int memorystatus_dirty_track(proc_t p, uint32_t pcontrol); extern int memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol); extern int memorystatus_dirty_get(proc_t p); +extern int memorystatus_dirty_clear(proc_t p, uint32_t pcontrol); extern int memorystatus_on_terminate(proc_t p); @@ -258,14 +270,14 @@ extern unsigned int memorystatus_jetsam_running; boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async); boolean_t memorystatus_kill_on_VM_thrashing(boolean_t async); +boolean_t memorystatus_kill_on_FC_thrashing(boolean_t async); boolean_t memorystatus_kill_on_vnode_limit(void); void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb); +void jetsam_on_ledger_cpulimit_exceeded(void); void memorystatus_pages_update(unsigned int pages_avail); -extern boolean_t memorystatus_is_foreground_locked(proc_t p); - #else /* CONFIG_JETSAM */ boolean_t memorystatus_idle_exit_from_VM(void); @@ -301,14 +313,13 @@ extern void memorystatus_freeze_init(void) __attribute__((section("__TEXT, initc #if VM_PRESSURE_EVENTS -#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4 - extern kern_return_t memorystatus_update_vm_pressure(boolean_t); -#if CONFIG_JETSAM +#if CONFIG_MEMORYSTATUS extern int memorystatus_send_pressure_note(int pid); +extern boolean_t memorystatus_is_foreground_locked(proc_t p); extern boolean_t memorystatus_bg_pressure_eligible(proc_t p); -#endif +#endif /* CONFIG_MEMORYSTATUS */ #endif /* VM_PRESSURE_EVENTS */ diff --git a/bsd/sys/kern_overrides.h b/bsd/sys/kern_overrides.h index 377d8487a..b1b486528 100644 --- a/bsd/sys/kern_overrides.h +++ b/bsd/sys/kern_overrides.h @@ -37,10 +37,12 @@ __BEGIN_DECLS /* System Overrides Flags */ +#define SYS_OVERRIDE_DISABLE 0x0 #define SYS_OVERRIDE_IO_THROTTLE 0x1 #define SYS_OVERRIDE_CPU_THROTTLE 0x2 -#define SYS_OVERRIDE_FLAGS_MASK (SYS_OVERRIDE_IO_THROTTLE | SYS_OVERRIDE_CPU_THROTTLE) + +#define SYS_OVERRIDE_FLAGS_MASK (SYS_OVERRIDE_DISABLE | SYS_OVERRIDE_IO_THROTTLE | SYS_OVERRIDE_CPU_THROTTLE) #ifdef BSD_KERNEL_PRIVATE void init_system_override(void); diff --git a/bsd/sys/kern_tests.h b/bsd/sys/kern_tests.h index 5ed6cc727..df71d9e86 100644 --- a/bsd/sys/kern_tests.h +++ b/bsd/sys/kern_tests.h @@ -1,4 +1,5 @@ #ifndef _KERN_TESTS_H #define _KERN_TESTS_H + #endif /* !defined(_KERN_TESTS_H) */ diff --git a/bsd/sys/kpi_mbuf.h b/bsd/sys/kpi_mbuf.h index a17245d51..e4ac6702c 100644 --- a/bsd/sys/kpi_mbuf.h +++ b/bsd/sys/kpi_mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2013 Apple Inc. All rights reserved. + * Copyright (c) 2008-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -209,10 +209,7 @@ typedef u_int32_t mbuf_csum_performed_flags_t; /*! @enum mbuf_how_t @abstract Method of allocating an mbuf. - @discussion Blocking will cause the funnel to be dropped. If the - funnel is dropped, other threads may make changes to networking - data structures. This can lead to very bad things happening. 
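To make the mbuf_how_t distinction just introduced concrete: a short sketch of non-blocking allocation against the kpi_mbuf.h interface (mbuf_gethdr() is an existing KPI; the wrapper name and its error expectation are illustrative assumptions):

    #include <sys/kpi_mbuf.h>

    /* Sketch: allocate a packet header mbuf without blocking; under
     * memory pressure this fails fast instead of sleeping. */
    static errno_t
    alloc_pkthdr_nonblocking(mbuf_t *out)
    {
        mbuf_t m = NULL;
        errno_t err = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_DATA, &m);
        if (err != 0)
            return err;   /* caller may retry later from a context where
                           * MBUF_WAITOK would be acceptable */
        *out = m;
        return 0;
    }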
- Blocking on the input our output path can also impact + performance. There are some cases where making a blocking call is acceptable. When in doubt, use MBUF_DONTWAIT. @constant MBUF_WAITOK Allow a call to allocate an mbuf to block. @@ -954,6 +951,19 @@ extern size_t mbuf_pkthdr_len(const mbuf_t mbuf); */ extern void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len); +#ifdef XNU_KERNEL_PRIVATE +/*! + @function mbuf_pkthdr_maxlen + @discussion Retrieves the maximum length of data that may be stored + in this mbuf packet. This value assumes that the data pointer + was set to the start of the possible range for that pointer + for each mbuf in the packet chain. + @param mbuf The mbuf. + @result The maximum length of data for this mbuf. + */ +extern size_t mbuf_pkthdr_maxlen(const mbuf_t mbuf); +#endif /* XNU_KERNEL_PRIVATE */ + /*! @function mbuf_pkthdr_adjustlen @discussion Adjusts the length of the packet in the packet header. @@ -1620,6 +1630,27 @@ extern errno_t mbuf_get_driver_scratch(mbuf_t m, u_int8_t **area, size_t *area_ln); #endif /* KERNEL_PRIVATE */ +#ifdef XNU_KERNEL_PRIVATE +/*! + @function mbuf_pkt_list_len + @discussion Retrieves the length of the list of mbuf packets. + @param mbuf The mbuf. + @result The length of the mbuf packet list. + */ +extern size_t mbuf_pkt_list_len(const mbuf_t mbuf); + +/*! + @function mbuf_pkt_list_maxlen + @discussion Retrieves the maximum length of data that may be stored + in the list of mbuf packets. This value assumes that the data pointer + was set to the start of the possible range for that pointer + for each mbuf in the packet chain. + @param mbuf The mbuf. + @result The maximum length of data for this mbuf. + */ +extern size_t mbuf_pkt_list_maxlen(const mbuf_t mbuf); +#endif /* XNU_KERNEL_PRIVATE */ + /* IF_QUEUE interaction */ #define IF_ENQUEUE_MBUF(ifq, m) { \ diff --git a/bsd/sys/kpi_socket.h b/bsd/sys/kpi_socket.h index 658522422..8a8f186b3 100644 --- a/bsd/sys/kpi_socket.h +++ b/bsd/sys/kpi_socket.h @@ -551,6 +551,14 @@ extern errno_t sock_setupcalls(socket_t sock, sock_upcall read_callback, */ extern errno_t sock_catchevents(socket_t sock, sock_evupcall event_callback, void *event_context, u_int32_t event_mask); +/* + @function sock_iskernel + @discussion Returns true if the socket was created by the kernel or + is owned by the kernel. + @param sock The socket. + @result True if the kernel owns the socket. +*/ +extern int sock_iskernel(socket_t); #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/sys/loadable_fs.h b/bsd/sys/loadable_fs.h index 4006f46a4..123e742fb 100644 --- a/bsd/sys/loadable_fs.h +++ b/bsd/sys/loadable_fs.h @@ -45,7 +45,7 @@ /* - * Constants for Loadabls FS Utilities (in "/System/Library/Filesystems") + * Constants for Loadable FS Utilities (in "/System/Library/Filesystems") * * Example of a /usr/filesystems directory * diff --git a/bsd/sys/malloc.h b/bsd/sys/malloc.h index 8aea6f647..ef423f531 100644 --- a/bsd/sys/malloc.h +++ b/bsd/sys/malloc.h @@ -179,7 +179,7 @@ #define M_IP6NDP 86 /* IPv6 Neighbour Discovery*/ #define M_IP6OPT 87 /* IPv6 options management */ #define M_IP6MISC 88 /* IPv6 misc.
memory */ -#define M_TSEGQ 89 /* TCP segment queue entry, unused */ +/* unused 89 */ #define M_IGMP 90 #define M_JNL_JNL 91 /* Journaling: "struct journal" */ #define M_JNL_TR 92 /* Journaling: "struct transaction" */ @@ -210,8 +210,14 @@ #define M_FLOW_DIVERT_PCB 115 /* flow divert control block */ #define M_FLOW_DIVERT_GROUP 116 /* flow divert socket group */ #define M_IP6CGA 117 +#define M_NECP 118 /* General NECP policy data */ +#define M_NECP_SESSION_POLICY 119 /* NECP session policies */ +#define M_NECP_SOCKET_POLICY 120 /* NECP socket-level policies */ +#define M_NECP_IP_POLICY 121 /* NECP IP-level policies */ +#define M_FD_VN_DATA 122 /* Per fd vnode data */ +#define M_FD_DIRBUF 123 /* Directory entries' buffer */ -#define M_LAST 118 /* Must be last type + 1 */ +#define M_LAST 124 /* Must be last type + 1 */ #else /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h index 8a849338b..5a7913ea8 100644 --- a/bsd/sys/mbuf.h +++ b/bsd/sys/mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2013 Apple Inc. All rights reserved. + * Copyright (c) 1999-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -255,14 +255,6 @@ struct tcp_mtag { }; }; -/* - * IPSec mbuf tag - */ -struct ipsec_mtag { - uint32_t policy_id; -#define ipsec_policy proto_mtag.__pr_u.ipsec.policy_id -}; - /* * Protocol specific mbuf tag (at most one protocol metadata per mbuf). * @@ -274,10 +266,17 @@ struct ipsec_mtag { struct proto_mtag { union { struct tcp_mtag tcp; /* TCP specific */ - struct ipsec_mtag ipsec; /* IPSec specific */ } __pr_u; }; +/* + * NECP specific mbuf tag. + */ +struct necp_mtag { + uint32_t necp_policy_id; + uint32_t necp_last_interface_index; +}; + /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR set. */ @@ -355,12 +354,14 @@ struct pkthdr { #if MEASURE_BW u_int64_t pkt_bwseq; /* sequence # */ #endif /* MEASURE_BW */ + u_int64_t pkt_enqueue_ts; /* enqueue time */ /* * Tags (external and built-in) */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of external tags */ struct proto_mtag proto_mtag; /* built-in protocol-specific tag */ struct pf_mtag pf_mtag; /* built-in PF tag */ + struct necp_mtag necp_mtag; /* built-in NECP tag */ /* * Module private scratch space (32-bit aligned), currently 16-bytes * large. 
Anything stored here is not guaranteed to survive across @@ -433,6 +434,8 @@ struct pkthdr { #define PKTF_IFAINFO 0x4000 /* pkt has valid interface addr info */ #define PKTF_SO_BACKGROUND 0x8000 /* data is from background source */ #define PKTF_FORWARDED 0x10000 /* pkt was forwarded from another i/f */ +#define PKTF_PRIV_GUARDED 0x20000 /* pkt_mpriv area guard enabled */ +#define PKTF_KEEPALIVE 0x40000 /* pkt is kernel-generated keepalive */ /* flags related to flow control/advisory and identification */ #define PKTF_FLOW_MASK \ (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK) @@ -946,6 +949,7 @@ struct omb_class_stat { u_int64_t mbcl_purge_cnt; /* # of purges so far */ u_int64_t mbcl_fail_cnt; /* # of allocation failures */ u_int32_t mbcl_ctotal; /* total only for this class */ + u_int32_t mbcl_release_cnt; /* amount of memory returned */ /* * Cache layer statistics */ @@ -974,6 +978,7 @@ typedef struct mb_class_stat { u_int64_t mbcl_purge_cnt; /* # of purges so far */ u_int64_t mbcl_fail_cnt; /* # of allocation failures */ u_int32_t mbcl_ctotal; /* total only for this class */ + u_int32_t mbcl_release_cnt; /* amount of memory returned */ /* * Cache layer statistics */ @@ -982,7 +987,8 @@ typedef struct mb_class_stat { u_int32_t mbcl_mc_waiter_cnt; /* # waiters on the cache */ u_int32_t mbcl_mc_wretry_cnt; /* # of wait retries */ u_int32_t mbcl_mc_nwretry_cnt; /* # of no-wait retry attempts */ - u_int64_t mbcl_reserved[4]; /* for future use */ + u_int32_t mbcl_peak_reported; /* last usage peak reported */ + u_int32_t mbcl_reserved[7]; /* for future use */ } mb_class_stat_t; #define MCS_DISABLED 0 /* cache is permanently disabled */ @@ -1082,6 +1088,8 @@ struct mbuf; #define M_COPYM_NOOP_HDR 0 /* don't copy/move pkthdr contents */ #define M_COPYM_COPY_HDR 1 /* copy pkthdr from old to new */ #define M_COPYM_MOVE_HDR 2 /* move pkthdr from old to new */ +#define M_COPYM_MUST_COPY_HDR 3 /* MUST copy pkthdr from old to new */ +#define M_COPYM_MUST_MOVE_HDR 4 /* MUST move pkthdr from old to new */ /* * These macros are mapped to the appropriate KPIs, so that private code @@ -1289,6 +1297,8 @@ __private_extern__ struct mbuf *m_getpackets_internal(unsigned int *, int, __private_extern__ struct mbuf *m_allocpacket_internal(unsigned int *, size_t, unsigned int *, int, int, size_t); +__private_extern__ void m_drain(void); + /* * Packets may have annotations attached by affixing a list of "packet * tags" to the pkthdr structure. Packet tags are dynamically allocated @@ -1329,7 +1339,7 @@ enum { KERNEL_TAG_TYPE_ENCAP = 8, KERNEL_TAG_TYPE_INET6 = 9, KERNEL_TAG_TYPE_IPSEC = 10, - KERNEL_TAG_TYPE_DRVAUX = 11 + KERNEL_TAG_TYPE_DRVAUX = 11, }; /* Packet tag routines */ diff --git a/bsd/sys/mcache.h b/bsd/sys/mcache.h index 34c76988f..9bd70a21e 100644 --- a/bsd/sys/mcache.h +++ b/bsd/sys/mcache.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2013 Apple Inc. All rights reserved. + * Copyright (c) 2006-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -324,22 +324,23 @@ typedef struct mcache { #define MCACHE_STACK_DEPTH 16 +#define MCA_TRN_MAX 2 /* Number of transactions to record */ + typedef struct mcache_audit { struct mcache_audit *mca_next; /* next audit struct */ void *mca_addr; /* address of buffer */ mcache_t *mca_cache; /* parent cache of the buffer */ - struct thread *mca_thread; /* thread doing transaction */ - struct thread *mca_pthread; /* previous transaction thread */ size_t mca_contents_size; /* size of saved contents */ void *mca_contents; /* user-specific saved contents */ - uint32_t mca_tstamp; /* transaction timestamp (ms) */ - uint32_t mca_ptstamp; /* prev transaction timestamp (ms) */ - uint16_t mca_depth; /* pc stack depth */ - uint16_t mca_pdepth; /* previous transaction pc stack */ - void *mca_stack[MCACHE_STACK_DEPTH]; - void *mca_pstack[MCACHE_STACK_DEPTH]; void *mca_uptr; /* user-specific pointer */ uint32_t mca_uflags; /* user-specific flags */ + uint32_t mca_next_trn; + struct mca_trn { + struct thread *mca_thread; /* thread doing transaction */ + uint32_t mca_tstamp; + uint16_t mca_depth; + void *mca_stack[MCACHE_STACK_DEPTH]; + } mca_trns[MCA_TRN_MAX]; } mcache_audit_t; __private_extern__ int assfail(const char *, const char *, int); @@ -358,7 +359,7 @@ __private_extern__ unsigned int mcache_alloc_ext(mcache_t *, mcache_obj_t **, unsigned int, int); __private_extern__ void mcache_free_ext(mcache_t *, mcache_obj_t *); __private_extern__ void mcache_reap(void); -__private_extern__ boolean_t mcache_purge_cache(mcache_t *); +__private_extern__ boolean_t mcache_purge_cache(mcache_t *, boolean_t); __private_extern__ void mcache_waiter_inc(mcache_t *); __private_extern__ void mcache_waiter_dec(mcache_t *); __private_extern__ boolean_t mcache_bkt_isempty(mcache_t *); @@ -377,6 +378,9 @@ __private_extern__ char *mcache_dump_mca(mcache_audit_t *); __private_extern__ void mcache_audit_panic(mcache_audit_t *, void *, size_t, int64_t, int64_t); +extern int32_t total_sbmb_cnt; +extern int32_t total_sbmb_cnt_peak; +extern int64_t sbmb_limreached; extern mcache_t *mcache_audit_cache; #ifdef __cplusplus diff --git a/bsd/sys/mman.h b/bsd/sys/mman.h index d7469fbf3..acdbeb59f 100644 --- a/bsd/sys/mman.h +++ b/bsd/sys/mman.h @@ -215,6 +215,11 @@ int madvise(void *, size_t, int); int mincore(const void *, size_t, char *); int minherit(void *, size_t, int); #endif + +#ifdef PRIVATE +int mremap_encrypted(void *, size_t, __uint32_t, __uint32_t, __uint32_t); +#endif + __END_DECLS #else /* KERNEL */ diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h index d2d05608a..2f6437348 100644 --- a/bsd/sys/mount.h +++ b/bsd/sys/mount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -85,7 +85,7 @@ #include #endif -typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ +#include /* file system id type */ /* * file system statistics @@ -366,7 +366,6 @@ struct vfs_attr { #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ -#define VFS_SET_PACKAGE_EXTS 3 /* set package extension list */ /* * Flags for various system call interfaces. 
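Looking back at the mcache_audit_t rework above: each audited buffer now carries a small ring of its last MCA_TRN_MAX transactions instead of a single current/previous pair. A sketch of how one slot might be filled (an illustration of the data structure only, not the kernel's actual recording code; the function name is invented):

    /* Illustrative only: record one transaction into the per-buffer ring. */
    static void
    mca_record_transaction(mcache_audit_t *mca, struct thread *thr,
        uint32_t tstamp_ms)
    {
        struct mca_trn *trn = &mca->mca_trns[mca->mca_next_trn % MCA_TRN_MAX];
        trn->mca_thread = thr;        /* thread doing the transaction */
        trn->mca_tstamp = tstamp_ms;  /* transaction timestamp (ms) */
        trn->mca_depth = 0;           /* stack capture elided in this sketch */
        mca->mca_next_trn = (mca->mca_next_trn + 1) % MCA_TRN_MAX;
    }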
@@ -445,6 +444,7 @@ union union_vfsidctl { /* the fields vc_vers and vc_fsid are compatible */ #define VFS_CTL_SADDR 0x00010007 /* get server address */ #define VFS_CTL_DISC 0x00010008 /* server disconnected */ #define VFS_CTL_SERVERINFO 0x00010009 /* information about fs server */ +#define VFS_CTL_NSTATUS 0x0001000A /* netfs mount status */ struct vfsquery { u_int32_t vq_flags; @@ -456,6 +456,17 @@ struct vfs_server { u_int8_t vs_server_name[MAXHOSTNAMELEN*3]; /* UTF8 server name to display (null terminated) */ }; +/* + * NetFS mount status - returned by VFS_CTL_NSTATUS + */ +struct netfs_status { + u_int32_t ns_status; // Current status of mount (vfsquery flags) + char ns_mountopts[512]; // Significant mount options + uint32_t ns_waittime; // Time waiting for reply (sec) + uint32_t ns_threadcount; // Number of threads blocked on network calls + uint64_t ns_threadids[0]; // Thread IDs of those blocked threads +}; + /* vfsquery flags */ #define VQ_NOTRESP 0x0001 /* server down */ #define VQ_NEEDAUTH 0x0002 /* server bad auth */ @@ -510,6 +521,9 @@ struct vfsioattr { #define VFS_TBLVNOP_PAGEINV2 0x2000 #define VFS_TBLVNOP_PAGEOUTV2 0x4000 #define VFS_TBLVNOP_NOUPDATEID_RENAME 0x8000 /* vfs should not call vnode_update_ident on rename */ +#if CONFIG_SECLUDED_RENAME +#define VFS_TBLVNOP_SECLUDE_RENAME 0x10000 +#endif struct vfs_fsentry { @@ -697,6 +711,7 @@ struct vfsops { */ #ifdef PRIVATE #define VFS_ITERATE_TAIL_FIRST (1 << 0) +#define VFS_ITERATE_CB_DROPREF (1 << 1) // Callback will drop the iterref #endif /* PRIVATE */ /* @@ -1187,7 +1202,6 @@ vnode_t vfs_vnodecovered(mount_t mp); /* Returns vnode with an iocount that must vnode_t vfs_devvp(mount_t mp); /* Please see block comment with implementation */ int vfs_nativexattrs (mount_t mp); /* whether or not the FS supports EAs natively */ void * vfs_mntlabel(mount_t mp); /* Safe to cast to "struct label*"; returns "void*" to limit dependence of mount.h on security headers. */ -void vfs_setunmountpreflight(mount_t mp); void vfs_setcompoundopen(mount_t mp); uint64_t vfs_throttle_mask(mount_t mp); diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h index f246bc077..965d0a630 100644 --- a/bsd/sys/mount_internal.h +++ b/bsd/sys/mount_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,6 +84,7 @@ #include /* XXX for AF_MAX */ #include #include +#include struct label; @@ -203,6 +204,8 @@ struct mount { */ #define MNT_IOFLAGS_FUA_SUPPORTED 0x00000001 #define MNT_IOFLAGS_UNMAP_SUPPORTED 0x00000002 +#define MNT_IOFLAGS_IOSCHED_SUPPORTED 0x00000004 +#define MNT_IOFLAGS_CSUNMAP_SUPPORTED 0x00000008 /* * ioqueue depth for devices that don't report one @@ -228,6 +231,7 @@ extern struct mount * dead_mountp; * because the bits here were broken out from the high bits * of the mount flags. */ +#define MNTK_SWAP_MOUNT 0x00000100 /* we are swapping to this mount */ #define MNTK_DENY_READDIREXT 0x00000200 /* Deny Extended-style readdir's for this volume */ #define MNTK_PERMIT_UNMOUNT 0x00000400 /* Allow (non-forced) unmounts by UIDs other than the one that mounted the volume */ #ifdef NFSCLIENT @@ -253,7 +257,6 @@ extern struct mount * dead_mountp; #if REV_ENDIAN_FS #define MNT_REVEND 0x08000000 /* Reverse endian FS */ #endif /* REV_ENDIAN_FS */ -#define MNTK_FRCUNMOUNT 0x10000000 /* Forced unmount wanted. 
*/ #define MNTK_AUTH_OPAQUE 0x20000000 /* authorisation decisions are not made locally */ #define MNTK_AUTH_OPAQUE_ACCESS 0x40000000 /* VNOP_ACCESS is reliable for remote auth */ #define MNTK_EXTENDED_SECURITY 0x80000000 /* extended security supported */ @@ -267,6 +270,7 @@ extern struct mount * dead_mountp; #define MNT_LWAIT 0x00000040 /* wait for unmount op */ #define MNT_LITERWAIT 0x00000080 /* mount in iteration */ #define MNT_LDEAD 0x00000100 /* mount already unmounted*/ +#define MNT_LNOSUB 0x00000200 /* submount - no recursion */ /* @@ -302,6 +306,7 @@ struct vfstable { int vfc_vfsflags; /* for optional types */ void * vfc_descptr; /* desc table allocated address */ int vfc_descsize; /* size allocated for desc table */ + struct sysctl_oid *vfc_sysctl; /* dynamically registered sysctl node */ }; /* vfc_vfsflags: */ @@ -316,12 +321,16 @@ struct vfstable { #define VFC_VFSVNOP_PAGEINV2 0x2000 #define VFC_VFSVNOP_PAGEOUTV2 0x4000 #define VFC_VFSVNOP_NOUPDATEID_RENAME 0x8000 +#if CONFIG_SECLUDED_RENAME +#define VFC_VFSVNOP_SECLUDE_RENAME 0x10000 +#endif -extern int maxvfsconf; /* highest defined filesystem type */ +extern int maxvfstypenum; /* highest defined filesystem type */ extern struct vfstable *vfsconf; /* head of list of filesystem types */ -extern int maxvfsslots; /* Maximum slots available to be used */ -extern int numused_vfsslots; /* number of slots already used */ +extern const int maxvfsslots; /* Maximum statically allocated slots available to be used */ +extern int numused_vfsslots; /* number of statically allocated slots already used */ +extern int numregistered_fses; /* number of total registered filesystems */ /* the following two are xnu private */ struct vfstable * vfstable_add(struct vfstable *); @@ -426,6 +435,7 @@ int vfs_mountroot(void); void vfs_unmountall(void); int safedounmount(struct mount *, int, vfs_context_t); int dounmount(struct mount *, int, int, vfs_context_t); +void dounmount_submounts(struct mount *, int, vfs_context_t); /* xnu internal api */ void mount_dropcrossref(mount_t, vnode_t, int); @@ -447,7 +457,7 @@ void mount_set_noreaddirext (mount_t); /* Private NFS spi */ #define KERNEL_MOUNT_NOAUTH 0x01 /* Don't check the UID of the directory we are mounting on */ #define KERNEL_MOUNT_PERMIT_UNMOUNT 0x02 /* Allow (non-forced) unmounts by users other than the one who mounted the volume */ -#if NFSCLIENT +#if NFSCLIENT || DEVFS /* * NOTE: kernel_mount() does not force MNT_NOSUID, MNT_NOEXEC, or MNT_NODEV for non-privileged * mounting credentials, as the mount(2) system call does. @@ -468,11 +478,18 @@ int throttle_get_passive_io_policy(struct uthread **ut); int throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp); void *throttle_info_update_by_mount(mount_t mp); void rethrottle_thread(uthread_t ut); +void throttle_info_reset_window(uthread_t ut); + /* throttled I/O helper function */ /* convert the lowest bit to a device index */ extern int num_trailing_0(uint64_t n); +/* sync lock */ +extern lck_mtx_t * sync_mtx_lck; + +extern int sync_timeout; + __END_DECLS #endif /* !_SYS_MOUNT_INTERNAL_H_ */ diff --git a/bsd/sys/munge.h b/bsd/sys/munge.h index 170d1a51e..89b71a2b7 100644 --- a/bsd/sys/munge.h +++ b/bsd/sys/munge.h @@ -1,7 +1,5 @@ -#ifndef __MUNGE_H__ -#define __MUNGE_H__ /* - * Coyright (c) 2005-2011 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005-2013 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -27,46 +25,93 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -void munge_w(const void *arg0 __unused, void *args); -void munge_ww(const void *arg0 __unused, void *args); -void munge_www(const void *arg0 __unused, void *args); -void munge_wwww(const void *arg0 __unused, void *args); -void munge_wwwww(const void *arg0 __unused, void *args); -void munge_wwwwww(const void *arg0 __unused, void *args); -void munge_wwwwwww(const void *arg0 __unused, void *args); -void munge_wwwwwwww(const void *arg0 __unused, void *args); -void munge_wl(const void *arg0 __unused, void *args); -void munge_wwl(const void *arg0 __unused, void *args); -void munge_wwlw(const void *arg0 __unused, void *args); -void munge_wwlll(const void *arg0 __unused, void *args); -void munge_wwllww(const void *arg0 __unused, void *args); -void munge_wlw(const void *arg0 __unused, void *args); -void munge_wlwwwll(const void *arg0 __unused, void *args); -void munge_wlwwwllw(const void *arg0 __unused, void *args); -void munge_wlwwlwlw(const void *arg0 __unused, void *args); -void munge_wll(const void *arg0 __unused, void *args); -void munge_wllww(const void *arg0 __unused, void *args); -void munge_wlll(const void *arg0 __unused, void *args); -void munge_wllwwll(const void *arg0 __unused, void *args); -void munge_wwwlw(const void *arg0 __unused, void *args); -void munge_wwwlww(const void *arg0 __unused, void *args); -void munge_wwwl(const void *arg0 __unused, void *args); -void munge_wwwwlw(const void *arg0 __unused, void *args); -void munge_wwwwl(const void *arg0 __unused, void *args); -void munge_wwwwwl(const void *arg0 __unused, void *args); -void munge_wwwwwlww(const void *arg0 __unused, void *args); -void munge_wwwwwllw(const void *arg0 __unused, void *args); -void munge_wwwwwlll(const void *arg0 __unused, void *args); -void munge_wwwwwwl(const void *arg0 __unused, void *args); -void munge_wwwwwwlw(const void *arg0 __unused, void *args); -void munge_wwwwwwll(const void *arg0 __unused, void *args); -void munge_wsw(const void *arg0 __unused, void *args); -void munge_wws(const void *arg0 __unused, void *args); -void munge_wwwsw(const void *arg0 __unused, void *args); -void munge_llllll(const void *arg0 __unused, void *args __unused); -void munge_l(const void *arg0 __unused, void *args __unused); -void munge_ll(const void *arg0 __unused, void *args __unused); -void munge_lw(const void *arg0 __unused, void *args); -void munge_lwww(const void *arg0 __unused, void *args); -void munge_wwlwww(const void *arg0 __unused, void *args); + +#ifndef __MUNGE_H__ +#define __MUNGE_H__ + +/* + * Syscall argument mungers. + * + * The data to be munged has been explicitly copied in to the argument + * area, and will be munged in place in the uu_arg[] array. These + * mungers are for 32-bit app's syscalls, since 64-bit args are copied + * from the save area to the uu_args in the order the + * syscall ABI calls for. + * + * The issue is that the incoming args are 32-bit, but we must expand + * them in place into 64-bit args, as if they were from a 64-bit process. + * + * There are several functions in this file with the following prototype + * + * void munge_XXXX(void *uu_args); + * + * The name of the function encodes the number and type of the parameters, + * as follows: + * + * w = a 32-bit value such as an int or a 32-bit ptr, that does not + * require sign extension. These are handled by zeroing a word + * of output, and copying a word from input to output. 
+ * + * s = a 32-bit value such as a long, which must be sign-extended to + * a 64-bit long-long in the uu_args. These are handled by + * loading a word of input and sign extending it to a double word, + * and storing two words of output. + * + * l = a 64-bit long-long. These are handled by copying two words + * of input to the output. + * + * For example, "munge_wls" takes a word, a long-long, and a word. This + * takes four words in the uu_arg[] area: the first word is in the first, the + * long-long takes two, and the final word is in the fourth. We store six + * words: the low word is left in place, followed by a 0, followed by the + * two words of the long-long, followed by the low word and the sign extended + * high word of the preceding low word. + * + * Because this is an in-place modification, we actually start at the end + * of uu_arg[] and work our way back to the beginning of the array. + */ + +void munge_w(void *args); +void munge_ww(void *args); +void munge_www(void *args); +void munge_wwww(void *args); +void munge_wwwww(void *args); +void munge_wwwwww(void *args); +void munge_wwwwwww(void *args); +void munge_wwwwwwww(void *args); +void munge_wl(void *args); +void munge_wwl(void *args); +void munge_wwlw(void *args); +void munge_wwlll(void *args); +void munge_wwllww(void *args); +void munge_wlw(void *args); +void munge_wlwwwll(void *args); +void munge_wlwwwllw(void *args); +void munge_wlwwlwlw(void *args); +void munge_wll(void *args); +void munge_wllww(void *args); +void munge_wlll(void *args); +void munge_wllwwll(void *args); +void munge_wwwlw(void *args); +void munge_wwwlww(void *args); +void munge_wwwl(void *args); +void munge_wwwwlw(void *args); +void munge_wwwwl(void *args); +void munge_wwwwwl(void *args); +void munge_wwwwwlww(void *args); +void munge_wwwwwllw(void *args); +void munge_wwwwwlll(void *args); +void munge_wwwwwwl(void *args); +void munge_wwwwwwlw(void *args); +void munge_wwwwwwll(void *args); +void munge_wsw(void *args); +void munge_wws(void *args); +void munge_wwwsw(void *args); +void munge_llllll(void *args); +void munge_l(void *args); +void munge_ll(void *args); +void munge_lw(void *args); +void munge_lwww(void *args); +void munge_wwlwww(void *args); + #endif /* __MUNGE_H__ */ diff --git a/bsd/sys/namei.h b/bsd/sys/namei.h index 9ea13c01e..57b577d90 100644 --- a/bsd/sys/namei.h +++ b/bsd/sys/namei.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -147,7 +147,6 @@ struct nameidata { #define NOFOLLOW 0x00000000 /* do not follow symbolic links (pseudo) */ /* public FOLLOW 0x00000040 see vnode.h */ #define SHAREDLEAF 0x00000080 /* OK to have shared leaf lock */ -/* public NOTRIGGER 0x10000000 see vnode.h */ #define MODMASK 0x100000fc /* mask of operational modifiers */ /* * Namei parameter descriptors.
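Returning to the munge.h mungers declared above: the in-place expansion is easiest to see for the two-word case. A portable sketch (illustrative only; the kernel's actual mungers may be hand-tuned and use volatile accesses):

    #include <stdint.h>

    /* Sketch of a munge_ww-style munger: widen two 32-bit words in place
     * into two 64-bit slots. Work from the last argument back to the
     * first so the widened stores never clobber input words not yet read. */
    void munge_ww_sketch(void *args)
    {
        uint64_t *out = (uint64_t *)args;
        const uint32_t *in = (const uint32_t *)args;
        out[1] = in[1];   /* bytes 4-7 are read before bytes 8-15 are written */
        out[0] = in[0];   /* bytes 0-3 are read before bytes 4-7 are written */
        /* an 's' slot would instead store (int64_t)(int32_t)in[i] to get
         * the sign extension described in the comment above */
    }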
@@ -180,7 +179,11 @@ struct nameidata { #define CN_WANTSRSRCFORK 0x04000000 #define CN_ALLOWRSRCFORK 0x08000000 #endif -/* public NOTRIGGER 0x10000000 see vnode.h */ +#if CONFIG_SECLUDED_RENAME +#ifdef BSD_KERNEL_PRIVATE +#define CN_SECLUDE_RENAME 0x10000000 /*rename iff ¬(hard-linked ∨ opened ∨ mmaped)*/ +#endif +#endif #define CN_NBMOUNTLOOK 0x20000000 /* do not block for cross mount lookups */ #ifdef BSD_KERNEL_PRIVATE #define CN_SKIPNAMECACHE 0x40000000 /* skip cache during lookup(), allow FS to handle all components */ @@ -231,9 +234,6 @@ struct nameidata { * This structure describes the elements in the cache of recent * names looked up by namei. */ - -#define NCHASHMASK 0x7fffffff - struct namecache { TAILQ_ENTRY(namecache) nc_entry; /* chain of all entries */ LIST_ENTRY(namecache) nc_hash; /* hash chain */ @@ -244,8 +244,7 @@ struct namecache { } nc_un; vnode_t nc_dvp; /* vnode of parent of name */ vnode_t nc_vp; /* vnode the name refers to */ - unsigned int nc_whiteout:1, /* name has whiteout applied */ - nc_hashval:31; /* hashval of stringname */ + unsigned int nc_hashval; /* hashval of stringname */ const char *nc_name; /* pointer to segment name in string cache */ }; diff --git a/bsd/sys/param.h b/bsd/sys/param.h index 764907b0a..012e75630 100644 --- a/bsd/sys/param.h +++ b/bsd/sys/param.h @@ -185,7 +185,7 @@ * primarily determines the size of buffers in the buffer pool. It may be * made larger than MAXPHYS without any effect on existing file systems; * however making it smaller may make some file systems unmountable. - * We set this to track the value of (MAX_UPL_TRANSFER*PAGE_SIZE) from + * We set this to track the value of MAX_UPL_TRANSFER_BYTES from * osfmk/mach/memory_object_types.h to bound it at the maximum UPL size. */ #define MAXBSIZE (256 * 4096) diff --git a/bsd/sys/priv.h b/bsd/sys/priv.h index e81fcf533..27d7eb34e 100644 --- a/bsd/sys/priv.h +++ b/bsd/sys/priv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,6 +81,9 @@ #define PRIV_PROC_UUID_POLICY 1001 /* Change process uuid policy table. */ #define PRIV_GLOBAL_PROC_INFO 1002 /* Query information for processes owned by other users */ #define PRIV_SYSTEM_OVERRIDE 1003 /* Override global system settings for various subsystems for a limited duration/system-mode */ +#define PRIV_HW_DEBUG_DATA 1004 /* Extract hw-specific debug data (e.g. ECC data) */ +#define PRIV_SELECTIVE_FORCED_IDLE 1005 /* Configure and control Selective Forced Idle (SFI) subsystem */ +#define PRIV_PROC_TRACE_INSPECT 1006 /* Request trace memory of arbitrary process to be inspected */ /* * Virtual memory privileges. @@ -96,12 +99,20 @@ #define PRIV_NET_PRIVILEGED_SOCKET_DELEGATE 10001 /* Set delegate on a socket */ #define PRIV_NET_INTERFACE_CONTROL 10002 /* Enable interface debug logging. */ #define PRIV_NET_PRIVILEGED_NETWORK_STATISTICS 10003 /* Access to all sockets */ +#define PRIV_NET_PRIVILEGED_NECP_POLICIES 10004 /* Access to privileged Network Extension policies */ +#define PRIV_NET_RESTRICTED_AWDL 10005 /* Access to restricted AWDL mode */ +#define PRIV_NET_PRIVILEGED_NECP_MATCH 10006 /* Privilege verified by Network Extension policies */ /* * IPv4 and IPv6 privileges. */ #define PRIV_NETINET_RESERVEDPORT 11000 /* Bind low port number. */ +/* + * VFS privileges + */ +#define PRIV_VFS_OPEN_BY_ID 14000 /*Allow calling openbyid_np()*/ + #ifdef KERNEL /* * Privilege check interface. 
No flags are currently defined for the API. diff --git a/bsd/sys/proc.h b/bsd/sys/proc.h index afe765190..8c4ebb790 100644 --- a/bsd/sys/proc.h +++ b/bsd/sys/proc.h @@ -78,6 +78,7 @@ #include #ifdef KERNEL #include +#include #endif #include @@ -187,7 +188,7 @@ struct extern_proc { #define P_DEPENDENCY_CAPABLE 0x00100000 /* process is ok to call vfs_markdependency() */ #define P_REBOOT 0x00200000 /* Process called reboot() */ -#define P_TBE 0x00400000 /* Process is TBE */ +#define P_RESV6 0x00400000 /* used to be P_TBE */ #define P_RESV7 0x00800000 /* (P_SIGEXC)signal exceptions */ #define P_THCWD 0x01000000 /* process has thread cwd */ @@ -216,6 +217,7 @@ struct extern_proc { #define P_DIRTY_BUSY 0x00000040 /* serialization flag */ #define P_DIRTY_MARKED 0x00000080 /* marked dirty previously */ #define P_DIRTY_DEFER_IN_PROGRESS 0x00000100 /* deferral to idle-band in process */ +#define P_DIRTY_LAUNCH_IN_PROGRESS 0x00000200 /* launch is in progress */ #define P_DIRTY_IS_DIRTY (P_DIRTY | P_DIRTY_SHUTDOWN) #define P_DIRTY_IDLE_EXIT_ENABLED (P_DIRTY_TRACK|P_DIRTY_ALLOW_IDLE_EXIT) @@ -335,12 +337,22 @@ extern uint64_t proc_uniqueid(proc_t); extern uint64_t proc_puniqueid(proc_t); extern void proc_getexecutableuuid(proc_t, unsigned char *, unsigned long); +extern int proc_get_originatorbgstate(uint32_t *is_backgrounded); + +/* Kernel interface to get the uuid of the originator of the work.*/ +extern int proc_pidoriginatoruuid(uuid_t uuid_buf, uint32_t buffersize); extern uint64_t proc_was_throttled(proc_t); extern uint64_t proc_did_throttle(proc_t); +extern uint64_t proc_coalitionid(proc_t); + #endif /* XNU_KERNEL_PRIVATE*/ +#ifdef KERNEL_PRIVATE +extern vnode_t proc_getexecutablevnode(proc_t); /* Returned with iocount, use vnode_put() to drop */ +#endif + __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/proc_info.h b/bsd/sys/proc_info.h index 670f05f57..e8ca29c4f 100644 --- a/bsd/sys/proc_info.h +++ b/bsd/sys/proc_info.h @@ -41,6 +41,7 @@ #include #include #include +#include __BEGIN_DECLS @@ -110,6 +111,19 @@ struct proc_bsdinfowithuniqid { struct proc_bsdinfo pbsd; struct proc_uniqidentifierinfo p_uniqidentifier; }; + +struct proc_archinfo { + cpu_type_t p_cputype; + cpu_subtype_t p_cpusubtype; +}; + +struct proc_pidcoalitioninfo { + uint64_t coalition_id; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; +}; + #endif @@ -143,7 +157,8 @@ struct proc_bsdinfowithuniqid { #define PROC_FLAG_ADAPTIVE_IMPORTANT 0x200000 /* Process is adaptive, and is currently important */ #define PROC_FLAG_IMPORTANCE_DONOR 0x400000 /* Process is marked as an importance donor */ #define PROC_FLAG_SUPPRESSED 0x800000 /* Process is suppressed */ -#define PROC_FLAG_IOS_APPLICATION 0x1000000 /* Process is an application */ +#define PROC_FLAG_APPLICATION 0x1000000 /* Process is an application */ +#define PROC_FLAG_IOS_APPLICATION PROC_FLAG_APPLICATION /* Process is an application */ #endif @@ -688,6 +703,23 @@ struct proc_fileportinfo { #define PROC_PIDT_BSDINFOWITHUNIQID 18 #define PROC_PIDT_BSDINFOWITHUNIQID_SIZE \ (sizeof(struct proc_bsdinfowithuniqid)) + +#define PROC_PIDARCHINFO 19 +#define PROC_PIDARCHINFO_SIZE \ + (sizeof(struct proc_archinfo)) + +#define PROC_PIDCOALITIONINFO 20 +#define PROC_PIDCOALITIONINFO_SIZE (sizeof(struct proc_pidcoalitioninfo)) + +#define PROC_PIDNOTEEXIT 21 +#define PROC_PIDNOTEEXIT_SIZE (sizeof(uint32_t)) + +#define PROC_PIDREGIONPATHINFO2 22 +#define PROC_PIDREGIONPATHINFO2_SIZE (sizeof(struct proc_regionwithpathinfo)) + +#define PROC_PIDREGIONPATHINFO3 23 +#define 
PROC_PIDREGIONPATHINFO3_SIZE (sizeof(struct proc_regionwithpathinfo)) + #endif /* Flavors for proc_pidfdinfo */ @@ -745,19 +777,29 @@ struct proc_fileportinfo { #define PROC_DIRTYCONTROL_TRACK 1 #define PROC_DIRTYCONTROL_SET 2 #define PROC_DIRTYCONTROL_GET 3 +#define PROC_DIRTYCONTROL_CLEAR 4 /* proc_track_dirty() flags */ #define PROC_DIRTY_TRACK 0x1 #define PROC_DIRTY_ALLOW_IDLE_EXIT 0x2 #define PROC_DIRTY_DEFER 0x4 +#define PROC_DIRTY_LAUNCH_IN_PROGRESS 0x8 /* proc_get_dirty() flags */ #define PROC_DIRTY_TRACKED 0x1 #define PROC_DIRTY_ALLOWS_IDLE_EXIT 0x2 #define PROC_DIRTY_IS_DIRTY 0x4 +#define PROC_DIRTY_LAUNCH_IS_IN_PROGRESS 0x8 #ifdef PRIVATE +/* Flavors for proc_pidoriginatorinfo */ +#define PROC_PIDORIGINATOR_UUID 0x1 +#define PROC_PIDORIGINATOR_UUID_SIZE (sizeof(uuid_t)) + +#define PROC_PIDORIGINATOR_BGSTATE 0x2 +#define PROC_PIDORIGINATOR_BGSTATE_SIZE (sizeof(uint32_t)) + /* __proc_info() call numbers */ #define PROC_INFO_CALL_LISTPIDS 0x1 #define PROC_INFO_CALL_PIDINFO 0x2 @@ -768,6 +810,7 @@ struct proc_fileportinfo { #define PROC_INFO_CALL_TERMINATE 0x7 #define PROC_INFO_CALL_DIRTYCONTROL 0x8 #define PROC_INFO_CALL_PIDRUSAGE 0x9 +#define PROC_INFO_CALL_PIDORIGINATORINFO 0xa #endif /* PRIVATE */ diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h index 428618c9d..96c7c3392 100644 --- a/bsd/sys/proc_internal.h +++ b/bsd/sys/proc_internal.h @@ -336,6 +336,13 @@ struct proc { uint32_t p_pcaction; /* action for process control on starvation */ uint8_t p_uuid[16]; /* from LC_UUID load command */ + /* + * CPU type and subtype of binary slice executed in + * this process. Protected by proc lock. + */ + cpu_type_t p_cputype; + cpu_subtype_t p_cpusubtype; + /* End area that is copied on creation. */ /* XXXXXXXXXXXXX End of BCOPY'ed on fork (AIOLOCK)XXXXXXXXXXXXXXXX */ #define p_endcopy p_aio_total_count @@ -353,14 +360,16 @@ struct proc { /* DEPRECATE following field */ u_short p_acflag; /* Accounting flags. */ - volatile u_short p_vfs_iopolicy; /* VFS iopolicy flags. */ + volatile u_short p_vfs_iopolicy; /* VFS iopolicy flags. (atomic bit ops) */ struct lctx *p_lctx; /* Pointer to login context. */ LIST_ENTRY(proc) p_lclist; /* List of processes in lctx. 
*/ user_addr_t p_threadstart; /* pthread start fn */ user_addr_t p_wqthread; /* pthread workqueue fn */ int p_pthsize; /* pthread size */ + uint32_t p_pth_tsd_offset; /* offset from pthread_t to TSD for new threads */ user_addr_t p_targconc; /* target concurrency ptr */ + user_addr_t p_stack_addr_hint; /* stack allocation hint for wq threads */ void * p_wqptr; /* workq ptr */ int p_wqsize; /* allocated size */ boolean_t p_wqiniting; /* semaphore to serialze wq_open */ @@ -393,8 +402,8 @@ struct proc { uint32_t p_memstat_state; /* state */ int32_t p_memstat_effectivepriority; /* priority after transaction state accounted for */ int32_t p_memstat_requestedpriority; /* active priority */ - uint64_t p_memstat_userdata; /* user state */ uint32_t p_memstat_dirty; /* dirty state */ + uint64_t p_memstat_userdata; /* user state */ uint64_t p_memstat_idledeadline; /* time at which process became clean */ #if CONFIG_JETSAM int32_t p_memstat_memlimit; /* cached memory limit */ @@ -464,6 +473,7 @@ struct proc { #define P_JETSAM_PID 0x30000000 /* jetsam: pid */ #define P_JETSAM_IDLEEXIT 0x40000000 /* jetsam: idle exit */ #define P_JETSAM_VNODE 0x50000000 /* jetsam: vnode kill */ +#define P_JETSAM_FCTHRASHING 0x60000000 /* jetsam: lowest jetsam priority proc, killed due to filecache thrashing */ #define P_JETSAM_MASK 0x70000000 /* jetsam type mask */ /* Process control state for resource starvation */ @@ -721,14 +731,14 @@ extern int msleep0(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, int t extern void vfork_return(struct proc *child, int32_t *retval, int rval); extern int exit1(struct proc *, int, int *); extern int exit1_internal(struct proc *, int, int *, boolean_t, boolean_t, int); -extern int fork1(proc_t, thread_t *, int); +extern int fork1(proc_t, thread_t *, int, coalition_t); extern void vfork_exit_internal(struct proc *p, int rv, int forced); extern void proc_reparentlocked(struct proc *child, struct proc * newparent, int cansignal, int locked); extern int pgrp_iterate(struct pgrp * pgrp, int flags, int (*callout)(proc_t , void *), void *arg, int (*filterfn)(proc_t , void *), void *filterarg); extern int proc_iterate(int flags, int (*callout)(proc_t , void *), void *arg, int (*filterfn)(proc_t , void *), void *filterarg); extern int proc_rebootscan(int (*callout)(proc_t , void *), void *arg, int (*filterfn)(proc_t , void *), void *filterarg); extern int proc_childrenwalk(proc_t p, int (*callout)(proc_t , void *), void *arg); -extern proc_t proc_findinternal(int pid, int funneled); +extern proc_t proc_findinternal(int pid, int locked); extern proc_t proc_findthread(thread_t thread); extern void proc_refdrain(proc_t); extern void proc_childdrainlocked(proc_t); @@ -757,7 +767,7 @@ int itimerfix(struct timeval *tv); int itimerdecr(struct proc * p, struct itimerval *itp, int usec); void proc_signalstart(struct proc *, int locked); void proc_signalend(struct proc *, int locked); -int proc_transstart(struct proc *, int locked); +int proc_transstart(struct proc *, int locked, int non_blocking); void proc_transcommit(struct proc *, int locked); void proc_transend(struct proc *, int locked); int proc_transwait(struct proc *, int locked); @@ -773,7 +783,7 @@ void proc_resetregister(proc_t p); thread_t proc_thread(proc_t); extern int proc_pendingsignals(proc_t, sigset_t); int proc_getpcontrol(int pid, int * pcontrolp); -int proc_dopcontrol(proc_t p, void *unused_arg); +int proc_dopcontrol(proc_t p); int proc_resetpcontrol(int pid); #if PSYNCH void pth_proc_hashinit(proc_t); diff --git 
a/bsd/sys/proc_uuid_policy.h b/bsd/sys/proc_uuid_policy.h index 18118dc15..9838993b3 100644 --- a/bsd/sys/proc_uuid_policy.h +++ b/bsd/sys/proc_uuid_policy.h @@ -54,6 +54,9 @@ __BEGIN_DECLS /* The namespace of flags are managed by in-kernel clients */ #define PROC_UUID_POLICY_FLAGS_NONE 0x00000000 #define PROC_UUID_NO_CELLULAR 0x00000001 +#define PROC_UUID_NECP_APP_POLICY 0x00000002 + +/* To be removed, replaced by PROC_UUID_NECP_APP_POLICY */ #define PROC_UUID_FLOW_DIVERT 0x00000002 #if BSD_KERNEL_PRIVATE @@ -79,6 +82,8 @@ __BEGIN_DECLS extern int proc_uuid_policy_lookup(uuid_t uuid, uint32_t *flags, int32_t *gencount); extern void proc_uuid_policy_init(void); + +extern int proc_uuid_policy_kernel(uint32_t operation, uuid_t uuid, uint32_t flags); #endif /* BSD_KERNEL_PRIVATE */ #ifndef KERNEL @@ -86,13 +91,15 @@ extern void proc_uuid_policy_init(void); * Upload a policy indexed by UUID. * * Parameters: - * operation CLEAR Remove all existing entries + * operation CLEAR Clear specified flags for all entries. + * Entries are removed if they have no remaining flags. * ADD Add the specified UUID and flags to the policy table. - * Existing entries for the UUID are replaced. - * REMOVE Remove entry for the specified UUID. + * Flags are ORed with existing entries for the UUID. + * REMOVE Mask out flags in the entry for the specified UUID. + * Entry is removed if it has no remaining flags. * uuid Pointer to UUID for Mach-O executable * uuidlen sizeof(uuid_t) - * flags Flags to be stored in the policy table + * flags Flags to be stored in the policy table. See operation notes above. * * Return: * 0 Success, operation completed without error. diff --git a/bsd/sys/process_policy.h b/bsd/sys/process_policy.h index 35341278f..4aec9e13e 100644 --- a/bsd/sys/process_policy.h +++ b/bsd/sys/process_policy.h @@ -34,6 +34,10 @@ #include #include +#ifndef XNU_KERNEL_PRIVATE +#include +#endif + __BEGIN_DECLS /* defns of scope */ @@ -61,11 +65,7 @@ __BEGIN_DECLS #define PROC_POLICY_HARDWARE_ACCESS 2 /* access to various hardware */ #define PROC_POLICY_RESOURCE_STARVATION 3 /* behavior on resource starvation */ #define PROC_POLICY_RESOURCE_USAGE 4 /* behavior on resource consumption */ -#if BUILD_LIBSYSCALL -#define PROC_POLICY_APP_LIFECYCLE 5 /* app life cycle management */ -#else /* BUILD_LIBSYSCALL */ #define PROC_POLICY_RESERVED 5 /* behavior on resource consumption */ -#endif /* BUILD_LIBSYSCALL */ #define PROC_POLICY_APPTYPE 6 /* behavior on resource consumption */ #define PROC_POLICY_BOOST 7 /* importance boost/drop */ @@ -75,11 +75,7 @@ __BEGIN_DECLS #define PROC_POLICY_BG_DISKTHROTTLE 2 /* disk accesses throttled */ #define PROC_POLICY_BG_NETTHROTTLE 4 /* network accesses throttled */ #define PROC_POLICY_BG_GPUDENY 8 /* no access to GPU */ -#if BUILD_LIBSYSCALL -#define PROC_POLICY_BG_ALL 0x0F -#else /* BUILD_LIBSYSCALL */ #define PROC_POLICY_BG_ALL 0x07 -#endif /* BUILD_LIBSYSCALL */ #define PROC_POLICY_BG_DEFAULT PROC_POLICY_BG_ALL /* sub policies for hardware */ @@ -165,20 +161,10 @@ typedef struct proc_policy_cpuusage_attr { uint64_t ppattr_cpu_attr_deadline; /* 64bit deadline in nsecs */ } proc_policy_cpuusage_attr_t; -#if BUILD_LIBSYSCALL -/* sub policies for app lifecycle management */ -#define PROC_POLICY_APPLIFE_NONE 0 /* does nothing.. 
*/ -#define PROC_POLICY_APPLIFE_STATE 1 /* sets the app to various lifecycle states */ -#define PROC_POLICY_APPLIFE_DEVSTATUS 2 /* notes the device in inactive or short/long term */ -#define PROC_POLICY_APPLIFE_PIDBIND 3 /* a thread is to be bound to another processes app state */ -#endif /* BUILD_LIBSYSCALL */ /* sub policies for PROC_POLICY_APPTYPE */ #define PROC_POLICY_APPTYPE_NONE 0 /* does nothing.. */ #define PROC_POLICY_APPTYPE_MODIFY 1 /* sets the app to various lifecycle states */ -#if BUILD_LIBSYSCALL -#define PROC_POLICY_APPTYPE_THREADTHR 2 /* notes the device in inactive or short/long term */ -#endif /* BUILD_LIBSYSCALL */ /* exported apptypes for PROC_POLICY_APPTYPE */ #define PROC_POLICY_OSX_APPTYPE_TAL 1 /* TAL-launched app */ diff --git a/bsd/sys/protosw.h b/bsd/sys/protosw.h index d16f4a3aa..b7d319d18 100644 --- a/bsd/sys/protosw.h +++ b/bsd/sys/protosw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -463,6 +463,8 @@ struct pr_usrreqs { int (*pru_rcvoob)(struct socket *, struct mbuf *, int); int (*pru_send)(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); + int (*pru_send_list)(struct socket *, int, struct mbuf *, + struct sockaddr *, struct mbuf *, struct proc *); #define PRUS_OOB 0x1 #define PRUS_EOF 0x2 #define PRUS_MORETOCOME 0x4 @@ -472,8 +474,12 @@ struct pr_usrreqs { int (*pru_sopoll)(struct socket *, int, struct ucred *, void *); int (*pru_soreceive)(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *); + int (*pru_soreceive_list)(struct socket *, struct sockaddr **, + struct uio **, u_int, struct mbuf **, struct mbuf **, int *); int (*pru_sosend)(struct socket *, struct sockaddr *, struct uio *, struct mbuf *, struct mbuf *, int); + int (*pru_sosend_list)(struct socket *, struct sockaddr *, + struct uio **, u_int, struct mbuf *, struct mbuf *, int); int (*pru_socheckopt)(struct socket *, struct sockopt *); }; @@ -509,14 +515,21 @@ extern int pru_rcvd_notsupp(struct socket *so, int flags); extern int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags); extern int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct proc *p); +extern int pru_send_list_notsupp(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, struct proc *p); extern int pru_sense_null(struct socket *so, void * sb, int isstat64); extern int pru_shutdown_notsupp(struct socket *so); extern int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam); extern int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, - struct uio *uio, struct mbuf *top, struct mbuf *control, int flags); + struct uio *uio, struct mbuf *top, struct mbuf *control, int flags); +extern int pru_sosend_list_notsupp(struct socket *so, struct sockaddr *addr, + struct uio **uio, u_int, struct mbuf *top, struct mbuf *control, int flags); extern int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); +extern int pru_soreceive_list_notsupp(struct socket *so, + struct sockaddr **paddr, struct uio **uio, u_int, struct mbuf **mp0, + struct mbuf **controlp, int *flagsp); extern int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, void *); #ifdef XNU_KERNEL_PRIVATE diff --git 
a/bsd/sys/pthread_shims.h b/bsd/sys/pthread_shims.h index 1e530ab4a..5c403a81b 100644 --- a/bsd/sys/pthread_shims.h +++ b/bsd/sys/pthread_shims.h @@ -50,6 +50,13 @@ struct uthread; typedef void (*sched_call_t)(int type, thread_t thread); #endif +/* + * Increment each time new reserved slots are used. When the pthread + * kext registers this table, it will include the version of the xnu + * headers that it was built against. + */ +#define PTHREAD_FUNCTIONS_TABLE_VERSION 1 + typedef struct pthread_functions_s { int version; @@ -87,8 +94,14 @@ typedef struct pthread_functions_s { sched_call_t (*workqueue_get_sched_callback)(void); + /* New register function with TSD offset */ + int (*bsdthread_register2)(struct proc *p, user_addr_t threadstart, user_addr_t wqthread, uint32_t flags, user_addr_t stack_addr_hint, user_addr_t targetconc_ptr, uint32_t dispatchqueue_offset, uint32_t tsd_offset, int32_t *retval); + + /* New pthreadctl system. */ + int (*bsdthread_ctl)(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval); + /* padding for future */ - void* _pad[99]; + void* _pad[97]; } *pthread_functions_t; typedef struct pthread_callbacks_s { @@ -200,8 +213,25 @@ typedef struct pthread_callbacks_s { uint64_t (*proc_get_dispatchqueue_serialno_offset)(struct proc *p); void (*proc_set_dispatchqueue_serialno_offset)(struct proc *p, uint64_t offset); + user_addr_t (*proc_get_stack_addr_hint)(struct proc *p); + void (*proc_set_stack_addr_hint)(struct proc *p, user_addr_t stack_addr_hint); + + uint32_t (*proc_get_pthread_tsd_offset)(struct proc *p); + void (*proc_set_pthread_tsd_offset)(struct proc *p, uint32_t pthread_tsd_offset); + + kern_return_t (*thread_set_tsd_base)(thread_t thread, mach_vm_offset_t tsd_base); + + int (*proc_usynch_get_requested_thread_qos)(struct uthread *); + boolean_t (*proc_usynch_thread_qos_add_override)(struct uthread *, uint64_t tid, int override_qos, boolean_t first_override_for_resource); + boolean_t (*proc_usynch_thread_qos_remove_override)(struct uthread *, uint64_t tid); + + kern_return_t (*thread_policy_get)(thread_t t, thread_policy_flavor_t flavor, thread_policy_t info, mach_msg_type_number_t *count, boolean_t *get_default); + boolean_t (*qos_main_thread_active)(void); + + kern_return_t (*thread_set_voucher_name)(mach_port_name_t voucher_name); + /* padding for future */ - void* _pad[98]; + void* _pad[87]; } *pthread_callbacks_t; diff --git a/bsd/sys/random.h b/bsd/sys/random.h index 2df36fc21..9b77c2585 100644 --- a/bsd/sys/random.h +++ b/bsd/sys/random.h @@ -35,6 +35,8 @@ #ifdef __APPLE_API_UNSTABLE __BEGIN_DECLS void read_random(void* buffer, u_int numBytes); +void read_frandom(void* buffer, u_int numBytes); +int write_random(void* buffer, u_int numBytes); __END_DECLS #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h index d47395073..6a4164fa8 100644 --- a/bsd/sys/resource.h +++ b/bsd/sys/resource.h @@ -111,6 +111,22 @@ typedef __uint64_t rlim_t; #define PRIO_DARWIN_THREAD 3 /* Second argument is always 0 (current thread) */ #define PRIO_DARWIN_PROCESS 4 /* Second argument is a PID */ +#ifdef PRIVATE + +#define PRIO_DARWIN_GPU 5 /* Second argument is a PID */ + +#define PRIO_DARWIN_GPU_ALLOW 0x1 +#define PRIO_DARWIN_GPU_DENY 0x2 + +#define PRIO_DARWIN_ROLE 6 /* Second argument is a PID */ + +#define PRIO_DARWIN_ROLE_DEFAULT 0x0 /* Default state */ +#define PRIO_DARWIN_ROLE_UI_FOCAL 0x1 /* On screen, focal UI */ +#define PRIO_DARWIN_ROLE_UI 0x2 /* On screen, non-focal UI */ +#define 
PRIO_DARWIN_ROLE_NON_UI 0x3 /* Off screen, non-focal UI */ + +#endif /* PRIVATE */ + /* * Range limitations for the value of the third parameter to setpriority(). */ @@ -126,7 +142,7 @@ typedef __uint64_t rlim_t; /* * use PRIO_DARWIN_NONUI to restrict a process's ability to make calls to - * the GPU. + * the GPU. (deprecated) */ #define PRIO_DARWIN_NONUI 0x1001 @@ -191,7 +207,8 @@ struct rusage { #define RUSAGE_INFO_V0 0 #define RUSAGE_INFO_V1 1 #define RUSAGE_INFO_V2 2 -#define RUSAGE_INFO_CURRENT RUSAGE_INFO_V2 +#define RUSAGE_INFO_V3 3 +#define RUSAGE_INFO_CURRENT RUSAGE_INFO_V3 typedef void *rusage_info_t; @@ -251,13 +268,46 @@ struct rusage_info_v2 { uint64_t ri_diskio_byteswritten; }; +struct rusage_info_v3 { + uint8_t ri_uuid[16]; + uint64_t ri_user_time; + uint64_t ri_system_time; + uint64_t ri_pkg_idle_wkups; + uint64_t ri_interrupt_wkups; + uint64_t ri_pageins; + uint64_t ri_wired_size; + uint64_t ri_resident_size; + uint64_t ri_phys_footprint; + uint64_t ri_proc_start_abstime; + uint64_t ri_proc_exit_abstime; + uint64_t ri_child_user_time; + uint64_t ri_child_system_time; + uint64_t ri_child_pkg_idle_wkups; + uint64_t ri_child_interrupt_wkups; + uint64_t ri_child_pageins; + uint64_t ri_child_elapsed_abstime; + uint64_t ri_diskio_bytesread; + uint64_t ri_diskio_byteswritten; + uint64_t ri_cpu_time_qos_default; + uint64_t ri_cpu_time_qos_maintenance; + uint64_t ri_cpu_time_qos_background; + uint64_t ri_cpu_time_qos_utility; + uint64_t ri_cpu_time_qos_legacy; + uint64_t ri_cpu_time_qos_user_initiated; + uint64_t ri_cpu_time_qos_user_interactive; + uint64_t ri_billed_system_time; + uint64_t ri_serviced_system_time; +}; + +typedef struct rusage_info_v3 rusage_info_current; + #endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ #ifdef KERNEL struct rusage_superset { struct rusage ru; - struct rusage_info_v2 ri; + rusage_info_current ri; }; struct rusage_info_child { @@ -269,11 +319,6 @@ struct rusage_info_child { uint64_t ri_child_elapsed_abstime; }; -struct rusage_info_diskiobytes { - volatile uint64_t ri_bytesread __attribute__((aligned(8))); - volatile uint64_t ri_byteswritten __attribute__((aligned(8))); -}; - struct user64_rusage { struct user64_timeval ru_utime; /* user time used */ struct user64_timeval ru_stime; /* system time used */ @@ -366,6 +411,7 @@ struct rlimit { */ #define RLIMIT_WAKEUPS_MONITOR 0x1 /* Configure the wakeups monitor. */ #define RLIMIT_CPU_USAGE_MONITOR 0x2 /* Configure the CPU usage monitor. */ +#define RLIMIT_THREAD_CPULIMITS 0x3 /* Configure blocking, per-thread CPU limits. */ /* * Flags for wakeups monitor control.
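The new RUSAGE_INFO_V3 flavor above is reachable from userspace via libproc's proc_pid_rusage() wrapper. A minimal sketch, assuming the proc_pid_rusage() interface that ships alongside this release; note that the ri_*_time values are in Mach absolute-time units, not microseconds:

    #include <libproc.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/resource.h>
    #include <unistd.h>

    int
    main(int argc, char *argv[])
    {
        /* default to ourselves if no pid is given on the command line */
        pid_t pid = (argc > 1) ? (pid_t)atoi(argv[1]) : getpid();
        struct rusage_info_v3 ri;

        if (proc_pid_rusage(pid, RUSAGE_INFO_V3, (rusage_info_t *)&ri) != 0) {
            perror("proc_pid_rusage");
            return 1;
        }
        printf("user time:               %llu\n", (unsigned long long)ri.ri_user_time);
        printf("system time:             %llu\n", (unsigned long long)ri.ri_system_time);
        printf("QoS default CPU time:    %llu\n", (unsigned long long)ri.ri_cpu_time_qos_default);
        printf("QoS background CPU time: %llu\n", (unsigned long long)ri.ri_cpu_time_qos_background);
        return 0;
    }

Because RUSAGE_INFO_CURRENT now aliases RUSAGE_INFO_V3 and struct rusage_superset holds a rusage_info_current, callers that pass RUSAGE_INFO_CURRENT with a rusage_info_current buffer pick up the per-QoS CPU-time buckets simply by recompiling.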
diff --git a/bsd/sys/resourcevar.h b/bsd/sys/resourcevar.h index 89aa0aa6e..57ae8eb7c 100644 --- a/bsd/sys/resourcevar.h +++ b/bsd/sys/resourcevar.h @@ -87,7 +87,6 @@ struct pstats { uint64_t ps_start; /* starting time ; compat only */ #ifdef KERNEL struct rusage_info_child ri_child; /* (PL) sum of additional stats for reaped children (proc_pid_rusage) */ - struct rusage_info_diskiobytes ri_diskiobytes; /* Bytes of Disk I/O done by the process */ struct user_uprof { /* profile arguments */ struct user_uprof *pr_next; /* multiple prof buffers allowed */ user_addr_t pr_base; /* buffer base */ @@ -132,7 +131,7 @@ void addupc_task(struct proc *p, user_addr_t pc, u_int ticks); void calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip); void ruadd(struct rusage *ru, struct rusage *ru2); -void update_rusage_info_child(struct rusage_info_child *ru, struct rusage_info_v2 *ru2); +void update_rusage_info_child(struct rusage_info_child *ru, rusage_info_current *ru_current); void proc_limitget(proc_t p, int whichi, struct rlimit * limp); void proc_limitdrop(proc_t p, int exiting); void proc_limitfork(proc_t parent, proc_t child); diff --git a/bsd/sys/sdt_impl.h b/bsd/sys/sdt_impl.h index aca7c5a6c..7517dd627 100644 --- a/bsd/sys/sdt_impl.h +++ b/bsd/sys/sdt_impl.h @@ -42,9 +42,7 @@ extern const char *sdt_prefix; typedef struct sdt_probedesc { char *sdpd_name; /* name of this probe */ -#if defined(__APPLE__) - char *sdpd_func; -#endif /* __APPLE__ */ + char *sdpd_func; /* APPLE NOTE: function name */ unsigned long sdpd_offset; /* offset of call in text */ struct sdt_probedesc *sdpd_next; /* next static probe */ } sdt_probedesc_t; @@ -67,9 +65,7 @@ struct module { }; extern int sdt_invop(uintptr_t, uintptr_t *, uintptr_t); -#if defined (__APPLE__) extern uint64_t sdt_getarg(void *, dtrace_id_t, void *, int, int); -#endif /* __APPLE__ */ void sdt_provide_module(void *, struct modctl *); void sdt_init(void); diff --git a/bsd/sys/semaphore.h b/bsd/sys/semaphore.h index 379c4baaf..96e2535fe 100644 --- a/bsd/sys/semaphore.h +++ b/bsd/sys/semaphore.h @@ -51,9 +51,9 @@ typedef int sem_t; __BEGIN_DECLS int sem_close(sem_t *); -int sem_destroy(sem_t *); -int sem_getvalue(sem_t * __restrict, int * __restrict); -int sem_init(sem_t *, int, unsigned int); +int sem_destroy(sem_t *) __deprecated; +int sem_getvalue(sem_t * __restrict, int * __restrict) __deprecated; +int sem_init(sem_t *, int, unsigned int) __deprecated; sem_t * sem_open(const char *, int, ...); int sem_post(sem_t *); int sem_trywait(sem_t *); diff --git a/bsd/sys/sfi.h b/bsd/sys/sfi.h new file mode 100644 index 000000000..bd3d22aea --- /dev/null +++ b/bsd/sys/sfi.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_SFI_H_ +#define _SYS_SFI_H_ + +#include +#include +#include +/* + * Selective Forced Idle (SFI) is a mechanism for doing + * phase-aligned modulation of runnable tasks. + */ + +/* Flags for use with sfi_process_set_flags() */ +#define SFI_PROCESS_SET_MANAGED 0x00000001 +#define SFI_PROCESS_SET_UNMANAGED 0x00000002 + +#define SFI_PROCESS_SET_MANAGED_MASK 0x00000003 + +#ifndef KERNEL +/* + * The system_set_sfi_window() call can be used to set the "Selective + * Forced Idle" window for the system. sfi_window_usec is the + * interval in microseconds, and when "Selective Forced Idle" is + * active, its "off phase" starts every sfi_window_usec. A nonzero + * value lower than MIN_SFI_WINDOW_USEC will be automatically changed + * to MIN_SFI_WINDOW_USEC. The actual window size used is + * implementation dependent and may be longer. + * + * system_get_sfi_window() can be used to determine the actual value. + * + * A value of 0 for sfi_window_usec can be used to disable "Selective + * Forced Idle". + */ +int system_set_sfi_window(uint64_t sfi_window_usec); + +int system_get_sfi_window(uint64_t *sfi_window_usec); + +/* + * sfi_set_class_offtime() can be used to set the "off time" interval + * for all threads placed in a "Selective Forced Idle" class class_id. + * A thread placed in class_id will not execute for offtime_usec + * microseconds at the beginning of each "Selective Forced Idle" + * window. A nonzero value lower than MIN_OFFTIME_USEC will be + * automatically changed to MIN_OFFTIME_USEC. The actual "off time" + * used is implementation dependent and may be + * longer. sfi_get_class_offtime() can be used to determine the actual + * value. + * + * A value of 0 for offtime_usec can be used to disable "Selective + * Forced Idle" for all the threads placed in class_id. + * + * A value of offtime_usec greater than the system-wide + * "Selective Forced Idle" window will cause failure. + */ +int sfi_set_class_offtime(sfi_class_id_t class_id, uint64_t offtime_usec); + +int sfi_get_class_offtime(sfi_class_id_t class_id, uint64_t *offtime_usec); + +/* + * sfi_process_set_flags() can be used to place all the threads of the + * process with ID pid in the "Selective Forced Idle" managed class, or + * clear the managed state.
+ */ + +int sfi_process_set_flags(pid_t pid, uint32_t flags); + +int sfi_process_get_flags(pid_t pid, uint32_t *flags); + +#endif /* !KERNEL */ + +#if PRIVATE + +/* This is the private system call interface between Libsyscall and xnu */ +#define SFI_CTL_OPERATION_SFI_SET_WINDOW 0x00000001 +#define SFI_CTL_OPERATION_SFI_GET_WINDOW 0x00000002 +#define SFI_CTL_OPERATION_SET_CLASS_OFFTIME 0x00000003 +#define SFI_CTL_OPERATION_GET_CLASS_OFFTIME 0x00000004 + +#define SFI_PIDCTL_OPERATION_PID_SET_FLAGS 0x00000001 +#define SFI_PIDCTL_OPERATION_PID_GET_FLAGS 0x00000002 + +int __sfi_ctl(uint32_t operation, uint32_t sfi_class, uint64_t time, uint64_t *out_time); +int __sfi_pidctl(uint32_t operation, pid_t pid, uint32_t sfi_flags, uint32_t *out_sfi_flags); + +#endif /* PRIVATE */ + +#endif /* _SYS_SFI_H_ */ diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h index 3aab026e5..2ff005609 100644 --- a/bsd/sys/signal.h +++ b/bsd/sys/signal.h @@ -143,11 +143,15 @@ #include #include + +#ifndef KERNEL +#include +#endif /* KERNEL */ + #include #include #include -#include #include #include #include @@ -162,6 +166,7 @@ union sigval { #define SIGEV_SIGNAL 1 /* aio - completion notification */ #define SIGEV_THREAD 3 /* [NOTIMP] [RTS] call notification function */ +#ifndef KERNEL struct sigevent { int sigev_notify; /* Notification type */ int sigev_signo; /* Signal number */ @@ -169,6 +174,7 @@ struct sigevent { void (*sigev_notify_function)(union sigval); /* Notification function */ pthread_attr_t *sigev_notify_attributes; /* Notification attributes */ }; +#endif /* KERNEL */ #ifdef BSD_KERNEL_PRIVATE diff --git a/bsd/sys/signalvar.h b/bsd/sys/signalvar.h index 63f37d442..cd5c2d133 100644 --- a/bsd/sys/signalvar.h +++ b/bsd/sys/signalvar.h @@ -104,9 +104,9 @@ struct sigacts { * values should be non-intersecting with values defined in signal.h, e.g.: * SIG_IGN, SIG_DFL, SIG_ERR, SIG_IGN. */ -#define KERN_SIG_CATCH (void (*)(int))2 -#define KERN_SIG_HOLD (void (*)(int))3 -#define KERN_SIG_WAIT (void (*)(int))4 +#define KERN_SIG_CATCH CAST_USER_ADDR_T(2) +#define KERN_SIG_HOLD CAST_USER_ADDR_T(3) +#define KERN_SIG_WAIT CAST_USER_ADDR_T(4) /* * get signal action for process and signal; currently only for current process diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h index 4c4ec1865..d58bdd7b5 100644 --- a/bsd/sys/socket.h +++ b/bsd/sys/socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
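Stepping back to the new bsd/sys/sfi.h above, here is a minimal sketch of how the userspace half of the interface composes. It assumes the private Libsyscall wrappers declared in that header, and that sfi_class_id_t plus an SFI_CLASS_DARWIN_BG constant come from <mach/sfi_class.h>; the header's own include lines are elided in this diff, so both are assumptions, and the pid is made up for the example:

    #include <sys/sfi.h>
    #include <mach/sfi_class.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* A 10 ms window; values below MIN_SFI_WINDOW_USEC are rounded up. */
        if (system_set_sfi_window(10000) != 0) {
            fprintf(stderr, "system_set_sfi_window failed\n");
            return 1;
        }

        /* Force the DARWIN_BG class idle for the first 2 ms of each window;
         * the off time must not exceed the window itself. */
        if (sfi_set_class_offtime(SFI_CLASS_DARWIN_BG, 2000) != 0) {
            fprintf(stderr, "sfi_set_class_offtime failed\n");
            return 1;
        }

        /* Opt a (hypothetical) pid into SFI management. */
        if (sfi_process_set_flags(1234, SFI_PROCESS_SET_MANAGED) != 0) {
            fprintf(stderr, "sfi_process_set_flags failed\n");
            return 1;
        }
        return 0;
    }

Passing 0 back to system_set_sfi_window() or sfi_set_class_offtime() disables the corresponding behavior, per the comments in the header.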
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -169,6 +169,10 @@ struct so_tcdbg { #define SO_WANTMORE 0x4000 /* APPLE: Give hint when more data ready */ #define SO_WANTOOBFLAG 0x8000 /* APPLE: Want OOB in MSG_FLAG on receive */ +#ifdef PRIVATE +#define SO_NOWAKEFROMSLEEP 0x10000 /* Don't wake for traffic to this socket */ +#endif + #endif /* (!__APPLE__) */ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ @@ -204,6 +208,7 @@ struct so_tcdbg { #define SO_RESTRICT_DENY_IN 0x1 /* deny inbound (trapdoor) */ #define SO_RESTRICT_DENY_OUT 0x2 /* deny outbound (trapdoor) */ #define SO_RESTRICT_DENY_CELLULAR 0x4 /* deny use of cellular (trapdoor) */ +#define SO_RESTRICT_DENY_EXPENSIVE 0x8 /* deny use of expensive if (trapdoor)*/ #endif /* PRIVATE */ #define SO_RANDOMPORT 0x1082 /* APPLE: request local port randomization */ #define SO_NP_EXTENSIONS 0x1083 /* To turn off some POSIX behavior */ @@ -319,8 +324,17 @@ struct so_tcdbg { #define SO_DELEGATED 0x1107 /* set socket as delegate (pid_t) */ #define SO_DELEGATED_UUID 0x1108 /* set socket as delegate (uuid_t) */ +#define SO_NECP_ATTRIBUTES 0x1109 /* NECP socket attributes (domain, account, etc.) */ +#define SO_CFIL_SOCK_ID 0x1110 /* get content filter socket ID (cfil_sock_id_t) */ +#if MPTCP +#define SO_MPTCP_FASTJOIN 0x1111 /* fast join MPTCP */ +#endif /* MPTCP */ +#define SO_AWDL_UNRESTRICTED 0x1113 /* try to use AWDL in restricted mode */ #endif /* PRIVATE */ + +#define SO_NUMRCVPKT 0x1112 /* number of datagrams in receive socket buffer */ + #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* @@ -659,7 +673,29 @@ struct msghdr { int msg_flags; /* [XSI] flags on received message */ }; -#ifdef KERNEL +#ifdef PRIVATE +/* + * Extended version for sendmsg_x() and recvmsg_x() calls + * + * For recvmsg_x(), the size of the data received is given by the field + * msg_datalen. + * + * For sendmsg_x(), the size of the data to send is given by the length of + * the iovec array -- like sendmsg(). The field msg_datalen is ignored. + */ +struct msghdr_x { + void *msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + struct iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + void *msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; +#endif /* PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE /* * In-kernel representation of "struct msghdr" from * userspace. Has enough precision for 32-bit or @@ -706,7 +742,56 @@ struct user32_msghdr { int msg_flags; /* flags on received message */ }; -#endif // KERNEL +/* + * In-kernel representation of "struct msghdr_x" from + * userspace. Has enough precision for 32-bit or + * 64-bit clients, but does not need to be packed. 
+ */ + +struct user_msghdr_x { + user_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; + +/* + * LP64 user version of struct msghdr_x + * WARNING - keep in sync with struct msghdr_x + */ + +struct user64_msghdr_x { + user64_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user64_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user64_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + user64_size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; + +/* + * ILP32 user version of struct msghdr_x + * WARNING - keep in sync with struct msghdr_x + */ + +struct user32_msghdr_x { + user32_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user32_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user32_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ + user32_size_t msg_datalen; /* byte length of buffer in msg_iov */ +}; + +#endif /* XNU_KERNEL_PRIVATE */ #define MSG_OOB 0x1 /* process out-of-band data */ #define MSG_PEEK 0x2 /* peek at incoming message */ @@ -738,6 +823,7 @@ struct user32_msghdr { #define MSG_NEEDSA 0x10000 /* Fail receive if socket address cannot be allocated */ #ifdef KERNEL_PRIVATE #define MSG_NBIO 0x20000 /* FIONBIO mode, used by fifofs */ +#define MSG_SKIPCFIL 0x40000 /* skip past the content filter */ #endif #ifdef KERNEL #define MSG_USEUPCALL 0x80000000 /* Inherit upcall in sock_accept */ @@ -1059,6 +1145,28 @@ struct kev_netpolicy_ifdenied { struct netpolicy_event_data ev_data; }; +/* + * Socket subclass (of KEV_NETWORK_CLASS) + */ +#define KEV_SOCKET_SUBCLASS 4 + +/* + * Events for KEV_SOCKET_SUBCLASS of KEV_NETWORK_CLASS + */ +#define KEV_SOCKET_CLOSED 1 /* completely closed by protocol */ + +/* + * Common structure for KEV_SOCKET_SUBCLASS + */ +struct kev_socket_event_data { + struct sockaddr_storage kev_sockname; + struct sockaddr_storage kev_peername; +}; + +struct kev_socket_closed { + struct kev_socket_event_data ev_data; +}; + #ifndef KERNEL __BEGIN_DECLS extern int connectx(int s, struct sockaddr *, socklen_t, struct sockaddr *, @@ -1066,6 +1174,74 @@ extern int connectx(int s, struct sockaddr *, socklen_t, struct sockaddr *, extern int disconnectx(int s, associd_t, connid_t); extern int peeloff(int s, associd_t); extern int socket_delegate(int, int, int, pid_t); + +/* + * recvmsg_x() is a system call similar to recvmsg(2) to receive + * several datagrams at once in the array of message headers "msgp". + * + * recvmsg_x() can be used only with protocol handlers that have been specially + * modified to handle sending and receiving several datagrams at once. + * + * The size of the array "msgp" is given by the argument "cnt". + * + * The "flags" argument supports only the value MSG_DONTWAIT. + * + * Each member of the "msgp" array is of type "struct msghdr_x". + * + * The "msg_iov" and "msg_iovlen" are input parameters that describe where to + * store a datagram in scatter/gather buffer locations -- see recvmsg(2). + * On output, the field "msg_datalen" gives the length of the received datagram. + * + * The field "msg_flags" must be set to zero on input. On output, "msg_flags" + * may have MSG_TRUNC set to indicate the trailing portion of the datagram was + * discarded because the datagram was larger than the buffer supplied. + * recvmsg_x() returns as soon as a datagram is truncated. + * + * recvmsg_x() may return with fewer than "cnt" datagrams received based on + * the low water mark and the amount of data pending in the socket buffer. + * + * Address and ancillary data are not supported, so the following fields + * must be set to zero on input: + * "msg_name", "msg_namelen", "msg_control" and "msg_controllen". + * + * recvmsg_x() returns the number of datagrams that have been received, + * or -1 if an error occurred. + * + * NOTE: This is a private system call, the API is subject to change. + */ +ssize_t recvmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags); + +/* + * sendmsg_x() is a system call similar to sendmsg(2) to send + * several datagrams at once in the array of message headers "msgp". + * + * sendmsg_x() can be used only with protocol handlers that have been specially + * modified to handle sending and receiving several datagrams at once. + * + * The size of the array "msgp" is given by the argument "cnt". + * + * The "flags" argument supports only the value MSG_DONTWAIT. + * + * Each member of the "msgp" array is of type "struct msghdr_x". + * + * The "msg_iov" and "msg_iovlen" are input parameters that specify the + * data to be sent in scatter/gather buffer locations -- see sendmsg(2). + * + * sendmsg_x() fails with EMSGSIZE if the sum of the lengths of the datagrams + * is greater than the high water mark. + * + * Address and ancillary data are not supported, so the following fields + * must be set to zero on input: + * "msg_name", "msg_namelen", "msg_control" and "msg_controllen". + * + * The fields "msg_flags" and "msg_datalen" must be set to zero on input. + * + * sendmsg_x() returns the number of datagrams that have been sent, + * or -1 if an error occurred. + * + * NOTE: This is a private system call, the API is subject to change. + */ +ssize_t sendmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags); __END_DECLS #endif /* !KERNEL */ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index 423634cca..33ddf98e2 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -201,7 +201,8 @@ struct socket { void *sb_upcallarg; /* Arg for above */ u_int32_t sb_wantlock; /* # of SB_LOCK waiters */ u_int32_t sb_waiters; /* # of data/space waiters */ - u_int32_t _reserved[2]; /* for future use */ + thread_t sb_cfil_thread; /* content filter thread */ + u_int32_t sb_cfil_refs; /* # of nested calls */ } so_rcv, so_snd; #define SB_MAX (8192*1024) /* default for max chars in sockbuf */ #define LOW_SB_MAX (2*9*1024) /* lower limit on max socket buffer @@ -238,41 +239,42 @@ struct socket { struct mbuf *so_tail; struct socket_filter_entry *so_filt; /* NKE hook */ u_int32_t so_flags; /* Flags */ -#define SOF_NOSIGPIPE 0x1 -#define SOF_NOADDRAVAIL 0x2 /* EADDRNOTAVAIL if src addr is gone */ -#define SOF_PCBCLEARING 0x4 /* pru_disconnect done; don't call pru_detach */ -#define SOF_DEFUNCT 0x8 /* socket marked as inactive */ -#define SOF_CLOSEWAIT 0x10 /* blocked in close awaiting some events */ -#define SOF_REUSESHAREUID 0x40 /* Allows SO_REUSEADDR/SO_REUSEPORT - for multiple so_uid */ -#define SOF_MULTIPAGES 0x80 /* jumbo clusters may be used for sosend */ -#define SOF_ABORTED 0x100 /* soabort was already called once */ -#define SOF_OVERFLOW 0x200 /* socket was dropped as overflow of listen q */ -#ifdef __APPLE_API_PRIVATE -#define SOF_NOTIFYCONFLICT 0x400 /* notify that a bind was done on a +#define SOF_NOSIGPIPE 0x00000001 +#define SOF_NOADDRAVAIL 0x00000002 /* EADDRNOTAVAIL if src addr is gone */ +#define SOF_PCBCLEARING 0x00000004 /* pru_disconnect done; don't call pru_detach */ +#define SOF_DEFUNCT 0x00000008 /* socket marked as inactive */ +#define SOF_CLOSEWAIT 0x00000010 /* blocked in close awaiting some events */ +#define SOF_REUSESHAREUID 0x00000040 /* Allows SO_REUSEADDR/SO_REUSEPORT + for multiple so_uid */ +#define SOF_MULTIPAGES 0x00000080 /* jumbo clusters may be used for sosend */ +#define SOF_ABORTED 0x00000100 /* soabort was already called once */ +#define SOF_OVERFLOW 0x00000200 /* socket was dropped as overflow of listen q */ +#define SOF_NOTIFYCONFLICT 0x00000400 /* notify that a bind was done on a port already in use */ -#endif -#define SOF_UPCALLCLOSEWAIT 0x800 /* block close until upcall returns */ -#define SOF_BINDRANDOMPORT 0x1000 /* Randomized port number for bind */ -#define SOF_NPX_SETOPTSHUT 0x2000 /* Non POSIX extension to allow +#define SOF_UPCALLCLOSEWAIT 0x00000800 /* block close until upcall returns */ +#define SOF_BINDRANDOMPORT 0x00001000 /* Randomized port number for bind */ +#define SOF_NPX_SETOPTSHUT 0x00002000 /* Non POSIX extension to allow setsockopt(2) after shut down */ -#define SOF_RECV_TRAFFIC_CLASS 0x4000 /* Receive TC as ancillary data */ -#define SOF_NODEFUNCT 0x8000 /* socket cannot be defunct'd */ -#define SOF_PRIVILEGED_TRAFFIC_CLASS 0x10000 /* traffic class is privileged */ -#define SOF_SUSPENDED 0x20000 /* i/f output queue is suspended */ -#define SOF_INCOMP_INPROGRESS 0x40000 /* incomp socket is being processed */ -#define SOF_NOTSENT_LOWAT 0x80000 /* A different lowat on not sent +#define SOF_RECV_TRAFFIC_CLASS 0x00004000 /* Receive TC as ancillary data */ +#define SOF_NODEFUNCT 0x00008000 /* socket cannot be defunct'd */ +#define SOF_PRIVILEGED_TRAFFIC_CLASS 0x00010000 /* traffic class is privileged */ +#define SOF_SUSPENDED 0x00020000 /* i/f output queue is suspended */ +#define SOF_INCOMP_INPROGRESS 0x00040000 /* incomp socket is being processed */ +#define SOF_NOTSENT_LOWAT 0x00080000 /* A different lowat on not sent data has been set */ -#define SOF_KNOTE 
0x100000 /* socket is on the EV_SOCK klist */ -#define SOF_USELRO 0x200000 /* TCP must use LRO on these sockets */ -#define SOF_ENABLE_MSGS 0x400000 /* TCP must enable message delivery */ -#define SOF_FLOW_DIVERT 0x800000 /* Flow Divert is enabled */ -#define SOF_MP_SUBFLOW 0x1000000 /* is a multipath subflow socket */ -#define SOF_MPTCP_TRUE 0x2000000 /* Established e2e MPTCP connection */ -#define SOF_MPTCP_CLIENT 0x4000000 /* Only client starts addtnal flows */ -#define SOF_MP_SEC_SUBFLOW 0x8000000 /* Set up secondary flow */ +#define SOF_KNOTE 0x00100000 /* socket is on the EV_SOCK klist */ +#define SOF_USELRO 0x00200000 /* TCP must use LRO on these sockets */ +#define SOF_ENABLE_MSGS 0x00400000 /* TCP must enable message delivery */ +#define SOF_FLOW_DIVERT 0x00800000 /* Flow Divert is enabled */ +#define SOF_MP_SUBFLOW 0x01000000 /* is a multipath subflow socket */ +#define SOF_MPTCP_TRUE 0x02000000 /* Established e2e MPTCP connection */ +#define SOF_MPTCP_CLIENT 0x04000000 /* Only client starts addtnal flows */ +#define SOF_MP_SEC_SUBFLOW 0x08000000 /* Set up secondary flow */ #define SOF_MP_TRYFAILOVER 0x10000000 /* Failing subflow */ #define SOF_DELEGATED 0x20000000 /* on behalf of another process */ +#define SOF_MPTCP_FASTJOIN 0x40000000 /* fast join support */ +#define SOF_CONTENT_FILTER 0x80000000 /* Content filter enabled */ + uint32_t so_upcallusecount; /* number of upcalls in progress */ int so_usecount; /* refcounting of socket use */; int so_retaincnt; @@ -287,7 +289,6 @@ struct socket { int next_lock_lr; void *unlock_lr[SO_LCKDBG_MAX]; /* unlocking caller history */ int next_unlock_lr; - void *reserved; /* reserved for future use */ struct label *so_label; /* MAC label for socket */ struct label *so_peerlabel; /* cached MAC label for socket peer */ @@ -302,6 +303,9 @@ struct socket { struct msg_state *so_msg_state; /* unordered snd/rcv state */ struct flow_divert_pcb *so_fd_pcb; /* Flow Divert control block */ + + struct cfil_info *so_cfil; + u_int32_t so_eventmask; /* event mask */ u_int64_t e_upid; /* upid of the effective owner */ @@ -309,9 +313,16 @@ struct socket { uuid_t last_uuid; /* uuid of most recent accessor */ uuid_t e_uuid; /* uuid of effective owner */ + uuid_t so_vuuid; /* UUID of the Voucher originator */ int32_t so_policy_gencnt; /* UUID policy gencnt */ u_int32_t so_ifdenied_notifies; /* # of notifications generated */ + + u_int32_t so_flags1; +#define SOF1_POST_FALLBACK_SYNC 0x00000001 /* fallback to TCP */ +#define SOF1_AWDL_PRIVILEGED 0x00000002 +#define SOF1_IF_2KCL 0x00000004 /* interface prefers 2 KB clusters */ +#define SOF1_DEFUNCTINPROG 0x00000008 }; /* Control message accessor in mbufs */ @@ -394,8 +405,8 @@ struct xsocket { short so_linger; short so_state; _XSOCKET_PTR(caddr_t) so_pcb; /* another convenient handle */ - int xso_protocol; - int xso_family; + int xso_protocol; + int xso_family; short so_qlen; short so_incqlen; short so_qlimit; @@ -403,8 +414,8 @@ struct xsocket { u_short so_error; pid_t so_pgid; u_int32_t so_oobmark; - struct xsockbuf so_rcv; - struct xsockbuf so_snd; + struct xsockbuf so_rcv; + struct xsockbuf so_snd; uid_t so_uid; /* XXX */ }; @@ -437,6 +448,9 @@ struct xsocket64 { #define XSO_STATS 0x008 #define XSO_INPCB 0x010 #define XSO_TCPCB 0x020 +#define XSO_KCREG 0x040 +#define XSO_KCB 0x080 +#define XSO_EVT 0x100 struct xsocket_n { u_int32_t xso_len; /* length of this structure */ @@ -447,8 +461,8 @@ struct xsocket_n { short so_linger; short so_state; u_int64_t so_pcb; /* another convenient handle */ - int 
xso_protocol; - int xso_family; + int xso_protocol; + int xso_family; short so_qlen; short so_incqlen; short so_qlimit; @@ -457,6 +471,8 @@ struct xsocket_n { pid_t so_pgid; u_int32_t so_oobmark; uid_t so_uid; /* XXX */ + pid_t so_last_pid; + pid_t so_e_pid; }; struct xsockbuf_n { @@ -539,12 +555,16 @@ struct kextcb { #define SO_FILT_HINT_MPFAILOVER 0x00008000 /* multipath failover */ #define SO_FILT_HINT_MPSTATUS 0x00010000 /* multipath status */ #define SO_FILT_HINT_MUSTRST 0x00020000 /* must send RST and close */ +#define SO_FILT_HINT_MPFASTJ 0x00040000 /* can do MPTCP fast join */ +#define SO_FILT_HINT_DELETEOK 0x00100000 /* Ok to delete socket */ +#define SO_FILT_HINT_MPCANTRCVMORE 0x00200000 /* MPTCP DFIN Received */ #define SO_FILT_HINT_BITS \ "\020\1LOCKED\2CONNRESET\3CANTRCVMORE\4CANTSENDMORE\5TIMEOUT" \ "\6NOSRCADDR\7IFDENIED\10SUSPEND\11RESUME\12KEEPALIVE\13AWTIMO" \ - "\14ARTIMO\15CONNECTED\16DISCONNECTED\17CONNINFO_UPDATED" \ - "\20MPFAILOVER\21MPSTATUS\22MUSTRST" + "\14ARTIMO\15CONNECTED\16DISCONNECTED\17CONNINFO_UPDATED" \ + "\20MPFAILOVER\21MPSTATUS\22MUSTRST\23MPFASTJ\24DELETEOK" \ + "\25MPCANTRCVMORE" /* Mask for hints that have corresponding kqueue events */ #define SO_FILT_HINT_EV \ @@ -614,9 +634,10 @@ struct sf_buf { struct so_procinfo { pid_t spi_pid; pid_t spi_epid; + uuid_t spi_uuid; + uuid_t spi_euuid; }; -extern int maxsockets; extern u_int32_t sb_max; extern so_gen_t so_gencnt; extern int socket_debug; @@ -625,6 +646,7 @@ extern int sosendjcl_ignore_capab; extern int sodefunctlog; extern int sothrottlelog; extern int sorestrictrecv; +extern int sorestrictsend; extern int somaxconn; extern uint32_t tcp_autosndbuf_max; extern u_int32_t sotcdb; @@ -672,6 +694,11 @@ extern int soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc); extern void sorwakeup(struct socket *so); extern int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags); +extern int sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uio, + u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags); +extern int soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uio, + u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); +extern void sonullevent(struct socket *so, void *arg, uint32_t hint); __END_DECLS #ifdef BSD_KERNEL_PRIVATE @@ -762,6 +789,7 @@ extern int sodisconnectlocked(struct socket *so); extern void soreference(struct socket *so); extern void sodereference(struct socket *so); extern void somultipages(struct socket *, boolean_t); +extern void soif2kcl(struct socket *, boolean_t); extern int sosetdefunct(struct proc *, struct socket *, int level, boolean_t); extern int sodefunct(struct proc *, struct socket *, int level); extern void sohasoutofband(struct socket *so); @@ -769,6 +797,7 @@ extern void sodisconnectwakeup(struct socket *so); extern int soisthrottled(struct socket *so); extern int soisprivilegedtraffic(struct socket *so); extern int soissrcbackground(struct socket *so); +extern int soissrcrealtime(struct socket *so); extern int solisten(struct socket *so, int backlog); extern struct socket *sodropablereq(struct socket *head); extern int socket_lock(struct socket *so, int refcount); @@ -784,6 +813,7 @@ extern int sogetoptlock(struct socket *so, struct sockopt *sopt, int); extern int sosetoptlock(struct socket *so, struct sockopt *sopt, int); extern int soshutdown(struct socket *so, int how); extern int soshutdownlock(struct socket *so, int how); +extern 
int soshutdownlock_final(struct socket *so, int how); extern void sotoxsocket(struct socket *so, struct xsocket *xso); extern void sotoxsocket64(struct socket *so, struct xsocket64 *xso); extern int sosendallatonce(struct socket *so); @@ -844,6 +874,7 @@ extern void so_set_lro(struct socket*, int); extern int so_isdstlocal(struct socket *); extern void so_recv_data_stat(struct socket *, struct mbuf *, size_t); +extern void so_inc_recv_data_stat(struct socket *, size_t, size_t, uint32_t); extern int so_wait_for_if_feedback(struct socket *); extern int msgq_sbspace(struct socket *so, struct mbuf *control); extern int soopt_getm(struct sockopt *sopt, struct mbuf **mp); @@ -854,10 +885,31 @@ extern boolean_t so_cache_timer(void); extern void mptcp_preproc_sbdrop(struct mbuf *, unsigned int); extern void mptcp_postproc_sbdrop(struct mbuf *, u_int64_t, u_int32_t, u_int32_t); -extern void mptcp_adj_rmap(struct socket *, struct mbuf *); +extern int mptcp_adj_rmap(struct socket *, struct mbuf *); extern void netpolicy_post_msg(uint32_t, struct netpolicy_event_data *, uint32_t); +extern void socket_post_kev_msg(uint32_t, struct kev_socket_event_data *, + uint32_t); +extern void socket_post_kev_msg_closed(struct socket *); +/* + * Socket operation routines. + * These routines are called by the routines in + * sys_socket.c or from a system process, and + * implement the semantics of socket operations by + * switching out to the protocol specific routines. + */ +extern void postevent(struct socket *, struct sockbuf *, int); +extern void evsofree(struct socket *); + +extern int tcp_notsent_lowat_check(struct socket *so); + +extern user_ssize_t uio_array_resid(struct uio **, u_int); + +void sotoxsocket_n(struct socket *, struct xsocket_n *); +void sbtoxsockbuf_n(struct sockbuf *, struct xsockbuf_n *); +void sbtoxsockstat_n(struct socket *, struct xsockstat_n *); + __END_DECLS #endif /* BSD_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/sockio.h b/bsd/sys/sockio.h index 8de12db1d..d020b9573 100644 --- a/bsd/sys/sockio.h +++ b/bsd/sys/sockio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -105,12 +105,6 @@ #define SIOCSIFMETRIC _IOW('i', 24, struct ifreq) /* set IF metric */ #define SIOCDIFADDR _IOW('i', 25, struct ifreq) /* delete IF addr */ #define SIOCAIFADDR _IOW('i', 26, struct ifaliasreq)/* add/chg IF alias */ -#define SIOCGETVIFCNT _IOWR('r', 27, struct sioc_vif_req)/* get vif pkt cnt */ -#define SIOCGETSGCNT _IOWR('r', 28, struct sioc_sg_req) /* get s,g pkt cnt */ -#define SIOCALIFADDR _IOW('i', 29, struct if_laddrreq) /* add IF addr */ -#define SIOCGLIFADDR _IOWR('i', 30, struct if_laddrreq) /* get IF addr */ -#define SIOCDLIFADDR _IOW('i', 31, struct if_laddrreq) /* delete IF addr */ - #define SIOCGIFADDR _IOWR('i', 33, struct ifreq) /* get ifnet address */ #define SIOCGIFDSTADDR _IOWR('i', 34, struct ifreq) /* get p-p address */ @@ -150,8 +144,6 @@ #define SIOCGIFPSRCADDR _IOWR('i', 63, struct ifreq) /* get gif psrc addr */ #define SIOCGIFPDSTADDR _IOWR('i', 64, struct ifreq) /* get gif pdst addr */ #define SIOCDIFPHYADDR _IOW('i', 65, struct ifreq) /* delete gif addrs */ -#define SIOCSLIFPHYADDR _IOW('i', 66, struct if_laddrreq) /* set gif addrs */ -#define SIOCGLIFPHYADDR _IOWR('i', 67, struct if_laddrreq) /* get gif addrs */ #define SIOCGIFDEVMTU _IOWR('i', 68, struct ifreq) /* get if ifdevmtu */ #define SIOCSIFALTMTU _IOW('i', 69, struct ifreq) /* set if alternate mtu */ @@ -258,5 +250,9 @@ #define SIOCGIFDELEGATE _IOWR('i', 157, struct ifreq) #define SIOCGIFLLADDR _IOWR('i', 158, struct ifreq) /* get link level addr */ #define SIOCGIFTYPE _IOWR('i', 159, struct ifreq) /* get interface type */ +#define SIOCGIFEXPENSIVE _IOWR('i', 160, struct ifreq) /* get interface expensive flag */ +#define SIOCSIFEXPENSIVE _IOWR('i', 161, struct ifreq) /* mark interface expensive */ +#define SIOCGIF2KCL _IOWR('i', 162, struct ifreq) /* interface prefers 2 KB clusters */ +#define SIOCSIF2KCL _IOWR('i', 163, struct ifreq) #endif /* PRIVATE */ #endif /* !_SYS_SOCKIO_H_ */ diff --git a/bsd/sys/spawn.h b/bsd/sys/spawn.h index 85377d5f0..edd7020d6 100644 --- a/bsd/sys/spawn.h +++ b/bsd/sys/spawn.h @@ -60,12 +60,10 @@ #define POSIX_SPAWN_START_SUSPENDED 0x0080 #ifdef PRIVATE #define _POSIX_SPAWN_DISABLE_ASLR 0x0100 -/* unused 0x0200 */ -/* for compat sake */ -#define POSIX_SPAWN_OSX_TALAPP_START 0x0400 -#define POSIX_SPAWN_OSX_WIDGET_START 0x0800 -#define POSIX_SPAWN_OSX_DBCLIENT_START 0x0800 /* not a bug, same as widget just rename */ -#define POSIX_SPAWN_OSX_RESVAPP_START 0x1000 /* reserved for app start usages */ +#define _POSIX_SPAWN_NANO_ALLOCATOR 0x0200 +/* unused 0x0400 */ +/* unused 0x0800 */ +/* unused 0x1000 */ #define _POSIX_SPAWN_ALLOW_DATA_EXEC 0x2000 #endif /* PRIVATE */ #define POSIX_SPAWN_CLOEXEC_DEFAULT 0x4000 diff --git a/bsd/sys/spawn_internal.h b/bsd/sys/spawn_internal.h index c96434f3a..7054f6bb9 100644 --- a/bsd/sys/spawn_internal.h +++ b/bsd/sys/spawn_internal.h @@ -135,23 +135,23 @@ typedef struct _posix_spawnattr { int psa_apptype; /* app type and process spec behav */ uint64_t psa_cpumonitor_percent; /* CPU usage monitor percentage */ uint64_t psa_cpumonitor_interval; /* CPU usage monitor interval, in seconds */ - /* - * TODO: cleanup - see . psa_ports is a pointer, - * meaning that the fields following differ in alignment between 32 and - * 64-bit architectures. All pointers (existing and new) should therefore - * be placed at the end; changing this now, however, would currently break - * some legacy dependencies. The radar will be used to track resolution when - * appropriate. 
- */ + uint64_t psa_coalitionid; /* coalition to spawn into */ short psa_jetsam_flags; /* jetsam flags */ short short_padding; /* Padding for alignment issues */ int psa_priority; /* jetsam relative importance */ int psa_high_water_mark; /* jetsam resident page count limit */ int int_padding; /* Padding for alignment issues */ - /* MAC policy-specific extensions. */ + + uint64_t psa_qos_clamp; /* QoS Clamp to set on the new process */ + + /* + * NOTE: Extensions array pointers must stay at the end so that + * everything above this point stays the same size on different bitnesses + * see + */ _posix_spawn_port_actions_t psa_ports; /* special/exception ports */ - _posix_spawn_mac_policy_extensions_t psa_mac_extensions; + _posix_spawn_mac_policy_extensions_t psa_mac_extensions; /* MAC policy-specific extensions. */ } *_posix_spawnattr_t; /* @@ -161,6 +161,7 @@ typedef struct _posix_spawnattr { #define POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY 0x1 #define POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND 0x2 +#define POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL 0x4 /* * Deprecated posix_spawn psa_flags values @@ -208,6 +209,12 @@ typedef struct _posix_spawnattr { #define POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND 0x00000500 #define POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE 0x00000600 +#define POSIX_SPAWN_PROC_CLAMP_NONE 0x00000000 +#define POSIX_SPAWN_PROC_CLAMP_UTILITY 0x00000001 +#define POSIX_SPAWN_PROC_CLAMP_BACKGROUND 0x00000002 +#define POSIX_SPAWN_PROC_CLAMP_MAINTENANCE 0x00000003 +#define POSIX_SPAWN_PROC_CLAMP_LAST 0x00000004 + /* * Allowable posix_spawn() file actions */ diff --git a/bsd/sys/stat.h b/bsd/sys/stat.h index 2f6dbcbff..3614aaac2 100644 --- a/bsd/sys/stat.h +++ b/bsd/sys/stat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -469,7 +469,11 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp); */ /* #define UF_NOUNLINK 0x00000010 */ /* file may not be removed or renamed */ #define UF_COMPRESSED 0x00000020 /* file is hfs-compressed */ -#define UF_TRACKED 0x00000040 /* file renames and deletes are tracked */ + +/* UF_TRACKED is used for dealing with document IDs. We no longer issue + notifications for deletes or renames for files which have UF_TRACKED set. */ +#define UF_TRACKED 0x00000040 + /* Bits 0x0080 through 0x4000 are currently undefined. */ #define UF_HIDDEN 0x00008000 /* hint that this item should not be */ /* displayed in a GUI */ @@ -480,6 +484,7 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp); #define SF_ARCHIVED 0x00010000 /* file is archived */ #define SF_IMMUTABLE 0x00020000 /* file may not be changed */ #define SF_APPEND 0x00040000 /* writes to file may only append */ +#define SF_RESTRICTED 0x00080000 /* restricted access */ /* * The following two bits are reserved for FreeBSD. 
They are not @@ -513,6 +518,12 @@ int stat(const char *, struct stat *) __DARWIN_INODE64(stat); int mknod(const char *, mode_t, dev_t); mode_t umask(mode_t); +#if __DARWIN_C_LEVEL >= 200809L +int fchmodat(int, const char *, mode_t, int) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int fstatat(int, const char *, struct stat *, int) __DARWIN_INODE64(fstatat) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int mkdirat(int, const char *, mode_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +#endif + #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #include diff --git a/bsd/machine/setjmp.h b/bsd/sys/stdio.h similarity index 75% rename from bsd/machine/setjmp.h rename to bsd/sys/stdio.h index b349878d0..f90426f43 100644 --- a/bsd/machine/setjmp.h +++ b/bsd/sys/stdio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,12 +25,23 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved - */ -#ifndef _MACHINE_SETJMP_H_ -#define _MACHINE_SETJMP_H_ -#include +#ifndef _SYS_STDIO_H_ +#define _SYS_STDIO_H_ + +#include + +#ifndef KERNEL +#if __DARWIN_C_LEVEL >= 200809L +#include + +__BEGIN_DECLS + +int renameat(int, const char *, int, const char *) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + +__END_DECLS + +#endif /* __DARWIN_C_LEVEL >= 200809L */ -#endif /* _MACHINE_SETJMP_H_ */ +#endif /* !KERNEL */ +#endif /* _SYS_STDIO_H_ */ diff --git a/bsd/sys/sys_domain.h b/bsd/sys/sys_domain.h index d760373e5..8a12b455c 100644 --- a/bsd/sys/sys_domain.h +++ b/bsd/sys/sys_domain.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005, 2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2005, 2012, 2014 Apple Inc. All rights reserved. 
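The fchmodat/fstatat/mkdirat declarations above, together with renameat in the new <sys/stdio.h>, round out the POSIX.1-2008 dirfd-relative family for 10.10/iOS 8. A short userspace sketch of the pattern; every lookup resolves relative to the directory descriptor, not the process working directory:

#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int at_demo(const char *dirpath)
{
    struct stat st;
    int dfd = open(dirpath, O_RDONLY | O_DIRECTORY);

    if (dfd < 0)
        return -1;
    if (mkdirat(dfd, "subdir", 0755) == -1 && errno != EEXIST)
        goto fail;
    if (fstatat(dfd, "subdir", &st, AT_SYMLINK_NOFOLLOW) == -1)
        goto fail;
    if (fchmodat(dfd, "subdir", 0700, 0) == -1)
        goto fail;
    if (renameat(dfd, "subdir", dfd, "subdir.renamed") == -1)
        goto fail;
    close(dfd);
    return 0;
fail:
    close(dfd);
    return -1;
}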
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,15 +30,12 @@ #ifndef _SYSTEM_DOMAIN_H_ #define _SYSTEM_DOMAIN_H_ - #include #include #include #ifdef KERNEL_PRIVATE -#include -#include -#include +#include #endif /* KERNEL_PRIVATE */ /* Kernel Events Protocol */ @@ -49,67 +46,33 @@ #define AF_SYS_CONTROL 2 /* corresponding sub address type */ /* System family socket address */ -struct sockaddr_sys -{ - u_char ss_len; /* sizeof(struct sockaddr_sys) */ - u_char ss_family; /* AF_SYSTEM */ +struct sockaddr_sys { + u_char ss_len; /* sizeof(struct sockaddr_sys) */ + u_char ss_family; /* AF_SYSTEM */ u_int16_t ss_sysaddr; /* protocol address in AF_SYSTEM */ u_int32_t ss_reserved[7]; /* reserved to the protocol use */ }; - -#ifdef KERNEL_PRIVATE - -/* - * internal structure maintained for each register controller - */ -struct ctl_cb; -struct socket; - -struct kctl -{ - TAILQ_ENTRY(kctl) next; /* controller chain */ - - /* controller information provided when registering */ - char name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ - u_int32_t id; - u_int32_t reg_unit; - - /* misc communication information */ - u_int32_t flags; /* support flags */ - u_int32_t recvbufsize; /* request more than the default buffer size */ - u_int32_t sendbufsize; /* request more than the default buffer size */ - - /* Dispatch functions */ - ctl_connect_func connect; /* Make contact */ - ctl_disconnect_func disconnect; /* Break contact */ - ctl_send_func send; /* Send data to nke */ - ctl_setopt_func setopt; /* set kctl configuration */ - ctl_getopt_func getopt; /* get kctl configuration */ - ctl_rcvd_func rcvd; /* Notify nke when client reads data */ - - TAILQ_HEAD(, ctl_cb) kcb_head; - u_int32_t lastunit; +#ifdef PRIVATE +struct xsystmgen { + u_int32_t xg_len; /* length of this structure */ + u_int32_t xg_count; /* number of PCBs at this time */ + u_int64_t xg_gen; /* generation count at this time */ + u_int64_t xg_sogen; /* current socket generation count */ }; +#endif /* PRIVATE */ -struct ctl_cb { - TAILQ_ENTRY(ctl_cb) next; /* controller chain */ - lck_mtx_t *mtx; - struct socket *so; /* controlling socket */ - struct kctl *kctl; /* back pointer to controller */ - void *userdata; - u_int32_t unit; - u_int32_t usecount; -}; +#ifdef KERNEL_PRIVATE extern struct domain *systemdomain; +SYSCTL_DECL(_net_systm); + /* built in system domain protocols init function */ __BEGIN_DECLS void kern_event_init(struct domain *); void kern_control_init(struct domain *); __END_DECLS - #endif /* KERNEL_PRIVATE */ #endif /* _SYSTEM_DOMAIN_H_ */ diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h index b566fe453..f224e999a 100644 --- a/bsd/sys/sysctl.h +++ b/bsd/sys/sysctl.h @@ -83,8 +83,10 @@ #include #include #else +#ifndef XNU_KERNEL_PRIVATE #include #endif +#endif #include #include @@ -110,14 +112,12 @@ * type given below. Each sysctl level defines a set of name/type * pairs to be used by sysctl(1) in manipulating the subsystem. * - * When declaring new sysctl names, unless your sysctl is callable - * from the paging path, please use the CTLFLAG_LOCKED flag in the + * When declaring new sysctl names, use the CTLFLAG_LOCKED flag in the * type to indicate that all necessary locking will be handled * within the sysctl. * * Any sysctl defined without CTLFLAG_LOCKED is considered legacy - * and will be protected by both wiring the user process pages and, - * if it is a 32 bit legacy KEXT, by the obsolete kernel funnel. + * and will be protected by a global mutex. 
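Although struct kctl and struct ctl_cb become private to the kernel with the sys_domain.h change above, the userspace contract for kernel control sockets is unchanged. A minimal sketch of connecting to a control by name through the public <sys/kern_control.h> interface:

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/kern_control.h>
#include <sys/sys_domain.h>
#include <string.h>
#include <unistd.h>

/* Connect to a kernel control by name; returns a socket fd or -1. */
static int kctl_connect(const char *name)
{
    struct ctl_info info;
    struct sockaddr_ctl addr;
    int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

    if (fd < 0)
        return -1;
    memset(&info, 0, sizeof(info));
    strlcpy(info.ctl_name, name, sizeof(info.ctl_name));
    if (ioctl(fd, CTLIOCGINFO, &info) == -1) {  /* resolve name to dynamic id */
        close(fd);
        return -1;
    }
    memset(&addr, 0, sizeof(addr));
    addr.sc_len = sizeof(addr);
    addr.sc_family = AF_SYSTEM;
    addr.ss_sysaddr = AF_SYS_CONTROL;
    addr.sc_id = info.ctl_id;
    addr.sc_unit = 0;                           /* 0 = kernel picks a unit */
    if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
        close(fd);
        return -1;
    }
    return fd;
}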
* * Note: This is not optimal, so it is best to handle locking * yourself, if you are able to do so. A simple design @@ -295,11 +295,43 @@ __END_DECLS #else #define SYSCTL_LINKER_SET_ENTRY(a, b) #endif +/* + * Macros to define sysctl entries. Which to use? Pure data that are + * returned without modification, SYSCTL_<data type> is for you, like + * SYSCTL_QUAD for a 64-bit value. When you want to run a handler of your + * own, SYSCTL_PROC. + * + * parent: parent in name hierarchy (e.g. _kern for "kern") + * nbr: ID. Almost certainly OID_AUTO ("pick one for me") for you. + * name: name for this particular item (e.g. "thesysctl" for "kern.thesysctl") + * kind/access: Control flags (CTLFLAG_*). Some notable options include: + * CTLFLAG_ANYBODY: non-root users allowed + * CTLFLAG_MASKED: don't show in sysctl listing in userland + * CTLFLAG_LOCKED: does own locking (no additional protection needed) + * CTLFLAG_KERN: valid inside kernel (best avoided generally) + * CTLFLAG_WR: "new" value accepted + * a1, a2: entry-data, passed to handler (see specific macros) + * Format String: Tells "sysctl" tool how to print data from this entry. + * "A" - string + * "I" - list of integers. "IU" - list of unsigned integers. space-separated. + * "-" - do not print + * "L" - longs, as ints with I + * "P" - pointer + * "Q" - quads + * "S","T" - clock info, see sysctl.c in system_cmds (you probably don't need this) + * Description: unused + */ + + /* This constructs a "raw" MIB oid. */ +#define SYSCTL_STRUCT_INIT(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ + { \ + &sysctl_##parent##_children, { 0 }, \ + nbr, (int)(kind|CTLFLAG_OID2), a1, (int)(a2), #name, handler, fmt, descr, SYSCTL_OID_VERSION, 0 \ + } + #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ - struct sysctl_oid sysctl_##parent##_##name = { \ - &sysctl_##parent##_children, { 0 }, \ - nbr, (int)(kind|CTLFLAG_OID2), a1, (int)(a2), #name, handler, fmt, descr, SYSCTL_OID_VERSION, 0 }; \ + struct sysctl_oid sysctl_##parent##_##name = SYSCTL_STRUCT_INIT(parent, nbr, name, kind, a1, a2, handler, fmt, descr); \ SYSCTL_LINKER_SET_ENTRY(__sysctl_set, sysctl_##parent##_##name) /* This constructs a node from which other oids can hang. */ @@ -357,7 +389,11 @@ __END_DECLS ptr, sizeof(struct type), sysctl_handle_opaque, \ "S," #type, descr) -/* Oid for a procedure. Specified by a pointer and an arg. */ +/* + * Oid for a procedure. Specified by a pointer and an arg. + * CTLTYPE_* macros can determine how the "sysctl" tool deals with + * input (e.g. converting to int). + */ #define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ SYSCTL_OID(parent, nbr, name, access, \ ptr, arg, handler, fmt, descr) @@ -541,7 +577,7 @@ SYSCTL_DECL(_user); /* Don't use 13 as it is overloaded with KERN_VNODE */ #define KERN_KDPIDEX 14 #define KERN_KDSETRTCDEC 15 -#define KERN_KDGETENTROPY 16 +#define KERN_KDGETENTROPY 16 /* Obsolescent */ #define KERN_KDWRITETR 17 #define KERN_KDWRITEMAP 18 #define KERN_KDENABLE_BG_TRACE 19 @@ -1088,28 +1124,6 @@ struct user64_loadavg { #ifdef KERNEL -#if DEBUG -/* - * CTL_DEBUG variables. - * - * These are declared as separate variables so that they can be - * individually initialized at the location of their associated - * variable. The loader prevents multiple use by issuing errors - * if a variable is initialized in more than one place. They are - * aggregated into an array in debug_sysctl(), so that it can - * conveniently locate them when querried.
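Tying the comment block above together, here is what a typical data-type oid looks like in practice; the oid name and backing variable are illustrative, not taken from xnu:

#include <sys/sysctl.h>

static int example_limit = 16;

/* Publishes "kern.example_limit" as a read/write int. CTLFLAG_LOCKED
 * asserts that no additional global protection is needed; plain int
 * loads and stores are sufficient here. */
SYSCTL_INT(_kern, OID_AUTO, example_limit,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &example_limit, 0, "Illustrative tunable");

Statically linked oids are picked up from the linker set by sysctl_register_all(); a kext registers the generated struct explicitly with sysctl_register_oid().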
If more debugging - * variables are added, they must also be declared here and also - * entered into the array. - */ -struct ctldebug { - char *debugname; /* name of debugging variable */ - int *debugvar; /* pointer to debugging variable */ -}; -extern struct ctldebug debug0, debug1, debug2, debug3, debug4; -extern struct ctldebug debug5, debug6, debug7, debug8, debug9; -extern struct ctldebug debug10, debug11, debug12, debug13, debug14; -extern struct ctldebug debug15, debug16, debug17, debug18, debug19; -#endif /* DEBUG */ #ifdef BSD_KERNEL_PRIVATE extern char machine[]; @@ -1122,36 +1136,9 @@ struct linker_set; void sysctl_register_set(const char *set); void sysctl_unregister_set(const char *set); void sysctl_mib_init(void); -int kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, - size_t *oldlenp, void *newp, size_t newlen); -int userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t old, - size_t *oldlenp, user_addr_t newp, size_t newlen, - size_t *retval); - -/* - * Internal sysctl function calling convention: - * - * (*sysctlfn)(name, namelen, oldval, oldlenp, newval, newlen); - * - * The name parameter points at the next component of the name to be - * interpreted. The namelen parameter is the number of integers in - * the name. - */ -typedef int (sysctlfn) - (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, struct proc *); int sysctl_int(user_addr_t, size_t *, user_addr_t, size_t, int *); -int sysctl_rdint(user_addr_t, size_t *, user_addr_t, int); int sysctl_quad(user_addr_t, size_t *, user_addr_t, size_t, quad_t *); -int sysctl_rdquad(user_addr_t, size_t *, user_addr_t, quad_t); -int sysctl_string(user_addr_t, size_t *, user_addr_t, size_t, char *, int); -int sysctl_trstring(user_addr_t, size_t *, user_addr_t, size_t, char *, int); -int sysctl_rdstring(user_addr_t, size_t *, user_addr_t, char *); -int sysctl_rdstruct(user_addr_t, size_t *, user_addr_t, void *, int); - -/* XXX should be in , but not a real system call */ -struct sysctl_args; -int new_sysctl(struct proc *, struct sysctl_args *); void sysctl_register_all(void); diff --git a/bsd/sys/sysent.h b/bsd/sys/sysent.h index 33c991c14..e69e93b2b 100644 --- a/bsd/sys/sysent.h +++ b/bsd/sys/sysent.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -36,12 +36,15 @@ #ifdef __APPLE_API_PRIVATE typedef int32_t sy_call_t(struct proc *, void *, int *); -typedef void sy_munge_t(const void *, void *); +#if CONFIG_REQUIRES_U32_MUNGING +typedef void sy_munge_t(void *); +#endif struct sysent { /* system call table */ sy_call_t *sy_call; /* implementing function */ +#if CONFIG_REQUIRES_U32_MUNGING sy_munge_t *sy_arg_munge32; /* system call arguments munger for 32-bit process */ - sy_munge_t *sy_arg_munge64; /* system call arguments munger for 64-bit process */ +#endif int32_t sy_return_type; /* system call return types */ int16_t sy_narg; /* number of args */ uint16_t sy_arg_bytes; /* Total size of arguments in bytes for @@ -54,7 +57,7 @@ extern struct sysent sysent[]; #endif /* __INIT_SYSENT_C__ */ extern int nsysent; -#define NUM_SYSENT 456 /* Current number of defined syscalls */ +#define NUM_SYSENT 490 /* Current number of defined syscalls */ /* * Valid values for sy_cancel diff --git a/bsd/sys/systm.h b/bsd/sys/systm.h index a3dfef0a9..df3379053 100644 --- a/bsd/sys/systm.h +++ b/bsd/sys/systm.h @@ -134,12 +134,6 @@ extern int securelevel; /* system security level */ extern dev_t rootdev; /* root device */ extern struct vnode *rootvp; /* vnode equivalent to above */ -#ifdef XNU_KERNEL_PRIVATE -#define NO_FUNNEL 0 -#define KERNEL_FUNNEL 1 -extern funnel_t * kernel_flock; -#endif /* XNU_KERNEL_PRIVATE */ - #endif /* KERNEL_PRIVATE */ #define SYSINIT(a,b,c,d,e) @@ -185,7 +179,6 @@ struct time_value; void get_procrustime(struct time_value *tv); void load_init_program(struct proc *p); void __pthread_testcancel(int presyscall); -void syscall_exit_funnelcheck(void); void throttle_info_get_last_io_time(mount_t mp, struct timeval *tv); void update_last_io_time(mount_t mp); #endif /* BSD_KERNEL_PRIVATE */ @@ -235,7 +228,7 @@ typedef struct __throttle_info_handle *throttle_info_handle_t; int throttle_info_ref_by_mask(uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle); void throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle); void throttle_info_update_by_mask(void *throttle_info_handle, int flags); - +void throttle_info_disable_throttle(int devno); /* * 'throttle_info_handle' acquired via 'throttle_info_ref_by_mask' * 'policy' should be specified as either IOPOL_UTILITY or IOPOL_THROTTLE, diff --git a/bsd/sys/tty.h b/bsd/sys/tty.h index 28ee788a5..7fdcd02c7 100644 --- a/bsd/sys/tty.h +++ b/bsd/sys/tty.h @@ -222,6 +222,7 @@ struct clist; #endif #define TS_IOCTL_NOT_OK 0x1000000 /* Workaround */ +#define TS_PGRPHUP 0x2000000 /* Don't change Foreground process group */ /* Character type information.
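The sy_munge_t change in the sysent.h hunk above (from a copy-style munger taking separate input and output buffers to an in-place void (void *) munger, compiled only under CONFIG_REQUIRES_U32_MUNGING) means mungers now widen a 32-bit process's packed arguments inside the saved-argument area itself. A hypothetical munger in that shape, modeled on the style of bsd/dev/munge.c rather than copied from it:

#include <stdint.h>

/* Three 4-byte args arrive packed at the front of the 8-byte-per-slot
 * argument array; widen them in place. Walking backwards ensures each
 * 32-bit value is read before its bytes are overwritten. */
static void example_munge_www(void *args)
{
    volatile uint64_t *out = (volatile uint64_t *)args;
    const uint32_t *in = (const uint32_t *)args;

    out[2] = in[2];
    out[1] = in[1];
    out[0] = in[0];
}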
*/ @@ -327,6 +328,8 @@ int ttysleep(struct tty *tp, int ttywait(struct tty *tp); struct tty *ttymalloc(void); void ttyfree(struct tty *); +void ttysetpgrphup(struct tty *tp); +void ttyclrpgrphup(struct tty *tp); __END_DECLS diff --git a/bsd/sys/types.h b/bsd/sys/types.h index 6590597cd..a9fc63938 100644 --- a/bsd/sys/types.h +++ b/bsd/sys/types.h @@ -220,21 +220,25 @@ struct uio; #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ #endif /* __ASSEMBLER__ */ +#ifndef KERNEL + #ifndef __POSIX_LIB__ -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #endif /* __POSIX_LIB__ */ -#include +#include + +#endif /* KERNEL */ /* statvfs and fstatvfs */ diff --git a/bsd/sys/ubc.h b/bsd/sys/ubc.h index 2982d5008..c138d2936 100644 --- a/bsd/sys/ubc.h +++ b/bsd/sys/ubc.h @@ -40,7 +40,7 @@ #include #include -/* defns for ubc_sync_range() and ubc_msync */ +/* defns for ubc_msync() and ubc_msync */ #define UBC_PUSHDIRTY 0x01 /* clean any dirty pages in the specified range to the backing store */ #define UBC_PUSHALL 0x02 /* push both dirty and precious pages to the backing store */ @@ -54,11 +54,21 @@ daddr64_t ubc_offtoblk(struct vnode *, off_t); off_t ubc_getsize(struct vnode *); int ubc_setsize(struct vnode *, off_t); +#ifdef KERNEL_PRIVATE + +enum { + UBC_SETSIZE_NO_FS_REENTRY = 1 +}; +typedef uint32_t ubc_setsize_opts_t; + +errno_t ubc_setsize_ex(vnode_t vp, off_t nsize, ubc_setsize_opts_t opts); + +#endif // KERNEL_PRIVATE + kauth_cred_t ubc_getcred(struct vnode *); struct thread; int ubc_setthreadcred(struct vnode *, struct proc *, struct thread *); -int ubc_sync_range(vnode_t, off_t, off_t, int); errno_t ubc_msync(vnode_t, off_t, off_t, off_t *, int); int ubc_pages_resident(vnode_t); int ubc_page_op(vnode_t, off_t, int, ppnum_t *, int *); @@ -70,6 +80,12 @@ int ubc_setcred(struct vnode *, struct proc *) __deprecated; /* code signing */ struct cs_blob; struct cs_blob *ubc_cs_blob_get(vnode_t, cpu_type_t, off_t); + +/* apis to handle generation count for cs blob */ +void cs_blob_reset_cache(void); +int ubc_cs_blob_revalidate(vnode_t, struct cs_blob *); +int ubc_cs_generation_check(vnode_t); + int cs_entitlements_blob_get(proc_t, void **, size_t *); int cs_blob_get(proc_t, void **, size_t *); const char *cs_identity_get(proc_t); diff --git a/bsd/sys/ubc_internal.h b/bsd/sys/ubc_internal.h index 7f0644724..093b4a895 100644 --- a/bsd/sys/ubc_internal.h +++ b/bsd/sys/ubc_internal.h @@ -101,7 +101,6 @@ struct cs_blob { cpu_type_t csb_cpu_type; unsigned int csb_flags; off_t csb_base_offset; /* Offset of Mach-O binary in fat binary */ - off_t csb_blob_offset; /* offset of blob itself, from csb_base_offset */ off_t csb_start_offset; /* Blob coverage area start, from csb_base_offset */ off_t csb_end_offset; /* Blob coverage area end, from csb_base_offset */ ipc_port_t csb_mem_handle; @@ -110,6 +109,8 @@ struct cs_blob { vm_address_t csb_mem_kaddr; unsigned char csb_sha1[SHA1_RESULTLEN]; unsigned int csb_sigpup; + const char *csb_teamid; + unsigned int csb_platform_binary; }; /* @@ -119,10 +120,11 @@ struct cs_blob { struct ubc_info { memory_object_t ui_pager; /* pager */ memory_object_control_t ui_control; /* VM control for the pager */ - uint32_t ui_flags; /* flags */ vnode_t ui_vnode; /* vnode for this ubc_info */ kauth_cred_t ui_ucred; /* holds credentials for NFS paging */ off_t ui_size; /* file size for the vnode */ + uint32_t ui_flags; /* flags */ + uint32_t cs_add_gen; /* 
generation count when csblob was validated */ struct cl_readahead *cl_rahead; /* cluster read ahead context */ struct cl_writebehind *cl_wbehind; /* cluster write behind context */ @@ -184,13 +186,16 @@ int ubc_isinuse_locked(vnode_t, int, int); int ubc_getcdhash(vnode_t, off_t, unsigned char *); +__attribute__((pure)) boolean_t ubc_is_mapped(const struct vnode *, boolean_t *writable); +__attribute__((pure)) boolean_t ubc_is_mapped_writable(const struct vnode *); + #ifdef XNU_KERNEL_PRIVATE -int UBCINFOEXISTS(vnode_t); +int UBCINFOEXISTS(const struct vnode *); #endif /* XNU_KERNEL_PRIVATE */ /* code signing */ struct cs_blob; -int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, off_t, vm_size_t); +int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t); int ubc_cs_sigpup_add(vnode_t, vm_address_t, vm_size_t); struct cs_blob *ubc_get_cs_blobs(vnode_t); void ubc_get_cs_mtime(vnode_t, struct timespec *); diff --git a/bsd/sys/unistd.h b/bsd/sys/unistd.h index 88f3cd574..5868b8ae0 100644 --- a/bsd/sys/unistd.h +++ b/bsd/sys/unistd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -173,4 +173,45 @@ struct accessx_descriptor { /* configurable system strings */ #define _CS_PATH 1 +#ifndef KERNEL +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL + +#include +#include +#include <_types/_uint64_t.h> +#include + +__BEGIN_DECLS + +int getattrlistbulk(int, void *, void *, size_t, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + +__END_DECLS + +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ + +#if __DARWIN_C_LEVEL >= 200809L + +#include +#include +#include +#include +#include +#include +#include + +__BEGIN_DECLS + +int faccessat(int, const char *, int, int) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int fchownat(int, const char *, uid_t, gid_t, int) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int linkat(int, const char *, int, const char *, int) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +ssize_t readlinkat(int, const char *, char *, size_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int symlinkat(const char *, int, const char *) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int unlinkat(int, const char *, int) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int getattrlistat(int, const char *, void *, void *, size_t, unsigned long) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + +__END_DECLS + +#endif /* __DARWIN_C_LEVEL >= 200809L */ +#endif /* !KERNEL */ + #endif /* !_SYS_UNISTD_H_ */ diff --git a/bsd/sys/user.h b/bsd/sys/user.h index 27ff9bdab..8fc6c578a 100644 --- a/bsd/sys/user.h +++ b/bsd/sys/user.h @@ -132,8 +132,8 @@ struct label; /* MAC label dummy struct */ struct uthread { /* syscall parameters, results and catches */ u_int64_t uu_arg[8]; /* arguments to current system call */ - int *uu_ap; /* pointer to arglist */ int uu_rval[2]; + unsigned int syscall_code; /* current syscall code */ /* thread exception handling */ mach_exception_code_t uu_code; /* ``code'' to trap */ @@ -145,9 +145,9 @@ struct uthread { struct _select_data { u_int64_t abstime; char * wql; - int poll; - int error; int count; + struct select_nocancel_args *args; /* original syscall arguments */ + int32_t *retval; /* place to store return val */ } ss_select_data; struct _kqueue_scan { kevent_callback_t call; /* per-event callback */ @@ -171,12 +171,22 @@ struct uthread { } uu_kauth; struct 
ksyn_waitq_element uu_kwe; /* user for pthread synch */ + + struct _waitid_data { + struct waitid_nocancel_args *args; /* original syscall arguments */ + int32_t *retval; /* place to store return val */ + } uu_waitid_data; + + struct _wait4_data { + struct wait4_nocancel_args *args; /* original syscall arguments */ + int32_t *retval; /* place to store return val */ + } uu_wait4_data; } uu_kevent; + /* Persistent memory allocations across system calls */ struct _select { u_int32_t *ibits, *obits; /* bits to select on */ uint nbytes; /* number of bytes in ibits and obits */ - struct _select_data *data; } uu_select; /* saved state for select() */ /* internal support for continuation framework */ @@ -216,7 +226,6 @@ struct uthread { struct kern_sigaltstack uu_sigstk; vnode_t uu_vreclaims; vnode_t uu_cdir; /* per thread CWD */ - int uu_notrigger; /* XXX - flag for autofs */ int uu_dupfd; /* fd in fdesc_open/dupfdopen */ int uu_defer_reclaims; diff --git a/bsd/sys/ux_exception.h b/bsd/sys/ux_exception.h index 9883cf6fa..836c658ca 100644 --- a/bsd/sys/ux_exception.h +++ b/bsd/sys/ux_exception.h @@ -61,7 +61,7 @@ #include -#if defined(__x86_64__) +#if defined(__x86_64__) || defined(__arm64__) extern mach_port_t ux_exception_port; #else extern mach_port_name_t ux_exception_port; diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index ea55aa920..72e1a8532 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,8 +81,14 @@ /* * Vnode types. VNON means no type. */ -enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD, VSTR, - VCPLX }; +enum vtype { + /* 0 */ + VNON, + /* 1 - 5 */ + VREG, VDIR, VBLK, VCHR, VLNK, + /* 6 - 10 */ + VSOCK, VFIFO, VBAD, VSTR, VCPLX +}; /* * Vnode tag types. @@ -90,10 +96,21 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD, VSTR, * and should NEVER be inspected by the kernel. */ enum vtagtype { - VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_MSDOSFS, VT_LFS, VT_LOFS, VT_FDESC, - VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, - VT_UNION, VT_HFS, VT_ZFS, VT_DEVFS, VT_WEBDAV, VT_UDF, VT_AFP, - VT_CDDA, VT_CIFS, VT_OTHER}; + /* 0 */ + VT_NON, + /* 1 reserved, overlaps with (CTL_VFS, VFS_NUMMNTOPS) */ + VT_UFS, + /* 2 - 5 */ + VT_NFS, VT_MFS, VT_MSDOSFS, VT_LFS, + /* 6 - 10 */ + VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, + /* 11 - 15 */ + VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, VT_MOCKFS, + /* 16 - 20 */ + VT_HFS, VT_ZFS, VT_DEVFS, VT_WEBDAV, VT_UDF, + /* 21 - 24 */ + VT_AFP, VT_CDDA, VT_CIFS, VT_OTHER +}; /* @@ -152,6 +169,7 @@ enum vtagtype { #define IO_SINGLE_WRITER 0x80000 #define IO_SYSCALL_DISPATCH 0x100000 /* I/O was originated from a file table syscall */ #define IO_SWAP_DISPATCH 0x200000 /* I/O was originated from the swap layer */ +#define IO_SKIP_ENCRYPTION 0x400000 /* Skips en(de)cryption on the IO. Must be initiated from kernel */ /* * Component Name: this structure describes the pathname @@ -199,7 +217,6 @@ struct componentname { * component name operational modifier flags */ #define FOLLOW 0x00000040 /* follow symbolic links */ -#define NOTRIGGER 0x10000000 /* don't trigger automounts */ /* * component name parameter descriptors. @@ -207,9 +224,6 @@ struct componentname { #define ISDOTDOT 0x00002000 /* current component name is .. 
*/ #define MAKEENTRY 0x00004000 /* entry is to be added to name cache */ #define ISLASTCN 0x00008000 /* this is last component of pathname */ -#define ISWHITEOUT 0x00020000 /* OBSOLETE: found whiteout */ -#define DOWHITEOUT 0x00040000 /* OBSOLETE: do whiteouts */ - /* The following structure specifies a vnode for creation */ struct vnode_fsparam { @@ -280,12 +294,6 @@ enum path_operation { OP_MAXOP /* anything beyond previous entry is invalid */ }; -/* - * is operation a traditional trigger (autofs)? - * 1 if trigger, 0 if no trigger - */ -extern int vfs_istraditionaltrigger(enum path_operation op, const struct componentname *cnp); - /*! @enum resolver status @abstract Constants defining resolver status @@ -477,6 +485,7 @@ struct vnode_trigger_param { #define VATTR_IS_SUPPORTED(v, a) ((v)->va_supported & VNODE_ATTR_ ## a) #define VATTR_CLEAR_ACTIVE(v, a) ((v)->va_active &= ~VNODE_ATTR_ ## a) #define VATTR_CLEAR_SUPPORTED(v, a) ((v)->va_supported &= ~VNODE_ATTR_ ## a) +#define VATTR_CLEAR_SUPPORTED_ALL(v) ((v)->va_supported = 0) #define VATTR_IS_ACTIVE(v, a) ((v)->va_active & VNODE_ATTR_ ## a) #define VATTR_ALL_SUPPORTED(v) (((v)->va_active & (v)->va_supported) == (v)->va_active) #define VATTR_INACTIVE_SUPPORTED(v) do {(v)->va_active &= ~(v)->va_supported; (v)->va_supported = 0;} while(0) @@ -525,6 +534,15 @@ struct vnode_trigger_param { #define VNODE_ATTR_va_dataprotect_class (1LL<<31) /* 80000000 */ #define VNODE_ATTR_va_dataprotect_flags (1LL<<32) /* 100000000 */ #define VNODE_ATTR_va_document_id (1LL<<33) /* 200000000 */ +#define VNODE_ATTR_va_devid (1LL<<34) /* 400000000 */ +#define VNODE_ATTR_va_objtype (1LL<<35) /* 800000000 */ +#define VNODE_ATTR_va_objtag (1LL<<36) /* 1000000000 */ +#define VNODE_ATTR_va_user_access (1LL<<37) /* 2000000000 */ +#define VNODE_ATTR_va_finderinfo (1LL<<38) /* 4000000000 */ +#define VNODE_ATTR_va_rsrc_length (1LL<<39) /* 8000000000 */ +#define VNODE_ATTR_va_rsrc_alloc (1LL<<40) /* 10000000000 */ +#define VNODE_ATTR_va_fsid64 (1LL<<41) /* 20000000000 */ +#define VNODE_ATTR_va_write_gencount (1LL<<42) /* 40000000000 */ #define VNODE_ATTR_BIT(n) (VNODE_ATTR_ ## n) /* @@ -546,7 +564,16 @@ struct vnode_trigger_param { VNODE_ATTR_BIT(va_type) | \ VNODE_ATTR_BIT(va_nchildren) | \ VNODE_ATTR_BIT(va_dirlinkcount) | \ - VNODE_ATTR_BIT(va_addedtime)) + VNODE_ATTR_BIT(va_addedtime) | \ + VNODE_ATTR_BIT(va_devid) | \ + VNODE_ATTR_BIT(va_objtype) | \ + VNODE_ATTR_BIT(va_objtag) | \ + VNODE_ATTR_BIT(va_user_access) | \ + VNODE_ATTR_BIT(va_finderinfo) | \ + VNODE_ATTR_BIT(va_rsrc_length) | \ + VNODE_ATTR_BIT(va_rsrc_alloc) | \ + VNODE_ATTR_BIT(va_fsid64) | \ + VNODE_ATTR_BIT(va_write_gencount)) /* * Attributes that can be applied to a new file object. */ @@ -567,6 +594,7 @@ struct vnode_trigger_param { VNODE_ATTR_BIT(va_dataprotect_flags) | \ VNODE_ATTR_BIT(va_document_id)) +#include struct vnode_attr { /* bitfields */ @@ -622,7 +650,6 @@ struct vnode_attr { uint64_t va_nchildren; /* Number of items in a directory */ uint64_t va_dirlinkcount; /* Real references to dir (i.e. excluding "." and ".." 
refs) */ - /* add new fields here only */ #ifdef BSD_KERNEL_PRIVATE struct kauth_acl *va_base_acl; #else @@ -633,7 +660,23 @@ struct vnode_attr { /* Data Protection fields */ uint32_t va_dataprotect_class; /* class specified for this file if it didn't exist */ uint32_t va_dataprotect_flags; /* flags from NP open(2) to the filesystem */ + + /* Document revision tracking */ uint32_t va_document_id; + + /* Fields for Bulk args */ + uint32_t va_devid; /* devid of filesystem */ + uint32_t va_objtype; /* type of object */ + uint32_t va_objtag; /* vnode tag of filesystem */ + uint32_t va_user_access; /* access for user */ + uint8_t va_finderinfo[32]; /* Finder Info */ + uint64_t va_rsrc_length; /* Resource Fork length */ + uint64_t va_rsrc_alloc; /* Resource Fork allocation size */ + fsid_t va_fsid64; /* fsid, of the correct type */ + + uint32_t va_write_gencount; /* counter that increments each time the file changes */ + + /* add new fields here only */ }; #ifdef BSD_KERNEL_PRIVATE @@ -1199,7 +1242,7 @@ proc_t vfs_context_proc(vfs_context_t); @abstract Get the credential associated with a vfs_context_t. @discussion Succeeds if and only if the context has a thread, the thread has a task, and the task has a BSD proc. @param ctx Context whose associated process to find. - @return Process if available, NULL otherwise. + @returns credential if process available; NULL otherwise */ kauth_cred_t vfs_context_ucred(vfs_context_t); @@ -1598,7 +1641,6 @@ int vn_getpath(struct vnode *vp, char *pathbuf, int *len); */ #define VNODE_LOOKUP_NOFOLLOW 0x01 #define VNODE_LOOKUP_NOCROSSMOUNT 0x02 -#define VNODE_LOOKUP_DOWHITEOUT 0x04 /* OBSOLETE */ /*! @function vnode_lookup @abstract Convert a path into a vnode. @@ -1846,6 +1888,35 @@ void vnode_putname(const char *name); */ vnode_t vnode_getparent(vnode_t vp); +/*! + @function vnode_setdirty + @abstract Mark the vnode as having data or metadata that needs to be written out during reclaim + @discussion The vnode should be marked as dirty anytime a file system defers flushing of data or meta-data associated with it. + @param the vnode to mark as dirty + @return 0 if successful else an error code. + */ +int vnode_setdirty(vnode_t); + +/*! + @function vnode_cleardirty + @abstract Mark the vnode as clean i.e. all its data or metadata has been flushed + @discussion The vnode should be marked as clean whenever the file system is done flushing data or meta-data associated with it. + @param the vnode to clear as being dirty + @return 0 if successful else an error code. + */ +int vnode_cleardirty(vnode_t); + +/*! + @function vnode_isdirty + @abstract Determine if a vnode is marked dirty. + @discussion The vnode should be marked as clean whenever the file system is done flushing data or meta-data associated with it. + @param vp the vnode to test. + @return Non-zero if the vnode is dirty, 0 otherwise. + */ +int vnode_isdirty(vnode_t); + + + #ifdef KERNEL_PRIVATE /*! @function vnode_lookup_continue_needed @@ -1895,6 +1966,7 @@ int vnode_makeimode(int, int); enum vtype vnode_iftovt(int); int vnode_vttoif(enum vtype); int vnode_isshadow(vnode_t); +boolean_t vnode_on_reliable_media(vnode_t); /* * Indicate that a file has multiple hard links. VFS will always call * VNOP_LOOKUP on this vnode. 
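Several of the new vnode_attr fields above (va_write_gencount, va_rsrc_length, and friends) are reachable through the ordinary vnode_getattr() path as well as the bulk call. A kernel-side sketch of requesting one, with the usual supported-attribute check:

#include <sys/vnode.h>
#include <sys/errno.h>

static errno_t get_write_gencount(vnode_t vp, vfs_context_t ctx, uint32_t *gen)
{
    struct vnode_attr va;
    errno_t err;

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_write_gencount);   /* new in this revision */
    err = vnode_getattr(vp, &va, ctx);
    if (err)
        return err;
    /* A filesystem may legitimately not supply the attribute. */
    if (!VATTR_IS_SUPPORTED(&va, va_write_gencount))
        return ENOTSUP;
    *gen = va.va_write_gencount;
    return 0;
}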
Volfs will always ask for its parent @@ -1924,9 +1996,43 @@ void vnode_setnoflush(vnode_t); void vnode_clearnoflush(vnode_t); /* XXX temporary until we can arrive at a KPI for NFS, Seatbelt */ thread_t vfs_context_thread(vfs_context_t); - +#if CONFIG_IOSCHED +vnode_t vnode_mountdevvp(vnode_t); +#endif #endif /* BSD_KERNEL_PRIVATE */ +/* + * Helper functions for implementing VNOP_GETATTRLISTBULK for a filesystem + */ + +/*! + @function vfs_setup_vattr_from_attrlist + @abstract Setup a vnode_attr structure given an attrlist structure. + @discussion Used by a VNOP_GETATTRLISTBULK implementation to set up a vnode_attr structure from an attribute list. It also returns the fixed size of the attribute buffer required. + @param alp Pointer to attribute list structure. + @param vap Pointer to vnode_attr structure. + @param obj_vtype Type of object - If VNON is passed, then the type is ignored and common, file and dir attrs are used to initialise the vattrs. If set to VDIR, only common and directory attributes are used. For all other types, only common and file attributes are used. + @param attr_fixed_sizep Returns the fixed length required in the attribute buffer for the object. NULL should be passed if it is not required. + @param ctx vfs context of caller. + @return error. + */ +errno_t vfs_setup_vattr_from_attrlist(struct attrlist * /* alp */, struct vnode_attr * /* vap */, enum vtype /* obj_vtype */, ssize_t * /* attr_fixed_sizep */, vfs_context_t /* ctx */); + +/*! + @function vfs_attr_pack + @abstract Pack a vnode_attr structure into a buffer in the same format as getattrlist(2). + @discussion Used by a VNOP_GETATTRLISTBULK implementation to pack the data provided in a vnode_attr structure into a buffer the way getattrlist(2) does. + @param vp If available, the vnode for which the attributes are being given; NULL if the vnode is not available (which will usually be the case for a VNOP_GETATTRLISTBULK implementation). + @param uio A uio_t initialised with one iovec. + @param alp Pointer to an attrlist structure. + @param options Options for the call (same as the options for getattrlistbulk(2)). + @param vap Pointer to a filled in vnode_attr structure. Data from the vnode_attr structure will be used to copy and lay out the data in the required format for getattrlistbulk(2) by this function. + @param fndesc Currently unused. + @param ctx vfs context of caller. + @return error. + */ +errno_t vfs_attr_pack(vnode_t /* vp */, uio_t /* uio */, struct attrlist * /* alp */, uint64_t /* options */, struct vnode_attr * /* vap */, void * /* fndesc */, vfs_context_t /* ctx */); + __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/vnode_if.h b/bsd/sys/vnode_if.h index 1c8796d13..f2a0be407 100644 --- a/bsd/sys/vnode_if.h +++ b/bsd/sys/vnode_if.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved.
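vfs_setup_vattr_from_attrlist() and vfs_attr_pack() above are the filesystem-side halves of the new getattrlistbulk(2). For orientation, a userspace sketch of the consuming loop; the entry header below follows the standard getattrlist packing rules for the specific attributes requested (length word, then the returned-attributes set, then fixed-size fields in canonical order), and for brevity assumes the filesystem returns both requested attributes:

#include <sys/attr.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

typedef struct {
    uint32_t        length;     /* total length of this entry */
    attribute_set_t returned;   /* ATTR_CMN_RETURNED_ATTRS */
    attrreference_t name_info;  /* ATTR_CMN_NAME */
    fsobj_type_t    obj_type;   /* ATTR_CMN_OBJTYPE */
} entry_hdr_t;

static int list_dir(const char *path)
{
    char attrbuf[64 * 1024];
    struct attrlist al;
    int dfd, count;

    dfd = open(path, O_RDONLY | O_DIRECTORY);
    if (dfd < 0)
        return -1;
    memset(&al, 0, sizeof(al));
    al.bitmapcount = ATTR_BIT_MAP_COUNT;
    /* ATTR_CMN_RETURNED_ATTRS is mandatory for the bulk call. */
    al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_NAME | ATTR_CMN_OBJTYPE;

    while ((count = getattrlistbulk(dfd, &al, attrbuf,
                                    sizeof(attrbuf), 0)) > 0) {
        char *entry = attrbuf;
        for (int i = 0; i < count; i++) {
            entry_hdr_t *hdr = (entry_hdr_t *)entry;
            /* Names live at an offset relative to their attrreference_t,
             * exactly as with getattrlist(2). */
            char *name = (char *)&hdr->name_info +
                         hdr->name_info.attr_dataoffset;
            printf("%s (type %u)\n", name, hdr->obj_type);
            entry += hdr->length;   /* next variable-length entry */
        }
    }
    close(dfd);
    return (count < 0) ? -1 : 0;
}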
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -100,7 +100,7 @@ extern struct vnodeop_desc vnop_compound_mkdir_desc; extern struct vnodeop_desc vnop_compound_rmdir_desc; #endif /* KERNEL_PRIVATE */ extern struct vnodeop_desc vnop_create_desc; -extern struct vnodeop_desc vnop_whiteout_desc; +extern struct vnodeop_desc vnop_whiteout_desc; // obsolete extern struct vnodeop_desc vnop_mknod_desc; extern struct vnodeop_desc vnop_open_desc; extern struct vnodeop_desc vnop_close_desc; @@ -124,6 +124,7 @@ extern struct vnodeop_desc vnop_rmdir_desc; extern struct vnodeop_desc vnop_symlink_desc; extern struct vnodeop_desc vnop_readdir_desc; extern struct vnodeop_desc vnop_readdirattr_desc; +extern struct vnodeop_desc vnop_getattrlistbulk_desc; extern struct vnodeop_desc vnop_readlink_desc; extern struct vnodeop_desc vnop_inactive_desc; extern struct vnodeop_desc vnop_reclaim_desc; @@ -214,7 +215,7 @@ struct vnop_whiteout_args { /*! @function VNOP_WHITEOUT - @abstract Call down to a filesystem to create a whiteout. + @abstract Obsolete - no longer supported. @discussion Whiteouts are used to support the union filesystem, whereby one filesystem is mounted "transparently" on top of another. A whiteout in the upper layer of a union mount is a "deletion" of a file in the lower layer; lookups will catch the whiteout and fail, setting ISWHITEOUT in the componentname structure, even if an underlying @@ -295,9 +296,6 @@ struct vnop_compound_open_args { void *a_reserved; }; -/* Control flags */ -#define VNOP_COMPOUND_OPEN_DO_CREATE 0x00000001 - /* Results */ #define COMPOUND_OPEN_STATUS_DID_CREATE 0x00000001 #endif /* KERNEL_PRIVATE */ @@ -955,6 +953,38 @@ struct vnop_readdirattr_args { extern errno_t VNOP_READDIRATTR(vnode_t, struct attrlist *, struct uio *, uint32_t, uint32_t, uint32_t *, int *, uint32_t *, vfs_context_t); #endif /* XNU_KERNEL_PRIVATE */ +struct vnop_getattrlistbulk_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct attrlist *a_alist; + struct vnode_attr *a_vap; + struct uio *a_uio; + void *a_private; + uint64_t a_options; + int32_t *a_eofflag; + int32_t *a_actualcount; + vfs_context_t a_context; +}; + +/*! + @function VNOP_GETATTRLISTBULK + @abstract Call down to get file attributes for many files in a directory at once. + @discussion VNOP_GETATTRLISTBULK() packs a buffer with file attributes, as if the results of many "getattrlist" calls. + @param vp Directory in which to enumerate entries' attributes. + @param alist Which attributes are wanted for each directory entry. + @param uio Destination information for resulting attributes. + @param vap initialised vnode_attr structure pointer. This structure also has memory allocated (MAXPATHLEN bytes) and assigned to the va_name field for filesystems to use. + @param private reserved for future use. + @param options + @param eofflag Should be set to 1 if the end of the directory has been reached. + @param actualcount Should be set to number of files whose attributes were written into buffer. + @param ctx Context to authenticate for getattrlistbulk request. + @return 0 for success, else an error code. 
+ */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_GETATTRLISTBULK(vnode_t, struct attrlist *, struct vnode_attr *, uio_t, void *, uint64_t, int32_t *, int32_t *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_readlink_args { struct vnodeop_desc *a_desc; vnode_t a_vp; diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h index e57f25676..a7f7b9cb2 100644 --- a/bsd/sys/vnode_internal.h +++ b/bsd/sys/vnode_internal.h @@ -259,6 +259,47 @@ struct vnode { #define VOPENEVT 0x800000 /* if process is P_CHECKOPENEVT, then or in the O_EVTONLY flag on open */ #define VNEEDSSNAPSHOT 0x1000000 #define VNOCS 0x2000000 /* is there no code signature available */ +#define VISDIRTY 0x4000000 /* vnode will need IO if reclaimed */ + +/* + * This structure describes vnode data which is specific to a file descriptor. + * It is currently used only for file descriptors which are opened for + * directories. + */ +struct fd_vn_data { + lck_mtx_t fv_lock; /* Used to serialize directory enumeration on fd */ + off_t fv_offset; /* Offset to be used */ + void *fv_dircookie; /* If FS doesn't like offsets in directories */ + caddr_t fv_buf; /* Temporary buffer to store directory entries */ + size_t fv_bufsiz; /* Valid size of fv_buf */ + size_t fv_bufdone; /* How much of fv_buf is processed */ + size_t fv_bufallocsiz; /* Allocation size determined for buffer */ + off_t fv_soff; /* Starting FS offset for this buffer */ + off_t fv_eoff; /* Ending FS offset for this buffer */ + int fv_eofflag; /* Does fv_eoff represent EOF? */ +}; + +/* + * FV_DIRBUF_START_SIZ is the initial size of the buffer passed to VNOP_READDIR. + * That may not be enough for some filesystems, so the current algorithm works its + * way up to FV_DIRBUF_MAX_SIZ. + */ +#define FV_DIRBUF_DIRENTRY_SIZ (sizeof(struct direntry)) +#define FV_DIRBUF_START_SIZ FV_DIRBUF_DIRENTRY_SIZ +#define FV_DIRBUF_MAX_SIZ (4*(sizeof(struct direntry))) + +#define FV_LOCK(fvd) lck_mtx_lock(&(((struct fd_vn_data *)fvd)->fv_lock)) +#define FV_UNLOCK(fvd) lck_mtx_unlock(&(((struct fd_vn_data *)fvd)->fv_lock)) +#define FV_BUF_FREE(fvd, tag) \ +do { \ + FREE(fvd->fv_buf, tag); \ + fvd->fv_buf = NULL; \ + fvd->fv_bufsiz = 0; \ + fvd->fv_bufdone = 0; \ + fvd->fv_soff = 0; \ + fvd->fv_eoff = 0; \ + fvd->fv_eofflag = 0; \ +} while (0); /* * Global vnode data. @@ -483,6 +524,7 @@ errno_t vnode_suspend(vnode_t); errno_t vnode_mtime(vnode_t, struct timespec *, vfs_context_t); +errno_t vnode_flags(vnode_t, uint32_t *, vfs_context_t); errno_t vnode_size(vnode_t, off_t *, vfs_context_t); errno_t vnode_setsize(vnode_t, off_t, int ioflag, vfs_context_t); @@ -525,16 +567,20 @@ void vnode_unlock(vnode_t); */ void vfs_op_init(void); void vfs_opv_init(void); -int vfs_sysctl(int *name, uint32_t namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, struct proc *p); -int sysctl_vnode(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); #ifdef BSD_KERNEL_PRIVATE +int vfs_sysctl_node SYSCTL_HANDLER_ARGS; void vnode_setneedinactive(vnode_t); int vnode_hasnamedstreams(vnode_t); /* Does this vnode have associated named streams?
*/ void nspace_proc_exit(struct proc *p); +errno_t +vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, + int *numdirent, vfs_context_t ctxp); + +void vnode_setswapmount(vnode_t); + #if CONFIG_TRIGGERS /* VFS Internal Vnode Trigger Interfaces (Private) */ int vnode_trigger_resolve(vnode_t, struct nameidata *, vfs_context_t); diff --git a/bsd/uxkern/ux_exception.c b/bsd/uxkern/ux_exception.c index 3175c3163..c3313018c 100644 --- a/bsd/uxkern/ux_exception.c +++ b/bsd/uxkern/ux_exception.c @@ -90,7 +90,7 @@ static void ux_exception(int exception, mach_exception_code_t code, mach_exception_subcode_t subcode, int *ux_signal, mach_exception_code_t *ux_code); -#if defined(__x86_64__) +#if defined(__x86_64__) || defined(__arm64__) mach_port_t ux_exception_port; #else mach_port_name_t ux_exception_port; diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c index 200cb51c4..1a71f6b69 100644 --- a/bsd/vfs/kpi_vfs.c +++ b/bsd/vfs/kpi_vfs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -476,7 +476,7 @@ vfs_isreload(mount_t mp) int vfs_isforce(mount_t mp) { - if ((mp->mnt_lflag & MNT_LFORCE) || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)) + if (mp->mnt_lflag & MNT_LFORCE) return(1); else return(0); @@ -822,7 +822,7 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_vfsops = vfe->vfe_vfsops; strncpy(&newvfstbl->vfc_name[0], vfe->vfe_fsname, MFSNAMELEN); if ((vfe->vfe_flags & VFS_TBLNOTYPENUM)) - newvfstbl->vfc_typenum = maxvfsconf++; + newvfstbl->vfc_typenum = maxvfstypenum++; else newvfstbl->vfc_typenum = vfe->vfe_fstypenum; @@ -872,6 +872,7 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_descptr = descptr; newvfstbl->vfc_descsize = descsize; + newvfstbl->vfc_sysctl = NULL; for (i= 0; i< desccount; i++ ) { opv_desc_vector_p = vfe->vfe_opvdescs[i]->opv_desc_vector_p; @@ -941,8 +942,8 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) *handle = vfstable_add(newvfstbl); - if (newvfstbl->vfc_typenum <= maxvfsconf ) - maxvfsconf = newvfstbl->vfc_typenum + 1; + if (newvfstbl->vfc_typenum <= maxvfstypenum ) + maxvfstypenum = newvfstbl->vfc_typenum + 1; if (newvfstbl->vfc_vfsops->vfs_init) { struct vfsconf vfsc; @@ -1371,6 +1372,17 @@ vnode_mount(vnode_t vp) return (vp->v_mount); } +#if CONFIG_IOSCHED +vnode_t +vnode_mountdevvp(vnode_t vp) +{ + if (vp->v_mount) + return (vp->v_mount->mnt_devvp); + else + return ((vnode_t)0); +} +#endif + mount_t vnode_mountedhere(vnode_t vp) { @@ -2800,7 +2812,7 @@ VNOP_COMPOUND_OPEN(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t fla uint32_t tmp_status = 0; struct componentname *cnp = &ndp->ni_cnd; - want_create = (flags & VNOP_COMPOUND_OPEN_DO_CREATE); + want_create = (flags & O_CREAT); a.a_desc = &vnop_compound_open_desc; a.a_dvp = dvp; @@ -2926,23 +2938,10 @@ struct vnop_whiteout_args { }; #endif /* 0*/ errno_t -VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t ctx) +VNOP_WHITEOUT(__unused vnode_t dvp, __unused struct componentname *cnp, + __unused int flags, __unused vfs_context_t ctx) { - int _err; - struct vnop_whiteout_args a; - - a.a_desc = &vnop_whiteout_desc; - a.a_dvp = dvp; - a.a_cnp = cnp; - a.a_flags = flags; - a.a_context = ctx; - - _err = (*dvp->v_op[vnop_whiteout_desc.vdesc_offset])(&a); - DTRACE_FSINFO(whiteout, vnode_t, dvp); - - post_event_if_success(dvp, _err, NOTE_WRITE); - - return (_err); + return 
(ENOTSUP); // XXX OBSOLETE } #if 0 @@ -3235,7 +3234,7 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) #endif if (ctx == NULL) { - ctx = vfs_context_current(); + return EINVAL; } a.a_desc = &vnop_read_desc; @@ -3276,7 +3275,7 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) #endif if (ctx == NULL) { - ctx = vfs_context_current(); + return EINVAL; } a.a_desc = &vnop_write_desc; @@ -3726,6 +3725,7 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s char *xtoname = NULL; #endif /* CONFIG_APPLEDOUBLE */ int batched; + uint32_t tdfflags; // Target directory file flags batched = vnode_compound_rename_available(fdvp); @@ -3734,6 +3734,13 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s panic("Not batched, and no fvp?"); } +#if CONFIG_SECLUDED_RENAME + if ((fcnp->cn_flags & CN_SECLUDE_RENAME) && + (((*fvpp)->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_SECLUDE_RENAME) == 0)) { + return ENOTSUP; + } +#endif + #if CONFIG_APPLEDOUBLE /* * We need to preflight any potential AppleDouble file for the source file @@ -3825,6 +3832,24 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s } #endif + /* + * If moved to a new directory that is restricted, + * set the restricted flag on the item moved. + */ + if (_err == 0) { + _err = vnode_flags(tdvp, &tdfflags, ctx); + if (_err == 0 && (tdfflags & SF_RESTRICTED)) { + uint32_t fflags; + _err = vnode_flags(*fvpp, &fflags, ctx); + if (_err == 0 && !(fflags & SF_RESTRICTED)) { + struct vnode_attr va; + VATTR_INIT(&va); + VATTR_SET(&va, va_flags, fflags | SF_RESTRICTED); + _err = vnode_setattr(*fvpp, &va, ctx); + } + } + } + #if CONFIG_APPLEDOUBLE /* * Rename any associated extended attribute file (._ AppleDouble file). @@ -4610,6 +4635,49 @@ VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, uint return (_err); } +#if 0 +struct vnop_getttrlistbulk_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct attrlist *a_alist; + struct vnode_attr *a_vap; + struct uio *a_uio; + void *a_private + uint64_t a_options; + int *a_eofflag; + uint32_t *a_actualcount; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_GETATTRLISTBULK(struct vnode *vp, struct attrlist *alist, + struct vnode_attr *vap, struct uio *uio, void *private, uint64_t options, + int32_t *eofflag, int32_t *actualcount, vfs_context_t ctx) +{ + int _err; + struct vnop_getattrlistbulk_args a; +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif + + a.a_desc = &vnop_getattrlistbulk_desc; + a.a_vp = vp; + a.a_alist = alist; + a.a_vap = vap; + a.a_uio = uio; + a.a_private = private; + a.a_options = options; + a.a_eofflag = eofflag; + a.a_actualcount = actualcount; + a.a_context = ctx; + + _err = (*vp->v_op[vnop_getattrlistbulk_desc.vdesc_offset])(&a); + DTRACE_FSINFO_IO(getattrlistbulk, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); + + return (_err); +} + #if 0 /* *# diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c index ca03027b2..e06df1640 100644 --- a/bsd/vfs/vfs_attrlist.c +++ b/bsd/vfs/vfs_attrlist.c @@ -37,9 +37,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -57,17 +59,6 @@ #define ATTR_TIME_SIZE -1 -/* - * SPI. 
- */ -#define FSOPT_ATTRLIST_EXTENDED 0x00000020 - -/* Valid only if FSOPT_ATTRLIST_EXTENDED is set */ -#define ATTR_CMN_GEN_COUNT 0x00080000 /* same as ATTR_CMN_NAMEDATTRCOUNT */ -#define ATTR_CMN_DOCUMENT_ID 0x00100000 /* same as ATTR_CMN_NAMEDATTRLIST */ - -#define ATTR_CMN_ERROR 0x20000000 - /* * Structure describing the state of an in-progress attrlist operation. */ @@ -128,9 +119,8 @@ attrlist_pack_fixed(struct _attrlist_buf *ab, void *source, ssize_t count) */ static void attrlist_pack_variable2(struct _attrlist_buf *ab, const void *source, ssize_t count, - const void *ext, ssize_t extcount) + const void *ext, ssize_t extcount) { - /* Use ssize_t's for pointer math ease */ struct attrreference ar; ssize_t fit; @@ -234,6 +224,8 @@ attrlist_pack_string(struct _attrlist_buf *ab, const char *source, ssize_t count space = ab->allocated - (ab->varcursor - ab->base); fit = lmin(count, space); if (space > 0) { + int bytes_to_zero; + /* * If there is space remaining, copy data in, and * accommodate the trailing NUL terminator. @@ -250,6 +242,14 @@ attrlist_pack_string(struct _attrlist_buf *ab, const char *source, ssize_t count if (space > fit) { ab->varcursor[fit++] = '\0'; /* 'fit' now the number of bytes AFTER adding in the NUL */ + /* + * Zero out any additional bytes we might have as a + * result of rounding up. + */ + bytes_to_zero = min((roundup(fit, 4) - fit), + space - fit); + if (bytes_to_zero) + bzero(&(ab->varcursor[fit]), bytes_to_zero); } } /* @@ -490,40 +490,7 @@ static struct getattrlist_attrtab getattrlist_common_tab[] = { {ATTR_CMN_GRPID, VATTR_BIT(va_gid), sizeof(gid_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_ACCESSMASK, VATTR_BIT(va_mode), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_FLAGS, VATTR_BIT(va_flags), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_USERACCESS, 0, sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_EXTENDED_SECURITY, VATTR_BIT(va_acl), sizeof(struct attrreference), KAUTH_VNODE_READ_SECURITY}, - {ATTR_CMN_UUID, VATTR_BIT(va_uuuid), sizeof(guid_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_GRPUUID, VATTR_BIT(va_guuid), sizeof(guid_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_FILEID, VATTR_BIT(va_fileid), sizeof(uint64_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_PARENTID, VATTR_BIT(va_parentid), sizeof(uint64_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_FULLPATH, 0, sizeof(struct attrreference), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_ADDEDTIME, VATTR_BIT(va_addedtime), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_RETURNED_ATTRS, 0, sizeof(attribute_set_t), 0}, - {ATTR_CMN_ERROR, 0, sizeof(uint32_t), 0}, - {0, 0, 0, 0} -}; - -static struct getattrlist_attrtab getattrlist_common_tab_extended[] = { - {ATTR_CMN_NAME, VATTR_BIT(va_name), sizeof(struct attrreference), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_DEVID, 0, sizeof(dev_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_FSID, VATTR_BIT(va_fsid), sizeof(fsid_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_OBJTYPE, 0, sizeof(fsobj_type_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_OBJTAG, 0, sizeof(fsobj_tag_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_OBJID, VATTR_BIT(va_fileid) | VATTR_BIT(va_linkid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_OBJPERMANENTID, VATTR_BIT(va_fileid) | VATTR_BIT(va_linkid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_PAROBJID, VATTR_BIT(va_parentid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_SCRIPT, VATTR_BIT(va_encoding), sizeof(text_encoding_t), 
KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_CRTIME, VATTR_BIT(va_create_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_MODTIME, VATTR_BIT(va_modify_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_CHGTIME, VATTR_BIT(va_change_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_ACCTIME, VATTR_BIT(va_access_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_BKUPTIME, VATTR_BIT(va_backup_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_FNDRINFO, 0, 32, KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_OWNERID, VATTR_BIT(va_uid), sizeof(uid_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_GRPID, VATTR_BIT(va_gid), sizeof(gid_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_ACCESSMASK, VATTR_BIT(va_mode), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_FLAGS, VATTR_BIT(va_flags), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_GEN_COUNT, VATTR_BIT(va_gen), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_GEN_COUNT, VATTR_BIT(va_write_gencount), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_DOCUMENT_ID, VATTR_BIT(va_document_id), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_USERACCESS, 0, sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_EXTENDED_SECURITY, VATTR_BIT(va_acl), sizeof(struct attrreference), KAUTH_VNODE_READ_SECURITY}, @@ -532,9 +499,10 @@ static struct getattrlist_attrtab getattrlist_common_tab_extended[] = { {ATTR_CMN_FILEID, VATTR_BIT(va_fileid), sizeof(uint64_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_PARENTID, VATTR_BIT(va_parentid), sizeof(uint64_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_FULLPATH, 0, sizeof(struct attrreference), KAUTH_VNODE_READ_ATTRIBUTES}, - {ATTR_CMN_ADDEDTIME, VATTR_BIT(va_addedtime), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_ADDEDTIME, VATTR_BIT(va_addedtime), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_RETURNED_ATTRS, 0, sizeof(attribute_set_t), 0}, {ATTR_CMN_ERROR, 0, sizeof(uint32_t), 0}, + {ATTR_CMN_DATA_PROTECT_FLAGS, VATTR_BIT(va_dataprotect_class), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, {0, 0, 0, 0} }; @@ -557,6 +525,31 @@ static struct getattrlist_attrtab getattrlist_file_tab[] = { {0, 0, 0, 0} }; +/* + * This table is for attributes which are only set from the getattrlistbulk(2) + * call. These attributes have already been set from the common, file and + * directory tables but the vattr bits have not been recorded. Since these + * vattr bits are only used from the bulk call, we have a separate table for + * these. + * The sizes are not returned from here since the sizes have already been + * accounted for in the common, file and directory tables.
+ */ +static struct getattrlist_attrtab getattrlistbulk_common_tab[] = { + {ATTR_CMN_DEVID, VATTR_BIT(va_devid), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_FSID, VATTR_BIT(va_fsid64), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_OBJTYPE, VATTR_BIT(va_objtype), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_OBJTAG, VATTR_BIT(va_objtag), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_USERACCESS, VATTR_BIT(va_user_access), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_FNDRINFO, VATTR_BIT(va_finderinfo), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {0, 0, 0, 0} +}; + +static struct getattrlist_attrtab getattrlistbulk_file_tab[] = { + {ATTR_FILE_RSRCLENGTH, VATTR_BIT(va_rsrc_length), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_RSRCALLOCSIZE, VATTR_BIT(va_rsrc_alloc), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {0, 0, 0, 0} +}; + /* * The following are attributes that VFS can derive. * @@ -580,9 +573,11 @@ static struct getattrlist_attrtab getattrlist_file_tab[] = { ATTR_CMN_ACCESSMASK | ATTR_CMN_FLAGS | \ ATTR_CMN_USERACCESS | ATTR_CMN_FILEID | \ ATTR_CMN_PARENTID | ATTR_CMN_RETURNED_ATTRS | \ - ATTR_CMN_DOCUMENT_ID | ATTR_CMN_GEN_COUNT) + ATTR_CMN_DOCUMENT_ID | ATTR_CMN_GEN_COUNT | \ + ATTR_CMN_DATA_PROTECT_FLAGS) -#define VFS_DFLT_ATTR_CMN_EXT (ATTR_CMN_EXT_GEN_COUNT | ATTR_CMN_EXT_DOCUMENT_ID) +#define VFS_DFLT_ATT_CMN_EXT (ATTR_CMN_EXT_GEN_COUNT | ATTR_CMN_EXT_DOCUMENT_ID |\ + ATTR_CMN_EXT_DATA_PROTECT_FLAGS) #define VFS_DFLT_ATTR_DIR (ATTR_DIR_LINKCOUNT | ATTR_DIR_MOUNTSTATUS) @@ -593,8 +588,9 @@ static struct getattrlist_attrtab getattrlist_file_tab[] = { ATTR_FILE_RSRCALLOCSIZE) static int -getattrlist_parsetab(struct getattrlist_attrtab *tab, attrgroup_t attrs, struct vnode_attr *vap, - ssize_t *sizep, kauth_action_t *actionp, int is_64bit) +getattrlist_parsetab(struct getattrlist_attrtab *tab, attrgroup_t attrs, + struct vnode_attr *vap, ssize_t *sizep, kauth_action_t *actionp, + int is_64bit) { attrgroup_t recognised; @@ -603,17 +599,23 @@ getattrlist_parsetab(struct getattrlist_attrtab *tab, attrgroup_t attrs, struct /* is this attribute set? */ if (tab->attr & attrs) { recognised |= tab->attr; - vap->va_active |= tab->bits; - if (tab->size == ATTR_TIME_SIZE) { - if (is_64bit) { - *sizep += sizeof(struct user64_timespec); + if (vap) + vap->va_active |= tab->bits; + if (sizep) { + if (tab->size == ATTR_TIME_SIZE) { + if (is_64bit) { + *sizep += sizeof( + struct user64_timespec); + } else { + *sizep += sizeof( + struct user32_timespec); + } } else { - *sizep += sizeof(struct user32_timespec); + *sizep += tab->size; } - } else { - *sizep += tab->size; } - *actionp |= tab->action; + if (actionp) + *actionp |= tab->action; if (attrs == recognised) break; /* all done, get out */ } @@ -630,23 +632,17 @@ getattrlist_parsetab(struct getattrlist_attrtab *tab, attrgroup_t attrs, struct * the data from a filesystem. */ static int -getattrlist_setupvattr(struct attrlist *alp, int attr_cmn_extended, struct vnode_attr *vap, ssize_t *sizep, kauth_action_t *actionp, int is_64bit, int isdir) +getattrlist_setupvattr(struct attrlist *alp, struct vnode_attr *vap, ssize_t *sizep, kauth_action_t *actionp, int is_64bit, int isdir) { int error; - struct getattrlist_attrtab *cmn_tab; - - if (attr_cmn_extended) - cmn_tab = getattrlist_common_tab_extended; - else - cmn_tab = getattrlist_common_tab; /* * Parse the above tables. 
*/ *sizep = sizeof(uint32_t); /* length count */ *actionp = 0; if (alp->commonattr && - (error = getattrlist_parsetab(cmn_tab, alp->commonattr, vap, sizep, actionp, is_64bit)) != 0) + (error = getattrlist_parsetab(getattrlist_common_tab, alp->commonattr, vap, sizep, actionp, is_64bit)) != 0) return(error); if (isdir && alp->dirattr && (error = getattrlist_parsetab(getattrlist_dir_tab, alp->dirattr, vap, sizep, actionp, is_64bit)) != 0) @@ -658,6 +654,68 @@ getattrlist_setupvattr(struct attrlist *alp, int attr_cmn_extended, struct vnode return(0); } +/* + * Given the attributes listed in alp, configure vap to request + * the data from a filesystem. + */ +static int +getattrlist_setupvattr_all(struct attrlist *alp, struct vnode_attr *vap, + enum vtype obj_type, ssize_t *fixedsize, int is_64bit) +{ + int error = 0; + + /* + * Parse the above tables. + */ + if (fixedsize) { + *fixedsize = sizeof(uint32_t); + } + if (alp->commonattr) { + error = getattrlist_parsetab(getattrlist_common_tab, + alp->commonattr, vap, fixedsize, NULL, is_64bit); + + if (!error) { + /* Ignore any errors from the bulk table */ + (void)getattrlist_parsetab(getattrlistbulk_common_tab, + alp->commonattr, vap, fixedsize, NULL, is_64bit); + /* + * turn off va_fsid since we will be using only + * va_fsid64 for ATTR_CMN_FSID. + */ + VATTR_CLEAR_ACTIVE(vap, va_fsid); + } + } + + if (!error && (obj_type == VNON || obj_type == VDIR) && alp->dirattr) { + error = getattrlist_parsetab(getattrlist_dir_tab, alp->dirattr, + vap, fixedsize, NULL, is_64bit); + } + + if (!error && (obj_type != VDIR) && alp->fileattr) { + error = getattrlist_parsetab(getattrlist_file_tab, + alp->fileattr, vap, fixedsize, NULL, is_64bit); + + if (!error) { + /* Ignore any errors from the bulk table */ + (void)getattrlist_parsetab(getattrlistbulk_file_tab, + alp->fileattr, vap, fixedsize, NULL, is_64bit); + } + } + + return (error); +} + +int +vfs_setup_vattr_from_attrlist(struct attrlist *alp, struct vnode_attr *vap, + enum vtype obj_vtype, ssize_t *attrs_fixed_sizep, vfs_context_t ctx) +{ + return (getattrlist_setupvattr_all(alp, vap, obj_vtype, + attrs_fixed_sizep, IS_64BIT_PROCESS(vfs_context_proc(ctx)))); +} + + + + /* * Given the attributes listed in asp and those supported * in the vap, fixup the asp attributes to reflect any @@ -783,8 +841,9 @@ getattrlist_findnamecomp(const char *mn, const char **np, ssize_t *nl) static int -getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, - vfs_context_t ctx, int is_64bit) +getvolattrlist(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, + user_addr_t attributeBuffer, size_t bufferSize, uint64_t options, + enum uio_seg segflg, int is_64bit) { struct vfs_attr vs; struct vnode_attr va; @@ -804,9 +863,10 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, vs.f_vol_name = NULL; mnt = vp->v_mount; + /* Check for special packing semantics */ return_valid = (alp->commonattr & ATTR_CMN_RETURNED_ATTRS); - pack_invalid = (uap->options & FSOPT_PACK_INVAL_ATTRS); + pack_invalid = (options & FSOPT_PACK_INVAL_ATTRS); if (pack_invalid) { /* FSOPT_PACK_INVAL_ATTRS requires ATTR_CMN_RETURNED_ATTRS */ if (!return_valid) { @@ -1013,13 +1073,13 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, * Note that since we won't ever copy out more than the caller requested, * we never need to allocate more than they offer.
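As a rough userspace illustration of the getvolattrlist() path being reworked here, a plain getattrlist(2) call on any path in a volume can request volume attributes (struct and helper names are invented; ATTR_VOL_INFO must accompany any volume attribute request):

#include <sys/attr.h>
#include <sys/param.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

struct vol_name_buf {
	uint32_t        length;     /* length count, as packed above */
	attrreference_t name_ref;
	char            name[MAXPATHLEN];
};

int
print_volume_name(const char *anypath)
{
	struct attrlist al;
	struct vol_name_buf ab;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.volattr = ATTR_VOL_INFO | ATTR_VOL_NAME;

	if (getattrlist(anypath, &al, &ab, sizeof(ab), 0) != 0)
		return (-1);
	/* attrreference offsets are relative to the attrreference itself */
	printf("volume: %s\n",
	    (char *)&ab.name_ref + ab.name_ref.attr_dataoffset);
	return (0);
}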
*/ - ab.allocated = ulmin(uap->bufferSize, fixedsize + varsize); + ab.allocated = ulmin(bufferSize, fixedsize + varsize); if (ab.allocated > ATTR_MAX_BUFFER) { error = ENOMEM; VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); goto out; } - MALLOC(ab.base, char *, ab.allocated, M_TEMP, M_WAITOK); + MALLOC(ab.base, char *, ab.allocated, M_TEMP, M_ZERO | M_WAITOK); if (ab.base == NULL) { error = ENOMEM; VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate %d for copy buffer", ab.allocated); @@ -1042,11 +1102,6 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, attrlist_pack_string(&ab, cnp, cnl); ab.actual.commonattr |= ATTR_CMN_NAME; } - if ((alp->commonattr & ATTR_CMN_ERROR) && - (!return_valid || pack_invalid)) { - ATTR_PACK4(ab, 0); - ab.actual.commonattr |= ATTR_CMN_ERROR; - } if (alp->commonattr & ATTR_CMN_DEVID) { ATTR_PACK4(ab, mnt->mnt_vfsstat.f_fsid.val[0]); ab.actual.commonattr |= ATTR_CMN_DEVID; @@ -1314,7 +1369,7 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, * of the result buffer, even if we copied less out. The caller knows how big a buffer * they gave us, so they can always check for truncation themselves. */ - *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); + *(uint32_t *)ab.base = (options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); /* Return attribute set output if requested. */ if (return_valid) { @@ -1326,7 +1381,12 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, } bcopy(&ab.actual, ab.base + sizeof(uint32_t), sizeof (ab.actual)); } - error = copyout(ab.base, uap->attributeBuffer, ab.allocated); + + if (UIO_SEG_IS_USER_SPACE(segflg)) + error = copyout(ab.base, CAST_USER_ADDR_T(attributeBuffer), + ab.allocated); + else + bcopy(ab.base, (void *)attributeBuffer, (size_t)ab.allocated); out: if (vs.f_vol_name != NULL) @@ -1341,235 +1401,718 @@ out: } /* - * Obtain attribute information about a filesystem object. + * Pack ATTR_COMMON attributes into a user buffer. + * alp is a pointer to the bitmap of attributes required. + * abp is the state of the attribute filling operation. + * The attribute data (along with some other fields that are required) + * are in vap. */ - -static int -getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, - __unused struct componentname *getattr_name, proc_t p, vfs_context_t ctx) +static errno_t +attr_pack_common(vfs_context_t ctx, struct vnode *vp, struct attrlist *alp, + struct _attrlist_buf *abp, struct vnode_attr *vap, int proc_is64, + const char *cnp, ssize_t cnl, const char *fullpathptr, + ssize_t fullpathlen, int return_valid, int pack_invalid, int vtype, + int is_bulk) { - struct attrlist al; - struct vnode_attr va; - struct _attrlist_buf ab; - kauth_action_t action; - ssize_t fixedsize, varsize; - const char *cnp; - const char *vname = NULL; - char *fullpathptr; - ssize_t fullpathlen; - ssize_t cnl; - int proc_is64; - int error; - int return_valid; - int pack_invalid; - int attr_extended; - int vtype = 0; uint32_t perms = 0; + int error = 0; - proc_is64 = proc_is64bit(p); - VATTR_INIT(&va); - va.va_name = NULL; - ab.base = NULL; - cnp = "unknown"; - cnl = 0; - fullpathptr = NULL; - fullpathlen = 0; - - /* - * Fetch the attribute request.
- */ - if ((error = copyin(uap->alist, &al, sizeof(al))) != 0) - goto out; - if (al.bitmapcount != ATTR_BIT_MAP_COUNT) { - error = EINVAL; - goto out; + if ((alp->commonattr & ATTR_CMN_ERROR) && + (!return_valid || pack_invalid)) { + ATTR_PACK4((*abp), 0); + abp->actual.commonattr |= ATTR_CMN_ERROR; } - - VFS_DEBUG(ctx, vp, "%p ATTRLIST - %s request common %08x vol %08x file %08x dir %08x fork %08x %sfollow on '%s'", - vp, p->p_comm, al.commonattr, al.volattr, al.fileattr, al.dirattr, al.forkattr, - (uap->options & FSOPT_NOFOLLOW) ? "no":"", vp->v_name); - -#if CONFIG_MACF - error = mac_vnode_check_getattrlist(ctx, vp, &al); - if (error) - goto out; -#endif /* MAC */ - - /* - * It is legal to request volume or file attributes, - * but not both. - */ - if (al.volattr) { - if (al.fileattr || al.dirattr || al.forkattr) { - error = EINVAL; - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: mixed volume/file/directory/fork attributes"); - goto out; + if (alp->commonattr & ATTR_CMN_NAME) { + attrlist_pack_string(abp, cnp, cnl); + abp->actual.commonattr |= ATTR_CMN_NAME; + } + if (alp->commonattr & ATTR_CMN_DEVID) { + if (vp) { + ATTR_PACK4((*abp), + vp->v_mount->mnt_vfsstat.f_fsid.val[0]); + abp->actual.commonattr |= ATTR_CMN_DEVID; + } else if (VATTR_IS_SUPPORTED(vap, va_devid)) { + ATTR_PACK4((*abp), vap->va_devid); + abp->actual.commonattr |= ATTR_CMN_DEVID; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); } - /* handle volume attribute request */ - error = getvolattrlist(vp, uap, &al, ctx, proc_is64); - goto out; } + if (alp->commonattr & ATTR_CMN_FSID) { + if (vp) { + ATTR_PACK8((*abp), + vp->v_mount->mnt_vfsstat.f_fsid); + abp->actual.commonattr |= ATTR_CMN_FSID; + } else if (VATTR_IS_SUPPORTED(vap, va_fsid64)) { + ATTR_PACK8((*abp), vap->va_fsid64); + abp->actual.commonattr |= ATTR_CMN_FSID; + } else if (VATTR_IS_SUPPORTED(vap, va_fsid)) { + fsid_t fsid; + + /* va_fsid is 32 bits */ + fsid.val[0] = vap->va_fsid; + fsid.val[1] = 0; + ATTR_PACK8((*abp), fsid); + abp->actual.commonattr |= ATTR_CMN_FSID; + } else if (!return_valid || pack_invalid) { + fsid_t fsid = {{0}}; - /* Check for special packing semantics */ - return_valid = (al.commonattr & ATTR_CMN_RETURNED_ATTRS) ? 1 : 0; - pack_invalid = (uap->options & FSOPT_PACK_INVAL_ATTRS) ? 1 : 0; - attr_extended = (uap->options & FSOPT_ATTRLIST_EXTENDED) ? 1 : 0; - if (pack_invalid) { - /* FSOPT_PACK_INVAL_ATTRS requires ATTR_CMN_RETURNED_ATTRS */ - if (!return_valid || al.forkattr) { - error = EINVAL; - goto out; + ATTR_PACK8((*abp), fsid); } - /* Keep invalid attrs from being uninitialized */ - bzero(&va, sizeof (va)); - /* Generate a valid mask for post processing */ - bcopy(&al.commonattr, &ab.valid, sizeof (attribute_set_t)); } - - /* Pick up the vnode type. If the FS is bad and changes vnode types on us, we - * will have a valid snapshot that we can work from here. - */ - vtype = vp->v_type; - - - /* - * Set up the vnode_attr structure and authorise. 
- */ - if ((error = getattrlist_setupvattr(&al, attr_extended, &va, &fixedsize, &action, proc_is64, (vtype == VDIR))) != 0) { - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: setup for request failed"); - goto out; + if (alp->commonattr & ATTR_CMN_OBJTYPE) { + if (vp) { + ATTR_PACK4((*abp), vtype); + abp->actual.commonattr |= ATTR_CMN_OBJTYPE; + } else if (VATTR_IS_SUPPORTED(vap, va_objtype)) { + ATTR_PACK4((*abp), vap->va_objtype); + abp->actual.commonattr |= ATTR_CMN_OBJTYPE; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); + } } - if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) { - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: authorisation failed/denied"); - goto out; + if (alp->commonattr & ATTR_CMN_OBJTAG) { + if (vp) { + ATTR_PACK4((*abp), vp->v_tag); + abp->actual.commonattr |= ATTR_CMN_OBJTAG; + } else if (VATTR_IS_SUPPORTED(vap, va_objtag)) { + ATTR_PACK4((*abp), vap->va_objtag); + abp->actual.commonattr |= ATTR_CMN_OBJTAG; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); + } + } + if (alp->commonattr & ATTR_CMN_OBJID) { + fsobj_id_t f; + /* + * Carbon can't deal with us reporting the target ID + * for links. So we ask the filesystem to give us the + * source ID as well, and if it gives us one, we use + * it instead. + */ + if (VATTR_IS_SUPPORTED(vap, va_linkid)) { + f.fid_objno = vap->va_linkid; + } else { + f.fid_objno = vap->va_fileid; + } + f.fid_generation = 0; + ATTR_PACK8((*abp), f); + abp->actual.commonattr |= ATTR_CMN_OBJID; + } + if (alp->commonattr & ATTR_CMN_OBJPERMANENTID) { + fsobj_id_t f; + /* + * Carbon can't deal with us reporting the target ID + * for links. So we ask the filesystem to give us the + * source ID as well, and if it gives us one, we use + * it instead. + */ + if (VATTR_IS_SUPPORTED(vap, va_linkid)) { + f.fid_objno = vap->va_linkid; + } else { + f.fid_objno = vap->va_fileid; + } + f.fid_generation = 0; + ATTR_PACK8((*abp), f); + abp->actual.commonattr |= ATTR_CMN_OBJPERMANENTID; } + if (alp->commonattr & ATTR_CMN_PAROBJID) { + fsobj_id_t f; + f.fid_objno = vap->va_parentid; /* could be lossy here! */ + f.fid_generation = 0; + ATTR_PACK8((*abp), f); + abp->actual.commonattr |= ATTR_CMN_PAROBJID; + } + if (alp->commonattr & ATTR_CMN_SCRIPT) { + if (VATTR_IS_SUPPORTED(vap, va_encoding)) { + ATTR_PACK4((*abp), vap->va_encoding); + abp->actual.commonattr |= ATTR_CMN_SCRIPT; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0x7e); + } + } + if (alp->commonattr & ATTR_CMN_CRTIME) { + ATTR_PACK_TIME((*abp), vap->va_create_time, proc_is64); + abp->actual.commonattr |= ATTR_CMN_CRTIME; + } + if (alp->commonattr & ATTR_CMN_MODTIME) { + ATTR_PACK_TIME((*abp), vap->va_modify_time, proc_is64); + abp->actual.commonattr |= ATTR_CMN_MODTIME; + } + if (alp->commonattr & ATTR_CMN_CHGTIME) { + ATTR_PACK_TIME((*abp), vap->va_change_time, proc_is64); + abp->actual.commonattr |= ATTR_CMN_CHGTIME; + } + if (alp->commonattr & ATTR_CMN_ACCTIME) { + ATTR_PACK_TIME((*abp), vap->va_access_time, proc_is64); + abp->actual.commonattr |= ATTR_CMN_ACCTIME; + } + if (alp->commonattr & ATTR_CMN_BKUPTIME) { + ATTR_PACK_TIME((*abp), vap->va_backup_time, proc_is64); + abp->actual.commonattr |= ATTR_CMN_BKUPTIME; + } /* - * If we're asking for the full path, allocate a buffer for that. + * They are requesting user access; we should obtain this before getting + * the finder info. For some network file systems this is a performance + * improvement.
*/ - if (al.commonattr & (ATTR_CMN_FULLPATH)) { - fullpathptr = (char*) kalloc(MAXPATHLEN); - if (fullpathptr == NULL) { - error = ENOMEM; - VFS_DEBUG(ctx,vp, "ATTRLIST - ERROR: cannot allocate fullpath buffer"); - goto out; + if (alp->commonattr & ATTR_CMN_USERACCESS) { /* this is expensive */ + if (vp && !is_bulk) { + if (vtype == VDIR) { + if (vnode_authorize(vp, NULL, + KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY | + KAUTH_VNODE_DELETE_CHILD, ctx) == 0) + perms |= W_OK; + + if (vnode_authorize(vp, NULL, + KAUTH_VNODE_ACCESS | + KAUTH_VNODE_LIST_DIRECTORY, ctx) == 0) + perms |= R_OK; + + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | + KAUTH_VNODE_SEARCH, ctx) == 0) + perms |= X_OK; + } else { + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | + KAUTH_VNODE_WRITE_DATA, ctx) == 0) + perms |= W_OK; + + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA, ctx) == 0) + perms |= R_OK; + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE, ctx) == 0) + perms |= X_OK; + } + } else if (is_bulk && + VATTR_IS_SUPPORTED(vap, va_user_access)) { + perms = vap->va_user_access; } } + if (alp->commonattr & ATTR_CMN_FNDRINFO) { + size_t fisize = 32; + error = 0; + if (vp && !is_bulk) { + uio_t auio; + char uio_buf[UIO_SIZEOF(1)]; - if (va.va_active != 0) { - /* - * If we're going to ask for va_name, allocate a buffer to point it at - */ - if (VATTR_IS_ACTIVE(&va, va_name)) { - va.va_name = (char *) kalloc(MAXPATHLEN); - if (va.va_name == NULL) { + if ((auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, + UIO_READ, uio_buf, sizeof(uio_buf))) == NULL) { error = ENOMEM; - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: cannot allocate va_name buffer"); goto out; } - } - - /* - * Call the filesystem. - */ - if ((error = vnode_getattr(vp, &va, ctx)) != 0) { - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); - goto out; - } - - /* did we ask for something the filesystem doesn't support? */ - if (!VATTR_ALL_SUPPORTED(&va)) { + uio_addiov(auio, CAST_USER_ADDR_T(abp->fixedcursor), + fisize); + /* fisize may be reset to 0 after this call */ + error = vn_getxattr(vp, XATTR_FINDERINFO_NAME, auio, + &fisize, XATTR_NOSECURITY, ctx); + uio_free(auio); /* - * There are a couple of special cases. If we are after object IDs, - * we can make do with va_fileid. + * Default to zeros if its not available, + * unless ATTR_CMN_RETURNED_ATTRS was requested. */ - if ((al.commonattr & (ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID | ATTR_CMN_FILEID)) && !VATTR_IS_SUPPORTED(&va, va_linkid)) - VATTR_CLEAR_ACTIVE(&va, va_linkid); /* forget we wanted this */ - + if (error && + (!return_valid || pack_invalid) && + ((error == ENOATTR) || (error == ENOENT) || + (error == ENOTSUP) || (error == EPERM))) { + VFS_DEBUG(ctx, vp, "ATTRLIST - No system.finderinfo attribute, returning zeroes"); + bzero(abp->fixedcursor, 32); + error = 0; + } + + if (error == 0) { + abp->fixedcursor += 32; + abp->actual.commonattr |= ATTR_CMN_FNDRINFO; + } else if (!return_valid) { + goto out; + } else { + /* + * If we can inform the caller that we can't + * return this attribute, reset error and + * continue with the rest of the attributes. 
+ */ + error = 0; + } + } else if (VATTR_IS_SUPPORTED(vap, va_finderinfo)) { + bcopy(&vap->va_finderinfo[0], abp->fixedcursor, fisize); + abp->fixedcursor += fisize; + abp->actual.commonattr |= ATTR_CMN_FNDRINFO; + } else if (!return_valid || pack_invalid) { + bzero(abp->fixedcursor, fisize); + abp->fixedcursor += fisize; + } + } + if (alp->commonattr & ATTR_CMN_OWNERID) { + ATTR_PACK4((*abp), vap->va_uid); + abp->actual.commonattr |= ATTR_CMN_OWNERID; + } + if (alp->commonattr & ATTR_CMN_GRPID) { + ATTR_PACK4((*abp), vap->va_gid); + abp->actual.commonattr |= ATTR_CMN_GRPID; + } + if (alp->commonattr & ATTR_CMN_ACCESSMASK) { + ATTR_PACK4((*abp), vap->va_mode); + abp->actual.commonattr |= ATTR_CMN_ACCESSMASK; + } + if (alp->commonattr & ATTR_CMN_FLAGS) { + ATTR_PACK4((*abp), vap->va_flags); + abp->actual.commonattr |= ATTR_CMN_FLAGS; + } + if (alp->commonattr & ATTR_CMN_GEN_COUNT) { + if (VATTR_IS_SUPPORTED(vap, va_write_gencount)) { + ATTR_PACK4((*abp), vap->va_write_gencount); + abp->actual.commonattr |= ATTR_CMN_GEN_COUNT; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); + } + } + + if (alp->commonattr & ATTR_CMN_DOCUMENT_ID) { + if (VATTR_IS_SUPPORTED(vap, va_document_id)) { + ATTR_PACK4((*abp), vap->va_document_id); + abp->actual.commonattr |= ATTR_CMN_DOCUMENT_ID; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); + } + } + /* We already obtained the user access, so just fill in the buffer here */ + if (alp->commonattr & ATTR_CMN_USERACCESS) { +#if CONFIG_MACF + if (!is_bulk && vp) { + /* + * Rather than MAC preceding DAC, in this case we want + * the smallest set of permissions granted by both MAC & + * DAC checks. We won't add back any permissions. + */ + if (perms & W_OK) + if (mac_vnode_check_access(ctx, vp, W_OK) != 0) + perms &= ~W_OK; + if (perms & R_OK) + if (mac_vnode_check_access(ctx, vp, R_OK) != 0) + perms &= ~R_OK; + if (perms & X_OK) + if (mac_vnode_check_access(ctx, vp, X_OK) != 0) + perms &= ~X_OK; + } +#endif /* MAC */ + VFS_DEBUG(ctx, vp, "ATTRLIST - granting perms %d", perms); + if (!is_bulk && vp) { + ATTR_PACK4((*abp), perms); + abp->actual.commonattr |= ATTR_CMN_USERACCESS; + } else if (is_bulk && VATTR_IS_SUPPORTED(vap, va_user_access)) { + ATTR_PACK4((*abp), perms); + abp->actual.commonattr |= ATTR_CMN_USERACCESS; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); + } + } + if (alp->commonattr & ATTR_CMN_EXTENDED_SECURITY) { + if (VATTR_IS_SUPPORTED(vap, va_acl) && (vap->va_acl != NULL)) { + struct kauth_filesec fsec; + /* + * We want to return a kauth_filesec (for now), but all we have is a kauth_acl.
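From userspace, the ATTR_CMN_USERACCESS bits computed above come back as plain R_OK/W_OK/X_OK flags and should normally agree with access(2); a hedged sketch (struct layout assumes this is the only attribute requested):

#include <sys/attr.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>

struct ua_buf {
	uint32_t length;
	uint32_t user_access;   /* R_OK/W_OK/X_OK bits, as packed above */
};

int
can_write_per_attrlist(const char *path)
{
	struct attrlist al;
	struct ua_buf ab;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_USERACCESS;

	if (getattrlist(path, &al, &ab, sizeof(ab), 0) != 0)
		return (-1);
	/* Should normally match access(path, W_OK) == 0 */
	return ((ab.user_access & W_OK) != 0);
}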
*/ - if ((al.fileattr & ATTR_FILE_DATALENGTH) && !VATTR_IS_SUPPORTED(&va, va_data_size)) - VATTR_CLEAR_ACTIVE(&va, va_data_size); - if ((al.fileattr & ATTR_FILE_DATAALLOCSIZE) && !VATTR_IS_SUPPORTED(&va, va_data_alloc)) - VATTR_CLEAR_ACTIVE(&va, va_data_alloc); + fsec.fsec_magic = KAUTH_FILESEC_MAGIC; + fsec.fsec_owner = kauth_null_guid; + fsec.fsec_group = kauth_null_guid; + attrlist_pack_variable2(abp, &fsec, __offsetof(struct kauth_filesec, fsec_acl), vap->va_acl, KAUTH_ACL_COPYSIZE(vap->va_acl)); + abp->actual.commonattr |= ATTR_CMN_EXTENDED_SECURITY; + } else if (!return_valid || pack_invalid) { + attrlist_pack_variable(abp, NULL, 0); + } + } + if (alp->commonattr & ATTR_CMN_UUID) { + if (VATTR_IS_SUPPORTED(vap, va_uuuid)) { + ATTR_PACK(abp, vap->va_uuuid); + abp->actual.commonattr |= ATTR_CMN_UUID; + } else if (!return_valid || pack_invalid) { + ATTR_PACK(abp, kauth_null_guid); + } + } + if (alp->commonattr & ATTR_CMN_GRPUUID) { + if (VATTR_IS_SUPPORTED(vap, va_guuid)) { + ATTR_PACK(abp, vap->va_guuid); + abp->actual.commonattr |= ATTR_CMN_GRPUUID; + } else if (!return_valid || pack_invalid) { + ATTR_PACK(abp, kauth_null_guid); + } + } + if (alp->commonattr & ATTR_CMN_FILEID) { + ATTR_PACK8((*abp), vap->va_fileid); + abp->actual.commonattr |= ATTR_CMN_FILEID; + } + if (alp->commonattr & ATTR_CMN_PARENTID) { + ATTR_PACK8((*abp), vap->va_parentid); + abp->actual.commonattr |= ATTR_CMN_PARENTID; + } + + if (alp->commonattr & ATTR_CMN_FULLPATH) { + attrlist_pack_string (abp, fullpathptr, fullpathlen); + abp->actual.commonattr |= ATTR_CMN_FULLPATH; + } + + if (alp->commonattr & ATTR_CMN_ADDEDTIME) { + if (VATTR_IS_SUPPORTED(vap, va_addedtime)) { + ATTR_PACK_TIME((*abp), vap->va_addedtime, proc_is64); + abp->actual.commonattr |= ATTR_CMN_ADDEDTIME; + } else if (!return_valid || pack_invalid) { + struct timespec zerotime = {0, 0}; + + ATTR_PACK_TIME((*abp), zerotime, proc_is64); + } + } + if (alp->commonattr & ATTR_CMN_DATA_PROTECT_FLAGS) { + if (VATTR_IS_SUPPORTED(vap, va_dataprotect_class)) { + ATTR_PACK4((*abp), vap->va_dataprotect_class); + abp->actual.commonattr |= ATTR_CMN_DATA_PROTECT_FLAGS; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); + } + } +out: + return (error); +} + +static errno_t +attr_pack_dir(struct vnode *vp, struct attrlist *alp, struct _attrlist_buf *abp, + struct vnode_attr *vap) +{ + if (alp->dirattr & ATTR_DIR_LINKCOUNT) { /* full count of entries */ + ATTR_PACK4((*abp), (uint32_t)vap->va_dirlinkcount); + abp->actual.dirattr |= ATTR_DIR_LINKCOUNT; + } + if (alp->dirattr & ATTR_DIR_ENTRYCOUNT) { + ATTR_PACK4((*abp), (uint32_t)vap->va_nchildren); + abp->actual.dirattr |= ATTR_DIR_ENTRYCOUNT; + } + if (alp->dirattr & ATTR_DIR_MOUNTSTATUS) { + uint32_t mntstat; + if (vp) { + /* + * The vnode that is passed down may either be a + * top level vnode of a mount stack or a mounted + * on vnode. In either case, the directory should + * be reported as a mount point. 
+ */ + if ((vp->v_flag & VROOT) || vnode_mountedhere(vp)) { + mntstat = DIR_MNTSTATUS_MNTPOINT; + } else { + mntstat = 0; + } +#if CONFIG_TRIGGERS + /* + * Report back on active vnode triggers + * that can directly trigger a mount + */ + if (vp->v_resolve && + !(vp->v_resolve->vr_flags & VNT_NO_DIRECT_MOUNT)) { + mntstat |= DIR_MNTSTATUS_TRIGGER; + } +#endif + } else { + mntstat = 0; + } + + ATTR_PACK4((*abp), mntstat); + abp->actual.dirattr |= ATTR_DIR_MOUNTSTATUS; + } + + return 0; +} + +/* + * The is_bulk parameter differentiates whether the function is called from + * getattrlist or getattrlistbulk. When coming in from getattrlistbulk, + * the corresponding va_* values are expected to have been filled in already, + * and no attempt is made to retrieve them by calling back into the filesystem. + */ +static errno_t +attr_pack_file(vfs_context_t ctx, struct vnode *vp, struct attrlist *alp, + struct _attrlist_buf *abp, struct vnode_attr *vap, int return_valid, + int pack_invalid, int is_bulk) +{ + size_t rsize = 0; + uint64_t rlength = 0; + uint64_t ralloc = 0; + int error = 0; + + /* + * Pre-fetch the rsrc attributes now so we only get them once. + * Fetch the resource fork size/allocation via the xattr interface. + */ + if (vp && !is_bulk && + (alp->fileattr & (ATTR_FILE_TOTALSIZE | ATTR_FILE_ALLOCSIZE | + ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE))) { + + error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, NULL, + &rsize, XATTR_NOSECURITY, ctx); + if (error) { + if ((error == ENOENT) || (error == ENOATTR) || + (error == ENOTSUP) || (error == EPERM) || + (error == EACCES)) { + rsize = 0; + error = 0; + } else { + goto out; + } + } + rlength = rsize; + + if (alp->fileattr & (ATTR_FILE_RSRCALLOCSIZE | + ATTR_FILE_ALLOCSIZE)) { + uint32_t blksize; + + blksize = vp->v_mount->mnt_vfsstat.f_bsize; + + if (blksize == 0) { + blksize = 512; + } + ralloc = roundup(rsize, blksize); + } + } + + if (alp->fileattr & ATTR_FILE_LINKCOUNT) { + ATTR_PACK4((*abp), (uint32_t)vap->va_nlink); + abp->actual.fileattr |= ATTR_FILE_LINKCOUNT; + } + /* + * Note the following caveats for the TOTALSIZE and ALLOCSIZE attributes: + * We infer that if the filesystem does not support va_data_size or va_data_alloc + * it must not know about alternate forks. So when we need to gather + * the total size or total alloc, it's OK to substitute the total size for + * the data size below. This is because it is likely a flat filesystem and we must + * be using AD files to store the rsrc fork and EAs. + * + * Additionally, note that getattrlist is barred from being called on + * resource fork paths. (Search for CN_ALLOWRSRCFORK). So if the filesystem does + * support va_data_size, it is guaranteed to represent the data fork's size. This + * is an important distinction to make because when we call vnode_getattr on + * an HFS resource fork vnode, to get the size, it will vend out the resource + * fork's size (it only gets the size of the passed-in vnode).
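A userspace analogue of the TOTALSIZE computation described above: data fork size from stat(2) plus the resource fork size probed with a NULL-buffer getxattr(2), which returns only the attribute's length (sketch; names invented):

#include <sys/stat.h>
#include <sys/xattr.h>
#include <unistd.h>
#include <stdint.h>

int64_t
total_logical_size(const char *path)
{
	struct stat st;
	ssize_t rsrc;

	if (stat(path, &st) != 0)
		return (-1);
	/* NULL buffer: ask only for the resource fork's size */
	rsrc = getxattr(path, XATTR_RESOURCEFORK_NAME, NULL, 0,
	    0, XATTR_NOFOLLOW);
	if (rsrc < 0)
		rsrc = 0;	/* no resource fork; mirrors rsize = 0 above */
	return ((int64_t)st.st_size + (int64_t)rsrc);
}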
+ */ + if (alp->fileattr & ATTR_FILE_TOTALSIZE) { + if (!is_bulk) { + uint64_t totalsize = rlength; + + if (VATTR_IS_SUPPORTED(vap, va_data_size)) { + totalsize += vap->va_data_size; + } else { + totalsize += vap->va_total_size; + } + + ATTR_PACK8((*abp), totalsize); + abp->actual.fileattr |= ATTR_FILE_TOTALSIZE; + } else if (VATTR_IS_SUPPORTED(vap, va_total_size)) { + ATTR_PACK8((*abp), vap->va_total_size); + abp->actual.fileattr |= ATTR_FILE_TOTALSIZE; + } else if (!return_valid || pack_invalid) { + uint64_t zero_val = 0; + + ATTR_PACK8((*abp), zero_val); + } + } + if (alp->fileattr & ATTR_FILE_ALLOCSIZE) { + if (!is_bulk) { + uint64_t totalalloc = ralloc; /* - * If we don't have a name, we'll get one from the vnode or mount point. + * If data_alloc is supported, then it must represent the + * data fork size. */ - if ((al.commonattr & ATTR_CMN_NAME) && !VATTR_IS_SUPPORTED(&va, va_name)) { - VATTR_CLEAR_ACTIVE(&va, va_name); + if (VATTR_IS_SUPPORTED(vap, va_data_alloc)) { + totalalloc += vap->va_data_alloc; + } else { + totalalloc += vap->va_total_alloc; } - /* If va_dirlinkcount isn't supported use a default of 1. */ - if ((al.dirattr & ATTR_DIR_LINKCOUNT) && !VATTR_IS_SUPPORTED(&va, va_dirlinkcount)) { - VATTR_RETURN(&va, va_dirlinkcount, 1); + ATTR_PACK8((*abp), totalalloc); + abp->actual.fileattr |= ATTR_FILE_ALLOCSIZE; + } else if (VATTR_IS_SUPPORTED(vap, va_total_alloc)) { + ATTR_PACK8((*abp), vap->va_total_alloc); + abp->actual.fileattr |= ATTR_FILE_ALLOCSIZE; + } else if (!return_valid || pack_invalid) { + uint64_t zero_val = 0; + + ATTR_PACK8((*abp), zero_val); + } + } + if (alp->fileattr & ATTR_FILE_IOBLOCKSIZE) { + ATTR_PACK4((*abp), vap->va_iosize); + abp->actual.fileattr |= ATTR_FILE_IOBLOCKSIZE; + } + if (alp->fileattr & ATTR_FILE_CLUMPSIZE) { + if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); /* this value is deprecated */ + abp->actual.fileattr |= ATTR_FILE_CLUMPSIZE; + } + } + if (alp->fileattr & ATTR_FILE_DEVTYPE) { + if (vp && (vp->v_type == VCHR || vp->v_type == VBLK)) { + uint32_t dev; + + if (vp->v_specinfo != NULL) { + dev = vp->v_specinfo->si_rdev; + } else if (VATTR_IS_SUPPORTED(vap, va_rdev)) { + dev = vap->va_rdev; + } else { + dev = 0; } - - /* check again */ - if (!VATTR_ALL_SUPPORTED(&va)) { - if (return_valid) { - if (pack_invalid) { - /* Fix up valid mask for post processing */ - getattrlist_fixupattrs(&ab.valid, &va); - - /* Force packing of everything asked for */ - va.va_supported = va.va_active; - } else { - /* Adjust the requested attributes */ - getattrlist_fixupattrs((attribute_set_t *)&al.commonattr, &va); - } - } else { - error = EINVAL; - goto out; - } + ATTR_PACK4((*abp), dev); + abp->actual.fileattr |= ATTR_FILE_DEVTYPE; + } else if (vp) { + ATTR_PACK4((*abp), 0); + abp->actual.fileattr |= ATTR_FILE_DEVTYPE; + } else if (VATTR_IS_SUPPORTED(vap, va_rdev)) { + ATTR_PACK4((*abp), vap->va_rdev); + abp->actual.fileattr |= ATTR_FILE_DEVTYPE; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4((*abp), 0); + } + } + /* + * If the filesystem does not support datalength + * or dataallocsize, then we infer that totalsize and + * totalalloc are substitutes. 
+ */ + if (alp->fileattr & ATTR_FILE_DATALENGTH) { + if (VATTR_IS_SUPPORTED(vap, va_data_size)) { + ATTR_PACK8((*abp), vap->va_data_size); + } else { + ATTR_PACK8((*abp), vap->va_total_size); + } + abp->actual.fileattr |= ATTR_FILE_DATALENGTH; + } + if (alp->fileattr & ATTR_FILE_DATAALLOCSIZE) { + if (VATTR_IS_SUPPORTED(vap, va_data_alloc)) { + ATTR_PACK8((*abp), vap->va_data_alloc); + } else { + ATTR_PACK8((*abp), vap->va_total_alloc); + } + abp->actual.fileattr |= ATTR_FILE_DATAALLOCSIZE; + } + /* already got the resource fork size/allocation above */ + if (alp->fileattr & ATTR_FILE_RSRCLENGTH) { + if (!is_bulk) { + ATTR_PACK8((*abp), rlength); + abp->actual.fileattr |= ATTR_FILE_RSRCLENGTH; + } else if (VATTR_IS_SUPPORTED(vap, va_rsrc_length)) { + ATTR_PACK8((*abp), vap->va_rsrc_length); + abp->actual.fileattr |= ATTR_FILE_RSRCLENGTH; + } else if (!return_valid || pack_invalid) { + uint64_t zero_val = 0; + + ATTR_PACK8((*abp), zero_val); + } + } + if (alp->fileattr & ATTR_FILE_RSRCALLOCSIZE) { + if (!is_bulk) { + ATTR_PACK8((*abp), ralloc); + abp->actual.fileattr |= ATTR_FILE_RSRCALLOCSIZE; + } else if (VATTR_IS_SUPPORTED(vap, va_rsrc_alloc)) { + ATTR_PACK8((*abp), vap->va_rsrc_alloc); + abp->actual.fileattr |= ATTR_FILE_RSRCALLOCSIZE; + } else if (!return_valid || pack_invalid) { + uint64_t zero_val = 0; + + ATTR_PACK8((*abp), zero_val); + } + } +out: + return (error); +} + +static void +vattr_get_alt_data(vnode_t vp, struct attrlist *alp, struct vnode_attr *vap, + int return_valid, int is_bulk, vfs_context_t ctx) +{ + /* + * There are a couple of special cases. + * If we are after object IDs, we can make do with va_fileid. + */ + if ((alp->commonattr & + (ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID | ATTR_CMN_FILEID)) && + !VATTR_IS_SUPPORTED(vap, va_linkid)) { + /* forget we wanted this */ + VATTR_CLEAR_ACTIVE(vap, va_linkid); + } + + /* + * Many filesystems don't know their parent object id. + * If necessary, attempt to derive it from the vnode. + */ + if ((alp->commonattr & (ATTR_CMN_PAROBJID | ATTR_CMN_PARENTID)) && + !VATTR_IS_SUPPORTED(vap, va_parentid) && vp && !is_bulk) { + vnode_t dvp; + + if ((dvp = vnode_getparent(vp)) != NULLVP) { + struct vnode_attr lva; + + VATTR_INIT(&lva); + VATTR_WANTED(&lva, va_fileid); + if (vnode_getattr(dvp, &lva, ctx) == 0 && + VATTR_IS_SUPPORTED(vap, va_fileid)) { + vap->va_parentid = lva.va_fileid; + VATTR_SET_SUPPORTED(vap, va_parentid); } + vnode_put(dvp); } } + /* + * And we can report datasize/alloc from total. + */ + if ((alp->fileattr & ATTR_FILE_DATALENGTH) && + !VATTR_IS_SUPPORTED(vap, va_data_size)) { + VATTR_CLEAR_ACTIVE(vap, va_data_size); + } + + if ((alp->fileattr & ATTR_FILE_DATAALLOCSIZE) && + !VATTR_IS_SUPPORTED(vap, va_data_alloc)) { + VATTR_CLEAR_ACTIVE(vap, va_data_alloc); + } /* - * Compute variable-space requirements. + * If we don't have an encoding, go with UTF-8 + */ + if ((alp->commonattr & ATTR_CMN_SCRIPT) && + !VATTR_IS_SUPPORTED(vap, va_encoding) && !return_valid) { + VATTR_RETURN(vap, va_encoding, + 0x7e /* kTextEncodingMacUnicode */); + } + + /* + * If we don't have a name, we'll get one from the vnode or + * mount point. */ - varsize = 0; /* length count */ + if ((alp->commonattr & ATTR_CMN_NAME) && + !VATTR_IS_SUPPORTED(vap, va_name)) { + VATTR_CLEAR_ACTIVE(vap, va_name); + } + + /* If va_dirlinkcount isn't supported use a default of 1. 
*/ + if ((alp->dirattr & ATTR_DIR_LINKCOUNT) && + !VATTR_IS_SUPPORTED(vap, va_dirlinkcount)) { + VATTR_RETURN(vap, va_dirlinkcount, 1); + } +} + +static errno_t +calc_varsize(vnode_t vp, struct attrlist *alp, struct vnode_attr *vap, + ssize_t *varsizep, char *fullpathptr, ssize_t *fullpathlenp, + const char **vnamep, const char **cnpp, ssize_t *cnlp) +{ + int error = 0; + *varsizep = 0; /* length count */ /* We may need to fix up the name attribute if requested */ - if (al.commonattr & ATTR_CMN_NAME) { - if (VATTR_IS_SUPPORTED(&va, va_name)) { - va.va_name[MAXPATHLEN-1] = '\0'; /* Ensure nul-termination */ - cnp = va.va_name; - cnl = strlen(cnp); - } - else { + if (alp->commonattr & ATTR_CMN_NAME) { + if (VATTR_IS_SUPPORTED(vap, va_name)) { + vap->va_name[MAXPATHLEN-1] = '\0'; /* Ensure nul-termination */ + *cnpp = vap->va_name; + *cnlp = strlen(*cnpp); + } else if (vp) { /* Filesystem did not support getting the name */ if (vnode_isvroot(vp)) { if (vp->v_mount->mnt_vfsstat.f_mntonname[1] == 0x00 && @@ -1580,46 +2123,49 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, * pre Tiger code. returning nothing for the boot volume name * breaks installers - 3961058 */ - cnp = vname = vnode_getname(vp); - if (cnp == NULL) { + *cnpp = *vnamep = vnode_getname(vp); + if (*cnpp == NULL) { /* just use "/" as name */ - cnp = &vp->v_mount->mnt_vfsstat.f_mntonname[0]; + *cnpp = &vp->v_mount->mnt_vfsstat.f_mntonname[0]; } - cnl = strlen(cnp); + *cnlp = strlen(*cnpp); } else { - getattrlist_findnamecomp(vp->v_mount->mnt_vfsstat.f_mntonname, &cnp, &cnl); + getattrlist_findnamecomp(vp->v_mount->mnt_vfsstat.f_mntonname, cnpp, cnlp); } } else { - cnp = vname = vnode_getname(vp); - cnl = 0; - if (cnp != NULL) { - cnl = strlen(cnp); + *cnpp = *vnamep = vnode_getname(vp); + *cnlp = 0; + if (*cnpp != NULL) { + *cnlp = strlen(*cnpp); } } + } else { + *cnlp = 0; } - varsize += roundup(cnl + 1, 4); + *varsizep += roundup(*cnlp + 1, 4); } /* * Compute the full path to this vnode, if necessary. This attribute is almost certainly * not supported by any filesystem, so build the path to this vnode at this time. */ - if (al.commonattr & ATTR_CMN_FULLPATH) { + if (vp && (alp->commonattr & ATTR_CMN_FULLPATH)) { int len = MAXPATHLEN; int err; + /* call build_path making sure NOT to use the cache-only behavior */ err = build_path(vp, fullpathptr, len, &len, 0, vfs_context_current()); if (err) { error = err; goto out; } - fullpathlen = 0; + *fullpathlenp = 0; if (fullpathptr){ - fullpathlen = strlen(fullpathptr); + *fullpathlenp = strlen(fullpathptr); } - varsize += roundup(fullpathlen+1, 4); + *varsizep += roundup(((*fullpathlenp) + 1), 4); } /* @@ -1628,22 +2174,112 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, * XXX This needs to change at some point; since the blob is opaque in * user-space this is OK. 
*/ - if ((al.commonattr & ATTR_CMN_EXTENDED_SECURITY) && - VATTR_IS_SUPPORTED(&va, va_acl) && - (va.va_acl != NULL)) { + if ((alp->commonattr & ATTR_CMN_EXTENDED_SECURITY) && + VATTR_IS_SUPPORTED(vap, va_acl) && + (vap->va_acl != NULL)) { /* * Since we have a kauth_acl_t (not a kauth_filesec_t), we have to check against * KAUTH_FILESEC_NOACL ourselves */ - if (va.va_acl->acl_entrycount == KAUTH_FILESEC_NOACL) { - varsize += roundup((KAUTH_FILESEC_SIZE(0)), 4); + if (vap->va_acl->acl_entrycount == KAUTH_FILESEC_NOACL) { + *varsizep += roundup((KAUTH_FILESEC_SIZE(0)), 4); } else { - varsize += roundup ((KAUTH_FILESEC_SIZE(va.va_acl->acl_entrycount)), 4); + *varsizep += roundup ((KAUTH_FILESEC_SIZE(vap->va_acl->acl_entrycount)), 4); + } + } + +out: + return (error); +} + +static errno_t +vfs_attr_pack_internal(vnode_t vp, uio_t auio, struct attrlist *alp, + uint64_t options, struct vnode_attr *vap, __unused void *fndesc, + vfs_context_t ctx, int is_bulk, enum vtype vtype, ssize_t fixedsize) +{ + struct _attrlist_buf ab; + ssize_t buf_size; + size_t copy_size; + ssize_t varsize; + const char *vname = NULL; + const char *cnp; + ssize_t cnl; + char *fullpathptr; + ssize_t fullpathlen; + int error; + int proc_is64; + int return_valid; + int pack_invalid; + int alloc_local_buf; + + proc_is64 = proc_is64bit(vfs_context_proc(ctx)); + ab.base = NULL; + cnp = "unknown"; + cnl = 0; + fullpathptr = NULL; + fullpathlen = 0; + error = 0; + alloc_local_buf = 0; + + buf_size = (ssize_t)uio_resid(auio); + if ((buf_size <= 0) || (uio_iovcnt(auio) > 1)) + return (EINVAL); + + copy_size = 0; + /* Check for special packing semantics */ + return_valid = (alp->commonattr & ATTR_CMN_RETURNED_ATTRS) ? 1 : 0; + pack_invalid = (options & FSOPT_PACK_INVAL_ATTRS) ? 1 : 0; + + if (pack_invalid) { + /* Generate a valid mask for post processing */ + bcopy(&(alp->commonattr), &ab.valid, sizeof (attribute_set_t)); + } + + /* did we ask for something the filesystem doesn't support? */ + if (vap->va_active && !VATTR_ALL_SUPPORTED(vap)) { + vattr_get_alt_data(vp, alp, vap, return_valid, is_bulk, + ctx); + + /* check again */ + if (!VATTR_ALL_SUPPORTED(vap)) { + if (return_valid && pack_invalid) { + /* Fix up valid mask for post processing */ + getattrlist_fixupattrs(&ab.valid, vap); + + /* Force packing of everything asked for */ + vap->va_supported = vap->va_active; + } else if (return_valid) { + /* Adjust the requested attributes */ + getattrlist_fixupattrs( + (attribute_set_t *)&(alp->commonattr), vap); + } else { + error = EINVAL; + } + } + + if (error) + goto out; + } + + if (alp->commonattr & (ATTR_CMN_FULLPATH)) { + fullpathptr = (char*) kalloc(MAXPATHLEN); + if (fullpathptr == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx,vp, "ATTRLIST - ERROR: cannot allocate fullpath buffer"); + goto out; } } + /* + * Compute variable-space requirements. + */ + error = calc_varsize(vp, alp, vap, &varsize, fullpathptr, &fullpathlen, + &vname, &cnp, &cnl); + if (error) + goto out; + /* * Allocate a target buffer for attribute results. 
* @@ -1658,40 +2294,90 @@ VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); goto out; } - MALLOC(ab.base, char *, ab.allocated, M_TEMP, M_WAITOK); - if (ab.base == NULL) { - error = ENOMEM; - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate %d for copy buffer", ab.allocated); - goto out; - } - /* set the S_IFMT bits for the mode */ - if (al.commonattr & ATTR_CMN_ACCESSMASK) { - switch (vp->v_type) { - case VREG: - va.va_mode |= S_IFREG; - break; - case VDIR: - va.va_mode |= S_IFDIR; - break; - case VBLK: - va.va_mode |= S_IFBLK; - break; - case VCHR: - va.va_mode |= S_IFCHR; - break; - case VLNK: - va.va_mode |= S_IFLNK; - break; - case VSOCK: - va.va_mode |= S_IFSOCK; - break; - case VFIFO: - va.va_mode |= S_IFIFO; - break; - default: - error = EBADF; + /* + * Special handling for bulk calls: align to 8 (and only if enough + * space is left). + */ + if (is_bulk) { + if (buf_size < ab.allocated) { goto out; + } else { + uint32_t newlen; + + newlen = (ab.allocated + 7) & ~0x07; + /* Align only if enough space for alignment */ + if (newlen <= (uint32_t)buf_size) + ab.allocated = newlen; + } + } + + /* + * See if we can reuse the buffer passed in, i.e. it is a kernel buffer + * and big enough. + */ + if (uio_isuserspace(auio) || (buf_size < ab.allocated)) { + MALLOC(ab.base, char *, ab.allocated, M_TEMP, + M_ZERO | M_WAITOK); + alloc_local_buf = 1; + } else { + /* + * In case this is a kernel buffer and sufficiently + * big, this function will try to use that buffer + * instead of allocating another buffer and bcopy'ing + * into it. + * + * The calculation below figures out where to start + * writing in the buffer and once all the data has been + * filled in, uio_resid is updated to reflect the usage + * of the buffer. + * + * uio_offset cannot be used here to determine the + * starting location as uio_offset could be set to a + * value which has nothing to do with the location + * in the buffer.
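A kernel-side sketch of a caller deliberately hitting that in-place path: hand vfs_attr_pack() a sysspace uio over its own sufficiently large buffer so the intermediate MALLOC and uiomove are skipped (function name invented; vap is assumed to have been populated by a prior vnode_getattr()):

errno_t
pack_into_kernel_buffer(vnode_t vp, struct attrlist *alp,
    struct vnode_attr *vap, void *kbuf, size_t kbufsize, vfs_context_t ctx)
{
	uio_t auio;
	errno_t error;

	if ((auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ)) == NULL)
		return (ENOMEM);
	uio_addiov(auio, CAST_USER_ADDR_T(kbuf), kbufsize);

	/* Sysspace + big enough: the attributes are packed directly into kbuf */
	error = vfs_attr_pack(vp, auio, alp, 0, vap, NULL, ctx);
	uio_free(auio);
	return (error);
}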
+ */ + ab.base = (char *)uio_curriovbase(auio) + + ((ssize_t)uio_curriovlen(auio) - buf_size); + bzero(ab.base, ab.allocated); + } + + if (ab.base == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate %d for copy buffer", ab.allocated); + goto out; + } + + + /* set the S_IFMT bits for the mode */ + if (alp->commonattr & ATTR_CMN_ACCESSMASK) { + if (vp) { + switch (vp->v_type) { + case VREG: + vap->va_mode |= S_IFREG; + break; + case VDIR: + vap->va_mode |= S_IFDIR; + break; + case VBLK: + vap->va_mode |= S_IFBLK; + break; + case VCHR: + vap->va_mode |= S_IFCHR; + break; + case VLNK: + vap->va_mode |= S_IFLNK; + break; + case VSOCK: + vap->va_mode |= S_IFSOCK; + break; + case VFIFO: + vap->va_mode |= S_IFIFO; + break; + default: + error = EBADF; + goto out; + } } } @@ -1706,547 +2392,1184 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, ab.varcursor = ab.base + fixedsize; ab.needed = ab.allocated; - /* common attributes **************************************************/ - if (al.commonattr & ATTR_CMN_NAME) { - attrlist_pack_string(&ab, cnp, cnl); - ab.actual.commonattr |= ATTR_CMN_NAME; - } - if ((al.commonattr & ATTR_CMN_ERROR) && - (!return_valid || pack_invalid)) { - ATTR_PACK4(ab, 0); - ab.actual.commonattr |= ATTR_CMN_ERROR; - } - if (al.commonattr & ATTR_CMN_DEVID) { - ATTR_PACK4(ab, vp->v_mount->mnt_vfsstat.f_fsid.val[0]); - ab.actual.commonattr |= ATTR_CMN_DEVID; - } - if (al.commonattr & ATTR_CMN_FSID) { - ATTR_PACK8(ab, vp->v_mount->mnt_vfsstat.f_fsid); - ab.actual.commonattr |= ATTR_CMN_FSID; - } - if (al.commonattr & ATTR_CMN_OBJTYPE) { - ATTR_PACK4(ab, vtype); - ab.actual.commonattr |= ATTR_CMN_OBJTYPE; + /* common attributes ************************************************/ + error = attr_pack_common(ctx, vp, alp, &ab, vap, proc_is64, cnp, cnl, + fullpathptr, fullpathlen, return_valid, pack_invalid, vtype, is_bulk); + + /* directory attributes *********************************************/ + if (!error && alp->dirattr && (vtype == VDIR)) { + error = attr_pack_dir(vp, alp, &ab, vap); } - if (al.commonattr & ATTR_CMN_OBJTAG) { - ATTR_PACK4(ab, vp->v_tag); - ab.actual.commonattr |= ATTR_CMN_OBJTAG; + + /* file attributes **************************************************/ + if (!error && alp->fileattr && (vtype != VDIR)) { + error = attr_pack_file(ctx, vp, alp, &ab, vap, return_valid, + pack_invalid, is_bulk); } - if (al.commonattr & ATTR_CMN_OBJID) { - fsobj_id_t f; - /* - * Carbon can't deal with us reporting the target ID - * for links. So we ask the filesystem to give us the - * source ID as well, and if it gives us one, we use - * it instead. - */ - if (VATTR_IS_SUPPORTED(&va, va_linkid)) { - f.fid_objno = va.va_linkid; - } else { - f.fid_objno = va.va_fileid; + + if (error) + goto out; + + /* diagnostic */ + if (!return_valid && (ab.fixedcursor - ab.base) != fixedsize) + panic("packed field size mismatch; allocated %ld but packed %ld for common %08x vol %08x", + fixedsize, (long) (ab.fixedcursor - ab.base), alp->commonattr, alp->volattr); + if (!return_valid && ab.varcursor != (ab.base + ab.needed)) + panic("packed variable field size mismatch; used %ld but expected %ld", (long) (ab.varcursor - ab.base), ab.needed); + + /* + * In the compatible case, we report the smaller of the required and returned sizes. + * If the FSOPT_REPORT_FULLSIZE option is supplied, we report the full (required) size + * of the result buffer, even if we copied less out. 
The caller knows how big a buffer + * they gave us, so they can always check for truncation themselves. + */ + *(uint32_t *)ab.base = (options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); + + /* Return attribute set output if requested. */ + if (return_valid) { + ab.actual.commonattr |= ATTR_CMN_RETURNED_ATTRS; + if (pack_invalid) { + /* Only report the attributes that are valid */ + ab.actual.commonattr &= ab.valid.commonattr; + ab.actual.dirattr &= ab.valid.dirattr; + ab.actual.fileattr &= ab.valid.fileattr; } - f.fid_generation = 0; - ATTR_PACK8(ab, f); - ab.actual.commonattr |= ATTR_CMN_OBJID; + bcopy(&ab.actual, ab.base + sizeof(uint32_t), sizeof (ab.actual)); } - if (al.commonattr & ATTR_CMN_OBJPERMANENTID) { - fsobj_id_t f; + + copy_size = imin(buf_size, ab.allocated); + + /* Only actually copy out as much as the user buffer can hold */ + if (alloc_local_buf) { + error = uiomove(ab.base, copy_size, auio); + } else { + off_t orig_offset = uio_offset(auio); + /* - * Carbon can't deal with us reporting the target ID - * for links. So we ask the filesystem to give us the - * source ID as well, and if it gives us one, we use - * it instead. + * The buffer in the uio struct was used directly + * (i.e. it was a kernel buffer and big enough + * to hold the data required) in order to avoid + * unneeded allocation and copies. + * + * At this point, update the resid value to what it + * would be if this was the result of a uiomove. The + * offset is also incremented; it may not + * mean anything to the caller, but that is what + * uiomove does as well. */ - if (VATTR_IS_SUPPORTED(&va, va_linkid)) { - f.fid_objno = va.va_linkid; - } else { - f.fid_objno = va.va_fileid; - } - f.fid_generation = 0; - ATTR_PACK8(ab, f); - ab.actual.commonattr |= ATTR_CMN_OBJPERMANENTID; + uio_setresid(auio, buf_size - copy_size); + uio_setoffset(auio, orig_offset + (off_t)copy_size); } - if (al.commonattr & ATTR_CMN_PAROBJID) { - fsobj_id_t f; - f.fid_objno = va.va_parentid; /* could be lossy here!
*/ - f.fid_generation = 0; - ATTR_PACK8(ab, f); - ab.actual.commonattr |= ATTR_CMN_PAROBJID; - } - if (al.commonattr & ATTR_CMN_SCRIPT) { - if (VATTR_IS_SUPPORTED(&va, va_encoding)) { - ATTR_PACK4(ab, va.va_encoding); - ab.actual.commonattr |= ATTR_CMN_SCRIPT; - } else if (!return_valid || pack_invalid) { - ATTR_PACK4(ab, 0x7e); - } - } - if (al.commonattr & ATTR_CMN_CRTIME) { - ATTR_PACK_TIME(ab, va.va_create_time, proc_is64); - ab.actual.commonattr |= ATTR_CMN_CRTIME; - } - if (al.commonattr & ATTR_CMN_MODTIME) { - ATTR_PACK_TIME(ab, va.va_modify_time, proc_is64); - ab.actual.commonattr |= ATTR_CMN_MODTIME; - } - if (al.commonattr & ATTR_CMN_CHGTIME) { - ATTR_PACK_TIME(ab, va.va_change_time, proc_is64); - ab.actual.commonattr |= ATTR_CMN_CHGTIME; +out: + if (vname) + vnode_putname(vname); + if (fullpathptr) + kfree(fullpathptr, MAXPATHLEN); + if (ab.base != NULL && alloc_local_buf) + FREE(ab.base, M_TEMP); + return (error); +} + +errno_t +vfs_attr_pack(vnode_t vp, uio_t uio, struct attrlist *alp, uint64_t options, + struct vnode_attr *vap, __unused void *fndesc, vfs_context_t ctx) +{ + int error; + ssize_t fixedsize; + uint64_t orig_active; + struct attrlist orig_al; + enum vtype v_type; + + if (vp) + v_type = vnode_vtype(vp); + else + v_type = vap->va_objtype; + + orig_al = *alp; + orig_active = vap->va_active; + vap->va_active = 0; + + error = getattrlist_setupvattr_all(alp, vap, v_type, &fixedsize, + proc_is64bit(vfs_context_proc(ctx))); + + /* + * Ugly hack to correctly report fsids. va_fsid is 32 bits and + * there is va_fsid64 as well but filesystems have to say that + * both are supported so that the value can be used correctly. + * So we set va_fsid if the filesystem has only set va_fsid64. + */ + + if ((alp->commonattr & ATTR_CMN_FSID) && + VATTR_IS_SUPPORTED(vap, va_fsid64)) + VATTR_SET_SUPPORTED(vap, va_fsid); + + if (error) { + VFS_DEBUG(ctx, vp, + "ATTRLIST - ERROR: setup for request failed"); + goto out; } - if (al.commonattr & ATTR_CMN_ACCTIME) { - ATTR_PACK_TIME(ab, va.va_access_time, proc_is64); - ab.actual.commonattr |= ATTR_CMN_ACCTIME; + + error = vfs_attr_pack_internal(vp, uio, alp, + options|FSOPT_REPORT_FULLSIZE, vap, NULL, ctx, 1, v_type, + fixedsize); + + VATTR_CLEAR_SUPPORTED_ALL(vap); + vap->va_active = orig_active; + *alp = orig_al; +out: + return (error); +} + +/* + * Obtain attribute information about a filesystem object. + * + * Note: The alt_name parameter can be used by the caller to pass in the vnode + * name obtained from some authoritative source (e.g. readdir vnop); where + * filesystems' getattr vnops do not support ATTR_CMN_NAME, the alt_name will be + * used as the ATTR_CMN_NAME attribute returned in vnode_attr.va_name.
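Putting the new KPIs together, this is roughly how a bulk consumer is expected to drive them: translate the attrlist, have the filesystem fill in the vnode_attr, then pack at the uio's current position (sketch only; va_name buffer management and va_acl cleanup are omitted):

static int
pack_one_entry(vnode_t vp, struct attrlist *alp, uio_t auio,
    vfs_context_t ctx)
{
	struct vnode_attr va;
	ssize_t fixedsize;
	int error;

	VATTR_INIT(&va);
	/* Turn the requested attrlist into va_active bits plus a fixed size. */
	error = vfs_setup_vattr_from_attrlist(alp, &va, vnode_vtype(vp),
	    &fixedsize, ctx);
	if (error)
		return (error);
	/* Ask the filesystem for the requested fields... */
	if ((error = vnode_getattr(vp, &va, ctx)) != 0)
		return (error);
	/* ...and pack them at the uio's current position. */
	return (vfs_attr_pack(vp, auio, alp, 0, &va, NULL, ctx));
}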
+ * + */ +static int +getattrlist_internal(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, + user_addr_t attributeBuffer, size_t bufferSize, uint64_t options, + enum uio_seg segflg, char* alt_name) +{ + struct vnode_attr va; + kauth_action_t action; + ssize_t fixedsize; + char *va_name; + int proc_is64; + int error; + int return_valid; + int pack_invalid; + int vtype = 0; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1)]; + + proc_is64 = proc_is64bit(vfs_context_proc(ctx)); + + if (segflg == UIO_USERSPACE) { + if (proc_is64) + segflg = UIO_USERSPACE64; + else + segflg = UIO_USERSPACE32; } - if (al.commonattr & ATTR_CMN_BKUPTIME) { - ATTR_PACK_TIME(ab, va.va_backup_time, proc_is64); - ab.actual.commonattr |= ATTR_CMN_BKUPTIME; + auio = uio_createwithbuffer(1, 0, segflg, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, attributeBuffer, bufferSize); + + VATTR_INIT(&va); + va_name = NULL; + + if (alp->bitmapcount != ATTR_BIT_MAP_COUNT) { + error = EINVAL; + goto out; } + + VFS_DEBUG(ctx, vp, "%p ATTRLIST - %s request common %08x vol %08x file %08x dir %08x fork %08x %sfollow on '%s'", + vp, vfs_context_proc(ctx)->p_comm, alp->commonattr, alp->volattr, alp->fileattr, alp->dirattr, alp->forkattr, + (options & FSOPT_NOFOLLOW) ? "no":"", vp->v_name); + +#if CONFIG_MACF + error = mac_vnode_check_getattrlist(ctx, vp, alp); + if (error) + goto out; +#endif /* MAC */ + /* - * They are requesting user access, we should obtain this before getting - * the finder info. For some network file systems this is a performance - * improvement. + * It is legal to request volume or file attributes, + * but not both. */ - if (al.commonattr & ATTR_CMN_USERACCESS) { /* this is expensive */ - if (vtype == VDIR) { - if (vnode_authorize(vp, NULL, - KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_DELETE_CHILD, ctx) == 0) - perms |= W_OK; - if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_LIST_DIRECTORY, ctx) == 0) - perms |= R_OK; - if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_SEARCH, ctx) == 0) - perms |= X_OK; - } else { - if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA, ctx) == 0) - perms |= W_OK; - if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA, ctx) == 0) - perms |= R_OK; - if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE, ctx) == 0) - perms |= X_OK; + if (alp->volattr) { + if (alp->fileattr || alp->dirattr || alp->forkattr) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: mixed volume/file/directory/fork attributes"); + goto out; } + /* handle volume attribute request */ + error = getvolattrlist(ctx, vp, alp, attributeBuffer, + bufferSize, options, segflg, proc_is64); + goto out; } - - if (al.commonattr & ATTR_CMN_FNDRINFO) { - uio_t auio; - size_t fisize = 32; - char uio_buf[UIO_SIZEOF(1)]; - if ((auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, - uio_buf, sizeof(uio_buf))) == NULL) { - error = ENOMEM; + /* + * ATTR_CMN_GEN_COUNT and ATTR_CMN_DOCUMENT_ID reuse the bits + * originally allocated to ATTR_CMN_NAMEDATTRCOUNT and + * ATTR_CMN_NAMEDATTRLIST. + */ + if ((alp->commonattr & (ATTR_CMN_GEN_COUNT | ATTR_CMN_DOCUMENT_ID)) && + !(options & FSOPT_ATTR_CMN_EXTENDED)) { + error = EINVAL; + goto out; + } + + /* Check for special packing semantics */ + return_valid = (alp->commonattr & ATTR_CMN_RETURNED_ATTRS) ? 1 : 0; + pack_invalid = (options & FSOPT_PACK_INVAL_ATTRS) ?
1 : 0; + if (pack_invalid) { + /* FSOPT_PACK_INVAL_ATTRS requires ATTR_CMN_RETURNED_ATTRS */ + if (!return_valid || alp->forkattr) { + error = EINVAL; goto out; } - uio_addiov(auio, CAST_USER_ADDR_T(ab.fixedcursor), fisize); - error = vn_getxattr(vp, XATTR_FINDERINFO_NAME, auio, - &fisize, XATTR_NOSECURITY, ctx); - uio_free(auio); + /* Keep invalid attrs from being uninitialized */ + bzero(&va, sizeof (va)); + } + + /* Pick up the vnode type. If the FS is bad and changes vnode types on us, we + * will have a valid snapshot that we can work from here. + */ + vtype = vp->v_type; + + /* + * Set up the vnode_attr structure and authorise. + */ + if ((error = getattrlist_setupvattr(alp, &va, &fixedsize, &action, proc_is64, (vtype == VDIR))) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: setup for request failed"); + goto out; + } + if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: authorisation failed/denied"); + goto out; + } + + + + if (va.va_active != 0) { + uint64_t va_active = va.va_active; + /* - * Default to zeros if its not available, - * unless ATTR_CMN_RETURNED_ATTRS was requested. + * If we're going to ask for va_name, allocate a buffer to point it at */ - if (error && - (!return_valid || pack_invalid) && - ((error == ENOATTR) || (error == ENOENT) || - (error == ENOTSUP) || (error == EPERM))) { - VFS_DEBUG(ctx, vp, "ATTRLIST - No system.finderinfo attribute, returning zeroes"); - bzero(ab.fixedcursor, 32); - error = 0; + if (VATTR_IS_ACTIVE(&va, va_name)) { + MALLOC_ZONE(va_name, char *, MAXPATHLEN, M_NAMEI, + M_WAITOK); + if (va_name == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: cannot allocate va_name buffer"); + goto out; + } } - if (error == 0) { - ab.fixedcursor += 32; - ab.actual.commonattr |= ATTR_CMN_FNDRINFO; - } else if (!return_valid) { - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: reading system.finderinfo attribute"); + + va.va_name = va_name; + + /* + * Call the filesystem. + */ + if ((error = vnode_getattr(vp, &va, ctx)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); goto out; } + + + /* + * If ATTR_CMN_NAME is not supported by filesystem and the + * caller has provided a name, use that. + * A (buggy) filesystem may change fields which belong + * to us. We try to deal with that here as well. + */ + va.va_active = va_active; + if (alt_name && va_name && + !(VATTR_IS_SUPPORTED(&va, va_name))) { + strlcpy(va_name, alt_name, MAXPATHLEN); + VATTR_SET_SUPPORTED(&va, va_name); + } + va.va_name = va_name; } - if (al.commonattr & ATTR_CMN_OWNERID) { - ATTR_PACK4(ab, va.va_uid); - ab.actual.commonattr |= ATTR_CMN_OWNERID; + + error = vfs_attr_pack_internal(vp, auio, alp, options, &va, NULL, ctx, + 0, vtype, fixedsize); + +out: + if (va_name) + FREE_ZONE(va_name, MAXPATHLEN, M_NAMEI); + if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) + kauth_acl_free(va.va_acl); + + VFS_DEBUG(ctx, vp, "ATTRLIST - returning %d", error); + return(error); +} + +int +fgetattrlist(proc_t p, struct fgetattrlist_args *uap, __unused int32_t *retval) +{ + vfs_context_t ctx; + vnode_t vp; + int error; + struct attrlist al; + + ctx = vfs_context_current(); + error = 0; + + if ((error = file_vnode(uap->fd, &vp)) != 0) + return (error); + + if ((error = vnode_getwithref(vp)) != 0) { + file_drop(uap->fd); + return(error); } - if (al.commonattr & ATTR_CMN_GRPID) { - ATTR_PACK4(ab, va.va_gid); - ab.actual.commonattr |= ATTR_CMN_GRPID; + + /* + * Fetch the attribute request. 
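The packing-semantics pair checked above looks like this from userspace: request ATTR_CMN_RETURNED_ATTRS, pass FSOPT_PACK_INVAL_ATTRS, and consult the returned attribute_set_t before trusting any slot (sketch; a 64-bit process is assumed for the timespec layout, and the names are invented):

#include <sys/attr.h>
#include <sys/time.h>
#include <string.h>
#include <stdint.h>

struct bk_buf {
	uint32_t        length;
	attribute_set_t returned;     /* ATTR_CMN_RETURNED_ATTRS */
	struct timespec backup_time;  /* zero-filled if unsupported */
};

int
backup_time_if_supported(const char *path, struct timespec *out)
{
	struct attrlist al;
	struct bk_buf ab;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_RETURNED_ATTRS | ATTR_CMN_BKUPTIME;

	if (getattrlist(path, &al, &ab, sizeof(ab),
	    FSOPT_PACK_INVAL_ATTRS) != 0)
		return (-1);
	if (!(ab.returned.commonattr & ATTR_CMN_BKUPTIME))
		return (0);	/* slot was packed but is not valid */
	*out = ab.backup_time;
	return (1);
}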
+ */ + error = copyin(uap->alist, &al, sizeof(al)); + if (error) + goto out; + + /* Default to using the vnode's name. */ + error = getattrlist_internal(ctx, vp, &al, uap->attributeBuffer, + uap->bufferSize, uap->options, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : \ + UIO_USERSPACE32), NULL); + +out: + file_drop(uap->fd); + if (vp) + vnode_put(vp); + + return error; +} + +static int +getattrlistat_internal(vfs_context_t ctx, user_addr_t path, + struct attrlist *alp, user_addr_t attributeBuffer, size_t bufferSize, + uint64_t options, enum uio_seg segflg, enum uio_seg pathsegflg, int fd) +{ + struct nameidata nd; + vnode_t vp; + int32_t nameiflags; + int error; + + nameiflags = 0; + /* + * Look up the file. + */ + if (!(options & FSOPT_NOFOLLOW)) + nameiflags |= FOLLOW; + + nameiflags |= AUDITVNPATH1; + NDINIT(&nd, LOOKUP, OP_GETATTR, nameiflags, pathsegflg, + path, ctx); + + error = nameiat(&nd, fd); + + if (error) + return (error); + + vp = nd.ni_vp; + + error = getattrlist_internal(ctx, vp, alp, attributeBuffer, + bufferSize, options, segflg, NULL); + + /* Retain the namei reference until the getattrlist completes. */ + nameidone(&nd); + vnode_put(vp); + return (error); +} + +int +getattrlist(proc_t p, struct getattrlist_args *uap, __unused int32_t *retval) +{ + enum uio_seg segflg; + struct attrlist al; + int error; + + segflg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + + /* + * Fetch the attribute request. + */ + error = copyin(uap->alist, &al, sizeof(al)); + if (error) + return error; + + return (getattrlistat_internal(vfs_context_current(), + CAST_USER_ADDR_T(uap->path), &al, + CAST_USER_ADDR_T(uap->attributeBuffer), uap->bufferSize, + (uint64_t)uap->options, segflg, segflg, AT_FDCWD)); +} + +int +getattrlistat(proc_t p, struct getattrlistat_args *uap, __unused int32_t *retval) +{ + enum uio_seg segflg; + struct attrlist al; + int error; + + segflg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + + /* + * Fetch the attribute request. + */ + error = copyin(uap->alist, &al, sizeof(al)); + if (error) + return error; + + return (getattrlistat_internal(vfs_context_current(), + CAST_USER_ADDR_T(uap->path), &al, + CAST_USER_ADDR_T(uap->attributeBuffer), uap->bufferSize, + (uint64_t)uap->options, segflg, segflg, uap->fd)); +} + +/* + * This refills the per-fd direntries cache by issuing a VNOP_READDIR. + * It attempts to find a size the filesystem responds to, so it first + * tries a 1 direntry sized buffer, going from 1 to 2 to 4 direntry + * sized buffers for the readdir. If the filesystem does not respond + * to 4 * direntry, it returns the error reported by the filesystem (if any) + * and sets EOF. + * + * This function also tries again if the last "refill" returned EOF, + * to pick up any additional entries added after the last refill. + */ +static int +refill_fd_direntries(vfs_context_t ctx, vnode_t dvp, struct fd_vn_data *fvd, + int *eofflagp) +{ + uio_t rdir_uio; + char uio_buf[UIO_SIZEOF(1)]; + size_t rdirbufsiz; + size_t rdirbufused; + int eofflag; + int nentries; + int error; + + error = 0; + + /* + * If there is a cached allocation size of the dirbuf that should be + * allocated, use that. Otherwise start with an allocation size of + * FV_DIRBUF_START_SIZ. This start size may need to be increased if the + * filesystem doesn't respond to the initial size.
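The at-variant just added above is used from userspace like fstatat(2): the path is resolved relative to the directory fd. A brief sketch (names invented):

#include <sys/attr.h>
#include <string.h>
#include <stdint.h>

struct objtype_buf {
	uint32_t     length;
	fsobj_type_t objtype;
};

int
objtype_at(int dirfd, const char *name, fsobj_type_t *typep)
{
	struct attrlist al;
	struct objtype_buf ab;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_OBJTYPE;

	if (getattrlistat(dirfd, name, &al, &ab, sizeof(ab),
	    FSOPT_NOFOLLOW) != 0)
		return (-1);
	*typep = ab.objtype;
	return (0);
}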
+ */ + + if (fvd->fv_offset && fvd->fv_bufallocsiz) { + rdirbufsiz = fvd->fv_bufallocsiz; + } else { + rdirbufsiz = FV_DIRBUF_START_SIZ; } - if (al.commonattr & ATTR_CMN_ACCESSMASK) { - ATTR_PACK4(ab, va.va_mode); - ab.actual.commonattr |= ATTR_CMN_ACCESSMASK; + + *eofflagp = 0; + + rdir_uio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + +retry_alloc: + /* + * Don't explicitly zero out this buffer since this is + * not copied out to user space. + */ + if (!fvd->fv_buf) { + MALLOC(fvd->fv_buf, caddr_t, rdirbufsiz, M_FD_DIRBUF, M_WAITOK); + fvd->fv_bufdone = 0; } - if (al.commonattr & ATTR_CMN_FLAGS) { - ATTR_PACK4(ab, va.va_flags); - ab.actual.commonattr |= ATTR_CMN_FLAGS; + + uio_reset(rdir_uio, fvd->fv_eoff, UIO_SYSSPACE, UIO_READ); + uio_addiov(rdir_uio, CAST_USER_ADDR_T(fvd->fv_buf), rdirbufsiz); + + /* + * Some filesystems do not set nentries or eofflag... + */ + eofflag = 0; + nentries = 0; + error = vnode_readdir64(dvp, rdir_uio, VNODE_READDIR_EXTENDED, + &eofflag, &nentries, ctx); + + rdirbufused = rdirbufsiz - (size_t)uio_resid(rdir_uio); + + if (!error && (rdirbufused > 0) && (rdirbufused <= rdirbufsiz)) { + /* Save offsets */ + fvd->fv_soff = fvd->fv_eoff; + fvd->fv_eoff = uio_offset(rdir_uio); + /* Save eofflag state but don't return EOF this time. */ + fvd->fv_eofflag = eofflag; + eofflag = 0; + /* Reset buffer parameters */ + fvd->fv_bufsiz = rdirbufused; + fvd->fv_bufdone = 0; + bzero(fvd->fv_buf + rdirbufused, rdirbufsiz - rdirbufused); + /* Cache the allocation size the filesystem responded to */ + fvd->fv_bufallocsiz = rdirbufsiz; + } else if (!eofflag && (rdirbufsiz < FV_DIRBUF_MAX_SIZ)) { + /* + * Some filesystems have higher requirements for the + * smallest buffer size they will respond to for a + * directory listing. Start (relatively) small but increase + * it up to FV_DIRBUF_MAX_SIZ. Most should be good with + * 1 * direntry. Cache the size found so that this does not + * need to be done every time. This also means that an error + * from VNOP_READDIR is ignored until at least FV_DIRBUF_MAX_SIZ + * has been attempted. + */ + FREE(fvd->fv_buf, M_FD_DIRBUF); + fvd->fv_buf = NULL; + rdirbufsiz = 2 * rdirbufsiz; + fvd->fv_bufallocsiz = 0; + goto retry_alloc; + } else if (!error) { + /* + * The filesystem did not set eofflag but also did not + * return any entries (or an error). It is presumed that + * EOF has been reached. + */ + fvd->fv_eofflag = eofflag = 1; } - if (attr_extended) { - if (al.commonattr & ATTR_CMN_GEN_COUNT) { - if (VATTR_IS_SUPPORTED(&va, va_gen)) { - ATTR_PACK4(ab, va.va_gen); - ab.actual.commonattr |= ATTR_CMN_GEN_COUNT; - } else if (!return_valid || pack_invalid) { - ATTR_PACK4(ab, 0); - } - } - if (al.commonattr & ATTR_CMN_DOCUMENT_ID) { - if (VATTR_IS_SUPPORTED(&va, va_document_id)) { - ATTR_PACK4(ab, va.va_document_id); - ab.actual.commonattr |= ATTR_CMN_DOCUMENT_ID; - } else if (!return_valid || pack_invalid) { - ATTR_PACK4(ab, 0); - } - } + /* + * If the filesystem returned an error and it had previously returned + * EOF, ignore the error and set EOF. + */ + if (error && fvd->fv_eofflag) { + eofflag = 1; + error = 0; } - /* We already obtain the user access, so just fill in the buffer here */ - if (al.commonattr & ATTR_CMN_USERACCESS) { -#if CONFIG_MACF - /* - * Rather than MAC preceding DAC, in this case we want - * the smallest set of permissions granted by both MAC & DAC - * checks. We won't add back any permissions.
- */ - if (perms & W_OK) - if (mac_vnode_check_access(ctx, vp, W_OK) != 0) - perms &= ~W_OK; - if (perms & R_OK) - if (mac_vnode_check_access(ctx, vp, R_OK) != 0) - perms &= ~R_OK; - if (perms & X_OK) - if (mac_vnode_check_access(ctx, vp, X_OK) != 0) - perms &= ~X_OK; -#endif /* MAC */ - VFS_DEBUG(ctx, vp, "ATTRLIST - granting perms %d", perms); - ATTR_PACK4(ab, perms); - ab.actual.commonattr |= ATTR_CMN_USERACCESS; + + /* + * If the directory has hit either EOF or an error, now is a good + * time to free up the directory entry buffer. + */ + if ((error || eofflag) && fvd->fv_buf) { + FREE(fvd->fv_buf, M_FD_DIRBUF); + fvd->fv_buf = NULL; } - if (al.commonattr & ATTR_CMN_EXTENDED_SECURITY) { - if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { - struct kauth_filesec fsec; - /* - * We want to return a kauth_filesec (for now), but all we have is a kauth_acl. - */ - fsec.fsec_magic = KAUTH_FILESEC_MAGIC; - fsec.fsec_owner = kauth_null_guid; - fsec.fsec_group = kauth_null_guid; - attrlist_pack_variable2(&ab, &fsec, __offsetof(struct kauth_filesec, fsec_acl), va.va_acl, KAUTH_ACL_COPYSIZE(va.va_acl)); - ab.actual.commonattr |= ATTR_CMN_EXTENDED_SECURITY; - } else if (!return_valid || pack_invalid) { - attrlist_pack_variable(&ab, NULL, 0); + + *eofflagp = eofflag; + + return (error); +} + +/* + * Gets the current direntry. To advance to the next direntry, this has to be + * paired with a direntry_done. + * + * Since directories have restrictions on where directory enumeration + * can restart from, entries are first read into a per-fd directory entry + * "cache" and entries are provided from that cache. + */ +static int +get_direntry(vfs_context_t ctx, vnode_t dvp, struct fd_vn_data *fvd, + int *eofflagp, struct direntry **dpp) +{ + int eofflag; + int error; + + *eofflagp = 0; + *dpp = NULL; + error = 0; + if (!fvd->fv_bufsiz) { + error = refill_fd_direntries(ctx, dvp, fvd, &eofflag); + if (error) { + return (error); + } + if (eofflag) { + *eofflagp = eofflag; + return (error); } } - if (al.commonattr & ATTR_CMN_UUID) { - if (VATTR_IS_SUPPORTED(&va, va_uuuid)) { - ATTR_PACK(&ab, va.va_uuuid); - ab.actual.commonattr |= ATTR_CMN_UUID; - } else if (!return_valid || pack_invalid) { - ATTR_PACK(&ab, kauth_null_guid); + + *dpp = (struct direntry *)(fvd->fv_buf + fvd->fv_bufdone); + return (error); +} + +/* + * Advances to the next direntry. + */ +static void +direntry_done(struct fd_vn_data *fvd) +{ + struct direntry *dp; + + dp = (struct direntry *)(fvd->fv_buf + fvd->fv_bufdone); + if (dp->d_reclen) { + fvd->fv_bufdone += dp->d_reclen; + if (fvd->fv_bufdone > fvd->fv_bufsiz) { + fvd->fv_bufdone = fvd->fv_bufsiz; } + } else { + fvd->fv_bufdone = fvd->fv_bufsiz; } - if (al.commonattr & ATTR_CMN_GRPUUID) { - if (VATTR_IS_SUPPORTED(&va, va_guuid)) { - ATTR_PACK(&ab, va.va_guuid); - ab.actual.commonattr |= ATTR_CMN_GRPUUID; - } else if (!return_valid || pack_invalid) { - ATTR_PACK(&ab, kauth_null_guid); + + /* + * If we're at the end of the fd direntries cache, reset the + * cache trackers. + */ + if (fvd->fv_bufdone == fvd->fv_bufsiz) { + fvd->fv_bufdone = 0; + fvd->fv_bufsiz = 0; + } +} + +/* + * A stripped-down version of getattrlist_internal to fill in only select + * attributes in case of an error from getattrlist_internal. + * + * It always returns at least ATTR_BULK_REQUIRED, i.e. the name (but may also + * return some other attributes which can be obtained from the vnode). + * + * It does not change the value of the passed-in attrlist.
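+ * + * Sketch of the resulting "error entry" layout (illustrative; exact + * offsets depend on what was requested): + * + * uint32_t total length of the entry + * attribute_set_t returned attrs: RETURNED_ATTRS | NAME (| ERROR) + * uint32_t the error value, only if ATTR_CMN_ERROR was asked for + * attrreference_t reference to the name string + * ... remaining fixed fields zeroed, then the name itself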
+ * + * The objective of this function is to fill in an "error entry", i.e. + * an entry with ATTR_CMN_RETURNED_ATTRS & ATTR_CMN_NAME. If the caller + * has also asked for ATTR_CMN_ERROR, it is filled in as well. + * + * Input + * vp - vnode pointer + * alp - pointer to attrlist struct. + * options - options passed to getattrlistbulk(2) + * kern_attr_buf - Kernel buffer to fill data (assumes offset 0 in + * buffer) + * kern_attr_buf_siz - Size of buffer. + * needs_error_attr - Whether the caller asked for ATTR_CMN_ERROR + * (derived from alp, not passed in) + * error_attr - This value is used to fill ATTR_CMN_ERROR (if the user + * has requested it in the attribute list). + * namebuf - This is used to fill in the name. + * ctx - vfs context of caller. + */ +static void +get_error_attributes(vnode_t vp, struct attrlist *alp, uint64_t options, + user_addr_t kern_attr_buf, size_t kern_attr_buf_siz, int error_attr, + caddr_t namebuf, vfs_context_t ctx) +{ + size_t fsiz, vsiz; + struct _attrlist_buf ab; + int namelen; + kauth_action_t action; + struct attrlist al; + int needs_error_attr = (alp->commonattr & ATTR_CMN_ERROR); + + /* + * To calculate the fixed size required, in the FSOPT_PACK_INVAL_ATTRS case, + * the fixedsize should include space for all the attributes asked by + * the user. Only ATTR_BULK_REQUIRED (and ATTR_CMN_ERROR) will be filled + * and will be valid. All other attributes are zeroed out later. + * + * ATTR_CMN_RETURNED_ATTRS, ATTR_CMN_ERROR and ATTR_CMN_NAME + * (the only valid ones being returned from here) happen to be + * the first three attributes by order as well. + */ + al = *alp; + if (!(options & FSOPT_PACK_INVAL_ATTRS)) { + /* + * In this case the fixedsize only needs to cover the + * attributes actually being returned. + */ + al.commonattr = ATTR_BULK_REQUIRED; + if (needs_error_attr) { + al.commonattr |= ATTR_CMN_ERROR; } + al.fileattr = 0; + al.dirattr = 0; } - if (al.commonattr & ATTR_CMN_FILEID) { - ATTR_PACK8(ab, va.va_fileid); - ab.actual.commonattr |= ATTR_CMN_FILEID; + + /* + * Passing NULL for the vnode_attr pointer is valid for + * getattrlist_setupvattr. All that is required is the size. + */ + fsiz = 0; + (void)getattrlist_setupvattr(&al, NULL, (ssize_t *)&fsiz, + &action, proc_is64bit(vfs_context_proc(ctx)), + (vnode_vtype(vp) == VDIR)); + + namelen = strlen(namebuf); + vsiz = namelen + 1; + vsiz = ((vsiz + 3) & ~0x03); + + bzero(&ab, sizeof(ab)); + ab.base = (char *)kern_attr_buf; + ab.needed = fsiz + vsiz; + + /* Fill in the size needed */ + *((uint32_t *)ab.base) = ab.needed; + if (ab.needed > (ssize_t)kern_attr_buf_siz) { + goto out; } - if (al.commonattr & ATTR_CMN_PARENTID) { - ATTR_PACK8(ab, va.va_parentid); - ab.actual.commonattr |= ATTR_CMN_PARENTID; + + /* + * Setup to pack results into the destination buffer. + */ + ab.fixedcursor = ab.base + sizeof(uint32_t); + /* + * Zero out the buffer; ab.fixedcursor starts after the first uint32_t, + * which gives the length. This ensures everything that we don't + * fill in explicitly later is zeroed out correctly. + */ + bzero(ab.fixedcursor, fsiz); + /* + * Variable-size data should start after all the fixed- + * size data. + */ + ab.varcursor = ab.base + fsiz; + /* + * Leave space for ATTR_CMN_RETURNED_ATTRS; its value is + * filled in here at the end.
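+ * + * As a sketch (illustrative, not a literal excerpt): a variable-length + * attribute is packed by writing an attrreference_t at ab.fixedcursor + * whose attr_dataoffset is the distance from that reference to + * ab.varcursor, where the payload (here, the name string) is appended: + * + * ref.attr_dataoffset = ab.varcursor - ab.fixedcursor; + * ref.attr_length = namelen + 1;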
+ */ + bzero(&ab.actual, sizeof (ab.actual)); + ab.fixedcursor += sizeof (attribute_set_t); + + ab.allocated = ab.needed; + + /* Fill ATTR_CMN_ERROR (if asked for) */ + if (needs_error_attr) { + ATTR_PACK4(ab, error_attr); + ab.actual.commonattr |= ATTR_CMN_ERROR; } - - if (al.commonattr & ATTR_CMN_FULLPATH) { - attrlist_pack_string (&ab, fullpathptr, fullpathlen); - ab.actual.commonattr |= ATTR_CMN_FULLPATH; + + /* + * Fill ATTR_CMN_NAME. The attrreference is packed at this location, + * but the actual string itself is packed after fixedsize, which is set + * to different lengths based on whether FSOPT_PACK_INVAL_ATTRS + * was passed. + */ + attrlist_pack_string(&ab, namebuf, namelen); + + /* + * Now fill in ATTR_CMN_RETURNED_ATTRS. This copies to a + * location after the count, i.e. before ATTR_CMN_ERROR and + * ATTR_CMN_NAME. + */ + ab.actual.commonattr |= ATTR_CMN_NAME | ATTR_CMN_RETURNED_ATTRS; + bcopy(&ab.actual, ab.base + sizeof(uint32_t), sizeof (ab.actual)); +out: + return; +} + +/* + * This is the buffer size required to return at least 1 entry. We need space + * for the length, for ATTR_CMN_RETURNED_ATTRS and ATTR_CMN_NAME. Assuming the + * smallest filename of a single byte, we get: + */ + +#define MIN_BUF_SIZE_REQUIRED (sizeof(uint32_t) + sizeof(attribute_set_t) +\ + sizeof(attrreference_t)) + +/* + * Read directory entries and get attributes filled in for each directory entry + */ +static int +readdirattr(vnode_t dvp, struct fd_vn_data *fvd, uio_t auio, + struct attrlist *alp, uint64_t options, int *count, int *eofflagp, + vfs_context_t ctx) +{ + caddr_t kern_attr_buf; + size_t kern_attr_buf_siz; + caddr_t max_path_name_buf = NULL; + int error = 0; + + *count = 0; + *eofflagp = 0; + + if (uio_iovcnt(auio) > 1) { + return (EINVAL); } - - if (al.commonattr & ATTR_CMN_ADDEDTIME) { - ATTR_PACK_TIME(ab, va.va_addedtime, proc_is64); - ab.actual.commonattr |= ATTR_CMN_ADDEDTIME; + + /* + * We fill in a kernel buffer for the attributes and uiomove each + * entry's attributes (as returned by getattrlist_internal). + */ + kern_attr_buf_siz = uio_resid(auio); + if (kern_attr_buf_siz > ATTR_MAX_BUFFER) { + kern_attr_buf_siz = ATTR_MAX_BUFFER; + } else if (kern_attr_buf_siz == 0) { + /* Nothing to do */ + return (error); } - /* directory attributes *********************************************/ - if (al.dirattr && (vtype == VDIR)) { - if (al.dirattr & ATTR_DIR_LINKCOUNT) { /* full count of entries */ - ATTR_PACK4(ab, (uint32_t)va.va_dirlinkcount); - ab.actual.dirattr |= ATTR_DIR_LINKCOUNT; + MALLOC(kern_attr_buf, caddr_t, kern_attr_buf_siz, M_TEMP, M_WAITOK); + + while (uio_resid(auio) > (user_ssize_t)MIN_BUF_SIZE_REQUIRED) { + struct direntry *dp; + user_addr_t name_buffer; + struct nameidata nd; + vnode_t vp; + struct attrlist al; + size_t entlen; + size_t bytes_left; + size_t pad_bytes; + ssize_t new_resid; + + /* + * get_direntry returns the current direntry and does not + * advance. A move to the next direntry only happens if + * direntry_done is called. + */ + error = get_direntry(ctx, dvp, fvd, eofflagp, &dp); + if (error || (*eofflagp) || !dp) { + break; } - if (al.dirattr & ATTR_DIR_ENTRYCOUNT) { - ATTR_PACK4(ab, (uint32_t)va.va_nchildren); - ab.actual.dirattr |= ATTR_DIR_ENTRYCOUNT; + + /* + * Skip "." and ".." (and a bunch of other invalid conditions). + */ + if (!dp->d_reclen || dp->d_ino == 0 || dp->d_namlen == 0 || + (dp->d_namlen == 1 && dp->d_name[0] == '.') || + (dp->d_namlen == 2 && dp->d_name[0] == '.'
&& + dp->d_name[1] == '.')) { + direntry_done(fvd); + continue; } - if (al.dirattr & ATTR_DIR_MOUNTSTATUS) { - uint32_t mntstat; - mntstat = (vp->v_flag & VROOT) ? DIR_MNTSTATUS_MNTPOINT : 0; -#if CONFIG_TRIGGERS - /* - * Report back on active vnode triggers - * that can directly trigger a mount - */ - if (vp->v_resolve && - !(vp->v_resolve->vr_flags & VNT_NO_DIRECT_MOUNT)) { - mntstat |= DIR_MNTSTATUS_TRIGGER; + /* + * Try to deal with filenames that are not null-terminated. + */ + if (dp->d_name[dp->d_namlen] != '\0') { + if (!max_path_name_buf) { + MALLOC(max_path_name_buf, caddr_t, MAXPATHLEN, + M_TEMP, M_WAITOK); } -#endif - ATTR_PACK4(ab, mntstat); - ab.actual.dirattr |= ATTR_DIR_MOUNTSTATUS; + bcopy(dp->d_name, max_path_name_buf, dp->d_namlen); + max_path_name_buf[dp->d_namlen] = '\0'; + name_buffer = CAST_USER_ADDR_T(max_path_name_buf); + } else { + name_buffer = CAST_USER_ADDR_T(&(dp->d_name)); } - } - - /* file attributes **************************************************/ - if (al.fileattr && (vtype != VDIR)) { - size_t rsize = 0; - uint64_t rlength = 0; - uint64_t ralloc = 0; - /* - * Pre-fetch the rsrc attributes now so we only get them once. - * Fetch the resource fork size/allocation via xattr interface + /* + * We have an iocount on the directory already */ - if (al.fileattr & (ATTR_FILE_TOTALSIZE | ATTR_FILE_ALLOCSIZE | ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE)) { - if ((error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, NULL, &rsize, XATTR_NOSECURITY, ctx)) != 0) { - if ((error == ENOENT) || (error == ENOATTR) || (error == ENOTSUP) || (error == EPERM)|| (error == EACCES)) { - rsize = 0; - error = 0; - } else { - goto out; - } - } - rlength = rsize; + NDINIT(&nd, LOOKUP, OP_GETATTR, AUDITVNPATH1 | USEDVP, + UIO_SYSSPACE, CAST_USER_ADDR_T(name_buffer), ctx); - if (al.fileattr & (ATTR_FILE_RSRCALLOCSIZE | ATTR_FILE_ALLOCSIZE)) { - uint32_t blksize = vp->v_mount->mnt_vfsstat.f_bsize; - if (blksize == 0) { - blksize = 512; - } - ralloc = roundup(rsize, blksize); - } - } + nd.ni_dvp = dvp; + error = namei(&nd); - if (al.fileattr & ATTR_FILE_LINKCOUNT) { - ATTR_PACK4(ab, (uint32_t)va.va_nlink); - ab.actual.fileattr |= ATTR_FILE_LINKCOUNT; + if (error) { + direntry_done(fvd); + error = 0; + continue; } + + vp = nd.ni_vp; + /* - * Note the following caveats for the TOTALSIZE and ALLOCSIZE attributes: - * We infer that if the filesystem does not support va_data_size or va_data_alloc - * it must not know about alternate forks. So when we need to gather - * the total size or total alloc, it's OK to substitute the total size for - * the data size below. This is because it is likely a flat filesystem and we must - * be using AD files to store the rsrc fork and EAs. - * - * Additionally, note that getattrlist is barred from being called on - * resource fork paths. (Search for CN_ALLOWRSRCFORK). So if the filesystem does - * support va_data_size, it is guaranteed to represent the data fork's size. This - * is an important distinction to make because when we call vnode_getattr on - * an HFS resource fork vnode, to get the size, it will vend out the resource - * fork's size (it only gets the size of the passed-in vnode). + * getattrlist_internal can change the values of the + * required attribute list. Copy the current values + * and use that copy instead.
 */ - if (al.fileattr & ATTR_FILE_TOTALSIZE) { - uint64_t totalsize = rlength; + al = *alp; - if (VATTR_IS_SUPPORTED(&va, va_data_size)) { - totalsize += va.va_data_size; - } else { - totalsize += va.va_total_size; - } + error = getattrlist_internal(ctx, vp, &al, + CAST_USER_ADDR_T(kern_attr_buf), kern_attr_buf_siz, + options | FSOPT_REPORT_FULLSIZE, UIO_SYSSPACE, + CAST_DOWN_EXPLICIT(char *, name_buffer)); - ATTR_PACK8(ab, totalsize); - ab.actual.fileattr |= ATTR_FILE_TOTALSIZE; - } - if (al.fileattr & ATTR_FILE_ALLOCSIZE) { - uint64_t totalalloc = ralloc; - - /* - * If data_alloc is supported, then it must represent the - * data fork size. - */ - if (VATTR_IS_SUPPORTED(&va, va_data_alloc)) { - totalalloc += va.va_data_alloc; - } - else { - totalalloc += va.va_total_alloc; - } - - ATTR_PACK8(ab, totalalloc); - ab.actual.fileattr |= ATTR_FILE_ALLOCSIZE; - } - if (al.fileattr & ATTR_FILE_IOBLOCKSIZE) { - ATTR_PACK4(ab, va.va_iosize); - ab.actual.fileattr |= ATTR_FILE_IOBLOCKSIZE; - } - if (al.fileattr & ATTR_FILE_CLUMPSIZE) { - if (!return_valid || pack_invalid) { - ATTR_PACK4(ab, 0); /* this value is deprecated */ - ab.actual.fileattr |= ATTR_FILE_CLUMPSIZE; - } - } - if (al.fileattr & ATTR_FILE_DEVTYPE) { - uint32_t dev; + nameidone(&nd); - if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { - if (vp->v_specinfo != NULL) - dev = vp->v_specinfo->si_rdev; - else - dev = va.va_rdev; - } else { - dev = 0; - } - ATTR_PACK4(ab, dev); - ab.actual.fileattr |= ATTR_FILE_DEVTYPE; + if (error) { + get_error_attributes(vp, alp, options, + CAST_USER_ADDR_T(kern_attr_buf), + kern_attr_buf_siz, error, (caddr_t)name_buffer, + ctx); + error = 0; } - - /* - * If the filesystem does not support datalength - * or dataallocsize, then we infer that totalsize and - * totalalloc are substitutes. + + /* Done with vnode now */ + vnode_put(vp); + + /* + * Because FSOPT_REPORT_FULLSIZE was set, the first 4 bytes + * of the buffer returned by getattrlist contain the size + * (even if the provided buffer isn't sufficiently big). Use + * that to check if we've run out of buffer space. + * + * resid is a signed type, and the size of the buffer etc. + * are unsigned types. It is theoretically possible for + * resid to be < 0, in which case we would be assigning + * an out-of-bounds value to bytes_left (which is unsigned). + * uiomove takes care never to set resid to < 0, so it + * is safe to do this here. */ - if (al.fileattr & ATTR_FILE_DATALENGTH) { - if (VATTR_IS_SUPPORTED(&va, va_data_size)) { - ATTR_PACK8(ab, va.va_data_size); - } else { - ATTR_PACK8(ab, va.va_total_size); - } - ab.actual.fileattr |= ATTR_FILE_DATALENGTH; + bytes_left = (size_t)((user_size_t)uio_resid(auio)); + entlen = (size_t)(*((uint32_t *)(kern_attr_buf))); + if (!entlen || (entlen > bytes_left)) { + break; } - if (al.fileattr & ATTR_FILE_DATAALLOCSIZE) { - if (VATTR_IS_SUPPORTED(&va, va_data_alloc)) { - ATTR_PACK8(ab, va.va_data_alloc); + + /* + * Will the pad bytes fit as well? If they don't fit, still use + * this entry, but it will be the last entry returned. + */ + pad_bytes = ((entlen + 7) & ~0x07) - entlen; + new_resid = 0; + if (pad_bytes && (entlen + pad_bytes <= bytes_left)) { + /* + * While entlen can never be > ATTR_MAX_BUFFER, + * (entlen + pad_bytes) can be; handle that and + * zero out the pad bytes. N.B. - Only zero + * out information in the kernel buffer that is + * going to be uiomove'ed out. + */ + if (entlen + pad_bytes <= kern_attr_buf_siz) { + /* This is the normal case.
 */ + bzero(kern_attr_buf + entlen, pad_bytes); } else { - ATTR_PACK8(ab, va.va_total_alloc); + bzero(kern_attr_buf + entlen, + kern_attr_buf_siz - entlen); + /* + * Pad bytes left over; change the resid value + * manually. We only got in here because + * bytes_left >= entlen + pad_bytes, so + * new_resid (which is a signed type) is + * always positive. + */ + new_resid = (ssize_t)(bytes_left - + (entlen + pad_bytes)); } - ab.actual.fileattr |= ATTR_FILE_DATAALLOCSIZE; + entlen += pad_bytes; } - /* already got the resource fork size/allocation above */ - if (al.fileattr & ATTR_FILE_RSRCLENGTH) { - ATTR_PACK8(ab, rlength); - ab.actual.fileattr |= ATTR_FILE_RSRCLENGTH; + *((uint32_t *)kern_attr_buf) = (uint32_t)entlen; + error = uiomove(kern_attr_buf, min(entlen, kern_attr_buf_siz), + auio); + + if (error) { + break; } - if (al.fileattr & ATTR_FILE_RSRCALLOCSIZE) { - ATTR_PACK8(ab, ralloc); - ab.actual.fileattr |= ATTR_FILE_RSRCALLOCSIZE; + + if (new_resid) { + uio_setresid(auio, (user_ssize_t)new_resid); } + + /* + * At this point, the directory entry has been consumed; proceed + * to the next one. + */ + (*count)++; + direntry_done(fvd); } - /* diagnostic */ - if (!return_valid && (ab.fixedcursor - ab.base) != fixedsize) - panic("packed field size mismatch; allocated %ld but packed %ld for common %08x vol %08x", - fixedsize, (long) (ab.fixedcursor - ab.base), al.commonattr, al.volattr); - if (!return_valid && ab.varcursor != (ab.base + ab.needed)) - panic("packed variable field size mismatch; used %ld but expected %ld", (long) (ab.varcursor - ab.base), ab.needed); + if (max_path_name_buf) { + FREE(max_path_name_buf, M_TEMP); + } /* - * In the compatible case, we report the smaller of the required and returned sizes. - * If the FSOPT_REPORT_FULLSIZE option is supplied, we report the full (required) size - * of the result buffer, even if we copied less out. The caller knows how big a buffer - * they gave us, so they can always check for truncation themselves. + * At this point, kern_attr_buf is always allocated. */ - *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); + FREE(kern_attr_buf, M_TEMP); - /* Return attribute set output if requested. */ - if (return_valid) { - ab.actual.commonattr |= ATTR_CMN_RETURNED_ATTRS; - if (pack_invalid) { - /* Only report the attributes that are valid */ - ab.actual.commonattr &= ab.valid.commonattr; - ab.actual.dirattr &= ab.valid.dirattr; - ab.actual.fileattr &= ab.valid.fileattr; - } - bcopy(&ab.actual, ab.base + sizeof(uint32_t), sizeof (ab.actual)); - } - - /* Only actually copyout as much out as the user buffer can hold */ - error = copyout(ab.base, uap->attributeBuffer, imin(uap->bufferSize, ab.allocated)); - -out: - if (va.va_name) - kfree(va.va_name, MAXPATHLEN); - if (fullpathptr) - kfree(fullpathptr, MAXPATHLEN); - if (vname) - vnode_putname(vname); - if (ab.base != NULL) - FREE(ab.base, M_TEMP); - if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) - kauth_acl_free(va.va_acl); + /* + * Always set the offset to the last successful offset + * returned by VNOP_READDIR. + */ + uio_setoffset(auio, fvd->fv_eoff); - VFS_DEBUG(ctx, vp, "ATTRLIST - returning %d", error); - return(error); + return (error); } +/* + * int getattrlistbulk(int dirfd, struct attrlist *alist, void *attributeBuffer, + * size_t bufferSize, uint64_t options) + * + * Gets directory entries along with their attributes in the same way + * getattrlist does for a single file system object.
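+ * + * A sketch of the expected userspace calling pattern (illustrative only, + * not part of this patch; "dirfd" is an open directory descriptor and + * "attrbuf" a caller-supplied buffer): + * + * al.bitmapcount = ATTR_BIT_MAP_COUNT; + * al.commonattr = ATTR_BULK_REQUIRED; + * while ((count = getattrlistbulk(dirfd, &al, attrbuf, + * sizeof(attrbuf), 0)) > 0) { + * walk the 'count' entries; each starts with a uint32_t + * length, then an attribute_set_t of returned attributes, + * then the packed attribute values + * } + * + * A return of 0 indicates EOF; -1 indicates an error (see errno).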
+ * + * On non-error returns, retval will hold the count of entries returned. + */ +int -fgetattrlist(proc_t p, struct fgetattrlist_args *uap, __unused int32_t *retval) +getattrlistbulk(proc_t p, struct getattrlistbulk_args *uap, int32_t *retval) { - struct vfs_context *ctx; - vnode_t vp = NULL; - int error; - struct getattrlist_args ap; + struct attrlist al; + vnode_t dvp; + struct fileproc *fp; + struct fd_vn_data *fvdata; + vfs_context_t ctx; + enum uio_seg segflg; + int count; + uio_t auio = NULL; + char uio_buf[ UIO_SIZEOF(1) ]; + kauth_action_t action; + int eofflag; + uint64_t options; + int error; + + *retval = 0; + + error = fp_getfvp(p, uap->dirfd, &fp, &dvp); + if (error) + return (error); + count = 0; + fvdata = NULL; + eofflag = 0; ctx = vfs_context_current(); - error = 0; + segflg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; - if ((error = file_vnode(uap->fd, &vp)) != 0) - return (error); + if ((fp->f_fglob->fg_flag & FREAD) == 0) { + /* + AUDIT_ARG(vnpath_withref, dvp, ARG_VNODE1); + */ + error = EBADF; + goto out; + } - if ((error = vnode_getwithref(vp)) != 0) { - file_drop(uap->fd); - return(error); + if ((error = vnode_getwithref(dvp))) { + dvp = NULLVP; + goto out; } - ap.path = 0; - ap.alist = uap->alist; - ap.attributeBuffer = uap->attributeBuffer; - ap.bufferSize = uap->bufferSize; - ap.options = uap->options; + if (dvp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } - /* Default to using the vnode's name. */ - error = getattrlist_internal(vp, &ap, NULL, p, ctx); +#if CONFIG_MACF + error = mac_file_check_change_offset(vfs_context_ucred(ctx), + fp->f_fglob); + if (error) + goto out; +#endif + /* + * XXX : Audit Support + *AUDIT_ARG(vnpath, dvp, ARG_VNODE1); + */ - file_drop(uap->fd); - if (vp) - vnode_put(vp); + options = uap->options | FSOPT_ATTR_CMN_EXTENDED; - return error; -} + if ((error = copyin(CAST_USER_ADDR_T(uap->alist), &al, + sizeof(struct attrlist)))) { + goto out; + } -int -getattrlist(proc_t p, struct getattrlist_args *uap, __unused int32_t *retval) -{ - struct vfs_context *ctx; - struct nameidata nd; - vnode_t vp = NULL; - u_long nameiflags; - int error; + if (al.volattr || + ((al.commonattr & ATTR_BULK_REQUIRED) != ATTR_BULK_REQUIRED)) { + error = EINVAL; + goto out; + } - ctx = vfs_context_current(); - error = 0; +#if CONFIG_MACF + error = mac_vnode_check_readdir(ctx, dvp); + if (error != 0) { + goto out; + } +#endif /* MAC */ /* - * Look up the file. + * If the only item requested is file names, we can let that pass with + * just LIST_DIRECTORY. If they want any other attributes, that means + * they need SEARCH as well. */ - nameiflags = NOTRIGGER | AUDITVNPATH1; - if (!(uap->options & FSOPT_NOFOLLOW)) - nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, OP_GETATTR, nameiflags, UIO_USERSPACE, uap->path, ctx); + action = KAUTH_VNODE_LIST_DIRECTORY; + if ((al.commonattr & ~ATTR_CMN_NAME) || al.fileattr || al.dirattr) + action |= KAUTH_VNODE_SEARCH; + + error = vnode_authorize(dvp, NULL, action, ctx); + if (error) { + goto out; + } - if ((error = namei(&nd)) != 0) { - /* vp is still uninitialized */ - return error; + fvdata = (struct fd_vn_data *)fp->f_fglob->fg_vn_data; + if (!fvdata) { + panic("Directory expected to have fg_vn_data"); } - vp = nd.ni_vp; - /* Pass along our componentname to getattrlist_internal */ - error = getattrlist_internal(vp, uap, &(nd.ni_cnd), p, ctx); - - /* Retain the namei reference until the getattrlist completes.
 */ - nameidone(&nd); - if (vp) - vnode_put(vp); + FV_LOCK(fvdata); - return error; + /* + * getattrlistbulk(2) maintains its offset in fv_offset. However, + * if the offset in the file glob is set (or reset) to 0, the directory + * traversal needs to be restarted (any existing state in the + * directory buffer is removed as well). + */ + if (!fp->f_fglob->fg_offset) { + fvdata->fv_offset = 0; + if (fvdata->fv_buf) { + FV_BUF_FREE(fvdata, M_FD_DIRBUF); + } + } + + auio = uio_createwithbuffer(1, fvdata->fv_offset, segflg, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->attributeBuffer, (user_size_t)uap->bufferSize); + + /* + * For "expensive" operations in which the native VNOP implementations + * end up having to do just as much (if not more) work than the default + * implementation, fall back to the default implementation. + * The VNOP helper functions depend on the filesystem providing the + * object type; if the caller has not requested ATTR_CMN_OBJTYPE, fall + * back to the default implementation. + */ + if ((al.commonattr & + (ATTR_CMN_UUID | ATTR_CMN_GRPUUID | ATTR_CMN_EXTENDED_SECURITY)) || + !(al.commonattr & ATTR_CMN_OBJTYPE)) { + error = ENOTSUP; + } else { + struct vnode_attr va; + char *va_name; + + eofflag = 0; + count = 0; + + VATTR_INIT(&va); + MALLOC(va_name, char *, MAXPATHLEN, M_TEMP, M_WAITOK|M_ZERO); + va.va_name = va_name; + + (void)getattrlist_setupvattr_all(&al, &va, VNON, NULL, + IS_64BIT_PROCESS(p)); + + error = VNOP_GETATTRLISTBULK(dvp, &al, &va, auio, NULL, + options, &eofflag, &count, ctx); + + FREE(va_name, M_TEMP); + + /* + * Cache the state of eofflag. + */ + if (!error) { + fvdata->fv_eofflag = eofflag; + } + } + + /* + * If the filesystem does not natively support getattrlistbulk, + * do the default implementation. + */ + if (error == ENOTSUP) { + eofflag = 0; + count = 0; + + error = readdirattr(dvp, fvdata, auio, &al, options, + &count, &eofflag, ctx); + } + + if (error && fvdata->fv_eofflag) { + /* + * Some filesystems return EINVAL if called again when, + * for a directory, they have already returned EOF. We + * have the EOF state from the last successful call to it. + * If this is an error, just reuse the state from the last + * call and use that to return 0 to the user instead of + * percolating an error to the user. We're not particular + * about the error returned. If we get *any* error after + * having already gotten an EOF, we ignore it. + */ + eofflag = 1; + error = 0; + count = 0; + } + + if (count) { + fvdata->fv_offset = uio_offset(auio); + fp->f_fglob->fg_offset = fvdata->fv_offset; + *retval = count; + error = 0; + } else if (!error && !eofflag) { + /* + * This just means the buffer was too small to fit even a + * single entry. + */ + error = ERANGE; + } + + FV_UNLOCK(fvdata); +out: + if (dvp) { + vnode_put(dvp); + } + + file_drop(uap->dirfd); + + return (error); } static int diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index 934dbceef..c6e919d9e 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -91,7 +91,8 @@ #endif /* DIAGNOSTIC */ #include #include -#include +#include +#include #include /* fslog_io_error() */ @@ -111,12 +112,6 @@ #include #include - -#if BALANCE_QUEUES -static __inline__ void bufqinc(int q); -static __inline__ void bufqdec(int q); -#endif - int bcleanbuf(buf_t bp, boolean_t discard); static int brecover_data(buf_t bp); static boolean_t incore(vnode_t vp, daddr64_t blkno); @@ -190,17 +185,6 @@ buf_timestamp(void) /* * Insq/Remq for the buffer free lists. */ -#if BALANCE_QUEUES -#define binsheadfree(bp, dp, whichq) do { \ - TAILQ_INSERT_HEAD(dp, bp, b_freelist); \ - bufqinc((whichq)); \ - } while (0) - -#define binstailfree(bp, dp, whichq) do { \ - TAILQ_INSERT_TAIL(dp, bp, b_freelist); \ - bufqinc((whichq)); \ - } while (0) -#else #define binsheadfree(bp, dp, whichq) do { \ TAILQ_INSERT_HEAD(dp, bp, b_freelist); \ } while (0) @@ -208,8 +192,6 @@ buf_timestamp(void) #define binstailfree(bp, dp, whichq) do { \ TAILQ_INSERT_TAIL(dp, bp, b_freelist); \ } while (0) -#endif - #define BHASHENTCHECK(bp) \ if ((bp)->b_hash.le_prev != (struct buf **)0xdeadbeef) \ @@ -452,6 +434,18 @@ bufattr_free(bufattr_t bap) { FREE(bap, M_TEMP); } +bufattr_t +bufattr_dup(bufattr_t bap) { + bufattr_t new_bufattr; + MALLOC(new_bufattr, bufattr_t, sizeof(struct bufattr), M_TEMP, M_WAITOK); + if (new_bufattr == NULL) + return NULL; + + /* Copy the provided one into the new copy */ + memcpy (new_bufattr, bap, sizeof(struct bufattr)); + return new_bufattr; +} + int bufattr_rawencrypted(bufattr_t bap) { if ( (bap->ba_flags & BA_RAW_ENCRYPTED_IO) ) @@ -464,6 +458,13 @@ bufattr_throttled(bufattr_t bap) { return (GET_BUFATTR_IO_TIER(bap)); } +int +bufattr_passive(bufattr_t bap) { + if ( (bap->ba_flags & BA_PASSIVE) ) + return 1; + return 0; +} + int bufattr_nocache(bufattr_t bap) { if ( (bap->ba_flags & BA_NOCACHE) ) @@ -478,6 +479,11 @@ bufattr_meta(bufattr_t bap) { return 0; } +void +bufattr_markmeta(bufattr_t bap) { + SET(bap->ba_flags, BA_META); +} + int bufattr_delayidlesleep(bufattr_t bap) { @@ -515,6 +521,18 @@ bufattr_greedymode(bufattr_t bap) { return 0; } +void +bufattr_markisochronous(bufattr_t bap) { + SET(bap->ba_flags, BA_ISOCHRONOUS); +} + +int +bufattr_isochronous(bufattr_t bap) { + if ( (bap->ba_flags & BA_ISOCHRONOUS) ) + return 1; + return 0; +} + void bufattr_markquickcomplete(bufattr_t bap) { SET(bap->ba_flags, BA_QUICK_COMPLETE); @@ -1320,6 +1338,7 @@ buf_strategy(vnode_t devvp, void *ap) * we have only issued a single I/O. */ bufattr_setcpoff (&(bp->b_attr), (u_int64_t)f_offset); + CP_DEBUG((CPDBG_OFFSET_IO | DBG_FUNC_NONE), (uint32_t) f_offset, (uint32_t) bp->b_lblkno, (uint32_t) bp->b_blkno, (uint32_t) bp->b_bcount, 0); } } #endif @@ -1819,9 +1838,6 @@ bremfree_locked(buf_t bp) } TAILQ_REMOVE(dp, bp, b_freelist); -#if BALANCE_QUEUES - bufqdec(whichq); -#endif if (whichq == BQ_LAUNDRY) blaundrycnt--; @@ -1999,17 +2015,8 @@ bufinit(void) panic("Couldn't register buffer cache callout for vm pressure!\n"); } -#if BALANCE_QUEUES - { - static void bufq_balance_thread_init(void); - /* create a thread to do dynamic buffer queue balancing */ - bufq_balance_thread_init(); - } -#endif /* notyet */ } - - /* * Zones for the meta data buffers */ @@ -2102,7 +2109,6 @@ bio_doread(vnode_t vp, daddr64_t blkno, int size, kauth_cred_t cred, int async, /* Pay for the read. 
 */ if (p && p->p_stats) { OSIncrementAtomicLong(&p->p_stats->p_ru.ru_inblock); /* XXX */ - OSAddAtomic64(size, &p->p_stats->ri_diskiobytes.ri_bytesread); } if (async) { @@ -2240,7 +2246,6 @@ buf_bwrite(buf_t bp) else if (p && p->p_stats) { OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ - OSAddAtomic64(buf_count(bp), &p->p_stats->ri_diskiobytes.ri_byteswritten); } } trace(TR_BUFWRITE, pack(vp, bp->b_bcount), bp->b_lblkno); @@ -2267,7 +2272,6 @@ buf_bwrite(buf_t bp) else if (p && p->p_stats) { OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ - OSAddAtomic64(buf_count(bp), &p->p_stats->ri_diskiobytes.ri_byteswritten); } /* Release the buffer. */ @@ -2325,7 +2329,6 @@ bdwrite_internal(buf_t bp, int return_error) SET(bp->b_flags, B_DELWRI); if (p && p->p_stats) { OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ - OSAddAtomic64(buf_count(bp), &p->p_stats->ri_diskiobytes.ri_byteswritten); } OSAddAtomicLong(1, &nbdwrite); buf_reassign(bp, vp); @@ -3234,6 +3237,80 @@ buf_clear_redundancy_flags(buf_t bp, uint32_t flags) CLR(bp->b_redundancy_flags, flags); } + + +static void * +recycle_buf_from_pool(int nsize) +{ + buf_t bp; + void *ptr = NULL; + + lck_mtx_lock_spin(buf_mtxp); + + TAILQ_FOREACH(bp, &bufqueues[BQ_META], b_freelist) { + if (ISSET(bp->b_flags, B_DELWRI) || bp->b_bufsize != nsize) + continue; + ptr = (void *)bp->b_datap; + bp->b_bufsize = 0; + + bcleanbuf(bp, TRUE); + break; + } + lck_mtx_unlock(buf_mtxp); + + return (ptr); +} + + + +int zalloc_nopagewait_failed = 0; +int recycle_buf_failed = 0; + +static void * +grab_memory_for_meta_buf(int nsize) +{ + zone_t z; + void *ptr; + boolean_t was_vmpriv; + + z = getbufzone(nsize); + + /* + * make sure we're NOT privileged so that + * if a vm_page_grab is needed, it won't + * block if we're out of free pages... if + * it blocks, then we can't honor the + * nopagewait request + */ + was_vmpriv = set_vm_privilege(FALSE); + + ptr = zalloc_nopagewait(z); + + if (was_vmpriv == TRUE) + set_vm_privilege(TRUE); + + if (ptr == NULL) { + + zalloc_nopagewait_failed++; + + ptr = recycle_buf_from_pool(nsize); + + if (ptr == NULL) { + + recycle_buf_failed++; + + if (was_vmpriv == FALSE) + set_vm_privilege(TRUE); + + ptr = zalloc(z); + + if (was_vmpriv == FALSE) + set_vm_privilege(FALSE); + } + } + return (ptr); } /* * With UBC, there is no need to expand / shrink the file data * buffer. The VM uses the same pages, hence no waste.
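 * Illustrative sketch (an editorial summary under stated assumptions, not code from this patch): the fallback order grab_memory_for_meta_buf() above implements amounts to * * ptr = zalloc_nopagewait(z); 1) no paging, cannot block; VM privilege is dropped so a page grab fails fast rather than dipping into the reserve * if (ptr == NULL) * ptr = recycle_buf_from_pool(nsize); 2) steal a clean, same-size B_META buffer from the free list * if (ptr == NULL) * ptr = zalloc(z); 3) last resort: may block, performed with VM privilege raised * * so the caller only risks blocking for memory once both cheap paths have failed.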
@@ -3259,7 +3336,6 @@ allocbuf(buf_t bp, int size) panic("allocbuf: buffer larger than MAXBSIZE requested"); if (ISSET(bp->b_flags, B_META)) { - zone_t zprev, z; int nsize = roundup(size, MINMETA); if (bp->b_datap) { @@ -3267,14 +3343,16 @@ allocbuf(buf_t bp, int size) if (ISSET(bp->b_flags, B_ZALLOC)) { if (bp->b_bufsize < nsize) { + zone_t zprev; + /* reallocate to a bigger size */ zprev = getbufzone(bp->b_bufsize); if (nsize <= MAXMETA) { desired_size = nsize; - z = getbufzone(nsize); + /* b_datap not really a ptr */ - *(void **)(&bp->b_datap) = zalloc(z); + *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize); } else { bp->b_datap = (uintptr_t)NULL; kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); @@ -3301,9 +3379,9 @@ allocbuf(buf_t bp, int size) /* new allocation */ if (nsize <= MAXMETA) { desired_size = nsize; - z = getbufzone(nsize); + /* b_datap not really a ptr */ - *(void **)(&bp->b_datap) = zalloc(z); + *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize); SET(bp->b_flags, B_ZALLOC); } else kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); @@ -3581,6 +3659,8 @@ bcleanbuf(buf_t bp, boolean_t discard) trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); buf_release_credentials(bp); + + bp->b_redundancy_flags = 0; /* If discarding, just move to the empty queue */ if (discard) { @@ -3822,10 +3902,6 @@ buf_biodone(buf_t bp) if (ISSET(bp->b_flags, B_DONE)) panic("biodone already"); - if (ISSET(bp->b_flags, B_ERROR)) { - fslog_io_error(bp); - } - bap = &bp->b_attr; if (bp->b_vp && bp->b_vp->v_mount) { @@ -4107,6 +4183,9 @@ free_io_buf(buf_t bp) bp->b_vp = NULL; bp->b_flags = B_INVAL; + /* Zero out the bufattr and its flags before relinquishing this iobuf */ + bzero (&bp->b_attr, sizeof(struct bufattr)); + lck_mtx_lock_spin(iobuffer_mtxp); binsheadfree(bp, &iobufqueue, -1); @@ -4313,9 +4392,10 @@ buffer_cache_gc(int all) * We only care about metadata (incore storage comes from zalloc()). * Unless "all" is set (used to evict meta data buffers in preparation * for deep sleep), we only evict up to BUF_MAX_GC_BATCH_SIZE buffers - * that have not been accessed in the last 30s. This limit controls both - * the hold time of the global lock "buf_mtxp" and the length of time - * we spend compute bound in the GC thread which calls this function + * that have not been accessed in the last BUF_STALE_THRESHOLD seconds. + * BUF_MAX_GC_BATCH_SIZE controls both the hold time of the global lock + * "buf_mtxp" and the length of time we spend compute bound in the GC + * thread which calls this function */ lck_mtx_lock(buf_mtxp); @@ -4515,334 +4595,3 @@ bflushq(int whichq, mount_t mp) return (total_writes); } #endif - - -#if BALANCE_QUEUES - -/* XXX move this to a separate file */ - -/* - * NOTE: THIS CODE HAS NOT BEEN UPDATED - * WITH RESPECT TO THE NEW LOCKING MODEL - */ - - -/* - * Dynamic Scaling of the Buffer Queues - */ - -typedef long long blsize_t; - -blsize_t MAXNBUF; /* initialize to (sane_size / PAGE_SIZE) */ -/* Global tunable limits */ -blsize_t nbufh; /* number of buffer headers */ -blsize_t nbuflow; /* minimum number of buffer headers required */ -blsize_t nbufhigh; /* maximum number of buffer headers allowed */ -blsize_t nbuftarget; /* preferred number of buffer headers */ - -/* - * assertions: - * - * 1. 0 < nbuflow <= nbufh <= nbufhigh - * 2. nbufhigh <= MAXNBUF - * 3. 0 < nbuflow <= nbuftarget <= nbufhigh - * 4. nbufh can not be set by sysctl(). 
- */ - -/* Per queue tunable limits */ - -struct bufqlim { - blsize_t bl_nlow; /* minimum number of buffer headers required */ - blsize_t bl_num; /* number of buffer headers on the queue */ - blsize_t bl_nlhigh; /* maximum number of buffer headers allowed */ - blsize_t bl_target; /* preferred number of buffer headers */ - long bl_stale; /* Seconds after which a buffer is considered stale */ -} bufqlim[BQUEUES]; - -/* - * assertions: - * - * 1. 0 <= bl_nlow <= bl_num <= bl_nlhigh - * 2. bl_nlhigh <= MAXNBUF - * 3. bufqlim[BQ_META].bl_nlow != 0 - * 4. bufqlim[BQ_META].bl_nlow > (number of possible concurrent - * file system IO operations) - * 5. bl_num can not be set by sysctl(). - * 6. bl_nhigh <= nbufhigh - */ - -/* - * Rationale: - * ---------- - * Defining it blsize_t as long permits 2^31 buffer headers per queue. - * Which can describe (2^31 * PAGE_SIZE) memory per queue. - * - * These limits are exported to by means of sysctl(). - * It was decided to define blsize_t as a 64 bit quantity. - * This will make sure that we will not be required to change it - * as long as we do not exceed 64 bit address space for the kernel. - * - * low and high numbers parameters initialized at compile time - * and boot arguments can be used to override them. sysctl() - * would not change the value. sysctl() can get all the values - * but can set only target. num is the current level. - * - * Advantages of having a "bufqscan" thread doing the balancing are, - * Keep enough bufs on BQ_EMPTY. - * getnewbuf() by default will always select a buffer from the BQ_EMPTY. - * getnewbuf() perfoms best if a buffer was found there. - * Also this minimizes the possibility of starting IO - * from getnewbuf(). That's a performance win, too. - * - * Localize complex logic [balancing as well as time aging] - * to balancebufq(). - * - * Simplify getnewbuf() logic by elimination of time aging code. - */ - -/* - * Algorithm: - * ----------- - * The goal of the dynamic scaling of the buffer queues to to keep - * the size of the LRU close to bl_target. Buffers on a queue would - * be time aged. - * - * There would be a thread which will be responsible for "balancing" - * the buffer cache queues. - * - * The scan order would be: AGE, LRU, META, EMPTY. 
- */ - -long bufqscanwait = 0; - -static void bufqscan_thread(); -static int balancebufq(int q); -static int btrimempty(int n); -static __inline__ int initbufqscan(void); -static __inline__ int nextbufq(int q); -static void buqlimprt(int all); - - -static __inline__ void -bufqinc(int q) -{ - if ((q < 0) || (q >= BQUEUES)) - return; - - bufqlim[q].bl_num++; - return; -} - -static __inline__ void -bufqdec(int q) -{ - if ((q < 0) || (q >= BQUEUES)) - return; - - bufqlim[q].bl_num--; - return; -} - -static void -bufq_balance_thread_init(void) -{ - thread_t thread = THREAD_NULL; - - if (bufqscanwait++ == 0) { - - /* Initalize globals */ - MAXNBUF = (sane_size / PAGE_SIZE); - nbufh = nbuf_headers; - nbuflow = min(nbufh, 100); - nbufhigh = min(MAXNBUF, max(nbufh, 2048)); - nbuftarget = (sane_size >> 5) / PAGE_SIZE; - nbuftarget = max(nbuflow, nbuftarget); - nbuftarget = min(nbufhigh, nbuftarget); - - /* - * Initialize the bufqlim - */ - - /* LOCKED queue */ - bufqlim[BQ_LOCKED].bl_nlow = 0; - bufqlim[BQ_LOCKED].bl_nlhigh = 32; - bufqlim[BQ_LOCKED].bl_target = 0; - bufqlim[BQ_LOCKED].bl_stale = 30; - - /* LRU queue */ - bufqlim[BQ_LRU].bl_nlow = 0; - bufqlim[BQ_LRU].bl_nlhigh = nbufhigh/4; - bufqlim[BQ_LRU].bl_target = nbuftarget/4; - bufqlim[BQ_LRU].bl_stale = LRU_IS_STALE; - - /* AGE queue */ - bufqlim[BQ_AGE].bl_nlow = 0; - bufqlim[BQ_AGE].bl_nlhigh = nbufhigh/4; - bufqlim[BQ_AGE].bl_target = nbuftarget/4; - bufqlim[BQ_AGE].bl_stale = AGE_IS_STALE; - - /* EMPTY queue */ - bufqlim[BQ_EMPTY].bl_nlow = 0; - bufqlim[BQ_EMPTY].bl_nlhigh = nbufhigh/4; - bufqlim[BQ_EMPTY].bl_target = nbuftarget/4; - bufqlim[BQ_EMPTY].bl_stale = 600000; - - /* META queue */ - bufqlim[BQ_META].bl_nlow = 0; - bufqlim[BQ_META].bl_nlhigh = nbufhigh/4; - bufqlim[BQ_META].bl_target = nbuftarget/4; - bufqlim[BQ_META].bl_stale = META_IS_STALE; - - /* LAUNDRY queue */ - bufqlim[BQ_LOCKED].bl_nlow = 0; - bufqlim[BQ_LOCKED].bl_nlhigh = 32; - bufqlim[BQ_LOCKED].bl_target = 0; - bufqlim[BQ_LOCKED].bl_stale = 30; - - buqlimprt(1); - } - - /* create worker thread */ - kernel_thread_start((thread_continue_t)bufqscan_thread, NULL, &thread); - thread_deallocate(thread); -} - -/* The workloop for the buffer balancing thread */ -static void -bufqscan_thread() -{ - int moretodo = 0; - - for(;;) { - do { - int q; /* buffer queue to process */ - - q = initbufqscan(); - for (; q; ) { - moretodo |= balancebufq(q); - q = nextbufq(q); - } - } while (moretodo); - -#if DIAGNOSTIC - vfs_bufstats(); - buqlimprt(0); -#endif - (void)tsleep((void *)&bufqscanwait, PRIBIO, "bufqscanwait", 60 * hz); - moretodo = 0; - } -} - -/* Seed for the buffer queue balancing */ -static __inline__ int -initbufqscan() -{ - /* Start with AGE queue */ - return (BQ_AGE); -} - -/* Pick next buffer queue to balance */ -static __inline__ int -nextbufq(int q) -{ - int order[] = { BQ_AGE, BQ_LRU, BQ_META, BQ_EMPTY, 0 }; - - q++; - q %= sizeof(order); - return (order[q]); -} - -/* function to balance the buffer queues */ -static int -balancebufq(int q) -{ - int moretodo = 0; - int n, t; - - /* reject invalid q */ - if ((q < 0) || (q >= BQUEUES)) - goto out; - - /* LOCKED or LAUNDRY queue MUST not be balanced */ - if ((q == BQ_LOCKED) || (q == BQ_LAUNDRY)) - goto out; - - n = (bufqlim[q].bl_num - bufqlim[q].bl_target); - - /* If queue has less than target nothing more to do */ - if (n < 0) - goto out; - - if ( n > 8 ) { - /* Balance only a small amount (12.5%) at a time */ - n >>= 3; - } - - /* EMPTY queue needs special handling */ - if (q == BQ_EMPTY) { - moretodo |= 
btrimempty(n); - goto out; - } - - t = buf_timestamp(); - - for (; n > 0; n--) { - struct buf *bp = bufqueues[q].tqh_first; - if (!bp) - break; - - /* check if it's stale */ - if ((t - bp->b_timestamp) > bufqlim[q].bl_stale) { - if (bcleanbuf(bp, FALSE)) { - /* buf_bawrite() issued, bp not ready */ - moretodo = 1; - } else { - /* release the cleaned buffer to BQ_EMPTY */ - SET(bp->b_flags, B_INVAL); - buf_brelse(bp); - } - } else - break; - } - -out: - return (moretodo); -} - -static int -btrimempty(int n) -{ - /* - * When struct buf are allocated dynamically, this would - * reclaim upto 'n' struct buf from the empty queue. - */ - - return (0); -} - -static void -buqlimprt(int all) -{ - int i; - static char *bname[BQUEUES] = - { "LOCKED", "LRU", "AGE", "EMPTY", "META", "LAUNDRY" }; - - if (all) - for (i = 0; i < BQUEUES; i++) { - printf("%s : ", bname[i]); - printf("min = %ld, ", (long)bufqlim[i].bl_nlow); - printf("cur = %ld, ", (long)bufqlim[i].bl_num); - printf("max = %ld, ", (long)bufqlim[i].bl_nlhigh); - printf("target = %ld, ", (long)bufqlim[i].bl_target); - printf("stale after %ld seconds\n", bufqlim[i].bl_stale); - } - else - for (i = 0; i < BQUEUES; i++) { - printf("%s : ", bname[i]); - printf("cur = %ld, ", (long)bufqlim[i].bl_num); - } -} - -#endif - - diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c index 65cb02d22..a08ffe235 100644 --- a/bsd/vfs/vfs_cache.c +++ b/bsd/vfs/vfs_cache.c @@ -218,6 +218,9 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs if (first_vp == NULLVP) return (EINVAL); + + if (buflen <= 1) + return (ENOSPC); /* * Grab the process fd so we can evaluate fd_rdir. @@ -1403,7 +1406,7 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp) struct namecache *ncp; struct nchashhead *ncpp; long namelen = cnp->cn_namelen; - unsigned int hashval = (cnp->cn_hash & NCHASHMASK); + unsigned int hashval = cnp->cn_hash; if (nc_disabled) { return NULL; @@ -1487,7 +1490,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) if (cnp->cn_hash == 0) cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen); - hashval = (cnp->cn_hash & NCHASHMASK); + hashval = cnp->cn_hash; if (nc_disabled) { return 0; @@ -1560,12 +1563,9 @@ relook: /* * We found a "negative" match, ENOENT notifies client of this match. - * The nc_whiteout field records whether this is a whiteout. */ NCHSTAT(ncs_neghits); - if (ncp->nc_whiteout) - cnp->cn_flags |= ISWHITEOUT; NAME_CACHE_UNLOCK(); return (ENOENT); } @@ -1702,7 +1702,6 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn ncp->nc_vp = vp; ncp->nc_dvp = dvp; ncp->nc_hashval = cnp->cn_hash; - ncp->nc_whiteout = FALSE; if (strname == NULL) ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0); @@ -1738,13 +1737,10 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn } else { /* * this is a negative cache entry (vp == NULL) - * stick it on the negative cache list - * and record the whiteout state + * stick it on the negative cache list. */ TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry); - if (cnp->cn_flags & ISWHITEOUT) - ncp->nc_whiteout = TRUE; ncs_negtotal++; if (ncs_negtotal > desiredNegNodes) { diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index 2f662c53c..621825949 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc.
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -88,6 +88,7 @@ #include #include #include +#include #include #include @@ -121,7 +122,7 @@ #define CL_NOCACHE 0x40000 #define MAX_VECTOR_UPL_ELEMENTS 8 -#define MAX_VECTOR_UPL_SIZE (2 * MAX_UPL_SIZE) * PAGE_SIZE +#define MAX_VECTOR_UPL_SIZE (2 * MAX_UPL_SIZE_BYTES) extern upl_t vector_upl_create(vm_offset_t); extern boolean_t vector_upl_is_valid(upl_t); @@ -167,7 +168,7 @@ static int cluster_is_throttled(vnode_t vp); static void cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name); -static void cluster_syncup(vnode_t vp, off_t newEOF, int (*)(buf_t, void *), void *callback_arg); +static void cluster_syncup(vnode_t vp, off_t newEOF, int (*)(buf_t, void *), void *callback_arg, int flags); static void cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int take_reference); static int cluster_copy_ubc_data_internal(vnode_t vp, struct uio *uio, int *io_resid, int mark_dirty, int take_reference); @@ -233,7 +234,7 @@ int (*bootcache_contains_block)(dev_t device, u_int64_t blkno) = NULL; * can represent it in a 32 bit int */ #define MAX_IO_REQUEST_SIZE (1024 * 1024 * 512) -#define MAX_IO_CONTIG_SIZE (MAX_UPL_SIZE * PAGE_SIZE) +#define MAX_IO_CONTIG_SIZE MAX_UPL_SIZE_BYTES #define MAX_VECTS 16 #define MIN_DIRECT_WRITE_SIZE (4 * PAGE_SIZE) @@ -243,9 +244,9 @@ int (*bootcache_contains_block)(dev_t device, u_int64_t blkno) = NULL; #define WRITE_BEHIND_SSD 1 #define PREFETCH 3 -#define PREFETCH_SSD 1 -uint32_t speculative_prefetch_max = (MAX_UPL_SIZE * 3); -uint32_t speculative_prefetch_max_iosize = (512 * 1024); /* maximum I/O size to use for a specluative read-ahead on SSDs*/ +#define PREFETCH_SSD 2 +uint32_t speculative_prefetch_max = (MAX_UPL_SIZE_BYTES * 3); /* maximum bytes in a speculative read-ahead */ +uint32_t speculative_prefetch_max_iosize = (512 * 1024); /* maximum I/O size to use in a speculative read-ahead on SSDs */ #define IO_SCALE(vp, base) (vp->v_mount->mnt_ioscale * (base)) @@ -311,19 +312,19 @@ cluster_max_io_size(mount_t mp, int type) maxcnt = min(mp->mnt_maxreadcnt, mp->mnt_maxwritecnt); break; } - if (segcnt > MAX_UPL_SIZE) { + if (segcnt > (MAX_UPL_SIZE_BYTES >> PAGE_SHIFT)) { /* * don't allow a size beyond the max UPL size we can create */ - segcnt = MAX_UPL_SIZE; + segcnt = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT; } max_io_size = min((segcnt * PAGE_SIZE), maxcnt); - if (max_io_size < (MAX_UPL_TRANSFER * PAGE_SIZE)) { /* * don't allow a size smaller than the old fixed limit */ - max_io_size = (MAX_UPL_TRANSFER * PAGE_SIZE); + if (max_io_size < MAX_UPL_TRANSFER_BYTES) { /* * don't allow a size smaller than the old fixed limit */ + max_io_size = MAX_UPL_TRANSFER_BYTES; } else { /* * make sure the size specified is a multiple of PAGE_SIZE @@ -441,7 +442,7 @@ cluster_get_wbp(vnode_t vp, int flags) static void -cluster_syncup(vnode_t vp, off_t newEOF, int (*callback)(buf_t, void *), void *callback_arg) +cluster_syncup(vnode_t vp, off_t newEOF, int (*callback)(buf_t, void *), void *callback_arg, int flags) { struct cl_writebehind *wbp; @@ -450,7 +451,7 @@ cluster_syncup(vnode_t vp, off_t newEOF, int (*callback)(buf_t, void *), void *c if (wbp->cl_number) { lck_mtx_lock(&wbp->cl_lockw); - cluster_try_push(wbp, vp, newEOF, PUSH_ALL | PUSH_SYNC, 0, callback, callback_arg); + cluster_try_push(wbp, vp, newEOF, PUSH_ALL | flags, 0, callback, callback_arg); lck_mtx_unlock(&wbp->cl_lockw); } @@ -760,7 +761,7 @@ cluster_zero(upl_t upl, upl_offset_t upl_offset, int size, buf_t bp) pl = ubc_upl_pageinfo(upl); if (upl_device_page(pl) == TRUE) { -
zero_addr = ((addr64_t)upl_phys_page(pl, 0) << 12) + upl_offset; + zero_addr = ((addr64_t)upl_phys_page(pl, 0) << PAGE_SHIFT) + upl_offset; bzero_phys_nc(zero_addr, size); } else { @@ -772,7 +773,7 @@ cluster_zero(upl_t upl, upl_offset_t upl_offset, int size, buf_t bp) page_index = upl_offset / PAGE_SIZE; page_offset = upl_offset & PAGE_MASK; - zero_addr = ((addr64_t)upl_phys_page(pl, page_index) << 12) + page_offset; + zero_addr = ((addr64_t)upl_phys_page(pl, page_index) << PAGE_SHIFT) + page_offset; zero_cnt = min(PAGE_SIZE - page_offset, size); bzero_phys(zero_addr, zero_cnt); @@ -1127,7 +1128,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no * * go direct to vnode_pageout so that we don't have to * unbusy the page from the UPL... we used to do this - * so that we could call ubc_sync_range, but that results + * so that we could call ubc_msync, but that results * in a potential deadlock if someone else races us to acquire * that page and wins and in addition needs one of the pages * we're continuing to hold in the UPL @@ -1234,7 +1235,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no } continue; } - lblkno = (daddr64_t)(f_offset / PAGE_SIZE_64); + lblkno = (daddr64_t)(f_offset / 0x1000); /* * we have now figured out how much I/O we can do - this is in 'io_size' * pg_offset is the starting point in the first page for the I/O @@ -1445,7 +1446,9 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no if (buf_setupl(cbp, upl, upl_offset)) panic("buf_setupl failed\n"); - +#if CONFIG_IOSCHED + upl_set_blkno(upl, upl_offset, io_size, blkno); +#endif cbp->b_trans_next = (buf_t)NULL; if ((cbp->b_iostate = (void *)iostate)) @@ -1713,16 +1716,16 @@ cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct } max_prefetch = MAX_PREFETCH(vp, cluster_max_io_size(vp->v_mount, CL_READ), (vp->v_mount->mnt_kern_flag & MNTK_SSD)); - if ((max_prefetch / PAGE_SIZE) > speculative_prefetch_max) - max_prefetch = (speculative_prefetch_max * PAGE_SIZE); + if (max_prefetch > speculative_prefetch_max) + max_prefetch = speculative_prefetch_max; if (max_prefetch <= PAGE_SIZE) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END, rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 6, 0); return; } - if (extent->e_addr < rap->cl_maxra) { - if ((rap->cl_maxra - extent->e_addr) > ((max_prefetch / PAGE_SIZE) / 4)) { + if (extent->e_addr < rap->cl_maxra && rap->cl_ralen >= 4) { + if ((rap->cl_maxra - extent->e_addr) > (rap->cl_ralen / 4)) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END, rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 2, 0); @@ -2137,6 +2140,9 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in if (flags & IO_NOCACHE) io_flag |= CL_NOCACHE; + if (flags & IO_SKIP_ENCRYPTION) + io_flag |= CL_ENCRYPTED; + iostate.io_completed = 0; iostate.io_issued = 0; iostate.io_error = 0; @@ -2214,7 +2220,7 @@ next_dwrite: } if (first_IO) { - cluster_syncup(vp, newEOF, callback, callback_arg); + cluster_syncup(vp, newEOF, callback, callback_arg, callback ? 
PUSH_SYNC : 0); first_IO = 0; } io_size = io_req_size & ~PAGE_MASK; @@ -2351,8 +2357,7 @@ next_dwrite: * if there are already too many outstanding writes * wait until some complete before issuing the next */ - if (iostate.io_issued > iostate.io_completed) - cluster_iostate_wait(&iostate, max_upl_size * IO_SCALE(vp, 2), "cluster_write_direct"); + cluster_iostate_wait(&iostate, max_upl_size * IO_SCALE(vp, 2), "cluster_write_direct"); if (iostate.io_error) { /* @@ -2437,14 +2442,12 @@ wait_for_dwrites: retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); reset_vector_run_state(); } + /* + * make sure all async writes issued as part of this stream + * have completed before we return + */ + cluster_iostate_wait(&iostate, 0, "cluster_write_direct"); - if (iostate.io_issued > iostate.io_completed) { - /* - * make sure all async writes issued as part of this stream - * have completed before we return - */ - cluster_iostate_wait(&iostate, 0, "cluster_write_direct"); - } if (iostate.io_error) retval = iostate.io_error; @@ -2505,7 +2508,7 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type, * -- the io_req_size will not exceed iov_len * -- the target address is physically contiguous */ - cluster_syncup(vp, newEOF, callback, callback_arg); + cluster_syncup(vp, newEOF, callback, callback_arg, callback ? PUSH_SYNC : 0); devblocksize = (u_int32_t)vp->v_mount->mnt_devblocksize; mem_alignment_mask = (u_int32_t)vp->v_mount->mnt_alignmentmask; @@ -2555,7 +2558,7 @@ next_cwrite: } pl = ubc_upl_pageinfo(upl[cur_upl]); - src_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + (addr64_t)upl_offset; + src_paddr = ((addr64_t)upl_phys_page(pl, 0) << PAGE_SHIFT) + (addr64_t)upl_offset; while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) { u_int32_t head_size; @@ -2605,8 +2608,7 @@ next_cwrite: * if there are already too many outstanding writes * wait until some have completed before issuing the next */ - if (iostate.io_issued > iostate.io_completed) - cluster_iostate_wait(&iostate, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), "cluster_write_contig"); + cluster_iostate_wait(&iostate, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), "cluster_write_contig"); if (iostate.io_error) { /* @@ -2651,8 +2653,7 @@ wait_for_cwrites: * make sure all async writes that are part of this stream * have completed before we proceed */ - if (iostate.io_issued > iostate.io_completed) - cluster_iostate_wait(&iostate, 0, "cluster_write_contig"); + cluster_iostate_wait(&iostate, 0, "cluster_write_contig"); if (iostate.io_error) error = iostate.io_error; @@ -2760,6 +2761,9 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old if (flags & IO_NOCACHE) bflag |= CL_NOCACHE; + if (flags & IO_SKIP_ENCRYPTION) + bflag |= CL_ENCRYPTED; + zero_cnt = 0; zero_cnt1 = 0; zero_off = 0; @@ -3375,6 +3379,8 @@ cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (* if ((vp->v_flag & VRAOFF) || speculative_reads_disabled) flags |= IO_RAOFF; + if (flags & IO_SKIP_ENCRYPTION) + flags |= IO_ENCRYPTED; /* * If we're doing an encrypted IO, then first check to see * if the IO requested was page aligned. 
If not, then bail @@ -3398,14 +3404,9 @@ cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (* * otherwise, find out if we want the direct or contig variant for * the first vector in the uio request */ - if (((flags & IO_NOCACHE) || (flags & IO_ENCRYPTED)) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) { - - boolean_t check_io_type = TRUE; + if ( ((flags & IO_NOCACHE) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) || (flags & IO_ENCRYPTED) ) { - - if (check_io_type) { - retval = cluster_io_type(uio, &read_type, &read_length, 0); - } + retval = cluster_io_type(uio, &read_type, &read_length, 0); } while ((cur_resid = uio_resid(uio)) && uio->uio_offset < filesize && retval == 0) { @@ -3514,6 +3515,9 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file if (flags & IO_NOCACHE) bflag |= CL_NOCACHE; + if (flags & IO_SKIP_ENCRYPTION) + bflag |= CL_ENCRYPTED; + max_io_size = cluster_max_io_size(vp->v_mount, CL_READ); max_prefetch = MAX_PREFETCH(vp, max_io_size, (vp->v_mount->mnt_kern_flag & MNTK_SSD)); max_rd_size = max_prefetch; @@ -3699,8 +3703,13 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file if (upl_size > max_io_size) upl_size = max_io_size; } else { - if (upl_size > max_io_size / 4) + if (upl_size > max_io_size / 4) { upl_size = max_io_size / 4; + upl_size &= ~PAGE_MASK; + + if (upl_size == 0) + upl_size = PAGE_SIZE; + } } pages_in_upl = upl_size / PAGE_SIZE; @@ -3855,7 +3864,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file rap->cl_lastr = extent.e_addr; } } - if (iostate.io_issued > iostate.io_completed) + if (iolock_inited == TRUE) cluster_iostate_wait(&iostate, 0, "cluster_read_copy"); if (iostate.io_error) @@ -3870,7 +3879,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file io_req_size -= (val_size - io_requested); } } else { - if (iostate.io_issued > iostate.io_completed) + if (iolock_inited == TRUE) cluster_iostate_wait(&iostate, 0, "cluster_read_copy"); } if (start_pg < last_pg) { @@ -3957,17 +3966,16 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file } } if (iolock_inited == TRUE) { - if (iostate.io_issued > iostate.io_completed) { - /* - * cluster_io returned an error after it - * had already issued some I/O. we need - * to wait for that I/O to complete before - * we can destroy the iostate mutex... - * 'retval' already contains the early error - * so no need to pick it up from iostate.io_error - */ - cluster_iostate_wait(&iostate, 0, "cluster_read_copy"); - } + /* + * cluster_io returned an error after it + * had already issued some I/O. we need + * to wait for that I/O to complete before + * we can destroy the iostate mutex... 
+ * 'retval' already contains the early error + * so no need to pick it up from iostate.io_error + */ + cluster_iostate_wait(&iostate, 0, "cluster_read_copy"); + lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp); } if (rap != NULL) { @@ -3983,7 +3991,6 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file return (retval); } - static int cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, u_int32_t *read_length, int flags, int (*callback)(buf_t, void *), void *callback_arg) @@ -4026,6 +4033,10 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, int vector_upl_index=0; upl_t vector_upl = NULL; + user_addr_t orig_iov_base = 0; + user_addr_t last_iov_base = 0; + user_addr_t next_iov_base = 0; + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START, (int)uio->uio_offset, (int)filesize, *read_type, *read_length, 0); @@ -4047,6 +4058,9 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, io_flag |= CL_NOCACHE; } + if (flags & IO_SKIP_ENCRYPTION) + io_flag |= CL_ENCRYPTED; + iostate.io_completed = 0; iostate.io_issued = 0; iostate.io_error = 0; @@ -4076,6 +4090,9 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, strict_uncached_IO = ubc_strict_uncached_IO(vp); + orig_iov_base = uio_curriovbase(uio); + last_iov_base = orig_iov_base; + next_dread: io_req_size = *read_length; iov_base = uio_curriovbase(uio); @@ -4383,8 +4400,7 @@ next_dread: * if there are already too many outstanding reads * wait until some have completed before issuing the next read */ - if (iostate.io_issued > iostate.io_completed) - cluster_iostate_wait(&iostate, max_rd_ahead, "cluster_read_direct"); + cluster_iostate_wait(&iostate, max_rd_ahead, "cluster_read_direct"); if (iostate.io_error) { /* @@ -4428,7 +4444,9 @@ next_dread: retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); reset_vector_run_state(); } - } + } + last_iov_base = iov_base + io_size; + /* * update the uio structure */ @@ -4479,8 +4497,7 @@ wait_for_dreads: * make sure all async reads that are part of this stream * have completed before we return */ - if (iostate.io_issued > iostate.io_completed) - cluster_iostate_wait(&iostate, 0, "cluster_read_direct"); + cluster_iostate_wait(&iostate, 0, "cluster_read_direct"); if (iostate.io_error) retval = iostate.io_error; @@ -4490,6 +4507,15 @@ wait_for_dreads: if (io_throttled == TRUE && retval == 0) retval = EAGAIN; + for (next_iov_base = orig_iov_base; next_iov_base < last_iov_base; next_iov_base += PAGE_SIZE) { + /* + * This is specifically done for pmap accounting purposes. + * vm_pre_fault() will call vm_fault() to enter the page into + * the pmap if there isn't _a_ physical page for that VA already. 
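The loop closing just below pre-faults each page of the user buffer so that, for pmap accounting, the pages are charged to the task that issued the read. As a rough standalone model of the address arithmetic only (not the kernel code; model_trunc_page, MODEL_PAGE_SIZE and the sample addresses are invented here, and the real vm_map_trunc_page macro takes the page mask as its second argument):

    #include <stdio.h>
    #include <stdint.h>

    #define MODEL_PAGE_SIZE 4096ULL
    #define MODEL_PAGE_MASK (MODEL_PAGE_SIZE - 1)
    /* model of vm_map_trunc_page(x, pgmask): round down to a page boundary */
    #define model_trunc_page(x, pgmask) ((uint64_t)(x) & ~(uint64_t)(pgmask))

    int main(void)
    {
        uint64_t orig_iov_base = 0x100002345ULL;          /* hypothetical user VA */
        uint64_t last_iov_base = orig_iov_base + 0x6000;  /* end of the I/O range */
        uint64_t next_iov_base;

        for (next_iov_base = orig_iov_base; next_iov_base < last_iov_base;
             next_iov_base += MODEL_PAGE_SIZE) {
            /* the kernel calls vm_pre_fault() on each truncated address */
            printf("would pre-fault page 0x%llx\n",
                   (unsigned long long)model_trunc_page(next_iov_base, MODEL_PAGE_MASK));
        }
        return 0;
    }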
+ */ + vm_pre_fault(vm_map_trunc_page(next_iov_base, PAGE_MASK)); + } + if (io_req_size && retval == 0) { /* * we couldn't handle the tail of this request in DIRECT mode @@ -4546,7 +4572,7 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, * -- the read_length will not exceed the current iov_len * -- the target address is physically contiguous for read_length */ - cluster_syncup(vp, filesize, callback, callback_arg); + cluster_syncup(vp, filesize, callback, callback_arg, PUSH_SYNC); devblocksize = (u_int32_t)vp->v_mount->mnt_devblocksize; mem_alignment_mask = (u_int32_t)vp->v_mount->mnt_alignmentmask; @@ -4604,7 +4630,7 @@ next_cread: } pl = ubc_upl_pageinfo(upl[cur_upl]); - dst_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + (addr64_t)upl_offset; + dst_paddr = ((addr64_t)upl_phys_page(pl, 0) << PAGE_SHIFT) + (addr64_t)upl_offset; while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) { u_int32_t head_size; @@ -4655,8 +4681,7 @@ next_cread: * if there are already too many outstanding reads * wait until some have completed before issuing the next */ - if (iostate.io_issued > iostate.io_completed) - cluster_iostate_wait(&iostate, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), "cluster_read_contig"); + cluster_iostate_wait(&iostate, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), "cluster_read_contig"); if (iostate.io_error) { /* @@ -4698,8 +4723,7 @@ wait_for_creads: * make sure all async reads that are part of this stream * have completed before we proceed */ - if (iostate.io_issued > iostate.io_completed) - cluster_iostate_wait(&iostate, 0, "cluster_read_contig"); + cluster_iostate_wait(&iostate, 0, "cluster_read_contig"); if (iostate.io_error) error = iostate.io_error; @@ -5001,7 +5025,7 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, vp, flags, 0, -2, 0); return (0); } - if (wbp->cl_number == 0 && wbp->cl_scmap == NULL) { + if (!ISSET(flags, IO_SYNC) && wbp->cl_number == 0 && wbp->cl_scmap == NULL) { lck_mtx_unlock(&wbp->cl_lockw); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, vp, flags, 0, -3, 0); @@ -5328,6 +5352,9 @@ cluster_push_now(vnode_t vp, struct cl_extent *cl, off_t EOF, int flags, int (*c else bflag = 0; + if (flags & IO_SKIP_ENCRYPTION) + bflag |= CL_ENCRYPTED; + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START, (int)cl->b_addr, (int)cl->e_addr, (int)EOF, flags, 0); @@ -5628,7 +5655,7 @@ cluster_align_phys_io(vnode_t vp, struct uio *uio, addr64_t usr_paddr, u_int32_t } did_read = 1; } - ubc_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + (addr64_t)(uio->uio_offset & PAGE_MASK_64); + ubc_paddr = ((addr64_t)upl_phys_page(pl, 0) << PAGE_SHIFT) + (addr64_t)(uio->uio_offset & PAGE_MASK_64); /* * NOTE: There is no prototype for the following in BSD. It, and the definitions @@ -5714,7 +5741,7 @@ cluster_copy_upl_data(struct uio *uio, upl_t upl, int upl_offset, int *io_resid) while (xsize && retval == 0) { addr64_t paddr; - paddr = ((addr64_t)upl_phys_page(pl, pg_index) << 12) + pg_offset; + paddr = ((addr64_t)upl_phys_page(pl, pg_index) << PAGE_SHIFT) + pg_offset; retval = uiomove64(paddr, csize, uio); diff --git a/bsd/vfs/vfs_conf.c b/bsd/vfs/vfs_conf.c index d802374b6..69e1a23ab 100644 --- a/bsd/vfs/vfs_conf.c +++ b/bsd/vfs/vfs_conf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -108,41 +108,38 @@ extern int mockfs_mountroot(mount_t, vnode_t, vfs_context_t); */ typedef int (*mountroot_t)(mount_t, vnode_t, vfs_context_t); +enum fs_type_num { + FT_NFS = 2, + FT_HFS = 17, + FT_DEVFS = 19, + FT_SYNTHFS = 20, + FT_MOCKFS = 0x6D6F636B +}; + /* * Set up the filesystem operations for vnodes. */ static struct vfstable vfstbllist[] = { /* HFS/HFS+ Filesystem */ #if HFS - { &hfs_vfsops, "hfs", 17, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 0, 0, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED | VFC_VFS64BITREADY | VFC_VFSVNOP_PAGEOUTV2 | VFC_VFSVNOP_PAGEINV2, NULL, 0}, + { &hfs_vfsops, "hfs", FT_HFS, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 0, 0, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED | VFC_VFS64BITREADY | VFC_VFSVNOP_PAGEOUTV2 | VFC_VFSVNOP_PAGEINV2 +#if CONFIG_SECLUDED_RENAME + | VFC_VFSVNOP_SECLUDE_RENAME #endif - - /* Memory-based Filesystem */ - -#ifndef __LP64__ -#if MFS - { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL, 0, 0, VFC_VFSGENERICARGS, NULL, 0}, + , NULL, 0, NULL}, #endif -#endif /* __LP64__ */ /* Sun-compatible Network Filesystem */ #if NFSCLIENT - { &nfs_vfsops, "nfs", 2, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFSPREFLIGHT | VFC_VFS64BITREADY | VFC_VFSREADDIR_EXTENDED, NULL, 0}, + { &nfs_vfsops, "nfs", FT_NFS, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFSPREFLIGHT | VFC_VFS64BITREADY | VFC_VFSREADDIR_EXTENDED, NULL, 0, NULL}, #endif - /* Andrew Filesystem */ -#ifndef __LP64__ -#if AFS - { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL, 0, 0, VFC_VFSGENERICARGS , NULL, 0}, -#endif -#endif /* __LP64__ */ - /* Device Filesystem */ #if DEVFS #if CONFIG_MACF - { &devfs_vfsops, "devfs", 19, 0, (MNT_DONTBROWSE | MNT_MULTILABEL), NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFS64BITREADY, NULL, 0}, + { &devfs_vfsops, "devfs", FT_DEVFS, 0, MNT_MULTILABEL, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFS64BITREADY, NULL, 0, NULL}, #else - { &devfs_vfsops, "devfs", 19, 0, MNT_DONTBROWSE, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFS64BITREADY, NULL, 0}, + { &devfs_vfsops, "devfs", FT_DEVFS, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFS64BITREADY, NULL, 0, NULL}, #endif /* MAC */ #endif @@ -151,33 +148,20 @@ static struct vfstable vfstbllist[] = { #if MOCKFS /* If we are configured for it, mockfs should always be the last standard entry (and thus the last FS we attempt mountroot with) */ - { &mockfs_vfsops, "mockfs", 0x6D6F636B, 0, MNT_LOCAL, mockfs_mountroot, NULL, 0, 0, VFC_VFSGENERICARGS, NULL, 0}, + { &mockfs_vfsops, "mockfs", FT_MOCKFS, 0, MNT_LOCAL, mockfs_mountroot, NULL, 0, 0, VFC_VFSGENERICARGS, NULL, 0, NULL}, #endif /* MOCKFS */ - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0} + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, 
NULL, 0, NULL}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0, NULL}, }; /* - * Initially the size of the list, vfs_init will set maxvfsconf - * to the highest defined type number. + * vfs_init will set maxvfstypenum to the highest defined type number. */ -int maxvfsslots = sizeof(vfstbllist) / sizeof (struct vfstable); +const int maxvfsslots = sizeof(vfstbllist) / sizeof (struct vfstable); int numused_vfsslots = 0; -int maxvfsconf = sizeof(vfstbllist) / sizeof (struct vfstable); +int numregistered_fses = 0; +int maxvfstypenum = VT_NON + 1; struct vfstable *vfsconf = vfstbllist; /* diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c index 3044c8e43..fd3636132 100644 --- a/bsd/vfs/vfs_fsevents.c +++ b/bsd/vfs/vfs_fsevents.c @@ -500,6 +500,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename); printf("add_fsevent: zalloc sez: %p\n", junkptr); printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]); + lock_watch_table(); for(ii=0; ii < MAX_WATCHERS; ii++) { if (watcher_table[ii] == NULL) { continue; @@ -511,6 +512,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) watcher_table[ii]->rd, watcher_table[ii]->wr, watcher_table[ii]->eventq_size, watcher_table[ii]->flags); } + unlock_watch_table(); last_print = current_tv; if (junkptr) { @@ -964,14 +966,7 @@ add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_even lock_watch_table(); - // now update the global list of who's interested in - // events of a particular type... - for(i=0; i < num_events; i++) { - if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) { - fs_event_type_watchers[i]++; - } - } - + // find a slot for the new watcher for(i=0; i < MAX_WATCHERS; i++) { if (watcher_table[i] == NULL) { watcher->my_id = i; @@ -980,12 +975,21 @@ add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_even } } - if (i > MAX_WATCHERS) { + if (i >= MAX_WATCHERS) { printf("fsevents: too many watchers!\n"); unlock_watch_table(); + FREE(watcher, M_TEMP); return ENOSPC; } + // now update the global list of who's interested in + // events of a particular type... + for(i=0; i < num_events; i++) { + if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) { + fs_event_type_watchers[i]++; + } + } + unlock_watch_table(); *watcher_out = watcher; @@ -2262,6 +2266,7 @@ fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, s error = falloc(p, &f, &fd, vfs_context_current()); if (error) { + remove_watcher(fseh->watcher); FREE(event_list, M_TEMP); FREE(fseh, M_TEMP); return (error); diff --git a/bsd/vfs/vfs_fslog.c b/bsd/vfs/vfs_fslog.c index dfc64b7a2..b13faa434 100644 --- a/bsd/vfs/vfs_fslog.c +++ b/bsd/vfs/vfs_fslog.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -105,231 +105,3 @@ fslog_extmod_msgtracer(proc_t caller, proc_t target) } } -/* Log file system related error in key-value format identified by Apple - * system log (ASL) facility. The key-value pairs are string pointers - * (char *) and are provided as variable arguments list. A NULL value - * indicates end of the list. - * - * Keys can not contain '[', ']', space, and newline. Values can not - * contain '[', ']', and newline. If any key-value contains any of the - * reserved characters, the behavior is undefined. 
The caller of the - * function should escape any occurrences of '[' and ']' by prefixing - * it with '\'. - * - * The function takes a message ID which can be used to logically group - * different ASL messages. Messages in same logical group have same message - * ID and have information to describe order of the message --- first, - * middle, or last. - * - * The following message IDs have special meaning - - * FSLOG_MSG_FIRST - This message is the first message in its logical - * group. This generates a unique message ID, creates two key-value - * pairs with message ID and order of the message as "First". - * FSLOG_MSG_LAST - This is really a MASK which should be logically OR'ed - * with message ID to indicate the last message for a logical group. - * This also creates two key-value pairs with message ID and order of - * message as "Last". - * FSLOG_MSG_SINGLE - This signifies that the message is the only message - * in its logical group. Therefore no extra key-values are generated - * for this option. - * For all other values of message IDs, it regards them as intermediate - * message and generates two key-value pairs with message ID and order of - * message as "Middle". - * - * Returns - - * Message ID of the ASL message printed. The caller should use - * this value to print intermediate messages or end the logical message - * group. - * For FSLOG_MSG_SINGLE option, it returns FSLOG_MSG_SINGLE. - */ -unsigned long fslog_err(unsigned long msg_id, ... ) -{ - va_list ap; - int num_pairs; - char msg_id_str[21]; /* To convert 64-bit number to string with NULL char */ - char *arg; - const char *msg_order_ptr; - - /* Count number of arguments and key-value pairs provided by user */ - num_pairs = 0; - va_start(ap, msg_id); - arg = va_arg(ap, char *); - while (arg) { - num_pairs++; - arg = va_arg(ap, char *); - } - num_pairs /= 2; - va_end(ap); - - va_start(ap, msg_id); - if (msg_id == FSLOG_MSG_SINGLE) { - /* Single message, do not print message ID and message order */ - (void) kern_asl_msg_va(FSLOG_VAL_LEVEL, FSLOG_VAL_FACILITY, - num_pairs, ap, - FSLOG_KEY_READ_UID, FSLOG_VAL_READ_UID, - NULL); - } else { - if (msg_id == FSLOG_MSG_FIRST) { - /* First message, generate random message ID */ - while ((msg_id == FSLOG_MSG_FIRST) || - (msg_id == FSLOG_MSG_LAST) || - (msg_id == FSLOG_MSG_SINGLE)) { - msg_id = RandomULong(); - /* MSB is reserved for indicating last message - * in sequence. Clear the MSB while generating - * new message ID. - */ - msg_id = msg_id >> 1; - } - msg_order_ptr = FSLOG_VAL_ORDER_FIRST; - } else if (msg_id & FSLOG_MSG_LAST) { - /* MSB set to indicate last message for this ID */ - msg_order_ptr = FSLOG_VAL_ORDER_LAST; - /* MSB of message ID is set to indicate last message - * in sequence. Clear the bit to get real message ID. - */ - msg_id = msg_id & ~FSLOG_MSG_LAST; - } else { - /* Intermediate message for this ID */ - msg_order_ptr = FSLOG_VAL_ORDER_MIDDLE; - } - - snprintf(msg_id_str, sizeof(msg_id_str), "%lu", msg_id); - (void) kern_asl_msg_va(FSLOG_VAL_LEVEL, FSLOG_VAL_FACILITY, - num_pairs, ap, - FSLOG_KEY_READ_UID, FSLOG_VAL_READ_UID, - FSLOG_KEY_MSG_ID, msg_id_str, - FSLOG_KEY_MSG_ORDER, msg_order_ptr, NULL); - } - va_end(ap); - return msg_id; -} - -/* Log information about runtime file system corruption detected by - * the file system. It takes the VFS mount structure as - * parameter which is used to access the mount point of the - * corrupt volume. If no mount structure or mount point string - * string exists, nothing is logged to ASL database. 
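Before its removal here, fslog_err() grouped related ASL messages by generating a random message ID with the most-significant bit cleared, then reusing that bit as the FSLOG_MSG_LAST marker, exactly as the comment above describes. The encoding round-trips like this (a standalone model with invented values, assuming a 64-bit unsigned long as on LP64):

    #include <stdio.h>

    #define MODEL_MSG_LAST 0x8000000000000000UL  /* stands in for FSLOG_MSG_LAST */

    int main(void)
    {
        /* RandomULong() >> 1 in the removed code: MSB reserved for "last" */
        unsigned long msg_id = 0x123456789abcdef0UL >> 1;
        unsigned long last   = msg_id | MODEL_MSG_LAST;   /* tag final message */

        printf("group id:  0x%lx\n", msg_id);
        printf("last msg:  0x%lx\n", last);
        printf("recovered: 0x%lx\n", last & ~MODEL_MSG_LAST);  /* ID restored */
        return 0;
    }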
- * - * Currently prints following information - - * 1. Mount Point - */ -void fslog_fs_corrupt(struct mount *mnt) -{ - if (mnt != NULL) { - fslog_err(FSLOG_MSG_SINGLE, - FSLOG_KEY_ERR_TYPE, FSLOG_VAL_ERR_TYPE_FS, - FSLOG_KEY_MNTPT, mnt->mnt_vfsstat.f_mntonname, - NULL); - } - - return; -} - -/* Log information about IO error detected in buf_biodone() - * Currently prints following information - - * 1. Physical block number - * 2. Logical block number - * 3. Device node - * 4. Mount point - * 5. Path for file, if any - * 6. Error number - * 7. Type of IO (read/write) - */ -void fslog_io_error(const buf_t bp) -{ - int err; - unsigned long msg_id; - char blknum_str[21]; - char lblknum_str[21]; - char errno_str[6]; - const char *iotype; - unsigned char print_last = 0; - vnode_t vp; - - if (buf_error(bp) == 0) { - return; - } - - /* Convert error number to string */ - snprintf (errno_str, sizeof(errno_str), "%d", buf_error(bp)); - - /* Determine type of IO operation */ - if (buf_flags(bp) & B_READ) { - iotype = FSLOG_VAL_IOTYPE_READ; - } else { - iotype = FSLOG_VAL_IOTYPE_WRITE; - } - - /* Convert physical block number to string */ - snprintf (blknum_str, sizeof(blknum_str), "%lld", buf_blkno(bp)); - - /* Convert logical block number to string */ - snprintf (lblknum_str, sizeof(lblknum_str), "%lld", buf_lblkno(bp)); - - msg_id = fslog_err(FSLOG_MSG_FIRST, - FSLOG_KEY_ERR_TYPE, FSLOG_VAL_ERR_TYPE_IO, - FSLOG_KEY_ERRNO, errno_str, - FSLOG_KEY_IOTYPE, iotype, - FSLOG_KEY_PHYS_BLKNUM, blknum_str, - FSLOG_KEY_LOG_BLKNUM, lblknum_str, - NULL); - - /* Access the vnode for this buffer */ - vp = buf_vnode(bp); - if (vp) { - struct vfsstatfs *sp; - mount_t mp; - char *path; - int len; - struct vfs_context context; - - mp = vnode_mount(vp); - /* mp should be NULL only for bdevvp during boot */ - if (mp == NULL) { - goto out; - } - sp = vfs_statfs(mp); - - /* Access the file path */ - MALLOC(path, char *, MAXPATHLEN, M_TEMP, M_WAITOK); - if (path) { - len = MAXPATHLEN; - context.vc_thread = current_thread(); - context.vc_ucred = kauth_cred_get(); - /* Find path without entering file system */ - err = build_path(vp, path, len, &len, BUILDPATH_NO_FS_ENTER, - &context); - if (!err) { - err = escape_str(path, len, MAXPATHLEN); - if (!err) { - /* Print device node, mount point, path */ - msg_id = fslog_err(msg_id | FSLOG_MSG_LAST, - FSLOG_KEY_DEVNODE, sp->f_mntfromname, - FSLOG_KEY_MNTPT, sp->f_mntonname, - FSLOG_KEY_PATH, path, - NULL); - print_last = 1; - } - } - FREE(path, M_TEMP); - } - - if (print_last == 0) { - /* Print device node and mount point */ - msg_id = fslog_err(msg_id | FSLOG_MSG_LAST, - FSLOG_KEY_DEVNODE, sp->f_mntfromname, - FSLOG_KEY_MNTPT, sp->f_mntonname, - NULL); - print_last = 1; - } - } - -out: - if (print_last == 0) { - msg_id = fslog_err(msg_id | FSLOG_MSG_LAST, NULL); - } - - return; -} - diff --git a/bsd/vfs/vfs_init.c b/bsd/vfs/vfs_init.c index f9dfc09e5..54b6880fc 100644 --- a/bsd/vfs/vfs_init.c +++ b/bsd/vfs/vfs_init.c @@ -268,6 +268,10 @@ lck_grp_attr_t * trigger_vnode_lck_grp_attr; lck_attr_t * trigger_vnode_lck_attr; #endif +lck_grp_t * fd_vn_lck_grp; +lck_grp_attr_t * fd_vn_lck_grp_attr; +lck_attr_t * fd_vn_lck_attr; + /* vars for vnode list lock */ lck_grp_t * vnode_list_lck_grp; lck_grp_attr_t * vnode_list_lck_grp_attr; @@ -292,6 +296,12 @@ lck_grp_attr_t * mnt_list_lck_grp_attr; lck_attr_t * mnt_list_lck_attr; lck_mtx_t * mnt_list_mtx_lock; +/* vars for sync mutex */ +lck_grp_t * sync_mtx_lck_grp; +lck_grp_attr_t * sync_mtx_lck_grp_attr; +lck_attr_t * sync_mtx_lck_attr; 
+lck_mtx_t * sync_mtx_lck; + lck_mtx_t *pkg_extensions_lck; struct mount * dead_mountp; @@ -338,6 +348,10 @@ vfsinit(void) trigger_vnode_lck_grp = lck_grp_alloc_init("trigger_vnode", trigger_vnode_lck_grp_attr); trigger_vnode_lck_attr = lck_attr_alloc_init(); #endif + /* Allocate per fd vnode data lock attribute and group */ + fd_vn_lck_grp_attr = lck_grp_attr_alloc_init(); + fd_vn_lck_grp = lck_grp_alloc_init("fd_vnode_data", fd_vn_lck_grp_attr); + fd_vn_lck_attr = lck_attr_alloc_init(); /* Allocate fs config lock group attribute and group */ fsconf_lck_grp_attr= lck_grp_attr_alloc_init(); @@ -369,6 +383,12 @@ vfsinit(void) /* Allocate mount lock attribute */ mnt_lck_attr = lck_attr_alloc_init(); + /* Allocate sync lock */ + sync_mtx_lck_grp_attr = lck_grp_attr_alloc_init(); + sync_mtx_lck_grp = lck_grp_alloc_init("sync thread", sync_mtx_lck_grp_attr); + sync_mtx_lck_attr = lck_attr_alloc_init(); + sync_mtx_lck = lck_mtx_alloc_init(sync_mtx_lck_grp, sync_mtx_lck_attr); + /* * Initialize the vnode table */ @@ -399,7 +419,7 @@ vfsinit(void) * Initialize each file system type in the static list, * until the first NULL ->vfs_vfsops is encountered. */ - numused_vfsslots = maxtypenum = 0; + maxtypenum = VT_NON; for (vfsp = vfsconf, i = 0; i < maxvfsslots; i++, vfsp++) { struct vfsconf vfsc; if (vfsp->vfc_vfsops == (struct vfsops *)0) @@ -417,12 +437,26 @@ vfsinit(void) vfsc.vfc_reserved2 = 0; vfsc.vfc_reserved3 = 0; + if (vfsp->vfc_vfsops->vfs_sysctl) { + struct sysctl_oid *oidp = NULL; + struct sysctl_oid oid = SYSCTL_STRUCT_INIT(_vfs, vfsp->vfc_typenum, , CTLTYPE_NODE | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0, vfs_sysctl_node, "-", ""); + + MALLOC(oidp, struct sysctl_oid *, sizeof(struct sysctl_oid), M_TEMP, M_WAITOK); + *oidp = oid; + + /* Memory for VFS oid held by vfsentry forever */ + vfsp->vfc_sysctl = oidp; + oidp->oid_name = vfsp->vfc_name; + sysctl_register_oid(vfsp->vfc_sysctl); + } + (*vfsp->vfc_vfsops->vfs_init)(&vfsc); numused_vfsslots++; + numregistered_fses++; } /* next vfc_typenum to be used */ - maxvfsconf = maxtypenum; + maxvfstypenum = maxtypenum; /* * Initialize the vnop authorization scope. 
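The sync_mtx_lck and fd_vn lock setup added above follows xnu's usual lock bootstrapping order: group attribute, named group, lock attribute, then the lock itself. A minimal sketch of the same sequence for a hypothetical subsystem (names invented; like the original, allocation failures are not checked):

    #include <kern/locks.h>

    static lck_grp_attr_t *demo_lck_grp_attr;
    static lck_grp_t      *demo_lck_grp;
    static lck_attr_t     *demo_lck_attr;
    static lck_mtx_t      *demo_mtx;

    static void
    demo_lock_init(void)
    {
        /* group attribute -> named group -> lock attribute -> mutex */
        demo_lck_grp_attr = lck_grp_attr_alloc_init();
        demo_lck_grp      = lck_grp_alloc_init("demo subsystem", demo_lck_grp_attr);
        demo_lck_attr     = lck_attr_alloc_init();
        demo_mtx          = lck_mtx_alloc_init(demo_lck_grp, demo_lck_attr);
    }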
@@ -534,7 +568,16 @@ vfstable_add(struct vfstable *nvfsp) { int slot; struct vfstable *slotp, *allocated = NULL; + struct sysctl_oid *oidp = NULL; + + if (nvfsp->vfc_vfsops->vfs_sysctl) { + struct sysctl_oid oid = SYSCTL_STRUCT_INIT(_vfs, nvfsp->vfc_typenum, , CTLTYPE_NODE | CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0, vfs_sysctl_node, "-", ""); + + MALLOC(oidp, struct sysctl_oid *, sizeof(struct sysctl_oid), M_TEMP, M_WAITOK); + *oidp = oid; + } + /* * Find the next empty slot; we recognize an empty slot by a * NULL-valued ->vfc_vfsops, so if we delete a VFS, we must @@ -555,7 +598,6 @@ findslot: goto findslot; } else { slotp = allocated; - allocated = NULL; } } else { slotp = &vfsconf[slot]; @@ -575,11 +617,24 @@ findslot: } else { slotp->vfc_next = NULL; } - numused_vfsslots++; - mount_list_unlock(); + if (slotp != allocated) { + /* used a statically allocated slot */ + numused_vfsslots++; + } + numregistered_fses++; - if (allocated != NULL) { + if (oidp) { + /* Memory freed in vfstable_del after unregistration */ + slotp->vfc_sysctl = oidp; + oidp->oid_name = slotp->vfc_name; + sysctl_register_oid(slotp->vfc_sysctl); + } + + mount_list_unlock(); + + if (allocated && allocated != slotp) { + /* did allocation, but ended up using static slot */ FREE(allocated, M_TEMP); } @@ -616,12 +671,19 @@ vfstable_del(struct vfstable * vtbl) */ for( vcpp = &vfsconf; *vcpp; vcpp = &(*vcpp)->vfc_next) { if (*vcpp == vtbl) - break; - } + break; + } if (*vcpp == NULL) return(ESRCH); /* vtbl not on vfsconf list */ + if ((*vcpp)->vfc_sysctl) { + sysctl_unregister_oid((*vcpp)->vfc_sysctl); + (*vcpp)->vfc_sysctl->oid_name = NULL; + FREE((*vcpp)->vfc_sysctl, M_TEMP); + (*vcpp)->vfc_sysctl = NULL; + } + /* Unlink entry */ vcdelp = *vcpp; *vcpp = (*vcpp)->vfc_next; @@ -634,6 +696,7 @@ vfstable_del(struct vfstable * vtbl) if (vcdelp >= vfsconf && vcdelp < (vfsconf + maxvfsslots)) { /* Y */ /* Mark as empty for vfscon_add() */ bzero(vcdelp, sizeof(struct vfstable)); + numregistered_fses--; numused_vfsslots--; } else { /* N */ /* @@ -642,6 +705,7 @@ vfstable_del(struct vfstable * vtbl) * vfsconf onto our list, but it may not be persistent * because of the previous (copying) implementation. */ + numregistered_fses--; mount_list_unlock(); FREE(vcdelp, M_TEMP); mount_list_lock(); diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index d38556692..628e5e7dc 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2012 Apple Inc. All rights reserved. + * Copyright (c) 2002-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -124,13 +124,13 @@ SYSCTL_INT(_vfs_generic_jnl_kdebug, OID_AUTO, trim, CTLFLAG_RW|CTLFLAG_LOCKED, & #if JOURNALING // -// By default, we grow the list of extents to trim by one page at a time. +// By default, we grow the list of extents to trim by 4K at a time. // We'll opt to flush a transaction if it contains at least // JOURNAL_FLUSH_TRIM_EXTENTS extents to be trimmed (even if the number // of modified blocks is small). 
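The enum that follows this comment now fixes JOURNAL_DEFAULT_TRIM_BYTES at 4096 instead of the (possibly 16K) PAGE_SIZE, and derives the extent counts from it. A quick standalone check of that arithmetic, with dk_extent_t modeled here as the 16-byte offset/length pair declared in <sys/disk.h>:

    #include <stdio.h>
    #include <stdint.h>

    typedef struct { uint64_t offset; uint64_t length; } model_dk_extent_t;

    int main(void)
    {
        int trim_bytes    = 4096;  /* JOURNAL_DEFAULT_TRIM_BYTES after this change */
        int trim_extents  = trim_bytes / (int)sizeof(model_dk_extent_t);
        int flush_extents = trim_extents * 15 / 16;

        /* 4096 / 16 = 256 extents per growth step; flush at 240 queued extents */
        printf("extents per grow: %d, flush threshold: %d\n",
               trim_extents, flush_extents);
        return 0;
    }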
// enum { - JOURNAL_DEFAULT_TRIM_BYTES = PAGE_SIZE, + JOURNAL_DEFAULT_TRIM_BYTES = 4096, JOURNAL_DEFAULT_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_BYTES / sizeof(dk_extent_t), JOURNAL_FLUSH_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_EXTENTS * 15 / 16 }; @@ -162,8 +162,6 @@ static int end_transaction(transaction *tr, int force_it, errno_t (*callback)(vo static void abort_transaction(journal *jnl, transaction *tr); static void dump_journal(journal *jnl); -static __inline__ void lock_journal(journal *jnl); -static __inline__ void unlock_journal(journal *jnl); static __inline__ void lock_oldstart(journal *jnl); static __inline__ void unlock_oldstart(journal *jnl); static __inline__ void lock_flush(journal *jnl); @@ -277,15 +275,20 @@ journal_init(void) jnl_mutex_group = lck_grp_alloc_init("jnl-mutex", jnl_group_attr); } -static __inline__ void -lock_journal(journal *jnl) +__inline__ void +journal_lock(journal *jnl) { lck_mtx_lock(&jnl->jlock); + if (jnl->owner) { + panic ("jnl: owner is %p, expected NULL\n", jnl->owner); + } + jnl->owner = current_thread(); } -static __inline__ void -unlock_journal(journal *jnl) +__inline__ void +journal_unlock(journal *jnl) { + jnl->owner = NULL; lck_mtx_unlock(&jnl->jlock); } @@ -335,6 +338,7 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction size_t io_sz = 0; buf_t bp; off_t max_iosize; + struct bufattr *bap; if (*offset < 0 || *offset > jnl->jhdr->size) { panic("jnl: do_jnl_io: bad offset 0x%llx (max 0x%llx)\n", *offset, jnl->jhdr->size); @@ -370,6 +374,20 @@ again: panic("jnl: request for i/o to jnl-header without JNL_HEADER flag set! (len %d, data %p)\n", curlen, data); } + /* + * As alluded to in the block comment at the top of the function, we use a "fake" iobuf + * here and issue directly to the disk device that the journal protects since we don't + * want this to enter the block cache. As a result, we lose the ability to mark it + * as a metadata buf_t for the layers below us that may care. If we were to + * simply attach the B_META flag into the b_flags this may confuse things further + * since this is an iobuf, not a metadata buffer. + * + * To address this, we use the extended bufattr struct embedded in the bp. + * Explicitly mark the buf here as a metadata buffer in its bufattr flags. + */ + bap = &bp->b_attr; + bap->ba_flags |= BA_META; + if (direction & JNL_READ) buf_setflags(bp, B_READ); else { @@ -572,9 +590,6 @@ buffer_flushed_callback(struct buf *bp, void *arg) CHECK_TRANSACTION(tr); jnl = tr->jnl; - if (jnl->flags & JOURNAL_INVALID) { - return; - } CHECK_JOURNAL(jnl); @@ -615,6 +630,9 @@ buffer_flushed_callback(struct buf *bp, void *arg) // cleanup for this transaction tr->total_bytes = 0xfbadc0de; + if (jnl->flags & JOURNAL_INVALID) + goto transaction_done; + //printf("jnl: tr 0x%x (0x%llx 0x%llx) in jnl 0x%x completed.\n", // tr, tr->journal_start, tr->journal_end, jnl); @@ -710,6 +728,7 @@ buffer_flushed_callback(struct buf *bp, void *arg) tr->next = jnl->tr_freeme; jnl->tr_freeme = tr; } +transaction_done: unlock_oldstart(jnl); unlock_condition(jnl, &jnl->asyncIO); @@ -1470,7 +1489,7 @@ bad_replay: #define DEFAULT_TRANSACTION_BUFFER_SIZE (128*1024) -#define MAX_TRANSACTION_BUFFER_SIZE (2048*1024) +#define MAX_TRANSACTION_BUFFER_SIZE (3072*1024) // XXXdbg - so I can change it in the debugger int def_tbuffer_size = 0; @@ -1489,14 +1508,14 @@ size_up_tbuffer(journal *jnl, int tbuffer_size, int phys_blksz) // there is in the machine. 
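size_up_tbuffer(), whose memory-keyed scaling follows just below (now reading max_mem rather than the stale mem_size), picks the default transaction buffer from how much memory the machine has. A standalone check of the resulting sizes (DEFAULT_TBUF mirrors the 128K DEFAULT_TRANSACTION_BUFFER_SIZE #define above; in the kernel the result is additionally limited by the MAX_TRANSACTION_BUFFER_SIZE just raised to 3MB):

    #include <stdio.h>
    #include <stdint.h>

    #define DEFAULT_TBUF (128 * 1024)  /* DEFAULT_TRANSACTION_BUFFER_SIZE */

    /* model of the max_mem scaling in size_up_tbuffer() */
    static uint64_t def_tbuffer(uint64_t max_mem)
    {
        if (max_mem < (256ULL * 1024 * 1024))
            return DEFAULT_TBUF;
        else if (max_mem < (512ULL * 1024 * 1024))
            return DEFAULT_TBUF * 2;
        else if (max_mem < (1024ULL * 1024 * 1024))
            return DEFAULT_TBUF * 3;
        else
            return DEFAULT_TBUF * (max_mem / (256ULL * 1024 * 1024));
    }

    int main(void)
    {
        uint64_t mem[] = { 128ULL << 20, 384ULL << 20, 768ULL << 20, 4ULL << 30 };
        for (int i = 0; i < 4; i++)
            printf("max_mem %4llu MB -> tbuffer %4llu KB\n",
                   (unsigned long long)(mem[i] >> 20),
                   (unsigned long long)(def_tbuffer(mem[i]) >> 10));
        return 0;
    }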
// if (def_tbuffer_size == 0) { - if (mem_size < (256*1024*1024)) { + if (max_mem < (256*1024*1024)) { def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE; - } else if (mem_size < (512*1024*1024)) { + } else if (max_mem < (512*1024*1024)) { def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * 2; - } else if (mem_size < (1024*1024*1024)) { + } else if (max_mem < (1024*1024*1024)) { def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * 3; } else { - def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * (mem_size / (256*1024*1024)); + def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * (max_mem / (256*1024*1024)); } } @@ -1534,8 +1553,6 @@ size_up_tbuffer(journal *jnl, int tbuffer_size, int phys_blksz) } } - - static void get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_context *context) { @@ -2196,7 +2213,7 @@ journal_close(journal *jnl) jnl->flags |= JOURNAL_CLOSE_PENDING; if (jnl->owner != current_thread()) { - lock_journal(jnl); + journal_lock(jnl); } wait_condition(jnl, &jnl->flushing, "journal_close"); @@ -2274,6 +2291,7 @@ journal_close(journal *jnl) } } } + wait_condition(jnl, &jnl->asyncIO, "journal_close"); free_old_stuff(jnl); @@ -2286,7 +2304,7 @@ journal_close(journal *jnl) vnode_putname_printable(jnl->jdev_name); - unlock_journal(jnl); + journal_unlock(jnl); lck_mtx_destroy(&jnl->old_start_lock, jnl_mutex_group); lck_mtx_destroy(&jnl->jlock, jnl_mutex_group); lck_mtx_destroy(&jnl->flock, jnl_mutex_group); @@ -2460,7 +2478,19 @@ static errno_t journal_allocate_transaction(journal *jnl) { transaction *tr; + boolean_t was_vm_privileged; + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * the disk driver can allocate memory on this path... + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } MALLOC_ZONE(tr, transaction *, sizeof(transaction), M_JNL_TR, M_WAITOK); memset(tr, 0, sizeof(transaction)); @@ -2471,6 +2501,8 @@ journal_allocate_transaction(journal *jnl) jnl->active_tr = NULL; return ENOMEM; } + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); // journal replay code checksum check depends on this. 
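journal_allocate_transaction() above brackets its blocking MALLOC_ZONE this way when the journal lives on a mount backing a swap file, and trim_realloc(), journal_trim_flush() and finish_end_transaction() further down repeat the same shape around kalloc()/kmem_alloc(). The pattern, distilled from those hunks — set_vm_privilege() returns the previous setting, so the epilogue only clears a privilege this code actually set (was_vm_privileged is initialized defensively here; the hunks leave it unset when the mount is not swap-backed):

    boolean_t was_vm_privileged = FALSE;

    if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) {
        /*
         * blocking for memory here while the swap-file creation path
         * waits on this journal could deadlock; run vm-privileged
         */
        was_vm_privileged = set_vm_privilege(TRUE);
    }

    /* ... blocking allocation (MALLOC_ZONE / kalloc / kmem_alloc) ... */

    if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
        set_vm_privilege(FALSE);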
memset(tr->tbuffer, 0, BLHDR_CHECKSUM_SIZE); @@ -2513,14 +2545,14 @@ journal_start_transaction(journal *jnl) jnl->nested_count++; return 0; } - lock_journal(jnl); - if (jnl->owner != NULL || jnl->nested_count != 0 || jnl->active_tr != NULL) { + journal_lock(jnl); + + if (jnl->nested_count != 0 || jnl->active_tr != NULL) { panic("jnl: start_tr: owner %p, nested count %d, active_tr %p jnl @ %p\n", jnl->owner, jnl->nested_count, jnl->active_tr, jnl); } - jnl->owner = current_thread(); jnl->nested_count = 1; #if JOE @@ -2558,9 +2590,8 @@ journal_start_transaction(journal *jnl) return 0; bad_start: - jnl->owner = NULL; jnl->nested_count = 0; - unlock_journal(jnl); + journal_unlock(jnl); return ret; } @@ -2858,7 +2889,7 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, vo blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp)); blhdr->binfo[i].u.bp = bp; - KERNEL_DEBUG_CONSTANT(0x3018004, vp, blhdr->binfo[i].bnum, bsize, 0, 0); + KERNEL_DEBUG_CONSTANT(0x3018004, VM_KERNEL_ADDRPERM(vp), blhdr->binfo[i].bnum, bsize, 0, 0); if (func) { void (*old_func)(buf_t, void *)=NULL, *old_arg=NULL; @@ -3006,16 +3037,31 @@ journal_trim_set_callback(journal *jnl, jnl_trim_callback_t callback, void *arg) ;________________________________________________________________________________ */ static int -trim_realloc(struct jnl_trim_list *trim) +trim_realloc(journal *jnl, struct jnl_trim_list *trim) { void *new_extents; uint32_t new_allocated_count; + boolean_t was_vm_privileged; if (jnl_kdebug) - KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_START, trim, 0, trim->allocated_count, trim->extent_count, 0); + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_START, VM_KERNEL_ADDRPERM(trim), 0, trim->allocated_count, trim->extent_count, 0); new_allocated_count = trim->allocated_count + JOURNAL_DEFAULT_TRIM_EXTENTS; + + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } new_extents = kalloc(new_allocated_count * sizeof(dk_extent_t)); + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + if (new_extents == NULL) { printf("jnl: trim_realloc: unable to grow extent list!\n"); /* @@ -3143,7 +3189,7 @@ journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length) CHECK_TRANSACTION(tr); if (jnl_kdebug) - KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_START, jnl, offset, length, tr->trim.extent_count, 0); + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_START, VM_KERNEL_ADDRPERM(jnl), offset, length, tr->trim.extent_count, 0); if (jnl->owner != current_thread()) { panic("jnl: trim_add_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n", @@ -3179,7 +3225,7 @@ journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length) if (replace_count == 0) { /* If the list was already full, we need to grow it. 
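The trace points in these hunks now pass kernel pointers through VM_KERNEL_ADDRPERM before handing them to KERNEL_DEBUG_CONSTANT, so trace consumers see values that are comparable within a boot without learning real kernel addresses. Conceptually it behaves like the model below (an illustration only; the real macro keeps a boot-time random slide inside the VM layer and preserves NULL):

    #include <stdint.h>

    /* model: non-NULL pointers get a per-boot random slide, NULL stays NULL */
    static uintptr_t addrperm_model(uintptr_t v, uintptr_t boot_slide)
    {
        return (v == 0) ? 0 : v + boot_slide;
    }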
*/ if (tr->trim.extent_count == tr->trim.allocated_count) { - if (trim_realloc(&tr->trim) != 0) { + if (trim_realloc(jnl, &tr->trim) != 0) { printf("jnl: trim_add_extent: out of memory!"); if (jnl_kdebug) KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_END, ENOMEM, 0, 0, tr->trim.extent_count, 0); @@ -3347,7 +3393,7 @@ journal_request_immediate_flush (journal *jnl) { ;________________________________________________________________________________ */ static int -trim_remove_extent(struct jnl_trim_list *trim, uint64_t offset, uint64_t length) +trim_remove_extent(journal *jnl, struct jnl_trim_list *trim, uint64_t offset, uint64_t length) { u_int64_t end; dk_extent_t *extent; @@ -3394,7 +3440,7 @@ trim_remove_extent(struct jnl_trim_list *trim, uint64_t offset, uint64_t length) if (keep_before > keep_after) { /* If the list was already full, we need to grow it. */ if (trim->extent_count == trim->allocated_count) { - if (trim_realloc(trim) != 0) { + if (trim_realloc(jnl, trim) != 0) { printf("jnl: trim_remove_extent: out of memory!"); return ENOMEM; } @@ -3490,7 +3536,7 @@ journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) CHECK_TRANSACTION(tr); if (jnl_kdebug) - KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE | DBG_FUNC_START, jnl, offset, length, tr->trim.extent_count, 0); + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE | DBG_FUNC_START, VM_KERNEL_ADDRPERM(jnl), offset, length, tr->trim.extent_count, 0); if (jnl->owner != current_thread()) { panic("jnl: trim_remove_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n", @@ -3499,7 +3545,7 @@ journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) free_old_stuff(jnl); - error = trim_remove_extent(&tr->trim, offset, length); + error = trim_remove_extent(jnl, &tr->trim, offset, length); if (error == 0) { int found = FALSE; @@ -3522,10 +3568,10 @@ journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) uint32_t async_extent_count = 0; if (jnl_kdebug) - KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE_PENDING | DBG_FUNC_START, jnl, offset, length, 0, 0); + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE_PENDING | DBG_FUNC_START, VM_KERNEL_ADDRPERM(jnl), offset, length, 0, 0); lck_rw_lock_exclusive(&jnl->trim_lock); if (jnl->async_trim != NULL) { - error = trim_remove_extent(jnl->async_trim, offset, length); + error = trim_remove_extent(jnl, jnl->async_trim, offset, length); async_extent_count = jnl->async_trim->extent_count; } lck_rw_unlock_exclusive(&jnl->trim_lock); @@ -3544,10 +3590,22 @@ static int journal_trim_flush(journal *jnl, transaction *tr) { int errno = 0; + boolean_t was_vm_privileged; if (jnl_kdebug) - KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_START, jnl, tr, 0, tr->trim.extent_count, 0); + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_START, VM_KERNEL_ADDRPERM(jnl), tr, 0, tr->trim.extent_count, 0); + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * the disk driver can allocate memory on this path... + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... 
+ * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } lck_rw_lock_shared(&jnl->trim_lock); if (tr->trim.extent_count > 0) { dk_unmap_t unmap; @@ -3557,7 +3615,7 @@ journal_trim_flush(journal *jnl, transaction *tr) unmap.extents = tr->trim.extents; unmap.extentsCount = tr->trim.extent_count; if (jnl_kdebug) - KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_START, jnl, tr, 0, tr->trim.extent_count, 0); + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_START, VM_KERNEL_ADDRPERM(jnl), tr, 0, tr->trim.extent_count, 0); errno = VNOP_IOCTL(jnl->fsdev, DKIOCUNMAP, (caddr_t)&unmap, FWRITE, vfs_context_kernel()); if (jnl_kdebug) KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_END, errno, 0, 0, 0, 0); @@ -3577,6 +3635,8 @@ journal_trim_flush(journal *jnl, transaction *tr) } lck_rw_unlock_shared(&jnl->trim_lock); + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); /* * If the transaction we're flushing was the async transaction, then * tell the current transaction that there is no pending trim @@ -3914,8 +3974,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void must_wait = TRUE; if (drop_lock_early == TRUE) { - jnl->owner = NULL; - unlock_journal(jnl); + journal_unlock(jnl); drop_lock = FALSE; } if (must_wait == TRUE) @@ -3933,8 +3992,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_END, jnl, tr, ret_val, 0, 0); done: if (drop_lock == TRUE) { - jnl->owner = NULL; - unlock_journal(jnl); + journal_unlock(jnl); } return (ret_val); } @@ -3987,6 +4045,7 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba end = jnl->jhdr->end; for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) { + boolean_t was_vm_privileged; amt = blhdr->bytes_used; @@ -3995,9 +4054,22 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba blhdr->checksum = 0; blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); + if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... 
+ * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) { panic("can't allocate %zd bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *)); } + if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + tbuffer_offset = jnl->jhdr->blhdr_size; for (i = 1; i < blhdr->num_blocks; i++) { @@ -4228,13 +4300,13 @@ bad_journal: jnl->flush_aborted = TRUE; unlock_condition(jnl, &jnl->flushing); - lock_journal(jnl); + journal_lock(jnl); jnl->flags |= JOURNAL_INVALID; jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL; abort_transaction(jnl, tr); // cleans up list of extents to be trimmed - unlock_journal(jnl); + journal_unlock(jnl); } else unlock_condition(jnl, &jnl->flushing); @@ -4438,8 +4510,7 @@ journal_end_transaction(journal *jnl) abort_transaction(jnl, tr); } - jnl->owner = NULL; - unlock_journal(jnl); + journal_unlock(jnl); return EINVAL; } @@ -4513,7 +4584,7 @@ journal_flush(journal *jnl, boolean_t wait_for_IO) KERNEL_DEBUG(DBG_JOURNAL_FLUSH | DBG_FUNC_START, jnl, 0, 0, 0, 0); if (jnl->owner != current_thread()) { - lock_journal(jnl); + journal_lock(jnl); drop_lock = TRUE; } @@ -4537,7 +4608,7 @@ journal_flush(journal *jnl, boolean_t wait_for_IO) } else { if (drop_lock == TRUE) { - unlock_journal(jnl); + journal_unlock(jnl); } /* Because of pipelined journal, the journal transactions @@ -4850,4 +4921,47 @@ journal_owner(__unused journal *jnl) { return NULL; } + +void +journal_lock(__unused journal *jnl) +{ + return; +} + +void +journal_unlock(__unused journal *jnl) +{ + return; +} + +__private_extern__ int +journal_trim_add_extent(__unused journal *jnl, + __unused uint64_t offset, + __unused uint64_t length) +{ + return 0; +} + +int +journal_request_immediate_flush(__unused journal *jnl) +{ + return 0; +} + +__private_extern__ int +journal_trim_remove_extent(__unused journal *jnl, + __unused uint64_t offset, + __unused uint64_t length) +{ + return 0; +} + +int journal_trim_extent_overlap(__unused journal *jnl, + __unused uint64_t offset, + __unused uint64_t length, + __unused uint64_t *end) +{ + return 0; +} + #endif // !JOURNALING diff --git a/bsd/vfs/vfs_journal.h b/bsd/vfs/vfs_journal.h index 0e7950ebb..5b9578b37 100644 --- a/bsd/vfs/vfs_journal.h +++ b/bsd/vfs/vfs_journal.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -333,7 +333,7 @@ int journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) void journal_trim_set_callback(journal *jnl, jnl_trim_callback_t callback, void *arg); int journal_trim_extent_overlap (journal *jnl, uint64_t offset, uint64_t length, uint64_t *end); /* Mark state in the journal that requests an immediate journal flush upon txn completion */ -int journal_request_immediate_flush (journal *jnl); +int journal_request_immediate_flush (journal *jnl); #endif int journal_end_transaction(journal *jnl); @@ -341,6 +341,8 @@ int journal_active(journal *jnl); int journal_flush(journal *jnl, boolean_t wait_for_IO); void *journal_owner(journal *jnl); // compare against current_thread() int journal_uses_fua(journal *jnl); +void journal_lock(journal *jnl); +void journal_unlock(journal *jnl); /* diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index 784c835fc..579542e16 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -112,17 +112,18 @@ static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, v #define MAX_VOLFS_RESTARTS 5 #endif -boolean_t lookup_continue_ok(struct nameidata *ndp); -int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx); -int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx); -int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx); -void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation); -int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx); -int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, +static int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx); +static int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx); +static int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx); +static void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation); +static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, int vbusyflags, int *keep_going, int nc_generation, int wantparent, int atroot, vfs_context_t ctx); -int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent); +static int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent); +#if NAMEDRSRCFORK +static int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx); +#endif @@ -456,7 +457,7 @@ namei_compound_available(vnode_t dp, struct nameidata *ndp) return 0; } -int +static int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx) { #if !CONFIG_MACF @@ -479,7 +480,7 @@ lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized return 0; } -void +static void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation) { int isdot_or_dotdot; @@ -524,7 +525,7 @@ lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, * data fork if requested. 
On failure, returns with iocount data fork (always) and its parent directory * (if one was provided). */ -int +static int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx) { vnode_t svp = NULLVP; @@ -612,7 +613,7 @@ out: * --In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount * was dropped). */ -int +static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, int vbusyflags, int *keep_going, int nc_generation, int wantparent, int atroot, vfs_context_t ctx) @@ -797,7 +798,7 @@ out: /* * Comes in iocount on ni_vp. May overwrite ni_dvp, but doesn't interpret incoming value. */ -int +static int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent) { vnode_t dp; @@ -924,9 +925,8 @@ lookup(struct nameidata *ndp) /* * Setup: break out flag bits into variables. */ - if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) { - if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE)) - docache = 0; + if (cnp->cn_flags & NOCACHE) { + docache = 0; } wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); rdonly = cnp->cn_flags & RDONLY; @@ -1305,16 +1305,14 @@ lookup_validate_creation_path(struct nameidata *ndp) /* * Modifies only ni_vp. Always returns with ni_vp still valid (iocount held). */ -int +static int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx) { mount_t mp; vnode_t tdp; int error = 0; - uthread_t uth; uint32_t depth = 0; - int dont_cache_mp = 0; vnode_t mounted_on_dp; int current_mount_generation = 0; @@ -1353,25 +1351,7 @@ lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vn continue; } - - /* - * XXX - if this is the last component of the - * pathname, and it's either not a lookup operation - * or the NOTRIGGER flag is set for the operation, - * set a uthread flag to let VFS_ROOT() for autofs - * know it shouldn't trigger a mount. - */ - uth = (struct uthread *)get_bsdthread_info(current_thread()); - if ((cnp->cn_flags & ISLASTCN) && - (cnp->cn_nameiop != LOOKUP || - (cnp->cn_flags & NOTRIGGER))) { - uth->uu_notrigger = 1; - dont_cache_mp = 1; - } - error = VFS_ROOT(mp, &tdp, ctx); - /* XXX - clear the uthread flag */ - uth->uu_notrigger = 0; mount_dropcrossref(mp, dp, 0); vfs_unbusy(mp); @@ -1402,7 +1382,7 @@ lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vn } } - if (depth && !dont_cache_mp) { + if (depth) { mp = mounted_on_dp->v_mountedhere; if (mp) { @@ -1424,7 +1404,7 @@ out: * Takes ni_vp and ni_dvp non-NULL. Returns with *new_dp set to the location * at which to start a lookup with a resolved path, and all other iocounts dropped. 
*/ -int +static int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) { int error; diff --git a/bsd/vfs/vfs_quota.c b/bsd/vfs/vfs_quota.c index 4edc2115b..2546e809a 100644 --- a/bsd/vfs/vfs_quota.c +++ b/bsd/vfs/vfs_quota.c @@ -1094,6 +1094,7 @@ munge_dqblk(struct dqblk *dqblkp, struct user_dqblk *user_dqblkp, boolean_t to64 user_dqblkp->dqb_btime = dqblkp->dqb_btime; } else { + /* munge user (64 bit) dqblk into kernel (32 bit) dqblk */ bcopy((caddr_t)user_dqblkp, (caddr_t)dqblkp, offsetof(struct dqblk, dqb_btime)); dqblkp->dqb_id = user_dqblkp->dqb_id; diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 3339f7910..a4e732070 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -128,7 +128,7 @@ #include /* kalloc()/kfree() */ #include /* delay_for_interval() */ #include /* OSAddAtomic() */ - +#include #ifdef JOE_DEBUG #include @@ -140,6 +140,8 @@ #include #endif +#define PANIC_PRINTS_VNODES + extern lck_grp_t *vnode_lck_grp; extern lck_attr_t *vnode_lck_attr; @@ -168,6 +170,8 @@ extern void memory_object_mark_unused( memory_object_control_t control, boolean_t rage); +extern void memory_object_mark_io_tracking( + memory_object_control_t control); /* XXX next protptype should be from */ extern int nfs_vinvalbuf(vnode_t, int, vfs_context_t, int); @@ -204,6 +208,8 @@ static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev); static int vnode_reload(vnode_t); static int vnode_isinuse_locked(vnode_t, int, int); +static int unmount_callback(mount_t, __unused void *); + static void insmntque(vnode_t vp, mount_t mp); static int mount_getvfscnt(void); static int mount_fillfsids(fsid_t *, int ); @@ -512,28 +518,31 @@ vnode_umount_preflight(mount_t mp, vnode_t skipvp, int flags) vnode_t vp; TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - /* disable preflight only for udf, a hack to be removed after 4073176 is fixed */ - if (vp->v_tag == VT_UDF) - return 0; if (vp->v_type == VDIR) continue; if (vp == skipvp) continue; - if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || - (vp->v_flag & VNOFLUSH))) + if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) continue; if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) continue; - if ((flags & WRITECLOSE) && - (vp->v_writecount == 0 || vp->v_type != VREG)) + if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) continue; + /* Look for busy vnode */ - if (((vp->v_usecount != 0) && - ((vp->v_usecount - vp->v_kusecount) != 0))) - return(1); + if ((vp->v_usecount != 0) && ((vp->v_usecount - vp->v_kusecount) != 0)) { + return 1; + + } else if (vp->v_iocount > 0) { + /* Busy if iocount is > 0 for more than 3 seconds */ + tsleep(&vp->v_iocount, PVFS, "vnode_drain_network", 3 * hz); + if (vp->v_iocount > 0) + return 1; + continue; } + } - return(0); + return 0; } /* @@ -616,6 +625,39 @@ vnode_iterate_clear(mount_t mp) } +#include + +struct vnode_iterate_panic_hook { + panic_hook_t hook; + mount_t mp; + struct vnode *vp; +}; + +static void vnode_iterate_panic_hook(panic_hook_t *hook_) +{ + extern int kdb_log(const char *fmt, ...); + struct vnode_iterate_panic_hook *hook = (struct vnode_iterate_panic_hook *)hook_; + panic_phys_range_t range; + uint64_t phys; + + if (panic_phys_range_before(hook->mp, &phys, &range)) { + kdb_log("mp = %p, phys = %p, prev (%p: %p-%p)\n", + hook->mp, phys, 
range.type, range.phys_start, + range.phys_start + range.len); + } else { + kdb_log("mp = %p, phys = %p, prev (!)\n", hook->mp, phys); + } + + if (panic_phys_range_before(hook->vp, &phys, &range)) { + kdb_log("vp = %p, phys = %p, prev (%p: %p-%p)\n", + hook->mp, phys, range.type, range.phys_start, + range.phys_start + range.len); + } else { + kdb_log("vp = %p, phys = %p, prev (!)\n", hook->vp, phys); + } + panic_dump_mem((void *)(((vm_offset_t)hook->mp -4096) & ~4095), 12288); +} + int vnode_iterate(mount_t mp, int flags, int (*callout)(struct vnode *, void *), void *arg) @@ -636,10 +678,15 @@ vnode_iterate(mount_t mp, int flags, int (*callout)(struct vnode *, void *), mount_unlock(mp); return(ret); } - + + struct vnode_iterate_panic_hook hook; + hook.mp = mp; + hook.vp = NULL; + panic_hook(&hook.hook, vnode_iterate_panic_hook); /* iterate over all the vnodes */ while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { vp = TAILQ_FIRST(&mp->mnt_workerqueue); + hook.vp = vp; TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); vid = vp->v_id; @@ -690,6 +737,7 @@ vnode_iterate(mount_t mp, int flags, int (*callout)(struct vnode *, void *), } out: + panic_unhook(&hook.hook); (void)vnode_iterate_reloadq(mp); vnode_iterate_clear(mp); mount_unlock(mp); @@ -854,37 +902,34 @@ vfs_busy(mount_t mp, int flags) restart: if (mp->mnt_lflag & MNT_LDEAD) - return(ENOENT); + return (ENOENT); - if (mp->mnt_lflag & MNT_LUNMOUNT) { - if (flags & LK_NOWAIT) - return (ENOENT); - - mount_lock(mp); + mount_lock(mp); - if (mp->mnt_lflag & MNT_LDEAD) { + if (mp->mnt_lflag & MNT_LUNMOUNT) { + if (flags & LK_NOWAIT || mp->mnt_lflag & MNT_LDEAD) { mount_unlock(mp); - return(ENOENT); - } - if (mp->mnt_lflag & MNT_LUNMOUNT) { - mp->mnt_lflag |= MNT_LWAIT; - /* - * Since all busy locks are shared except the exclusive - * lock granted when unmounting, the only place that a - * wakeup needs to be done is at the release of the - * exclusive lock at the end of dounmount. - */ - msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", NULL); return (ENOENT); } - mount_unlock(mp); + + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. + */ + mp->mnt_lflag |= MNT_LWAIT; + msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", NULL); + return (ENOENT); } + mount_unlock(mp); + lck_rw_lock_shared(&mp->mnt_rwlock); /* - * until we are granted the rwlock, it's possible for the mount point to - * change state, so reevaluate before granting the vfs_busy + * Until we are granted the rwlock, it's possible for the mount point to + * change state, so re-evaluate before granting the vfs_busy. */ if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { lck_rw_done(&mp->mnt_rwlock); @@ -896,7 +941,6 @@ restart: /* * Free a busy filesystem. 
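The reworked vfs_busy() in this hunk takes mnt_mlock before inspecting MNT_LUNMOUNT, sleeps on MNT_LWAIT with PDROP while an unmount holds the exclusive rwlock, and, once it finally gets the shared rwlock, re-checks the mount state and retries if it changed while waiting. The skeleton of that lock-then-revalidate pattern, condensed from the code above:

    restart:
        if (mp->mnt_lflag & MNT_LDEAD)
            return (ENOENT);

        mount_lock(mp);
        if (mp->mnt_lflag & MNT_LUNMOUNT) {
            if ((flags & LK_NOWAIT) || (mp->mnt_lflag & MNT_LDEAD)) {
                mount_unlock(mp);
                return (ENOENT);
            }
            /* PDROP: msleep releases mnt_mlock; dounmount's wakeup ends the wait */
            mp->mnt_lflag |= MNT_LWAIT;
            msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", NULL);
            return (ENOENT);
        }
        mount_unlock(mp);

        lck_rw_lock_shared(&mp->mnt_rwlock);

        /* state may have changed while we waited for the rwlock: re-validate */
        if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) {
            lck_rw_done(&mp->mnt_rwlock);
            goto restart;
        }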
*/ - void vfs_unbusy(mount_t mp) { @@ -970,7 +1014,7 @@ vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname) vfsp->vfc_refcount++; mount_list_unlock(); - strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); + strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); mp->mnt_vfsstat.f_mntonname[0] = '/'; /* XXX const poisoning layering violation */ (void) copystr((const void *)devname, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, NULL); @@ -1087,6 +1131,12 @@ vfs_mountroot(void) mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT; } + uint32_t speed; + + if (MNTK_VIRTUALDEV & mp->mnt_kern_flag) speed = 128; + else if (MNTK_SSD & mp->mnt_kern_flag) speed = 7*256; + else speed = 256; + vc_progress_setdiskspeed(speed); /* * Probe root file system for additional features. */ @@ -1369,6 +1419,7 @@ bdevvp(dev_t dev, vnode_t *vpp) return (0); } + /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of @@ -1591,7 +1642,7 @@ out: } -static boolean_t +boolean_t vnode_on_reliable_media(vnode_t vp) { if ( !(vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) && (vp->v_mount->mnt_flag & MNT_LOCAL) ) @@ -1788,8 +1839,8 @@ vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked) panic("vnode_rele_ext: vp %p usecount -ve : %d. v_tag = %d, v_type = %d, v_flag = %x.", vp, vp->v_usecount, vp->v_tag, vp->v_type, vp->v_flag); if (fmode & FWRITE) { - if (--vp->v_writecount < 0) - panic("vnode_rele_ext: vp %p writecount -ve : %d. v_tag = %d, v_type = %d, v_flag = %x.", vp, vp->v_writecount, vp->v_tag, vp->v_type, vp->v_flag); + if (--vp->v_writecount < 0) + panic("vnode_rele_ext: vp %p writecount -ve : %d. v_tag = %d, v_type = %d, v_flag = %x.", vp, vp->v_writecount, vp->v_tag, vp->v_type, vp->v_flag); } if (fmode & O_EVTONLY) { if (--vp->v_kusecount < 0) @@ -1799,39 +1850,38 @@ vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked) panic("vnode_rele_ext: vp %p kusecount(%d) out of balance with usecount(%d). v_tag = %d, v_type = %d, v_flag = %x.",vp, vp->v_kusecount, vp->v_usecount, vp->v_tag, vp->v_type, vp->v_flag); if ((vp->v_iocount > 0) || (vp->v_usecount > 0)) { - /* + /* * vnode is still busy... if we're the last * usecount, mark for a future call to VNOP_INACTIVE * when the iocount finally drops to 0 */ - if (vp->v_usecount == 0) { - vp->v_lflag |= VL_NEEDINACTIVE; + if (vp->v_usecount == 0) { + vp->v_lflag |= VL_NEEDINACTIVE; vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF | VOPENEVT); } goto done; } vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF | VOPENEVT); - if ( (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) || dont_reenter) { - /* + if (ISSET(vp->v_lflag, VL_TERMINATE | VL_DEAD) || dont_reenter) { + /* * vnode is being cleaned, or * we've requested that we don't reenter - * the filesystem on this release... 
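Just above, strncpy() becomes strlcpy() for f_fstypename. The distinction matters: strncpy() does not NUL-terminate when the source fills the bound, so a maximal-length fstype name could leave the field unterminated, while strlcpy() always terminates and truncates. A two-line illustration (the local buffer name is hypothetical):

    char fstype[MFSTYPENAMELEN];

    strlcpy(fstype, vfsp->vfc_name, sizeof(fstype)); /* always NUL-terminated */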
in - * this case, we'll mark the vnode aged - * if it's been marked for termination + * the filesystem on this release...in + * the latter case, we'll mark the vnode aged */ - if (dont_reenter) { - if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) ) { - vp->v_lflag |= VL_NEEDINACTIVE; + if (dont_reenter) { + if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) ) { + vp->v_lflag |= VL_NEEDINACTIVE; - if (vnode_on_reliable_media(vp) == FALSE) { + if (vnode_on_reliable_media(vp) == FALSE || vp->v_flag & VISDIRTY) { vnode_async_list_add(vp); goto done; } } vp->v_flag |= VAGE; } - vnode_list_add(vp); + vnode_list_add(vp); goto done; } @@ -1899,9 +1949,6 @@ done: */ #if DIAGNOSTIC int busyprt = 0; /* print out busy vnodes */ -#if 0 -struct ctldebug debug1 = { "busyprt", &busyprt }; -#endif /* 0 */ #endif int @@ -2188,6 +2235,8 @@ vclean(vnode_t vp, int flags) * Destroy ubc named reference * cluster_release is done on this path * along with dropping the reference on the ucred + * (and in the case of forced unmount of an mmap-ed file, + * the ubc reference on the vnode is dropped here too). */ ubc_destroy_named(vp); @@ -2216,6 +2265,7 @@ vclean(vnode_t vp, int flags) vp->v_data = NULL; vp->v_lflag |= VL_DEAD; + vp->v_flag &= ~VISDIRTY; if (already_terminating == 0) { vp->v_lflag &= ~VL_TERMINATE; @@ -2272,15 +2322,25 @@ vn_revoke(vnode_t vp, __unused int flags, __unused vfs_context_t a_context) SPECHASH_LOCK(); break; } - vnode_reclaim_internal(vq, 0, 1, 0); - vnode_put(vq); + vnode_lock(vq); + if (!(vq->v_lflag & VL_TERMINATE)) { + vnode_reclaim_internal(vq, 1, 1, 0); + } + vnode_put_locked(vq); + vnode_unlock(vq); SPECHASH_LOCK(); break; } } SPECHASH_UNLOCK(); } - vnode_reclaim_internal(vp, 0, 0, REVOKEALL); + vnode_lock(vp); + if (vp->v_lflag & VL_TERMINATE) { + vnode_unlock(vp); + return (ENOENT); + } + vnode_reclaim_internal(vp, 1, 0, REVOKEALL); + vnode_unlock(vp); return (0); } @@ -2715,136 +2775,141 @@ int vn_searchfs_inappropriate_name(const char *name, int len) { */ extern unsigned int vfs_nummntops; +/* + * The VFS_NUMMNTOPS shouldn't be at name[1] since + * is a VFS generic variable. Since we no longer support + * VT_UFS, we reserve its value to support this sysctl node. + * + * It should have been: + * name[0]: VFS_GENERIC + * name[1]: VFS_NUMMNTOPS + */ +SYSCTL_INT(_vfs, VFS_NUMMNTOPS, nummntops, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + &vfs_nummntops, 0, ""); + +int +vfs_sysctl(int *name __unused, u_int namelen __unused, + user_addr_t oldp __unused, size_t *oldlenp __unused, + user_addr_t newp __unused, size_t newlen __unused, proc_t p __unused); + int -vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, proc_t p) +vfs_sysctl(int *name __unused, u_int namelen __unused, + user_addr_t oldp __unused, size_t *oldlenp __unused, + user_addr_t newp __unused, size_t newlen __unused, proc_t p __unused) { - struct vfstable *vfsp; - int *username; - u_int usernamelen; - int error; - struct vfsconf vfsc; + return (EINVAL); +} - if (namelen > CTL_MAXNAME) { - return (EINVAL); - } - /* All non VFS_GENERIC and in VFS_GENERIC, - * VFS_MAXTYPENUM, VFS_CONF, VFS_SET_PACKAGE_EXTS - * needs to have root priv to have modifiers. - * For rest the userland_sysctl(CTLFLAG_ANYBODY) would cover. 
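The SYSCTL_INT registration above replaces the hand-rolled sysctl_rdint() answer for VFS_NUMMNTOPS with an ordinary read-only OID. A hedged userland sketch of reading it; the "vfs.nummntops" name is inferred from that registration, so verify it before relying on it:

    #include <stdio.h>
    #include <sys/sysctl.h>

    unsigned int nummntops = 0;
    size_t len = sizeof(nummntops);

    if (sysctlbyname("vfs.nummntops", &nummntops, &len, NULL, 0) == 0)
            printf("%u mount operations so far\n", nummntops);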
- */ - if ((newp != USER_ADDR_NULL) && ((name[0] != VFS_GENERIC) || - ((name[1] == VFS_MAXTYPENUM) || - (name[1] == VFS_CONF) || - (name[1] == VFS_SET_PACKAGE_EXTS))) - && (error = suser(kauth_cred_get(), &p->p_acflag))) { - return(error); - } - /* - * The VFS_NUMMNTOPS shouldn't be at name[0] since - * is a VFS generic variable. So now we must check - * namelen so we don't end up covering any UFS - * variables (sinc UFS vfc_typenum is 1). - * - * It should have been: - * name[0]: VFS_GENERIC - * name[1]: VFS_NUMMNTOPS - */ - if (namelen == 1 && name[0] == VFS_NUMMNTOPS) { - return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops)); - } +// +// The following code disallows specific sysctl's that came through +// the direct sysctl interface (vfs_sysctl_node) instead of the newer +// sysctl_vfs_ctlbyfsid() interface. We can not allow these selectors +// through vfs_sysctl_node() because it passes the user's oldp pointer +// directly to the file system which (for these selectors) casts it +// back to a struct sysctl_req and then proceed to use SYSCTL_IN() +// which jumps through an arbitrary function pointer. When called +// through the sysctl_vfs_ctlbyfsid() interface this does not happen +// and so it's safe. +// +// Unfortunately we have to pull in definitions from AFP and SMB and +// perform explicit name checks on the file system to determine if +// these selectors are being used. +// - /* all sysctl names at this level are at least name and field */ - if (namelen < 2) - return (EISDIR); /* overloaded */ - if (name[0] != VFS_GENERIC) { +#define AFPFS_VFS_CTL_GETID 0x00020001 +#define AFPFS_VFS_CTL_NETCHANGE 0x00020002 +#define AFPFS_VFS_CTL_VOLCHANGE 0x00020003 - mount_list_lock(); - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (vfsp->vfc_typenum == name[0]) { - vfsp->vfc_refcount++; - break; - } - mount_list_unlock(); +#define SMBFS_SYSCTL_REMOUNT 1 +#define SMBFS_SYSCTL_REMOUNT_INFO 2 +#define SMBFS_SYSCTL_GET_SERVER_SHARE 3 - if (vfsp == NULL) - return (ENOTSUP); - /* XXX current context proxy for proc p? 
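The long comment above is the heart of this hunk: the legacy vfs_sysctl() path handed the user's oldp pointer straight to the filesystem, which cast it back to a struct sysctl_req and then called through an arbitrary function pointer via SYSCTL_IN(); the replacement routes per-fstype selectors through vfs_sysctl_node() instead, and the gutted vfs_sysctl() now just returns EINVAL. For comparison, a minimal modern handler skeleton using the standard SYSCTL_HANDLER_ARGS / SYSCTL_OUT macros from sys/sysctl.h (the reported value is a placeholder):

    static int
    my_fs_sysctl SYSCTL_HANDLER_ARGS
    {
            int value = 0;          /* whatever this fs wants to report */

            (void)oidp; (void)arg1; (void)arg2;
            return SYSCTL_OUT(req, &value, sizeof(value));
    }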
*/ - error = ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, - oldp, oldlenp, newp, newlen, - vfs_context_current())); +static int +is_bad_sysctl_name(struct vfstable *vfsp, int selector_name) +{ + switch(selector_name) { + case VFS_CTL_QUERY: + case VFS_CTL_TIMEO: + case VFS_CTL_NOLOCKS: + case VFS_CTL_NSTATUS: + case VFS_CTL_SADDR: + case VFS_CTL_DISC: + case VFS_CTL_SERVERINFO: + return 1; + break; - mount_list_lock(); - vfsp->vfc_refcount--; - mount_list_unlock(); - return error; + default: + break; } - switch (name[1]) { - case VFS_MAXTYPENUM: - return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); - case VFS_CONF: - if (namelen < 3) - return (ENOTDIR); /* overloaded */ - mount_list_lock(); - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (vfsp->vfc_typenum == name[2]) + // the more complicated check for some of SMB's special values + if (strcmp(vfsp->vfc_name, "smbfs") == 0) { + switch(selector_name) { + case SMBFS_SYSCTL_REMOUNT: + case SMBFS_SYSCTL_REMOUNT_INFO: + case SMBFS_SYSCTL_GET_SERVER_SHARE: + return 1; + } + } else if (strcmp(vfsp->vfc_name, "afpfs") == 0) { + switch(selector_name) { + case AFPFS_VFS_CTL_GETID: + case AFPFS_VFS_CTL_NETCHANGE: + case AFPFS_VFS_CTL_VOLCHANGE: + return 1; break; - - if (vfsp == NULL) { - mount_list_unlock(); - return (ENOTSUP); } - - vfsc.vfc_reserved1 = 0; - bcopy(vfsp->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name)); - vfsc.vfc_typenum = vfsp->vfc_typenum; - vfsc.vfc_refcount = vfsp->vfc_refcount; - vfsc.vfc_flags = vfsp->vfc_flags; - vfsc.vfc_reserved2 = 0; - vfsc.vfc_reserved3 = 0; - - mount_list_unlock(); - return (sysctl_rdstruct(oldp, oldlenp, newp, &vfsc, - sizeof(struct vfsconf))); - - case VFS_SET_PACKAGE_EXTS: - return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]); } - /* - * We need to get back into the general MIB, so we need to re-prepend - * CTL_VFS to our name and try userland_sysctl(). - */ - - usernamelen = namelen + 1; - MALLOC(username, int *, usernamelen * sizeof(*username), - M_TEMP, M_WAITOK); - bcopy(name, username + 1, namelen * sizeof(*name)); - username[0] = CTL_VFS; - error = userland_sysctl(p, username, usernamelen, oldp, - oldlenp, newp, newlen, oldlenp); - FREE(username, M_TEMP); - return (error); + + // + // If we get here we passed all the checks so the selector is ok + // + return 0; } -/* - * Dump vnode list (via sysctl) - defunct - * use "pstat" instead - */ -/* ARGSUSED */ -int -sysctl_vnode -(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, __unused struct sysctl_req *req) + +int vfs_sysctl_node SYSCTL_HANDLER_ARGS { - return(EINVAL); -} + int *name, namelen; + struct vfstable *vfsp; + int error; + int fstypenum; + + fstypenum = oidp->oid_number; + name = arg1; + namelen = arg2; -SYSCTL_PROC(_kern, KERN_VNODE, vnode, - CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, - 0, 0, sysctl_vnode, "S,", ""); + /* all sysctl names at this level should have at least one name slot for the FS */ + if (namelen < 1) + return (EISDIR); /* overloaded */ + + mount_list_lock(); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == fstypenum) { + vfsp->vfc_refcount++; + break; + } + mount_list_unlock(); + + if (vfsp == NULL) { + return (ENOTSUP); + } + + if (is_bad_sysctl_name(vfsp, name[0])) { + printf("vfs: bad selector 0x%.8x for old-style sysctl(). 
use the sysctl-by-fsid interface instead\n", name[0]); + return EPERM; + } + error = (vfsp->vfc_vfsops->vfs_sysctl)(name, namelen, req->oldptr, &req->oldlen, req->newptr, req->newlen, vfs_context_current()); + + mount_list_lock(); + vfsp->vfc_refcount--; + mount_list_unlock(); + + return error; +} /* * Check to see if a filesystem is mounted on a block device. @@ -2876,41 +2941,68 @@ out: return (error); } +struct unmount_info { + int u_errs; // Total failed unmounts + int u_busy; // EBUSY failed unmounts +}; + +static int +unmount_callback(mount_t mp, void *arg) +{ + int error; + char *mntname; + struct unmount_info *uip = arg; + + mount_ref(mp, 0); + mount_iterdrop(mp); // avoid vfs_iterate deadlock in dounmount() + + MALLOC_ZONE(mntname, void *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (mntname) + strlcpy(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN); + + error = dounmount(mp, MNT_FORCE, 1, vfs_context_current()); + if (error) { + uip->u_errs++; + printf("Unmount of %s failed (%d)\n", mntname ? mntname:"?", error); + if (error == EBUSY) + uip->u_busy++; + } + if (mntname) + FREE_ZONE(mntname, MAXPATHLEN, M_NAMEI); + + return (VFS_RETURNED); +} + /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. + * Busy mounts are retried. */ __private_extern__ void vfs_unmountall(void) { - struct mount *mp; - int error; + int mounts, sec = 1; + struct unmount_info ui; - /* - * Since this only runs when rebooting, it is not interlocked. - */ - mount_list_lock(); - while(!TAILQ_EMPTY(&mountlist)) { - mp = TAILQ_LAST(&mountlist, mntlist); - mount_list_unlock(); - error = dounmount(mp, MNT_FORCE, 0, vfs_context_current()); - if ((error != 0) && (error != EBUSY)) { - printf("unmount of %s failed (", mp->mnt_vfsstat.f_mntonname); - printf("%d)\n", error); - mount_list_lock(); - TAILQ_REMOVE(&mountlist, mp, mnt_list); - continue; - } else if (error == EBUSY) { - /* If EBUSY is returned, the unmount was already in progress */ - printf("unmount of %p failed (", mp); - printf("BUSY)\n"); - } - mount_list_lock(); +retry: + ui.u_errs = ui.u_busy = 0; + vfs_iterate(VFS_ITERATE_CB_DROPREF | VFS_ITERATE_TAIL_FIRST, unmount_callback, &ui); + mounts = mount_getvfscnt(); + if (mounts == 0) + return; + + if (ui.u_busy > 0) { // Busy mounts - wait & retry + tsleep(&nummounts, PVFS, "busy mount", sec * hz); + sec *= 2; + if (sec <= 32) + goto retry; + printf("Unmounting timed out\n"); + } else if (ui.u_errs < mounts) { + // If the vfs_iterate missed mounts in progress - wait a bit + tsleep(&nummounts, PVFS, "missed mount", 2 * hz); } - mount_list_unlock(); } - /* * This routine is called from vnode_pager_deallocate out of the VM * The path to vnode_pager_deallocate can only be initiated by ubc_destroy_named @@ -2924,6 +3016,23 @@ vnode_pager_vrele(vnode_t vp) vnode_lock_spin(vp); vp->v_lflag &= ~VNAMED_UBC; + if (vp->v_usecount != 0) { + /* + * At the eleventh hour, just before the ubcinfo is + * destroyed, ensure the ubc-specific v_usecount + * reference has gone. We use v_usecount != 0 as a hint; + * ubc_unmap() does nothing if there's no mapping. + * + * This case is caused by coming here via forced unmount, + * versus the usual vm_object_deallocate() path. + * In the forced unmount case, ubc_destroy_named() + * releases the pager before memory_object_last_unmap() + * can be called. 
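vfs_unmountall() above now delegates each unmount to unmount_callback() through vfs_iterate() and retries busy mounts with a doubling delay (1s, 2s, ... capped at 32s) instead of walking the mount list by hand. The backoff shape, distilled; try_unmount_pass() is a hypothetical stand-in for one vfs_iterate() pass that returns the number of EBUSY failures:

    int sec = 1;

    while (try_unmount_pass() > 0) {
            tsleep(&sec, PVFS, "busy mount", sec * hz);
            sec *= 2;
            if (sec > 32) {
                    printf("Unmounting timed out\n");
                    break;
            }
    }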
+ */ + vnode_unlock(vp); + ubc_unmap(vp); + vnode_lock_spin(vp); + } uip = vp->v_ubcinfo; vp->v_ubcinfo = UBC_INFO_NULL; @@ -2938,6 +3047,11 @@ vnode_pager_vrele(vnode_t vp) u_int32_t rootunit = (u_int32_t)-1; +#if CONFIG_IOSCHED +extern int lowpri_throttle_enabled; +extern int iosched_enabled; +#endif + errno_t vfs_init_io_attributes(vnode_t devvp, mount_t mp) { @@ -2951,7 +3065,7 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) off_t readsegsize = 0; off_t writesegsize = 0; off_t alignment = 0; - off_t ioqueue_depth = 0; + u_int32_t ioqueue_depth = 0; u_int32_t blksize; u_int64_t temp; u_int32_t features; @@ -2999,8 +3113,8 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) * and if those advertised constraints result in a smaller * limit for a given I/O */ - mp->mnt_maxreadcnt = MAX_UPL_SIZE * PAGE_SIZE; - mp->mnt_maxwritecnt = MAX_UPL_SIZE * PAGE_SIZE; + mp->mnt_maxreadcnt = MAX_UPL_SIZE_BYTES; + mp->mnt_maxwritecnt = MAX_UPL_SIZE_BYTES; if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, ctx) == 0) { if (isvirtual) @@ -3129,9 +3243,18 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) if (features & DK_FEATURE_FORCE_UNIT_ACCESS) mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED; - if (features & DK_FEATURE_UNMAP) + if (features & DK_FEATURE_UNMAP) { mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED; - + + if (VNOP_IOCTL(devvp, _DKIOCCORESTORAGE, NULL, 0, ctx) == 0) + mp->mnt_ioflags |= MNT_IOFLAGS_CSUNMAP_SUPPORTED; + } +#if CONFIG_IOSCHED + if (iosched_enabled && (features & DK_FEATURE_PRIORITY)) { + mp->mnt_ioflags |= MNT_IOFLAGS_IOSCHED_SUPPORTED; + throttle_info_disable_throttle(mp->mnt_devbsdunit); + } +#endif /* CONFIG_IOSCHED */ return (error); } @@ -3569,18 +3692,64 @@ sysctl_vfs_noremotehang(__unused struct sysctl_oid *oidp, return (0); } +static int +sysctl_vfs_generic_conf SYSCTL_HANDLER_ARGS +{ + int *name, namelen; + struct vfstable *vfsp; + struct vfsconf vfsc; + + (void)oidp; + name = arg1; + namelen = arg2; + + if (namelen < 1) { + return (EISDIR); + } else if (namelen > 1) { + return (ENOTDIR); + } + + mount_list_lock(); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[0]) + break; + + if (vfsp == NULL) { + mount_list_unlock(); + return (ENOTSUP); + } + + vfsc.vfc_reserved1 = 0; + bcopy(vfsp->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name)); + vfsc.vfc_typenum = vfsp->vfc_typenum; + vfsc.vfc_refcount = vfsp->vfc_refcount; + vfsc.vfc_flags = vfsp->vfc_flags; + vfsc.vfc_reserved2 = 0; + vfsc.vfc_reserved3 = 0; + + mount_list_unlock(); + return (SYSCTL_OUT(req, &vfsc, sizeof(struct vfsconf))); +} + /* the vfs.generic. branch. 
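sysctl_vfs_generic_conf() above reproduces the old VFS_CONF behavior (copy out one struct vfsconf per fstype number) as a proper node handler; it is registered further down as vfs.generic.conf. A hedged userland query by MIB; the {CTL_VFS, VFS_GENERIC, VFS_CONF, typenum} layout is inferred from that registration:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <sys/mount.h>

    int typenum = 17;               /* hypothetical fstype number */
    int mib[4] = { CTL_VFS, VFS_GENERIC, VFS_CONF, typenum };
    struct vfsconf vfc;
    size_t len = sizeof(vfc);

    if (sysctl(mib, 4, &vfc, &len, NULL, 0) == 0)
            printf("%s refcount=%d\n", vfc.vfc_name, vfc.vfc_refcount);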
*/ SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs generic hinge"); /* retreive a list of mounted filesystem fsid_t */ -SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD | CTLFLAG_LOCKED, +SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, NULL, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids"); /* perform operations on filesystem via fsid_t */ SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW | CTLFLAG_LOCKED, sysctl_vfs_ctlbyfsid, "ctlbyfsid"); SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_vfs_noremotehang, "I", "noremotehang"); - - +SYSCTL_INT(_vfs_generic, VFS_MAXTYPENUM, maxtypenum, + CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, + &maxvfstypenum, 0, ""); +SYSCTL_INT(_vfs_generic, OID_AUTO, sync_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &sync_timeout, 0, ""); +SYSCTL_NODE(_vfs_generic, VFS_CONF, conf, + CTLFLAG_RD | CTLFLAG_LOCKED, + sysctl_vfs_generic_conf, ""); + long num_reusedvnodes = 0; @@ -3644,7 +3813,7 @@ process_vp(vnode_t vp, int want_vp, int *deferred) * Checks for anyone racing us for recycle */ if (vp->v_type != VBAD) { - if (want_vp && vnode_on_reliable_media(vp) == FALSE) { + if (want_vp && (vnode_on_reliable_media(vp) == FALSE || (vp->v_flag & VISDIRTY))) { vnode_async_list_add(vp); vnode_unlock(vp); @@ -4259,8 +4428,10 @@ vnode_drain(vnode_t vp) * this allows us to keep actively referenced vnodes in the list without having * to constantly remove and add to the list each time a vnode w/o a usecount is * referenced which costs us taking and dropping a global lock twice. + * However, if the vnode is marked DIRTY, we want to pull it out much earlier */ -#define UNAGE_THRESHHOLD 25 +#define UNAGE_THRESHHOLD 25 +#define UNAGE_DIRTYTHRESHHOLD 6 errno_t vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) @@ -4341,7 +4512,8 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) if (withvid && vid != vp->v_id) { return(ENOENT); } - if (++vp->v_references >= UNAGE_THRESHHOLD) { + if (++vp->v_references >= UNAGE_THRESHHOLD || + (vp->v_flag & VISDIRTY && vp->v_references >= UNAGE_DIRTYTHRESHHOLD)) { vp->v_references = 0; vnode_list_remove(vp); } @@ -4539,6 +4711,8 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) vnode_put(vp); return(error); } + if (param->vnfs_mp->mnt_ioflags & MNT_IOFLAGS_IOSCHED_SUPPORTED) + memory_object_mark_io_tracking(vp->v_ubcinfo->ui_control); } #ifdef JOE_DEBUG record_vp(vp, 1); @@ -4724,6 +4898,7 @@ vfs_iterate(int flags, int (*callout)(mount_t, void *), void *arg) int count, actualcount, i; void * allocmem; int indx_start, indx_stop, indx_incr; + int cb_dropref = (flags & VFS_ITERATE_CB_DROPREF); count = mount_getvfscnt(); count += 10; @@ -4766,7 +4941,12 @@ vfs_iterate(int flags, int (*callout)(mount_t, void *), void *arg) /* iterate over all the vnodes */ ret = callout(mp, arg); - mount_iterdrop(mp); + /* + * Drop the iterref here if the callback didn't do it. + * Note: If cb_dropref is set the mp may no longer exist. 
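That is the contract the comment is describing: under VFS_ITERATE_CB_DROPREF the callback owns the per-mount iteration reference and must retire it itself, which is exactly what unmount_callback() earlier in this patch does before calling dounmount(). A conforming callback, sketched from the mount_ref()/mount_iterdrop()/mount_drop() calls used elsewhere in this patch:

    static int
    my_mount_callback(mount_t mp, __unused void *arg)
    {
            mount_ref(mp, 0);       /* take a real ref first... */
            mount_iterdrop(mp);     /* ...then retire the iteration ref we own */

            /* ... mp stays valid here ... */

            mount_drop(mp, 0);
            return (VFS_RETURNED);
    }

    /* caller: */
    vfs_iterate(VFS_ITERATE_CB_DROPREF, my_mount_callback, NULL);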
+ */ + if (!cb_dropref) + mount_iterdrop(mp); switch (ret) { case VFS_RETURNED: @@ -4974,8 +5154,8 @@ vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t ctx) int error; u_int32_t ndflags = 0; - if (ctx == NULL) { /* XXX technically an error */ - ctx = vfs_context_current(); + if (ctx == NULL) { + return EINVAL; } if (flags & VNODE_LOOKUP_NOFOLLOW) @@ -4985,8 +5165,6 @@ vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t ctx) if (flags & VNODE_LOOKUP_NOCROSSMOUNT) ndflags |= NOCROSSMOUNT; - if (flags & VNODE_LOOKUP_DOWHITEOUT) - ndflags |= DOWHITEOUT; /* XXX AUDITVNPATH1 needed ? */ NDINIT(&nd, LOOKUP, OP_LOOKUP, ndflags, UIO_SYSSPACE, @@ -5022,8 +5200,6 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_ if (lflags & VNODE_LOOKUP_NOCROSSMOUNT) ndflags |= NOCROSSMOUNT; - if (lflags & VNODE_LOOKUP_DOWHITEOUT) - ndflags |= DOWHITEOUT; /* XXX AUDITVNPATH1 needed ? */ NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, @@ -5065,6 +5241,20 @@ vnode_mtime(vnode_t vp, struct timespec *mtime, vfs_context_t ctx) return error; } +errno_t +vnode_flags(vnode_t vp, uint32_t *flags, vfs_context_t ctx) +{ + struct vnode_attr va; + int error; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_flags); + error = vnode_getattr(vp, &va, ctx); + if (!error) + *flags = va.va_flags; + return error; +} + /* * Returns: 0 Success * vnode_getattr:??? @@ -5094,13 +5284,43 @@ vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx) return(vnode_setattr(vp, &va, ctx)); } +int +vnode_setdirty(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag |= VISDIRTY; + vnode_unlock(vp); + return 0; +} + +int +vnode_cleardirty(vnode_t vp) +{ + vnode_lock_spin(vp); + vp->v_flag &= ~VISDIRTY; + vnode_unlock(vp); + return 0; +} + +int +vnode_isdirty(vnode_t vp) +{ + int dirty; + + vnode_lock_spin(vp); + dirty = (vp->v_flag & VISDIRTY) ? 1 : 0; + vnode_unlock(vp); + + return dirty; +} + static int vn_create_reg(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *vap, uint32_t flags, int fmode, uint32_t *statusp, vfs_context_t ctx) { /* Only use compound VNOP for compound operation */ if (vnode_compound_open_available(dvp) && ((flags & VN_CREATE_DOOPEN) != 0)) { *vpp = NULLVP; - return VNOP_COMPOUND_OPEN(dvp, vpp, ndp, VNOP_COMPOUND_OPEN_DO_CREATE, fmode, statusp, vap, ctx); + return VNOP_COMPOUND_OPEN(dvp, vpp, ndp, O_CREAT, fmode, statusp, vap, ctx); } else { return VNOP_CREATE(dvp, vpp, &ndp->ni_cnd, vap, ctx); } @@ -5155,6 +5375,7 @@ vn_create(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *v boolean_t batched; struct componentname *cnp; uint32_t defaulted; + uint32_t dfflags; // Directory file flags cnp = &ndp->ni_cnd; error = 0; @@ -5181,6 +5402,15 @@ vn_create(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *v panic("Mode for open, but not trying to open..."); } + /* + * Handle inheritance of restricted flag + */ + error = vnode_flags(dvp, &dfflags, ctx); + if (error) + return error; + if (dfflags & SF_RESTRICTED) + VATTR_SET(vap, va_flags, SF_RESTRICTED); + /* * Create the requested node. 
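The VISDIRTY accessors added above give a filesystem a way to mark a vnode as carrying dirty state; elsewhere in this patch that flag ages the vnode out of the list sooner (UNAGE_DIRTYTHRESHHOLD) and routes it onto the async work list in vnode_rele_internal() and process_vp(). Usage is symmetric, per the signatures above:

    vnode_setdirty(vp);             /* fs now has dirty pages to push */
    /* ... writeback completes ... */
    if (vnode_isdirty(vp))
            vnode_cleardirty(vp);   /* clean again; normal aging resumes */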
*/ @@ -5550,14 +5780,7 @@ vn_authorize_rename(struct vnode *fdvp, struct vnode *fvp, struct componentnam /***** *****/ #if CONFIG_MACF - error = mac_vnode_check_rename_from(ctx, fdvp, fvp, fcnp); - if (error) - goto out; -#endif - -#if CONFIG_MACF - error = mac_vnode_check_rename_to(ctx, - tdvp, tvp, fdvp == tdvp, tcnp); + error = mac_vnode_check_rename(ctx, fdvp, fvp, fcnp, tdvp, tvp, tcnp); if (error) goto out; #endif @@ -7824,21 +8047,23 @@ vfs_setlocklocal(mount_t mp) } void -vfs_setunmountpreflight(mount_t mp) +vfs_setcompoundopen(mount_t mp) { mount_lock_spin(mp); - mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT; + mp->mnt_compound_ops |= COMPOUND_VNOP_OPEN; mount_unlock(mp); } + void -vfs_setcompoundopen(mount_t mp) +vnode_setswapmount(vnode_t vp) { - mount_lock_spin(mp); - mp->mnt_compound_ops |= COMPOUND_VNOP_OPEN; - mount_unlock(mp); + mount_lock(vp->v_mount); + vp->v_mount->mnt_kern_flag |= MNTK_SWAP_MOUNT; + mount_unlock(vp->v_mount); } + void vn_setunionwait(vnode_t vp) { @@ -7870,18 +8095,6 @@ vn_clearunionwait(vnode_t vp, int locked) vnode_unlock(vp); } -/* - * XXX - get "don't trigger mounts" flag for thread; used by autofs. - */ -extern int thread_notrigger(void); - -int -thread_notrigger(void) -{ - struct uthread *uth = (struct uthread *)get_bsdthread_info(current_thread()); - return (uth->uu_notrigger); -} - /* * Removes orphaned apple double files during a rmdir * Works by: @@ -7902,7 +8115,9 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * int eofflag, siz = UIO_BUFF_SIZE, nentries = 0; int open_flag = 0, full_erase_flag = 0; char uio_buf[ UIO_SIZEOF(1) ]; - char *rbuf = NULL, *cpos, *cend; + char *rbuf = NULL; + void *dir_pos; + void *dir_end; struct nameidata nd_temp; struct dirent *dp; errno_t error; @@ -7956,14 +8171,14 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * /* * Iterate through directory */ - cpos = rbuf; - cend = rbuf + siz; - dp = (struct dirent*) cpos; + dir_pos = (void*) rbuf; + dir_end = (void*) (rbuf + siz); + dp = (struct dirent*) (dir_pos); - if (cpos == cend) + if (dir_pos == dir_end) eofflag = 1; - while ((cpos < cend)) { + while (dir_pos < dir_end) { /* * Check for . and .. as well as directories */ @@ -7981,8 +8196,8 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * goto outsc; } } - cpos += dp->d_reclen; - dp = (struct dirent*)cpos; + dir_pos = (void*) ((uint8_t*)dir_pos + dp->d_reclen); + dp = (struct dirent*)dir_pos; } /* @@ -8028,14 +8243,14 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * /* * Iterate through directory */ - cpos = rbuf; - cend = rbuf + siz; - dp = (struct dirent*) cpos; + dir_pos = (void*) rbuf; + dir_end = (void*) (rbuf + siz); + dp = (struct dirent*) dir_pos; - if (cpos == cend) + if (dir_pos == dir_end) eofflag = 1; - while ((cpos < cend)) { + while (dir_pos < dir_end) { /* * Check for . and .. 
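The loops rewritten above walk a VNOP_READDIR buffer through a void*/uint8_t* pair rather than char* arithmetic; either way the invariant is the same, since directory entries are variable-length and d_reclen is the only honest stride. The walk, distilled (rbuf/siz as in the function above):

    struct dirent *dp;
    void *pos = (void *)rbuf;
    void *end = (void *)(rbuf + siz);

    while (pos < end) {
            dp = (struct dirent *)pos;
            /* ... examine dp->d_name, dp->d_type, skip "." and ".." ... */
            pos = (void *)((uint8_t *)pos + dp->d_reclen);
    }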
as well as directories */ @@ -8055,8 +8270,8 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * } } - cpos += dp->d_reclen; - dp = (struct dirent*)cpos; + dir_pos = (void*) ((uint8_t*)dir_pos + dp->d_reclen); + dp = (struct dirent*)dir_pos; } /* @@ -8106,6 +8321,135 @@ lock_vnode_and_post(vnode_t vp, int kevent_num) } } + +#ifdef PANIC_PRINTS_VNODES + +void panic_print_vnodes(void); + +static const char *__vtype(uint16_t vtype) +{ + switch (vtype) { + case VREG: + return "R"; + case VDIR: + return "D"; + case VBLK: + return "B"; + case VCHR: + return "C"; + case VLNK: + return "L"; + case VSOCK: + return "S"; + case VFIFO: + return "F"; + case VBAD: + return "x"; + case VSTR: + return "T"; + case VCPLX: + return "X"; + default: + return "?"; + } +} + +/* + * build a path from the bottom up + * NOTE: called from the panic path - no alloc'ing of memory and no locks! + */ +static char *__vpath(vnode_t vp, char *str, int len, int depth) +{ + int vnm_len; + char *dst, *src; + + if (len <= 0) + return str; + /* str + len is the start of the string we created */ + if (!vp->v_name) + return str + len; + + /* follow mount vnodes to get the full path */ + if ((vp->v_flag & VROOT)) { + if (vp->v_mount != NULL && vp->v_mount->mnt_vnodecovered) { + if (len < 1) + return str + len; + return __vpath(vp->v_mount->mnt_vnodecovered, + str, len, depth+1); + } + return str + len; + } + + src = (char *)vp->v_name; + vnm_len = strlen(src); + if (vnm_len > len) { + /* truncate the name to fit in the string */ + src += (vnm_len - len); + vnm_len = len; + } + + /* start from the back and copy just characters (no NULLs) */ + + /* this will chop off leaf path (file) names */ + if (depth > 0) { + dst = str + len - vnm_len; + memcpy(dst, src, vnm_len); + len -= vnm_len; + } else { + dst = str + len; + } + + if (vp->v_parent && len > 1) { + /* follow parents up the chain */ + len--; + *(dst-1) = '/'; + return __vpath(vp->v_parent, str, len, depth + 1); + } + + return dst; +} + +extern int kdb_printf(const char *format, ...) 
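__vpath() above builds the path right to left: each name is copied against the tail of the caller's buffer, a '/' is prepended, and the recursion climbs v_parent (hopping to mnt_vnodecovered at mount roots), so the returned pointer lands somewhere inside the buffer rather than at its start. The calling convention, as panic_print_vnodes() below uses it:

    char vname[257];
    char *nm;

    vname[sizeof(vname) - 1] = '\0';        /* __vpath never writes the NUL */
    nm = __vpath(vp, vname, sizeof(vname) - 1, 0);
    kdb_printf("path = %s\n", nm);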
__printflike(1,2); + +#define SANE_VNODE_PRINT_LIMIT 5000 +void panic_print_vnodes(void) +{ + mount_t mnt; + vnode_t vp; + int nvnodes = 0; + const char *type; + char *nm; + char vname[257]; + + kdb_printf("\n***** VNODES *****\n" + "TYPE UREF ICNT PATH\n"); + + /* NULL-terminate the path name */ + vname[sizeof(vname)-1] = '\0'; + + /* + * iterate all vnodelist items in all mounts (mntlist) -> mnt_vnodelist + */ + TAILQ_FOREACH(mnt, &mountlist, mnt_list) { + TAILQ_FOREACH(vp, &mnt->mnt_vnodelist, v_mntvnodes) { + if (++nvnodes > SANE_VNODE_PRINT_LIMIT) + return; + type = __vtype(vp->v_type); + nm = __vpath(vp, vname, sizeof(vname)-1, 0); + kdb_printf("%s %0d %0d %s\n", + type, vp->v_usecount, vp->v_iocount, nm); + } + } +} + +#else /* !PANIC_PRINTS_VNODES */ +void panic_print_vnodes(void) +{ + return; +} +#endif + + #ifdef JOE_DEBUG static void record_vp(vnode_t vp, int count) { struct uthread *ut; @@ -8332,26 +8676,6 @@ vnode_resolver_detach(vnode_t vp) OSAddAtomic(-1, &mp->mnt_numtriggers); } -/* - * Pathname operations that don't trigger a mount for trigger vnodes - */ -static const u_int64_t ignorable_pathops_mask = - 1LL << OP_MOUNT | - 1LL << OP_UNMOUNT | - 1LL << OP_STATFS | - 1LL << OP_ACCESS | - 1LL << OP_GETATTR | - 1LL << OP_LISTXATTR; - -int -vfs_istraditionaltrigger(enum path_operation op, const struct componentname *cnp) -{ - if (cnp->cn_flags & ISLASTCN) - return ((1LL << op) & ignorable_pathops_mask) == 0; - else - return (1); -} - __private_extern__ void vnode_trigger_rearm(vnode_t vp, vfs_context_t ctx) diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index 4b9996834..521cb5713 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2012 Apple Inc. All rights reserved. + * Copyright (c) 1995-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -148,6 +148,8 @@ static int getfsstat_callback(mount_t mp, void * arg); static int getutimes(user_addr_t usrtvp, struct timespec *tsp); static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag); static int sync_callback(mount_t, void *); +static void sync_thread(void *, __unused wait_result_t); +static int sync_async(int); static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp, int *sizep, boolean_t is_64_bit, boolean_t partial_copy); @@ -162,6 +164,12 @@ void vfs_notify_mount(vnode_t pdvp); int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth); +struct fd_vn_data * fg_vn_data_alloc(void); + +static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg); + +static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *); + #ifdef CONFIG_IMGSRC_ACCESS static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx); static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx); @@ -179,6 +187,10 @@ int sync_internal(void); __private_extern__ int unlink1(vfs_context_t, struct nameidata *, int); +extern lck_grp_t *fd_vn_lck_grp; +extern lck_grp_attr_t *fd_vn_lck_grp_attr; +extern lck_attr_t *fd_vn_lck_attr; + /* * incremented each time a mount or unmount operation occurs * used to invalidate the cached value of the rootvp in the @@ -194,11 +206,18 @@ extern const struct fileops vnops; extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); #endif /* CONFIG_APPLEDOUBLE */ +typedef uint32_t vfs_rename_flags_t; +#if CONFIG_SECLUDED_RENAME +enum { + VFS_SECLUDE_RENAME = 0x00000001 +}; +#endif + /* * Virtual File System System Calls */ -#if NFSCLIENT +#if NFSCLIENT || DEVFS /* * Private in-kernel mounting spi (NFS only, not exported) */ @@ -250,7 +269,7 @@ kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path, return (error); } -#endif /* NFSCLIENT */ +#endif /* NFSCLIENT || DEVFS */ /* * Mount a file system. @@ -325,7 +344,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 /* * Get the vnode to be covered */ - NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT, + NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) { @@ -398,14 +417,12 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 flags = (flags & ~(MNT_UPDATE)); } -#if 0 -//#ifdef SECURE_KERNEL +#ifdef SECURE_KERNEL if ((flags & MNT_RDONLY) == 0) { /* Release kernels are not allowed to mount "/" as rw */ error = EPERM; goto out; } -//#endif #endif /* * See 7392553 for more details on why this check exists. 
@@ -632,8 +649,8 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp, mp->mnt_vtable = vfsp; //mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; - strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); - strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN); + strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); + strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN); mp->mnt_vnodecovered = vp; mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx)); mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1; @@ -642,12 +659,12 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp, /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */ vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE); -#if NFSCLIENT +#if NFSCLIENT || DEVFS if (kernelmount) mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT; if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT; -#endif /* NFSCLIENT */ +#endif /* NFSCLIENT || DEVFS */ update: /* @@ -1651,15 +1668,10 @@ checkdirs(vnode_t olddp, vfs_context_t ctx) vnode_t tvp; int err; struct cdirargs cdr; - struct uthread * uth = get_bsdthread_info(current_thread()); if (olddp->v_usecount == 1) return(0); - if (uth != (struct uthread *)0) - uth->uu_notrigger = 1; err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx); - if (uth != (struct uthread *)0) - uth->uu_notrigger = 0; if (err != 0) { #if DIAGNOSTIC @@ -1700,7 +1712,7 @@ unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval) struct nameidata nd; vfs_context_t ctx = vfs_context_current(); - NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) @@ -1818,27 +1830,24 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) int pflags_save = 0; #endif /* CONFIG_TRIGGERS */ - if (flags & MNT_FORCE) - forcedunmount = 1; - mount_lock(mp); - /* XXX post jaguar fix LK_DRAIN - then clean this up */ - if ((flags & MNT_FORCE)) { - mp->mnt_kern_flag |= MNTK_FRCUNMOUNT; - mp->mnt_lflag |= MNT_LFORCE; - } + + /* + * If already an unmount in progress just return EBUSY. + * Even a forced unmount cannot override. + */ if (mp->mnt_lflag & MNT_LUNMOUNT) { - mp->mnt_lflag |= MNT_LWAIT; - if(withref != 0) + if (withref != 0) mount_drop(mp, 1); - msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL); - /* - * The prior unmount attempt has probably succeeded. - * Do not dereference mp here - returning EBUSY is safest. - */ + mount_unlock(mp); return (EBUSY); } + if (flags & MNT_FORCE) { + forcedunmount = 1; + mp->mnt_lflag |= MNT_LFORCE; + } + #if CONFIG_TRIGGERS if (flags & MNT_NOBLOCK && p != kernproc) pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag); @@ -1856,6 +1865,15 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) mp->mnt_realrootvp = NULLVP; mount_unlock(mp); + if (forcedunmount && (flags & MNT_LNOSUB) == 0) { + /* + * Force unmount any mounts in this filesystem. + * If any unmounts fail - just leave them dangling. + * Avoids recursion. 
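Two behavior changes above deserve a note: a concurrent unmount now fails immediately with EBUSY (the old code slept on MNT_LWAIT and risked touching a dead mount afterwards, hence the removed comment), and a forced unmount now force-unmounts everything mounted inside the filesystem first, with MNT_LNOSUB keeping dounmount_submounts() from recursing into itself. The caller-visible shape, per the dounmount() signature in this file:

    error = dounmount(mp, MNT_FORCE, 0, vfs_context_current());
    if (error == EBUSY) {
            /* another unmount already owns this mount; back off, do not spin */
    }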
+ */ + (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx); + } + /* * taking the name_cache_lock exclusively will * insure that everyone is out of the fast path who @@ -1938,11 +1956,19 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) /* mark the mount point hook in the vp but not drop the ref yet */ if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { - vnode_getwithref(coveredvp); + /* + * The covered vnode needs special handling. Trying to get an + * iocount must not block here as this may lead to deadlocks + * if the Filesystem to which the covered vnode belongs is + * undergoing forced unmounts. Since we hold a usecount, the + * vnode cannot be reused (it can, however, still be terminated) + */ + vnode_getalways(coveredvp); vnode_lock_spin(coveredvp); mp->mnt_crossref++; coveredvp->v_mountedhere = (struct mount *)0; + CLR(coveredvp->v_flag, VMOUNT); vnode_unlock(coveredvp); vnode_put(coveredvp); @@ -2008,18 +2034,38 @@ out: if (!error) { if ((coveredvp != NULLVP)) { - vnode_t pvp; + vnode_t pvp = NULLVP; - vnode_getwithref(coveredvp); - pvp = vnode_getparent(coveredvp); - vnode_rele(coveredvp); + /* + * The covered vnode needs special handling. Trying to + * get an iocount must not block here as this may lead + * to deadlocks if the Filesystem to which the covered + * vnode belongs is undergoing forced unmounts. Since we + * hold a usecount, the vnode cannot be reused + * (it can, however, still be terminated). + */ + vnode_getalways(coveredvp); mount_dropcrossref(mp, coveredvp, 0); + /* + * We'll _try_ to detect if this really needs to be + * done. The coveredvp can only be in termination (or + * terminated) if the coveredvp's mount point is in a + * forced unmount (or has been) since we still hold the + * ref. + */ + if (!vnode_isrecycled(coveredvp)) { + pvp = vnode_getparent(coveredvp); #if CONFIG_TRIGGERS - if (coveredvp->v_resolve) - vnode_trigger_rearm(coveredvp, ctx); -#endif + if (coveredvp->v_resolve) { + vnode_trigger_rearm(coveredvp, ctx); + } +#endif + } + + vnode_rele(coveredvp); vnode_put(coveredvp); + coveredvp = NULLVP; if (pvp) { lock_vnode_and_post(pvp, NOTE_WRITE); @@ -2037,6 +2083,67 @@ out: return (error); } +/* + * Unmount any mounts in this filesystem. + */ +void +dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx) +{ + mount_t smp; + fsid_t *fsids, fsid; + int fsids_sz; + int count = 0, i, m = 0; + vnode_t vp; + + mount_list_lock(); + + // Get an array to hold the submounts fsids. + TAILQ_FOREACH(smp, &mountlist, mnt_list) + count++; + fsids_sz = count * sizeof(fsid_t); + MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT); + if (fsids == NULL) { + mount_list_unlock(); + goto out; + } + fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump + + /* + * Fill the array with submount fsids. + * Since mounts are always added to the tail of the mount list, the + * list is always in mount order. + * For each mount check if the mounted-on vnode belongs to a + * mount that's already added to our array of mounts to be unmounted. + */ + for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) { + vp = smp->mnt_vnodecovered; + if (vp == NULL) + continue; + fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid + for (i = 0; i <= m; i++) { + if (fsids[i].val[0] == fsid.val[0] && + fsids[i].val[1] == fsid.val[1]) { + fsids[++m] = smp->mnt_vfsstat.f_fsid; + break; + } + } + } + mount_list_unlock(); + + // Unmount the submounts in reverse order. Ignore errors. 
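Reverse order is correct here because mounts are appended to the tail of mountlist, so walking the collected fsids backwards always unmounts children before their parents. Note also the switch from vnode_getwithref() to vnode_getalways() for the covered vnode in both dounmount() paths: a usecount is already held, so the vnode cannot be reused, and per the new comments vnode_getalways() takes the iocount even while the vnode is being terminated, where waiting could deadlock against a forced unmount of the covering filesystem. The pairing, sketched:

    /* usecount held: the vnode cannot be recycled out from under us */
    vnode_getalways(coveredvp);
    /* ... inspect or detach coveredvp ... */
    vnode_put(coveredvp);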
+ for (i = m; i > 0; i--) { + smp = mount_list_lookupby_fsid(&fsids[i], 0, 1); + if (smp) { + mount_ref(smp, 0); + mount_iterdrop(smp); + (void) dounmount(smp, flags, 1, ctx); + } + } +out: + if (fsids) + FREE(fsids, M_TEMP); +} + void mount_dropcrossref(mount_t mp, vnode_t dp, int need_put) { @@ -2070,34 +2177,53 @@ mount_dropcrossref(mount_t mp, vnode_t dp, int need_put) */ #if DIAGNOSTIC int syncprt = 0; -struct ctldebug debug0 = { "syncprt", &syncprt }; #endif int print_vmpage_stat=0; +int sync_timeout = 60; // Sync time limit (sec) static int -sync_callback(mount_t mp, void * arg) +sync_callback(mount_t mp, __unused void *arg) { - int asyncflag; - if ((mp->mnt_flag & MNT_RDONLY) == 0) { - asyncflag = mp->mnt_flag & MNT_ASYNC; - mp->mnt_flag &= ~MNT_ASYNC; - VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current()); - if (asyncflag) - mp->mnt_flag |= MNT_ASYNC; + int asyncflag = mp->mnt_flag & MNT_ASYNC; + + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel()); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; } - return(VFS_RETURNED); -} + return (VFS_RETURNED); +} /* ARGSUSED */ int sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval) { - vfs_iterate(LK_NOWAIT, sync_callback, (void *)0); + vfs_iterate(LK_NOWAIT, sync_callback, NULL); - if(print_vmpage_stat) { + if (print_vmpage_stat) { + vm_countdirtypages(); + } + +#if DIAGNOSTIC + if (syncprt) + vfs_bufstats(); +#endif /* DIAGNOSTIC */ + return 0; +} + +static void +sync_thread(void *arg, __unused wait_result_t wr) +{ + int *timeout = (int *) arg; + + vfs_iterate(LK_NOWAIT, sync_callback, NULL); + + if (timeout) + wakeup((caddr_t) timeout); + if (print_vmpage_stat) { vm_countdirtypages(); } @@ -2105,29 +2231,51 @@ sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ - return (0); } /* - * Change filesystem quotas. + * Sync in a separate thread so we can time out if it blocks. */ -#if QUOTA -static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval); - -int -quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval) +static int +sync_async(int timeout) { - boolean_t funnel_state; + thread_t thd; int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = quotactl_funneled(p, uap, retval); - thread_funnel_set(kernel_flock, funnel_state); - return(error); + struct timespec ts = {timeout, 0}; + + lck_mtx_lock(sync_mtx_lck); + if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) { + printf("sync_thread failed\n"); + lck_mtx_unlock(sync_mtx_lck); + return (0); + } + + error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts); + if (error) { + printf("sync timed out: %d sec\n", timeout); + } + thread_deallocate(thd); + + return (0); } -static int -quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval) +/* + * An in-kernel sync for power management to call. + */ +__private_extern__ int +sync_internal(void) +{ + (void) sync_async(sync_timeout); + + return 0; +} /* end of sync_internal call */ + +/* + * Change filesystem quotas. 
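sync_internal() above now bounds how long a caller such as power management can be wedged behind a stuck filesystem: the vfs_iterate() pass runs in a spawned thread while the caller msleep()s on a shared token with a timespec deadline, so it returns after sync_timeout seconds even if the worker never wakes it. The skeleton of that bounded-wait pattern, assuming the kernel_thread_start()/msleep()/wakeup() interfaces used above (sync_mtx_lck is the mutex from this file):

    static void
    worker(void *arg, __unused wait_result_t wr)
    {
            int *tokenp = (int *)arg;

            /* ... do the slow work ... */
            wakeup((caddr_t)tokenp);        /* release the waiter early */
    }

    /* waiter: */
    int token = 0;
    thread_t thd;
    struct timespec ts = { 60, 0 };         /* upper bound, in seconds */

    lck_mtx_lock(sync_mtx_lck);
    if (kernel_thread_start(worker, &token, &thd) == KERN_SUCCESS) {
            /* PDROP releases the mutex; returns on wakeup or timeout */
            (void)msleep((caddr_t)&token, sync_mtx_lck,
                (PVFS | PDROP | PCATCH), "worker wait", &ts);
            thread_deallocate(thd);
    } else {
            lck_mtx_unlock(sync_mtx_lck);
    }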
+ */ +#if QUOTA +int +quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval) { struct mount *mp; int error, quota_cmd, quota_status; @@ -2199,7 +2347,7 @@ quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval) /* uap->arg is a pointer to a dqblk structure we need to copy out to */ if (error == 0) { if (proc_is64bit(p)) { - struct user_dqblk my_dqblk64; + struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0}; munge_dqblk(&my_dqblk, &my_dqblk64, TRUE); error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64)); } @@ -2247,7 +2395,7 @@ statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval) vfs_context_t ctx = vfs_context_current(); vnode_t vp; - NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) @@ -2361,7 +2509,7 @@ statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *r vfs_context_t ctxp = vfs_context_current(); vnode_t vp; - NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctxp); error = namei(&nd); if (error) @@ -2676,6 +2824,94 @@ getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval) return (0); } +/* + * gets the associated vnode with the file descriptor passed. + * as input + * + * INPUT + * ctx - vfs context of caller + * fd - file descriptor for which vnode is required. + * vpp - Pointer to pointer to vnode to be returned. + * + * The vnode is returned with an iocount so any vnode obtained + * by this call needs a vnode_put + * + */ +static int +vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp) +{ + int error; + vnode_t vp; + struct fileproc *fp; + proc_t p = vfs_context_proc(ctx); + + *vpp = NULLVP; + + error = fp_getfvp(p, fd, &fp, &vp); + if (error) + return (error); + + error = vnode_getwithref(vp); + if (error) { + (void)fp_drop(p, fd, fp, 0); + return (error); + } + + (void)fp_drop(p, fd, fp, 0); + *vpp = vp; + return (error); +} + +/* + * Wrapper function around namei to start lookup from a directory + * specified by a file descriptor ni_dirfd. + * + * In addition to all the errors returned by namei, this call can + * return ENOTDIR if the file descriptor does not refer to a directory. + * and EBADF if the file descriptor is not valid. + */ +int +nameiat(struct nameidata *ndp, int dirfd) +{ + if ((dirfd != AT_FDCWD) && + !(ndp->ni_flag & NAMEI_CONTLOOKUP) && + !(ndp->ni_cnd.cn_flags & USEDVP)) { + int error = 0; + char c; + + if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) { + error = copyin(ndp->ni_dirp, &c, sizeof(char)); + if (error) + return (error); + } else { + c = *((char *)(ndp->ni_dirp)); + } + + if (c != '/') { + vnode_t dvp_at; + + error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd, + &dvp_at); + if (error) + return (error); + + if (vnode_vtype(dvp_at) != VDIR) { + vnode_put(dvp_at); + return (ENOTDIR); + } + + ndp->ni_dvp = dvp_at; + ndp->ni_cnd.cn_flags |= USEDVP; + error = namei(ndp); + ndp->ni_cnd.cn_flags &= ~USEDVP; + vnode_put(dvp_at); + return (error); + } + } + + return (namei(ndp)); +} + /* * Change current working directory to a given file descriptor. */ @@ -2989,6 +3225,35 @@ change_dir(struct nameidata *ndp, vfs_context_t ctx) return (error); } +/* + * Free the vnode data (for directories) associated with the file glob. 
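nameiat() above is the keystone for the *at syscalls introduced in this patch: when dirfd is not AT_FDCWD, the lookup is not a continuation, USEDVP is clear, and the path does not begin with '/', it pins the descriptor's vnode via vnode_getfromfd(), insists on VDIR, plants it as ni_dvp with USEDVP set, and then runs namei() relative to it; absolute paths and AT_FDCWD fall through to plain namei(). A typical call shape in a handler:

    struct nameidata nd;
    int error;

    NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
        UIO_USERSPACE, path, ctx);
    error = nameiat(&nd, fd);       /* AT_FDCWD behaves like plain namei() */
    if (error == ENOTDIR) {
            /* fd did not refer to a directory */
    }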
+ */ +struct fd_vn_data * +fg_vn_data_alloc(void) +{ + struct fd_vn_data *fvdata; + + /* Allocate per fd vnode data */ + MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)), + M_FD_VN_DATA, M_WAITOK | M_ZERO); + lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr); + return fvdata; +} + +/* + * Free the vnode data (for directories) associated with the file glob. + */ +void +fg_vn_data_free(void *fgvndata) +{ + struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata; + + if (fvdata->fv_buf) + FREE(fvdata->fv_buf, M_FD_DIRBUF); + lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp); + FREE(fvdata, M_FD_VN_DATA); +} + /* * Check permissions, allocate an open file structure, * and call the device open routine if any. @@ -3145,21 +3410,20 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, if (no_controlling_tty && (p->p_flag & P_CONTROLT)) { vnode_t ttyvp; - /* - * We already have a ref from vn_open_auth(), so we can demand another reference. - */ - error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE); - if (error != 0) { - panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!"); - } - session_lock(sessp); ttyvp = sessp->s_ttyvp; sessp->s_ttyvp = vp; sessp->s_ttyvid = vnode_vid(vp); session_unlock(sessp); - if (ttyvp != NULLVP) - vnode_rele(ttyvp); + } + + /* + * For directories we hold some additional information in the fd. + */ + if (vnode_vtype(vp) == VDIR) { + fp->f_fglob->fg_vn_data = fg_vn_data_alloc(); + } else { + fp->f_fglob->fg_vn_data = NULL; } vnode_put(vp); @@ -3189,6 +3453,17 @@ bad: struct vfs_context context = *vfs_context_current(); context.vc_ucred = fp->f_fglob->fg_cred; + + if ((fp->f_fglob->fg_flag & FHASLOCK) && + (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_UNLCK; + + (void)VNOP_ADVLOCK( + vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL); + } vn_close(vp, fp->f_fglob->fg_flag, &context); vnode_put(vp); @@ -3197,6 +3472,55 @@ bad: return (error); } +/* + * While most of the *at syscall handlers can call nameiat() which + * is a wrapper around namei, the use of namei and initialisation + * of nameidata are far removed and in different functions - namei + * gets called in vn_open_auth for open1. So we'll just do here what + * nameiat() does. + */ +static int +open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags, + struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval, + int dirfd) +{ + if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) { + int error; + char c; + + if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) { + error = copyin(ndp->ni_dirp, &c, sizeof(char)); + if (error) + return (error); + } else { + c = *((char *)(ndp->ni_dirp)); + } + + if (c != '/') { + vnode_t dvp_at; + + error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd, + &dvp_at); + if (error) + return (error); + + if (vnode_vtype(dvp_at) != VDIR) { + vnode_put(dvp_at); + return (ENOTDIR); + } + + ndp->ni_dvp = dvp_at; + ndp->ni_cnd.cn_flags |= USEDVP; + error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra, + retval); + vnode_put(dvp_at); + return (error); + } + } + + return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval)); +} + /* * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)). 
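The new unlock in open1()'s failure path above closes a leak: with O_EXLOCK/O_SHLOCK the advisory lock is acquired during vn_open_auth(), so if the open subsequently fails the lock must be dropped explicitly before vn_close(), mirroring what a normal close does. The release idiom, per the hunk:

    struct flock lf;

    lf.l_whence = SEEK_SET;
    lf.l_start = 0;
    lf.l_len = 0;                   /* zero length means the whole file */
    lf.l_type = F_UNLCK;
    (void)VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK,
        ctx, NULL);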
* @@ -3310,52 +3634,152 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, return error; } - -int -open(proc_t p, struct open_args *uap, int32_t *retval) -{ - __pthread_testcancel(1); - return(open_nocancel(p, (struct open_nocancel_args *)uap, retval)); -} - -int -open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval) +static int +openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode, + int fd, enum uio_seg segflg, int *retval) { - struct filedesc *fdp = p->p_fd; + struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd; struct vnode_attr va; struct nameidata nd; int cmode; VATTR_INIT(&va); /* Mask off all but regular access permissions */ - cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; VATTR_SET(&va, va_mode, cmode & ACCESSPERMS); - NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, - uap->path, vfs_context_current()); + NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, + segflg, path, ctx); - return (open1(vfs_context_current(), &nd, uap->flags, &va, - fileproc_alloc_init, NULL, retval)); + return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL, + retval, fd)); } +int +open(proc_t p, struct open_args *uap, int32_t *retval) +{ + __pthread_testcancel(1); + return(open_nocancel(p, (struct open_nocancel_args *)uap, retval)); +} -/* - * Create a special file. - */ -static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap); +int +open_nocancel(__unused proc_t p, struct open_nocancel_args *uap, + int32_t *retval) +{ + return (openat_internal(vfs_context_current(), uap->path, uap->flags, + uap->mode, AT_FDCWD, UIO_USERSPACE, retval)); +} int -mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval) +openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap, + int32_t *retval) { - struct vnode_attr va; - vfs_context_t ctx = vfs_context_current(); - int error; - struct nameidata nd; - vnode_t vp, dvp; + return (openat_internal(vfs_context_current(), uap->path, uap->flags, + uap->mode, uap->fd, UIO_USERSPACE, retval)); +} - VATTR_INIT(&va); - VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); - VATTR_SET(&va, va_rdev, uap->dev); +int +openat(proc_t p, struct openat_args *uap, int32_t *retval) +{ + __pthread_testcancel(1); + return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval)); +} + +/* + * openbyid_np: open a file given a file system id and a file system object id + * the hfs file system object id is an fsobj_id_t {uint32, uint32} + * file systems that don't support object ids it is a node id (uint64_t). + * + * Parameters: p Process requesting the open + * uap User argument descriptor (see below) + * retval Pointer to an area to receive the + * return calue from the system call + * + * Indirect: uap->path Path to open (same as 'open') + * + * uap->fsid id of target file system + * uap->objid id of target file system object + * uap->flags Flags to open (same as 'open') + * + * Returns: 0 Success + * !0 errno value + * + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. 
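openbyid_np() above resolves {fsid, objid} to a path with fsgetpath_internal(), growing the buffer by MAXPATHLEN each time it comes back ENOSPC, then reopens the result through openat_internal() with UIO_SYSSPACE. The growth loop, isolated; note that the second clause of the while condition is an assignment that is always nonzero, so it reads as "while ENOSPC, grow and retry":

    char *buf = NULL;
    int buflen = MAXPATHLEN, pathlen = 0, error;

    do {
            MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
            if (buf == NULL)
                    return (ENOMEM);
            error = fsgetpath_internal(ctx, fsid.val[0], objid,
                buflen, buf, &pathlen);
            if (error) {
                    FREE(buf, M_TEMP);
                    buf = NULL;
            }
    } while (error == ENOSPC && (buflen += MAXPATHLEN));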
+ */ +int +openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval) +{ + fsid_t fsid; + uint64_t objid; + int error; + char *buf = NULL; + int buflen = MAXPATHLEN; + int pathlen = 0; + vfs_context_t ctx = vfs_context_current(); + + if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) { + return (error); + } + + /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */ + if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) { + return (error); + } + + AUDIT_ARG(value32, fsid.val[0]); + AUDIT_ARG(value64, objid); + + /*resolve path from fsis, objid*/ + do { + MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK); + if (buf == NULL) { + return (ENOMEM); + } + + error = fsgetpath_internal( + ctx, fsid.val[0], objid, + buflen, buf, &pathlen); + + if (error) { + FREE(buf, M_TEMP); + buf = NULL; + } + } while (error == ENOSPC && (buflen += MAXPATHLEN)); + + if (error) { + return error; + } + + buf[pathlen] = 0; + + error = openat_internal( + ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval); + + FREE(buf, M_TEMP); + + return error; +} + + +/* + * Create a special file. + */ +static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap); + +int +mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval) +{ + struct vnode_attr va; + vfs_context_t ctx = vfs_context_current(); + int error; + struct nameidata nd; + vnode_t vp, dvp; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); + VATTR_SET(&va, va_rdev, uap->dev); /* If it's a mknod() of a FIFO, call mkfifo1() instead */ if ((uap->mode & S_IFMT) == S_IFIFO) @@ -3643,12 +4067,13 @@ safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *trunc * VNOP_LINK:??? */ /* ARGSUSED */ -int -link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval) +static int +linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2, + user_addr_t link, int flag, enum uio_seg segflg) { vnode_t vp, dvp, lvp; struct nameidata nd; - vfs_context_t ctx = vfs_context_current(); + int follow; int error; #if CONFIG_FSE fse_info finfo; @@ -3660,9 +4085,11 @@ link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval) vp = dvp = lvp = NULLVP; /* look up the object we are linking to */ - NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, - UIO_USERSPACE, uap->path, ctx); - error = namei(&nd); + follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW; + NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow, + segflg, path, ctx); + + error = nameiat(&nd, fd1); if (error) return (error); vp = nd.ni_vp; @@ -3699,8 +4126,8 @@ link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval) #endif nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK; - nd.ni_dirp = uap->link; - error = namei(&nd); + nd.ni_dirp = link; + error = nameiat(&nd, fd2); if (error != 0) goto out; dvp = nd.ni_dvp; @@ -3768,14 +4195,15 @@ link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval) } link_name_len = MAXPATHLEN; - vn_getpath(vp, link_to_path, &link_name_len); - - /* - * Call out to allow 3rd party notification of rename. - * Ignore result of kauth_authorize_fileop call. - */ - kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK, - (uintptr_t)link_to_path, (uintptr_t)target_path); + if (vn_getpath(vp, link_to_path, &link_name_len) == 0) { + /* + * Call out to allow 3rd party notification of rename. 
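A small but real fix above: the KAUTH_FILEOP_LINK callout used to fire with whatever vn_getpath() left in the buffer even when it failed; it is now gated on success. The checked idiom:

    int len = MAXPATHLEN;

    if (vn_getpath(vp, link_to_path, &len) == 0) {
            /* buffer contents are defined only on success */
            kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
                (uintptr_t)link_to_path, (uintptr_t)target_path);
    }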
+ * Ignore result of kauth_authorize_fileop call. + */ + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK, + (uintptr_t)link_to_path, + (uintptr_t)target_path); + } if (link_to_path != NULL) { RELEASE_PATH(link_to_path); } @@ -3820,40 +4248,76 @@ out: return (error); } +int +link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval) +{ + return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path, + AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE)); +} + +int +linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval) +{ + if (uap->flag & ~AT_SYMLINK_FOLLOW) + return (EINVAL); + + return (linkat_internal(vfs_context_current(), uap->fd1, uap->path, + uap->fd2, uap->link, uap->flag, UIO_USERSPACE)); +} + /* * Make a symbolic link. * * We could add support for ACLs here too... */ /* ARGSUSED */ -int -symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval) +static int +symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd, + user_addr_t link, enum uio_seg segflg) { struct vnode_attr va; char *path; int error; struct nameidata nd; - vfs_context_t ctx = vfs_context_current(); vnode_t vp, dvp; + uint32_t dfflags; // Directory file flags size_t dummy=0; - - MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - error = copyinstr(uap->path, path, MAXPATHLEN, &dummy); + proc_t p; + + error = 0; + if (UIO_SEG_IS_USER_SPACE(segflg)) { + MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + error = copyinstr(path_data, path, MAXPATHLEN, &dummy); + } else { + path = (char *)path_data; + } if (error) goto out; AUDIT_ARG(text, path); /* This is the link string */ - NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1, - UIO_USERSPACE, uap->link, ctx); - error = namei(&nd); + NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1, + segflg, link, ctx); + + error = nameiat(&nd, fd); if (error) goto out; dvp = nd.ni_dvp; vp = nd.ni_vp; + p = vfs_context_proc(ctx); VATTR_INIT(&va); VATTR_SET(&va, va_type, VLNK); VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask); + + /* + * Handle inheritance of restricted flag + */ + error = vnode_flags(dvp, &dfflags, ctx); + if (error) + goto skipit; + if (dfflags & SF_RESTRICTED) + VATTR_SET(&va, va_flags, SF_RESTRICTED); + #if CONFIG_MACF error = mac_vnode_check_create(ctx, dvp, &nd.ni_cnd, &va); @@ -3894,22 +4358,22 @@ symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval) nd.ni_op = OP_LOOKUP; #endif nd.ni_cnd.cn_flags = 0; - error = namei(&nd); + error = nameiat(&nd, fd); vp = nd.ni_vp; if (vp == NULL) goto skipit; } - + #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */ - /* call out to allow 3rd party notification of rename. + /* call out to allow 3rd party notification of rename. * Ignore result of kauth_authorize_fileop call. 
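
/*
 * [Editor's note] An illustrative use of linkat() as added above: any
 * flag other than AT_SYMLINK_FOLLOW fails with EINVAL, the link()
 * wrapper passes AT_SYMLINK_FOLLOW, and without the flag a symlink
 * source is linked itself rather than its target.  Names are
 * illustrative.
 */
#include <fcntl.h>
#include <unistd.h>

int
linkat_demo(void)
{
	/* Follow a symlink source, matching the link() wrapper above. */
	if (linkat(AT_FDCWD, "target", AT_FDCWD, "alias",
	    AT_SYMLINK_FOLLOW) < 0)
		return (-1);

	/* Flag 0: if "target" is a symlink, the link names the symlink. */
	return (linkat(AT_FDCWD, "target", AT_FDCWD, "alias2", 0));
}
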
*/ if (kauth_authorize_fileop_has_listeners() && namei(&nd) == 0) { char *new_link_path = NULL; int len; - + /* build the path to the new link file */ new_link_path = get_pathbuff(); len = MAXPATHLEN; @@ -3918,19 +4382,19 @@ symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval) new_link_path[len - 1] = '/'; strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len); } - - kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK, + + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK, (uintptr_t)path, (uintptr_t)new_link_path); if (new_link_path != NULL) release_pathbuff(new_link_path); } -#endif +#endif // Make sure the name & parent pointers are hooked up if (vp->v_name == NULL) update_flags |= VNODE_UPDATE_NAME; if (vp->v_parent == NULLVP) update_flags |= VNODE_UPDATE_PARENT; - + if (update_flags) vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); @@ -3952,56 +4416,43 @@ skipit: vnode_put(vp); vnode_put(dvp); out: - FREE_ZONE(path, MAXPATHLEN, M_NAMEI); + if (path && (path != (char *)path_data)) + FREE_ZONE(path, MAXPATHLEN, M_NAMEI); return (error); } +int +symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval) +{ + return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD, + uap->link, UIO_USERSPACE)); +} + +int +symlinkat(__unused proc_t p, struct symlinkat_args *uap, + __unused int32_t *retval) +{ + return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd, + uap->path2, UIO_USERSPACE)); +} + /* * Delete a whiteout from the filesystem. - * XXX authorization not implmented for whiteouts + * No longer supported. */ int -undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval) +undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval) { - int error; - struct nameidata nd; - vfs_context_t ctx = vfs_context_current(); - vnode_t vp, dvp; - - NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1, - UIO_USERSPACE, uap->path, ctx); - error = namei(&nd); - if (error) - return (error); - dvp = nd.ni_dvp; - vp = nd.ni_vp; - - if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) { - error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx); - } else - error = EEXIST; - - /* - * nameidone has to happen before we vnode_put(dvp) - * since it may need to release the fs_nodelock on the dvp - */ - nameidone(&nd); - - if (vp) - vnode_put(vp); - vnode_put(dvp); - - return (error); + return (ENOTSUP); } - /* * Delete a name from the filesystem. */ /* ARGSUSED */ -int -unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags) +static int +unlink1at(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags, int fd) { vnode_t vp, dvp; int error; @@ -4029,7 +4480,7 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags) cnp = &ndp->ni_cnd; lookup_continue: - error = namei(ndp); + error = nameiat(ndp, fd); if (error) return (error); @@ -4192,18 +4643,45 @@ out: return (error); } +int +unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags) +{ + return (unlink1at(ctx, ndp, unlink_flags, AT_FDCWD)); +} + /* * Delete a name from the filesystem using POSIX semantics. 
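
/*
 * [Editor's note] A sketch of symlinkat() as added above; the target
 * string is stored verbatim and is not resolved or validated at
 * creation time.  Names are illustrative.
 */
#include <unistd.h>

int
symlinkat_demo(int dirfd)
{
	/* Creates <dirfd>/current -> "releases/v2"; the target string is
	 * recorded as-is and may dangle. */
	return (symlinkat("releases/v2", dirfd, "current"));
}
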
*/ +static int +unlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path, + enum uio_seg segflg) +{ + struct nameidata nd; + + NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, segflg, + path, ctx); + return (unlink1at(ctx, &nd, 0, fd)); +} + int unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval) { - struct nameidata nd; - vfs_context_t ctx = vfs_context_current(); + return (unlinkat_internal(vfs_context_current(), AT_FDCWD, uap->path, + UIO_USERSPACE)); +} - NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE, - uap->path, ctx); - return unlink1(ctx, &nd, 0); +int +unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval) +{ + if (uap->flag & ~AT_REMOVEDIR) + return (EINVAL); + + if (uap->flag & AT_REMOVEDIR) + return (rmdirat_internal(vfs_context_current(), uap->fd, + uap->path, UIO_USERSPACE)); + else + return (unlinkat_internal(vfs_context_current(), uap->fd, + uap->path, UIO_USERSPACE)); } /* @@ -4649,8 +5127,9 @@ out: * namei:??? * access1: */ -int -access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval) +static int +faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode, + int flag, enum uio_seg segflg) { int error; struct nameidata nd; @@ -4661,27 +5140,31 @@ access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval) #endif /* - * Access is defined as checking against the process' - * real identity, even if operations are checking the - * effective identity. So we need to tweak the credential - * in the context. + * Unless the AT_EACCESS option is used, Access is defined as checking + * against the process' real identity, even if operations are checking + * the effective identity. So we need to tweak the credential + * in the context for that case. */ - context.vc_ucred = kauth_cred_copy_real(kauth_cred_get()); - context.vc_thread = current_thread(); + if (!(flag & AT_EACCESS)) + context.vc_ucred = kauth_cred_copy_real(kauth_cred_get()); + else + context.vc_ucred = ctx->vc_ucred; + context.vc_thread = ctx->vc_thread; + niopts = FOLLOW | AUDITVNPATH1; /* need parent for vnode_authorize for deletion test */ - if (uap->flags & _DELETE_OK) + if (amode & _DELETE_OK) niopts |= WANTPARENT; - NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE, - uap->path, &context); + NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg, + path, &context); #if NAMEDRSRCFORK /* access(F_OK) calls are allowed for resource forks. 
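
/*
 * [Editor's note] unlinkat() above multiplexes on AT_REMOVEDIR, taking
 * the rmdirat_internal() path for directories and the unlink path
 * otherwise; any other flag bit is EINVAL.  Illustrative wrapper:
 */
#include <fcntl.h>
#include <unistd.h>

int
remove_entry(int dirfd, const char *name, int is_dir)
{
	/* AT_REMOVEDIR routes to the rmdir path; 0 to the unlink path. */
	return (unlinkat(dirfd, name, is_dir ? AT_REMOVEDIR : 0));
}
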
*/ - if (uap->flags == F_OK) + if (amode == F_OK) nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; #endif - error = namei(&nd); + error = nameiat(&nd, fd); if (error) goto out; @@ -4698,7 +5181,7 @@ access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval) } #endif - error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context); + error = access1(nd.ni_vp, nd.ni_dvp, amode, &context); #if NAMEDRSRCFORK if (is_namedstream) { @@ -4707,15 +5190,33 @@ access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval) #endif vnode_put(nd.ni_vp); - if (uap->flags & _DELETE_OK) + if (amode & _DELETE_OK) vnode_put(nd.ni_dvp); nameidone(&nd); out: - kauth_cred_unref(&context.vc_ucred); - return(error); + if (!(flag & AT_EACCESS)) + kauth_cred_unref(&context.vc_ucred); + return (error); +} + +int +access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval) +{ + return (faccessat_internal(vfs_context_current(), AT_FDCWD, + uap->path, uap->flags, 0, UIO_USERSPACE)); } +int +faccessat(__unused proc_t p, struct faccessat_args *uap, + __unused int32_t *retval) +{ + if (uap->flag & ~AT_EACCESS) + return (EINVAL); + + return (faccessat_internal(vfs_context_current(), uap->fd, + uap->path, uap->amode, uap->flag, UIO_USERSPACE)); +} /* * Returns: 0 Success @@ -4725,8 +5226,12 @@ out: * vn_stat:??? */ static int -stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) +fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub, + user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64, + enum uio_seg segflg, int fd, int flag) { + struct nameidata nd; + int follow; union { struct stat sb; struct stat64 sb64; @@ -4743,12 +5248,16 @@ stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsec size_t xsecurity_bufsize; void * statptr; + follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; + NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1, + segflg, path, ctx); + #if NAMEDRSRCFORK int is_namedstream = 0; /* stat calls are allowed for resource forks. */ - ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; + nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; #endif - error = namei(ndp); + error = nameiat(&nd, fd); if (error) return (error); fsec = KAUTH_FILESEC_NONE; @@ -4760,23 +5269,23 @@ stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsec * force an inactive on release which will mark it * for recycle. */ - if (vnode_isnamedstream(ndp->ni_vp) && - (ndp->ni_vp->v_parent != NULLVP) && - vnode_isshadow(ndp->ni_vp)) { + if (vnode_isnamedstream(nd.ni_vp) && + (nd.ni_vp->v_parent != NULLVP) && + vnode_isshadow(nd.ni_vp)) { is_namedstream = 1; - vnode_ref(ndp->ni_vp); + vnode_ref(nd.ni_vp); } #endif - error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx); + error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx); #if NAMEDRSRCFORK if (is_namedstream) { - vnode_rele(ndp->ni_vp); + vnode_rele(nd.ni_vp); } #endif - vnode_put(ndp->ni_vp); - nameidone(ndp); + vnode_put(nd.ni_vp); + nameidone(&nd); if (error) return (error); @@ -4854,23 +5363,6 @@ out: return (error); } -/* - * Get file status; this version follows links. - * - * Returns: 0 Success - * stat2:??? 
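
/*
 * [Editor's note] faccessat() checks against the real credential by
 * default (classic access() semantics); AT_EACCESS keeps the effective
 * credential, which the code above implements by skipping the
 * kauth_cred_copy_real() swap.  Illustrative wrapper:
 */
#include <fcntl.h>
#include <unistd.h>

int
can_write(int dirfd, const char *name, int as_effective)
{
	/* AT_EACCESS: check with effective ids, e.g. under setuid. */
	return (faccessat(dirfd, name, W_OK, as_effective ? AT_EACCESS : 0));
}
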
[see stat2() in this file] - */ -static int -stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) -{ - struct nameidata nd; - vfs_context_t ctx = vfs_context_current(); - - NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1, - UIO_USERSPACE, path, ctx); - return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); -} - /* * stat_extended: Get file status; with extended security (ACL). * @@ -4888,25 +5380,30 @@ stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecu * */ int -stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval) +stat_extended(__unused proc_t p, struct stat_extended_args *uap, + __unused int32_t *retval) { - return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD, + 0)); } /* * Returns: 0 Success - * stat1:??? [see stat1() in this file] + * fstatat_internal:??? [see fstatat_internal() in this file] */ int stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval) { - return(stat1(uap->path, uap->ub, 0, 0, 0)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0)); } int stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval) { - return(stat1(uap->path, uap->ub, 0, 0, 1)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0)); } /* @@ -4928,21 +5425,9 @@ stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval) int stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval) { - return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); -} -/* - * Get file status; this version does not follow links. - */ -static int -lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) -{ - struct nameidata nd; - vfs_context_t ctx = vfs_context_current(); - - NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1, - UIO_USERSPACE, path, ctx); - - return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD, + 0)); } /* @@ -4964,19 +5449,26 @@ lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec int lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval) { - return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD, + AT_SYMLINK_NOFOLLOW)); } +/* + * Get file status; this version does not follow links. 
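
/*
 * [Editor's note] The fstatat_internal() plumbing above now backs the
 * whole stat() family; with AT_SYMLINK_NOFOLLOW the new fstatat()
 * reproduces lstat(), without it stat().  Illustrative check:
 */
#include <fcntl.h>
#include <sys/stat.h>

int
is_symlink(int dirfd, const char *name)
{
	struct stat sb;

	if (fstatat(dirfd, name, &sb, AT_SYMLINK_NOFOLLOW) < 0)
		return (-1);
	return (S_ISLNK(sb.st_mode));
}
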
+ */ int lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval) { - return(lstat1(uap->path, uap->ub, 0, 0, 0)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW)); } int lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval) { - return(lstat1(uap->path, uap->ub, 0, 0, 1)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW)); } /* @@ -4999,7 +5491,30 @@ lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval) int lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval) { - return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD, + AT_SYMLINK_NOFOLLOW)); +} + +int +fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval) +{ + if (uap->flag & ~AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag)); +} + +int +fstatat64(__unused proc_t p, struct fstatat64_args *uap, + __unused int32_t *retval) +{ + if (uap->flag & ~AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + return (fstatat_internal(vfs_context_current(), uap->path, uap->ub, + 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag)); } /* @@ -5042,50 +5557,71 @@ pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval) * Return target name of a symbolic link. */ /* ARGSUSED */ -int -readlink(proc_t p, struct readlink_args *uap, int32_t *retval) +static int +readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path, + enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg, + int *retval) { vnode_t vp; uio_t auio; - int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; int error; struct nameidata nd; - vfs_context_t ctx = vfs_context_current(); char uio_buf[ UIO_SIZEOF(1) ]; - NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1, - UIO_USERSPACE, uap->path, ctx); - error = namei(&nd); + NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1, + seg, path, ctx); + + error = nameiat(&nd, fd); if (error) return (error); vp = nd.ni_vp; nameidone(&nd); - auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); - uio_addiov(auio, uap->buf, uap->count); - if (vp->v_type != VLNK) + auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, buf, bufsize); + if (vp->v_type != VLNK) { error = EINVAL; - else { + } else { #if CONFIG_MACF - error = mac_vnode_check_readlink(ctx, - vp); + error = mac_vnode_check_readlink(ctx, vp); #endif if (error == 0) - error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx); + error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, + ctx); if (error == 0) error = VNOP_READLINK(vp, auio, ctx); } vnode_put(vp); - /* Safe: uio_resid() is bounded above by "count", and "count" is an int */ - *retval = uap->count - (int)uio_resid(auio); + *retval = bufsize - (int)uio_resid(auio); return (error); } -/* - * Change file flags. +int +readlink(proc_t p, struct readlink_args *uap, int32_t *retval) +{ + enum uio_seg procseg; + + procseg = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; + return (readlinkat_internal(vfs_context_current(), AT_FDCWD, + CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf), + uap->count, procseg, retval)); +} + +int +readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval) +{ + enum uio_seg procseg; + + procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path, + procseg, uap->buf, uap->bufsize, procseg, retval)); +} + +/* + * Change file flags. */ static int chflags1(vnode_t vp, int flags, vfs_context_t ctx) @@ -5190,7 +5726,7 @@ fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval) * translated to EPERM before being returned. */ static int -chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) +chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) { kauth_action_t action; int error; @@ -5230,19 +5766,21 @@ chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) * * Returns: 0 Success * namei:??? [anything namei can return] - * chmod2:??? [anything chmod2 can return] + * chmod_vnode:??? [anything chmod_vnode can return] */ static int -chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) +chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, + int fd, int flag, enum uio_seg segflg) { struct nameidata nd; - int error; + int follow, error; - NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, - UIO_USERSPACE, path, ctx); - if ((error = namei(&nd))) + follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; + NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, + segflg, path, ctx); + if ((error = nameiat(&nd, fd))) return (error); - error = chmod2(ctx, nd.ni_vp, vap); + error = chmod_vnode(ctx, nd.ni_vp, vap); vnode_put(nd.ni_vp); nameidone(&nd); return(error); @@ -5303,7 +5841,8 @@ chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int3 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount); } - error = chmod1(vfs_context_current(), uap->path, &va); + error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0, + UIO_USERSPACE); if (xsecdst != NULL) kauth_filesec_free(xsecdst); @@ -5312,17 +5851,35 @@ chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int3 /* * Returns: 0 Success - * chmod1:??? [anything chmod1 can return] + * chmodat:??? 
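
/*
 * [Editor's note] Like readlink(), the readlinkat() added above does not
 * NUL-terminate: the kernel returns bufsize minus the residual uio
 * count, so callers must terminate the buffer themselves.  Sketch:
 */
#include <stdio.h>
#include <unistd.h>

int
print_link_target(int dirfd, const char *name)
{
	char buf[1024];
	ssize_t len;

	len = readlinkat(dirfd, name, buf, sizeof(buf) - 1);
	if (len < 0)
		return (-1);
	buf[len] = '\0';	/* the kernel does not terminate it */
	printf("%s -> %s\n", name, buf);
	return (0);
}
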
[anything chmodat can return] */ -int -chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval) +static int +fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd, + int flag, enum uio_seg segflg) { struct vnode_attr va; VATTR_INIT(&va); - VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + VATTR_SET(&va, va_mode, mode & ALLPERMS); + + return (chmodat(ctx, path, &va, fd, flag, segflg)); +} + +int +chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval) +{ + return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode, + AT_FDCWD, 0, UIO_USERSPACE)); +} - return(chmod1(vfs_context_current(), uap->path, &va)); +int +fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval) +{ + if (uap->flag & ~AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode, + uap->fd, uap->flag, UIO_USERSPACE)); } /* @@ -5344,7 +5901,7 @@ fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap) } AUDIT_ARG(vnpath, vp, ARG_VNODE1); - error = chmod2(vfs_context_current(), vp, vap); + error = chmod_vnode(vfs_context_current(), vp, vap); (void)vnode_put(vp); file_drop(fd); @@ -5434,20 +5991,22 @@ fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval) */ /* ARGSUSED */ static int -chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow) +fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid, + gid_t gid, int flag, enum uio_seg segflg) { vnode_t vp; struct vnode_attr va; int error; struct nameidata nd; + int follow; kauth_action_t action; - AUDIT_ARG(owner, uap->uid, uap->gid); + AUDIT_ARG(owner, uid, gid); - NDINIT(&nd, LOOKUP, OP_SETATTR, - (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1, - UIO_USERSPACE, uap->path, ctx); - error = namei(&nd); + follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; + NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg, + path, ctx); + error = nameiat(&nd, fd); if (error) return (error); vp = nd.ni_vp; @@ -5455,13 +6014,13 @@ chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int nameidone(&nd); VATTR_INIT(&va); - if (uap->uid != VNOVAL) - VATTR_SET(&va, va_uid, uap->uid); - if (uap->gid != VNOVAL) - VATTR_SET(&va, va_gid, uap->gid); + if (uid != (uid_t)VNOVAL) + VATTR_SET(&va, va_uid, uid); + if (gid != (gid_t)VNOVAL) + VATTR_SET(&va, va_gid, gid); #if CONFIG_MACF - error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid); + error = mac_vnode_check_setowner(ctx, vp, uid, gid); if (error) goto out; #endif @@ -5480,22 +6039,33 @@ out: */ if (error == EACCES) error = EPERM; - + vnode_put(vp); return (error); } int -chown(__unused proc_t p, struct chown_args *uap, int32_t *retval) +chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval) { - return chown1(vfs_context_current(), uap, retval, 1); + return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path, + uap->uid, uap->gid, 0, UIO_USERSPACE)); } int -lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval) +lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval) { - /* Argument list identical, but machine generated; cast for chown1() */ - return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0); + return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path, + uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE)); +} + +int +fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval) +{ + if (uap->flag & ~AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + return (fchownat_internal(vfs_context_current(), uap->fd, uap->path, + uap->uid, uap->gid, uap->flag, UIO_USERSPACE)); } /* @@ -6005,13 +6575,13 @@ out1: * or both not be directories. If target is a directory, it must be empty. */ /* ARGSUSED */ -int -rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval) +static int +renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from, + int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags) { vnode_t tvp, tdvp; vnode_t fvp, fdvp; struct nameidata *fromnd, *tond; - vfs_context_t ctx = vfs_context_current(); int error; int do_retry; int mntrename; @@ -6047,16 +6617,16 @@ retry: mntrename = FALSE; NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1, - UIO_USERSPACE, uap->from, ctx); + segflg, from, ctx); fromnd->ni_flag = NAMEI_COMPOUNDRENAME; - + NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK, - UIO_USERSPACE, uap->to, ctx); + segflg, to, ctx); tond->ni_flag = NAMEI_COMPOUNDRENAME; - + continue_lookup: if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) { - if ( (error = namei(fromnd)) ) + if ( (error = nameiat(fromnd, fromfd)) ) goto out1; fdvp = fromnd->ni_dvp; fvp = fromnd->ni_vp; @@ -6066,25 +6636,25 @@ continue_lookup: } if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) { - if ( (error = namei(tond)) ) { + if ( (error = nameiat(tond, tofd)) ) { /* * Translate error code for rename("dir1", "dir2/."). */ - if (error == EISDIR && fvp->v_type == VDIR) + if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; goto out1; } tdvp = tond->ni_dvp; tvp = tond->ni_vp; - } + } batched = vnode_compound_rename_available(fdvp); if (!fvp) { - /* + /* * Claim: this check will never reject a valid rename. 
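
/*
 * [Editor's note] The fchmodat()/fchownat() entry points added in this
 * change accept only AT_SYMLINK_NOFOLLOW, and fchownat() treats an id of
 * -1 (VNOVAL) as "leave unchanged", per the VATTR_SET guards above.
 * Illustrative helper:
 */
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

int
restrict_entry(int dirfd, const char *name, gid_t gid)
{
	if (fchmodat(dirfd, name, 0640, 0) < 0)		/* follows symlinks */
		return (-1);
	/* uid -1 leaves the owner untouched; only the group changes. */
	return (fchownat(dirfd, name, (uid_t)-1, gid, AT_SYMLINK_NOFOLLOW));
}
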
* For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp. * Suppose fdvp and tdvp are not on the same mount. - * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root, + * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root, * then you can't move it to within another dir on the same mountpoint. * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction. * @@ -6106,7 +6676,7 @@ continue_lookup: * being valid. If so, simply re-drive the rename call from the * top. */ - do_retry = 1; + do_retry = 1; } goto out1; } @@ -6121,7 +6691,7 @@ continue_lookup: */ if (fvp == tvp) { int pathconf_val; - + /* * Note: if _PC_CASE_SENSITIVE selector isn't supported, * then assume that this file system is case sensitive. @@ -6129,7 +6699,7 @@ continue_lookup: if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 || pathconf_val != 0) { goto out1; - } + } } /* @@ -6150,7 +6720,7 @@ continue_lookup: ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) && (fvp->v_mount->mnt_vnodecovered != NULLVP)) { vnode_t coveredvp; - + /* switch fvp to the covered vnode */ coveredvp = fvp->v_mount->mnt_vnodecovered; if ( (vnode_getwithref(coveredvp)) ) { @@ -6185,7 +6755,7 @@ continue_lookup: * source. NOTE: Then the target is unlocked going into vnop_rename, * so not to cause locking problems. There is a single reference on tvp. * - * NOTE - that fvp == tvp also occurs if they are hard linked and + * NOTE - that fvp == tvp also occurs if they are hard linked and * that correct behaviour then is just to return success without doing * anything. * @@ -6259,7 +6829,7 @@ continue_lookup: } else { /* * when we dropped the iocounts to take - * the lock, we allowed the identity of + * the lock, we allowed the identity of * the various vnodes to change... if they did, * we may no longer be dealing with a rename * that reshapes the tree... once we're holding @@ -6281,7 +6851,7 @@ continue_lookup: skipped_lookup: #if CONFIG_FSE need_event = need_fsevent(FSE_RENAME, fdvp); - if (need_event) { + if (need_event) { if (fvp) { get_fse_info(fvp, &from_finfo, ctx); } else { @@ -6328,8 +6898,14 @@ skipped_lookup: } to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated); - } - + } +#if CONFIG_SECLUDED_RENAME + if (flags & VFS_SECLUDE_RENAME) { + fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME; + } +#else + #pragma unused(flags) +#endif error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap, tdvp, &tvp, &tond->ni_cnd, tvap, 0, ctx); @@ -6353,29 +6929,29 @@ skipped_lookup: fromnd->ni_vp = fvp; tond->ni_vp = tvp; - + goto continue_lookup; } /* - * We may encounter a race in the VNOP where the destination didn't - * exist when we did the namei, but it does by the time we go and + * We may encounter a race in the VNOP where the destination didn't + * exist when we did the namei, but it does by the time we go and * try to create the entry. In this case, we should re-drive this rename * call from the top again. Currently, only HFS bubbles out ERECYCLE, - * but other filesystems susceptible to this race could return it, too. + * but other filesystems susceptible to this race could return it, too. 
*/ if (error == ERECYCLE) { do_retry = 1; } goto out1; - } - - /* call out to allow 3rd party notification of rename. + } + + /* call out to allow 3rd party notification of rename. * Ignore result of kauth_authorize_fileop call. */ - kauth_authorize_fileop(vfs_context_ucred(ctx), - KAUTH_FILEOP_RENAME, + kauth_authorize_fileop(vfs_context_ucred(ctx), + KAUTH_FILEOP_RENAME, (uintptr_t)from_name, (uintptr_t)to_name); #if CONFIG_FSE @@ -6408,7 +6984,7 @@ skipped_lookup: } } #endif /* CONFIG_FSE */ - + /* * update filesystem's mount point data */ @@ -6427,7 +7003,10 @@ skipped_lookup: } MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len); + if (UIO_SEG_IS_USER_SPACE(segflg)) + error = copyinstr(to, tobuf, MAXPATHLEN, &len); + else + error = copystr((void *)to, tobuf, MAXPATHLEN, &len); if (!error) { /* find current mount point prefix */ pathend = &mp->mnt_vfsstat.f_mntonname[0]; @@ -6450,7 +7029,7 @@ skipped_lookup: vfs_unbusy(mp); } /* - * fix up name & parent pointers. note that we first + * fix up name & parent pointers. note that we first * check that fvp has the same name/parent pointers it * had before the rename call... this is a 'weak' check * at best... @@ -6503,8 +7082,7 @@ out1: vnode_put(fvp); vnode_put(fdvp); } - - + /* * If things changed after we did the namei, then we will re-drive * this rename call from the top. @@ -6518,6 +7096,31 @@ out1: return (error); } +int +rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval) +{ + return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from, + AT_FDCWD, uap->to, UIO_USERSPACE, 0)); +} + +#if CONFIG_SECLUDED_RENAME +int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval) +{ + return renameat_internal( + vfs_context_current(), + AT_FDCWD, uap->from, + AT_FDCWD, uap->to, + UIO_USERSPACE, uap->flags); +} +#endif + +int +renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval) +{ + return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from, + uap->tofd, uap->to, UIO_USERSPACE, 0)); +} + /* * Make a directory file. * @@ -6529,7 +7132,8 @@ out1: */ /* ARGSUSED */ static int -mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) +mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd, + enum uio_seg segflg) { vnode_t vp, dvp; int error; @@ -6538,27 +7142,27 @@ mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) struct nameidata nd; AUDIT_ARG(mode, vap->va_mode); - NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, + NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg, path, ctx); nd.ni_cnd.cn_flags |= WILLBEDIR; nd.ni_flag = NAMEI_COMPOUNDMKDIR; continue_lookup: - error = namei(&nd); + error = nameiat(&nd, fd); if (error) return (error); dvp = nd.ni_dvp; vp = nd.ni_vp; - if (vp != NULL) { - error = EEXIST; - goto out; - } - + if (vp != NULL) { + error = EEXIST; + goto out; + } + batched = vnode_compound_mkdir_available(dvp); VATTR_SET(vap, va_type, VDIR); - + /* * XXX * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will @@ -6566,7 +7170,7 @@ continue_lookup: * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz" * it will fail in a spurious manner. Need to figure out if this is valid behavior. 
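
/*
 * [Editor's note] renameat(), added above, resolves each path relative
 * to its own directory fd, so an entry can be moved between two
 * previously opened directories without building absolute paths.
 * Sketch:
 */
#include <stdio.h>

int
move_between(int srcdirfd, int dstdirfd, const char *name)
{
	/* Each name resolves against its own fd, like two cwds at once. */
	return (renameat(srcdirfd, name, dstdirfd, name));
}
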
*/ - if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) { + if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) { if (error == EACCES || error == EPERM) { int error2; @@ -6574,13 +7178,13 @@ continue_lookup: vnode_put(dvp); dvp = NULLVP; - /* - * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST + /* + * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST * rather than EACCESS if the target exists. */ - NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE, - path, ctx); - error2 = namei(&nd); + NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg, + path, ctx); + error2 = nameiat(&nd, fd); if (error2) { goto out; } else { @@ -6594,17 +7198,17 @@ continue_lookup: } /* - * make the directory + * make the directory */ - if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) { + if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) { if (error == EKEEPLOOKING) { nd.ni_vp = vp; goto continue_lookup; } - goto out; + goto out; } - + // Make sure the name & parent pointers are hooked up if (vp->v_name == NULL) update_flags |= VNODE_UPDATE_NAME; @@ -6627,7 +7231,7 @@ out: if (vp) vnode_put(vp); - if (dvp) + if (dvp) vnode_put(dvp); return (error); @@ -6638,12 +7242,12 @@ out: * * Parameters: p Process requesting to create the directory * uap User argument descriptor (see below) - * retval (ignored) + * retval (ignored) * * Indirect: uap->path Path of directory to create * uap->mode Access permissions to set * uap->xsecurity ACL to set - * + * * Returns: 0 Success * !0 Not success * @@ -6663,11 +7267,12 @@ mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retv return ciferror; VATTR_INIT(&va); - VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); + VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); if (xsecdst != NULL) VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); - ciferror = mkdir1(vfs_context_current(), uap->path, &va); + ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD, + UIO_USERSPACE); if (xsecdst != NULL) kauth_filesec_free(xsecdst); return ciferror; @@ -6679,17 +7284,27 @@ mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval) struct vnode_attr va; VATTR_INIT(&va); - VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); + VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); - return(mkdir1(vfs_context_current(), uap->path, &va)); + return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD, + UIO_USERSPACE)); } -/* - * Remove a directory file. 
- */ -/* ARGSUSED */ int -rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval) +mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval) +{ + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); + + return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd, + UIO_USERSPACE)); +} + +static int +rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath, + enum uio_seg segflg) { vnode_t vp, dvp; int error; @@ -6699,7 +7314,6 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval) int has_listeners = 0; int need_event = 0; int truncated = 0; - vfs_context_t ctx = vfs_context_current(); #if CONFIG_FSE struct vnode_attr va; #endif /* CONFIG_FSE */ @@ -6708,20 +7322,20 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval) int restart_flag; - /* + /* * This loop exists to restart rmdir in the unlikely case that two * processes are simultaneously trying to remove the same directory * containing orphaned appleDouble files. */ do { NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1, - UIO_USERSPACE, uap->path, ctx); + segflg, dirpath, ctx); nd.ni_flag = NAMEI_COMPOUNDRMDIR; continue_lookup: restart_flag = 0; vap = NULL; - error = namei(&nd); + error = nameiat(&nd, fd); if (error) return (error); @@ -6745,7 +7359,7 @@ continue_lookup: * I do not think that check was valid, because if we retry * and all dirents are gone, the directory could legitimately * be recycled but still be present in a situation where we would - * have had permission to delete. Therefore, we won't make + * have had permission to delete. Therefore, we won't make * an effort to preserve that check now that we may not have a * vp here. */ @@ -6824,20 +7438,20 @@ continue_lookup: /* - * Assuming everything went well, we will try the RMDIR again + * Assuming everything went well, we will try the RMDIR again */ if (!error) error = vn_rmdir(dvp, &vp, &nd, vap, ctx); } #endif /* CONFIG_APPLEDOUBLE */ /* - * Call out to allow 3rd party notification of delete. + * Call out to allow 3rd party notification of delete. * Ignore result of kauth_authorize_fileop call. */ if (!error) { if (has_listeners) { - kauth_authorize_fileop(vfs_context_ucred(ctx), - KAUTH_FILEOP_DELETE, + kauth_authorize_fileop(vfs_context_ucred(ctx), + KAUTH_FILEOP_DELETE, (uintptr_t)vp, (uintptr_t)path); } @@ -6873,7 +7487,7 @@ out: nameidone(&nd); vnode_put(dvp); - if (vp) + if (vp) vnode_put(vp); if (restart_flag == 0) { @@ -6888,11 +7502,22 @@ out: } +/* + * Remove a directory file. + */ +/* ARGSUSED */ +int +rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval) +{ + return (rmdirat_internal(vfs_context_current(), AT_FDCWD, + CAST_USER_ADDR_T(uap->path), UIO_USERSPACE)); +} + /* Get direntry length padded to 8 byte alignment */ #define DIRENT64_LEN(namlen) \ ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7) -static errno_t +errno_t vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, int *numdirent, vfs_context_t ctxp) { @@ -7617,9 +8242,13 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) /* It all has to do into local memory and it's not that big so we might as well put it all together. */ /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/ /* block. 
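
/*
 * [Editor's note] mkdirat() above masks the requested mode with
 * ACCESSPERMS and the process umask before mkdir1at() runs, matching
 * mkdir().  Illustrative helper:
 */
#include <sys/stat.h>

int
make_subdir(int dirfd, const char *name)
{
	/* Effective permissions are 0755 & ~umask. */
	return (mkdirat(dirfd, name, 0755));
}
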
*/ - + /* */ + /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */ + /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */ + /* assumes the size is still 556 bytes it will continue to work */ + mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 + - sizeof(struct attrlist) + sizeof(struct searchstate); + sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t)); MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK); @@ -7719,9 +8348,9 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) * Switch to the root vnode for the volume */ error = VFS_ROOT(vnode_mount(vp), &tvp, ctx); + vnode_put(vp); if (error) goto freeandexit; - vnode_put(vp); vp = tvp; /* @@ -7736,7 +8365,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) tvp = vp; vp = vp->v_mount->mnt_vnodecovered; if (vp == NULL) { - vp = tvp; + vnode_put(tvp); error = ENOENT; goto freeandexit; } @@ -7866,16 +8495,14 @@ uint32_t nspace_handler_timeout = 15; // seconds #define NSPACE_ITEM_NSPACE_EVENT 0x0040 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080 -#define NSPACE_ITEM_TRACK_EVENT 0x0100 -#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT) +#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT) //#pragma optimization_level 0 typedef enum { NSPACE_HANDLER_NSPACE = 0, NSPACE_HANDLER_SNAPSHOT = 1, - NSPACE_HANDLER_TRACK = 2, NSPACE_HANDLER_COUNT, } nspace_type_t; @@ -7907,8 +8534,6 @@ static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT; case NSPACE_HANDLER_SNAPSHOT: return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT; - case NSPACE_HANDLER_TRACK: - return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT; default: printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type); return 0; @@ -7922,8 +8547,6 @@ static inline int nspace_item_flags_for_type(nspace_type_t nspace_type) return NSPACE_ITEM_NSPACE_EVENT; case NSPACE_HANDLER_SNAPSHOT: return NSPACE_ITEM_SNAPSHOT_EVENT; - case NSPACE_HANDLER_TRACK: - return NSPACE_ITEM_TRACK_EVENT; default: printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type); return 0; @@ -7936,7 +8559,6 @@ static inline int nspace_open_flags_for_type(nspace_type_t nspace_type) case NSPACE_HANDLER_NSPACE: return FREAD | FWRITE | O_EVTONLY; case NSPACE_HANDLER_SNAPSHOT: - case NSPACE_HANDLER_TRACK: return FREAD | O_EVTONLY; default: printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type); @@ -7951,8 +8573,6 @@ static inline nspace_type_t nspace_type_for_op(uint64_t op) return NSPACE_HANDLER_NSPACE; case NAMESPACE_HANDLER_SNAPSHOT_EVENT: return NSPACE_HANDLER_SNAPSHOT; - case NAMESPACE_HANDLER_TRACK_EVENT: - return NSPACE_HANDLER_TRACK; default: printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK); return NSPACE_HANDLER_NSPACE; @@ -8625,7 +9245,12 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long if (cmd & IOC_IN) { if (size) { error = copyin(udata, data, size); - if (error) goto FSCtl_Exit; + if (error) { + if (memp) { + kfree (memp, size); + } + return error; + } } else { if (is64bit) { *(user_addr_t *)data = udata; @@ -8650,285 +9275,306 @@ fsctl_internal(proc_t p, 
vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long } /* Check to see if it's a generic command */ - if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) { - mount_t mp = vp->v_mount; - int arg = *(uint32_t*)data; - - /* record vid of vp so we can drop it below. */ - uint32_t vvid = vp->v_id; + switch (IOCBASECMD(cmd)) { - /* - * Then grab mount_iterref so that we can release the vnode. - * Without this, a thread may call vnode_iterate_prepare then - * get into a deadlock because we've never released the root vp - */ - error = mount_iterref (mp, 0); - if (error) { - goto FSCtl_Exit; - } - vnode_put(vp); + case FSCTL_SYNC_VOLUME: { + mount_t mp = vp->v_mount; + int arg = *(uint32_t*)data; - /* issue the sync for this volume */ - (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL); - - /* - * Then release the mount_iterref once we're done syncing; it's not - * needed for the VNOP_IOCTL below - */ - mount_iterdrop(mp); + /* record vid of vp so we can drop it below. */ + uint32_t vvid = vp->v_id; - if (arg & FSCTL_SYNC_FULLSYNC) { - /* re-obtain vnode iocount on the root vp, if possible */ - error = vnode_getwithvid (vp, vvid); - if (error == 0) { - error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx); - vnode_put (vp); + /* + * Then grab mount_iterref so that we can release the vnode. + * Without this, a thread may call vnode_iterate_prepare then + * get into a deadlock because we've never released the root vp + */ + error = mount_iterref (mp, 0); + if (error) { + break; + } + vnode_put(vp); + + /* issue the sync for this volume */ + (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL); + + /* + * Then release the mount_iterref once we're done syncing; it's not + * needed for the VNOP_IOCTL below + */ + mount_iterdrop(mp); + + if (arg & FSCTL_SYNC_FULLSYNC) { + /* re-obtain vnode iocount on the root vp, if possible */ + error = vnode_getwithvid (vp, vvid); + if (error == 0) { + error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx); + vnode_put (vp); + } } + /* mark the argument VP as having been released */ + *arg_vp = NULL; } - /* mark the argument VP as having been released */ - *arg_vp = NULL; + break; - } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) { - user_addr_t ext_strings; - uint32_t num_entries; - uint32_t max_width; - - if ( (is64bit && size != sizeof(user64_package_ext_info)) - || (is64bit == 0 && size != sizeof(user32_package_ext_info))) { + case FSCTL_SET_PACKAGE_EXTS: { + user_addr_t ext_strings; + uint32_t num_entries; + uint32_t max_width; - // either you're 64-bit and passed a 64-bit struct or - // you're 32-bit and passed a 32-bit struct. otherwise - // it's not ok. - error = EINVAL; - goto FSCtl_Exit; + if ( (is64bit && size != sizeof(user64_package_ext_info)) + || (is64bit == 0 && size != sizeof(user32_package_ext_info))) { + + // either you're 64-bit and passed a 64-bit struct or + // you're 32-bit and passed a 32-bit struct. otherwise + // it's not ok. 
+ error = EINVAL; + break; + } + + if (is64bit) { + ext_strings = ((user64_package_ext_info *)data)->strings; + num_entries = ((user64_package_ext_info *)data)->num_entries; + max_width = ((user64_package_ext_info *)data)->max_width; + } else { + ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings); + num_entries = ((user32_package_ext_info *)data)->num_entries; + max_width = ((user32_package_ext_info *)data)->max_width; + } + error = set_package_extensions_table(ext_strings, num_entries, max_width); } + break; - if (is64bit) { - ext_strings = ((user64_package_ext_info *)data)->strings; - num_entries = ((user64_package_ext_info *)data)->num_entries; - max_width = ((user64_package_ext_info *)data)->max_width; - } else { - ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings); - num_entries = ((user32_package_ext_info *)data)->num_entries; - max_width = ((user32_package_ext_info *)data)->max_width; + /* namespace handlers */ + case FSCTL_NAMESPACE_HANDLER_GET: { + error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data); } - - error = set_package_extensions_table(ext_strings, num_entries, max_width); + break; + /* Snapshot handlers */ + case FSCTL_OLD_SNAPSHOT_HANDLER_GET: { + error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); + } + break; - } + case FSCTL_SNAPSHOT_HANDLER_GET_EXT: { + error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); + } + break; - /* namespace handlers */ - else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) { - error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data); - } + case FSCTL_NAMESPACE_HANDLER_UPDATE: { + uint32_t token, val; + int i; - /* Snapshot handlers */ - else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) { - error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); - } else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) { - error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); - } + if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) { + break; + } - /* Tracked File Handlers */ - else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) { - error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data); - } - else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GETDATA) { - error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data); - } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) { - uint32_t token, val; - int i; + if (!nspace_is_special_process(p)) { + error = EINVAL; + break; + } - if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) { - goto FSCtl_Exit; - } + token = ((uint32_t *)data)[0]; + val = ((uint32_t *)data)[1]; - if (!nspace_is_special_process(p)) { - error = EINVAL; - goto FSCtl_Exit; - } + lck_mtx_lock(&nspace_handler_lock); - token = ((uint32_t *)data)[0]; - val = ((uint32_t *)data)[1]; + for(i=0; i < MAX_NSPACE_ITEMS; i++) { + if (nspace_items[i].token == token) { + break; /* exit for loop, not case stmt */ + } + } - lck_mtx_lock(&nspace_handler_lock); + if (i >= MAX_NSPACE_ITEMS) { + error = ENOENT; + } else { + // + // if this bit is set, when resolve_nspace_item() times out + // it will loop and go back to sleep. 
+ // + nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER; + } - for(i=0; i < MAX_NSPACE_ITEMS; i++) { - if (nspace_items[i].token == token) { + lck_mtx_unlock(&nspace_handler_lock); + + if (error) { + printf("nspace-handler-update: did not find token %u\n", token); + } + } + break; + + case FSCTL_NAMESPACE_HANDLER_UNBLOCK: { + uint32_t token, val; + int i; + + if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) { break; } - } - if (i >= MAX_NSPACE_ITEMS) { - error = ENOENT; - } else { - // - // if this bit is set, when resolve_nspace_item() times out - // it will loop and go back to sleep. - // - nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER; - } + if (!nspace_is_special_process(p)) { + error = EINVAL; + break; + } - lck_mtx_unlock(&nspace_handler_lock); + token = ((uint32_t *)data)[0]; + val = ((uint32_t *)data)[1]; - if (error) { - printf("nspace-handler-update: did not find token %u\n", token); - } + lck_mtx_lock(&nspace_handler_lock); - } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) { - uint32_t token, val; - int i; + for(i=0; i < MAX_NSPACE_ITEMS; i++) { + if (nspace_items[i].token == token) { + break; /* exit for loop, not case statement */ + } + } - if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) { - goto FSCtl_Exit; - } + if (i >= MAX_NSPACE_ITEMS) { + printf("nspace-handler-unblock: did not find token %u\n", token); + error = ENOENT; + } else { + if (val == 0 && nspace_items[i].vp) { + vnode_lock_spin(nspace_items[i].vp); + nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT; + vnode_unlock(nspace_items[i].vp); + } - if (!nspace_is_special_process(p)) { - error = EINVAL; - goto FSCtl_Exit; - } + nspace_items[i].vp = NULL; + nspace_items[i].arg = NULL; + nspace_items[i].op = 0; + nspace_items[i].vid = 0; + nspace_items[i].flags = NSPACE_ITEM_DONE; + nspace_items[i].token = 0; - token = ((uint32_t *)data)[0]; - val = ((uint32_t *)data)[1]; + wakeup((caddr_t)&(nspace_items[i].vp)); + } + + lck_mtx_unlock(&nspace_handler_lock); + } + break; - lck_mtx_lock(&nspace_handler_lock); + case FSCTL_NAMESPACE_HANDLER_CANCEL: { + uint32_t token, val; + int i; - for(i=0; i < MAX_NSPACE_ITEMS; i++) { - if (nspace_items[i].token == token) { + if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) { break; } - } - if (i >= MAX_NSPACE_ITEMS) { - printf("nspace-handler-unblock: did not find token %u\n", token); - error = ENOENT; - } else { - if (val == 0 && nspace_items[i].vp) { - vnode_lock_spin(nspace_items[i].vp); - nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT; - vnode_unlock(nspace_items[i].vp); + if (!nspace_is_special_process(p)) { + error = EINVAL; + break; } - nspace_items[i].vp = NULL; - nspace_items[i].arg = NULL; - nspace_items[i].op = 0; - nspace_items[i].vid = 0; - nspace_items[i].flags = NSPACE_ITEM_DONE; - nspace_items[i].token = 0; + token = ((uint32_t *)data)[0]; + val = ((uint32_t *)data)[1]; - wakeup((caddr_t)&(nspace_items[i].vp)); - } + lck_mtx_lock(&nspace_handler_lock); - lck_mtx_unlock(&nspace_handler_lock); - - } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) { - uint32_t token, val; - int i; + for(i=0; i < MAX_NSPACE_ITEMS; i++) { + if (nspace_items[i].token == token) { + break; /* exit for loop, not case stmt */ + } + } - if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) { - goto FSCtl_Exit; - } + if (i >= MAX_NSPACE_ITEMS) { + printf("nspace-handler-cancel: did not find token %u\n", token); + error = ENOENT; + } else { + if (nspace_items[i].vp) { + vnode_lock_spin(nspace_items[i].vp); + nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT; 
+ vnode_unlock(nspace_items[i].vp); + } - if (!nspace_is_special_process(p)) { - error = EINVAL; - goto FSCtl_Exit; - } + nspace_items[i].vp = NULL; + nspace_items[i].arg = NULL; + nspace_items[i].vid = 0; + nspace_items[i].token = val; + nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING; + nspace_items[i].flags |= NSPACE_ITEM_CANCELLED; - token = ((uint32_t *)data)[0]; - val = ((uint32_t *)data)[1]; + wakeup((caddr_t)&(nspace_items[i].vp)); + } - lck_mtx_lock(&nspace_handler_lock); + lck_mtx_unlock(&nspace_handler_lock); + } + break; - for(i=0; i < MAX_NSPACE_ITEMS; i++) { - if (nspace_items[i].token == token) { + case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: { + if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { break; } - } - - if (i >= MAX_NSPACE_ITEMS) { - printf("nspace-handler-cancel: did not find token %u\n", token); - error = ENOENT; - } else { - if (nspace_items[i].vp) { - vnode_lock_spin(nspace_items[i].vp); - nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT; - vnode_unlock(nspace_items[i].vp); - } - nspace_items[i].vp = NULL; - nspace_items[i].arg = NULL; - nspace_items[i].vid = 0; - nspace_items[i].token = val; - nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING; - nspace_items[i].flags |= NSPACE_ITEM_CANCELLED; + // we explicitly do not do the namespace_handler_proc check here - wakeup((caddr_t)&(nspace_items[i].vp)); - } + lck_mtx_lock(&nspace_handler_lock); + snapshot_timestamp = ((uint32_t *)data)[0]; + wakeup(&nspace_item_idx); + lck_mtx_unlock(&nspace_handler_lock); + printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp); - lck_mtx_unlock(&nspace_handler_lock); - } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) { - if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { - goto FSCtl_Exit; - } + } + break; - // we explicitly do not do the namespace_handler_proc check here + case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS: + { + if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { + break; + } - lck_mtx_lock(&nspace_handler_lock); - snapshot_timestamp = ((uint32_t *)data)[0]; - wakeup(&nspace_item_idx); - lck_mtx_unlock(&nspace_handler_lock); - printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp); + lck_mtx_lock(&nspace_handler_lock); + nspace_allow_virtual_devs = ((uint32_t *)data)[0]; + lck_mtx_unlock(&nspace_handler_lock); + printf("nspace-snapshot-handler will%s allow events on disk-images\n", + nspace_allow_virtual_devs ? "" : " NOT"); + error = 0; - } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) { - if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { - goto FSCtl_Exit; } + break; - lck_mtx_lock(&nspace_handler_lock); - nspace_allow_virtual_devs = ((uint32_t *)data)[0]; - lck_mtx_unlock(&nspace_handler_lock); - printf("nspace-snapshot-handler will%s allow events on disk-images\n", - nspace_allow_virtual_devs ? 
"" : " NOT"); - error = 0; - - } else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) { - if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { - goto FSCtl_Exit; - } - if (vp->v_mount) { - mount_lock(vp->v_mount); - if (data[0] != 0) { - strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN); - vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE; - if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) { - vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY; - vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE; - } - } else { - if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) { - vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY; + case FSCTL_SET_FSTYPENAME_OVERRIDE: + { + if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { + break; + } + if (vp->v_mount) { + mount_lock(vp->v_mount); + if (data[0] != 0) { + strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN); + vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE; + if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) { + vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY; + vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE; + } + } else { + if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) { + vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY; + } + vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE; + vp->v_mount->fstypename_override[0] = '\0'; } - vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE; - vp->v_mount->fstypename_override[0] = '\0'; + mount_unlock(vp->v_mount); } - mount_unlock(vp->v_mount); } - } else { - /* Invoke the filesystem-specific code */ - error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx); - } - - + break; + + default: { + /* Invoke the filesystem-specific code */ + error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx); + } + + } /* end switch stmt */ + /* - * Copy any data to user, size was + * if no errors, copy any data to user. Size was * already set and checked above. */ if (error == 0 && (cmd & IOC_OUT) && size) error = copyout(data, udata, size); -FSCtl_Exit: - if (memp) kfree(memp, size); + if (memp) { + kfree(memp, size); + } return error; } @@ -9008,26 +9654,6 @@ done: } /* end of fsctl system call */ -/* - * An in-kernel sync for power management to call. - */ -__private_extern__ int -sync_internal(void) -{ - int error; - - struct sync_args data; - - int retval[2]; - - - error = sync(current_proc(), &data, &retval[0]); - - - return (error); -} /* end of sync_internal call */ - - /* * Retrieve the data of an extended attribute. */ @@ -9387,7 +10013,7 @@ listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval) if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) return (EINVAL); - nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER; + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx); if ((error = namei(&nd))) { return (error); @@ -9453,48 +10079,34 @@ flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval) return (error); } -/* - * Obtain the full pathname of a file system object by id. - * - * This is a private SPI used by the File Manager. 
- */ -__private_extern__ -int -fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval) +static int fsgetpath_internal( + vfs_context_t ctx, int volfs_id, uint64_t objid, + vm_size_t bufsize, caddr_t buf, int *pathlen) { - vnode_t vp; + int error; struct mount *mp = NULL; - vfs_context_t ctx = vfs_context_current(); - fsid_t fsid; - char *realpath; - int bpflags; + vnode_t vp; int length; - int error; + int bpflags; - if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) { - return (error); - } - AUDIT_ARG(value32, fsid.val[0]); - AUDIT_ARG(value64, uap->objid); - /* Restrict output buffer size for now. */ - if (uap->bufsize > PAGE_SIZE) { + if (bufsize > PAGE_SIZE) { return (EINVAL); - } - MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK); - if (realpath == NULL) { + } + + if (buf == NULL) { return (ENOMEM); } - /* Find the target mountpoint. */ - if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) { + + if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) { error = ENOTSUP; /* unexpected failure */ - goto out; + return ENOTSUP; } + unionget: - /* Find the target vnode. */ - if (uap->objid == 2) { + if (objid == 2) { error = VFS_ROOT(mp, &vp, ctx); } else { - error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx); + error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx); } if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) { @@ -9508,28 +10120,33 @@ unionget: vfs_unbusy(tmp); if (vfs_busy(mp, LK_NOWAIT) == 0) goto unionget; - } else + } else { vfs_unbusy(mp); + } if (error) { - goto out; + return error; } + #if CONFIG_MACF error = mac_vnode_check_fsgetpath(ctx, vp); if (error) { vnode_put(vp); - goto out; + return error; } #endif + /* Obtain the absolute path to this vnode. */ bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0; bpflags |= BUILDPATH_CHECK_MOVED; - error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx); + error = build_path(vp, buf, bufsize, &length, bpflags, ctx); vnode_put(vp); + if (error) { goto out; } - AUDIT_ARG(text, realpath); + + AUDIT_ARG(text, buf); if (kdebug_enable) { long dbg_parms[NUMPARMS]; @@ -9537,16 +10154,62 @@ unionget: dbg_namelen = (int)sizeof(dbg_parms); - if (length < dbg_namelen) { - memcpy((char *)dbg_parms, realpath, length); + if (length < dbg_namelen) { + memcpy((char *)dbg_parms, buf, length); memset((char *)dbg_parms + length, 0, dbg_namelen - length); dbg_namelen = length; - } else - memcpy((char *)dbg_parms, realpath + (length - dbg_namelen), dbg_namelen); + } else { + memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen); + } kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE); } + + *pathlen = (user_ssize_t)length; /* may be superseded by error */ + +out: + return (error); +} + +/* + * Obtain the full pathname of a file system object by id. + * + * This is a private SPI used by the File Manager. + */ +__private_extern__ +int +fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval) +{ + vfs_context_t ctx = vfs_context_current(); + fsid_t fsid; + char *realpath; + int length; + int error; + + if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) { + return (error); + } + AUDIT_ARG(value32, fsid.val[0]); + AUDIT_ARG(value64, uap->objid); + /* Restrict output buffer size for now. 
*/ + + if (uap->bufsize > PAGE_SIZE) { + return (EINVAL); + } + MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK); + if (realpath == NULL) { + return (ENOMEM); + } + + error = fsgetpath_internal( + ctx, fsid.val[0], uap->objid, + uap->bufsize, realpath, &length); + + if (error) { + goto out; + } + error = copyout((caddr_t)realpath, uap->buf, length); *retval = (user_ssize_t)length; /* may be superseded by error */ @@ -9589,7 +10252,7 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, sfs.f_fsid = sfsp->f_fsid; sfs.f_owner = sfsp->f_owner; if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) { - strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN); + strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN); } else { strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN); } @@ -9666,7 +10329,7 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, sfs.f_fsid = sfsp->f_fsid; sfs.f_owner = sfsp->f_owner; if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) { - strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN); + strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN); } else { strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN); } diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 24e968dc0..9b431080f 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -391,7 +391,7 @@ again: ndp->ni_op = OP_LINK; #endif /* Inherit USEDVP, vnode_open() supported flags only */ - ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); + ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1; ndp->ni_flag = NAMEI_COMPOUNDOPEN; #if NAMEDRSRCFORK @@ -414,6 +414,7 @@ continue_create_lookup: if (vp == NULL) { /* must have attributes for a new file */ if (vap == NULL) { + vnode_put(dvp); error = EINVAL; goto out; } @@ -500,7 +501,7 @@ continue_create_lookup: */ ndp->ni_cnd.cn_nameiop = LOOKUP; /* Inherit USEDVP, vnode_open() supported flags only */ - ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); + ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT; #if NAMEDRSRCFORK /* open calls are allowed for resource forks. */ @@ -707,6 +708,18 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx) if (vnode_isspec(vp)) (void)vnode_rele_ext(vp, flags, 0); + /* + * On HFS, we flush when the last writer closes. We do this + * because resource fork vnodes hold a reference on data fork + * vnodes and that will prevent them from getting VNOP_INACTIVE + * which will delay when we flush cached data. In future, we + * might find it beneficial to do this for all file systems. + * Note that it's OK to access v_writecount without the lock + * in this context. 
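fsgetpath above is split into a thin copyin/allocate/copyout wrapper and an fsgetpath_internal helper that works purely on kernel-side arguments. A hedged userspace analogue of that wrapper/helper split (resolve_internal, the fake path, and the MAX_BUF cap are illustrative, not the kernel API):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#define MAX_BUF 4096                       /* stands in for the PAGE_SIZE cap */

/* Helper: validates sizes and fills a caller-provided buffer. */
static int
resolve_internal(unsigned objid, char *buf, size_t bufsize, int *pathlen)
{
    if (bufsize > MAX_BUF)
        return EINVAL;
    if (buf == NULL)
        return ENOMEM;

    /* the real helper walks mounts and calls build_path(); fake it here */
    int n = snprintf(buf, bufsize, "/fake/path/for/%u", objid);
    if (n < 0 || (size_t)n >= bufsize)
        return ENOSPC;
    *pathlen = n + 1;                      /* length includes the NUL */
    return 0;
}

/* Wrapper: owns allocation and the user-visible copy, like fsgetpath. */
static int
resolve(unsigned objid, char *ubuf, size_t ubufsize, ssize_t *retval)
{
    int length = 0;
    char *realpath_buf = malloc(ubufsize);
    if (realpath_buf == NULL)
        return ENOMEM;

    int error = resolve_internal(objid, realpath_buf, ubufsize, &length);
    if (error == 0) {
        memcpy(ubuf, realpath_buf, (size_t)length);  /* copyout analogue */
        *retval = length;
    }
    free(realpath_buf);
    return error;
}

int main(void)
{
    char buf[64];
    ssize_t len = 0;
    int error = resolve(2, buf, sizeof(buf), &len);
    printf("error=%d len=%zd path=%s\n", error, len, buf);
    return 0;
}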
+ */ + if (vp->v_tag == VT_HFS && (flags & FWRITE) && vp->v_writecount == 1) + VNOP_FSYNC(vp, MNT_NOWAIT, ctx); + error = VNOP_CLOSE(vp, flags, ctx); #if CONFIG_FSE @@ -891,6 +904,35 @@ vn_rdwr_64( return (error); } +static inline void +vn_offset_lock(struct fileglob *fg) +{ + lck_mtx_lock_spin(&fg->fg_lock); + while (fg->fg_lflags & FG_OFF_LOCKED) { + fg->fg_lflags |= FG_OFF_LOCKWANT; + msleep(&fg->fg_lflags, &fg->fg_lock, PVFS | PSPIN, + "fg_offset_lock_wait", 0); + } + fg->fg_lflags |= FG_OFF_LOCKED; + lck_mtx_unlock(&fg->fg_lock); +} + +static inline void +vn_offset_unlock(struct fileglob *fg) +{ + int lock_wanted = 0; + + lck_mtx_lock_spin(&fg->fg_lock); + if (fg->fg_lflags & FG_OFF_LOCKWANT) { + lock_wanted = 1; + } + fg->fg_lflags &= ~(FG_OFF_LOCKED | FG_OFF_LOCKWANT); + lck_mtx_unlock(&fg->fg_lock); + if (lock_wanted) { + wakeup(&fg->fg_lflags); + } +} + /* * File table vnode read routine. */ @@ -901,6 +943,7 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) int error; int ioflag; off_t count; + int offset_locked = 0; vp = (struct vnode *)fp->f_fglob->fg_data; if ( (error = vnode_getwithref(vp)) ) { @@ -928,8 +971,13 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) if (fp->f_fglob->fg_flag & FNORDAHEAD) ioflag |= IO_RAOFF; - if ((flags & FOF_OFFSET) == 0) + if ((flags & FOF_OFFSET) == 0) { + if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) { + vn_offset_lock(fp->f_fglob); + offset_locked = 1; + } uio->uio_offset = fp->f_fglob->fg_offset; + } count = uio_resid(uio); if (vnode_isswap(vp)) { @@ -938,8 +986,13 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) } else { error = VNOP_READ(vp, uio, ioflag, ctx); } - if ((flags & FOF_OFFSET) == 0) + if ((flags & FOF_OFFSET) == 0) { fp->f_fglob->fg_offset += count - uio_resid(uio); + if (offset_locked) { + vn_offset_unlock(fp->f_fglob); + offset_locked = 0; + } + } (void)vnode_put(vp); return (error); @@ -958,6 +1011,7 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) int clippedsize = 0; int partialwrite=0; int residcount, oldcount; + int offset_locked = 0; proc_t p = vfs_context_proc(ctx); count = 0; @@ -1004,6 +1058,10 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) } if ((flags & FOF_OFFSET) == 0) { + if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) { + vn_offset_lock(fp->f_fglob); + offset_locked = 1; + } uio->uio_offset = fp->f_fglob->fg_offset; count = uio_resid(uio); } @@ -1026,8 +1084,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) } if (clippedsize >= residcount) { psignal(p, SIGXFSZ); - vnode_put(vp); - return (EFBIG); + error = EFBIG; + goto error_out; } partialwrite = 1; uio_setresid(uio, residcount-clippedsize); @@ -1038,8 +1096,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) if (p && (vp->v_type == VREG) && ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { psignal(p, SIGXFSZ); - vnode_put(vp); - return (EFBIG); + error = EFBIG; + goto error_out; } if (p && (vp->v_type == VREG) && ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { @@ -1063,6 +1121,10 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) fp->f_fglob->fg_offset = uio->uio_offset; else fp->f_fglob->fg_offset += count - uio_resid(uio); + if (offset_locked) { + vn_offset_unlock(fp->f_fglob); + offset_locked = 0; + } } /* @@ -1085,6 +1147,13 @@ vn_write(struct fileproc 
*fp, struct uio *uio, int flags, vfs_context_t ctx) } (void)vnode_put(vp); return (error); + +error_out: + if (offset_locked) { + vn_offset_unlock(fp->f_fglob); + } + (void)vnode_put(vp); + return (error); } /* @@ -1292,7 +1361,6 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) off_t file_size; int error; struct vnode *ttyvp; - int funnel_state; struct session * sessp; if ( (error = vnode_getwithref(vp)) ) { @@ -1356,12 +1424,6 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx); if (error == 0 && com == TIOCSCTTY) { - error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE); - if (error != 0) { - panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!"); - } - - funnel_state = thread_funnel_set(kernel_flock, TRUE); sessp = proc_session(vfs_context_proc(ctx)); session_lock(sessp); @@ -1370,10 +1432,6 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) sessp->s_ttyvid = vnode_vid(vp); session_unlock(sessp); session_rele(sessp); - thread_funnel_set(kernel_flock, funnel_state); - - if (ttyvp) - vnode_rele(ttyvp); } } out: diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c index dc2e09d32..a6fc32251 100644 --- a/bsd/vfs/vfs_xattr.c +++ b/bsd/vfs/vfs_xattr.c @@ -56,9 +56,6 @@ #include #endif -#if !CONFIG_APPLEDOUBLE -#define PANIC_ON_NOAPPLEDOUBLE 1 -#endif #if NAMEDSTREAMS @@ -3337,48 +3334,34 @@ unlock_xattrfile(vnode_t xvp, vfs_context_t context) #else /* CONFIG_APPLEDOUBLE */ -#undef panic -#define panic printf static int -default_getxattr(vnode_t vp, const char *name, +default_getxattr(__unused vnode_t vp, __unused const char *name, __unused uio_t uio, __unused size_t *size, __unused int options, __unused vfs_context_t context) { -#if PANIC_ON_NOAPPLEDOUBLE - panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, name); -#endif return (ENOTSUP); } static int -default_setxattr(vnode_t vp, const char *name, +default_setxattr(__unused vnode_t vp, __unused const char *name, __unused uio_t uio, __unused int options, __unused vfs_context_t context) { -#if PANIC_ON_NOAPPLEDOUBLE - panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, name); -#endif return (ENOTSUP); } static int -default_listxattr(vnode_t vp, +default_listxattr(__unused vnode_t vp, __unused uio_t uio, __unused size_t *size, __unused int options, __unused vfs_context_t context) { -#if PANIC_ON_NOAPPLEDOUBLE - panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, "."); -#endif return (ENOTSUP); } static int -default_removexattr(vnode_t vp, const char *name, +default_removexattr(__unused vnode_t vp, __unused const char *name, __unused int options, __unused vfs_context_t context) { -#if PANIC_ON_NOAPPLEDOUBLE - panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, name); -#endif return (ENOTSUP); } diff --git a/bsd/vfs/vnode_if.c b/bsd/vfs/vnode_if.c index 6dd63bfde..1fc9bd7df 100644 --- a/bsd/vfs/vnode_if.c +++ b/bsd/vfs/vnode_if.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved. 
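In the vfs_xattr.c hunks below, the CONFIG_APPLEDOUBLE-disabled default_*xattr entry points stop panicking (via the panic-as-printf macro hack) and instead fail cleanly with ENOTSUP, marking every parameter __unused. A small sketch of that stub pattern (the xattr_ops table and handler names here are illustrative, not the kernel's):

#include <errno.h>
#include <stdio.h>

/* GCC/clang spelling of the kernel's __unused annotation */
#define UNUSED __attribute__((unused))

struct xattr_ops {
    int (*getxattr)(void *vp, const char *name);
    int (*setxattr)(void *vp, const char *name);
};

/* Stubs: no AppleDouble support compiled in, so refuse politely. */
static int
default_getxattr_sim(void *vp UNUSED, const char *name UNUSED)
{
    return ENOTSUP;                        /* was: panic(...) */
}

static int
default_setxattr_sim(void *vp UNUSED, const char *name UNUSED)
{
    return ENOTSUP;
}

static const struct xattr_ops default_ops = {
    default_getxattr_sim,
    default_setxattr_sim,
};

int main(void)
{
    /* callers see a normal error they can handle, not a dead machine */
    printf("getxattr -> %d (ENOTSUP=%d)\n",
        default_ops.getxattr(NULL, "com.apple.ResourceFork"), ENOTSUP);
    return 0;
}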
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -699,6 +699,23 @@ struct vnodeop_desc vnop_readdirattr_desc = { NULL }; +int vnop_getattrlistbulk_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_getattrlistbulk_args,a_vp), + VDESC_NO_OFFSET +}; +struct vnodeop_desc vnop_getattrlistbulk_desc = { + 0, + "vnop_getattrlistbulk", + 0, + vnop_getattrlistbulk_vp_offsets, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_getattrlistbulk_args, a_context), + NULL +}; + int vnop_readlink_vp_offsets[] = { VOPARG_OFFSETOF(struct vnop_readlink_args,a_vp), VDESC_NO_OFFSET @@ -1123,6 +1140,7 @@ struct vnodeop_desc *vfs_op_descs[] = { &vnop_symlink_desc, &vnop_readdir_desc, &vnop_readdirattr_desc, + &vnop_getattrlistbulk_desc, &vnop_readlink_desc, &vnop_inactive_desc, &vnop_reclaim_desc, diff --git a/bsd/vm/dp_backing_file.c b/bsd/vm/dp_backing_file.c index cc52be8fb..31b87e8a0 100644 --- a/bsd/vm/dp_backing_file.c +++ b/bsd/vm/dp_backing_file.c @@ -60,6 +60,7 @@ #include #include +#include #include #include #include @@ -77,7 +78,12 @@ #include #endif -void kprintf(const char *fmt, ...); +#include + +void macx_init(void); + +static lck_grp_t *macx_lock_group; +static lck_mtx_t *macx_lock; /* * temporary support for delayed instantiation @@ -99,6 +105,18 @@ struct bs_map bs_port_table[MAX_BACKING_STORE] = { /* ###################################################### */ +/* + * Routine: macx_init + * Function: + * Initialize locks so that only one caller can change + * state at a time. + */ +void +macx_init(void) +{ + macx_lock_group = lck_grp_alloc_init("macx", NULL); + macx_lock = lck_mtx_alloc_init(macx_lock_group, NULL); +} /* * Routine: macx_backing_store_recovery @@ -114,9 +132,7 @@ macx_backing_store_recovery( int pid = args->pid; int error; struct proc *p = current_proc(); - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); if ((error = suser(kauth_cred_get(), 0))) goto backing_store_recovery_return; @@ -130,7 +146,6 @@ macx_backing_store_recovery( task_backing_store_privileged(p->task); backing_store_recovery_return: - (void) thread_funnel_set(kernel_flock, FALSE); return(error); } @@ -147,16 +162,16 @@ macx_backing_store_suspend( { boolean_t suspend = args->suspend; int error; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + lck_mtx_lock(macx_lock); if ((error = suser(kauth_cred_get(), 0))) goto backing_store_suspend_return; + /* Multiple writers protected by macx_lock */ vm_backing_store_disable(suspend); backing_store_suspend_return: - (void) thread_funnel_set(kernel_flock, FALSE); + lck_mtx_unlock(macx_lock); return(error); } @@ -173,6 +188,9 @@ extern boolean_t compressor_store_stop_compaction; * on by default when the system comes up and is turned * off when a shutdown/restart is requested. It is * re-enabled if the shutdown/restart is aborted for any reason. 
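The dp_backing_file.c changes below replace the old kernel funnel around the macx_* swap calls with a dedicated macx_lock mutex: initialized once in macx_init(), taken at each entry point, and asserted held in callees. A pthread sketch of that serialize-at-the-entry-point shape (macx_state and the _sim names are stand-ins; the held flag approximates lck_mtx_assert):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t macx_lock = PTHREAD_MUTEX_INITIALIZER;
static int macx_lock_held;                 /* debug-only ownership note */
static int macx_state;                     /* stands in for pager/swap state */

/* Callee: assumes macx_lock is held by the caller, mirroring
 * lck_mtx_assert(macx_lock, LCK_MTX_ASSERT_OWNED) in the hunk below. */
static int
backing_store_compaction_sim(int flags)
{
    assert(macx_lock_held);
    macx_state |= flags;
    return 0;
}

/* Entry point: takes the lock once, so everything underneath is
 * serialized the way macx_triggers -> mach_macx_triggers now is. */
static int
macx_triggers_sim(int flags)
{
    int error;

    pthread_mutex_lock(&macx_lock);
    macx_lock_held = 1;
    error = backing_store_compaction_sim(flags);
    macx_lock_held = 0;
    pthread_mutex_unlock(&macx_lock);
    return error;
}

int main(void)
{
    printf("triggers -> %d, state=0x%x\n", macx_triggers_sim(0x4), macx_state);
    return 0;
}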
+ * + * This routine assumes macx_lock has been locked by macx_triggers -> + * mach_macx_triggers -> macx_backing_store_compaction */ int @@ -180,6 +198,7 @@ macx_backing_store_compaction(int flags) { int error; + lck_mtx_assert(macx_lock, LCK_MTX_ASSERT_OWNED); if ((error = suser(kauth_cred_get(), 0))) return error; @@ -211,17 +230,19 @@ macx_triggers( { int error; + lck_mtx_lock(macx_lock); error = suser(kauth_cred_get(), 0); if (error) return error; - return mach_macx_triggers(args); + error = mach_macx_triggers(args); + + lck_mtx_unlock(macx_lock); + return error; } extern boolean_t dp_isssd; -extern void vm_swap_init(void); -extern int vm_compressor_mode; /* * In the compressed pager world, the swapfiles are created by the kernel. @@ -265,30 +286,27 @@ macx_swapon( mach_port_t backing_store; memory_object_default_t default_pager; int i; - boolean_t funnel_state; off_t file_size; vfs_context_t ctx = vfs_context_current(); struct proc *p = current_proc(); int dp_cluster_size; + AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); + AUDIT_ARG(value32, args->priority); + + lck_mtx_lock(macx_lock); + if (COMPRESSED_PAGER_IS_ACTIVE) { if (macx_swapon_allowed == FALSE) { - return EINVAL; + error = EINVAL; + goto swapon_bailout; } else { - if ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) || - (vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) { - vm_swap_init(); - } - macx_swapon_allowed = FALSE; - return 0; + error = 0; + goto swapon_bailout; } } - AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); - AUDIT_ARG(value32, args->priority); - - funnel_state = thread_funnel_set(kernel_flock, TRUE); ndp = &nd; if ((error = suser(kauth_cred_get(), 0))) @@ -437,7 +455,7 @@ swapon_bailout: if (vp) { vnode_put(vp); } - (void) thread_funnel_set(kernel_flock, FALSE); + lck_mtx_unlock(macx_lock); AUDIT_MACH_SYSCALL_EXIT(error); if (error) @@ -466,13 +484,13 @@ macx_swapoff( struct proc *p = current_proc(); int i; int error; - boolean_t funnel_state; vfs_context_t ctx = vfs_context_current(); int orig_iopol_disk; AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF); - funnel_state = thread_funnel_set(kernel_flock, TRUE); + lck_mtx_lock(macx_lock); + backing_store = NULL; ndp = &nd; @@ -550,8 +568,7 @@ swapoff_bailout: /* get rid of macx_swapoff() namei() reference */ if (vp) vnode_put(vp); - - (void) thread_funnel_set(kernel_flock, FALSE); + lck_mtx_unlock(macx_lock); AUDIT_MACH_SYSCALL_EXIT(error); if (error) @@ -591,7 +608,7 @@ macx_swapinfo( *total_p = vm_swap_get_total_space(); *avail_p = vm_swap_get_free_space(); - *pagesize_p = PAGE_SIZE_64; + *pagesize_p = (vm_size_t)PAGE_SIZE_64; *encrypted_p = TRUE; } else { diff --git a/bsd/vm/vm_compressor_backing_file.c b/bsd/vm/vm_compressor_backing_file.c index fe74a47ea..580cdc4c3 100644 --- a/bsd/vm/vm_compressor_backing_file.c +++ b/bsd/vm/vm_compressor_backing_file.c @@ -53,7 +53,7 @@ vm_swapfile_open(const char *path, vnode_t *vp) int error = 0; vfs_context_t ctx = vfs_context_current(); - if ((error = vnode_open(path, (O_CREAT | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { + if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { printf("Failed to open swap file %d\n", error); *vp = NULL; return; @@ -124,35 +124,21 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size) } #endif - /* - * This check exists because dynamic_pager creates the 1st swapfile, - * swapfile0, for us from user-space in a supported manner (with IO_NOZEROFILL etc). 
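vm_swapfile_open/vm_swapfile_preallocate below now create swapfiles with O_TRUNC and unconditionally vnode_setsize() them, dropping the old special case for a dynamic_pager-created swapfile0. A userspace analogue of that create-truncate-size-verify sequence using open/ftruncate/fstat (the path is illustrative):

#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
    const char *path = "/tmp/swapfile_demo";   /* illustrative path */
    off_t size = 1 << 20;                      /* 1 MiB */
    struct stat st;

    /* O_TRUNC: any stale contents are discarded, so the size we set
     * below is authoritative -- no "did it already exist?" branch. */
    int fd = open(path, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
    if (fd < 0) { perror("open"); return 1; }

    /* vnode_setsize() analogue */
    if (ftruncate(fd, size) != 0) { perror("ftruncate"); return 1; }

    /* read the size back and assert it matches, as the new code does */
    if (fstat(fd, &st) != 0) { perror("fstat"); return 1; }
    assert(st.st_size == size);

    printf("swapfile sized to %lld bytes\n", (long long)st.st_size);
    close(fd);
    unlink(path);
    return 0;
}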
- * - * If dynamic_pager, in the future, discontinues creating that file, - * then we need to change this check to a panic / assert or return an error. - * That's because we can't be sure if the file has been created correctly. - */ + error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); - if ((error = vnode_size(vp, (off_t*) &file_size, ctx)) != 0) { - - printf("vnode_size (existing files) for swap files failed: %d\n", error); + if (error) { + printf("vnode_setsize for swap files failed: %d\n", error); goto done; - } else { - - if (file_size == 0) { + } - error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); - - if (error) { - printf("vnode_setsize for swap files failed: %d\n", error); - goto done; - } - } else { + error = vnode_size(vp, (off_t*) &file_size, ctx); - *size = file_size; - } - } + if (error) { + printf("vnode_size (new file) for swap file failed: %d\n", error); + } + assert(file_size == *size); + vnode_lock_spin(vp); SET(vp->v_flag, VSWAP); vnode_unlock(vp); @@ -173,8 +159,12 @@ vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flag upl_size_t upl_size = 0; upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE; - upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; +#if ENCRYPTED_SWAP + upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; +#else + upl_control_flags = UPL_IOSYNC; +#endif if ((flags & SWAP_READ) == FALSE) { upl_create_flags |= UPL_COPYOUT_FROM; } @@ -243,7 +233,12 @@ vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flag #define MAX_BATCH_TO_TRIM 256 -u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl) +#define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */ + /* the DKIOUNMAP command through w/o acting on it */ + /* this is used by the compressed swap system to reclaim empty space */ + + +u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only) { int error = 0; int trim_index = 0; @@ -251,6 +246,7 @@ u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl) struct vnode *devvp; dk_extent_t *extents; dk_unmap_t unmap; + _dk_cs_unmap_t cs_unmap; if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) return (ENOTSUP); @@ -266,8 +262,16 @@ u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl) extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); - memset (&unmap, 0, sizeof(dk_unmap_t)); - unmap.extents = extents; + if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { + memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t)); + cs_unmap.extents = extents; + + if (route_only == TRUE) + cs_unmap.options = ROUTE_ONLY; + } else { + memset (&unmap, 0, sizeof(dk_unmap_t)); + unmap.extents = extents; + } while (tl) { daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ @@ -306,9 +310,13 @@ u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl) if (trim_index == MAX_BATCH_TO_TRIM) { - unmap.extentsCount = trim_index; - error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); - + if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { + cs_unmap.extentsCount = trim_index; + error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); + } else { + unmap.extentsCount = trim_index; + error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); + } if (error) { goto trim_exit; } @@ -321,9 +329,13 @@ u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl) tl = tl->tl_next; } if (trim_index) { - - unmap.extentsCount = trim_index; 
- error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); + if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { + cs_unmap.extentsCount = trim_index; + error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); + } else { + unmap.extentsCount = trim_index; + error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); + } } trim_exit: kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index 1aa660399..06b5d4e1b 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -40,10 +40,11 @@ #include +#include + #include #include #include -#include #include #include #include @@ -78,6 +79,8 @@ #include #include #include +#include +#include #include #include @@ -101,9 +104,40 @@ int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); -SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, ""); +#if VM_SCAN_FOR_SHADOW_CHAIN +static int vm_shadow_max_enabled = 0; /* Disabled by default */ +extern int proc_shadow_max(void); +static int +vm_shadow_max SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2, oidp) + int value = 0; + + if (vm_shadow_max_enabled) + value = proc_shadow_max(); + + return SYSCTL_OUT(req, &value, sizeof(value)); +} +SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, + 0, 0, &vm_shadow_max, "I", ""); + +SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, ""); + +#endif /* VM_SCAN_FOR_SHADOW_CHAIN */ +SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); +__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( + mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid); /* * Sysctl's related to data/stack execution. 
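vnode_trim_list above accumulates extents into a fixed array and issues DKIOCUNMAP (or, when CoreStorage is present, a pass-through _DKIOCCSUNMAP) whenever the batch fills, plus once more for any remainder. A minimal sketch of that batch-and-flush loop (issue_unmap stands in for the VNOP_IOCTL call, and the batch size is shrunk for the demo):

#include <stdio.h>

#define MAX_BATCH 4                        /* kernel uses MAX_BATCH_TO_TRIM=256 */

struct extent { unsigned long long off, len; };

/* Stand-in for VNOP_IOCTL(devvp, DKIOCUNMAP/_DKIOCCSUNMAP, ...). */
static int
issue_unmap(const struct extent *ext, int count)
{
    (void)ext;
    printf("unmap batch of %d extents\n", count);
    return 0;
}

static int
trim_list(const struct extent *tl, int n)
{
    struct extent batch[MAX_BATCH];
    int trim_index = 0;
    int error = 0;

    for (int i = 0; i < n; i++) {
        batch[trim_index++] = tl[i];
        if (trim_index == MAX_BATCH) {
            /* batch is full: flush it and start over */
            error = issue_unmap(batch, trim_index);
            if (error)
                return error;
            trim_index = 0;
        }
    }
    if (trim_index)                        /* flush the partial final batch */
        error = issue_unmap(batch, trim_index);
    return error;
}

int main(void)
{
    struct extent tl[10];
    for (int i = 0; i < 10; i++)
        tl[i] = (struct extent){ (unsigned long long)i * 4096, 4096 };
    return trim_list(tl, 10);
}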
See osfmk/vm/vm_map.c */ @@ -141,6 +175,17 @@ SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOC int vm_shared_region_unnest_log_interval = 10; int shared_region_unnest_log_count_threshold = 5; +/* + * Shared cache path enforcement. + */ + +static int scdir_enforce = 1; +static char scdir_path[] = "/var/db/dyld/"; + +#ifndef SECURE_KERNEL +SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, ""); +#endif + /* These log rate throttling state variables aren't thread safe, but * are sufficient unto the task. */ @@ -544,6 +589,19 @@ out: return allowed; } +/* + * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__ + * + * Description: Waits for the user space daemon to respond to the request + * we made. Function declared non inline to be visible in + * stackshots and spindumps as well as debugging. + */ +__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( + mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid) +{ + return check_task_access(task_access_port, calling_pid, calling_gid, target_pid); +} + /* * Routine: task_for_pid * Purpose: @@ -618,7 +676,7 @@ task_for_pid( } /* Call up to the task access server */ - error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid); + error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); if (error != MACH_MSG_SUCCESS) { if (error == MACH_RCV_INTERRUPTED) @@ -793,7 +851,7 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) } /* Call up to the task access server */ - error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid); + error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); if (error != MACH_MSG_SUCCESS) { if (error == MACH_RCV_INTERRUPTED) @@ -877,7 +935,7 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) } /* Call up to the task access server */ - error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid); + error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); if (error != MACH_MSG_SUCCESS) { if (error == MACH_RCV_INTERRUPTED) @@ -1004,7 +1062,8 @@ shared_region_check_np( SHARED_REGION_TRACE_DEBUG( ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n", - current_thread(), p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, (uint64_t)uap->start_address)); /* retrieve the current tasks's shared region */ @@ -1025,7 +1084,8 @@ shared_region_check_np( ("shared_region: %p [%d(%s)] " "check_np(0x%llx) " "copyout(0x%llx) error %d\n", - current_thread(), p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, (uint64_t)uap->start_address, (uint64_t)start_address, error)); } @@ -1038,7 +1098,8 @@ shared_region_check_np( SHARED_REGION_TRACE_DEBUG( ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n", - current_thread(), p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, (uint64_t)uap->start_address, (uint64_t)start_address, error)); return error; @@ -1064,7 +1125,8 @@ shared_region_copyin_mappings( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(): " "copyin(0x%llx, %d) failed (error=%d)\n", - current_thread(), p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, (uint64_t)user_mappings, 
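The __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__ wrapper introduced above exists purely so that a thread blocked on the taskgated upcall is self-describing in stackshots and spindumps. The trick generalizes: a noinline, loudly named frame around any long block point. A small sketch, assuming a sleep as the stand-in for check_task_access() (the kernel's double-underscore name is mimicked for flavor only; such identifiers are formally reserved in user C):

#include <stdio.h>
#include <unistd.h>

/* The real wait: here a sleep stands in for check_task_access(). */
static int
check_access_upcall(int target_pid)
{
    (void)target_pid;
    sleep(1);
    return 0;
}

/* noinline keeps this frame in every backtrace, and the shouting name
 * tells whoever reads the stack exactly what the thread is blocked on. */
__attribute__((noinline)) int
WAITING_ON_ACCESS_UPCALL(int target_pid)
{
    return check_access_upcall(target_pid);
}

int main(void)
{
    printf("upcall -> %d\n", WAITING_ON_ACCESS_UPCALL(42));
    return 0;
}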
mappings_count, error)); } return error; @@ -1092,7 +1154,7 @@ _shared_region_map_and_slide( int error; kern_return_t kr; struct fileproc *fp; - struct vnode *vp, *root_vp; + struct vnode *vp, *root_vp, *scdir_vp; struct vnode_attr va; off_t fs; memory_object_size_t file_size; @@ -1104,11 +1166,13 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_DEBUG( ("shared_region: %p [%d(%s)] -> map\n", - current_thread(), p->p_pid, p->p_comm)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); shared_region = NULL; fp = NULL; vp = NULL; + scdir_vp = NULL; /* get file structure from file descriptor */ error = fp_lookup(p, fd, &fp, 0); @@ -1116,7 +1180,8 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d lookup failed (error=%d)\n", - current_thread(), p->p_pid, p->p_comm, fd, error)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, fd, error)); goto done; } @@ -1125,7 +1190,8 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d not a vnode (type=%d)\n", - current_thread(), p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, fd, FILEGLOB_DTYPE(fp->f_fglob))); error = EINVAL; goto done; @@ -1136,7 +1202,8 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d not readable\n", - current_thread(), p->p_pid, p->p_comm, fd)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, fd)); error = EPERM; goto done; } @@ -1147,7 +1214,8 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d getwithref failed (error=%d)\n", - current_thread(), p->p_pid, p->p_comm, fd, error)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, fd, error)); goto done; } vp = (struct vnode *) fp->f_fglob->fg_data; @@ -1157,8 +1225,10 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "not a file (type=%d)\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name, vp->v_type)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), + vp->v_name, vp->v_type)); error = EINVAL; goto done; } @@ -1197,8 +1267,9 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "not on process's root volume\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); error = EPERM; goto done; } @@ -1211,28 +1282,62 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "vnode_getattr(%p) failed (error=%d)\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name, vp, error)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, + (void *)VM_KERNEL_ADDRPERM(vp), error)); goto done; } if (va.va_uid != 0) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "owned by uid=%d instead of 0\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name, va.va_uid)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), + vp->v_name, va.va_uid)); error = EPERM; goto done; } + if (scdir_enforce) { + /* get vnode for scdir_path */ + error = vnode_lookup(scdir_path, 0, &scdir_vp, 
vfs_context_current()); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "vnode_lookup(%s) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, + scdir_path, error)); + goto done; + } + + /* ensure parent is scdir_vp */ + if (vnode_parent(vp) != scdir_vp) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "shared cache file not in %s\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), + vp->v_name, scdir_path)); + error = EPERM; + goto done; + } + } + /* get vnode size */ error = vnode_size(vp, &fs, vfs_context_current()); if (error) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "vnode_size(%p) failed (error=%d)\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name, vp, error)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, + (void *)VM_KERNEL_ADDRPERM(vp), error)); goto done; } file_size = fs; @@ -1243,8 +1348,9 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "no memory object\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); error = EINVAL; goto done; } @@ -1256,8 +1362,9 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "no shared region\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); goto done; } @@ -1275,8 +1382,9 @@ _shared_region_map_and_slide( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "vm_shared_region_map_file() failed kr=0x%x\n", - current_thread(), p->p_pid, p->p_comm, - vp, vp->v_name, kr)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr)); switch (kr) { case KERN_INVALID_ADDRESS: error = EFAULT; @@ -1331,6 +1439,10 @@ done: fp_drop(p, fd, fp, 0); fp = NULL; } + if (scdir_vp != NULL) { + (void)vnode_put(scdir_vp); + scdir_vp = NULL; + } if (shared_region != NULL) { vm_shared_region_deallocate(shared_region); @@ -1338,7 +1450,8 @@ done: SHARED_REGION_TRACE_DEBUG( ("shared_region: %p [%d(%s)] <- map\n", - current_thread(), p->p_pid, p->p_comm)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); return error; } @@ -1379,7 +1492,8 @@ shared_region_map_and_slide_np( SHARED_REGION_TRACE_INFO( ("shared_region: %p [%d(%s)] map(): " "no mappings\n", - current_thread(), p->p_pid, p->p_comm)); + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); kr = 0; /* no mappings: we're done ! 
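The scdir_enforce check above pins the shared cache file's parent directory to scdir_path by looking up a vnode for the directory and comparing it against vnode_parent(vp). A userspace analogue using realpath and an exact parent-prefix comparison (/var/db/dyld/ comes from the hunk above; the helper name is made up):

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns 1 iff path's canonical parent directory is exactly scdir. */
static int
in_shared_cache_dir(const char *path, const char *scdir)
{
    char resolved[PATH_MAX];

    if (realpath(path, resolved) == NULL)
        return 0;                          /* can't resolve: reject */

    char *slash = strrchr(resolved, '/');
    if (slash == NULL)
        return 0;
    size_t parent_len = (size_t)(slash - resolved) + 1;  /* keep the '/' */

    /* exact parent match, like the vnode_parent(vp) != scdir_vp test */
    return strlen(scdir) == parent_len &&
        strncmp(resolved, scdir, parent_len) == 0;
}

int main(void)
{
    /* /var/db/dyld/ is the enforced directory in the hunk above */
    printf("%d\n", in_shared_cache_dir("/var/db/dyld/dyld_shared_cache_x86_64",
        "/var/db/dyld/"));
    printf("%d\n", in_shared_cache_dir("/tmp/evil_cache", "/var/db/dyld/"));
    return 0;
}

Comparing resolved vnodes (or canonical paths) rather than the caller-supplied string is the point: it defeats symlink and ../ games that a plain strncmp on the argument would miss.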
*/ goto done; } else if (mappings_count <= SFM_MAX_STACK) { @@ -1388,7 +1502,8 @@ shared_region_map_and_slide_np( SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(): " "too many mappings (%d)\n", - current_thread(), p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, mappings_count)); kr = KERN_FAILURE; goto done; @@ -1412,6 +1527,9 @@ done: /* sysctl overflow room */ +SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, + (int *) &page_size, 0, "vm page size"); + /* vm_page_free_target is provided as a makeshift solution for applications that want to allocate buffer space, possibly purgeable memory, but not cause inactive pages to be reclaimed. It allows the app to calculate how much memory is free outside the free target. */ @@ -1514,6 +1632,11 @@ SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLF SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)"); SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)"); +/* counts of pages prefaulted when entering a memory object */ +extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout; +SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, ""); +SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, ""); + #include #include diff --git a/bsd/vm/vnode_pager.c b/bsd/vm/vnode_pager.c index b90ca5366..45e338a5e 100644 --- a/bsd/vm/vnode_pager.c +++ b/bsd/vm/vnode_pager.c @@ -102,6 +102,33 @@ vnode_pager_isSSD(vnode_t vp) return (FALSE); } +#if CONFIG_IOSCHED +void +vnode_pager_issue_reprioritize_io(struct vnode *devvp, uint64_t blkno, uint32_t len, int priority) +{ + u_int32_t blocksize = 0; + dk_extent_t extent; + dk_set_tier_t set_tier; + int error = 0; + + error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blocksize, 0, vfs_context_kernel()); + if (error) + return; + + memset(&extent, 0, sizeof(dk_extent_t)); + memset(&set_tier, 0, sizeof(dk_set_tier_t)); + + extent.offset = blkno * (u_int64_t) blocksize; + extent.length = len; + + set_tier.extents = &extent; + set_tier.extentsCount = 1; + set_tier.tier = priority; + + error = VNOP_IOCTL(devvp, DKIOCSETTIER, (caddr_t)&set_tier, 0, vfs_context_kernel()); + return; +} +#endif uint32_t vnode_pager_isinuse(struct vnode *vp) @@ -349,6 +376,30 @@ vnode_pageout(struct vnode *vp, } else pl = ubc_upl_pageinfo(upl); + /* + * Ignore any non-present pages at the end of the + * UPL so that we aren't looking at a upl that + * may already have been freed by the preceeding + * aborts/completions. + */ + base_index = upl_offset / PAGE_SIZE; + + for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == base_index) { + /* + * no pages were returned, so release + * our hold on the upl and leave + */ + if ( !(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY); + + goto out; + } + } + isize = ((pg_index + 1) - base_index) * PAGE_SIZE; + /* * we come here for pageouts to 'real' files and * for msyncs... the upl may not contain any @@ -356,6 +407,7 @@ vnode_pageout(struct vnode *vp, * through it and find the 'runs' of dirty pages * to call VNOP_PAGEOUT on... 
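The vnode_pageout change above moves the "ignore trailing non-present pages" scan ahead of the other early-out checks, so isize is clipped before any path commits or aborts the UPL. A sketch of that backwards scan over a page-present bitmap (upl_page_present is modeled as a plain int array):

#include <stdio.h>

#define PAGE_SZ 4096

/* Clip size to drop any run of non-present pages at the end; returns 0
 * if no page in the range is present (caller aborts the UPL and leaves). */
static int
clip_to_present(const int *present, int base_index, int size)
{
    int pg_index;

    for (pg_index = base_index + size / PAGE_SZ; pg_index > base_index;) {
        if (present[--pg_index])
            break;                         /* found the last present page */
        if (pg_index == base_index)
            return 0;                      /* nothing present at all */
    }
    return ((pg_index + 1) - base_index) * PAGE_SZ;
}

int main(void)
{
    int present[8] = { 1, 1, 1, 0, 1, 0, 0, 0 };  /* last 3 pages absent */
    printf("clipped size: %d (expect %d)\n",
        clip_to_present(present, 0, 8 * PAGE_SZ), 5 * PAGE_SZ);

    int none[4] = { 0, 0, 0, 0 };
    printf("clipped size: %d (expect 0)\n",
        clip_to_present(none, 0, 4 * PAGE_SZ));
    return 0;
}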
*/ + if (ubc_getsize(vp) == 0) { /* * if the file has been effectively deleted, then @@ -388,29 +440,6 @@ vnode_pageout(struct vnode *vp, } goto out; } - /* - * Ignore any non-present pages at the end of the - * UPL so that we aren't looking at a upl that - * may already have been freed by the preceeding - * aborts/completions. - */ - base_index = upl_offset / PAGE_SIZE; - - for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) { - if (upl_page_present(pl, --pg_index)) - break; - if (pg_index == base_index) { - /* - * no pages were returned, so release - * our hold on the upl and leave - */ - if ( !(flags & UPL_NOCOMMIT)) - ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY); - - goto out; - } - } - isize = ((pg_index + 1) - base_index) * PAGE_SIZE; offset = upl_offset; pg_index = base_index; @@ -547,7 +576,7 @@ vnode_pagein( if (upl == (upl_t)NULL) { flags &= ~UPL_NOCOMMIT; - if (size > (MAX_UPL_SIZE * PAGE_SIZE)) { + if (size > MAX_UPL_SIZE_BYTES) { result = PAGER_ERROR; error = PAGER_ERROR; goto out; diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports index a7abbc7c9..86f815241 100644 --- a/config/BSDKernel.exports +++ b/config/BSDKernel.exports @@ -488,13 +488,11 @@ _proc_selfppid _proc_selfpgrpid _proc_signal _proc_suser -_proc_tbe _proto_inject _proto_input _proto_register_plumber _proto_unregister_plumber _random -_read_random _selrecord _selthreadclear _seltrue @@ -566,21 +564,9 @@ _sysctl_handle_long _sysctl_handle_opaque _sysctl_handle_quad _sysctl_handle_string -_sysctl_int -_sysctl_mib_init -_sysctl_quad -_sysctl_rdint -_sysctl_rdquad -_sysctl_rdstring -_sysctl_rdstruct -_sysctl_register_all _sysctl_register_fixed _sysctl_register_oid -_sysctl_register_set -_sysctl_string -_sysctl_struct _sysctl_unregister_oid -_sysctl_unregister_set _thread_issignal _timevaladd _timevalfix @@ -639,6 +625,7 @@ _vcount _vflush _vfs_64bitready _vfs_addname +_vfs_attr_pack _vfs_authcache_ttl _vfs_authopaque _vfs_authopaqueaccess @@ -689,6 +676,7 @@ _vfs_setflags _vfs_setfsprivate _vfs_setioattr _vfs_setlocklocal +_vfs_setup_vattr_from_attrlist _vfs_setmaxsymlen _vfs_statfs _vfs_sysctl @@ -797,6 +785,7 @@ _vnop_pathconf_desc _vnop_read_desc _vnop_readdir_desc _vnop_readdirattr_desc +_vnop_getattrlistbulk_desc _vnop_readlink_desc _vnop_reclaim_desc _vnop_remove_desc diff --git a/config/DtraceIgnored.symbols b/config/DtraceIgnored.symbols deleted file mode 100644 index 8b1378917..000000000 --- a/config/DtraceIgnored.symbols +++ /dev/null @@ -1 +0,0 @@ - diff --git a/config/IOKit.exports b/config/IOKit.exports index aaf27f1a9..0a112a032 100644 --- a/config/IOKit.exports +++ b/config/IOKit.exports @@ -298,6 +298,8 @@ __ZN12IOUserClient17setAsyncReferenceEPjP8ipc_portPvS3_ __ZN12IOUserClient18clientHasPrivilegeEPvPKc __ZN12IOUserClient20exportObjectToClientEP4taskP8OSObjectPS3_ __ZN12IOUserClient21destroyUserReferencesEP8OSObject +__ZN12IOUserClient21copyClientEntitlementEP4taskPKc +__ZN12IOUserClient22clientHasAuthorizationEP4taskP9IOService __ZN12IOUserClient23releaseAsyncReference64EPy __ZN12IOUserClient23releaseNotificationPortEP8ipc_port __ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor @@ -1131,6 +1133,8 @@ __ZN9IOService26deRegisterInterestedDriverEPS_ __ZN9IOService27maxCapabilityForDomainStateEm __ZN9IOService31initialPowerStateForDomainStateEm __ZN9IOService4freeEv +__ZN9IOService4initEP12OSDictionary +__ZN9IOService4initEP15IORegistryEntryPK15IORegistryPlane __ZN9IOService4stopEPS_ __ZN9IOService5startEPS_ 
__ZN9IOService6PMinitEv @@ -1494,6 +1498,7 @@ _gIONameMatchedKey _gIOParentMatchKey _gIOPathMatchKey _gIOPlatformActiveActionKey +_gIOPlatformHaltRestartActionKey _gIOPlatformQuiesceActionKey _gIOPlatformSleepActionKey _gIOPlatformWakeActionKey @@ -1515,3 +1520,163 @@ _gPlatformInterruptControllerName _registerPrioritySleepWakeInterest _registerSleepWakeInterest _vetoSleepWakeNotification + + +__ZN10IOReporter10addChannelEyPKc +__ZN10IOReporter10gMetaClassE +__ZN10IOReporter10legendWithEP7OSArrayS1_19IOReportChannelTypey +__ZN10IOReporter10superClassE +__ZN10IOReporter12createLegendEv +__ZN10IOReporter12lockReporterEv +__ZN10IOReporter12updateReportEP19IOReportChannelListjPvS2_ +__ZN10IOReporter14copyChannelIDsEv +__ZN10IOReporter14unlockReporterEv +__ZN10IOReporter15configureReportEP19IOReportChannelListjPvS2_ +__ZN10IOReporter15getChannelIndexEyPi +__ZN10IOReporter16getElementValuesEi +__ZN10IOReporter16setElementValuesEiP21IOReportElementValuesy +__ZN10IOReporter16updateAllReportsEP5OSSetP19IOReportChannelListjPvS4_ +__ZN10IOReporter17copyElementValuesEiP21IOReportElementValues +__ZN10IOReporter17getChannelIndicesEyPiS0_ +__ZN10IOReporter17handleSwapCleanupEi +__ZN10IOReporter17handleSwapPrepareEi +__ZN10IOReporter18handleCreateLegendEv +__ZN10IOReporter18handleUpdateReportEP19IOReportChannelListjPvS2_ +__ZN10IOReporter18lockReporterConfigEv +__ZN10IOReporter19configureAllReportsEP5OSSetP19IOReportChannelListjPvS4_ +__ZN10IOReporter19updateChannelValuesEi +__ZN10IOReporter19updateReportChannelEiPiP24IOBufferMemoryDescriptor +__ZN10IOReporter20getFirstElementIndexEyPi +__ZN10IOReporter20handleAddChannelSwapEyPK8OSSymbol +__ZN10IOReporter20unlockReporterConfigEv +__ZN10IOReporter21handleConfigureReportEP19IOReportChannelListjPvS2_ +__ZN10IOReporter4freeEv +__ZN10IOReporter4initEP9IOService19IOReportChannelTypey +__ZN10IOReporter9MetaClassC1Ev +__ZN10IOReporter9MetaClassC2Ev +__ZN10IOReporter9metaClassE +__ZN10IOReporterC1EPK11OSMetaClass +__ZN10IOReporterC1Ev +__ZN10IOReporterC2EPK11OSMetaClass +__ZN10IOReporterC2Ev +__ZN10IOReporterD0Ev +__ZN10IOReporterD1Ev +__ZN10IOReporterD2Ev +__ZN14IOPMrootDomain12updateReportEP19IOReportChannelListjPvS2_ +__ZN14IOPMrootDomain15configureReportEP19IOReportChannelListjPvS2_ +__ZN14IOReportLegend10gMetaClassE +__ZN14IOReportLegend10superClassE +__ZN14IOReportLegend14addLegendEntryEP12OSDictionaryPKcS3_ +__ZN14IOReportLegend14organizeLegendEP12OSDictionaryPK8OSSymbolS4_ +__ZN14IOReportLegend17addReporterLegendEP10IOReporterPKcS3_ +__ZN14IOReportLegend17addReporterLegendEP9IOServiceP10IOReporterPKcS5_ +__ZN14IOReportLegend4freeEv +__ZN14IOReportLegend4withEP7OSArray +__ZN14IOReportLegend8initWithEP7OSArray +__ZN14IOReportLegend9MetaClassC1Ev +__ZN14IOReportLegend9MetaClassC2Ev +__ZN14IOReportLegend9getLegendEv +__ZN14IOReportLegend9metaClassE +__ZN14IOReportLegendC1EPK11OSMetaClass +__ZN14IOReportLegendC1Ev +__ZN14IOReportLegendC2EPK11OSMetaClass +__ZN14IOReportLegendC2Ev +__ZN14IOReportLegendD0Ev +__ZN14IOReportLegendD1Ev +__ZN14IOReportLegendD2Ev +__ZN15IOStateReporter10gMetaClassE +__ZN15IOStateReporter10setStateIDEyiy +__ZN15IOStateReporter10superClassE +__ZN15IOStateReporter14_getStateValueEyyNS_13valueSelectorE +__ZN15IOStateReporter15setChannelStateEyy +__ZN15IOStateReporter15setChannelStateEyyyy +__ZN15IOStateReporter16_getStateIndicesEyyPiS0_ +__ZN15IOStateReporter16handleSetStateIDEyiy +__ZN15IOStateReporter17handleSwapCleanupEi +__ZN15IOStateReporter17handleSwapPrepareEi +__ZN15IOStateReporter17setStateByIndicesEii 
+__ZN15IOStateReporter17setStateByIndicesEiiyy +__ZN15IOStateReporter19updateChannelValuesEi +__ZN15IOStateReporter20handleAddChannelSwapEyPK8OSSymbol +__ZN15IOStateReporter20overrideChannelStateEyyyyy +__ZN15IOStateReporter21getStateInTransitionsEyy +__ZN15IOStateReporter21getStateResidencyTimeEyy +__ZN15IOStateReporter21incrementChannelStateEyyyyy +__ZN15IOStateReporter23handleSetStateByIndicesEiiyy +__ZN15IOStateReporter26getStateLastTransitionTimeEyy +__ZN15IOStateReporter29getStateLastChannelUpdateTimeEy +__ZN15IOStateReporter35handleOverrideChannelStateByIndicesEiiyyy +__ZN15IOStateReporter36handleIncrementChannelStateByIndicesEiiyyy +__ZN15IOStateReporter4freeEv +__ZN15IOStateReporter4withEP9IOServicetiy +__ZN15IOStateReporter8initWithEP9IOServicetsy +__ZN15IOStateReporter8setStateEy +__ZN15IOStateReporter8setStateEyyy +__ZN15IOStateReporter9MetaClassC1Ev +__ZN15IOStateReporter9MetaClassC2Ev +__ZN15IOStateReporter9metaClassE +__ZN15IOStateReporterC1EPK11OSMetaClass +__ZN15IOStateReporterC1Ev +__ZN15IOStateReporterC2EPK11OSMetaClass +__ZN15IOStateReporterC2Ev +__ZN15IOStateReporterD0Ev +__ZN15IOStateReporterD1Ev +__ZN15IOStateReporterD2Ev +__ZN16IOSimpleReporter10gMetaClassE +__ZN16IOSimpleReporter10superClassE +__ZN16IOSimpleReporter14incrementValueEyx +__ZN16IOSimpleReporter4withEP9IOServicety +__ZN16IOSimpleReporter8getValueEy +__ZN16IOSimpleReporter8initWithEP9IOServicety +__ZN16IOSimpleReporter8setValueEyx +__ZN16IOSimpleReporter9MetaClassC1Ev +__ZN16IOSimpleReporter9MetaClassC2Ev +__ZN16IOSimpleReporter9metaClassE +__ZN16IOSimpleReporterC1EPK11OSMetaClass +__ZN16IOSimpleReporterC1Ev +__ZN16IOSimpleReporterC2EPK11OSMetaClass +__ZN16IOSimpleReporterC2Ev +__ZN16IOSimpleReporterD0Ev +__ZN16IOSimpleReporterD1Ev +__ZN16IOSimpleReporterD2Ev +__ZN19IOHistogramReporter10gMetaClassE +__ZN19IOHistogramReporter10superClassE +__ZN19IOHistogramReporter10tallyValueEx +__ZN19IOHistogramReporter18handleCreateLegendEv +__ZN19IOHistogramReporter4freeEv +__ZN19IOHistogramReporter4withEP9IOServicetyPKcyiP24IOHistogramSegmentConfig +__ZN19IOHistogramReporter8initWithEP9IOServicetyPK8OSSymbolyiP24IOHistogramSegmentConfig +__ZN19IOHistogramReporter9MetaClassC1Ev +__ZN19IOHistogramReporter9MetaClassC2Ev +__ZN19IOHistogramReporter9metaClassE +__ZN19IOHistogramReporterC1EPK11OSMetaClass +__ZN19IOHistogramReporterC1Ev +__ZN19IOHistogramReporterC2EPK11OSMetaClass +__ZN19IOHistogramReporterC2Ev +__ZN19IOHistogramReporterD0Ev +__ZN19IOHistogramReporterD1Ev +__ZN19IOHistogramReporterD2Ev +__ZN9IOService23updatePowerStatesReportEjPvS0_ +__ZN9IOService23updateSimplePowerReportEjPvS0_ +__ZN9IOService26configurePowerStatesReportEjPv +__ZN9IOService26configureSimplePowerReportEjPv +__ZNK10IOReporter12getMetaClassEv +__ZNK10IOReporter9MetaClass5allocEv +__ZNK14IOReportLegend12getMetaClassEv +__ZNK14IOReportLegend9MetaClass5allocEv +__ZNK15IOStateReporter12getMetaClassEv +__ZNK15IOStateReporter9MetaClass5allocEv +__ZNK16IOSimpleReporter12getMetaClassEv +__ZNK16IOSimpleReporter9MetaClass5allocEv +__ZNK19IOHistogramReporter12getMetaClassEv +__ZNK19IOHistogramReporter9MetaClass5allocEv +__ZTV10IOReporter +__ZTV14IOReportLegend +__ZTV15IOStateReporter +__ZTV16IOSimpleReporter +__ZTV19IOHistogramReporter +__ZTVN10IOReporter9MetaClassE +__ZTVN14IOReportLegend9MetaClassE +__ZTVN15IOStateReporter9MetaClassE +__ZTVN16IOSimpleReporter9MetaClassE +__ZTVN19IOHistogramReporter9MetaClassE diff --git a/config/IOKit.x86_64.exports b/config/IOKit.x86_64.exports index 217b17b01..f053710c8 100644 --- a/config/IOKit.x86_64.exports +++ 
b/config/IOKit.x86_64.exports @@ -1,3 +1,6 @@ +_IOLockSleep_darwin14 +_IOLockSleepDeadline_darwin14 +_IOLockWakeup_darwin14 _IOOFPathMatching _IOSpinUnlock _IOTrySpinLock diff --git a/config/Libkern.exports b/config/Libkern.exports index be87ea7c5..04d952f8f 100644 --- a/config/Libkern.exports +++ b/config/Libkern.exports @@ -703,7 +703,7 @@ _strtol _strtoq _strtoul _strtouq -_sysctlbyname +_sysctlbyname:_kernel_sysctlbyname _uuid_clear _uuid_compare _uuid_copy diff --git a/config/MACFramework.exports b/config/MACFramework.exports index 73dda1064..aa8401775 100644 --- a/config/MACFramework.exports +++ b/config/MACFramework.exports @@ -21,6 +21,6 @@ _sbuf_new _sbuf_printf _sbuf_putc _strsep - +_sysctl__security_mac_children _VNOP_SETXATTR _VNOP_GETXATTR diff --git a/config/MACFramework.x86_64.exports b/config/MACFramework.x86_64.exports index aa74fd56a..b296564fb 100644 --- a/config/MACFramework.x86_64.exports +++ b/config/MACFramework.x86_64.exports @@ -8,4 +8,3 @@ _mac_mbuf_free _mac_schedule_userret _mac_unwire _mac_wire -_sysctl__security_mac_children diff --git a/bsd/conf/MASTER b/config/MASTER similarity index 61% rename from bsd/conf/MASTER rename to config/MASTER index da9dcbea2..7e0b5f60c 100644 --- a/bsd/conf/MASTER +++ b/config/MASTER @@ -1,6 +1,8 @@ # # Mach Operating System # Copyright (c) 1986 Carnegie-Mellon University +# Copyright 2001-2014 Apple Inc. +# # All rights reserved. The CMU software License Agreement # specifies the terms and conditions for use and redistribution. # @@ -39,21 +41,6 @@ # ####################################################################### # -# STANDARD CONFIGURATION OPTIONS (select any combination) -# -# debug = extra kernel level debugging support -# mach = Mach support -# -# EXPERIMENTAL CONFIGURATION OPTIONS (select any combination, carefully) -# -# host = host resource control support -# -# MULTI-PROCESSOR CONFIGURATION (select at most one) -# -# multi16 = enable 16 multi-processors -# multi32 = enable 32 multi-processors -# multi48 = enable 48 multi-processors -# # SYSTEM SIZE CONFIGURATION (select exactly one) # # xlarge = extra large scale system configuration @@ -64,61 +51,31 @@ # bsmall = special extra small scale system configuration # ####################################################################### -# -# Standard Mach Research Configurations: -# -------- ---- -------- --------------- -# -# These are the default configurations that can be used by most sites. -# They are used internally by the Mach project. -# -# MACH = [mach multi16 medium debug] -# -####################################################################### # -ident NeXT - -# obsolete timezone spec -options TIMEZONE=0, PST=0 -options QUOTA # # options INET # # -options NEW_VM_CODE # # -options OLD_VM_CODE # # options HW_AST # Hardware ast support # options HW_FOOTPRINT # Cache footprint support # -options CONFIG_LCTX # Login Context options MACH # Standard Mach features # -options MACH_ASSERT # Compile in assertions # options MACH_COMPAT # Vendor syscall compatibility # -options MACH_COUNTERS # counters # -options MACH_DEBUG # IPC debugging interface # options MACH_FASTLINK # Fast symbolic links -options MACH_FIXPRI # Fixed priority threads # options MACH_HOST # Mach host (resource alloc.) 
# options MACH_IPC_COMPAT # Enable old IPC interface # -options MACH_IPC_DEBUG # Enable IPC debugging calls # options MACH_IPC_TEST # Testing code/printfs # -options MACH_LDEBUG # Sanity-check simple locking # -options CONFIG_ZLEAKS # Live zone leak debug sysctls # options MACH_NP # Mach IPC support # options MACH_NBC # No buffer cache # options MACH_NET # Fast network access # options MACH_XP # external pager support # -options SIMPLE_CLOCK # don't assume fixed tick # -options XPR_DEBUG # kernel tracing # -options KDEBUG # kernel tracing # -options IST_KDEBUG # limited kernel tracing # -options NO_KDEBUG # no kernel tracing # -options DDM_DEBUG # driverkit-style tracing # -options MACH_OLD_VM_COPY # Old vm_copy technology # options NO_DIRECT_RPC # for untyped mig servers # options LOOP # loopback support # -options MROUTING # multicast routing # options VLAN # # options BOND # # +options AH_ALL_CRYPTO # AH all crypto algs # +options IPCOMP_ZLIB # IP compression using zlib # options PF # Packet Filter # options PF_ALTQ # PF ALTQ (Alternate Queueing) # +options PF_ECN # PF use ECN marking # options PFLOG # PF log interface # options PKTSCHED_CBQ # CBQ packet scheduler # options PKTSCHED_HFSC # H-FSC packet scheduler # @@ -146,9 +103,10 @@ options SYSV_SEM # SVID semaphores # options SYSV_MSG # SVID messages # options SYSV_SHM # SVID shared mem # options PSYNCH # pthread synch # -options DEVELOPMENT # dev kernel # options FLOW_DIVERT # - +options NECP # +options CONTENT_FILTER # # +options PACKET_MANGLER # # # secure_kernel - secure kernel from user programs options SECURE_KERNEL # @@ -159,8 +117,8 @@ options OLD_SEMWAIT_SIGNAL # old semwait_signal handler # options SOCKETS # socket support # options DIAGNOSTIC # diagnostics # -options CONFIG_DTRACE # dtrace support # options GPROF # build profiling # +options PROFILE # kernel profiling # options SENDFILE # sendfile # options NETWORKING # networking layer # options CONFIG_FSE # file system events # @@ -174,7 +132,6 @@ options CONFIG_WORKQUEUE # # # 4.4 filesystems # -options FFS # Fast Filesystem Support # options HFS # HFS/HFS+ support # options MOCKFS # Boot from an executable # options FIFO # fifo support # @@ -186,12 +143,12 @@ options CONFIG_HFS_STD # hfs standard support # options CONFIG_HFS_TRIM # hfs trims unused blocks # options CONFIG_HFS_MOUNT_UNMAP #hfs trims blocks at mount # options CONFIG_HFS_DIRLINK #allow directory hardlink creation # +options CONFIG_DEV_KMEM # /dev/kmem device for reading KVA # # # file system features # options QUOTA # file system quotas # -options REV_ENDIAN_FS # Reverse Endian FS # options NAMEDSTREAMS # named stream vnop support # options CONFIG_APPLEDOUBLE # apple double support # options CONFIG_VOLFS # volfs path support (legacy) # @@ -199,6 +156,7 @@ options CONFIG_IMGSRC_ACCESS # source of imageboot dmg # options CONFIG_TRIGGERS # trigger vnodes # options CONFIG_EXT_RESOLVER # e.g. 
memberd # options CONFIG_SEARCHFS # searchfs syscall support # +options CONFIG_SECLUDED_RENAME # secluded rename syscall # # # NFS support @@ -209,12 +167,6 @@ options NFSSERVER # Be an NFS server # # # Machine Independent Apple Features # -options KERNSERV # kernel_server intarface # -options MALLOCDEBUG # kernel malloc debugging # -options DRIVERKIT # driverkit support # -options KERNOBJC # Objective-C support # -options OBJCTEST # Objc internal test # -options KERNEL_STACK # MI kernel stack support # profile # build a profiling kernel # # @@ -232,32 +184,14 @@ pseudo-device gif 1 # pseudo-device dummy 2 # pseudo-device stf 1 # -options crypto # -options ALLCRYPTO # +options CRYPTO # +options CRYPTO_SHA2 # +options ENCRYPTED_SWAP # options ZLIB # inflate/deflate support # options IF_BRIDGE # -makeoptions LIBDRIVER = "libDriver_kern.o" # -makeoptions LIBOBJC = "libkobjc.o" # - -maxusers 64 # -maxusers 50 # -maxusers 32 # -maxusers 16 # -maxusers 8 # -maxusers 2 # - -# -# Multi-processor selection -# -pseudo-device cpus 64 # -pseudo-device cpus 32 # -pseudo-device cpus 16 # -pseudo-device cpus 2 # -pseudo-device cpus 1 # - # # configurable kernel event related resources # @@ -368,14 +302,6 @@ options CONFIG_AIO_THREAD_COUNT=4 # options CONFIG_AIO_THREAD_COUNT=3 # options CONFIG_AIO_THREAD_COUNT=2 # -# -# configurable kernel related resources (CONFIG_THREAD_MAX needs to stay in -# sync with osfmk/conf/MASTER until we fix the config system...) todo XXX -# -options CONFIG_THREAD_MAX=2560 # -options CONFIG_THREAD_MAX=1536 # -options CONFIG_THREAD_MAX=1024 # - options CONFIG_MAXVIFS=32 # options CONFIG_MAXVIFS=16 # options CONFIG_MAXVIFS=2 # @@ -405,10 +331,20 @@ options CONFIG_NO_KPRINTF_STRINGS # # options CONFIG_FINE_LOCK_GROUPS # + # support dynamic signing of code # options CONFIG_DYNAMIC_CODE_SIGNING # +# enforce library validation on all processes. +# +options CONFIG_ENFORCE_LIBRARY_VALIDATION # + +# +# code decryption... 
used on embedded for app protection, DSMOS on desktop +# +options CONFIG_CODE_DECRYPTION # + # # User Content Protection, used on embedded # @@ -431,12 +367,22 @@ options CONFIG_FREEZE # options CHECK_CS_VALIDATION_BITMAP # +# +# enable detection of file cache thrashing - used on platforms with +# dynamic VM compression enabled +# +options CONFIG_PHANTOM_CACHE # + # # memory pressure event support -# must be set in both bsd/conf and osfmk/conf MASTER files # options VM_PRESSURE_EVENTS # +# +# I/O Scheduling +# +options CONFIG_IOSCHED # + # # Enable inheritance of importance through specially marked mach ports and for file locks # For now debug is enabled wherever inheritance is @@ -453,6 +399,11 @@ options CONFIG_PROC_UUID_POLICY # # options CONFIG_IN_KERNEL_TESTS # +# +# ECC data logging +# +options CONFIG_ECC_LOGGING # + # # Ethernet (ARP) # @@ -478,12 +429,7 @@ pseudo-device ptmx 1 init ptmx_init # # vnode device # -pseudo-device vndevice 16 init vndevice_init # -pseudo-device vndevice 8 init vndevice_init # -pseudo-device vndevice 4 init vndevice_init # -pseudo-device vndevice 3 init vndevice_init # -pseudo-device vndevice 2 init vndevice_init # -pseudo-device vndevice 2 init vndevice_init # +pseudo-device vndevice 4 init vndevice_init # # # memory device # @@ -499,12 +445,6 @@ pseudo-device bpfilter 4 init bpf_init # # fsevents device pseudo-device fsevents 1 init fsevents_init # -# -# shim to "linux" mach disk drivers (mach drivers must also be turned on) -# -# now using iokit disk shim, this is code is obsolete -#pseudo-device diskshim - pseudo-device random 1 init random_init pseudo-device dtrace 1 init dtrace_init # pseudo-device helper 1 init helper_init # @@ -513,3 +453,264 @@ pseudo-device sdt 1 init sdt_init # pseudo-device systrace 1 init systrace_init # pseudo-device fbt 1 init fbt_init # pseudo-device profile_prvd 1 init profile_init # + +# +# IOKit configuration options +# + +options HIBERNATION # system hibernation # +options IOKITCPP # C++ implementation # +options IOKITSTATS # IOKit statistics # +options CONFIG_SLEEP # # +options CONFIG_MAX_THREADS=64 # IOConfigThread threads +options NO_KEXTD # +options NO_KERNEL_HID # + +# +# Libkern configuration options +# + +options LIBKERNCPP # C++ implementation # +options CONFIG_KXLD # kxld/runtime linking of kexts # +options CONFIG_KEC_FIPS # Kernel External Components for FIPS compliance (KEC_FIPS) # + +# Note that when adding this config option to an architecture, one MUST +# add the architecture to the preprocessor test at the beginning of +# libkern/kmod/cplus_{start.c,stop.c}. +options CONFIG_STATIC_CPPINIT # Static library initializes kext cpp runtime # + +# +# libsa configuration options +# + +# CONFIG_KEXT_BASEMENT - alloc post boot loaded kexts after prelinked kexts +# +options CONFIG_KEXT_BASEMENT # # + +# +# security configuration options +# + +options CONFIG_LCTX # Login Context + +options CONFIG_MACF # Mandatory Access Control Framework # +options CONFIG_MACF_SOCKET_SUBSET # MAC socket subset (no labels) # +#options CONFIG_MACF_SOCKET # MAC socket labels # +#options CONFIG_MACF_NET # mbuf # +#options CONFIG_MACF_DEBUG # debug # + +options CONFIG_AUDIT # Kernel auditing # + + +# +# MACH configuration options. +# +# TASK_SWAPPER enables code that manages demand for physical memory by +# forcibly suspending tasks when the demand exceeds supply. This +# option should be on.
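Each `options` entry in these MASTER files ultimately surfaces to kernel C code as a preprocessor define once the config system has processed the file. A minimal, self-contained illustration of the pattern (the option name is taken from the list above; the program itself is purely hypothetical):

```c
/* Sketch only: kernel sources test config options such as
 * CONFIG_PHANTOM_CACHE as preprocessor defines emitted by the config
 * system. Build with e.g. `cc -DCONFIG_PHANTOM_CACHE=1 config_demo.c`. */
#include <stdio.h>

int main(void)
{
#if CONFIG_PHANTOM_CACHE
	printf("phantom-cache thrashing detection compiled in\n");
#else
	printf("phantom-cache thrashing detection compiled out\n");
#endif
	return 0;
}
```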
+# +options MACH_PAGEMAP # +options MACH_RT +options TASK_SWAPPER # + +# +# This defines configuration options that are normally used only during +# kernel code development and debugging. They add run-time error checks or +# statistics gathering, which will slow down the system. +# +########################################################## +# +# MACH_ASSERT controls the assert() and ASSERT() macros, used to verify the +# consistency of various algorithms in the kernel. The performance impact +# of this option is significant. +# +options MACH_ASSERT # # +# +# MACH_DEBUG enables the mach_debug_server, a message interface used to +# retrieve or control various statistics. This interface may expose data +# structures that would not normally be allowed outside the kernel, and +# MUST NOT be enabled on a released configuration. +# Other options here enable information retrieval for specific subsystems. +# +options MACH_DEBUG # IPC debugging interface # +options MACH_IPC_DEBUG # Enable IPC debugging calls # +options MACH_VM_DEBUG # # +# +# MACH_MP_DEBUG controls checks for possible deadlocks, by verifying +# that the IPL has been lowered back to SPL0 after certain calls to the +# hardclock device driver. +# +options MACH_MP_DEBUG # # +# +# ZONE_DEBUG keeps track of all zalloc()ed elements to perform further +# operations on each element. +# +options ZONE_DEBUG # # + +options CONFIG_ZLEAKS # Live zone leak debugging # + +# +options ZONE_ALIAS_ADDR # # + + +# +# CONFIG_TASK_ZONE_INFO allows per-task zone information to be extracted +# Primarily useful for xnu debug and development. +# +options CONFIG_TASK_ZONE_INFO # +# +# CONFIG_DEBUGGER_FOR_ZONE_INFO restricts zone info so that it is only +# available when the kernel is being debugged. +# +options CONFIG_DEBUGGER_FOR_ZONE_INFO # +# +# XPR_DEBUG enables the gathering of data through the XPR macros inserted +# into various subsystems. This option is normally only enabled for +# specific performance or behavior studies, as the overhead in both +# code and data space is large. The data is normally retrieved through +# the kernel debugger (kdb) or by reading /dev/kmem. +# +options XPR_DEBUG # # +# +# MACH_LDEBUG controls the internal consistency checks and +# data gathering in the locking package. This also enables a debug-only +# version of simple-locks on uniprocessor machines. The code size and +# performance impact of this option is significant. +# +options MACH_LDEBUG # # + +# +# configuration option for full, partial, or no kernel debug event tracing +# +options KDEBUG # kernel tracing # +options IST_KDEBUG # limited tracing # +options NO_KDEBUG # no kernel tracing # + +# +# CONFIG_DTRACE enables code needed to support DTrace. Currently this is +# only used for delivery of traps/interrupts to DTrace. +# +options CONFIG_DTRACE # # + +# kernel performance tracing +options KPERF # +options KPC # + +# MACH_COUNTERS enables code that handles various counters in the system. +# +options MACH_COUNTERS # # + +# DEVELOPMENT define for development builds +options DEVELOPMENT # dev kernel # + +# DEBUG kernel +options DEBUG # general debugging code # + +########################################################## +# +# This defines configuration options that are normally used only during +# kernel code development and performance characterization.
They add run-time +# statistics gathering, which will slow down the system. +# +########################################################## +# +# MACH_IPC_STATS controls the collection of statistics in the MACH IPC +# subsystem. +# +#options MACH_IPC_STATS +# +# MACH_CLUSTER_STATS controls the collection of various statistics concerning +# the effectiveness and behavior of the clustered pageout and pagein +# code. +# +#options MACH_CLUSTER_STATS + +options MACH_BSD # BSD subsystem on top of Mach # +options IOKIT # # + +# +# configurable kernel related resources (CONFIG_THREAD_MAX needs to stay in +# sync with bsd/conf/MASTER until we fix the config system...) todo XXX +# +options CONFIG_THREAD_MAX=2560 # +options CONFIG_THREAD_MAX=1536 # +options CONFIG_THREAD_MAX=1024 # + +options CONFIG_TASK_MAX=1024 # +options CONFIG_TASK_MAX=768 # +options CONFIG_TASK_MAX=512 # + +options CONFIG_ZONE_MAP_MIN=12582912 # +options CONFIG_ZONE_MAP_MIN=6291456 # +options CONFIG_ZONE_MAP_MIN=1048576 # + +# Sizes must be a power of two for the zhash to +# be able to just mask off bits instead of mod +options CONFIG_ZLEAK_ALLOCATION_MAP_NUM=16384 # +options CONFIG_ZLEAK_ALLOCATION_MAP_NUM=8192 # +options CONFIG_ZLEAK_TRACE_MAP_NUM=8192 # +options CONFIG_ZLEAK_TRACE_MAP_NUM=4096 # + +# vc_progress_white - make the progress gear white instead of black +options CONFIG_VC_PROGRESS_WHITE # + +# +# Context switched counters +# +options CONFIG_COUNTERS # + +# +# Timeshare scheduler implementations +# +options CONFIG_SCHED_TRADITIONAL # +options CONFIG_SCHED_PROTO # +options CONFIG_SCHED_GRRR # +options CONFIG_SCHED_GRRR_CORE # +options CONFIG_SCHED_MULTIQ # +options CONFIG_SCHED_TIMESHARE_CORE # +options CONFIG_SCHED_FAIRSHARE_CORE # + +options CONFIG_SCHED_IDLE_IN_PLACE # +options CONFIG_GZALLOC # + +# Enable allocation of contiguous physical memory through vm_map_enter_cpm() +options VM_CPM # + +options CONFIG_SKIP_PRECISE_USER_KERNEL_TIME # + +# +# Switch to disable cpu, wakeup and high memory watermark monitors +# +options CONFIG_NOMONITORS # + +options MACH_KDP # KDP # +options CONFIG_SERIAL_KDP # KDP over serial # +options CONFIG_KDP_INTERACTIVE_DEBUGGING # + +# +# Kernel Voucher Attr Manager for Activity Trace +# +options CONFIG_ATM # + +# +# Kernel Voucher Attr Manager for BANK +# +options CONFIG_BANK # + + +# Group related tasks together into coalitions +options CONFIG_COALITIONS # + +# Configurable Security Restrictions +options CONFIG_CSR # + +# +# Console options +# +options SERIAL_CONSOLE # bi-directional serial over UART +options VIDEO_CONSOLE # uni-directional output over framebuffer + +# +# Syscall options +# +options CONFIG_REQUIRES_U32_MUNGING # incoming U32 argument structures must be munged to match U64 # diff --git a/config/MASTER.x86_64 b/config/MASTER.x86_64 new file mode 100644 index 000000000..6e356ba91 --- /dev/null +++ b/config/MASTER.x86_64 @@ -0,0 +1,65 @@ +# +# Mach Operating System +# Copyright (c) 1986 Carnegie-Mellon University +# Copyright 2001-2013 Apple Inc. +# +# All rights reserved. The CMU software License Agreement +# specifies the terms and conditions for use and redistribution. +# +###################################################################### +# +# Master Apple configuration file (see the master machine independent +# configuration file for a description of the file format).
+# +###################################################################### +# +# Standard Apple OS Configurations: +# -------- ----- -- --------------- +# +# KERNEL_BASE = [ intel medium config_requires_u32_munging ] +# KERNEL_RELEASE = [ KERNEL_BASE ] +# KERNEL_DEV = [ KERNEL_BASE development mach_assert ] +# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert ] +# BSD = [ mach_bsd sysv_sem sysv_msg sysv_shm config_imageboot config_workqueue psynch config_proc_uuid_policy ] +# FILESYS = [ devfs hfs journaling fdesc config_dev_kmem config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_ext_resolver config_searchfs config_hfs_dirlink config_appledouble ] +# NFS = [ nfsclient nfsserver ] +# NETWORKING = [ inet inet6 ipv6send tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile ah_all_crypto bond vlan gif stf ifnet_input_chk config_mbuf_jumbo if_bridge ipcomp_zlib MULTIPATH packet_mangler ] +# VPN = [ ipsec flow_divert necp content_filter ] +# PF = [ pf pflog ] +# PKTSCHED = [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ] +# CLASSQ = [ classq_blue classq_red classq_rio ] +# MULTIPATH = [ multipath mptcp ] +# IOKIT = [ iokit iokitcpp hibernation config_sleep iokitstats hypervisor ] +# LIBKERN = [ libkerncpp config_kxld config_kec_fips zlib crypto_sha2 ] +# PERF_DBG = [ config_dtrace mach_kdp config_serial_kdp kdp_interactive_debugging kperf kpc config_counters zleaks config_gzalloc ] +# MACH_BASE = [ mach config_kext_basement mdebug ipc_debug config_mca config_vmx config_mtrr config_lapic config_telemetry importance_inheritance config_atm config_bank config_coalitions hypervisor config_iosched ] +# MACH_RELEASE = [ MACH_BASE ] +# MACH_DEV = [ MACH_BASE task_zone_info ] +# MACH_DEBUG = [ MACH_BASE task_zone_info ] +# SCHED_BASE = [ config_sched_traditional config_sched_multiq ] +# SCHED_RELEASE = [ SCHED_BASE ] +# SCHED_DEV = [ SCHED_BASE ] +# SCHED_DEBUG = [ SCHED_BASE config_sched_grrr config_sched_proto ] +# VM = [ vm_pressure_events memorystatus dynamic_codesigning config_code_decryption encrypted_swap phantom_cache] +# SECURITY = [ config_macf config_audit config_csr ] +# RELEASE = [ KERNEL_RELEASE BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_RELEASE SCHED_RELEASE VM SECURITY ] +# DEVELOPMENT = [ KERNEL_DEV BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_DEV SCHED_DEV VM SECURITY ] +# DEBUG = [ KERNEL_DEBUG BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_DEBUG SCHED_DEBUG VM SECURITY ] +# +###################################################################### +# +machine "x86_64" # + +makeoptions OSFMK_MACHINE = "x86_64" # + +options PAL_I386 + +options CONFIG_MCA # Machine Check Architecture # +options CONFIG_VMX # Virtual Machine Extensions # +options CONFIG_MTRR # Memory Type Range Registers # + +options NO_NESTED_PMAP # + +options HYPERVISOR # Apple hypervisor kext support # + +options CONFIG_MACH_APPROXIMATE_TIME diff --git a/config/Mach.exports b/config/Mach.exports index acc7d4745..1cca2a9b7 100644 --- a/config/Mach.exports +++ b/config/Mach.exports @@ -28,6 +28,8 @@ _mach_vm_pressure_level_monitor _mach_vm_pressure_monitor _nanoseconds_to_absolutetime _preemption_enabled +_read_random +_read_frandom _semaphore_create _semaphore_dereference _semaphore_destroy diff --git a/config/Makefile b/config/Makefile index 02cec1c9f..73a907635 100644 --- a/config/Makefile +++ b/config/Makefile @@ 
-6,7 +6,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) - MD_SUPPORTED_KPI_FILENAME = SupportedKPIs-${CURRENT_ARCH_CONFIG_LC}.txt MI_SUPPORTED_KPI_FILENAME = SupportedKPIs-all-archs.txt @@ -28,6 +27,15 @@ SYMBOL_COMPONENT_LIST = \ Unsupported \ Private +# In general you want it to be possible to have a CPU sub-type's symbol exports +# alias to the parent type's exports. This is a special-case way to handle it +# for now: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +EXPORT_SOURCE_ARCH_CONFIG_LC = x86_64 +else +EXPORT_SOURCE_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + KEXT_MACHO_LIST = $(foreach symbolset,$(filter-out Dummy,$(SYMBOL_COMPONENT_LIST)),System.kext/PlugIns/$(symbolset).kext/$(symbolset)) KEXT_PLIST_LIST += $(foreach symbolset,$(filter-out Dummy,$(SYMBOL_COMPONENT_LIST)),System.kext/PlugIns/$(symbolset).kext/Info.plist) @@ -37,43 +45,43 @@ DSTROOT_INSTALL_KEXT_MACHO_FILES = $(addprefix $(INSTALL_KEXT_DIR)/,$(KEXT_MACHO SYMROOT_INSTALL_KEXT_PLISTS = $(addprefix $(SYMROOT)/,$(KEXT_PLIST_LIST)) DSTROOT_INSTALL_KEXT_PLISTS = $(addprefix $(INSTALL_KEXT_DIR)/,$(KEXT_PLIST_LIST)) -EXPORTS_FILES = $(foreach symbolset,$(SYMBOL_COMPONENT_LIST),$(symbolset).exports $(symbolset).$(CURRENT_ARCH_CONFIG_LC).exports) Unused.exports +EXPORTS_FILES = $(foreach symbolset,$(SYMBOL_COMPONENT_LIST),$(symbolset).exports $(symbolset).$(EXPORT_SOURCE_ARCH_CONFIG_LC).exports) Unused.exports SYMBOL_SET_BUILD = $(foreach symbolset, $(SYMBOL_COMPONENT_LIST), $(OBJPATH)/$(symbolset).symbolset) -$(OBJPATH)/allsymbols: $(OBJPATH)/mach_kernel +$(OBJPATH)/allsymbols: $(OBJPATH)/$(KERNEL_FILE_NAME) $(_v)$(NM) -gj $< > $@ -$(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset : %.exports %.$(CURRENT_ARCH_CONFIG_LC).exports $(OBJPATH)/allsymbols +$(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset : %.exports %.$(EXPORT_SOURCE_ARCH_CONFIG_LC).exports $(OBJPATH)/allsymbols $(KEXT_CREATE_SYMBOL_SET) @echo SYMBOLSET $* "($(CURRENT_ARCH_CONFIG_LC))" $(_v)$(KEXT_CREATE_SYMBOL_SET) \ $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ -import $(OBJPATH)/allsymbols \ -export $(SOURCE)/$*.exports \ - -export $(SOURCE)/$*.$(CURRENT_ARCH_CONFIG_LC).exports \ + -export $(SOURCE)/$*.$(EXPORT_SOURCE_ARCH_CONFIG_LC).exports \ -output $@ $(_vstdout) .PHONY: check_all_exports -check_all_exports: $(OBJPATH)/allsymbols +check_all_exports: $(OBJPATH)/allsymbols $(KEXT_CREATE_SYMBOL_SET) $(_v)$(KEXT_CREATE_SYMBOL_SET) \ $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ -import $(OBJPATH)/allsymbols \ $(foreach symbolset,$(filter-out Private,$(SYMBOL_COMPONENT_LIST)), \ -export $(SOURCE)/$(symbolset).exports \ - -export $(SOURCE)/$(symbolset).$(CURRENT_ARCH_CONFIG_LC).exports) \ + -export $(SOURCE)/$(symbolset).$(EXPORT_SOURCE_ARCH_CONFIG_LC).exports) \ -output /dev/null $(_vstdout) $(_v)$(KEXT_CREATE_SYMBOL_SET) \ $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ -import $(OBJPATH)/allsymbols \ $(foreach symbolset,$(filter-out Unsupported,$(SYMBOL_COMPONENT_LIST)), \ -export $(SOURCE)/$(symbolset).exports \ - -export $(SOURCE)/$(symbolset).$(CURRENT_ARCH_CONFIG_LC).exports) \ + -export $(SOURCE)/$(symbolset).$(EXPORT_SOURCE_ARCH_CONFIG_LC).exports) \ -output /dev/null $(_vstdout) $(OBJPATH)/$(MD_SUPPORTED_KPI_FILENAME): $(EXPORTS_FILES) @echo SUPPORTED_KPI "($(CURRENT_ARCH_CONFIG_LC))" - $(_v)$(SRCROOT)/config/list_supported.sh $(SOURCE) $(CURRENT_ARCH_CONFIG_LC) $@ + $(_v)$(SRCROOT)/config/list_supported.sh $(SOURCE) $(EXPORT_SOURCE_ARCH_CONFIG_LC) $@ $(OBJPATH)/$(MI_SUPPORTED_KPI_FILENAME): $(EXPORTS_FILES) @echo SUPPORTED_KPI 
"(all)" @@ -99,26 +107,18 @@ $(SYMROOT_INSTALL_KEXT_MACHO_FILES): ALWAYS fi; \ exit $$cmdstatus -.PHONY: symroot_kext_plists - -symroot_kext_plists: $(SYMROOT_INSTALL_KEXT_PLISTS) - $(SYMROOT_INSTALL_KEXT_PLISTS): $(SYMROOT)/% : $(SOURCE)/% $(_v)$(MKDIR) $(dir $@) @echo INSTALLSYM kextplist $* $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ $(_v)$(NEWVERS) $@ $(_vstdout) -.PHONY: dstroot_kext_plists - -dstroot_kext_plists: $(DSTROOT_INSTALL_KEXT_PLISTS) - $(DSTROOT_INSTALL_KEXT_PLISTS): $(INSTALL_KEXT_DIR)/% : $(SYMROOT)/% $(_v)$(MKDIR) $(dir $@) @echo INSTALL kextplist $* $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ -$(DSTROOT_INSTALL_KEXT_MACHO_FILES): $(INSTALL_KEXT_DIR)/% : $(SYMROOT)/% +$(DSTROOT_INSTALL_KEXT_MACHO_FILES): $(INSTALL_KEXT_DIR)/% : $(SYMROOT)/% ALWAYS $(_v)$(MKDIR) $(dir $@) @echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" $(_v)$(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@ @@ -128,23 +128,17 @@ $(DSTROOT)/$(KRESDIR)/$(MD_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR)/$(MI_SU @echo INSTALL $* $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ -install_symbol_sets: $(SYMROOT_INSTALL_KEXT_MACHO_FILES) \ - symroot_kext_plists \ - $(DSTROOT_INSTALL_KEXT_MACHO_FILES) \ - dstroot_kext_plists \ - $(DSTROOT)/$(KRESDIR)/$(MD_SUPPORTED_KPI_FILENAME) \ - $(DSTROOT)/$(KRESDIR)/$(MI_SUPPORTED_KPI_FILENAME) - -do_build_install_primary:: install_symbol_sets +do_config_install:: $(SYMROOT_INSTALL_KEXT_MACHO_FILES) \ + $(SYMROOT_INSTALL_KEXT_PLISTS) \ + $(DSTROOT_INSTALL_KEXT_MACHO_FILES) \ + $(DSTROOT_INSTALL_KEXT_PLISTS) \ + $(DSTROOT)/$(KRESDIR)/$(MD_SUPPORTED_KPI_FILENAME) \ + $(DSTROOT)/$(KRESDIR)/$(MI_SUPPORTED_KPI_FILENAME) -$(OBJPATH)/kernel-kpi.exp: $(EXPORTS_FILES) +$(OBJPATH)/all-kpi.exp: $(EXPORTS_FILES) $(_v)$(SOURCE)/generate_linker_exports.sh $@ $+ -.PHONY: build_mach_kernel_exports - -build_mach_kernel_exports: $(OBJPATH)/kernel-kpi.exp - -do_build_all:: build_mach_kernel_exports +do_build_all:: $(OBJPATH)/all-kpi.exp include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/config/MasterVersion b/config/MasterVersion index 14d9aa5e4..5445aaa60 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -13.4.0 +14.0.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
diff --git a/config/Private.exports b/config/Private.exports index b36bf13bd..173309543 100644 --- a/config/Private.exports +++ b/config/Private.exports @@ -10,8 +10,8 @@ __ZTV5IOCPU __ZN24IOCPUInterruptController* __ZNK24IOCPUInterruptController* __ZTV24IOCPUInterruptController -_assert_wait_timeout_with_leeway _assert_wait_deadline_with_leeway +_assert_wait_timeout_with_leeway _audio_active _b_to_q _bdevsw @@ -27,6 +27,7 @@ _buf_shadow _bufattr_meta _bufattr_nocache _bufattr_throttled +_bufattr_passive _cdevsw _cdevsw_setkqueueok _chudxnu_platform_ptr @@ -40,45 +41,60 @@ _convert_task_suspension_token_to_port _convert_task_to_port _cp_key_store_action _cp_register_wraps +_cpu_to_processor _cs_enforcement +_cs_blob_reset_cache +_cs_require_lv +_cs_entitlement_flags _cs_entitlements_blob_get -_cs_identity_get _cs_get_cdhash +_cs_identity_get _cs_register_cscsr +_csfg_get_teamid +_csfg_get_path +_csfg_get_platform_binary +_csproc_get_teamid +_csproc_get_platform_binary +_csvnode_get_teamid +_ctl_enqueuembuf_list _ctl_id_by_name _ctl_name_by_id +_escape_str _fd_rdwr _get_aiotask +_gpu_accumulate_time +_gpu_describe _hz _ifnet_allocate_extended _ifnet_bandwidths _ifnet_clone_attach _ifnet_clone_detach _ifnet_dequeue -_ifnet_dequeue_service_class _ifnet_dequeue_multi +_ifnet_dequeue_service_class _ifnet_dequeue_service_class_multi _ifnet_disable_output _ifnet_enable_output _ifnet_enqueue _ifnet_flowid _ifnet_get_delegate +_ifnet_get_inuse_address_list +_ifnet_get_ipsec_offload_frames _ifnet_get_local_ports _ifnet_get_local_ports_extended -_ifnet_get_inuse_address_list +_ifnet_get_rcvq_maxlen _ifnet_get_service_class_sndq_len _ifnet_get_sndq_len -_ifnet_get_rcvq_maxlen _ifnet_get_sndq_maxlen _ifnet_idle_flags -_ifnet_inet_defrouter_llreachinfo _ifnet_inet6_defrouter_llreachinfo +_ifnet_inet_defrouter_llreachinfo _ifnet_input_extended _ifnet_latencies _ifnet_link_quality -_ifnet_notice_node_presence -_ifnet_notice_node_absence _ifnet_notice_master_elected +_ifnet_notice_node_absence +_ifnet_notice_node_presence _ifnet_poll_params _ifnet_purge _ifnet_report_issues @@ -93,27 +109,32 @@ _ifnet_set_rcvq_maxlen _ifnet_set_sndq_maxlen _ifnet_start _ifnet_subfamily -_ifnet_transmit_burst_start _ifnet_transmit_burst_end +_ifnet_transmit_burst_start _ifnet_tx_compl_status _in6_localaddr _in6addr_local _in_localaddr _inaddr_local _inp_clear_INP_INADDR_ANY +_io_rate_update +_io_rate_update_register _ip_gre_output _ip_gre_register_input _ipc_port_release_send _kauth_cred_getgroups +_kauth_cred_grnam2guid _kauth_cred_guid2grnam _kauth_cred_guid2pwnam -_kauth_cred_grnam2guid _kauth_cred_pwnam2guid _kdp_register_link _kdp_set_interface _kdp_unregister_link _kdp_unregister_send_receive +_kern_asl_msg +_kern_asl_msg_va _kmem_alloc_kobject +_kmem_alloc_pageable _linesw _log _logwakeup @@ -132,9 +153,10 @@ _mach_vm_allocate _mach_vm_deallocate _mach_vm_map _mach_vm_protect +_mach_vm_remap _mbuf_add_drvaux -_mbuf_find_drvaux _mbuf_del_drvaux +_mbuf_find_drvaux _mbuf_get_driver_scratch _mbuf_get_priority:_mbuf_get_traffic_class _mbuf_get_service_class @@ -165,19 +187,22 @@ _pmc_get_accessible_core_list _pmc_get_name _pmc_get_pmc_list _pmc_register -_pmc_reserve -_pmc_reserve_task -_pmc_reserve_thread _pmc_reservation_free _pmc_reservation_read _pmc_reservation_start _pmc_reservation_stop _pmc_reservation_write +_pmc_reserve +_pmc_reserve_task +_pmc_reserve_thread _pmc_unregister -_post_sys_powersource _port_name_to_task _port_name_to_thread +_post_sys_powersource +_prng_factory_register +_proc_getexecutablevnode 
_proc_pidbackgrounded +_proc_pidversion _proc_task _proc_uniqueid _pru_abort_notsupp @@ -210,10 +235,13 @@ _socantrcvmore _socantsendmore _sock_catchevents _sock_getlistener +_sock_gettclassopt _sock_release _sock_retain +_sock_settclassopt _sock_setupcall _sock_setupcalls +_sock_iskernel _sodisconnect _sofree _sofreelastref @@ -222,9 +250,9 @@ _soisconnecting _soisdisconnected _soisdisconnecting _sonewconn -_sopoll _sooptcopyin _sooptcopyout +_sopoll _soreceive _soreserve _sorwakeup @@ -238,14 +266,14 @@ _thread_dispatchqaddr _thread_set_eager_preempt _thread_tid _throttle_info_create +_throttle_info_io_will_be_throttled _throttle_info_mount_ref _throttle_info_mount_rel -_throttle_info_release -_throttle_info_update _throttle_info_ref_by_mask _throttle_info_rel_by_mask +_throttle_info_release +_throttle_info_update _throttle_info_update_by_mask -_throttle_info_io_will_be_throttled _throttle_lowpri_io _throttle_set_thread_io_policy _timeout @@ -258,44 +286,33 @@ _ttsetwater _ttspeedtab _ttwakeup _ttwwakeup +_tty_lock +_tty_unlock _ttyclose _ttyflush _ttyfree _ttyinput _ttymalloc _ttymodem -_ttysleep _ttyselect -_tty_lock -_tty_unlock +_ttysleep _unmountroot_pre_hook _unputc _unregister_decmpfs_decompressor _untimeout +_utun_ctl_disable_crypto_dtls _utun_ctl_register_dtls _utun_pkt_dtls_input -_utun_ctl_disable_crypto_dtls -_vnode_isdyldsharedcache -_vnode_ismonitored -_vnode_istty -_vnode_notify -_vnop_compound_open_desc -_vnop_compound_mkdir_desc -_vnop_compound_remove_desc -_vnop_compound_rename_desc -_vnop_compound_rmdir_desc -_vnop_monitor_desc _vfs_context_bind _vfs_context_get_special_port _vfs_context_set_special_port _vfs_devvp +_vfs_get_notify_attributes _vfs_getattr _vfs_getbyid -_vfs_get_notify_attributes _vfs_mntlabel _vfs_nativexattrs _vfs_setcompoundopen -_vfs_setunmountpreflight _vfs_throttle_mask _vfs_vnodecovered _vm_fault @@ -304,13 +321,24 @@ _vm_map_copy_discard _vm_map_copyin _vm_map_copyin_common _vm_map_copyout +_vm_map_create _vm_map_page_mask _vm_map_page_shift _vm_map_page_size _vm_map_round_page_mask _vm_map_trunc_page_mask +_vm_map_wire_and_extract +_vm_page_wire_count _vn_getpath_fsenter _vn_searchfs_inappropriate_name +_vnode_isdyldsharedcache +_vnode_ismonitored +_vnode_istty _vnode_lookup_continue_needed -_sock_settclassopt -_sock_gettclassopt +_vnode_notify +_vnop_compound_mkdir_desc +_vnop_compound_open_desc +_vnop_compound_remove_desc +_vnop_compound_rename_desc +_vnop_compound_rmdir_desc +_vnop_monitor_desc diff --git a/config/Private.x86_64.exports b/config/Private.x86_64.exports index 0a16bb8a4..8df705eb6 100644 --- a/config/Private.x86_64.exports +++ b/config/Private.x86_64.exports @@ -2,6 +2,8 @@ _IOGetBootKeyStoreData _SHA256_Final _SHA256_Init _SHA256_Update +__ZN14IOPMrootDomain20claimSystemWakeEventEP9IOServicejPKcP8OSObject +__ZN14IOPMrootDomain20restartWithStackshotEv __ZN22IOInterruptEventSource7warmCPUEy _acpi_install_wake_handler _acpi_sleep_kernel @@ -13,12 +15,17 @@ _bufattr_delayidlesleep _cpu_to_lapic _cpuid_features _cpuid_info +_csr_check +_csr_get_active_config +_csr_get_pending_config +_csr_set_allow_all _lapic_end_of_interrupt _lapic_get_cmci_vector _lapic_unmask_perfcnt_interrupt _mp_broadcast _mp_cpus_call _mp_cpus_call1 +_mp_cpus_kick _need_fsevent _pal_efi_call_in_32bit_mode _pal_efi_call_in_64bit_mode @@ -27,7 +34,6 @@ _smp_initialized _kext_get_vm_map _pal_machine_sleep _vfs_addtrigger -_vfs_istraditionaltrigger _vfs_resolver_auxiliary _vfs_resolver_result _vfs_resolver_sequence @@ -40,3 +46,6 @@ _xts_encrypt _xts_start 
_aes_decrypt _PE_reboot_on_panic +_hv_set* +_hv_get* +_hv_release* diff --git a/config/Unsupported.exports b/config/Unsupported.exports index 7bcd7a5cc..7d3ed4251 100644 --- a/config/Unsupported.exports +++ b/config/Unsupported.exports @@ -78,6 +78,7 @@ _gPEClockFrequencyInfo _gPESerialBaud _get_bsdtask_info _get_task_map +_get_task_pmap _getsectdatafromheader _hfs_getconverter _hfs_pickencoding @@ -163,7 +164,6 @@ _task_resume _task_resume2 _task_suspend _task_suspend2 -_thread_notrigger _thread_tid _tsleep _ubc_cs_blob_get @@ -175,6 +175,7 @@ _vm_map _vm_map_deallocate _vm_map_unwire _vm_map_wire +_set_vm_privilege _vm_protect _vm_region _vm_region_object_create @@ -185,3 +186,4 @@ _vnop_kqfilt_add_desc _vnop_kqfilt_remove_desc _vnop_makenamedstream_desc _vnop_removenamedstream_desc + diff --git a/config/Unused.exports b/config/Unused.exports index bb719a4c0..e4a00cd8a 100644 --- a/config/Unused.exports +++ b/config/Unused.exports @@ -1,3 +1,8 @@ # Symbols that are unused as KPI, but must be globally exported _dtrace_zero* _gLoadedKextSummaries +_ipc_mqueue_full +_atm_mana* +_bank_mana* +_ipc_importance_mana* +_user_data_mana* diff --git a/config/newvers.pl b/config/newvers.pl index 9a12b180f..7b41feac4 100755 --- a/config/newvers.pl +++ b/config/newvers.pl @@ -58,25 +58,52 @@ my $BUILD_DATE = `date`; $BUILD_DATE =~ s/[\n\t]//g; my $BUILDER=`whoami`; $BUILDER =~ s/[\n\t]//g; +my $RC_STRING = $ENV{'RC_ProjectNameAndSourceVersion'} . "~" . $ENV{'RC_ProjectBuildVersion'} if defined($ENV{'RC_XBS'}); -# Handle two scenarios: +# Handle four scenarios: # SRCROOT=/tmp/xnu # OBJROOT=/tmp/xnu/BUILD/obj # OBJPATH=/tmp/xnu/BUILD/obj/RELEASE_X86_64 # -# SRCROOT=/SourceCache/xnu/xnu-1234 -# OBJROOT=/tmp/xnu/xnu-1234~1.obj -# OBJPATH=/tmp/xnu/xnu-1234~1.obj/RELEASE_X86_64 +# SRCROOT=/SourceCache/xnu/xnu-2706 +# OBJROOT=/BinaryCache/xnu/xnu-2706~3/Objects +# OBJPATH=/BinaryCache/xnu/xnu-2706~3/Objects/DEVELOPMENT_X86_64 +# RC_XBS=YES (XBS-16.3+) +# RC_ProjectNameAndSourceVersion=xnu-2706 +# RC_ProjectBuildVersion=3 +# +# SRCROOT=/SourceCache/xnu/xnu-2706 +# OBJROOT=/private/var/tmp/xnu/xnu-2706~2 +# OBJPATH=/private/var/tmp/xnu/xnu-2706~2/DEVELOPMENT_ARM_S5L8940X +# RC_XBS=YES ( #include #include @@ -54,7 +59,11 @@ struct _notifyMsg { * *
In order to make the data queue memory available to a user process, the method getMemoryDescriptor() must be used to get an IOMemoryDescriptor instance that can be mapped into a user process. Typically, the clientMemoryForType() method on an IOUserClient instance will be used to request the IOMemoryDescriptor and then return it to be mapped into the user process. */ +#ifndef DISABLE_DATAQUEUE_WARNING +class __attribute__((deprecated)) IODataQueue : public OSObject +#else class IODataQueue : public OSObject +#endif { OSDeclareDefaultStructors(IODataQueue) diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h index eddd86c05..caf03f074 100644 --- a/iokit/IOKit/IOHibernatePrivate.h +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -303,6 +303,7 @@ typedef struct hibernate_statistics_t hibernate_statistics_t; void IOHibernateSystemInit(IOPMrootDomain * rootDomain); IOReturn IOHibernateSystemSleep(void); +IOReturn IOHibernateOpenForDebugData(void); IOReturn IOHibernateIOKitSleep(void); IOReturn IOHibernateSystemHasSlept(void); IOReturn IOHibernateSystemWake(void); @@ -318,7 +319,7 @@ void IOHibernateSystemRestart(void); typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uint64_t size); struct kern_direct_file_io_ref_t * -kern_open_file_for_direct_io(const char * name, +kern_open_file_for_direct_io(const char * name, boolean_t create_file, kern_get_file_extents_callback_t callback, void * callback_ref, @@ -333,15 +334,19 @@ kern_open_file_for_direct_io(const char * name, uint64_t * partitionbase_result, uint64_t * maxiocount_result, uint32_t * oflags); +int +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len, int ioflag); void kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, off_t write_offset, caddr_t addr, vm_size_t write_length, off_t discard_offset, off_t discard_end); #endif /* _SYS_CONF_H_ */ + void vm_compressor_do_warmup(void); + hibernate_page_list_t * hibernate_page_list_allocate(boolean_t log); @@ -488,7 +493,9 @@ enum enum { kIOHibernateHeaderSignature = 0x73696d65, - kIOHibernateHeaderInvalidSignature = 0x7a7a7a7a + kIOHibernateHeaderInvalidSignature = 0x7a7a7a7a, + kIOHibernateHeaderOpenSignature = 0xf1e0be9d, + kIOHibernateHeaderDebugDataSignature = 0xfcddfcdd }; // kind for hibernate_set_page_state() diff --git a/iokit/IOKit/IOInterruptAccounting.h b/iokit/IOKit/IOInterruptAccounting.h new file mode 100644 index 000000000..fdea6295c --- /dev/null +++ b/iokit/IOKit/IOInterruptAccounting.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2014 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __IOKIT_IOINTERRUPTACCOUNTING_H +#define __IOKIT_IOINTERRUPTACCOUNTING_H + +/* + * This header contains definitions that will be needed by userspace clients of the interrupt accounting + * mechanisms. + */ + +#define IA_INDEX_MAX (255) +#define IA_INDEX_MASK (0xFFULL) +#define IA_STATISTIC_INDEX_SHIFT (0ULL) +#define IA_INTERRUPT_INDEX_SHIFT (16ULL) + +/* + * For the moment, the indices aren't preprocessor visible, so any changes to this code will need to be + * careful to ensure that the defined number of statistics matches (or is greater than) the actual number + * of statistics, to avoid channel ID collisions... because that would be bad. + */ +#define IA_NUM_INTERRUPT_ACCOUNTING_STATISTICS (10) + +/* + * Channel ID related definitions. These serve to denote the namespace of interrupt accounting in the + * context of IOReporter-based clients. Interrupt accounting distinguishes between interrupts based on + * the nub the interrupt was registered relative to, and the nub relative interrupt index (i.e, the + * namespace for interrupt accounting is {nub ID, interrupt index}). IOReporting already knows about + * nubs (in the context of IOService), however it has no built in knowledge of interrupt accounting. + * As a result of this, it is the responsibility of the IOReporting client to request any desired + * statistics on a per-index basis (i.e, if you want to get the first level interrupt count for all + * interrupts, you must express an interest in the first level interrupt count statistic for index 0, 1, + * 2, and so on, to a reasonable maximum). + */ + +/* + * These delimit the channel ID namespace for interrupt accounting. + */ +#define IA_BASE_CHANNEL_ID IOREPORT_MAKEID('I', 'n', 't', 'r', 0, 0, 0, 0) /* First valid channel ID */ +#define IA_MAX_CHANNEL_ID IOREPORT_MAKEID('I', 'n', 't', 'r', 0xFF, 0xFF, 0xFF, 0xFF) /* Last valid channel ID */ + +/* + * Given a nub-relative interrupt index (an index into the interrupt specifiers), and the index of an + * interrupt accounting statistic, returns the channel id for that statistic. + */ +#define IA_GET_CHANNEL_ID(interruptIndex, statisticIndex) \ + ((IA_BASE_CHANNEL_ID) + \ + ((interruptIndex & IA_INDEX_MASK) << IA_INTERRUPT_INDEX_SHIFT) + \ + ((statisticIndex & IA_INDEX_MASK) << IA_STATISTIC_INDEX_SHIFT)) + +/* + * Extracts the interrupt index, given a channel ID. + */ +#define IA_GET_INTERRUPT_INDEX(channelID) \ + (((channelID - IA_BASE_CHANNEL_ID) >> IA_INTERRUPT_INDEX_SHIFT) & IA_INDEX_MASK) + +/* + * Extracts the statistic index, given a channel ID. + */ +#define IA_GET_STATISTIC_INDEX(channelID) \ + (((channelID - IA_BASE_CHANNEL_ID) >> IA_STATISTIC_INDEX_SHIFT) & IA_INDEX_MASK) + +/* + * This enum defines the basic statistics we gather for each interrupt. Currently, the information + * we gather falls into roughly three buckets: interrupt related (counts, times), scheduler related + * (thread wakeups), and power related (package/cpu state changes).
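A worked round trip through the channel-ID packing above, before the individual statistics are described: a self-contained sketch in which a stand-in constant with 'Intr' in the top bytes is assumed for the real IA_BASE_CHANNEL_ID (which is built with IOREPORT_MAKEID).

```c
/* Sketch of the {interrupt index, statistic index} channel-ID packing.
 * The base constant stands in for IOREPORT_MAKEID('I','n','t','r',0,0,0,0). */
#include <stdio.h>
#include <stdint.h>

#define IA_INDEX_MASK            (0xFFULL)
#define IA_STATISTIC_INDEX_SHIFT (0ULL)
#define IA_INTERRUPT_INDEX_SHIFT (16ULL)
#define IA_BASE_CHANNEL_ID       (0x496E747200000000ULL) /* 'I','n','t','r',0,0,0,0 */

int main(void)
{
	uint64_t interrupt = 3, statistic = 2; /* e.g. a statistic of interrupt 3 */
	uint64_t id = IA_BASE_CHANNEL_ID +
	    ((interrupt & IA_INDEX_MASK) << IA_INTERRUPT_INDEX_SHIFT) +
	    ((statistic & IA_INDEX_MASK) << IA_STATISTIC_INDEX_SHIFT);

	/* Decode, mirroring IA_GET_INTERRUPT_INDEX / IA_GET_STATISTIC_INDEX. */
	printf("channel 0x%016llx -> interrupt %llu, statistic %llu\n",
	    (unsigned long long)id,
	    (unsigned long long)(((id - IA_BASE_CHANNEL_ID) >> IA_INTERRUPT_INDEX_SHIFT) & IA_INDEX_MASK),
	    (unsigned long long)(((id - IA_BASE_CHANNEL_ID) >> IA_STATISTIC_INDEX_SHIFT) & IA_INDEX_MASK));
	return 0;
}
```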
+ * + * First Level Count: This represents the number of times that this interrupt has fired (more + * specifically, the number of times we have run the handler we expect to run in interrupt context). + * + * Second Level Count: This represents the number of times we have run any second level handler for + * this interrupt (more specifically, the handler we expect to be run in the regular kernel context). + * + * First Level Time: This represents the aggregate time spent running the first level handler. For + * some interrupts, this value will be 0, as this is not meant to track time spent running generic + * IOKit code for the interrupt (i.e, IOInterruptEventSource code), but instead any custom code run + * at the interrupt context (i.e, the filter installed for an IOFilterInterruptEventSource). + * + * Second Level CPU Time: This represents the aggregate time spent actually running the second level + * handler on the CPU. As the second level handler may block or be preempted, it is meaningful to + * distinguish this from the system time spent handling the interrupt. As was the case for the + * first level handler, this does not attempt to track the time spent running generic IOKit code + * (i.e, IOInterruptEventSource code or IOWorkLoop code), but instead attempts to track the time + * spent running the handler that was installed. + * + * Second Level System Time: This represents the aggregate time spent in the second level handler; + * this will include time where the handler was blocked or had been preempted. This should equate + * to the wall time spent handling an interrupt (as long as we don't allow the system to go to + * sleep while a second level handler is running). + * + * No Thread Wakeups: The number of times that the interrupt did not attempt to wake up a thread + * (typically the work loop for the interrupt source is woken up). + * + * Total Thread Wakeups: The aggregate number of threads (non-unique) woken up by the interrupt. + * If no threads were actually woken up for an interrupt (i.e, the work loop thread was already + * runnable), this value will not change. + * + * Package Wakeups: The number of times that this interrupt woke up the package (as defined by the + * scheduler). + * + * CPU Wakeups: The number of times that this interrupt woke up a CPU (forcing it to go through the + * reset path). + * + * Idle Exits: The number of times that this interrupt forced a CPU out of the idle loop (the CPU + * had to exit an idle state to handle the interrupt, but it did not need to go through the reset + * path). 
+ */ +enum { + kInterruptAccountingFirstLevelCountIndex = 0, /* Number of times we invoked the top level handler */ + kInterruptAccountingSecondLevelCountIndex, /* Number of times we invoked the workloop action */ + kInterruptAccountingFirstLevelTimeIndex, /* Time spent in the top level handler, if one was installed */ + kInterruptAccountingSecondLevelCPUTimeIndex, /* CPU time spent in the workloop action */ + kInterruptAccountingSecondLevelSystemTimeIndex, /* System time spent in the workloop action */ + kInterruptAccountingNoThreadWakeupsIndex, /* Number of first level (filter) invocations that did not wake up a thread */ + kInterruptAccountingTotalThreadWakeupsIndex, /* Number of actual thread wakeups caused by this interrupt */ + kInterruptAccountingPackageWakeupsIndex, /* Number of times this interrupt woke up the package */ + kInterruptAccountingCPUWakeupsIndex, /* Number of times this interrupt woke up a CPU */ + kInterruptAccountingIdleExitsIndex, /* Number of times this interrupt forced a CPU out of the idle loop */ + kInterruptAccountingInvalidStatisticIndex /* Sentinel value for checking for a nonsensical index */ +}; + +#endif /* __IOKIT_IOINTERRUPTACCOUNTING_H */ + diff --git a/iokit/IOKit/IOInterruptAccountingPrivate.h b/iokit/IOKit/IOInterruptAccountingPrivate.h new file mode 100644 index 000000000..ef50297a7 --- /dev/null +++ b/iokit/IOKit/IOInterruptAccountingPrivate.h @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2014 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __IOKIT_IOINTERRUPTACCOUNTING_PRIVATE_H +#define __IOKIT_IOINTERRUPTACCOUNTING_PRIVATE_H + +/* + * Header containing interrupt accounting related prototypes/defines that should be kept private to + * xnu itself (no userspace, no kexts, no nothing!). + */ + +#include +#include +#include + +class OSObject; +class IOSimpleReporter; + +/* + * A brief overview. Interrupt accounting (as implemented in IOKit) pertains to infrastructure for + * gathering information (currently, statistics only) on interrupts, and allowing them to be reported + * (either to userspace through IOReporting, or through lldb; lldb macros have yet to be implemented).
+ * + * Currently, interrupt accounting consists of a relationship between an IOService (a nub, which + * will contain interrupt specifiers), an IOInterruptEventSource (if we add other interrupt target + * abstractions, support could be added for them as well), and objects necessary to support them. An + * interrupt is "named" by a tuple of {provider, interrupt index}; no nub should ever have more than + * one interrupt registered for a given index, so this tuple should be unique. + * + * The "additional objects" mentioned above consist of an IOReporter object (lazily allocated and + * tied to the nub; once allocated it will live until the nub is freed), and a statistics object + * (effectively part of the IOIES in terms of lifecycle). The statistics object is used by the + * interrupt codepath itself, and by the nub when it needs to update the reporter; the reporter is + * used to report values to userspace. + * + * As a consequence of the above relationship, we do not track statistics for directly registered + * interrupt handlers. We have no guarantees what the handler or the target may be; if you don't + * follow the generic IOKit interrupt model, you will not be tracked by interrupt accounting. For + * now, this means you must use an IOIES to be eligible for interrupt accounting. We also do not + * track IOIES' that do not have providers (this is indicative that it is only being used to drive + * workloop activity, and is not actually handling interrupts). + */ + +/* + * This is meant to let us set up the set of interrupt statistics we are actually interested in, by + * setting a boot-arg. If we want to track a statistic, the bit corresponding to the index for that + * statistic should be set in the bitmask. + * + * There is a bit of a mismatch here, in that our IOReporting channel namespace allows for 256 statistics, + * but this bitmask actually limits it to 32. + */ +extern uint32_t gInterruptAccountingStatisticBitmask; + +/* + * Check the bitmask by statistic index; useful for setting the initial value and conditionalizing code. + */ +#define IA_GET_ENABLE_BIT(statisticIndex) \ + (((uint32_t) 1) << ((uint32_t) statisticIndex)) + +#define IA_GET_STATISTIC_ENABLED(statisticIndex) \ + (IA_GET_ENABLE_BIT(statisticIndex) & gInterruptAccountingStatisticBitmask) + +/* + * Check if any valid statistics are enabled. + */ +#define IA_ANY_STATISTICS_ENABLED \ + ((IA_GET_ENABLE_BIT(kInterruptAccountingInvalidStatisticIndex) - 1) & gInterruptAccountingStatisticBitmask) + +/* + * Actual string names for the statistics we gather.
+ */ +#define kInterruptAccountingChannelNameFirstLevelCount (" First Level Interrupt Handler Count") +#define kInterruptAccountingChannelNameSecondLevelCount (" Second Level Interrupt Handler Count") +#define kInterruptAccountingChannelNameFirstLevelTime (" First Level Interrupt Handler Time (MATUs)") +#define kInterruptAccountingChannelNameSecondLevelCPUTime (" Second Level Interrupt Handler CPU Time (MATUs)") +#define kInterruptAccountingChannelNameSecondLevelSystemTime ("Second Level Interrupt Handler System Time (MATUs)") +#define kInterruptAccountingChannelNameNoThreadWakeups (" Interrupts that did not try to wake a thread") +#define kInterruptAccountingChannelNameTotalThreadWakeups (" Sleeping threads woken up by this interrupt") +#define kInterruptAccountingChannelNamePackageWakeups (" Package wakeups caused by this interrupt") +#define kInterruptAccountingChannelNameCPUWakeups (" CPU wakeups caused by this interrupt") +#define kInterruptAccountingChannelNameIdleExits (" Idle exits caused by this interrupt") + +static const char * const kInterruptAccountingStatisticNameArray[IA_NUM_INTERRUPT_ACCOUNTING_STATISTICS] = { + [kInterruptAccountingFirstLevelCountIndex] = kInterruptAccountingChannelNameFirstLevelCount, + [kInterruptAccountingSecondLevelCountIndex] = kInterruptAccountingChannelNameSecondLevelCount, + [kInterruptAccountingFirstLevelTimeIndex] = kInterruptAccountingChannelNameFirstLevelTime, + [kInterruptAccountingSecondLevelCPUTimeIndex] = kInterruptAccountingChannelNameSecondLevelCPUTime, + [kInterruptAccountingSecondLevelSystemTimeIndex] = kInterruptAccountingChannelNameSecondLevelSystemTime, + [kInterruptAccountingNoThreadWakeupsIndex] = kInterruptAccountingChannelNameNoThreadWakeups, + [kInterruptAccountingTotalThreadWakeupsIndex] = kInterruptAccountingChannelNameTotalThreadWakeups, + [kInterruptAccountingPackageWakeupsIndex] = kInterruptAccountingChannelNamePackageWakeups, + [kInterruptAccountingCPUWakeupsIndex] = kInterruptAccountingChannelNameCPUWakeups, + [kInterruptAccountingIdleExitsIndex] = kInterruptAccountingChannelNameIdleExits, +}; + +/* + * IOReporting group names. + */ +static const char * const kInterruptAccountingGroupName = "Interrupt Statistics (by index)"; + +/* + * TODO: Generate the subgroup name strings? + */ +#define IA_MAX_SUBGROUP_NAME (32) + +static const char * const kInterruptAccountingSubgroupNames[IA_MAX_SUBGROUP_NAME] = { + "0", "1", "2" , "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31"}; + +/* + * As long as we use a lookup table, we may be out of bounds for a valid index. In this case, fall + * back on a generic subgroup name that indicates we have screwed up. + */ +static const char * const kInterruptAccountingGenericSubgroupName = "(Index > 31)"; + +/* + * For updating the statistics in the data structure. We cannot guarantee all of our platforms will be + * able to do a 64-bit store in a single transaction. So, for new platforms, call out to the hardware + * atomic add routine; it will either be unsupported, or do the right thing. For architectures or + * platforms that do support it, just do regular assignment.
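A user-space sketch of that conditional update pattern (the macro in question, IA_ADD_VALUE, is defined just below; C11 atomics stand in for the kernel's OSAddAtomic64, and the handler name is hypothetical):

```c
/* Sketch of the IA_ADD_VALUE split: a plain read-modify-write where an
 * aligned 64-bit store is a single transaction, a hardware atomic add
 * elsewhere (OSAddAtomic64 in the kernel, stdatomic here). */
#include <stdint.h>
#include <stdatomic.h>

static _Atomic uint64_t first_level_count;

void interrupt_fired(void) /* hypothetical first-level handler hook */
{
#if defined(__x86_64__) || defined(__arm64__)
	/* an aligned 64-bit store is single-copy atomic on these targets */
	atomic_store_explicit(&first_level_count,
	    atomic_load_explicit(&first_level_count, memory_order_relaxed) + 1,
	    memory_order_relaxed);
#else
	atomic_fetch_add_explicit(&first_level_count, 1, memory_order_relaxed);
#endif
}
```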
+ * + * We use this routine instead of a lock because at the moment, there is no way (in the interrupt context) + * to reconcile a lock (even a spinlock) with the IOReporting synchronization (as we have no guarantee that + * IOReporting will not block on a mutex, which would result in a panic if it held a spinlock). This + * means that reported values may have a disparity if we update the reporter values while an interrupt is + * being handled. + * + * Atomic modification should not be strictly required, as a given interrupt should not be dispatched to + * two processors at once (and the interrupt should serve to force out stores), and the second level + * handler should be synchronized by the work loop it runs on. + */ +#if __x86_64__ || __arm64 +#define IA_ADD_VALUE(target, value) \ + (*(target) += (value)) +#else +#define IA_ADD_VALUE(target, value) \ + (OSAddAtomic64((value), (target))) +#endif + +/* + * TODO: Should this be an OSObject? Or properly pull in its methods as member functions? + */ +struct IOInterruptAccountingData { + OSObject * owner; /* The owner of the statistics; currently always an IOIES or a subclass of it */ + queue_chain_t chain; + /* + * We have no guarantee that the owner will not temporarily mutate its index value (i.e, in setWorkLoop + * for IOIES). To ensure we can properly recalculate our own identity (and our channel IDs for the + * reporter), stash the index we set up the reporter with here. + * + * Note that we should never remap the interrupt (point it to a different specifier). The mutation of + * the index value is usually to negate it; I am uncertain of the reason for this at the moment. The + * practical impact being that we should never need to update the stashed index value; it should stay + * valid for the lifetime of the owner. + */ + int interruptIndex; + + /* + * As long as we are based on the simple reporter, all our channels will be 64 bits. Align the data + * to allow for safe atomic updates (we don't want to cross a cache line on any platform, but for some + * it would cause a panic). + */ + volatile uint64_t interruptStatistics[IA_NUM_INTERRUPT_ACCOUNTING_STATISTICS] __attribute__((aligned(8))); +}; + +/* + * Initializes global values/structures related to interrupt accounting. + */ +void interruptAccountingInit(void); + +/* + * Routines for adding and removing objects from the global queue of IOInterruptAccountingData objects; + * the queue exists as a debugging aid (no entities other than these routines should care about the + * queue at runtime). + */ +void interruptAccountingDataAddToList(IOInterruptAccountingData * data); +void interruptAccountingDataRemoveFromList(IOInterruptAccountingData * data); + +/* + * Updates reporter with the statistics contained within data. Invoked when IOReporting has been asked + * for updated statistics; requiring explicit synchronization of data between the statistic fields and + * the reporter helps keep interrupt accounting overhead down. + */ +void interruptAccountingDataUpdateChannels(IOInterruptAccountingData * data, IOSimpleReporter * reporter); + +/* + * Initializes the statistics in data using the statistics currently held by reporter. Typically invoked + * when data is first associated with reporter.
The nub that an interrupt is associated with will be + * longer lived than the interrupt; as a result, our owner may not be the first to register for a + * particular interrupt index with that nub, so we need to inherit the existing statistics (as we describe + * statistics in terms of {nub id, index}, not in terms of our owner). + */ +void interruptAccountingDataInheritChannels(IOInterruptAccountingData * data, IOSimpleReporter * reporter); + +#endif /* __IOKIT_IOINTERRUPTACCOUNTING_PRIVATE_H */ + diff --git a/iokit/IOKit/IOInterruptEventSource.h b/iokit/IOKit/IOInterruptEventSource.h index 2e1a82765..553eb4104 100644 --- a/iokit/IOKit/IOInterruptEventSource.h +++ b/iokit/IOKit/IOInterruptEventSource.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,6 +42,8 @@ HISTORY class IOService; +struct IOInterruptAccountingData; + /*! @class IOInterruptEventSource : public IOEventSource @abstract Event source for interrupt delivery to work-loop based drivers. @discussion The IOInterruptEventSource is a generic object that delivers calls to interrupt routines in its client in a guaranteed single-threaded manner. IOInterruptEventSource is part of the IOKit $link IOWorkLoop infrastructure, where the semantic is that one and only one action method is executing within a work-loop's event chain. @@ -96,7 +98,9 @@ protected: /*! @struct ExpansionData @discussion This structure will be used to expand the capabilities of the IOWorkLoop in the future. */ - struct ExpansionData { }; + struct ExpansionData { + IOInterruptAccountingData * statistics; + }; /*! @var reserved Reserved for future use. (Internal use only) */ @@ -203,6 +207,7 @@ state when checkForWork is called. */ private: IOReturn registerInterruptHandler(IOService *inProvider, int inIntIndex); + void unregisterInterruptHandler(IOService *inProvider, int inIntIndex); private: OSMetaClassDeclareReservedUnused(IOInterruptEventSource, 0); diff --git a/iokit/IOKit/IOKernelReportStructs.h b/iokit/IOKit/IOKernelReportStructs.h new file mode 100644 index 000000000..e02aa4ab0 --- /dev/null +++ b/iokit/IOKit/IOKernelReportStructs.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2012-2014 Apple Computer, Inc. All Rights Reserved.
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +// Internal data structures to be used by IOReporters and User Space Observers + + +#ifndef _IOKERNELREPORTSTRUCTS_H_ +#define _IOKERNELREPORTSTRUCTS_H_ + +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define kIOReportAPIVersion 28 + +// Drivers participating in IOReporting can advertise channels by +// publishing properties in the I/O Kit registry. Various helper +// mechanisms exist to produce correctly-formatted legends. +// 12836893 tracks declaring channels in user space. +#define kIOReportLegendPublicKey "IOReportLegendPublic" // bool +#define kIOReportLegendKey "IOReportLegend" // arr +#define kIOReportLegendChannelsKey "IOReportChannels" // arr +#define kIOReportLegendGroupNameKey "IOReportGroupName" // str +#define kIOReportLegendSubGroupNameKey "IOReportSubGroupName" // str +#define kIOReportLegendInfoKey "IOReportChannelInfo" // dict +#define kIOReportLegendUnitKey "IOReportChannelUnit" // num +#define kIOReportLegendConfigKey "IOReportChannelConfig" // data +#define kIOReportLegendStateNamesKey "IOReportChannelStateNames" // str[] + +// in an I/O Kit registry legend, a small "array struct" represents a channel +#define kIOReportChannelIDIdx 0 // required +#define kIOReportChannelTypeIdx 1 // required +#define kIOReportChannelNameIdx 2 // optional + +// We are currently (internally) limited to 15 (broad!) categories. + + +/* + Units / Scaling Factors + + 1. Implementation Details + 2. Unit Constants (kIOReportUnit...) for clients + + Please file radars if you need more units (IOReporting | X) +*/ + +// 1. Implementation Details +// We are likely to someday support IOReporting data as stored binary data. +// Don't change existing values lest that data become unreadable. + +typedef uint64_t IOReportUnits; +#define __IOR_MAKEUNIT(quantity, scale) \ + (((IOReportUnits)quantity << 56) | (uint64_t)scale) +#define IOREPORT_GETUNIT_QUANTITY(unit) \ + ((IOReportQuantity)((uint64_t)unit >> 56) & 0xff) +#define IOREPORT_GETUNIT_SCALE(unit) \ + ((IOReportScaleFactor)unit & 0x00ffffffffffffff) + +// 8b quantity + 32b const + 8b * 2^10 + 8b * 2^n + 8b cardinal + 8b unused +typedef uint8_t IOReportQuantity; // SI "quantity" is what's measured +typedef uint64_t IOReportScaleFactor; + +// See for a list +// of quantities and their symbols. +enum { + // used by state reports, etc + kIOReportQuantityUndefined = 0, + + kIOReportQuantityTime = 1, // Seconds + kIOReportQuantityPower = 2, // Watts + kIOReportQuantityEnergy = 3, // Joules + kIOReportQuantityCurrent = 4, // Amperes + kIOReportQuantityVoltage = 5, // Volts + kIOReportQuantityCapacitance = 6, // Farad + kIOReportQuantityInductance = 7, // Henry + kIOReportQuantityFrequency = 8, // Hertz + kIOReportQuantityData = 9, // bits/bytes (see scale) + kIOReportQuantityTemperature = 10, // Celsius (not Kelvin :) + + kIOReportQuantityEventCount = 100, + kIOReportQuantityPacketCount = 101 +}; + + +/* A number of units end up with both IEC (2^n) and SI (10^n) scale factors. + For example, the "MB" of a 1.44 MB floppy or a 1024MHz clock. We + thus support separate 2^n and 10^n factors. The exponent encoding + scheme is modeled loosely on single-precision IEEE 754. 
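Before the scale constants themselves, a round trip through the quantity/scale packing defined above (self-contained sketch: the __IOR_MAKEUNIT macro is mirrored from this header, and kIOReportScaleMilli's value is precomputed inline from the exponent encoding described next):

```c
/* Sketch of the IOReportUnits packing: SI "quantity" in the top byte,
 * scale factor in the low 56 bits. 0x7C00000000 is kIOReportScaleMilli,
 * i.e. (-3 + 127) << 32 under the exponent encoding below. */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t IOReportUnits;

#define __IOR_MAKEUNIT(quantity, scale) \
	(((IOReportUnits)(quantity) << 56) | (uint64_t)(scale))

int main(void)
{
	const uint64_t kQuantityTime = 1;                       /* kIOReportQuantityTime */
	const uint64_t kScaleMilli = ((uint64_t)(-3 + 127)) << 32;
	IOReportUnits unit_ms = __IOR_MAKEUNIT(kQuantityTime, kScaleMilli);

	printf("quantity=%llu scale=0x%014llx unit=0x%016llx\n",
	    (unsigned long long)(unit_ms >> 56),
	    (unsigned long long)(unit_ms & 0x00ffffffffffffffULL),
	    (unsigned long long)unit_ms);
	return 0;
}
```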
+ */
+#define kIOReportScaleConstMask 0x000000007fffffff // constant ("uint31")
+#define kIOReportScaleOneOver (1LL << 31) // 1/constant
+#define kIOReportExpBase (-127) // support base^(-n)
+#define kIOReportExpZeroOffset -(kIOReportExpBase) // max exponent = 128
+#define kIOReportScaleSIShift 32 // * 10^n
+#define kIOReportScaleSIMask 0x000000ff00000000
+#define kIOReportScaleIECShift 40 // * 2^n
+#define kIOReportScaleIECMask 0x0000ff0000000000
+#define kIOReportCardinalShift 48 // placeholders
+#define kIOReportCardinalMask 0x00ff000000000000
+
+
+/*
+ Scales are described as a factor times unity:
+ 1ms = kIOReportScaleMilli * s
+
+ A value expressed in a scaled unit can be scaled to unity via
+ multiplication by the constant:
+ 100ms * kIOReportScaleMilli [1e-3] = 0.1s.
+*/
+
+// SI / decimal
+#define kIOReportScalePico ((-12LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+#define kIOReportScaleNano ((-9LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+#define kIOReportScaleMicro ((-6LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+#define kIOReportScaleMilli ((-3LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+#define kIOReportScaleUnity 0 // 10^0 = 2^0 = 1
+// unity = 0 is a special case for which we give up exp = -127
+#define kIOReportScaleKilo ((3LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+#define kIOReportScaleMega ((6LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+#define kIOReportScaleGiga ((9LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+#define kIOReportScaleTera ((12LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleSIShift)
+
+// IEC / computer / binary
+// It's not clear we'll ever use 2^(-n), but 1..2^~120 should suffice.
+#define kIOReportScaleBits kIOReportScaleUnity
+#define kIOReportScaleBytes ((3LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+// (bytes have to be added to the exponents up front, can't just OR in)
+#define kIOReportScaleKibi ((10LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+#define kIOReportScaleKiBytes ((13LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+#define kIOReportScaleMebi ((20LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+#define kIOReportScaleMiBytes ((23LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+#define kIOReportScaleGibi ((30LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+#define kIOReportScaleGiBytes ((33LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+#define kIOReportScaleTebi ((40LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+#define kIOReportScaleTiBytes ((43LL + kIOReportExpZeroOffset) \
+ << kIOReportScaleIECShift)
+// can't encode more than 2^125 (keeping bits & bytes inside -126..128)
+// Also, IOReportScaleValue() is currently limited internally by uint64_t.
+
+
+// Cardinal values, to be filled in appropriately.
+// Add values in increasing order.
+#define kIOReportScaleMachHWTicks (1LL << kIOReportCardinalShift)
+#define kIOReportScaleHWPageSize (2LL << kIOReportCardinalShift)
+
+// page scales: 2 pages * 4 KiB/page = 8192 bytes
+#define kIOReportScale4KiB (4 | kIOReportScaleKiBytes)
+#define kIOReportScale8KiB (8 | kIOReportScaleKiBytes)
+
+// Clock frequency scales (units add seconds).
+// 1 GHz ticks are 1 ns: 1000 ticks * 1e-9 [s/tick] = 1e-6 s
+// The '1' is a no-op, but allows a custom label.
+#define kIOReportScale1GHz (1 | kIOReportScaleNano) +// 24MHz ticks are 1/24 of a microsecond: (1/24 * kIOReportScaleMicro [1e-6])s +// So for example, 240 24Mticks * 1/24 * 1e-6 = .00001s [1e-5]s +#define kIOReportScale24MHz (kIOReportScaleOneOver|24 |kIOReportScaleMicro) + +// --- END: implementation details + +// 2. Units Constants +// --- BEGIN: units constants driver writers might use +#define kIOReportUnitNone __IOR_MAKEUNIT(kIOReportQuantityUndefined, \ + kIOReportScaleUnity) + +#define kIOReportUnit_s __IOR_MAKEUNIT(kIOReportQuantityTime, \ + kIOReportScaleUnity) +#define kIOReportUnit_ms __IOR_MAKEUNIT(kIOReportQuantityTime, \ + kIOReportScaleMilli) +#define kIOReportUnit_us __IOR_MAKEUNIT(kIOReportQuantityTime, \ + kIOReportScaleMicro) +#define kIOReportUnit_ns __IOR_MAKEUNIT(kIOReportQuantityTime, \ + kIOReportScaleNano) + +#define kIOReportUnit_J __IOR_MAKEUNIT(kIOReportQuantityEnergy, \ + kIOReportScaleUnity) +#define kIOReportUnit_mJ __IOR_MAKEUNIT(kIOReportQuantityEnergy, \ + kIOReportScaleMilli) +#define kIOReportUnit_uJ __IOR_MAKEUNIT(kIOReportQuantityEnergy, \ + kIOReportScaleMicro) +#define kIOReportUnit_nJ __IOR_MAKEUNIT(kIOReportQuantityEnergy, \ + kIOReportScaleNano) +#define kIOReportUnit_pJ __IOR_MAKEUNIT(kIOReportQuantityEnergy, \ + kIOReportScalePico) + +#define kIOReportUnitHWTicks __IOR_MAKEUNIT(kIOReportQuantityTime, \ + kIOReportScaleMachHWTicks) +#define kIOReportUnit24MHzTicks __IOR_MAKEUNIT(kIOReportQuantityTime, \ + kIOReportScale24MHz) +#define kIOReportUnit1GHzTicks __IOR_MAKEUNIT(kIOReportQuantityTime, \ + kIOReportScale1GHz) + +#define kIOReportUnitBits __IOR_MAKEUNIT(kIOReportQuantityData, \ + kIOReportScaleBits) +#define kIOReportUnitBytes __IOR_MAKEUNIT(kIOReportQuantityData, \ + kIOReportScaleBytes) +#define kIOReportUnit_KiB __IOR_MAKEUNIT(kIOReportQuantityData, \ + kIOReportScaleKiBytes) + +#define kIOReportUnitEvents __IOR_MAKEUNIT(kIOReportQuantityEventCount, \ + kIOReportScaleUnity) + +#define kIOReportUnitPackets __IOR_MAKEUNIT(kIOReportQuantityPacketCount, \ + kIOReportScaleUnity) + +// Please file radars if you need more units (IOReporting | X) + +// --- END: unit constants driver writers might use + +/* Histogram Segment Configuration + Currently supports 2 types of scaling to compute bucket upper bounds, + linear or exponential. + scale_flag = 0 -> linear scale + 1 -> exponential scale + upper_bound[n] = (scale_flag) ? pow(base,(n+1)) : base * (n+1); +*/ +#define kIOHistogramScaleLinear 0 +#define kIOHistogramScaleExponential 1 +typedef struct { + uint32_t base_bucket_width; // segment[0].bucket[0] = [0, base_width] + uint32_t scale_flag; // bit 0 only in current use (see #defs) + uint32_t segment_idx; // for multiple segments histograms + uint32_t segment_bucket_count; // number of buckets in this segment +} __attribute((packed)) IOHistogramSegmentConfig; + +// "normalized distribution"(FIXME?) internal format (unused?) +typedef struct { + uint64_t samples; + uint64_t mean; + uint64_t variance; + uint64_t reserved; +} __attribute((packed)) IONormDistReportValues; + +#ifdef __cplusplus +} +#endif + +#endif // _IOKERNELREPORTSTRUCTS_H_ diff --git a/iokit/IOKit/IOKernelReporters.h b/iokit/IOKit/IOKernelReporters.h new file mode 100644 index 000000000..de529f405 --- /dev/null +++ b/iokit/IOKit/IOKernelReporters.h @@ -0,0 +1,1663 @@ +/* + * Copyright (c) 2012-2014 Apple Computer, Inc. All Rights Reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * FILE: IOReporter.h + * AUTH: Cyril & Soren (Core OS) + * DATE: 2012-2013 (Copyright Apple Inc.) + * DESC: IOReporting interfaces for I/O Kit drivers + * + */ + +#ifndef _IOKERNEL_REPORTERS_H_ +#define _IOKERNEL_REPORTERS_H_ + +#include + +#include +#include +#include +#include + +#include +#include + +typedef OSDictionary IOReportLegendEntry; + +/******************************* + TOC: this file contains + 1. Introduction + 2a. IOReporter class declaration (public & non-public members) + 2b. static IOReporter methods unrelated to the class + 3. IOReporter subclass declarations (public & non-public members) + 4. IOReportLegend class declaration +*******************************/ + +/*! + 1. Introduction + + IOReporting is a mechanism for I/O Kit drivers to gather statistics + (or other information) and make it available to various "observers," + which are generally in user space. Requests for information come + through two new IOService methods: ::configureReport(...) and + ::updateReport(...). While not required (see IOReportTypes.h), drivers + will generally use IOReporter subclass instances to track the requested + information and respond to IOReporting requests. Drivers can use these + classes to track information, either all the time or between "enable" + and "disable" calls to IOService::configureReport(). + + Available information is organized into "channels." A channel is + uniquely identified by both driver (registry) ID and a 64-bit channel + ID. One way drivers can advertise their channels is by publishing + "legends" in the I/O Kit registry. In addition to collecting + information and responding to queries, IOReporter objects can produce + legend entries describing their channels. The IOReportLegend class + helps manage legend entries from multiple reporter objects as well + as with grouping channels logically for observers. + + An important basic constraint of the current implementation is that + all channels reported by a particular reporter instance must share all + traits except channel ID and name. Specifically, the channel type + (including report class, categories, & size) and units. Additionally, + IOHistogramReporter currently only supports one channel at a time. 
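+
+ As a minimal sketch (MyDriver, its _reporter ivar, and the channel
+ ID are hypothetical; error handling is elided), a driver might set
+ up a reporter like so:
+
+ // in MyDriver::start(); the category and unit constants come from
+ // IOReportTypes.h and IOKernelReportStructs.h
+ _reporter = IOSimpleReporter::with(this, kIOReportCategoryPower,
+ kIOReportUnit_mJ);
+ if (_reporter)
+ _reporter->addChannel(IOREPORT_MAKEID('e','n','e','r','g','y',' ','1'));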
+ + Currently, ::{configure/update}Report() can be called any time between + when a driver calls registerService() and when free() is called on + your driver. 12960947 tracks improvements / recommendations for + correctly handling these calls during termination. + + * Locking + IOReporting only imposes concurrent access constraints when multiple + threads are accessing the same object. Three levels of constraint apply + depending on a method's purpose: + 1. Allocation/Teardown - same-instance concurrency UNSAFE, MAY BLOCK + 2. Configuration - same-instance concurrency SAFE, MAY BLOCK + 3. Update - same-instance concurrency SAFE, WILL NOT BLOCK + + Configuration requires memory management which can block and must + be invoked with interrupts ENABLED (for example, NOT in the interrupt + context NOR with a spin lock -- like IOSimpleLock -- held). + + Updates can be performed with interrupts disabled, but clients should + take into account that IOReporters' non-blocking currenency is achieved + with IOSimpleLockLockDisable/UnlockEnableInterrupts(): that is, by + disabling interrupts and taking a spin lock. While IOReporting will + never hold a lock beyond a call into it, some time may be spent within + the call spin-waiting for the lock. Clients holding their own + spin locks should carefully consider the impact of IOReporting's + (small) additional latency before calling it while holding a spin lock. + + The documentation for each method indicates any concurrency guarantees. + */ + + +/*********************************/ +/*** 2a. IOReporter Base Class ***/ +/*********************************/ + +class IOReporter : public OSObject +{ + OSDeclareDefaultStructors(IOReporter); + +protected: +/*! @function IOReporter::init + @abstract base init() method, called by subclass initWith() methods + + @param reportingService - IOService associated with all channels + @param channelType - type info for all channels (element_idx = 0) + @param unit - description applied for all channels + @result true on success, false otherwise + + @discussion + init() establishes the parameters of all channels for this reporter + instance. Any channels added via addChannel() will be of this type + and have this unit. + + IOReporter clients should use the static ::with() methods + below to obtain fully-initialized reporter instances. ::free() + expects ::init() to have completed successfully. On failure, any + allocations are cleaned up. + + Locking: same-instance concurrency UNSAFE +*/ + virtual bool init(IOService *reportingService, + IOReportChannelType channelType, + IOReportUnits unit); + +public: + +/*! @function IOReporter::addChannel + @abstract add an additional, similar channel to the reporter + + @param channelID - identifier for the channel to be added + @param channelName - an optional human-readble name for the channel + @result appropriate IOReturn code + + @discussion + The reporter will allocate memory to track a new channel with the + provided ID and name (if any). Its other traits (type, etc) will + be those provided when the reporter was initialized. If no channel + name is provided and the channelID consists solely of ASCII bytes, + those bytes (ignoring any NUL bytes) will be used as the + human-readable channel name in user space. The IOREPORT_MAKEID() + macro in IOReportTypes.h can be used to create ASCII channel IDs. + + Locking: same-instance concurrency SAFE, MAY BLOCK +*/ + IOReturn addChannel(uint64_t channelID, const char *channelName = NULL); + +/*! 
@function IOReporter::createLegend
+ @abstract create a legend entry representing this reporter's channels
+ @result An IOReportLegendEntry object or NULL on failure.
+ @discussion
+ All channels added to the reporter will be represented
+ in the resulting legend entry.
+
+ Legends must be published together as an array under the
+ kIOReportLegendKey in the I/O Kit registry. The IOReportLegend
+ class can be used to properly combine legend entries from multiple
+ reporters as well as to put channels into groups of interest to
+ observers. When published, individual legend entries share
+ characteristics such as group and sub-group. Multiple IOReporter
+ instances are required to produce independent legend entries which
+ can then be published with different characteristics.
+
+ Drivers wishing to publish legends should do so as part of their
+ ::start() routine. As superclasses *may* have installed legend
+ entries, any existing legend should be retrieved and
+ IOReportLegend used to merge it with the new entries.
+
+ Recommendations for best practices are forthcoming.
+
+ Instead of calling createLegend on your reporter object and then
+ appending it manually to IOReportLegend, one may prefer to call
+ IOReportLegend::appendReporterLegend which creates and appends a
+ reporter's IOReportLegendEntry in a single call.
+
+ Locking: same-instance concurrency SAFE, MAY BLOCK
+*/
+ IOReportLegendEntry* createLegend(void);
+
+/*! @function IOReporter::configureReport
+ @abstract track IOService::configureReport(), provide sizing info
+
+ @param channelList - channels to configure
+ @param action - enable/disable/size, etc (see IOReportTypes.h)
+ @param result - *incremented* for kIOReportGetDimensions
+ @param destination - action-specific default destination
+ @result appropriate IOReturn code
+
+ @discussion
+ Any time a reporting driver's ::configureReport method is invoked,
+ this method should be invoked on each IOReporter that is being
+ used by that driver to report channels in channelList.
+
+ Any channels in channelList which are not tracked by this reporter
+ are ignored. ::configureReport(kIOReportGetDimensions) expects
+ the full size of all channels, including any reported by
+ superclasses. It is valid to call this routine on multiple
+ reporter objects in succession and they will increment 'result'
+ to provide the correct total.
+
+ In the initial release, this routine is only required to calculate
+ the response to kIOReportGetDimensions, but in the future it
+ will enable functionality like "triggered polling" via
+ kIOReportNotifyHubOnChange. Internally, it is already keeping
+ track of the number of times each channel has been enabled and
+ disabled. 13073064 tracks adding a method to see whether any
+ channels are currently being observed.
+
+ The static IOReporter::configureAllReports() will call this method
+ on multiple reporters grouped in an OSSet.
+
+ Locking: same-instance concurrency SAFE, MAY BLOCK
+*/
+ IOReturn configureReport(IOReportChannelList *channelList,
+ IOReportConfigureAction action,
+ void *result,
+ void *destination);
+
+/*! @function IOReporter::updateReport
+ @abstract Produce standard reply to IOService::updateReport()
+
+ @param channelList - channels to update
+ @param action - copy/trace data (see IOReportTypes.h)
+ @param result - action-specific return value (e.g.
size of data) + @param destination - destination for this update (action-specific) + @result appropriate IOReturn code + + @discussion + This method searches channelList for channels tracked by this + reporter, writes the corresponding data into 'destination', and + updates 'result'. It should be possible to pass a given set of + IOService::updateReport() arguments to any and all reporters as + well as to super::updateReport() and get the right result. + + The static IOReporter::updateAllReports() will call this method + on an OSSet of reporters. + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + IOReturn updateReport(IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination); + +/*! @function IOReporter::free + @abstract Releases the object and all its resources. + + @discussion + ::free() [called on last ->release()] assumes that init() [called + by static ::with() methods] has completed successfully. + + Locking: same-instance concurrency UNSAFE +*/ + virtual void free(void); + + +/*********************************/ +/*** 2b. Useful Static Methods ***/ +/*********************************/ + +/* The following static functions are intended to simplify the management + * of multiple reporters. They may be superseded in the future by an + * IOReportManager class. + */ + +/*! @function IOReporter::configureAllReports + @abstract call configureReport() on multiple IOReporter objects + + @param reporters - OSSet of IOReporter objects + @param channelList - full list of channels to configure + @param action - enable/disable/size, etc + @param result - action-specific returned value + @param destination - action-specific default destination + @result success if all objects successfully complete + IOReporter::configureReport() + + @discussion + The OSSet must only contain IOReporter instances. The presence + of non-IOReporter instances will cause this function to return + kIOReturnBadArgument. If any reporter returns an error, the + function will immediately return that error. + + Per the IOReporter::configureReport() documentation, each + reporter will search channelList for channels it is reporting + and provide a partial response. +*/ + static IOReturn configureAllReports(OSSet *reporters, + IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination); +// FIXME: just put the function (inline-ish) here? + +/*! @function IOReporter::updateAllReports + @abstract call updateReport() on multiple IOReporter objects + + @param reporters - OSSet of IOReporter objects + @param channels - full list of channels to update + @param action - type/style of update + @param result - returned details about what was updated + @param destination - destination for this update (action-specific) + @result IOReturn code + @discussion + The OSSet must only contain IOReporter instances. The presence + of non-IOReporter instances will cause this function to return + kIOReturnBadArgument. If any reporter returns an error, the + function will immediately return that error. + + Per the IOReporter::configureReport() documentation, each + reporter will search channelList for channels it is reporting + and provide a partial response. +*/ + static IOReturn updateAllReports(OSSet *reporters, + IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination); +// FIXME: just put the function (inline-ish) here? 
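+
+/* A minimal sketch of the forwarding pattern these statics support
+ (the MyDriver class and its _reporters OSSet ivar are hypothetical):
+
+ IOReturn
+ MyDriver::updateReport(IOReportChannelList *channels,
+ IOReportConfigureAction action,
+ void *result, void *destination)
+ {
+ // let each reporter fill in the channels it tracks, then let
+ // the superclass handle anything it reports itself
+ IOReporter::updateAllReports(_reporters, channels, action,
+ result, destination);
+ return super::updateReport(channels, action, result, destination);
+ }
+
+ ::configureReport() forwards to configureAllReports() the same way.
+*/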
+
+
+ /* Protected (subclass-only) Methods
+
+ General subclassing is not encouraged as we intend to improve
+ internal interfaces. If you need something that might require
+ a subclass, please file a bug against IOReporting/X and we will
+ help you.
+
+ One important concept for sub-classes (not clients) is that report
+ data is stored in IOReportElement structures (see IOReportTypes.h).
+ */
+protected:
+
+/*! @function IOReporter::lockReporterConfig
+ @function IOReporter::unlockReporterConfig
+ @abstract prevent concurrent reconfiguration of a reporter
+
+ @discussion
+ lockReporterConfig() takes a mutex-based lock intended to prevent
+ concurrent access to the reporter's configuration. It is not
+ intended to prevent updates to the reporter's data. As long as
+ all other requirements are met, it is safe to simultaneously hold
+ both the configuration and data locks on a single reporter.
+
+ lockReporterConfig() is used by routines such as addChannel().
+ See also lockReporter() and ::handle*Swap*() below.
+*/
+ void lockReporterConfig(void);
+ void unlockReporterConfig(void);
+
+/*! @function IOReporter::lockReporter
+ @function IOReporter::unlockReporter
+ @abstract prevent concurrent access to a reporter's data
+
+ @discussion
+ This method grabs a lock intended to control access to the
+ reporter's reporting data. Sub-classes manipulating internal
+ report values must make sure the reporter is locked (usually by
+ the most generic public interface) before calling getElementValues(),
+ copyElementValues(), or setElementValues().
+
+ Subclasses should ensure that this lock is taken exactly once
+ before directly accessing reporter data. For example,
+ [virtual] IOFooReporter::handleSetFoo(.) {
+ // assert(lock_held)
+ getElementValues(1..)
+ getElementValues(3..)
+ getElementValues(5..)
+ [calculate]
+ setElementValues(6..)
+ }
+ IOFooReporter::setFoo(.) { // not virtual
+ lockReporter()
+ handleSetFoo(.)
+ unlockReporter()
+ }
+
+ IOReporter::handle*() use lockReporter() similarly. For example,
+ the lock is taken by IOReporter::updateReport() and is already
+ held by the time any ::updateChannelValues() methods are called.
+
+ Subclasses cannot call this routine if the lock is already held.
+ That's why IOReporting generally only calls it from non-virtual
+ public methods. In particular, this method should not be called
+ from ::handle*() methods, which exist to allow override after
+ the lock is taken.
+
+ Because lockReporter() uses a spin lock, it is SAFE to use in the
+ interrupt context. For the same reason, however, it is UNSAFE
+ to perform any blocking operations (including memory
+ allocations) while holding this lock.
+*/
+ void lockReporter(void);
+ void unlockReporter(void);
+
+/*!
+ @discussion
+ The ::handle*Swap* functions allow subclasses to safely reconfigure
+ their internal state. A non-virtual function handles locking
+ and invokes the functions in order:
+ - lockReporterConfig() // protecting instance vars but not content
+ - prepare / allocate buffers of the new size
+ - if error, bail (unlocking, of course)
+
+ - lockReporter() // protecting data / blocking updates
+ - swap: preserve continuing data / install new buffers
+ - unlockReporter()
+
+ - deallocate now-unused buffers
+ - unlockReporterConfig()
+*/
+/*!
@function IOReporter::handleSwapPrepare
+ @abstract allocate memory in preparation for an instance variable swap
+
+ @param newNChannels target number of channels
+ @result IOReturn code
+
+ @discussion
+ ::handleSwapPrepare() is responsible for allocating appropriately-
+ sized buffers (based on the new number of channels) and storing
+ them in _swap* instance variables. If returning an error, it
+ must deallocate any buffers and set to NULL any _swap* variables.
+
+ Locking: The caller must ensure that the *config* lock is HELD but
+ that the reporter (data) lock is *NOT HELD*.
+*/
+ virtual IOReturn handleSwapPrepare(int newNChannels);
+
+/*! @function IOReporter::handleAddChannelSwap
+ @abstract update primary instance variables with new buffers
+
+ @param channelID ID of channel being added
+ @param channelName optional channel name, in an allocated object
+ @result IOReturn code
+
+ @discussion
+ handleAddChannelSwap() replaces the primary instance variables
+ with buffers allocated in handleSwapPrepare(). It copies the
+ existing data into the appropriate portion of the new buffers.
+ Because it is specific to adding one channel, it assumes that the
+ target number of channels is one greater than the current value
+ of _nChannels.
+
+ IOReporter::handleAddChannelSwap() increments _nElements and
+ _nChannels. To ensure that these variables describe the current
+ buffers throughout ::handle*Swap(), subclasses overriding this
+ method should call super::handleAddChannelSwap() after swapping
+ their own instance variables.
+
+ If returning an error, all implementations should leave their
+ instance variables as they found them (*unswapped*). That ensures
+ handleSwapCleanup() cleans up the unused buffers regardless of
+ whether the swap was complete.
+
+ Pseudo-code incorporating these suggestions:
+ res = ; swapComplete = false;
+ if () goto finish
+ tmpBuf = _primaryBuf; _primaryBuf = _swapBuf; _swapBuf = tmpBuf;
+ ...
+ swapComplete = true;
+ res = super::handle*Swap()
+ ...
+ finish:
+ if (res && swapComplete) // unswap
+
+ Locking: The caller must ensure that BOTH the configuration and
+ reporter (data) locks are HELD.
+*/
+ virtual IOReturn handleAddChannelSwap(uint64_t channel_id,
+ const OSSymbol *symChannelName);
+
+/*! @function IOReporter::handleSwapCleanup
+ @abstract release and forget unused buffers
+
+ @param swapNChannels channel-relative size of the _swap buffers
+
+ @discussion
+ ::handleSwapCleanup() is responsible for deallocating the buffers
+ no longer used after a swap. It must always be called if
+ SwapPrepare() completes successfully. Because buffers may be
+ swapped in and out of existence, the _swap* variables may be
+ NULL and should be set to NULL when complete.
+
+ Locking: The caller must ensure that the *config* lock is HELD but
+ that the reporter (data) lock is *NOT HELD*.
+*/
+ virtual void handleSwapCleanup(int swapNChannels);
+
+/*! @function IOReporter::handleConfigureReport
+ @abstract override vector for IOReporter::configureReport()
+ [parameters and result should exactly match]
+
+ @discussion
+ The public base class method takes the reporter lock, calls this
+ function, and then drops the lock. Subclasses should not call
+ this function directly.
+*/
+ virtual IOReturn handleConfigureReport(IOReportChannelList *channelList,
+ IOReportConfigureAction action,
+ void *result,
+ void *destination);
+
+/*!
@function IOReporter::handleUpdateReport
+ @abstract override vector for IOReporter::updateReport()
+ [parameters and result should exactly match]
+
+ @discussion
+ The public base class method takes the reporter lock, calls this
+ function, and then drops the lock. Subclasses should not call
+ this function directly.
+
+ This function may be overridden but the common case should be to
+ simply update the reporter's specific values by overriding
+ IOReporter::updateChannelValues().
+*/
+ virtual IOReturn handleUpdateReport(IOReportChannelList *channelList,
+ IOReportConfigureAction action,
+ void *result,
+ void *destination);
+
+/*! @function IOReporter::handleCreateLegend
+ @abstract override vector for IOReporter::createLegend()
+ [parameters and result should exactly match]
+
+ @discussion
+ The public base class method takes the reporter lock, calls this
+ function, and then drops the lock. Subclasses should not call
+ this function directly.
+*/
+ virtual IOReportLegendEntry* handleCreateLegend(void);
+
+/*! @function IOReporter::updateChannelValues
+ @abstract update channel values for IOReporter::updateReport()
+
+ @param channel_index - logical (internal) index of the channel
+ @result appropriate IOReturn code
+
+ @discussion
+ Internal reporter method to allow a subclass to update channel
+ data when updateReport() is called. This routine handles the
+ common case of a subclass needing to refresh state in response
+ to IOReporter::updateReport(). It saves the complexity of
+ parsing the full parameters to IOReporter::updateReport().
+
+ The IOReporter base class implementation does not do anything
+ except return success.
+
+ Locking: IOReporter::updateReport() takes the reporter lock,
+ determines the indices involved, calls this function, and
+ then proceeds to provide values to the caller. If subclasses
+ need to call this routine directly, they must ensure that
+ the reporter (data) lock is held: see
+ IOReporter::lockReporter().
+*/
+ virtual IOReturn updateChannelValues(int channel_index);
+
+
+/*! @function IOReporter::updateReportChannel
+ @abstract Internal method to extract channel data to a destination
+
+ @param channel_index - offset into internal elements array
+ @param nElements - incremented by the number of IOReportElements added
+ @param destination - pointer to the destination buffer
+ @result IOReturn code
+
+ @discussion
+ updateReportChannel() is used to extract a single channel's
+ data to the updateReport() destination.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ IOReturn updateReportChannel(int channel_index,
+ int *nElements,
+ IOBufferMemoryDescriptor *destination);
+
+
+/*! @function IOReporter::setElementValues
+ @abstract Atomically update a specific member of _elements[].
+
+ @param element_index - index of the _element in internal array
+ @param values - IOReportElementValues to replace those at _elements[idx]
+ @param record_time - optional mach_absolute_time to be used for metadata
+ @result IOReturn code
+
+ @discussion
+ element_index can be obtained from getFirstElementIndex(). If
+ record_time is not provided, IOReporter::setElementValues() will
+ fetch the current mach_absolute_time. If the current time is
+ already known, it is more efficient to pass it along.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn setElementValues(int element_index,
+ IOReportElementValues *values,
+ uint64_t record_time = 0);
+
+/*!
@function IOReporter::getElementValues
+ @abstract Internal method to directly access the values of an element
+
+ @param element_index - index of the _element in internal array
+ @result A pointer to the element values requested or NULL on failure
+
+ @discussion
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+ The returned pointer is only valid until unlockReporter() is called.
+*/
+ virtual const IOReportElementValues* getElementValues(int element_index);
+
+/*! @function IOReporter::getFirstElementIndex
+ @abstract Returns the first element index for a channel
+
+ @param channel_id - ID of the channel
+ @param element_index - pointer to the returned element_index
+ @result appropriate IOReturn code
+
+ @discussion
+ For efficient, thread-safe reading of _elements
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn getFirstElementIndex(uint64_t channel_id,
+ int *element_index);
+
+/*! @function IOReporter::getChannelIndex
+ @abstract Returns the index of a channel from internal data structures
+
+ @param channel_id - ID of the channel
+ @param channel_index - pointer to the returned channel_index
+ @result appropriate IOReturn code
+
+ @discussion
+ For efficient, thread-safe reading of channels
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn getChannelIndex(uint64_t channel_id,
+ int *channel_index);
+
+/*! @function IOReporter::getChannelIndices
+ @abstract Returns the index of a channel and its corresponding
+ first element index from internal data structure
+
+ @param channel_id - ID of the channel
+ @param channel_index - pointer to the returned channel_index
+ @param element_index - pointer to the returned element_index
+ @result appropriate IOReturn code
+
+ @discussion
+ For efficient, thread-safe reading of channel elements.
+ It is commonly useful to get access to both channel and element
+ indices together. This convenience method allows sub-classes to
+ get both indices simultaneously.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn getChannelIndices(uint64_t channel_id,
+ int *channel_index,
+ int *element_index);
+
+/*! @function IOReporter::copyElementValues
+ @abstract Copies the values of an internal element to *elementValues
+
+ @param element_index - Index of the element to return values from
+ @param elementValues - For returning the content of element values
+ @result Returns the content of an element
+
+ @discussion
+ For efficient, thread-safe reading of _elements.
+ May need to find the index of the element first.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn copyElementValues(int element_index,
+ IOReportElementValues *elementValues);
+
+// private methods
+private:
+/*! @function IOReporter::copyChannelIDs
+ @abstract return an OSArray of the reporter's channel IDs
+
+ @param none
+ @result An OSArray of the reporter's channel IDs as OSNumbers
+
+ @discussion
+ This method is an internal helper function used to prepare a
+ legend entry. It encapsulates the channel IDs in OSNumbers and
+ aggregates them in an OSArray used when building the IOReportLegend
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ OSArray* copyChannelIDs(void);
+
+/*! @function IOReporter::legendWith
+ @abstract Internal method to help create legend entries
+
+ @param channelIDs - OSArray of OSNumber(uint64_t) channel IDs.
+ @param channelNames - parrallel OSArray of OSSymbol(rich names) + @param channelType - the type of all channels in this legend + @param unit - The unit for the quantity recorded by this reporter object + + @result An IOReportLegendEntry object or NULL on failure + + @discussion + This static method is the main legend creation function. It is called by + IOReporter sub-classes and is responsible for building an + IOReportLegendEntry corresponding to this reporter object. + This legend entry may be extended by the sub-class of IOReporter if + required. + + Locking: SAFE to call concurrently (no static globals), MAY BLOCK +*/ + static IOReportLegendEntry* legendWith(OSArray *channelIDs, + OSArray *channelNames, + IOReportChannelType channelType, + IOReportUnits unit); + +// protected instance variables (want to get rid of these) +protected: + IOReportChannelType _channelType; + uint64_t _driver_id; // driver reporting data + + // IOHistogramReporter accesses these; need to re-do its instantiation + IOReportElement *_elements; + int *_enableCounts; // refcount kIOReportEnable/Disable + uint16_t _channelDimension; // Max channel size + int _nElements; + int _nChannels; // Total Channels in this reporter + OSArray *_channelNames; + + // MUST be protected because check is a macro! + bool _reporterIsLocked; + bool _reporterConfigIsLocked; + + // Required for swapping inside addChannel + IOReportElement *_swapElements; + int *_swapEnableCounts; + +// private instance variables +private: + IOReportUnits _unit; + + int _enabled; // 'enabled' if _enabled > 0 + + IOLock *_configLock; + IOInterruptState _interruptState; + IOSimpleLock *_reporterLock; + +}; + + +/************************************/ +/***** 3. IOReporter Subclasses *****/ +/************************************/ + +/*! + @class IOSimpleReporter + @abstract Report simple integers + @discussion + Each IOSimpleReporter can have an arbitrary number of channels, + each publishing a single integer value at any given time. +*/ + +class IOSimpleReporter : public IOReporter +{ + OSDeclareDefaultStructors(IOSimpleReporter); + +public: + +/*! @function IOSimpleReporter::with + @abstract create an initialized simple reporter + + @param reportingService - IOService associated with all channels + @param categories - The category in which the report should be classified + @param unit - The unit for the quantity recorded by the reporter object + @result On success, an instance of IOSimpleReporter, else NULL + + @discussion + Creates an instance of IOSimpleReporter object + + Locking: SAFE to call concurrently (no static globals), MAY BLOCK. +*/ + static IOSimpleReporter* with(IOService *reportingService, + IOReportCategories categories, + IOReportUnits unit); + +/*! @function IOSimpleReporter::setValue + @abstract Thread safely set a channel's value + + @param channel_id - ID of the channel for which the value needs to be set + @param value - New channel value + @result Appropriate IOReturn code + + @discussion + Updates the value of a channel to the provided value. + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + IOReturn setValue(uint64_t channel_id, + int64_t value); + +/*! 
@function IOSimpleReporter::incrementValue + @abstract Thread safely increment a channel's value by a given amount + + @param channel_id - ID of the channel for which the value needs to be incremented + @param increment - Amount to be added to the current channel value + @result Appropriate IOReturn code + @discussion + Increments the value of the channel ID by the provided amount. + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + IOReturn incrementValue(uint64_t channel_id, + int64_t increment); + +/*! @function IOSimpleReporter::getValue + @abstract Thread safely access a channel value + + @param channel_id - ID of the channel to get a value from + @result Returns the current value stored in the channel + @discussion + Accessor method to a channel's current stored value + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + int64_t getValue(uint64_t channel_id); + +protected: + +/*! @function IOSimpleReporter::initWith + @abstract instance method implementation called by IOSimpleReporter::with + + @discussion + See description of parameters above + + Locking: same-instance concurrency UNSAFE +*/ + virtual bool initWith(IOService *reportingService, + IOReportCategories categories, + IOReportUnits unit); + +private: + +}; + + + +/*! + @class IOStateReporter + @abstract Report state machine data + @discussion + Each IOStateReporter can report information for an arbitrary number + of similar state machines. All must have the same number of states. +*/ +class IOStateReporter : public IOReporter +{ + OSDeclareDefaultStructors(IOStateReporter); + +public: + +/*! @function IOStateReporter::with + @abstract State reporter static creation method + + @param reportingService - The I/O Kit service for this reporter's channels + @param categories - The categories for this reporter's channels + @param nstates - Maximum number of states for this reporter's channels + @param unit - optional parameter if using override/increment...() + @result on success, an IOStateReporter instance, else NULL + + @discussion + Creates an instance of IOStateReporter. The default time scale + is the current system's notion of mach_absolute_time(). Using a + non-default time scale requires the use of + override/incrementChannelState() instead of setState(). + setState() always updates using mach_absolute_time(). + + Locking: SAFE to call concurrently (no static globals), MAY BLOCK +*/ + static IOStateReporter* with(IOService *reportingService, + IOReportCategories categories, + int nstates, + IOReportUnits unit = kIOReportUnitHWTicks); + +/*! @function IOStateReporter::setStateID + @abstract Assign a non-default ID to a state + + @param channel_id - ID of channel containing the state in question + @param state_index - index of state to give an ID: [0..(nstates-1)] + @param state_id - 64-bit state ID, for ASCII, use IOREPORT_MAKEID + + @result Appropriate IOReturn code + + @discussion + By default, IOStateReporter identifies its channel states by + numbering them from 0 to . If setStateID is not + called to customize the state IDs, the numbered states will be + kept throughout the life of the object and it is safe to reference + those states by their indices. Otherwise, after setStateID() has + been called, the ordering of states is no longer guaranteed and + the client must reference states by their assigned state ID. + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + IOReturn setStateID(uint64_t channel_id, + int state_index, + uint64_t state_id); + +/*! 
@function IOStateReporter::setChannelState + @abstract Updates the current state of a channel to a new state + + @param channel_id - ID of the channel which is updated to a new state + @param new_state_id - ID of the target state for this channel + @param last_intransition - deprecated: time of most recent entry + @param prev_state_residency - deprecated: time spent in previous state + @result Appropriate IOReturn code + + @discussion + setChannelState() updates the amount of time spent in the previous + state (if any) and increments the number of transitions into the + new state. It also sets the target state's last transition time to + the current time and enables internal time-keeping for the channel. + In this mode, calls like getStateResidencyTime() and updateReport() + automatically update a channel's time in state. + + new_state_id identifies the target state as initialized + (0..) or as configured by setStateID(). + + Drivers wishing to compute and report their own time in state + should use incrementChannelState() or overrideChannelState(). It + is not currently possible for a driver to synchronize with the + automatic time-keeping enabled by setChannelState(). The + 4-argument version of setChannelState() is thus impossible to + use correctly. In the future, there may be a setChannelState() + which accepts a last_intransition parameter and uses it to + automatically calculate time in state (ERs -> IOReporting / X). + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + IOReturn setChannelState(uint64_t channel_id, + uint64_t new_state_id); + + IOReturn setChannelState(uint64_t channel_id, + uint64_t new_state_id, + uint64_t last_intransition, + uint64_t prev_state_residency) __deprecated; + +/*! @function IOStateReporter::setState + @abstract Updates state for single channel reporters + + @param new_state_id - New state for the channel + @param last_intransition - deprecated: time of most recent entry + @param prev_state_residency - deprecated: spent in previous state + @result Appropriate IOReturn code. + + @discussion + setState() is a convenience method for single-channel state + reporter instances. An error will be returned if the reporter + in question has more than one channel. + + See further discussion at setChannelState(). + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + IOReturn setState(uint64_t new_state_id); + + IOReturn setState(uint64_t new_state_id, + uint64_t last_intransition, + uint64_t prev_state_residency) __deprecated; + +/*! @function IOStateReporter::overrideChannelState + @abstract Overrides state data for a channel with passed arguments + + @param channel_id - ID of the channel which state is to be updated + @param state_id - state id for the channel + @param time_in_state - time used as new total time in state + @param intransitions - total number of transitions into state + @param last_intransition - mach_absolute_time of most recent entry (opt) + @result Appropriate IOReturn code + + @discussion + overrideChannelState() sets a particular state's time in state + and transition count to the values provided. The optional + last_intransition records the last time the channel transitioned + into the given state. Passing 0 for time_in_state and + intransitions will force the current values to 0. Passing 0 + for last_intransition for all states will disable the notion + of a channel's "current state." + + The most recent last_intransition (amongst all states in a channel) + logically determines the current state. 
If last_intransition is
+ not provided for any state, the channel will not report a current
+ state. For consistent results, it is important to either never
+ specify last_intransition or to always specify it.
+
+ There is currently a bug in determining current state (13423273).
+ The IOReportMacros.h macros only update the state's metadata
+ timestamp and libIOReport only looks at the metadata timestamps
+ to determine the current state. Until that bug is fixed, whichever
+ state is updated most recently will be considered the "current"
+ state by libIOReport.
+
+ ::setState()'s automatic "time in state" updates are not supported
+ when using overrideChannelState(). Clients must not use
+ overrideChannelState() on any channel that has ::setState() called
+ on it. Unlike with ::setState(), clients using
+ overrideChannelState() are responsible for ensuring that data is
+ up to date for updateReport() calls. The correct way to do this
+ is for a driver's ::updateReport() method to push the most up to
+ date values into the reporters before calling
+ super::updateReport().
+
+ Locking: same-instance concurrency SAFE, WILL NOT BLOCK
+*/
+ IOReturn overrideChannelState(uint64_t channel_id,
+ uint64_t state_id,
+ uint64_t time_in_state,
+ uint64_t intransitions,
+ uint64_t last_intransition = 0);
+
+/*! @function IOStateReporter::incrementChannelState
+ @abstract Updates state data for a channel with passed arguments
+
+ @param channel_id - ID of the channel which state is to be updated
+ @param state_id - state id for the channel
+ @param time_in_state - time to be accumulated for time in state
+ @param intransitions - number of transitions into state to be added
+ @param last_intransition - mach_absolute_time of most recent entry (opt)
+ @result Appropriate IOReturn code
+
+ @discussion
+ incrementChannelState() adds time_in_state and intransitions
+ to the current values stored for a particular state. If provided,
+ last_intransition overwrites the time the state was most recently
+ entered. Passing 0 for time_in_state and intransitions will have
+ no effect. Passing 0 for last_intransition for all states will
+ disable the notion of a channel's "current state."
+
+ The most recent last_intransition (amongst all states in a channel)
+ logically determines the current state. If last_intransition is
+ not provided for any state, the channel will not report a current
+ state. For consistent results, it is important to either never
+ specify last_intransition or to always specify it.
+
+ There is currently a bug in determining current state (13423273).
+ The IOReportMacros.h macros only update the state's metadata
+ timestamp and libIOReport only looks at the metadata timestamps
+ to determine the current state. Until that bug is fixed, whichever
+ state is updated most recently will be considered the "current"
+ state by libIOReport.
+
+ ::setState()'s automatic "time in state" updates are not supported
+ when using incrementChannelState(). Clients must not use
+ incrementChannelState() on any channel that has ::setState()
+ called on it. Unlike with ::setState(), clients using
+ incrementChannelState() are responsible for ensuring that data
+ is up to date for updateReport() calls. The correct way to do
+ this is for a driver's ::updateReport() method to push the most
+ up to date values into the reporters before calling
+ super::updateReport().
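+
+ For instance (a sketch; the names and tick arithmetic here are
+ illustrative only), a driver tracking its own residency might do:
+ _stateRep->incrementChannelState(kMyChanID, kMyStateID,
+ delta_ticks, 1, now_ticks);
+ from its ::updateReport(), just before calling super::updateReport().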
+
+ Locking: same-instance concurrency SAFE, WILL NOT BLOCK
+*/
+ IOReturn incrementChannelState(uint64_t channel_id,
+ uint64_t state_id,
+ uint64_t time_in_state,
+ uint64_t intransitions,
+ uint64_t last_intransition = 0);
+
+/*! @function IOStateReporter::setStateByIndices
+ @abstract update a channel state without validating channel_id
+
+ @param channel_index - 0.., available from getChannelIndex()
+ @param new_state - New state (by index) for the channel
+ @param last_intransition - deprecated: time of most recent entry
+ @param prev_state_residency - deprecated: time spent in previous state
+ @result Appropriate IOReturn code
+
+ @discussion
+ Similar to setState(), setStateByIndices() sets a channel's state
+ without searching for the channel or state IDs. It will perform
+ bounds checking, but relies on the caller to properly indicate
+ the indices of the channel and state. Clients can rely on channels
+ being added to IOStateReporter in order: the first channel will
+ have index 0, the second index 1, etc. Like ::setState(),
+ "time in state" calculations are handled automatically.
+
+ setStateByIndices() is faster than setChannelState(), but
+ it should only be used where the latter's performance overhead
+ might be a problem. For example, many channels in a single
+ reporter and high-frequency state changes.
+
+ Drivers wishing to compute and report their own time in state
+ should use incrementChannelState() or overrideChannelState(). It
+ is not currently possible for a driver to synchronize with the
+ automatic time-keeping enabled by setStateByIndices(). The
+ 4-argument version of setChannelState() is thus impossible to
+ use correctly. In the future, there may be a setChannelState()
+ which accepts a last_intransition parameter and uses it to
+ automatically calculate time in state (ERs -> IOReporting / X).
+
+ Locking: same-instance concurrency SAFE, WILL NOT BLOCK
+*/
+ IOReturn setStateByIndices(int channel_index,
+ int new_state_index);
+
+ IOReturn setStateByIndices(int channel_index,
+ int new_state_index,
+ uint64_t last_intransition,
+ uint64_t prev_state_residency) __deprecated;
+
+/*! @function IOStateReporter::getStateInTransitions
+ @abstract Accessor method for count of transitions into state
+
+ @param channel_id - ID of the channel
+ @param state_id - State of the channel
+ @result Count of transitions into the requested state.
+
+ @discussion
+ Some clients may need to consume internally the data aggregated by the
+ reporter object. This method allows a client to retrieve the count of
+ transitions into the requested state for the channel_id.
+
+ Locking: same-instance concurrency SAFE, WILL NOT BLOCK
+*/
+ uint64_t getStateInTransitions(uint64_t channel_id,
+ uint64_t state_id);
+
+/*! @function IOStateReporter::getStateResidencyTime
+ @abstract Accessor method for time spent in a given state
+
+ @param channel_id - ID of the channel
+ @param state_id - State of the channel
+ @result Absolute time spent in specified state
+
+ @discussion
+ Some clients may need to consume internally the data aggregated
+ by the reporter object. This method allows a client to
+ retrieve the absolute time a particular channel recorded as spent
+ in a specified state.
+
+ Locking: same-instance concurrency SAFE, WILL NOT BLOCK
+*/
+ uint64_t getStateResidencyTime(uint64_t channel_id,
+ uint64_t state_id);
+
+/*!
@function IOStateReporter::getStateLastTransitionTime
+ @abstract Accessor method for last time a transition occurred
+
+ @param channel_id - ID of the channel
+ @param state_id - State of the channel
+ @result Absolute time for when the last transition occurred
+
+ @discussion
+ Some clients may need to consume internally the data aggregated
+ by the reporter object. This method allows a client to
+ retrieve the absolute time stamp for when the last transition into
+ a specific state was recorded.
+
+ Locking: same-instance concurrency SAFE, WILL NOT BLOCK
+*/
+ uint64_t getStateLastTransitionTime(uint64_t channel_id, uint64_t state_id);
+
+/*! @function [DEPRECATED] IOStateReporter::getStateLastChannelUpdateTime
+ @abstract Deprecated accessor for last time a channel was auto-updated
+
+ @param channel_id - ID of the channel
+ @result Absolute time for last time the channel was updated
+
+ @discussion
+ If a channel has had ::setState() called on it, calls such as
+ getStateResidencyTime() or updateReport() will update time in the
+ current state and update an internal "last channel update time."
+ Because clients have no way to interlock with those methods, there
+ is no sensible way to use this method and it will be removed in
+ a future release.
+
+ Locking: same-instance concurrency SAFE, WILL NOT BLOCK
+*/
+ uint64_t getStateLastChannelUpdateTime(uint64_t channel_id) __deprecated;
+
+/*! @function IOStateReporter::free
+ @abstract Releases the object and all its resources.
+
+ @discussion
+ ::free() assumes that init() has completed. Clients should use
+ the static ::with() methods to obtain fully-initialized reporter
+ instances.
+
+ Locking: same-instance concurrency UNSAFE
+*/
+ virtual void free(void);
+
+protected:
+
+/*! @function IOStateReporter::initWith
+ @abstract Instance method implementation called by ::with
+
+ @discussion
+ See description of parameters above
+*/
+ virtual bool initWith(IOService *reportingService,
+ IOReportCategories categories,
+ int16_t nstates, IOReportUnits unit);
+
+
+/*! @function IOStateReporter::handleSwapPrepare
+ @abstract _swap* =
+
+ @function IOStateReporter::handleAddChannelSwap
+ @abstract swap in IOStateReporter's variables
+
+ @function IOStateReporter::handleSwapCleanup
+ @abstract clean up unused buffers in _swap*
+
+ [see IOReporter::handle*Swap* for more info]
+*/
+ virtual IOReturn handleSwapPrepare(int newNChannels);
+ virtual IOReturn handleAddChannelSwap(uint64_t channel_id,
+ const OSSymbol *symChannelName);
+ virtual void handleSwapCleanup(int swapNChannels);
+
+/*! @function IOStateReporter::updateChannelValues
+ @abstract Update accounting of time spent in current state
+
+ @param channel_index - internal index of the channel
+ @result appropriate IOReturn code
+
+ @discussion
+ Internal State reporter method to account for the time spent in
+ the current state when updateReport() is called on the reporter's
+ channels.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn updateChannelValues(int channel_index);
+
+/*!
@function IOStateReporter::handleSetStateByIndices
+ @abstract update a channel state without validating channel_id
+
+ @param channel_index - 0.., available from getChannelIndex()
+ @param new_state - New state for the channel
+ @param last_intransition - to remove: time of most recent entry
+ @param prev_state_residency - to remove: time spent in previous state
+ @result Appropriate IOReturn code
+
+ @discussion
+ Locked version of IOStateReporter::setStateByIndices(). This
+ method may be overridden by sub-classes.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn handleSetStateByIndices(int channel_index,
+ int new_state_index,
+ uint64_t last_intransition,
+ uint64_t prev_state_residency);
+
+/*! @function IOStateReporter::handleSetStateID
+ @abstract Assign a non-default ID to a state
+
+ @param channel_id - ID of channel containing the state in question
+ @param state_index - index of state to give an ID: [0..(nstates-1)]
+ @param state_id - 64-bit state ID, for ASCII, use IOREPORT_MAKEID
+
+ @result Appropriate IOReturn code
+
+ @discussion
+ Locked version of IOStateReporter::setStateID(). This method may be
+ overridden by sub-classes
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn handleSetStateID(uint64_t channel_id,
+ int state_index,
+ uint64_t state_id);
+
+/*! @function IOStateReporter::handleOverrideChannelStateByIndices
+ @abstract Overrides state data for a channel with passed arguments
+
+ @param channel_index - index of the channel which state is to be updated
+ @param state_index - index of the state id for the channel
+ @param time_in_state - time used as new total time in state
+ @param intransitions - total number of transitions into state
+ @param last_intransition - mach_absolute_time of most recent entry (opt)
+ @result Appropriate IOReturn code
+
+ @discussion
+ Locked version of IOStateReporter::overrideChannelState(). This
+ method may be overridden by sub-classes.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/
+ virtual IOReturn handleOverrideChannelStateByIndices(int channel_index,
+ int state_index,
+ uint64_t time_in_state,
+ uint64_t intransitions,
+ uint64_t last_intransition = 0);
+
+/*! @function IOStateReporter::handleIncrementChannelStateByIndices
+ @abstract Updates state data for a channel with passed arguments
+
+ @param channel_index - index of the channel which state is to be updated
+ @param state_index - index of the state id for the channel
+ @param time_in_state - time used as new total time in state
+ @param intransitions - total number of transitions into state
+ @param last_intransition - mach_absolute_time of most recent entry (opt)
+ @result Appropriate IOReturn code
+
+ @discussion
+ Locked version of IOStateReporter::incrementChannelState(). This
+ method may be overridden by sub-classes.
+
+ Locking: Caller must ensure that the reporter (data) lock is held.
+*/ + virtual IOReturn handleIncrementChannelStateByIndices(int channel_index, + int state_index, + uint64_t time_in_state, + uint64_t intransitions, + uint64_t last_intransition = 0); +private: + + int *_currentStates; // current states (per channel) + uint64_t *_lastUpdateTimes; // most recent auto-update + + // Required for swapping inside addChannel + int *_swapCurrentStates; + uint64_t *_swapLastUpdateTimes; + +enum valueSelector { + kInTransitions, + kResidencyTime, + kLastTransitionTime +}; + uint64_t _getStateValue(uint64_t channel_id, + uint64_t state_id, + enum valueSelector value); + + IOReturn _getStateIndices(uint64_t channel_id, + uint64_t state_id, + int *channel_index, + int *state_index); + +}; + + +/*! + @class IOHistogramReporter + @abstract Report histograms of values + @discussion + Each IOHistogramReporter can report one histogram representing + how a given value has changed over time. +*/ +class IOHistogramReporter : public IOReporter +{ + OSDeclareDefaultStructors(IOHistogramReporter); + +public: +/*! @function IOHistogramReporter::with + @abstract Initializes the IOHistogramReporter instance variables and data structures + + @param reportingService - IOService instantiator and data provider into the reporter object + @param categories - The categories in which the report should be classified + @param channelID - uint64_t channel identifier + @param channelName - rich channel name as char* + @param unit - The unit for the quantity recorded by the reporter object + @param nSegments - Number of segments to be extracted from the config data structure + @param config - Histograms require the caller to pass a configuration by segments + @result an instance of the IOHistogramReporter object or NULL on error + + @discussion + Creates an instance of the histogram reporter object. + +FIXME: need more explanation of the config + + IOHistogramReporter currently only supports a single channel. + + + */ + static IOHistogramReporter* with(IOService *reportingService, + IOReportCategories categories, + uint64_t channelID, + const char *channelName, + IOReportUnits unit, + int nSegments, + IOHistogramSegmentConfig *config); + +/*! @function IOHistogramReporter::addChannel + @abstract Override IOReporter::addChannel(*) to return an error + + @result kIOReturnUnsupported - doesn't support adding channels +*/ + IOReturn addChannel(uint64_t channelID, const char *channelName = NULL) { + return kIOReturnUnsupported; + } + +/*! @function IOHistogramReporter::tallyValue + @abstract Add a new value to the histogram + + @param value - new value to add to the histogram + @result the index of the affected bucket, or -1 on error + + @discussion + The histogram reporter determines in which bucket the value + falls and increments it. The lowest and highest buckets + extend to negative and positive infinity, respectively. + + Locking: same-instance concurrency SAFE, WILL NOT BLOCK +*/ + int tallyValue(int64_t value); + +/*! @function IOHistogramReporter::free + @abstract Releases the object and all its resources. + + @discussion + ::free() assumes that init() has completed. Clients should use + the static ::with() methods to obtain fully-initialized reporter + instances. + + Locking: same-instance concurrency UNSAFE +*/ + virtual void free(void); + +protected: +
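+/*
+ Editor's illustrative sketch (not part of the original header): creating
+ and feeding a histogram reporter from an IOService subclass. kMyChannel
+ is hypothetical, kIOReportUnitNone is assumed to be a valid IOReportUnits
+ value, and the IOHistogramSegmentConfig contents must be filled in per
+ its definition (see the FIXME above):
+
+     IOHistogramSegmentConfig cfg;
+     bzero(&cfg, sizeof(cfg));      // describe the segment's buckets here
+     IOHistogramReporter *rep = IOHistogramReporter::with(this,
+             kIOReportCategoryPerformance, kMyChannel, "latency",
+             kIOReportUnitNone, 1, &cfg);
+     if (rep) {
+         int bucket = rep->tallyValue(42);   // returns affected bucket or -1
+     }
+*/
+
+/*!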
@function IOHistogramReporter::initWith + @abstract instance method implementation called by ::with + + @discussion + See description of parameters above +*/ + virtual bool initWith(IOService *reportingService, + IOReportCategories categories, + uint64_t channelID, + const OSSymbol *channelName, + IOReportUnits unit, + int nSegments, + IOHistogramSegmentConfig *config); + +/*! @function IOHistogramReporter::handleCreateLegend + @abstract Builds an IOReporting legend entry representing the channels of this reporter. + + @result An IOReportLegendEntry or NULL on failure + + @discussion + The returned legend entry may be appended to kIOReportLegendKey + to be published by the caller in the IORegistry. See the + IOReportLegend class for more details. + + Locking: same-instance concurrency SAFE, MAY BLOCK +*/ + IOReportLegendEntry* handleCreateLegend(void); + + +private: + + int _segmentCount; + int64_t *_bucketBounds; + int _bucketCount; + IOHistogramSegmentConfig *_histogramSegmentsConfig; +}; + + +/***********************************/ +/***** 4. IOReportLegend Class *****/ +/***********************************/ + +/*! + @class IOReportLegend + @abstract combine legend entries into a complete legend + @discussion + IOReportLegend adds metadata to legend entries and combines them + into a single OSArray that can be published under the + kIOReportLegendKey property in the I/O Kit registry. +*/ +class IOReportLegend : public OSObject +{ + OSDeclareDefaultStructors(IOReportLegend); + +public: +/*! @function IOReportLegend::with + @abstract Create an instance of IOReportLegend + + @param legend - OSArray of the legend possibly already present in registry + @result an instance of IOReportLegend, or NULL on failure + + @discussion + An IOReporting legend (an OSArray of legend entries) may be already + present in the IORegistry. Thus the recommended way to publish + new entries is to append to any existing array as follows: + 1. call getProperty(kIOReportLegendKey) to get an existing legend. + + 2a. If it exists + - OSDynamicCast to OSArray + - and pass it to ::with() + IOReportLegend *legendMaker = IOReportLegend::with(legend); + The provided array is retained by IOReportLegend. + + 2b. If no legend already exists in the registry, pass NULL + IOReportLegend *legend = IOReportLegend::with(NULL); + This latter invocation will cause IOReportLegend to create a new + array internally (also holding one reference). + + At the cost of some registry churn, the static + IOReportLegend::addReporterLegend() will handle the above, removing + the need for any direct use of the IOReportLegend class. +*/ + static IOReportLegend* with(OSArray *legend); + +/*! @function IOReportLegend::addLegendEntry + @abstract Add a new legend entry + + @param legendEntry - entry to be added to the internal legend array + @param groupName - primary group name for this entry + @param subGroupName - secondary group name for this entry + @result appropriate IOReturn code + + @discussion + The entry will be retained as an element of the internal array. + Legend entries are available from reporter objects. Entries + represent some number of channels with similar properties (such + as group and sub-group). Multiple legend entries with the same + group names will be aggregated in user space. + + Drivers that instantiate their reporter objects in response to + IOService::configureReport(kIOReportDisable) will need to create + temporary reporter objects for the purpose of creating their + legend entries. 
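+
+ For example (editor's illustrative sketch, not from the original
+ header; it assumes an IOService subclass and an IOReporter method
+ createLegend() for producing the entry):
+
+     OSArray *prev = OSDynamicCast(OSArray, getProperty(kIOReportLegendKey));
+     IOReportLegend *maker = IOReportLegend::with(prev);   // NULL prev is fine
+     IOReportLegendEntry *entry = myReporter->createLegend();
+     if (maker && entry) {
+         maker->addLegendEntry(entry, "MyGroup", "MySubGroup");
+         setProperty(kIOReportLegendKey, maker->getLegend());
+     }
+     if (entry) entry->release();
+     if (maker) maker->release();
+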
User-space legends are tracked by 12836893. +*/ + IOReturn addLegendEntry(IOReportLegendEntry *legendEntry, + const char *groupName, + const char *subGroupName); + +/*! @function IOReportLegend::addReporterLegend + @abstract Add a legend entry from a reporter object + + @param reportingService - IOService data provider into the reporter object + @param reporter - IOReporter to use to extract and append the legend + @param groupName - primary group name for this entry + @param subGroupName - secondary group name for this entry + @result appropriate IOReturn code + + @discussion + An IOReportLegendEntry will be created internally by this method from + the IOReporter object passed as an argument. The entry will be released + internally after being appended to the IOReportLegend object. + Legend entries are available from reporter objects. Entries + represent some number of channels with similar properties (such + as group and sub-group). Multiple legend entries with the same + group names will be aggregated in user space. + + Drivers that instantiate their reporter objects in response to + IOService::configureReport(kIOReportDisable) will need to create + temporary reporter objects for the purpose of creating their + legend entries. User-space legends are tracked by 12836893. + + The static version of addReporterLegend adds the reporter's + legend directly to reportingService's kIOReportLegendKey. This + will result in serialized getProperty() and setProperty() calls + on reportingService and should be avoided when many reporter + objects are in use. +*/ + IOReturn addReporterLegend(IOReporter *reporter, + const char *groupName, + const char *subGroupName); + + static IOReturn addReporterLegend(IOService *reportingService, + IOReporter *reporter, + const char *groupName, + const char *subGroupName); + +/*! @function IOReportLegend::getLegend + @abstract Accessor method to get the legend array + + @result Returns the OSArray holding the legend to be published by the driver + @discussion + This array will include all legend entries added to the object. +*/ + OSArray* getLegend(void); + +/*! @function IOReportLegend::free + @abstract Frees the IOReportLegend object + + @discussion + ::free() cleans up the legend object and anything it allocated. + + ::free() releases the internal array (which was either passed + to ::with() or created as a result of ::with(NULL)). Assuming + the caller extracted the array with getLegend() and published it + in the I/O Kit registry, its ownership will now be with the + registry. +*/ + void free(void); + + + +protected: + +private: + + OSArray *_reportLegend; + + IOReturn initWith(OSArray *legend); + +/*! @function IOReportLegend::organizeLegend + @abstract Sets up the legend entry, organizing it with group and sub-group names + + @param legendEntry - Legend entry to organize + @param groupName - Primary group name + @param subGroupName - Secondary group name + @result IOReturn code +*/ + IOReturn organizeLegend(IOReportLegendEntry *legendEntry, + const OSSymbol *groupName, + const OSSymbol *subGroupName); + +// FUTURE POSSIBILITY (NOT IMPLEMENTED!) +/*!
@function IOReportLegend::createReporters + @abstract Creates as many IOReporter objects as the legend contains + + @param legend - OSArray legend object containing the description of all reporters + the driver is able to address + @param reporter - OSSet of reporter objects created by this call + @result IOReturn code kIOReturnSuccess if successful + + @discussion + NOT SUPPORTED at the time of writing + Convenience method to create all the driver's reporter objects from a legend. + Can be used when a legend is made public through the IORegistry but the + IOReporter objects have not yet been created (to save memory while waiting + for observers). Upon a call to IOService::configureReport(), a driver could + create all reporter objects on the fly using this function. +*/ + // For Future IOReporterManager... + // static IOReturn createReporters(requestedChannels, legend); +}; + +#endif /* ! _IOKERNEL_REPORTERS_H_ */ diff --git a/iokit/IOKit/IOKitDebug.h b/iokit/IOKit/IOKitDebug.h index f28cfdbcf..3e9c1de3b 100644 --- a/iokit/IOKit/IOKitDebug.h +++ b/iokit/IOKit/IOKitDebug.h @@ -75,18 +75,15 @@ enum { kOSRegistryModsMode = 0x00040000ULL, // Change default registry modification handling - panic vs. log // kIOTraceIOService = 0x00080000ULL, // Obsolete: Use iotrace=0x00080000ULL to enable now kIOLogHibernate = 0x00100000ULL, - kIOLogDriverPower1 = 0x01000000ULL, - kIOLogDriverPower2 = 0x02000000ULL, kIOStatistics = 0x04000000ULL, - kIOPersistentLog = 0x08000000ULL, kIOSleepWakeWdogOff = 0x40000000ULL, - kIOAppRespStacksOn = 0x80000000ULL, // debug aids - change behaviour kIONoFreeObjects = 0x00100000ULL, kIOLogSynchronous = 0x00200000ULL, // IOLog completes synchronously kOSTraceObjectAlloc = 0x00400000ULL, kIOWaitQuietPanics = 0x00800000ULL, + kIOWaitQuietBeforeRoot = 0x01000000ULL, _kIODebugTopFlag = 0x8000000000000000ULL // force enum to be 64 bits }; @@ -97,7 +94,7 @@ enum { kIOTraceEventSources = 0x00000004ULL, // Trace non-passive event sources kIOTraceIntEventSource = 0x00000008ULL, // Trace IOIES and IOFIES sources kIOTraceCommandGates = 0x00000010ULL, // Trace command gate activity - kIOTraceTimers = 0x00000008ULL, // Trace timer event source activity + kIOTraceTimers = 0x00000020ULL, // Trace timer event source activity kIOTracePowerMgmt = 0x00000400ULL, // Trace power management changes diff --git a/iokit/IOKit/IOKitKeys.h b/iokit/IOKit/IOKitKeys.h index daf1faa0d..698cf86c9 100644 --- a/iokit/IOKit/IOKitKeys.h +++ b/iokit/IOKit/IOKitKeys.h @@ -155,6 +155,7 @@ // IODTNVRAM property keys #define kIONVRAMDeletePropertyKey "IONVRAM-DELETE-PROPERTY" #define kIONVRAMSyncNowPropertyKey "IONVRAM-SYNCNOW-PROPERTY" +#define kIONVRAMActivateCSRConfigPropertyKey "IONVRAM-ARMCSR-PROPERTY" #define kIODTNVRAMPanicInfoKey "aapl,panic-info" // keys for complex boot information diff --git a/iokit/IOKit/IOKitKeysPrivate.h b/iokit/IOKit/IOKitKeysPrivate.h index 26827aa22..cae0be4f2 100644 --- a/iokit/IOKit/IOKitKeysPrivate.h +++ b/iokit/IOKit/IOKitKeysPrivate.h @@ -50,6 +50,10 @@ // IOResources property #define kIOConsoleUsersSeedKey "IOConsoleUsersSeed" /* value is OSNumber */ +// IODeviceTree:chosen properties +#define kIOProgressBackbufferKey "IOProgressBackbuffer" /* value is OSData */ +#define kIOProgressColorThemeKey "IOProgressColorTheme" /* value is OSNumber */ + // interest type #define kIOConsoleSecurityInterest "IOConsoleSecurityInterest" @@ -68,14 +72,19 @@ typedef struct _IOUCProcessToken { #define kIOKernelHasSafeSleep 1 #define kIOPlatformSleepActionKey "IOPlatformSleepAction" /* value 
is OSNumber (priority) */ -#define kIOPlatformWakeActionKey "IOPlatformWakeAction" /* value is OSNumber (priority) */ -#define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) */ -#define kIOPlatformActiveActionKey "IOPlatformActiveAction" /* value is OSNumber (priority) */ +#define kIOPlatformWakeActionKey "IOPlatformWakeAction" /* value is OSNumber (priority) */ +#define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) */ +#define kIOPlatformActiveActionKey "IOPlatformActiveAction" /* value is OSNumber (priority) */ +#define kIOPlatformHaltRestartActionKey "IOPlatformHaltRestartAction" /* value is OSNumber (priority) */ #define kIOPlatformFunctionHandlerSet "IOPlatformFunctionHandlerSet" #if defined(__i386__) || defined(__x86_64__) #define kIOPlatformFunctionHandlerMaxBusDelay "IOPlatformFunctionHandlerMaxBusDelay" #define kIOPlatformFunctionHandlerMaxInterruptDelay "IOPlatformFunctionHandlerMaxInterruptDelay" + +#define kIOPlatformMaxBusDelay "IOPlatformMaxBusDelay" +#define kIOPlatformMaxInterruptDelay "IOPlatformMaxInterruptDelay" + #endif /* defined(__i386__) || defined(__x86_64__) */ enum { @@ -83,5 +92,8 @@ enum { kIODirectionPrepareNoZeroFill = 0x00000010, }; +enum { + kIOServiceTerminateNeedWillTerminate = 0x00000100, +}; #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */ diff --git a/iokit/IOKit/IOLocks.h b/iokit/IOKit/IOLocks.h index 06ae4aa8d..7b42679f1 100644 --- a/iokit/IOKit/IOLocks.h +++ b/iokit/IOKit/IOLocks.h @@ -37,6 +37,7 @@ #endif #include +#include #include @@ -129,15 +130,15 @@ void IOLockUnlock( IOLock * lock); @abstract Sleep with mutex unlock and relock @discussion Prepare to sleep, unlock the mutex, and re-acquire it on wakeup. Results are undefined if the caller has not locked the mutex. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the locked lock. - @param event The event to sleep on. + @param event The event to sleep on. Must be non-NULL. @param interType How the sleep can be interrupted. 
@result The wait-result value indicating how the thread was awakened.*/ -int IOLockSleep( IOLock * lock, void *event, UInt32 interType); +int IOLockSleep( IOLock * lock, void *event, UInt32 interType) __DARWIN14_ALIAS(IOLockSleep); int IOLockSleepDeadline( IOLock * lock, void *event, - AbsoluteTime deadline, UInt32 interType); + AbsoluteTime deadline, UInt32 interType) __DARWIN14_ALIAS(IOLockSleepDeadline); -void IOLockWakeup(IOLock * lock, void *event, bool oneThread); +void IOLockWakeup(IOLock * lock, void *event, bool oneThread) __DARWIN14_ALIAS(IOLockWakeup); #ifdef __APPLE_API_OBSOLETE diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index b1c6b77b3..242581bcb 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -56,11 +56,17 @@ enum IODirection kIODirectionInOut = kIODirectionIn | kIODirectionOut, // these flags are valid for the prepare() method only - kIODirectionPrepareToPhys32 = 0x00000004, - kIODirectionPrepareNoFault = 0x00000008, - kIODirectionPrepareReserved1 = 0x00000010, + kIODirectionPrepareToPhys32 = 0x00000004, + kIODirectionPrepareNoFault = 0x00000008, + kIODirectionPrepareReserved1 = 0x00000010, #define IODIRECTIONPREPARENONCOHERENTDEFINED 1 kIODirectionPrepareNonCoherent = 0x00000020, + + // these flags are valid for the complete() method only +#define IODIRECTIONCOMPLETEWITHERRORDEFINED 1 + kIODirectionCompleteWithError = 0x00000040, +#define IODIRECTIONCOMPLETEWITHDATAVALIDDEFINED 1 + kIODirectionCompleteWithDataValid = 0x00000080, }; #ifdef __LP64__ typedef IOOptionBits IODirection; @@ -103,6 +109,12 @@ enum { #endif kIOMemoryThreadSafe = 0x00100000, // Shared with Buffer MD kIOMemoryClearEncrypt = 0x00200000, // Shared with Buffer MD + +#ifdef XNU_KERNEL_PRIVATE + kIOMemoryBufferPurgeable = 0x00400000, + kIOMemoryBufferCacheMask = 0x70000000, + kIOMemoryBufferCacheShift = 28, +#endif }; #define kIOMapperSystem ((IOMapper *) 0) @@ -167,6 +179,11 @@ enum kIOPreparationIDAlwaysPrepared = 2, }; +#ifdef XNU_KERNEL_PRIVATE +struct IOMemoryReference; +#endif + + /*! @class IOMemoryDescriptor : public OSObject @abstract An abstract base class defining common methods for describing physical or virtual memory. @discussion The IOMemoryDescriptor object represents a buffer or range of memory, specified as one or more physical or virtual address ranges. It contains methods to return the memory's physically contiguous segments (fragments), for use with the IOMemoryCursor, and methods to map the memory into any address space with caching and placed mapping options. */ @@ -186,7 +203,15 @@ protected: protected: OSSet * _mappings; IOOptionBits _flags; - void * _memEntry; + + +#ifdef XNU_KERNEL_PRIVATE +public: + struct IOMemoryReference * _memRef; +protected: +#else + void * __iomd_reserved5; +#endif #ifdef __LP64__ uint64_t __iomd_reserved1; @@ -581,6 +606,7 @@ public: kIOMapReadOnly to allow only read only accesses to the memory - writes will cause an access fault.
kIOMapReference will only succeed if the mapping already exists, and the IOMemoryMap object is just an extra reference, i.e., no new mapping will be created.
kIOMapUnique allows a special kind of mapping to be created that may be used with the IOMemoryMap::redirect() API. These mappings will not be shared, as is the default; a unique mapping will always be created for the caller, not an existing mapping with an extra reference.
+ kIOMapPrefault will try to prefault the pages corresponding to the mapping. This must not be done on the kernel task, and the memory must have been wired via prepare(). Otherwise, the function will fail.
@param offset Is a beginning offset into the IOMemoryDescriptor's memory where the mapping starts. Zero is the default to map all the memory. @param length Is the length of the mapping requested for a subset of the IOMemoryDescriptor. Zero is the default to map all the memory. @result A reference to an IOMemoryMap object representing the mapping, which can supply the virtual address of the mapping and other information. The mapping may be shared with multiple callers - multiple maps are avoided if a compatible one exists. The IOMemoryMap object returned should be released only when the caller has finished accessing the mapping, as freeing the object destroys the mapping. The IOMemoryMap instance also retains the IOMemoryDescriptor it maps while it exists. */ @@ -631,6 +657,11 @@ public: IOReturn redirect( task_t safeTask, bool redirect ); IOReturn handleFault( + void * _pager, + mach_vm_size_t sourceOffset, + mach_vm_size_t length); + + IOReturn populateDevicePager( void * pager, vm_map_t addressMap, mach_vm_address_t address, @@ -908,6 +939,31 @@ public: uint64_t * address, ppnum_t * mapPages); bool initMemoryEntries(size_t size, IOMapper * mapper); + + IOMemoryReference * memoryReferenceAlloc(uint32_t capacity, + IOMemoryReference * realloc); + void memoryReferenceFree(IOMemoryReference * ref); + void memoryReferenceRelease(IOMemoryReference * ref); + + IOReturn memoryReferenceCreate( + IOOptionBits options, + IOMemoryReference ** reference); + + IOReturn memoryReferenceMap(IOMemoryReference * ref, + vm_map_t map, + mach_vm_size_t inoffset, + mach_vm_size_t size, + IOOptionBits options, + mach_vm_address_t * inaddr); + + static IOReturn memoryReferenceSetPurgeable( + IOMemoryReference * ref, + IOOptionBits newState, + IOOptionBits * oldState); + static IOReturn memoryReferenceGetPageCounts( + IOMemoryReference * ref, + IOByteCount * residentPageCount, + IOByteCount * dirtyPageCount); #endif private: @@ -918,8 +974,6 @@ private: virtual void unmapFromKernel(); #endif /* !__LP64__ */ - void *createNamedEntry(); - // Internal OSData * _memoryEntries; unsigned int _pages; diff --git a/iokit/IOKit/IOMessage.h b/iokit/IOKit/IOMessage.h index 4a571b9d4..38808ebcc 100644 --- a/iokit/IOKit/IOMessage.h +++ b/iokit/IOKit/IOMessage.h @@ -7,7 +7,7 @@ * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License - * may notificationused to create, or enable the creation or redistribution of, + * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. 
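To illustrate the kIOMapPrefault option documented in the IOMemoryDescriptor hunk above (an editor's sketch, not part of the patch; md and userTask are assumed to be an existing IOMemoryDescriptor and target task):

    IOReturn ret = md->prepare();                        // wire the memory first
    if (ret == kIOReturnSuccess) {
        IOMemoryMap *map = md->createMappingInTask(userTask, 0,
                kIOMapAnywhere | kIOMapPrefault);        // pages faulted in up front
        if (map) {
            // ... use map->getAddress() / map->getLength() ...
            map->release();
        }
        md->complete();
    }

Per the discussion above, kIOMapPrefault must not be used for mappings into the kernel task, and it fails if the memory has not been wired via prepare().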
diff --git a/iokit/IOKit/IONVRAM.h b/iokit/IOKit/IONVRAM.h index 7d96cd050..37d4ef4d3 100644 --- a/iokit/IOKit/IONVRAM.h +++ b/iokit/IOKit/IONVRAM.h @@ -82,12 +82,12 @@ private: OSDictionary *_ofDict; OSDictionary *_nvramPartitionOffsets; OSDictionary *_nvramPartitionLengths; - UInt32 _xpramPartitionOffset; - UInt32 _xpramPartitionSize; - UInt8 *_xpramImage; - UInt32 _nrPartitionOffset; - UInt32 _nrPartitionSize; - UInt8 *_nrImage; + UInt32 _resv0 __unused; + UInt32 _resv1 __unused; + IOLock *_ofLock; + UInt32 _resv2 __unused; + UInt32 _resv3 __unused; + UInt8 *_resv4 __unused; UInt32 _piPartitionOffset; UInt32 _piPartitionSize; UInt8 *_piImage; @@ -145,6 +145,8 @@ public: virtual void sync(void); virtual bool serializeProperties(OSSerialize *s) const; + virtual OSObject *copyProperty(const OSSymbol *aKey) const; + virtual OSObject *copyProperty(const char *aKey) const; virtual OSObject *getProperty(const OSSymbol *aKey) const; virtual OSObject *getProperty(const char *aKey) const; virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject); diff --git a/iokit/IOKit/IOPlatformExpert.h b/iokit/IOKit/IOPlatformExpert.h index bdd1ef6a2..c60affed9 100644 --- a/iokit/IOKit/IOPlatformExpert.h +++ b/iokit/IOKit/IOPlatformExpert.h @@ -68,7 +68,9 @@ extern UInt32 PESavePanicInfo(UInt8 *buffer, UInt32 length); extern long PEGetGMTTimeOfDay( void ); extern void PESetGMTTimeOfDay( long secs ); - +extern void PEGetUTCTimeOfDay( clock_sec_t * secs, clock_usec_t * usecs ); +extern void PESetUTCTimeOfDay( clock_sec_t secs, clock_usec_t usecs ); + /* unless it's a "well-known" property, these will read/write out the value as raw data */ extern boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, const unsigned int len); @@ -89,6 +91,7 @@ extern const OSSymbol * gIOPlatformSleepActionKey; extern const OSSymbol * gIOPlatformWakeActionKey; extern const OSSymbol * gIOPlatformQuiesceActionKey; extern const OSSymbol * gIOPlatformActiveActionKey; +extern const OSSymbol * gIOPlatformHaltRestartActionKey; class IORangeAllocator; class IONVRAMController; @@ -178,12 +181,17 @@ public: virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length); virtual OSString* createSystemSerialNumberString(OSData* myProperty); - + + virtual IOReturn deregisterInterruptController(OSSymbol *name); + + virtual void getUTCTimeOfDay( clock_sec_t * secs, clock_nsec_t * nsecs ); + virtual void setUTCTimeOfDay( clock_sec_t secs, clock_nsec_t nsecs ); + OSMetaClassDeclareReservedUsed(IOPlatformExpert, 0); OSMetaClassDeclareReservedUsed(IOPlatformExpert, 1); - OSMetaClassDeclareReservedUnused(IOPlatformExpert, 2); - OSMetaClassDeclareReservedUnused(IOPlatformExpert, 3); - OSMetaClassDeclareReservedUnused(IOPlatformExpert, 4); + OSMetaClassDeclareReservedUsed(IOPlatformExpert, 2); + OSMetaClassDeclareReservedUsed(IOPlatformExpert, 3); + OSMetaClassDeclareReservedUsed(IOPlatformExpert, 4); OSMetaClassDeclareReservedUnused(IOPlatformExpert, 5); OSMetaClassDeclareReservedUnused(IOPlatformExpert, 6); OSMetaClassDeclareReservedUnused(IOPlatformExpert, 7); diff --git a/iokit/IOKit/IOReportMacros.h b/iokit/IOKit/IOReportMacros.h index 24f7607ff..af1a1c27a 100644 --- a/iokit/IOKit/IOReportMacros.h +++ b/iokit/IOKit/IOReportMacros.h @@ -1,10 +1,8 @@ /* - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 2012 Apple Computer, Inc. All Rights Reserved. + * Copyright (c) 2012-2014 Apple Computer, Inc. All Rights Reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -13,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -24,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -37,6 +35,10 @@ extern "C" { #endif +#ifndef IOREPORT_ABORT +#define IOREPORT_ABORT panic +#endif + /* Background @@ -52,13 +54,18 @@ extern "C" { appropriate macro. */ + +/* ----- Reporting Single Integers (SimpleReport) ----- */ + /* - * Returns the buffer size required for a Simple report. + * The buffer size required for a SimpleReport. */ + #define SIMPLEREPORT_BUFSIZE (sizeof(IOReportElement)) + /* - * Initialize a buffer to hold a Simple (integer) report. + * Initialize a buffer to hold a SimpleReport. * * void* buffer - ptr to SIMPLEREPORT_BUFSIZE bytes * size_t bufSize - sanity check of buffer's size @@ -66,10 +73,10 @@ extern "C" { * uint64_t channelID - the report's channel ID * IOReportCategories categories - categories of this channel * - * If the buffer is not of sufficient size, the macro performs a - * null pointer reference to trigger a segfault. Then, the buffer is - * filled with 0xbadcafe. + * If the buffer is not of sufficient size, the macro calls IOREPORT_ABORT(). + * If that returns, the buffer is filled with 0xbadcafe. */ + #define SIMPLEREPORT_INIT(buffer, bufSize, providerID, channelID, cats) \ do { \ IOReportElement *__elem = (IOReportElement *)(buffer); \ @@ -83,23 +90,23 @@ do { \ __elem->channel_type.nelements = 1; \ __elem->channel_type.element_idx = 0; \ __elem->timestamp = 0; \ - __vals = (IOSimpleReportValues*)&__elem->values; \ - __vals->simple_value = kIOReportInvalidValue; \ + __vals = (IOSimpleReportValues*)&__elem->values; \ + __vals->simple_value = kIOReportInvalidIntValue; \ } \ else { \ - uint32_t *__nptr = NULL; \ - *__nptr = 1; \ - POLLUTE_BUF((buffer), (bufSize)); \ + IOREPORT_ABORT("bufSize is smaller than the required size\n"); \ + __POLLUTE_BUF((buffer), (bufSize)); \ } \ } while(0) /* - * Sets the SimpleReport channel to a new value. + * Set a SimpleReport to a new value. * - * void* simp_buf - ptr to memory initialized by SIMPLEREPORT_INIT() - * uint64_t new_value - new value for the channel + * void* simp_buf - ptr to memory initialized by SIMPLEREPORT_INIT() + * int64_t new_value - new value for the report */ + #define SIMPLEREPORT_SETVALUE(simp_buf, new_value) \ do { \ IOReportElement *__elem = (IOReportElement *)(simp_buf); \ @@ -108,8 +115,24 @@ do { \ __vals->simple_value = (new_value); \ } while(0) + /* - * Prepare simple report buffer for + * Increment the value of a SimpleReport. 
+ * + * void* simp_buf - ptr to memory initialized by SIMPLEREPORT_INIT() + * int64_t increment - amount by which to increment the value + */ +#define SIMPLEREPORT_INCREMENTVALUE(simp_buf, increment) \ +do { \ + IOReportElement *__elem = (IOReportElement *)(simp_buf); \ + IOSimpleReportValues *__vals; \ + __vals = (IOSimpleReportValues*)&__elem->values; \ + __vals->simple_value += (increment); \ +} while(0) + + +/* + * Prepare a SimpleReport for + * IOService::updateReport(kIOReportCopyChannelData...) * * void* simp_buf - Ptr to memory updated by SIMPLEREPORT_SETVALUE() @@ -118,6 +141,7 @@ * size_t size2cpy - On return, 'size2cpy' is set to the size of the report * data that needs to be copied for kIOReportCopyChannelData. */ + #define SIMPLEREPORT_UPDATEPREP(simp_buf, ptr2cpy, size2cpy) \ do { \ (ptr2cpy) = (simp_buf); \ @@ -126,7 +150,7 @@ /* - * Updates the result field received as a parameter for + * Update the result field received as a parameter for * kIOReportGetDimensions & kIOReportCopyChannelData actions. * * IOReportConfigureAction action - configure/updateReport() 'action' param @@ -142,20 +166,39 @@ } while (0) - /* - * Returns the channel id from the buffer previously initialized by - * SIMPLEREPORT_INIT(). + * Get the 64-bit channel ID of a SimpleReport. * * void* simp_buf - ptr to memory initialized by SIMPLEREPORT_INIT() */ #define SIMPLEREPORT_GETCHID(simp_buf) \ - (((IOReportElement *)(simp_buf))->channel_id); \ + (((IOReportElement *)(simp_buf))->channel_id) + +/* + * Get the IOReportChannelType of a SimpleReport. + * + * void* simp_buf - ptr to memory initialized by SIMPLEREPORT_INIT() + */ + +#define SIMPLEREPORT_GETCHTYPE(simp_buf) \ + (*(uint64_t*)&(((IOReportElement *)(simp_buf))->channel_type)) + + +/* + * Get the integer value of a SimpleReport. + * + * void* simp_buf - memory initialized by SIMPLEREPORT_INIT() + */ + +#define SIMPLEREPORT_GETVALUE(simp_buf) \ + (((IOSimpleReportValues*)&(((IOReportElement*)(simp_buf))->values)) \ + ->simple_value) +/* ----- State Machine Reporting (StateReport) ----- */ -// Internal struct for State report buffer +// Internal struct for StateReport typedef struct { uint16_t curr_state; uint64_t update_ts; @@ -163,16 +206,16 @@ } IOStateReportInfo; /* - * Returns the size required to be allocated for using STATEREPORT_*() + * Determine the size required for a StateReport buffer. * - * int nstates - number of states for the intended channel + * int nstates - number of states to be reported */ #define STATEREPORT_BUFSIZE(nstates) \ (sizeof(IOStateReportInfo) + (nstates) * sizeof(IOReportElement)) /* - * Initializes a buffer so it can be used with STATEREPORT_*(). + * Initialize a StateReport buffer. * * int nstates - number of states to be reported * void* buffer - ptr to STATEREPORT_BUFSIZE(nstates) bytes * size_t bufSize - sanity check of buffer's size * uint64_t providerID - registry Entry ID of the reporting service * uint64_t channelID - ID of this channel, see IOREPORT_MAKEID() * IOReportCategories categories - categories of this channel * - * If the buffer is not of sufficient size, the macro performs a - * null pointer reference to trigger a segfault. Then, the buffer is - * filled with 0xbadcafe. + * If the buffer is not of sufficient size, the macro invokes IOREPORT_ABORT. + * If that returns, the buffer is filled with 0xbadcafe. 
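+ *
+ * Illustrative usage (editor's sketch; kMyChannel and kNStates are
+ * hypothetical, and the trailing timestamp argument of
+ * STATEREPORT_SETSTATE is assumed per its description further below):
+ *
+ *     void *buf = IOMalloc(STATEREPORT_BUFSIZE(kNStates));
+ *     if (buf) {
+ *         STATEREPORT_INIT(kNStates, buf, STATEREPORT_BUFSIZE(kNStates),
+ *                          getRegistryEntryID(), kMyChannel,
+ *                          kIOReportCategoryPower);
+ *         STATEREPORT_SETSTATE(buf, 0, mach_absolute_time());
+ *         // ... later, IOFree(buf, STATEREPORT_BUFSIZE(kNStates));
+ *     }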
*/ #define STATEREPORT_INIT(nstates, buf, bufSize, providerID, channelID, cats) \ do { \ @@ -210,15 +252,14 @@ do { \ __info->update_ts = 0; \ } \ else { \ - int *__nptr = NULL; \ - *__nptr = 1; \ - POLLUTE_BUF((buf), (bufSize)); \ + IOREPORT_ABORT("bufSize is smaller than the required size\n"); \ + __POLLUTE_BUF((buf), (bufSize)); \ } \ } while(0) /* - * Initializes the state id field of a state with the specified value. By - * default, STATEREPORT_INIT initializes the state id with the index of + * Initialize the state id field of a state with the specified value. By + * default, STATEREPORT_INIT() initializes the state IDs with the index of * that state. This macro can be used to provide a more descriptive state id. * * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() @@ -237,7 +278,7 @@ do { \ /* - * Set the state of a State report. + * Set the state of a StateReport. * * void* state_buf - pointer to memory initialized by STATEREPORT_INIT() * unsigned newStateIdx - index of new state, out of bounds -> no-op @@ -260,7 +301,8 @@ do { \ } while(0) /* - * Prepare StateReport for UpdateReport call + * Prepare a StateReport for + * IOService::updateReport(kIOReportCopyChannelData...) * * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() * uint64_t currentTime - current timestamp @@ -284,7 +326,7 @@ do { \ } while(0) /* - * Updates the result field received as a parameter for kIOReportGetDimensions & + * Update the result field received as a parameter for kIOReportGetDimensions & * kIOReportCopyChannelData actions. * * void* state_buf - memory initialized by STATEREPORT_INIT() @@ -304,49 +346,225 @@ do { \ } while (0) - /* - * Returns the channel id from the buffer previously initialized by STATEREPORT_INIT(). + * Get the 64-bit channel ID of a StateReport. * * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() */ - #define STATEREPORT_GETCHID(state_buf) \ (((IOStateReportInfo *)(state_buf))->elem[0].channel_id) /* - * Returns number of transitions occurred from the given state + * Get the IOReportChannelType of a StateReport. + * + * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() + */ +#define STATEREPORT_GETCHTYPE(state_buf) \ + (*(uint64_t*)&(((IOStateReportInfo *)(state_buf))->elem[0].channel_type)) + +/* + * Get the number of transitions into a given state. * * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() * unsigned stateIdx - index of state, out of bounds -> kIOReportInvalidValue * */ - #define STATEREPORT_GETTRANSITIONS(state_buf, stateIdx) \ (((stateIdx) < ((IOStateReportInfo *)(state_buf))->elem[0].channel_type.nelements) \ ? ((IOStateReportValues*)&(((IOStateReportInfo*)(state_buf))->elem[(stateIdx)].values))->intransitions \ : kIOReportInvalidValue) /* - * Returns the total number of ticks spent in the given state. + * Get the total number of ticks spent in a given state. * * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() * unsigned stateIdx - index of state, out of bounds -> kIOReportInvalidValue */ - #define STATEREPORT_GETTICKS(state_buf, stateIdx) \ (((stateIdx) < ((IOStateReportInfo*)(state_buf))->elem[0].channel_type.nelements) \ ? ((IOStateReportValues*)&(((IOStateReportInfo*)(state_buf))->elem[(stateIdx)].values))->upticks \ : kIOReportInvalidValue) -#define POLLUTE_BUF(buf, bufSize) \ +/* ----- Reporting an Array of Integers (SimpleArrayReport) ----- */ + +/* + * Determine the buffer size for a SimpleArrayReport. 
+ * + * int nValues - number of values to be reported + */ + +#define SIMPLEARRAY_BUFSIZE(nValues) \ + ((((nValues)/IOR_VALUES_PER_ELEMENT) + (((nValues) % IOR_VALUES_PER_ELEMENT) ? 1:0)) \ + * sizeof(IOReportElement)) + +/* + * Initialize a buffer for use as a SimpleArrayReport. + * + * int nValues - number of elements to be reported + * void* buf - ptr to SIMPLEARRAY_BUFSIZE(nValues) bytes + * size_t bufSize - sanity check of buffer's size + * uint64_t providerID - registry Entry ID of the reporting service + * uint64_t channelID - ID of this channel, see IOREPORT_MAKEID() + * IOReportCategories categories - categories of this channel + * + * If the buffer is not of sufficient size, the macro invokes IOREPORT_ABORT() + * and, if that returns, fills the buffer with 0xbadcafe. + */ + +#define SIMPLEARRAY_INIT(nValues, buf, bufSize, providerID, channelID, cats) \ +do { \ + IOSimpleArrayReportValues *__rep; \ + IOReportElement *__elem; \ + uint32_t __nElems = (((nValues) / IOR_VALUES_PER_ELEMENT) + \ + (((nValues) % IOR_VALUES_PER_ELEMENT) ? 1 : 0)); \ + if ((bufSize) >= SIMPLEARRAY_BUFSIZE(nValues)) { \ + for (unsigned __no = 0; __no < __nElems; __no++) { \ + __elem = &(((IOReportElement *)(buf))[__no]); \ + __rep = (IOSimpleArrayReportValues *) &(__elem->values); \ + __elem->channel_id = (channelID); \ + __elem->provider_id = (providerID); \ + __elem->channel_type.report_format = kIOReportFormatSimpleArray; \ + __elem->channel_type.reserved = 0; \ + __elem->channel_type.categories = (cats); \ + __elem->channel_type.nelements = (__nElems); \ + __elem->channel_type.element_idx = __no; \ + __elem->timestamp = 0; \ + __rep->simple_values[0] = kIOReportInvalidIntValue; \ + __rep->simple_values[1] = kIOReportInvalidIntValue; \ + __rep->simple_values[2] = kIOReportInvalidIntValue; \ + __rep->simple_values[3] = kIOReportInvalidIntValue; \ + } \ + } \ + else { \ + IOREPORT_ABORT("bufSize is smaller than the required size\n"); \ + __POLLUTE_BUF((buf), (bufSize)); \ + } \ +} while(0) + + +/* SimpleArrayReport helpers */ + + #define __SA_FINDREP(array_buf, idx) \ + IOSimpleArrayReportValues *__rep; \ + IOReportElement *__elem; \ + unsigned __elemIdx = (idx) / IOR_VALUES_PER_ELEMENT; \ + unsigned __valueIdx = (idx) % IOR_VALUES_PER_ELEMENT; \ + __elem = &(((IOReportElement *)(array_buf))[0]); \ + if (__elemIdx < __elem->channel_type.nelements) { \ + __elem = &(((IOReportElement *)(array_buf))[__elemIdx]); \ + __rep = (IOSimpleArrayReportValues *) &(__elem->values); \ + + #define __SA_MAXINDEX(array_buf) \ + ((((IOReportElement*)(array_buf))->channel_type.nelements) \ + * IOR_VALUES_PER_ELEMENT) - 1 + +/* + * Set a value at a specified index in a SimpleArrayReport. + * + * void* array_buf - ptr to memory initialized by SIMPLEARRAY_INIT() + * unsigned idx - array index, out of bounds -> no-op + * uint64_t newValue - new value to be stored at array[idx] + */ +#define SIMPLEARRAY_SETVALUE(array_buf, idx, newValue) \ +do { \ + __SA_FINDREP((array_buf), (idx)) \ + __rep->simple_values[__valueIdx] = (newValue); \ + } \ +} while(0) +
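+/*
+ * Illustrative usage (editor's sketch; kMyChannel is hypothetical and
+ * this is assumed to run inside an IORegistryEntry subclass so that
+ * getRegistryEntryID() is available):
+ *
+ *     void *arr = IOMalloc(SIMPLEARRAY_BUFSIZE(8));
+ *     if (arr) {
+ *         SIMPLEARRAY_INIT(8, arr, SIMPLEARRAY_BUFSIZE(8),
+ *                          getRegistryEntryID(), kMyChannel,
+ *                          kIOReportCategoryTraffic);
+ *         SIMPLEARRAY_SETVALUE(arr, 3, 1234);   // array[3] = 1234
+ *     }
+ */
+
+/*
+ * Increment an array value within a SimpleArrayReport.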
+ * + * void* array_buf - ptr to memory initialized by SIMPLEARRAY_INIT() + * unsigned idx - array index to increment, out of bounds -> no-op + * int64_t value - amount by which to increment array[idx] + */ +#define SIMPLEARRAY_INCREMENTVALUE(array_buf, idx, value) \ +do { \ + __SA_FINDREP((array_buf), (idx)) \ + __rep->simple_values[__valueIdx] += (value); \ + } \ +} while(0) + + +/* + * Prepare a SimpleArrayReport for + * IOService::updateReport(kIOReportCopyChannelData...) + * + * void* array_buf - ptr to memory initialized by SIMPLEARRAY_INIT() + * void* ptr2cpy - filled in with pointer to buffer to be copied out + * size_t size2cpy - filled in with the size of the buffer to copy out + */ + +#define SIMPLEARRAY_UPDATEPREP(array_buf, ptr2cpy, size2cpy) \ +do { \ + IOReportElement *__elem; \ + __elem = &(((IOReportElement *)(array_buf))[0]); \ + (ptr2cpy) = (void *) (array_buf); \ + (size2cpy) = __elem->channel_type.nelements * sizeof(IOReportElement); \ +} while(0) + + +/* + * Update the result field received as a parameter for kIOReportGetDimensions & + * kIOReportCopyChannelData actions. + * + * void* array_buf - memory initialized by SIMPLEARRAY_INIT() + * IOReportConfigureAction action - configure/updateReport() 'action' + * void* result - configure/updateReport() 'result' + */ + +#define SIMPLEARRAY_UPDATERES(array_buf, action, result) \ do { \ - int __cnt = (bufSize)/sizeof(uint32_t); \ - while (--__cnt >= 0) \ - ((uint32_t*)(buf))[__cnt] = 0xbadcafe; \ + IOReportElement *__elem; \ + int *__nElements = (int *)(result); \ + __elem = &(((IOReportElement *)(array_buf))[0]); \ + if (((action) == kIOReportGetDimensions) || ((action) == kIOReportCopyChannelData)) { \ + *__nElements += __elem->channel_type.nelements; \ + } \ } while (0) + +/* + * Get the 64-bit channel ID of a SimpleArrayReport. + * + * void* array_buf - ptr to memory initialized by SIMPLEARRAY_INIT() + */ +#define SIMPLEARRAY_GETCHID(array_buf) \ + (((IOReportElement *)(array_buf))->channel_id) + + +/* + * Get the IOReportChannelType of a SimpleArrayReport. + * + * void* array_buf - ptr to memory initialized by SIMPLEARRAY_INIT() + */ +#define SIMPLEARRAY_GETCHTYPE(array_buf) \ + (*(uint64_t*)&(((IOReportElement *)(array_buf))->channel_type)) + +/* + * Get a value from a SimpleArrayReport. + * + * void* array_buf - ptr to memory initialized by SIMPLEARRAY_INIT() + * unsigned idx - index of the value, out of bounds -> kIOReportInvalidIntValue + */ +#define SIMPLEARRAY_GETVALUE(array_buf, idx) \ + (((idx) > __SA_MAXINDEX(array_buf) || (idx) < 0) ? kIOReportInvalidIntValue : \ + ((IOSimpleArrayReportValues*)&( \ + ((IOReportElement*)(array_buf))[(idx) / IOR_VALUES_PER_ELEMENT].values)) \ + ->simple_values[(idx) % IOR_VALUES_PER_ELEMENT]) + + +/* generic utilities */ + + #define __POLLUTE_BUF(buf, bufSize) \ + do { \ + int __cnt = (bufSize)/sizeof(uint32_t); \ + while (--__cnt >= 0) \ + ((uint32_t*)(buf))[__cnt] = 0xbadcafe; \ + } while (0) + #ifdef __cplusplus } #endif diff --git a/iokit/IOKit/IOReportTypes.h b/iokit/IOKit/IOReportTypes.h index fc1399a67..8cd6e7328 100644 --- a/iokit/IOKit/IOReportTypes.h +++ b/iokit/IOKit/IOReportTypes.h @@ -1,7 +1,5 @@ /* - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 2012 Apple Computer, Inc. All Rights Reserved. + * Copyright (c) 2012-2014 Apple Computer, Inc. All Rights Reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,6 +35,8 @@ extern "C" { #endif +#define IOR_VALUES_PER_ELEMENT 4 + /*! 
@const kIOReportInvalidValue @const kIOReportInvalidIntValue @abstract cardinal value used to indicate data errors @@ -46,16 +46,30 @@ same bit pattern so that clients checking for one or the other don't have to worry about getting the signedness right. */ -#define kIOReportInvalidValue UINT64_MAX -#define kIOReportInvalidIntValue (int64_t)kIOReportInvalidValue +#define kIOReportInvalidIntValue INT64_MIN +#define kIOReportInvalidValue (uint64_t)kIOReportInvalidIntValue + +/*! @typedef IOReportCategories + @abstract encapsulate important, multi-purpose "tags" for channels -// IOReportChannelType.categories + @discussion + IOReportCategories is the type for the .categories field of + IOReportChannelType. These categories are intended to empower a + limited number of clients to retrieve a broad range of channels + without knowing much about them. They can be OR'd together as + needed. Groups and subgroups are a more extensible mechanism + for aggregating channels produced by different drivers. +*/ typedef uint16_t IOReportCategories; #define kIOReportCategoryPower (1 << 1) // and energy -#define kIOReportCategoryTraffic (1 << 2) -#define kIOReportCategoryPerformance (1 << 3) -#define kIOReportCategoryPeripheral (1 << 4) +#define kIOReportCategoryTraffic (1 << 2) // I/O at any level +#define kIOReportCategoryPerformance (1 << 3) // e.g. cycles/byte +#define kIOReportCategoryPeripheral (1 << 4) // not built-in + +#define kIOReportCategoryField (1 << 8) // consider logging + // future categories TBD +#define kIOReportCategoryInterrupt (1 << 14) // TBR: 15850269 #define kIOReportCategoryDebug (1 << 15) #define kIOReportInvalidCategory UINT16_MAX @@ -66,7 +80,8 @@ enum { kIOReportInvalidFormat = 0, kIOReportFormatSimple = 1, kIOReportFormatState = 2, - kIOReportFormatHistogram = 3 + kIOReportFormatHistogram = 3, + kIOReportFormatSimpleArray = 4 }; // simple report values typedef struct { @@ -77,6 +92,11 @@ uint64_t reserved3; } __attribute((packed)) IOSimpleReportValues; +// simple value array +typedef struct { + int64_t simple_values[IOR_VALUES_PER_ELEMENT]; +} __attribute((packed)) IOSimpleArrayReportValues; + // state report values typedef struct { uint64_t state_id; // 0..N-1 or 8-char code (see MAKEID()) @@ -85,7 +105,7 @@ typedef struct { uint64_t last_intransition; // ticks at last in-transition } __attribute((packed)) IOStateReportValues; -// histograme report values +// histogram report values typedef struct { uint64_t bucket_hits; int64_t bucket_min; @@ -93,6 +113,8 @@ int64_t bucket_sum; } __attribute((packed)) IOHistogramReportValues; + + // configuration actions generally change future behavior typedef uint32_t IOReportConfigureAction; enum { @@ -168,7 +190,7 @@ } IOReportInterestList; typedef struct { - uint64_t v[4]; + uint64_t v[IOR_VALUES_PER_ELEMENT]; } __attribute((packed)) IOReportElementValues; typedef struct { diff --git a/iokit/IOKit/IOReturn.h b/iokit/IOKit/IOReturn.h index 8b2c58155..d4071b9aa 100644 --- a/iokit/IOKit/IOReturn.h +++ b/iokit/IOKit/IOReturn.h @@ -62,6 +62,9 @@ typedef kern_return_t IOReturn; #define sub_iokit_hidsystem err_sub(14) #define sub_iokit_scsi err_sub(16) //#define sub_iokit_pccard err_sub(21) +#ifdef PRIVATE +#define sub_iokit_nvme err_sub(28) +#endif #define sub_iokit_thunderbolt err_sub(29) #define sub_iokit_audio_video err_sub(0x45) diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index f8a9d59ba..5d5c093bc 100644 --- a/iokit/IOKit/IOService.h +++ b/iokit/IOKit/IOService.h @@ -1,5 +1,5 
@@ /* - * Copyright (c) 1998-2011 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -284,6 +284,9 @@ The class name that the service will attempt to allocate when a user client conn
ection is requested. Set some debug flags for logging the driver loading process. Flags are defined in IOKit/IOKitDebug.h, but 65535 works well.*/ +struct IOInterruptAccountingData; +struct IOInterruptAccountingReporter; + class IOService : public IORegistryEntry { OSDeclareDefaultStructors(IOService) @@ -292,7 +295,19 @@ protected: /*! @struct ExpansionData @discussion This structure will be used to expand the capabilities of this class in the future. */ - struct ExpansionData { }; + struct ExpansionData { + uint64_t authorizationID; + /* + * Variables associated with interrupt accounting. Consists of an array + * (that pairs reporters with opaque "statistics" objects), the count for + * the array, and a lock to guard both of the former variables. The lock + * is necessary as IOReporting will not update reports in a manner that is + * synchronized with the service (i.e., on a workloop). + */ + IOLock * interruptStatisticsLock; + IOInterruptAccountingReporter * interruptStatisticsArray; + int interruptStatisticsArrayCount; + }; /*! @var reserved Reserved for future use. (Internal use only) */ @@ -590,6 +605,15 @@ public: virtual bool finalize( IOOptionBits options ); +/*! @function init + @abstract Initializes generic IOService data structures (expansion data, etc). */ + virtual bool init( OSDictionary * dictionary = 0 ); + +/*! @function init + @abstract Initializes generic IOService data structures (expansion data, etc). */ + virtual bool init( IORegistryEntry * from, + const IORegistryPlane * inPlane ); + /*! @function free @abstract Frees data structures that were allocated when power management was initialized on this service. */ @@ -1081,6 +1105,19 @@ public: virtual IOReturn unregisterInterrupt(int source); +/*! @function addInterruptStatistics + @abstract Adds a statistics object to the IOService for the given interrupt. + @discussion This method associates a set of statistics and a reporter for those statistics with an interrupt for this IOService, so that we can interrogate the IOService for statistics pertaining to that interrupt. + @param statistics The IOInterruptAccountingData container we wish to associate with the IOService. + @param source The index of the interrupt source in the device. */ + IOReturn addInterruptStatistics(IOInterruptAccountingData * statistics, int source); + +/*! @function removeInterruptStatistics + @abstract Removes any statistics from the IOService for the given interrupt. + @discussion This method disassociates any IOInterruptAccountingData container we may have for the given interrupt from the IOService; this indicates that the interrupt target (at the moment, likely an IOInterruptEventSource) is being destroyed. + @param source The index of the interrupt source in the device. */ + IOReturn removeInterruptStatistics(int source); + /*! @function getInterruptType @abstract Returns the type of interrupt used for a device supplying hardware interrupts. @param source The index of the interrupt source in the device. 
@@ -1246,6 +1283,8 @@ public: static void updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessage); static void consoleLockTimer(thread_call_param_t p0, thread_call_param_t p1); void setTerminateDefer(IOService * provider, bool defer); + uint64_t getAuthorizationID( void ); + IOReturn setAuthorizationID( uint64_t authorizationID ); private: static IOReturn waitMatchIdle( UInt32 ms ); @@ -1346,6 +1385,12 @@ private: OSArray * doPhase2List, void*, void * ); static void actionDidTerminate( IOService * victim, IOOptionBits options, void *, void *, void *); + + static void actionWillStop( IOService * victim, IOOptionBits options, + void *, void *, void *); + static void actionDidStop( IOService * victim, IOOptionBits options, + void *, void *, void *); + static void actionFinalize( IOService * victim, IOOptionBits options, void *, void *, void *); static void actionStop( IOService * client, IOService * provider, @@ -1667,8 +1712,7 @@ public: /*! @function powerStateForDomainState @abstract Determines what power state the device would be in for a given power domain state. - @discussion Power management calls a driver with this method to find out what power state the device would be in for a given power domain state. This happens when the power domain is changing state and power management needs to determine the effect of the change. - Most drivers do not need to implement this method, and can rely upon the default IOService implementation. The IOService implementation scans the power state array looking for the highest state whose inputPowerRequirement field exactly matches the value of the domainState parameter. If more intelligent determination is required, the power managed driver should implement the method and override the superclass's implementation. + @discussion This call is unused by power management. Drivers should override initialPowerStateForDomainState and/or maxCapabilityForDomainState instead to change the default mapping of domain state to driver power state. @param domainState Flags that describe the character of "domain power"; they represent the outputPowerCharacter field of a state in the power domain's power state array. @result A state number. */ @@ -1775,6 +1819,8 @@ public: void reset_watchdog_timer( void ); void start_watchdog_timer ( void ); bool stop_watchdog_timer ( void ); + IOReturn registerInterestForNotifer( IONotifier *notify, const OSSymbol * typeOfInterest, + IOServiceInterestHandler handler, void * target, void * ref ); #ifdef __LP64__ static IOWorkLoop * getPMworkloop( void ); diff --git a/iokit/IOKit/IOSharedDataQueue.h b/iokit/IOKit/IOSharedDataQueue.h index a31cfc965..09c5dc17e 100644 --- a/iokit/IOKit/IOSharedDataQueue.h +++ b/iokit/IOKit/IOSharedDataQueue.h @@ -33,8 +33,12 @@ #undef dequeue #endif +#define DISABLE_DATAQUEUE_WARNING /* IODataQueue is deprecated, please use IOSharedDataQueue instead */ + #include +#undef DISABLE_DATAQUEUE_WARNING + typedef struct _IODataQueueEntry IODataQueueEntry; /*! @@ -62,7 +66,21 @@ class IOSharedDataQueue : public IODataQueue protected: virtual void free(); + /*! + * @function getQueueSize + * @abstract Returns the size of the data queue. + * @discussion Use this method to access the size of the data queue. Do not access the value of size directly, as it can get modified from userspace and is not reliable. + * @result Returns the size of the data queue, or zero in case of failure. + */ UInt32 getQueueSize(); + + /*! + * @function setQueueSize + * @abstract Stores the value of the size of the data queue. 
+ * @discussion Use this method to store the value of the size of the data queue. Do not access the value of size directly, as it can get modified from userspace and is not reliable. + * @param size The size of the data queue. + * @result Returns true in case of success, false otherwise. + */ Boolean setQueueSize(UInt32 size); public: diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h index b551fd723..b063778a3 100644 --- a/iokit/IOKit/IOTimeStamp.h +++ b/iokit/IOKit/IOTimeStamp.h @@ -35,7 +35,7 @@ IOTimeStampStartConstant(unsigned int csc, uintptr_t a = 0, uintptr_t b = 0, uintptr_t c = 0, uintptr_t d = 0) { - KERNEL_DEBUG_CONSTANT(csc | DBG_FUNC_START, a, b, c, d, 0); + KERNEL_DEBUG_CONSTANT(((uint32_t)csc) | DBG_FUNC_START, a, b, c, d, 0); } static inline void @@ -43,7 +43,7 @@ IOTimeStampEndConstant(uintptr_t csc, uintptr_t a = 0, uintptr_t b = 0, uintptr_t c = 0, uintptr_t d = 0) { - KERNEL_DEBUG_CONSTANT(csc | DBG_FUNC_END, a, b, c, d, 0); + KERNEL_DEBUG_CONSTANT(((uint32_t)csc) | DBG_FUNC_END, a, b, c, d, 0); } static inline void @@ -51,7 +51,7 @@ IOTimeStampConstant(uintptr_t csc, uintptr_t a = 0, uintptr_t b = 0, uintptr_t c = 0, uintptr_t d = 0) { - KERNEL_DEBUG_CONSTANT(csc | DBG_FUNC_NONE, a, b, c, d, 0); + KERNEL_DEBUG_CONSTANT(((uint32_t)csc) | DBG_FUNC_NONE, a, b, c, d, 0); } #if KDEBUG @@ -61,7 +61,7 @@ IOTimeStampStart(uintptr_t csc, uintptr_t a = 0, uintptr_t b = 0, uintptr_t c = 0, uintptr_t d = 0) { - KERNEL_DEBUG(csc | DBG_FUNC_START, a, b, c, d, 0); + KERNEL_DEBUG(((uint32_t)csc) | DBG_FUNC_START, a, b, c, d, 0); } static inline void @@ -69,7 +69,7 @@ IOTimeStampEnd(uintptr_t csc, uintptr_t a = 0, uintptr_t b = 0, uintptr_t c = 0, uintptr_t d = 0) { - KERNEL_DEBUG(csc | DBG_FUNC_END, a, b, c, d, 0); + KERNEL_DEBUG(((uint32_t)csc) | DBG_FUNC_END, a, b, c, d, 0); } static inline void @@ -77,7 +77,7 @@ IOTimeStamp(uintptr_t csc, uintptr_t a = 0, uintptr_t b = 0, uintptr_t c = 0, uintptr_t d = 0) { - KERNEL_DEBUG(csc | DBG_FUNC_NONE, a, b, c, d, 0); + KERNEL_DEBUG(((uint32_t)csc) | DBG_FUNC_NONE, a, b, c, d, 0); } #endif /* KDEBUG */ diff --git a/iokit/IOKit/IOTypes.h b/iokit/IOKit/IOTypes.h index b563ae6d1..aa8afd057 100644 --- a/iokit/IOKit/IOTypes.h +++ b/iokit/IOKit/IOTypes.h @@ -214,10 +214,11 @@ enum { kIOMapStatic = 0x01000000, kIOMapReference = 0x02000000, - kIOMapUnique = 0x04000000 + kIOMapUnique = 0x04000000, #ifdef XNU_KERNEL_PRIVATE - , kIOMap64Bit = 0x08000000 + kIOMap64Bit = 0x08000000, #endif + kIOMapPrefault = 0x10000000, }; /*! @enum Scale Factors diff --git a/iokit/IOKit/IOUserClient.h b/iokit/IOKit/IOUserClient.h index 453795c74..c50e3ead9 100644 --- a/iokit/IOKit/IOUserClient.h +++ b/iokit/IOKit/IOUserClient.h @@ -283,9 +283,15 @@ protected: public: + static IOReturn clientHasAuthorization( task_t task, + IOService * service ); + static IOReturn clientHasPrivilege( void * securityToken, const char * privilegeName ); + static OSObject * copyClientEntitlement( task_t task, + const char * entitlement ); + /*! @function releaseAsyncReference64 @abstract Release the mach_port_t reference held within the OSAsyncReference64 structure. diff --git a/iokit/IOKit/Makefile b/iokit/IOKit/Makefile index e9ee13a71..5ea0528d5 100644 --- a/iokit/IOKit/Makefile +++ b/iokit/IOKit/Makefile @@ -24,26 +24,35 @@ INSTINC_SUBDIRS = \ EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -NOT_EXPORT_HEADERS = IOReportMacros.h +# By default, everything in xnu/iokit/IOKit gets installed into +# Kernel.framework/Headers/IOKit AND Kernel.framework/PrivateHeaders/IOKit. 
+# This is so the files with #ifdef ...PRIVATE portions can be processed. +# xnu/README documents the INSTALL* and EXPORT_MI_DIR lists. +ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) + +# Do not install these headers (anywhere). +NOT_EXPORT_HEADERS = IOInterruptAccountingPrivate.h +# Install these only in Kernel.framework's PrivateHeaders (not Headers). NOT_KF_MI_HEADERS = $(NOT_EXPORT_HEADERS) \ IOKitKeysPrivate.h IOCPU.h \ IOHibernatePrivate.h IOPolledInterface.h \ IOCommandQueue.h IOLocksPrivate.h \ IOSyncer.h AppleKeyStoreInterface.h \ - IOStatistics.h IOStatisticsPrivate.h - -NOT_LOCAL_HEADERS = - -ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) + IOStatistics.h IOStatisticsPrivate.h \ + IOKernelReporters.h +# These should be additionally installed in IOKit.framework's public Headers INSTALL_MI_LIST = IOBSD.h IOKitKeys.h IOKitServer.h IOReturn.h\ IOSharedLock.h IOTypes.h OSMessageNotification.h\ - IODataQueueShared.h IOMessage.h + IODataQueueShared.h IOMessage.h IOInterruptAccounting.h\ +# These should be additionally installed in IOKit.framework's PrivateHeaders INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h IOHibernatePrivate.h \ IOLocksPrivate.h IOStatistics.h \ - AppleKeyStoreInterface.h IOReportTypes.h + AppleKeyStoreInterface.h \ + IOReportTypes.h IOKernelReportStructs.h \ + IOReportMacros.h INSTALL_MI_DIR = . diff --git a/iokit/IOKit/OSMessageNotification.h b/iokit/IOKit/OSMessageNotification.h index 7d7b5e537..bd6bc1cb1 100644 --- a/iokit/IOKit/OSMessageNotification.h +++ b/iokit/IOKit/OSMessageNotification.h @@ -50,7 +50,12 @@ enum { kIOServiceTerminatedNotificationType = 102, kIOAsyncCompletionNotificationType = 150, kIOServiceMessageNotificationType = 160, - kLastIOKitNotificationType = 199 + kLastIOKitNotificationType = 199, + + // reserved bits + kIOKitNoticationTypeMask = 0x00000FFF, + kIOKitNoticationTypeSizeAdjShift = 30, + kIOKitNoticationMsgSizeMask = 3, }; enum { diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index cdaf4655f..f2ba018b2 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -95,8 +95,7 @@ enum { An indication that the power flags represent the state of the root power domain. This bit must not be set in the IOPMPowerState structure. Power Management may pass this bit to initialPowerStateForDomainState() - or powerStateForDomainState() to map from a global system state to the - desired device state. + to map from a global system state to the desired device state. */ typedef unsigned long IOPMPowerFlags; enum { diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h index f9bb27642..4aecff146 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h +++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h @@ -30,44 +30,6 @@ #include -#pragma mark PM Timeline Logging -/************************************************** -* -* Timeline API Keys - Reports timing details for -* applications, drivers, and system during PM activity -* -* For kernel-internal use only -**************************************************/ - -// Keys for interfacing with IOPMrootDomain Timeline -/* @constant kIOPMTimelineDictionaryKey - * @abstract RootDomain key for dictionary describing Timeline's info - */ -#define kIOPMTimelineDictionaryKey "PMTimelineLogging" - -/* @constant kIOPMTimelineEnabledKey - * @abstract Boolean value indicating whether the system is recording PM events. - * @discussion Key may be found in the dictionary at IOPMrootDomain's property - * kIOPMTimelineDictionaryKey. uint32_t value; may be 0. 
- */ -#define kIOPMTimelineEnabledKey "TimelineEnabled" - -/* @constant kIOMPTimelineSystemNumberTrackedKey - * @abstract The maximum number of system power events the system may record. - * @discussion Key may be found in the dictionary at IOPMrootDomain's property - * kIOPMTimelineDictionaryKey. uint32_t value; may be 0. - */ -#define kIOPMTimelineSystemNumberTrackedKey "TimelineSystemEventsTracked" - -/* @constant kIOPMTimelineSystemBufferSizeKey - * @abstract Size in bytes of buffer recording system PM events - * @discussion Key may be found in the dictionary at IOPMrootDomain's property - * kIOPMTimelineDictionaryKey. uint32_t value; may be 0. - */ -#define kIOPMTimelineSystemBufferSizeKey "TimelineSystemBufferSize" - - - /* @constant kIOPMEventTypeIntermediateFlag * @abstract This bit indicates the event is an intermediate event * which must occur within a major system power event. @@ -80,7 +42,7 @@ enum { kIOPMEventTypeUndefined = 0, - /* Event types mark driver events + /* Event types mark driver events */ kIOPMEventTypeSetPowerStateImmediate = 1001, kIOPMEventTypeSetPowerStateDelayed = 1002, @@ -98,10 +60,6 @@ enum { kIOPMEventTypeWakeDone = 3002, kIOPMEventTypeDoze = 4001, kIOPMEventTypeDozeDone = 4002, - kIOPMEventTypeLiteWakeUp = 5001, - kIOPMEventTypeLiteWakeUpDone = 5002, - kIOPMEventTypeLiteWakeDown = 5003, - kIOPMEventTypeLiteWakeDownDone = 5004, kIOPMEventTypeUUIDSet = 6001, kIOPMEventTypeUUIDClear = 6002, @@ -126,10 +84,10 @@ enum { * *****************************************************************************/ -/* kIOPMMessageUserIsActiveChanged +/* kIOPMMessageUserIsActiveChanged * User became active or inactive. Message sent after the kIOPMUserIsActiveKey * property was updated with a new value. - */ + */ #define kIOPMMessageUserIsActiveChanged \ iokit_family_msg(sub_iokit_powermanagement, 0x400) @@ -168,81 +126,14 @@ enum { #define kIOPMSleepServiceExitKey "Sleep Service Back to Sleep" #define kIOPMDarkWakeThermalEmergencyKey "Dark Wake Thermal Emergency" - -enum { - kIOPMMaxSystemEventsTracked = 25000, - kIOPMDefaultSystemEventsTracked = 1000, - kMaxPMStringLength = 40, -}; - -/* @struct IOPMSystemEventRecord - * @abstract Records a singe power event to a particular PM entity. - * This includes changes to a driver's power state, application responses - * to PM notifications, or system power management milestones. - */ -typedef struct { - union { - // For DRIVER events - char ownerName[kMaxPMStringLength]; - // For SYSTEM events, uuid contains the string describing the active UUID - char uuid[kMaxPMStringLength]; - }; - - // For DRIVER events - records the name of the driver who generated the notifications. - char interestName[kMaxPMStringLength]; - - // DRIVER & SYSTEM - Times are stored as uint64_t - // The high 32 bytes are the seconds returned from clock_get_calendar_microtime, - // and the low 32 bytes are the accompanying microseconds. - uint64_t timestamp; - - union { - // For DRIVER events - ownerDisambiguateID is a unique descriptor of the driver, to disambiguate - // several similarly named drivers. - uint64_t ownerDisambiguateID; - // For SYSTEM events - eventReason is a value in SystemSleepReason - uint64_t eventReason; - }; - - // DRIVER & SYSTEM - eventType is one of 'SystemEventTypes' - // The value of eventType determines, among ohter things, whether this is a SYSTEM or - // DRIVER event type. 
- uint32_t eventType; - - // DRIVER & SYSTEM - eventResult is an IOReturn value - uint32_t eventResult; - - // DRIVER - If defined, elapsedTimeUS records the entire time a transaction took to complete - uint32_t elapsedTimeUS; - - // DRIVER - in power state changes, oldState & newState are PM power state indices. - uint8_t oldState; - uint8_t newState; -} IOPMSystemEventRecord; - -/* @struct IOPMTraceBufferHeader - * Occupies the first bytes in the buffer allocated by IOPMrootDomain - * Describes the size and current index of the trace buffer - */ -typedef struct { - uint32_t sizeBytes; - uint32_t sizeEntries; - uint32_t index; -} IOPMTraceBufferHeader; - -/* Argument to IOPMrootDomain::clientMemoryForType to acquire - * memory mapping. - */ -enum { - kPMRootDomainMapTraceBuffer = 1 -}; - -/************************************************** -* -* Accountability API Ends here -* -**************************************************/ - +/*! kIOPMPSRestrictedModeKey + * An IOPMPowerSource property key + * Its property has an integer value. + * - value = 1 when the device is in a low power state and not fully functional. + * - value = 0, or property missing altogether, when the device is + * not in a restricted mode power state. +*/ +#define kIOPMPSRestrictedModeKey "RestrictedMode" #pragma mark Stray Bitfields // Private power commands issued to root domain @@ -278,7 +169,7 @@ enum { */ #define kIOPMSettingSleepServiceWakeCalendarKey "SleepServiceWakeCalendarKey" -/*! @constant kIOPMCalendarWakeTypes +/*! @constant kIOPMCalendarWakeTypes * * These are valid values for IOPM.h:IOPMCalendarStruct->selector */ @@ -318,7 +209,12 @@ enum { * @abstract Shutdown is in progress and irrevocable. * @discussion State remains 4 until power is removed from CPU. */ - kIOPMStateConsoleShutdownCertain = 4 + kIOPMStateConsoleShutdownCertain = 4, +/* @constant kIOPMStateConsoleSULogoutInitiated + Indicates that LoginWindow has initiated a software update restart. + The next logout will not immediately lead to a shutdown. + */ + kIOPMStateConsoleSULogoutInitiated = 5 }; /* @constant kIOPMSettingSilentRunningKey @@ -340,7 +236,7 @@ enum { /*****************************************************************************/ /*****************************************************************************/ -/* PM Statistics - event indices +/* PM Statistics - event indices * These are arguments to IOPMrootDomain::pmStatsRecordEvent(). */ enum { @@ -350,8 +246,8 @@ enum { kIOPMStatsApplicationNotify, kIOPMStatsLateDriverAcknowledge, kIOPMStatsLateAppAcknowledge, - - // To designate if you're specifying the start or stop end of + + // To designate if you're specifying the start or stop end of // each of the above events, do a bitwise OR of the appropriate // Start/Stop flag and pass the result to IOPMrootDomain to record // the event. @@ -370,26 +266,29 @@ enum { #define kIOPMStatsApplicationResponseTypeKey "ResponseType" #define kIOPMStatsMessageTypeKey "MessageType" #define kIOPMStatsPowerCapabilityKey "PowerCaps" - +#define kIOPMStatsSystemTransitionKey "TransitionType" + // PM Statistics: potential values for the key kIOPMStatsApplicationResponseTypeKey // entry in the application results array. 
#define kIOPMStatsResponseTimedOut "ResponseTimedOut" #define kIOPMStatsResponseCancel "ResponseCancel" #define kIOPMStatsResponseSlow "ResponseSlow" +#define kIOPMStatsResponsePrompt "ResponsePrompt" +#define kIOPMStatsDriverPSChangeSlow "DriverPSChangeSlow" struct PMStatsBounds{ uint64_t start; uint64_t stop; }; typedef struct { - + struct PMStatsBounds hibWrite; struct PMStatsBounds hibRead; // bounds driverNotifySleep; // bounds driverNotifyWake; // bounds appNotifySleep; -// bounds appNotifyWake; -// OSDictionary *tardyApps; +// bounds appNotifyWake; +// OSDictionary *tardyApps; // OSDictionary *tardyDrivers; } PMStatsStruct; @@ -444,7 +343,7 @@ enum { * Phase ends: All notified clients have acknowledged. */ kIOPMTracePointSleepPriorityClients = 0x12, - + /* When kIOPMTracePointSleepWillChangeInterests is the latest tracePoint, * PM is calling powerStateWillChangeTo() on interested drivers of root domain. * @@ -464,7 +363,7 @@ enum { * change have completed. */ kIOPMTracePointSleepPowerPlaneDrivers = 0x14, - + /* When kIOPMTracePointSleepDidChangeInterests is the latest tracePoint, * PM is calling powerStateDidChangeTo() on interested drivers of root domain. * @@ -599,7 +498,7 @@ enum { * in case any user errors occurred that we could not detect in software. * * Phase begins: After IOPMrootDomain sends kIOMessageSystemHasPoweredOn message. - * Phase ends: When loginwindow calls IOPMSleepWakeSetUUID(NULL) the system shall + * Phase ends: When loginwindow calls IOPMSleepWakeSetUUID(NULL) the system shall * be considered awake and usable. The next phase shall be kIOPMTracePointSystemUp. */ kIOPMTracePointSystemLoginwindowPhase = 0x30, @@ -702,6 +601,30 @@ enum { */ #define kIOPMUserIsActiveKey "IOPMUserIsActive" +/* kIOPMDriverWakeEventsKey + * Key refers to a property that should only be examined by powerd. + */ +#define kIOPMDriverWakeEventsKey "IOPMDriverWakeEvents" + +/***************************************************************************** + * + * IOPMDriverWakeEvents dictionary keys + * + *****************************************************************************/ + +#define kIOPMWakeEventTimeKey "Time" +#define kIOPMWakeEventFlagsKey "Flags" +#define kIOPMWakeEventReasonKey "Reason" +#define kIOPMWakeEventDetailsKey "Details" + +/***************************************************************************** + * + * Wake event flags reported to IOPMrootDomain::claimSystemWakeEvent() + * + *****************************************************************************/ + +#define kIOPMWakeEventSource 0x00000001 + /***************************************************************************** * * System Sleep Policy @@ -717,7 +640,8 @@ enum { * @discussion An OSNumber property of root domain that describes the type * of system sleep. This property is set after notifying priority sleep/wake * clients, but before informing interested drivers and shutting down power - * plane drivers. + * plane drivers. On a hibernate abort or failure, this property will not be + * updated and will indicate the failed sleep type. 
*/ #define kIOPMSystemSleepTypeKey "IOPMSystemSleepType" @@ -775,7 +699,8 @@ enum { kIOPMSleepFactorAutoPowerOffForced = 0x00040000ULL, kIOPMSleepFactorExternalDisplay = 0x00080000ULL, kIOPMSleepFactorNetworkKeepAliveActive = 0x00100000ULL, - kIOPMSleepFactorLocalUserActivity = 0x00200000ULL + kIOPMSleepFactorLocalUserActivity = 0x00200000ULL, + kIOPMSleepFactorHibernateFailed = 0x00400000ULL }; // System Sleep Types @@ -844,30 +769,62 @@ struct IOPMSystemSleepParameters * Sleep Wake debug buffer header */ typedef struct { - uint32_t version; + uint32_t signature; uint32_t alloc_size; - uint32_t dlog_buf_offset; /* Offset at which root domain's logging is stored */ - volatile uint32_t dlog_cur_pos; /* Offset at which next trace will be copied to */ - uint32_t dlog_size; /* Size reserverd for root domain's logging */ - uint32_t crc; /* CRC for spindump & following data. Doesn't cover hdr & DLOG buf */ + uint32_t crc; /* CRC for spindump & following data.*/ uint32_t spindump_offset; /* Offset at which spindump offset is stored */ uint32_t spindump_size; + uint8_t is_osx_watchdog; /* All members from UUID onwards are saved into log file */ - char UUID[44]; + char UUID[44]; char cps[9]; /* Current power state */ - char PMStatusCode[100]; - char reason[42]; -} swd_hdr; + char PMStatusCode[32]; + char reason[32]; +} swd_hdr; -#define SWD_BUF_SIZE (20*PAGE_SIZE) -#define SWD_DLOG_SIZE ((4*PAGE_SIZE)-sizeof(swd_hdr)) +/* + * Structure between stackshot samples, expected by spindump + */ +typedef struct { + uint32_t magic; // 0xbad51ee4 + uint32_t size; // Size of the stackshot buffer following this struct +} swd_stackshot_hdr; + + +#define SWD_HDR_SIGNATURE 0xdeb8da2a +#define SWD_STACKSHOTHDR_MAGIC 0xbad51ee4 // expected by spindump + +#define SWD_BUF_SIZE (40*PAGE_SIZE) +#define SWD_INITIAL_STACK_SIZE ((SWD_BUF_SIZE/2)-sizeof(swd_hdr)) /* Bits in swd_flags */ -#define SWD_WDOG_ENABLED 0x1 -#define SWD_BOOT_BY_WDOG 0x2 -#define SWD_VALID_LOGS 0x4 +#define SWD_WDOG_ENABLED 0x01 +#define SWD_BOOT_BY_SW_WDOG 0x02 +#define SWD_BOOT_BY_OSX_WDOG 0x04 +#define SWD_VALID_LOGS 0x08 +#define SWD_LOGS_IN_FILE 0x10 +#define SWD_LOGS_IN_MEM 0x20 + +/* Filenames associated with the stackshots/logs generated by the SWD */ +#define kSleepWakeStackFilename "/var/log/SleepWakeStacks.dump" +#define kSleepWakeLogFilename "/var/log/SleepWakeLog.dump" +#define kAppleOSXWatchdogStackFilename "/var/log/AppleOSXWatchdogStacks.dump" +#define kAppleOSXWatchdogLogFilename "/var/log/AppleOSXWatchdogLog.dump" + +inline char const* getDumpStackFilename(swd_hdr *hdr) +{ + if (hdr && hdr->is_osx_watchdog) + return kAppleOSXWatchdogStackFilename; + return kSleepWakeStackFilename; +} +inline char const* getDumpLogFilename(swd_hdr *hdr) +{ + if (hdr && hdr->is_osx_watchdog) + return kAppleOSXWatchdogLogFilename; + return kSleepWakeLogFilename; +} /* RootDomain IOReporting channels */ #define kSleepCntChID IOREPORT_MAKEID('S','l','e','e','p','C','n','t') @@ -886,7 +843,7 @@ typedef struct { * register a sleep policy handler. Only a single sleep policy handler can * be installed. 
*/ -#define kIOPMInstallSystemSleepPolicyHandlerKey \ +#define kIOPMInstallSystemSleepPolicyHandlerKey \ "IOPMInstallSystemSleepPolicyHandler" typedef IOReturn (*IOPMSystemSleepPolicyHandler)( diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index 7138ea5c2..0de68bbc8 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -30,7 +30,8 @@ #include #include -#include +#include +#include #ifdef XNU_KERNEL_PRIVATE struct AggressivesRecord; @@ -38,30 +39,33 @@ struct IOPMMessageFilterContext; struct IOPMActions; struct IOPMSystemSleepParameters; class PMSettingObject; -class IOPMTimeline; -class PMEventDetails; class PMTraceWorker; class IOPMPowerStateQueue; class RootDomainUserClient; class PMAssertionsTracker; + +#define OBFUSCATE(x) \ + (((((uintptr_t)(x)) >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) && (((uintptr_t)(x)) < VM_MAX_KERNEL_ADDRESS)) ? \ + ((void *)(VM_KERNEL_ADDRPERM(x))) : (void *)(x)) + #endif /*! * Types for PM Assertions * For creating, releasing, and getting PM assertion levels. */ - + /*! IOPMDriverAssertionType * A bitfield describing a set of assertions. May be used to specify which assertions - * to set with IOPMrootDomain::createPMAssertion; or to query which + * to set with IOPMrootDomain::createPMAssertion; or to query which * assertions are set with IOPMrootDomain::releasePMAssertion. */ typedef uint64_t IOPMDriverAssertionType; /* IOPMDriverAssertionID * Drivers may create PM assertions to request system behavior (keep the system awake, - * or keep the display awake). When a driver creates an assertion via - * IOPMrootDomain::createPMAssertion, PM returns a handle to + * or keep the display awake). When a driver creates an assertion via + * IOPMrootDomain::createPMAssertion, PM returns a handle to * the assertion of type IOPMDriverAssertionID. */ typedef uint64_t IOPMDriverAssertionID; @@ -79,13 +83,13 @@ typedef uint32_t IOPMDriverAssertionLevel; * Flags for get/setSleepSupported() */ enum { - kRootDomainSleepNotSupported = 0x00000000, - kRootDomainSleepSupported = 0x00000001, - kFrameBufferDeepSleepSupported = 0x00000002, + kRootDomainSleepNotSupported = 0x00000000, + kRootDomainSleepSupported = 0x00000001, + kFrameBufferDeepSleepSupported = 0x00000002, kPCICantSleep = 0x00000004 }; -/* +/* *IOPMrootDomain registry property keys */ #define kRootDomainSupportedFeatures "Supported Features" @@ -129,9 +133,9 @@ typedef IOReturn (*IOPMSettingControllerCallback) __BEGIN_DECLS IONotifier * registerSleepWakeInterest( IOServiceInterestHandler, void *, void * = 0); - + IONotifier * registerPrioritySleepWakeInterest( - IOServiceInterestHandler handler, + IOServiceInterestHandler handler, void * self, void * ref = 0); IOReturn acknowledgeSleepWakeNotification(void * ); @@ -139,7 +143,7 @@ IOReturn acknowledgeSleepWakeNotification(void * ); IOReturn vetoSleepWakeNotification(void * PMrefcon); __END_DECLS -#define IOPM_ROOTDOMAIN_REV 2 +#define IOPM_ROOTDOMAIN_REV 2 class IOPMrootDomain: public IOService { @@ -171,13 +175,81 @@ public: @result kIOReturnSuccess on success */ IOReturn systemPowerEventOccurred( - const OSSymbol *event, + const OSSymbol *event, uint32_t intValue ); IOReturn systemPowerEventOccurred( - const OSSymbol *event, + const OSSymbol *event, OSObject *value ); +#ifdef XNU_KERNEL_PRIVATE // Hide doc from public headers +/*! @function claimSystemWakeEvent + @abstract Apple-internal SPI to describe system wake events. 
+ @discussion IOKit drivers may call claimSystemWakeEvent() during system wakeup to + provide human-readable debug information describing the event(s) that + caused the system to wake. + + - Drivers should call claimSystemWakeEvent before completing + their setPowerState() acknowledgement. IOPMrootDomain stops + collecting wake events when driver wake is complete. + + - It is only appropriate to claim a wake event when the driver + can positively identify its hardware has generated an event + that can wake the system. + + - This call tracks wake events from a non-S0 state (S0i, S3, S4) into S0. + - This call does not track wake events from DarkWake(S0) to FullWake(S0). + + Examples: + (reason = "WiFi.TCPData", + details = "TCPKeepAlive packet arrived from IP 16.2.1.1") + (reason = "WiFi.ScanOffload", + details = "WiFi station 'AppleWiFi' signal dropped below threshold") + (reason = "Enet.LinkToggle", + details = "Ethernet attached") + + @param device The device/nub that is associated with the wake event. + + @param flags Pass kIOPMWakeEventSource if the device is the source + of the wake event. Pass zero if the device is forwarding or + aggregating wake events from multiple sources, e.g. a USB or + Thunderbolt host controller. + + @param reason Caller should pass a human-readable C string describing the + wake reason. Please use a string from the list below, or create + your own string matching this format: + [Hardware].[Event] + WiFi.MagicPacket + WiFi.ScanOffload + WiFi.mDNSConflict + WiFi.mDNSService + WiFi.TCPData + WiFi.TCPTimeout + WiFi.FirmwareCrash + Enet.MagicPacket + Enet.mDNSConflict + Enet.mDNSService + Enet.TCPData + Enet.TCPTimeout + Enet.Service + Enet.LinkToggle + Enet.ConflictResolution + Enet.PatternMatch + Enet.Timer + Enet.LinkUpTimeout + Enet.LinkDown + USB.DeviceAttach + USB.DeviceDetach + + @param details Optional details further describing the wake event. + Please pass an OSString defining the event. + */ +#endif + void claimSystemWakeEvent( IOService *device, + IOOptionBits flags, + const char *reason, + OSObject *details = 0 ); + virtual IOReturn receivePowerNotification( UInt32 msg ); virtual void setSleepSupported( IOOptionBits flags ); @@ -189,18 +261,18 @@ public: // KEXT driver announces support of power management feature void publishFeature( const char *feature ); - + // KEXT driver announces support of power management feature // And specifies power sources with kIOPMSupportedOn{AC/Batt/UPS} bitfield. // Returns a unique uint32_t identifier for later removing support for this - // feature. + // feature. // NULL is acceptable for uniqueFeatureID for kexts without plans to unload. - void publishFeature( const char *feature, + void publishFeature( const char *feature, uint32_t supportedWhere, uint32_t *uniqueFeatureID); - // KEXT driver announces removal of a previously published power management + // KEXT driver announces removal of a previously published power management // feature. Pass 'uniqueFeatureID' returned from publishFeature() IOReturn removePublishedFeature( uint32_t removeFeatureID ); @@ -215,8 +287,8 @@ public: /*! @function registerPMSettingController @abstract Register for callbacks on changes to certain PM settings. - @param settings NULL terminated array of C strings, each string for a PM - setting that the caller is interested in and wants to get callbacks for. + @param settings NULL terminated array of C strings, each string for a PM + setting that the caller is interested in and wants to get callbacks for.
@param callout C function ptr or member function cast as such. @param target The target of the callback, usually 'this' @param refcon Will be passed to caller in callback; for caller's use. @@ -234,8 +306,8 @@ public: /*! @function registerPMSettingController @abstract Register for callbacks on changes to certain PM settings. - @param settings NULL terminated array of C strings, each string for a PM - setting that the caller is interested in and wants to get callbacks for. + @param settings NULL terminated array of C strings, each string for a PM + setting that the caller is interested in and wants to get callbacks for. @param supportedPowerSources bitfield indicating which power sources these settings are supported for (kIOPMSupportedOnAC, etc.) @param callout C function ptr or member function cast as such. @@ -282,7 +354,7 @@ public: /* @function setPMAssertionLevel @abstract Modify the level of a pre-existing assertion. - @discussion Change the value of a PM assertion to influence system behavior, + @discussion Change the value of a PM assertion to influence system behavior, without undergoing the work required to create or destroy an assertion. Suggested for clients who will assert and de-assert needs for PM behavior several times over their lifespan. @@ -294,13 +366,13 @@ public: /*! @function getPMAssertionLevel @abstract Returns the active level of the specified assertion(s). - @discussion Returns kIOPMDriverAssertionLevelOff or + @discussion Returns kIOPMDriverAssertionLevelOff or kIOPMDriverAssertionLevelOn. If multiple assertions are specified in the bitfield, only returns kIOPMDriverAssertionLevelOn if all assertions are active. @param whichAssertionBits Bits defining the assertion or assertions the caller is interested in the level of. If in doubt, pass kIOPMDriverAssertionCPUBit as the argument. - @result Returns kIOPMDriverAssertionLevelOff or + @result Returns kIOPMDriverAssertionLevelOff or kIOPMDriverAssertionLevelOn indicating the specified assertion's levels, if available. If the assertions aren't supported on this machine, or aren't recognized by the OS, the result is undefined. @@ -313,6 +385,12 @@ public: */ IOReturn releasePMAssertion(IOPMDriverAssertionID releaseAssertion); +/*! @function restartWithStackshot + @abstract Take a stackshot of the system and restart it. + @result Returns kIOReturnSuccess if it works, kIOReturnError if the service is not available.
+*/ + IOReturn restartWithStackshot(); + private: virtual IOReturn changePowerStateTo( unsigned long ordinal ); virtual IOReturn changePowerStateToPriv( unsigned long ordinal ); @@ -381,13 +459,13 @@ public: void handlePowerChangeStartForPCIDevice( IOService * service, - IOPMActions * actions, + IOPMActions * actions, IOPMPowerStateIndex powerState, IOPMPowerChangeFlags * inOutChangeFlags ); void handlePowerChangeDoneForPCIDevice( IOService * service, - IOPMActions * actions, + IOPMActions * actions, IOPMPowerStateIndex powerState, IOPMPowerChangeFlags changeFlags ); @@ -401,9 +479,6 @@ public: void handleQueueSleepWakeUUID( OSObject *obj); - void handleSuspendPMNotificationClient( - uint32_t pid, bool doSuspend); - void willNotifyPowerChildren( IOPMPowerStateIndex newPowerState ); IOReturn setMaintenanceWakeCalendar( @@ -412,10 +487,10 @@ public: IOReturn getSystemSleepType( uint32_t * sleepType ); // Handle callbacks from IOService::systemWillShutdown() - void acknowledgeSystemWillShutdown( IOService * from ); + void acknowledgeSystemWillShutdown( IOService * from ); // Handle platform halt and restart notifications - void handlePlatformHaltRestart( UInt32 pe_type ); + void handlePlatformHaltRestart( UInt32 pe_type ); IOReturn shutdownSystem( void ); IOReturn restartSystem( void ); @@ -442,42 +517,20 @@ public: void publishPMSetting( const OSSymbol * feature, uint32_t where, uint32_t * featureID ); -/*! @function recordPMEvent - @abstract Logs IOService PM event timing. - @discussion Should only be called from IOServicePM. Should not be exported. - @result kIOReturn on success. -*/ - IOReturn recordPMEvent( PMEventDetails *details ); - void recordPMEvent( uint32_t type, const char *uuid, - uint32_t reason, uint32_t result ); - IOReturn recordAndReleasePMEvent( PMEventDetails *details ); - void pmStatsRecordEvent( int eventIndex, AbsoluteTime timestamp); void pmStatsRecordApplicationResponse( - const OSSymbol *response, - const char *name, + const OSSymbol *response, + const char *name, int messageType, - uint32_t delay_ms, - int app_pid); - + uint32_t delay_ms, + int app_pid, + OSObject *object, + IOPMPowerStateIndex ps=0); -/*! @function suspendPMNotificationsForPID - @abstract kernel process management calls this to disable sleep/wake notifications - when a process is suspended. - @param pid the process ID - @param doSuspend true suspends the notifications; false enables them -*/ - void suspendPMNotificationsForPID( uint32_t pid, bool doSuspend); - -/*! 
@function pmNotificationIsSuspended - @abstract returns true if PM notifications have been suspended - @param pid the process ID - @result true if the process has been suspended -*/ - bool pmNotificationIsSuspended( uint32_t pid ); + void copyWakeReasonString( char * outBuf, size_t bufSize ); #if HIBERNATION bool getHibernateSettings( @@ -485,8 +538,8 @@ public: uint32_t * hibernateFreeRatio, uint32_t * hibernateFreeTime ); #endif + void takeStackshot(bool restart, bool isOSXWatchdog); void sleepWakeDebugTrig(bool restart); - void sleepWakeDebugLog(const char *fmt,...); void sleepWakeDebugEnableWdog(); bool sleepWakeDebugIsWdogEnabled(); static void saveTimeoutAppStackShot(void *p0, void *p1); @@ -528,6 +581,7 @@ private: OSDictionary * wranglerIdleSettings; IOLock *featuresDictLock; // guards supportedFeatures + IOLock *wakeEventLock; IOPMPowerStateQueue *pmPowerStateQueue; OSArray *allowedPMSettings; @@ -536,7 +590,7 @@ private: PMAssertionsTracker *pmAssertions; // Settings controller info - IOLock *settingsCtrlLock; + IOLock *settingsCtrlLock; OSDictionary *settingsCallbacks; OSDictionary *fPMSettingsDict; @@ -553,12 +607,10 @@ private: uint32_t sleepCnt; uint32_t darkWakeCnt; uint32_t displayWakeCnt; - + OSString *queuedSleepWakeUUIDString; OSArray *pmStatsAppResponses; - OSOrderedSet *noAckApps; // Apps not returning acks to notifications - IOBufferMemoryDescriptor *spindumpDesc; - thread_call_t stackshotOffloader; + IOLock *pmStatsLock; // guards pmStatsAppResponses bool uuidPublished; @@ -569,12 +621,13 @@ private: uint64_t autoWakeEnd; // Difference between sleepSlider and longestNonSleepSlider - unsigned long extraSleepDelay; + unsigned long extraSleepDelay; // Used to wait between say display idle and system idle thread_call_t extraSleepTimer; thread_call_t diskSyncCalloutEntry; thread_call_t fullWakeThreadCall; + thread_call_t hibDebugSetupEntry; // Track system capabilities. 
uint32_t _desiredCapability; @@ -637,7 +690,9 @@ private: unsigned int wranglerTickleLatched :1; unsigned int userIsActive :1; unsigned int userWasActive :1; + unsigned int displayIdleForDemandSleep :1; + unsigned int darkWakeHibernateError :1; uint32_t hibernateMode; AbsoluteTime userActivityTime; @@ -677,14 +732,12 @@ private: IONotifier * systemCapabilityNotifier; - IOPMTimeline *timeline; - typedef struct { uint32_t pid; uint32_t refcount; } PMNotifySuspendedStruct; - - uint32_t pmSuspendedCapacity; + + uint32_t pmSuspendedCapacity; uint32_t pmSuspendedSize; PMNotifySuspendedStruct *pmSuspendedPIDS; @@ -700,13 +753,16 @@ private: volatile uint32_t swd_lock; /* Lock to access swd_buffer & and its header */ void * swd_buffer; /* Memory allocated for dumping sleep/wake logs */ uint8_t swd_flags; /* Flags defined in IOPMPrivate.h */ - + IOMemoryMap * swd_logBufMap; /* Memory with sleep/wake logs from previous boot */ - + + // Wake Event Reporting + OSArray * _systemWakeEventsArray; + bool _acceptSystemWakeEvents; int findSuspendedPID(uint32_t pid, uint32_t *outRefCount); - // IOPMrootDomain internal sleep call + // IOPMrootDomain internal sleep call IOReturn privateSleepSystem( uint32_t sleepReason ); void reportUserInput( void ); void setDisableClamShellSleep( bool ); @@ -749,9 +805,6 @@ private: const AggressivesRecord * array, int count ); - // getPMTraceMemoryDescriptor should only be called by our friend RootDomainUserClient - IOMemoryDescriptor *getPMTraceMemoryDescriptor(void); - IOReturn setPMAssertionUserLevels(IOPMDriverAssertionType); void publishSleepWakeUUID( bool shouldPublish ); @@ -760,15 +813,24 @@ private: void requestFullWake( FullWakeReason reason ); void willEnterFullWake( void ); - void evaluateAssertions(IOPMDriverAssertionType newAssertions, + void evaluateAssertions(IOPMDriverAssertionType newAssertions, IOPMDriverAssertionType oldAssertions); void deregisterPMSettingObject( PMSettingObject * pmso ); void sleepWakeDebugMemAlloc( ); - void sleepWakeDebugDump(IOMemoryMap *logBufMap); + void sleepWakeDebugDumpFromMem(IOMemoryMap *logBufMap); + void sleepWakeDebugDumpFromFile( ); IOMemoryMap *sleepWakeDebugRetrieve(); errno_t sleepWakeDebugSaveFile(const char *name, char *buf, int len); + errno_t sleepWakeDebugCopyFile( struct vnode *srcVp, + vfs_context_t srcCtx, + char *tmpBuf, uint64_t tmpBufSize, + uint64_t srcOffset, + const char *dstFname, + uint64_t numBytes, + uint32_t crc); + #if HIBERNATION bool getSleepOption( const char * key, uint32_t * option ); @@ -780,6 +842,8 @@ private: bool latchDisplayWranglerTickle( bool latch ); void setDisplayPowerOn( uint32_t options ); + + void acceptSystemWakeEvents( bool accept ); void systemDidNotSleep( void ); void preventTransitionToUserActive( bool prevent ); #endif /* XNU_KERNEL_PRIVATE */ diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp index 1b1775916..1c486fa0f 100644 --- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp +++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp @@ -113,7 +113,6 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( mach_vm_address_t alignment, mach_vm_address_t physicalMask) { - kern_return_t kr; task_t mapTask = NULL; vm_map_t vmmap = NULL; mach_vm_address_t highestMask = 0; @@ -122,8 +121,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( bool mapped = false; bool needZero; - if (!capacity) - return false; + if (!capacity) return false; _options = options; _capacity = capacity; @@ -147,7 +145,7 @@ bool 
IOBufferMemoryDescriptor::initWithPhysicalMask( IOMapper::checkForSystemMapper(); mapped = (0 != IOMapper::gSystem); } - needZero = mapped; + needZero = (mapped || (0 != (kIOMemorySharingTypeMask & options))); if (physicalMask && (alignment <= 1)) { @@ -184,53 +182,15 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( highestMask = 0; } - // set flags for entry + object create - vm_prot_t memEntryCacheMode = VM_PROT_READ | VM_PROT_WRITE; - - // set memory entry cache mode - switch (options & kIOMapCacheMask) - { - case kIOMapInhibitCache: - SET_MAP_MEM(MAP_MEM_IO, memEntryCacheMode); - break; - - case kIOMapWriteThruCache: - SET_MAP_MEM(MAP_MEM_WTHRU, memEntryCacheMode); - break; - - case kIOMapWriteCombineCache: - SET_MAP_MEM(MAP_MEM_WCOMB, memEntryCacheMode); - break; - - case kIOMapCopybackCache: - SET_MAP_MEM(MAP_MEM_COPYBACK, memEntryCacheMode); - break; - - case kIOMapCopybackInnerCache: - SET_MAP_MEM(MAP_MEM_INNERWBACK, memEntryCacheMode); - break; - - case kIOMapDefaultCache: - default: - SET_MAP_MEM(MAP_MEM_NOOP, memEntryCacheMode); - break; - } - + // set memory entry cache mode, pageable, purgeable + iomdOptions |= ((options & kIOMapCacheMask) >> kIOMapCacheShift) << kIOMemoryBufferCacheShift; if (options & kIOMemoryPageable) { iomdOptions |= kIOMemoryBufferPageable; - - // must create the entry before any pages are allocated - - // set flags for entry + object create - memEntryCacheMode |= MAP_MEM_NAMED_CREATE; - - if (options & kIOMemoryPurgeable) - memEntryCacheMode |= MAP_MEM_PURGABLE; + if (options & kIOMemoryPurgeable) iomdOptions |= kIOMemoryBufferPurgeable; } else { - memEntryCacheMode |= MAP_MEM_NAMED_REUSE; vmmap = kernel_map; // Buffer shouldn't auto prepare they should be prepared explicitly @@ -263,7 +223,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( capacity, highestMask, alignment, contig); } else if (needZero - && ((capacity + alignment) <= (page_size - kIOPageAllocChunkBytes))) + && ((capacity + alignment) <= (page_size - gIOPageAllocChunkBytes))) { _internalFlags |= kInternalFlagPageAllocated; needZero = false; @@ -292,22 +252,10 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( } if( (options & (kIOMemoryPageable | kIOMapCacheMask))) { - ipc_port_t sharedMem; vm_size_t size = round_page(capacity); - kr = mach_make_memory_entry(vmmap, - &size, (vm_offset_t)_buffer, - memEntryCacheMode, &sharedMem, - NULL ); - - if( (KERN_SUCCESS == kr) && (size != round_page(capacity))) { - ipc_port_release_send( sharedMem ); - kr = kIOReturnVMError; - } - if( KERN_SUCCESS != kr) - return( false ); - - _memEntry = (void *) sharedMem; + // initWithOptions will create memory entry + iomdOptions |= kIOMemoryPersistent; if( options & kIOMemoryPageable) { #if IOALLOCDEBUG @@ -352,7 +300,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( return( false ); } reserved->map = createMappingInTask(mapTask, 0, - kIOMapAnywhere | (options & kIOMapCacheMask), 0, 0); + kIOMapAnywhere | (options & kIOMapPrefault) | (options & kIOMapCacheMask), 0, 0); if (!reserved->map) { _buffer = 0; diff --git a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp index 6278f43cf..00504e9eb 100644 --- a/iokit/Kernel/IOCPU.cpp +++ b/iokit/Kernel/IOCPU.cpp @@ -96,6 +96,8 @@ static queue_head_t gIOWakeActionQueue; static queue_head_t iocpu_quiesce_queue; static queue_head_t iocpu_active_queue; +static queue_head_t gIOHaltRestartActionQueue; + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ void @@ -241,6 +243,45 @@ 
IOInstallServicePlatformAction(IOService * service, gIOAllActionsQueue = entry; } +extern "C" kern_return_t +IOCPURunPlatformHaltRestartActions(uint32_t message) +{ + kern_return_t ret; + IORegistryIterator * iter; + OSOrderedSet * all; + IOService * service; + + if (!gIOHaltRestartActionQueue.next) + { + queue_init(&gIOHaltRestartActionQueue); + iter = IORegistryIterator::iterateOver(gIOServicePlane, + kIORegistryIterateRecursively); + if (iter) + { + all = 0; + do + { + if (all) all->release(); + all = iter->iterateAll(); + } + while (!iter->isValid()); + iter->release(); + if (all) + { + while((service = (IOService *) all->getFirstObject())) + { + IOInstallServicePlatformAction(service, gIOPlatformHaltRestartActionKey, &gIOHaltRestartActionQueue, false); + all->removeObject(service); + } + all->release(); + } + } + } + ret = iocpu_run_platform_actions(&gIOHaltRestartActionQueue, 0, 0U-1, + (void *)(uintptr_t) message, NULL, NULL); + return (ret); +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ kern_return_t PE_cpu_start(cpu_id_t target, @@ -319,6 +360,7 @@ void IOCPUSleepKernel(void) queue_init(&gIOSleepActionQueue); queue_init(&gIOWakeActionQueue); + queue_init(&gIOHaltRestartActionQueue); iter = IORegistryIterator::iterateOver( gIOServicePlane, kIORegistryIterateRecursively ); @@ -342,6 +384,7 @@ void IOCPUSleepKernel(void) IOInstallServicePlatformAction(service, gIOPlatformWakeActionKey, &gIOWakeActionQueue, true); IOInstallServicePlatformAction(service, gIOPlatformQuiesceActionKey, iocpu_get_platform_quiesce_queue(), false); IOInstallServicePlatformAction(service, gIOPlatformActiveActionKey, iocpu_get_platform_active_queue(), true); + IOInstallServicePlatformAction(service, gIOPlatformHaltRestartActionKey, &gIOHaltRestartActionQueue, false); all->removeObject(service); } all->release(); @@ -391,10 +434,10 @@ void IOCPUSleepKernel(void) IODelete(entry, iocpu_platform_action_entry_t, 1); } - if (!queue_empty(&gIOSleepActionQueue)) - panic("gIOSleepActionQueue"); - if (!queue_empty(&gIOWakeActionQueue)) - panic("gIOWakeActionQueue"); + if (!queue_empty(&gIOSleepActionQueue)) panic("gIOSleepActionQueue"); + if (!queue_empty(&gIOWakeActionQueue)) panic("gIOWakeActionQueue"); + if (!queue_empty(&gIOHaltRestartActionQueue)) panic("gIOHaltRestartActionQueue"); + gIOHaltRestartActionQueue.next = 0; rootDomain->tracePoint( kIOPMTracePointWakeCPUs ); @@ -464,7 +507,7 @@ bool IOCPU::start(IOService *provider) provider->setProperty("timebase-frequency", timebaseFrequency); timebaseFrequency->release(); - super::setProperty("IOCPUID", (uintptr_t)this, sizeof(uintptr_t)*8); + super::setProperty("IOCPUID", getRegistryEntryID(), sizeof(uint64_t)*8); setCPUNumber(0); setCPUState(kIOCPUStateUnregistered); diff --git a/iokit/Kernel/IOCatalogue.cpp b/iokit/Kernel/IOCatalogue.cpp index eb8dfbafb..b6c335fbf 100644 --- a/iokit/Kernel/IOCatalogue.cpp +++ b/iokit/Kernel/IOCatalogue.cpp @@ -83,6 +83,10 @@ IORWLock * gIOCatalogLock; #define super OSObject OSDefineMetaClassAndStructors(IOCatalogue, OSObject) +static bool isModuleLoadedNoOSKextLock(OSDictionary *theKexts, + OSDictionary *theModuleDict); + + /********************************************************************* *********************************************************************/ void IOCatalogue::initialize(void) @@ -767,8 +771,9 @@ bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching) OSOrderedSet * matchSet = NULL; // must release const OSSymbol * key; OSArray * array; - OSDictionary * 
thisNewPersonality = NULL; // do not release - OSDictionary * thisOldPersonality = NULL; // do not release + OSDictionary * thisNewPersonality = NULL; // do not release + OSDictionary * thisOldPersonality = NULL; // do not release + OSDictionary * myKexts = NULL; // must release signed int idx, newIdx; if (drivers) { @@ -776,18 +781,23 @@ bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching) if (!newPersonalities) { goto finish; } - - matchSet = OSOrderedSet::withCapacity(10, IOServiceOrdering, - (void *)gIOProbeScoreKey); - if (!matchSet) { - goto finish; - } - iter = OSCollectionIterator::withCollection(personalities); - if (!iter) { - goto finish; - } } - + matchSet = OSOrderedSet::withCapacity(10, IOServiceOrdering, + (void *)gIOProbeScoreKey); + if (!matchSet) { + goto finish; + } + iter = OSCollectionIterator::withCollection(personalities); + if (!iter) { + goto finish; + } + + /* need copy of loaded kexts so we can check for loaded modules without + * taking the OSKext lock. There is a potential for deadlock if we get + * an OSKext via the normal path. See 14672140. + */ + myKexts = OSKext::copyKexts(); + result = true; IOLog("Resetting IOCatalogue.\n"); @@ -800,59 +810,66 @@ bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching) { array = (OSArray *) personalities->getObject(key); if (!array) continue; - for (idx = 0; (thisOldPersonality = (OSDictionary *) array->getObject(idx)); idx++) + + for (idx = 0; + (thisOldPersonality = (OSDictionary *) array->getObject(idx)); + idx++) { if (thisOldPersonality->getObject("KernelConfigTable")) continue; - if (newPersonalities) - for (newIdx = 0; - (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); - newIdx++) - { - /* Unlike in other functions, this comparison must be exact! - * The catalogue must be able to contain personalities that - * are proper supersets of others. - * Do not compare just the properties present in one driver - * personality or the other. - */ - if (OSDynamicCast(OSDictionary, thisNewPersonality) == NULL) { - /* skip thisNewPersonality if it is not an OSDictionary */ - continue; + thisNewPersonality = NULL; + + if (newPersonalities) { + for (newIdx = 0; + (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); + newIdx++) + { + /* Unlike in other functions, this comparison must be exact! + * The catalogue must be able to contain personalities that + * are proper supersets of others. + * Do not compare just the properties present in one driver + * personality or the other. + */ + if (OSDynamicCast(OSDictionary, thisNewPersonality) == NULL) { + /* skip thisNewPersonality if it is not an OSDictionary */ + continue; + } + if (thisNewPersonality->isEqualTo(thisOldPersonality)) + break; } - if (thisNewPersonality->isEqualTo(thisOldPersonality)) - break; } - if (thisNewPersonality) - { + if (thisNewPersonality) { // dup, ignore - newPersonalities->removeObject(newIdx); + newPersonalities->removeObject(newIdx); } - else - { + else { // not in new set - remove // only remove dictionary if this module is not loaded - 9953845 - if ( isModuleLoaded(thisOldPersonality) == false ) - { - if (matchSet) matchSet->setObject(thisOldPersonality); + if ( isModuleLoadedNoOSKextLock(myKexts, thisOldPersonality) == false ) { + if (matchSet) { + matchSet->setObject(thisOldPersonality); + } array->removeObject(idx); idx--; } } - } - } + } // for... } // while...
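+ // At this point, every incoming personality that exactly matched an existing + // one has been dropped from newPersonalities as a duplicate, and stale + // personalities whose modules are not loaded have been removed from the + // catalogue and queued in matchSet for re-matching.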
// add new - for (newIdx = 0; - (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); - newIdx++) - { - if (OSDynamicCast(OSDictionary, thisNewPersonality) == NULL) { - /* skip thisNewPersonality if it is not an OSDictionary */ - continue; - } - - OSKext::uniquePersonalityProperties(thisNewPersonality); - addPersonality(thisNewPersonality); - matchSet->setObject(thisNewPersonality); + if (newPersonalities) { + for (newIdx = 0; + (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); + newIdx++) + { + if (OSDynamicCast(OSDictionary, thisNewPersonality) == NULL) { + /* skip thisNewPersonality if it is not an OSDictionary */ + continue; + } + + OSKext::uniquePersonalityProperties(thisNewPersonality); + addPersonality(thisNewPersonality); + matchSet->setObject(thisNewPersonality); + } } /* Finally, start device matching on all new & removed personalities. @@ -865,8 +882,9 @@ bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching) IORWLockUnlock(lock); finish: - if (matchSet) matchSet->release(); - if (iter) iter->release(); + if (matchSet) matchSet->release(); + if (iter) iter->release(); + if (myKexts) myKexts->release(); return result; } @@ -909,6 +927,42 @@ bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const return kr; } +/* isModuleLoadedNoOSKextLock - used to check whether a kext is loaded + * without taking the OSKext lock. We use this to avoid the problem + * where taking the IOCatalog lock then the OSKext lock will deadlock when + * a kext load or unload is happening at the same time as the IOCatalog is changing. + * + * theKexts - is a dictionary of current kexts (from OSKext::copyKexts) with + * key set to the kext bundle ID and value set to an OSKext object + * theModuleDict - is an IOKit personality dictionary for a given module (kext) + */ +static bool isModuleLoadedNoOSKextLock(OSDictionary *theKexts, + OSDictionary *theModuleDict) +{ + bool myResult = false; + const OSString * myBundleID = NULL; // do not release + OSKext * myKext = NULL; // do not release + + if (theKexts == NULL || theModuleDict == NULL) { + return( myResult ); + } + + // gIOModuleIdentifierKey is "CFBundleIdentifier" + myBundleID = OSDynamicCast(OSString, + theModuleDict->getObject(gIOModuleIdentifierKey)); + if (myBundleID == NULL) { + return( myResult ); + } + + myKext = OSDynamicCast(OSKext, theKexts->getObject(myBundleID->getCStringNoCopy())); + if (myKext) { + myResult = myKext->isLoaded(); + } + + return( myResult ); +} + + #if PRAGMA_MARK #pragma mark Obsolete Kext Loading Stuff #endif diff --git a/iokit/Kernel/IODMAController.cpp b/iokit/Kernel/IODMAController.cpp index 558fde64e..faf7c20aa 100644 --- a/iokit/Kernel/IODMAController.cpp +++ b/iokit/Kernel/IODMAController.cpp @@ -51,7 +51,14 @@ IODMAController *IODMAController::getController(IOService *provider, UInt32 dmaI // Find the name of the parent dma controller dmaParentData = OSDynamicCast(OSData, provider->getProperty("dma-parent")); if (dmaParentData == 0) return NULL; - dmaParentName = createControllerName(*(UInt32 *)dmaParentData->getBytesNoCopy()); + + if (dmaParentData->getLength() == sizeof(UInt32)) { + dmaParentName = createControllerName(*(UInt32 *)dmaParentData->getBytesNoCopy()); + } else { + if (dmaIndex >= dmaParentData->getLength() / sizeof(UInt32)) + panic("dmaIndex out of range"); + dmaParentName = createControllerName(*(UInt32 *)dmaParentData->getBytesNoCopy(dmaIndex * sizeof(UInt32), sizeof(UInt32))); + } if (dmaParentName == 0) return NULL; //
Wait for the parent dma controller diff --git a/iokit/Kernel/IODataQueue.cpp b/iokit/Kernel/IODataQueue.cpp index 1dd0c82a3..e489cd9a8 100644 --- a/iokit/Kernel/IODataQueue.cpp +++ b/iokit/Kernel/IODataQueue.cpp @@ -26,7 +26,12 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#define DISABLE_DATAQUEUE_WARNING + #include + +#undef DISABLE_DATAQUEUE_WARNING + #include #include #include @@ -99,6 +104,13 @@ Boolean IODataQueue::initWithCapacity(UInt32 size) dataQueue->head = 0; dataQueue->tail = 0; + if (!notifyMsg) { + notifyMsg = IOMalloc(sizeof(mach_msg_header_t)); + if (!notifyMsg) + return false; + } + bzero(notifyMsg, sizeof(mach_msg_header_t)); + return true; } @@ -121,6 +133,12 @@ void IODataQueue::free() { if (dataQueue) { IOFreeAligned(dataQueue, round_page(dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE)); + dataQueue = NULL; + + if (notifyMsg) { + IOFree(notifyMsg, sizeof(mach_msg_header_t)); + notifyMsg = NULL; + } } super::free(); @@ -161,7 +179,7 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) OSAddAtomic(entrySize, (SInt32 *)&dataQueue->tail); } - else if ( head > entrySize ) // Is there enough room at the beginning? + else if ( head > entrySize ) // Is there enough room at the beginning? { // Wrap around to the beginning, but do not allow the tail to catch // up to the head. @@ -182,7 +200,7 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) } else { - return false; // queue is full + return false; // queue is full } } else @@ -200,13 +218,13 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) } else { - return false; // queue is full + return false; // queue is full } } // Send notification (via mach message) that data is available. - if ( ( head == tail ) /* queue was empty prior to enqueue() */ + if ( ( head == tail ) /* queue was empty prior to enqueue() */ || ( dataQueue->head == tail ) ) /* queue was emptied during enqueue() */ { sendDataAvailableNotification(); @@ -217,34 +235,26 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize) void IODataQueue::setNotificationPort(mach_port_t port) { - static struct _notifyMsg init_msg = { { - MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0), - sizeof (struct _notifyMsg), - MACH_PORT_NULL, - MACH_PORT_NULL, - 0, - 0 - } }; - - if (notifyMsg == 0) { - notifyMsg = IOMalloc(sizeof(struct _notifyMsg)); - } - - *((struct _notifyMsg *)notifyMsg) = init_msg; + mach_msg_header_t * msgh = (mach_msg_header_t *) notifyMsg; - ((struct _notifyMsg *)notifyMsg)->h.msgh_remote_port = port; + if (msgh) { + bzero(msgh, sizeof(mach_msg_header_t)); + msgh->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); + msgh->msgh_size = sizeof(mach_msg_header_t); + msgh->msgh_remote_port = port; + } } void IODataQueue::sendDataAvailableNotification() { - kern_return_t kr; - mach_msg_header_t * msgh; + kern_return_t kr; + mach_msg_header_t * msgh; - msgh = (mach_msg_header_t *)notifyMsg; + msgh = (mach_msg_header_t *) notifyMsg; if (msgh && msgh->msgh_remote_port) { kr = mach_msg_send_from_kernel_with_options(msgh, msgh->msgh_size, MACH_SEND_TIMEOUT, MACH_MSG_TIMEOUT_NONE); switch(kr) { - case MACH_SEND_TIMED_OUT: // Notification already sent + case MACH_SEND_TIMED_OUT: // Notification already sent case MACH_MSG_SUCCESS: case MACH_SEND_NO_BUFFER: break; @@ -266,3 +276,4 @@ IOMemoryDescriptor *IODataQueue::getMemoryDescriptor() return descriptor; } + diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index 5d6188086..6533ed937 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ 
b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -776,6 +776,7 @@ CompareKey( OSString * key, OSString *string; const char *ckey; UInt32 keyLen; + UInt32 nlen; const char *names; const char *lastName; bool wild; @@ -800,16 +801,16 @@ CompareKey( OSString * key, do { // for each name in the property + nlen = strnlen(names, lastName - names); if( wild) - matched = (0 == strncmp( ckey, names, keyLen - 1 )); + matched = ((nlen >= (keyLen - 1)) && (0 == strncmp(ckey, names, keyLen - 1))); else - matched = (keyLen == strlen( names )) - && (0 == strncmp( ckey, names, keyLen )); + matched = (keyLen == nlen) && (0 == strncmp(ckey, names, keyLen)); if( matched) result = names; - names = names + strlen( names) + 1; + names = names + nlen + 1; } while( (names < lastName) && (false == matched)); @@ -927,7 +928,7 @@ void IODTSetResolving( IORegistryEntry * regEntry, return; } -#if defined(__arm__) || defined(__i386__) || defined(__x86_64__) +#if defined(__arm__) || defined(__i386__) || defined(__x86_64__) static SInt32 DefaultCompare( UInt32 cellCount, UInt32 left[], UInt32 right[] ) { cellCount--; @@ -1239,6 +1240,7 @@ OSData * IODTFindSlotName( IORegistryEntry * regEntry, UInt32 deviceNumber ) OSData *ret = 0; UInt32 *bits; UInt32 i; + size_t nlen; char *names; char *lastName; UInt32 mask; @@ -1266,15 +1268,16 @@ OSData * IODTFindSlotName( IORegistryEntry * regEntry, UInt32 deviceNumber ) for( i = 0; (i <= deviceNumber) && (names < lastName); i++ ) { if( mask & (1 << i)) { + nlen = 1 + strnlen(names, lastName - names); if( i == deviceNumber) { - data = OSData::withBytesNoCopy( names, 1 + strlen( names)); + data = OSData::withBytesNoCopy(names, nlen); if( data) { regEntry->setProperty("AAPL,slot-name", data); ret = data; data->release(); } } else - names += 1 + strlen( names); + names += nlen; } } diff --git a/iokit/Kernel/IOFilterInterruptEventSource.cpp b/iokit/Kernel/IOFilterInterruptEventSource.cpp index 6ecc33bfd..83b6ed6e0 100644 --- a/iokit/Kernel/IOFilterInterruptEventSource.cpp +++ b/iokit/Kernel/IOFilterInterruptEventSource.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #if IOKITSTATS @@ -153,14 +154,33 @@ void IOFilterInterruptEventSource::normalInterruptOccurred (void */*refcon*/, IOService */*prov*/, int /*source*/) { bool filterRes; + uint64_t startTime = 0; + uint64_t endTime = 0; bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; if (trace) IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER), VM_KERNEL_UNSLIDE(filterAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); + + if (IOInterruptEventSource::reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelTimeIndex)) { + startTime = mach_absolute_time(); + } + } // Call the filter. 
filterRes = (*filterAction)(owner, this); + + if (IOInterruptEventSource::reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelCountIndex)) { + IA_ADD_VALUE(&IOInterruptEventSource::reserved->statistics->interruptStatistics[kInterruptAccountingFirstLevelCountIndex], 1); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelTimeIndex)) { + endTime = mach_absolute_time(); + IA_ADD_VALUE(&IOInterruptEventSource::reserved->statistics->interruptStatistics[kInterruptAccountingFirstLevelTimeIndex], endTime - startTime); + } + } if (trace) IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER), @@ -174,15 +194,34 @@ void IOFilterInterruptEventSource::disableInterruptOccurred (void */*refcon*/, IOService *prov, int source) { bool filterRes; + uint64_t startTime = 0; + uint64_t endTime = 0; bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; if (trace) IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER), VM_KERNEL_UNSLIDE(filterAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); + + if (IOInterruptEventSource::reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelTimeIndex)) { + startTime = mach_absolute_time(); + } + } // Call the filter. filterRes = (*filterAction)(owner, this); - + + if (IOInterruptEventSource::reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelCountIndex)) { + IA_ADD_VALUE(&IOInterruptEventSource::reserved->statistics->interruptStatistics[kInterruptAccountingFirstLevelCountIndex], 1); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelTimeIndex)) { + endTime = mach_absolute_time(); + IA_ADD_VALUE(&IOInterruptEventSource::reserved->statistics->interruptStatistics[kInterruptAccountingFirstLevelTimeIndex], endTime - startTime); + } + } + if (trace) IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER), VM_KERNEL_UNSLIDE(filterAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 851adac60..f85f2ab0e 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -160,6 +160,7 @@ to restrict I/O ops. #include // (FWRITE, ...) #include #include +#include #include #include @@ -200,6 +201,7 @@ static const OSSymbol * gIOHibernateBootNextKey; static OSData * gIOHibernateBoot0082Data; static OSData * gIOHibernateBootNextData; static OSObject * gIOHibernateBootNextSave; +static struct kern_direct_file_io_ref_t * gDebugImageFileRef; #endif static IOLock * gFSLock; @@ -608,6 +610,105 @@ IOCopyMediaForDev(dev_t device) return (result); } +/* + * Writes header to disk with signature, block size and file extents data. + * If there are more than 2 extents, then they are written on second block. 
+ */ +static IOReturn +WriteExtentsToFile(struct kern_direct_file_io_ref_t * fileRef, + uint32_t signature, uint32_t blockSize, + IOPolledFileExtent *fileExtents, + IOByteCount size) +{ + IOHibernateImageHeader hdr; + IOItemCount count; + IOReturn err = kIOReturnSuccess; + int rc; + + memset(&hdr, 0, sizeof(IOHibernateImageHeader)); + count = size; + if (count > sizeof(hdr.fileExtentMap)) + { + hdr.fileExtentMapSize = count; + count = sizeof(hdr.fileExtentMap); + } + else + hdr.fileExtentMapSize = sizeof(hdr.fileExtentMap); + + bcopy(fileExtents, &hdr.fileExtentMap[0], count); + + // copy file block extent list if larger than header + if (hdr.fileExtentMapSize > sizeof(hdr.fileExtentMap)) + { + count = hdr.fileExtentMapSize - sizeof(hdr.fileExtentMap); + rc = kern_write_file(fileRef, blockSize, + (caddr_t)(((uint8_t *)fileExtents) + sizeof(hdr.fileExtentMap)), + count, IO_SKIP_ENCRYPTION); + if (rc != 0) { + HIBLOG("kern_write_file returned %d\n", rc); + err = kIOReturnIOError; + goto exit; + } + } + hdr.signature = signature; + hdr.deviceBlockSize = blockSize; + + rc = kern_write_file(fileRef, 0, (char *)&hdr, sizeof(hdr), IO_SKIP_ENCRYPTION); + if (rc != 0) { + HIBLOG("kern_write_file returned %d\n", rc); + err = kIOReturnIOError; + goto exit; + } + +exit: + return err; +} + +static IOReturn +GetImageBlockSize(IOService *part, OSArray *pollers, IOByteCount *blockSize) +{ + IOService * service; + IORegistryEntry * next; + IORegistryEntry * child; + + IOReturn err = kIOReturnSuccess; + + + next = part; + do + { + IOPolledInterface * poller; + OSObject * obj; + OSNumber * num; + + obj = next->getProperty(kIOPolledInterfaceSupportKey); + if (kOSBooleanFalse == obj) + { + pollers->flushCollection(); + break; + } + else if ((poller = OSDynamicCast(IOPolledInterface, obj))) + pollers->setObject(poller); + + if ((service = OSDynamicCast(IOService, next)) + && service->getDeviceMemory() + && !pollers->getCount()) break; + + if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey)))) + *blockSize = num->unsigned32BitValue(); + child = next; + } + while ((next = child->getParentEntry(gIOServicePlane)) + && child->isParent(next, gIOServicePlane, true)); + + if (*blockSize < 4096) *blockSize = 4096; + + if (!pollers->getCount()) + err = kIOReturnUnsupported; + + return err; +} + IOReturn IOPolledFileOpen( const char * filename, uint64_t setFileSize, IOBufferMemoryDescriptor * ioBuffer, @@ -618,7 +719,6 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, IOPolledFileIOVars * vars; _OpenFileContext ctx; OSData * extentsData; - OSNumber * num; IOService * part = 0; OSString * keyUUID = 0; OSString * keyStoreUUID = 0; @@ -627,6 +727,8 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, uint64_t maxiobytes; AbsoluteTime startTime, endTime; uint64_t nsec; + caddr_t write_file_addr = NULL; + vm_size_t write_file_len = 0; vars = IONew(IOPolledFileIOVars, 1); if (!vars) return (kIOReturnNoMemory); @@ -644,12 +746,20 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, ctx.extents = extentsData; ctx.size = 0; clock_get_uptime(&startTime); + if (!gDebugImageFileRef) + { + // Avoid writing the header if it is written when file is prep'd for debug data + // Image is locked during prep for debug data. So, write may fail. 
+ write_file_addr = (caddr_t)gIOHibernateCurrentHeader; + write_file_len = sizeof(IOHibernateImageHeader); + } vars->fileRef = kern_open_file_for_direct_io(filename, + true, &file_extent_callback, &ctx, setFileSize, // write file: - 0, (caddr_t) gIOHibernateCurrentHeader, - sizeof(IOHibernateImageHeader), + 0, write_file_addr, + write_file_len, // results &block_dev, &hibernate_image_dev, @@ -736,56 +846,24 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, break; } - IORegistryEntry * next; - IORegistryEntry * child; - IOService * service; - OSData * data; - vars->pollers = OSArray::withCapacity(4); - if (!vars->pollers) + if (!vars->pollers) { err = kIOReturnNoMemory; break; - } - - vars->blockSize = 512; - next = part; - do - { - IOPolledInterface * poller; - OSObject * obj; - - obj = next->getProperty(kIOPolledInterfaceSupportKey); - if (kOSBooleanFalse == obj) - { - vars->pollers->flushCollection(); - break; - } - else if ((poller = OSDynamicCast(IOPolledInterface, obj))) - vars->pollers->setObject(poller); + } - if ((service = OSDynamicCast(IOService, next)) - && service->getDeviceMemory() - && !vars->pollers->getCount()) break; + err = GetImageBlockSize(part, vars->pollers, &vars->blockSize); - if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey)))) - vars->blockSize = num->unsigned32BitValue(); - child = next; - } - while ((next = child->getParentEntry(gIOServicePlane)) - && child->isParent(next, gIOServicePlane, true)); + HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n", + major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, + vars->pollers->getCount()); - if (vars->blockSize < 4096) vars->blockSize = 4096; - - HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n", - major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, - vars->pollers->getCount()); + if (err != kIOReturnSuccess) + break; - if (!vars->pollers->getCount()) - { - err = kIOReturnUnsupported; - continue; - } + IORegistryEntry * next; + OSData * data; if (vars->blockSize < sizeof(IOHibernateImageHeader)) { err = kIOReturnError; @@ -863,6 +941,12 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, vars->fileRef = NULL; } } + else + { + WriteExtentsToFile(vars->fileRef, kIOHibernateHeaderOpenSignature, vars->blockSize, + (IOPolledFileExtent *)extentsData->getBytesNoCopy(), + extentsData->getLength()); + } if (part) part->release(); @@ -1149,6 +1233,148 @@ if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#if HIBERNATION +IOReturn +IOHibernateOpenForDebugData( ) +{ + dev_t image_dev; + OSData *extentsData = NULL; + OSObject *obj; + OSString *str; + IOByteCount blockSize = 0; + IOByteCount size; + IOService * part = 0; + OSData * data = NULL; + + IOPolledFileExtent * fileExtents; + IOReturn err = kIOReturnSuccess; + IORegistryEntry * regEntry; + OSArray * pollers = NULL; + + _OpenFileContext ctx; + + if (gDebugImageFileRef != NULL) + return kIOReturnError; + + if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFileKey))) + { + if ((str = OSDynamicCast(OSString, obj))) + strlcpy(gIOHibernateFilename, str->getCStringNoCopy(), + sizeof(gIOHibernateFilename)); + obj->release(); + } + + if (!gIOHibernateFilename[0]) { + HIBLOG("Failed to get hibernate image filename\n"); + return (kIOReturnUnsupported); + } + + extentsData = OSData::withCapacity(32); + 
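+ // extentsData collects IOPolledFileExtent records via file_extent_callback + // as kern_open_file_for_direct_io() walks the image file below.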
ctx.extents = extentsData; + ctx.size = 0; + + bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); + gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags; + gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; + + gDebugImageFileRef = kern_open_file_for_direct_io(gIOHibernateFilename, + false, + &file_extent_callback, &ctx, + 0, 0, + (caddr_t)gIOHibernateCurrentHeader, + sizeof(IOHibernateImageHeader), + NULL, &image_dev, NULL, NULL, NULL); + + if (gDebugImageFileRef == NULL) + { + HIBLOG("Failed to open the file \n"); + err = kIOReturnError; + goto exit; + } + fileExtents = (IOPolledFileExtent *)extentsData->getBytesNoCopy(); + size = extentsData->getLength(); + + part = IOCopyMediaForDev(image_dev); + if (!part) + { + HIBLOG("Failed to get the media device\n"); + err = kIOReturnNotFound; + goto exit; + } + + + pollers = OSArray::withCapacity(4); + if (!pollers) + { + err = kIOReturnNoMemory; + goto exit; + } + + err = GetImageBlockSize(part, pollers, &blockSize); + if (err != kIOReturnSuccess) + { + HIBLOG("Failed to get block size\n"); + goto exit; + } + if (blockSize < sizeof(IOHibernateImageHeader)) + { + HIBLOG("block size %llu is less than the size of the header\n", blockSize); + err = kIOReturnError; + goto exit; + } + + WriteExtentsToFile(gDebugImageFileRef, kIOHibernateHeaderOpenSignature, + blockSize, fileExtents, size); + + char str2[24 + sizeof(uuid_string_t) + 2]; + + if (!gIOCreateEFIDevicePathSymbol) + gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath"); + + snprintf(str2, sizeof(str2), "%qx", fileExtents[0].start); + + err = IOService::getPlatform()->callPlatformFunction( + gIOCreateEFIDevicePathSymbol, false, + (void *) part, (void *) str2, + (void *) (uintptr_t) true, (void *) &data); + + if (!gIOOptionsEntry) + { + regEntry = IORegistryEntry::fromPath("/options", gIODTPlane); + gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry); + if (regEntry && !gIOOptionsEntry) + regEntry->release(); + } + if (gIOOptionsEntry) + { + const OSSymbol * sym; + + sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); + if (sym) + { + gIOOptionsEntry->setProperty(sym, data); + sym->release(); + } + } + + +exit: + + if ( (err != kIOReturnSuccess) && gDebugImageFileRef) { + kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0); + gDebugImageFileRef = NULL; + } + if (extentsData) extentsData->release(); + if (part) part->release(); + if (pollers) pollers->release(); + if (data) data->release(); + + return err; +} +#endif + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + IOReturn IOHibernateSystemSleep(void) { @@ -1263,13 +1489,13 @@ IOHibernateSystemSleep(void) IOService::getPlatform()->getConsoleInfo(&consoleInfo); // estimate: 6% increase in pages compressed - // screen preview 2 images compressed 50% + // screen preview 2 images compressed 0% setFileSize = ((ptoa_64((106 * pageCount) / 100) * gIOHibernateCompression) >> 8) + vars->page_list->list_size + (consoleInfo.v_width * consoleInfo.v_height * 8); enum { setFileRound = 1024*1024ULL }; setFileSize = ((setFileSize + setFileRound) & ~(setFileRound - 1)); - + HIBLOG("hibernate_page_list_setall preflight pageCount %d est comp %qd setfile %qd min %qd\n", pageCount, (100ULL * gIOHibernateCompression) >> 8, setFileSize, vars->fileMinSize); @@ -1283,6 +1509,11 @@ IOHibernateSystemSleep(void) // open & invalidate the image file + if (gDebugImageFileRef) { + kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0); + 
gDebugImageFileRef = NULL; + } + err = IOPolledFileOpen(gIOHibernateFilename, setFileSize, vars->ioBuffer, &vars->fileVars, &vars->fileExtents, &data, &vars->volumeCryptKey[0]); @@ -1991,6 +2222,33 @@ IOHibernateSystemPostWake(void) } gFSState = kFSIdle; } + + if (gDebugImageFileRef) { + kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0); + gDebugImageFileRef = NULL; + } + + if (!gIOOptionsEntry) + { + IORegistryEntry * regEntry; + regEntry = IORegistryEntry::fromPath("/options", gIODTPlane); + gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry); + if (regEntry && !gIOOptionsEntry) + regEntry->release(); + } + if (gIOOptionsEntry) + { + const OSSymbol * sym; + + sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); + if (sym) + { + gIOOptionsEntry->removeProperty(sym); + gIOOptionsEntry->sync(); + sym->release(); + } + } + return (kIOReturnSuccess); } @@ -3128,7 +3386,6 @@ void IOHibernateSetWakeCapabilities(uint32_t capability) { vm_compressor_do_warmup(); } - } } diff --git a/iokit/Kernel/IOHistogramReporter.cpp b/iokit/Kernel/IOHistogramReporter.cpp new file mode 100644 index 000000000..f51a1c936 --- /dev/null +++ b/iokit/Kernel/IOHistogramReporter.cpp @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2012-2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#define __STDC_LIMIT_MACROS // what are the C++ equivalents? 
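To answer the question in the comment above: pre-C++11 <stdint.h> only exposes its limit macros to C++ when __STDC_LIMIT_MACROS is defined before the first inclusion (a C99 carve-out); C++11 dropped the guard, so modern code needs no equivalent. The ordering requirement is the whole trick:

#define __STDC_LIMIT_MACROS   // must precede the first <stdint.h> include
#include <stdint.h>
// INT32_MAX, INT64_MAX, UINT32_MAX, ... are now visible to C++ translation
// units; without the macro, a C++03-era toolchain may hide them.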
+#include <stdint.h>
+
+#include <IOKit/IOKernelReportStructs.h>
+#include <IOKit/IOKernelReporters.h>
+#include "IOReporterDefs.h"
+
+
+#define super IOReporter
+OSDefineMetaClassAndStructors(IOHistogramReporter, IOReporter);
+
+/* static */
+IOHistogramReporter*
+IOHistogramReporter::with(IOService *reportingService,
+                          IOReportCategories categories,
+                          uint64_t channelID,
+                          const char *channelName,
+                          IOReportUnits unit,
+                          int nSegments,
+                          IOHistogramSegmentConfig *config)
+{
+    IOHistogramReporter *reporter = new IOHistogramReporter;
+
+    const OSSymbol *tmpChannelName = NULL;
+
+    if (reporter) {
+
+        if (channelName)
+            tmpChannelName = OSSymbol::withCString(channelName);
+
+        if (reporter->initWith(reportingService, categories,
+                               channelID, tmpChannelName,
+                               unit, nSegments, config)) {
+            return reporter;
+        }
+    }
+
+    return 0;
+}
+
+
+bool
+IOHistogramReporter::initWith(IOService *reportingService,
+                              IOReportCategories categories,
+                              uint64_t channelID,
+                              const OSSymbol *channelName,
+                              IOReportUnits unit,
+                              int nSegments,
+                              IOHistogramSegmentConfig *config)
+{
+    bool result = false;
+    IOReturn res; // for PREFL_MEMOP
+    size_t configSize, elementsSize, eCountsSize, boundsSize;
+    int cnt, cnt2, cnt3 = 0;
+    int64_t bucketBound = 0, previousBucketBound = 0;
+
+    // analyzer appeasement
+    configSize = elementsSize = eCountsSize = boundsSize = 0;
+
+    IORLOG("IOHistogramReporter::initWith");
+
+    // For now, this reporter is limited to a single channel
+    _nChannels = 1;
+
+    IOReportChannelType channelType = {
+        .categories = categories,
+        .report_format = kIOReportFormatHistogram,
+        .nelements = 0, // Initialized when Config is unpacked
+        .element_idx = 0
+    };
+
+    if (super::init(reportingService, channelType, unit) != true) {
+        IORLOG("%s - ERROR: super::init failed", __func__);
+        result = false;
+        goto finish;
+    }
+
+    // Make sure to call this after the commit init phase
+    if (channelName) _channelNames->setObject(channelName);
+
+    _segmentCount = nSegments;
+    if (_segmentCount == 0) {
+        IORLOG("IOReportHistogram init ERROR. No configuration provided!");
+        result = false;
+        goto finish;
+    }
+
+    IORLOG("%s - %u segment(s)", __func__, _segmentCount);
+
+    PREFL_MEMOP_FAIL(_segmentCount, IOHistogramSegmentConfig);
+    configSize = (size_t)_segmentCount * sizeof(IOHistogramSegmentConfig);
+    _histogramSegmentsConfig = (IOHistogramSegmentConfig*)IOMalloc(configSize);
+    if (!_histogramSegmentsConfig) goto finish;
+    memcpy(_histogramSegmentsConfig, config, configSize);
+
+    // Find out how many elements are needed to store the histogram
+    for (cnt = 0; cnt < _segmentCount; cnt++) {
+
+        _nElements += _histogramSegmentsConfig[cnt].segment_bucket_count;
+        _channelDimension += _histogramSegmentsConfig[cnt].segment_bucket_count;
+
+        IORLOG("\t\t bucket_base_width: %u | log_scale: %u | buckets: %u",
+               _histogramSegmentsConfig[cnt].base_bucket_width,
+               _histogramSegmentsConfig[cnt].scale_flag,
+               _histogramSegmentsConfig[cnt].segment_bucket_count);
+
+        if (_histogramSegmentsConfig[cnt].scale_flag > 1
+            || _histogramSegmentsConfig[cnt].base_bucket_width == 0) {
+            result = false;
+            goto finish;
+        }
+
+    }
+
+    // Update the channel type with discovered dimension
+    _channelType.nelements = _channelDimension;
+
+    IORLOG("%s - %u channel(s) of dimension %u",
+           __func__, _nChannels, _channelDimension);
+
+    IORLOG("%s %d segments for a total dimension of %d elements",
+           __func__, _nChannels, _nElements);
+
+    // Allocate memory for the array of report elements
+    PREFL_MEMOP_FAIL(_nElements, IOReportElement);
+    elementsSize = (size_t)_nElements * sizeof(IOReportElement);
+    _elements = (IOReportElement *)IOMalloc(elementsSize);
+    if (!_elements) goto finish;
+    memset(_elements, 0, elementsSize);
+
+    // Allocate memory for the array of element watch counts
+    PREFL_MEMOP_FAIL(_nElements, int);
+    eCountsSize = (size_t)_nChannels * sizeof(int);
+    _enableCounts = (int *)IOMalloc(eCountsSize);
+    if (!_enableCounts) goto finish;
+    memset(_enableCounts, 0, eCountsSize);
+
+    lockReporter();
+    for (cnt2 = 0; cnt2 < _channelDimension; cnt2++) {
+        IOHistogramReportValues hist_values;
+        if (copyElementValues(cnt2, (IOReportElementValues*)&hist_values)){
+            goto finish;
+        }
+        hist_values.bucket_min = kIOReportInvalidIntValue;
+        hist_values.bucket_max = kIOReportInvalidIntValue;
+        hist_values.bucket_sum = kIOReportInvalidIntValue;
+        if (setElementValues(cnt2, (IOReportElementValues*)&hist_values)){
+            goto finish;
+        }
+
+        // Setup IOReporter's channel IDs
+        _elements[cnt2].channel_id = channelID;
+
+        // Setup IOReporter's reporting provider service
+        _elements[cnt2].provider_id = _driver_id;
+
+        // Setup IOReporter's channel type
+        _elements[cnt2].channel_type = _channelType;
+        _elements[cnt2].channel_type.element_idx = cnt2;
+
+        //IOREPORTER_DEBUG_ELEMENT(cnt2);
+    }
+    unlockReporter();
+
+    // Allocate memory for the bucket upper bounds
+    PREFL_MEMOP_FAIL(_nElements, uint64_t);
+    boundsSize = (size_t)_nElements * sizeof(uint64_t);
+    _bucketBounds = (int64_t*)IOMalloc(boundsSize);
+    if (!_bucketBounds) goto finish;
+    memset(_bucketBounds, 0, boundsSize);
+    _bucketCount = _nElements;
+
+    for (cnt = 0; cnt < _segmentCount; cnt++) {
+
+        if (_histogramSegmentsConfig[cnt].segment_bucket_count > INT_MAX
+            || _histogramSegmentsConfig[cnt].base_bucket_width > INT_MAX) {
+            goto finish;
+        }
+        for (cnt2 = 0; cnt2 < (int)_histogramSegmentsConfig[cnt].segment_bucket_count; cnt2++) {
+
+            if (cnt3 >= _nElements) {
+                IORLOG("ERROR: _bucketBounds init");
+                return false;
+            }
+
+            if (_histogramSegmentsConfig[cnt].scale_flag) {
+                // FIXME: Could use pow() but not sure how to
include math.h + int64_t power = 1; + int exponent = cnt2 + 1; + while (exponent) { + power *= _histogramSegmentsConfig[cnt].base_bucket_width; + exponent--; + } + bucketBound = power; + } + + else { + bucketBound = _histogramSegmentsConfig[cnt].base_bucket_width * + ((unsigned)cnt2 + 1); + } + + if (previousBucketBound >= bucketBound) { + IORLOG("Histogram ERROR: bucket bound does not increase linearly (segment %u / bucket # %u)", + cnt, cnt2); + result = false; + goto finish; + } + + _bucketBounds[cnt3] = bucketBound; + // IORLOG("_bucketBounds[%u] = %llu", cnt3, bucketBound); + previousBucketBound = _bucketBounds[cnt3]; + cnt3++; + } + } + + // success + result = true; + +finish: + if (result != true) { + + if (_histogramSegmentsConfig) + IOFree(_histogramSegmentsConfig, configSize); + + if (_elements) + IOFree(_elements, elementsSize); + + if (_enableCounts) + IOFree(_enableCounts, eCountsSize); + + if (_bucketBounds) + IOFree(_bucketBounds, boundsSize); + } + + return result; +} + + +void +IOHistogramReporter::free(void) +{ + if (_bucketBounds) { + PREFL_MEMOP_PANIC(_nElements, int64_t); + IOFree(_bucketBounds, (size_t)_nElements * sizeof(int64_t)); + } + if (_histogramSegmentsConfig) { + PREFL_MEMOP_PANIC(_segmentCount, IOHistogramSegmentConfig); + IOFree(_histogramSegmentsConfig, + (size_t)_segmentCount * sizeof(IOHistogramSegmentConfig)); + } + + super::free(); +} + + +IOReportLegendEntry* +IOHistogramReporter::handleCreateLegend(void) +{ + OSData *tmpConfigData; + OSDictionary *tmpDict; + IOReportLegendEntry *legendEntry = NULL; + + legendEntry = super::handleCreateLegend(); + + if (legendEntry) { + + PREFL_MEMOP_PANIC(_segmentCount, IOHistogramSegmentConfig); + tmpConfigData = OSData::withBytes(_histogramSegmentsConfig, + (unsigned)_segmentCount * + (unsigned)sizeof(IOHistogramSegmentConfig)); + if (!tmpConfigData) { + legendEntry->release(); + goto finish; + } + + tmpDict = OSDynamicCast(OSDictionary, legendEntry->getObject(kIOReportLegendInfoKey)); + if (!tmpDict) { + legendEntry->release(); + goto finish; + } + + tmpDict->setObject(kIOReportLegendConfigKey, tmpConfigData); + } + +finish: + return legendEntry; +} + +int +IOHistogramReporter::tallyValue(int64_t value) +{ + int result = -1; + int cnt = 0, element_index = 0; + IOHistogramReportValues hist_values; + + lockReporter(); + + // Iterate over _bucketCount minus one to make last bucket of infinite width + for (cnt = 0; cnt < _bucketCount - 1; cnt++) { + if (value <= _bucketBounds[cnt]) break; + } + + element_index = cnt; + + if (copyElementValues(element_index, (IOReportElementValues *)&hist_values) != kIOReturnSuccess) { + goto finish; + } + + // init stats on first hit + if (hist_values.bucket_hits == 0) { + hist_values.bucket_min = hist_values.bucket_max = value; + hist_values.bucket_sum = 0; // += is below + } + + // update all values + if (value < hist_values.bucket_min) { + hist_values.bucket_min = value; + } else if (value > hist_values.bucket_max) { + hist_values.bucket_max = value; + } + hist_values.bucket_sum += value; + hist_values.bucket_hits++; + + if (setElementValues(element_index, (IOReportElementValues *)&hist_values) == kIOReturnSuccess) { + goto finish; + } + + // success! + result = element_index; + +finish: + unlockReporter(); + return result; +} diff --git a/iokit/Kernel/IOInterruptAccounting.cpp b/iokit/Kernel/IOInterruptAccounting.cpp new file mode 100644 index 000000000..8130cce23 --- /dev/null +++ b/iokit/Kernel/IOInterruptAccounting.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2014 Apple Inc. 
All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + +uint32_t gInterruptAccountingStatisticBitmask = + /* Disable timestamps for older ARM platforms; they are expensive. */ + IA_GET_ENABLE_BIT(kInterruptAccountingFirstLevelTimeIndex) | + IA_GET_ENABLE_BIT(kInterruptAccountingSecondLevelCPUTimeIndex) | + IA_GET_ENABLE_BIT(kInterruptAccountingSecondLevelSystemTimeIndex) | + IA_GET_ENABLE_BIT(kInterruptAccountingFirstLevelCountIndex) | + IA_GET_ENABLE_BIT(kInterruptAccountingSecondLevelCountIndex); + +IOLock * gInterruptAccountingDataListLock = NULL; +queue_head_t gInterruptAccountingDataList; + +void interruptAccountingInit(void) +{ + int bootArgValue = 0; + + if (PE_parse_boot_argn("interrupt_accounting", &bootArgValue, sizeof(bootArgValue))) + gInterruptAccountingStatisticBitmask = bootArgValue; + + gInterruptAccountingDataListLock = IOLockAlloc(); + + assert(gInterruptAccountingDataListLock); + + queue_init(&gInterruptAccountingDataList); +} + +void interruptAccountingDataAddToList(IOInterruptAccountingData * data) +{ + IOLockLock(gInterruptAccountingDataListLock); + queue_enter(&gInterruptAccountingDataList, data, IOInterruptAccountingData *, chain); + IOLockUnlock(gInterruptAccountingDataListLock); +} + +void interruptAccountingDataRemoveFromList(IOInterruptAccountingData * data) +{ + IOLockLock(gInterruptAccountingDataListLock); + queue_remove(&gInterruptAccountingDataList, data, IOInterruptAccountingData *, chain); + IOLockUnlock(gInterruptAccountingDataListLock); +} + +void interruptAccountingDataUpdateChannels(IOInterruptAccountingData * data, IOSimpleReporter * reporter) +{ + uint64_t i = 0; + + for (i = 0; i < IA_NUM_INTERRUPT_ACCOUNTING_STATISTICS; i++) { + if (IA_GET_STATISTIC_ENABLED(i)) + reporter->setValue(IA_GET_CHANNEL_ID(data->interruptIndex, i), data->interruptStatistics[i]); + } +} + +void interruptAccountingDataInheritChannels(IOInterruptAccountingData * data, IOSimpleReporter * reporter) +{ + uint64_t i = 0; + + for (i = 0; i < IA_NUM_INTERRUPT_ACCOUNTING_STATISTICS; i++) { + if (IA_GET_STATISTIC_ENABLED(i)) + data->interruptStatistics[i] = reporter->getValue(IA_GET_CHANNEL_ID(data->interruptIndex, i)); + } +} + diff --git a/iokit/Kernel/IOInterruptEventSource.cpp b/iokit/Kernel/IOInterruptEventSource.cpp index 
6782ec9fe..1636405f6 100644
--- a/iokit/Kernel/IOInterruptEventSource.cpp
+++ b/iokit/Kernel/IOInterruptEventSource.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include <IOKit/IOInterruptAccountingPrivate.h>

 #if IOKITSTATS
@@ -81,6 +82,14 @@ bool IOInterruptEventSource::init(OSObject *inOwner,
     if ( !super::init(inOwner, (IOEventSourceAction) inAction) )
         return false;

+    reserved = IONew(ExpansionData, 1);
+
+    if (!reserved) {
+        return false;
+    }
+
+    bzero(reserved, sizeof(ExpansionData));
+
     provider = inProvider;
     producerCount = consumerCount = 0;
     autoDisable = explicitDisable = false;
@@ -88,9 +97,36 @@ bool IOInterruptEventSource::init(OSObject *inOwner,
     // Assumes inOwner holds a reference(retain) on the provider
     if (inProvider) {
+        if (IA_ANY_STATISTICS_ENABLED) {
+            /*
+             * We only treat this as an "interrupt" if it has a provider; if it does,
+             * set up the objects necessary to track interrupt statistics. Interrupt
+             * event sources without providers are most likely being used as simple
+             * event sources in order to poke at workloops and kick off work.
+             *
+             * We also try to avoid interrupt accounting overhead if none of
+             * the statistics are enabled.
+             */
+            reserved->statistics = IONew(IOInterruptAccountingData, 1);
+
+            if (!reserved->statistics) {
+                /*
+                 * We rely on the free() routine to clean up after us if init fails
+                 * midway.
+                 */
+                return false;
+            }
+
+            bzero(reserved->statistics, sizeof(IOInterruptAccountingData));
+
+            reserved->statistics->owner = this;
+        }
+
         res = (kIOReturnSuccess == registerInterruptHandler(inProvider, inIntIndex));
-        if (res)
+
+        if (res) {
             intIndex = inIntIndex;
+        }
     }

     IOStatisticsInitializeCounter();
@@ -120,9 +156,54 @@ IOReturn IOInterruptEventSource::registerInterruptHandler(IOService *inProvider,

     ret = provider->registerInterrupt(inIntIndex, this, intHandler);

+    /*
+     * Add statistics to the provider. The setWorkLoop convention should ensure
+     * that we always go down the unregister path before we register (outside of
+     * init()), so we don't have to worry that we will invoke addInterruptStatistics
+     * erroneously.
+     */
+    if ((ret == kIOReturnSuccess) && (reserved->statistics)) {
+        /*
+         * Stash the normal index value, for the sake of debugging.
+         */
+        reserved->statistics->interruptIndex = inIntIndex;
+
+        /*
+         * We need to hook the interrupt information up to the provider so that it
+         * can find the statistics for this interrupt when desired. The provider is
+         * responsible for maintaining the reporter for a particular interrupt, and
+         * needs a handle on the statistics so that it can request that the reporter
+         * be updated as needed. Errors are considered "soft" for the moment (it
+         * will either panic, or fail in a way such that we can still service the
+         * interrupt).
+         */
+        provider->addInterruptStatistics(reserved->statistics, inIntIndex);
+
+        /*
+         * Add the statistics object to the global list of statistics objects; this
+         * is an aid to debugging (we can trivially find statistics for all eligible
+         * interrupts, and dump them; potentially helpful if the system is wedged
+         * due to interrupt activity).
+ */ + interruptAccountingDataAddToList(reserved->statistics); + } + return (ret); } +void +IOInterruptEventSource::unregisterInterruptHandler(IOService *inProvider, + int inIntIndex) +{ + if (reserved->statistics) { + interruptAccountingDataRemoveFromList(reserved->statistics); + provider->removeInterruptStatistics(reserved->statistics->interruptIndex); + } + + provider->unregisterInterrupt(inIntIndex); +} + + IOInterruptEventSource * IOInterruptEventSource::interruptEventSource(OSObject *inOwner, Action inAction, @@ -142,7 +223,15 @@ IOInterruptEventSource::interruptEventSource(OSObject *inOwner, void IOInterruptEventSource::free() { if (provider && intIndex >= 0) - provider->unregisterInterrupt(intIndex); + unregisterInterruptHandler(provider, intIndex); + + if (reserved) { + if (reserved->statistics) { + IODelete(reserved->statistics, IOInterruptAccountingData, 1); + } + + IODelete(reserved, ExpansionData, 1); + } super::free(); } @@ -172,7 +261,11 @@ void IOInterruptEventSource::setWorkLoop(IOWorkLoop *inWorkLoop) if (provider) { if (!inWorkLoop) { if (intIndex >= 0) { - provider->unregisterInterrupt(intIndex); + /* + * It isn't necessarily safe to wait until free() to unregister the interrupt; + * our provider may disappear. + */ + unregisterInterruptHandler(provider, intIndex); intIndex = ~intIndex; } } else if ((intIndex < 0) && (kIOReturnSuccess == registerInterruptHandler(provider, ~intIndex))) { @@ -200,6 +293,10 @@ bool IOInterruptEventSource::getAutoDisable() const bool IOInterruptEventSource::checkForWork() { + uint64_t startSystemTime = 0; + uint64_t endSystemTime = 0; + uint64_t startCPUTime = 0; + uint64_t endCPUTime = 0; unsigned int cacheProdCount = producerCount; int numInts = cacheProdCount - consumerCount; IOInterruptEventAction intAction = (IOInterruptEventAction) action; @@ -212,9 +309,35 @@ bool IOInterruptEventSource::checkForWork() if (trace) IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION), VM_KERNEL_UNSLIDE(intAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); - + + if (reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelSystemTimeIndex)) { + startSystemTime = mach_absolute_time(); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelCPUTimeIndex)) { + startCPUTime = thread_get_runtime_self(); + } + } + // Call the handler (*intAction)(owner, this, numInts); + + if (reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelCountIndex)) { + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingSecondLevelCountIndex], 1); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelCPUTimeIndex)) { + endCPUTime = thread_get_runtime_self(); + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingSecondLevelCPUTimeIndex], endCPUTime - startCPUTime); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelSystemTimeIndex)) { + endSystemTime = mach_absolute_time(); + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingSecondLevelSystemTimeIndex], endSystemTime - startSystemTime); + } + } if (trace) IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION), @@ -230,9 +353,35 @@ bool IOInterruptEventSource::checkForWork() if (trace) IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION), VM_KERNEL_UNSLIDE(intAction), (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); + + if (reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelSystemTimeIndex)) { + 
startSystemTime = mach_absolute_time(); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelCPUTimeIndex)) { + startCPUTime = thread_get_runtime_self(); + } + } // Call the handler (*intAction)(owner, this, -numInts); + + if (reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelCountIndex)) { + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingSecondLevelCountIndex], 1); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelCPUTimeIndex)) { + endCPUTime = thread_get_runtime_self(); + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingSecondLevelCPUTimeIndex], endCPUTime - startCPUTime); + } + + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingSecondLevelSystemTimeIndex)) { + endSystemTime = mach_absolute_time(); + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingSecondLevelSystemTimeIndex], endSystemTime - startSystemTime); + } + } if (trace) IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION), @@ -256,6 +405,12 @@ void IOInterruptEventSource::normalInterruptOccurred if (trace) IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); + + if (reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelCountIndex)) { + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingFirstLevelCountIndex], 1); + } + } signalWorkAvailable(); @@ -275,6 +430,12 @@ void IOInterruptEventSource::disableInterruptOccurred if (trace) IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); + + if (reserved->statistics) { + if (IA_GET_STATISTIC_ENABLED(kInterruptAccountingFirstLevelCountIndex)) { + IA_ADD_VALUE(&reserved->statistics->interruptStatistics[kInterruptAccountingFirstLevelCountIndex], 1); + } + } signalWorkAvailable(); diff --git a/iokit/Kernel/IOKitDebug.cpp b/iokit/Kernel/IOKitDebug.cpp index c170d83a6..2560a0687 100644 --- a/iokit/Kernel/IOKitDebug.cpp +++ b/iokit/Kernel/IOKitDebug.cpp @@ -46,7 +46,13 @@ SInt64 gIOKitDebug = DEBUG_INIT_VALUE; SInt64 gIOKitTrace = 0; -SYSCTL_QUAD(_debug, OID_AUTO, iokit, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitDebug, "boot_arg io"); +#if DEVELOPMENT || DEBUG +#define IODEBUG_CTLFLAGS CTLFLAG_RW +#else +#define IODEBUG_CTLFLAGS CTLFLAG_RD +#endif + +SYSCTL_QUAD(_debug, OID_AUTO, iokit, IODEBUG_CTLFLAGS | CTLFLAG_LOCKED, &gIOKitDebug, "boot_arg io"); SYSCTL_QUAD(_debug, OID_AUTO, iotrace, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitTrace, "trace io"); diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index d15549f89..a22bd8c60 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -218,13 +218,13 @@ typedef uintptr_t (*iopa_proc_t)(iopa_t * a); enum { - kIOPageAllocChunkBytes = (PAGE_SIZE / 64), kIOPageAllocSignature = 'iopa' }; extern "C" void iopa_init(iopa_t * a); extern "C" uintptr_t iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign); extern "C" uintptr_t iopa_free(iopa_t * a, uintptr_t addr, vm_size_t bytes); +extern "C" uint32_t gIOPageAllocChunkBytes; extern "C" iopa_t gIOBMDPageAllocator; @@ -233,6 +233,8 @@ extern "C" struct timeval gIOLastWakeTime; extern clock_sec_t gIOConsoleLockTime; +extern OSSet * gIORemoveOnReadProperties; + extern "C" void IOKitResetTime( void ); extern "C" void IOKitInitializeTime( void ); diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp index 1c39602d7..3714d1d4f 100644 --- 
a/iokit/Kernel/IOLib.cpp +++ b/iokit/Kernel/IOLib.cpp @@ -144,6 +144,8 @@ static struct { static iopa_t gIOPageablePageAllocator; +uint32_t gIOPageAllocChunkBytes; + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ void IOLibInit(void) @@ -175,6 +177,8 @@ void IOLibInit(void) gIOMallocContiguousEntriesLock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); queue_init( &gIOMallocContiguousEntries ); + gIOPageAllocChunkBytes = PAGE_SIZE/64; + assert(sizeof(iopa_page_t) <= gIOPageAllocChunkBytes); iopa_init(&gIOBMDPageAllocator); iopa_init(&gIOPageablePageAllocator); @@ -366,6 +370,7 @@ IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size) #endif } + mach_vm_address_t IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, mach_vm_size_t alignment, bool contiguous) @@ -693,7 +698,7 @@ void * IOMallocPageable(vm_size_t size, vm_size_t alignment) { void * addr; - if (size >= (page_size - 4*kIOPageAllocChunkBytes)) addr = IOMallocPageablePages(size, alignment); + if (size >= (page_size - 4*gIOPageAllocChunkBytes)) addr = IOMallocPageablePages(size, alignment); else addr = ((void * ) iopa_alloc(&gIOPageablePageAllocator, &IOMallocOnePageablePage, size, alignment)); if (addr) { @@ -713,7 +718,7 @@ void IOFreePageable(void * address, vm_size_t size) #endif IOStatisticsAlloc(kIOStatisticsFreePageable, size); - if (size < (page_size - 4*kIOPageAllocChunkBytes)) + if (size < (page_size - 4*gIOPageAllocChunkBytes)) { address = (void *) iopa_free(&gIOPageablePageAllocator, (uintptr_t) address, size); size = page_size; @@ -723,14 +728,6 @@ void IOFreePageable(void * address, vm_size_t size) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#if 0 -#undef assert -#define assert(ex) \ - ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) -#endif - -typedef char iopa_page_t_assert[(sizeof(iopa_page_t) <= kIOPageAllocChunkBytes) ? 
1 : -1]; - extern "C" void iopa_init(iopa_t * a) { @@ -765,7 +762,7 @@ iopa_allocinpage(iopa_page_t * pa, uint32_t count, uint64_t align) remque(&pa->link); pa->link.next = 0; } - return (n * kIOPageAllocChunkBytes + trunc_page((uintptr_t) pa)); + return (n * gIOPageAllocChunkBytes + trunc_page((uintptr_t) pa)); } return (0); @@ -797,8 +794,8 @@ iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign) uint64_t align; if (!bytes) bytes = 1; - count = (bytes + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes; - align = align_masks[log2up((balign + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes)]; + count = (bytes + gIOPageAllocChunkBytes - 1) / gIOPageAllocChunkBytes; + align = align_masks[log2up((balign + gIOPageAllocChunkBytes - 1) / gIOPageAllocChunkBytes)]; IOLockLock(a->lock); pa = (typeof(pa)) queue_first(&a->list); @@ -819,7 +816,7 @@ iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign) addr = alloc(a); if (addr) { - pa = (typeof(pa)) (addr + page_size - kIOPageAllocChunkBytes); + pa = (typeof(pa)) (addr + page_size - gIOPageAllocChunkBytes); pa->signature = kIOPageAllocSignature; pa->avail = -2ULL; @@ -846,13 +843,13 @@ iopa_free(iopa_t * a, uintptr_t addr, vm_size_t bytes) if (!bytes) bytes = 1; chunk = (addr & page_mask); - assert(0 == (chunk & (kIOPageAllocChunkBytes - 1))); + assert(0 == (chunk & (gIOPageAllocChunkBytes - 1))); - pa = (typeof(pa)) (addr | (page_size - kIOPageAllocChunkBytes)); + pa = (typeof(pa)) (addr | (page_size - gIOPageAllocChunkBytes)); assert(kIOPageAllocSignature == pa->signature); - count = (bytes + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes; - chunk /= kIOPageAllocChunkBytes; + count = (bytes + gIOPageAllocChunkBytes - 1) / gIOPageAllocChunkBytes; + chunk /= gIOPageAllocChunkBytes; IOLockLock(a->lock); if (!pa->avail) diff --git a/iokit/Kernel/IOLocks.cpp b/iokit/Kernel/IOLocks.cpp index a61f6a326..2febff6c4 100644 --- a/iokit/Kernel/IOLocks.cpp +++ b/iokit/Kernel/IOLocks.cpp @@ -37,6 +37,11 @@ extern "C" { #include +#if defined(__x86_64__) +/* Synthetic event if none is specified, for backwards compatibility only. 
*/ +static bool IOLockSleep_NO_EVENT __attribute__((used)) = 0; +#endif + void IOLockInitWithState( IOLock * lock, IOLockState state) { if( state == kIOLockStateLocked) @@ -60,13 +65,13 @@ lck_mtx_t * IOLockGetMachLock( IOLock * lock) int IOLockSleep( IOLock * lock, void *event, UInt32 interType) { - return (int) lck_mtx_sleep(lock, LCK_SLEEP_DEFAULT, (event_t) event, (wait_interrupt_t) interType); + return (int) lck_mtx_sleep(lock, LCK_SLEEP_PROMOTED_PRI, (event_t) event, (wait_interrupt_t) interType); } int IOLockSleepDeadline( IOLock * lock, void *event, AbsoluteTime deadline, UInt32 interType) { - return (int) lck_mtx_sleep_deadline(lock, LCK_SLEEP_DEFAULT, (event_t) event, + return (int) lck_mtx_sleep_deadline(lock, LCK_SLEEP_PROMOTED_PRI, (event_t) event, (wait_interrupt_t) interType, __OSAbsoluteTime(deadline)); } @@ -75,6 +80,42 @@ void IOLockWakeup(IOLock * lock, void *event, bool oneThread) thread_wakeup_prim((event_t) event, oneThread, THREAD_AWAKENED); } +#if defined(__x86_64__) +/* + * For backwards compatibility, kexts built against pre-Darwin 14 headers will bind at runtime to this function, + * which supports a NULL event, + */ +int IOLockSleep_legacy_x86_64( IOLock * lock, void *event, UInt32 interType) __asm("_IOLockSleep"); +int IOLockSleepDeadline_legacy_x86_64( IOLock * lock, void *event, + AbsoluteTime deadline, UInt32 interType) __asm("_IOLockSleepDeadline"); +void IOLockWakeup_legacy_x86_64(IOLock * lock, void *event, bool oneThread) __asm("_IOLockWakeup"); + +int IOLockSleep_legacy_x86_64( IOLock * lock, void *event, UInt32 interType) +{ + if (event == NULL) + event = (void *)&IOLockSleep_NO_EVENT; + + return IOLockSleep(lock, event, interType); +} + +int IOLockSleepDeadline_legacy_x86_64( IOLock * lock, void *event, + AbsoluteTime deadline, UInt32 interType) +{ + if (event == NULL) + event = (void *)&IOLockSleep_NO_EVENT; + + return IOLockSleepDeadline(lock, event, deadline, interType); +} + +void IOLockWakeup_legacy_x86_64(IOLock * lock, void *event, bool oneThread) +{ + if (event == NULL) + event = (void *)&IOLockSleep_NO_EVENT; + + IOLockWakeup(lock, event, oneThread); +} +#endif /* defined(__x86_64__) */ + struct _IORecursiveLock { lck_mtx_t *mutex; @@ -189,7 +230,7 @@ int IORecursiveLockSleep(IORecursiveLock *_lock, void *event, UInt32 interType) lock->count = 0; lock->thread = 0; - res = lck_mtx_sleep(lock->mutex, LCK_SLEEP_DEFAULT, (event_t) event, (wait_interrupt_t) interType); + res = lck_mtx_sleep(lock->mutex, LCK_SLEEP_PROMOTED_PRI, (event_t) event, (wait_interrupt_t) interType); // Must re-establish the recursive lock no matter why we woke up // otherwise we would potentially leave the return path corrupted. @@ -211,8 +252,8 @@ int IORecursiveLockSleepDeadline( IORecursiveLock * _lock, void *event, lock->count = 0; lock->thread = 0; - res = lck_mtx_sleep_deadline(lock->mutex, LCK_SLEEP_DEFAULT, (event_t) event, - (wait_interrupt_t) interType, __OSAbsoluteTime(deadline)); + res = lck_mtx_sleep_deadline(lock->mutex, LCK_SLEEP_PROMOTED_PRI, (event_t) event, + (wait_interrupt_t) interType, __OSAbsoluteTime(deadline)); // Must re-establish the recursive lock no matter why we woke up // otherwise we would potentially leave the return path corrupted. 
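Two things are going on in this IOLocks.cpp hunk: sleeps now pass LCK_SLEEP_PROMOTED_PRI so a waiter hands a priority promotion to the lock owner (guarding against priority inversion), and the __asm("_IOLockSleep") aliases keep pre-Darwin 14 x86_64 kexts working by mapping a NULL event to the address of the file-static IOLockSleep_NO_EVENT on both the sleep and wakeup paths. A usage sketch of the pairing contract this preserves (hypothetical driver code, not from the patch):

// Hypothetical caller: sleep and wakeup must name the same event token.
// Old binaries that passed NULL still match, because the legacy shims
// substitute &IOLockSleep_NO_EVENT consistently on both sides.
static bool ready;

void waiter(IOLock *lock)
{
    IOLockLock(lock);
    while (!ready)
        IOLockSleep(lock, &ready, THREAD_UNINT);  // atomically drops and retakes the lock
    IOLockUnlock(lock);
}

void signaller(IOLock *lock)
{
    IOLockLock(lock);
    ready = true;
    IOLockWakeup(lock, &ready, true /* wake one thread */);
    IOLockUnlock(lock);
}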
diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index d8be44963..f6ab8e93b 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -84,6 +84,8 @@ memory_object_iopl_request( unsigned int *page_list_count, int *flags); +// osfmk/device/iokit_rpc.c +unsigned int IODefaultCacheBits(addr64_t pa); unsigned int IOTranslateCacheBits(struct phys_entry *pp); __END_DECLS @@ -130,10 +132,10 @@ enum ioPLBlockFlags { kIOPLExternUPL = 0x00000002, }; -struct typePersMDData +struct IOMDPersistentInitData { - const IOGeneralMemoryDescriptor *fMD; - ipc_port_t fMemEntry; + const IOGeneralMemoryDescriptor * fMD; + IOMemoryReference * fMemRef; }; struct ioPLBlock { @@ -152,7 +154,9 @@ struct ioGMDData { addr64_t fMappedBase; uint64_t fPreparationID; unsigned int fPageCnt; - unsigned char fDiscontig; + unsigned char fDiscontig:1; + unsigned char fCompletionError:1; + unsigned char _resv:6; #if __LP64__ // align arrays to 8 bytes so following macros work unsigned char fPad[3]; @@ -169,12 +173,10 @@ struct ioGMDData { #define computeDataSize(p, u) \ (offsetof(ioGMDData, fPageList) + p * sizeof(upl_page_info_t) + u * sizeof(ioPLBlock)) - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define next_page(a) ( trunc_page(a) + PAGE_SIZE ) - extern "C" { kern_return_t device_data_action( @@ -193,8 +195,7 @@ kern_return_t device_data_action( if( memDesc) { memDesc->retain(); - kr = memDesc->handleFault( device_pager, 0, 0, - offset, size, kIOMapDefaultCache /*?*/); + kr = memDesc->handleFault(device_pager, offset, size); memDesc->release(); } else @@ -215,11 +216,13 @@ kern_return_t device_close( } }; // end extern "C" +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + // Note this inline function uses C++ reference arguments to return values // This means that pointers are not passed and NULLs don't have to be // checked for as a NULL reference is illegal. 
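The ioGMDData change above repacks the byte formerly spent on fDiscontig as bitfields, so fCompletionError fits without growing the structure and the fPad alignment math that follows stays valid. A standalone illustration of the packing (the struct name here is invented for the example):

// Illustration: three fields share the one byte fDiscontig used to occupy.
struct Flags {
    unsigned char fDiscontig       : 1;  // ranges not physically contiguous
    unsigned char fCompletionError : 1;  // an IOPL completed with an error
    unsigned char _resv            : 6;  // spare bits for future use
};
// sizeof(Flags) is 1 on the ABIs xnu targets, the same as the plain
// unsigned char it replaces.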
static inline void -getAddrLenForInd(user_addr_t &addr, IOPhysicalLength &len, // Output variables +getAddrLenForInd(mach_vm_address_t &addr, mach_vm_size_t &len, // Output variables UInt32 type, IOGeneralMemoryDescriptor::Ranges r, UInt32 ind) { assert(kIOMemoryTypeUIO == type @@ -227,7 +230,8 @@ getAddrLenForInd(user_addr_t &addr, IOPhysicalLength &len, // Output variables || kIOMemoryTypePhysical == type || kIOMemoryTypePhysical64 == type); if (kIOMemoryTypeUIO == type) { user_size_t us; - uio_getiov((uio_t) r.uio, ind, &addr, &us); len = us; + user_addr_t ad; + uio_getiov((uio_t) r.uio, ind, &ad, &us); addr = ad; len = us; } #ifndef __LP64__ else if ((kIOMemoryTypeVirtual64 == type) || (kIOMemoryTypePhysical64 == type)) { @@ -245,6 +249,794 @@ getAddrLenForInd(user_addr_t &addr, IOPhysicalLength &len, // Output variables /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +static IOReturn +purgeableControlBits(IOOptionBits newState, vm_purgable_t * control, int * state) +{ + IOReturn err = kIOReturnSuccess; + + *control = VM_PURGABLE_SET_STATE; + + enum { kIOMemoryPurgeableControlMask = 15 }; + + switch (kIOMemoryPurgeableControlMask & newState) + { + case kIOMemoryPurgeableKeepCurrent: + *control = VM_PURGABLE_GET_STATE; + break; + + case kIOMemoryPurgeableNonVolatile: + *state = VM_PURGABLE_NONVOLATILE; + break; + case kIOMemoryPurgeableVolatile: + *state = VM_PURGABLE_VOLATILE | (newState & ~kIOMemoryPurgeableControlMask); + break; + case kIOMemoryPurgeableEmpty: + *state = VM_PURGABLE_EMPTY; + break; + default: + err = kIOReturnBadArgument; + break; + } + return (err); +} + +static IOReturn +purgeableStateBits(int * state) +{ + IOReturn err = kIOReturnSuccess; + + switch (VM_PURGABLE_STATE_MASK & *state) + { + case VM_PURGABLE_NONVOLATILE: + *state = kIOMemoryPurgeableNonVolatile; + break; + case VM_PURGABLE_VOLATILE: + *state = kIOMemoryPurgeableVolatile; + break; + case VM_PURGABLE_EMPTY: + *state = kIOMemoryPurgeableEmpty; + break; + default: + *state = kIOMemoryPurgeableNonVolatile; + err = kIOReturnNotReady; + break; + } + return (err); +} + + +static vm_prot_t +vmProtForCacheMode(IOOptionBits cacheMode) +{ + vm_prot_t prot = 0; + switch (cacheMode) + { + case kIOInhibitCache: + SET_MAP_MEM(MAP_MEM_IO, prot); + break; + + case kIOWriteThruCache: + SET_MAP_MEM(MAP_MEM_WTHRU, prot); + break; + + case kIOWriteCombineCache: + SET_MAP_MEM(MAP_MEM_WCOMB, prot); + break; + + case kIOCopybackCache: + SET_MAP_MEM(MAP_MEM_COPYBACK, prot); + break; + + case kIOCopybackInnerCache: + SET_MAP_MEM(MAP_MEM_INNERWBACK, prot); + break; + + case kIODefaultCache: + default: + SET_MAP_MEM(MAP_MEM_NOOP, prot); + break; + } + + return (prot); +} + +static unsigned int +pagerFlagsForCacheMode(IOOptionBits cacheMode) +{ + unsigned int pagerFlags = 0; + switch (cacheMode) + { + case kIOInhibitCache: + pagerFlags = DEVICE_PAGER_CACHE_INHIB | DEVICE_PAGER_COHERENT | DEVICE_PAGER_GUARDED; + break; + + case kIOWriteThruCache: + pagerFlags = DEVICE_PAGER_WRITE_THROUGH | DEVICE_PAGER_COHERENT | DEVICE_PAGER_GUARDED; + break; + + case kIOWriteCombineCache: + pagerFlags = DEVICE_PAGER_CACHE_INHIB | DEVICE_PAGER_COHERENT; + break; + + case kIOCopybackCache: + pagerFlags = DEVICE_PAGER_COHERENT; + break; + + case kIOCopybackInnerCache: + pagerFlags = DEVICE_PAGER_COHERENT; + break; + + case kIODefaultCache: + default: + pagerFlags = -1U; + break; + } + return (pagerFlags); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* * * * * * * * * * * * * * * * * 
* * * * * * * * * * * * * * * * * * * */ + +struct IOMemoryEntry +{ + ipc_port_t entry; + int64_t offset; + uint64_t size; +}; + +struct IOMemoryReference +{ + volatile SInt32 refCount; + vm_prot_t prot; + uint32_t capacity; + uint32_t count; + IOMemoryEntry entries[0]; +}; + +enum +{ + kIOMemoryReferenceReuse = 0x00000001, + kIOMemoryReferenceWrite = 0x00000002, +}; + +SInt32 gIOMemoryReferenceCount; + +IOMemoryReference * +IOGeneralMemoryDescriptor::memoryReferenceAlloc(uint32_t capacity, IOMemoryReference * realloc) +{ + IOMemoryReference * ref; + size_t newSize, oldSize, copySize; + + newSize = (sizeof(IOMemoryReference) + - sizeof(ref->entries) + + capacity * sizeof(ref->entries[0])); + ref = (typeof(ref)) IOMalloc(newSize); + if (realloc) + { + oldSize = (sizeof(IOMemoryReference) + - sizeof(realloc->entries) + + realloc->capacity * sizeof(realloc->entries[0])); + copySize = oldSize; + if (copySize > newSize) copySize = newSize; + if (ref) bcopy(realloc, ref, copySize); + IOFree(realloc, oldSize); + } + else if (ref) + { + bzero(ref, sizeof(*ref)); + ref->refCount = 1; + OSIncrementAtomic(&gIOMemoryReferenceCount); + } + if (!ref) return (0); + ref->capacity = capacity; + return (ref); +} + +void +IOGeneralMemoryDescriptor::memoryReferenceFree(IOMemoryReference * ref) +{ + IOMemoryEntry * entries; + size_t size; + + entries = ref->entries + ref->count; + while (entries > &ref->entries[0]) + { + entries--; + ipc_port_release_send(entries->entry); + } + size = (sizeof(IOMemoryReference) + - sizeof(ref->entries) + + ref->capacity * sizeof(ref->entries[0])); + IOFree(ref, size); + + OSDecrementAtomic(&gIOMemoryReferenceCount); +} + +void +IOGeneralMemoryDescriptor::memoryReferenceRelease(IOMemoryReference * ref) +{ + if (1 == OSDecrementAtomic(&ref->refCount)) memoryReferenceFree(ref); +} + + +IOReturn +IOGeneralMemoryDescriptor::memoryReferenceCreate( + IOOptionBits options, + IOMemoryReference ** reference) +{ + enum { kCapacity = 4, kCapacityInc = 4 }; + + kern_return_t err; + IOMemoryReference * ref; + IOMemoryEntry * entries; + IOMemoryEntry * cloneEntries; + vm_map_t map; + ipc_port_t entry, cloneEntry; + vm_prot_t prot; + memory_object_size_t actualSize; + uint32_t rangeIdx; + uint32_t count; + mach_vm_address_t entryAddr, endAddr, entrySize; + mach_vm_size_t srcAddr, srcLen; + mach_vm_size_t nextAddr, nextLen; + mach_vm_size_t offset, remain; + IOByteCount physLen; + IOOptionBits type = (_flags & kIOMemoryTypeMask); + IOOptionBits cacheMode; + unsigned int pagerFlags; + + ref = memoryReferenceAlloc(kCapacity, NULL); + if (!ref) return (kIOReturnNoMemory); + entries = &ref->entries[0]; + count = 0; + + offset = 0; + rangeIdx = 0; + if (_task) getAddrLenForInd(nextAddr, nextLen, type, _ranges, rangeIdx); + else + { + nextAddr = getPhysicalSegment(offset, &physLen, kIOMemoryMapperNone); + nextLen = physLen; + // default cache mode for physical + if (kIODefaultCache == ((_flags & kIOMemoryBufferCacheMask) >> kIOMemoryBufferCacheShift)) + { + IOOptionBits mode; + pagerFlags = IODefaultCacheBits(nextAddr); + if (DEVICE_PAGER_CACHE_INHIB & pagerFlags) + { + if (DEVICE_PAGER_GUARDED & pagerFlags) + mode = kIOInhibitCache; + else + mode = kIOWriteCombineCache; + } + else if (DEVICE_PAGER_WRITE_THROUGH & pagerFlags) + mode = kIOWriteThruCache; + else + mode = kIOCopybackCache; + _flags |= (mode << kIOMemoryBufferCacheShift); + } + } + + // cache mode & vm_prot + prot = VM_PROT_READ; + cacheMode = ((_flags & kIOMemoryBufferCacheMask) >> kIOMemoryBufferCacheShift); + prot |= 
vmProtForCacheMode(cacheMode); + // VM system requires write access to change cache mode + if (kIODefaultCache != cacheMode) prot |= VM_PROT_WRITE; + if (kIODirectionOut != (kIODirectionOutIn & _flags)) prot |= VM_PROT_WRITE; + if (kIOMemoryReferenceWrite & options) prot |= VM_PROT_WRITE; + + if ((kIOMemoryReferenceReuse & options) && _memRef) + { + cloneEntries = &_memRef->entries[0]; + prot |= MAP_MEM_NAMED_REUSE; + } + + if (_task) + { + // virtual ranges + + if (kIOMemoryBufferPageable & _flags) + { + // IOBufferMemoryDescriptor alloc - set flags for entry + object create + prot |= MAP_MEM_NAMED_CREATE; + if (kIOMemoryBufferPurgeable & _flags) prot |= MAP_MEM_PURGABLE; + prot |= VM_PROT_WRITE; + map = NULL; + } + else map = get_task_map(_task); + + remain = _length; + while (remain) + { + srcAddr = nextAddr; + srcLen = nextLen; + nextAddr = 0; + nextLen = 0; + // coalesce addr range + for (++rangeIdx; rangeIdx < _rangesCount; rangeIdx++) + { + getAddrLenForInd(nextAddr, nextLen, type, _ranges, rangeIdx); + if ((srcAddr + srcLen) != nextAddr) break; + srcLen += nextLen; + } + entryAddr = trunc_page_64(srcAddr); + endAddr = round_page_64(srcAddr + srcLen); + do + { + entrySize = (endAddr - entryAddr); + if (!entrySize) break; + actualSize = entrySize; + + cloneEntry = MACH_PORT_NULL; + if (MAP_MEM_NAMED_REUSE & prot) + { + if (cloneEntries < &_memRef->entries[_memRef->count]) cloneEntry = cloneEntries->entry; + else prot &= ~MAP_MEM_NAMED_REUSE; + } + + err = mach_make_memory_entry_64(map, + &actualSize, entryAddr, prot, &entry, cloneEntry); + + if (KERN_SUCCESS != err) break; + if (actualSize > entrySize) panic("mach_make_memory_entry_64 actualSize"); + + if (count >= ref->capacity) + { + ref = memoryReferenceAlloc(ref->capacity + kCapacityInc, ref); + entries = &ref->entries[count]; + } + entries->entry = entry; + entries->size = actualSize; + entries->offset = offset + (entryAddr - srcAddr); + entryAddr += actualSize; + if (MAP_MEM_NAMED_REUSE & prot) + { + if ((cloneEntries->entry == entries->entry) + && (cloneEntries->size == entries->size) + && (cloneEntries->offset == entries->offset)) cloneEntries++; + else prot &= ~MAP_MEM_NAMED_REUSE; + } + entries++; + count++; + } + while (true); + offset += srcLen; + remain -= srcLen; + } + } + else + { + // _task == 0, physical + memory_object_t pager; + vm_size_t size = ptoa_32(_pages); + + if (!getKernelReserved()) panic("getKernelReserved"); + + reserved->dp.pagerContig = (1 == _rangesCount); + reserved->dp.memory = this; + + pagerFlags = pagerFlagsForCacheMode(cacheMode); + if (-1U == pagerFlags) panic("phys is kIODefaultCache"); + if (reserved->dp.pagerContig) pagerFlags |= DEVICE_PAGER_CONTIGUOUS; + + pager = device_pager_setup((memory_object_t) 0, (uintptr_t) reserved, + size, pagerFlags); + assert (pager); + if (!pager) err = kIOReturnVMError; + else + { + srcAddr = nextAddr; + entryAddr = trunc_page_64(srcAddr); + err = mach_memory_object_memory_entry_64((host_t) 1, false /*internal*/, + size, VM_PROT_READ | VM_PROT_WRITE, pager, &entry); + assert (KERN_SUCCESS == err); + if (KERN_SUCCESS != err) device_pager_deallocate(pager); + else + { + reserved->dp.devicePager = pager; + entries->entry = entry; + entries->size = size; + entries->offset = offset + (entryAddr - srcAddr); + entries++; + count++; + } + } + } + + ref->count = count; + ref->prot = prot; + + if (KERN_SUCCESS == err) + { + if (MAP_MEM_NAMED_REUSE & prot) + { + memoryReferenceFree(ref); + OSIncrementAtomic(&_memRef->refCount); + ref = _memRef; + } + } + else + { + 
memoryReferenceFree(ref); + ref = NULL; + } + + *reference = ref; + + return (err); +} + +struct IOMemoryDescriptorMapAllocRef +{ + vm_map_t map; + mach_vm_address_t mapped; + mach_vm_size_t size; + vm_prot_t prot; + IOOptionBits options; +}; + +static kern_return_t +IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) +{ + IOMemoryDescriptorMapAllocRef * ref = (typeof(ref))_ref; + IOReturn err; + vm_map_offset_t addr; + + addr = ref->mapped; + err = vm_map_enter_mem_object(map, &addr, ref->size, + (vm_map_offset_t) 0, + (((ref->options & kIOMapAnywhere) + ? VM_FLAGS_ANYWHERE + : VM_FLAGS_FIXED) + | VM_MAKE_TAG(VM_MEMORY_IOKIT) + | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */ + IPC_PORT_NULL, + (memory_object_offset_t) 0, + false, /* copy */ + ref->prot, + ref->prot, + VM_INHERIT_NONE); + if (KERN_SUCCESS == err) + { + ref->mapped = (mach_vm_address_t) addr; + ref->map = map; + } + + return( err ); +} + +IOReturn +IOGeneralMemoryDescriptor::memoryReferenceMap( + IOMemoryReference * ref, + vm_map_t map, + mach_vm_size_t inoffset, + mach_vm_size_t size, + IOOptionBits options, + mach_vm_address_t * inaddr) +{ + IOReturn err; + int64_t offset = inoffset; + uint32_t rangeIdx, entryIdx; + vm_map_offset_t addr, mapAddr; + vm_map_offset_t pageOffset, entryOffset, remain, chunk; + + mach_vm_address_t srcAddr, nextAddr; + mach_vm_size_t srcLen, nextLen; + IOByteCount physLen; + IOMemoryEntry * entry; + vm_prot_t prot, memEntryCacheMode; + IOOptionBits type; + IOOptionBits cacheMode; + + /* + * For the kIOMapPrefault option. + */ + upl_page_info_t *pageList = NULL; + UInt currentPageIndex = 0; + + type = _flags & kIOMemoryTypeMask; + prot = VM_PROT_READ; + if (!(kIOMapReadOnly & options)) prot |= VM_PROT_WRITE; + prot &= ref->prot; + + cacheMode = ((options & kIOMapCacheMask) >> kIOMapCacheShift); + if (kIODefaultCache != cacheMode) + { + // VM system requires write access to change cache mode + prot |= VM_PROT_WRITE; + // update named entries cache mode + memEntryCacheMode = (MAP_MEM_ONLY | prot | vmProtForCacheMode(cacheMode)); + } + + if (_task) + { + // Find first range for offset + for (remain = offset, rangeIdx = 0; rangeIdx < _rangesCount; rangeIdx++) + { + getAddrLenForInd(nextAddr, nextLen, type, _ranges, rangeIdx); + if (remain < nextLen) break; + remain -= nextLen; + } + } + else + { + rangeIdx = 0; + remain = 0; + nextAddr = getPhysicalSegment(offset, &physLen, kIOMemoryMapperNone); + nextLen = size; + } + + assert(remain < nextLen); + if (remain >= nextLen) return (kIOReturnBadArgument); + + nextAddr += remain; + nextLen -= remain; + pageOffset = (page_mask & nextAddr); + addr = 0; + if (!(options & kIOMapAnywhere)) + { + addr = *inaddr; + if (pageOffset != (page_mask & addr)) return (kIOReturnNotAligned); + addr -= pageOffset; + } + + // find first entry for offset + for (entryIdx = 0; + (entryIdx < ref->count) && (offset >= ref->entries[entryIdx].offset); + entryIdx++) {} + entryIdx--; + entry = &ref->entries[entryIdx]; + + // allocate VM + size = round_page_64(size + pageOffset); + { + IOMemoryDescriptorMapAllocRef ref; + ref.map = map; + ref.options = options; + ref.size = size; + ref.prot = prot; + if (options & kIOMapAnywhere) + // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE + ref.mapped = 0; + else + ref.mapped = addr; + + if ((ref.map == kernel_map) && (kIOMemoryBufferPageable & _flags)) + err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref ); + else + err = IOMemoryDescriptorMapAlloc(ref.map, &ref); + if (KERN_SUCCESS == err) + { + 
addr = ref.mapped; + map = ref.map; + } + } + + /* + * Prefaulting is only possible if we wired the memory earlier. Check the + * memory type, and the underlying data. + */ + if (options & kIOMapPrefault) { + /* + * The memory must have been wired by calling ::prepare(), otherwise + * we don't have the UPL. Without UPLs, pages cannot be pre-faulted + */ + assert(map != kernel_map); + assert(_wireCount != 0); + assert(_memoryEntries != NULL); + if ((map == kernel_map) || + (_wireCount == 0) || + (_memoryEntries == NULL)) + { + return kIOReturnBadArgument; + } + + // Get the page list. + ioGMDData* dataP = getDataP(_memoryEntries); + ioPLBlock const* ioplList = getIOPLList(dataP); + pageList = getPageList(dataP); + + // Get the number of IOPLs. + UInt numIOPLs = getNumIOPL(_memoryEntries, dataP); + + /* + * Scan through the IOPL Info Blocks, looking for the first block containing + * the offset. The research will go past it, so we'll need to go back to the + * right range at the end. + */ + UInt ioplIndex = 0; + while (ioplIndex < numIOPLs && offset >= ioplList[ioplIndex].fIOMDOffset) + ioplIndex++; + ioplIndex--; + + // Retrieve the IOPL info block. + ioPLBlock ioplInfo = ioplList[ioplIndex]; + + /* + * For external UPLs, the fPageInfo points directly to the UPL's page_info_t + * array. + */ + if (ioplInfo.fFlags & kIOPLExternUPL) + pageList = (upl_page_info_t*) ioplInfo.fPageInfo; + else + pageList = &pageList[ioplInfo.fPageInfo]; + + // Rebase [offset] into the IOPL in order to looks for the first page index. + mach_vm_size_t offsetInIOPL = offset - ioplInfo.fIOMDOffset + ioplInfo.fPageOffset; + + // Retrieve the index of the first page corresponding to the offset. + currentPageIndex = atop_32(offsetInIOPL); + } + + // enter mappings + remain = size; + mapAddr = addr; + addr += pageOffset; + while (remain && nextLen && (KERN_SUCCESS == err)) + { + srcAddr = nextAddr; + srcLen = nextLen; + nextAddr = 0; + nextLen = 0; + // coalesce addr range + for (++rangeIdx; rangeIdx < _rangesCount; rangeIdx++) + { + getAddrLenForInd(nextAddr, nextLen, type, _ranges, rangeIdx); + if ((srcAddr + srcLen) != nextAddr) break; + srcLen += nextLen; + } + + while (srcLen && (KERN_SUCCESS == err)) + { + entryOffset = offset - entry->offset; + if ((page_mask & entryOffset) != pageOffset) + { + err = kIOReturnNotAligned; + break; + } + + if (kIODefaultCache != cacheMode) + { + vm_size_t unused = 0; + err = mach_make_memory_entry(NULL /*unused*/, &unused, 0 /*unused*/, + memEntryCacheMode, NULL, entry->entry); + assert (KERN_SUCCESS == err); + } + + entryOffset -= pageOffset; + if (entryOffset >= entry->size) panic("entryOffset"); + chunk = entry->size - entryOffset; + if (chunk) + { + if (chunk > remain) chunk = remain; + + if (options & kIOMapPrefault) { + UInt nb_pages = round_page(chunk) / PAGE_SIZE; + err = vm_map_enter_mem_object_prefault(map, + &mapAddr, + chunk, 0 /* mask */, + (VM_FLAGS_FIXED + | VM_FLAGS_OVERWRITE + | VM_MAKE_TAG(VM_MEMORY_IOKIT) + | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */ + entry->entry, + entryOffset, + prot, // cur + prot, // max + &pageList[currentPageIndex], + nb_pages); + + // Compute the next index in the page list. 
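/*
 * Worked example of the prefault indexing above, with illustrative numbers
 * (PAGE_SIZE = 4096): if ioplInfo.fIOMDOffset = 0x6000, ioplInfo.fPageOffset
 * = 0x200 and the map offset is 0x8200, then offsetInIOPL = 0x8200 - 0x6000
 * + 0x200 = 0x2400 and currentPageIndex = atop_32(0x2400) = 2. Mapping a
 * three-page chunk then advances the index by round_page(0x3000)/PAGE_SIZE
 * = 3, to index 5, which is where the next vm_map_enter_mem_object_prefault()
 * call picks up.
 */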
+ currentPageIndex += nb_pages; + assert(currentPageIndex <= _pages); + } else { + err = vm_map_enter_mem_object(map, + &mapAddr, + chunk, 0 /* mask */, + (VM_FLAGS_FIXED + | VM_FLAGS_OVERWRITE + | VM_MAKE_TAG(VM_MEMORY_IOKIT) + | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */ + entry->entry, + entryOffset, + false, // copy + prot, // cur + prot, // max + VM_INHERIT_NONE); + } + + if (KERN_SUCCESS != err) break; + remain -= chunk; + if (!remain) break; + mapAddr += chunk; + offset += chunk - pageOffset; + } + pageOffset = 0; + entry++; + entryIdx++; + if (entryIdx >= ref->count) + { + err = kIOReturnOverrun; + break; + } + } + } + + if ((KERN_SUCCESS != err) && addr) + { + (void) mach_vm_deallocate(map, trunc_page_64(addr), size); + addr = 0; + } + *inaddr = addr; + + return (err); +} + +IOReturn +IOGeneralMemoryDescriptor::memoryReferenceGetPageCounts( + IOMemoryReference * ref, + IOByteCount * residentPageCount, + IOByteCount * dirtyPageCount) +{ + IOReturn err; + IOMemoryEntry * entries; + unsigned int resident, dirty; + unsigned int totalResident, totalDirty; + + totalResident = totalDirty = 0; + entries = ref->entries + ref->count; + while (entries > &ref->entries[0]) + { + entries--; + err = mach_memory_entry_get_page_counts(entries->entry, &resident, &dirty); + if (KERN_SUCCESS != err) break; + totalResident += resident; + totalDirty += dirty; + } + + if (residentPageCount) *residentPageCount = totalResident; + if (dirtyPageCount) *dirtyPageCount = totalDirty; + return (err); +} + +IOReturn +IOGeneralMemoryDescriptor::memoryReferenceSetPurgeable( + IOMemoryReference * ref, + IOOptionBits newState, + IOOptionBits * oldState) +{ + IOReturn err; + IOMemoryEntry * entries; + vm_purgable_t control; + int totalState, state; + + entries = ref->entries + ref->count; + totalState = kIOMemoryPurgeableNonVolatile; + while (entries > &ref->entries[0]) + { + entries--; + + err = purgeableControlBits(newState, &control, &state); + if (KERN_SUCCESS != err) break; + err = mach_memory_entry_purgable_control(entries->entry, control, &state); + if (KERN_SUCCESS != err) break; + err = purgeableStateBits(&state); + if (KERN_SUCCESS != err) break; + + if (kIOMemoryPurgeableEmpty == state) totalState = kIOMemoryPurgeableEmpty; + else if (kIOMemoryPurgeableEmpty == totalState) continue; + else if (kIOMemoryPurgeableVolatile == totalState) continue; + else if (kIOMemoryPurgeableVolatile == state) totalState = kIOMemoryPurgeableVolatile; + else totalState = kIOMemoryPurgeableNonVolatile; + } + + if (oldState) *oldState = totalState; + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + IOMemoryDescriptor * IOMemoryDescriptor::withAddress(void * address, IOByteCount length, @@ -418,19 +1210,19 @@ IOMemoryDescriptor::withPersistentMemoryDescriptor(IOMemoryDescriptor *originalM IOMemoryDescriptor * IOGeneralMemoryDescriptor::withPersistentMemoryDescriptor(IOGeneralMemoryDescriptor *originalMD) { - ipc_port_t sharedMem = (ipc_port_t) originalMD->createNamedEntry(); + IOMemoryReference * memRef; + + if (kIOReturnSuccess != originalMD->memoryReferenceCreate(kIOMemoryReferenceReuse, &memRef)) return (0); - if (!sharedMem) - return 0; - - if (sharedMem == originalMD->_memEntry) { + if (memRef == originalMD->_memRef) + { originalMD->retain(); // Add a new reference to ourselves - ipc_port_release_send(sharedMem); // Remove extra send right + originalMD->memoryReferenceRelease(memRef); return originalMD; } IOGeneralMemoryDescriptor * self = new IOGeneralMemoryDescriptor; - 
typePersMDData initData = { originalMD, sharedMem }; + IOMDPersistentInitData initData = { originalMD, memRef }; if (self && !self->initWithOptions(&initData, 1, 0, 0, kIOMemoryTypePersistentMD, 0)) { @@ -440,52 +1232,6 @@ IOGeneralMemoryDescriptor::withPersistentMemoryDescriptor(IOGeneralMemoryDescrip return self; } -void *IOGeneralMemoryDescriptor::createNamedEntry() -{ - kern_return_t error; - ipc_port_t sharedMem; - - IOOptionBits type = _flags & kIOMemoryTypeMask; - - user_addr_t range0Addr; - IOByteCount range0Len; - getAddrLenForInd(range0Addr, range0Len, type, _ranges, 0); - range0Addr = trunc_page_64(range0Addr); - - vm_size_t size = ptoa_32(_pages); - vm_address_t kernelPage = (vm_address_t) range0Addr; - - vm_map_t theMap = ((_task == kernel_task) - && (kIOMemoryBufferPageable & _flags)) - ? IOPageableMapForAddress(kernelPage) - : get_task_map(_task); - - memory_object_size_t actualSize = size; - vm_prot_t prot = VM_PROT_READ; - if (kIODirectionOut != (kIODirectionOutIn & _flags)) - prot |= VM_PROT_WRITE; - - if (_memEntry) - prot |= MAP_MEM_NAMED_REUSE; - - error = mach_make_memory_entry_64(theMap, - &actualSize, range0Addr, prot, &sharedMem, (ipc_port_t) _memEntry); - - if (KERN_SUCCESS == error) { - if (actualSize == size) { - return sharedMem; - } else { -#if IOASSERT - IOLog("IOGMD::mach_make_memory_entry_64 (%08llx) size (%08llx:%08llx)\n", - (UInt64)range0Addr, (UInt64)actualSize, (UInt64)size); -#endif - ipc_port_release_send( sharedMem ); - } - } - - return MACH_PORT_NULL; -} - #ifndef __LP64__ bool IOGeneralMemoryDescriptor::initWithAddress(void * address, @@ -605,7 +1351,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, // arguments to this function. if (kIOMemoryTypePersistentMD == type) { - typePersMDData *initData = (typePersMDData *) buffers; + IOMDPersistentInitData *initData = (typeof(initData)) buffers; const IOGeneralMemoryDescriptor *orig = initData->fMD; ioGMDData *dataP = getDataP(orig->_memoryEntries); @@ -614,7 +1360,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, if ( !(orig->_flags & kIOMemoryPersistent) || !dataP) return false; - _memEntry = initData->fMemEntry; // Grab the new named entry + _memRef = initData->fMemRef; // Grab the new named entry options = orig->_flags & ~kIOMemoryAsReference; type = options & kIOMemoryTypeMask; buffers = orig->_ranges.v; @@ -684,10 +1430,10 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, options |= (kIOMemoryRedirected & _flags); if (!(kIOMemoryRedirected & options)) { - if (_memEntry) + if (_memRef) { - ipc_port_release_send((ipc_port_t) _memEntry); - _memEntry = 0; + memoryReferenceRelease(_memRef); + _memRef = 0; } if (_mappings) _mappings->flushCollection(); @@ -847,8 +1593,8 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, UInt32 length = 0; UInt32 pages = 0; for (unsigned ind = 0; ind < count; ind++) { - user_addr_t addr; - IOPhysicalLength len; + mach_vm_address_t addr; + mach_vm_size_t len; // addr & len are returned by this function getAddrLenForInd(addr, len, type, vec, ind); @@ -880,8 +1626,12 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, dataP = getDataP(_memoryEntries); dataP->fPageCnt = _pages; - if ( (kIOMemoryPersistent & _flags) && !_memEntry) - _memEntry = createNamedEntry(); + if ( (kIOMemoryPersistent & _flags) && !_memRef) + { + IOReturn + err = memoryReferenceCreate(0, &_memRef); + if (kIOReturnSuccess != err) return false; + } if ((_flags & kIOMemoryAutoPrepare) && prepare() != kIOReturnSuccess) @@ -950,11 +1700,8 @@ void 
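[Editor's note: with createNamedEntry() gone, persistence now rides on the shared IOMemoryReference. A hedged sketch of the client flow; clientAddr, clientLen and clientTask are hypothetical names:]

```cpp
// A kIOMemoryPersistent descriptor keeps its backing pages nameable; asking for
// a persistent copy returns the original object when the reuse path resolves to
// the same _memRef, otherwise a new descriptor sharing the same reference.
IOMemoryDescriptor * orig = IOMemoryDescriptor::withAddressRange(
    clientAddr, clientLen, kIODirectionOutIn | kIOMemoryPersistent, clientTask);

IOMemoryDescriptor * alias =
    IOMemoryDescriptor::withPersistentMemoryDescriptor(orig);   // may be == orig
```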
IOGeneralMemoryDescriptor::free() reserved = NULL; } - if (_memEntry) - ipc_port_release_send( (ipc_port_t) _memEntry ); - - if (_prepareLock) - IOLockFree(_prepareLock); + if (_memRef) memoryReferenceRelease(_memRef); + if (_prepareLock) IOLockFree(_prepareLock); super::free(); } @@ -1028,7 +1775,7 @@ IOByteCount IOMemoryDescriptor::readBytes IOByteCount remaining; // Assert that this entire I/O is withing the available range - assert(offset < _length); + assert(offset <= _length); assert(offset + length <= _length); if (offset >= _length) { return 0; @@ -1067,13 +1814,14 @@ IOByteCount IOMemoryDescriptor::readBytes } IOByteCount IOMemoryDescriptor::writeBytes - (IOByteCount offset, const void *bytes, IOByteCount length) + (IOByteCount inoffset, const void *bytes, IOByteCount length) { addr64_t srcAddr = CAST_DOWN(addr64_t, bytes); IOByteCount remaining; + IOByteCount offset = inoffset; // Assert that this entire I/O is withing the available range - assert(offset < _length); + assert(offset <= _length); assert(offset + length <= _length); assert( !(kIOMemoryPreparedReadOnly & _flags) ); @@ -1098,10 +1846,13 @@ IOByteCount IOMemoryDescriptor::writeBytes if (dstLen > remaining) dstLen = remaining; - copypv(srcAddr, (addr64_t) dstAddr64, dstLen, - cppvPsnk | cppvFsnk | cppvNoRefSrc | cppvNoModSnk | cppvKmap); - - srcAddr += dstLen; + if (!srcAddr) bzero_phys(dstAddr64, dstLen); + else + { + copypv(srcAddr, (addr64_t) dstAddr64, dstLen, + cppvPsnk | cppvFsnk | cppvNoRefSrc | cppvNoModSnk | cppvKmap); + srcAddr += dstLen; + } offset += dstLen; remaining -= dstLen; } @@ -1111,12 +1862,11 @@ IOByteCount IOMemoryDescriptor::writeBytes assert(!remaining); + if (!srcAddr) performOperation(kIOMemoryIncoherentIOFlush, inoffset, length); + return length - remaining; } -// osfmk/device/iokit_rpc.c -extern "C" unsigned int IODefaultCacheBits(addr64_t pa); - #ifndef __LP64__ void IOGeneralMemoryDescriptor::setPosition(IOByteCount position) { @@ -1522,11 +2272,11 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * addr64_t IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *lengthOfSegment, IOOptionBits options) { - IOReturn ret; - addr64_t address = 0; - IOByteCount length = 0; - IOMapper * mapper = gIOSystemMapper; - IOOptionBits type = _flags & kIOMemoryTypeMask; + IOReturn ret; + mach_vm_address_t address = 0; + mach_vm_size_t length = 0; + IOMapper * mapper = gIOSystemMapper; + IOOptionBits type = _flags & kIOMemoryTypeMask; if (lengthOfSegment) *lengthOfSegment = 0; @@ -1543,7 +2293,7 @@ IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *l { unsigned rangesIndex = 0; Ranges vec = _ranges; - user_addr_t addr; + mach_vm_address_t addr; // Find starting address within the vector of ranges for (;;) { @@ -1560,8 +2310,8 @@ IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *l length -= offset; for ( ++rangesIndex; rangesIndex < _rangesCount; rangesIndex++ ) { - user_addr_t newAddr; - IOPhysicalLength newLen; + mach_vm_address_t newAddr; + mach_vm_size_t newLen; getAddrLenForInd(newAddr, newLen, type, vec, rangesIndex); if (addr + length != newAddr) @@ -1782,70 +2532,16 @@ IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt data return kIOReturnSuccess; } -static IOReturn -purgeableControlBits(IOOptionBits newState, vm_purgable_t * control, int * state) -{ - IOReturn err = kIOReturnSuccess; - - *control = VM_PURGABLE_SET_STATE; - - enum { kIOMemoryPurgeableControlMask = 
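[Editor's note: the writeBytes() change above gives callers a zero-fill path: a NULL source now routes each segment through bzero_phys() and finishes with an incoherent-I/O flush. Sketch:]

```cpp
// Zero an entire descriptor via the new NULL-source path; the trailing
// performOperation(kIOMemoryIncoherentIOFlush, ...) happens inside writeBytes().
IOByteCount written = md->writeBytes(0, NULL, md->getLength());
assert(written == md->getLength());
```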
15 }; - - switch (kIOMemoryPurgeableControlMask & newState) - { - case kIOMemoryPurgeableKeepCurrent: - *control = VM_PURGABLE_GET_STATE; - break; - - case kIOMemoryPurgeableNonVolatile: - *state = VM_PURGABLE_NONVOLATILE; - break; - case kIOMemoryPurgeableVolatile: - *state = VM_PURGABLE_VOLATILE | (newState & ~kIOMemoryPurgeableControlMask); - break; - case kIOMemoryPurgeableEmpty: - *state = VM_PURGABLE_EMPTY; - break; - default: - err = kIOReturnBadArgument; - break; - } - return (err); -} - -static IOReturn -purgeableStateBits(int * state) -{ - IOReturn err = kIOReturnSuccess; - - switch (VM_PURGABLE_STATE_MASK & *state) - { - case VM_PURGABLE_NONVOLATILE: - *state = kIOMemoryPurgeableNonVolatile; - break; - case VM_PURGABLE_VOLATILE: - *state = kIOMemoryPurgeableVolatile; - break; - case VM_PURGABLE_EMPTY: - *state = kIOMemoryPurgeableEmpty; - break; - default: - *state = kIOMemoryPurgeableNonVolatile; - err = kIOReturnNotReady; - break; - } - return (err); -} - IOReturn IOGeneralMemoryDescriptor::setPurgeable( IOOptionBits newState, IOOptionBits * oldState ) { IOReturn err = kIOReturnSuccess; + vm_purgable_t control; int state; - if (_memEntry) + if (_memRef) { err = super::setPurgeable(newState, oldState); } @@ -1873,8 +2569,8 @@ IOGeneralMemoryDescriptor::setPurgeable( IOOptionBits newState, // can only do one range Ranges vec = _ranges; IOOptionBits type = _flags & kIOMemoryTypeMask; - user_addr_t addr; - IOByteCount len; + mach_vm_address_t addr; + mach_vm_size_t len; getAddrLenForInd(addr, len, type, vec, 0); err = purgeableControlBits(newState, &control, &state); @@ -1894,78 +2590,29 @@ IOGeneralMemoryDescriptor::setPurgeable( IOOptionBits newState, if (kIOMemoryThreadSafe & _flags) UNLOCK; } + return (err); } IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, IOOptionBits * oldState ) { - IOReturn err = kIOReturnSuccess; - vm_purgable_t control; - int state; - - if (kIOMemoryThreadSafe & _flags) - LOCK; + IOReturn err = kIOReturnNotReady; - do - { - if (!_memEntry) - { - err = kIOReturnNotReady; - break; - } - err = purgeableControlBits(newState, &control, &state); - if (kIOReturnSuccess != err) - break; - err = mach_memory_entry_purgable_control((ipc_port_t) _memEntry, control, &state); - if (oldState) - { - if (kIOReturnSuccess == err) - { - err = purgeableStateBits(&state); - *oldState = state; - } - } - } - while (false); - - if (kIOMemoryThreadSafe & _flags) - UNLOCK; + if (kIOMemoryThreadSafe & _flags) LOCK; + if (_memRef) err = IOGeneralMemoryDescriptor::memoryReferenceSetPurgeable(_memRef, newState, oldState); + if (kIOMemoryThreadSafe & _flags) UNLOCK; return (err); } - IOReturn IOMemoryDescriptor::getPageCounts( IOByteCount * residentPageCount, IOByteCount * dirtyPageCount ) { - IOReturn err = kIOReturnSuccess; - unsigned int _residentPageCount, _dirtyPageCount; - - if (kIOMemoryThreadSafe & _flags) LOCK; - - do - { - if (!_memEntry) - { - err = kIOReturnNotReady; - break; - } - if ((residentPageCount == NULL) && (dirtyPageCount == NULL)) - { - err = kIOReturnBadArgument; - break; - } - - err = mach_memory_entry_get_page_counts((ipc_port_t) _memEntry, - residentPageCount ? &_residentPageCount : NULL, - dirtyPageCount ? 
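[Editor's note: both public entry points now just take the lock and defer to the _memRef helpers. Typical caller-side use, as a hedged sketch:]

```cpp
IOOptionBits oldState = 0;
IOByteCount  resident = 0, dirty = 0;

// Read the aggregate state without changing it, then make the buffer volatile.
if (kIOReturnSuccess == md->setPurgeable(kIOMemoryPurgeableKeepCurrent, &oldState) &&
    kIOReturnSuccess == md->getPageCounts(&resident, &dirty))
{
    if (kIOMemoryPurgeableNonVolatile == oldState)
        md->setPurgeable(kIOMemoryPurgeableVolatile, NULL);
}
```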
&_dirtyPageCount : NULL); - if (kIOReturnSuccess != err) break; - if (residentPageCount) *residentPageCount = _residentPageCount; - if (dirtyPageCount) *dirtyPageCount = _dirtyPageCount; - } - while (false); + IOReturn err = kIOReturnNotReady; + if (kIOMemoryThreadSafe & _flags) LOCK; + if (_memRef) err = IOGeneralMemoryDescriptor::memoryReferenceGetPageCounts(_memRef, residentPageCount, dirtyPageCount); if (kIOMemoryThreadSafe & _flags) UNLOCK; return (err); @@ -2111,7 +2758,6 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) ioGMDData *dataP; upl_page_info_array_t pageInfo; ppnum_t mapBase; - ipc_port_t sharedMem; assert(kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type); @@ -2154,23 +2800,23 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) if (!mapper) uplFlags |= UPL_NEED_32BIT_ADDR; if (dataP->fDMAMapNumAddressBits > 32) dataP->fDMAMapNumAddressBits = 32; } - if (kIODirectionPrepareNoFault & forDirection) uplFlags |= UPL_REQUEST_NO_FAULT; - if (kIODirectionPrepareNoZeroFill & forDirection) uplFlags |= UPL_NOZEROFILLIO; + if (kIODirectionPrepareNoFault & forDirection) uplFlags |= UPL_REQUEST_NO_FAULT; + if (kIODirectionPrepareNoZeroFill & forDirection) uplFlags |= UPL_NOZEROFILLIO; if (kIODirectionPrepareNonCoherent & forDirection) uplFlags |= UPL_REQUEST_FORCE_COHERENCY; mapBase = 0; - sharedMem = (ipc_port_t) _memEntry; - // Note that appendBytes(NULL) zeros the data up to the desired length. - _memoryEntries->appendBytes(0, dataP->fPageCnt * sizeof(upl_page_info_t)); + // Note that appendBytes(NULL) zeros the data up to the desired length + // and the length parameter is an unsigned int + size_t uplPageSize = dataP->fPageCnt * sizeof(upl_page_info_t); + if (uplPageSize > ((unsigned int)uplPageSize)) return (kIOReturnNoMemory); + if (!_memoryEntries->appendBytes(0, uplPageSize)) return (kIOReturnNoMemory); dataP = 0; // Find the appropriate vm_map for the given task vm_map_t curMap; - if (_task == kernel_task && (kIOMemoryBufferPageable & _flags)) - curMap = 0; - else - { curMap = get_task_map(_task); } + if (_task == kernel_task && (kIOMemoryBufferPageable & _flags)) curMap = 0; + else curMap = get_task_map(_task); // Iterate over the vector of virtual ranges Ranges vec = _ranges; @@ -2178,10 +2824,13 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) IOByteCount mdOffset = 0; ppnum_t highestPage = 0; + IOMemoryEntry * memRefEntry = 0; + if (_memRef) memRefEntry = &_memRef->entries[0]; + for (UInt range = 0; range < _rangesCount; range++) { ioPLBlock iopl; - user_addr_t startPage; - IOByteCount numBytes; + mach_vm_address_t startPage; + mach_vm_size_t numBytes; ppnum_t highPage = 0; // Get the startPage address and length of vec[range] @@ -2199,14 +2848,16 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) while (numBytes) { vm_address_t kernelStart = (vm_address_t) startPage; vm_map_t theMap; - if (curMap) - theMap = curMap; - else if (!sharedMem) { + if (curMap) theMap = curMap; + else if (_memRef) + { + theMap = NULL; + } + else + { assert(_task == kernel_task); theMap = IOPageableMapForAddress(kernelStart); } - else - theMap = NULL; int ioplFlags = uplFlags; dataP = getDataP(_memoryEntries); @@ -2216,7 +2867,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) vm_size_t ioplSize = round_page(numBytes); unsigned int numPageInfo = atop_32(ioplSize); - if (theMap == kernel_map && kernelStart < 
io_kernel_static_end) { + if ((theMap == kernel_map) && (kernelStart < io_kernel_static_end)) { error = io_get_kernel_static_upl(theMap, kernelStart, &ioplSize, @@ -2225,14 +2876,23 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) &numPageInfo, &highPage); } - else if (sharedMem) { - error = memory_object_iopl_request(sharedMem, - ptoa_32(pageIndex), - &ioplSize, - &iopl.fIOPL, - baseInfo, - &numPageInfo, - &ioplFlags); + else if (_memRef) { + memory_object_offset_t entryOffset; + + entryOffset = (mdOffset - iopl.fPageOffset - memRefEntry->offset); + if (entryOffset >= memRefEntry->size) { + memRefEntry++; + if (memRefEntry >= &_memRef->entries[_memRef->count]) panic("memRefEntry"); + entryOffset = 0; + } + if (ioplSize > (memRefEntry->size - entryOffset)) ioplSize = (memRefEntry->size - entryOffset); + error = memory_object_iopl_request(memRefEntry->entry, + entryOffset, + &ioplSize, + &iopl.fIOPL, + baseInfo, + &numPageInfo, + &ioplFlags); } else { assert(theMap); @@ -2363,6 +3023,7 @@ bool IOGeneralMemoryDescriptor::initMemoryEntries(size_t size, IOMapper * mapper dataP->fDMAMapAlignment = 0; dataP->fPreparationID = kIOPreparationIDUnprepared; dataP->fDiscontig = false; + dataP->fCompletionError = false; return (true); } @@ -2551,9 +3212,10 @@ IOReturn IOGeneralMemoryDescriptor::prepare(IODirection forDirection) * before and after an I/O transfer involving pageable memory. */ -IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) +IOReturn IOGeneralMemoryDescriptor::complete(IODirection forDirection) { IOOptionBits type = _flags & kIOMemoryTypeMask; + ioGMDData * dataP; if ((kIOMemoryTypePhysical == type) || (kIOMemoryTypePhysical64 == type)) return kIOReturnSuccess; @@ -2563,6 +3225,10 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) assert(_wireCount); + if ((kIODirectionCompleteWithError & forDirection) + && (dataP = getDataP(_memoryEntries))) + dataP->fCompletionError = true; + if (_wireCount) { if ((kIOMemoryClearEncrypt & _flags) && (1 == _wireCount)) @@ -2571,35 +3237,52 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) } _wireCount--; - if (!_wireCount) + if (!_wireCount || (kIODirectionCompleteWithDataValid & forDirection)) { IOOptionBits type = _flags & kIOMemoryTypeMask; - ioGMDData * dataP = getDataP(_memoryEntries); + dataP = getDataP(_memoryEntries); ioPLBlock *ioplList = getIOPLList(dataP); - UInt count = getNumIOPL(_memoryEntries, dataP); + UInt ind, count = getNumIOPL(_memoryEntries, dataP); + if (_wireCount) + { + // kIODirectionCompleteWithDataValid & forDirection + if (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) + { + for (ind = 0; ind < count; ind++) + { + if (ioplList[ind].fIOPL) iopl_valid_data(ioplList[ind].fIOPL); + } + } + } + else + { #if IOMD_DEBUG_DMAACTIVE - if (__iomd_reservedA) panic("complete() while dma active"); + if (__iomd_reservedA) panic("complete() while dma active"); #endif /* IOMD_DEBUG_DMAACTIVE */ - if (dataP->fMappedBase) { - dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages); - dataP->fMappedBase = 0; - } - // Only complete iopls that we created which are for TypeVirtual - if (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) { - for (UInt ind = 0; ind < count; ind++) - if (ioplList[ind].fIOPL) { - upl_commit(ioplList[ind].fIOPL, 0, 0); - upl_deallocate(ioplList[ind].fIOPL); - } - } else if (kIOMemoryTypeUPL == type) { - 
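[Editor's note: the new fCompletionError flag changes what complete() does with the IOPLs, as the surrounding hunks show. Caller-side sketch; ioFailed is a hypothetical flag:]

```cpp
// A driver that detects a failed transfer can complete with
// kIODirectionCompleteWithError so the UPLs are aborted rather than committed;
// kIODirectionCompleteWithDataValid (while still wired) marks the pages valid.
if (ioFailed) md->complete(kIODirectionCompleteWithError);   // upl_abort() path
else          md->complete();                                // upl_commit() path
```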
upl_set_referenced(ioplList[0].fIOPL, false); - } + if (dataP->fMappedBase) { + dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages); + dataP->fMappedBase = 0; + } + // Only complete iopls that we created which are for TypeVirtual + if (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) { + for (ind = 0; ind < count; ind++) + if (ioplList[ind].fIOPL) { + if (dataP->fCompletionError) + upl_abort(ioplList[ind].fIOPL, 0 /*!UPL_ABORT_DUMP_PAGES*/); + else + upl_commit(ioplList[ind].fIOPL, 0, 0); + upl_deallocate(ioplList[ind].fIOPL); + } + } else if (kIOMemoryTypeUPL == type) { + upl_set_referenced(ioplList[0].fIOPL, false); + } - (void) _memoryEntries->initWithBytes(dataP, computeDataSize(0, 0)); // == setLength() + (void) _memoryEntries->initWithBytes(dataP, computeDataSize(0, 0)); // == setLength() - dataP->fPreparationID = kIOPreparationIDUnprepared; + dataP->fPreparationID = kIOPreparationIDUnprepared; + } } } @@ -2621,18 +3304,17 @@ IOReturn IOGeneralMemoryDescriptor::doMap( if (!(kIOMap64Bit & options)) panic("IOGeneralMemoryDescriptor::doMap !64bit"); #endif /* !__LP64__ */ + kern_return_t err; + IOMemoryMap * mapping = (IOMemoryMap *) *__address; mach_vm_size_t offset = mapping->fOffset + __offset; mach_vm_size_t length = mapping->fLength; - kern_return_t kr = kIOReturnVMError; - ipc_port_t sharedMem = (ipc_port_t) _memEntry; - IOOptionBits type = _flags & kIOMemoryTypeMask; Ranges vec = _ranges; - user_addr_t range0Addr = 0; - IOByteCount range0Len = 0; + mach_vm_address_t range0Addr = 0; + mach_vm_size_t range0Len = 0; if ((offset >= _length) || ((offset + length) > _length)) return( kIOReturnBadArgument ); @@ -2641,10 +3323,14 @@ IOReturn IOGeneralMemoryDescriptor::doMap( getAddrLenForInd(range0Addr, range0Len, type, vec, 0); // mapping source == dest? 
(could be much better) - if( _task - && (mapping->fAddressMap == get_task_map(_task)) && (options & kIOMapAnywhere) - && (1 == _rangesCount) && (0 == offset) - && range0Addr && (length <= range0Len) ) + if (_task + && (mapping->fAddressTask == _task) + && (mapping->fAddressMap == get_task_map(_task)) + && (options & kIOMapAnywhere) + && (1 == _rangesCount) + && (0 == offset) + && range0Addr + && (length <= range0Len)) { mapping->fAddress = range0Addr; mapping->fOptions |= kIOMapStatic; @@ -2652,215 +3338,106 @@ IOReturn IOGeneralMemoryDescriptor::doMap( return( kIOReturnSuccess ); } - if( 0 == sharedMem) { - - vm_size_t size = ptoa_32(_pages); - - if( _task) { - - memory_object_size_t actualSize = size; - vm_prot_t prot = VM_PROT_READ; - if (!(kIOMapReadOnly & options)) - prot |= VM_PROT_WRITE; - else if (kIOMapDefaultCache != (options & kIOMapCacheMask)) - prot |= VM_PROT_WRITE; - - if (_rangesCount == 1) + if (!_memRef) + { + IOOptionBits createOptions = 0; + if (!(kIOMapReadOnly & options)) + { + createOptions |= kIOMemoryReferenceWrite; +#if DEVELOPMENT || DEBUG + if (kIODirectionOut == (kIODirectionOutIn & _flags)) { - kr = mach_make_memory_entry_64(get_task_map(_task), - &actualSize, range0Addr, - prot, &sharedMem, - NULL); - } - if( (_rangesCount != 1) - || ((KERN_SUCCESS == kr) && (actualSize != round_page(size)))) - do - { -#if IOASSERT - IOLog("mach_vm_remap path for ranges %d size (%08llx:%08llx)\n", - _rangesCount, (UInt64)actualSize, (UInt64)size); + OSReportWithBacktrace("warning: creating writable mapping from IOMemoryDescriptor(kIODirectionOut) - use kIOMapReadOnly or change direction"); + } #endif - kr = kIOReturnVMError; - if (sharedMem) - { - ipc_port_release_send(sharedMem); - sharedMem = MACH_PORT_NULL; - } - - mach_vm_address_t address, segDestAddr; - mach_vm_size_t mapLength; - unsigned rangesIndex; - IOOptionBits type = _flags & kIOMemoryTypeMask; - user_addr_t srcAddr; - IOPhysicalLength segLen = 0; - - // Find starting address within the vector of ranges - for (rangesIndex = 0; rangesIndex < _rangesCount; rangesIndex++) { - getAddrLenForInd(srcAddr, segLen, type, _ranges, rangesIndex); - if (offset < segLen) - break; - offset -= segLen; // (make offset relative) - } - - mach_vm_size_t pageOffset = (srcAddr & PAGE_MASK); - address = trunc_page_64(mapping->fAddress); - - if ((options & kIOMapAnywhere) || ((mapping->fAddress - address) == pageOffset)) - { - vm_map_t map = mapping->fAddressMap; - kr = IOMemoryDescriptorMapCopy(&map, - options, - offset, &address, round_page_64(length + pageOffset)); - if (kr == KERN_SUCCESS) - { - segDestAddr = address; - segLen -= offset; - srcAddr += offset; - mapLength = length; - - while (true) - { - vm_prot_t cur_prot, max_prot; - - if (segLen > length) segLen = length; - kr = mach_vm_remap(map, &segDestAddr, round_page_64(segLen), PAGE_MASK, - VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, - get_task_map(_task), trunc_page_64(srcAddr), - FALSE /* copy */, - &cur_prot, - &max_prot, - VM_INHERIT_NONE); - if (KERN_SUCCESS == kr) - { - if ((!(VM_PROT_READ & cur_prot)) - || (!(kIOMapReadOnly & options) && !(VM_PROT_WRITE & cur_prot))) - { - kr = KERN_PROTECTION_FAILURE; - } - } - if (KERN_SUCCESS != kr) - break; - segDestAddr += segLen; - mapLength -= segLen; - if (!mapLength) - break; - rangesIndex++; - if (rangesIndex >= _rangesCount) - { - kr = kIOReturnBadArgument; - break; - } - getAddrLenForInd(srcAddr, segLen, type, vec, rangesIndex); - if (srcAddr & PAGE_MASK) - { - kr = kIOReturnBadArgument; - break; - } - if (segLen > mapLength) - 
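[Editor's note: the DEVELOPMENT/DEBUG OSReportWithBacktrace above fires when a writable mapping is created from a kIODirectionOut descriptor. The quiet pattern is an explicit read-only mapping; outDesc and ownerTask are hypothetical names:]

```cpp
// Map a kIODirectionOut descriptor read-only, matching its declared direction.
IOMemoryMap * map = outDesc->createMappingInTask(ownerTask, 0,
                        kIOMapAnywhere | kIOMapReadOnly);
```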
segLen = mapLength;
-		    }
-		    if (KERN_SUCCESS != kr)
-		    {
-			mach_vm_deallocate(mapping->fAddressMap, address, round_page_64(length + pageOffset));
-		    }
-		}
-
-		if (KERN_SUCCESS == kr)
-		    mapping->fAddress = address + pageOffset;
-		else
-		    mapping->fAddress = NULL;
-	    }
-	}
-	while (false);
-    }
-    else do
-    {	// _task == 0, must be physical
-
-	memory_object_t	 pager;
-	unsigned int	 flags = 0;
-	addr64_t	 pa;
-	IOPhysicalLength segLen;
-
-	pa = getPhysicalSegment( offset, &segLen, kIOMemoryMapperNone );
-
-	if( !getKernelReserved())
-	    continue;
-	reserved->dp.pagerContig = (1 == _rangesCount);
-	reserved->dp.memory      = this;
-
-	/*What cache mode do we need*/
-	switch(options & kIOMapCacheMask ) {
-
-	    case kIOMapDefaultCache:
-	    default:
-		flags = IODefaultCacheBits(pa);
-		if (DEVICE_PAGER_CACHE_INHIB & flags)
-		{
-		    if (DEVICE_PAGER_GUARDED & flags)
-			mapping->fOptions |= kIOMapInhibitCache;
-		    else
-			mapping->fOptions |= kIOMapWriteCombineCache;
-		}
-		else if (DEVICE_PAGER_WRITE_THROUGH & flags)
-		    mapping->fOptions |= kIOMapWriteThruCache;
-		else
-		    mapping->fOptions |= kIOMapCopybackCache;
-		break;
-
-	    case kIOMapInhibitCache:
-		flags = DEVICE_PAGER_CACHE_INHIB |
-				DEVICE_PAGER_COHERENT | DEVICE_PAGER_GUARDED;
-		break;
+	}
+	err = memoryReferenceCreate(createOptions, &_memRef);
+	if (kIOReturnSuccess != err) return (err);
+    }

-	    case kIOMapWriteThruCache:
-		flags = DEVICE_PAGER_WRITE_THROUGH |
-				DEVICE_PAGER_COHERENT | DEVICE_PAGER_GUARDED;
-		break;
+    memory_object_t pager;
+    pager = (memory_object_t) (reserved ? reserved->dp.devicePager : 0);

-	    case kIOMapCopybackCache:
-		flags = DEVICE_PAGER_COHERENT;
-		break;
+    // <upl_transpose //
+    if ((kIOMapReference|kIOMapUnique) == ((kIOMapReference|kIOMapUnique) & options))
+    {
+	do
+	{
+	    upl_t	    redirUPL2;
+	    vm_size_t	    size;
+	    int		    flags;
+	    unsigned int    lock_count;
+
+	    if (!_memRef || (1 != _memRef->count))
+	    {
+		err = kIOReturnNotReadable;
+		break;
+	    }

-	    case kIOMapWriteCombineCache:
-		flags = DEVICE_PAGER_CACHE_INHIB |
-				DEVICE_PAGER_COHERENT;
-		break;
-	}
-
-	flags |= reserved->dp.pagerContig ? DEVICE_PAGER_CONTIGUOUS : 0;
+	    size = round_page(mapping->fLength);
+	    flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL
+			| UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS;

-	if( pager) {
-	    kr = mach_memory_object_memory_entry_64( (host_t) 1, false /*internal*/,
-			size, VM_PROT_READ | VM_PROT_WRITE, pager, &sharedMem );
+	    if (KERN_SUCCESS != memory_object_iopl_request(_memRef->entries[0].entry, 0, &size, &redirUPL2,
+					    NULL, NULL,
+					    &flags))
+		redirUPL2 = NULL;

-	    assert( KERN_SUCCESS == kr );
-	    if( KERN_SUCCESS != kr)
-	    {
-		device_pager_deallocate( pager );
-		pager = MACH_PORT_NULL;
-		sharedMem = MACH_PORT_NULL;
-	    }
-	}
+	    for (lock_count = 0;
+		 IORecursiveLockHaveLock(gIOMemoryLock);
+		 lock_count++) {
+		UNLOCK;
+	    }
+	    err = upl_transpose(redirUPL2, mapping->fRedirUPL);
+	    for (;
+		 lock_count;
+		 lock_count--) {
+		LOCK;
+	    }

-	if( pager && sharedMem)
-	    reserved->dp.devicePager = pager;
+	    if (kIOReturnSuccess != err)
+	    {
+		IOLog("upl_transpose(%x)\n", err);
+		err = kIOReturnSuccess;
+	    }

-    } while( false );
+	    if (redirUPL2)
+	    {
+		upl_commit(redirUPL2, NULL, 0);
+		upl_deallocate(redirUPL2);
+		redirUPL2 = 0;
+	    }
+	    {
+		// swap the memEntries since they now refer to different vm_objects
+		IOMemoryReference * me = _memRef;
+		_memRef = mapping->fMemory->_memRef;
+		mapping->fMemory->_memRef = me;
+	    }
+	    if (pager)
+		err = populateDevicePager( pager, mapping->fAddressMap, mapping->fAddress, offset, length, options );
+	}
+	while (false);
+    }
+    // upl_transpose> //
+    else
-    result = super::doMap( __addressMap, __address,
-			   options, __offset, __length );
+    {
+	err = memoryReferenceMap(_memRef, mapping->fAddressMap, offset, 
length, options, &mapping->fAddress); - return( result ); + if ((err == KERN_SUCCESS) && pager) + { + err = populateDevicePager(pager, mapping->fAddressMap, mapping->fAddress, offset, length, options); + if (err != KERN_SUCCESS) + { + doUnmap(mapping->fAddressMap, (IOVirtualAddress) mapping, 0); + } + else if (kIOMapDefaultCache == (options & kIOMapCacheMask)) + { + mapping->fOptions |= ((_flags & kIOMemoryBufferCacheMask) >> kIOMemoryBufferCacheShift); + } + } + } + + return (err); } IOReturn IOGeneralMemoryDescriptor::doUnmap( @@ -2944,168 +3521,6 @@ bool IOMemoryMap::setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_size return( true ); } -struct IOMemoryDescriptorMapAllocRef -{ - ipc_port_t sharedMem; - vm_map_t map; - mach_vm_address_t mapped; - mach_vm_size_t size; - mach_vm_size_t sourceOffset; - IOOptionBits options; -}; - -static kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) -{ - IOMemoryDescriptorMapAllocRef * ref = (IOMemoryDescriptorMapAllocRef *)_ref; - IOReturn err; - - do { - if( ref->sharedMem) - { - vm_prot_t prot = VM_PROT_READ - | ((ref->options & kIOMapReadOnly) ? 0 : VM_PROT_WRITE); - - // VM system requires write access to change cache mode - if (kIOMapDefaultCache != (ref->options & kIOMapCacheMask)) - prot |= VM_PROT_WRITE; - - // set memory entry cache - vm_prot_t memEntryCacheMode = prot | MAP_MEM_ONLY; - switch (ref->options & kIOMapCacheMask) - { - case kIOMapInhibitCache: - SET_MAP_MEM(MAP_MEM_IO, memEntryCacheMode); - break; - - case kIOMapWriteThruCache: - SET_MAP_MEM(MAP_MEM_WTHRU, memEntryCacheMode); - break; - - case kIOMapWriteCombineCache: - SET_MAP_MEM(MAP_MEM_WCOMB, memEntryCacheMode); - break; - - case kIOMapCopybackCache: - SET_MAP_MEM(MAP_MEM_COPYBACK, memEntryCacheMode); - break; - - case kIOMapCopybackInnerCache: - SET_MAP_MEM(MAP_MEM_INNERWBACK, memEntryCacheMode); - break; - - case kIOMapDefaultCache: - default: - SET_MAP_MEM(MAP_MEM_NOOP, memEntryCacheMode); - break; - } - - vm_size_t unused = 0; - - err = mach_make_memory_entry( NULL /*unused*/, &unused, 0 /*unused*/, - memEntryCacheMode, NULL, ref->sharedMem ); - if (KERN_SUCCESS != err) - IOLog("MAP_MEM_ONLY failed %d\n", err); - - err = mach_vm_map( map, - &ref->mapped, - ref->size, 0 /* mask */, - (( ref->options & kIOMapAnywhere ) ? VM_FLAGS_ANYWHERE : VM_FLAGS_FIXED) - | VM_MAKE_TAG(VM_MEMORY_IOKIT), - ref->sharedMem, ref->sourceOffset, - false, // copy - prot, // cur - prot, // max - VM_INHERIT_NONE); - - if( KERN_SUCCESS != err) { - ref->mapped = 0; - continue; - } - ref->map = map; - } - else - { - err = mach_vm_allocate(map, &ref->mapped, ref->size, - ((ref->options & kIOMapAnywhere) ? VM_FLAGS_ANYWHERE : VM_FLAGS_FIXED) - | VM_MAKE_TAG(VM_MEMORY_IOKIT) ); - if( KERN_SUCCESS != err) { - ref->mapped = 0; - continue; - } - ref->map = map; - // we have to make sure that these guys don't get copied if we fork. 
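[Editor's note: the removed allocator below carried the kIOMapCacheMask to MAP_MEM_* translation; cache mode is still chosen per mapping through the same option bits. A hypothetical use for a buffer that wants write-combining; fbDesc and userTask are illustrative names:]

```cpp
// Request a write-combined user mapping of a frame-buffer-style descriptor;
// the cache mode travels in the kIOMapCacheMask bits of the map options.
IOMemoryMap * fbMap = fbDesc->createMappingInTask(userTask, 0,
                          kIOMapAnywhere | kIOMapWriteCombineCache);
```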
- err = vm_inherit(map, ref->mapped, ref->size, VM_INHERIT_NONE); - assert( KERN_SUCCESS == err ); - } - } - while( false ); - - return( err ); -} - -kern_return_t -IOMemoryDescriptorMapMemEntry(vm_map_t * map, ipc_port_t entry, IOOptionBits options, bool pageable, - mach_vm_size_t offset, - mach_vm_address_t * address, mach_vm_size_t length) -{ - IOReturn err; - IOMemoryDescriptorMapAllocRef ref; - - ref.map = *map; - ref.sharedMem = entry; - ref.sourceOffset = trunc_page_64(offset); - ref.options = options; - ref.size = length; - - if (options & kIOMapAnywhere) - // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE - ref.mapped = 0; - else - ref.mapped = *address; - - if( ref.sharedMem && (ref.map == kernel_map) && pageable) - err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref ); - else - err = IOMemoryDescriptorMapAlloc( ref.map, &ref ); - - *address = ref.mapped; - *map = ref.map; - - return (err); -} - -kern_return_t -IOMemoryDescriptorMapCopy(vm_map_t * map, - IOOptionBits options, - mach_vm_size_t offset, - mach_vm_address_t * address, mach_vm_size_t length) -{ - IOReturn err; - IOMemoryDescriptorMapAllocRef ref; - - ref.map = *map; - ref.sharedMem = NULL; - ref.sourceOffset = trunc_page_64(offset); - ref.options = options; - ref.size = length; - - if (options & kIOMapAnywhere) - // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE - ref.mapped = 0; - else - ref.mapped = *address; - - if (ref.map == kernel_map) - err = IOIteratePageableMaps(ref.size, &IOMemoryDescriptorMapAlloc, &ref); - else - err = IOMemoryDescriptorMapAlloc(ref.map, &ref); - - *address = ref.mapped; - *map = ref.map; - - return (err); -} - IOReturn IOMemoryDescriptor::doMap( vm_map_t __addressMap, IOVirtualAddress * __address, @@ -3113,130 +3528,27 @@ IOReturn IOMemoryDescriptor::doMap( IOByteCount __offset, IOByteCount __length ) { -#ifndef __LP64__ - if (!(kIOMap64Bit & options)) panic("IOMemoryDescriptor::doMap !64bit"); -#endif /* !__LP64__ */ - - IOMemoryMap * mapping = (IOMemoryMap *) *__address; - mach_vm_size_t offset = mapping->fOffset + __offset; - mach_vm_size_t length = mapping->fLength; - - IOReturn err = kIOReturnSuccess; - memory_object_t pager; - mach_vm_size_t pageOffset; - IOPhysicalAddress sourceAddr; - unsigned int lock_count; + return (kIOReturnUnsupported); +} - do +IOReturn IOMemoryDescriptor::handleFault( + void * _pager, + mach_vm_size_t sourceOffset, + mach_vm_size_t length) +{ + if( kIOMemoryRedirected & _flags) { - sourceAddr = getPhysicalSegment( offset, NULL, _kIOMemorySourceSegment ); - pageOffset = sourceAddr - trunc_page( sourceAddr ); - - if( reserved) - pager = (memory_object_t) reserved->dp.devicePager; - else - pager = MACH_PORT_NULL; - - if ((kIOMapReference|kIOMapUnique) == ((kIOMapReference|kIOMapUnique) & options)) - { - upl_t redirUPL2; - vm_size_t size; - int flags; - - if (!_memEntry) - { - err = kIOReturnNotReadable; - continue; - } - - size = round_page(mapping->fLength + pageOffset); - flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL - | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; - - if (KERN_SUCCESS != memory_object_iopl_request((ipc_port_t) _memEntry, 0, &size, &redirUPL2, - NULL, NULL, - &flags)) - redirUPL2 = NULL; - - for (lock_count = 0; - IORecursiveLockHaveLock(gIOMemoryLock); - lock_count++) { - UNLOCK; - } - err = upl_transpose(redirUPL2, mapping->fRedirUPL); - for (; - lock_count; - lock_count--) { - LOCK; - } - - if (kIOReturnSuccess != err) - { - IOLog("upl_transpose(%x)\n", err); - err = 
kIOReturnSuccess; - } - - if (redirUPL2) - { - upl_commit(redirUPL2, NULL, 0); - upl_deallocate(redirUPL2); - redirUPL2 = 0; - } - { - // swap the memEntries since they now refer to different vm_objects - void * me = _memEntry; - _memEntry = mapping->fMemory->_memEntry; - mapping->fMemory->_memEntry = me; - } - if (pager) - err = handleFault( pager, mapping->fAddressMap, mapping->fAddress, offset, length, options ); - } - else - { - mach_vm_address_t address; - - if (!(options & kIOMapAnywhere)) - { - address = trunc_page_64(mapping->fAddress); - if( (mapping->fAddress - address) != pageOffset) - { - err = kIOReturnVMError; - continue; - } - } - - vm_map_t map = mapping->fAddressMap; - err = IOMemoryDescriptorMapMemEntry(&map, (ipc_port_t) _memEntry, - options, (kIOMemoryBufferPageable & _flags), - offset, &address, round_page_64(length + pageOffset)); - if( err != KERN_SUCCESS) - continue; - - if (!_memEntry || pager) - { - err = handleFault( pager, mapping->fAddressMap, address, offset, length, options ); - if (err != KERN_SUCCESS) - doUnmap( mapping->fAddressMap, (IOVirtualAddress) mapping, 0 ); - } - #if DEBUG - if (kIOLogMapping & gIOKitDebug) - IOLog("mapping(%x) desc %p @ %qx, map %p, address %qx, offset %qx, length %qx\n", - err, this, (uint64_t)sourceAddr, mapping, address, offset, length); + IOLog("sleep mem redirect %p, %qx\n", this, sourceOffset); #endif - - if (err == KERN_SUCCESS) - mapping->fAddress = address + pageOffset; - else - mapping->fAddress = NULL; - } + do { + SLEEP; + } while( kIOMemoryRedirected & _flags ); } - while( false ); - - return (err); + return (kIOReturnSuccess); } -IOReturn IOMemoryDescriptor::handleFault( +IOReturn IOMemoryDescriptor::populateDevicePager( void * _pager, vm_map_t addressMap, mach_vm_address_t address, @@ -3254,21 +3566,6 @@ IOReturn IOMemoryDescriptor::handleFault( IOPhysicalLength segLen; addr64_t physAddr; - if( !addressMap) - { - if( kIOMemoryRedirected & _flags) - { -#if DEBUG - IOLog("sleep mem redirect %p, %qx\n", this, sourceOffset); -#endif - do { - SLEEP; - } while( kIOMemoryRedirected & _flags ); - } - - return( kIOReturnSuccess ); - } - physAddr = getPhysicalSegment( sourceOffset, &segLen, kIOMemoryMapperNone ); assert( physAddr ); pageOffset = physAddr - trunc_page_64( physAddr ); @@ -3282,72 +3579,51 @@ IOReturn IOMemoryDescriptor::handleFault( do { // in the middle of the loop only map whole pages - if( segLen >= bytes) - segLen = bytes; - else if( segLen != trunc_page( segLen)) - err = kIOReturnVMError; - if( physAddr != trunc_page_64( physAddr)) - err = kIOReturnBadArgument; - if (kIOReturnSuccess != err) - break; + if( segLen >= bytes) segLen = bytes; + else if (segLen != trunc_page(segLen)) err = kIOReturnVMError; + if (physAddr != trunc_page_64(physAddr)) err = kIOReturnBadArgument; -#if DEBUG - if( kIOLogMapping & gIOKitDebug) - IOLog("IOMemoryMap::map(%p) 0x%qx->0x%qx:0x%qx\n", - addressMap, address + pageOffset, physAddr + pageOffset, - segLen - pageOffset); -#endif + if (kIOReturnSuccess != err) break; + if (reserved && reserved->dp.pagerContig) + { + IOPhysicalLength allLen; + addr64_t allPhys; - if( pager) { - if( reserved && reserved->dp.pagerContig) { - IOPhysicalLength allLen; - addr64_t allPhys; - - allPhys = getPhysicalSegment( 0, &allLen, kIOMemoryMapperNone ); - assert( allPhys ); - err = device_pager_populate_object( pager, 0, atop_64(allPhys), round_page(allLen) ); - } - else + allPhys = getPhysicalSegment( 0, &allLen, kIOMemoryMapperNone ); + assert( allPhys ); + err = device_pager_populate_object( 
pager, 0, atop_64(allPhys), round_page(allLen) ); + } + else + { + for( page = 0; + (page < segLen) && (KERN_SUCCESS == err); + page += page_size) { - - for( page = 0; - (page < segLen) && (KERN_SUCCESS == err); - page += page_size) - { - err = device_pager_populate_object(pager, pagerOffset, - (ppnum_t)(atop_64(physAddr + page)), page_size); - pagerOffset += page_size; - } - } - assert( KERN_SUCCESS == err ); - if( err) - break; - } + err = device_pager_populate_object(pager, pagerOffset, + (ppnum_t)(atop_64(physAddr + page)), page_size); + pagerOffset += page_size; + } + } + assert (KERN_SUCCESS == err); + if (err) break; // This call to vm_fault causes an early pmap level resolution // of the mappings created above for kernel mappings, since // faulting in later can't take place from interrupt level. - /* *** ALERT *** */ - /* *** Temporary Workaround *** */ - if ((addressMap == kernel_map) && !(kIOMemoryRedirected & _flags)) { - vm_fault(addressMap, - (vm_map_offset_t)address, - VM_PROT_READ|VM_PROT_WRITE, - FALSE, THREAD_UNINT, NULL, - (vm_map_offset_t)0); + vm_fault(addressMap, + (vm_map_offset_t)trunc_page_64(address), + VM_PROT_READ|VM_PROT_WRITE, + FALSE, THREAD_UNINT, NULL, + (vm_map_offset_t)0); } - /* *** Temporary Workaround *** */ - /* *** ALERT *** */ - sourceOffset += segLen - pageOffset; address += segLen; bytes -= segLen; pageOffset = 0; - } while (bytes && (physAddr = getPhysicalSegment( sourceOffset, &segLen, kIOMemoryMapperNone ))); @@ -3378,8 +3654,7 @@ IOReturn IOMemoryDescriptor::doUnmap( length = ((IOMemoryMap *) __address)->fLength; } - if ((addressMap == kernel_map) - && ((kIOMemoryBufferPageable & _flags) || !_memEntry)) + if ((addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags)) addressMap = IOPageableMapForAddress( address ); #if DEBUG @@ -3421,7 +3696,7 @@ IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) mapping->redirect( safeTask, doRedirect ); if (!doRedirect && !safeTask && pager && (kernel_map == mapping->fAddressMap)) { - err = handleFault( pager, mapping->fAddressMap, mapping->fAddress, mapping->fOffset, mapping->fLength, kIOMapDefaultCache ); + err = populateDevicePager(pager, mapping->fAddressMap, mapping->fAddress, mapping->fOffset, mapping->fLength, kIOMapDefaultCache ); } } @@ -3503,8 +3778,6 @@ IOReturn IOMemoryMap::unmap( void ) if( fAddress && fAddressMap && (0 == fSuperMap) && fMemory && (0 == (fOptions & kIOMapStatic))) { - vm_map_iokit_unmapped_region(fAddressMap, fLength); - err = fMemory->doUnmap(fAddressMap, (IOVirtualAddress) this, 0); } else @@ -3844,12 +4117,12 @@ IOReturn IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, physMem->retain(); } - if (!fRedirUPL) + if (!fRedirUPL && fMemory->_memRef && (1 == fMemory->_memRef->count)) { vm_size_t size = round_page(fLength); int flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; - if (KERN_SUCCESS != memory_object_iopl_request((ipc_port_t) fMemory->_memEntry, 0, &size, &fRedirUPL, + if (KERN_SUCCESS != memory_object_iopl_request(fMemory->_memRef->entries[0].entry, 0, &size, &fRedirUPL, NULL, NULL, &flags)) fRedirUPL = 0; @@ -3857,7 +4130,7 @@ IOReturn IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, if (physMem) { IOUnmapPages( fAddressMap, fAddress, fLength ); - if (false) + if ((false)) physMem->redirect(0, true); } } @@ -3878,7 +4151,7 @@ IOReturn IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, upl_deallocate(fRedirUPL); fRedirUPL = 0; } - if (false && physMem) + if 
((false) && physMem) physMem->redirect(0, false); } } @@ -3985,10 +4258,6 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( kr = mapDesc->doMap( 0, (IOVirtualAddress *) &mapping, options, 0, 0 ); if (kIOReturnSuccess == kr) { - if (0 == (mapping->fOptions & kIOMapStatic)) { - vm_map_iokit_mapped_region(mapping->fAddressMap, length); - } - result = mapping; mapDesc->addMapping(result); result->setMemoryDescriptor(mapDesc, offset); @@ -4090,6 +4359,8 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const { OSSymbol const *keys[2]; OSObject *values[2]; + OSArray * array; + struct SerData { user_addr_t address; user_size_t length; @@ -4100,10 +4371,9 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const IOOptionBits type = _flags & kIOMemoryTypeMask; if (s == NULL) return false; - if (s->previouslySerialized(this)) return true; - // Pretend we are an array. - if (!s->addXMLStartTag(this, "array")) return false; + array = OSArray::withCapacity(4); + if (!array) return (false); nRanges = _rangesCount; vcopy = (SerData *) IOMalloc(sizeof(SerData) * nRanges); @@ -4123,7 +4393,7 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const if (nRanges == _rangesCount) { Ranges vec = _ranges; for (index = 0; index < nRanges; index++) { - user_addr_t addr; IOByteCount len; + mach_vm_address_t addr; mach_vm_size_t len; getAddrLenForInd(addr, len, type, vec, index); vcopy[index].address = addr; vcopy[index].length = len; @@ -4140,8 +4410,7 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const { user_addr_t addr = vcopy[index].address; IOByteCount len = (IOByteCount) vcopy[index].length; - values[0] = - OSNumber::withNumber(addr, sizeof(addr) * 8); + values[0] = OSNumber::withNumber(addr, sizeof(addr) * 8); if (values[0] == 0) { result = false; goto bail; @@ -4156,19 +4425,18 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const result = false; goto bail; } + array->setObject(dict); + dict->release(); values[0]->release(); values[1]->release(); values[0] = values[1] = 0; - - result = dict->serialize(s); - dict->release(); - if (!result) { - goto bail; - } } - result = s->addXMLEndTag("array"); + + result = array->serialize(s); bail: + if (array) + array->release(); if (values[0]) values[0]->release(); if (values[1]) @@ -4179,11 +4447,150 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const keys[1]->release(); if (vcopy) IOFree(vcopy, sizeof(SerData) * nRanges); + return result; } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#if DEVELOPMENT || DEBUG + +extern "C" void IOMemoryDescriptorTest(int x) +{ + IOGeneralMemoryDescriptor * md; + + vm_offset_t data[2]; + vm_size_t bsize = 16*1024*1024; + + vm_size_t srcsize, srcoffset, mapoffset, size; + + kern_return_t kr; + + kr = vm_allocate(kernel_map, &data[0], bsize, VM_FLAGS_ANYWHERE); + vm_inherit(kernel_map, data[0] + 1*4096, 4096, VM_INHERIT_NONE); + vm_inherit(kernel_map, data[0] + 16*4096, 4096, VM_INHERIT_NONE); + + kprintf("data 0x%lx, 0x%lx\n", (long)data[0], (long)data[1]); + + uint32_t idx, offidx; + for (idx = 0; idx < (bsize / sizeof(uint32_t)); idx++) + { + ((uint32_t*)data[0])[idx] = idx; + } + + for (srcoffset = 0; srcoffset < bsize; srcoffset = ((srcoffset << 1) + 0x40c)) + { + for (srcsize = 4; srcsize < (bsize - srcoffset - 1); srcsize = ((srcsize << 1) + 0x3fc)) + { + IOAddressRange ranges[3]; + uint32_t rangeCount = 1; + + bzero(&ranges[0], sizeof(ranges)); + ranges[0].address = data[0] + srcoffset; + ranges[0].length = srcsize; + + 
if (srcsize > 5*page_size)
+	    {
+		ranges[0].length = 7634;
+		ranges[1].length = 9870;
+		ranges[2].length = srcsize - ranges[0].length - ranges[1].length;
+		ranges[1].address = ranges[0].address + ranges[0].length;
+		ranges[2].address = ranges[1].address + ranges[1].length;
+		rangeCount = 3;
+	    }
+	    else if ((srcsize > 2*page_size) && !(page_mask & srcoffset))
+	    {
+		ranges[0].length = 4096;
+		ranges[1].length = 4096;
+		ranges[2].length = srcsize - ranges[0].length - ranges[1].length;
+		ranges[0].address = data[0] + srcoffset + 4096;
+		ranges[1].address = data[0] + srcoffset;
+		ranges[2].address = ranges[0].address + ranges[0].length;
+		rangeCount = 3;
+	    }
+
+	    md = OSDynamicCast(IOGeneralMemoryDescriptor,
+		IOMemoryDescriptor::withAddressRanges(&ranges[0], rangeCount, kIODirectionInOut, kernel_task));
+	    assert(md);
+
+	    kprintf("IOMemoryReferenceCreate [0x%lx @ 0x%lx]\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx]\n",
+		    (long) srcsize, (long) srcoffset,
+		    (long long) ranges[0].address - data[0], (long long) ranges[0].length,
+		    (long long) ranges[1].address - data[0], (long long) ranges[1].length,
+		    (long long) ranges[2].address - data[0], (long long) ranges[2].length);
+
+	    if (kIOReturnSuccess == kr)
+	    {
+		for (mapoffset = 0; mapoffset < srcsize; mapoffset = ((mapoffset << 1) + 0xf00))
+		{
+		    for (size = 4; size < (srcsize - mapoffset - 1); size = ((size << 1) + 0x20))
+		    {
+			IOMemoryMap     * map;
+			mach_vm_address_t addr = 0;
+			uint32_t          data;
+
+			kprintf("<mapRef [0x%lx @ 0x%lx]\n", (long) size, (long) mapoffset);
+
+			map = md->createMappingInTask(kernel_task, 0, kIOMapAnywhere, mapoffset, size);
+			if (map) addr = map->getAddress();
+			else kr = kIOReturnError;
+
+			kprintf(">mapRef 0x%x %llx\n", kr, addr);
+
+			if (kIOReturnSuccess != kr) break;
+			kr = md->prepare();
+			if (kIOReturnSuccess != kr)
+			{
+			    kprintf("prepare() fail 0x%x\n", kr);
+			    break;
+			}
+			for (idx = 0; idx < size; idx += sizeof(uint32_t))
+			{
+			    offidx = (idx + mapoffset + srcoffset);
+			    if ((srcsize <= 5*page_size) && (srcsize > 2*page_size) && !(page_mask & srcoffset))
+			    {
+				if (offidx < 8192) offidx ^= 0x1000;
+			    }
+			    offidx /= sizeof(uint32_t);
+
+			    if (offidx != ((uint32_t*)addr)[idx/sizeof(uint32_t)])
+			    {
+				kprintf("vm mismatch @ 0x%x, 0x%lx, 0x%lx, \n", idx, (long) srcoffset, (long) mapoffset);
+				kr = kIOReturnBadMedia;
+			    }
+			    else
+			    {
+				if (sizeof(data) != md->readBytes(mapoffset + idx, &data, sizeof(data))) data = 0;
+				if (offidx != data)
+				{
+				    kprintf("phys mismatch @ 0x%x, 0x%lx, 0x%lx, \n", idx, (long) srcoffset, (long) mapoffset);
+				    kr = kIOReturnBadMedia;
+				}
+			    }
+			}
+			md->complete();
+			map->release();
+			kprintf("unmapRef %llx\n", addr);
+		    }
+		    if (kIOReturnSuccess != kr) break;
+		}
+	    }
+	    if (kIOReturnSuccess != kr) break;
+	}
+	if (kIOReturnSuccess != kr) break;
+    }
+
+    if (kIOReturnSuccess != kr) kprintf("FAIL: src 0x%lx @ 0x%lx, map 0x%lx @ 0x%lx\n",
+	    (long) srcsize, (long) srcoffset, (long) size, (long) mapoffset);
+
+    vm_deallocate(kernel_map, data[0], bsize);
+//  vm_deallocate(kernel_map, data[1], size);
+}
+
+#endif  /* DEVELOPMENT || DEBUG */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 0);
 #ifdef __LP64__
 OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 1);
diff --git a/iokit/Kernel/IONVRAM.cpp b/iokit/Kernel/IONVRAM.cpp
index 7b11cb19c..5af96b290 100644
--- a/iokit/Kernel/IONVRAM.cpp
+++ b/iokit/Kernel/IONVRAM.cpp
@@ -122,73 +122,54 @@ void IODTNVRAM::initNVRAMImage(void)
   // Find the offsets for the OF, XPRAM, NameRegistry and PanicInfo partitions.
_ofPartitionOffset = 0xFFFFFFFF; - _xpramPartitionOffset = 0xFFFFFFFF; - _nrPartitionOffset = 0xFFFFFFFF; _piPartitionOffset = 0xFFFFFFFF; freePartitionOffset = 0xFFFFFFFF; freePartitionSize = 0; - if (getPlatform()->getBootROMType()) { - // Look through the partitions to find the OF, MacOS partitions. - while (currentOffset < kIODTNVRAMImageSize) { - currentLength = ((UInt16 *)(_nvramImage + currentOffset))[1] * 16; + + // Look through the partitions to find the OF, MacOS partitions. + while (currentOffset < kIODTNVRAMImageSize) { + currentLength = ((UInt16 *)(_nvramImage + currentOffset))[1] * 16; + + partitionOffset = currentOffset + 16; + partitionLength = currentLength - 16; + + if (strncmp((const char *)_nvramImage + currentOffset + 4, + kIODTNVRAMOFPartitionName, 12) == 0) { + _ofPartitionOffset = partitionOffset; + _ofPartitionSize = partitionLength; + } else if (strncmp((const char *)_nvramImage + currentOffset + 4, + kIODTNVRAMXPRAMPartitionName, 12) == 0) { + } else if (strncmp((const char *)_nvramImage + currentOffset + 4, + kIODTNVRAMPanicInfoPartitonName, 12) == 0) { + _piPartitionOffset = partitionOffset; + _piPartitionSize = partitionLength; + } else if (strncmp((const char *)_nvramImage + currentOffset + 4, + kIODTNVRAMFreePartitionName, 12) == 0) { + freePartitionOffset = currentOffset; + freePartitionSize = currentLength; + } else { + // Construct the partition ID from the signature and name. + snprintf(partitionID, sizeof(partitionID), "0x%02x,", + *(UInt8 *)(_nvramImage + currentOffset)); + strncpy(partitionID + 5, + (const char *)(_nvramImage + currentOffset + 4), 12); + partitionID[17] = '\0'; - partitionOffset = currentOffset + 16; - partitionLength = currentLength - 16; + partitionOffsetNumber = OSNumber::withNumber(partitionOffset, 32); + partitionLengthNumber = OSNumber::withNumber(partitionLength, 32); - if (strncmp((const char *)_nvramImage + currentOffset + 4, - kIODTNVRAMOFPartitionName, 12) == 0) { - _ofPartitionOffset = partitionOffset; - _ofPartitionSize = partitionLength; - } else if (strncmp((const char *)_nvramImage + currentOffset + 4, - kIODTNVRAMXPRAMPartitionName, 12) == 0) { - _xpramPartitionOffset = partitionOffset; - _xpramPartitionSize = kIODTNVRAMXPRAMSize; - _nrPartitionOffset = _xpramPartitionOffset + _xpramPartitionSize; - _nrPartitionSize = partitionLength - _xpramPartitionSize; - } else if (strncmp((const char *)_nvramImage + currentOffset + 4, - kIODTNVRAMPanicInfoPartitonName, 12) == 0) { - _piPartitionOffset = partitionOffset; - _piPartitionSize = partitionLength; - } else if (strncmp((const char *)_nvramImage + currentOffset + 4, - kIODTNVRAMFreePartitionName, 12) == 0) { - freePartitionOffset = currentOffset; - freePartitionSize = currentLength; - } else { - // Construct the partition ID from the signature and name. 
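[Editor's note: the partition walk above implies a 16-byte header per partition. The layout below is inferred from the parsing code (length at byte offset 2 in 16-byte units, name at offset 4) and from calculatePartitionChecksum(); the struct name is hypothetical:]

```cpp
// Assumed on-image layout of an NVRAM partition header, as read by the loop:
// currentLength = ((UInt16 *)(image + off))[1] * 16; name at off + 4.
struct nvram_partition_header {
    uint8_t  sig;       // signature byte, folded into the "0x%02x," partition ID
    uint8_t  cksum;     // header checksum (see calculatePartitionChecksum)
    uint16_t len;       // partition length / 16, header included
    char     name[12];  // partition name, e.g. the OF or free-space partition
};
```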
- snprintf(partitionID, sizeof(partitionID), "0x%02x,", - *(UInt8 *)(_nvramImage + currentOffset)); - strncpy(partitionID + 5, - (const char *)(_nvramImage + currentOffset + 4), 12); - partitionID[17] = '\0'; - - partitionOffsetNumber = OSNumber::withNumber(partitionOffset, 32); - partitionLengthNumber = OSNumber::withNumber(partitionLength, 32); - - // Save the partition offset and length - _nvramPartitionOffsets->setObject(partitionID, partitionOffsetNumber); - _nvramPartitionLengths->setObject(partitionID, partitionLengthNumber); - - partitionOffsetNumber->release(); - partitionLengthNumber->release(); - } - currentOffset += currentLength; + // Save the partition offset and length + _nvramPartitionOffsets->setObject(partitionID, partitionOffsetNumber); + _nvramPartitionLengths->setObject(partitionID, partitionLengthNumber); + + partitionOffsetNumber->release(); + partitionLengthNumber->release(); } - } else { - // Use the fixed address for old world machines. - _ofPartitionOffset = 0x1800; - _ofPartitionSize = 0x0800; - _xpramPartitionOffset = 0x1300; - _xpramPartitionSize = 0x0100; - _nrPartitionOffset = 0x1400; - _nrPartitionSize = 0x0400; + currentOffset += currentLength; } if (_ofPartitionOffset != 0xFFFFFFFF) _ofImage = _nvramImage + _ofPartitionOffset; - if (_xpramPartitionOffset != 0xFFFFFFFF) - _xpramImage = _nvramImage + _xpramPartitionOffset; - if (_nrPartitionOffset != 0xFFFFFFFF) - _nrImage = _nvramImage + _nrPartitionOffset; if (_piPartitionOffset == 0xFFFFFFFF) { if (freePartitionSize > 0x20) { @@ -277,14 +258,18 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const // Verify permissions. hasPrivilege = (kIOReturnSuccess == IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege)); - dict = OSDictionary::withCapacity(1); - if (dict == 0) return false; - if (_ofDict == 0) { /* No nvram. Return an empty dictionary. */ + dict = OSDictionary::withCapacity(1); + if (dict == 0) return false; } else { + IOLockLock(_ofLock); + dict = OSDictionary::withDictionary(_ofDict); + IOLockUnlock(_ofLock); + if (dict == 0) return false; + /* Copy properties with client privilege. */ - iter = OSCollectionIterator::withCollection(_ofDict); + iter = OSCollectionIterator::withCollection(dict); if (iter == 0) { dict->release(); return false; @@ -295,9 +280,8 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const variablePerm = getOFVariablePerm(key); if ((hasPrivilege || (variablePerm != kOFVariablePermRootOnly)) && - ( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )) { - dict->setObject(key, _ofDict->getObject(key)); - } + ( ! 
(variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )) {} + else dict->removeObject(key); } } @@ -309,10 +293,11 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const return result; } -OSObject *IODTNVRAM::getProperty(const OSSymbol *aKey) const +OSObject *IODTNVRAM::copyProperty(const OSSymbol *aKey) const { IOReturn result; UInt32 variablePerm; + OSObject *theObject; if (_ofDict == 0) return 0; @@ -323,24 +308,49 @@ OSObject *IODTNVRAM::getProperty(const OSSymbol *aKey) const if (variablePerm == kOFVariablePermRootOnly) return 0; } if (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) return 0; - - return _ofDict->getObject(aKey); + + IOLockLock(_ofLock); + theObject = _ofDict->getObject(aKey); + if (theObject) theObject->retain(); + IOLockUnlock(_ofLock); + + return theObject; } -OSObject *IODTNVRAM::getProperty(const char *aKey) const +OSObject *IODTNVRAM::copyProperty(const char *aKey) const { const OSSymbol *keySymbol; OSObject *theObject = 0; - keySymbol = OSSymbol::withCStringNoCopy(aKey); + keySymbol = OSSymbol::withCString(aKey); if (keySymbol != 0) { - theObject = getProperty(keySymbol); + theObject = copyProperty(keySymbol); keySymbol->release(); } return theObject; } +OSObject *IODTNVRAM::getProperty(const OSSymbol *aKey) const +{ + OSObject *theObject; + + theObject = copyProperty(aKey); + if (theObject) theObject->release(); + + return theObject; +} + +OSObject *IODTNVRAM::getProperty(const char *aKey) const +{ + OSObject *theObject; + + theObject = copyProperty(aKey); + if (theObject) theObject->release(); + + return theObject; +} + bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject) { bool result; @@ -358,11 +368,6 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject) } if (propPerm == kOFVariablePermKernelOnly && current_task() != kernel_task) return 0; - // Don't allow creation of new properties on old world machines. - if (getPlatform()->getBootROMType() == 0) { - if (_ofDict->getObject(aKey) == 0) return false; - } - // Don't allow change of 'aapl,panic-info'. if (aKey->isEqualTo(kIODTNVRAMPanicInfoKey)) return false; @@ -394,14 +399,12 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject) } if (propObject == 0) return false; - + + IOLockLock(_ofLock); result = _ofDict->setObject(aKey, propObject); + IOLockUnlock(_ofLock); if (result) { - if (getPlatform()->getBootROMType() == 0) { - updateOWBootArgs(aKey, propObject); - } - _ofImageDirty = true; } @@ -423,19 +426,18 @@ void IODTNVRAM::removeProperty(const OSSymbol *aKey) } if (propPerm == kOFVariablePermKernelOnly && current_task() != kernel_task) return; - // Don't allow removal of properties on old world machines. - if (getPlatform()->getBootROMType() == 0) return; - // Don't allow change of 'aapl,panic-info'. if (aKey->isEqualTo(kIODTNVRAMPanicInfoKey)) return; // If the object exists, remove it from the dictionary. 
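[Editor's note: with the dictionary now guarded by _ofLock, the retained copyProperty() variants above are the safe accessors. A usage sketch; nvram is a hypothetical IODTNVRAM pointer:]

```cpp
// copyProperty() returns the object retained; balance it with release().
OSObject * obj = nvram->copyProperty("boot-args");
if (obj) {
    OSString * str = OSDynamicCast(OSString, obj);
    if (str) IOLog("boot-args: %s\n", str->getCStringNoCopy());
    obj->release();
}
```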
+ + IOLockLock(_ofLock); result = _ofDict->getObject(aKey) != 0; if (result) { _ofDict->removeObject(aKey); - _ofImageDirty = true; } + IOLockUnlock(_ofLock); } IOReturn IODTNVRAM::setProperties(OSObject *properties) @@ -498,31 +500,13 @@ IOReturn IODTNVRAM::setProperties(OSObject *properties) IOReturn IODTNVRAM::readXPRAM(IOByteCount offset, UInt8 *buffer, IOByteCount length) { - if (_xpramImage == 0) return kIOReturnUnsupported; - - if ((buffer == 0) || (length == 0) || - (offset + length > kIODTNVRAMXPRAMSize)) - return kIOReturnBadArgument; - - bcopy(_nvramImage + _xpramPartitionOffset + offset, buffer, length); - - return kIOReturnSuccess; + return kIOReturnUnsupported; } IOReturn IODTNVRAM::writeXPRAM(IOByteCount offset, UInt8 *buffer, IOByteCount length) { - if (_xpramImage == 0) return kIOReturnUnsupported; - - if ((buffer == 0) || (length == 0) || - (offset + length > kIODTNVRAMXPRAMSize)) - return kIOReturnBadArgument; - - bcopy(buffer, _nvramImage + _xpramPartitionOffset + offset, length); - - _nvramImageDirty = true; - - return kIOReturnSuccess; + return kIOReturnUnsupported; } IOReturn IODTNVRAM::readNVRAMProperty(IORegistryEntry *entry, @@ -531,10 +515,7 @@ IOReturn IODTNVRAM::readNVRAMProperty(IORegistryEntry *entry, { IOReturn err; - if (getPlatform()->getBootROMType()) - err = readNVRAMPropertyType1(entry, name, value); - else - err = readNVRAMPropertyType0(entry, name, value); + err = readNVRAMPropertyType1(entry, name, value); return err; } @@ -545,10 +526,7 @@ IOReturn IODTNVRAM::writeNVRAMProperty(IORegistryEntry *entry, { IOReturn err; - if (getPlatform()->getBootROMType()) - err = writeNVRAMPropertyType1(entry, name, value); - else - err = writeNVRAMPropertyType0(entry, name, value); + err = writeNVRAMPropertyType1(entry, name, value); return err; } @@ -655,274 +633,131 @@ UInt8 IODTNVRAM::calculatePartitionChecksum(UInt8 *partitionHeader) return csum; } -struct OWVariablesHeader { - UInt16 owMagic; - UInt8 owVersion; - UInt8 owPages; - UInt16 owChecksum; - UInt16 owHere; - UInt16 owTop; - UInt16 owNext; - UInt32 owFlags; - UInt32 owNumbers[9]; - struct { - UInt16 offset; - UInt16 length; - } owStrings[10]; -}; -typedef struct OWVariablesHeader OWVariablesHeader; - IOReturn IODTNVRAM::initOFVariables(void) { - UInt32 cnt, propOffset, propType; + UInt32 cnt; UInt8 *propName, *propData; UInt32 propNameLength, propDataLength; const OSSymbol *propSymbol; OSObject *propObject; - OWVariablesHeader *owHeader; if (_ofImage == 0) return kIOReturnNotReady; - _ofDict = OSDictionary::withCapacity(1); - if (_ofDict == 0) return kIOReturnNoMemory; + _ofDict = OSDictionary::withCapacity(1); + _ofLock = IOLockAlloc(); + if (!_ofDict || !_ofLock) return kIOReturnNoMemory; - if (getPlatform()->getBootROMType()) { - cnt = 0; - while (cnt < _ofPartitionSize) { - // Break if there is no name. - if (_ofImage[cnt] == '\0') break; - - // Find the length of the name. - propName = _ofImage + cnt; - for (propNameLength = 0; (cnt + propNameLength) < _ofPartitionSize; - propNameLength++) { - if (_ofImage[cnt + propNameLength] == '=') break; - } - - // Break if the name goes past the end of the partition. - if ((cnt + propNameLength) >= _ofPartitionSize) break; - cnt += propNameLength + 1; - - propData = _ofImage + cnt; - for (propDataLength = 0; (cnt + propDataLength) < _ofPartitionSize; - propDataLength++) { - if (_ofImage[cnt + propDataLength] == '\0') break; - } - - // Break if the data goes past the end of the partition. 
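[Editor's note: the initOFVariables() loop below walks the OF partition as a packed sequence of "name=value\0" records. A standalone restatement of that scan, illustrative only:]

```cpp
// Advance *cnt over one record; returns false at the terminating NUL or if a
// record runs past the end of the partition, mirroring the loop's break cases.
static bool nextOFVariable(const uint8_t * img, uint32_t size, uint32_t * cnt,
                           const char ** name, const char ** value)
{
    if ((*cnt >= size) || (img[*cnt] == '\0')) return false;
    *name = (const char *) &img[*cnt];
    while ((*cnt < size) && (img[*cnt] != '=')) (*cnt)++;    // scan the name
    if (*cnt >= size) return false;
    (*cnt)++;                                                // skip '='
    *value = (const char *) &img[*cnt];
    while ((*cnt < size) && (img[*cnt] != '\0')) (*cnt)++;   // scan the value
    if (*cnt >= size) return false;
    (*cnt)++;                                                // skip '\0'
    return true;
}
```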
- if ((cnt + propDataLength) >= _ofPartitionSize) break; - cnt += propDataLength + 1; - - if (convertPropToObject(propName, propNameLength, - propData, propDataLength, - &propSymbol, &propObject)) { - _ofDict->setObject(propSymbol, propObject); - propSymbol->release(); - propObject->release(); - } - } + cnt = 0; + while (cnt < _ofPartitionSize) { + // Break if there is no name. + if (_ofImage[cnt] == '\0') break; - // Create the boot-args property if it is not in the dictionary. - if (_ofDict->getObject("boot-args") == 0) { - propObject = OSString::withCStringNoCopy(""); - if (propObject != 0) { - _ofDict->setObject("boot-args", propObject); - propObject->release(); - } + // Find the length of the name. + propName = _ofImage + cnt; + for (propNameLength = 0; (cnt + propNameLength) < _ofPartitionSize; + propNameLength++) { + if (_ofImage[cnt + propNameLength] == '=') break; } - // Create the 'aapl,panic-info' property if needed. - if (_piImage != 0) { - propDataLength = *(UInt32 *)_piImage; - if ((propDataLength != 0) && (propDataLength <= (_piPartitionSize - 4))) { - propObject = OSData::withBytes(_piImage + 4, propDataLength); - _ofDict->setObject(kIODTNVRAMPanicInfoKey, propObject); - propObject->release(); - - // Clear the length from _piImage and mark dirty. - *(UInt32 *)_piImage = 0; - _nvramImageDirty = true; - } - } - } else { - owHeader = (OWVariablesHeader *)_ofImage; - if (!validateOWChecksum(_ofImage)) { - _ofDict->release(); - _ofDict = 0; - return kIOReturnBadMedia; + // Break if the name goes past the end of the partition. + if ((cnt + propNameLength) >= _ofPartitionSize) break; + cnt += propNameLength + 1; + + propData = _ofImage + cnt; + for (propDataLength = 0; (cnt + propDataLength) < _ofPartitionSize; + propDataLength++) { + if (_ofImage[cnt + propDataLength] == '\0') break; } - cnt = 0; - while (1) { - if (!getOWVariableInfo(cnt++, &propSymbol, &propType, &propOffset)) - break; - - switch (propType) { - case kOFVariableTypeBoolean : - propObject = OSBoolean::withBoolean(owHeader->owFlags & propOffset); - break; - - case kOFVariableTypeNumber : - propObject = OSNumber::withNumber(owHeader->owNumbers[propOffset], 32); - break; - - case kOFVariableTypeString : - propData = _ofImage + owHeader->owStrings[propOffset].offset - - _ofPartitionOffset; - propDataLength = owHeader->owStrings[propOffset].length; - propName = IONew(UInt8, propDataLength + 1); - if (propName != 0) { - strncpy((char *)propName, (const char *)propData, propDataLength); - propName[propDataLength] = '\0'; - propObject = OSString::withCString((const char *)propName); - IODelete(propName, UInt8, propDataLength + 1); - } - break; - } - - if (propObject == 0) break; - + // Break if the data goes past the end of the partition. + if ((cnt + propDataLength) >= _ofPartitionSize) break; + cnt += propDataLength + 1; + + if (convertPropToObject(propName, propNameLength, + propData, propDataLength, + &propSymbol, &propObject)) { _ofDict->setObject(propSymbol, propObject); propSymbol->release(); propObject->release(); } - - // Create the boot-args property. - propSymbol = OSSymbol::withCString("boot-command"); - if (propSymbol != 0) { - propObject = _ofDict->getObject(propSymbol); - if (propObject != 0) { - updateOWBootArgs(propSymbol, propObject); - } - propSymbol->release(); + } + + // Create the boot-args property if it is not in the dictionary. 
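/*
 * Illustration only, not part of the patch: the loop above walks the OF
 * variables partition, which stores records back-to-back as "name=value"
 * pairs, each value terminated by a NUL byte. Parsing stops at the first
 * empty name or when a record would run past the partition. A freestanding
 * sketch of the same scan (buf, len, and emit are hypothetical parameters):
 */
static void sketchScanOFPartition(const unsigned char *buf, unsigned long len,
    void (*emit)(const unsigned char *name, unsigned long nameLen,
                 const unsigned char *data, unsigned long dataLen))
{
    unsigned long cnt = 0;

    while (cnt < len) {
        if (buf[cnt] == '\0') break;              // empty name: end of variables

        const unsigned char *name = buf + cnt;
        unsigned long nameLen = 0;
        while ((cnt + nameLen) < len && buf[cnt + nameLen] != '=') nameLen++;
        if ((cnt + nameLen) >= len) break;        // name ran off the partition
        cnt += nameLen + 1;                       // step over '='

        const unsigned char *data = buf + cnt;
        unsigned long dataLen = 0;
        while ((cnt + dataLen) < len && buf[cnt + dataLen] != '\0') dataLen++;
        if ((cnt + dataLen) >= len) break;        // value ran off the partition
        cnt += dataLen + 1;                       // step over the NUL

        emit(name, nameLen, data, dataLen);
    }
}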
+ if (_ofDict->getObject("boot-args") == 0) { + propObject = OSString::withCStringNoCopy(""); + if (propObject != 0) { + _ofDict->setObject("boot-args", propObject); + propObject->release(); } } + // Create the 'aapl,panic-info' property if needed. + if (_piImage != 0) { + propDataLength = *(UInt32 *)_piImage; + if ((propDataLength != 0) && (propDataLength <= (_piPartitionSize - 4))) { + propObject = OSData::withBytes(_piImage + 4, propDataLength); + _ofDict->setObject(kIODTNVRAMPanicInfoKey, propObject); + propObject->release(); + + // Clear the length from _piImage and mark dirty. + *(UInt32 *)_piImage = 0; + _nvramImageDirty = true; + } + } + return kIOReturnSuccess; } IOReturn IODTNVRAM::syncOFVariables(void) { bool ok; - UInt32 cnt, length, maxLength; - UInt32 curOffset, tmpOffset, tmpType, tmpDataLength; + UInt32 length, maxLength; UInt8 *buffer, *tmpBuffer; - const UInt8 *tmpData; const OSSymbol *tmpSymbol; OSObject *tmpObject; - OSBoolean *tmpBoolean; - OSNumber *tmpNumber; - OSString *tmpString; OSCollectionIterator *iter; - OWVariablesHeader *owHeader, *owHeaderOld; if ((_ofImage == 0) || (_ofDict == 0)) return kIOReturnNotReady; if (!_ofImageDirty) return kIOReturnSuccess; - if (getPlatform()->getBootROMType()) { - buffer = tmpBuffer = IONew(UInt8, _ofPartitionSize); - if (buffer == 0) return kIOReturnNoMemory; - bzero(buffer, _ofPartitionSize); - - ok = true; - maxLength = _ofPartitionSize; - - iter = OSCollectionIterator::withCollection(_ofDict); - if (iter == 0) ok = false; - - while (ok) { - tmpSymbol = OSDynamicCast(OSSymbol, iter->getNextObject()); - if (tmpSymbol == 0) break; - - // Don't save 'aapl,panic-info'. - if (tmpSymbol->isEqualTo(kIODTNVRAMPanicInfoKey)) continue; - - tmpObject = _ofDict->getObject(tmpSymbol); - - length = maxLength; - ok = convertObjectToProp(tmpBuffer, &length, tmpSymbol, tmpObject); - if (ok) { - tmpBuffer += length; - maxLength -= length; - } - } - iter->release(); - - if (ok) { - bcopy(buffer, _ofImage, _ofPartitionSize); - } - - IODelete(buffer, UInt8, _ofPartitionSize); - - if (!ok) return kIOReturnBadArgument; - } else { - buffer = IONew(UInt8, _ofPartitionSize); - if (buffer == 0) return kIOReturnNoMemory; - bzero(buffer, _ofPartitionSize); - - owHeader = (OWVariablesHeader *)buffer; - owHeaderOld = (OWVariablesHeader *)_ofImage; - - owHeader->owMagic = owHeaderOld->owMagic; - owHeader->owVersion = owHeaderOld->owVersion; - owHeader->owPages = owHeaderOld->owPages; + buffer = tmpBuffer = IONew(UInt8, _ofPartitionSize); + if (buffer == 0) return kIOReturnNoMemory; + bzero(buffer, _ofPartitionSize); + + ok = true; + maxLength = _ofPartitionSize; + + IOLockLock(_ofLock); + iter = OSCollectionIterator::withCollection(_ofDict); + if (iter == 0) ok = false; + + while (ok) { + tmpSymbol = OSDynamicCast(OSSymbol, iter->getNextObject()); + if (tmpSymbol == 0) break; - curOffset = _ofPartitionSize; + // Don't save 'aapl,panic-info'. 
+ if (tmpSymbol->isEqualTo(kIODTNVRAMPanicInfoKey)) continue; - ok = true; - cnt = 0; - while (ok) { - if (!getOWVariableInfo(cnt++, &tmpSymbol, &tmpType, &tmpOffset)) - break; - - tmpObject = _ofDict->getObject(tmpSymbol); - - switch (tmpType) { - case kOFVariableTypeBoolean : - tmpBoolean = OSDynamicCast(OSBoolean, tmpObject); - if (tmpBoolean->getValue()) owHeader->owFlags |= tmpOffset; - break; - - case kOFVariableTypeNumber : - tmpNumber = OSDynamicCast(OSNumber, tmpObject); - owHeader->owNumbers[tmpOffset] = tmpNumber->unsigned32BitValue(); - break; - - case kOFVariableTypeString : - tmpString = OSDynamicCast(OSString, tmpObject); - tmpData = (const UInt8 *)tmpString->getCStringNoCopy(); - tmpDataLength = tmpString->getLength(); - - if ((curOffset - tmpDataLength) < sizeof(OWVariablesHeader)) { - ok = false; - break; - } - - owHeader->owStrings[tmpOffset].length = tmpDataLength; - curOffset -= tmpDataLength; - owHeader->owStrings[tmpOffset].offset = curOffset + _ofPartitionOffset; - if (tmpDataLength != 0) - bcopy(tmpData, buffer + curOffset, tmpDataLength); - break; - } - } + tmpObject = _ofDict->getObject(tmpSymbol); + length = maxLength; + ok = convertObjectToProp(tmpBuffer, &length, tmpSymbol, tmpObject); if (ok) { - owHeader->owHere = _ofPartitionOffset + sizeof(OWVariablesHeader); - owHeader->owTop = _ofPartitionOffset + curOffset; - owHeader->owNext = 0; - - owHeader->owChecksum = 0; - owHeader->owChecksum = ~generateOWChecksum(buffer); - - bcopy(buffer, _ofImage, _ofPartitionSize); + tmpBuffer += length; + maxLength -= length; } - - IODelete(buffer, UInt8, _ofPartitionSize); - - if (!ok) return kIOReturnBadArgument; } + iter->release(); + IOLockUnlock(_ofLock); + + if (ok) { + bcopy(buffer, _ofImage, _ofPartitionSize); + } + + IODelete(buffer, UInt8, _ofPartitionSize); + + if (!ok) return kIOReturnBadArgument; _ofImageDirty = false; _nvramImageDirty = true; @@ -1296,53 +1131,8 @@ void IODTNVRAM::updateOWBootArgs(const OSSymbol *key, OSObject *value) } } - -// Private methods for Name Registry access. 
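/*
 * Illustration only, not part of the patch: syncOFVariables() above now
 * serializes unconditionally. It snapshots the dictionary into a scratch
 * buffer while holding the lock, and the caller commits the buffer over the
 * image only if every variable fit. A condensed sketch of that
 * iterate-and-bound shape; appendRecord() is a hypothetical stand-in for
 * convertObjectToProp() that handles only OSString values:
 */
#include <string.h>
#include <IOKit/IOLocks.h>
#include <libkern/c++/OSCollectionIterator.h>
#include <libkern/c++/OSDictionary.h>
#include <libkern/c++/OSString.h>
#include <libkern/c++/OSSymbol.h>

static bool appendRecord(unsigned char *buf, unsigned long *ioLen,
                         const OSSymbol *key, OSObject *value)
{
    OSString *str = OSDynamicCast(OSString, value);
    if (!str) return false;

    unsigned long need = key->getLength() + 1 + str->getLength() + 1;
    if (need > *ioLen) return false;              // would not fit: abort the sync

    unsigned char *p = buf;
    memcpy(p, key->getCStringNoCopy(), key->getLength());  p += key->getLength();
    *p++ = '=';
    memcpy(p, str->getCStringNoCopy(), str->getLength());  p += str->getLength();
    *p = '\0';
    *ioLen = need;
    return true;
}

static bool sketchSyncDict(IOLock *lock, OSDictionary *dict,
                           unsigned char *scratch, unsigned long size)
{
    bool ok = true;
    unsigned long space = size;
    unsigned char *p = scratch;

    IOLockLock(lock);
    OSCollectionIterator *iter = OSCollectionIterator::withCollection(dict);
    if (!iter) ok = false;

    while (ok) {
        const OSSymbol *key = OSDynamicCast(OSSymbol, iter->getNextObject());
        if (!key) break;                          // every entry serialized

        unsigned long length = space;
        ok = appendRecord(p, &length, key, dict->getObject(key));
        if (ok) { p += length; space -= length; }
    }
    if (iter) iter->release();
    IOLockUnlock(lock);

    return ok;                                    // commit scratch only if true
}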
- -enum { - kMaxNVNameLength = 4, - kMaxNVDataLength = 8 -}; - -struct NVRAMProperty -{ - IONVRAMDescriptor header; - UInt8 nameLength; - UInt8 name[ kMaxNVNameLength ]; - UInt8 dataLength; - UInt8 data[ kMaxNVDataLength ]; -}; - bool IODTNVRAM::searchNVRAMProperty(IONVRAMDescriptor *hdr, UInt32 *where) { - UInt32 offset; - SInt32 nvEnd; - - nvEnd = *((UInt16 *)_nrImage); - if(getPlatform()->getBootROMType()) { - // on NewWorld, offset to partition start - nvEnd -= 0x100; - } else { - // on old world, absolute - nvEnd -= _nrPartitionOffset; - } - if((nvEnd < 0) || (nvEnd >= kIODTNVRAMNameRegistrySize)) - nvEnd = 2; - - offset = 2; - while ((offset + sizeof(NVRAMProperty)) <= (UInt32)nvEnd) { - if (bcmp(_nrImage + offset, hdr, sizeof(*hdr)) == 0) { - *where = offset; - return true; - } - offset += sizeof(NVRAMProperty); - } - - if ((nvEnd + sizeof(NVRAMProperty)) <= kIODTNVRAMNameRegistrySize) - *where = nvEnd; - else - *where = 0; - return false; } @@ -1350,88 +1140,18 @@ IOReturn IODTNVRAM::readNVRAMPropertyType0(IORegistryEntry *entry, const OSSymbol **name, OSData **value) { - IONVRAMDescriptor hdr; - NVRAMProperty *prop; - IOByteCount length; - UInt32 offset; - IOReturn err; - char nameBuf[kMaxNVNameLength + 1]; - - if (_nrImage == 0) return kIOReturnUnsupported; - if ((entry == 0) || (name == 0) || (value == 0)) return kIOReturnBadArgument; - - err = IODTMakeNVDescriptor(entry, &hdr); - if (err != kIOReturnSuccess) return err; - - if (searchNVRAMProperty(&hdr, &offset)) { - prop = (NVRAMProperty *)(_nrImage + offset); - - length = prop->nameLength; - if (length > kMaxNVNameLength) length = kMaxNVNameLength; - strncpy(nameBuf, (const char *)prop->name, length); - nameBuf[length] = 0; - *name = OSSymbol::withCString(nameBuf); - - length = prop->dataLength; - if (length > kMaxNVDataLength) length = kMaxNVDataLength; - *value = OSData::withBytes(prop->data, length); - - if ((*name != 0) && (*value != 0)) return kIOReturnSuccess; - else return kIOReturnNoMemory; - } - - return kIOReturnNoResources; + return kIOReturnUnsupported; } + IOReturn IODTNVRAM::writeNVRAMPropertyType0(IORegistryEntry *entry, const OSSymbol *name, OSData *value) { - IONVRAMDescriptor hdr; - NVRAMProperty *prop; - IOByteCount nameLength; - IOByteCount dataLength; - UInt32 offset; - IOReturn err; - UInt16 nvLength; - bool exists; - - if (_nrImage == 0) return kIOReturnUnsupported; - if ((entry == 0) || (name == 0) || (value == 0)) return kIOReturnBadArgument; - - nameLength = name->getLength(); - dataLength = value->getLength(); - if (nameLength > kMaxNVNameLength) return kIOReturnNoSpace; - if (dataLength > kMaxNVDataLength) return kIOReturnNoSpace; - - err = IODTMakeNVDescriptor(entry, &hdr); - if (err != kIOReturnSuccess) return err; - - exists = searchNVRAMProperty(&hdr, &offset); - if (offset == 0) return kIOReturnNoMemory; - - prop = (NVRAMProperty *)(_nrImage + offset); - if (!exists) bcopy(&hdr, &prop->header, sizeof(hdr)); - - prop->nameLength = nameLength; - bcopy(name->getCStringNoCopy(), prop->name, nameLength); - prop->dataLength = dataLength; - bcopy(value->getBytesNoCopy(), prop->data, dataLength); - - if (!exists) { - nvLength = offset + sizeof(NVRAMProperty); - if (getPlatform()->getBootROMType()) - nvLength += 0x100; - else - nvLength += _nrPartitionOffset; - *((UInt16 *)_nrImage) = nvLength; - } - - _nvramImageDirty = true; - - return err; + return kIOReturnUnsupported; } + OSData *IODTNVRAM::unescapeBytesToData(const UInt8 *bytes, UInt32 length) { OSData *data = 0; @@ -1543,7 +1263,11 @@ IOReturn 
IODTNVRAM::readNVRAMPropertyType1(IORegistryEntry *entry, UInt8 byte; if (_ofDict == 0) return err; + + IOLockLock(_ofLock); data = OSDynamicCast(OSData, _ofDict->getObject(_registryPropertiesKey)); + IOLockUnlock(_ofLock); + if (data == 0) return err; startPtr = (const UInt8 *) data->getBytesNoCopy(); @@ -1613,6 +1337,8 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType1(IORegistryEntry *entry, // copy over existing properties for other entries + IOLockLock(_ofLock); + oldData = OSDynamicCast(OSData, _ofDict->getObject(_registryPropertiesKey)); if (oldData) { startPtr = (const UInt8 *) oldData->getBytesNoCopy(); @@ -1657,62 +1383,55 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType1(IORegistryEntry *entry, data = OSData::withData(oldData); else data = OSData::withCapacity(16); - if (!data) - return kIOReturnNoMemory; + if (!data) ok = false; } - if (value && value->getLength()) { - // get entries in path - OSArray *array = OSArray::withCapacity(5); - if (!array) { - data->release(); - return kIOReturnNoMemory; - } - do - array->setObject(entry); - while ((entry = entry->getParentEntry(gIODTPlane))); - - // append path - for (int i = array->getCount() - 3; - (entry = (IORegistryEntry *) array->getObject(i)); - i--) { - - name = entry->getName(gIODTPlane); - comp = entry->getLocation(gIODTPlane); - if( comp && (0 == strncmp("pci", name, sizeof("pci"))) - && (0 == strncmp("80000000", comp, sizeof("80000000")))) { - // yosemite hack - comp = "/pci@80000000"; - } else { - if (comp) - ok &= data->appendBytes("/@", 2); - else { - if (!name) - continue; - ok &= data->appendByte('/', 1); - comp = name; - } - } - ok &= data->appendBytes(comp, strlen(comp)); - } - ok &= data->appendByte(0, 1); - array->release(); - - // append prop name - ok &= data->appendBytes(propName->getCStringNoCopy(), propName->getLength() + 1); - - // append escaped data - oldData = escapeDataToData(value); - ok &= (oldData != 0); - if (ok) - ok &= data->appendBytes(oldData); - } + if (ok && value && value->getLength()) do { + // get entries in path + OSArray *array = OSArray::withCapacity(5); + if (!array) { + ok = false; + break; + } + do + array->setObject(entry); + while ((entry = entry->getParentEntry(gIODTPlane))); + + // append path + for (int i = array->getCount() - 3; + (entry = (IORegistryEntry *) array->getObject(i)); + i--) { + + name = entry->getName(gIODTPlane); + comp = entry->getLocation(gIODTPlane); + if (comp) ok &= data->appendBytes("/@", 2); + else { + if (!name) continue; + ok &= data->appendByte('/', 1); + comp = name; + } + ok &= data->appendBytes(comp, strlen(comp)); + } + ok &= data->appendByte(0, 1); + array->release(); + + // append prop name + ok &= data->appendBytes(propName->getCStringNoCopy(), propName->getLength() + 1); + + // append escaped data + oldData = escapeDataToData(value); + ok &= (oldData != 0); + if (ok) ok &= data->appendBytes(oldData); + + } while (false); + if (ok) { ok = _ofDict->setObject(_registryPropertiesKey, data); - if (ok) - _ofImageDirty = true; + if (ok) _ofImageDirty = true; } - data->release(); + + IOLockUnlock(_ofLock); + if (data) data->release(); return ok ? 
kIOReturnSuccess : kIOReturnNoMemory; } diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 7221295fa..ddce48807 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -57,7 +58,7 @@ #include #include -#include "IOServicePrivate.h" // _IOServiceInterestNotifier +#include "IOServicePrivate.h" // _IOServiceInterestNotifier #include "IOServicePMPrivate.h" __BEGIN_DECLS @@ -73,7 +74,6 @@ __END_DECLS #define kIOPMrootDomainClass "IOPMrootDomain" #define LOG_PREFIX "PMRD: " -#define OBFUSCATE(x) ((void *)(VM_KERNEL_ADDRPERM(x))) #define MSG(x...) \ do { kprintf(LOG_PREFIX x); IOLog(x); } while (false) @@ -82,13 +82,18 @@ __END_DECLS do { kprintf(LOG_PREFIX x); } while (false) #define DLOG(x...) do { \ - if (kIOLogPMRootDomain & gIOKitDebug) \ + if (kIOLogPMRootDomain & gIOKitDebug) \ kprintf(LOG_PREFIX x); \ - gRootDomain->sleepWakeDebugLog(x);} while (false) +} while (false) + +#define DMSG(x...) do { \ + if (kIOLogPMRootDomain & gIOKitDebug) { \ + kprintf(LOG_PREFIX x); IOLog(x); \ + } \ +} while (false) -#define _LOG(x...) -#define SUSPEND_PM_NOTIFICATIONS_DEBUG 1 +#define _LOG(x...) #define CHECK_THREAD_CONTEXT #ifdef CHECK_THREAD_CONTEXT @@ -139,8 +144,7 @@ enum { kPowerEventAssertionSetLevel, // 11 kPowerEventQueueSleepWakeUUID, // 12 kPowerEventPublishSleepWakeUUID, // 13 - kPowerEventSuspendClient, // 14 - kPowerEventSetDisplayPowerOn // 15 + kPowerEventSetDisplayPowerOn // 14 }; // For evaluatePolicy() @@ -163,8 +167,8 @@ enum { extern "C" { IOReturn OSKextSystemSleepOrWake( UInt32 ); } -extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); -extern "C" addr64_t kvtophys(vm_offset_t va); +extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); +extern "C" addr64_t kvtophys(vm_offset_t va); extern "C" int stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced); static void idleSleepTimerExpired( thread_call_param_t, thread_call_param_t ); @@ -188,7 +192,7 @@ static const OSSymbol *sleepMessagePEFunction = NULL; | kIOPMSupportedOnBatt \ | kIOPMSupportedOnUPS) -enum +enum { // not idle around autowake time, secs kAutoWakePreWindow = 45, @@ -220,27 +224,29 @@ enum { static IOPMPowerState ourPowerStates[NUM_POWER_STATES] = { {1, 0, 0, 0, 0,0,0,0,0,0,0,0}, - {1, kIOPMRestartCapability, kIOPMRestart, RESTART_POWER, 0,0,0,0,0,0,0,0}, + {1, kIOPMRestartCapability, kIOPMRestart, RESTART_POWER, 0,0,0,0,0,0,0,0}, {1, kIOPMSleepCapability, kIOPMSleep, SLEEP_POWER, 0,0,0,0,0,0,0,0}, {1, kIOPMPowerOn, kIOPMPowerOn, ON_POWER, 0,0,0,0,0,0,0,0} }; -#define kIOPMRootDomainWakeTypeSleepService "SleepService" -#define kIOPMRootDomainWakeTypeMaintenance "Maintenance" -#define kIOPMRootDomainWakeTypeSleepTimer "SleepTimer" -#define kIOPMrootDomainWakeTypeLowBattery "LowBattery" -#define kIOPMRootDomainWakeTypeUser "User" -#define kIOPMRootDomainWakeTypeAlarm "Alarm" -#define kIOPMRootDomainWakeTypeNetwork "Network" -#define kIOPMRootDomainWakeTypeHIDActivity "HID Activity" -#define kIOPMRootDomainWakeTypeNotification "Notification" +#define kIOPMRootDomainWakeTypeSleepService "SleepService" +#define kIOPMRootDomainWakeTypeMaintenance "Maintenance" +#define kIOPMRootDomainWakeTypeSleepTimer "SleepTimer" +#define kIOPMrootDomainWakeTypeLowBattery "LowBattery" +#define kIOPMRootDomainWakeTypeUser "User" +#define kIOPMRootDomainWakeTypeAlarm "Alarm" +#define kIOPMRootDomainWakeTypeNetwork "Network" +#define 
kIOPMRootDomainWakeTypeHIDActivity "HID Activity" +#define kIOPMRootDomainWakeTypeNotification "Notification" +#define kIOPMRootDomainWakeTypeHibernateError "HibernateError" // Special interest that entitles the interested client from receiving // all system messages. Only used by powerd. // #define kIOPMSystemCapabilityInterest "IOPMSystemCapabilityInterest" -#define kPMSuspendedNotificationClients "PMSuspendedNotificationClients" +#define WAKEEVENT_LOCK() IOLockLock(wakeEventLock) +#define WAKEEVENT_UNLOCK() IOLockUnlock(wakeEventLock) /* * Aggressiveness @@ -294,9 +300,6 @@ enum { kDarkWakeFlagHIDTickleLate = 0x02, // hid tickle after gfx suppression kDarkWakeFlagHIDTickleNone = 0x03, // hid tickle is not posted kDarkWakeFlagHIDTickleMask = 0x03, - kDarkWakeFlagIgnoreDiskIOInDark = 0x04, // ignore disk idle in DW - kDarkWakeFlagIgnoreDiskIOAlways = 0x08, // always ignore disk idle - kDarkWakeFlagIgnoreDiskIOMask = 0x0C, kDarkWakeFlagAlarmIsDark = 0x0100, kDarkWakeFlagGraphicsPowerState1 = 0x0200, kDarkWakeFlagAudioNotSuppressed = 0x0400 @@ -312,8 +315,7 @@ static uint32_t gAggressivesState = 0; uuid_string_t bootsessionuuid_string; -static uint32_t gDarkWakeFlags = kDarkWakeFlagHIDTickleNone | kDarkWakeFlagIgnoreDiskIOAlways; - +static uint32_t gDarkWakeFlags = kDarkWakeFlagHIDTickleNone; static PMStatsStruct gPMStats; #if HIBERNATION @@ -325,6 +327,9 @@ static void * gSleepPolicyTarget; struct timeval gIOLastSleepTime; struct timeval gIOLastWakeTime; +static char gWakeReasonString[128]; +static bool gWakeReasonSysctlRegistered = false; + // Constants used as arguments to IOPMrootDomain::informCPUStateChange #define kCPUUnknownIndex 9999999 enum { @@ -336,6 +341,8 @@ enum { const OSSymbol *gIOPMStatsApplicationResponseTimedOut; const OSSymbol *gIOPMStatsApplicationResponseCancel; const OSSymbol *gIOPMStatsApplicationResponseSlow; +const OSSymbol *gIOPMStatsApplicationResponsePrompt; +const OSSymbol *gIOPMStatsDriverPSChangeSlow; #define kBadPMFeatureID 0 @@ -400,74 +407,6 @@ struct PMSettingCallEntry { #define PMSETTING_WAIT(p) IOLockSleep(settingsCtrlLock, p, THREAD_UNINT) #define PMSETTING_WAKEUP(p) IOLockWakeup(settingsCtrlLock, p, true) -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -/* @class IOPMTimeline - * @astract Tracks & records PM activity. - * @discussion Intended for use only as a helper-class to IOPMrootDomain. 
- * Do not subclass or directly invoke iOPMTimeline - */ -class IOPMTimeline : public OSObject -{ - OSDeclareDefaultStructors( IOPMTimeline ); - -public: - static IOPMTimeline* timeline(IOPMrootDomain *root_domain); - - bool setProperties(OSDictionary *d); - OSDictionary *copyInfoDictionary(void); - - IOReturn recordSystemPowerEvent( PMEventDetails *details ); - - IOReturn recordDetailedPowerEvent( PMEventDetails *details ); - - IOMemoryDescriptor *getPMTraceMemoryDescriptor(); - - uint32_t getNumEventsLoggedThisPeriod(); - void setNumEventsLoggedThisPeriod(uint32_t newCount); - bool isSleepCycleInProgress(); - void setSleepCycleInProgressFlag(bool flag); -private: - bool init(void); - void free(void); - - void setEventsTrackedCount(uint32_t newTracked); - void setEventsRecordingLevel(uint32_t eventsTrackedBits); - static uint32_t _atomicIndexIncrement(uint32_t *index, uint32_t limit); - - enum { - kPMTimelineRecordTardyDrivers = 1 << 0, - kPMTmielineRecordSystemEvents = 1 << 1, - kPMTimelineRecordAllDrivers = 1 << 2, - kPMTimelineRecordOff = 0, - kPMTimelineRecordDefault = 3, - kPMTimelineRecordDebug = 7 - }; - - // eventsRecordingLevel is a bitfield defining which PM driver events will get logged - // into the PM buffer. - uint32_t eventsRecordingLevel; - - // pmTraceMemoryDescriptor represents the memory block that IOPMTimeLine records PM trace points into. - IOBufferMemoryDescriptor *pmTraceMemoryDescriptor; - - // Pointer to starting address in pmTraceMemoryDescriptor - IOPMSystemEventRecord *traceBuffer; - IOPMTraceBufferHeader *hdr; - - uint16_t systemState; - - IOLock *logLock; - IOPMrootDomain *owner; - - uint32_t numEventsLoggedThisPeriod; - bool sleepCycleInProgress; -}; - -OSDefineMetaClassAndStructors( IOPMTimeline, OSObject ) - /* * PMTraceWorker * Internal helper object for logging trace points to RTC @@ -492,7 +431,7 @@ public: void traceLoginWindowPhase(uint8_t phase); int recordTopLevelPCIDevice(IOService *); void RTC_TRACE(void); - virtual bool serialize(OSSerialize *s) const; + virtual bool serialize(OSSerialize *s) const; IOPMTracePointHandler tracePointHandler; void * tracePointTarget; @@ -518,7 +457,7 @@ class PMAssertionsTracker : public OSObject OSDeclareFinalStructors(PMAssertionsTracker) public: static PMAssertionsTracker *pmAssertionsTracker( IOPMrootDomain * ); - + IOReturn createAssertion(IOPMDriverAssertionType, IOPMDriverAssertionLevel, IOService *, const char *, IOPMDriverAssertionID *); IOReturn releaseAssertion(IOPMDriverAssertionID); IOReturn setAssertionLevel(IOPMDriverAssertionID, IOPMDriverAssertionLevel); @@ -551,7 +490,7 @@ private: PMAssertStruct *detailsForID(IOPMDriverAssertionID, int *); void tabulate(void); - + IOPMrootDomain *owner; OSArray *assertionsArray; IOLock *assertionsArrayLock; @@ -560,9 +499,9 @@ private: IOPMDriverAssertionType assertionsUser; IOPMDriverAssertionType assertionsCombined; }; - + OSDefineMetaClassAndFinalStructors(PMAssertionsTracker, OSObject); - + /* * PMHaltWorker * Internal helper object for Shutdown/Restart notifications. 
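/*
 * Illustration only, not part of the patch: PMAssertionsTracker above keeps
 * one record per assertion ID in a lock-guarded collection and recomputes a
 * combined bitmask whenever an assertion is created, released, or changes
 * level. A minimal sketch of that tabulation idea over a plain array (the
 * struct layout and names are hypothetical, not the class's real fields):
 */
#include <stdint.h>
#include <IOKit/IOLocks.h>

enum { kSketchMaxAssertions = 64 };

struct SketchAssertion {
    uint64_t id;        // 0 means the slot is free
    uint64_t type;      // assertion type bit(s)
    uint32_t level;     // nonzero while the assertion is raised
};

static IOLock          *gAssertLock;   // assumed: IOLockAlloc() at init
static SketchAssertion  gAssertions[kSketchMaxAssertions];

// OR together the types of every raised assertion, the way tabulate()
// derives a combined assertion state from individual entries.
static uint64_t sketchTabulate(void)
{
    uint64_t combined = 0;

    IOLockLock(gAssertLock);
    for (int i = 0; i < kSketchMaxAssertions; i++) {
        if (gAssertions[i].id && gAssertions[i].level)
            combined |= gAssertions[i].type;
    }
    IOLockUnlock(gAssertLock);

    return combined;
}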
@@ -599,12 +538,12 @@ static void IOPMRootDomainWillShutdown(void) { if (OSCompareAndSwap(0, 1, &gWillShutdown)) { - OSKext::willShutdown(); - for (int i = 0; i < 100; i++) - { - if (OSCompareAndSwap(0, 1, &gSleepOrShutdownPending)) break; - IOSleep( 100 ); - } + OSKext::willShutdown(); + for (int i = 0; i < 100; i++) + { + if (OSCompareAndSwap(0, 1, &gSleepOrShutdownPending)) break; + IOSleep( 100 ); + } } } @@ -629,12 +568,12 @@ extern "C" { return gRootDomain->cancelPowerChange ( (unsigned long)PMrefcon ); } - + IOReturn rootDomainRestart ( void ) { return gRootDomain->restartSystem(); } - + IOReturn rootDomainShutdown ( void ) { return gRootDomain->shutdownSystem(); @@ -642,14 +581,14 @@ extern "C" void IOSystemShutdownNotification(void) { - IOPMRootDomainWillShutdown(); - if (OSCompareAndSwap(0, 1, &gPagingOff)) - { - gRootDomain->handlePlatformHaltRestart(kPEPagingOff); - } + IOPMRootDomainWillShutdown(); + if (OSCompareAndSwap(0, 1, &gPagingOff)) + { + gRootDomain->handlePlatformHaltRestart(kPEPagingOff); + } } - int sync_internal(void); + int sync_internal(void); } /* @@ -721,7 +660,7 @@ static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 ) { sync_internal(); } -#if HIBERNATION +#if HIBERNATION else { IOHibernateSystemPostWake(); @@ -734,19 +673,33 @@ static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 ) //****************************************************************************** +static void hib_debugSetup_callout( thread_call_param_t p0, thread_call_param_t p1 ) +{ + IOService * rootDomain = (IOService *) p0; + uint32_t notifyRef = (uint32_t)(uintptr_t) p1; + +#if HIBERNATION + IOHibernateOpenForDebugData(); +#endif + + rootDomain->allowPowerChange(notifyRef); + DLOG("hib_debugSetup_callout finish\n"); +} +//****************************************************************************** + static UInt32 computeDeltaTimeMS( const AbsoluteTime * startTime ) { - AbsoluteTime endTime; - UInt64 nano = 0; + AbsoluteTime endTime; + UInt64 nano = 0; - clock_get_uptime(&endTime); - if (CMP_ABSOLUTETIME(&endTime, startTime) > 0) - { - SUB_ABSOLUTETIME(&endTime, startTime); - absolutetime_to_nanoseconds(endTime, &nano); - } + clock_get_uptime(&endTime); + if (CMP_ABSOLUTETIME(&endTime, startTime) > 0) + { + SUB_ABSOLUTETIME(&endTime, startTime); + absolutetime_to_nanoseconds(endTime, &nano); + } - return (UInt32)(nano / 1000000ULL); + return (UInt32)(nano / 1000000ULL); } //****************************************************************************** @@ -758,7 +711,7 @@ sysctl_sleepwaketime SYSCTL_HANDLER_ARGS struct proc *p = req->p; if (p == kernproc) { - return sysctl_io_opaque(req, swt, sizeof(*swt), NULL); + return sysctl_io_opaque(req, swt, sizeof(*swt), NULL); } else if(proc_is64bit(p)) { struct user64_timeval t; t.tv_sec = swt->tv_sec; @@ -773,12 +726,12 @@ sysctl_sleepwaketime SYSCTL_HANDLER_ARGS } static SYSCTL_PROC(_kern, OID_AUTO, sleeptime, - CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, - &gIOLastSleepTime, 0, sysctl_sleepwaketime, "S,timeval", ""); + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + &gIOLastSleepTime, 0, sysctl_sleepwaketime, "S,timeval", ""); static SYSCTL_PROC(_kern, OID_AUTO, waketime, - CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, - &gIOLastWakeTime, 0, sysctl_sleepwaketime, "S,timeval", ""); + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + &gIOLastWakeTime, 0, 
sysctl_sleepwaketime, "S,timeval", ""); static int @@ -788,17 +741,17 @@ sysctl_willshutdown int new_value, changed; int error = sysctl_io_number(req, gWillShutdown, sizeof(int), &new_value, &changed); if (changed) { - if (!gWillShutdown && (new_value == 1)) { - IOPMRootDomainWillShutdown(); - } else - error = EINVAL; + if (!gWillShutdown && (new_value == 1)) { + IOPMRootDomainWillShutdown(); + } else + error = EINVAL; } return(error); } static SYSCTL_PROC(_kern, OID_AUTO, willshutdown, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, - 0, 0, sysctl_willshutdown, "I", ""); + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + 0, 0, sysctl_willshutdown, "I", ""); static int @@ -808,10 +761,9 @@ sysctl_progressmeterenable int error; int new_value, changed; - error = sysctl_io_number(req, vc_progress_meter_enable, sizeof(int), &new_value, &changed); + error = sysctl_io_number(req, vc_progressmeter_enable, sizeof(int), &new_value, &changed); - if (changed) - vc_enable_progressmeter(new_value); + if (changed) vc_enable_progressmeter(new_value); return (error); } @@ -823,22 +775,37 @@ sysctl_progressmeter int error; int new_value, changed; - error = sysctl_io_number(req, vc_progress_meter_value, sizeof(int), &new_value, &changed); + error = sysctl_io_number(req, vc_progressmeter_value, sizeof(int), &new_value, &changed); - if (changed) - vc_set_progressmeter(new_value); + if (changed) vc_set_progressmeter(new_value); return (error); } static SYSCTL_PROC(_kern, OID_AUTO, progressmeterenable, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, - 0, 0, sysctl_progressmeterenable, "I", ""); + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + 0, 0, sysctl_progressmeterenable, "I", ""); static SYSCTL_PROC(_kern, OID_AUTO, progressmeter, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, - 0, 0, sysctl_progressmeter, "I", ""); + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + 0, 0, sysctl_progressmeter, "I", ""); + +static int +sysctl_wakereason SYSCTL_HANDLER_ARGS +{ + char wr[ sizeof(gWakeReasonString) ]; + + wr[0] = '\0'; + if (gRootDomain) + gRootDomain->copyWakeReasonString(wr, sizeof(wr)); + + return sysctl_io_string(req, wr, 0, 0, NULL); +} + +SYSCTL_PROC(_kern, OID_AUTO, wakereason, + CTLTYPE_STRING| CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + NULL, 0, sysctl_wakereason, "A", "wakereason"); static SYSCTL_INT(_debug, OID_AUTO, darkwake, CTLFLAG_RW, &gDarkWakeFlags, 0, ""); @@ -882,11 +849,13 @@ bool IOPMrootDomain::start( IOService * nub ) gIOPMStatsApplicationResponseTimedOut = OSSymbol::withCString(kIOPMStatsResponseTimedOut); gIOPMStatsApplicationResponseCancel = OSSymbol::withCString(kIOPMStatsResponseCancel); gIOPMStatsApplicationResponseSlow = OSSymbol::withCString(kIOPMStatsResponseSlow); + gIOPMStatsApplicationResponsePrompt = OSSymbol::withCString(kIOPMStatsResponsePrompt); + gIOPMStatsDriverPSChangeSlow = OSSymbol::withCString(kIOPMStatsDriverPSChangeSlow); sleepSupportedPEFunction = OSSymbol::withCString("IOPMSetSleepSupported"); sleepMessagePEFunction = OSSymbol::withCString("IOPMSystemSleepMessage"); - const OSSymbol *settingsArr[kRootDomainSettingsCount] = + const OSSymbol *settingsArr[kRootDomainSettingsCount] = { OSSymbol::withCString(kIOPMSettingSleepOnPowerButtonKey), gIOPMSettingAutoWakeSecondsKey, @@ -908,7 +877,7 @@ bool IOPMrootDomain::start( IOService * nub ) }; PE_parse_boot_argn("darkwake", 
&gDarkWakeFlags, sizeof(gDarkWakeFlags)); - + queue_init(&aggressivesQueue); aggressivesThreadCall = thread_call_allocate(handleAggressivesFunction, this); aggressivesData = OSData::withCapacity( @@ -916,8 +885,9 @@ bool IOPMrootDomain::start( IOService * nub ) featuresDictLock = IOLockAlloc(); settingsCtrlLock = IOLockAlloc(); + wakeEventLock = IOLockAlloc(); setPMRootDomain(this); - + extraSleepTimer = thread_call_allocate( idleSleepTimerExpired, (thread_call_param_t) this); @@ -925,9 +895,9 @@ bool IOPMrootDomain::start( IOService * nub ) diskSyncCalloutEntry = thread_call_allocate( &disk_sync_callout, (thread_call_param_t) this); - - stackshotOffloader = thread_call_allocate(&saveTimeoutAppStackShot, - (thread_call_param_t) this); + hibDebugSetupEntry = thread_call_allocate( + &hib_debugSetup_callout, + (thread_call_param_t) this); #if DARK_TO_FULL_EVALUATE_CLAMSHELL fullWakeThreadCall = thread_call_allocate( @@ -954,6 +924,7 @@ bool IOPMrootDomain::start( IOService * nub ) clamshellDisabled = true; acAdaptorConnected = true; clamshellSleepDisabled = false; + gWakeReasonString[0] = '\0'; // Initialize to user active. // Will never transition to user inactive w/o wrangler. @@ -981,15 +952,15 @@ bool IOPMrootDomain::start( IOService * nub ) _statsResponseTypeKey = OSSymbol::withCString(kIOPMStatsApplicationResponseTypeKey); _statsMessageTypeKey = OSSymbol::withCString(kIOPMStatsMessageTypeKey); _statsPowerCapsKey = OSSymbol::withCString(kIOPMStatsPowerCapabilityKey); - noAckApps = OSOrderedSet::withCapacity(16); + pmStatsLock = IOLockAlloc(); idxPMCPUClamshell = kCPUUnknownIndex; idxPMCPULimitedPower = kCPUUnknownIndex; - + tmpDict = OSDictionary::withCapacity(1); setProperty(kRootDomainSupportedFeatures, tmpDict); tmpDict->release(); - + settingsCallbacks = OSDictionary::withCapacity(1); // Create a list of the valid PM settings that we'll relay to @@ -1030,21 +1001,6 @@ bool IOPMrootDomain::start( IOService * nub ) registerPowerDriver(this, ourPowerStates, NUM_POWER_STATES); changePowerStateToPriv(ON_STATE); - if (gIOKitDebug & (kIOLogDriverPower1 | kIOLogDriverPower2)) - { - // Setup our PM logging & recording code - timeline = IOPMTimeline::timeline(this); - if (timeline) { - OSDictionary *tlInfo = timeline->copyInfoDictionary(); - - if (tlInfo) - { - setProperty(kIOPMTimelineDictionaryKey, tlInfo); - tlInfo->release(); - } - } - } - // install power change handler gSysPowerDownNotifier = registerPrioritySleepWakeInterest( &sysPowerDownHandler, this, 0); @@ -1052,8 +1008,8 @@ bool IOPMrootDomain::start( IOService * nub ) // Register for a notification when IODisplayWrangler is published if ((tmpDict = serviceMatching("IODisplayWrangler"))) { - _displayWranglerNotifier = addMatchingNotification( - gIOPublishNotification, tmpDict, + _displayWranglerNotifier = addMatchingNotification( + gIOPublishNotification, tmpDict, (IOServiceMatchingNotificationHandler) &displayWranglerMatchPublished, this, 0); tmpDict->release(); @@ -1064,8 +1020,8 @@ bool IOPMrootDomain::start( IOService * nub ) if ((tmpDict = serviceMatching("IODTNVRAM"))) { - notifier = addMatchingNotification( - gIOFirstPublishNotification, tmpDict, + notifier = addMatchingNotification( + gIOFirstPublishNotification, tmpDict, (IOServiceMatchingNotificationHandler) &IONVRAMMatchPublished, this, 0); tmpDict->release(); @@ -1101,147 +1057,23 @@ bool IOPMrootDomain::start( IOService * nub ) if(psIterator) { psIterator->release(); } - - - pmSuspendedCapacity = pmSuspendedSize = 0; - pmSuspendedPIDS = NULL; - 
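/*
 * Illustration only, not part of the patch: start() above allocates all of
 * its deferred work up front with thread_call_allocate() (idle sleep timer,
 * disk sync, and the new hibernate debug setup callout), then arms each one
 * later with a computed deadline. A minimal sketch of that allocate-once /
 * enter-delayed pattern (the callback body and one-second delay are
 * hypothetical):
 */
#include <kern/clock.h>
#include <kern/thread_call.h>
#include <mach/clock_types.h>

static thread_call_t gSketchCall;

static void sketchCallout(thread_call_param_t p0, thread_call_param_t p1)
{
    // deferred work runs here, decoupled from the arming context
}

static void sketchInit(void *owner)
{
    // allocate once, typically at start(); param0 conventionally carries 'this'
    gSketchCall = thread_call_allocate(&sketchCallout,
                                       (thread_call_param_t)owner);
}

static void sketchArmOneSecond(void)
{
    uint64_t deadline;

    clock_interval_to_deadline(1, kSecondScale, &deadline);
    thread_call_enter_delayed(gSketchCall, deadline);
}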
sysctl_register_oid(&sysctl__kern_sleeptime); sysctl_register_oid(&sysctl__kern_waketime); sysctl_register_oid(&sysctl__kern_willshutdown); sysctl_register_oid(&sysctl__kern_progressmeterenable); sysctl_register_oid(&sysctl__kern_progressmeter); + sysctl_register_oid(&sysctl__kern_wakereason); -#if HIBERNATION +#if HIBERNATION IOHibernateSystemInit(this); #endif - registerService(); // let clients find us + registerService(); // let clients find us return true; } - - - -void IOPMrootDomain::handleSuspendPMNotificationClient(uint32_t pid, bool doSuspend) -{ - ASSERT_GATED(); - - int index = -1; - unsigned int i; - - if (!pmSuspendedPIDS) { - pmSuspendedCapacity = 8; - pmSuspendedSize = pmSuspendedCapacity * sizeof(PMNotifySuspendedStruct); - pmSuspendedPIDS = (PMNotifySuspendedStruct *)IOMalloc(pmSuspendedSize); - bzero(pmSuspendedPIDS, pmSuspendedSize); - } - - /* Find the existing pid in the existing array */ - - for (i=0; i < pmSuspendedCapacity; i++) { - if (pmSuspendedPIDS[i].pid == pid) { - index = i; - break; - } - } - - if (-1 == index) - { - /* Find an unused slot in the suspended pids table. */ - - for (i=0; i < pmSuspendedCapacity; i++) { - if (pmSuspendedPIDS[i].refcount == 0) { - break; - } - } - - if (pmSuspendedCapacity == i) - { - /* GROW if necessary */ - - PMNotifySuspendedStruct *newSuspended = NULL; - pmSuspendedCapacity *= 2; - pmSuspendedSize = pmSuspendedCapacity * sizeof(PMNotifySuspendedStruct); - newSuspended = (PMNotifySuspendedStruct *)IOMalloc(pmSuspendedSize); - - bzero(newSuspended, pmSuspendedSize); - bcopy(pmSuspendedPIDS, newSuspended, pmSuspendedSize/2); - IOFree(pmSuspendedPIDS, pmSuspendedSize/2); - - pmSuspendedPIDS = newSuspended; - } - - index = i; - pmSuspendedPIDS[index].pid = pid; - } - - if (doSuspend) { - pmSuspendedPIDS[index].refcount++; - } else { - pmSuspendedPIDS[index].refcount--; - } - - /* - * Publish array of suspended pids in IOPMrootDomain - */ - OSArray *publish = OSArray::withCapacity(pmSuspendedCapacity); - - for (i=0; i<pmSuspendedCapacity; i++) - { - if (pmSuspendedPIDS[i].refcount > 0) { - OSDictionary *suspended = OSDictionary::withCapacity(2); - OSNumber *n = NULL; - - n = OSNumber::withNumber(pmSuspendedPIDS[i].pid, 32); - suspended->setObject("pid", n); - n->release(); - - n = OSNumber::withNumber(pmSuspendedPIDS[i].refcount, 32); - suspended->setObject("refcount", n); - n->release(); - - publish->setObject(suspended); - suspended->release(); - - } - } - - if (0 != publish->getCount()) { - setProperty(kPMSuspendedNotificationClients, publish); - } else { - removeProperty(kPMSuspendedNotificationClients); - } - - publish->release(); - - return; -} - -bool IOPMrootDomain::pmNotificationIsSuspended(uint32_t pid) -{ - unsigned int index; - - for (index=0; index < pmSuspendedCapacity; index++) { - if (pmSuspendedPIDS[index].pid == pid) { - return pmSuspendedPIDS[index].refcount > 0; - } - } - - return false; -} - - -void IOPMrootDomain::suspendPMNotificationsForPID(uint32_t pid, bool doSuspend) -{ - if(pmPowerStateQueue) { - pmPowerStateQueue->submitPowerEvent(kPowerEventSuspendClient, (void *)(uintptr_t)pid, (uint64_t)doSuspend ); - } - return; -} - //****************************************************************************** // setProperties // @@ -1255,7 +1087,6 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) OSDictionary *dict = OSDynamicCast(OSDictionary, props_obj); OSBoolean *b; OSNumber *n; - OSDictionary *d; const OSSymbol *key; OSObject *obj; OSCollectionIterator * iter = 0; @@ -1269,8 +1100,7 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) const
OSSymbol *sleepdisabled_string = OSSymbol::withCString("SleepDisabled"); const OSSymbol *ondeck_sleepwake_uuid_string = OSSymbol::withCString(kIOPMSleepWakeUUIDKey); const OSSymbol *loginwindow_tracepoint_string = OSSymbol::withCString(kIOPMLoginWindowSecurityDebugKey); - const OSSymbol *pmTimelineLogging_string = OSSymbol::withCString(kIOPMTimelineDictionaryKey); -#if HIBERNATION +#if HIBERNATION const OSSymbol *hibernatemode_string = OSSymbol::withCString(kIOHibernateModeKey); const OSSymbol *hibernatefile_string = OSSymbol::withCString(kIOHibernateFileKey); const OSSymbol *hibernatefilemin_string = OSSymbol::withCString(kIOHibernateFileMinSizeKey); @@ -1278,10 +1108,7 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) const OSSymbol *hibernatefreeratio_string = OSSymbol::withCString(kIOHibernateFreeRatioKey); const OSSymbol *hibernatefreetime_string = OSSymbol::withCString(kIOHibernateFreeTimeKey); #endif -#if SUSPEND_PM_NOTIFICATIONS_DEBUG - const OSSymbol *suspendPMClient_string = OSSymbol::withCString(kPMSuspendedNotificationClients); -#endif - + if (!dict) { return_value = kIOReturnBadArgument; @@ -1324,19 +1151,7 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) { setProperty(key, obj); } - else if (key->isEqualTo(pmTimelineLogging_string)) - { - if ((d = OSDynamicCast(OSDictionary, obj)) && - timeline && timeline->setProperties(d)) - { - OSDictionary *tlInfo = timeline->copyInfoDictionary(); - if (tlInfo) { - setProperty(kIOPMTimelineDictionaryKey, tlInfo); - tlInfo->release(); - } - } - } -#if HIBERNATION +#if HIBERNATION else if (key->isEqualTo(hibernatemode_string) || key->isEqualTo(hibernatefilemin_string) || key->isEqualTo(hibernatefilemax_string) || @@ -1351,7 +1166,7 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) OSString * str = OSDynamicCast(OSString, obj); if (str) setProperty(key, str); } -#endif +#endif else if (key->isEqualTo(sleepdisabled_string)) { if ((b = OSDynamicCast(OSBoolean, obj))) @@ -1393,17 +1208,7 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) OSBitAndAtomic(~kIOPMAlarmBitCalendarWake, &_userScheduledAlarm); DLOG("_userScheduledAlarm = 0x%x\n", (uint32_t) _userScheduledAlarm); } -#if SUSPEND_PM_NOTIFICATIONS_DEBUG - else if (key->isEqualTo(suspendPMClient_string)) - { - if ((n = OSDynamicCast(OSNumber, obj))) - { - // Toggle the suspended status for pid n. 
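/*
 * Illustration only, not part of the patch: setProperties() above dispatches
 * on dictionary keys by comparing interned OSSymbols while walking the
 * caller's dictionary with an OSCollectionIterator. A reduced sketch of that
 * key-dispatch loop (the handler callback is hypothetical):
 */
#include <libkern/c++/OSCollectionIterator.h>
#include <libkern/c++/OSDictionary.h>
#include <libkern/c++/OSSymbol.h>

// Walk every key/value pair and hand it to a handler; returns false when the
// input is not a dictionary or an iterator cannot be created.
static bool sketchDispatchProperties(OSObject *propsObj,
    void (*handle)(const OSSymbol *key, OSObject *value))
{
    OSDictionary *dict = OSDynamicCast(OSDictionary, propsObj);
    if (!dict) return false;

    OSCollectionIterator *iter = OSCollectionIterator::withCollection(dict);
    if (!iter) return false;

    const OSSymbol *key;
    while ((key = OSDynamicCast(OSSymbol, iter->getNextObject()))) {
        handle(key, dict->getObject(key));   // compare against interned symbols
    }
    iter->release();
    return true;
}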
- uint32_t pid_int = n->unsigned32BitValue(); - suspendPMNotificationsForPID(pid_int, !pmNotificationIsSuspended(pid_int)); - } - } -#endif + // Relay our allowed PM settings onto our registered PM clients else if ((allowedPMSettings->getNextIndexOfObject(key, 0) != (unsigned int) -1)) { @@ -1426,7 +1231,7 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) } } - return_value = setPMSetting(key, obj); + return_value = setPMSetting(key, obj); if (kIOReturnSuccess != return_value) break; @@ -1450,7 +1255,7 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) if ((data = OSDynamicCast(OSData, obj)) && (data->getLength() == sizeof(IOPMCalendarStruct))) { - const IOPMCalendarStruct * cs = + const IOPMCalendarStruct * cs = (const IOPMCalendarStruct *) data->getBytesNoCopy(); if (cs->year) @@ -1477,15 +1282,11 @@ exit: if(sleepdisabled_string) sleepdisabled_string->release(); if(ondeck_sleepwake_uuid_string) ondeck_sleepwake_uuid_string->release(); if(loginwindow_tracepoint_string) loginwindow_tracepoint_string->release(); - if(pmTimelineLogging_string) pmTimelineLogging_string->release(); -#if HIBERNATION +#if HIBERNATION if(hibernatemode_string) hibernatemode_string->release(); if(hibernatefile_string) hibernatefile_string->release(); if(hibernatefreeratio_string) hibernatefreeratio_string->release(); if(hibernatefreetime_string) hibernatefreetime_string->release(); -#endif -#if SUSPEND_PM_NOTIFICATIONS_DEBUG - if(suspendPMClient_string) suspendPMClient_string->release(); #endif if (iter) iter->release(); return return_value; @@ -1994,7 +1795,7 @@ void IOPMrootDomain::startIdleSleepTimer( uint32_t inSeconds ) ASSERT_GATED(); if (inSeconds) { - clock_interval_to_deadline(inSeconds, kSecondScale, &deadline); + clock_interval_to_deadline(inSeconds, kSecondScale, &deadline); thread_call_enter_delayed(extraSleepTimer, deadline); idleSleepTimerPending = true; } @@ -2073,7 +1874,7 @@ void IOPMrootDomain::handleSleepTimerExpiration( void ) // getTimeToIdleSleep // // Returns number of seconds left before going into idle sleep. -// Caller has to make sure that idle sleep is allowed at the time of calling +// Caller has to make sure that idle sleep is allowed at the time of calling // this function //****************************************************************************** @@ -2090,7 +1891,7 @@ uint32_t IOPMrootDomain::getTimeToIdleSleep( void ) if (userActivityTime) lastActivityTime = userActivityTime; - else + else lastActivityTime = userBecameInactiveTime; clock_get_uptime(&now); @@ -2099,10 +1900,10 @@ uint32_t IOPMrootDomain::getTimeToIdleSleep( void ) SUB_ABSOLUTETIME(&now, &lastActivityTime); absolutetime_to_nanoseconds(now, &nanos); minutesSinceUserInactive = nanos / (60000000000ULL); - + if (minutesSinceUserInactive >= sleepSlider) sleepDelay = 0; - else + else sleepDelay = sleepSlider - minutesSinceUserInactive; } else @@ -2156,24 +1957,24 @@ IOReturn IOPMrootDomain::sleepSystemOptions( OSDictionary *options ) { OSObject *obj = NULL; OSString *reason = NULL; - /* sleepSystem is a public function, and may be called by any kernel driver. - * And that's bad - drivers should sleep the system by calling + /* sleepSystem is a public function, and may be called by any kernel driver. + * And that's bad - drivers should sleep the system by calling * receivePowerNotification() instead. Drivers should not use sleepSystem. * * Note that user space app calls to IOPMSleepSystem() will also travel * this code path and thus be correctly identified as software sleeps. 
*/ - if (options && options->getObject("OSSwitch")) + if (options && options->getObject("OSSwitch")) { // Log specific sleep cause for OS Switch hibernation return privateSleepSystem( kIOPMSleepReasonOSSwitchHibernate); } - if (options && (obj = options->getObject("Sleep Reason"))) + if (options && (obj = options->getObject("Sleep Reason"))) { reason = OSDynamicCast(OSString, obj); - if (reason && reason->isEqualTo(kIOPMDarkWakeThermalEmergencyKey)) + if (reason && reason->isEqualTo(kIOPMDarkWakeThermalEmergencyKey)) return privateSleepSystem(kIOPMSleepReasonDarkWakeThermalEmergency); } @@ -2187,9 +1988,6 @@ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason ) if (!checkSystemSleepEnabled() || !pmPowerStateQueue) { - recordPMEvent(kIOPMEventTypeSleep, NULL, - sleepReason, kIOReturnNotPermitted); - return kIOReturnNotPermitted; } @@ -2212,20 +2010,20 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) ASSERT_GATED(); DLOG("PowerChangeDone: %u->%u\n", (uint32_t) previousPowerState, (uint32_t) getPowerState()); - + switch ( getPowerState() ) { case SLEEP_STATE: { if (previousPowerState != ON_STATE) break; - - recordPMEvent(kIOPMEventTypeSleepDone, NULL, 0, kIOReturnSuccess); + + acceptSystemWakeEvents(true); // re-enable this timer for next sleep cancelIdleSleepTimer(); - clock_sec_t secs; - clock_usec_t microsecs; + clock_sec_t secs; + clock_usec_t microsecs; clock_get_calendar_microtime(&secs, &microsecs); logtime(secs); gIOLastSleepTime.tv_sec = secs; @@ -2233,7 +2031,7 @@ gIOLastWakeTime.tv_sec = 0; gIOLastWakeTime.tv_usec = 0; -#if HIBERNATION +#if HIBERNATION LOG("System %sSleep\n", gIOHibernateState ? "Safe" : ""); IOHibernateSystemHasSlept(); @@ -2253,7 +2051,7 @@ _highestCapability = 0; ((IOService *)this)->start_watchdog_timer(); //14456299 -#if HIBERNATION +#if HIBERNATION IOHibernateSystemWake(); #endif @@ -2263,7 +2061,7 @@ // trip the reset of the calendar clock clock_wakeup_calendar(); -#if HIBERNATION +#if HIBERNATION LOG("System %sWake\n", gIOHibernateState ? 
"SafeSleep " : ""); #endif @@ -2271,29 +2069,26 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) PMDebug(kPMLogSystemWake, 0, 0); lowBatteryCondition = false; lastSleepReason = 0; - + _lastDebugWakeSeconds = _debugWakeSeconds; _debugWakeSeconds = 0; _scheduledAlarms = 0; - // And start logging the wake event here - // TODO: Publish the wakeReason string as an integer - recordPMEvent(kIOPMEventTypeWake, NULL, 0, kIOReturnSuccess); - #ifndef __LP64__ systemWake(); #endif #if defined(__i386__) || defined(__x86_64__) - wranglerTickled = false; - graphicsSuppressed = false; - darkWakePostTickle = false; - darkWakeToSleepASAP = true; - logGraphicsClamp = true; - sleepTimerMaintenance = false; - sleepToStandby = false; - wranglerTickleLatched = false; - userWasActive = false; + wranglerTickled = false; + graphicsSuppressed = false; + darkWakePostTickle = false; + darkWakeHibernateError = false; + darkWakeToSleepASAP = true; + logGraphicsClamp = true; + sleepTimerMaintenance = false; + sleepToStandby = false; + wranglerTickleLatched = false; + userWasActive = false; fullWakeReason = kFullWakeReasonNone; OSString * wakeType = OSDynamicCast( @@ -2301,6 +2096,14 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) OSString * wakeReason = OSDynamicCast( OSString, getProperty(kIOPMRootDomainWakeReasonKey)); + if (wakeReason && (wakeReason->getLength() >= 2) && + gWakeReasonString[0] == '\0') + { + // Until the platform driver can claim its wake reasons + strlcat(gWakeReasonString, wakeReason->getCStringNoCopy(), + sizeof(gWakeReasonString)); + } + if (wakeType && wakeType->isEqualTo(kIOPMrootDomainWakeTypeLowBattery)) { lowBatteryCondition = true; @@ -2360,6 +2163,13 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) } } else + if (wakeType && + wakeType->isEqualTo(kIOPMRootDomainWakeTypeHibernateError)) + { + darkWakeMaintenance = true; + darkWakeHibernateError = true; + } + else { // Unidentified wake source, resume to full wake if debug // alarm is pending. @@ -2399,9 +2209,9 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) else if (!darkWakeMaintenance) { // Early/late tickle for non-maintenance wake. 
- if (((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) == + if (((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) == kDarkWakeFlagHIDTickleEarly) || - ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) == + ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) == kDarkWakeFlagHIDTickleLate)) { darkWakePostTickle = true; @@ -2418,12 +2228,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) changePowerStateToPriv(ON_STATE); } break; - case ON_STATE: { - if (previousPowerState != ON_STATE) - { - recordPMEvent(kIOPMEventTypeWakeDone, NULL, 0, kIOReturnSuccess); - } - } break; } } @@ -2460,14 +2264,14 @@ bool IOPMrootDomain::updatePreventIdleSleepList( ASSERT_GATED(); #if defined(__i386__) || defined(__x86_64__) - // Disregard disk I/O (anything besides the display wrangler) - // as a factor preventing idle sleep,except in the case of legacy disk I/O - if ((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOAlways) && - addNotRemove && (service != wrangler) && (service != this)) + // Disregard disk I/O (besides the display wrangler) as a factor preventing + // idle sleep, except in the case of legacy disk I/O + if ((service != wrangler) && (service != this)) { return false; } #endif + oldCount = preventIdleSleepList->getCount(); if (addNotRemove) { @@ -2482,13 +2286,13 @@ bool IOPMrootDomain::updatePreventIdleSleepList( service->getName(), preventIdleSleepList->getCount()); } newCount = preventIdleSleepList->getCount(); - + if ((oldCount == 0) && (newCount != 0)) { // Driver added to empty prevent list. // Update the driver desire to prevent idle sleep. // Driver desire does not prevent demand sleep. - + changePowerStateTo(ON_STATE); } else if ((oldCount != 0) && (newCount == 0)) @@ -2503,7 +2307,7 @@ bool IOPMrootDomain::updatePreventIdleSleepList( #if defined(__i386__) || defined(__x86_64__) if (addNotRemove && (service == wrangler) && !checkSystemCanSustainFullWake()) { - return false; + return false; // do not idle-cancel } #endif @@ -2564,7 +2368,7 @@ bool IOPMrootDomain::tellChangeDown( unsigned long stateNum ) if (!ignoreTellChangeDown) tracePoint( kIOPMTracePointSleepApplications ); else - tracePoint( kIOPMTracePointSleepPriorityClients ); + tracePoint( kIOPMTracePointSleepPriorityClients ); } if ((SLEEP_STATE == stateNum) && !ignoreTellChangeDown) @@ -2702,7 +2506,7 @@ void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum ) DLOG("tellNoChangeDown %u->%u\n", (uint32_t) getPowerState(), (uint32_t) stateNum); - // Sleep canceled, clear the sleep trace point. + // Sleep canceled, clear the sleep trace point. 
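/*
 * Illustration only, not part of the patch: updatePreventIdleSleepList()
 * above acts only on edge transitions of the list's size. The first driver
 * added (0 -> nonzero) raises the root domain's power desire to ON_STATE;
 * the last driver removed (nonzero -> 0) lowers it again. A reduced sketch
 * of that edge-triggered bookkeeping (a counter stands in for the OSSet,
 * and the two hooks are hypothetical):
 */
static unsigned int gPreventCount;

static void sketchRaiseDesire(void) { /* changePowerStateTo(ON_STATE) here */ }
static void sketchLowerDesire(void) { /* allow idle sleep again here */ }

static void sketchUpdatePreventList(bool addNotRemove)
{
    unsigned int oldCount = gPreventCount;

    if (addNotRemove) gPreventCount++;
    else if (gPreventCount) gPreventCount--;

    if (oldCount == 0 && gPreventCount != 0)
        sketchRaiseDesire();                 // first preventer appeared
    else if (oldCount != 0 && gPreventCount == 0)
        sketchLowerDesire();                 // last preventer went away
}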
tracePoint(kIOPMTracePointSystemUp); systemDidNotSleep(); @@ -2745,14 +2549,6 @@ void IOPMrootDomain::tellChangeUp( unsigned long stateNum ) tellClients( kIOMessageSystemWillPowerOn ); } -#if defined(__i386__) || defined(__x86_64__) - if (spindumpDesc) - { - AbsoluteTime deadline; - clock_interval_to_deadline( 30, kSecondScale, &deadline ); - thread_call_enter_delayed(stackshotOffloader, deadline); - } -#endif tracePoint( kIOPMTracePointWakeApplications ); tellClients( kIOMessageSystemHasPoweredOn ); @@ -2777,7 +2573,24 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( if (!gRootDomain) return kIOReturnUnsupported; - if (messageType == kIOMessageSystemCapabilityChange) + if (messageType == kIOMessageSystemWillSleep) + { +#if HIBERNATION + uint32_t mem_only = 0; + IOPowerStateChangeNotification *notify = + (IOPowerStateChangeNotification *)messageArgs; + + PE_parse_boot_argn("swd_mem_only", &mem_only, sizeof(mem_only)); + if ((mem_only != 1) && (gRootDomain->sleepWakeDebugIsWdogEnabled())) + { + notify->returnValue = 30 * 1000 * 1000; + thread_call_enter1( + gRootDomain->hibDebugSetupEntry, + (thread_call_param_t)(uintptr_t) notify->powerRef); + } +#endif + } + else if (messageType == kIOMessageSystemCapabilityChange) { IOPMSystemCapabilityChangeParameters * params = (IOPMSystemCapabilityChangeParameters *) messageArgs; @@ -2799,7 +2612,7 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( { // We will ack within 20 seconds params->maxWaitForReply = 20 * 1000 * 1000; -#if HIBERNATION +#if HIBERNATION gRootDomain->evaluateSystemSleepPolicyEarly(); // add in time we could spend freeing pages @@ -2826,7 +2639,7 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( AbsoluteTime deadline; clock_interval_to_deadline( 30, kSecondScale, &deadline ); thread_call_enter1_delayed( - gRootDomain->diskSyncCalloutEntry, + gRootDomain->diskSyncCalloutEntry, (thread_call_param_t)(uintptr_t) params->notifyRef, deadline ); } @@ -2840,7 +2653,7 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( (params->toCapabilities & kIOPMSystemCapabilityCPU) && (params->fromCapabilities & kIOPMSystemCapabilityCPU) == 0) { -#if HIBERNATION +#if HIBERNATION // We will ack within 110 seconds params->maxWaitForReply = 110 * 1000 * 1000; @@ -2868,16 +2681,16 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( //****************************************************************************** void IOPMrootDomain::handleQueueSleepWakeUUID(OSObject *obj) -{ +{ OSString *str = NULL; - if (kOSBooleanFalse == obj) + if (kOSBooleanFalse == obj) { handlePublishSleepWakeUUID(NULL); } - else if ((str = OSDynamicCast(OSString, obj))) + else if ((str = OSDynamicCast(OSString, obj))) { - // This branch caches the UUID for an upcoming sleep/wake + // This branch caches the UUID for an upcoming sleep/wake if (queuedSleepWakeUUIDString) { queuedSleepWakeUUIDString->release(); queuedSleepWakeUUIDString = NULL; @@ -2908,27 +2721,13 @@ void IOPMrootDomain::handlePublishSleepWakeUUID( bool shouldPublish ) { ASSERT_GATED(); - /* + /* * Clear the current UUID */ if (gSleepWakeUUIDIsSet) { DLOG("SleepWake UUID cleared\n"); - OSString *UUIDstring = NULL; - - if (timeline && - (UUIDstring = OSDynamicCast(OSString, getProperty(kIOPMSleepWakeUUIDKey)))) - { - PMEventDetails *details = PMEventDetails::eventDetails(kIOPMEventTypeUUIDClear, - UUIDstring->getCStringNoCopy(), NULL, 0); - if (details) { - timeline->recordSystemPowerEvent( details ); - details->release(); - } - timeline->setNumEventsLoggedThisPeriod(0); - } - gSleepWakeUUIDIsSet = false; 
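/*
 * Illustration only, not part of the patch: sysPowerDownHandler() above
 * acknowledges sleep asynchronously. The interest handler claims a time
 * budget in microseconds on the notification, queues a thread call with the
 * notification reference, and the callout finishes by calling
 * allowPowerChange(). A skeleton of that handshake; the 30-second budget
 * mirrors the hunk above, while the names and the uint32_t budget type are
 * assumptions:
 */
#include <kern/thread_call.h>
#include <IOKit/IOService.h>

static thread_call_t gAckCall;   // assumed: allocated with thread_call_allocate()

// Callout context: perform the slow setup, then let the transition proceed.
static void sketchAckCallout(thread_call_param_t p0, thread_call_param_t p1)
{
    IOService *rootDomain = (IOService *)p0;
    unsigned long notifyRef = (unsigned long)(uintptr_t)p1;

    // ... slow work (e.g. opening a debug file) would run here ...

    rootDomain->allowPowerChange(notifyRef);
}

// Interest-handler context: claim the budget and defer the work.
static void sketchWillSleep(void *powerRef, uint32_t *budgetMicroseconds)
{
    *budgetMicroseconds = 30 * 1000 * 1000;  // up to 30 s before the ack
    thread_call_enter1(gAckCall, (thread_call_param_t)powerRef);
}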
removeProperty(kIOPMSleepWakeUUIDKey); @@ -2945,22 +2744,12 @@ void IOPMrootDomain::handlePublishSleepWakeUUID( bool shouldPublish ) publishThisUUID = queuedSleepWakeUUIDString; publishThisUUID->retain(); - if (timeline) { - PMEventDetails *details; - details = PMEventDetails::eventDetails(kIOPMEventTypeUUIDSet, - publishThisUUID->getCStringNoCopy(), NULL, 0); - if (details) { - timeline->recordSystemPowerEvent( details ); - details->release(); - } - } - if (publishThisUUID) { setProperty(kIOPMSleepWakeUUIDKey, publishThisUUID); publishThisUUID->release(); } - + gSleepWakeUUIDIsSet = true; messageClients(kIOPMMessageSleepWakeUUIDChange, kIOPMMessageSleepWakeUUIDSet); @@ -3055,11 +2844,11 @@ hibernate_should_abort(void) void IOPMrootDomain::willNotifyPowerChildren( IOPMPowerStateIndex newPowerState ) { -#if HIBERNATION +#if HIBERNATION if (SLEEP_STATE == newPowerState) { - IOHibernateSystemSleep(); - IOHibernateIOKitSleep(); + IOHibernateSystemSleep(); + IOHibernateIOKitSleep(); } #endif } @@ -3088,13 +2877,13 @@ void IOPMrootDomain::sendClientClamshellNotification( void ) if (!clamshellExists) return; - setProperty(kAppleClamshellStateKey, + setProperty(kAppleClamshellStateKey, clamshellClosed ? kOSBooleanTrue : kOSBooleanFalse); - setProperty(kAppleClamshellCausesSleepKey, + setProperty(kAppleClamshellCausesSleepKey, shouldSleepOnClamshellClosed() ? kOSBooleanTrue : kOSBooleanFalse); - /* Argument to message is a bitfiel of + /* Argument to message is a bitfiel of * ( kClamshellStateBit | kClamshellSleepBit ) */ messageClients(kIOPMMessageClamshellStateChange, @@ -3186,7 +2975,7 @@ void IOPMrootDomain::publishFeature( const char * feature ) //****************************************************************************** void IOPMrootDomain::publishFeature( - const char *feature, + const char *feature, uint32_t supportedWhere, uint32_t *uniqueFeatureID) { @@ -3204,7 +2993,7 @@ void IOPMrootDomain::publishFeature( // Feature isn't supported anywhere! return; } - + if(next_feature_id > 5000) { // Far, far too many features! return; @@ -3214,16 +3003,16 @@ void IOPMrootDomain::publishFeature( OSDictionary *features = (OSDictionary *) getProperty(kRootDomainSupportedFeatures); - + // Create new features dict if necessary if ( features && OSDynamicCast(OSDictionary, features)) { features = OSDictionary::withDictionary(features); } else { features = OSDictionary::withCapacity(1); } - + // Create OSNumber to track new feature - + next_feature_id += 1; if( uniqueFeatureID ) { // We don't really mind if the calling kext didn't give us a place @@ -3249,7 +3038,7 @@ void IOPMrootDomain::publishFeature( (const OSObject **)&existing_feature, 1, 2); } else if(( existing_feature_arr = OSDynamicCast(OSArray, osObj) )) { - // Add object to existing array + // Add object to existing array existing_feature_arr = OSArray::withArray( existing_feature_arr, existing_feature_arr->getCount() + 1); @@ -3267,17 +3056,17 @@ void IOPMrootDomain::publishFeature( // set the OSNumber at key 'feature' and we're on our way. 
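/*
 * Illustration only, not part of the patch: publishFeature() above stores
 * each published feature as a 32-bit value whose high 16 bits carry a unique
 * feature ID and whose low bits record where the feature is supported;
 * removePublishedFeature() recovers the ID with the (feature_value >> 16)
 * shift visible below. A small sketch of that packing scheme (helper names
 * are hypothetical):
 */
#include <stdint.h>

static inline uint32_t sketchPackFeature(uint16_t featureID, uint16_t supportedWhere)
{
    return ((uint32_t)featureID << 16) | supportedWhere;
}

static inline uint16_t sketchFeatureID(uint32_t packed)
{
    return (uint16_t)(packed >> 16);     // matches feature_value >> 16
}

static inline uint16_t sketchFeatureWhere(uint32_t packed)
{
    return (uint16_t)(packed & 0xFFFF);
}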
features->setObject(feature, new_feature_data); } - + new_feature_data->release(); setProperty(kRootDomainSupportedFeatures, features); features->release(); - if(featuresDictLock) IOLockUnlock(featuresDictLock); + if(featuresDictLock) IOLockUnlock(featuresDictLock); // Notify EnergySaver and all those in user space so they might - // re-populate their feature specific UI + // re-populate their feature specific UI if(pmPowerStateQueue) { pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged ); } @@ -3295,7 +3084,7 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) uint32_t feature_value = 0; uint16_t feature_id = 0; bool madeAChange = false; - + OSSymbol *dictKey = NULL; OSCollectionIterator *dictIterator = NULL; OSArray *arrayMember = NULL; @@ -3311,7 +3100,7 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) OSDictionary *features = (OSDictionary *) getProperty(kRootDomainSupportedFeatures); - + if ( features && OSDynamicCast(OSDictionary, features) ) { // Any modifications to the dictionary are made to the copy to prevent @@ -3323,20 +3112,20 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) ret = kIOReturnNotFound; goto exit; } - + // We iterate 'features' dictionary looking for an entry tagged // with 'removeFeatureID'. If found, we remove it from our tracking // structures and notify the OS via a general interest message. - + dictIterator = OSCollectionIterator::withCollection(features); if(!dictIterator) { goto exit; } - + while( (dictKey = OSDynamicCast(OSSymbol, dictIterator->getNextObject())) ) { osObj = features->getObject(dictKey); - + // Each Feature is either tracked by an OSNumber if( osObj && (numberMember = OSDynamicCast(OSNumber, osObj)) ) { @@ -3350,19 +3139,19 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) madeAChange = true; break; } - + // Or tracked by an OSArray of OSNumbers } else if( osObj && (arrayMember = OSDynamicCast(OSArray, osObj)) ) { unsigned int arrayCount = arrayMember->getCount(); - + for(unsigned int i=0; igetObject(i)); if(!osNum) { continue; } - + feature_value = osNum->unsigned32BitValue(); feature_id = (uint16_t)(feature_value >> 16); @@ -3388,29 +3177,29 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) break; } } - } + } } - + dictIterator->release(); - + if( madeAChange ) { - ret = kIOReturnSuccess; + ret = kIOReturnSuccess; setProperty(kRootDomainSupportedFeatures, features); - + // Notify EnergySaver and all those in user space so they might - // re-populate their feature specific UI + // re-populate their feature specific UI if(pmPowerStateQueue) { pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged ); } } else { ret = kIOReturnNotFound; } - + exit: if(features) features->release(); - if(featuresDictLock) IOLockUnlock(featuresDictLock); + if(featuresDictLock) IOLockUnlock(featuresDictLock); return ret; } @@ -3459,7 +3248,7 @@ IOReturn IOPMrootDomain::setPMSetting( PMSETTING_LOCK(); - // Update settings dict so changes are visible from copyPMSetting(). + // Update settings dict so changes are visible from copyPMSetting(). fPMSettingsDict->setObject(type, object); // Prep all PMSetting objects with the given 'type' for callout. 
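A sketch of the 32-bit layout these hunks imply: publishFeature() hands the caller an opaque uniqueFeatureID, and removePublishedFeature() recovers it with feature_value >> 16, so the unique ID occupies the high 16 bits and the supported-power-source flags the low 16. The exact bit assignment is inferred from the shifts in the code, not a documented ABI, and the next_feature_id > 5000 guard keeps the ID comfortably inside its 16-bit field.

    #include <cstdint>
    #include <cstdio>

    static uint32_t packFeature(uint16_t featureID, uint16_t supportedWhere) {
        return ((uint32_t)featureID << 16) | supportedWhere;
    }

    int main() {
        uint32_t feature_value = packFeature(42, 0x0003);       // e.g. AC | Battery
        uint16_t feature_id = (uint16_t)(feature_value >> 16);  // as in removePublishedFeature()
        uint16_t where = (uint16_t)(feature_value & 0xFFFF);
        std::printf("value=0x%08x id=%u where=0x%04x\n", feature_value, feature_id, where);
        return 0;
    }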
@@ -3484,14 +3273,14 @@ IOReturn IOPMrootDomain::setPMSetting( pmso = (PMSettingObject *) array->getObject(i); if (pmso->disabled) continue; - entries[j].thread = thisThread; + entries[j].thread = thisThread; queue_enter(&pmso->calloutQueue, &entries[j], PMSettingCallEntry *, link); chosen->setObject(pmso); j++; } count = j; if (!count) - goto unlock_exit; + goto unlock_exit; PMSETTING_UNLOCK(); @@ -3541,7 +3330,7 @@ OSObject * IOPMrootDomain::copyPMSetting( obj->retain(); } PMSETTING_UNLOCK(); - + return obj; } @@ -3558,7 +3347,7 @@ IOReturn IOPMrootDomain::registerPMSettingController( uintptr_t refcon, OSObject **handle) { - return registerPMSettingController( + return registerPMSettingController( settings, (kIOPMSupportedOnAC | kIOPMSupportedOnBatt | kIOPMSupportedOnUPS), func, target, refcon, handle); @@ -3573,9 +3362,9 @@ IOReturn IOPMrootDomain::registerPMSettingController( // * settings - An OSArray containing OSSymbols. Caller should populate this // array with a list of settings caller wants notifications from. // * func - A C function callback of the type IOPMSettingControllerCallback -// * target - caller may provide an OSObject *, which PM will pass as an +// * target - caller may provide an OSObject *, which PM will pass as an // target to calls to "func" -// * refcon - caller may provide an void *, which PM will pass as an +// * refcon - caller may provide an void *, which PM will pass as an // argument to calls to "func" // * handle - This is a return argument. We will populate this pointer upon // call success. Hold onto this and pass this argument to @@ -3605,7 +3394,7 @@ IOReturn IOPMrootDomain::registerPMSettingController( } pmso = PMSettingObject::pmSettingObject( - (IOPMrootDomain *) this, func, target, + (IOPMrootDomain *) this, func, target, refcon, supportedPowerSources, settings, &pmsh); if (!pmso) { @@ -3677,7 +3466,7 @@ void IOPMrootDomain::deregisterPMSettingObject( PMSettingObject * pmso ) // Search each PM settings array in the kernel. iter = OSCollectionIterator::withCollection(settingsCallbacks); - if (iter) + if (iter) { while ((sym = OSDynamicCast(OSSymbol, iter->getNextObject()))) { @@ -3706,12 +3495,12 @@ void IOPMrootDomain::deregisterPMSettingObject( PMSettingObject * pmso ) //****************************************************************************** void IOPMrootDomain::informCPUStateChange( - uint32_t type, + uint32_t type, uint32_t value ) { #if defined(__i386__) || defined(__x86_64__) - pmioctlVariableInfo_t varInfoStruct; + pmioctlVariableInfo_t varInfoStruct; int pmCPUret = 0; const char *varNameStr = NULL; int32_t *varIndex = NULL; @@ -3725,7 +3514,7 @@ void IOPMrootDomain::informCPUStateChange( } else { return; } - + // Set the new value! // pmCPUControl will assign us a new ID if one doesn't exist yet bzero(&varInfoStruct, sizeof(pmioctlVariableInfo_t)); @@ -3735,8 +3524,8 @@ void IOPMrootDomain::informCPUStateChange( varInfoStruct.varCurValue = value; strncpy( (char *)varInfoStruct.varName, (const char *)varNameStr, - strlen(varNameStr) + 1 ); - + strlen(varNameStr) + 1 ); + // Set! pmCPUret = pmCPUControl( PMIOCSETVARINFO, (void *)&varInfoStruct ); @@ -3744,19 +3533,19 @@ void IOPMrootDomain::informCPUStateChange( if ((0 == pmCPUret) && (*varIndex == kCPUUnknownIndex)) { - // pmCPUControl has assigned us a new variable ID. + // pmCPUControl has assigned us a new variable ID. // Let's re-read the structure we just SET to learn that ID. 
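The evaluateSystemSleepPolicy() hunk below accumulates independent wake-holding conditions into a single currentFactors bitmask before handing it to the policy handler, which is why each check simply ORs in one bit. A compilable miniature of that accumulation follows; the bit values are stand-ins, the real kIOPMSleepFactor* constants live in the private IOKit headers.

    #include <cstdint>
    #include <cstdio>

    // Stand-in bits, illustrative only.
    enum : uint64_t {
        kFactorMagicPacketWakeEnabled = 1ULL << 0,
        kFactorNetworkKeepAliveActive = 1ULL << 1,
        kFactorExternalDisplay        = 1ULL << 2,
        kFactorLocalUserActivity      = 1ULL << 3,
        kFactorHibernateFailed        = 1ULL << 4,
    };

    int main() {
        uint64_t currentFactors = 0;
        bool magicPacketWake = true, userWasActive = true, darkWakeHibernateError = false;
        if (magicPacketWake)        currentFactors |= kFactorMagicPacketWakeEnabled;
        if (userWasActive)          currentFactors |= kFactorLocalUserActivity;
        if (darkWakeHibernateError) currentFactors |= kFactorHibernateFailed;
        std::printf("sleep factors 0x%llx\n", (unsigned long long)currentFactors);
        return 0;
    }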
pmCPUret = pmCPUControl( PMIOCGETVARNAMEINFO, (void *)&varInfoStruct ); - if (0 == pmCPUret) - { + if (0 == pmCPUret) + { // Store it in idxPMCPUClamshell or idxPMCPULimitedPower *varIndex = varInfoStruct.varID; } - } - + } + return; - + #endif /* __i386__ || __x86_64__ */ } @@ -3907,6 +3696,7 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( if (getPMAssertionLevel(kIOPMDriverAssertionMagicPacketWakeEnabledBit) != kIOPMDriverAssertionLevelOff) currentFactors |= kIOPMSleepFactorMagicPacketWakeEnabled; +#define TCPKEEPALIVE 1 #if TCPKEEPALIVE if (getPMAssertionLevel(kIOPMDriverAssertionNetworkKeepAliveActiveBit) != kIOPMDriverAssertionLevelOff) @@ -3918,6 +3708,8 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( currentFactors |= kIOPMSleepFactorExternalDisplay; if (userWasActive) currentFactors |= kIOPMSleepFactorLocalUserActivity; + if (darkWakeHibernateError && !CAP_HIGHEST(kIOPMSystemCapabilityGraphics)) + currentFactors |= kIOPMSleepFactorHibernateFailed; DLOG("sleep factors 0x%llx\n", currentFactors); @@ -3958,13 +3750,13 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( } result = gSleepPolicyHandler(gSleepPolicyTarget, gSleepPolicyVars, params); - + if (kIOPMSleepPhase0 == sleepPhase) { // restore hibernateMode gSleepPolicyVars->hibernateMode = savedHibernateMode; } - + if ((result != kIOReturnSuccess) || (kIOPMSleepTypeInvalid == params->sleepType) || (params->sleepType >= kIOPMSleepTypeLast) || @@ -4204,7 +3996,7 @@ bool IOPMrootDomain::getSleepOption( const char * key, uint32_t * option ) optionsProp = copyProperty(kRootDomainSleepOptionsKey); optionsDict = OSDynamicCast(OSDictionary, optionsProp); - + if (optionsDict) { obj = optionsDict->getObject(key); @@ -4269,92 +4061,92 @@ IOReturn IOPMrootDomain::getSystemSleepType( uint32_t * sleepType ) //****************************************************************************** struct HaltRestartApplierContext { - IOPMrootDomain * RootDomain; - unsigned long PowerState; - IOPMPowerFlags PowerFlags; - UInt32 MessageType; - UInt32 Counter; + IOPMrootDomain * RootDomain; + unsigned long PowerState; + IOPMPowerFlags PowerFlags; + UInt32 MessageType; + UInt32 Counter; }; static void platformHaltRestartApplier( OSObject * object, void * context ) { - IOPowerStateChangeNotification notify; - HaltRestartApplierContext * ctx; - AbsoluteTime startTime; - UInt32 deltaTime; + IOPowerStateChangeNotification notify; + HaltRestartApplierContext * ctx; + AbsoluteTime startTime; + UInt32 deltaTime; - ctx = (HaltRestartApplierContext *) context; - - memset(¬ify, 0, sizeof(notify)); + ctx = (HaltRestartApplierContext *) context; + + memset(¬ify, 0, sizeof(notify)); notify.powerRef = (void *)(uintptr_t)ctx->Counter; notify.returnValue = 0; notify.stateNumber = ctx->PowerState; notify.stateFlags = ctx->PowerFlags; - clock_get_uptime(&startTime); + clock_get_uptime(&startTime); ctx->RootDomain->messageClient( ctx->MessageType, object, (void *)¬ify ); - deltaTime = computeDeltaTimeMS(&startTime); + deltaTime = computeDeltaTimeMS(&startTime); - if ((deltaTime > kPMHaltTimeoutMS) || + if ((deltaTime > kPMHaltTimeoutMS) || (gIOKitDebug & kIOLogPMRootDomain)) - { - _IOServiceInterestNotifier * notifier; - notifier = OSDynamicCast(_IOServiceInterestNotifier, object); - - // IOService children of IOPMrootDomain are not instrumented. - // Only IORootParent currently falls under that group. - - if (notifier) - { - LOG("%s handler %p took %u ms\n", - (ctx->MessageType == kIOMessageSystemWillPowerOff) ? 
"PowerOff" : - (ctx->MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart", - OBFUSCATE(notifier->handler), (uint32_t) deltaTime ); - } - } + { + _IOServiceInterestNotifier * notifier; + notifier = OSDynamicCast(_IOServiceInterestNotifier, object); + + // IOService children of IOPMrootDomain are not instrumented. + // Only IORootParent currently falls under that group. + + if (notifier) + { + LOG("%s handler %p took %u ms\n", + (ctx->MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : + (ctx->MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart", + OBFUSCATE(notifier->handler), (uint32_t) deltaTime ); + } + } - ctx->Counter++; + ctx->Counter++; } void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) { - HaltRestartApplierContext ctx; - AbsoluteTime startTime; - UInt32 deltaTime; + HaltRestartApplierContext ctx; + AbsoluteTime startTime; + UInt32 deltaTime; - memset(&ctx, 0, sizeof(ctx)); - ctx.RootDomain = this; + memset(&ctx, 0, sizeof(ctx)); + ctx.RootDomain = this; - clock_get_uptime(&startTime); - switch (pe_type) - { - case kPEHaltCPU: + clock_get_uptime(&startTime); + switch (pe_type) + { + case kPEHaltCPU: case kPEUPSDelayHaltCPU: - ctx.PowerState = OFF_STATE; - ctx.MessageType = kIOMessageSystemWillPowerOff; - break; - - case kPERestartCPU: - ctx.PowerState = RESTART_STATE; - ctx.MessageType = kIOMessageSystemWillRestart; - break; - - case kPEPagingOff: - ctx.PowerState = ON_STATE; - ctx.MessageType = kIOMessageSystemPagingOff; - IOService::updateConsoleUsers(NULL, kIOMessageSystemPagingOff); -#if HIBERNATION - IOHibernateSystemRestart(); + ctx.PowerState = OFF_STATE; + ctx.MessageType = kIOMessageSystemWillPowerOff; + break; + + case kPERestartCPU: + ctx.PowerState = RESTART_STATE; + ctx.MessageType = kIOMessageSystemWillRestart; + break; + + case kPEPagingOff: + ctx.PowerState = ON_STATE; + ctx.MessageType = kIOMessageSystemPagingOff; + IOService::updateConsoleUsers(NULL, kIOMessageSystemPagingOff); +#if HIBERNATION + IOHibernateSystemRestart(); #endif - break; + break; - default: - return; - } + default: + return; + } - // Notify legacy clients - applyToInterested(gIOPriorityPowerStateInterest, platformHaltRestartApplier, &ctx); + // Notify legacy clients + applyToInterested(gIOPriorityPowerStateInterest, platformHaltRestartApplier, &ctx); // For normal shutdown, turn off File Server Mode. if (kPEHaltCPU == pe_type) @@ -4369,17 +4161,19 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) } } - if (kPEPagingOff != pe_type) - { - // Notify in power tree order - notifySystemShutdown(this, ctx.MessageType); - } + if (kPEPagingOff != pe_type) + { + // Notify in power tree order + notifySystemShutdown(this, ctx.MessageType); + } - deltaTime = computeDeltaTimeMS(&startTime); - LOG("%s all drivers took %u ms\n", - (ctx.MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : - (ctx.MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart", - (uint32_t) deltaTime ); + IOCPURunPlatformHaltRestartActions(pe_type); + + deltaTime = computeDeltaTimeMS(&startTime); + LOG("%s all drivers took %u ms\n", + (ctx.MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : + (ctx.MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart", + (uint32_t) deltaTime ); } //****************************************************************************** @@ -4524,7 +4318,7 @@ void IOPMrootDomain::tagPowerPlaneService( } // Tag top-level PCI devices. 
The order of PMinit() call does not - // change across boots and is used as the PCI bit number. + // change across boots and is used as the PCI bit number. if (pciHostBridgeDevice && service->metaCast("IOPCIDevice")) { // Would prefer to check built-in property, but tagPowerPlaneService() @@ -4535,7 +4329,7 @@ void IOPMrootDomain::tagPowerPlaneService( int bit = pmTracer->recordTopLevelPCIDevice( service ); if (bit >= 0) { - // Save the assigned bit for fast lookup. + // Save the assigned bit for fast lookup. actions->parameter |= (bit & kPMActionsPCIBitNumberMask); actions->actionPowerChangeStart = @@ -4580,28 +4374,28 @@ void IOPMrootDomain::overrideOurPowerChange( if (CAP_CURRENT(kIOPMSystemCapabilityGraphics)) { // Root domain is dropping power state ON->SLEEP. - // If system is in full wake, first drop to dark wake by - // converting the power state transitions to a capability - // change transition. + // If system is in full wake, first enter dark wake by + // converting the power drop to a capability change. + // Once in dark wake, transition to sleep state ASAP. darkWakeToSleepASAP = true; - // Drop graphics and audio capability. - // No transition if system is already in dark wake. - + // Drop graphics and audio capability _desiredCapability &= ~( kIOPMSystemCapabilityGraphics | kIOPMSystemCapabilityAudio ); + // Convert to capability change (ON->ON) *inOutPowerState = ON_STATE; *inOutChangeFlags |= kIOPMSynchronize; - // Revert device desire from SLEEP->ON. + // Revert device desire from SLEEP to ON changePowerStateToPriv(ON_STATE); } else { - // Broadcast root power down + // System is in dark wake, ok to drop power state. + // Broadcast root powering down to entire tree. *inOutChangeFlags |= kIOPMRootChangeDown; } } @@ -4632,6 +4426,14 @@ void IOPMrootDomain::handleOurPowerChangeStart( _systemMessageClientMask = 0; capabilityLoss = false; + if (lowBatteryCondition) + { + // Low battery notification may arrive after the initial sleep request + // has been queued. Override the sleep reason so powerd and others can + // treat this as an emergency sleep. + sleepReason = kIOPMSleepReasonLowPower; + } + // 1. Explicit capability change. if (changeFlags & kIOPMSynchronize) @@ -4668,7 +4470,7 @@ void IOPMrootDomain::handleOurPowerChangeStart( kIOPMSystemCapabilityAudio ); } #if HIBERNATION - IOHibernateSetWakeCapabilities(_desiredCapability); + IOHibernateSetWakeCapabilities(_desiredCapability); #endif } @@ -4680,6 +4482,16 @@ void IOPMrootDomain::handleOurPowerChangeStart( { _pendingCapability = 0; capabilityLoss = true; + + // Clear previous stats + IOLockLock(pmStatsLock); + if (pmStatsAppResponses) + { + pmStatsAppResponses->release(); + pmStatsAppResponses = OSArray::withCapacity(5); + } + IOLockUnlock(pmStatsLock); + } else if (kSystemTransitionNewCapClient != _systemTransitionType) { @@ -4711,13 +4523,6 @@ void IOPMrootDomain::handleOurPowerChangeStart( { tracePoint( kIOPMTracePointDarkWakeExit ); - if (pmStatsAppResponses) - { - setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses); - pmStatsAppResponses->release(); - pmStatsAppResponses = OSArray::withCapacity(5); - } - willEnterFullWake(); } @@ -4729,6 +4534,7 @@ void IOPMrootDomain::handleOurPowerChangeStart( _systemMessageClientMask = kSystemMessageClientPowerd | kSystemMessageClientLegacyApp; + // rdar://15971327 // Prevent user active transitions before notifying clients // that system will sleep. 
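In overrideOurPowerChange() above, a sleep request that arrives while the system is in full wake is first demoted to a capability change: the graphics and audio bits are cleared from the desired set while the CPU bit survives, which is precisely what separates dark wake from sleep. A small model of that bit bookkeeping, with made-up bit positions:

    #include <cstdint>
    #include <cstdio>

    // Made-up bit positions; only the transitions matter for this sketch.
    enum : uint32_t { kCapCPU = 1u << 0, kCapGraphics = 1u << 1, kCapAudio = 1u << 2 };

    // CAP_LOSS analogue: the bit is held now but absent from the pending set.
    static bool capLoss(uint32_t from, uint32_t to, uint32_t bit) {
        return (from & bit) != 0 && (to & bit) == 0;
    }

    int main() {
        uint32_t current = kCapCPU | kCapGraphics | kCapAudio;     // full wake
        uint32_t desired = current & ~(kCapGraphics | kCapAudio);  // dark wake
        std::printf("graphics lost: %d, cpu lost: %d\n",
                    capLoss(current, desired, kCapGraphics),
                    capLoss(current, desired, kCapCPU));
        return 0;
    }

Because the CPU bit survives, the root domain can stay in ON_STATE and advertise the drop as a kIOPMSynchronize capability change rather than a power-state change, then fall to sleep from dark wake as a second step.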
@@ -4739,7 +4545,7 @@ void IOPMrootDomain::handleOurPowerChangeStart( // Publish the sleep reason for full to dark wake publishSleepReason = true; lastSleepReason = fullToDarkReason = sleepReason; - + // Publish a UUID for the Sleep --> Wake cycle handlePublishSleepWakeUUID(true); } @@ -4764,11 +4570,6 @@ void IOPMrootDomain::handleOurPowerChangeStart( publishSleepReason = true; lastSleepReason = sleepReason; - - if (timeline) - timeline->setSleepCycleInProgressFlag(true); - - recordPMEvent(kIOPMEventTypeSleep, NULL, sleepReason, kIOReturnSuccess); } // 3. System wake. @@ -4776,12 +4577,7 @@ void IOPMrootDomain::handleOurPowerChangeStart( else if (kSystemTransitionWake == _systemTransitionType) { tracePoint( kIOPMTracePointWakeWillPowerOnClients ); - if (pmStatsAppResponses) - { - setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses); - pmStatsAppResponses->release(); - pmStatsAppResponses = OSArray::withCapacity(5); - } + // Clear stats about sleep if (_pendingCapability & kIOPMSystemCapabilityGraphics) { @@ -4869,7 +4665,7 @@ void IOPMrootDomain::handleOurPowerChangeDone( (void *) kStimulusDarkWakeReentry, _systemStateGeneration ); } - + // Revert device desire to max. changePowerStateToPriv(ON_STATE); } @@ -4903,7 +4699,7 @@ void IOPMrootDomain::handleOurPowerChangeDone( darkWakeToSleepASAP = false; pciCantSleepValid = false; darkWakeSleepService = false; - + if (CAP_LOSS(kIOPMSystemCapabilityCPU)) { // Remove the influence of display power assertion @@ -4919,29 +4715,6 @@ void IOPMrootDomain::handleOurPowerChangeDone( if (((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0) && (_pendingCapability & kIOPMSystemCapabilityCPU)) { -#if DISABLE_SLEEP_ASAP_FOR_NETWORK_WAKE - if (((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOInDark) == 0) && - (kSystemTransitionWake == _systemTransitionType) && - (_lastDebugWakeSeconds == 0)) - { - OSObject * prop = copyProperty(kIOPMRootDomainWakeTypeKey); - if (prop) - { - OSString * wakeType = OSDynamicCast(OSString, prop); - if (wakeType && - wakeType->isEqualTo(kIOPMRootDomainWakeTypeNetwork)) - { - // Woke from network and entered dark wake. - if (darkWakeToSleepASAP) - { - DLOG("cleared darkWakeToSleepASAP\n"); - darkWakeToSleepASAP = false; - } - } - prop->release(); - } - } -#endif // Queue an evaluation of whether to remain in dark wake, // and for how long. This serves the purpose of draining // any assertions from the queue. @@ -5057,7 +4830,7 @@ void IOPMrootDomain::overridePowerChangeForUIService( { // For graphics devices, arm the limiter when entering // system sleep. Not when dropping to dark wake. 
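handleOurPowerChangeDone() above queues kStimulusDarkWakeReentry together with _systemStateGeneration. Presumably the consumer compares the stamped generation against the current one, so a stimulus queued before a sleep/wake transition cannot fire after the system has moved on; here is a user-space sketch of that stale-event guard, under that assumption.

    #include <cstdint>
    #include <cstdio>

    static uint64_t gSystemStateGeneration = 7;  // advanced on each sleep/wake transition

    static void handleStimulus(uint64_t queuedGeneration) {
        if (queuedGeneration != gSystemStateGeneration) {
            std::puts("stale stimulus dropped");
            return;
        }
        std::puts("dark wake reentry handled");
    }

    int main() {
        uint64_t queued = gSystemStateGeneration;  // captured when the event is queued
        handleStimulus(queued);                    // same generation: handled
        gSystemStateGeneration++;                  // a transition happened in between
        handleStimulus(queued);                    // now stale: dropped
        return 0;
    }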
- actions->parameter |= kPMActionsFlagLimitPower; + actions->parameter |= kPMActionsFlagLimitPower; } if (actions->parameter & kPMActionsFlagLimitPower) @@ -5184,7 +4957,6 @@ void IOPMrootDomain::handleActivityTickleForDisplayWrangler( if (!wranglerTickled && ((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0)) { - setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeHIDActivity); DLOG("display wrangler tickled\n"); if (kIOLogPMRootDomain & gIOKitDebug) OSReportWithBacktrace("Dark wake display tickle"); @@ -5192,7 +4964,8 @@ void IOPMrootDomain::handleActivityTickleForDisplayWrangler( { pmPowerStateQueue->submitPowerEvent( kPowerEventPolicyStimulus, - (void *) kStimulusDarkWakeActivityTickle ); + (void *) kStimulusDarkWakeActivityTickle, + true /* set wake type */ ); } } #endif @@ -5207,7 +4980,7 @@ void IOPMrootDomain::handleUpdatePowerClientForDisplayWrangler( { #if !NO_KERNEL_HID assert(service == wrangler); - + // This function implements half of the user active detection // by monitoring changes to the display wrangler's device desire. // @@ -5293,7 +5066,7 @@ bool IOPMrootDomain::shouldDelayChildNotification( void IOPMrootDomain::handlePowerChangeStartForPCIDevice( IOService * service, - IOPMActions * actions, + IOPMActions * actions, IOPMPowerStateIndex powerState, IOPMPowerChangeFlags * inOutChangeFlags ) { @@ -5305,7 +5078,7 @@ void IOPMrootDomain::handlePowerChangeStartForPCIDevice( void IOPMrootDomain::handlePowerChangeDoneForPCIDevice( IOService * service, - IOPMActions * actions, + IOPMActions * actions, IOPMPowerStateIndex powerState, IOPMPowerChangeFlags changeFlags ) { @@ -5321,14 +5094,28 @@ void IOPMrootDomain::handlePowerChangeDoneForPCIDevice( // Override IOService::registerInterest() to intercept special clients. //****************************************************************************** +class IOPMServiceInterestNotifier: public _IOServiceInterestNotifier +{ + + friend class IOPMrootDomain; + OSDeclareDefaultStructors(IOPMServiceInterestNotifier) + +protected: + uint32_t ackTimeoutCnt; + +}; + +OSDefineMetaClassAndStructors(IOPMServiceInterestNotifier, _IOServiceInterestNotifier) + IONotifier * IOPMrootDomain::registerInterest( const OSSymbol * typeOfInterest, IOServiceInterestHandler handler, void * target, void * ref ) { - IONotifier * notifier; + IOPMServiceInterestNotifier *notifier = 0; bool isSystemCapabilityClient; bool isKernelCapabilityClient; + IOReturn rc = kIOReturnError; isSystemCapabilityClient = typeOfInterest && @@ -5341,9 +5128,19 @@ IONotifier * IOPMrootDomain::registerInterest( if (isSystemCapabilityClient) typeOfInterest = gIOAppPowerStateInterest; - notifier = super::registerInterest(typeOfInterest, handler, target, ref); - if (notifier && pmPowerStateQueue) + notifier = new IOPMServiceInterestNotifier; + if (!notifier) return NULL; + + if (notifier->init()) { + rc = super::registerInterestForNotifer(notifier, typeOfInterest, handler, target, ref); + } + if (rc != kIOReturnSuccess) { + notifier->release(); + notifier = 0; + } + if (notifier && pmPowerStateQueue) { + notifier->ackTimeoutCnt = 0; if (isSystemCapabilityClient) { notifier->retain(); @@ -5418,7 +5215,7 @@ bool IOPMrootDomain::systemMessageFilter( capArgs->changeFlags = kIOPMSystemCapabilityDidChange; } - // Capability change messages only go to the PM configd plugin. + // Capability change messages only go to the PM configd plugin. // Wait for response post-change if capability is increasing. // Wait for response pre-change if capability is decreasing.
@@ -5447,7 +5244,7 @@ bool IOPMrootDomain::systemMessageFilter( allow = true; break; } - + // Not idle sleep, don't ask apps. if (context->changeFlags & kIOPMSkipAskPowerDown) { @@ -5477,7 +5274,17 @@ bool IOPMrootDomain::systemMessageFilter( if ((context->notifyType == kNotifyApps) && (_systemMessageClientMask & kSystemMessageClientLegacyApp)) { + IOPMServiceInterestNotifier *notify; allow = true; + + if ((notify = OSDynamicCast(IOPMServiceInterestNotifier, (OSObject *)object)) + && arg3) { + + if (notify->ackTimeoutCnt >= 3) + *((OSObject **) arg3) = kOSBooleanFalse; + else + *((OSObject **) arg3) = kOSBooleanTrue; + } } else if ((context->notifyType == kNotifyPriority) && (_systemMessageClientMask & kSystemMessageClientKernel)) @@ -5493,7 +5300,7 @@ bool IOPMrootDomain::systemMessageFilter( if (_joinedCapabilityClients->getCount() == 0) { DLOG("destroyed capability client set %p\n", - _joinedCapabilityClients); + OBFUSCATE(_joinedCapabilityClients)); _joinedCapabilityClients->release(); _joinedCapabilityClients = 0; } @@ -5515,7 +5322,7 @@ IOReturn IOPMrootDomain::setMaintenanceWakeCalendar( if (!calendar) return kIOReturnBadArgument; - + data = OSData::withBytesNoCopy((void *) calendar, sizeof(*calendar)); if (!data) return kIOReturnNoMemory; @@ -5524,7 +5331,7 @@ IOReturn IOPMrootDomain::setMaintenanceWakeCalendar( ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data); if (kIOReturnSuccess == ret) OSBitOrAtomic(kIOPMAlarmBitMaintenanceWake, &_scheduledAlarms); - } else + } else if (kPMCalendarTypeSleepService == calendar->selector) { ret = setPMSetting(gIOPMSettingSleepServiceWakeCalendarKey, data); @@ -5532,7 +5339,7 @@ IOReturn IOPMrootDomain::setMaintenanceWakeCalendar( OSBitOrAtomic(kIOPMAlarmBitSleepServiceWake, &_scheduledAlarms); } DLOG("_scheduledAlarms = 0x%x\n", (uint32_t) _scheduledAlarms); - + data->release(); return ret; } @@ -5584,7 +5391,7 @@ IOReturn IOPMrootDomain::displayWranglerNotification( break; case kIOMessageDeviceHasPoweredOn: - // Display wrangler has powered on due to user activity + // Display wrangler has powered on due to user activity // or wake from sleep. if (kWranglerPowerStateMax == displayPowerState) @@ -5611,7 +5418,7 @@ IOReturn IOPMrootDomain::displayWranglerNotification( // When it's published we install a power state change handler. 
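These hunks give each interest notifier an ackTimeoutCnt, and systemMessageFilter() answers kOSBooleanFalse through arg3 once a client has timed out three times in a row: the system simply stops waiting on chronic offenders. The pmStatsRecordApplicationResponse() hunk later in this patch resets the counter on any timely reply. A minimal model of the policy:

    #include <cstdint>
    #include <cstdio>

    struct Notifier { uint32_t ackTimeoutCnt = 0; };

    // Mirrors the arg3 decision in systemMessageFilter().
    static bool shouldWaitForClient(const Notifier &n) {
        return n.ackTimeoutCnt < 3;
    }

    // Mirrors the counter update in pmStatsRecordApplicationResponse().
    static void recordResponse(Notifier &n, bool timedOut) {
        if (timedOut) n.ackTimeoutCnt++;
        else          n.ackTimeoutCnt = 0;
    }

    int main() {
        Notifier n;
        for (int i = 0; i < 4; i++) {
            std::printf("strike %d: wait=%d\n", i, shouldWaitForClient(n));
            recordResponse(n, /*timedOut=*/true);
        }
        return 0;
    }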
//****************************************************************************** -bool IOPMrootDomain::displayWranglerMatchPublished( +bool IOPMrootDomain::displayWranglerMatchPublished( void * target, void * refCon, IOService * newService, @@ -5619,8 +5426,8 @@ bool IOPMrootDomain::displayWranglerMatchPublished( { #if !NO_KERNEL_HID // found the display wrangler, now install a handler - if( !newService->registerInterest( gIOGeneralInterest, - &displayWranglerNotification, target, 0) ) + if( !newService->registerInterest( gIOGeneralInterest, + &displayWranglerNotification, target, 0) ) { return false; } @@ -5630,7 +5437,7 @@ bool IOPMrootDomain::displayWranglerMatchPublished( #if defined(__i386__) || defined(__x86_64__) -bool IOPMrootDomain::IONVRAMMatchPublished( +bool IOPMrootDomain::IONVRAMMatchPublished( void * target, void * refCon, IOService * newService, @@ -5638,22 +5445,30 @@ bool IOPMrootDomain::IONVRAMMatchPublished( { unsigned int len = 0; IOPMrootDomain *rd = (IOPMrootDomain *)target; + OSNumber *statusCode = NULL; if (PEReadNVRAMProperty(kIOSleepWakeDebugKey, NULL, &len)) { - rd->swd_flags |= SWD_BOOT_BY_WDOG; - MSG("System was rebooted due to Sleep/Wake failure\n"); - - if ( (rd->swd_logBufMap = rd->sleepWakeDebugRetrieve()) != NULL) { - rd->swd_flags |= SWD_VALID_LOGS; + statusCode = OSDynamicCast(OSNumber, rd->getProperty(kIOPMSleepWakeFailureCodeKey)); + if (statusCode != NULL) { + if (statusCode->unsigned64BitValue() != 0) { + rd->swd_flags |= SWD_BOOT_BY_SW_WDOG; + MSG("System was rebooted due to Sleep/Wake failure\n"); + } + else { + rd->swd_flags |= SWD_BOOT_BY_OSX_WDOG; + MSG("System was non-responsive and was rebooted by watchdog\n"); + } } + + rd->swd_logBufMap = rd->sleepWakeDebugRetrieve(); } if (notifier) notifier->remove(); return true; } #else -bool IOPMrootDomain::IONVRAMMatchPublished( +bool IOPMrootDomain::IONVRAMMatchPublished( void * target, void * refCon, IOService * newService, @@ -5674,10 +5489,10 @@ void IOPMrootDomain::reportUserInput( void ) #if !NO_KERNEL_HID OSIterator * iter; - if(!wrangler) + if(!wrangler) { iter = getMatchingServices(serviceMatching("IODisplayWrangler")); - if(iter) + if(iter) { wrangler = (IOService *) iter->getNextObject(); iter->release(); @@ -5698,11 +5513,13 @@ bool IOPMrootDomain::latchDisplayWranglerTickle( bool latch ) #if !NO_KERNEL_HID if (latch) { - // Not too late to prevent the display from lighting up if (!(_currentCapability & kIOPMSystemCapabilityGraphics) && !(_pendingCapability & kIOPMSystemCapabilityGraphics) && !checkSystemCanSustainFullWake()) { + // Currently in dark wake, and not transitioning to full wake. + // Full wake is unsustainable, so latch the tickle to prevent + // the display from lighting up momentarily. 
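IONVRAMMatchPublished() above now distinguishes two reboot causes: the presence of the NVRAM debug key proves the previous shutdown was abnormal, and the saved failure code decides which watchdog fired. A non-zero code means the sleep/wake watchdog caught a stuck transition, while zero means the machine was simply unresponsive and the platform watchdog rebooted it. A compilable sketch of that triage; the flag values are placeholders, not the real SWD_* definitions.

    #include <cstdint>
    #include <cstdio>

    enum : uint32_t { SWD_BOOT_BY_SW_WDOG = 1u << 0, SWD_BOOT_BY_OSX_WDOG = 1u << 1 };

    static uint32_t classifyReboot(uint64_t sleepWakeFailureCode) {
        if (sleepWakeFailureCode != 0) {
            std::puts("System was rebooted due to Sleep/Wake failure");
            return SWD_BOOT_BY_SW_WDOG;
        }
        std::puts("System was non-responsive and was rebooted by watchdog");
        return SWD_BOOT_BY_OSX_WDOG;
    }

    int main() {
        classifyReboot(0x1f00000000000000ULL);  // illustrative failure code
        classifyReboot(0);
        return 0;
    }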
wranglerTickleLatched = true; } else @@ -5749,13 +5566,13 @@ void IOPMrootDomain::setDisplayPowerOn( uint32_t options ) // Notification on battery class IOPowerSource appearance //****************************************************************************** -bool IOPMrootDomain::batteryPublished( - void * target, +bool IOPMrootDomain::batteryPublished( + void * target, void * root_domain, IOService * resourceService, IONotifier * notifier __unused ) -{ - // rdar://2936060&4435589 +{ + // rdar://2936060&4435589 // All laptops have dimmable LCD displays // All laptops have batteries // So if this machine has a battery, publish the fact that the backlight @@ -5884,25 +5701,22 @@ bool IOPMrootDomain::checkSystemCanSustainFullWake( void ) if (lowBatteryCondition) { // Low battery wake, or received a low battery notification - // while system is awake. + // while system is awake. This condition will persist until + // the following wake. return false; } if (clamshellExists && clamshellClosed && !clamshellSleepDisabled) { + // Graphics state is unknown and external display might not be probed. + // Do not incorporate state that requires graphics to be in max power + // such as desktopMode or clamshellDisabled. + if (!acAdaptorConnected) { DLOG("full wake check: no AC\n"); return false; } - - if (CAP_CURRENT(kIOPMSystemCapabilityGraphics) && - !desktopMode && !clamshellDisabled) - { - // No external display - DLOG("full wake check: no ext display\n"); - return false; - } } #endif return true; @@ -5944,7 +5758,7 @@ void IOPMrootDomain::adjustPowerState( bool sleepASAP ) void IOPMrootDomain::dispatchPowerEvent( uint32_t event, void * arg0, uint64_t arg1 ) { - DLOG("power event %u args %p 0x%llx\n", event, arg0, arg1); + DLOG("power event %u args %p 0x%llx\n", event, OBFUSCATE(arg0), arg1); ASSERT_GATED(); switch (event) @@ -5956,7 +5770,7 @@ void IOPMrootDomain::dispatchPowerEvent( case kPowerEventReceivedPowerNotification: handlePowerNotification( (UInt32)(uintptr_t) arg0 ); break; - + case kPowerEventSystemBootCompleted: if (systemBooting) { @@ -5973,13 +5787,17 @@ void IOPMrootDomain::dispatchPowerEvent( } if (swd_flags & SWD_VALID_LOGS) { - sleepWakeDebugDump(swd_logBufMap); - swd_logBufMap->release(); - swd_logBufMap = 0; + if (swd_flags & SWD_LOGS_IN_MEM) { + sleepWakeDebugDumpFromMem(swd_logBufMap); + swd_logBufMap->release(); + swd_logBufMap = 0; + } + else if (swd_flags & SWD_LOGS_IN_FILE) + sleepWakeDebugDumpFromFile(); } - else if (swd_flags & SWD_BOOT_BY_WDOG) { - // If logs are invalid, write the failure code - sleepWakeDebugDump(NULL); + else if (swd_flags & (SWD_BOOT_BY_SW_WDOG|SWD_BOOT_BY_OSX_WDOG)) { + // If logs are invalid, write the failure code + sleepWakeDebugDumpFromMem(NULL); } // If lid is closed, re-send lid closed notification // now that booting is complete. @@ -5999,7 +5817,7 @@ void IOPMrootDomain::dispatchPowerEvent( to prevent sleep at unexpected times while loginwindow is trying to shut down apps and while the OS is trying to transition to complete power off. - + Set to true during shutdown, as soon as loginwindow shows the "shutdown countdown dialog", through individual app termination, and through black screen kernel shutdown. @@ -6010,7 +5828,7 @@ void IOPMrootDomain::dispatchPowerEvent( A shutdown was initiated, but then the shutdown was cancelled, clearing systemShutdown to false here.
*/ - systemShutdown = false; + systemShutdown = false; } break; @@ -6072,17 +5890,14 @@ void IOPMrootDomain::dispatchPowerEvent( pmAssertions->handleSetAssertionLevel(arg1, (IOPMDriverAssertionLevel)(uintptr_t)arg0); } break; - + case kPowerEventQueueSleepWakeUUID: handleQueueSleepWakeUUID((OSObject *)arg0); break; case kPowerEventPublishSleepWakeUUID: handlePublishSleepWakeUUID((bool)arg0); break; - case kPowerEventSuspendClient: - handleSuspendPMNotificationClient((uintptr_t)arg0, (bool)arg1); - break; - + case kPowerEventSetDisplayPowerOn: if (!wrangler) break; if (arg1 != 0) @@ -6119,7 +5934,7 @@ void IOPMrootDomain::dispatchPowerEvent( // systemPowerEventOccurred // // The power controller is notifying us of a hardware-related power management -// event that we must handle. +// event that we must handle. // // systemPowerEventOccurred covers the same functionality that // receivePowerNotification does; it simply provides a richer API for conveying @@ -6133,9 +5948,9 @@ IOReturn IOPMrootDomain::systemPowerEventOccurred( IOReturn attempt = kIOReturnSuccess; OSNumber *newNumber = NULL; - if (!event) + if (!event) return kIOReturnBadArgument; - + newNumber = OSNumber::withNumber(intValue, 8*sizeof(intValue)); if (!newNumber) return kIOReturnInternalError; @@ -6153,8 +5968,8 @@ IOReturn IOPMrootDomain::systemPowerEventOccurred( { OSDictionary *thermalsDict = NULL; bool shouldUpdate = true; - - if (!event || !value) + + if (!event || !value) return kIOReturnBadArgument; // LOCK @@ -6164,9 +5979,9 @@ IOReturn IOPMrootDomain::systemPowerEventOccurred( if (featuresDictLock) IOLockLock(featuresDictLock); thermalsDict = (OSDictionary *)getProperty(kIOPMRootDomainPowerStatusKey); - + if (thermalsDict && OSDynamicCast(OSDictionary, thermalsDict)) { - thermalsDict = OSDictionary::withDictionary(thermalsDict); + thermalsDict = OSDictionary::withDictionary(thermalsDict); } else { thermalsDict = OSDictionary::withCapacity(1); } @@ -6245,15 +6060,15 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) /* * Sleep Now! */ - if (msg & kIOPMSleepNow) + if (msg & kIOPMSleepNow) { privateSleepSystem (kIOPMSleepReasonSoftware); } - + /* * Power Emergency */ - if (msg & kIOPMPowerEmergency) + if (msg & kIOPMPowerEmergency) { lowBatteryCondition = true; privateSleepSystem (kIOPMSleepReasonLowPower); @@ -6262,7 +6077,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) /* * Clamshell OPEN */ - if (msg & kIOPMClamshellOpened) + if (msg & kIOPMClamshellOpened) { // Received clamshell open message from clamshell controlling driver // Update our internal state and tell general interest clients @@ -6279,19 +6094,19 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) // Tell PMCPU informCPUStateChange(kInformLid, 0); - // Tell general interest clients + // Tell general interest clients sendClientClamshellNotification(); bool aborting = ((lastSleepReason == kIOPMSleepReasonClamshell) - || (lastSleepReason == kIOPMSleepReasonIdle) + || (lastSleepReason == kIOPMSleepReasonIdle) || (lastSleepReason == kIOPMSleepReasonMaintenance)); if (aborting) userActivityCount++; DLOG("clamshell tickled %d lastSleepReason %d\n", userActivityCount, lastSleepReason); } - /* + /* * Clamshell CLOSED - * Send the clamshell interest notification since the lid is closing. + * Send the clamshell interest notification since the lid is closing.
*/ if (msg & kIOPMClamshellClosed) { @@ -6305,8 +6120,8 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) // Tell general interest clients sendClientClamshellNotification(); - - // And set eval_clamshell = so we can attempt + + // And set eval_clamshell = so we can attempt eval_clamshell = true; } @@ -6315,7 +6130,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) * * -> reevaluate lid state */ - if (msg & kIOPMSetDesktopMode) + if (msg & kIOPMSetDesktopMode) { desktopMode = (0 != (msg & kIOPMSetValue)); msg &= ~(kIOPMSetDesktopMode | kIOPMSetValue); @@ -6325,13 +6140,13 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) // Re-evaluate the lid state eval_clamshell = true; } - + /* * AC Adaptor connected * * -> reevaluate lid state */ - if (msg & kIOPMSetACAdaptorConnected) + if (msg & kIOPMSetACAdaptorConnected) { acAdaptorConnected = (0 != (msg & kIOPMSetValue)); msg &= ~(kIOPMSetACAdaptorConnected | kIOPMSetValue); @@ -6362,13 +6177,13 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) clock_get_uptime(&userActivityTime); } } - + /* * Enable Clamshell (external display disappear) * * -> reevaluate lid state */ - if (msg & kIOPMEnableClamshell) + if (msg & kIOPMEnableClamshell) { // Re-evaluate the lid state // System should sleep on external display disappearance @@ -6381,13 +6196,13 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) clamshellDisabled = false; sendClientClamshellNotification(); } - + /* * Disable Clamshell (external display appeared) * We don't bother re-evaluating clamshell state. If the system is awake, - * the lid is probably open. + * the lid is probably open. */ - if (msg & kIOPMDisableClamshell) + if (msg & kIOPMDisableClamshell) { clamshellDisabled = true; sendClientClamshellNotification(); @@ -6407,7 +6222,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) /* * Power Button */ - if (msg & kIOPMPowerButton) + if (msg & kIOPMPowerButton) { if (!wranglerAsleep) { @@ -6532,7 +6347,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) if ((sleepSlider != 0) && (minutesToIdleSleep == 0)) flags.bit.idleSleepDisabled = true; - if (((minutesDelta != extraSleepDelay) || + if (((minutesDelta != extraSleepDelay) || (userActivityTime != userActivityTime_prev)) && !flags.bit.idleSleepEnabled && !flags.bit.idleSleepDisabled) flags.bit.sleepDelayChanged = true; @@ -6551,7 +6366,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) case kStimulusDemandSystemSleep: displayIdleForDemandSleep = true; - if(wrangler && wranglerIdleSettings) + if (wrangler && wranglerIdleSettings) { // Request wrangler idle only when demand sleep is triggered // from full wake. @@ -6570,6 +6385,13 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) break; case kStimulusDarkWakeActivityTickle: + // arg == true implies real and not self generated wrangler tickle. + // Update wake type on PM work loop instead of the tickle thread to + // eliminate the possibility of an early tickle clobbering the wake + // type set by the platform driver. 
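The comment closing the hunk above carries the design point: the wake type is no longer set directly from the tickle thread, it is queued to the PM work loop so the update serializes against the platform driver's own wake-type write. A user-space model of funneling state changes through the one thread that owns the state:

    #include <condition_variable>
    #include <cstdio>
    #include <functional>
    #include <mutex>
    #include <queue>
    #include <thread>

    // Single-threaded task queue standing in for the PM work loop.
    class SerialQueue {
        std::queue<std::function<void()>> q;
        std::mutex m;
        std::condition_variable cv;
        bool done = false;
        std::thread worker{[this] { run(); }};
        void run() {
            for (;;) {
                std::unique_lock<std::mutex> lk(m);
                cv.wait(lk, [this] { return done || !q.empty(); });
                if (q.empty()) return;          // drained and shutting down
                auto task = std::move(q.front()); q.pop();
                lk.unlock();
                task();                         // runs on the owning thread only
            }
        }
    public:
        void submit(std::function<void()> task) {
            { std::lock_guard<std::mutex> lk(m); q.push(std::move(task)); }
            cv.notify_one();
        }
        ~SerialQueue() {
            { std::lock_guard<std::mutex> lk(m); done = true; }
            cv.notify_one();
            worker.join();
        }
    };

    int main() {
        SerialQueue pmWorkLoop;
        // Analogous to submitPowerEvent(kPowerEventPolicyStimulus, ...):
        pmWorkLoop.submit([] { std::puts("setProperty(WakeType, HIDActivity)"); });
        return 0;
    }

Since every mutation runs on the same thread, an early tickle and a late platform-driver update cannot interleave; whichever is queued last wins deterministically.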
+ if (arg == true) + setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeHIDActivity); + if (false == wranglerTickled) { if (latchDisplayWranglerTickle(true)) @@ -6651,7 +6473,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) else newSleepReason = kIOPMSleepReasonMaintenance; } - + if (checkSystemCanSleep(newSleepReason)) { privateSleepSystem(newSleepReason); @@ -6700,14 +6522,14 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) } else { - DLOG("user inactive\n"); + DLOG("user inactive\n"); } if (!userIsActive && sleepSlider) { startIdleSleepTimer(getTimeToIdleSleep()); } - + if (cancelQuickSpindown) restoreUserSpinDownTimeout(); } @@ -6779,6 +6601,7 @@ void IOPMrootDomain::requestFullWake( FullWakeReason reason ) { uint32_t options = 0; IOService * pciRoot = 0; + bool promotion = false; // System must be in dark wake and a valid reason for entering full wake if ((kFullWakeReasonNone == reason) || @@ -6798,8 +6621,6 @@ void IOPMrootDomain::requestFullWake( FullWakeReason reason ) !(_pendingCapability & kIOPMSystemCapabilityGraphics) && !graphicsSuppressed) { - DLOG("promote to full wake\n"); - // Promote to full wake while waking up to dark wake due to tickle. // PM will hold off notifying the graphics subsystem about system wake // as late as possible, so if a HID tickle does arrive, graphics can @@ -6815,6 +6636,7 @@ void IOPMrootDomain::requestFullWake( FullWakeReason reason ) // Immediately bring up audio and graphics pciRoot = pciHostBridgeDriver; willEnterFullWake(); + promotion = true; } // Unsafe to cancel once graphics was powered. @@ -6837,16 +6659,18 @@ void IOPMrootDomain::requestFullWake( FullWakeReason reason ) wrangler->activityTickle(0,0); } - if (options & kIOPMSyncCancelPowerDown) + // Log a timestamp for the initial full wake request. + // System may not always honor this full wake request. + if (!CAP_HIGHEST(kIOPMSystemCapabilityGraphics)) { AbsoluteTime now; uint64_t nsec; - // Log a timestamp for the initial full wake clock_get_uptime(&now); SUB_ABSOLUTETIME(&now, &systemWakeTime); absolutetime_to_nanoseconds(now, &nsec); - MSG("full wake (reason %u) %u ms\n", + MSG("full wake %s (reason %u) %u ms\n", + promotion ? 
"promotion" : "request", fullWakeReason, ((int)((nsec) / 1000000ULL))); } } @@ -6915,7 +6739,7 @@ void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, I { IOPMDriverAssertionType changedBits = newAssertions ^ oldAssertions; - messageClients(kIOPMMessageDriverAssertionsChanged); + messageClients(kIOPMMessageDriverAssertionsChanged); if (changedBits & kIOPMDriverAssertionPreventDisplaySleepBit) { @@ -7001,11 +6825,13 @@ void IOPMrootDomain::pmStatsRecordEvent( * IOPMrootDomain::pmStatsAppResponses */ void IOPMrootDomain::pmStatsRecordApplicationResponse( - const OSSymbol *response, - const char *name, - int messageType, + const OSSymbol *response, + const char *name, + int messageType, uint32_t delay_ms, - int app_pid) + int app_pid, + OSObject *object, + IOPMPowerStateIndex powerState) { OSDictionary *responseDescription = NULL; OSNumber *delayNum = NULL; @@ -7013,49 +6839,34 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse( OSNumber *pidNum = NULL; OSNumber *msgNum = NULL; const OSSymbol *appname; - const OSSymbol *entryName; - OSObject *entryType; - int i; -#if defined(__i386__) || defined(__x86_64__) - swd_hdr *hdr = NULL; - OSString *UUIDstring = NULL; - uint32_t spindumpSize = 0; - const OSSymbol *namesym = NULL; -#endif - - if (!pmStatsAppResponses || pmStatsAppResponses->getCount() > 50) - return; + const OSSymbol *sleep = NULL, *wake = NULL; + IOPMServiceInterestNotifier *notify = 0; - i = 0; - while ((responseDescription = (OSDictionary *) pmStatsAppResponses->getObject(i++))) + if (object && (notify = OSDynamicCast(IOPMServiceInterestNotifier, object))) { - entryType = responseDescription->getObject(_statsResponseTypeKey); - entryName = (OSSymbol *) responseDescription->getObject(_statsNameKey); - powerCaps = (OSNumber *) responseDescription->getObject(_statsPowerCapsKey); - if (entryName && (entryType == response) && entryName->isEqualTo(name) && - (powerCaps->unsigned32BitValue() == _pendingCapability)) - { - OSNumber * entryValue; - entryValue = (OSNumber *) responseDescription->getObject(_statsTimeMSKey); - if (entryValue && (entryValue->unsigned32BitValue() < delay_ms)) - entryValue->setValue(delay_ms); - return; - } + if (response->isEqualTo(gIOPMStatsApplicationResponseTimedOut)) + notify->ackTimeoutCnt++; + else + notify->ackTimeoutCnt = 0; + } + if (response->isEqualTo(gIOPMStatsApplicationResponsePrompt) || + (_systemTransitionType == kSystemTransitionNone) || (_systemTransitionType == kSystemTransitionNewCapClient)) + return; + + responseDescription = OSDictionary::withCapacity(5); - if (responseDescription) + if (responseDescription) { if (response) { responseDescription->setObject(_statsResponseTypeKey, response); } - - if (messageType != 0) { - msgNum = OSNumber::withNumber(messageType, 32); - if (msgNum) { - responseDescription->setObject(_statsMessageTypeKey, msgNum); - msgNum->release(); - } + + msgNum = OSNumber::withNumber(messageType, 32); + if (msgNum) { + responseDescription->setObject(_statsMessageTypeKey, msgNum); + msgNum->release(); } if (name && (strlen(name) > 0)) @@ -7081,79 +6892,51 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse( delayNum->release(); } - powerCaps = OSNumber::withNumber(_pendingCapability, 32); + if (response->isEqualTo(gIOPMStatsDriverPSChangeSlow)) { + powerCaps = OSNumber::withNumber(powerState, 32); + +#if !defined(__i386__) && !defined(__x86_64__) + IOLog("%s::powerStateChange type(%d) to(%lu) async took %d ms\n", + name, messageType, + powerState, delay_ms); +#endif + + } + else { + 
powerCaps = OSNumber::withNumber(_pendingCapability, 32); + } if (powerCaps) { responseDescription->setObject(_statsPowerCapsKey, powerCaps); powerCaps->release(); } - - if (pmStatsAppResponses) { - pmStatsAppResponses->setObject(responseDescription); + sleep = OSSymbol::withCString("Sleep"); + wake = OSSymbol::withCString("Wake"); + if (_systemTransitionType == kSystemTransitionSleep) { + responseDescription->setObject(kIOPMStatsSystemTransitionKey, sleep); } + else if (_systemTransitionType == kSystemTransitionWake) { + responseDescription->setObject(kIOPMStatsSystemTransitionKey, wake); + } + else if (_systemTransitionType == kSystemTransitionCapability) { + if (CAP_LOSS(kIOPMSystemCapabilityGraphics)) + responseDescription->setObject(kIOPMStatsSystemTransitionKey, sleep); + else if (CAP_GAIN(kIOPMSystemCapabilityGraphics)) + responseDescription->setObject(kIOPMStatsSystemTransitionKey, wake); + } + if (sleep) sleep->release(); + if (wake) wake->release(); - responseDescription->release(); - } - -#if defined(__i386__) || defined(__x86_64__) - if ((gIOKitDebug & kIOAppRespStacksOn) == 0) - goto done; - - if (!name || name[0] == '\0' || - !response->isEqualTo(gIOPMStatsApplicationResponseTimedOut)) - goto done; - - namesym = OSSymbol::withCString(name); - - // Skip stackshots of previous offenders - if (noAckApps->containsObject(namesym)) - goto done; - - if (noAckApps->getCount() == noAckApps->getCapacity()) { - // Remove oldest entry from over-flowing list - noAckApps->removeObject(noAckApps->getFirstObject()); - } - noAckApps->setLastObject(namesym); - - if (spindumpDesc != NULL) { - /* Add name of this new process in the header */ - hdr = (swd_hdr *)spindumpDesc->getBytesNoCopy(); - if (!hdr) goto done; - - snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "%s,%s",hdr->PMStatusCode, name); - goto done; - } - - spindumpSize = 256*1024; - spindumpDesc = IOBufferMemoryDescriptor::inTaskWithOptions( - kernel_task, kIODirectionIn | kIOMemoryMapperNone, spindumpSize); - - if (!spindumpDesc) - goto done; - hdr = (swd_hdr *)spindumpDesc->getBytesNoCopy(); - memset(hdr, 0, sizeof(swd_hdr)); - if ((UUIDstring = OSDynamicCast(OSString, - getProperty(kIOPMSleepWakeUUIDKey))) != NULL ) { - snprintf(hdr->UUID, sizeof(hdr->UUID), "UUID: %s\n", UUIDstring->getCStringNoCopy()); - } - snprintf(hdr->cps, sizeof(hdr->cps), "caps: %d\n", _pendingCapability); - snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "Process: %s", name); - snprintf(hdr->reason, sizeof(hdr->reason), "\nStackshot reason: App Response Timeout\n"); - hdr->spindump_offset = sizeof(swd_hdr); + IOLockLock(pmStatsLock); + if (pmStatsAppResponses && pmStatsAppResponses->getCount() < 50) { + pmStatsAppResponses->setObject(responseDescription); + } + IOLockUnlock(pmStatsLock); - stack_snapshot_from_kernel(-1, (char*)hdr+hdr->spindump_offset, - spindumpSize - hdr->spindump_offset, - STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO, - &hdr->spindump_size); - if (hdr->spindump_size == 0) { - spindumpDesc->release(); - spindumpDesc = NULL; + responseDescription->release(); } -done: - if (namesym) namesym->release(); -#endif return; } @@ -7166,8 +6949,8 @@ done: // //****************************************************************************** -#define kIOPMRegisterNVRAMTracePointHandlerKey \ - "IOPMRegisterNVRAMTracePointHandler" +#define kIOPMRegisterNVRAMTracePointHandlerKey \ + "IOPMRegisterNVRAMTracePointHandler" IOReturn IOPMrootDomain::callPlatformFunction( const OSSymbol * functionName, @@ -7180,19 +6963,19 @@ IOReturn 
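The pmStatsRecordApplicationResponse() rework above builds one small record per response, tags it with the system transition ("Sleep" or "Wake"), and appends it under pmStatsLock with a 50-entry cap so a chatty client cannot grow the array without bound. A compilable miniature with illustrative field names:

    #include <cstdint>
    #include <cstdio>
    #include <mutex>
    #include <string>
    #include <vector>

    // Illustrative record; the kernel stores an OSDictionary per response.
    struct ResponseRecord {
        std::string appName;
        uint32_t messageType;
        uint32_t delayMS;
        const char *transition;   // "Sleep" or "Wake"
    };

    static std::mutex pmStatsLock;
    static std::vector<ResponseRecord> pmStatsAppResponses;

    static void recordApplicationResponse(ResponseRecord rec) {
        std::lock_guard<std::mutex> lock(pmStatsLock);   // IOLockLock analogue
        if (pmStatsAppResponses.size() < 50)             // same 50-entry cap
            pmStatsAppResponses.push_back(std::move(rec));
    }

    int main() {
        recordApplicationResponse({"SomeApp", 0xe0000280u /* kIOMessageSystemWillSleep */,
                                   125, "Sleep"});
        std::printf("records: %zu\n", pmStatsAppResponses.size());
        return 0;
    }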
IOPMrootDomain::callPlatformFunction( !pmTracer->tracePointHandler && !pmTracer->tracePointTarget) { uint32_t tracePointPhases, tracePointPCI; - uint64_t statusCode; + uint64_t statusCode; pmTracer->tracePointHandler = (IOPMTracePointHandler) param1; pmTracer->tracePointTarget = (void *) param2; - tracePointPCI = (uint32_t)(uintptr_t) param3; - tracePointPhases = (uint32_t)(uintptr_t) param4; + tracePointPCI = (uint32_t)(uintptr_t) param3; + tracePointPhases = (uint32_t)(uintptr_t) param4; statusCode = (((uint64_t)tracePointPCI) << 32) | tracePointPhases; - if ((tracePointPhases >> 24) != kIOPMTracePointSystemUp) + if ((tracePointPhases >> 24) != kIOPMTracePointSystemUp) { MSG("Sleep failure code 0x%08x 0x%08x\n", tracePointPCI, tracePointPhases); } - setProperty(kIOPMSleepWakeFailureCodeKey, statusCode, 64); + setProperty(kIOPMSleepWakeFailureCodeKey, statusCode, 64); pmTracer->tracePointHandler( pmTracer->tracePointTarget, 0, 0 ); return kIOReturnSuccess; @@ -7220,6 +7003,9 @@ void IOPMrootDomain::tracePoint( uint8_t point ) { if (systemBooting) return; + if (kIOPMTracePointWakeCapabilityClients == point) + acceptSystemWakeEvents(false); + PMDebug(kPMLogSleepWakeTracePoint, point, 0); pmTracer->tracePoint(point); } @@ -7277,7 +7063,7 @@ IOReturn IOPMrootDomain::updateReport(IOReportChannelList *channelList, for (cnt = 0; cnt < channelList->nchannels; cnt++) { ch_id = channelList->channels[cnt].channel_id ; - if ((ch_id == kSleepCntChID) || + if ((ch_id == kSleepCntChID) || (ch_id == kDarkWkCntChID) || (ch_id == kUserWkCntChID)) { SIMPLEREPORT_INIT(buf, sizeof(buf), getRegistryEntryID(), ch_id, kIOReportCategoryPower); } @@ -7292,7 +7078,7 @@ IOReturn IOPMrootDomain::updateReport(IOReportChannelList *channelList, SIMPLEREPORT_UPDATEPREP(buf, data2cpy, size2cpy); SIMPLEREPORT_UPDATERES(kIOReportCopyChannelData, result); - dest->appendBytes(data2cpy, size2cpy); + dest->appendBytes(data2cpy, size2cpy); } exit: @@ -7315,7 +7101,7 @@ OSDefineMetaClassAndStructors(PMTraceWorker, OSObject) PMTraceWorker *PMTraceWorker::tracer(IOPMrootDomain *owner) { PMTraceWorker *me; - + me = OSTypeAlloc( PMTraceWorker ); if (!me || !me->init()) { @@ -7338,16 +7124,16 @@ PMTraceWorker *PMTraceWorker::tracer(IOPMrootDomain *owner) void PMTraceWorker::RTC_TRACE(void) { - if (tracePointHandler && tracePointTarget) - { - uint32_t wordA; + if (tracePointHandler && tracePointTarget) + { + uint32_t wordA; wordA = (tracePhase << 24) | (loginWindowPhase << 16) | (traceData8 << 8); tracePointHandler( tracePointTarget, traceData32, wordA ); - _LOG("RTC_TRACE wrote 0x%08x 0x%08x\n", traceData32, wordA); - } + _LOG("RTC_TRACE wrote 0x%08x 0x%08x\n", traceData32, wordA); + } } int PMTraceWorker::recordTopLevelPCIDevice(IOService * pciDevice) @@ -7443,23 +7229,23 @@ void PMTraceWorker::traceLoginWindowPhase(uint8_t phase) } void PMTraceWorker::tracePCIPowerChange( - change_t type, IOService *service, uint32_t changeFlags, uint32_t bitNum) + change_t type, IOService *service, uint32_t changeFlags, uint32_t bitNum) { - uint32_t bitMask; - uint32_t expectedFlag; + uint32_t bitMask; + uint32_t expectedFlag; - // Ignore PCI changes outside of system sleep/wake. + // Ignore PCI changes outside of system sleep/wake. if ((kIOPMTracePointSleepPowerPlaneDrivers != tracePhase) && (kIOPMTracePointWakePowerPlaneDrivers != tracePhase)) return; - // Only record the WillChange transition when going to sleep, - // and the DidChange on the way up. 
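callPlatformFunction() above and PMTraceWorker::getPMStatusCode() just below agree on one encoding: the 64-bit failure code carries the PCI progress bitmask in the high word, and a phase word packed as (tracePhase << 24) | (loginWindowPhase << 16) | (traceData8 << 8) in the low word. A round-trip sketch; the kIOPMTracePointSystemUp value is assumed to be 0 here.

    #include <cstdint>
    #include <cstdio>

    static uint64_t makeStatusCode(uint32_t pciBits, uint8_t tracePhase,
                                   uint8_t loginWindowPhase, uint8_t traceData8) {
        uint32_t phases = ((uint32_t)tracePhase << 24) |
                          ((uint32_t)loginWindowPhase << 16) |
                          ((uint32_t)traceData8 << 8);
        return ((uint64_t)pciBits << 32) | phases;        // getPMStatusCode() layout
    }

    int main() {
        uint64_t statusCode = makeStatusCode(0x00000004, 0x31, 0x02, 0x00);
        uint32_t tracePointPCI = (uint32_t)(statusCode >> 32);
        uint32_t tracePointPhases = (uint32_t)statusCode;
        if ((tracePointPhases >> 24) != 0)  // 0: kIOPMTracePointSystemUp, assumed
            std::printf("Sleep failure code 0x%08x 0x%08x\n",
                        tracePointPCI, tracePointPhases);
        return 0;
    }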
- changeFlags &= (kIOPMDomainWillChange | kIOPMDomainDidChange); - expectedFlag = (kIOPMTracePointSleepPowerPlaneDrivers == tracePhase) ? - kIOPMDomainWillChange : kIOPMDomainDidChange; - if (changeFlags != expectedFlag) - return; + // Only record the WillChange transition when going to sleep, + // and the DidChange on the way up. + changeFlags &= (kIOPMDomainWillChange | kIOPMDomainDidChange); + expectedFlag = (kIOPMTracePointSleepPowerPlaneDrivers == tracePhase) ? + kIOPMDomainWillChange : kIOPMDomainDidChange; + if (changeFlags != expectedFlag) + return; // Mark this device off in our bitfield if (bitNum < kPMMaxRTCBitfieldSize) @@ -7486,7 +7272,7 @@ void PMTraceWorker::tracePCIPowerChange( uint64_t PMTraceWorker::getPMStatusCode( ) { - return (((uint64_t)traceData32 << 32) | (tracePhase << 24) | + return (((uint64_t)traceData32 << 32) | (tracePhase << 24) | (loginWindowPhase << 16) | (traceData8 << 8)); } @@ -7499,193 +7285,193 @@ uint64_t PMTraceWorker::getPMStatusCode( ) // //****************************************************************************** -static unsigned int gPMHaltBusyCount; -static unsigned int gPMHaltIdleCount; -static int gPMHaltDepth; +static unsigned int gPMHaltBusyCount; +static unsigned int gPMHaltIdleCount; +static int gPMHaltDepth; static unsigned long gPMHaltEvent; -static IOLock * gPMHaltLock = 0; -static OSArray * gPMHaltArray = 0; +static IOLock * gPMHaltLock = 0; +static OSArray * gPMHaltArray = 0; static const OSSymbol * gPMHaltClientAcknowledgeKey = 0; PMHaltWorker * PMHaltWorker::worker( void ) { - PMHaltWorker * me; - IOThread thread; + PMHaltWorker * me; + IOThread thread; - do { - me = OSTypeAlloc( PMHaltWorker ); - if (!me || !me->init()) - break; + do { + me = OSTypeAlloc( PMHaltWorker ); + if (!me || !me->init()) + break; - me->lock = IOLockAlloc(); - if (!me->lock) - break; + me->lock = IOLockAlloc(); + if (!me->lock) + break; - DLOG("PMHaltWorker %p\n", OBFUSCATE(me)); - me->retain(); // thread holds extra retain - if (KERN_SUCCESS != kernel_thread_start(&PMHaltWorker::main, (void *) me, &thread)) - { - me->release(); - break; - } - thread_deallocate(thread); - return me; + DLOG("PMHaltWorker %p\n", OBFUSCATE(me)); + me->retain(); // thread holds extra retain + if (KERN_SUCCESS != kernel_thread_start(&PMHaltWorker::main, (void *) me, &thread)) + { + me->release(); + break; + } + thread_deallocate(thread); + return me; - } while (false); + } while (false); - if (me) me->release(); - return 0; + if (me) me->release(); + return 0; } void PMHaltWorker::free( void ) { - DLOG("PMHaltWorker free %p\n", OBFUSCATE(this)); - if (lock) - { - IOLockFree(lock); - lock = 0; - } - return OSObject::free(); + DLOG("PMHaltWorker free %p\n", OBFUSCATE(this)); + if (lock) + { + IOLockFree(lock); + lock = 0; + } + return OSObject::free(); } void PMHaltWorker::main( void * arg, wait_result_t waitResult ) { - PMHaltWorker * me = (PMHaltWorker *) arg; - - IOLockLock( gPMHaltLock ); - gPMHaltBusyCount++; - me->depth = gPMHaltDepth; - IOLockUnlock( gPMHaltLock ); - - while (me->depth >= 0) - { - PMHaltWorker::work( me ); - - IOLockLock( gPMHaltLock ); - if (++gPMHaltIdleCount >= gPMHaltBusyCount) - { - // This is the last thread to finish work on this level, - // inform everyone to start working on next lower level. - gPMHaltDepth--; - me->depth = gPMHaltDepth; - gPMHaltIdleCount = 0; - thread_wakeup((event_t) &gPMHaltIdleCount); - } - else - { - // One or more threads are still working on this level, - // this thread must wait. 
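PMHaltWorker::main() in this hunk is a depth-stepped barrier: every worker drains services at the current PM-tree depth, and the last worker to go idle at that level lowers the shared depth and wakes the rest, so shallower nodes are never notified before deeper ones finish. A user-space reconstruction, with std::condition_variable standing in for IOLockSleep()/thread_wakeup(); thread count and depth are illustrative.

    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>
    #include <vector>

    static std::mutex gLock;                 // gPMHaltLock analogue
    static std::condition_variable gCV;
    static int gDepth = 3;                   // deepest level, processed first
    static unsigned gBusy = 0, gIdle = 0;    // gPMHaltBusyCount / gPMHaltIdleCount

    static void worker() {
        std::unique_lock<std::mutex> lk(gLock);
        gBusy++;
        int myDepth = gDepth;
        while (myDepth >= 0) {
            lk.unlock();
            std::printf("working at depth %d\n", myDepth);  // PMHaltWorker::work()
            lk.lock();
            if (++gIdle >= gBusy) {          // last thread to finish this level
                gDepth--;
                gIdle = 0;
                myDepth = gDepth;
                gCV.notify_all();            // thread_wakeup() analogue
            } else {                         // others still busy: wait for them
                myDepth = gDepth - 1;
                gCV.wait(lk, [&] { return myDepth == gDepth; });
            }
        }
    }

    int main() {
        std::vector<std::thread> pool;
        for (int i = 0; i < 2; i++) pool.emplace_back(worker);
        for (auto &t : pool) t.join();
        return 0;
    }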
- me->depth = gPMHaltDepth - 1; - do { - IOLockSleep(gPMHaltLock, &gPMHaltIdleCount, THREAD_UNINT); - } while (me->depth != gPMHaltDepth); - } - IOLockUnlock( gPMHaltLock ); - } + PMHaltWorker * me = (PMHaltWorker *) arg; + + IOLockLock( gPMHaltLock ); + gPMHaltBusyCount++; + me->depth = gPMHaltDepth; + IOLockUnlock( gPMHaltLock ); + + while (me->depth >= 0) + { + PMHaltWorker::work( me ); + + IOLockLock( gPMHaltLock ); + if (++gPMHaltIdleCount >= gPMHaltBusyCount) + { + // This is the last thread to finish work on this level, + // inform everyone to start working on next lower level. + gPMHaltDepth--; + me->depth = gPMHaltDepth; + gPMHaltIdleCount = 0; + thread_wakeup((event_t) &gPMHaltIdleCount); + } + else + { + // One or more threads are still working on this level, + // this thread must wait. + me->depth = gPMHaltDepth - 1; + do { + IOLockSleep(gPMHaltLock, &gPMHaltIdleCount, THREAD_UNINT); + } while (me->depth != gPMHaltDepth); + } + IOLockUnlock( gPMHaltLock ); + } - // No more work to do, terminate thread - DLOG("All done for worker: %p (visits = %u)\n", OBFUSCATE(me), me->visits); - thread_wakeup( &gPMHaltDepth ); - me->release(); + // No more work to do, terminate thread + DLOG("All done for worker: %p (visits = %u)\n", OBFUSCATE(me), me->visits); + thread_wakeup( &gPMHaltDepth ); + me->release(); } void PMHaltWorker::work( PMHaltWorker * me ) { - IOService * service; - OSSet * inner; - AbsoluteTime startTime; - UInt32 deltaTime; - bool timeout; - - while (true) - { - service = 0; - timeout = false; - - // Claim an unit of work from the shared pool - IOLockLock( gPMHaltLock ); - inner = (OSSet *)gPMHaltArray->getObject(me->depth); - if (inner) - { - service = (IOService *)inner->getAnyObject(); - if (service) - { - service->retain(); - inner->removeObject(service); - } - } - IOLockUnlock( gPMHaltLock ); - if (!service) - break; // no more work at this depth - - clock_get_uptime(&startTime); - - if (!service->isInactive() && - service->setProperty(gPMHaltClientAcknowledgeKey, me)) - { - IOLockLock(me->lock); - me->startTime = startTime; - me->service = service; - me->timeout = false; - IOLockUnlock(me->lock); - - service->systemWillShutdown( gPMHaltEvent ); - - // Wait for driver acknowledgement - IOLockLock(me->lock); - while (service->getProperty(gPMHaltClientAcknowledgeKey)) - { - IOLockSleep(me->lock, me, THREAD_UNINT); - } - me->service = 0; - timeout = me->timeout; - IOLockUnlock(me->lock); - } - - deltaTime = computeDeltaTimeMS(&startTime); - if ((deltaTime > kPMHaltTimeoutMS) || timeout || - (gIOKitDebug & kIOLogPMRootDomain)) - { - LOG("%s driver %s (%p) took %u ms\n", - (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? 
- "PowerOff" : "Restart", - service->getName(), OBFUSCATE(service), - (uint32_t) deltaTime ); - } - - service->release(); - me->visits++; - } + IOService * service; + OSSet * inner; + AbsoluteTime startTime; + UInt32 deltaTime; + bool timeout; + + while (true) + { + service = 0; + timeout = false; + + // Claim an unit of work from the shared pool + IOLockLock( gPMHaltLock ); + inner = (OSSet *)gPMHaltArray->getObject(me->depth); + if (inner) + { + service = (IOService *)inner->getAnyObject(); + if (service) + { + service->retain(); + inner->removeObject(service); + } + } + IOLockUnlock( gPMHaltLock ); + if (!service) + break; // no more work at this depth + + clock_get_uptime(&startTime); + + if (!service->isInactive() && + service->setProperty(gPMHaltClientAcknowledgeKey, me)) + { + IOLockLock(me->lock); + me->startTime = startTime; + me->service = service; + me->timeout = false; + IOLockUnlock(me->lock); + + service->systemWillShutdown( gPMHaltEvent ); + + // Wait for driver acknowledgement + IOLockLock(me->lock); + while (service->getProperty(gPMHaltClientAcknowledgeKey)) + { + IOLockSleep(me->lock, me, THREAD_UNINT); + } + me->service = 0; + timeout = me->timeout; + IOLockUnlock(me->lock); + } + + deltaTime = computeDeltaTimeMS(&startTime); + if ((deltaTime > kPMHaltTimeoutMS) || timeout || + (gIOKitDebug & kIOLogPMRootDomain)) + { + LOG("%s driver %s (%p) took %u ms\n", + (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? + "PowerOff" : "Restart", + service->getName(), OBFUSCATE(service), + (uint32_t) deltaTime ); + } + + service->release(); + me->visits++; + } } void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now ) { - UInt64 nano; - AbsoluteTime startTime; - AbsoluteTime endTime; - - endTime = *now; - - IOLockLock(me->lock); - if (me->service && !me->timeout) - { - startTime = me->startTime; - nano = 0; - if (CMP_ABSOLUTETIME(&endTime, &startTime) > 0) - { - SUB_ABSOLUTETIME(&endTime, &startTime); - absolutetime_to_nanoseconds(endTime, &nano); - } - if (nano > 3000000000ULL) - { - me->timeout = true; - MSG("%s still waiting on %s\n", - (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? - "PowerOff" : "Restart", - me->service->getName()); - } - } - IOLockUnlock(me->lock); + UInt64 nano; + AbsoluteTime startTime; + AbsoluteTime endTime; + + endTime = *now; + + IOLockLock(me->lock); + if (me->service && !me->timeout) + { + startTime = me->startTime; + nano = 0; + if (CMP_ABSOLUTETIME(&endTime, &startTime) > 0) + { + SUB_ABSOLUTETIME(&endTime, &startTime); + absolutetime_to_nanoseconds(endTime, &nano); + } + if (nano > 3000000000ULL) + { + me->timeout = true; + MSG("%s still waiting on %s\n", + (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? 
+ "PowerOff" : "Restart", + me->service->getName()); + } + } + IOLockUnlock(me->lock); } @@ -7697,28 +7483,28 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now ) void IOPMrootDomain::acknowledgeSystemWillShutdown( IOService * from ) { - PMHaltWorker * worker; - OSObject * prop; - - if (!from) - return; - - //DLOG("%s acknowledged\n", from->getName()); - prop = from->copyProperty( gPMHaltClientAcknowledgeKey ); - if (prop) - { - worker = (PMHaltWorker *) prop; - IOLockLock(worker->lock); - from->removeProperty( gPMHaltClientAcknowledgeKey ); - thread_wakeup((event_t) worker); - IOLockUnlock(worker->lock); - worker->release(); - } - else - { - DLOG("%s acknowledged without worker property\n", - from->getName()); - } + PMHaltWorker * worker; + OSObject * prop; + + if (!from) + return; + + //DLOG("%s acknowledged\n", from->getName()); + prop = from->copyProperty( gPMHaltClientAcknowledgeKey ); + if (prop) + { + worker = (PMHaltWorker *) prop; + IOLockLock(worker->lock); + from->removeProperty( gPMHaltClientAcknowledgeKey ); + thread_wakeup((event_t) worker); + IOLockUnlock(worker->lock); + worker->release(); + } + else + { + DLOG("%s acknowledged without worker property\n", + from->getName()); + } } @@ -7732,285 +7518,198 @@ static void notifySystemShutdown( IOService * root, unsigned long event ) { #define PLACEHOLDER ((OSSet *)gPMHaltArray) - IORegistryIterator * iter; - IORegistryEntry * entry; - IOService * node; - OSSet * inner; - PMHaltWorker * workers[kPMHaltMaxWorkers]; - AbsoluteTime deadline; - unsigned int totalNodes = 0; - unsigned int depth; - unsigned int rootDepth; - unsigned int numWorkers; - unsigned int count; - int waitResult; - void * baseFunc; - bool ok; - - DLOG("%s event = %lx\n", __FUNCTION__, event); - - baseFunc = OSMemberFunctionCast(void *, root, &IOService::systemWillShutdown); - - // Iterate the entire PM tree starting from root - - rootDepth = root->getDepth( gIOPowerPlane ); - if (!rootDepth) goto done; - - // debug - for repeated test runs - while (PMHaltWorker::metaClass->getInstanceCount()) - IOSleep(1); - - if (!gPMHaltArray) - { - gPMHaltArray = OSArray::withCapacity(40); - if (!gPMHaltArray) goto done; - } - else // debug - gPMHaltArray->flushCollection(); + IORegistryIterator * iter; + IORegistryEntry * entry; + IOService * node; + OSSet * inner; + PMHaltWorker * workers[kPMHaltMaxWorkers]; + AbsoluteTime deadline; + unsigned int totalNodes = 0; + unsigned int depth; + unsigned int rootDepth; + unsigned int numWorkers; + unsigned int count; + int waitResult; + void * baseFunc; + bool ok; - if (!gPMHaltLock) - { - gPMHaltLock = IOLockAlloc(); - if (!gPMHaltLock) goto done; - } + DLOG("%s event = %lx\n", __FUNCTION__, event); - if (!gPMHaltClientAcknowledgeKey) - { - gPMHaltClientAcknowledgeKey = - OSSymbol::withCStringNoCopy("PMShutdown"); - if (!gPMHaltClientAcknowledgeKey) goto done; - } + baseFunc = OSMemberFunctionCast(void *, root, &IOService::systemWillShutdown); - gPMHaltEvent = event; - - // Depth-first walk of PM plane - - iter = IORegistryIterator::iterateOver( - root, gIOPowerPlane, kIORegistryIterateRecursively); - - if (iter) - { - while ((entry = iter->getNextObject())) - { - node = OSDynamicCast(IOService, entry); - if (!node) - continue; - - if (baseFunc == - OSMemberFunctionCast(void *, node, &IOService::systemWillShutdown)) - continue; - - depth = node->getDepth( gIOPowerPlane ); - if (depth <= rootDepth) - continue; - - ok = false; - - // adjust to zero based depth - depth -= (rootDepth + 1); - - // gPMHaltArray 
is an array of containers, each container - // refers to nodes with the same depth. - - count = gPMHaltArray->getCount(); - while (depth >= count) - { - // expand array and insert placeholders - gPMHaltArray->setObject(PLACEHOLDER); - count++; - } - count = gPMHaltArray->getCount(); - if (depth < count) - { - inner = (OSSet *)gPMHaltArray->getObject(depth); - if (inner == PLACEHOLDER) - { - inner = OSSet::withCapacity(40); - if (inner) - { - gPMHaltArray->replaceObject(depth, inner); - inner->release(); - } - } - - // PM nodes that appear more than once in the tree will have - // the same depth, OSSet will refuse to add the node twice. - if (inner) - ok = inner->setObject(node); - } - if (!ok) - DLOG("Skipped PM node %s\n", node->getName()); - } - iter->release(); - } + // Iterate the entire PM tree starting from root - // debug only - for (int i = 0; (inner = (OSSet *)gPMHaltArray->getObject(i)); i++) - { - count = 0; - if (inner != PLACEHOLDER) - count = inner->getCount(); - DLOG("Nodes at depth %u = %u\n", i, count); - } + rootDepth = root->getDepth( gIOPowerPlane ); + if (!rootDepth) goto done; - // strip placeholders (not all depths are populated) - numWorkers = 0; - for (int i = 0; (inner = (OSSet *)gPMHaltArray->getObject(i)); ) - { - if (inner == PLACEHOLDER) - { - gPMHaltArray->removeObject(i); - continue; - } - count = inner->getCount(); - if (count > numWorkers) - numWorkers = count; - totalNodes += count; - i++; - } + // debug - for repeated test runs + while (PMHaltWorker::metaClass->getInstanceCount()) + IOSleep(1); - if (gPMHaltArray->getCount() == 0 || !numWorkers) - goto done; + if (!gPMHaltArray) + { + gPMHaltArray = OSArray::withCapacity(40); + if (!gPMHaltArray) goto done; + } + else // debug + gPMHaltArray->flushCollection(); - gPMHaltBusyCount = 0; - gPMHaltIdleCount = 0; - gPMHaltDepth = gPMHaltArray->getCount() - 1; + if (!gPMHaltLock) + { + gPMHaltLock = IOLockAlloc(); + if (!gPMHaltLock) goto done; + } - // Create multiple workers (and threads) + if (!gPMHaltClientAcknowledgeKey) + { + gPMHaltClientAcknowledgeKey = + OSSymbol::withCStringNoCopy("PMShutdown"); + if (!gPMHaltClientAcknowledgeKey) goto done; + } - if (numWorkers > kPMHaltMaxWorkers) - numWorkers = kPMHaltMaxWorkers; + gPMHaltEvent = event; - DLOG("PM nodes %u, maxDepth %u, workers %u\n", - totalNodes, gPMHaltArray->getCount(), numWorkers); + // Depth-first walk of PM plane - for (unsigned int i = 0; i < numWorkers; i++) - workers[i] = PMHaltWorker::worker(); + iter = IORegistryIterator::iterateOver( + root, gIOPowerPlane, kIORegistryIterateRecursively); - // Wait for workers to exhaust all available work + if (iter) + { + while ((entry = iter->getNextObject())) + { + node = OSDynamicCast(IOService, entry); + if (!node) + continue; - IOLockLock(gPMHaltLock); - while (gPMHaltDepth >= 0) - { - clock_interval_to_deadline(1000, kMillisecondScale, &deadline); + if (baseFunc == + OSMemberFunctionCast(void *, node, &IOService::systemWillShutdown)) + continue; - waitResult = IOLockSleepDeadline( - gPMHaltLock, &gPMHaltDepth, deadline, THREAD_UNINT); - if (THREAD_TIMED_OUT == waitResult) - { - AbsoluteTime now; - clock_get_uptime(&now); + depth = node->getDepth( gIOPowerPlane ); + if (depth <= rootDepth) + continue; - IOLockUnlock(gPMHaltLock); - for (unsigned int i = 0 ; i < numWorkers; i++) - { - if (workers[i]) - PMHaltWorker::checkTimeout(workers[i], &now); - } - IOLockLock(gPMHaltLock); - } - } - IOLockUnlock(gPMHaltLock); + ok = false; - // Release all workers + // adjust to zero based depth + depth -= 
(rootDepth + 1); - for (unsigned int i = 0; i < numWorkers; i++) - { - if (workers[i]) - workers[i]->release(); - // worker also retained by it's own thread - } + // gPMHaltArray is an array of containers, each container + // refers to nodes with the same depth. -done: - DLOG("%s done\n", __FUNCTION__); - return; -} + count = gPMHaltArray->getCount(); + while (depth >= count) + { + // expand array and insert placeholders + gPMHaltArray->setObject(PLACEHOLDER); + count++; + } + count = gPMHaltArray->getCount(); + if (depth < count) + { + inner = (OSSet *)gPMHaltArray->getObject(depth); + if (inner == PLACEHOLDER) + { + inner = OSSet::withCapacity(40); + if (inner) + { + gPMHaltArray->replaceObject(depth, inner); + inner->release(); + } + } -// MARK: - -// MARK: Sleep/Wake Logging + // PM nodes that appear more than once in the tree will have + // the same depth, OSSet will refuse to add the node twice. + if (inner) + ok = inner->setObject(node); + } + if (!ok) + DLOG("Skipped PM node %s\n", node->getName()); + } + iter->release(); + } -//********************************************************************************* -// Sleep/Wake logging -// -//********************************************************************************* + // debug only + for (int i = 0; (inner = (OSSet *)gPMHaltArray->getObject(i)); i++) + { + count = 0; + if (inner != PLACEHOLDER) + count = inner->getCount(); + DLOG("Nodes at depth %u = %u\n", i, count); + } -IOMemoryDescriptor *IOPMrootDomain::getPMTraceMemoryDescriptor(void) -{ - if (timeline) - return timeline->getPMTraceMemoryDescriptor(); - else - return NULL; -} + // strip placeholders (not all depths are populated) + numWorkers = 0; + for (int i = 0; (inner = (OSSet *)gPMHaltArray->getObject(i)); ) + { + if (inner == PLACEHOLDER) + { + gPMHaltArray->removeObject(i); + continue; + } + count = inner->getCount(); + if (count > numWorkers) + numWorkers = count; + totalNodes += count; + i++; + } -// Forwards external reports of detailed events to IOPMTimeline -IOReturn IOPMrootDomain::recordPMEvent(PMEventDetails *details) -{ - if (timeline && details) { - - IOReturn rc; + if (gPMHaltArray->getCount() == 0 || !numWorkers) + goto done; - // Record a detailed driver power change event, or... 
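/*
 * Aside: a condensed sketch of the depth-bucketing done in the walk above,
 * with a hypothetical helper name. The array is padded with PLACEHOLDER
 * entries until it reaches the node's zero-based depth; the first node seen
 * at a given depth swaps a real OSSet in for the placeholder, and the set's
 * membership semantics reject a node registered twice at the same depth.
 */
static bool addNodeAtDepth( OSArray * buckets, OSSet * placeholder,
                            IOService * node, unsigned int depth )
{
    while (depth >= buckets->getCount())
        buckets->setObject(placeholder);   // pad unpopulated depths

    OSSet * inner = (OSSet *) buckets->getObject(depth);
    if (inner == placeholder)
    {
        inner = OSSet::withCapacity(40);   // first node at this depth
        if (!inner)
            return false;
        buckets->replaceObject(depth, inner);
        inner->release();                  // the array holds the retain
    }
    return inner->setObject(node);         // false if already in the set
}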
- if(details->eventClassifier == kIOPMEventClassDriverEvent) {
- rc = timeline->recordDetailedPowerEvent( details );
- }
+ gPMHaltBusyCount = 0;
+ gPMHaltIdleCount = 0;
+ gPMHaltDepth = gPMHaltArray->getCount() - 1;

- // Record a system power management event
- else if(details->eventClassifier == kIOPMEventClassSystemEvent) {
- rc = timeline->recordSystemPowerEvent( details );
- }
- else {
- return kIOReturnBadArgument;
- }
-
- // If we get to record this message, then we've reached the
- // end of another successful Sleep --> Wake cycle
- // At this point, we pat ourselves in the back and allow
- // our Sleep --> Wake UUID to be published
- if(details->eventType == kIOPMEventTypeWakeDone) {
- timeline->setSleepCycleInProgressFlag(false);
- }
+ // Create multiple workers (and threads)

-/*
- // Check if its time to clear the timeline buffer
- if(getProperty(kIOPMSleepWakeUUIDKey)
- && timeline->isSleepCycleInProgress() == false
- && timeline->getNumEventsLoggedThisPeriod() > 500) {
-
- // Clear the old UUID
- if(pmPowerStateQueue) {
- pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)false );
- }
-*/
- return rc;
- }
- else
- return kIOReturnNotReady;
-}
+ if (numWorkers > kPMHaltMaxWorkers)
+ numWorkers = kPMHaltMaxWorkers;

-void IOPMrootDomain::recordPMEvent( uint32_t type,
- const char *uuid,
- uint32_t reason,
- uint32_t result )
-{
- PMEventDetails *details = PMEventDetails::eventDetails(type, uuid, reason, result);
- if (details)
+ DLOG("PM nodes %u, maxDepth %u, workers %u\n",
+ totalNodes, gPMHaltArray->getCount(), numWorkers);
+
+ for (unsigned int i = 0; i < numWorkers; i++)
+ workers[i] = PMHaltWorker::worker();
+
+ // Wait for workers to exhaust all available work
+
+ IOLockLock(gPMHaltLock);
+ while (gPMHaltDepth >= 0)
 {
- recordPMEvent(details);
- details->release();
+ clock_interval_to_deadline(1000, kMillisecondScale, &deadline);
+
+ waitResult = IOLockSleepDeadline(
+ gPMHaltLock, &gPMHaltDepth, deadline, THREAD_UNINT);
+ if (THREAD_TIMED_OUT == waitResult)
+ {
+ AbsoluteTime now;
+ clock_get_uptime(&now);
+
+ IOLockUnlock(gPMHaltLock);
+ for (unsigned int i = 0 ; i < numWorkers; i++)
+ {
+ if (workers[i])
+ PMHaltWorker::checkTimeout(workers[i], &now);
+ }
+ IOLockLock(gPMHaltLock);
+ }
 }
-}
+ IOLockUnlock(gPMHaltLock);

-IOReturn IOPMrootDomain::recordAndReleasePMEvent(PMEventDetails *details)
-{
- IOReturn ret = kIOReturnBadArgument;
+ // Release all workers

- if (details)
+ for (unsigned int i = 0; i < numWorkers; i++)
 {
- ret = recordPMEvent(details);
- details->release();
+ if (workers[i])
+ workers[i]->release();
+ // worker also retained by its own thread
 }
- return ret;
+done:
+ DLOG("%s done\n", __FUNCTION__);
+ return;
}

// MARK: -
@@ -8026,10 +7725,10 @@ IOPMDriverAssertionID IOPMrootDomain::createPMAssertion(
 {
 IOReturn ret;
 IOPMDriverAssertionID newAssertion;
-
+
 if (!pmAssertions)
 return 0;
-
+
 ret = pmAssertions->createAssertion(whichAssertionBits, assertionLevel, ownerService, ownerDescription, &newAssertion);

 if (kIOReturnSuccess == ret)
@@ -8042,12 +7741,13 @@ IOReturn IOPMrootDomain::releasePMAssertion(IOPMDriverAssertionID releaseAsserti
 {
 if (!pmAssertions)
 return kIOReturnInternalError;
-
+
 return pmAssertions->releaseAssertion(releaseAssertion);
 }

+
 IOReturn IOPMrootDomain::setPMAssertionLevel(
- IOPMDriverAssertionID assertionID,
+ IOPMDriverAssertionID assertionID,
 IOPMDriverAssertionLevel assertionLevel)
 {
 return pmAssertions->setAssertionLevel(assertionID, assertionLevel);
@@ -8057,11 +7757,11 @@
IOPMDriverAssertionLevel IOPMrootDomain::getPMAssertionLevel(IOPMDriverAssertion { IOPMDriverAssertionType sysLevels; - if (!pmAssertions || whichAssertion == 0) + if (!pmAssertions || whichAssertion == 0) return kIOPMDriverAssertionLevelOff; sysLevels = pmAssertions->getActivatedAssertions(); - + // Check that every bit set in argument 'whichAssertion' is asserted // in the aggregate bits. if ((sysLevels & whichAssertion) == whichAssertion) @@ -8094,33 +7794,33 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const if (obj) return obj; - if (!strncmp(aKey, kIOPMSleepWakeWdogRebootKey, + if (!strncmp(aKey, kIOPMSleepWakeWdogRebootKey, sizeof(kIOPMSleepWakeWdogRebootKey))) { - if (swd_flags & SWD_BOOT_BY_WDOG) + if (swd_flags & SWD_BOOT_BY_SW_WDOG) return OSBoolean::withBoolean(true); - else + else return OSBoolean::withBoolean(false); - + } - if (!strncmp(aKey, kIOPMSleepWakeWdogLogsValidKey, + if (!strncmp(aKey, kIOPMSleepWakeWdogLogsValidKey, sizeof(kIOPMSleepWakeWdogLogsValidKey))) { - if (swd_flags & SWD_VALID_LOGS) + if (swd_flags & SWD_VALID_LOGS) return OSBoolean::withBoolean(true); - else + else return OSBoolean::withBoolean(false); - + } - /* - * XXX: We should get rid of "DesktopMode" property when 'kAppleClamshellCausesSleepKey' - * is set properly in darwake from sleep. For that, kIOPMEnableClamshell msg has to be + /* + * XXX: We should get rid of "DesktopMode" property when 'kAppleClamshellCausesSleepKey' + * is set properly in darwake from sleep. For that, kIOPMEnableClamshell msg has to be * issued by DisplayWrangler on darkwake. */ if (!strcmp(aKey, "DesktopMode")) { if (desktopMode) return OSBoolean::withBoolean(true); - else + else return OSBoolean::withBoolean(false); } if (!strcmp(aKey, "DisplayIdleForDemandSleep")) { @@ -8131,9 +7831,136 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const return OSBoolean::withBoolean(false); } } + + if (!strcmp(aKey, kIOPMDriverWakeEventsKey)) + { + OSArray * array = 0; + WAKEEVENT_LOCK(); + if (_systemWakeEventsArray && _systemWakeEventsArray->getCount()) + array = OSArray::withArray(_systemWakeEventsArray); + WAKEEVENT_UNLOCK(); + return array; + } + + if (!strcmp(aKey, kIOPMSleepStatisticsAppsKey)) + { + OSArray * array = 0; + IOLockLock(pmStatsLock); + if (pmStatsAppResponses && pmStatsAppResponses->getCount()) { + array = OSArray::withArray(pmStatsAppResponses); + pmStatsAppResponses->flushCollection(); + } + IOLockUnlock(pmStatsLock); + return array; + } + return NULL; +} + +// MARK: - +// MARK: Wake Event Reporting + +void IOPMrootDomain::copyWakeReasonString( char * outBuf, size_t bufSize ) +{ + WAKEEVENT_LOCK(); + strlcpy(outBuf, gWakeReasonString, bufSize); + WAKEEVENT_UNLOCK(); +} + +//****************************************************************************** +// acceptSystemWakeEvents +// +// Private control for the acceptance of driver wake event claims. 
+//****************************************************************************** + +void IOPMrootDomain::acceptSystemWakeEvents( bool accept ) +{ + bool logWakeReason = false; + + WAKEEVENT_LOCK(); + if (accept) + { + gWakeReasonString[0] = '\0'; + if (!_systemWakeEventsArray) + _systemWakeEventsArray = OSArray::withCapacity(4); + if ((_acceptSystemWakeEvents = (_systemWakeEventsArray != 0))) + _systemWakeEventsArray->flushCollection(); + } + else + { + _acceptSystemWakeEvents = false; + } + WAKEEVENT_UNLOCK(); + + if (logWakeReason) + MSG("system wake events:%s\n", gWakeReasonString); +} + +//****************************************************************************** +// claimSystemWakeEvent +// +// For a driver to claim a device is the source/conduit of a system wake event. +//****************************************************************************** + +void IOPMrootDomain::claimSystemWakeEvent( + IOService * device, + IOOptionBits flags, + const char * reason, + OSObject * details ) +{ + const OSSymbol * deviceName = 0; + OSNumber * deviceRegId = 0; + OSNumber * claimTime = 0; + OSData * flagsData = 0; + OSString * reasonString = 0; + OSDictionary * d = 0; + uint64_t timestamp; + bool ok = false; + + pmEventTimeStamp(×tamp); + + if (!device || !reason) return; + + deviceName = device->copyName(gIOServicePlane); + deviceRegId = OSNumber::withNumber(device->getRegistryEntryID(), 64); + claimTime = OSNumber::withNumber(timestamp, 64); + flagsData = OSData::withBytes(&flags, sizeof(flags)); + reasonString = OSString::withCString(reason); + d = OSDictionary::withCapacity(5 + (details ? 1 : 0)); + if (!deviceName || !deviceRegId || !claimTime || !flagsData || !reasonString) + goto done; + + d->setObject(gIONameKey, deviceName); + d->setObject(gIORegistryEntryIDKey, deviceRegId); + d->setObject(kIOPMWakeEventTimeKey, claimTime); + d->setObject(kIOPMWakeEventFlagsKey, flagsData); + d->setObject(kIOPMWakeEventReasonKey, reasonString); + if (details) + d->setObject(kIOPMWakeEventDetailsKey, details); + WAKEEVENT_LOCK(); + if (!gWakeReasonSysctlRegistered) + { + // Lazy registration until the platform driver stops registering + // the same name. 
+ gWakeReasonSysctlRegistered = true; + } + if (_acceptSystemWakeEvents) + { + ok = _systemWakeEventsArray->setObject(d); + if (gWakeReasonString[0] != '\0') + strlcat(gWakeReasonString, " ", sizeof(gWakeReasonString)); + strlcat(gWakeReasonString, reason, sizeof(gWakeReasonString)); + } + WAKEEVENT_UNLOCK(); +done: + if (deviceName) deviceName->release(); + if (deviceRegId) deviceRegId->release(); + if (claimTime) claimTime->release(); + if (flagsData) flagsData->release(); + if (reasonString) reasonString->release(); + if (d) d->release(); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -8162,7 +7989,7 @@ void PMSettingHandle::free( void ) #define super OSObject OSDefineMetaClassAndFinalStructors( PMSettingObject, OSObject ) -/* +/* * Static constructor/initializer for PMSettingObject */ PMSettingObject *PMSettingObject::pmSettingObject( @@ -8251,368 +8078,6 @@ void PMSettingObject::clientHandleFreed( void ) parent->deregisterPMSettingObject(this); } -// MARK: - -// MARK: IOPMTimeline - -#undef super -#define super OSObject - -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -IOPMTimeline *IOPMTimeline::timeline(IOPMrootDomain *root_domain) -{ - IOPMTimeline *myself; - - if (!root_domain) - return NULL; - - myself = new IOPMTimeline; - - if (myself) { - myself->owner = root_domain; - myself->init(); - } - - return myself; -} - -bool IOPMTimeline::init(void) -{ - if (!super::init()) { - return false; - } - - logLock = IOLockAlloc(); - - // Fresh timeline, no events logged yet - this->numEventsLoggedThisPeriod = 0; - this->sleepCycleInProgress = false; - - //this->setEventsRecordingLevel(1); // TODO - this->setEventsTrackedCount(kIOPMDefaultSystemEventsTracked); - - return true; -} - -void IOPMTimeline::free(void) -{ - if (pmTraceMemoryDescriptor) { - pmTraceMemoryDescriptor->release(); - pmTraceMemoryDescriptor = NULL; - } - - IOLockFree(logLock); - - super::free(); -} - -IOMemoryDescriptor *IOPMTimeline::getPMTraceMemoryDescriptor() -{ - return pmTraceMemoryDescriptor; -} - -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -bool IOPMTimeline::setProperties(OSDictionary *d) -{ - OSNumber *n = NULL; - OSBoolean *b = NULL; - bool changed = false; - - /* Changes size of detailed events buffer */ - n = (OSNumber *)d->getObject(kIOPMTimelineSystemNumberTrackedKey); - if (OSDynamicCast(OSNumber, n)) - { - changed = true; - this->setEventsTrackedCount(n->unsigned32BitValue()); - } - - - /* enables or disables system events */ - b = (OSBoolean *)d->getObject(kIOPMTimelineEnabledKey); - if (b) - { - changed = true; - this->setEventsRecordingLevel((int)(kOSBooleanTrue == b)); - } - - return changed; -} - -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -OSDictionary *IOPMTimeline::copyInfoDictionary(void) -{ - OSDictionary *out = OSDictionary::withCapacity(3); - OSNumber *n = NULL; - - if (!out || !hdr) - return NULL; - - n = 
OSNumber::withNumber(hdr->sizeEntries, 32); - out->setObject(kIOPMTimelineSystemNumberTrackedKey, n); - n->release(); - - n = OSNumber::withNumber(hdr->sizeBytes, 32); - out->setObject(kIOPMTimelineSystemBufferSizeKey, n); - n->release(); - - // bool - out->setObject(kIOPMTimelineEnabledKey, eventsRecordingLevel ? kOSBooleanTrue : kOSBooleanFalse); - - return out; -} - -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -/* IOPMTimeline::recordSystemPowerEvent() - * - * Expected "type" arguments are listed in IOPMPrivate.h under enum "SystemEventTypes" - * Type arguments include "system events", and "Intermediate events" - * - * - System Events have paired "start" and "stop" events. - * - A start event shall be followed by a stop event. - * - Any number of Intermediate Events may fall between the - * start and stop events. - * - Intermediate events are meaningless outside the bounds of a system event's - * start & stoup routines. - * - It's invalid to record a Start event without a following Stop event; e.g. two - * Start events without an intervenining Stop event is invalid. - * - * Buffer invariants - * - The first recorded system event shall be preceded by an entry with type == 0 - * - IOPMTimeline may choose not to record intermediate events while there's not - * a system event in process. - */ -IOReturn IOPMTimeline::recordSystemPowerEvent( PMEventDetails *details ) -{ - static bool wakeDonePending = true; - IOPMSystemEventRecord *record_to = NULL; - OSString *swUUIDKey = NULL; - uint32_t useIndex = 0; - - if (!details) - return kIOReturnBadArgument; - - if (!traceBuffer) - return kIOReturnNotReady; - - if (details->eventType == kIOPMEventTypeWakeDone) - { - if(!wakeDonePending) - return kIOReturnBadArgument; - } - - IOLockLock(logLock); - - if (details->eventType == kIOPMEventTypeWake) { - wakeDonePending = true; - } else if (details->eventType == kIOPMEventTypeWakeDone) { - wakeDonePending = false; - } - - systemState = details->eventType; - - useIndex = _atomicIndexIncrement(&hdr->index, hdr->sizeEntries); - - // The entry immediately after the latest entry (and thus - // immediately before the first entry) shall have a type 0. 
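/*
 * Aside: the removed timeline code advances its ring-buffer cursor with a
 * lock-free compare-and-swap (see _atomicIndexIncrement() further down in
 * this block); a condensed sketch of that loop:
 */
static uint32_t ringCursorAdvance( uint32_t * index, uint32_t limit )
{
    uint32_t was_index;
    uint32_t inc_index;

    do {
        was_index = *index;
        inc_index = (was_index + 1) % limit;   // wrap at the entry count
    } while (!OSCompareAndSwap(was_index, inc_index, index));

    return inc_index;   // the slot this caller now owns and may overwrite
}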
- if (useIndex + 1 >= hdr->sizeEntries) { - traceBuffer[useIndex + 1].eventType = 0; - } else { - traceBuffer[0].eventType = 0; - } - - record_to = &traceBuffer[useIndex]; - bzero(record_to, sizeof(IOPMSystemEventRecord)); - - /*****/ - record_to->eventType = details->eventType; - record_to->eventReason = details->reason; - record_to->eventResult = details->result; - pmEventTimeStamp(&record_to->timestamp); - - // If caller doesn't provide a UUID, we'll use the UUID that's posted - // on IOPMrootDomain under key kIOPMSleepWakeUUIDKey - if (!details->uuid) { - swUUIDKey = OSDynamicCast(OSString, owner->copyProperty(kIOPMSleepWakeUUIDKey)); - - if (swUUIDKey) - details->uuid = swUUIDKey->getCStringNoCopy(); - } - - if (details->uuid) - strncpy(record_to->uuid, details->uuid, kMaxPMStringLength); - - if (swUUIDKey) - swUUIDKey->release(); - - numEventsLoggedThisPeriod++; - /*****/ - - IOLockUnlock(logLock); - - return kIOReturnSuccess; - -} - -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -IOReturn IOPMTimeline::recordDetailedPowerEvent( PMEventDetails *details ) -{ - IOPMSystemEventRecord *record_to = NULL; - uint32_t useIndex; - - if (!details->eventType || !details->ownerName) - return kIOReturnBadArgument; - - IOLockLock(logLock); - - useIndex = _atomicIndexIncrement(&hdr->index, hdr->sizeEntries); - - record_to = (IOPMSystemEventRecord *)&traceBuffer[useIndex]; - bzero(record_to, sizeof(IOPMSystemEventRecord)); - - /*****/ - record_to->eventType = details->eventType; - if (details->ownerName && (strlen(details->ownerName) > 1)) { - strlcpy( record_to->ownerName, - details->ownerName, - sizeof(record_to->ownerName)); - } - - record_to->ownerDisambiguateID = details->ownerUnique; - - if (details->interestName && (strlen(details->interestName) > 1)) { - strlcpy(record_to->interestName, - details->interestName, - sizeof(record_to->interestName)); - } - - record_to->oldState = details->oldState; - record_to->newState = details->newState; - record_to->eventResult = details->result; - record_to->elapsedTimeUS = details->elapsedTimeUS; - pmEventTimeStamp(&record_to->timestamp); - - numEventsLoggedThisPeriod++; - /*****/ - - IOLockUnlock(logLock); - return kIOReturnSuccess; -} - -uint32_t IOPMTimeline::getNumEventsLoggedThisPeriod() { - return this->numEventsLoggedThisPeriod; -} - -void IOPMTimeline::setNumEventsLoggedThisPeriod(uint32_t newCount) { - this->numEventsLoggedThisPeriod = newCount; -} - -bool IOPMTimeline::isSleepCycleInProgress() { - return this->sleepCycleInProgress; -} - -void IOPMTimeline::setSleepCycleInProgressFlag(bool flag) { - this->sleepCycleInProgress = flag; -} -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -void IOPMTimeline::setEventsTrackedCount(uint32_t newTracked) -{ - size_t make_buf_size = 0; - - make_buf_size = sizeof(IOPMTraceBufferHeader) + (newTracked * sizeof(IOPMSystemEventRecord)); - - IOLockLock(logLock); - - if (pmTraceMemoryDescriptor) { - pmTraceMemoryDescriptor->release(); - pmTraceMemoryDescriptor = NULL; - } - - hdr = NULL; - traceBuffer = NULL; - - if (0 == newTracked) - { - IOLog("IOPMrootDomain -> erased buffer.\n"); - goto exit; - } - 
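/*
 * Aside: a sketch of the buffer layout the (removed) code below builds: one
 * kernel/user-shared allocation with the IOPMTraceBufferHeader at offset 0
 * and the ring of IOPMSystemEventRecord entries directly after it. Types,
 * flags, and field names are the ones used in this block.
 */
static IOBufferMemoryDescriptor * allocTraceBuffer( uint32_t tracked )
{
    size_t bytes = sizeof(IOPMTraceBufferHeader)
                 + (tracked * sizeof(IOPMSystemEventRecord));

    IOBufferMemoryDescriptor * md = IOBufferMemoryDescriptor::withOptions(
        kIOMemoryKernelUserShared | kIODirectionIn | kIOMemoryMapperNone,
        bytes);
    if (!md)
        return NULL;

    IOPMTraceBufferHeader * hdr =
        (IOPMTraceBufferHeader *) md->getBytesNoCopy();

    bzero(hdr, bytes);
    hdr->sizeBytes   = bytes;
    hdr->sizeEntries = tracked;   // records[0..tracked-1] follow the header

    return md;
}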
- pmTraceMemoryDescriptor = IOBufferMemoryDescriptor::withOptions( - kIOMemoryKernelUserShared | kIODirectionIn | kIOMemoryMapperNone, - make_buf_size); - - if (!pmTraceMemoryDescriptor) - { - IOLog("IOPMRootDomain -> IOBufferMemoryDescriptor(%d) returns NULL\n", (int)make_buf_size); - goto exit; - } - - pmTraceMemoryDescriptor->prepare(kIODirectionIn); - - // Header occupies the first sizeof(IOPMTraceBufferHeader) bytes - hdr = (IOPMTraceBufferHeader *)pmTraceMemoryDescriptor->getBytesNoCopy(); - - // Recorded events occupy the remaining bulk of the buffer - traceBuffer = (IOPMSystemEventRecord *)((uint8_t *)hdr + sizeof(IOPMTraceBufferHeader)); - - bzero(hdr, make_buf_size); - - hdr->sizeBytes = make_buf_size; - hdr->sizeEntries = newTracked; - - IOLog("IOPMRootDomain -> IOBufferMemoryDescriptor(%d) returns bufferMB with address 0x%08x\n", (int)make_buf_size, (unsigned int)(uintptr_t)traceBuffer); - -exit: - IOLockUnlock(logLock); -} - -//********************************************************************************* -//********************************************************************************* -//********************************************************************************* - -void IOPMTimeline::setEventsRecordingLevel(uint32_t eventsTrackedBits) -{ - - // TODO - - return; - -} - -/* static helper to IOPMTimeline - */ -uint32_t IOPMTimeline::_atomicIndexIncrement(uint32_t *index, uint32_t limit) -{ - uint32_t was_index; - uint32_t inc_index; - - if(!index) - return NULL; - - do { - was_index = *index; - inc_index = (was_index+1)%limit; - } while (!OSCompareAndSwap(was_index, inc_index, index)); - - return inc_index; -} - // MARK: - // MARK: PMAssertionsTracker @@ -8626,20 +8091,20 @@ uint32_t IOPMTimeline::_atomicIndexIncrement(uint32_t *index, uint32_t limit) PMAssertionsTracker *PMAssertionsTracker::pmAssertionsTracker( IOPMrootDomain *rootDomain ) { PMAssertionsTracker *myself; - + myself = new PMAssertionsTracker; - + if (myself) { myself->init(); myself->owner = rootDomain; - myself->issuingUniqueID = kAssertUniqueIDStart; + myself->issuingUniqueID = kAssertUniqueIDStart; myself->assertionsArray = OSArray::withCapacity(5); myself->assertionsKernel = 0; myself->assertionsUser = 0; myself->assertionsCombined = 0; myself->assertionsArrayLock = IOLockAlloc(); myself->tabulateProducerCount = myself->tabulateConsumerCount = 0; - + if (!myself->assertionsArray || !myself->assertionsArrayLock) myself = NULL; } @@ -8689,7 +8154,7 @@ void PMAssertionsTracker::tabulate(void) if ((assertionsKernel != oldKernel) || (assertionsCombined != oldCombined)) - { + { owner->evaluateAssertions(assertionsCombined, oldCombined); } } @@ -8779,14 +8244,14 @@ IOReturn PMAssertionsTracker::handleCreateAssertion(OSData *newAssertion) } /* PMAssertionsTracker::createAssertion - * createAssertion allocates memory for a new PM assertion, and affects system behavior, if + * createAssertion allocates memory for a new PM assertion, and affects system behavior, if * appropiate. */ IOReturn PMAssertionsTracker::createAssertion( IOPMDriverAssertionType which, IOPMDriverAssertionLevel level, - IOService *serviceID, - const char *whoItIs, + IOService *serviceID, + const char *whoItIs, IOPMDriverAssertionID *outID) { OSData *dataStore = NULL; @@ -8801,7 +8266,7 @@ IOReturn PMAssertionsTracker::createAssertion( track.registryEntryID = serviceID ? 
serviceID->getRegistryEntryID():0; track.modifiedTime = 0; pmEventTimeStamp(&track.createdTime); - + dataStore = OSData::withBytes(&track, sizeof(PMAssertStruct)); if (!dataStore) { @@ -8811,11 +8276,11 @@ IOReturn PMAssertionsTracker::createAssertion( } *outID = track.id; - + if (owner && owner->pmPowerStateQueue) { owner->pmPowerStateQueue->submitPowerEvent(kPowerEventAssertionCreate, (void *)dataStore); } - + return kIOReturnSuccess; } @@ -8829,17 +8294,17 @@ IOReturn PMAssertionsTracker::handleReleaseAssertion( int index; PMAssertStruct *assertStruct = detailsForID(_id, &index); - + if (!assertStruct) return kIOReturnNotFound; IOLockLock(assertionsArrayLock); - if (assertStruct->ownerString) + if (assertStruct->ownerString) assertStruct->ownerString->release(); assertionsArray->removeObject(index); IOLockUnlock(assertionsArrayLock); - + tabulate(); return kIOReturnSuccess; } @@ -8861,7 +8326,7 @@ IOReturn PMAssertionsTracker::releaseAssertion( * Runs in PM workloop. Do not call directly. */ IOReturn PMAssertionsTracker::handleSetAssertionLevel( - IOPMDriverAssertionID _id, + IOPMDriverAssertionID _id, IOPMDriverAssertionLevel _level) { PMAssertStruct *assertStruct = detailsForID(_id, NULL); @@ -8884,7 +8349,7 @@ IOReturn PMAssertionsTracker::handleSetAssertionLevel( /* PMAssertionsTracker::setAssertionLevel */ IOReturn PMAssertionsTracker::setAssertionLevel( - IOPMDriverAssertionID _id, + IOPMDriverAssertionID _id, IOPMDriverAssertionLevel _level) { if (owner && owner->pmPowerStateQueue) { @@ -8892,7 +8357,7 @@ IOReturn PMAssertionsTracker::setAssertionLevel( (void *)(uintptr_t)_level, _id); } - return kIOReturnSuccess; + return kIOReturnSuccess; } IOReturn PMAssertionsTracker::handleSetUserAssertionLevels(void * arg0) @@ -8958,38 +8423,38 @@ OSArray *PMAssertionsTracker::copyAssertionsArray(void) outArray->setObject(details); details->release(); - + _n = OSNumber::withNumber(_a->id, 64); - if (_n) { + if (_n) { details->setObject(kIOPMDriverAssertionIDKey, _n); _n->release(); } _n = OSNumber::withNumber(_a->createdTime, 64); - if (_n) { + if (_n) { details->setObject(kIOPMDriverAssertionCreatedTimeKey, _n); _n->release(); } _n = OSNumber::withNumber(_a->modifiedTime, 64); - if (_n) { + if (_n) { details->setObject(kIOPMDriverAssertionModifiedTimeKey, _n); _n->release(); } _n = OSNumber::withNumber((uintptr_t)_a->registryEntryID, 64); - if (_n) { + if (_n) { details->setObject(kIOPMDriverAssertionRegistryEntryIDKey, _n); _n->release(); } _n = OSNumber::withNumber(_a->level, 64); - if (_n) { + if (_n) { details->setObject(kIOPMDriverAssertionLevelKey, _n); _n->release(); } _n = OSNumber::withNumber(_a->assertionBits, 64); - if (_n) { + if (_n) { details->setObject(kIOPMDriverAssertionAssertedKey, _n); _n->release(); } - + if (_a->ownerString) { details->setObject(kIOPMDriverAssertionOwnerStringKey, _a->ownerString); } @@ -9028,7 +8493,7 @@ static void pmEventTimeStamp(uint64_t *recordTS) if (!recordTS) return; - + // We assume tsec fits into 32 bits; 32 bits holds enough // seconds for 136 years since the epoch in 1970. clock_get_calendar_microtime(&tsec, &tusec); @@ -9108,92 +8573,65 @@ OSObject * IORootParent::copyProperty( const char * aKey) const #if defined(__i386__) || defined(__x86_64__) -void IOPMrootDomain::sleepWakeDebugLog(const char *fmt,...) 
-{ - char str[100]; - va_list ap; - int retry = 0; - char *ptr; - swd_hdr *hdr; - uint32_t len = 0; - uint32_t ts; - uint32_t curPos = 0, newPos = 0; - bool reset = false; - - if ( !(kIOPersistentLog & gIOKitDebug) || (swd_buffer == NULL)) - return; - - hdr = (swd_hdr *)swd_buffer; - if (hdr->dlog_size == 0) { - if ((hdr->spindump_size != 0) || !OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) - return; - - hdr->dlog_buf_offset = hdr->dlog_cur_pos = sizeof(swd_hdr); - hdr->dlog_size = SWD_DLOG_SIZE; - hdr->spindump_offset = sizeof(swd_hdr) + hdr->dlog_size; - memset(((char*)hdr)+hdr->dlog_buf_offset, 0, hdr->dlog_size); - gRootDomain->swd_lock = 0; - } - ts = mach_absolute_time() & 0xffffffff; - va_start(ap, fmt); - len = vsnprintf(str, sizeof(str), fmt, ap)+1; - va_end(ap); - if (len > sizeof(str)) len = sizeof(str); - len += 10; // 8 bytes for time stamp - - do { - curPos = hdr->dlog_cur_pos; - newPos = curPos+len; - if (newPos >= (hdr->dlog_buf_offset+hdr->dlog_size)) { - newPos = hdr->dlog_buf_offset+len; - reset = true; - } - else - reset = false; - if (retry++ == 3) return; // Don't try too hard - } while (!OSCompareAndSwap(curPos, newPos, &hdr->dlog_cur_pos)); +IOReturn IOPMrootDomain::restartWithStackshot() +{ + if ((swd_flags & SWD_WDOG_ENABLED) == 0) + return kIOReturnError; - if (reset) curPos = hdr->dlog_buf_offset; - ptr = (char*)hdr+curPos; - snprintf(ptr, len, "%08x: %s", ts, str); + takeStackshot(true, true); + return kIOReturnSuccess; } void IOPMrootDomain::sleepWakeDebugTrig(bool wdogTrigger) +{ + takeStackshot(wdogTrigger, false); +} + +void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog) { swd_hdr * hdr = NULL; addr64_t data[3]; uint32_t wdog_panic = 0; + int cnt = 0; + pid_t pid = 0; + uint32_t flags; char * dstAddr; + uint32_t size; uint32_t bytesRemaining; unsigned int len; OSString * UUIDstring = NULL; uint64_t code; IOMemoryMap * logBufMap = NULL; + swd_stackshot_hdr *stackshotHdr = NULL; if ( kIOSleepWakeWdogOff & gIOKitDebug ) return; if (wdogTrigger) { - if (PE_parse_boot_argn("swd_panic", &wdog_panic, sizeof(wdog_panic)) && + if (PE_parse_boot_argn("swd_panic", &wdog_panic, sizeof(wdog_panic)) && (wdog_panic == 1)) { // If boot-arg is set to panic on sleep/wake hang, call panic panic("Sleep/Wake hang detected\n"); return; } - else if (swd_flags & SWD_BOOT_BY_WDOG) { + else if (swd_flags & SWD_BOOT_BY_SW_WDOG) { // If current boot is due to this watch dog trigger restart in previous boot, // then don't trigger again until at least 1 successful sleep & wake. 
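/*
 * Aside: a condensed sketch of the cross-reboot handoff that takeStackshot()
 * sets up further below. The watchdog path stores a {rounded size, CRC of
 * the stackshot region, physical address} triple under kIOSleepWakeDebugKey;
 * after the reboot, sleepWakeDebugRetrieve() maps the physical pages back in
 * and recomputes the CRC before trusting the contents. PEWriteNVRAMProperty()
 * is an assumption here for the store side; the read side uses
 * PEReadNVRAMProperty() as shown later in this patch.
 */
static void stashStackshotHandoff( swd_hdr * hdr, void * buf )
{
    addr64_t data[3];

    data[0] = round_page(sizeof(swd_hdr) + hdr->spindump_size); // bytes to map
    data[1] = crc32(0, (char *) buf + hdr->spindump_offset,
                    hdr->spindump_size);                        // integrity
    data[2] = kvtophys((vm_offset_t) buf);                      // where it is

    PEWriteNVRAMProperty(kIOSleepWakeDebugKey, data, sizeof(data));
}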
+ sleepCnt = displayWakeCnt = 1;
 if (!(sleepCnt && displayWakeCnt)) {
- IOLog("Shutting down due to repeated Sleep/Wake failures\n");
- PEHaltRestart(kPERestartCPU);
+ IOLog("Shutting down due to repeated Sleep/Wake failures\n");
+ PEHaltRestart(kPEHaltCPU);
 return;
 }
 }
 }

+ if (sleepWakeDebugIsWdogEnabled() == false)
+ return;
+
 if (swd_buffer == NULL) {
 sleepWakeDebugMemAlloc();
 if (swd_buffer == NULL) return;
@@ -9204,11 +8642,12 @@ void IOPMrootDomain::sleepWakeDebugTrig(bool wdogTrigger)

 hdr = (swd_hdr *)swd_buffer;

+ memset(hdr->UUID, 0x20, sizeof(hdr->UUID));
 if ((UUIDstring = OSDynamicCast(OSString, getProperty(kIOPMSleepWakeUUIDKey))) != NULL ) {

 if (wdogTrigger || (!UUIDstring->isEqualTo(hdr->UUID))) {
 const char *str = UUIDstring->getCStringNoCopy();
- snprintf(hdr->UUID, sizeof(hdr->UUID), "UUID: %s\n", str);
+ snprintf(hdr->UUID, sizeof(hdr->UUID), "UUID: %s", str);
 }
 else {
 DLOG("Data for current UUID already exists\n");
@@ -9219,32 +8658,68 @@ void IOPMrootDomain::sleepWakeDebugTrig(bool wdogTrigger)

 dstAddr = (char*)hdr + hdr->spindump_offset;
 bytesRemaining = SWD_BUF_SIZE - hdr->spindump_offset;

+ /* if AppleOSXWatchdog triggered the stackshot, set the flag in the header */
+ hdr->is_osx_watchdog = isOSXWatchdog;

 DLOG("Taking snapshot. bytesRemaining: %d\n", bytesRemaining);

- stack_snapshot_from_kernel(-1, dstAddr, bytesRemaining,
- STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO|STACKSHOT_SAVE_KERNEL_FRAMES_ONLY,
- &hdr->spindump_size);
- if (hdr->spindump_size != 0) {
- DLOG("Traced %d bytes of snapshot\n", hdr->spindump_size);
- dstAddr += hdr->spindump_size;
- bytesRemaining -= hdr->spindump_size;
- }
- else {
- DLOG("Failed to get spindump\n");
- hdr->spindump_size = 0;
+
+ while (bytesRemaining > sizeof(swd_stackshot_hdr)) {
+
+ stackshotHdr = (swd_stackshot_hdr *)dstAddr;
+ stackshotHdr->magic = SWD_STACKSHOTHDR_MAGIC;
+ stackshotHdr->size = 0;
+ bytesRemaining -= sizeof(swd_stackshot_hdr);
+ dstAddr += sizeof(swd_stackshot_hdr);
+
+ if (isOSXWatchdog) {
+ pid = -1;
+ size = bytesRemaining;
+ flags = STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO;
+ }
+ else if (cnt == 0) {
+ /*
+ * Take stackshot of all processes on first sample. Size is restricted
+ * to SWD_INITIAL_STACK_SIZE
+ */
+ pid = -1;
+ size = (bytesRemaining > SWD_INITIAL_STACK_SIZE) ?
SWD_INITIAL_STACK_SIZE : bytesRemaining; + flags = STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO|STACKSHOT_SAVE_KERNEL_FRAMES_ONLY; + } + else { + /* Take sample of kernel threads only */ + pid = 0; + size = bytesRemaining; + flags = 0; + } + + stack_snapshot_from_kernel(pid, dstAddr, size, flags, &stackshotHdr->size); + + dstAddr += stackshotHdr->size; + bytesRemaining -= stackshotHdr->size; + + DLOG("Sample: %d size: %d bytesRemaining: %d\n", cnt, stackshotHdr->size, bytesRemaining); + if ((stackshotHdr->size == 0) || (++cnt == 10)) + break; + IOSleep(10); // 10 ms } - snprintf(hdr->cps, sizeof(hdr->cps), "cps: %d\n", ((IOService*)this)->getPowerState()); + hdr->spindump_size = (SWD_BUF_SIZE - bytesRemaining - hdr->spindump_offset); + + + memset(hdr->cps, 0x20, sizeof(hdr->cps)); + snprintf(hdr->cps, sizeof(hdr->cps), "\ncps: %d", ((IOService*)this)->getPowerState()); code = pmTracer->getPMStatusCode(); - snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "Code: %08x %08x\n", + memset(hdr->PMStatusCode, 0x20, sizeof(hdr->PMStatusCode)); + snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "\nCode: %08x %08x", (uint32_t)((code >> 32) & 0xffffffff), (uint32_t)(code & 0xffffffff)); - snprintf(hdr->reason, sizeof(hdr->reason), "Stackshot reason: Watchdog\n"); + memset(hdr->reason, 0x20, sizeof(hdr->reason)); + snprintf(hdr->reason, sizeof(hdr->reason), "\nStackshot reason: Watchdog\n\n"); - data[0] = sizeof(swd_hdr) + hdr->spindump_size + hdr->dlog_size; + data[0] = round_page(sizeof(swd_hdr) + hdr->spindump_size); /* Header & rootdomain log is constantly changing and is not covered by CRC */ - data[1] = crc32(0, ((char*)swd_buffer+hdr->spindump_offset), hdr->spindump_size); - data[2] = kvtophys((vm_offset_t)swd_buffer); + data[1] = hdr->crc = crc32(0, ((char*)swd_buffer+hdr->spindump_offset), hdr->spindump_size); + data[2] = kvtophys((vm_offset_t)swd_buffer); len = sizeof(addr64_t)*3; DLOG("bytes: 0x%llx crc:0x%llx paddr:0x%llx\n", data[0], data[1], data[2]); @@ -9266,7 +8741,7 @@ exit: else { logBufMap = sleepWakeDebugRetrieve(); if (logBufMap) { - sleepWakeDebugDump(logBufMap); + sleepWakeDebugDumpFromMem(logBufMap); logBufMap->release(); logBufMap = 0; } @@ -9288,17 +8763,17 @@ void IOPMrootDomain::sleepWakeDebugMemAlloc( ) if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) return; - // Try allocating above 4GB. If that fails, try at 2GB - memDesc = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( + // Try allocating above 4GB. 
If that fails, try at 2GB + memDesc = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( kernel_task, kIOMemoryPhysicallyContiguous|kIOMemoryMapperNone, size, 0xFFFFFFFF00000000ULL); if (!memDesc) { - memDesc = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( + memDesc = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( kernel_task, kIOMemoryPhysicallyContiguous|kIOMemoryMapperNone, size, 0xFFFFFFFF10000000ULL); } - if (memDesc == NULL) + if (memDesc == NULL) { DLOG("Failed to allocate Memory descriptor for sleepWake debug\n"); goto exit; @@ -9306,22 +8781,14 @@ void IOPMrootDomain::sleepWakeDebugMemAlloc( ) hdr = (swd_hdr *)memDesc->getBytesNoCopy(); - memset(hdr, 0, sizeof(swd_hdr)); + memset(hdr, 0, sizeof(swd_hdr)); - hdr->version = 1; + hdr->signature = SWD_HDR_SIGNATURE; hdr->alloc_size = size; - if (kIOPersistentLog & gIOKitDebug) { - hdr->dlog_buf_offset = hdr->dlog_cur_pos = sizeof(swd_hdr); - hdr->dlog_size = SWD_DLOG_SIZE; - memset(((char*)hdr)+hdr->dlog_buf_offset, 0, hdr->dlog_size); - } - hdr->spindump_offset = sizeof(swd_hdr) + hdr->dlog_size; - + hdr->spindump_offset = sizeof(swd_hdr); swd_buffer = (void *)hdr; - DLOG("SleepWake debug buffer size:0x%x\n", hdr->alloc_size); - DLOG("DLOG offset: 0x%x size:0x%x spindump offset:0x%x\n", - hdr->dlog_buf_offset, hdr->dlog_size, hdr->spindump_offset); + DLOG("SleepWake debug buffer size:0x%x spindump offset:0x%x\n", hdr->alloc_size, hdr->spindump_offset); exit: gRootDomain->swd_lock = 0; @@ -9343,13 +8810,13 @@ bool IOPMrootDomain::sleepWakeDebugIsWdogEnabled() errno_t IOPMrootDomain::sleepWakeDebugSaveFile(const char *name, char *buf, int len) { struct vnode *vp = NULL; - vfs_context_t ctx = vfs_context_current(); + vfs_context_t ctx = vfs_context_create(vfs_context_current()); kauth_cred_t cred = vfs_context_ucred(ctx); struct vnode_attr va; errno_t error = EIO; - if (vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), - S_IRUSR|S_IRGRP|S_IROTH, VNODE_LOOKUP_NOFOLLOW, &vp, ctx) != 0) + if (vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), + S_IRUSR|S_IRGRP|S_IROTH, VNODE_LOOKUP_NOFOLLOW, &vp, ctx) != 0) { IOLog("Failed to open the file %s\n", name); goto exit; @@ -9361,26 +8828,289 @@ errno_t IOPMrootDomain::sleepWakeDebugSaveFile(const char *name, char *buf, int vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) { IOLog("Bailing as this is not a regular file\n"); goto exit; - } - VATTR_INIT(&va); + } + VATTR_INIT(&va); VATTR_SET(&va, va_data_size, 0); vnode_setattr(vp, &va, ctx); - + error = vn_rdwr(UIO_WRITE, vp, buf, len, 0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, vfs_context_proc(ctx)); - if (error != 0) + if (error != 0) IOLog("Failed to save sleep wake log. 
err 0x%x\n", error);
 else
 DLOG("Saved %d bytes to file %s\n",len, name);

exit:
 if (vp) vnode_close(vp, FWRITE, ctx);
+ if (ctx) vfs_context_rele(ctx);
+
+ return error;
+
+}
+
+errno_t IOPMrootDomain::sleepWakeDebugCopyFile(
+ struct vnode *srcVp,
+ vfs_context_t srcCtx,
+ char *tmpBuf, uint64_t tmpBufSize,
+ uint64_t srcOffset,
+ const char *dstFname,
+ uint64_t numBytes,
+ uint32_t crc)
+{
+ struct vnode *vp = NULL;
+ vfs_context_t ctx = vfs_context_create(vfs_context_current());
+ struct vnode_attr va;
+ errno_t error = EIO;
+ uint64_t bytesToRead, bytesToWrite;
+ uint64_t readFileOffset, writeFileOffset, srcDataOffset;
+ uint32_t newcrc = 0;
+
+ if (vnode_open(dstFname, (O_CREAT | FWRITE | O_NOFOLLOW),
+ S_IRUSR|S_IRGRP|S_IROTH, VNODE_LOOKUP_NOFOLLOW, &vp, ctx) != 0)
+ {
+ DLOG("Failed to open the file %s\n", dstFname);
+ goto exit;
+ }
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_nlink);
+ /* Don't dump to non-regular files or files with links. */
+ if (vp->v_type != VREG ||
+ vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) {
+ DLOG("Bailing as this is not a regular file\n");
+ goto exit;
+ }
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_data_size, 0);
+ vnode_setattr(vp, &va, ctx);
+
+ writeFileOffset = 0;
+ while(numBytes) {
+ bytesToRead = (round_page(numBytes) > tmpBufSize) ? tmpBufSize : round_page(numBytes);
+ readFileOffset = trunc_page(srcOffset);
+
+ DLOG("Read file (numBytes:0x%llx)\n", bytesToRead);
+ error = vn_rdwr(UIO_READ, srcVp, tmpBuf, bytesToRead, readFileOffset,
+ UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE,
+ vfs_context_ucred(srcCtx), (int *) 0,
+ vfs_context_proc(srcCtx));
+ if (error) {
+ DLOG("Failed to read file(numBytes:0x%llx)\n", bytesToRead);
+ break;
+ }
+
+ srcDataOffset = (uint64_t)tmpBuf + (srcOffset - readFileOffset);
+ bytesToWrite = bytesToRead - (srcOffset - readFileOffset);
+ if (bytesToWrite > numBytes) bytesToWrite = numBytes;
+
+ if (crc) {
+ newcrc = crc32(newcrc, (void *)srcDataOffset, bytesToWrite);
+ }
+ error = vn_rdwr(UIO_WRITE, vp, (char *)srcDataOffset, bytesToWrite, writeFileOffset,
+ UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT,
+ vfs_context_ucred(ctx), (int *) 0,
+ vfs_context_proc(ctx));
+ if (error) {
+ DLOG("Failed to write file(numBytes:0x%llx)\n", bytesToWrite);
+ break;
+ }
+
+ writeFileOffset += bytesToWrite;
+ numBytes -= bytesToWrite;
+ srcOffset += bytesToWrite;
+
+ }
+ if (crc != newcrc) {
+ swd_stackshot_hdr *shdr = (swd_stackshot_hdr *)tmpBuf;
+
+ /* Set stackshot size to 0 if crc doesn't match */
+ shdr->magic = SWD_STACKSHOTHDR_MAGIC;
+ shdr->size = 0;
+
+ assert(tmpBufSize > sizeof(swd_stackshot_hdr));
+ bytesToWrite = round_page(sizeof(swd_stackshot_hdr));
+ vn_rdwr(UIO_WRITE, vp, (char *)tmpBuf, bytesToWrite, 0,
+ UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT,
+ vfs_context_ucred(ctx), (int *) 0,
+ vfs_context_proc(ctx));
+
+ DLOG("CRC check failed.
expected:0x%x actual:0x%x\n", crc, newcrc);
+ error = EFAULT;
+ }
+exit:
+ if (vp) {
+ error = vnode_close(vp, FWRITE, ctx);
+ DLOG("vnode_close returned 0x%x\n", error);
+ }
+ if (ctx) vfs_context_rele(ctx);

 return error;
+
+
}

-void IOPMrootDomain::sleepWakeDebugDump(IOMemoryMap *logBufMap)
+
+void IOPMrootDomain::sleepWakeDebugDumpFromFile( )
+{
+
+ int rc;
+ char hibernateFilename[MAXPATHLEN+1];
+ char PMStatusCode[100];
+ void *tmpBuf;
+ swd_hdr *hdr = NULL;
+ uint32_t stacksSize, logSize;
+ uint64_t tmpBufSize;
+ uint64_t hdrOffset, stacksOffset, logOffset;
+ errno_t error = EIO;
+ OSObject *obj = NULL;
+ OSString *str = NULL;
+ OSNumber *failStat = NULL;
+ struct vnode *vp = NULL;
+ vfs_context_t ctx = NULL;
+
+ struct vnode_attr va;
+ IOBufferMemoryDescriptor *tmpBufDesc = NULL;
+ IOHibernateImageHeader *imageHdr;
+
+ DLOG("sleepWakeDebugDumpFromFile\n");
+ if ((swd_flags & SWD_LOGS_IN_FILE) == 0)
+ return;
+
+ if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock))
+ return;
+
+
+ hibernateFilename[0] = 0;
+ if ((obj = copyProperty(kIOHibernateFileKey)))
+ {
+ if ((str = OSDynamicCast(OSString, obj)))
+ strlcpy(hibernateFilename, str->getCStringNoCopy(),
+ sizeof(hibernateFilename));
+ obj->release();
+ }
+ if (!hibernateFilename[0]) {
+ DMSG("sleepWakeDebugDumpFromFile: Failed to get hib file name\n");
+ goto exit;
+ }
+ DLOG("sleepWakeDebugDumpFromFile: Hib file name %s\n", hibernateFilename);
+
+ /* Allocate a temp buffer to copy data between files */
+ tmpBufSize = 2*4096;
+ tmpBufDesc = IOBufferMemoryDescriptor::
+ inTaskWithOptions(kernel_task, kIODirectionOutIn | kIOMemoryMapperNone,
+ tmpBufSize, PAGE_SIZE);
+
+ if (!tmpBufDesc) {
+ DMSG("sleepWakeDebugDumpFromFile: Failed to allocate temp buf\n");
+ goto exit;
+ }
+
+ tmpBuf = tmpBufDesc->getBytesNoCopy();
+
+ ctx = vfs_context_create(vfs_context_current());
+ if (vnode_open(hibernateFilename, (FREAD | O_NOFOLLOW), 0,
+ VNODE_LOOKUP_NOFOLLOW, &vp, ctx) != 0)
+ {
+ DMSG("sleepWakeDebugDumpFromFile: Failed to open the hibernate file %s\n", hibernateFilename);
+ goto exit;
+ }
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_nlink);
+ VATTR_WANTED(&va, va_data_alloc);
+ if (vp->v_type != VREG ||
+ vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) {
+ DMSG("sleepWakeDebugDumpFromFile: Bailing as this is not a regular file\n");
+ goto exit;
+ }
+
+ /* Read the sleepimage file header */
+ rc = vn_rdwr(UIO_READ, vp, (char *)tmpBuf, round_page(sizeof(IOHibernateImageHeader)), 0,
+ UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE,
+ vfs_context_ucred(ctx), (int *) 0,
+ vfs_context_proc(ctx));
+ if (rc != 0) {
+ DMSG("sleepWakeDebugDumpFromFile: Failed to read header size %lu(rc=%d)\n", round_page(sizeof(IOHibernateImageHeader)), rc);
+ goto exit;
+ }
+
+ imageHdr = ((IOHibernateImageHeader *)tmpBuf);
+ if (imageHdr->signature != kIOHibernateHeaderDebugDataSignature) {
+ DMSG("sleepWakeDebugDumpFromFile: File header has unexpected value 0x%x\n", imageHdr->signature);
+ goto exit;
+ }
+
+ /* Sleep/Wake debug header(swd_hdr) is at the beginning of the second block */
+ hdrOffset = imageHdr->deviceBlockSize;
+ if (hdrOffset + sizeof(swd_hdr) >= va.va_data_alloc) {
+ DMSG("sleepWakeDebugDumpFromFile: header is crossing file size(0x%llx)\n", va.va_data_alloc);
+ goto exit;
+ }
+
+ DLOG("Reading swd_hdr len 0x%lx offset 0x%lx\n", round_page(sizeof(swd_hdr)), trunc_page(hdrOffset));
+ /* Read the sleep/wake debug header(swd_hdr) */
+ rc = vn_rdwr(UIO_READ, vp, (char *)tmpBuf, round_page(sizeof(swd_hdr)), trunc_page(hdrOffset),
UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE,
+ vfs_context_ucred(ctx), (int *) 0,
+ vfs_context_proc(ctx));
+ if (rc != 0) {
+ DMSG("sleepWakeDebugDumpFromFile: Failed to read debug header size %lu. rc=%d\n",
+ round_page(sizeof(swd_hdr)), rc);
+ goto exit;
+ }
+
+ hdr = (swd_hdr *)((char *)tmpBuf + (hdrOffset - trunc_page(hdrOffset)));
+ if ((hdr->signature != SWD_HDR_SIGNATURE) || (hdr->alloc_size > SWD_BUF_SIZE) ||
+ (hdr->spindump_offset > SWD_BUF_SIZE) || (hdr->spindump_size > SWD_BUF_SIZE)) {
+ DMSG("sleepWakeDebugDumpFromFile: Invalid data in debug header. sign:0x%x size:0x%x spindump_offset:0x%x spindump_size:0x%x\n",
+ hdr->signature, hdr->alloc_size, hdr->spindump_offset, hdr->spindump_size);
+ goto exit;
+ }
+ stacksSize = hdr->spindump_size;
+
+ /* Get stacks & log offsets in the image file */
+ stacksOffset = hdrOffset + hdr->spindump_offset;
+ logOffset = hdrOffset + offsetof(swd_hdr, UUID);
+ logSize = sizeof(swd_hdr)-offsetof(swd_hdr, UUID);
+
+ error = sleepWakeDebugCopyFile(vp, ctx, (char *)tmpBuf, tmpBufSize, stacksOffset,
+ getDumpStackFilename(hdr), stacksSize, hdr->crc);
+ if (error == EFAULT) {
+ DMSG("sleepWakeDebugDumpFromFile: Stackshot CRC doesn't match\n");
+ goto exit;
+ }
+ error = sleepWakeDebugCopyFile(vp, ctx, (char *)tmpBuf, tmpBufSize, logOffset,
+ getDumpLogFilename(hdr), logSize, 0);
+ if (error) {
+ DMSG("sleepWakeDebugDumpFromFile: Failed to write the log file(0x%x)\n", error);
+ goto exit;
+ }
+exit:
+ if (error) {
+ // Write just the SleepWakeLog.dump with failure code
+ uint64_t fcode = 0;
+ const char *fname;
+ if (swd_flags & SWD_BOOT_BY_SW_WDOG) {
+ failStat = OSDynamicCast(OSNumber, getProperty(kIOPMSleepWakeFailureCodeKey));
+ fcode = failStat->unsigned64BitValue();
+ fname = kSleepWakeLogFilename;
+ }
+ else {
+ fname = kAppleOSXWatchdogLogFilename;
+ }
+ memset(PMStatusCode, 0x20, sizeof(PMStatusCode)); // Fill with spaces
+ PMStatusCode[sizeof(PMStatusCode)-1] = 0xa; // And an end-of-line at the end
+ snprintf(PMStatusCode, sizeof(PMStatusCode)-1, "Code: 0x%llx", fcode);
+ sleepWakeDebugSaveFile(fname, PMStatusCode, sizeof(PMStatusCode));
+ }
+ gRootDomain->swd_lock = 0;
+
+ if (vp) vnode_close(vp, FREAD, ctx);
+ if (ctx) vfs_context_rele(ctx);
+ if (tmpBufDesc) tmpBufDesc->release();
+
+}
+
+void IOPMrootDomain::sleepWakeDebugDumpFromMem(IOMemoryMap *logBufMap)
 {
 IOVirtualAddress srcBuf = NULL;
 char *stackBuf = NULL, *logOffset = NULL;
@@ -9411,16 +9141,13 @@ void IOPMrootDomain::sleepWakeDebugDump(IOMemoryMap *logBufMap)

 stackBuf = (char*)hdr+hdr->spindump_offset;

- error = sleepWakeDebugSaveFile("/var/tmp/SleepWakeStacks.dump", stackBuf, hdr->spindump_size);
+ error = sleepWakeDebugSaveFile(getDumpStackFilename(hdr), stackBuf, hdr->spindump_size);
 if (error) goto exit;

 logOffset = (char*)hdr+offsetof(swd_hdr, UUID);
 logSize = sizeof(swd_hdr)-offsetof(swd_hdr, UUID);
- if ((hdr->dlog_buf_offset == sizeof(swd_hdr)) && (hdr->dlog_size == SWD_DLOG_SIZE))
- {
- logSize += hdr->dlog_size;
- }
- error = sleepWakeDebugSaveFile("/var/tmp/SleepWakeLog.dump", logOffset, logSize);
+
+ error = sleepWakeDebugSaveFile(getDumpLogFilename(hdr), logOffset, logSize);
 if (error) goto exit;

 hdr->spindump_size = 0;
@@ -9429,13 +9156,31 @@
 exit:
 if (error) {
 // Write just the SleepWakeLog.dump with failure code
- if ((failStat = OSDynamicCast(OSNumber, getProperty(kIOPMSleepWakeFailureCodeKey))) != NULL) {
- memset(PMStatusCode, 0x20, sizeof(PMStatusCode)); // Fill
with spaces - PMStatusCode[sizeof(PMStatusCode)-1] = 0xa; // And an end-of-line at the end - const uint64_t fcode = failStat->unsigned64BitValue(); - snprintf(PMStatusCode, sizeof(PMStatusCode)-1, "Code: 0x%llx", fcode); - sleepWakeDebugSaveFile("/var/tmp/SleepWakeLog.dump", PMStatusCode, sizeof(PMStatusCode)); + uint64_t fcode = 0; + const char *sname, *lname; + swd_stackshot_hdr shdr; + + /* Try writing an empty stacks file */ + shdr.magic = SWD_STACKSHOTHDR_MAGIC; + shdr.size = 0; + + + if (swd_flags & SWD_BOOT_BY_SW_WDOG) { + failStat = OSDynamicCast(OSNumber, getProperty(kIOPMSleepWakeFailureCodeKey)); + if (failStat) fcode = failStat->unsigned64BitValue(); + lname = kSleepWakeLogFilename; + sname = kSleepWakeStackFilename; } + else { + lname = kAppleOSXWatchdogLogFilename; + sname = kAppleOSXWatchdogStackFilename; + } + + sleepWakeDebugSaveFile(sname, (char*)(&shdr), sizeof(shdr)); + memset(PMStatusCode, 0x20, sizeof(PMStatusCode)); // Fill with spaces + PMStatusCode[sizeof(PMStatusCode)-1] = 0xa; // And an end-of-line at the end + snprintf(PMStatusCode, sizeof(PMStatusCode)-1, "Code: 0x%llx", fcode); + sleepWakeDebugSaveFile(lname, PMStatusCode, sizeof(PMStatusCode)); } gRootDomain->swd_lock = 0; } @@ -9454,20 +9199,39 @@ IOMemoryMap *IOPMrootDomain::sleepWakeDebugRetrieve( ) uint64_t paddr = 0; swd_hdr *hdr = NULL; bool ret = false; + char str[20]; if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) return NULL; - len = sizeof(addr64_t)*3; - if (!PEReadNVRAMProperty(kIOSleepWakeDebugKey, data, &len) || (len != sizeof(addr64_t)*3) ) - { + if (!PEReadNVRAMProperty(kIOSleepWakeDebugKey, 0, &len)) { DLOG("No sleepWakeDebug note to read\n"); - return NULL; + goto exit; } - PERemoveNVRAMProperty(kIOSleepWakeDebugKey); + if (len == strlen("sleepimage")) { + str[0] = 0; + PEReadNVRAMProperty(kIOSleepWakeDebugKey, str, &len); + if (!strncmp((char*)str, "sleepimage", strlen("sleepimage"))) { + DLOG("sleepWakeDebugRetrieve: in file logs\n"); + swd_flags |= SWD_LOGS_IN_FILE|SWD_VALID_LOGS; + goto exit; + } + } + else if (len == sizeof(addr64_t)*3) + PEReadNVRAMProperty(kIOSleepWakeDebugKey, data, &len); + else { + DLOG("Invalid sleepWakeDebug note length (%d)\n", len); + goto exit; + } + + + + DLOG("sleepWakeDebugRetrieve: data[0]:0x%llx data[1]:0x%llx data[2]:0x%llx\n", + data[0], data[1], data[2]); + DLOG("sleepWakeDebugRetrieve: in mem logs\n"); bufSize = data[0]; crc = data[1]; paddr = data[2]; @@ -9480,7 +9244,7 @@ IOMemoryMap *IOPMrootDomain::sleepWakeDebugRetrieve( ) DLOG("size:0x%llx crc:0x%llx paddr:0x%llx\n", bufSize, crc, paddr); - + desc = IOMemoryDescriptor::withAddressRange( paddr, bufSize, kIODirectionOutIn | kIOMemoryMapperNone, NULL); if (desc == NULL) @@ -9500,14 +9264,14 @@ IOMemoryMap *IOPMrootDomain::sleepWakeDebugRetrieve( ) } hdr = (swd_hdr *)vaddr; - if (hdr->spindump_offset+hdr->spindump_size > bufSize) + if (hdr->spindump_offset+hdr->spindump_size > bufSize) { IOLog("SleepWake log buffer contents are invalid\n"); goto exit; } hdr->crc = crc; - newcrc = crc32(0, (void *)((char*)vaddr+hdr->spindump_offset), + newcrc = crc32(0, (void *)((char*)vaddr+hdr->spindump_offset), hdr->spindump_size); if (newcrc != crc) { IOLog("SleepWake log buffer contents are invalid\n"); @@ -9515,63 +9279,52 @@ } ret = true; + swd_flags |= SWD_LOGS_IN_MEM | SWD_VALID_LOGS; exit: + PERemoveNVRAMProperty(kIOSleepWakeDebugKey); if (!ret) { if (logBufMap) logBufMap->release(); logBufMap = 0; } if (desc) desc->release(); gRootDomain->swd_lock = 0; - - return
logBufMap; -} - -void IOPMrootDomain::saveTimeoutAppStackShot(void *p0, void *p1) -{ - IOPMrootDomain *rd = (IOPMrootDomain *)p0; - IOBufferMemoryDescriptor *spindumpDesc; - errno_t error = EIO; - swd_hdr *hdr; - - if (rd && rd->spindumpDesc) - { - spindumpDesc = rd->spindumpDesc; - - hdr = (swd_hdr*)spindumpDesc->getBytesNoCopy(); - error = rd->sleepWakeDebugSaveFile("/var/tmp/SleepWakeTimeoutStacks.dump", - (char*)hdr+hdr->spindump_offset, hdr->spindump_size); - if (error) goto done; - - error = rd->sleepWakeDebugSaveFile("/var/tmp/SleepWakeTimeoutLog.dump", - (char*)hdr+offsetof(swd_hdr, UUID), - sizeof(swd_hdr)-offsetof(swd_hdr, UUID)); - - done: - spindumpDesc->release(); - rd->spindumpDesc = 0; - - } - + return logBufMap; } #else -void IOPMrootDomain::sleepWakeDebugLog(const char *fmt,...) +void IOPMrootDomain::sleepWakeDebugTrig(bool restart) { } -void IOPMrootDomain::sleepWakeDebugTrig(bool restart) +void IOPMrootDomain::takeStackshot(bool restart, bool isOSXWatchdog) { +#pragma unused(restart) +#pragma unused(isOSXWatchdog) } void IOPMrootDomain::sleepWakeDebugMemAlloc( ) { } +void IOPMrootDomain::sleepWakeDebugDumpFromMem(IOMemoryMap *map) +{ +} +errno_t IOPMrootDomain::sleepWakeDebugCopyFile( + struct vnode *srcVp, + vfs_context_t srcCtx, + char *tmpBuf, uint64_t tmpBufSize, + uint64_t srcOffset, + const char *dstFname, + uint64_t numBytes, + uint32_t crc) +{ + return EIO; +} -void IOPMrootDomain::sleepWakeDebugDump(IOMemoryMap *map) +void IOPMrootDomain::sleepWakeDebugDumpFromFile() { } @@ -9594,8 +9347,5 @@ errno_t IOPMrootDomain::sleepWakeDebugSaveFile(const char *name, char *buf, int return 0; } -void IOPMrootDomain::saveTimeoutAppStackShot(void *p0, void *p1) -{ -} #endif diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index bbbea1e39..8bcc9342e 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2010 Apple Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,11 +62,11 @@ static void getCStringForObject(OSObject *inObj, char *outStr, size_t outStrLen) OSDefineMetaClassAndStructors(IOPlatformExpert, IOService) OSMetaClassDefineReservedUsed(IOPlatformExpert, 0); - OSMetaClassDefineReservedUsed(IOPlatformExpert, 1); -OSMetaClassDefineReservedUnused(IOPlatformExpert, 2); -OSMetaClassDefineReservedUnused(IOPlatformExpert, 3); -OSMetaClassDefineReservedUnused(IOPlatformExpert, 4); +OSMetaClassDefineReservedUsed(IOPlatformExpert, 2); +OSMetaClassDefineReservedUsed(IOPlatformExpert, 3); +OSMetaClassDefineReservedUsed(IOPlatformExpert, 4); + OSMetaClassDefineReservedUnused(IOPlatformExpert, 5); OSMetaClassDefineReservedUnused(IOPlatformExpert, 6); OSMetaClassDefineReservedUnused(IOPlatformExpert, 7); @@ -324,6 +324,17 @@ IOReturn IOPlatformExpert::registerInterruptController(OSSymbol *name, IOInterru return kIOReturnSuccess; } +IOReturn IOPlatformExpert::deregisterInterruptController(OSSymbol *name) +{ + IOLockLock(gIOInterruptControllersLock); + + gIOInterruptControllers->removeObject(name); + + IOLockUnlock(gIOInterruptControllersLock); + + return kIOReturnSuccess; +} + IOInterruptController *IOPlatformExpert::lookUpInterruptController(OSSymbol *name) { OSObject *object; @@ -363,6 +374,17 @@ bool IOPlatformExpert::platformAdjustService(IOService */*service*/) return true; } +void IOPlatformExpert::getUTCTimeOfDay(clock_sec_t * secs, clock_nsec_t * nsecs) +{ + *secs = getGMTTimeOfDay(); + *nsecs = 0; +} + +void IOPlatformExpert::setUTCTimeOfDay(clock_sec_t secs, __unused clock_nsec_t nsecs) +{ + setGMTTimeOfDay(secs); +} + //********************************************************************************* // PMLog @@ -379,8 +401,8 @@ PMLog(const char *who, unsigned long event, nowus += (nows % 1000) * 1000000; kprintf("pm%u %p %.30s %d %lx %lx\n", - nowus, current_thread(), who, // Identity - (int) event, (long) param1, (long) param2); // Args + nowus, OBFUSCATE(current_thread()), who, // Identity + (int) event, (long)OBFUSCATE(param1), (long)OBFUSCATE(param2)); // Args } @@ -757,6 +779,9 @@ int PEHaltRestart(unsigned int type) IOPMrootDomain *pmRootDomain; AbsoluteTime deadline; thread_call_t shutdown_hang; + IORegistryEntry *node; + OSData *data; + uint32_t timeout = 30; if(type == kPEHaltCPU || type == kPERestartCPU || type == kPEUPSDelayHaltCPU) { @@ -768,11 +793,20 @@ int PEHaltRestart(unsigned int type) /* Spawn a thread that will panic in 30 seconds. If all goes well the machine will be off by the time - the timer expires. + the timer expires. If the device wants a different + timeout, use that value instead of 30 seconds. 
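+ (The override is read below from the 4-byte "halt-restart-timeout" property on the /chosen node and is interpreted in seconds, matching the kSecondScale deadline.)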
*/ +#define RESTART_NODE_PATH "/chosen" + node = IORegistryEntry::fromPath( RESTART_NODE_PATH, gIODTPlane ); + if ( node ) { + data = OSDynamicCast( OSData, node->getProperty( "halt-restart-timeout" ) ); + if ( data && data->getLength() == 4 ) + timeout = *((uint32_t *) data->getBytesNoCopy()); + } + shutdown_hang = thread_call_allocate( &IOShutdownNotificationsTimedOut, (thread_call_param_t)(uintptr_t) type); - clock_interval_to_deadline( 30, kSecondScale, &deadline ); + clock_interval_to_deadline( timeout, kSecondScale, &deadline ); thread_call_enter1_delayed( shutdown_hang, 0, deadline ); pmRootDomain->handlePlatformHaltRestart(type); @@ -933,16 +967,35 @@ err: long PEGetGMTTimeOfDay(void) { - long result = 0; - - if( gIOPlatform) result = gIOPlatform->getGMTTimeOfDay(); + clock_sec_t secs; + clock_usec_t usecs; - return (result); + PEGetUTCTimeOfDay(&secs, &usecs); + return secs; } void PESetGMTTimeOfDay(long secs) { - if( gIOPlatform) gIOPlatform->setGMTTimeOfDay(secs); + PESetUTCTimeOfDay(secs, 0); +} + +void PEGetUTCTimeOfDay(clock_sec_t * secs, clock_usec_t * usecs) +{ + clock_nsec_t nsecs = 0; + + *secs = 0; + if (gIOPlatform) + gIOPlatform->getUTCTimeOfDay(secs, &nsecs); + + assert(nsecs < NSEC_PER_SEC); + *usecs = nsecs / NSEC_PER_USEC; +} + +void PESetUTCTimeOfDay(clock_sec_t secs, clock_usec_t usecs) +{ + assert(usecs < USEC_PER_SEC); + if (gIOPlatform) + gIOPlatform->setUTCTimeOfDay(secs, usecs * NSEC_PER_USEC); } } /* extern "C" */ diff --git a/iokit/Kernel/IOReportLegend.cpp b/iokit/Kernel/IOReportLegend.cpp new file mode 100644 index 000000000..33e04ed5d --- /dev/null +++ b/iokit/Kernel/IOReportLegend.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2012-2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + + +//#define IORDEBUG_LEGEND 1 + +#ifdef IORDEBUG_LEGEND +#define IORLEGENDLOG(fmt, args...) \ +do { \ +IOLog("IOReportLegend | "); \ +IOLog(fmt, ##args); \ +IOLog("\n"); \ +} while(0) +#else +#define IORLEGENDLOG(fmt, args...) 
+#endif + + +#define super OSObject +OSDefineMetaClassAndStructors(IOReportLegend, OSObject); + +IOReportLegend* +IOReportLegend::with(OSArray *legend) +{ + IOReportLegend *iorLegend = new IOReportLegend; + + if (iorLegend) { + + if (legend != NULL) { + if (iorLegend->initWith(legend) != kIOReturnSuccess) { + delete iorLegend; + return NULL; + } + } + + return iorLegend; + } + + else return NULL; +} + +/* must clean up everything if it fails */ +IOReturn +IOReportLegend::initWith(OSArray *legend) +{ + if (legend) _reportLegend = OSArray::withArray(legend); + + if (_reportLegend == NULL) + return kIOReturnError; + + else return kIOReturnSuccess; +} + + +void +IOReportLegend::free(void) +{ + if (_reportLegend) _reportLegend->release(); + super::free(); +} + + +OSArray* +IOReportLegend::getLegend(void) +{ + return _reportLegend; +} + +IOReturn +IOReportLegend::addReporterLegend(IOService *reportingService, + IOReporter *reporter, + const char *groupName, + const char *subGroupName) +{ + IOReturn res = kIOReturnError; + IOReportLegend *legend; + + // No need to check groupName and subGroupName because optional params + if (!reportingService || !reporter) { + goto finish; + } + + legend = IOReportLegend::with(OSDynamicCast(OSArray, reportingService->getProperty(kIOReportLegendKey))); + + if (legend) + { + legend->addReporterLegend(reporter, groupName, subGroupName); + reportingService->setProperty(kIOReportLegendKey, legend->getLegend()); + reportingService->setProperty(kIOReportLegendPublicKey, true); + legend->free(); + res = kIOReturnSuccess; + } + +finish: + return res; +} + + +IOReturn +IOReportLegend::addLegendEntry(IOReportLegendEntry *legendEntry, + const char *groupName, + const char *subGroupName) +{ + kern_return_t res = kIOReturnError; + const OSSymbol *tmpGroupName = NULL; + const OSSymbol *tmpSubGroupName = NULL; + + if (!legendEntry) goto finish; + + if (groupName) { + tmpGroupName = OSSymbol::withCString(groupName); + } + + if (subGroupName) { + tmpSubGroupName = OSSymbol::withCString(subGroupName); + } + + // It is ok to call appendLegendWith() if tmpGroups are NULL + if (legendEntry) { + res = organizeLegend(legendEntry, tmpGroupName, tmpSubGroupName); + + if (tmpGroupName) tmpGroupName->release(); + if (tmpSubGroupName) tmpSubGroupName->release(); + } + +finish: + return res; +} + + +IOReturn +IOReportLegend::addReporterLegend(IOReporter *reporter, + const char *groupName, + const char *subGroupName) +{ + IOReturn res = kIOReturnError; + IOReportLegendEntry *legendEntry = NULL; + + if (reporter) { + + legendEntry = reporter->createLegend(); + + if (legendEntry) { + + res = addLegendEntry(legendEntry, groupName, subGroupName); + legendEntry->release(); + } + } + + return res; +} + + +IOReturn +IOReportLegend::organizeLegend(IOReportLegendEntry *legendEntry, + const OSSymbol *groupName, + const OSSymbol *subGroupName) +{ + IOReturn res = kIOReturnError; + + if (!legendEntry) + return res = kIOReturnBadArgument; + + if (!groupName && subGroupName) + return res = kIOReturnBadArgument; + + IORLEGENDLOG("IOReportLegend::organizeLegend"); + // Legend is empty, enter first node + if (_reportLegend == NULL) { + IORLEGENDLOG("IOReportLegend::new legend creation"); + _reportLegend = OSArray::withCapacity(1); + + if (!_reportLegend) + return kIOReturnNoMemory; + } + + if (groupName) + legendEntry->setObject(kIOReportLegendGroupNameKey, groupName); + + if (subGroupName) + legendEntry->setObject(kIOReportLegendSubGroupNameKey, subGroupName); + + _reportLegend->setObject(legendEntry); + + 
// callers can now safely release legendEntry (it is part of _reportLegend) + + return res = kIOReturnSuccess; +} + diff --git a/iokit/Kernel/IOReporter.cpp b/iokit/Kernel/IOReporter.cpp new file mode 100644 index 000000000..e132bc17e --- /dev/null +++ b/iokit/Kernel/IOReporter.cpp @@ -0,0 +1,1070 @@ +/* + * Copyright (c) 2012-2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include "IOReporterDefs.h" + +#include +#include + +#define super OSObject +OSDefineMetaClassAndStructors(IOReporter, OSObject); + +// be careful to retain and release as necessary +static const OSSymbol *gIOReportNoChannelName = OSSymbol::withCString("_NO_NAME_4"); + +// * We might someday want an IOReportManager (vs. these static funcs) + +/**************************************/ +/*** STATIC METHODS ***/ +/**************************************/ +IOReturn +IOReporter::configureAllReports(OSSet *reporters, + IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + IOReturn rval = kIOReturnError; + OSCollectionIterator *iterator = NULL; + + if (reporters == NULL || channelList == NULL || result == NULL) { + rval = kIOReturnBadArgument; + goto finish; + } + + switch (action) { + + case kIOReportGetDimensions: + case kIOReportEnable: + case kIOReportDisable: + { + OSObject * object; + iterator = OSCollectionIterator::withCollection(reporters); + + while ((object = iterator->getNextObject())) { + + IOReporter *rep = OSDynamicCast(IOReporter, object); + + if (rep) { + (void)rep->configureReport(channelList, action, result, destination); + } else { + rval = kIOReturnUnsupported; // kIOReturnNotFound? 
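+ // note: a single non-IOReporter object in the set aborts the whole batch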
+ goto finish; + } + } + + break; + } + + case kIOReportTraceOnChange: + case kIOReportNotifyHubOnChange: + default: + rval = kIOReturnUnsupported; + goto finish; + } + + rval = kIOReturnSuccess; + +finish: + if (iterator) iterator->release(); + + return rval; +} + +// the duplication in these functions almost makes one want Objective-C SEL* ;) +IOReturn +IOReporter::updateAllReports(OSSet *reporters, + IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + IOReturn rval = kIOReturnError; + OSCollectionIterator *iterator = NULL; + + if (reporters == NULL || + channelList == NULL || + result == NULL || + destination == NULL) { + rval = kIOReturnBadArgument; + goto finish; + } + + switch (action) { + + case kIOReportCopyChannelData: + { + OSObject * object; + iterator = OSCollectionIterator::withCollection(reporters); + + while ((object = iterator->getNextObject())) { + + IOReporter *rep = OSDynamicCast(IOReporter, object); + + if (rep) { + (void)rep->updateReport(channelList, action, result, destination); + } else { + rval = kIOReturnUnsupported; // kIOReturnNotFound? + goto finish; + } + } + + break; + } + + case kIOReportTraceChannelData: + default: + rval = kIOReturnUnsupported; + goto finish; + } + + rval = kIOReturnSuccess; + +finish: + if (iterator) iterator->release(); + + return rval; +} + + +/**************************************/ +/*** COMMON INIT METHODS ***/ +/**************************************/ + +bool +IOReporter::init(IOService *reportingService, + IOReportChannelType channelType, + IOReportUnits unit) +{ + bool success = false; + + // ::free() relies on these being initialized + _reporterLock = NULL; + _configLock = NULL; + _elements = NULL; + _enableCounts = NULL; + _channelNames = NULL; + + if (channelType.report_format == kIOReportInvalidFormat) { + IORLOG("init ERROR: Channel Type ill-defined"); + goto finish; + } + + _driver_id = reportingService->getRegistryEntryID(); + if (_driver_id == 0) { + IORLOG("init() ERROR: no registry ID"); + goto finish; + } + + if (!super::init()) return false; + + _channelDimension = channelType.nelements; + _channelType = channelType; + // FIXME: need to look up dynamically + if (unit == kIOReportUnitHWTicks) { +#if defined(__i386__) || defined(__x86_64__) + // Most, but not all Macs use 1GHz + unit = kIOReportUnit1GHzTicks; +#else +#error kIOReportUnitHWTicks not defined +#endif + } + _unit = unit; + + // Allocate a reporter (data) lock + _reporterLock = IOSimpleLockAlloc(); + if (!_reporterLock) goto finish; + _reporterIsLocked = false; + + // Allocate a config lock + _configLock = IOLockAlloc(); + if (!_configLock) goto finish; + _reporterConfigIsLocked = false; + + // Allocate channel names array + _channelNames = OSArray::withCapacity(1); + if (!_channelNames) goto finish; + + // success + success = true; + +finish: + if (!success) { + if (_configLock) IOLockFree(_configLock); + if (_reporterLock) IOSimpleLockFree(_reporterLock); + if (_channelNames) _channelNames->release(); + } + + return success; +} + + +/*******************************/ +/*** PUBLIC METHODS ***/ +/*******************************/ + +// init() [possibly via init*()] must be called before free() +// to ensure that _ = NULL +void +IOReporter::free(void) +{ + if (_configLock) IOLockFree(_configLock); + if (_reporterLock) IOSimpleLockFree(_reporterLock); + + if (_elements) { + PREFL_MEMOP_PANIC(_nElements, IOReportElement); + IOFree(_elements, (size_t)_nElements * sizeof(IOReportElement)); + } + if 
(_enableCounts) { + PREFL_MEMOP_PANIC(_nChannels, int); + IOFree(_enableCounts, (size_t)_nChannels * sizeof(int)); + } + + super::free(); +} + +/* +#define TESTALLOC() do { \ + void *tbuf; \ + tbuf = IOMalloc(10); \ + IOFree(tbuf, 10); \ + IORLOG("%s:%d - _reporterIsLocked = %d & allocation successful", \ + __PRETTY_FUNCTION__, __LINE__, _reporterIsLocked); \ +} while (0); +*/ +IOReturn +IOReporter::addChannel(uint64_t channelID, + const char *channelName /* = NULL */) +{ + IOReturn res = kIOReturnError, kerr; + const OSSymbol *symChannelName = NULL; + int oldNChannels, newNChannels = 0, freeNChannels = 0; + + IORLOG("IOReporter::addChannel %llx", channelID); + + // protect instance variables (but not contents) + lockReporterConfig(); + + // FIXME: Check if any channel is already present and return error + + // addChannel() always adds one channel + oldNChannels = _nChannels; + if (oldNChannels < 0 || oldNChannels > INT_MAX - 1) { + res = kIOReturnOverrun; + goto finish; + } + newNChannels = oldNChannels + 1; + freeNChannels = newNChannels; // until swap success + + // Expand addChannel()-specific data structure + if (_channelNames->ensureCapacity((unsigned)newNChannels) < + (unsigned)newNChannels) { + res = kIOReturnNoMemory; goto finish; + } + if (channelName) { + symChannelName = OSSymbol::withCString(channelName); + if (!symChannelName) { + res = kIOReturnNoMemory; goto finish; + } + } else { + // grab a reference to our shared global + symChannelName = gIOReportNoChannelName; + symChannelName->retain(); + } + + // allocate new buffers into _swap* variables + if ((kerr = handleSwapPrepare(newNChannels))) { + // on error, channels are *not* swapped + res = kerr; goto finish; + } + + // exchange main and _swap* buffers with buffer contents protected + // IOReporter::handleAddChannelSwap() also increments _nElements, etc + lockReporter(); + res = handleAddChannelSwap(channelID, symChannelName); + unlockReporter(); + // On failure, handleAddChannelSwap() leaves *new* buffers in _swap*. + // On success, it's the old buffers, so we put the right size in here. 
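+ // (either way, handleSwapCleanup(freeNChannels) at 'finish' frees the unused set)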
+ if (res == kIOReturnSuccess) { + freeNChannels = oldNChannels; + } + +finish: + // free up not-in-use buffers (tracked by _swap*) + handleSwapCleanup(freeNChannels); + if (symChannelName) symChannelName->release(); + unlockReporterConfig(); + + return res; +} + + +IOReportLegendEntry* +IOReporter::createLegend(void) +{ + IOReportLegendEntry *legendEntry = NULL; + + lockReporterConfig(); + + legendEntry = handleCreateLegend(); + + unlockReporterConfig(); + + return legendEntry; +} + + +IOReturn +IOReporter::configureReport(IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + IOReturn res = kIOReturnError; + + lockReporterConfig(); + + res = handleConfigureReport(channelList, action, result, destination); + + unlockReporterConfig(); + + return res; + +} + + +IOReturn +IOReporter::updateReport(IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + IOReturn res = kIOReturnError; + + lockReporter(); + + res = handleUpdateReport(channelList, action, result, destination); + + unlockReporter(); + + return res; + +} + + +/*******************************/ +/*** PROTECTED METHODS ***/ +/*******************************/ + + +void +IOReporter::lockReporter() +{ + _interruptState = IOSimpleLockLockDisableInterrupt(_reporterLock); + _reporterIsLocked = true; +} + + +void +IOReporter::unlockReporter() +{ + _reporterIsLocked = false; + IOSimpleLockUnlockEnableInterrupt(_reporterLock, _interruptState); +} + +void +IOReporter::lockReporterConfig() +{ + IOLockLock(_configLock); + _reporterConfigIsLocked = true; +} + +void +IOReporter::unlockReporterConfig() +{ + _reporterConfigIsLocked = false; + IOLockUnlock(_configLock); +} + + +IOReturn +IOReporter::handleSwapPrepare(int newNChannels) +{ + IOReturn res = kIOReturnError; + int newNElements; + size_t newElementsSize, newECSize; + + // analyzer appeasement + newElementsSize = newECSize = 0; + + //IORLOG("IOReporter::handleSwapPrepare"); + + IOREPORTER_CHECK_CONFIG_LOCK(); + + if (newNChannels < _nChannels) { + panic("%s doesn't support shrinking", __func__); + } + if (newNChannels <= 0 || _channelDimension <= 0) { + res = kIOReturnUnderrun; + goto finish; + } + if (_swapElements || _swapEnableCounts) { + panic("IOReporter::_swap* already in use"); + } + + // calculate the number of elements given #ch & the dimension of each + if (newNChannels < 0 || newNChannels > INT_MAX / _channelDimension) { + res = kIOReturnOverrun; + goto finish; + } + newNElements = newNChannels * _channelDimension; + + // Allocate memory for the new array of report elements + PREFL_MEMOP_FAIL(newNElements, IOReportElement); + newElementsSize = (size_t)newNElements * sizeof(IOReportElement); + _swapElements = (IOReportElement *)IOMalloc(newElementsSize); + if (_swapElements == NULL) { + res = kIOReturnNoMemory; goto finish; + } + memset(_swapElements, 0, newElementsSize); + + // Allocate memory for the new array of channel watch counts + PREFL_MEMOP_FAIL(newNChannels, int); + newECSize = (size_t)newNChannels * sizeof(int); + _swapEnableCounts = (int *)IOMalloc(newECSize); + if (_swapEnableCounts == NULL){ + res = kIOReturnNoMemory; goto finish; + } + memset(_swapEnableCounts, 0, newECSize); + + // success + res = kIOReturnSuccess; + +finish: + if (res) { + if (_swapElements) { + IOFree(_swapElements, newElementsSize); + _swapElements = NULL; + } + if (_swapEnableCounts) { + IOFree(_swapEnableCounts, newECSize); + _swapEnableCounts = NULL; + } + } + + return res; +} + + 
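/*
 * Usage sketch (illustrative, not part of this change): a driver would
 * typically drive the channel machinery above from its start() routine,
 * pairing a reporter with IOReportLegend::addReporterLegend() so observers
 * can find it. The use of IOSimpleReporter, the 0x1 channel ID, and the
 * "MyDriver" group name are assumptions for the example:
 *
 *   IOSimpleReporter *rep = IOSimpleReporter::with(this,
 *                               kIOReportCategoryPower, kIOReportUnitNone);
 *   if (rep && rep->addChannel(0x1, "wakeups") == kIOReturnSuccess) {
 *       // publishes kIOReportLegendKey on the service (see IOReportLegend.cpp)
 *       IOReportLegend::addReporterLegend(this, rep, "MyDriver", NULL);
 *       rep->setValue(0x1, 0);   // channels report zero until updated
 *   }
 */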
+IOReturn +IOReporter::handleAddChannelSwap(uint64_t channel_id, + const OSSymbol *symChannelName) +{ + IOReturn res = kIOReturnError; + int cnt; + int *tmpWatchCounts = NULL; + IOReportElement *tmpElements = NULL; + bool swapComplete = false; + + //IORLOG("IOReporter::handleSwap"); + + IOREPORTER_CHECK_CONFIG_LOCK(); + IOREPORTER_CHECK_LOCK(); + + if (!_swapElements || !_swapEnableCounts) { + IORLOG("IOReporter::handleSwap ERROR swap variables uninitialized!"); + goto finish; + } + + // Copy any existing elements to the new location + //IORLOG("handleSwap (base) -> copying %u elements over...", _nChannels); + if (_elements) { + PREFL_MEMOP_PANIC(_nElements, IOReportElement); + memcpy(_swapElements, _elements, + (size_t)_nElements * sizeof(IOReportElement)); + + PREFL_MEMOP_PANIC(_nElements, int); + memcpy(_swapEnableCounts, _enableCounts, + (size_t)_nChannels * sizeof(int)); + } + + // Update principal instance variables, keep old buffers for cleanup + tmpElements = _elements; + _elements = _swapElements; + _swapElements = tmpElements; + + tmpWatchCounts = _enableCounts; + _enableCounts = _swapEnableCounts; + _swapEnableCounts = tmpWatchCounts; + + swapComplete = true; + + // but _nChannels & _nElements is still the old (one smaller) size + + // Initialize new element metadata (existing elements copied above) + for (cnt = 0; cnt < _channelDimension; cnt++) { + + _elements[_nElements + cnt].channel_id = channel_id; + _elements[_nElements + cnt].provider_id = _driver_id; + _elements[_nElements + cnt].channel_type = _channelType; + _elements[_nElements + cnt].channel_type.element_idx = cnt; + + //IOREPORTER_DEBUG_ELEMENT(_swapNElements + cnt); + } + + // Store a channel name at the end + if (!_channelNames->setObject((unsigned)_nChannels, symChannelName)) { + // Should never happen because we ensured capacity in addChannel() + res = kIOReturnNoMemory; + goto finish; + } + + // And update the metadata: addChannel() always adds just one channel + _nChannels += 1; + _nElements += _channelDimension; + + // success + res = kIOReturnSuccess; + +finish: + if (res && swapComplete) { + // unswap so new buffers get cleaned up instead of old + tmpElements = _elements; + _elements = _swapElements; + _swapElements = tmpElements; + + tmpWatchCounts = _enableCounts; + _enableCounts = _swapEnableCounts; + _swapEnableCounts = tmpWatchCounts; + } + return res; +} + +void +IOReporter::handleSwapCleanup(int swapNChannels) +{ + int swapNElements; + + if (!_channelDimension || swapNChannels > INT_MAX / _channelDimension) { + panic("%s - can't free %d channels of dimension %d", __func__, + swapNChannels, _channelDimension); + } + swapNElements = swapNChannels * _channelDimension; + + IOREPORTER_CHECK_CONFIG_LOCK(); + + // release buffers no longer used after swapping + if (_swapElements) { + PREFL_MEMOP_PANIC(swapNElements, IOReportElement); + IOFree(_swapElements, (size_t)swapNElements * sizeof(IOReportElement)); + _swapElements = NULL; + } + if (_swapEnableCounts) { + PREFL_MEMOP_PANIC(swapNChannels, int); + IOFree(_swapEnableCounts, (size_t)swapNChannels * sizeof(int)); + _swapEnableCounts = NULL; + } +} + + +// The reporter wants to know if its channels have observers. +// Eventually we'll add some sort of bool ::anyChannelsInUse() which +// clients can use to cull unused reporters after configureReport(disable). 
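/*
 * Caller's-eye sketch (illustrative): kIOReportGetDimensions accumulates
 * into *result, so a subscriber can size its buffer by first running its
 * channel list through configureReport(); the variable names here are
 * hypothetical:
 *
 *   int nElements = 0;
 *   reporter->configureReport(&channelList, kIOReportGetDimensions,
 *                             &nElements, NULL);
 *   // nElements grew by _channelDimension for each matched channel
 */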
+IOReturn +IOReporter::handleConfigureReport(IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + IOReturn res = kIOReturnError; + int channel_index = 0; + uint32_t chIdx; + int *nElements, *nChannels; + + // Check on channelList and result because used below + if (!channelList || !result) goto finish; + + //IORLOG("IOReporter::configureReport action %u for %u channels", + // action, channelList->nchannels); + + // Make sure channel is present, increase matching watch count, 'result' + for (chIdx = 0; chIdx < channelList->nchannels; chIdx++) { + + if (getChannelIndex(channelList->channels[chIdx].channel_id, + &channel_index) == kIOReturnSuccess) { + // IORLOG("reporter %p recognizes channel %lld", this, channelList->channels[chIdx].channel_id); + + switch (action) { + + case kIOReportEnable: + nChannels = (int*)result; + _enabled++; + _enableCounts[channel_index]++; + (*nChannels)++; + break; + + case kIOReportDisable: + nChannels = (int*)result; + _enabled--; + _enableCounts[channel_index]--; + (*nChannels)++; + break; + + case kIOReportGetDimensions: + nElements = (int *)result; + *nElements += _channelDimension; + break; + + default: + IORLOG("ERROR configureReport unknown action!"); + break; + } + } + } + + // success + res = kIOReturnSuccess; + +finish: + return res; +} + + +IOReturn +IOReporter::handleUpdateReport(IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + IOReturn res = kIOReturnError; + int *nElements = (int *)result; + int channel_index = 0; + uint32_t chIdx; + IOBufferMemoryDescriptor *dest; + + if (!channelList || !result || !destination) goto finish; + + dest = OSDynamicCast(IOBufferMemoryDescriptor, (OSObject *)destination); + if (dest == NULL) { + // Invalid destination + res = kIOReturnBadArgument; + goto finish; + } + + if (!_enabled) { + goto finish; + } + + for (chIdx = 0; chIdx < channelList->nchannels; chIdx++) { + + if (getChannelIndex(channelList->channels[chIdx].channel_id, + &channel_index) == kIOReturnSuccess) { + + //IORLOG("%s - found channel_id %llx @ index %d", __func__, + // channelList->channels[chIdx].channel_id, + // channel_index); + + switch(action) { + + case kIOReportCopyChannelData: + res = updateChannelValues(channel_index); + if (res) { + IORLOG("ERROR: updateChannelValues() failed: %x", res); + goto finish; + } + + res = updateReportChannel(channel_index, nElements, dest); + if (res) { + IORLOG("ERROR: updateReportChannel() failed: %x", res); + goto finish; + } + break; + + default: + IORLOG("ERROR updateReport unknown action!"); + res = kIOReturnError; + goto finish; + } + } + } + + // success + res = kIOReturnSuccess; + +finish: + return res; +} + + +IOReportLegendEntry* +IOReporter::handleCreateLegend(void) +{ + IOReportLegendEntry *legendEntry = NULL; + OSArray *channelIDs; + + channelIDs = copyChannelIDs(); + + if (channelIDs) { + legendEntry = IOReporter::legendWith(channelIDs, _channelNames, _channelType, _unit); + channelIDs->release(); + } + + return legendEntry; +} + + +IOReturn +IOReporter::setElementValues(int element_index, + IOReportElementValues *values, + uint64_t record_time /* = 0 */) +{ + IOReturn res = kIOReturnError; + + IOREPORTER_CHECK_LOCK(); + + if (record_time == 0) { + record_time = mach_absolute_time(); + } + + if (element_index >= _nElements || values == NULL) { + res = kIOReturnBadArgument; + goto finish; + } + + memcpy(&_elements[element_index].values, values, sizeof(IOReportElementValues)); + + 
_elements[element_index].timestamp = record_time; + + //IOREPORTER_DEBUG_ELEMENT(index); + + res = kIOReturnSuccess; + +finish: + return res; +} + + +const IOReportElementValues* +IOReporter::getElementValues(int element_index) +{ + IOReportElementValues *elementValues = NULL; + + IOREPORTER_CHECK_LOCK(); + + if (element_index < 0 || element_index >= _nElements) { + IORLOG("ERROR getElementValues out of bounds!"); + goto finish; + } + + elementValues = &_elements[element_index].values; + +finish: + return elementValues; +} + + +IOReturn +IOReporter::updateChannelValues(int channel_index) +{ + return kIOReturnSuccess; +} + + +IOReturn +IOReporter::updateReportChannel(int channel_index, + int *nElements, + IOBufferMemoryDescriptor *destination) +{ + IOReturn res = kIOReturnError; + int start_element_idx, chElems; + size_t size2cpy; + + res = kIOReturnBadArgument; + if (!nElements || !destination) { + goto finish; + } + if (channel_index > _nChannels) { + goto finish; + } + + IOREPORTER_CHECK_LOCK(); + + res = kIOReturnOverrun; + + start_element_idx = channel_index * _channelDimension; + if (start_element_idx >= _nElements) goto finish; + + chElems = _elements[start_element_idx].channel_type.nelements; + + // make sure we don't go beyond the end of _elements[_nElements-1] + if (start_element_idx + chElems > _nElements) { + goto finish; + } + + PREFL_MEMOP_FAIL(chElems, IOReportElement); + size2cpy = (size_t)chElems * sizeof(IOReportElement); + + // make sure there's space in the destination + if (size2cpy > (destination->getCapacity() - destination->getLength())) { + IORLOG("CRITICAL ERROR: Report Buffer Overflow (buffer cap %luB, length %luB, size2cpy %luB", + (unsigned long)destination->getCapacity(), + (unsigned long)destination->getLength(), + (unsigned long)size2cpy); + goto finish; + } + + destination->appendBytes(&_elements[start_element_idx], size2cpy); + *nElements += chElems; + + res = kIOReturnSuccess; + +finish: + return res; +} + + +IOReturn +IOReporter::copyElementValues(int element_index, + IOReportElementValues *elementValues) +{ + IOReturn res = kIOReturnError; + + if (!elementValues) goto finish; + + IOREPORTER_CHECK_LOCK(); + + if (element_index >= _nElements) { + IORLOG("ERROR getElementValues out of bounds!"); + res = kIOReturnBadArgument; + goto finish; + } + + memcpy(elementValues, &_elements[element_index].values, sizeof(IOReportElementValues)); + res = kIOReturnSuccess; + +finish: + return res; +} + + +IOReturn +IOReporter::getFirstElementIndex(uint64_t channel_id, + int *index) +{ + IOReturn res = kIOReturnError; + int channel_index = 0, element_index = 0; + + if (!index) goto finish; + + res = getChannelIndices(channel_id, &channel_index, &element_index); + + if (res == kIOReturnSuccess) { + *index = element_index; + } + +finish: + return res; +} + + +IOReturn +IOReporter::getChannelIndex(uint64_t channel_id, + int *index) +{ + IOReturn res = kIOReturnError; + int channel_index = 0, element_index = 0; + + if (!index) goto finish; + + res = getChannelIndices(channel_id, &channel_index, &element_index); + + if (res == kIOReturnSuccess) { + *index = channel_index; + } + +finish: + return res; +} + + +IOReturn +IOReporter::getChannelIndices(uint64_t channel_id, + int *channel_index, + int *element_index) +{ + IOReturn res = kIOReturnNotFound; + int chIdx, elemIdx; + + if (!channel_index || !element_index) goto finish; + + for (chIdx = 0; chIdx < _nChannels; chIdx++) { + + elemIdx = chIdx * _channelDimension; + if (elemIdx >= _nElements) { + IORLOG("ERROR 
getChannelIndices out of bounds!"); + res = kIOReturnOverrun; + goto finish; + } + + if (channel_id == _elements[elemIdx].channel_id) { + + // The channel index does not care about the depth of elements... + *channel_index = chIdx; + *element_index = elemIdx; + + res = kIOReturnSuccess; + goto finish; + } + } + +finish: + return res; +} + +/********************************/ +/*** PRIVATE METHODS ***/ +/********************************/ + + +// copyChannelIDs relies on the caller to take lock +OSArray* +IOReporter::copyChannelIDs() +{ + int cnt, cnt2; + OSArray *channelIDs = NULL; + OSNumber *tmpNum; + + channelIDs = OSArray::withCapacity((unsigned)_nChannels); + + if (!channelIDs) goto finish; + + for (cnt = 0; cnt < _nChannels; cnt++) { + + cnt2 = cnt * _channelDimension; + + // Encapsulate the Channel ID in OSNumber + tmpNum = OSNumber::withNumber(_elements[cnt2].channel_id, 64); + if (!tmpNum) { + IORLOG("ERROR: Could not create array of channelIDs"); + channelIDs->release(); + channelIDs = NULL; + goto finish; + } + + channelIDs->setObject((unsigned)cnt, tmpNum); + tmpNum->release(); + } + +finish: + return channelIDs; +} + + +// DO NOT REMOVE THIS METHOD WHICH IS THE MAIN LEGEND CREATION FUNCTION +/*static */ IOReportLegendEntry* +IOReporter::legendWith(OSArray *channelIDs, + OSArray *channelNames, + IOReportChannelType channelType, + IOReportUnits unit) +{ + unsigned int cnt, chCnt; + uint64_t type64; + OSNumber *tmpNum; + const OSSymbol *tmpSymbol; + OSArray *channelLegendArray = NULL, *tmpChannelArray = NULL; + OSDictionary *channelInfoDict = NULL; + IOReportLegendEntry *legendEntry = NULL; + + // No need to check validity of channelNames because param is optional + if (!channelIDs) goto finish; + chCnt = channelIDs->getCount(); + + channelLegendArray = OSArray::withCapacity(chCnt); + + for (cnt = 0; cnt < chCnt; cnt++) { + + tmpChannelArray = OSArray::withCapacity(3); + + // Encapsulate the Channel ID in OSNumber + tmpChannelArray->setObject(kIOReportChannelIDIdx, channelIDs->getObject(cnt)); + + // Encapsulate the Channel Type in OSNumber + memcpy(&type64, &channelType, sizeof(type64)); + tmpNum = OSNumber::withNumber(type64, 64); + if (!tmpNum) { + goto finish; + } + tmpChannelArray->setObject(kIOReportChannelTypeIdx, tmpNum); + tmpNum->release(); + + // Encapsulate the Channel Name in OSSymbol + // Use channelNames if provided + if (channelNames != NULL) { + tmpSymbol = OSDynamicCast(OSSymbol, channelNames->getObject(cnt)); + if (tmpSymbol && tmpSymbol != gIOReportNoChannelName) { + tmpChannelArray->setObject(kIOReportChannelNameIdx, tmpSymbol); + } // Else, skip and leave name field empty + } + + channelLegendArray->setObject(cnt, tmpChannelArray); + tmpChannelArray->release(); + tmpChannelArray = NULL; + } + + // Stuff the legend entry only if we have channels... 
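+ // (with no channels, legendEntry stays NULL and the caller gets nothing)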
+ if (channelLegendArray->getCount() != 0) { + + channelInfoDict = OSDictionary::withCapacity(1); + + if (!channelInfoDict) { + goto finish; + } + + tmpNum = OSNumber::withNumber(unit, 64); + if (tmpNum) { + channelInfoDict->setObject(kIOReportLegendUnitKey, tmpNum); + tmpNum->release(); + } + + legendEntry = OSDictionary::withCapacity(1); + + if (legendEntry) { + legendEntry->setObject(kIOReportLegendChannelsKey, channelLegendArray); + legendEntry->setObject(kIOReportLegendInfoKey, channelInfoDict); + } + } + +finish: + if (tmpChannelArray) tmpChannelArray->release(); + if (channelInfoDict) channelInfoDict->release(); + if (channelLegendArray) channelLegendArray->release(); + + return legendEntry; +} diff --git a/iokit/Kernel/IOReporterDefs.h b/iokit/Kernel/IOReporterDefs.h new file mode 100644 index 000000000..1a79a2de6 --- /dev/null +++ b/iokit/Kernel/IOReporterDefs.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2012-2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _IOEPORTERDEFS_H +#define _IOEPORTERDEFS_H + +//#include "IOReportHubCommon.h" + +//#define IORDEBUG_IOLOG + +#if defined(IORDEBUG_IOLOG) +#define IORLOG(fmt, args...) \ +do { \ + IOLog((fmt), ##args); \ + IOLog("\n"); \ +} while(0) + +#else +#define IORLOG(fmt, args...) +#endif + +#define IORERROR_LOG + +#ifdef IORERROR_LOG +#define IORERROR(fmt, args...) IOLog(fmt, ##args); +#else +#define IORERROR(fmt, args...) 
+#endif + +// overflow detection routines +#if (SIZE_T_MAX < INT_MAX) +#error "(SIZE_T_MAX < INT_MAX) -> PREFL_MEMOP_*()) unsafe for size_t" +#endif + +#define PREFL_MEMOP_FAIL(__val, __type) do { \ + if (__val <= 0) { \ + IORERROR("%s - %s <= 0!", __func__, #__val); \ + res = kIOReturnUnderrun; \ + goto finish; \ + } else if (__val > INT_MAX / (int)sizeof(__type)) { \ + IORERROR("%s - %s > INT_MAX / sizeof(%s)!", __func__, #__val, #__type);\ + res = kIOReturnOverrun; \ + goto finish; \ + } \ +} while(0) + +#define PREFL_MEMOP_PANIC(__val, __type) do { \ + if (__val <= 0) { \ + panic("%s - %s <= 0!", __func__, #__val); \ + } else if (__val > INT_MAX / (int)sizeof(__type)) { \ + panic("%s - %s > INT_MAX / sizeof(%s)!", __func__, #__val, #__type); \ + } \ +} while(0) + +//#include "IOReportHubCommon.h"// + + + +#define IOREPORTER_DEBUG_ELEMENT(idx) \ +do { \ +IOLog("IOReporter::DrvID: %llx | Elt:[%3d] |ID: %llx |Ticks: %llu |", \ +_elements[idx].provider_id, \ +idx, \ +_elements[idx].channel_id, \ +_elements[idx].timestamp); \ +IOLog("0: %llu | 1: %llu | 2: %llu | 3: %llu\n", \ +_elements[idx].values.v[0], \ +_elements[idx].values.v[1], \ +_elements[idx].values.v[2], \ +_elements[idx].values.v[3]); \ +} while(0) + + +#define IOREPORTER_CHECK_LOCK() \ +do { \ + if (!_reporterIsLocked) { \ + panic("%s was called out of locked context!", __PRETTY_FUNCTION__); \ + } \ +} while(0) \ + +#define IOREPORTER_CHECK_CONFIG_LOCK() \ +do { \ + if (!_reporterConfigIsLocked) { \ + panic("%s was called out of config locked context!", __PRETTY_FUNCTION__); \ + } \ +} while(0) \ + +#endif /* ! _IOEPORTERDEFS_H */ + diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index d4ad9ce51..a23e683ec 100644 --- a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2012 Apple Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include #include #include @@ -175,6 +177,7 @@ const OSSymbol * gIOPlatformSleepActionKey; const OSSymbol * gIOPlatformWakeActionKey; const OSSymbol * gIOPlatformQuiesceActionKey; const OSSymbol * gIOPlatformActiveActionKey; +const OSSymbol * gIOPlatformHaltRestartActionKey; const OSSymbol * gIOPlatformFunctionHandlerSet; @@ -217,6 +220,11 @@ static thread_call_t gIOConsoleLockCallout; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +struct IOInterruptAccountingReporter { + IOSimpleReporter * reporter; /* Reporter responsible for communicating the statistics */ + IOInterruptAccountingData * statistics; /* The live statistics values, if any */ +}; + struct ArbitrationLockQueueElement { queue_chain_t link; IOThread thread; @@ -257,6 +265,8 @@ static OSData *sCpuDelayData = OSData::withCapacity(8 * sizeof(CpuDelay static IORecursiveLock *sCpuDelayLock = IORecursiveLockAlloc(); static OSArray *sCpuLatencyHandlers[kCpuNumDelayTypes]; const OSSymbol *sCPULatencyFunctionName[kCpuNumDelayTypes]; +static OSNumber * sCPULatencyHolder[kCpuNumDelayTypes]; +static OSNumber * sCPULatencySet[kCpuNumDelayTypes]; static void requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType); @@ -337,15 +347,23 @@ void IOService::initialize( void ) gIOConsoleUsersSeedValue = OSData::withBytesNoCopy(&gIOConsoleUsersSeed, sizeof(gIOConsoleUsersSeed)); - gIOPlatformSleepActionKey = OSSymbol::withCStringNoCopy(kIOPlatformSleepActionKey); - gIOPlatformWakeActionKey = OSSymbol::withCStringNoCopy(kIOPlatformWakeActionKey); - gIOPlatformQuiesceActionKey = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey); - gIOPlatformActiveActionKey = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey); + gIOPlatformSleepActionKey = OSSymbol::withCStringNoCopy(kIOPlatformSleepActionKey); + gIOPlatformWakeActionKey = OSSymbol::withCStringNoCopy(kIOPlatformWakeActionKey); + gIOPlatformQuiesceActionKey = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey); + gIOPlatformActiveActionKey = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey); + gIOPlatformHaltRestartActionKey = OSSymbol::withCStringNoCopy(kIOPlatformHaltRestartActionKey); gIOPlatformFunctionHandlerSet = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerSet); #if defined(__i386__) || defined(__x86_64__) sCPULatencyFunctionName[kCpuDelayBusStall] = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxBusDelay); sCPULatencyFunctionName[kCpuDelayInterrupt] = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxInterruptDelay); + uint32_t idx; + for (idx = 0; idx < kCpuNumDelayTypes; idx++) + { + sCPULatencySet[idx] = OSNumber::withNumber(-1U, 32); + sCPULatencyHolder[idx] = OSNumber::withNumber(0ULL, 64); + assert(sCPULatencySet[idx] && sCPULatencyHolder[idx]); + } #endif gNotificationLock = IORecursiveLockAlloc(); @@ -433,13 +451,109 @@ void IOService::stop( IOService * provider ) { } +bool IOService::init( OSDictionary * dictionary ) +{ + bool ret = false; + + ret = super::init(dictionary); + + if (!ret) + goto done; + + reserved = IONew(ExpansionData, 1); + + if (!reserved) { + ret = false; + goto done; + } + + bzero(reserved, sizeof(*reserved)); + + /* + * TODO: Improve on this. Previous efforts to more lazily allocate this + * lock based on the presence of specifiers ran into issues as some + * platforms set up the specifiers after IOService initialization. 
+ * + * We may be able to get away with a global lock, as this should only be + * contended by IOReporting clients and driver start/stop (unless a + * driver wants to remove/add handlers in the course of normal operation, + * which should be unlikely). + */ + reserved->interruptStatisticsLock = IOLockAlloc(); + + if (!reserved->interruptStatisticsLock) { + ret = false; + goto done; + } + +done: + return ret; +} + +bool IOService::init( IORegistryEntry * from, + const IORegistryPlane * inPlane ) +{ + bool ret = false; + + ret = super::init(from, inPlane); + + if (!ret) + goto done; + + reserved = IONew(ExpansionData, 1); + + if (!reserved) { + ret = false; + goto done; + } + + bzero(reserved, sizeof(*reserved)); + + /* + * TODO: Improve on this. Previous efforts to more lazily allocate this + * lock based on the presence of specifiers ran into issues as some + * platforms set up the specifiers after IOService initialization. + * + * We may be able to get away with a global lock, as this should only be + * contended by IOReporting clients and driver start/stop (unless a + * driver wants to remove/add handlers in the course of normal operation, + * which should be unlikely). + */ + reserved->interruptStatisticsLock = IOLockAlloc(); + + if (!reserved->interruptStatisticsLock) { + ret = false; + goto done; + } + +done: + return ret; +} + void IOService::free( void ) { + int i = 0; requireMaxBusStall(0); requireMaxInterruptDelay(0); if( getPropertyTable()) unregisterAllInterest(); PMfree(); + + if (reserved) { + if (reserved->interruptStatisticsArray) { + for (i = 0; i < reserved->interruptStatisticsArrayCount; i++) { + if (reserved->interruptStatisticsArray[i].reporter) + reserved->interruptStatisticsArray[i].reporter->release(); + } + + IODelete(reserved->interruptStatisticsArray, IOInterruptAccountingReporter, reserved->interruptStatisticsArrayCount); + } + + if (reserved->interruptStatisticsLock) + IOLockFree(reserved->interruptStatisticsLock); + IODelete(reserved, ExpansionData, 1); + } + super::free(); } @@ -513,9 +627,13 @@ void IOService::detach( IOService * provider ) } // check for last client detach from a terminated service - if( provider->lockForArbitration( true )) { - if( adjParent) - provider->_adjustBusy( -1 ); + if( provider->lockForArbitration( true )) + { + if (kIOServiceStartState & __state[1]) + { + provider->scheduleTerminatePhase2(); + } + if( adjParent) provider->_adjustBusy( -1 ); if( (provider->__state[1] & kIOServiceTermPhase3State) && (0 == provider->getClient())) { provider->scheduleFinalize(); @@ -963,6 +1081,25 @@ void IOService::setPlatform( IOPlatformExpert * platform) { gIOPlatform = platform; gIOResources->attachToParent( gIOServiceRoot, gIOServicePlane ); + +#if defined(__i386__) || defined(__x86_64__) + + static const char * keys[kCpuNumDelayTypes] = { + kIOPlatformMaxBusDelay, kIOPlatformMaxInterruptDelay }; + const OSObject * objs[2]; + OSArray * array; + uint32_t idx; + + for (idx = 0; idx < kCpuNumDelayTypes; idx++) + { + objs[0] = sCPULatencySet[idx]; + objs[1] = sCPULatencyHolder[idx]; + array = OSArray::withObjects(objs, 2); + if (!array) break; + platform->setProperty(keys[idx], array); + array->release(); + } +#endif /* defined(__i386__) || defined(__x86_64__) */ } void IOService::setPMRootDomain( class IOPMrootDomain * rootDomain) @@ -1502,56 +1639,76 @@ IONotifier * IOService::registerInterest( const OSSymbol * typeOfInterest, IOServiceInterestHandler handler, void * target, void * ref ) { _IOServiceInterestNotifier * notify = 0; + IOReturn rc = 
kIOReturnError; + + notify = new _IOServiceInterestNotifier; + if (!notify) return NULL; + + if(notify->init()) { + rc = registerInterestForNotifer(notify, typeOfInterest, + handler, target, ref); + } + + if (rc != kIOReturnSuccess) { + notify->release(); + notify = 0; + } + + return( notify ); +} + +IOReturn IOService::registerInterestForNotifer( IONotifier *svcNotify, const OSSymbol * typeOfInterest, + IOServiceInterestHandler handler, void * target, void * ref ) +{ + IOReturn rc = kIOReturnSuccess; + _IOServiceInterestNotifier *notify = 0; if( (typeOfInterest != gIOGeneralInterest) && (typeOfInterest != gIOBusyInterest) && (typeOfInterest != gIOAppPowerStateInterest) && (typeOfInterest != gIOConsoleSecurityInterest) && (typeOfInterest != gIOPriorityPowerStateInterest)) - return( 0 ); + return( kIOReturnBadArgument ); + + if (!svcNotify || !(notify = OSDynamicCast(_IOServiceInterestNotifier, svcNotify))) + return( kIOReturnBadArgument ); lockForArbitration(); if( 0 == (__state[0] & kIOServiceInactiveState)) { - notify = new _IOServiceInterestNotifier; - if( notify && !notify->init()) { - notify->release(); - notify = 0; - } - - if( notify) { - notify->handler = handler; - notify->target = target; - notify->ref = ref; - notify->state = kIOServiceNotifyEnable; - queue_init( ¬ify->handlerInvocations ); - - ////// queue + notify->handler = handler; + notify->target = target; + notify->ref = ref; + notify->state = kIOServiceNotifyEnable; - LOCKWRITENOTIFY(); + ////// queue - // Get the head of the notifier linked list - IOCommand *notifyList = (IOCommand *) getProperty( typeOfInterest ); - if (!notifyList || !OSDynamicCast(IOCommand, notifyList)) { - notifyList = OSTypeAlloc(IOCommand); - if (notifyList) { - notifyList->init(); - setProperty( typeOfInterest, notifyList); - notifyList->release(); - } - } + LOCKWRITENOTIFY(); - if (notifyList) { - enqueue(¬ifyList->fCommandChain, ¬ify->chain); - notify->retain(); // ref'ed while in list - } + // Get the head of the notifier linked list + IOCommand *notifyList = (IOCommand *) getProperty( typeOfInterest ); + if (!notifyList || !OSDynamicCast(IOCommand, notifyList)) { + notifyList = OSTypeAlloc(IOCommand); + if (notifyList) { + notifyList->init(); + setProperty( typeOfInterest, notifyList); + notifyList->release(); + } + } - UNLOCKNOTIFY(); + if (notifyList) { + enqueue(¬ifyList->fCommandChain, ¬ify->chain); + notify->retain(); // ref'ed while in list } + + UNLOCKNOTIFY(); + } + else { + rc = kIOReturnNotReady; } unlockForArbitration(); - return( notify ); + return rc; } static void cleanInterestList( OSObject * head ) @@ -1661,6 +1818,11 @@ void _IOServiceInterestNotifier::enable( bool was ) UNLOCKNOTIFY(); } +bool _IOServiceInterestNotifier::init() +{ + queue_init( &handlerInvocations ); + return (OSObject::init()); +} /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* @@ -1715,7 +1877,7 @@ bool IOService::terminatePhase1( IOOptionBits options ) bool didInactive; bool startPhase2 = false; - TLOG("%s::terminatePhase1(%08llx)\n", getName(), (long long)options); + TLOG("%s[0x%qx]::terminatePhase1(%08llx)\n", getName(), getRegistryEntryID(), (long long)options); uint64_t regID = getRegistryEntryID(); IOServiceTrace( @@ -1728,8 +1890,10 @@ bool IOService::terminatePhase1( IOOptionBits options ) // -- compat if( options & kIOServiceRecursing) { lockForArbitration(); + __state[0] |= kIOServiceInactiveState; __state[1] |= kIOServiceRecursing; unlockForArbitration(); + return( true ); } // -- @@ -1743,38 +1907,48 @@ bool 
IOService::terminatePhase1( IOOptionBits options ) while( victim ) { - didInactive = victim->lockForArbitration( true ); + didInactive = victim->lockForArbitration( true ); if( didInactive) { - didInactive = (0 == (victim->__state[0] & kIOServiceInactiveState)); + didInactive = (0 == (victim->__state[0] & kIOServiceInactiveState)) + || (victim->__state[1] & kIOServiceRecursing); if( didInactive) { victim->__state[0] |= kIOServiceInactiveState; victim->__state[0] &= ~(kIOServiceRegisteredState | kIOServiceMatchedState | kIOServiceFirstPublishState | kIOServiceFirstMatchState); + victim->__state[1] &= ~kIOServiceRecursing; - if (victim == this) - victim->__state[1] |= kIOServiceTermPhase1State; + if (victim == this) + { + victim->__state[1] |= kIOServiceTermPhase1State; + if (kIOServiceTerminateNeedWillTerminate & options) + { + victim->__state[1] |= kIOServiceNeedWillTerminate; + } + } victim->_adjustBusy( 1 ); } else if (victim != this) do { - IOLockLock(gIOServiceBusyLock); - wait = (victim->__state[1] & kIOServiceTermPhase1State); - if( wait) { - TLOG("%s::waitPhase1(%s)\n", getName(), victim->getName()); - victim->__state[1] |= kIOServiceTerm1WaiterState; - victim->unlockForArbitration(); - assert_wait((event_t)&victim->__state[1], THREAD_UNINT); - } - IOLockUnlock(gIOServiceBusyLock); - if( wait) { - waitResult = thread_block(THREAD_CONTINUE_NULL); - TLOG("%s::did waitPhase1(%s)\n", getName(), victim->getName()); - victim->lockForArbitration(); - } - } while( wait && (waitResult != THREAD_TIMED_OUT)); - + IOLockLock(gIOServiceBusyLock); + wait = (victim->__state[1] & kIOServiceTermPhase1State); + if( wait) { + TLOG("%s[0x%qx]::waitPhase1(%s[0x%qx])\n", + getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID()); + victim->__state[1] |= kIOServiceTerm1WaiterState; victim->unlockForArbitration(); + assert_wait((event_t)&victim->__state[1], THREAD_UNINT); + } + IOLockUnlock(gIOServiceBusyLock); + if( wait) { + waitResult = thread_block(THREAD_CONTINUE_NULL); + TLOG("%s[0x%qx]::did waitPhase1(%s[0x%qx])\n", + getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID()); + victim->lockForArbitration(); + } + } while( wait && (waitResult != THREAD_TIMED_OUT)); + + victim->unlockForArbitration(); } if( victim == this) startPhase2 = didInactive; @@ -1786,11 +1960,13 @@ bool IOService::terminatePhase1( IOOptionBits options ) iter = victim->getClientIterator(); if( iter) { while( (client = (IOService *) iter->getNextObject())) { - TLOG("%s::requestTerminate(%s, %08llx)\n", - client->getName(), victim->getName(), (long long)options); + TLOG("%s[0x%qx]::requestTerminate(%s[0x%qx], %08llx)\n", + client->getName(), client->getRegistryEntryID(), + victim->getName(), victim->getRegistryEntryID(), (long long)options); ok = client->requestTerminate( victim, options ); - TLOG("%s::requestTerminate(%s, ok = %d)\n", - client->getName(), victim->getName(), ok); + TLOG("%s[0x%qx]::requestTerminate(%s[0x%qx], ok = %d)\n", + client->getName(), client->getRegistryEntryID(), + victim->getName(), victim->getRegistryEntryID(), ok); uint64_t regID1 = client->getRegistryEntryID(); uint64_t regID2 = victim->getRegistryEntryID(); @@ -1820,20 +1996,20 @@ bool IOService::terminatePhase1( IOOptionBits options ) if( startPhase2) { - lockForArbitration(); - __state[1] &= ~kIOServiceTermPhase1State; - if (kIOServiceTerm1WaiterState & __state[1]) - { - __state[1] &= ~kIOServiceTerm1WaiterState; - TLOG("%s::wakePhase1\n", getName()); - IOLockLock( gIOServiceBusyLock ); - 
thread_wakeup( (event_t) &__state[1]); - IOLockUnlock( gIOServiceBusyLock ); - } - unlockForArbitration(); - - scheduleTerminatePhase2( options ); + lockForArbitration(); + __state[1] &= ~kIOServiceTermPhase1State; + if (kIOServiceTerm1WaiterState & __state[1]) + { + __state[1] &= ~kIOServiceTerm1WaiterState; + TLOG("%s[0x%qx]::wakePhase1\n", getName(), getRegistryEntryID()); + IOLockLock( gIOServiceBusyLock ); + thread_wakeup( (event_t) &__state[1]); + IOLockUnlock( gIOServiceBusyLock ); + } + unlockForArbitration(); + scheduleTerminatePhase2( options ); } + return( true ); } @@ -1847,20 +2023,21 @@ void IOService::setTerminateDefer(IOService * provider, bool defer) if (provider && !defer) { provider->lockForArbitration(); - if (provider->__state[0] & kIOServiceInactiveState) - { - provider->scheduleTerminatePhase2(); - } + provider->scheduleTerminatePhase2(); provider->unlockForArbitration(); } } +// call with lockForArbitration void IOService::scheduleTerminatePhase2( IOOptionBits options ) { AbsoluteTime deadline; int waitResult = THREAD_AWAKENED; bool wait, haveDeadline = false; + if (!(__state[0] & kIOServiceInactiveState) + || (__state[1] & kIOServiceTermPhase1State)) return; + options |= kIOServiceRequired; retain(); @@ -1895,7 +2072,7 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options ) waitResult = IOLockSleepDeadline( gJobsLock, &gIOTerminateWork, deadline, THREAD_UNINT ); if( waitResult == THREAD_TIMED_OUT) { - IOLog("%s::terminate(kIOServiceSynchronous) timeout\n", getName()); + IOLog("%s[0x%qx]::terminate(kIOServiceSynchronous) timeout\n", getName(), getRegistryEntryID()); } } } while(gIOTerminateWork || (wait && (waitResult != THREAD_TIMED_OUT))); @@ -1936,10 +2113,10 @@ void IOService::terminateThread( void * arg, wait_result_t waitResult ) void IOService::scheduleStop( IOService * provider ) { - TLOG("%s::scheduleStop(%s)\n", getName(), provider->getName()); - uint64_t regID1 = getRegistryEntryID(); uint64_t regID2 = provider->getRegistryEntryID(); + + TLOG("%s[0x%qx]::scheduleStop(%s[0x%qx])\n", getName(), regID1, provider->getName(), regID2); IOServiceTrace( IOSERVICE_TERMINATE_SCHEDULE_STOP, (uintptr_t) regID1, @@ -1963,9 +2140,9 @@ void IOService::scheduleStop( IOService * provider ) void IOService::scheduleFinalize( void ) { - TLOG("%s::scheduleFinalize\n", getName()); - uint64_t regID1 = getRegistryEntryID(); + + TLOG("%s[0x%qx]::scheduleFinalize\n", getName(), regID1); IOServiceTrace( IOSERVICE_TERMINATE_SCHEDULE_FINALIZE, (uintptr_t) regID1, @@ -2019,15 +2196,16 @@ void IOService::actionWillTerminate( IOService * victim, IOOptionBits options, OSIterator * iter; IOService * client; bool ok; + uint64_t regID1, regID2 = victim->getRegistryEntryID(); iter = victim->getClientIterator(); if( iter) { while( (client = (IOService *) iter->getNextObject())) { - TLOG("%s::willTerminate(%s, %08llx)\n", - client->getName(), victim->getName(), (long long)options); - uint64_t regID1 = client->getRegistryEntryID(); - uint64_t regID2 = victim->getRegistryEntryID(); + regID1 = client->getRegistryEntryID(); + TLOG("%s[0x%qx]::willTerminate(%s[0x%qx], %08llx)\n", + client->getName(), regID1, + victim->getName(), regID2, (long long)options); IOServiceTrace( IOSERVICE_TERMINATE_WILL, (uintptr_t) regID1, @@ -2049,18 +2227,20 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options, OSIterator * iter; IOService * client; bool defer = false; + uint64_t regID1, regID2 = victim->getRegistryEntryID(); victim->messageClients( kIOMessageServiceIsTerminated, 
(void *)(uintptr_t) options ); iter = victim->getClientIterator(); if( iter) { while( (client = (IOService *) iter->getNextObject())) { - TLOG("%s::didTerminate(%s, %08llx)\n", - client->getName(), victim->getName(), (long long)options); + + regID1 = client->getRegistryEntryID(); + TLOG("%s[0x%qx]::didTerminate(%s[0x%qx], %08llx)\n", + client->getName(), regID1, + victim->getName(), regID2, (long long)options); client->didTerminate( victim, options, &defer ); - uint64_t regID1 = client->getRegistryEntryID(); - uint64_t regID2 = victim->getRegistryEntryID(); IOServiceTrace( (defer ? IOSERVICE_TERMINATE_DID_DEFER : IOSERVICE_TERMINATE_DID), @@ -2069,20 +2249,87 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options, (uintptr_t) regID2, (uintptr_t) (regID2 >> 32)); - TLOG("%s::didTerminate(%s, defer %d)\n", - client->getName(), victim->getName(), defer); + TLOG("%s[0x%qx]::didTerminate(%s[0x%qx], defer %d)\n", + client->getName(), regID1, + victim->getName(), regID2, defer); } iter->release(); } } -void IOService::actionFinalize( IOService * victim, IOOptionBits options, + +void IOService::actionWillStop( IOService * victim, IOOptionBits options, void *unused1 __unused, void *unused2 __unused, void *unused3 __unused ) { - TLOG("%s::finalize(%08llx)\n", victim->getName(), (long long)options); + OSIterator * iter; + IOService * provider; + bool ok; + uint64_t regID1, regID2 = victim->getRegistryEntryID(); + + iter = victim->getProviderIterator(); + if( iter) { + while( (provider = (IOService *) iter->getNextObject())) { + + regID1 = provider->getRegistryEntryID(); + TLOG("%s[0x%qx]::willTerminate(%s[0x%qx], %08llx)\n", + victim->getName(), regID2, + provider->getName(), regID1, (long long)options); + IOServiceTrace( + IOSERVICE_TERMINATE_WILL, + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32), + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32)); + + ok = victim->willTerminate( provider, options ); + } + iter->release(); + } +} +void IOService::actionDidStop( IOService * victim, IOOptionBits options, + void *unused1 __unused, void *unused2 __unused, + void *unused3 __unused ) +{ + OSIterator * iter; + IOService * provider; + bool defer = false; + uint64_t regID1, regID2 = victim->getRegistryEntryID(); + + iter = victim->getProviderIterator(); + if( iter) { + while( (provider = (IOService *) iter->getNextObject())) { + + regID1 = provider->getRegistryEntryID(); + TLOG("%s[0x%qx]::didTerminate(%s[0x%qx], %08llx)\n", + victim->getName(), regID2, + provider->getName(), regID1, (long long)options); + victim->didTerminate( provider, options, &defer ); + + IOServiceTrace( + (defer ? 
IOSERVICE_TERMINATE_DID_DEFER + : IOSERVICE_TERMINATE_DID), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32), + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32)); + + TLOG("%s[0x%qx]::didTerminate(%s[0x%qx], defer %d)\n", + victim->getName(), regID2, + provider->getName(), regID1, defer); + } + iter->release(); + } +} + + +void IOService::actionFinalize( IOService * victim, IOOptionBits options, + void *unused1 __unused, void *unused2 __unused, + void *unused3 __unused ) +{ uint64_t regID1 = victim->getRegistryEntryID(); + TLOG("%s[0x%qx]::finalize(%08llx)\n", victim->getName(), regID1, (long long)options); IOServiceTrace( IOSERVICE_TERMINATE_FINALIZE, (uintptr_t) regID1, @@ -2096,10 +2343,10 @@ void IOService::actionStop( IOService * provider, IOService * client, void *unused1 __unused, void *unused2 __unused, void *unused3 __unused ) { - TLOG("%s::stop(%s)\n", client->getName(), provider->getName()); - uint64_t regID1 = provider->getRegistryEntryID(); uint64_t regID2 = client->getRegistryEntryID(); + + TLOG("%s[0x%qx]::stop(%s[0x%qx])\n", client->getName(), regID2, provider->getName(), regID1); IOServiceTrace( IOSERVICE_TERMINATE_STOP, (uintptr_t) regID1, @@ -2110,7 +2357,8 @@ void IOService::actionStop( IOService * provider, IOService * client, client->stop( provider ); if( provider->isOpen( client )) provider->close( client ); - TLOG("%s::detach(%s)\n", client->getName(), provider->getName()); + + TLOG("%s[0x%qx]::detach(%s[0x%qx])\n", client->getName(), regID2, provider->getName(), regID1); client->detach( provider ); } @@ -2158,6 +2406,10 @@ void IOService::terminateWorker( IOOptionBits options ) if (doPhase2 && (iter = victim->getClientIterator())) { while (doPhase2 && (client = (IOService *) iter->getNextObject())) { doPhase2 = (0 == (client->__state[1] & kIOServiceStartState)); + + if (!doPhase2) TLOG("%s[0x%qx]::defer phase2(%s[0x%qx])\n", + victim->getName(), victim->getRegistryEntryID(), + client->getName(), client->getRegistryEntryID()); } iter->release(); } @@ -2167,6 +2419,12 @@ void IOService::terminateWorker( IOOptionBits options ) victim->unlockForArbitration(); } if( doPhase2) { + + if (kIOServiceNeedWillTerminate & victim->__state[1]) { + _workLoopAction( (IOWorkLoop::Action) &actionWillStop, + victim, (void *)(uintptr_t) options, NULL ); + } + if( 0 == victim->getClient()) { // no clients - will go to finalize IOLockLock( gJobsLock ); @@ -2194,6 +2452,10 @@ void IOService::terminateWorker( IOOptionBits options ) } _workLoopAction( (IOWorkLoop::Action) &actionDidTerminate, victim, (void *)(uintptr_t) options ); + if (kIOServiceNeedWillTerminate & victim->__state[1]) { + _workLoopAction( (IOWorkLoop::Action) &actionDidStop, + victim, (void *)(uintptr_t) options, NULL ); + } didPhase2List->removeObject(0); } IOLockLock( gJobsLock ); @@ -2220,13 +2482,13 @@ void IOService::terminateWorker( IOOptionBits options ) provider = (IOService *) gIOStopProviderList->getObject(idx); assert( provider ); + + uint64_t regID1 = provider->getRegistryEntryID(); + uint64_t regID2 = client->getRegistryEntryID(); if( !provider->isChild( client, gIOServicePlane )) { // may be multiply queued - nop it - TLOG("%s::nop stop(%s)\n", client->getName(), provider->getName()); - - uint64_t regID1 = provider->getRegistryEntryID(); - uint64_t regID2 = client->getRegistryEntryID(); + TLOG("%s[0x%qx]::nop stop(%s[0x%qx])\n", client->getName(), regID2, provider->getName(), regID1); IOServiceTrace( IOSERVICE_TERMINATE_STOP_NOP, (uintptr_t) regID1, @@ -2237,10 +2499,9 @@ void IOService::terminateWorker( 
IOOptionBits options ) } else { // a terminated client is not ready for stop if it has clients, skip it if( (kIOServiceInactiveState & client->__state[0]) && client->getClient()) { - TLOG("%s::defer stop(%s)\n", client->getName(), provider->getName()); - - uint64_t regID1 = provider->getRegistryEntryID(); - uint64_t regID2 = client->getRegistryEntryID(); + TLOG("%s[0x%qx]::defer stop(%s[0x%qx])\n", + client->getName(), regID2, + client->getClient()->getName(), client->getClient()->getRegistryEntryID()); IOServiceTrace( IOSERVICE_TERMINATE_STOP_DEFER, (uintptr_t) regID1, @@ -2294,8 +2555,9 @@ void IOService::terminateWorker( IOOptionBits options ) bool IOService::finalize( IOOptionBits options ) { - OSIterator * iter; - IOService * provider; + OSIterator * iter; + IOService * provider; + uint64_t regID1, regID2 = getRegistryEntryID(); iter = getProviderIterator(); assert( iter ); @@ -2306,6 +2568,16 @@ bool IOService::finalize( IOOptionBits options ) // -- compat if( 0 == (__state[1] & kIOServiceTermPhase3State)) { /* we come down here on programmatic terminate */ + + regID1 = provider->getRegistryEntryID(); + TLOG("%s[0x%qx]::stop1(%s[0x%qx])\n", getName(), regID2, provider->getName(), regID1); + IOServiceTrace( + IOSERVICE_TERMINATE_STOP, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + stop( provider ); if( provider->isOpen( this )) provider->close( this ); @@ -3196,8 +3468,7 @@ void IOService::doServiceMatch( IOOptionBits options ) } __state[1] &= ~kIOServiceConfigState; - if( __state[0] & kIOServiceInactiveState) - scheduleTerminatePhase2(); + scheduleTerminatePhase2(); _adjustBusy( -1 ); unlockForArbitration(); @@ -5303,6 +5574,8 @@ requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType) { ml_set_maxintdelay(ns); } + sCPULatencyHolder[delayType]->setValue(holder ? holder->getRegistryEntryID() : 0); + sCPULatencySet [delayType]->setValue(ns); OSArray * handlers = sCpuLatencyHandlers[delayType]; IOService * target; @@ -5504,6 +5777,209 @@ IOReturn IOService::unregisterInterrupt(int source) return interruptController->unregisterInterrupt(this, source); } +IOReturn IOService::addInterruptStatistics(IOInterruptAccountingData * statistics, int source) +{ + IOReportLegend * legend = NULL; + IOInterruptAccountingData * oldValue = NULL; + IOInterruptAccountingReporter * newArray = NULL; + int newArraySize = 0; + int i = 0; + + if (source < 0) { + return kIOReturnBadArgument; + } + + /* + * We support statistics on a maximum of 256 interrupts per nub; if a nub + * has more than 256 interrupt specifiers associated with it, and tries + * to register a high interrupt index with interrupt accounting, panic. + * Having more than 256 interrupts associated with a single nub is + * probably a sign that something fishy is going on. + */ + if (source > IA_INDEX_MAX) { + panic("addInterruptStatistics called for an excessively large index (%d)", source); + } + + /* + * TODO: This is ugly (wrapping a lock around an allocation). I'm only + * leaving it as is because the likelihood of contention where we are + * actually growing the array is minimal (we would realistically need + * to be starting a driver for the first time, with an IOReporting + * client already in place). Nonetheless, cleanup that can be done + * to adhere to best practices; it'll make the code more complicated, + * unfortunately. + */ + IOLockLock(reserved->interruptStatisticsLock); + + /* + * Lazily allocate the statistics array. 
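+	 *
+	 * [Editor's note] The lazy allocation below, together with the doubling
+	 * growth further down, amortizes resizing in the absence of an
+	 * IORealloc() primitive. A minimal standalone sketch of the same
+	 * policy; `old`, `count`, and `index` are hypothetical names, while
+	 * IONew, IODelete, bzero, and memcpy are the calls this patch uses:
+	 *
+	 *   int newCount = (count ? count : 1);
+	 *   while (newCount <= index)
+	 *       newCount <<= 1;                          // double until it fits
+	 *   IOInterruptAccountingReporter * grown = IONew(IOInterruptAccountingReporter, newCount);
+	 *   bzero(grown, newCount * sizeof(*grown));     // zero the new buffer
+	 *   memcpy(grown, old, count * sizeof(*grown));  // preserve old entries
+	 *   IODelete(old, IOInterruptAccountingReporter, count);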
+ */ + if (!reserved->interruptStatisticsArray) { + reserved->interruptStatisticsArray = IONew(IOInterruptAccountingReporter, 1); + assert(reserved->interruptStatisticsArray); + reserved->interruptStatisticsArrayCount = 1; + bzero(reserved->interruptStatisticsArray, sizeof(*reserved->interruptStatisticsArray)); + } + + if (source >= reserved->interruptStatisticsArrayCount) { + /* + * We're still within the range of supported indices, but we are out + * of space in the current array. Do a nasty realloc (because + * IORealloc isn't a thing) here. We'll double the size with each + * reallocation. + * + * Yes, the "next power of 2" could be more efficient; but this will + * be invoked incredibly rarely. Who cares. + */ + newArraySize = (reserved->interruptStatisticsArrayCount << 1); + + while (newArraySize <= source) + newArraySize = (newArraySize << 1); + newArray = IONew(IOInterruptAccountingReporter, newArraySize); + + assert(newArray); + + /* + * TODO: This even zeroes the memory it is about to overwrite. + * Shameful; fix it. Not particularly high impact, however. + */ + bzero(newArray, newArraySize * sizeof(*newArray)); + memcpy(newArray, reserved->interruptStatisticsArray, reserved->interruptStatisticsArrayCount * sizeof(*newArray)); + IODelete(reserved->interruptStatisticsArray, IOInterruptAccountingReporter, reserved->interruptStatisticsArrayCount); + reserved->interruptStatisticsArray = newArray; + reserved->interruptStatisticsArrayCount = newArraySize; + } + + if (!reserved->interruptStatisticsArray[source].reporter) { + /* + * We don't have a reporter associated with this index yet, so we + * need to create one. + */ + /* + * TODO: Some statistics do in fact have common units (time); should this be + * split into separate reporters to communicate this? + */ + reserved->interruptStatisticsArray[source].reporter = IOSimpleReporter::with(this, kIOReportCategoryInterrupt, kIOReportUnitNone); + + /* + * Each statistic is given an identifier based on the interrupt index (which + * should be unique relative to any single nub) and the statistic involved. + * We should now have a sane (small and positive) index, so start + * constructing the channels for statistics. + */ + for (i = 0; i < IA_NUM_INTERRUPT_ACCOUNTING_STATISTICS; i++) { + /* + * TODO: Currently, this does not add channels for disabled statistics. + * Will this be confusing for clients? If so, we should just add the + * channels; we can avoid updating the channels even if they exist. + */ + if (IA_GET_STATISTIC_ENABLED(i)) + reserved->interruptStatisticsArray[source].reporter->addChannel(IA_GET_CHANNEL_ID(source, i), kInterruptAccountingStatisticNameArray[i]); + } + + /* + * We now need to add the legend for this reporter to the registry. + */ + legend = IOReportLegend::with(OSDynamicCast(OSArray, getProperty(kIOReportLegendKey))); + + if ((source >= IA_MAX_SUBGROUP_NAME) || (source < 0)) { + /* + * Either we're using a nonsensical index (should never happen), or the + * index is larger than anticipated (may happen, almost certainly won't). + * This may move to live generation of the names in the future, but for + * now, point both cases to a generic subgroup name (this will confuse + * clients, unfortunately). 
+ */ + legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, kInterruptAccountingGenericSubgroupName); + } else { + legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, kInterruptAccountingSubgroupNames[source]); + } + + setProperty(kIOReportLegendKey, legend->getLegend()); + legend->release(); + + /* + * TODO: Is this a good idea? Probably not; my assumption is it opts + * all entities who register interrupts into public disclosure of all + * IOReporting channels. Unfortunately, this appears to be as fine + * grain as it gets. + */ + setProperty(kIOReportLegendPublicKey, true); + } + + /* + * Don't stomp existing entries. If we are about to, panic; this + * probably means we failed to tear down our old interrupt source + * correctly. + */ + oldValue = reserved->interruptStatisticsArray[source].statistics; + + if (oldValue) { + panic("addInterruptStatistics call for index %d would have clobbered existing statistics", source); + } + + reserved->interruptStatisticsArray[source].statistics = statistics; + + /* + * Inherit the reporter values for each statistic. The target may + * be torn down as part of the runtime of the service (especially + * for sleep/wake), so we inherit in order to avoid having values + * reset for no apparent reason. Our statistics are ultimately + * tied to the index and the service, not to an individual target, + * so we should maintain them accordingly. + */ + interruptAccountingDataInheritChannels(reserved->interruptStatisticsArray[source].statistics, reserved->interruptStatisticsArray[source].reporter); + + IOLockUnlock(reserved->interruptStatisticsLock); + + return kIOReturnSuccess; +} + +IOReturn IOService::removeInterruptStatistics(int source) +{ + IOInterruptAccountingData * value = NULL; + + if (source < 0) { + return kIOReturnBadArgument; + } + + IOLockLock(reserved->interruptStatisticsLock); + + /* + * We dynamically grow the statistics array, so an excessively + * large index value has NEVER been registered. This either + * means our cap on the array size is too small (unlikely), or + * that we have been passed a corrupt index (this must be passed + * the plain index into the interrupt specifier list). + */ + if (source >= reserved->interruptStatisticsArrayCount) { + panic("removeInterruptStatistics called for index %d, which was never registered", source); + } + + assert(reserved->interruptStatisticsArray); + + /* + * If there is no existing entry, we are most likely trying to + * free an interrupt owner twice, or we have corrupted the + * index value. + */ + value = reserved->interruptStatisticsArray[source].statistics; + + if (!value) { + panic("removeInterruptStatistics called for empty index %d", source); + } + + /* + * We update the statistics, so that any delta with the reporter + * state is not lost. 
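+	 *
+	 * [Editor's note] Inherit-on-add and update-on-remove are two halves
+	 * of one contract, so a reporter outlives any single statistics
+	 * target. Intended lifecycle, as a sketch using the two routines this
+	 * patch adds (not additional patch code):
+	 *
+	 *   addInterruptStatistics(stats, idx);   // stats inherit reporter totals
+	 *   // ... interrupts fire; stats accumulate deltas ...
+	 *   removeInterruptStatistics(idx);       // deltas flushed to reporter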
+ */ + interruptAccountingDataUpdateChannels(reserved->interruptStatisticsArray[source].statistics, reserved->interruptStatisticsArray[source].reporter); + reserved->interruptStatisticsArray[source].statistics = NULL; + IOLockUnlock(reserved->interruptStatisticsLock); + + return kIOReturnSuccess; +} + IOReturn IOService::getInterruptType(int source, int *interruptType) { IOInterruptController *interruptController; @@ -5570,6 +6046,24 @@ IOReturn IOService::configureReport(IOReportChannelList *channelList, } } + IOLockLock(reserved->interruptStatisticsLock); + + /* The array count is signed (because the interrupt indices are signed), hence the cast */ + for (cnt = 0; cnt < (unsigned) reserved->interruptStatisticsArrayCount; cnt++) { + if (reserved->interruptStatisticsArray[cnt].reporter) { + /* + * If the reporter is currently associated with the statistics + * for an event source, we may need to update the reporter. + */ + if (reserved->interruptStatisticsArray[cnt].statistics) + interruptAccountingDataUpdateChannels(reserved->interruptStatisticsArray[cnt].statistics, reserved->interruptStatisticsArray[cnt].reporter); + + reserved->interruptStatisticsArray[cnt].reporter->configureReport(channelList, action, result, destination); + } + } + + IOLockUnlock(reserved->interruptStatisticsLock); + return kIOReturnSuccess; } @@ -5591,9 +6085,62 @@ IOReturn IOService::updateReport(IOReportChannelList *channelList, } } + IOLockLock(reserved->interruptStatisticsLock); + + /* The array count is signed (because the interrupt indices are signed), hence the cast */ + for (cnt = 0; cnt < (unsigned) reserved->interruptStatisticsArrayCount; cnt++) { + if (reserved->interruptStatisticsArray[cnt].reporter) { + /* + * If the reporter is currently associated with the statistics + * for an event source, we need to update the reporter. 
+ */ + if (reserved->interruptStatisticsArray[cnt].statistics) + interruptAccountingDataUpdateChannels(reserved->interruptStatisticsArray[cnt].statistics, reserved->interruptStatisticsArray[cnt].reporter); + + reserved->interruptStatisticsArray[cnt].reporter->updateReport(channelList, action, result, destination); + } + } + + IOLockUnlock(reserved->interruptStatisticsLock); + return kIOReturnSuccess; } +uint64_t IOService::getAuthorizationID( void ) +{ + return reserved->authorizationID; +} + +IOReturn IOService::setAuthorizationID( uint64_t authorizationID ) +{ + OSObject * entitlement; + IOReturn status; + + entitlement = IOUserClient::copyClientEntitlement( current_task( ), "com.apple.private.iokit.IOServiceSetAuthorizationID" ); + + if ( entitlement ) + { + if ( entitlement == kOSBooleanTrue ) + { + reserved->authorizationID = authorizationID; + + status = kIOReturnSuccess; + } + else + { + status = kIOReturnNotPrivileged; + } + + entitlement->release( ); + } + else + { + status = kIOReturnNotPrivileged; + } + + return status; +} + #if __LP64__ OSMetaClassDefineReservedUsed(IOService, 0); OSMetaClassDefineReservedUsed(IOService, 1); diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 9ba2e752c..7c363a269 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -26,9 +26,6 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -//#undef IOASSERT -//#define IOASSERT 1 - #include #include #include @@ -79,9 +76,6 @@ static uint64_t computeTimeDeltaNS( const AbsoluteTime * start ) OSDefineMetaClassAndStructors(IOPMprot, OSObject) #endif -// Container class for recording system power events -OSDefineMetaClassAndStructors( PMEventDetails, OSObject ); - //****************************************************************************** // Globals //****************************************************************************** @@ -112,7 +106,7 @@ static thread_t gIOPMWatchDogThread = NULL; static uint32_t getPMRequestType( void ) { uint32_t type = kIOPMRequestTypeInvalid; - if (gIOPMRequest) + if (gIOPMRequest) type = gIOPMRequest->getType(); return type; } @@ -132,10 +126,7 @@ static IOPMRequestTag getPMRequestTag( void ) // Macros //****************************************************************************** -#define OBFUSCATE(x) ((void *)(VM_KERNEL_ADDRPERM(x))) - #define PM_ERROR(x...) do { kprintf(x);IOLog(x); \ - IOService::getPMRootDomain()->sleepWakeDebugLog(x); \ } while (false) #define PM_LOG(x...) 
do { kprintf(x); } while (false) @@ -157,7 +148,6 @@ static IOPMRequestTag getPMRequestTag( void ) if ((kIOLogPMRootDomain & gIOKitDebug) && \ (getPMRootDomain() == this)) { \ kprintf("PMRD: " x); \ - getPMRootDomain()->sleepWakeDebugLog(x); \ }} while (false) #define PM_ASSERT_IN_GATE(x) \ do { \ @@ -199,7 +189,11 @@ do { \ #define IS_POWER_RISE (StateOrder(fHeadNotePowerState) > StateOrder(fCurrentPowerState)) // log setPowerStates longer than (ns): +#if defined(__i386__) || defined(__x86_64__) +#define LOG_SETPOWER_TIMES (300ULL * 1000ULL * 1000ULL) +#else #define LOG_SETPOWER_TIMES (50ULL * 1000ULL * 1000ULL) +#endif // log app responses longer than (ns): #define LOG_APP_RESPONSE_TIMES (100ULL * 1000ULL * 1000ULL) // use message tracer to log messages longer than (ns): @@ -236,12 +230,12 @@ enum { } while (false) static OSNumber * copyClientIDForNotification( - OSObject *object, + OSObject *object, IOPMInterestContext *context); static void logClientIDForNotification( OSObject *object, - IOPMInterestContext *context, + IOPMInterestContext *context, const char *logString); //********************************************************************************* @@ -295,9 +289,9 @@ enum { void IOService::PMinit( void ) { if ( !initialized ) - { - if ( !gIOPMInitialized ) - { + { + if ( !gIOPMInitialized ) + { gPlatform = getPlatform(); gIOPMWorkLoop = IOWorkLoop::workLoop(); if (gIOPMWorkLoop) @@ -430,20 +424,20 @@ void IOService::PMinit( void ) } fAckTimer = thread_call_allocate( - &IOService::ack_timer_expired, (thread_call_param_t)this); + &IOService::ack_timer_expired, (thread_call_param_t)this); fSettleTimer = thread_call_allocate( - &settle_timer_expired, (thread_call_param_t)this); + &settle_timer_expired, (thread_call_param_t)this); fIdleTimer = thread_call_allocate( &idle_timer_expired, (thread_call_param_t)this); fDriverCallEntry = thread_call_allocate( - (thread_call_func_t) &IOService::pmDriverCallout, this); + (thread_call_func_t) &IOService::pmDriverCallout, this); assert(fDriverCallEntry); // Check for powerChangeDone override. 
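// [Editor's note] The override check below compares flattened member
// function pointers: OSMemberFunctionCast() reduces a (possibly virtual)
// method to a plain function pointer, so two objects disagree only when
// one of them overrides the method. A minimal sketch of the idiom; the
// helper name is hypothetical, OSMemberFunctionCast is the macro used here:
//
//   static bool overridesPowerChangeDone(IOService * obj, IOService * base)
//   {
//       return OSMemberFunctionCast(void (*)(void), obj,  &IOService::powerChangeDone)
//           != OSMemberFunctionCast(void (*)(void), base, &IOService::powerChangeDone);
//   }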
if (OSMemberFunctionCast(void (*)(void), - getResourceService(), &IOService::powerChangeDone) != - OSMemberFunctionCast(void (*)(void), - this, &IOService::powerChangeDone)) + getResourceService(), &IOService::powerChangeDone) != + OSMemberFunctionCast(void (*)(void), + this, &IOService::powerChangeDone)) { fPCDFunctionOverride = true; } @@ -457,7 +451,7 @@ void IOService::PMinit( void ) prot->thePlatform = gPlatform; fPMVars = prot; pm_vars = prot; - } + } #else pm_vars = (void *) (uintptr_t) true; #endif @@ -648,20 +642,20 @@ void IOService::PMstop( void ) void IOService::handlePMstop( IOPMRequest * request ) { OSIterator * iter; - OSObject * next; - IOPowerConnection * connection; - IOService * theChild; - IOService * theParent; + OSObject * next; + IOPowerConnection * connection; + IOService * theChild; + IOService * theParent; - PM_ASSERT_IN_GATE(); - PM_LOG2("%s: %p %s start\n", getName(), OBFUSCATE(this), __FUNCTION__); + PM_ASSERT_IN_GATE(); + PM_LOG2("%s: %p %s start\n", getName(), OBFUSCATE(this), __FUNCTION__); // remove driver from prevent system sleep lists getPMRootDomain()->updatePreventIdleSleepList(this, false); getPMRootDomain()->updatePreventSystemSleepList(this, false); // remove the property - removeProperty(kPwrMgtKey); + removeProperty(kPwrMgtKey); // detach parents iter = getParentIterator(gIOPowerPlane); @@ -684,7 +678,7 @@ void IOService::handlePMstop( IOPMRequest * request ) // detach IOConnections detachAbove( gIOPowerPlane ); - + // no more power state changes fParentsKnowState = false; @@ -720,8 +714,8 @@ void IOService::handlePMstop( IOPMRequest * request ) if ( fInterestedDrivers ) { - IOPMinformeeList * list = fInterestedDrivers; - IOPMinformee * item; + IOPMinformeeList * list = fInterestedDrivers; + IOPMinformee * item; PM_LOCK(); while ((item = list->firstInList())) @@ -749,104 +743,104 @@ void IOService::handlePMstop( IOPMRequest * request ) IOReturn IOService::addPowerChild( IOService * child ) { - IOPowerConnection * connection = 0; - IOPMRequest * requests[3] = {0, 0, 0}; - OSIterator * iter; - bool ok = true; + IOPowerConnection * connection = 0; + IOPMRequest * requests[3] = {0, 0, 0}; + OSIterator * iter; + bool ok = true; - if (!child) - return kIOReturnBadArgument; + if (!child) + return kIOReturnBadArgument; if (!initialized || !child->initialized) - return IOPMNotYetInitialized; + return IOPMNotYetInitialized; OUR_PMLog( kPMLogAddChild, (uintptr_t) child, 0 ); - do { - // Is this child already one of our children? - - iter = child->getParentIterator( gIOPowerPlane ); - if ( iter ) - { - IORegistryEntry * entry; - OSObject * next; - - while ((next = iter->getNextObject())) - { - if ((entry = OSDynamicCast(IORegistryEntry, next)) && - isChild(entry, gIOPowerPlane)) - { - ok = false; - break; - } - } - iter->release(); - } - if (!ok) - { - PM_LOG("%s: %s (%p) is already a child\n", - getName(), child->getName(), OBFUSCATE(child)); - break; - } - - // Add the child to the power plane immediately, but the - // joining connection is marked as not ready. - // We want the child to appear in the power plane before - // returning to the caller, but don't want the caller to - // block on the PM work loop. - - connection = new IOPowerConnection; - if (!connection) - break; - - // Create a chain of PM requests to perform the bottom-half - // work from the PM work loop. 
- - requests[0] = acquirePMRequest( - /* target */ this, - /* type */ kIOPMRequestTypeAddPowerChild1 ); - - requests[1] = acquirePMRequest( - /* target */ child, - /* type */ kIOPMRequestTypeAddPowerChild2 ); - - requests[2] = acquirePMRequest( - /* target */ this, - /* type */ kIOPMRequestTypeAddPowerChild3 ); - - if (!requests[0] || !requests[1] || !requests[2]) - break; - - requests[0]->attachNextRequest( requests[1] ); - requests[1]->attachNextRequest( requests[2] ); - - connection->init(); - connection->start(this); - connection->setAwaitingAck(false); - connection->setReadyFlag(false); - - attachToChild( connection, gIOPowerPlane ); - connection->attachToChild( child, gIOPowerPlane ); - - // connection needs to be released - requests[0]->fArg0 = connection; - requests[1]->fArg0 = connection; - requests[2]->fArg0 = connection; - - submitPMRequest( requests, 3 ); - return kIOReturnSuccess; - } - while (false); - - if (connection) connection->release(); - if (requests[0]) releasePMRequest(requests[0]); - if (requests[1]) releasePMRequest(requests[1]); - if (requests[2]) releasePMRequest(requests[2]); - - // Silent failure, to prevent platform drivers from adding the child - // to the root domain. - - return kIOReturnSuccess; + do { + // Is this child already one of our children? + + iter = child->getParentIterator( gIOPowerPlane ); + if ( iter ) + { + IORegistryEntry * entry; + OSObject * next; + + while ((next = iter->getNextObject())) + { + if ((entry = OSDynamicCast(IORegistryEntry, next)) && + isChild(entry, gIOPowerPlane)) + { + ok = false; + break; + } + } + iter->release(); + } + if (!ok) + { + PM_LOG("%s: %s (%p) is already a child\n", + getName(), child->getName(), OBFUSCATE(child)); + break; + } + + // Add the child to the power plane immediately, but the + // joining connection is marked as not ready. + // We want the child to appear in the power plane before + // returning to the caller, but don't want the caller to + // block on the PM work loop. + + connection = new IOPowerConnection; + if (!connection) + break; + + // Create a chain of PM requests to perform the bottom-half + // work from the PM work loop. + + requests[0] = acquirePMRequest( + /* target */ this, + /* type */ kIOPMRequestTypeAddPowerChild1 ); + + requests[1] = acquirePMRequest( + /* target */ child, + /* type */ kIOPMRequestTypeAddPowerChild2 ); + + requests[2] = acquirePMRequest( + /* target */ this, + /* type */ kIOPMRequestTypeAddPowerChild3 ); + + if (!requests[0] || !requests[1] || !requests[2]) + break; + + requests[0]->attachNextRequest( requests[1] ); + requests[1]->attachNextRequest( requests[2] ); + + connection->init(); + connection->start(this); + connection->setAwaitingAck(false); + connection->setReadyFlag(false); + + attachToChild( connection, gIOPowerPlane ); + connection->attachToChild( child, gIOPowerPlane ); + + // connection needs to be released + requests[0]->fArg0 = connection; + requests[1]->fArg0 = connection; + requests[2]->fArg0 = connection; + + submitPMRequest( requests, 3 ); + return kIOReturnSuccess; + } + while (false); + + if (connection) connection->release(); + if (requests[0]) releasePMRequest(requests[0]); + if (requests[1]) releasePMRequest(requests[1]); + if (requests[2]) releasePMRequest(requests[2]); + + // Silent failure, to prevent platform drivers from adding the child + // to the root domain. 
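// [Editor's note] addPowerChild() illustrates the PM request-chaining
// pattern: attachNextRequest() orders work on the PM work loop, so
// AddPowerChild1 (make the parent temporarily usable), AddPowerChild2
// (tell the child its parent's state), and AddPowerChild3 (join
// aggressiveness, release the connection) run strictly in sequence, as
// the three handlers later in this patch show. Condensed sketch using
// only calls from this function:
//
//   IOPMRequest * reqs[3];
//   reqs[0] = acquirePMRequest(this,  kIOPMRequestTypeAddPowerChild1);
//   reqs[1] = acquirePMRequest(child, kIOPMRequestTypeAddPowerChild2);
//   reqs[2] = acquirePMRequest(this,  kIOPMRequestTypeAddPowerChild3);
//   reqs[0]->attachNextRequest(reqs[1]);   // stage 2 waits for stage 1
//   reqs[1]->attachNextRequest(reqs[2]);   // stage 3 waits for stage 2
//   submitPMRequest(reqs, 3);              // queue all three to the work loop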
+ + return kIOReturnSuccess; } //********************************************************************************* @@ -857,23 +851,23 @@ IOReturn IOService::addPowerChild( IOService * child ) void IOService::addPowerChild1( IOPMRequest * request ) { - IOPMPowerStateIndex tempDesire = kPowerStateZero; + IOPMPowerStateIndex tempDesire = kPowerStateZero; - // Make us temporary usable before adding the child. + // Make us temporarily usable before adding the child. - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); OUR_PMLog( kPMLogMakeUsable, kPMLogMakeUsable, 0 ); - if (fControllingDriver && inPlane(gIOPowerPlane) && fParentsKnowState) - { - tempDesire = fHighestPowerState; - } + if (fControllingDriver && inPlane(gIOPowerPlane) && fParentsKnowState) + { + tempDesire = fHighestPowerState; + } - if ((tempDesire != kPowerStateZero) && + if ((tempDesire != kPowerStateZero) && (IS_PM_ROOT || (StateOrder(fMaxPowerState) >= StateOrder(tempDesire)))) - { - adjustPowerState(tempDesire); - } + { + adjustPowerState(tempDesire); + } } //********************************************************************************* @@ -885,40 +879,40 @@ void IOService::addPowerChild1( IOPMRequest * request ) void IOService::addPowerChild2( IOPMRequest * request ) { - IOPowerConnection * connection = (IOPowerConnection *) request->fArg0; - IOService * parent; - IOPMPowerFlags powerFlags; - bool knowsState; - unsigned long powerState; - unsigned long tempDesire; + IOPowerConnection * connection = (IOPowerConnection *) request->fArg0; + IOService * parent; + IOPMPowerFlags powerFlags; + bool knowsState; + unsigned long powerState; + unsigned long tempDesire; - PM_ASSERT_IN_GATE(); - parent = (IOService *) connection->getParentEntry(gIOPowerPlane); + PM_ASSERT_IN_GATE(); + parent = (IOService *) connection->getParentEntry(gIOPowerPlane); - if (!parent || !inPlane(gIOPowerPlane)) - { - PM_LOG("%s: addPowerChild2 not in power plane\n", getName()); - return; - } + if (!parent || !inPlane(gIOPowerPlane)) + { + PM_LOG("%s: addPowerChild2 not in power plane\n", getName()); + return; + } - // Parent will be waiting for us to complete this stage. - // It is safe to directly access parent's vars. + // Parent will be waiting for us to complete this stage. + // It is safe to directly access parent's vars. - knowsState = (parent->fPowerStates) && (parent->fParentsKnowState); - powerState = parent->fCurrentPowerState; + knowsState = (parent->fPowerStates) && (parent->fParentsKnowState); + powerState = parent->fCurrentPowerState; - if (knowsState) - powerFlags = parent->fPowerStates[powerState].outputPowerFlags; - else - powerFlags = 0; + if (knowsState) + powerFlags = parent->fPowerStates[powerState].outputPowerFlags; + else + powerFlags = 0; - // Set our power parent. 
OUR_PMLog(kPMLogSetParent, knowsState, powerFlags); - setParentInfo( powerFlags, connection, knowsState ); + setParentInfo( powerFlags, connection, knowsState ); - connection->setReadyFlag(true); + connection->setReadyFlag(true); if ( fControllingDriver && fParentsKnowState ) { @@ -941,30 +935,30 @@ void IOService::addPowerChild2( IOPMRequest * request ) void IOService::addPowerChild3( IOPMRequest * request ) { - IOPowerConnection * connection = (IOPowerConnection *) request->fArg0; - IOService * child; + IOPowerConnection * connection = (IOPowerConnection *) request->fArg0; + IOService * child; IOPMrootDomain * rootDomain = getPMRootDomain(); - PM_ASSERT_IN_GATE(); - child = (IOService *) connection->getChildEntry(gIOPowerPlane); + PM_ASSERT_IN_GATE(); + child = (IOService *) connection->getChildEntry(gIOPowerPlane); - if (child && inPlane(gIOPowerPlane)) - { - if ((this != rootDomain) && child->getProperty("IOPMStrictTreeOrder")) - { - PM_LOG1("%s: strict PM order enforced\n", getName()); - fStrictTreeOrder = true; - } + if (child && inPlane(gIOPowerPlane)) + { + if ((this != rootDomain) && child->getProperty("IOPMStrictTreeOrder")) + { + PM_LOG1("%s: strict PM order enforced\n", getName()); + fStrictTreeOrder = true; + } if (rootDomain) rootDomain->joinAggressiveness( child ); - } - else - { - PM_LOG("%s: addPowerChild3 not in power plane\n", getName()); - } + } + else + { + PM_LOG("%s: addPowerChild3 not in power plane\n", getName()); + } - connection->release(); + connection->release(); } #ifndef __LP64__ @@ -978,9 +972,9 @@ void IOService::addPowerChild3( IOPMRequest * request ) //********************************************************************************* IOReturn IOService::setPowerParent( - IOPowerConnection * theParent, bool stateKnown, IOPMPowerFlags powerFlags ) + IOPowerConnection * theParent, bool stateKnown, IOPMPowerFlags powerFlags ) { - return kIOReturnUnsupported; + return kIOReturnUnsupported; } #endif /* !__LP64__ */ @@ -992,15 +986,15 @@ IOReturn IOService::setPowerParent( IOReturn IOService::removePowerChild( IOPowerConnection * theNub ) { - IORegistryEntry * theChild; + IORegistryEntry * theChild; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); OUR_PMLog( kPMLogRemoveChild, 0, 0 ); theNub->retain(); // detach nub from child - theChild = theNub->copyChildEntry(gIOPowerPlane); + theChild = theNub->copyChildEntry(gIOPowerPlane); if ( theChild ) { theNub->detachFromChild(theChild, gIOPowerPlane); @@ -1011,41 +1005,41 @@ IOReturn IOService::removePowerChild( IOPowerConnection * theNub ) // Are we awaiting an ack from this child? if ( theNub->getAwaitingAck() ) - { - // yes, pretend we got one - theNub->setAwaitingAck(false); - if (fHeadNotePendingAcks != 0 ) - { - // that's one fewer ack to worry about - fHeadNotePendingAcks--; - - // is that the last? - if ( fHeadNotePendingAcks == 0 ) - { - stop_ack_timer(); - - // Request unblocked, work queue - // should re-scan all busy requests. - gIOPMWorkQueue->incrementProducerCount(); - } - } - } - - theNub->release(); - - // A child has gone away, re-scan children desires and clamp bits. - // The fPendingAdjustPowerRequest helps to reduce redundant parent work. 
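// [Editor's note] fAdjustPowerScheduled coalesces re-evaluation: at most
// one kIOPMRequestTypeAdjustPowerState request is queued at a time, and
// the flag is presumably cleared when the queued request is serviced (the
// handler is outside this hunk). The guard pattern, as the code below shows:
//
//   if (!fAdjustPowerScheduled) {
//       IOPMRequest * req = acquirePMRequest(this, kIOPMRequestTypeAdjustPowerState);
//       if (req) {
//           submitPMRequest(req);           // handled on the PM work loop
//           fAdjustPowerScheduled = true;   // suppress duplicate requests
//       }
//   }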
- - if (!fAdjustPowerScheduled) - { - IOPMRequest * request; - request = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState ); - if (request) - { - submitPMRequest( request ); - fAdjustPowerScheduled = true; - } - } + { + // yes, pretend we got one + theNub->setAwaitingAck(false); + if (fHeadNotePendingAcks != 0 ) + { + // that's one fewer ack to worry about + fHeadNotePendingAcks--; + + // is that the last? + if ( fHeadNotePendingAcks == 0 ) + { + stop_ack_timer(); + + // Request unblocked, work queue + // should re-scan all busy requests. + gIOPMWorkQueue->incrementProducerCount(); + } + } + } + + theNub->release(); + + // A child has gone away, re-scan children desires and clamp bits. + // The fPendingAdjustPowerRequest helps to reduce redundant parent work. + + if (!fAdjustPowerScheduled) + { + IOPMRequest * request; + request = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState ); + if (request) + { + submitPMRequest( request ); + fAdjustPowerScheduled = true; + } + } return IOPMNoErr; } @@ -1057,43 +1051,43 @@ IOReturn IOService::removePowerChild( IOPowerConnection * theNub ) //********************************************************************************* IOReturn IOService::registerPowerDriver( - IOService * powerDriver, - IOPMPowerState * powerStates, - unsigned long numberOfStates ) + IOService * powerDriver, + IOPMPowerState * powerStates, + unsigned long numberOfStates ) { - IOPMRequest * request; - IOPMPSEntry * powerStatesCopy = 0; + IOPMRequest * request; + IOPMPSEntry * powerStatesCopy = 0; IOPMPowerStateIndex stateOrder; IOReturn error = kIOReturnSuccess; if (!initialized) - return IOPMNotYetInitialized; - - if (!powerStates || (numberOfStates < 2)) - { - OUR_PMLog(kPMLogControllingDriverErr5, numberOfStates, 0); - return kIOReturnBadArgument; - } - - if (!powerDriver || !powerDriver->initialized) - { - OUR_PMLog(kPMLogControllingDriverErr4, 0, 0); - return kIOReturnBadArgument; - } - - if (powerStates[0].version > kIOPMPowerStateVersion2) - { - OUR_PMLog(kPMLogControllingDriverErr1, powerStates[0].version, 0); - return kIOReturnBadArgument; - } - - do { - // Make a copy of the supplied power state array. - powerStatesCopy = IONew(IOPMPSEntry, numberOfStates); - if (!powerStatesCopy) + return IOPMNotYetInitialized; + + if (!powerStates || (numberOfStates < 2)) + { + OUR_PMLog(kPMLogControllingDriverErr5, numberOfStates, 0); + return kIOReturnBadArgument; + } + + if (!powerDriver || !powerDriver->initialized) + { + OUR_PMLog(kPMLogControllingDriverErr4, 0, 0); + return kIOReturnBadArgument; + } + + if (powerStates[0].version > kIOPMPowerStateVersion2) + { + OUR_PMLog(kPMLogControllingDriverErr1, powerStates[0].version, 0); + return kIOReturnBadArgument; + } + + do { + // Make a copy of the supplied power state array. 
+ powerStatesCopy = IONew(IOPMPSEntry, numberOfStates); + if (!powerStatesCopy) { error = kIOReturnNoMemory; - break; + break; } // Initialize to bogus values @@ -1132,27 +1126,27 @@ IOReturn IOService::registerPowerDriver( if (kIOReturnSuccess != error) break; - request = acquirePMRequest( this, kIOPMRequestTypeRegisterPowerDriver ); - if (!request) + request = acquirePMRequest( this, kIOPMRequestTypeRegisterPowerDriver ); + if (!request) { error = kIOReturnNoMemory; - break; + break; } - powerDriver->retain(); - request->fArg0 = (void *) powerDriver; - request->fArg1 = (void *) powerStatesCopy; - request->fArg2 = (void *) numberOfStates; + powerDriver->retain(); + request->fArg0 = (void *) powerDriver; + request->fArg1 = (void *) powerStatesCopy; + request->fArg2 = (void *) numberOfStates; - submitPMRequest( request ); + submitPMRequest( request ); return kIOReturnSuccess; - } - while (false); + } + while (false); - if (powerStatesCopy) - IODelete(powerStatesCopy, IOPMPSEntry, numberOfStates); + if (powerStatesCopy) + IODelete(powerStatesCopy, IOPMPSEntry, numberOfStates); - return error; + return error; } //********************************************************************************* @@ -1161,39 +1155,39 @@ IOReturn IOService::registerPowerDriver( void IOService::handleRegisterPowerDriver( IOPMRequest * request ) { - IOService * powerDriver = (IOService *) request->fArg0; - IOPMPSEntry * powerStates = (IOPMPSEntry *) request->fArg1; - unsigned long numberOfStates = (unsigned long) request->fArg2; + IOService * powerDriver = (IOService *) request->fArg0; + IOPMPSEntry * powerStates = (IOPMPSEntry *) request->fArg1; + unsigned long numberOfStates = (unsigned long) request->fArg2; unsigned long i, stateIndex; unsigned long lowestPowerState; - IOService * root; - OSIterator * iter; + IOService * root; + OSIterator * iter; - PM_ASSERT_IN_GATE(); - assert(powerStates); - assert(powerDriver); - assert(numberOfStates > 1); + PM_ASSERT_IN_GATE(); + assert(powerStates); + assert(powerDriver); + assert(numberOfStates > 1); if ( !fNumberOfPowerStates ) { - OUR_PMLog(kPMLogControllingDriver, - (unsigned long) numberOfStates, - (unsigned long) kIOPMPowerStateVersion1); + OUR_PMLog(kPMLogControllingDriver, + (unsigned long) numberOfStates, + (unsigned long) kIOPMPowerStateVersion1); fPowerStates = powerStates; - fNumberOfPowerStates = numberOfStates; - fControllingDriver = powerDriver; + fNumberOfPowerStates = numberOfStates; + fControllingDriver = powerDriver; fCurrentCapabilityFlags = fPowerStates[0].capabilityFlags; lowestPowerState = fPowerStates[0].stateOrderToIndex; fHighestPowerState = fPowerStates[numberOfStates - 1].stateOrderToIndex; - // OR'in all the output power flags - fMergedOutputPowerFlags = 0; + // OR'ing all the output power flags + fMergedOutputPowerFlags = 0; fDeviceUsablePowerState = lowestPowerState; - for ( i = 0; i < numberOfStates; i++ ) + for ( i = 0; i < numberOfStates; i++ ) { - fMergedOutputPowerFlags |= fPowerStates[i].outputPowerFlags; + fMergedOutputPowerFlags |= fPowerStates[i].outputPowerFlags; stateIndex = fPowerStates[i].stateOrderToIndex; assert(stateIndex < numberOfStates); @@ -1203,34 +1197,34 @@ void IOService::handleRegisterPowerDriver( IOPMRequest * request ) // The minimum power state that the device is usable fDeviceUsablePowerState = stateIndex; } - } - - // Register powerDriver as interested, unless already done. - // We don't want to register the default implementation since - // it does nothing. 
One ramification of not always registering - // is the one fewer retain count held. - - root = getPlatform()->getProvider(); - assert(root); - if (!root || - ((OSMemberFunctionCast(void (*)(void), - root, &IOService::powerStateDidChangeTo)) != - ((OSMemberFunctionCast(void (*)(void), - this, &IOService::powerStateDidChangeTo)))) || - ((OSMemberFunctionCast(void (*)(void), - root, &IOService::powerStateWillChangeTo)) != - ((OSMemberFunctionCast(void (*)(void), - this, &IOService::powerStateWillChangeTo))))) - { - if (fInterestedDrivers->findItem(powerDriver) == NULL) - { - PM_LOCK(); - fInterestedDrivers->appendNewInformee(powerDriver); - PM_UNLOCK(); - } - } - - // Examine all existing power clients and perform limit check. + } + + // Register powerDriver as interested, unless already done. + // We don't want to register the default implementation since + // it does nothing. One ramification of not always registering + // is the one fewer retain count held. + + root = getPlatform()->getProvider(); + assert(root); + if (!root || + ((OSMemberFunctionCast(void (*)(void), + root, &IOService::powerStateDidChangeTo)) != + ((OSMemberFunctionCast(void (*)(void), + this, &IOService::powerStateDidChangeTo)))) || + ((OSMemberFunctionCast(void (*)(void), + root, &IOService::powerStateWillChangeTo)) != + ((OSMemberFunctionCast(void (*)(void), + this, &IOService::powerStateWillChangeTo))))) + { + if (fInterestedDrivers->findItem(powerDriver) == NULL) + { + PM_LOCK(); + fInterestedDrivers->appendNewInformee(powerDriver); + PM_UNLOCK(); + } + } + + // Examine all existing power clients and perform limit check. if (fPowerClients && (iter = OSCollectionIterator::withCollection(fPowerClients))) @@ -1247,22 +1241,22 @@ void IOService::handleRegisterPowerDriver( IOPMRequest * request ) iter->release(); } - if ( inPlane(gIOPowerPlane) && fParentsKnowState ) - { - IOPMPowerStateIndex tempDesire; - fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags); - // initially change into the state we are already in - tempDesire = fControllingDriver->initialPowerStateForDomainState(fParentsCurrentPowerFlags); - adjustPowerState(tempDesire); - } - } - else - { - OUR_PMLog(kPMLogControllingDriverErr2, numberOfStates, 0); + if ( inPlane(gIOPowerPlane) && fParentsKnowState ) + { + IOPMPowerStateIndex tempDesire; + fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags); + // initially change into the state we are already in + tempDesire = fControllingDriver->initialPowerStateForDomainState(fParentsCurrentPowerFlags); + adjustPowerState(tempDesire); + } + } + else + { + OUR_PMLog(kPMLogControllingDriverErr2, numberOfStates, 0); IODelete(powerStates, IOPMPSEntry, numberOfStates); - } + } - powerDriver->release(); + powerDriver->release(); } //********************************************************************************* @@ -1276,8 +1270,8 @@ void IOService::handleRegisterPowerDriver( IOPMRequest * request ) IOPMPowerFlags IOService::registerInterestedDriver( IOService * driver ) { - IOPMRequest * request; - bool signal; + IOPMRequest * request; + bool signal; if (!driver || !initialized || !fInterestedDrivers) return 0; @@ -1305,7 +1299,7 @@ IOPMPowerFlags IOService::registerInterestedDriver( IOService * driver ) // for those clients that care. 
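// [Editor's note] Interest registration is deferred to the PM work loop
// (see the handleInterestChanged() hunk just below), so the return value
// here is a fixed constant rather than live state. The deferral presumably
// uses the same request pattern seen throughout this file:
//
//   IOPMRequest * req = acquirePMRequest(this, kIOPMRequestTypeInterestChanged);
//   if (req)
//       submitPMRequest(req);   // processed later, on the PM work loop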
OUR_PMLog(kPMLogInterestedDriver, kIOPMDeviceUsable, 2); - return kIOPMDeviceUsable; + return kIOPMDeviceUsable; } //********************************************************************************* @@ -1314,8 +1308,8 @@ IOPMPowerFlags IOService::registerInterestedDriver( IOService * driver ) IOReturn IOService::deRegisterInterestedDriver( IOService * driver ) { - IOPMinformeeList * list; - IOPMinformee * item; + IOPMinformeeList * list; + IOPMinformee * item; IOPMRequest * request; bool signal; @@ -1362,9 +1356,9 @@ IOReturn IOService::deRegisterInterestedDriver( IOService * driver ) void IOService::handleInterestChanged( IOPMRequest * request ) { - IOService * driver; - IOPMinformee * informee; - IOPMinformeeList * list = fInterestedDrivers; + IOService * driver; + IOPMinformee * informee; + IOPMinformeeList * list = fInterestedDrivers; PM_LOCK(); @@ -1422,21 +1416,21 @@ void IOService::handleInterestChanged( IOPMRequest * request ) IOReturn IOService::acknowledgePowerChange( IOService * whichObject ) { - IOPMRequest * request; + IOPMRequest * request; if (!initialized) - return IOPMNotYetInitialized; - if (!whichObject) - return kIOReturnBadArgument; + return IOPMNotYetInitialized; + if (!whichObject) + return kIOReturnBadArgument; - request = acquirePMRequest( this, kIOPMRequestTypeAckPowerChange ); - if (!request) - return kIOReturnNoMemory; + request = acquirePMRequest( this, kIOPMRequestTypeAckPowerChange ); + if (!request) + return kIOReturnNoMemory; - whichObject->retain(); - request->fArg0 = whichObject; + whichObject->retain(); + request->fArg0 = whichObject; - submitPMRequest( request ); + submitPMRequest( request ); return IOPMNoErr; } @@ -1446,24 +1440,24 @@ IOReturn IOService::acknowledgePowerChange( IOService * whichObject ) bool IOService::handleAcknowledgePowerChange( IOPMRequest * request ) { - IOPMinformee * informee; - unsigned long childPower = kIOPMUnknown; - IOService * theChild; - IOService * whichObject; - bool all_acked = false; + IOPMinformee * informee; + unsigned long childPower = kIOPMUnknown; + IOService * theChild; + IOService * whichObject; + bool all_acked = false; - PM_ASSERT_IN_GATE(); - whichObject = (IOService *) request->fArg0; - assert(whichObject); + PM_ASSERT_IN_GATE(); + whichObject = (IOService *) request->fArg0; + assert(whichObject); // one of our interested drivers? - informee = fInterestedDrivers->findItem( whichObject ); + informee = fInterestedDrivers->findItem( whichObject ); if ( informee == NULL ) { if ( !isChild(whichObject, gIOPowerPlane) ) { - OUR_PMLog(kPMLogAcknowledgeErr1, 0, 0); - goto no_err; + OUR_PMLog(kPMLogAcknowledgeErr1, 0, 0); + goto no_err; } else { OUR_PMLog(kPMLogChildAcknowledge, fHeadNotePendingAcks, 0); } @@ -1486,26 +1480,11 @@ bool IOService::handleAcknowledgePowerChange( IOPMRequest * request ) if (informee->timer > 0) { uint64_t nsec = computeTimeDeltaNS(&informee->startTime); - if (nsec > LOG_SETPOWER_TIMES) - PM_LOG("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) async took %d ms\n", - informee->whatObject->getName(), - (fDriverCallReason == kDriverCallInformPreChange) ? "Will" : "Did", - OBFUSCATE(informee->whatObject), - fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_US(nsec)); - - uint16_t logType = (fDriverCallReason == kDriverCallInformPreChange) - ? 
kIOPMEventTypePSWillChangeTo - : kIOPMEventTypePSDidChangeTo; - - PMEventDetails *details = PMEventDetails::eventDetails( - logType, - fName, - (uintptr_t)this, - informee->whatObject->getName(), - 0, 0, 0, - NS_TO_MS(nsec)); - - getPMRootDomain()->recordAndReleasePMEvent( details ); + if (nsec > LOG_SETPOWER_TIMES) { + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsDriverPSChangeSlow, informee->whatObject->getName(), + fDriverCallReason, NS_TO_MS(nsec), 0, NULL, fHeadNotePowerState); + } } #endif // mark it acked @@ -1540,21 +1519,21 @@ bool IOService::handleAcknowledgePowerChange( IOPMRequest * request ) } } } - } - - if ( fHeadNotePendingAcks == 0 ) { - // yes, stop the timer - stop_ack_timer(); - // and now we can continue - all_acked = true; - } + } + + if ( fHeadNotePendingAcks == 0 ) { + // yes, stop the timer + stop_ack_timer(); + // and now we can continue + all_acked = true; + } } else { - OUR_PMLog(kPMLogAcknowledgeErr3, 0, 0); // not expecting anybody to ack + OUR_PMLog(kPMLogAcknowledgeErr3, 0, 0); // not expecting anybody to ack } no_err: - if (whichObject) - whichObject->release(); + if (whichObject) + whichObject->release(); return all_acked; } @@ -1569,17 +1548,17 @@ no_err: IOReturn IOService::acknowledgeSetPowerState( void ) { - IOPMRequest * request; + IOPMRequest * request; if (!initialized) - return IOPMNotYetInitialized; + return IOPMNotYetInitialized; - request = acquirePMRequest( this, kIOPMRequestTypeAckSetPowerState ); - if (!request) - return kIOReturnNoMemory; + request = acquirePMRequest( this, kIOPMRequestTypeAckSetPowerState ); + if (!request) + return kIOReturnNoMemory; - submitPMRequest( request ); - return kIOReturnSuccess; + submitPMRequest( request ); + return kIOReturnSuccess; } //********************************************************************************* @@ -1588,10 +1567,10 @@ IOReturn IOService::acknowledgeSetPowerState( void ) void IOService::adjustPowerState( uint32_t clamp ) { - PM_ASSERT_IN_GATE(); - computeDesiredState(clamp, false); - if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane)) - { + PM_ASSERT_IN_GATE(); + computeDesiredState(clamp, false); + if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane)) + { IOPMPowerChangeFlags changeFlags = kIOPMSelfInitiated; // Indicate that children desires must be ignored, and do not ask @@ -1601,13 +1580,13 @@ void IOService::adjustPowerState( uint32_t clamp ) if (getPMRequestType() == kIOPMRequestTypeRequestPowerStateOverride) changeFlags |= (kIOPMIgnoreChildren | kIOPMSkipAskPowerDown); - startPowerChange( - /* flags */ changeFlags, - /* power state */ fDesiredPowerState, - /* domain flags */ 0, - /* connection */ 0, - /* parent flags */ 0); - } + startPowerChange( + /* flags */ changeFlags, + /* power state */ fDesiredPowerState, + /* domain flags */ 0, + /* connection */ 0, + /* parent flags */ 0); + } } //********************************************************************************* @@ -1618,13 +1597,13 @@ IOReturn IOService::synchronizePowerTree( IOOptionBits options, IOService * notifyRoot ) { - IOPMRequest * request_c = 0; + IOPMRequest * request_c = 0; IOPMRequest * request_s; if (this != getPMRootDomain()) return kIOReturnBadArgument; - if (!initialized) - return kIOPMNotYetInitialized; + if (!initialized) + return kIOPMNotYetInitialized; OUR_PMLog(kPMLogCSynchronizePowerTree, options, (notifyRoot != 0)); @@ -1634,7 +1613,7 @@ IOReturn IOService::synchronizePowerTree( // Cancels don't need to be synchronized. 
nr = acquirePMRequest(notifyRoot, kIOPMRequestTypeChildNotifyDelayCancel); - if (nr) submitPMRequest(nr); + if (nr) submitPMRequest(nr); nr = acquirePMRequest(getPMRootDomain(), kIOPMRequestTypeChildNotifyDelayCancel); if (nr) submitPMRequest(nr); } @@ -1650,15 +1629,15 @@ IOReturn IOService::synchronizePowerTree( request_c->attachNextRequest( request_s ); submitPMRequest(request_c); } - + request_s->fArg0 = (void *)(uintptr_t) options; submitPMRequest(request_s); return kIOReturnSuccess; error_no_memory: - if (request_c) releasePMRequest(request_c); - if (request_s) releasePMRequest(request_s); + if (request_c) releasePMRequest(request_c); + if (request_s) releasePMRequest(request_s); return kIOReturnNoMemory; } @@ -1668,20 +1647,20 @@ error_no_memory: void IOService::handleSynchronizePowerTree( IOPMRequest * request ) { - PM_ASSERT_IN_GATE(); - if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane) && + PM_ASSERT_IN_GATE(); + if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane) && (fCurrentPowerState == fHighestPowerState)) - { + { IOOptionBits options = (uintptr_t) request->fArg0; - startPowerChange( - /* flags */ kIOPMSelfInitiated | kIOPMSynchronize | + startPowerChange( + /* flags */ kIOPMSelfInitiated | kIOPMSynchronize | (options & kIOPMSyncNoChildNotify), - /* power state */ fCurrentPowerState, - /* domain flags */ 0, - /* connection */ 0, - /* parent flags */ 0); - } + /* power state */ fCurrentPowerState, + /* domain flags */ 0, + /* connection */ 0, + /* parent flags */ 0); + } } #ifndef __LP64__ @@ -1696,11 +1675,11 @@ void IOService::handleSynchronizePowerTree( IOPMRequest * request ) //********************************************************************************* IOReturn IOService::powerDomainWillChangeTo( - IOPMPowerFlags newPowerFlags, - IOPowerConnection * whichParent ) + IOPMPowerFlags newPowerFlags, + IOPowerConnection * whichParent ) { - assert(false); - return kIOReturnUnsupported; + assert(false); + return kIOReturnUnsupported; } #endif /* !__LP64__ */ @@ -1710,32 +1689,32 @@ IOReturn IOService::powerDomainWillChangeTo( void IOService::handlePowerDomainWillChangeTo( IOPMRequest * request ) { - IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; - IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; + IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; + IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; IOPMPowerChangeFlags parentChangeFlags = (IOPMPowerChangeFlags)(uintptr_t) request->fArg2; IOPMPowerChangeFlags myChangeFlags; - OSIterator * iter; - OSObject * next; - IOPowerConnection * connection; + OSIterator * iter; + OSObject * next; + IOPowerConnection * connection; IOPMPowerStateIndex maxPowerState; - IOPMPowerFlags combinedPowerFlags; - bool savedParentsKnowState; - IOReturn result = IOPMAckImplied; + IOPMPowerFlags combinedPowerFlags; + bool savedParentsKnowState; + IOReturn result = IOPMAckImplied; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); OUR_PMLog(kPMLogWillChange, parentPowerFlags, 0); - if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck()) - { - PM_LOG("%s::%s not in power tree\n", getName(), __FUNCTION__); + if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck()) + { + PM_LOG("%s::%s not in power tree\n", getName(), __FUNCTION__); goto exit_no_ack; - } + } - savedParentsKnowState = fParentsKnowState; + savedParentsKnowState = fParentsKnowState; // Combine parents' output power flags. 
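// [Editor's note] A condensed sketch of the parent-flag merge that follows:
// every gIOPowerPlane parent connection contributes its cached output
// flags, except the parent that is changing, which contributes the new
// flags from this request. parentCurrentPowerFlags() is the
// IOPowerConnection accessor; the shape of the special case is inferred
// from context:
//
//   combinedPowerFlags = 0;
//   iter = getParentIterator(gIOPowerPlane);
//   if (iter) {
//       while ((next = iter->getNextObject())) {
//           if ((connection = OSDynamicCast(IOPowerConnection, next))) {
//               combinedPowerFlags |= (connection == whichParent)
//                   ? parentPowerFlags
//                   : connection->parentCurrentPowerFlags();
//           }
//       }
//       iter->release();
//   }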
- combinedPowerFlags = 0; + combinedPowerFlags = 0; iter = getParentIterator(gIOPowerPlane); if ( iter ) @@ -1758,49 +1737,49 @@ void IOService::handlePowerDomainWillChangeTo( IOPMRequest * request ) if ( fControllingDriver && !fInitialPowerChange ) { - maxPowerState = fControllingDriver->maxCapabilityForDomainState( - combinedPowerFlags); + maxPowerState = fControllingDriver->maxCapabilityForDomainState( + combinedPowerFlags); // Use kIOPMSynchronize below instead of kIOPMRootBroadcastFlags // to avoid propagating the root change flags if any service must // change power state due to root's will-change notification. // Root does not change power state for kIOPMSynchronize. - + myChangeFlags = kIOPMParentInitiated | kIOPMDomainWillChange | (parentChangeFlags & kIOPMSynchronize); - result = startPowerChange( - /* flags */ myChangeFlags, - /* power state */ maxPowerState, - /* domain flags */ combinedPowerFlags, - /* connection */ whichParent, - /* parent flags */ parentPowerFlags); - } - - // If parent is dropping power, immediately update the parent's - // capability flags. Any future merging of parent(s) combined - // power flags should account for this power drop. - - if (parentChangeFlags & kIOPMDomainPowerDrop) - { - setParentInfo(parentPowerFlags, whichParent, true); - } - - // Parent is expecting an ACK from us. If we did not embark on a state - // transition, i.e. startPowerChange() returned IOPMAckImplied. We are - // still required to issue an ACK to our parent. - - if (IOPMAckImplied == result) - { - IOService * parent; - parent = (IOService *) whichParent->copyParentEntry(gIOPowerPlane); - assert(parent); - if ( parent ) - { - parent->acknowledgePowerChange( whichParent ); - parent->release(); - } - } + result = startPowerChange( + /* flags */ myChangeFlags, + /* power state */ maxPowerState, + /* domain flags */ combinedPowerFlags, + /* connection */ whichParent, + /* parent flags */ parentPowerFlags); + } + + // If parent is dropping power, immediately update the parent's + // capability flags. Any future merging of parent(s) combined + // power flags should account for this power drop. + + if (parentChangeFlags & kIOPMDomainPowerDrop) + { + setParentInfo(parentPowerFlags, whichParent, true); + } + + // Parent is expecting an ACK from us. If we did not embark on a state + // transition, i.e. startPowerChange() returned IOPMAckImplied. We are + // still required to issue an ACK to our parent. + + if (IOPMAckImplied == result) + { + IOService * parent; + parent = (IOService *) whichParent->copyParentEntry(gIOPowerPlane); + assert(parent); + if ( parent ) + { + parent->acknowledgePowerChange( whichParent ); + parent->release(); + } + } exit_no_ack: // Drop the retain from notifyChild(). 
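// Editorial sketch (not part of the patch): handlePowerDomainWillChangeTo()
// above first ORs together every power parent's current output flags, then
// asks the controlling driver (maxCapabilityForDomainState) for the highest
// power state those combined flags can support. A standalone rendering of
// that reduction; std::vector replaces the gIOPowerPlane parent iterator,
// and the types below are illustrative, not the IOKit definitions.
#include <cstddef>
#include <cstdint>
#include <vector>

using PowerFlags = uint64_t;

struct PowerStateEntry {
    PowerFlags inputPowerFlags;   // domain flags this state requires
};

// OR-fold, mirroring the combinedPowerFlags loop in the hunk above.
PowerFlags combineParentFlags(const std::vector<PowerFlags> &parents)
{
    PowerFlags combined = 0;
    for (PowerFlags f : parents)
        combined |= f;
    return combined;
}

// Highest-indexed state whose required input flags are all present in the
// domain; a sketch of what maxCapabilityForDomainState() computes.
size_t maxCapabilityForDomainState(const std::vector<PowerStateEntry> &states,
                                   PowerFlags domainFlags)
{
    size_t best = 0;
    for (size_t i = 0; i < states.size(); i++)
        if ((states[i].inputPowerFlags & domainFlags) == states[i].inputPowerFlags)
            best = i;
    return best;
}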
@@ -1819,11 +1798,11 @@ exit_no_ack: //********************************************************************************* IOReturn IOService::powerDomainDidChangeTo( - IOPMPowerFlags newPowerFlags, - IOPowerConnection * whichParent ) + IOPMPowerFlags newPowerFlags, + IOPowerConnection * whichParent ) { - assert(false); - return kIOReturnUnsupported; + assert(false); + return kIOReturnUnsupported; } #endif /* !__LP64__ */ @@ -1833,34 +1812,34 @@ IOReturn IOService::powerDomainDidChangeTo( void IOService::handlePowerDomainDidChangeTo( IOPMRequest * request ) { - IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; - IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; + IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; + IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; IOPMPowerChangeFlags parentChangeFlags = (IOPMPowerChangeFlags)(uintptr_t) request->fArg2; IOPMPowerChangeFlags myChangeFlags; IOPMPowerStateIndex maxPowerState; IOPMPowerStateIndex initialDesire = kPowerStateZero; bool computeDesire = false; bool desireChanged = false; - bool savedParentsKnowState; - IOReturn result = IOPMAckImplied; + bool savedParentsKnowState; + IOReturn result = IOPMAckImplied; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); OUR_PMLog(kPMLogDidChange, parentPowerFlags, 0); - if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck()) - { - PM_LOG("%s::%s not in power tree\n", getName(), __FUNCTION__); + if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck()) + { + PM_LOG("%s::%s not in power tree\n", getName(), __FUNCTION__); goto exit_no_ack; - } + } - savedParentsKnowState = fParentsKnowState; + savedParentsKnowState = fParentsKnowState; setParentInfo(parentPowerFlags, whichParent, true); if ( fControllingDriver ) - { - maxPowerState = fControllingDriver->maxCapabilityForDomainState( - fParentsCurrentPowerFlags); + { + maxPowerState = fControllingDriver->maxCapabilityForDomainState( + fParentsCurrentPowerFlags); if (fInitialPowerChange) { @@ -1913,40 +1892,41 @@ void IOService::handlePowerDomainDidChangeTo( IOPMRequest * request ) myChangeFlags = kIOPMParentInitiated | kIOPMDomainDidChange | (parentChangeFlags & kIOPMRootBroadcastFlags); - result = startPowerChange( - /* flags */ myChangeFlags, - /* power state */ maxPowerState, - /* domain flags */ fParentsCurrentPowerFlags, - /* connection */ whichParent, - /* parent flags */ 0); - } - - // Parent is expecting an ACK from us. If we did not embark on a state - // transition, i.e. startPowerChange() returned IOPMAckImplied. We are - // still required to issue an ACK to our parent. - - if (IOPMAckImplied == result) - { - IOService * parent; - parent = (IOService *) whichParent->copyParentEntry(gIOPowerPlane); - assert(parent); - if ( parent ) - { - parent->acknowledgePowerChange( whichParent ); - parent->release(); - } - } - - // If the parent registers its power driver late, then this is the - // first opportunity to tell our parent about our desire. Or if the + result = startPowerChange( + /* flags */ myChangeFlags, + /* power state */ maxPowerState, + /* domain flags */ fParentsCurrentPowerFlags, + /* connection */ whichParent, + /* parent flags */ 0); + } + + // Parent is expecting an ACK from us. If we did not embark on a state + // transition, i.e. startPowerChange() returned IOPMAckImplied. We are + // still required to issue an ACK to our parent. 
+ + if (IOPMAckImplied == result) + { + IOService * parent; + parent = (IOService *) whichParent->copyParentEntry(gIOPowerPlane); + assert(parent); + if ( parent ) + { + parent->acknowledgePowerChange( whichParent ); + parent->release(); + } + } + + // If the parent registers its power driver late, then this is the + // first opportunity to tell our parent about our desire. Or if the // child's desire changed during a parent change notify. - if ((!savedParentsKnowState && fParentsKnowState) || desireChanged) - { - PM_LOG1("%s::powerDomainDidChangeTo parentsKnowState %d\n", - getName(), fParentsKnowState); - requestDomainPower( fDesiredPowerState ); - } + if (fControllingDriver && + ((!savedParentsKnowState && fParentsKnowState) || desireChanged)) + { + PM_LOG1("%s::powerDomainDidChangeTo parentsKnowState %d\n", + getName(), fParentsKnowState); + requestDomainPower( fDesiredPowerState ); + } exit_no_ack: // Drop the retain from notifyChild(). @@ -1959,22 +1939,22 @@ exit_no_ack: // Set our connection data for one specific parent, and then combine all the parent // data together. //********************************************************************************* - + void IOService::setParentInfo( - IOPMPowerFlags newPowerFlags, - IOPowerConnection * whichParent, - bool knowsState ) + IOPMPowerFlags newPowerFlags, + IOPowerConnection * whichParent, + bool knowsState ) { - OSIterator * iter; - OSObject * next; - IOPowerConnection * conn; + OSIterator * iter; + OSObject * next; + IOPowerConnection * conn; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); // set our connection data whichParent->setParentCurrentPowerFlags(newPowerFlags); whichParent->setParentKnowsState(knowsState); - + // recompute our parent info fParentsCurrentPowerFlags = 0; fParentsKnowState = true; @@ -2017,12 +1997,11 @@ void IOService::trackSystemSleepPreventers( if ((oldCapability ^ newCapability) & kIOPMPreventIdleSleep) { - bool idleCancelAllowed = getPMRootDomain()->updatePreventIdleSleepList(this, - ((oldCapability & kIOPMPreventIdleSleep) == 0)); - if(!idleCancelAllowed) - PM_LOG2("Idle cancel was disallowed for %s\n", getName()); + bool enablePrevention = ((oldCapability & kIOPMPreventIdleSleep) == 0); + bool idleCancelAllowed = getPMRootDomain()->updatePreventIdleSleepList( + this, enablePrevention); #if SUPPORT_IDLE_CANCEL - if (idleCancelAllowed && (oldCapability & kIOPMPreventIdleSleep) == 0) + if (idleCancelAllowed && enablePrevention) { IOPMRequest * cancelRequest; @@ -2033,12 +2012,10 @@ void IOService::trackSystemSleepPreventers( } } #endif - } if ((oldCapability ^ newCapability) & kIOPMPreventSystemSleep) { - getPMRootDomain()->updatePreventSystemSleepList(this, ((oldCapability & kIOPMPreventSystemSleep) == 0)); } @@ -2053,45 +2030,45 @@ void IOService::trackSystemSleepPreventers( IOReturn IOService::requestPowerDomainState( IOPMPowerFlags childRequestPowerFlags, IOPowerConnection * childConnection, - unsigned long specification ) + unsigned long specification ) { IOPMPowerStateIndex order, powerState; - IOPMPowerFlags outputPowerFlags; + IOPMPowerFlags outputPowerFlags; IOService * child; - IOPMRequest * subRequest; + IOPMRequest * subRequest; bool adjustPower = false; if (!initialized) - return IOPMNotYetInitialized; + return IOPMNotYetInitialized; - if (gIOPMWorkLoop->onThread() == false) - { - PM_LOG("%s::requestPowerDomainState\n", getName()); - return kIOReturnSuccess; - } + if (gIOPMWorkLoop->onThread() == false) + { + PM_LOG("%s::requestPowerDomainState\n", getName()); + return kIOReturnSuccess; + } 
OUR_PMLog(kPMLogRequestDomain, childRequestPowerFlags, specification); - if (!isChild(childConnection, gIOPowerPlane)) - return kIOReturnNotAttached; + if (!isChild(childConnection, gIOPowerPlane)) + return kIOReturnNotAttached; if (!fControllingDriver || !fNumberOfPowerStates) return kIOReturnNotReady; - child = (IOService *) childConnection->getChildEntry(gIOPowerPlane); - assert(child); + child = (IOService *) childConnection->getChildEntry(gIOPowerPlane); + assert(child); // Remove flags from child request which we can't possibly supply childRequestPowerFlags &= fMergedOutputPowerFlags; // Merge in the power flags contributed by this power parent - // at its current or impending power state. + // at its current or impending power state. outputPowerFlags = fPowerStates[fCurrentPowerState].outputPowerFlags; - if (fMachineState != kIOPM_Finished) - { - if (IS_POWER_DROP && !IS_ROOT_DOMAIN) - { + if (fMachineState != kIOPM_Finished) + { + if (IS_POWER_DROP && !IS_ROOT_DOMAIN) + { // Use the lower power state when dropping power. // Must be careful since a power drop can be cancelled // from the following states: @@ -2114,25 +2091,25 @@ IOReturn IOService::requestPowerDomainState( PM_LOG1("%s: power drop cancelled in state %u by %s\n", getName(), fMachineState, child->getName()); } - else - { - // Beyond cancellation point, report the impending state. - outputPowerFlags = - fPowerStates[fHeadNotePowerState].outputPowerFlags; - } - } - else if (IS_POWER_RISE) - { - // When raising power, must report the output power flags from - // child's perspective. A child power request may arrive while - // parent is transitioning upwards. If a request arrives after - // setParentInfo() has already recorded the output power flags - // for the next power state, then using the power supplied by - // fCurrentPowerState is incorrect, and might cause the child - // to wait when it should not. - - outputPowerFlags = childConnection->parentCurrentPowerFlags(); - } + else + { + // Beyond cancellation point, report the impending state. + outputPowerFlags = + fPowerStates[fHeadNotePowerState].outputPowerFlags; + } + } + else if (IS_POWER_RISE) + { + // When raising power, must report the output power flags from + // child's perspective. A child power request may arrive while + // parent is transitioning upwards. If a request arrives after + // setParentInfo() has already recorded the output power flags + // for the next power state, then using the power supplied by + // fCurrentPowerState is incorrect, and might cause the child + // to wait when it should not. + + outputPowerFlags = childConnection->parentCurrentPowerFlags(); + } } child->fHeadNoteDomainTargetFlags |= outputPowerFlags; @@ -2169,24 +2146,24 @@ IOReturn IOService::requestPowerDomainState( } #endif - // Record the child's desires on the connection. - childConnection->setChildHasRequestedPower(); - childConnection->setDesiredDomainState( powerState ); + // Record the child's desires on the connection. + childConnection->setChildHasRequestedPower(); + childConnection->setDesiredDomainState( powerState ); - // Schedule a request to re-evaluate all children desires and - // adjust power state. Submit a request if one wasn't pending, - // or if the current request is part of a call tree. + // Schedule a request to re-evaluate all children desires and + // adjust power state. Submit a request if one wasn't pending, + // or if the current request is part of a call tree. 
if (adjustPower && !fDeviceOverrideEnabled && (!fAdjustPowerScheduled || gIOPMRequest->getRootRequest())) { - subRequest = acquirePMRequest( + subRequest = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState, gIOPMRequest ); - if (subRequest) - { - submitPMRequest( subRequest ); - fAdjustPowerScheduled = true; - } + if (subRequest) + { + submitPMRequest( subRequest ); + fAdjustPowerScheduled = true; + } } return kIOReturnSuccess; @@ -2230,8 +2207,8 @@ IOReturn IOService::makeUsable( void ) IOPMPowerFlags IOService::currentCapability( void ) { - if (!initialized) - return IOPMNotPowerManaged; + if (!initialized) + return IOPMNotPowerManaged; return fCurrentCapabilityFlags; } @@ -2276,16 +2253,16 @@ IOReturn IOService::changePowerStateToPriv( unsigned long ordinal ) IOReturn IOService::changePowerStateWithOverrideTo( IOPMPowerStateIndex ordinal, IOPMRequestTag tag ) { - IOPMRequest * request; + IOPMRequest * request; - if (!initialized) - return kIOPMNotYetInitialized; + if (!initialized) + return kIOPMNotYetInitialized; OUR_PMLog(kPMLogChangeStateToPriv, ordinal, 0); - request = acquirePMRequest( this, kIOPMRequestTypeRequestPowerStateOverride ); - if (!request) - return kIOReturnNoMemory; + request = acquirePMRequest( this, kIOPMRequestTypeRequestPowerStateOverride ); + if (!request) + return kIOReturnNoMemory; gIOPMPowerClientDevice->retain(); request->fRequestTag = tag; @@ -2297,18 +2274,18 @@ IOReturn IOService::changePowerStateWithOverrideTo( IOPMPowerStateIndex ordinal, request->installCompletionAction( action, target, param ); #endif - // Prevent needless downwards power transitions by clamping power - // until the scheduled request is executed. + // Prevent needless downwards power transitions by clamping power + // until the scheduled request is executed. - if (gIOPMWorkLoop->inGate() && (ordinal < fNumberOfPowerStates)) - { - fTempClampPowerState = StateMax(fTempClampPowerState, ordinal); - fTempClampCount++; - fOverrideMaxPowerState = ordinal; - request->fArg2 = (void *) (uintptr_t) true; - } + if (gIOPMWorkLoop->inGate() && (ordinal < fNumberOfPowerStates)) + { + fTempClampPowerState = StateMax(fTempClampPowerState, ordinal); + fTempClampCount++; + fOverrideMaxPowerState = ordinal; + request->fArg2 = (void *) (uintptr_t) true; + } - submitPMRequest( request ); + submitPMRequest( request ); return IOPMNoErr; } @@ -2332,16 +2309,16 @@ IOReturn IOService::requestPowerState( const OSSymbol * client, uint32_t state ) { - IOPMRequest * request; + IOPMRequest * request; if (!client) return kIOReturnBadArgument; - if (!initialized) - return kIOPMNotYetInitialized; + if (!initialized) + return kIOPMNotYetInitialized; - request = acquirePMRequest( this, kIOPMRequestTypeRequestPowerState ); - if (!request) - return kIOReturnNoMemory; + request = acquirePMRequest( this, kIOPMRequestTypeRequestPowerState ); + if (!request) + return kIOReturnNoMemory; client->retain(); request->fArg0 = (void *)(uintptr_t) state; @@ -2352,17 +2329,17 @@ IOReturn IOService::requestPowerState( request->installCompletionAction( action, target, param ); #endif - // Prevent needless downwards power transitions by clamping power - // until the scheduled request is executed. + // Prevent needless downwards power transitions by clamping power + // until the scheduled request is executed. 
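// Editorial sketch (not part of the patch): changePowerStateWithOverrideTo()
// above (and requestPowerState() below) clamp power upward while the request
// sits in the queue, and handleRequestPowerState() releases the clamp when
// the request executes, preventing a needless dip between submission and
// execution. A compact sketch of that submit/execute pairing; the struct and
// names are illustrative, and the real code runs both sides on the PM work
// loop rather than under a lock.
#include <algorithm>
#include <cstdint>

struct TempClamp {
    uint32_t clampPowerState = 0;   // plays the role of fTempClampPowerState
    uint32_t clampCount      = 0;   // plays the role of fTempClampCount

    // Submission side: hold power at least at 'state' until executed.
    void acquire(uint32_t state)
    {
        clampPowerState = std::max(clampPowerState, state);   // StateMax analogue
        clampCount++;
    }

    // Execution side: drop one reference; reset when the last one goes away.
    void release()
    {
        if (clampCount && --clampCount == 0)
            clampPowerState = 0;    // back to kPowerStateZero
    }
};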
- if (gIOPMWorkLoop->inGate() && (state < fNumberOfPowerStates)) - { - fTempClampPowerState = StateMax(fTempClampPowerState, state); - fTempClampCount++; - request->fArg2 = (void *) (uintptr_t) true; - } + if (gIOPMWorkLoop->inGate() && (state < fNumberOfPowerStates)) + { + fTempClampPowerState = StateMax(fTempClampPowerState, state); + fTempClampCount++; + request->fArg2 = (void *) (uintptr_t) true; + } - submitPMRequest( request ); + submitPMRequest( request ); return IOPMNoErr; } @@ -2375,16 +2352,16 @@ void IOService::handleRequestPowerState( IOPMRequest * request ) const OSSymbol * client = (const OSSymbol *) request->fArg1; uint32_t state = (uint32_t)(uintptr_t) request->fArg0; - PM_ASSERT_IN_GATE(); - if (request->fArg2) - { - assert(fTempClampCount != 0); - if (fTempClampCount) fTempClampCount--; - if (!fTempClampCount) fTempClampPowerState = kPowerStateZero; - } + PM_ASSERT_IN_GATE(); + if (request->fArg2) + { + assert(fTempClampCount != 0); + if (fTempClampCount) fTempClampCount--; + if (!fTempClampCount) fTempClampPowerState = kPowerStateZero; + } - if (fNumberOfPowerStates && (state >= fNumberOfPowerStates)) - state = fHighestPowerState; + if (fNumberOfPowerStates && (state >= fNumberOfPowerStates)) + state = fHighestPowerState; // The power suppression due to changePowerStateWithOverrideTo() expires // upon the next "device" power request - changePowerStateToPriv(). @@ -2394,14 +2371,14 @@ void IOService::handleRequestPowerState( IOPMRequest * request ) fOverrideMaxPowerState = kIOPMPowerStateMax; if ((state == kPowerStateZero) && - (client != gIOPMPowerClientDevice) && - (client != gIOPMPowerClientDriver) && - (client != gIOPMPowerClientChildProxy)) - removePowerClient(client); - else - updatePowerClient(client, state); - - adjustPowerState(); + (client != gIOPMPowerClientDevice) && + (client != gIOPMPowerClientDriver) && + (client != gIOPMPowerClientChildProxy)) + removePowerClient(client); + else + updatePowerClient(client, state); + + adjustPowerState(); client->release(); } @@ -2461,22 +2438,22 @@ uint32_t IOService::getPowerStateForClient( const OSSymbol * client ) IOReturn IOService::powerOverrideOnPriv( void ) { - IOPMRequest * request; + IOPMRequest * request; if (!initialized) - return IOPMNotYetInitialized; + return IOPMNotYetInitialized; - if (gIOPMWorkLoop->inGate()) - { - fDeviceOverrideEnabled = true; - return IOPMNoErr; - } + if (gIOPMWorkLoop->inGate()) + { + fDeviceOverrideEnabled = true; + return IOPMNoErr; + } - request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOnPriv ); - if (!request) - return kIOReturnNoMemory; + request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOnPriv ); + if (!request) + return kIOReturnNoMemory; - submitPMRequest( request ); + submitPMRequest( request ); return IOPMNoErr; } @@ -2486,22 +2463,22 @@ IOReturn IOService::powerOverrideOnPriv( void ) IOReturn IOService::powerOverrideOffPriv( void ) { - IOPMRequest * request; + IOPMRequest * request; if (!initialized) - return IOPMNotYetInitialized; + return IOPMNotYetInitialized; - if (gIOPMWorkLoop->inGate()) - { - fDeviceOverrideEnabled = false; - return IOPMNoErr; - } + if (gIOPMWorkLoop->inGate()) + { + fDeviceOverrideEnabled = false; + return IOPMNoErr; + } - request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOffPriv ); - if (!request) - return kIOReturnNoMemory; + request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOffPriv ); + if (!request) + return kIOReturnNoMemory; - submitPMRequest( request ); + submitPMRequest( request ); return 
IOPMNoErr; } @@ -2511,19 +2488,19 @@ IOReturn IOService::powerOverrideOffPriv( void ) void IOService::handlePowerOverrideChanged( IOPMRequest * request ) { - PM_ASSERT_IN_GATE(); - if (request->getType() == kIOPMRequestTypePowerOverrideOnPriv) - { - OUR_PMLog(kPMLogOverrideOn, 0, 0); - fDeviceOverrideEnabled = true; + PM_ASSERT_IN_GATE(); + if (request->getType() == kIOPMRequestTypePowerOverrideOnPriv) + { + OUR_PMLog(kPMLogOverrideOn, 0, 0); + fDeviceOverrideEnabled = true; + } + else + { + OUR_PMLog(kPMLogOverrideOff, 0, 0); + fDeviceOverrideEnabled = false; } - else - { - OUR_PMLog(kPMLogOverrideOff, 0, 0); - fDeviceOverrideEnabled = false; - } - adjustPowerState(); + adjustPowerState(); } //********************************************************************************* @@ -2532,20 +2509,20 @@ void IOService::handlePowerOverrideChanged( IOPMRequest * request ) void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly ) { - OSIterator * iter; - OSObject * next; - IOPowerConnection * connection; - uint32_t desiredState = kPowerStateZero; + OSIterator * iter; + OSObject * next; + IOPowerConnection * connection; + uint32_t desiredState = kPowerStateZero; uint32_t newPowerState = kPowerStateZero; bool hasChildren = false; - // Desired power state is always 0 without a controlling driver. + // Desired power state is always 0 without a controlling driver. - if (!fNumberOfPowerStates) - { + if (!fNumberOfPowerStates) + { fDesiredPowerState = kPowerStateZero; - return; - } + return; + } // Examine the children's desired power state. @@ -2582,14 +2559,14 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly const OSSymbol * client; while ((client = (const OSSymbol *) iter->getNextObject())) { - // Ignore child and driver when override is in effect. + // Ignore child and driver when override is in effect. if ((fDeviceOverrideEnabled || (getPMRequestType() == kIOPMRequestTypeRequestPowerStateOverride)) && ((client == gIOPMPowerClientChildren) || (client == gIOPMPowerClientDriver))) continue; - // Ignore child proxy when children are present. + // Ignore child proxy when children are present. 
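// Editorial sketch (not part of the patch): computeDesiredState() below is a
// max-fold over per-client power state requests, with the children and
// driver clients skipped while an override is in effect, and with the local
// and temporary clamps only ever raising the result. A minimal standalone
// version; std::map replaces the OSSymbol-keyed client collection and the
// client key strings are invented for illustration.
#include <algorithm>
#include <cstdint>
#include <map>
#include <string>

uint32_t computeDesiredStateSketch(const std::map<std::string, uint32_t> &clients,
                                   bool overrideEnabled, uint32_t localClamp,
                                   uint32_t tempClamp)
{
    uint32_t newPowerState = 0;                        // kPowerStateZero
    for (const auto &kv : clients) {
        // Ignore child and driver desires when the override is in effect.
        if (overrideEnabled &&
            (kv.first == "children" || kv.first == "driver"))
            continue;
        newPowerState = std::max(newPowerState, kv.second); // StateMax analogue
    }
    // Clamps raise, never lower, the computed desire.
    newPowerState = std::max(newPowerState, localClamp);
    newPowerState = std::max(newPowerState, tempClamp);
    return newPowerState;
}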
if (hasChildren && (client == gIOPMPowerClientChildProxy)) continue; @@ -2599,9 +2576,9 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly continue; desiredState = getPowerStateForClient(client); - assert(desiredState < fNumberOfPowerStates); - PM_LOG1(" %u %s\n", - desiredState, client->getCStringNoCopy()); + assert(desiredState < fNumberOfPowerStates); + PM_LOG1(" %u %s\n", + desiredState, client->getCStringNoCopy()); newPowerState = StateMax(newPowerState, desiredState); @@ -2629,7 +2606,7 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly PM_LOG1(" temp %u, clamp %u, current %u, new %u\n", (uint32_t) localClamp, (uint32_t) fTempClampPowerState, - (uint32_t) fCurrentPowerState, newPowerState); + (uint32_t) fCurrentPowerState, newPowerState); if (!computeOnly) { @@ -2677,7 +2654,7 @@ unsigned long IOService::currentPowerConsumption( void ) IOWorkLoop * IOService::getPMworkloop( void ) { - return gIOPMWorkLoop; + return gIOPMWorkLoop; } #if NOT_YET @@ -2687,13 +2664,13 @@ IOWorkLoop * IOService::getPMworkloop( void ) //********************************************************************************* static void -applyToPowerChildren( +applyToPowerChildren( IOService * service, IOServiceApplierFunction applier, void * context, IOOptionBits options ) { - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); IORegistryEntry * entry; IORegistryIterator * iter; @@ -2721,13 +2698,13 @@ applyToPowerChildren( } static void -applyToPowerParent( +applyToPowerParent( IOService * service, IOServiceApplierFunction applier, void * context, IOOptionBits options ) { - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); IORegistryEntry * entry; IORegistryIterator * iter; @@ -2777,18 +2754,18 @@ void IOService::setAdvisoryTickleEnable( bool enable ) bool IOService::activityTickle( unsigned long type, unsigned long stateNumber ) { - IOPMRequest * request; - bool noPowerChange = true; + IOPMRequest * request; + bool noPowerChange = true; uint32_t tickleFlags; if (!initialized) return true; // no power change if ((type == kIOPMSuperclassPolicy1) && StateOrder(stateNumber)) - { + { IOLockLock(fActivityLock); - // Record device activity for the idle timer handler. + // Record device activity for the idle timer handler. fDeviceWasActive = true; fActivityTickleCount++; @@ -2796,29 +2773,29 @@ bool IOService::activityTickle( unsigned long type, unsigned long stateNumber ) PM_ACTION_0(actionActivityTickle); - // Record the last tickle power state. - // This helps to filter out redundant tickles as - // this function may be called from the data path. + // Record the last tickle power state. + // This helps to filter out redundant tickles as + // this function may be called from the data path. 
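// Editorial sketch (not part of the patch): activityTickle() below caches
// the last tickle power state so that hot data-path callers only pay for a
// PM request submission when a tickle asks for more power than the cached
// value. A sketch of that filter; std::mutex stands in for fActivityLock,
// the sentinel mirrors kInvalidTicklePowerState, and the kernel compares
// states by StateOrder() where this sketch compares raw state numbers.
#include <cstdint>
#include <mutex>

static constexpr uint32_t kInvalidTickleState = UINT32_MAX; // sentinel, as in the hunk

struct TickleFilter {
    std::mutex lock;                       // fActivityLock stand-in
    uint32_t cachedState = kInvalidTickleState;

    // Returns true when a power-rise request must actually be queued.
    bool tickle(uint32_t stateNumber)
    {
        std::lock_guard<std::mutex> g(lock);
        if (cachedState == kInvalidTickleState || cachedState < stateNumber) {
            cachedState = stateNumber;     // remember the highest recent tickle
            return true;                   // submit kIOPMRequestTypeActivityTickle
        }
        return false;                      // redundant tickle: no request, no work
    }
};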
if ((fActivityTicklePowerState == kInvalidTicklePowerState) || StateOrder(fActivityTicklePowerState) < StateOrder(stateNumber)) - { - fActivityTicklePowerState = stateNumber; - noPowerChange = false; + { + fActivityTicklePowerState = stateNumber; + noPowerChange = false; tickleFlags = kTickleTypeActivity | kTickleTypePowerRise; - request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); - if (request) - { - request->fArg0 = (void *) stateNumber; - request->fArg1 = (void *)(uintptr_t) tickleFlags; + request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); + if (request) + { + request->fArg0 = (void *) stateNumber; + request->fArg1 = (void *)(uintptr_t) tickleFlags; request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; - submitPMRequest(request); - } - } + submitPMRequest(request); + } + } - IOLockUnlock(fActivityLock); - } + IOLockUnlock(fActivityLock); + } else if ((type == kIOPMActivityTickleTypeAdvisory) && ((stateNumber = fDeviceUsablePowerState) != kPowerStateZero)) @@ -2827,27 +2804,27 @@ bool IOService::activityTickle( unsigned long type, unsigned long stateNumber ) fAdvisoryTickled = true; - if (fAdvisoryTicklePowerState != stateNumber) - { - fAdvisoryTicklePowerState = stateNumber; - noPowerChange = false; + if (fAdvisoryTicklePowerState != stateNumber) + { + fAdvisoryTicklePowerState = stateNumber; + noPowerChange = false; tickleFlags = kTickleTypeAdvisory | kTickleTypePowerRise; - request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); - if (request) - { - request->fArg0 = (void *) stateNumber; - request->fArg1 = (void *)(uintptr_t) tickleFlags; + request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); + if (request) + { + request->fArg0 = (void *) stateNumber; + request->fArg1 = (void *)(uintptr_t) tickleFlags; request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; - submitPMRequest(request); - } - } + submitPMRequest(request); + } + } - IOLockUnlock(fActivityLock); + IOLockUnlock(fActivityLock); } - // Returns false if the activityTickle might cause a transition to a - // higher powered state, true otherwise. + // Returns false if the activityTickle might cause a transition to a + // higher powered state, true otherwise. return noPowerChange; } @@ -2858,12 +2835,12 @@ bool IOService::activityTickle( unsigned long type, unsigned long stateNumber ) void IOService::handleActivityTickle( IOPMRequest * request ) { - uint32_t ticklePowerState = (uint32_t)(uintptr_t) request->fArg0; + uint32_t ticklePowerState = (uint32_t)(uintptr_t) request->fArg0; uint32_t tickleFlags = (uint32_t)(uintptr_t) request->fArg1; uint32_t tickleGeneration = (uint32_t)(uintptr_t) request->fArg2; bool adjustPower = false; - - PM_ASSERT_IN_GATE(); + + PM_ASSERT_IN_GATE(); if (fResetPowerStateOnWake && (tickleGeneration != gIOPMTickleGeneration)) { // Drivers that don't want power restored on wake will drop any @@ -2879,6 +2856,7 @@ void IOService::handleActivityTickle( IOPMRequest * request ) if (tickleFlags & kTickleTypeActivity) { IOPMPowerStateIndex deviceDesireOrder = StateOrder(fDeviceDesire); + uint32_t idleTimerGeneration = ticklePowerState; // kTickleTypePowerDrop if (tickleFlags & kTickleTypePowerRise) { @@ -2890,7 +2868,8 @@ void IOService::handleActivityTickle( IOPMRequest * request ) adjustPower = true; } } - else if (deviceDesireOrder > StateOrder(fIdleTimerMinPowerState)) + else if ((deviceDesireOrder > StateOrder(fIdleTimerMinPowerState)) && + (idleTimerGeneration == fIdleTimerGeneration)) { // Power drop due to idle timer expiration. 
// Do not allow idle timer to reduce power below tickle power. @@ -2898,7 +2877,7 @@ void IOService::handleActivityTickle( IOPMRequest * request ) // to zero and cancelling the effect of a pre-sleep tickle when // system wakes up to doze state, while the device is unable to // raise its power state to satisfy the tickle. - + deviceDesireOrder--; if (deviceDesireOrder < fNumberOfPowerStates) { @@ -2935,10 +2914,10 @@ void IOService::handleActivityTickle( IOPMRequest * request ) } } - if (adjustPower) - { - adjustPowerState(); - } + if (adjustPower) + { + adjustPowerState(); + } } //****************************************************************************** @@ -2951,7 +2930,7 @@ void IOService::handleActivityTickle( IOPMRequest * request ) IOReturn IOService::setIdleTimerPeriod( unsigned long period ) { if (!initialized) - return IOPMNotYetInitialized; + return IOPMNotYetInitialized; OUR_PMLog(kPMLogSetIdleTimerPeriod, period, fIdleTimerPeriod); @@ -2969,7 +2948,7 @@ IOReturn IOService::setIdleTimerPeriod( unsigned long period ) IOReturn IOService::setIgnoreIdleTimer( bool ignore ) { if (!initialized) - return IOPMNotYetInitialized; + return IOPMNotYetInitialized; OUR_PMLog(kIOPMRequestTypeIgnoreIdleTimer, ignore, 0); @@ -2993,7 +2972,7 @@ IOReturn IOService::setIgnoreIdleTimer( bool ignore ) SInt32 IOService::nextIdleTimeout( AbsoluteTime currentTime, - AbsoluteTime lastActivity, + AbsoluteTime lastActivity, unsigned int powerState) { AbsoluteTime delta; @@ -3004,7 +2983,7 @@ SInt32 IOService::nextIdleTimeout( // Calculate time difference using funky macro from clock.h. delta = currentTime; SUB_ABSOLUTETIME(&delta, &lastActivity); - + // Figure it in seconds. absolutetime_to_nanoseconds(delta, &delta_ns); delta_secs = (SInt32)(delta_ns / NSEC_PER_SEC); @@ -3014,7 +2993,7 @@ SInt32 IOService::nextIdleTimeout( delay_secs = (int) fIdleTimerPeriod - delta_secs; else delay_secs = (int) fIdleTimerPeriod; - + return (SInt32)delay_secs; } @@ -3028,10 +3007,10 @@ void IOService::start_PM_idle_timer( void ) static const int minTimeout = 1; AbsoluteTime uptime, deadline; SInt32 idle_in = 0; - boolean_t pending; + boolean_t pending; - if (!initialized || !fIdleTimerPeriod) - return; + if (!initialized || !fIdleTimerPeriod) + return; IOLockLock(fActivityLock); @@ -3053,6 +3032,9 @@ void IOService::start_PM_idle_timer( void ) IOLockUnlock(fActivityLock); + fNextIdleTimerPeriod = idle_in; + fIdleTimerStartTime = uptime; + retain(); clock_interval_to_absolutetime_interval(idle_in, kSecondScale, &deadline); ADD_ABSOLUTETIME(&deadline, &uptime); @@ -3070,7 +3052,6 @@ void IOService::restartIdleTimer( void ) { fIdleTimerStopped = false; fActivityTickleCount = 0; - clock_get_uptime(&fIdleTimerStartTime); start_PM_idle_timer(); } else if (fHasAdvisoryDesire) @@ -3092,15 +3073,15 @@ static void idle_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) { - IOService * me = (IOService *) arg0; + IOService * me = (IOService *) arg0; - if (gIOPMWorkLoop) - gIOPMWorkLoop->runAction( + if (gIOPMWorkLoop) + gIOPMWorkLoop->runAction( OSMemberFunctionCast(IOWorkLoop::Action, me, &IOService::idleTimerExpired), me); - me->release(); + me->release(); } //********************************************************************************* @@ -3113,48 +3094,50 @@ idle_timer_expired( void IOService::idleTimerExpired( void ) { - IOPMRequest * request; - bool restartTimer = true; + IOPMRequest * request; + bool restartTimer = true; uint32_t tickleFlags; if ( !initialized || !fIdleTimerPeriod || fIdleTimerStopped 
|| fLockedFlags.PMStop ) return; - IOLockLock(fActivityLock); + fIdleTimerStartTime = 0; - // Check for device activity (tickles) over last timer period. + IOLockLock(fActivityLock); - if (fDeviceWasActive) - { - // Device was active - do not drop power, restart timer. - fDeviceWasActive = false; - } - else if (!fIdleTimerIgnored) - { - // No device activity - drop power state by one level. - // Decrement the cached tickle power state when possible. - // This value may be kInvalidTicklePowerState before activityTickle() + // Check for device activity (tickles) over last timer period. + + if (fDeviceWasActive) + { + // Device was active - do not drop power, restart timer. + fDeviceWasActive = false; + } + else if (!fIdleTimerIgnored) + { + // No device activity - drop power state by one level. + // Decrement the cached tickle power state when possible. + // This value may be kInvalidTicklePowerState before activityTickle() // is called, but the power drop request must be issued regardless. - if ((fActivityTicklePowerState != kInvalidTicklePowerState) && + if ((fActivityTicklePowerState != kInvalidTicklePowerState) && (fActivityTicklePowerState != kPowerStateZero)) - fActivityTicklePowerState--; + fActivityTicklePowerState--; tickleFlags = kTickleTypeActivity | kTickleTypePowerDrop; - request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); - if (request) - { - request->fArg0 = (void *) kPowerStateZero; // irrelevant - request->fArg1 = (void *)(uintptr_t) tickleFlags; + request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); + if (request) + { + request->fArg0 = (void *)(uintptr_t) fIdleTimerGeneration; + request->fArg1 = (void *)(uintptr_t) tickleFlags; request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; - submitPMRequest( request ); + submitPMRequest( request ); - // Do not restart timer until after the tickle request has been - // processed. + // Do not restart timer until after the tickle request has been + // processed. - restartTimer = false; - } + restartTimer = false; + } } if (fAdvisoryTickled) @@ -3167,25 +3150,25 @@ void IOService::idleTimerExpired( void ) fAdvisoryTicklePowerState = kInvalidTicklePowerState; tickleFlags = kTickleTypeAdvisory | kTickleTypePowerDrop; - request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); - if (request) - { - request->fArg0 = (void *) kPowerStateZero; // irrelevant - request->fArg1 = (void *)(uintptr_t) tickleFlags; + request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); + if (request) + { + request->fArg0 = (void *)(uintptr_t) fIdleTimerGeneration; + request->fArg1 = (void *)(uintptr_t) tickleFlags; request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; - submitPMRequest( request ); + submitPMRequest( request ); - // Do not restart timer until after the tickle request has been - // processed. + // Do not restart timer until after the tickle request has been + // processed. 
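// Editorial sketch (not part of the patch): nextIdleTimeout() in the hunks
// above computes how long the idle timer should wait: the remainder of the
// timer period when the last activity fell inside the current period,
// otherwise a full period. The same arithmetic in standalone form; plain
// seconds replace the AbsoluteTime delta and SUB_ABSOLUTETIME conversion.
#include <cstdint>

int32_t nextIdleTimeoutSketch(int64_t nowSecs, int64_t lastActivitySecs,
                              int32_t idleTimerPeriodSecs)
{
    int64_t deltaSecs = nowSecs - lastActivitySecs;   // time since last tickle

    if (deltaSecs >= 0 && deltaSecs < idleTimerPeriodSecs)
        return idleTimerPeriodSecs - (int32_t)deltaSecs;  // finish out the period
    return idleTimerPeriodSecs;                           // start a fresh period
}
// Example: with a 30 s period and activity 12 s ago, the timer re-arms for
// 18 s; with no activity in the last period it re-arms for the full 30 s.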
- restartTimer = false; - } + restartTimer = false; + } } - IOLockUnlock(fActivityLock); + IOLockUnlock(fActivityLock); - if (restartTimer) - start_PM_idle_timer(); + if (restartTimer) + start_PM_idle_timer(); } #ifndef __LP64__ @@ -3230,7 +3213,7 @@ IOReturn IOService::getAggressiveness( unsigned long type, unsigned long * curre if (!rootDomain) return kIOReturnNotReady; - + return rootDomain->getAggressiveness( type, currentLevel ); } @@ -3257,10 +3240,10 @@ UInt32 IOService::getPowerState( void ) IOReturn IOService::systemWake( void ) { - OSIterator * iter; - OSObject * next; - IOPowerConnection * connection; - IOService * theChild; + OSIterator * iter; + OSObject * next; + IOPowerConnection * connection; + IOService * theChild; iter = getChildIterator(gIOPowerPlane); if ( iter ) @@ -3269,17 +3252,17 @@ IOReturn IOService::systemWake( void ) { if ( (connection = OSDynamicCast(IOPowerConnection, next)) ) { - if (connection->getReadyFlag() == false) - { - PM_LOG3("[%s] %s: connection not ready\n", - getName(), __FUNCTION__); - continue; - } + if (connection->getReadyFlag() == false) + { + PM_LOG3("[%s] %s: connection not ready\n", + getName(), __FUNCTION__); + continue; + } theChild = (IOService *)connection->copyChildEntry(gIOPowerPlane); if ( theChild ) { - theChild->systemWake(); + theChild->systemWake(); theChild->release(); } } @@ -3304,9 +3287,9 @@ IOReturn IOService::systemWake( void ) IOReturn IOService::temperatureCriticalForZone( IOService * whichZone ) { - IOService * theParent; - IOService * theNub; - + IOService * theParent; + IOService * theNub; + OUR_PMLog(kPMLogCriticalTemp, 0, 0); if ( inPlane(gIOPowerPlane) && !IS_PM_ROOT ) @@ -3343,8 +3326,8 @@ IOReturn IOService::startPowerChange( IOPowerConnection * parentConnection, IOPMPowerFlags parentFlags ) { - PM_ASSERT_IN_GATE(); - assert( fMachineState == kIOPM_Finished ); + PM_ASSERT_IN_GATE(); + assert( fMachineState == kIOPM_Finished ); assert( powerState < fNumberOfPowerStates ); if (powerState >= fNumberOfPowerStates) @@ -3361,7 +3344,7 @@ IOReturn IOService::startPowerChange( // Reset device desire down to the clamped power state updatePowerClient(gIOPMPowerClientDevice, powerState); computeDesiredState(kPowerStateZero, true); - + // Invalidate tickle cache so the next tickle will issue a request IOLockLock(fActivityLock); fDeviceWasActive = false; @@ -3378,29 +3361,29 @@ IOReturn IOService::startPowerChange( if (changeFlags & kIOPMNotDone) return IOPMAckImplied; - // Forks to either Driver or Parent initiated power change paths. + // Forks to either Driver or Parent initiated power change paths. 
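// Editorial sketch (not part of the patch): startPowerChange() below routes
// a change to one of three entry points depending on how it was initiated:
// self-initiated synchronize, self-initiated change, or parent-initiated
// change. A condensed view of that dispatch on the change flags; the flag
// values and enum names here are invented stand-ins for the kIOPM* flags.
#include <cstdint>

enum : uint32_t {
    kSelfInitiatedBit   = 0x1,   // stands in for kIOPMSelfInitiated
    kSynchronizeBit     = 0x2,   // stands in for kIOPMSynchronize
    kParentInitiatedBit = 0x4,   // stands in for kIOPMParentInitiated
};

enum class ChangePath { OurSyncStart, OurChangeStart, ParentChangeStart };

ChangePath routePowerChange(uint32_t changeFlags)
{
    if (changeFlags & kSelfInitiatedBit)
        return (changeFlags & kSynchronizeBit) ? ChangePath::OurSyncStart
                                               : ChangePath::OurChangeStart;
    // The hunk asserts kIOPMParentInitiated on this branch.
    return ChangePath::ParentChangeStart;
}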
fHeadNoteChangeFlags = changeFlags; fHeadNotePowerState = powerState; - fHeadNotePowerArrayEntry = &fPowerStates[ powerState ]; - fHeadNoteParentConnection = NULL; + fHeadNotePowerArrayEntry = &fPowerStates[ powerState ]; + fHeadNoteParentConnection = NULL; - if (changeFlags & kIOPMSelfInitiated) - { + if (changeFlags & kIOPMSelfInitiated) + { if (changeFlags & kIOPMSynchronize) OurSyncStart(); else OurChangeStart(); - return 0; - } - else - { - assert(changeFlags & kIOPMParentInitiated); + return 0; + } + else + { + assert(changeFlags & kIOPMParentInitiated); fHeadNoteDomainFlags = domainFlags; - fHeadNoteParentFlags = parentFlags; + fHeadNoteParentFlags = parentFlags; fHeadNoteParentConnection = parentConnection; - return ParentChangeStart(); - } + return ParentChangeStart(); + } } //********************************************************************************* @@ -3409,10 +3392,10 @@ IOReturn IOService::startPowerChange( bool IOService::notifyInterestedDrivers( void ) { - IOPMinformee * informee; - IOPMinformeeList * list = fInterestedDrivers; - DriverCallParam * param; - IOItemCount count; + IOPMinformee * informee; + IOPMinformeeList * list = fInterestedDrivers; + DriverCallParam * param; + IOItemCount count; PM_ASSERT_IN_GATE(); assert( fDriverCallParamCount == 0 ); @@ -3422,7 +3405,7 @@ bool IOService::notifyInterestedDrivers( void ) count = list->numberOfItems(); if (!count) - goto done; // no interested drivers + goto done; // no interested drivers // Allocate an array of interested drivers and their return values // for the callout thread. Everything else is still "owned" by the @@ -3442,7 +3425,7 @@ bool IOService::notifyInterestedDrivers( void ) param = IONew(DriverCallParam, count); if (!param) - goto done; // no memory + goto done; // no memory fDriverCallParamPtr = (void *) param; fDriverCallParamSlots = count; @@ -3479,39 +3462,39 @@ done: void IOService::notifyInterestedDriversDone( void ) { - IOPMinformee * informee; - IOItemCount count; + IOPMinformee * informee; + IOItemCount count; DriverCallParam * param; IOReturn result; - PM_ASSERT_IN_GATE(); - assert( fDriverCallBusy == false ); - assert( fMachineState == kIOPM_DriverThreadCallDone ); + PM_ASSERT_IN_GATE(); + assert( fDriverCallBusy == false ); + assert( fMachineState == kIOPM_DriverThreadCallDone ); - param = (DriverCallParam *) fDriverCallParamPtr; - count = fDriverCallParamCount; + param = (DriverCallParam *) fDriverCallParamPtr; + count = fDriverCallParamCount; - if (param && count) - { - for (IOItemCount i = 0; i < count; i++, param++) - { - informee = (IOPMinformee *) param->Target; - result = param->Result; + if (param && count) + { + for (IOItemCount i = 0; i < count; i++, param++) + { + informee = (IOPMinformee *) param->Target; + result = param->Result; - if ((result == IOPMAckImplied) || (result < 0)) - { - // Interested driver return IOPMAckImplied. + if ((result == IOPMAckImplied) || (result < 0)) + { + // Interested driver return IOPMAckImplied. // If informee timer is zero, it must have de-registered // interest during the thread callout. That also drops // the pending ack count. - if (fHeadNotePendingAcks && informee->timer) + if (fHeadNotePendingAcks && informee->timer) fHeadNotePendingAcks--; informee->timer = 0; - } - else if (informee->timer) - { + } + else if (informee->timer) + { assert(informee->timer == -1); // Driver has not acked, and has returned a positive result. 
@@ -3525,23 +3508,23 @@ void IOService::notifyInterestedDriversDone( void ) result = kMinAckTimeoutTicks; informee->timer = (result / (ACK_TIMER_PERIOD / ns_per_us)) + 1; - } - // else, child has already acked or driver has removed interest, + } + // else, child has already acked or driver has removed interest, // and head_note_pendingAcks decremented. - // informee may have been removed from the interested drivers list, + // informee may have been removed from the interested drivers list, // thus the informee must be retained across the callout. - informee->release(); - } + informee->release(); + } - fDriverCallParamCount = 0; + fDriverCallParamCount = 0; - if ( fHeadNotePendingAcks ) - { - OUR_PMLog(kPMLogStartAckTimer, 0, 0); - start_ack_timer(); - } - } + if ( fHeadNotePendingAcks ) + { + OUR_PMLog(kPMLogStartAckTimer, 0, 0); + start_ack_timer(); + } + } MS_POP(); // pop the machine state passed to notifyAll() @@ -3566,7 +3549,7 @@ void IOService::notifyInterestedDriversDone( void ) void IOService::notifyRootDomain( void ) { - assert( fDriverCallBusy == false ); + assert( fDriverCallBusy == false ); // Only for root domain in the will-change phase if (!IS_ROOT_DOMAIN || (fMachineState != kIOPM_OurChangeSetPowerState)) @@ -3587,8 +3570,8 @@ void IOService::notifyRootDomain( void ) void IOService::notifyRootDomainDone( void ) { - assert( fDriverCallBusy == false ); - assert( fMachineState == kIOPM_DriverThreadCallDone ); + assert( fDriverCallBusy == false ); + assert( fMachineState == kIOPM_DriverThreadCallDone ); MS_POP(); // pop notifyAll() machine state notifyChildren(); @@ -3600,13 +3583,13 @@ void IOService::notifyRootDomainDone( void ) void IOService::notifyChildren( void ) { - OSIterator * iter; - OSObject * next; - IOPowerConnection * connection; - OSArray * children = 0; + OSIterator * iter; + OSObject * next; + IOPowerConnection * connection; + OSArray * children = 0; IOPMrootDomain * rootDomain; bool delayNotify = false; - + if ((fHeadNotePowerState != fCurrentPowerState) && (IS_POWER_DROP == fIsPreChange) && ((rootDomain = getPMRootDomain()) == this)) @@ -3616,8 +3599,8 @@ void IOService::notifyChildren( void ) kIOPMTracePointWakePowerPlaneDrivers ); } - if (fStrictTreeOrder) - children = OSArray::withCapacity(8); + if (fStrictTreeOrder) + children = OSArray::withCapacity(8); // Sum child power consumption in notifyChild() fHeadNotePowerArrayEntry->staticPower = 0; @@ -3629,12 +3612,12 @@ void IOService::notifyChildren( void ) { if ((connection = OSDynamicCast(IOPowerConnection, next))) { - if (connection->getReadyFlag() == false) - { - PM_LOG3("[%s] %s: connection not ready\n", - getName(), __FUNCTION__); - continue; - } + if (connection->getReadyFlag() == false) + { + PM_LOG3("[%s] %s: connection not ready\n", + getName(), __FUNCTION__); + continue; + } // Mechanism to postpone the did-change notification to // certain power children to order those children last. 
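// Editorial sketch (not part of the patch): when an interested driver
// returns a positive value instead of IOPMAckImplied, the hunks above treat
// it as a microsecond upper bound on the ack, clamp tiny values up to
// kMinAckTimeoutTicks, and convert it into ticks of the periodic ack timer:
// timer = (result / (ACK_TIMER_PERIOD / ns_per_us)) + 1. The same conversion
// standalone; the constant values below are assumptions for illustration,
// not the kernel's actual ACK_TIMER_PERIOD or minimum.
#include <cstdint>

static constexpr uint64_t kAckTimerPeriodNs = 100ULL * 1000 * 1000; // assumed 100 ms tick
static constexpr uint64_t kNsPerUs          = 1000;
static constexpr int64_t  kMinAckTimeoutUs  = 10 * 1000;            // assumed floor

int64_t ackTimeoutTicks(int64_t driverResultUs)
{
    // Enforce a floor so a driver that confused units cannot time out instantly.
    if (driverResultUs < kMinAckTimeoutUs)
        driverResultUs = kMinAckTimeoutUs;

    // Whole ack-timer ticks needed to cover the driver's estimate, plus one
    // so the deadline always lands at or after the estimate.
    return driverResultUs / (int64_t)(kAckTimerPeriodNs / kNsPerUs) + 1;
}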
@@ -3657,11 +3640,11 @@ void IOService::notifyChildren( void ) } } - if (!delayNotify && children) - children->setObject( connection ); - else - notifyChild( connection ); - } + if (!delayNotify && children) + children->setObject( connection ); + else + notifyChild( connection ); + } } iter->release(); } @@ -3671,10 +3654,10 @@ void IOService::notifyChildren( void ) children->release(); children = 0; } - if (children) - { + if (children) + { assert(fNotifyChildArray == 0); - fNotifyChildArray = children; + fNotifyChildArray = children; MS_PUSH(fMachineState); if (delayNotify) @@ -3698,7 +3681,7 @@ void IOService::notifyChildren( void ) fMachineState = kIOPM_NotifyChildrenOrdered; } - } + } } //********************************************************************************* @@ -3707,33 +3690,33 @@ void IOService::notifyChildren( void ) void IOService::notifyChildrenOrdered( void ) { - PM_ASSERT_IN_GATE(); - assert(fNotifyChildArray); - assert(fMachineState == kIOPM_NotifyChildrenOrdered); - - // Notify one child, wait for it to ack, then repeat for next child. - // This is a workaround for some drivers with multiple instances at - // the same branch in the power tree, but the driver is slow to power - // up unless the tree ordering is observed. Problem observed only on - // system wake, not on system sleep. - // - // We have the ability to power off in reverse child index order. - // That works nicely on some machines, but not on all HW configs. - - if (fNotifyChildArray->getCount()) - { - IOPowerConnection * connection; - connection = (IOPowerConnection *) fNotifyChildArray->getObject(0); - notifyChild( connection ); - fNotifyChildArray->removeObject(0); - } - else - { - fNotifyChildArray->release(); - fNotifyChildArray = 0; + PM_ASSERT_IN_GATE(); + assert(fNotifyChildArray); + assert(fMachineState == kIOPM_NotifyChildrenOrdered); + + // Notify one child, wait for it to ack, then repeat for next child. + // This is a workaround for some drivers with multiple instances at + // the same branch in the power tree, but the driver is slow to power + // up unless the tree ordering is observed. Problem observed only on + // system wake, not on system sleep. + // + // We have the ability to power off in reverse child index order. + // That works nicely on some machines, but not on all HW configs. + + if (fNotifyChildArray->getCount()) + { + IOPowerConnection * connection; + connection = (IOPowerConnection *) fNotifyChildArray->getObject(0); + notifyChild( connection ); + fNotifyChildArray->removeObject(0); + } + else + { + fNotifyChildArray->release(); + fNotifyChildArray = 0; MS_POP(); // pushed by notifyChildren() - } + } } //********************************************************************************* @@ -3742,11 +3725,11 @@ void IOService::notifyChildrenOrdered( void ) void IOService::notifyChildrenDelayed( void ) { - IOPowerConnection * connection; + IOPowerConnection * connection; - PM_ASSERT_IN_GATE(); - assert(fNotifyChildArray); - assert(fMachineState == kIOPM_NotifyChildrenDelayed); + PM_ASSERT_IN_GATE(); + assert(fNotifyChildArray); + assert(fMachineState == kIOPM_NotifyChildrenDelayed); // Wait after all non-delayed children and interested drivers have ack'ed, // then notify all delayed children. 
If notify delay is canceled, child @@ -3755,17 +3738,17 @@ void IOService::notifyChildrenDelayed( void ) for (int i = 0; ; i++) { - connection = (IOPowerConnection *) fNotifyChildArray->getObject(i); + connection = (IOPowerConnection *) fNotifyChildArray->getObject(i); if (!connection) break; - notifyChild( connection ); + notifyChild( connection ); } PM_LOG2("%s: notified delayed children\n", getName()); fNotifyChildArray->release(); fNotifyChildArray = 0; - + MS_POP(); // pushed by notifyChildren() } @@ -3775,18 +3758,18 @@ void IOService::notifyChildrenDelayed( void ) IOReturn IOService::notifyAll( uint32_t nextMS ) { - // Save the machine state to be restored by notifyInterestedDriversDone() + // Save the machine state to be restored by notifyInterestedDriversDone() - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); MS_PUSH(nextMS); - fMachineState = kIOPM_DriverThreadCallDone; - fDriverCallReason = fIsPreChange ? - kDriverCallInformPreChange : kDriverCallInformPostChange; + fMachineState = kIOPM_DriverThreadCallDone; + fDriverCallReason = fIsPreChange ? + kDriverCallInformPreChange : kDriverCallInformPostChange; - if (!notifyInterestedDrivers()) - notifyInterestedDriversDone(); + if (!notifyInterestedDrivers()) + notifyInterestedDriversDone(); - return IOPMWillAckLater; + return IOPMWillAckLater; } //********************************************************************************* @@ -3796,9 +3779,9 @@ IOReturn IOService::notifyAll( uint32_t nextMS ) //********************************************************************************* IOReturn IOService::actionDriverCalloutDone( - OSObject * target, - void * arg0, void * arg1, - void * arg2, void * arg3 ) + OSObject * target, + void * arg0, void * arg1, + void * arg2, void * arg3 ) { IOServicePM * pwrMgt = (IOServicePM *) arg0; @@ -3813,30 +3796,30 @@ IOReturn IOService::actionDriverCalloutDone( void IOService::pmDriverCallout( IOService * from ) { - assert(from); - switch (from->fDriverCallReason) - { - case kDriverCallSetPowerState: - from->driverSetPowerState(); - break; + assert(from); + switch (from->fDriverCallReason) + { + case kDriverCallSetPowerState: + from->driverSetPowerState(); + break; - case kDriverCallInformPreChange: - case kDriverCallInformPostChange: - from->driverInformPowerChange(); - break; + case kDriverCallInformPreChange: + case kDriverCallInformPostChange: + from->driverInformPowerChange(); + break; case kRootDomainInformPreChange: getPMRootDomain()->willNotifyPowerChildren(from->fHeadNotePowerState); break; - default: - panic("IOService::pmDriverCallout bad machine state %x", + default: + panic("IOService::pmDriverCallout bad machine state %x", from->fDriverCallReason); - } + } - gIOPMWorkLoop->runAction(actionDriverCalloutDone, - /* target */ from, - /* arg0 */ (void *) from->pwrMgt ); + gIOPMWorkLoop->runAction(actionDriverCalloutDone, + /* target */ from, + /* arg0 */ (void *) from->pwrMgt ); } //********************************************************************************* @@ -3848,7 +3831,7 @@ void IOService::pmDriverCallout( IOService * from ) void IOService::driverSetPowerState( void ) { IOPMPowerStateIndex powerState; - DriverCallParam * param; + DriverCallParam * param; IOPMDriverCallEntry callEntry; AbsoluteTime end; IOReturn result; @@ -3871,10 +3854,17 @@ void IOService::driverSetPowerState( void ) deassertPMDriverCall(&callEntry); + // Record the most recent max power state residency timings. + // Use with DeviceActiveTimestamp to diagnose tickle issues. 
+ if (powerState == fHighestPowerState) + fMaxPowerStateEntryTime = end; + else if (oldPowerState == fHighestPowerState) + fMaxPowerStateExitTime = end; + if (result < 0) { PM_LOG("%s::setPowerState(%p, %lu -> %lu) returned 0x%x\n", - fName, this, fCurrentPowerState, powerState, result); + fName, OBFUSCATE(this), fCurrentPowerState, powerState, result); } #if LOG_SETPOWER_TIMES @@ -3884,21 +3874,11 @@ void IOService::driverSetPowerState( void ) SUB_ABSOLUTETIME(&end, &fDriverCallStartTime); absolutetime_to_nanoseconds(end, &nsec); - if (nsec > LOG_SETPOWER_TIMES) - PM_LOG("%s::setPowerState(%p, %lu -> %lu) took %d ms\n", - fName, this, fCurrentPowerState, powerState, NS_TO_MS(nsec)); - - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeSetPowerStateImmediate, // type - fName, // who - (uintptr_t)this, // owner unique - NULL, // interest name - (uint8_t)oldPowerState, // old - (uint8_t)powerState, // new - 0, // result - NS_TO_US(nsec)); // usec completion time - - getPMRootDomain()->recordAndReleasePMEvent( details ); + if (nsec > LOG_SETPOWER_TIMES) { + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsDriverPSChangeSlow, + fName, kDriverCallSetPowerState, NS_TO_MS(nsec), 0, NULL, powerState); + } } #endif } @@ -3916,15 +3896,15 @@ void IOService::driverSetPowerState( void ) void IOService::driverInformPowerChange( void ) { - IOPMinformee * informee; - IOService * driver; - DriverCallParam * param; + IOPMinformee * informee; + IOService * driver; + DriverCallParam * param; IOPMDriverCallEntry callEntry; - IOPMPowerFlags powerFlags; + IOPMPowerFlags powerFlags; IOPMPowerStateIndex powerState; AbsoluteTime end; IOReturn result; - IOItemCount count; + IOItemCount count; assert( fDriverCallBusy ); assert( fDriverCallParamPtr ); @@ -3959,7 +3939,7 @@ void IOService::driverInformPowerChange( void ) clock_get_uptime(&end); OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (uintptr_t) this, result); } - + deassertPMDriverCall(&callEntry); #if LOG_SETPOWER_TIMES @@ -3969,27 +3949,11 @@ void IOService::driverInformPowerChange( void ) SUB_ABSOLUTETIME(&end, &informee->startTime); absolutetime_to_nanoseconds(end, &nsec); - if (nsec > LOG_SETPOWER_TIMES) - PM_LOG("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) took %d ms\n", - driver->getName(), - (fDriverCallReason == kDriverCallInformPreChange) ? "Will" : "Did", - driver, fName, fCurrentPowerState, powerState, NS_TO_MS(nsec)); - - uint16_t logType = (fDriverCallReason == kDriverCallInformPreChange) - ? 
kIOPMEventTypePSWillChangeTo - : kIOPMEventTypePSDidChangeTo; - - PMEventDetails *details = PMEventDetails::eventDetails( - logType, // type - fName, // who - (uintptr_t)this, // owner unique - driver->getName(), // interest name - (uint8_t)fCurrentPowerState, // old - (uint8_t)fHeadNotePowerState, // new - 0, // result - NS_TO_US(nsec)); // usec completion time - - getPMRootDomain()->recordAndReleasePMEvent( details ); + if (nsec > LOG_SETPOWER_TIMES) { + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsDriverPSChangeSlow, driver->getName(), + fDriverCallReason, NS_TO_MS(nsec), 0, NULL, powerState); + } } #endif } @@ -4013,11 +3977,11 @@ bool IOService::notifyChild( IOPowerConnection * theNub ) IOReturn ret = IOPMAckImplied; unsigned long childPower; IOService * theChild; - IOPMRequest * childRequest; + IOPMRequest * childRequest; IOPMPowerChangeFlags requestArg2; - int requestType; + int requestType; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); theChild = (IOService *)(theNub->copyChildEntry(gIOPowerPlane)); if (!theChild) { @@ -4026,7 +3990,7 @@ bool IOService::notifyChild( IOPowerConnection * theNub ) // Unless the child handles the notification immediately and returns // kIOPMAckImplied, we'll be awaiting their acknowledgement later. - fHeadNotePendingAcks++; + fHeadNotePendingAcks++; theNub->setAwaitingAck(true); requestArg2 = fHeadNoteChangeFlags; @@ -4037,21 +4001,21 @@ bool IOService::notifyChild( IOPowerConnection * theNub ) kIOPMRequestTypePowerDomainWillChange : kIOPMRequestTypePowerDomainDidChange; - childRequest = acquirePMRequest( theChild, requestType ); - if (childRequest) - { + childRequest = acquirePMRequest( theChild, requestType ); + if (childRequest) + { theNub->retain(); - childRequest->fArg0 = (void *) fHeadNotePowerArrayEntry->outputPowerFlags; - childRequest->fArg1 = (void *) theNub; - childRequest->fArg2 = (void *)(uintptr_t) requestArg2; - theChild->submitPMRequest( childRequest ); - ret = IOPMWillAckLater; - } - else - { - ret = IOPMAckImplied; - fHeadNotePendingAcks--; - theNub->setAwaitingAck(false); + childRequest->fArg0 = (void *) fHeadNotePowerArrayEntry->outputPowerFlags; + childRequest->fArg1 = (void *) theNub; + childRequest->fArg2 = (void *)(uintptr_t) requestArg2; + theChild->submitPMRequest( childRequest ); + ret = IOPMWillAckLater; + } + else + { + ret = IOPMAckImplied; + fHeadNotePendingAcks--; + theNub->setAwaitingAck(false); childPower = theChild->currentPowerConsumption(); if ( childPower == kIOPMUnknown ) { @@ -4063,7 +4027,7 @@ bool IOService::notifyChild( IOPowerConnection * theNub ) } theChild->release(); - return (IOPMAckImplied == ret); + return (IOPMAckImplied == ret); } //********************************************************************************* @@ -4072,7 +4036,7 @@ bool IOService::notifyChild( IOPowerConnection * theNub ) bool IOService::notifyControllingDriver( void ) { - DriverCallParam * param; + DriverCallParam * param; PM_ASSERT_IN_GATE(); assert( fDriverCallParamCount == 0 ); @@ -4095,7 +4059,7 @@ bool IOService::notifyControllingDriver( void ) { param = IONew(DriverCallParam, 1); if (!param) - return false; // no memory + return false; // no memory fDriverCallParamPtr = (void *) param; fDriverCallParamSlots = 1; @@ -4119,28 +4083,28 @@ bool IOService::notifyControllingDriver( void ) void IOService::notifyControllingDriverDone( void ) { - DriverCallParam * param; - IOReturn result; + DriverCallParam * param; + IOReturn result; + + PM_ASSERT_IN_GATE(); + param = (DriverCallParam *) fDriverCallParamPtr; - 
PM_ASSERT_IN_GATE(); - param = (DriverCallParam *) fDriverCallParamPtr; + assert( fDriverCallBusy == false ); + assert( fMachineState == kIOPM_DriverThreadCallDone ); - assert( fDriverCallBusy == false ); - assert( fMachineState == kIOPM_DriverThreadCallDone ); + if (param && fDriverCallParamCount) + { + assert(fDriverCallParamCount == 1); - if (param && fDriverCallParamCount) - { - assert(fDriverCallParamCount == 1); - - // the return value from setPowerState() - result = param->Result; + // the return value from setPowerState() + result = param->Result; - if ((result == IOPMAckImplied) || (result < 0)) - { + if ((result == IOPMAckImplied) || (result < 0)) + { fDriverTimer = 0; - } - else if (fDriverTimer) - { + } + else if (fDriverTimer) + { assert(fDriverTimer == -1); // Driver has not acked, and has returned a positive result. @@ -4154,17 +4118,17 @@ void IOService::notifyControllingDriverDone( void ) result = kMinAckTimeoutTicks; fDriverTimer = (result / (ACK_TIMER_PERIOD / ns_per_us)) + 1; - } - // else, child has already acked and driver_timer reset to 0. + } + // else, child has already acked and driver_timer reset to 0. - fDriverCallParamCount = 0; + fDriverCallParamCount = 0; - if ( fDriverTimer ) - { - OUR_PMLog(kPMLogStartAckTimer, 0, 0); - start_ack_timer(); - } - } + if ( fDriverTimer ) + { + OUR_PMLog(kPMLogStartAckTimer, 0, 0); + start_ack_timer(); + } + } MS_POP(); // pushed by OurChangeSetPowerState() fIsPreChange = false; @@ -4236,7 +4200,7 @@ void IOService::all_done( void ) trackSystemSleepPreventers( fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags); - // we changed, tell our parent + // we changed, tell our parent requestDomainPower(fHeadNotePowerState); // yes, did power raise? @@ -4293,10 +4257,10 @@ void IOService::all_done( void ) { if (fHeadNoteChangeFlags & kIOPMRootChangeDown) ParentChangeRootChangeDown(); - + if (((fHeadNoteChangeFlags & kIOPMDomainWillChange) && (StateOrder(fCurrentPowerState) >= StateOrder(fHeadNotePowerState))) || - ((fHeadNoteChangeFlags & kIOPMDomainDidChange) && + ((fHeadNoteChangeFlags & kIOPMDomainDidChange) && (StateOrder(fCurrentPowerState) < StateOrder(fHeadNotePowerState)))) { trackSystemSleepPreventers( @@ -4368,12 +4332,12 @@ void IOService::all_done( void ) void IOService::OurChangeStart( void ) { - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); OUR_PMLog( kPMLogStartDeviceChange, fHeadNotePowerState, fCurrentPowerState ); - // fMaxPowerState is our maximum possible power state based on the current - // power state of our parents. If we are trying to raise power beyond the - // maximum, send an async request for more power to all parents. + // fMaxPowerState is our maximum possible power state based on the current + // power state of our parents. If we are trying to raise power beyond the + // maximum, send an async request for more power to all parents. if (!IS_PM_ROOT && (StateOrder(fMaxPowerState) < StateOrder(fHeadNotePowerState))) { @@ -4383,12 +4347,12 @@ void IOService::OurChangeStart( void ) return; } - // Redundant power changes skips to the end of the state machine. + // Redundant power changes skips to the end of the state machine. 
if (!fInitialPowerChange && (fHeadNotePowerState == fCurrentPowerState)) - { - OurChangeFinish(); - return; + { + OurChangeFinish(); + return; } fInitialPowerChange = false; @@ -4397,8 +4361,8 @@ void IOService::OurChangeStart( void ) PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags); - // Two separate paths, depending if power is being raised or lowered. - // Lowering power is subject to approval by clients of this service. + // Two separate paths, depending on whether power is being raised or lowered. + // Lowering power is subject to approval by clients of this service. if (IS_POWER_DROP) { @@ -4409,8 +4373,8 @@ void IOService::OurChangeStart( void ) fOutOfBandParameter = kNotifyApps; askChangeDown(fHeadNotePowerState); } - else - { + else + { // This service is raising power and parents are able to support the // new power state. However a parent may have already committed to // drop power, which might force this object to temporarily drop power. @@ -4495,7 +4459,7 @@ IOReturn IOService::requestDomainPower( IOPMPowerStateIndex maxPowerState; IOPMRequestDomainPowerContext context; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); assert(ourPowerState < fNumberOfPowerStates); if (ourPowerState >= fNumberOfPowerStates) return kIOReturnBadArgument; @@ -4505,7 +4469,7 @@ IOReturn IOService::requestDomainPower( // Fetch our input power flags for the requested power state. // Parent request is stated in terms of required power flags. - requestPowerFlags = fPowerStates[ourPowerState].inputPowerFlags; + requestPowerFlags = fPowerStates[ourPowerState].inputPowerFlags; // Disregard the "previous request" for power reservation. @@ -4548,7 +4512,7 @@ done: void IOService::OurSyncStart( void ) { - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); if (fInitialPowerChange) return; @@ -4557,8 +4521,8 @@ void IOService::OurSyncStart( void ) if (fHeadNoteChangeFlags & kIOPMNotDone) { - OurChangeFinish(); - return; + OurChangeFinish(); + return; } if (fHeadNoteChangeFlags & kIOPMSyncTellPowerDown) @@ -4581,7 +4545,7 @@ void IOService::OurSyncStart( void ) // [private] OurChangeTellClientsPowerDown // // All applications and kernel clients have acknowledged our permission to drop -// power. Here we notify them that we will lower the power and wait for acks. 
//********************************************************************************* void IOService::OurChangeTellClientsPowerDown( void ) @@ -4653,13 +4617,6 @@ void IOService::OurChangeNotifyInterestedDriversWillChange( void ) if (IS_POWER_DROP) { rootDomain->tracePoint( kIOPMTracePointSleepWillChangeInterests ); - - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeAppNotificationsFinished, - NULL, - 100, - kIOReturnSuccess); - rootDomain->recordAndReleasePMEvent( details ); } else rootDomain->tracePoint( kIOPMTracePointWakeWillChangeInterests ); @@ -4758,7 +4715,7 @@ void IOService::OurChangeFinish( void ) IOReturn IOService::ParentChangeStart( void ) { - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); OUR_PMLog( kPMLogStartParentChange, fHeadNotePowerState, fCurrentPowerState ); // Root power domain has transitioned to its max power state @@ -4777,10 +4734,10 @@ IOReturn IOService::ParentChangeStart( void ) { PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags); - // Tell apps and kernel clients - fInitialPowerChange = false; + // Tell apps and kernel clients + fInitialPowerChange = false; fMachineState = kIOPM_ParentChangeTellPriorityClientsPowerDown; - tellChangeDown1(fHeadNotePowerState); + tellChangeDown1(fHeadNotePowerState); return IOPMWillAckLater; } @@ -4793,26 +4750,26 @@ IOReturn IOService::ParentChangeStart( void ) { // We power up, but not all the way fHeadNotePowerState = fDesiredPowerState; - fHeadNotePowerArrayEntry = &fPowerStates[fDesiredPowerState]; + fHeadNotePowerArrayEntry = &fPowerStates[fDesiredPowerState]; OUR_PMLog(kPMLogAmendParentChange, fHeadNotePowerState, 0); } } else { // We don't need to change fHeadNotePowerState = fCurrentPowerState; - fHeadNotePowerArrayEntry = &fPowerStates[fCurrentPowerState]; + fHeadNotePowerArrayEntry = &fPowerStates[fCurrentPowerState]; OUR_PMLog(kPMLogAmendParentChange, fHeadNotePowerState, 0); } } if ( fHeadNoteChangeFlags & kIOPMDomainDidChange ) - { + { if ( StateOrder(fHeadNotePowerState) > StateOrder(fCurrentPowerState) ) { PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags); // Parent did change up - start our change up - fInitialPowerChange = false; + fInitialPowerChange = false; ParentChangeTellCapabilityWillChange(); return IOPMWillAckLater; } @@ -4938,7 +4895,7 @@ void IOService::ParentChangeRootChangeDown( void ) void IOService::ParentChangeTellPriorityClientsPowerDown( void ) { fMachineState = kIOPM_ParentChangeNotifyInterestedDriversWillChange; - tellChangeDown2(fHeadNotePowerState); + tellChangeDown2(fHeadNotePowerState); } //********************************************************************************* @@ -4965,7 +4922,7 @@ void IOService::ParentChangeTellCapabilityWillChange( void ) void IOService::ParentChangeNotifyInterestedDriversWillChange( void ) { - notifyAll( kIOPM_ParentChangeSetPowerState ); + notifyAll( kIOPM_ParentChangeSetPowerState ); } //********************************************************************************* @@ -4994,7 +4951,7 @@ void IOService::ParentChangeSetPowerState( void ) void IOService::ParentChangeWaitForPowerSettle( void ) { - fMachineState = kIOPM_ParentChangeNotifyInterestedDriversDidChange; + fMachineState = kIOPM_ParentChangeNotifyInterestedDriversDidChange; startSettleTimer(); } @@ -5007,7 +4964,7 @@ void IOService::ParentChangeWaitForPowerSettle( void ) void IOService::ParentChangeNotifyInterestedDriversDidChange( void ) { - notifyAll( kIOPM_ParentChangeTellCapabilityDidChange ); + notifyAll( 
kIOPM_ParentChangeTellCapabilityDidChange ); } //********************************************************************************* @@ -5027,13 +4984,13 @@ void IOService::ParentChangeTellCapabilityDidChange( void ) //********************************************************************************* // [private] ParentAcknowledgePowerChange // -// Acknowledge our power parent that our power change is done. +// Acknowledge to our power parent that our power change is done. //********************************************************************************* void IOService::ParentChangeAcknowledgePowerChange( void ) { - IORegistryEntry * nub; - IOService * parent; + IORegistryEntry * nub; + IOService * parent; nub = fHeadNoteParentConnection; nub->retain(); @@ -5059,8 +5016,8 @@ void IOService::ParentChangeAcknowledgePowerChange( void ) void IOService::settleTimerExpired( void ) { - fSettleTimeUS = 0; - gIOPMWorkQueue->signalWorkAvailable(); + fSettleTimeUS = 0; + gIOPMWorkQueue->signalWorkAvailable(); } //********************************************************************************* @@ -5072,15 +5029,15 @@ void IOService::settleTimerExpired( void ) static void settle_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) { - IOService * me = (IOService *) arg0; + IOService * me = (IOService *) arg0; - if (gIOPMWorkLoop && gIOPMWorkQueue) - { - gIOPMWorkLoop->runAction( + if (gIOPMWorkLoop && gIOPMWorkQueue) + { + gIOPMWorkLoop->runAction( OSMemberFunctionCast(IOWorkLoop::Action, me, &IOService::settleTimerExpired), me); - } - me->release(); + } + me->release(); } //********************************************************************************* @@ -5101,9 +5058,9 @@ void IOService::startSettleTimer( void ) IOPMPowerStateIndex stateIndex; IOPMPowerStateIndex currentOrder, newOrder, i; uint32_t settleTime = 0; - boolean_t pending; + boolean_t pending; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); currentOrder = StateOrder(fCurrentPowerState); newOrder = StateOrder(fHeadNotePowerState); @@ -5160,16 +5117,16 @@ void IOService::startSettleTimer( void ) #ifndef __LP64__ void IOService::ack_timer_ticked ( void ) { - assert(false); + assert(false); } #endif /* !__LP64__ */ bool IOService::ackTimerTick( void ) { - IOPMinformee * nextObject; - bool done = false; + IOPMinformee * nextObject; + bool done = false; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); switch (fMachineState) { case kIOPM_OurChangeWaitForPowerSettle: case kIOPM_ParentChangeWaitForPowerSettle: @@ -5187,30 +5144,16 @@ bool IOService::ackTimerTick( void ) PM_ERROR("%s::setPowerState(%p, %lu -> %lu) timed out after %d ms\n", fName, OBFUSCATE(this), fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); -#if LOG_SETPOWER_TIMES - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeSetPowerStateDelayed, // type - fName, // who - (uintptr_t)this, // owner unique - NULL, // interest name - (uint8_t)getPowerState(), // old - 0, // new - kIOReturnTimeout, // result - NS_TO_US(nsec)); // usec completion time - - getPMRootDomain()->recordAndReleasePMEvent( details ); -#endif - if (gIOKitDebug & kIOLogDebugPower) { panic("%s::setPowerState(%p, %lu -> %lu) timed out after %d ms", fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); } else - { - // Unblock state machine and pretend driver has acked. 
+ done = true; + } } else { // still waiting, set timer again start_ack_timer(); @@ -5242,24 +5185,6 @@ bool IOService::ackTimerTick( void ) OBFUSCATE(nextObject->whatObject), fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); -#if LOG_SETPOWER_TIMES - uint16_t logType = (fDriverCallReason == kDriverCallInformPreChange) - ? kIOPMEventTypePSWillChangeTo - : kIOPMEventTypePSDidChangeTo; - - PMEventDetails *details = PMEventDetails::eventDetails( - logType, // type - fName, // who - (uintptr_t)this, // owner unique - nextObject->whatObject->getName(), // interest name - (uint8_t)fCurrentPowerState, // old - (uint8_t)fHeadNotePowerState, // new - kIOReturnTimeout, // result - NS_TO_US(nsec)); // usec completion time - - getPMRootDomain()->recordAndReleasePMEvent( details ); -#endif - // Pretend driver has acked. fHeadNotePendingAcks--; } @@ -5271,7 +5196,7 @@ bool IOService::ackTimerTick( void ) if ( fHeadNotePendingAcks == 0 ) { // yes, we can continue - done = true; + done = true; } else { // no, set timer again start_ack_timer(); @@ -5290,19 +5215,19 @@ bool IOService::ackTimerTick( void ) case kIOPM_SyncTellPriorityClientsPowerDown: case kIOPM_SyncNotifyWillChange: case kIOPM_TellCapabilityChangeDone: - // apps didn't respond in time + // apps didn't respond in time cleanClientResponses(true); OUR_PMLog(kPMLogClientTardy, 0, 1); - // tardy equates to approval - done = true; + // tardy equates to approval + done = true; break; default: PM_LOG1("%s: unexpected ack timer tick (state = %d)\n", - getName(), fMachineState); + getName(), fMachineState); break; } - return done; + return done; } //********************************************************************************* @@ -5313,8 +5238,7 @@ void IOService::start_watchdog_timer( void ) AbsoluteTime deadline; boolean_t pending; - if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug) || - (getPMRootDomain()->sleepWakeDebugIsWdogEnabled() == false)) + if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug)) return; if (thread_call_isactive(fWatchdogTimer)) return; @@ -5336,8 +5260,7 @@ bool IOService::stop_watchdog_timer( void ) { boolean_t pending; - if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug) || - (getPMRootDomain()->sleepWakeDebugIsWdogEnabled() == false)) + if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug)) return false; pending = thread_call_cancel(fWatchdogTimer); @@ -5366,15 +5289,16 @@ void IOService::reset_watchdog_timer( void ) void IOService::watchdog_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) { - IOService * me = (IOService *) arg0; + IOService * me = (IOService *) arg0; gIOPMWatchDogThread = current_thread(); getPMRootDomain()->sleepWakeDebugTrig(true); gIOPMWatchDogThread = 0; - me->release(); + thread_call_free(me->fWatchdogTimer); + me->fWatchdogTimer = 0; - return ; + return ; } @@ -5384,7 +5308,7 @@ IOService::watchdog_timer_expired( thread_call_param_t arg0, thread_call_param_t void IOService::start_ack_timer( void ) { - start_ack_timer( ACK_TIMER_PERIOD, kNanosecondScale ); + start_ack_timer( ACK_TIMER_PERIOD, kNanosecondScale ); } void IOService::start_ack_timer ( UInt32 interval, UInt32 scale ) @@ -5455,13 +5379,13 @@ IOService::actionAckTimerExpired( void IOService::ack_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) { - IOService * me = (IOService *) arg0; + IOService * me = (IOService *) arg0; - if (gIOPMWorkLoop) - { - gIOPMWorkLoop->runAction(&actionAckTimerExpired, me); - } - me->release(); + if (gIOPMWorkLoop) + { + 
gIOPMWorkLoop->runAction(&actionAckTimerExpired, me); + } + me->release(); } // MARK: - @@ -5473,7 +5397,7 @@ IOService::ack_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 void IOService::tellSystemCapabilityChange( uint32_t nextMS ) { - MS_PUSH( nextMS ); + MS_PUSH( nextMS ); fMachineState = kIOPM_TellCapabilityChangeDone; fOutOfBandMessage = kIOMessageSystemCapabilityChange; @@ -5579,21 +5503,21 @@ static void logAppTimeouts( OSObject * object, void * arg ) (flag = context->responseArray->getObject(clientIndex)) && (flag != kOSBooleanTrue)) { - OSNumber *clientID = copyClientIDForNotification(object, context); - + OSNumber *clientID = copyClientIDForNotification(object, context); + name[0] = '\0'; if (clientID) { pid = clientID->unsigned32BitValue(); proc_name(pid, name, sizeof(name)); clientID->release(); } - + PM_ERROR(context->errorLog, pid, name); // TODO: record message type if possible IOService::getPMRootDomain()->pmStatsRecordApplicationResponse( gIOPMStatsApplicationResponseTimedOut, - name, 0, (30*1000), pid); + name, 0, (30*1000), pid, object); } } @@ -5666,7 +5590,7 @@ bool IOService::tellClientsWithResponse( int messageType ) IOPMInterestContext context; bool isRootDomain = IS_ROOT_DOMAIN; - PM_ASSERT_IN_GATE(); + PM_ASSERT_IN_GATE(); assert( fResponseArray == NULL ); assert( fNotifyClientArray == NULL ); @@ -5683,7 +5607,7 @@ bool IOService::tellClientsWithResponse( int messageType ) fResponseArray->setCapacityIncrement(8); if (++fSerialNumber == 0) - fSerialNumber++; + fSerialNumber++; context.responseArray = fResponseArray; context.notifyClients = 0; @@ -5706,7 +5630,7 @@ bool IOService::tellClientsWithResponse( int messageType ) switch ( fOutOfBandParameter ) { case kNotifyApps: applyToInterested( gIOAppPowerStateInterest, - pmTellAppWithResponse, (void *) &context ); + pmTellAppWithResponse, (void *) &context ); if (isRootDomain && (fMachineState != kIOPM_OurChangeTellClientsPowerDown) && @@ -5718,14 +5642,14 @@ bool IOService::tellClientsWithResponse( int messageType ) context.notifyType = kNotifyCapabilityChangeApps; context.messageType = kIOMessageSystemCapabilityChange; applyToInterested( gIOAppPowerStateInterest, - pmTellCapabilityAppWithResponse, (void *) &context ); + pmTellCapabilityAppWithResponse, (void *) &context ); context.notifyType = fOutOfBandParameter; context.messageType = messageType; } context.maxTimeRequested = k30Seconds; applyToInterested( gIOGeneralInterest, - pmTellClientWithResponse, (void *) &context ); + pmTellClientWithResponse, (void *) &context ); fNotifyClientArray = context.notifyClients; break; @@ -5733,7 +5657,7 @@ bool IOService::tellClientsWithResponse( int messageType ) case kNotifyPriority: context.enableTracing = isRootDomain; applyToInterested( gIOPriorityPowerStateInterest, - pmTellClientWithResponse, (void *) &context ); + pmTellClientWithResponse, (void *) &context ); if (isRootDomain) { @@ -5747,14 +5671,14 @@ bool IOService::tellClientsWithResponse( int messageType ) case kNotifyCapabilityChangeApps: applyToInterested( gIOAppPowerStateInterest, - pmTellCapabilityAppWithResponse, (void *) &context ); + pmTellCapabilityAppWithResponse, (void *) &context ); fNotifyClientArray = context.notifyClients; context.maxTimeRequested = k30Seconds; break; case kNotifyCapabilityChangePriority: applyToInterested( gIOPriorityPowerStateInterest, - pmTellCapabilityClientWithResponse, (void *) &context ); + pmTellCapabilityClientWithResponse, (void *) &context ); break; } @@ -5764,7 +5688,7 @@ bool 
IOService::tellClientsWithResponse( int messageType ) OUR_PMLog(kPMLogStartAckTimer, context.maxTimeRequested, 0); if (context.enableTracing) getPMRootDomain()->traceDetail( context.maxTimeRequested / 1000 ); - start_ack_timer( context.maxTimeRequested / 1000, kMillisecondScale ); + start_ack_timer( context.maxTimeRequested / 1000, kMillisecondScale ); return false; } @@ -5799,6 +5723,7 @@ void IOService::pmTellAppWithResponse( OSObject * object, void * arg ) OSNumber *clientID = NULL; proc_t proc = NULL; boolean_t proc_suspended = FALSE; + OSObject * waitForReply = kOSBooleanTrue; #if LOG_APP_RESPONSE_TIMES AbsoluteTime now; #endif @@ -5827,9 +5752,9 @@ void IOService::pmTellAppWithResponse( OSObject * object, void * arg ) } } } - + if (context->messageFilter && - !context->messageFilter(context->us, object, context, 0, 0)) + !context->messageFilter(context->us, object, context, 0, &waitForReply)) { if (kIOLogDebugPower & gIOKitDebug) { @@ -5853,18 +5778,29 @@ void IOService::pmTellAppWithResponse( OSObject * object, void * arg ) logClientIDForNotification(object, context, "MESG App"); } + if (waitForReply == kOSBooleanTrue) + { #if LOG_APP_RESPONSE_TIMES - OSNumber * num; - clock_get_uptime(&now); - num = OSNumber::withNumber(AbsoluteTime_to_scalar(&now), sizeof(uint64_t) * 8); - if (num) + OSNumber * num; + clock_get_uptime(&now); + num = OSNumber::withNumber(AbsoluteTime_to_scalar(&now), sizeof(uint64_t) * 8); + if (num) + { + context->responseArray->setObject(msgIndex, num); + num->release(); + } + else +#endif + context->responseArray->setObject(msgIndex, kOSBooleanFalse); + } + else { - context->responseArray->setObject(msgIndex, num); - num->release(); + context->responseArray->setObject(msgIndex, kOSBooleanTrue); + if (kIOLogDebugPower & gIOKitDebug) + { + logClientIDForNotification(object, context, "App response ignored"); + } } - else -#endif - context->responseArray->setObject(msgIndex, kOSBooleanFalse); if (context->notifyClients) context->notifyClients->setObject(msgIndex, object); @@ -5899,7 +5835,7 @@ void IOService::pmTellClientWithResponse( OSObject * object, void * arg ) context->us->getName(), getIOMessageString(context->messageType), OBFUSCATE(object), OBFUSCATE(n->handler)); - } + } return; } @@ -5910,13 +5846,13 @@ void IOService::pmTellClientWithResponse( OSObject * object, void * arg ) IOServicePM * pwrMgt = context->us->pwrMgt; if (gIOKitDebug & kIOLogPower) { - OUR_PMLog(kPMLogClientNotify, msgRef, msgType); - if (OSDynamicCast(IOService, object)) { - const char *who = ((IOService *) object)->getName(); - gPlatform->PMLog(who, kPMLogClientNotify, (uintptr_t) object, 0); - } + OUR_PMLog(kPMLogClientNotify, msgRef, msgType); + if (OSDynamicCast(IOService, object)) { + const char *who = ((IOService *) object)->getName(); + gPlatform->PMLog(who, kPMLogClientNotify, (uintptr_t) object, 0); + } else if (notifier) { - OUR_PMLog(kPMLogClientNotify, (uintptr_t) notifier->handler, 0); + OUR_PMLog(kPMLogClientNotify, (uintptr_t) notifier->handler, 0); } } if ((kIOLogDebugPower & gIOKitDebug) && notifier) @@ -5945,7 +5881,7 @@ void IOService::pmTellClientWithResponse( OSObject * object, void * arg ) if (kIOReturnSuccess == retCode) { if (0 == notify.returnValue) { - OUR_PMLog(kPMLogClientAcknowledge, msgRef, (uintptr_t) object); + OUR_PMLog(kPMLogClientAcknowledge, msgRef, (uintptr_t) object); } else { replied = kOSBooleanFalse; if ( notify.returnValue > context->maxTimeRequested ) @@ -5966,7 +5902,7 @@ void IOService::pmTellClientWithResponse( OSObject * object, void * arg 
) } else { // not a client of ours // so we won't be waiting for response - OUR_PMLog(kPMLogClientAcknowledge, msgRef, 0); + OUR_PMLog(kPMLogClientAcknowledge, msgRef, 0); } context->responseArray->setObject(msgIndex, replied); @@ -6016,7 +5952,7 @@ void IOService::pmTellCapabilityAppWithResponse( OSObject * object, void * arg ) if (clientID) { clientIDString = IOCopyLogNameForPID(clientID->unsigned32BitValue()); } - + PM_LOG("%s MESG App(%u) %s, wait %u, %s\n", context->us->getName(), msgIndex, getIOMessageString(msgType), @@ -6084,7 +6020,7 @@ void IOService::pmTellCapabilityClientWithResponse( context->us->getName(), getIOMessageString(context->messageType), OBFUSCATE(object), OBFUSCATE(n->handler)); - } + } return; } @@ -6095,14 +6031,14 @@ void IOService::pmTellCapabilityClientWithResponse( IOServicePM * pwrMgt = context->us->pwrMgt; if (gIOKitDebug & kIOLogPower) { - OUR_PMLog(kPMLogClientNotify, msgRef, msgType); - if (OSDynamicCast(IOService, object)) { - const char *who = ((IOService *) object)->getName(); - gPlatform->PMLog(who, kPMLogClientNotify, (uintptr_t) object, 0); - } + OUR_PMLog(kPMLogClientNotify, msgRef, msgType); + if (OSDynamicCast(IOService, object)) { + const char *who = ((IOService *) object)->getName(); + gPlatform->PMLog(who, kPMLogClientNotify, (uintptr_t) object, 0); + } else if (notifier) { - OUR_PMLog(kPMLogClientNotify, (uintptr_t) notifier->handler, 0); - } + OUR_PMLog(kPMLogClientNotify, (uintptr_t) notifier->handler, 0); + } } if ((kIOLogDebugPower & gIOKitDebug) && notifier) { @@ -6131,7 +6067,7 @@ void IOService::pmTellCapabilityClientWithResponse( if ( 0 == msgArg.maxWaitForReply ) { // client doesn't want time to respond - OUR_PMLog(kPMLogClientAcknowledge, msgRef, (uintptr_t) object); + OUR_PMLog(kPMLogClientAcknowledge, msgRef, (uintptr_t) object); } else { @@ -6156,7 +6092,7 @@ void IOService::pmTellCapabilityClientWithResponse( { // not a client of ours // so we won't be waiting for response - OUR_PMLog(kPMLogClientAcknowledge, msgRef, 0); + OUR_PMLog(kPMLogClientAcknowledge, msgRef, 0); } context->responseArray->setObject(msgIndex, replied); @@ -6236,7 +6172,7 @@ void IOService::tellClients( int messageType ) static void tellKernelClientApplier( OSObject * object, void * arg ) { - IOPowerStateChangeNotification notify; + IOPowerStateChangeNotification notify; IOPMInterestContext * context = (IOPMInterestContext *) arg; if (context->messageFilter && @@ -6250,14 +6186,14 @@ static void tellKernelClientApplier( OSObject * object, void * arg ) context->us->getName(), IOService::getIOMessageString(context->messageType), OBFUSCATE(object), OBFUSCATE(n->handler)); - } + } return; } notify.powerRef = (void *) 0; - notify.returnValue = 0; - notify.stateNumber = context->stateNumber; - notify.stateFlags = context->stateFlags; + notify.returnValue = 0; + notify.stateNumber = context->stateNumber; + notify.stateFlags = context->stateFlags; context->us->messageClient(context->messageType, object, &notify, sizeof(notify)); @@ -6273,7 +6209,7 @@ static void tellKernelClientApplier( OSObject * object, void * arg ) } static OSNumber * copyClientIDForNotification( - OSObject *object, + OSObject *object, IOPMInterestContext *context) { OSNumber *clientID = NULL; @@ -6283,27 +6219,27 @@ static OSNumber * copyClientIDForNotification( static void logClientIDForNotification( OSObject *object, - IOPMInterestContext *context, + IOPMInterestContext *context, const char *logString) { OSString *logClientID = NULL; - OSNumber *clientID = copyClientIDForNotification(object, 
context); + OSNumber *clientID = copyClientIDForNotification(object, context); - if (logString) + if (logString) { if (clientID) logClientID = IOCopyLogNameForPID(clientID->unsigned32BitValue()); - + PM_LOG("%s %s %s, %s\n", context->us->getName(), logString, IOService::getIOMessageString(context->messageType), logClientID ? logClientID->getCStringNoCopy() : ""); - if (logClientID) + if (logClientID) logClientID->release(); } - - if (clientID) + + if (clientID) clientID->release(); return; @@ -6315,7 +6251,7 @@ static void tellAppClientApplier( OSObject * object, void * arg ) OSNumber * clientID = NULL; proc_t proc = NULL; boolean_t proc_suspended = FALSE; - + if (context->us == IOService::getPMRootDomain()) { if ((clientID = copyClientIDForNotification(object, context))) @@ -6362,13 +6298,13 @@ static void tellAppClientApplier( OSObject * object, void * arg ) bool IOService::checkForDone( void ) { - int i = 0; - OSObject * theFlag; + int i = 0; + OSObject * theFlag; if (fResponseArray == NULL) { return true; } - + for (i = 0; ; i++) { theFlag = fResponseArray->getObject(i); @@ -6389,9 +6325,10 @@ bool IOService::checkForDone( void ) bool IOService::responseValid( uint32_t refcon, int pid ) { - UInt16 serialComponent; - UInt16 ordinalComponent; - OSObject * theFlag; + UInt16 serialComponent; + UInt16 ordinalComponent; + OSObject * theFlag; + OSObject *object = 0; serialComponent = (refcon >> 16) & 0xFFFF; ordinalComponent = (refcon & 0xFFFF); @@ -6400,25 +6337,28 @@ bool IOService::responseValid( uint32_t refcon, int pid ) { return false; } - + if ( fResponseArray == NULL ) { return false; } - + theFlag = fResponseArray->getObject(ordinalComponent); - + if ( theFlag == 0 ) { return false; } + if (fNotifyClientArray) + object = fNotifyClientArray->getObject(ordinalComponent); + OSNumber * num; if ((num = OSDynamicCast(OSNumber, theFlag))) { #if LOG_APP_RESPONSE_TIMES - AbsoluteTime now; - AbsoluteTime start; + AbsoluteTime now; + AbsoluteTime start; uint64_t nsec; char name[128]; @@ -6428,18 +6368,6 @@ bool IOService::responseValid( uint32_t refcon, int pid ) AbsoluteTime_to_scalar(&start) = num->unsigned64BitValue(); SUB_ABSOLUTETIME(&now, &start); absolutetime_to_nanoseconds(now, &nsec); - - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeAppResponse, // type - name, // who - (uintptr_t)pid, // owner unique - NULL, // interest name - 0, // old - 0, // new - 0, // result - NS_TO_US(nsec)); // usec completion time - - getPMRootDomain()->recordAndReleasePMEvent( details ); if (kIOLogDebugPower & gIOKitDebug) { @@ -6451,27 +6379,39 @@ bool IOService::responseValid( uint32_t refcon, int pid ) // > 100 ms if (nsec > LOG_APP_RESPONSE_TIMES) { - PM_LOG("PM response took %d ms (%d, %s)\n", NS_TO_MS(nsec), + IOLog("PM response took %d ms (%d, %s)\n", NS_TO_MS(nsec), pid, name); + } - if (nsec > LOG_APP_RESPONSE_MSG_TRACER) - { - // TODO: populate the messageType argument - getPMRootDomain()->pmStatsRecordApplicationResponse( - gIOPMStatsApplicationResponseSlow, - name, 0, NS_TO_MS(nsec), pid); - } + if (nsec > LOG_APP_RESPONSE_MSG_TRACER) + { + // TODO: populate the messageType argument + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsApplicationResponseSlow, + name, 0, NS_TO_MS(nsec), pid, object); + } + else + { + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsApplicationResponsePrompt, + name, 0, NS_TO_MS(nsec), pid, object); } #endif theFlag = kOSBooleanFalse; } + else if (object) { + 
getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsApplicationResponsePrompt, + 0, 0, 0, pid, object); + + } - if ( kOSBooleanFalse == theFlag ) + if ( kOSBooleanFalse == theFlag ) { fResponseArray->replaceObject(ordinalComponent, kOSBooleanTrue); } - + return true; } @@ -6485,7 +6425,7 @@ bool IOService::responseValid( uint32_t refcon, int pid ) IOReturn IOService::allowPowerChange( unsigned long refcon ) { - IOPMRequest * request; + IOPMRequest * request; if ( !initialized ) { @@ -6493,23 +6433,23 @@ IOReturn IOService::allowPowerChange( unsigned long refcon ) return kIOReturnSuccess; } - request = acquirePMRequest( this, kIOPMRequestTypeAllowPowerChange ); - if (!request) - return kIOReturnNoMemory; + request = acquirePMRequest( this, kIOPMRequestTypeAllowPowerChange ); + if (!request) + return kIOReturnNoMemory; - request->fArg0 = (void *) refcon; - request->fArg1 = (void *)(uintptr_t) proc_selfpid(); - request->fArg2 = (void *) 0; - submitPMRequest( request ); + request->fArg0 = (void *) refcon; + request->fArg1 = (void *)(uintptr_t) proc_selfpid(); + request->fArg2 = (void *) 0; + submitPMRequest( request ); - return kIOReturnSuccess; + return kIOReturnSuccess; } #ifndef __LP64__ IOReturn IOService::serializedAllowPowerChange2( unsigned long refcon ) { - // [deprecated] public - return kIOReturnUnsupported; + // [deprecated] public + return kIOReturnUnsupported; } #endif /* !__LP64__ */ @@ -6523,7 +6463,7 @@ IOReturn IOService::serializedAllowPowerChange2( unsigned long refcon ) IOReturn IOService::cancelPowerChange( unsigned long refcon ) { - IOPMRequest * request; + IOPMRequest * request; char name[128]; pid_t pid = proc_selfpid(); @@ -6537,8 +6477,8 @@ IOReturn IOService::cancelPowerChange( unsigned long refcon ) proc_name(pid, name, sizeof(name)); PM_ERROR("PM notification cancel (pid %d, %s)\n", pid, name); - request = acquirePMRequest( this, kIOPMRequestTypeCancelPowerChange ); - if (!request) + request = acquirePMRequest( this, kIOPMRequestTypeCancelPowerChange ); + if (!request) { return kIOReturnNoMemory; } @@ -6554,8 +6494,8 @@ IOReturn IOService::cancelPowerChange( unsigned long refcon ) #ifndef __LP64__ IOReturn IOService::serializedCancelPowerChange2( unsigned long refcon ) { - // [deprecated] public - return kIOReturnUnsupported; + // [deprecated] public + return kIOReturnUnsupported; } //********************************************************************************* @@ -6592,17 +6532,17 @@ IOReturn IOService::configurePowerStatesReport( IOReportConfigureAction action, unsigned long i; uint64_t ts; - if (!pwrMgt) + if (!pwrMgt) return kIOReturnUnsupported; if (!fNumberOfPowerStates) return kIOReturnSuccess; // For drivers which are in power plane, but haven't called registerPowerDriver() PM_LOCK(); - switch (action) + switch (action) { case kIOReportEnable: - if (fReportBuf) + if (fReportBuf) { fReportClientCnt++; break; @@ -6628,7 +6568,7 @@ IOReturn IOService::configurePowerStatesReport( IOReportConfigureAction action, if (fPowerStates[i].capabilityFlags & kIOPMLowPower) bits |= kPMReportLowPower; - STATEREPORT_SETSTATEID(fReportBuf, i, ((bits & 0xff) << 8) | + STATEREPORT_SETSTATEID(fReportBuf, i, ((bits & 0xff) << 8) | ((StateOrder(fMaxPowerState) & 0xf) << 4) | (StateOrder(i) & 0xf)); } ts = mach_absolute_time(); @@ -6673,16 +6613,16 @@ IOReturn IOService::updatePowerStatesReport( IOReportConfigureAction action, voi IOBufferMemoryDescriptor *dest = OSDynamicCast(IOBufferMemoryDescriptor, (OSObject *)destination); - if (!pwrMgt) + if (!pwrMgt) return 
kIOReturnUnsupported; - if (!fNumberOfPowerStates) + if (!fNumberOfPowerStates) return kIOReturnSuccess; if ( !result || !dest ) return kIOReturnBadArgument; PM_LOCK(); switch (action) { - case kIOReportCopyChannelData: + case kIOReportCopyChannelData: if ( !fReportBuf ) { rc = kIOReturnNotOpen; break; } @@ -6696,7 +6636,7 @@ IOReturn IOService::updatePowerStatesReport( IOReportConfigureAction action, voi } STATEREPORT_UPDATERES(fReportBuf, kIOReportCopyChannelData, result); - dest->appendBytes(data2cpy, size2cpy); + dest->appendBytes(data2cpy, size2cpy); default: break; @@ -6712,20 +6652,20 @@ IOReturn IOService::updatePowerStatesReport( IOReportConfigureAction action, voi //********************************************************************************* // configureSimplePowerReport // -// Configures the IOSimpleReport for given channel id +// Configures the IOSimpleReport for the given channel id //********************************************************************************* IOReturn IOService::configureSimplePowerReport(IOReportConfigureAction action, void *result ) { IOReturn rc = kIOReturnSuccess; - if ( !pwrMgt ) + if ( !pwrMgt ) return kIOReturnUnsupported; - if ( !fNumberOfPowerStates ) + if ( !fNumberOfPowerStates ) return rc; - switch (action) + switch (action) { case kIOReportEnable: case kIOReportDisable: @@ -6743,28 +6683,28 @@ IOReturn IOService::configureSimplePowerReport(IOReportConfigureAction action, v //********************************************************************************* // updateSimplePowerReport // -// Updates the IOSimpleReport for the given chanel id +// Updates the IOSimpleReport for the given channel id //********************************************************************************* IOReturn IOService::updateSimplePowerReport( IOReportConfigureAction action, void *result, void *destination ) { uint32_t size2cpy; void *data2cpy; - uint64_t buf[SIMPLEREPORT_BUFSIZE/sizeof(uint64_t)+1]; // Force a 8-byte alignment + uint64_t buf[SIMPLEREPORT_BUFSIZE/sizeof(uint64_t)+1]; // Force an 8-byte alignment IOBufferMemoryDescriptor *dest = OSDynamicCast(IOBufferMemoryDescriptor, (OSObject *)destination); IOReturn rc = kIOReturnSuccess; unsigned bits = 0; - if ( !pwrMgt ) + if ( !pwrMgt ) return kIOReturnUnsupported; if ( !result || !dest ) return kIOReturnBadArgument; - if ( !fNumberOfPowerStates ) + if ( !fNumberOfPowerStates ) return rc; PM_LOCK(); switch (action) { - case kIOReportCopyChannelData: + case kIOReportCopyChannelData: SIMPLEREPORT_INIT(buf, sizeof(buf), getRegistryEntryID(), kPMCurrStateChID, kIOReportCategoryPower); @@ -6776,7 +6716,7 @@ IOReturn IOService::updateSimplePowerReport( IOReportConfigureAction action, voi bits |= kPMReportLowPower; - SIMPLEREPORT_SETVALUE(buf, ((bits & 0xff) << 8) | ((StateOrder(fMaxPowerState) & 0xf) << 4) | + SIMPLEREPORT_SETVALUE(buf, ((bits & 0xff) << 8) | ((StateOrder(fMaxPowerState) & 0xf) << 4) | (StateOrder(fCurrentPowerState) & 0xf)); SIMPLEREPORT_UPDATEPREP(buf, data2cpy, size2cpy); @@ -6786,7 +6726,7 @@ IOReturn IOService::updateSimplePowerReport( IOReportConfigureAction action, voi } SIMPLEREPORT_UPDATERES(kIOReportCopyChannelData, result); - dest->appendBytes(data2cpy, size2cpy); + dest->appendBytes(data2cpy, size2cpy); default: break; @@ -6811,7 +6751,7 @@ IOReturn IOService::updateSimplePowerReport( IOReportConfigureAction action, voi //********************************************************************************* IOReturn IOService::setPowerState( unsigned long powerStateOrdinal, IOService * 
whatDevice ) + unsigned long powerStateOrdinal, IOService * whatDevice ) { return IOPMNoErr; } @@ -6944,9 +6884,9 @@ IOReturn IOService::newTemperature( long currentTemp, IOService * whichZone ) void IOService::systemWillShutdown( IOOptionBits specifier ) { - IOPMrootDomain * rootDomain = IOService::getPMRootDomain(); - if (rootDomain) - rootDomain->acknowledgeSystemWillShutdown( this ); + IOPMrootDomain * rootDomain = IOService::getPMRootDomain(); + if (rootDomain) + rootDomain->acknowledgeSystemWillShutdown( this ); } // MARK: - @@ -6960,26 +6900,26 @@ IOPMRequest * IOService::acquirePMRequest( IOService * target, IOOptionBits requestType, IOPMRequest * active ) { - IOPMRequest * request; + IOPMRequest * request; - assert(target); + assert(target); - request = IOPMRequest::create(); - if (request) - { - request->init( target, requestType ); + request = IOPMRequest::create(); + if (request) + { + request->init( target, requestType ); if (active) { IOPMRequest * root = active->getRootRequest(); if (root) request->attachRootRequest(root); } } - else - { + else + { PM_ERROR("%s: No memory for PM request type 0x%x\n", target->getName(), (uint32_t) requestType); - } - return request; + } + return request; } //********************************************************************************* @@ -6988,11 +6928,11 @@ IOService::acquirePMRequest( IOService * target, IOOptionBits requestType, void IOService::releasePMRequest( IOPMRequest * request ) { - if (request) - { - request->reset(); - request->release(); - } + if (request) + { + request->reset(); + request->release(); + } } //********************************************************************************* @@ -7001,39 +6941,39 @@ void IOService::releasePMRequest( IOPMRequest * request ) void IOService::submitPMRequest( IOPMRequest * request ) { - assert( request ); - assert( gIOPMReplyQueue ); - assert( gIOPMRequestQueue ); + assert( request ); + assert( gIOPMReplyQueue ); + assert( gIOPMRequestQueue ); - PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n", - (long)request->getType(), OBFUSCATE(request), - OBFUSCATE(request->getTarget()), request->getTarget()->getName(), - OBFUSCATE(request->fArg0), + PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n", + (long)request->getType(), OBFUSCATE(request), + OBFUSCATE(request->getTarget()), request->getTarget()->getName(), + OBFUSCATE(request->fArg0), OBFUSCATE(request->fArg1), OBFUSCATE(request->fArg2)); - if (request->isReplyType()) - gIOPMReplyQueue->queuePMRequest( request ); - else - gIOPMRequestQueue->queuePMRequest( request ); + if (request->isReplyType()) + gIOPMReplyQueue->queuePMRequest( request ); + else + gIOPMRequestQueue->queuePMRequest( request ); } void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count ) { - assert( requests ); - assert( count > 0 ); - assert( gIOPMRequestQueue ); - - for (IOItemCount i = 0; i < count; i++) - { - IOPMRequest * req = requests[i]; - PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n", - (long)req->getType(), OBFUSCATE(req), - OBFUSCATE(req->getTarget()), req->getTarget()->getName(), - OBFUSCATE(req->fArg0), + assert( requests ); + assert( count > 0 ); + assert( gIOPMRequestQueue ); + + for (IOItemCount i = 0; i < count; i++) + { + IOPMRequest * req = requests[i]; + PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n", + (long)req->getType(), OBFUSCATE(req), + OBFUSCATE(req->getTarget()), req->getTarget()->getName(), + OBFUSCATE(req->fArg0), OBFUSCATE(req->fArg1), OBFUSCATE(req->fArg2)); - } + } - gIOPMRequestQueue->queuePMRequestChain( requests, count ); + 
gIOPMRequestQueue->queuePMRequestChain( requests, count ); } //********************************************************************************* @@ -7043,8 +6983,8 @@ void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count ) //********************************************************************************* bool IOService::servicePMRequestQueue( - IOPMRequest * request, - IOPMRequestQueue * queue ) + IOPMRequest * request, + IOPMRequestQueue * queue ) { bool more; @@ -7061,10 +7001,10 @@ bool IOService::servicePMRequestQueue( // Calling PM without PMinit() is not allowed, fail the request. PM_LOG("%s: PM not initialized\n", getName()); - fAdjustPowerScheduled = false; - more = gIOPMFreeQueue->queuePMRequest(request); + fAdjustPowerScheduled = false; + more = gIOPMFreeQueue->queuePMRequest(request); if (more) gIOPMWorkQueue->incrementProducerCount(); - return more; + return more; } //********************************************************************************* @@ -7074,10 +7014,10 @@ bool IOService::servicePMRequestQueue( //********************************************************************************* bool IOService::servicePMFreeQueue( - IOPMRequest * request, - IOPMCompletionQueue * queue ) + IOPMRequest * request, + IOPMCompletionQueue * queue ) { - bool more = request->getNextRequest(); + bool more = request->getNextRequest(); IOPMRequest * root = request->getRootRequest(); if (root && (root != request)) @@ -7085,8 +7025,8 @@ bool IOService::servicePMFreeQueue( if (more) gIOPMWorkQueue->incrementProducerCount(); - releasePMRequest( request ); - return more; + releasePMRequest( request ); + return more; } //********************************************************************************* @@ -7097,26 +7037,35 @@ bool IOService::servicePMFreeQueue( bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) { - assert(request && queue); + assert(request && queue); - PM_LOG1("[- %02x] %p [%p %s] state %d, busy %d\n", - request->getType(), OBFUSCATE(request), + PM_LOG1("[- %02x] %p [%p %s] state %d, busy %d\n", + request->getType(), OBFUSCATE(request), OBFUSCATE(this), getName(), - fMachineState, gIOPMBusyCount); + fMachineState, gIOPMBusyCount); - // Catch requests created by idleTimerExpired(). + // Catch requests created by idleTimerExpired(). - if ((request->getType() == kIOPMRequestTypeActivityTickle) && - (((uintptr_t) request->fArg1) & kTickleTypePowerDrop) && - fIdleTimerPeriod) - { - restartIdleTimer(); + if (request->getType() == kIOPMRequestTypeActivityTickle) + { + uint32_t tickleFlags = (uint32_t)(uintptr_t) request->fArg1; + + if ((tickleFlags & kTickleTypePowerDrop) && fIdleTimerPeriod) + { + restartIdleTimer(); + } + else if (tickleFlags == (kTickleTypeActivity | kTickleTypePowerRise)) + { + // Invalidate any idle power drop that got queued while + // processing this request. + fIdleTimerGeneration++; + } } // If the request is linked, then Work queue has already incremented its // producer count. 
- return (gIOPMFreeQueue->queuePMRequest( request )); + return (gIOPMFreeQueue->queuePMRequest( request )); } //********************************************************************************* @@ -7127,64 +7076,64 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) bool IOService::isPMBlocked( IOPMRequest * request, int count ) { - int reason = 0; + int reason = 0; - do { - if (kIOPM_Finished == fMachineState) - break; + do { + if (kIOPM_Finished == fMachineState) + break; - if (kIOPM_DriverThreadCallDone == fMachineState) - { + if (kIOPM_DriverThreadCallDone == fMachineState) + { // 5 = kDriverCallInformPreChange // 6 = kDriverCallInformPostChange // 7 = kDriverCallSetPowerState // 8 = kRootDomainInformPreChange - if (fDriverCallBusy) + if (fDriverCallBusy) reason = 5 + fDriverCallReason; - break; - } - - // Waiting on driver's setPowerState() timeout. - if (fDriverTimer) - { - reason = 1; break; - } - - // Child or interested driver acks pending. - if (fHeadNotePendingAcks) - { - reason = 2; break; - } - - // Waiting on apps or priority power interest clients. - if (fResponseArray) - { - reason = 3; break; - } - - // Waiting on settle timer expiration. - if (fSettleTimeUS) - { - reason = 4; break; - } - } while (false); - - fWaitReason = reason; - - if (reason) - { - if (count) - { - PM_LOG1("[B %02x] %p [%p %s] state %d, reason %d\n", - request->getType(), OBFUSCATE(request), + break; + } + + // Waiting on driver's setPowerState() timeout. + if (fDriverTimer) + { + reason = 1; break; + } + + // Child or interested driver acks pending. + if (fHeadNotePendingAcks) + { + reason = 2; break; + } + + // Waiting on apps or priority power interest clients. + if (fResponseArray) + { + reason = 3; break; + } + + // Waiting on settle timer expiration. + if (fSettleTimeUS) + { + reason = 4; break; + } + } while (false); + + fWaitReason = reason; + + if (reason) + { + if (count) + { + PM_LOG1("[B %02x] %p [%p %s] state %d, reason %d\n", + request->getType(), OBFUSCATE(request), OBFUSCATE(this), getName(), - fMachineState, reason); - } + fMachineState, reason); + } - return true; - } + return true; + } - return false; + return false; } //********************************************************************************* @@ -7195,31 +7144,31 @@ bool IOService::isPMBlocked( IOPMRequest * request, int count ) bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) { - bool done = false; - int loop = 0; + bool done = false; + int loop = 0; - assert(request && queue); + assert(request && queue); - while (isPMBlocked(request, loop++) == false) - { - PM_LOG1("[W %02x] %p [%p %s] state %d\n", - request->getType(), OBFUSCATE(request), + while (isPMBlocked(request, loop++) == false) + { + PM_LOG1("[W %02x] %p [%p %s] state %d\n", + request->getType(), OBFUSCATE(request), OBFUSCATE(this), getName(), fMachineState); - gIOPMRequest = request; + gIOPMRequest = request; gIOPMWorkCount++; - // Every PM machine states must be handled in one of the cases below. + // Every PM machine state must be handled in one of the cases below. - switch ( fMachineState ) - { - case kIOPM_Finished: + switch ( fMachineState ) + { + case kIOPM_Finished: start_watchdog_timer(); - executePMRequest( request ); - break; + executePMRequest( request ); + break; - case kIOPM_OurChangeTellClientsPowerDown: + case kIOPM_OurChangeTellClientsPowerDown: // Root domain might self cancel due to assertions. 
if (IS_ROOT_DOMAIN) { @@ -7230,153 +7179,115 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) } // askChangeDown() done, was it vetoed? - if (!fDoNotPowerDown) - { - if (IS_ROOT_DOMAIN) { - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeAppNotificationsFinished, - NULL, - 0, - 0); - - getPMRootDomain()->recordAndReleasePMEvent( details ); - } - - // no, we can continue - OurChangeTellClientsPowerDown(); - } - else - { - if (IS_ROOT_DOMAIN) { - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeSleepDone, - NULL, - 1, /* reason: 1 == Ask clients succeeded */ - kIOReturnAborted); /* result */ - - getPMRootDomain()->recordAndReleasePMEvent( details ); - } + if (!fDoNotPowerDown) + { + // no, we can continue + OurChangeTellClientsPowerDown(); + } + else + { + OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); + PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState); + // yes, rescind the warning + tellNoChangeDown(fHeadNotePowerState); + // mark the change note un-actioned + fHeadNoteChangeFlags |= kIOPMNotDone; + // and we're done + OurChangeFinish(); + } + break; - OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); - PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState); - // yes, rescind the warning - tellNoChangeDown(fHeadNotePowerState); - // mark the change note un-actioned - fHeadNoteChangeFlags |= kIOPMNotDone; - // and we're done - OurChangeFinish(); - } - break; - - case kIOPM_OurChangeTellUserPMPolicyPowerDown: + case kIOPM_OurChangeTellUserPMPolicyPowerDown: // PMRD: tellChangeDown/kNotifyApps done, was it cancelled? - if (fDoNotPowerDown) - { - OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); - PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState); - // yes, rescind the warning - tellNoChangeDown(fHeadNotePowerState); - // mark the change note un-actioned - fHeadNoteChangeFlags |= kIOPMNotDone; - // and we're done - OurChangeFinish(); - } - else - OurChangeTellUserPMPolicyPowerDown(); - break; - - case kIOPM_OurChangeTellPriorityClientsPowerDown: - // PMRD: LastCallBeforeSleep notify done + if (fDoNotPowerDown) + { + OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); + PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState); + // yes, rescind the warning + tellNoChangeDown(fHeadNotePowerState); + // mark the change note un-actioned + fHeadNoteChangeFlags |= kIOPMNotDone; + // and we're done + OurChangeFinish(); + } + else + OurChangeTellUserPMPolicyPowerDown(); + break; + + case kIOPM_OurChangeTellPriorityClientsPowerDown: + // PMRD: LastCallBeforeSleep notify done // Non-PMRD: tellChangeDown/kNotifyApps done - if (fDoNotPowerDown) - { - if (IS_ROOT_DOMAIN) { - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeSleepDone, - NULL, - 2, /* reason: 2 == Client cancelled wake */ - kIOReturnAborted); /* result */ - - getPMRootDomain()->recordAndReleasePMEvent( details ); - } - OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); - PM_ERROR("%s: idle revert, state %u\n", fName, fMachineState); - // no, tell clients we're back in the old state - tellChangeUp(fCurrentPowerState); - // mark the change note un-actioned - fHeadNoteChangeFlags |= kIOPMNotDone; - // and we're done - OurChangeFinish(); - } - else - { - if (IS_ROOT_DOMAIN) { - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeAppNotificationsFinished, - NULL, - 2, /* reason: 2 == TellPriorityClientsDone */ - 
kIOReturnSuccess); /* result */ - - getPMRootDomain()->recordAndReleasePMEvent( details ); - } - // yes, we can continue - OurChangeTellPriorityClientsPowerDown(); - } - break; + if (fDoNotPowerDown) + { + OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); + PM_ERROR("%s: idle revert, state %u\n", fName, fMachineState); + // no, tell clients we're back in the old state + tellChangeUp(fCurrentPowerState); + // mark the change note un-actioned + fHeadNoteChangeFlags |= kIOPMNotDone; + // and we're done + OurChangeFinish(); + } + else + { + // yes, we can continue + OurChangeTellPriorityClientsPowerDown(); + } + break; - case kIOPM_OurChangeNotifyInterestedDriversWillChange: - OurChangeNotifyInterestedDriversWillChange(); - break; + case kIOPM_OurChangeNotifyInterestedDriversWillChange: + OurChangeNotifyInterestedDriversWillChange(); + break; - case kIOPM_OurChangeSetPowerState: - OurChangeSetPowerState(); - break; + case kIOPM_OurChangeSetPowerState: + OurChangeSetPowerState(); + break; - case kIOPM_OurChangeWaitForPowerSettle: - OurChangeWaitForPowerSettle(); - break; + case kIOPM_OurChangeWaitForPowerSettle: + OurChangeWaitForPowerSettle(); + break; - case kIOPM_OurChangeNotifyInterestedDriversDidChange: - OurChangeNotifyInterestedDriversDidChange(); - break; + case kIOPM_OurChangeNotifyInterestedDriversDidChange: + OurChangeNotifyInterestedDriversDidChange(); + break; case kIOPM_OurChangeTellCapabilityDidChange: OurChangeTellCapabilityDidChange(); break; - case kIOPM_OurChangeFinish: - OurChangeFinish(); - break; + case kIOPM_OurChangeFinish: + OurChangeFinish(); + break; - case kIOPM_ParentChangeTellPriorityClientsPowerDown: - ParentChangeTellPriorityClientsPowerDown(); - break; + case kIOPM_ParentChangeTellPriorityClientsPowerDown: + ParentChangeTellPriorityClientsPowerDown(); + break; - case kIOPM_ParentChangeNotifyInterestedDriversWillChange: - ParentChangeNotifyInterestedDriversWillChange(); - break; + case kIOPM_ParentChangeNotifyInterestedDriversWillChange: + ParentChangeNotifyInterestedDriversWillChange(); + break; - case kIOPM_ParentChangeSetPowerState: - ParentChangeSetPowerState(); - break; + case kIOPM_ParentChangeSetPowerState: + ParentChangeSetPowerState(); + break; - case kIOPM_ParentChangeWaitForPowerSettle: - ParentChangeWaitForPowerSettle(); - break; + case kIOPM_ParentChangeWaitForPowerSettle: + ParentChangeWaitForPowerSettle(); + break; - case kIOPM_ParentChangeNotifyInterestedDriversDidChange: - ParentChangeNotifyInterestedDriversDidChange(); - break; + case kIOPM_ParentChangeNotifyInterestedDriversDidChange: + ParentChangeNotifyInterestedDriversDidChange(); + break; case kIOPM_ParentChangeTellCapabilityDidChange: ParentChangeTellCapabilityDidChange(); break; - case kIOPM_ParentChangeAcknowledgePowerChange: - ParentChangeAcknowledgePowerChange(); - break; + case kIOPM_ParentChangeAcknowledgePowerChange: + ParentChangeAcknowledgePowerChange(); + break; - case kIOPM_DriverThreadCallDone: + case kIOPM_DriverThreadCallDone: switch (fDriverCallReason) { case kDriverCallInformPreChange: @@ -7393,15 +7304,15 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) panic("%s: bad call reason %x", getName(), fDriverCallReason); } - break; + break; - case kIOPM_NotifyChildrenOrdered: - notifyChildrenOrdered(); - break; + case kIOPM_NotifyChildrenOrdered: + notifyChildrenOrdered(); + break; - case kIOPM_NotifyChildrenDelayed: - notifyChildrenDelayed(); - break; + case kIOPM_NotifyChildrenDelayed: + notifyChildrenDelayed(); + break; case 
kIOPM_NotifyChildrenStart: // pop notifyAll() state saved by notifyInterestedDriversDone() @@ -7418,41 +7329,41 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) &fHeadNoteChangeFlags, &cancel); fDoNotPowerDown = cancel; } - if (!fDoNotPowerDown) - { + if (!fDoNotPowerDown) + { fMachineState = kIOPM_SyncTellPriorityClientsPowerDown; fOutOfBandParameter = kNotifyApps; tellChangeDown(fHeadNotePowerState); - } - else - { + } + else + { // Cancelled by IOPMrootDomain::askChangeDownDone() or // askChangeDown/kNotifyApps - OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); - PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState); - tellNoChangeDown(fHeadNotePowerState); - fHeadNoteChangeFlags |= kIOPMNotDone; - OurChangeFinish(); - } + OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); + PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState); + tellNoChangeDown(fHeadNotePowerState); + fHeadNoteChangeFlags |= kIOPMNotDone; + OurChangeFinish(); + } break; case kIOPM_SyncTellPriorityClientsPowerDown: // PMRD: tellChangeDown/kNotifyApps done, was it cancelled? - if (!fDoNotPowerDown) - { + if (!fDoNotPowerDown) + { fMachineState = kIOPM_SyncNotifyWillChange; fOutOfBandParameter = kNotifyPriority; tellChangeDown(fHeadNotePowerState); } else { - OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); - PM_ERROR("%s: idle revert, state %u\n", fName, fMachineState); - tellChangeUp(fCurrentPowerState); - fHeadNoteChangeFlags |= kIOPMNotDone; - OurChangeFinish(); - } - break; + OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); + PM_ERROR("%s: idle revert, state %u\n", fName, fMachineState); + tellChangeUp(fCurrentPowerState); + fHeadNoteChangeFlags |= kIOPMNotDone; + OurChangeFinish(); + } + break; case kIOPM_SyncNotifyWillChange: if (kIOPMSyncNoChildNotify & fHeadNoteChangeFlags) @@ -7515,22 +7426,22 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) tellClientsWithResponse( fOutOfBandMessage ); break; - default: - panic("servicePMWorkQueue: unknown machine state %x", + default: + panic("servicePMWorkQueue: unknown machine state %x", fMachineState); - } + } - gIOPMRequest = 0; + gIOPMRequest = 0; - if (fMachineState == kIOPM_Finished) - { + if (fMachineState == kIOPM_Finished) + { stop_watchdog_timer(); - done = true; - break; - } - } + done = true; + break; + } + } - return done; + return done; } //********************************************************************************* @@ -7539,64 +7450,65 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) void IOService::executePMRequest( IOPMRequest * request ) { - assert( kIOPM_Finished == fMachineState ); + assert( kIOPM_Finished == fMachineState ); - switch (request->getType()) - { - case kIOPMRequestTypePMStop: - handlePMstop( request ); - break; + switch (request->getType()) + { + case kIOPMRequestTypePMStop: + handlePMstop( request ); + break; - case kIOPMRequestTypeAddPowerChild1: - addPowerChild1( request ); - break; + case kIOPMRequestTypeAddPowerChild1: + addPowerChild1( request ); + break; - case kIOPMRequestTypeAddPowerChild2: - addPowerChild2( request ); - break; + case kIOPMRequestTypeAddPowerChild2: + addPowerChild2( request ); + break; - case kIOPMRequestTypeAddPowerChild3: - addPowerChild3( request ); - break; + case kIOPMRequestTypeAddPowerChild3: + addPowerChild3( request ); + break; - case kIOPMRequestTypeRegisterPowerDriver: - handleRegisterPowerDriver( request ); - break; + case 
kIOPMRequestTypeRegisterPowerDriver: + handleRegisterPowerDriver( request ); + break; - case kIOPMRequestTypeAdjustPowerState: - fAdjustPowerScheduled = false; - adjustPowerState(); - break; + case kIOPMRequestTypeAdjustPowerState: + fAdjustPowerScheduled = false; + adjustPowerState(); + break; - case kIOPMRequestTypePowerDomainWillChange: - handlePowerDomainWillChangeTo( request ); - break; + case kIOPMRequestTypePowerDomainWillChange: + handlePowerDomainWillChangeTo( request ); + break; - case kIOPMRequestTypePowerDomainDidChange: - handlePowerDomainDidChangeTo( request ); - break; + case kIOPMRequestTypePowerDomainDidChange: + handlePowerDomainDidChangeTo( request ); + break; - case kIOPMRequestTypeRequestPowerState: + case kIOPMRequestTypeRequestPowerState: case kIOPMRequestTypeRequestPowerStateOverride: - handleRequestPowerState( request ); - break; + handleRequestPowerState( request ); + break; - case kIOPMRequestTypePowerOverrideOnPriv: - case kIOPMRequestTypePowerOverrideOffPriv: - handlePowerOverrideChanged( request ); - break; + case kIOPMRequestTypePowerOverrideOnPriv: + case kIOPMRequestTypePowerOverrideOffPriv: + handlePowerOverrideChanged( request ); + break; - case kIOPMRequestTypeActivityTickle: - handleActivityTickle( request ); - break; + case kIOPMRequestTypeActivityTickle: + handleActivityTickle( request ); + break; case kIOPMRequestTypeSynchronizePowerTree: - handleSynchronizePowerTree( request ); - break; + handleSynchronizePowerTree( request ); + break; case kIOPMRequestTypeSetIdleTimerPeriod: { fIdleTimerPeriod = (uintptr_t) request->fArg0; + fNextIdleTimerPeriod = fIdleTimerPeriod; if ((false == fLockedFlags.PMStop) && (fIdleTimerPeriod > 0)) restartIdleTimer(); } @@ -7606,9 +7518,9 @@ void IOService::executePMRequest( IOPMRequest * request ) fIdleTimerIgnored = request->fArg0 ? 1 : 0; break; - default: - panic("executePMRequest: unknown request type %x", request->getType()); - } + default: + panic("executePMRequest: unknown request type %x", request->getType()); + } } //********************************************************************************* @@ -7617,24 +7529,24 @@ void IOService::executePMRequest( IOPMRequest * request ) bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ) { - bool more = false; + bool more = false; - assert( request && queue ); - assert( request->isReplyType() ); + assert( request && queue ); + assert( request->isReplyType() ); - PM_LOG1("[A %02x] %p [%p %s] state %d\n", - request->getType(), OBFUSCATE(request), + PM_LOG1("[A %02x] %p [%p %s] state %d\n", + request->getType(), OBFUSCATE(request), OBFUSCATE(this), getName(), fMachineState); - switch ( request->getType() ) - { - case kIOPMRequestTypeAllowPowerChange: - case kIOPMRequestTypeCancelPowerChange: - // Check if we are expecting this response. - if (responseValid((uint32_t)(uintptr_t) request->fArg0, + switch ( request->getType() ) + { + case kIOPMRequestTypeAllowPowerChange: + case kIOPMRequestTypeCancelPowerChange: + // Check if we are expecting this response. + if (responseValid((uint32_t)(uintptr_t) request->fArg0, (int)(uintptr_t) request->fArg1)) - { - if (kIOPMRequestTypeCancelPowerChange == request->getType()) + { + if (kIOPMRequestTypeCancelPowerChange == request->getType()) { // Clients are not allowed to cancel when kIOPMSkipAskPowerDown // flag is set. Only root domain will set this flag. 
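Context for the request-type dispatch above: drivers never call these handlers directly. Public IOService entry points package each call as an IOPMRequest of the matching type and queue it to the shared PM work loop, where executePMRequest() runs it. A minimal sketch of the producing side, assuming a hypothetical MyDriver class (PMinit, joinPMtree, setIdleTimerPeriod, activityTickle and kIOPMSuperclassPolicy1 are the real IOKit entry points; the class itself and the chosen values are illustrative only):

#include <IOKit/IOService.h>

class MyDriver : public IOService
{
    OSDeclareDefaultStructors(MyDriver)
public:
    virtual bool start( IOService * provider );
};

OSDefineMetaClassAndStructors( MyDriver, IOService )

bool MyDriver::start( IOService * provider )
{
    if (!IOService::start(provider))
        return false;

    PMinit();                   // allocate this service's IOServicePM (pwrMgt) state
    provider->joinPMtree(this); // attach into the power plane

    // registerPowerDriver() would be called here with a power-state array;
    // it arrives above as kIOPMRequestTypeRegisterPowerDriver.

    setIdleTimerPeriod(30);     // arrives as kIOPMRequestTypeSetIdleTimerPeriod,
                                // which now also seeds fNextIdleTimerPeriod
    return true;
}

// On device activity (interrupt filter, I/O completion, ...):
//     activityTickle(kIOPMSuperclassPolicy1, 1 /* ordinal of the ON state */);
// which is serviced above as kIOPMRequestTypeActivityTickle.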
@@ -7652,115 +7564,105 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q getPMRootDomain()->pmStatsRecordApplicationResponse( gIOPMStatsApplicationResponseCancel, name ? name->getCStringNoCopy() : "", 0, - 0, (int)(uintptr_t) request->fArg1); + 0, (int)(uintptr_t) request->fArg1, 0); } } - if (checkForDone()) - { - stop_ack_timer(); + if (checkForDone()) + { + stop_ack_timer(); cleanClientResponses(false); - more = true; - } - } + more = true; + } + } // OSString containing app name in Arg2 must be released. if (request->getType() == kIOPMRequestTypeCancelPowerChange) { OSObject * obj = (OSObject *) request->fArg2; if (obj) obj->release(); } - break; - - case kIOPMRequestTypeAckPowerChange: - more = handleAcknowledgePowerChange( request ); - break; - - case kIOPMRequestTypeAckSetPowerState: - if (fDriverTimer == -1) - { - // driver acked while setPowerState() call is in-flight. - // take this ack, return value from setPowerState() is irrelevant. - OUR_PMLog(kPMLogDriverAcknowledgeSet, - (uintptr_t) this, fDriverTimer); - fDriverTimer = 0; - } - else if (fDriverTimer > 0) - { - // expected ack, stop the timer - stop_ack_timer(); + break; + + case kIOPMRequestTypeAckPowerChange: + more = handleAcknowledgePowerChange( request ); + break; + + case kIOPMRequestTypeAckSetPowerState: + if (fDriverTimer == -1) + { + // driver acked while setPowerState() call is in-flight. + // take this ack, return value from setPowerState() is irrelevant. + OUR_PMLog(kPMLogDriverAcknowledgeSet, + (uintptr_t) this, fDriverTimer); + fDriverTimer = 0; + } + else if (fDriverTimer > 0) + { + // expected ack, stop the timer + stop_ack_timer(); #if LOG_SETPOWER_TIMES uint64_t nsec = computeTimeDeltaNS(&fDriverCallStartTime); - if (nsec > LOG_SETPOWER_TIMES) - PM_LOG("%s::setPowerState(%p, %lu -> %lu) async took %d ms\n", - fName, OBFUSCATE(this), fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); - - PMEventDetails *details = PMEventDetails::eventDetails( - kIOPMEventTypeSetPowerStateDelayed, // type - fName, // who - (uintptr_t)this, // owner unique - NULL, // interest name - (uint8_t)getPowerState(), // old - (uint8_t)fHeadNotePowerState, // new - 0, // result - NS_TO_US(nsec)); // usec completion time - - getPMRootDomain()->recordAndReleasePMEvent( details ); + if (nsec > LOG_SETPOWER_TIMES) { + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsDriverPSChangeSlow, + fName, kDriverCallSetPowerState, NS_TO_MS(nsec), 0, NULL, fHeadNotePowerState); + } #endif - OUR_PMLog(kPMLogDriverAcknowledgeSet, (uintptr_t) this, fDriverTimer); - fDriverTimer = 0; - more = true; - } - else - { - // unexpected ack - OUR_PMLog(kPMLogAcknowledgeErr4, (uintptr_t) this, 0); - } - break; - - case kIOPMRequestTypeInterestChanged: - handleInterestChanged( request ); - more = true; - break; - - case kIOPMRequestTypeIdleCancel: - if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown) + OUR_PMLog(kPMLogDriverAcknowledgeSet, (uintptr_t) this, fDriverTimer); + fDriverTimer = 0; + more = true; + } + else + { + // unexpected ack + OUR_PMLog(kPMLogAcknowledgeErr4, (uintptr_t) this, 0); + } + break; + + case kIOPMRequestTypeInterestChanged: + handleInterestChanged( request ); + more = true; + break; + + case kIOPMRequestTypeIdleCancel: + if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown) || (fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown) - || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) + || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) 
|| (fMachineState == kIOPM_SyncTellClientsPowerDown) || (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown)) - { - OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); + { + OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); PM_LOG2("%s: cancel from machine state %d\n", getName(), fMachineState); - fDoNotPowerDown = true; + fDoNotPowerDown = true; // Stop waiting for app replys. - if ((fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) || - (fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown) || - (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown)) - cleanClientResponses(false); - more = true; - } - break; + if ((fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) || + (fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown) || + (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown)) + cleanClientResponses(false); + more = true; + } + break; case kIOPMRequestTypeChildNotifyDelayCancel: if (fMachineState == kIOPM_NotifyChildrenDelayed) { - PM_LOG2("%s: delay notify cancelled\n", getName()); + PM_LOG2("%s: delay notify cancelled\n", getName()); notifyChildrenDelayed(); } break; - default: - panic("servicePMReplyQueue: unknown reply type %x", + default: + panic("servicePMReplyQueue: unknown reply type %x", request->getType()); - } + } - more |= gIOPMFreeQueue->queuePMRequest(request); + more |= gIOPMFreeQueue->queuePMRequest(request); if (more) gIOPMWorkQueue->incrementProducerCount(); - return more; + return more; } //********************************************************************************* @@ -7784,7 +7686,7 @@ bool IOService::assertPMDriverCall( { goto fail; } - + if (((options & kIOPMADC_NoInactiveCheck) == 0) && isInactive()) { goto fail; @@ -7862,7 +7764,7 @@ void IOService::waitForPMDriverCall( IOService * target ) } continue; } - + wait = true; break; } @@ -7890,7 +7792,7 @@ const char * IOService::getIOMessageString( uint32_t msg ) { #define MSG_ENTRY(x) {(int) x, #x} - static const IONamedValue msgNames[] = { + static const IONamedValue msgNames[] = { MSG_ENTRY( kIOMessageCanDevicePowerOff ), MSG_ENTRY( kIOMessageDeviceWillPowerOff ), MSG_ENTRY( kIOMessageDeviceWillNotPowerOff ), @@ -7925,54 +7827,54 @@ OSDefineMetaClassAndStructors( IOPMRequest, IOCommand ); IOPMRequest * IOPMRequest::create( void ) { - IOPMRequest * me = OSTypeAlloc(IOPMRequest); - if (me && !me->init(0, kIOPMRequestTypeInvalid)) - { - me->release(); - me = 0; - } - return me; + IOPMRequest * me = OSTypeAlloc(IOPMRequest); + if (me && !me->init(0, kIOPMRequestTypeInvalid)) + { + me->release(); + me = 0; + } + return me; } bool IOPMRequest::init( IOService * target, IOOptionBits type ) { - if (!IOCommand::init()) - return false; + if (!IOCommand::init()) + return false; - fType = type; - fTarget = target; + fType = type; + fTarget = target; #if NOT_READY fCompletionStatus = kIOReturnSuccess; #endif - if (fTarget) - fTarget->retain(); + if (fTarget) + fTarget->retain(); - return true; + return true; } void IOPMRequest::reset( void ) { - assert( fWorkWaitCount == 0 ); - assert( fFreeWaitCount == 0 ); + assert( fWorkWaitCount == 0 ); + assert( fFreeWaitCount == 0 ); - detachNextRequest(); + detachNextRequest(); detachRootRequest(); - fType = kIOPMRequestTypeInvalid; + fType = kIOPMRequestTypeInvalid; #if NOT_READY - if (fCompletionAction) - { + if (fCompletionAction) + { fCompletionAction(fCompletionTarget, fCompletionParam, fCompletionStatus); } #endif - if (fTarget) - { - fTarget->release(); - fTarget = 0; - } + if (fTarget) + { + 
fTarget->release(); + fTarget = 0; + } } bool IOPMRequest::attachNextRequest( IOPMRequest * next ) @@ -8076,82 +7978,82 @@ OSDefineMetaClassAndStructors( IOPMRequestQueue, IOEventSource ); IOPMRequestQueue * IOPMRequestQueue::create( IOService * inOwner, Action inAction ) { - IOPMRequestQueue * me = OSTypeAlloc(IOPMRequestQueue); - if (me && !me->init(inOwner, inAction)) - { - me->release(); - me = 0; - } - return me; + IOPMRequestQueue * me = OSTypeAlloc(IOPMRequestQueue); + if (me && !me->init(inOwner, inAction)) + { + me->release(); + me = 0; + } + return me; } bool IOPMRequestQueue::init( IOService * inOwner, Action inAction ) { - if (!inAction || !IOEventSource::init(inOwner, (IOEventSourceAction)inAction)) + if (!inAction || !IOEventSource::init(inOwner, (IOEventSourceAction)inAction)) return false; - queue_init(&fQueue); - fLock = IOLockAlloc(); - return (fLock != 0); + queue_init(&fQueue); + fLock = IOLockAlloc(); + return (fLock != 0); } void IOPMRequestQueue::free( void ) { - if (fLock) - { - IOLockFree(fLock); - fLock = 0; - } - return IOEventSource::free(); + if (fLock) + { + IOLockFree(fLock); + fLock = 0; + } + return IOEventSource::free(); } void IOPMRequestQueue::queuePMRequest( IOPMRequest * request ) { - assert(request); - IOLockLock(fLock); - queue_enter(&fQueue, request, IOPMRequest *, fCommandChain); - IOLockUnlock(fLock); - if (workLoop) signalWorkAvailable(); + assert(request); + IOLockLock(fLock); + queue_enter(&fQueue, request, IOPMRequest *, fCommandChain); + IOLockUnlock(fLock); + if (workLoop) signalWorkAvailable(); } void IOPMRequestQueue::queuePMRequestChain( IOPMRequest ** requests, IOItemCount count ) { - IOPMRequest * next; + IOPMRequest * next; - assert(requests && count); - IOLockLock(fLock); - while (count--) - { - next = *requests; - requests++; - queue_enter(&fQueue, next, IOPMRequest *, fCommandChain); - } - IOLockUnlock(fLock); - if (workLoop) signalWorkAvailable(); + assert(requests && count); + IOLockLock(fLock); + while (count--) + { + next = *requests; + requests++; + queue_enter(&fQueue, next, IOPMRequest *, fCommandChain); + } + IOLockUnlock(fLock); + if (workLoop) signalWorkAvailable(); } bool IOPMRequestQueue::checkForWork( void ) { - Action dqAction = (Action) action; - IOPMRequest * request; - IOService * target; - bool more = false; + Action dqAction = (Action) action; + IOPMRequest * request; + IOService * target; + bool more = false; - IOLockLock( fLock ); + IOLockLock( fLock ); - while (!queue_empty(&fQueue)) - { - queue_remove_first( &fQueue, request, IOPMRequest *, fCommandChain ); - IOLockUnlock( fLock ); - target = request->getTarget(); - assert(target); - more |= (*dqAction)( target, request, this ); - IOLockLock( fLock ); - } + while (!queue_empty(&fQueue)) + { + queue_remove_first( &fQueue, request, IOPMRequest *, fCommandChain ); + IOLockUnlock( fLock ); + target = request->getTarget(); + assert(target); + more |= (*dqAction)( target, request, this ); + IOLockLock( fLock ); + } - IOLockUnlock( fLock ); - return more; + IOLockUnlock( fLock ); + return more; } // MARK: - @@ -8168,28 +8070,28 @@ OSDefineMetaClassAndStructors( IOPMWorkQueue, IOEventSource ); IOPMWorkQueue * IOPMWorkQueue::create( IOService * inOwner, Action work, Action retire ) { - IOPMWorkQueue * me = OSTypeAlloc(IOPMWorkQueue); - if (me && !me->init(inOwner, work, retire)) - { - me->release(); - me = 0; - } - return me; + IOPMWorkQueue * me = OSTypeAlloc(IOPMWorkQueue); + if (me && !me->init(inOwner, work, retire)) + { + me->release(); + me = 0; + } + return me; 
} bool IOPMWorkQueue::init( IOService * inOwner, Action work, Action retire ) { - if (!work || !retire || - !IOEventSource::init(inOwner, (IOEventSourceAction)0)) - return false; + if (!work || !retire || + !IOEventSource::init(inOwner, (IOEventSourceAction)0)) + return false; - queue_init(&fWorkQueue); + queue_init(&fWorkQueue); - fWorkAction = work; - fRetireAction = retire; + fWorkAction = work; + fRetireAction = retire; fConsumerCount = fProducerCount = 0; - return true; + return true; } bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt ) @@ -8197,20 +8099,20 @@ bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt bool more = false; bool empty; - assert( request ); + assert( request ); assert( pwrMgt ); - assert( onThread() ); + assert( onThread() ); assert( queue_next(&request->fCommandChain) == queue_prev(&request->fCommandChain) ); - gIOPMBusyCount++; + gIOPMBusyCount++; // Add new request to the tail of the per-service request queue. // Then immediately check the request queue to minimize latency // if the queue was empty. empty = queue_empty(&pwrMgt->RequestHead); - queue_enter(&pwrMgt->RequestHead, request, IOPMRequest *, fCommandChain); + queue_enter(&pwrMgt->RequestHead, request, IOPMRequest *, fCommandChain); if (empty) { more = checkRequestQueue(&pwrMgt->RequestHead, &empty); @@ -8232,24 +8134,24 @@ bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt bool IOPMWorkQueue::checkRequestQueue( queue_head_t * queue, bool * empty ) { - IOPMRequest * request; - IOService * target; + IOPMRequest * request; + IOService * target; bool more = false; - bool done = false; + bool done = false; assert(!queue_empty(queue)); do { - request = (IOPMRequest *) queue_first(queue); - if (request->isWorkBlocked()) + request = (IOPMRequest *) queue_first(queue); + if (request->isWorkBlocked()) break; // cannot start, blocked on attached request - target = request->getTarget(); + target = request->getTarget(); done = (*fWorkAction)( target, request, this ); - if (!done) + if (!done) break; // work started, blocked on PM state machine assert(gIOPMBusyCount > 0); - if (gIOPMBusyCount) + if (gIOPMBusyCount) gIOPMBusyCount--; queue_remove_first(queue, request, IOPMRequest *, fCommandChain); @@ -8272,9 +8174,9 @@ bool IOPMWorkQueue::checkRequestQueue( queue_head_t * queue, bool * empty ) bool IOPMWorkQueue::checkForWork( void ) { - IOServicePM * entry; - IOServicePM * next; - bool more = false; + IOServicePM * entry; + IOServicePM * next; + bool more = false; bool empty; #if WORK_QUEUE_STATS @@ -8291,7 +8193,7 @@ bool IOPMWorkQueue::checkForWork( void ) fConsumerCount = fProducerCount; -#if WORK_QUEUE_STATS +#if WORK_QUEUE_STATS if (queue_empty(&fWorkQueue)) { fStatQueueEmpty++; @@ -8333,7 +8235,7 @@ bool IOPMWorkQueue::checkForWork( void ) void IOPMWorkQueue::signalWorkAvailable( void ) { fProducerCount++; - IOEventSource::signalWorkAvailable(); + IOEventSource::signalWorkAvailable(); } void IOPMWorkQueue::incrementProducerCount( void ) @@ -8353,48 +8255,48 @@ OSDefineMetaClassAndStructors( IOPMCompletionQueue, IOEventSource ); IOPMCompletionQueue * IOPMCompletionQueue::create( IOService * inOwner, Action inAction ) { - IOPMCompletionQueue * me = OSTypeAlloc(IOPMCompletionQueue); - if (me && !me->init(inOwner, inAction)) - { - me->release(); - me = 0; - } - return me; + IOPMCompletionQueue * me = OSTypeAlloc(IOPMCompletionQueue); + if (me && !me->init(inOwner, inAction)) + { + me->release(); + me = 0; + } + return me; } 
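The PM queues above (IOPMRequestQueue, IOPMWorkQueue, IOPMCompletionQueue) share one create/init idiom: allocate through the OSMetaClass system, run init(), and release the half-built object on failure, so create() returns either a fully initialized event source or NULL. A condensed sketch of the pattern, modeled on IOPMRequestQueue; PMStyleQueue and PMStyleWorkItem are hypothetical names, while action, owner, workLoop and signalWorkAvailable() are the real IOEventSource internals used the same way here:

#include <IOKit/IOEventSource.h>
#include <IOKit/IOLocks.h>
#include <kern/queue.h>

struct PMStyleWorkItem {
    queue_chain_t fChain;       // links the item onto PMStyleQueue::fQueue
    // request payload would live here
};

class PMStyleQueue : public IOEventSource
{
    OSDeclareDefaultStructors( PMStyleQueue )
public:
    // dequeue callback, invoked on the work-loop thread for each item
    typedef bool (*Action)( OSObject * owner, PMStyleWorkItem * item,
                            PMStyleQueue * queue );

    static PMStyleQueue * create( OSObject * inOwner, Action inAction );
    virtual bool init( OSObject * inOwner, Action inAction );
    virtual void free( void );
    void enqueue( PMStyleWorkItem * item );

protected:
    virtual bool checkForWork( void );

private:
    queue_head_t fQueue;        // producer/consumer list, guarded by fLock
    IOLock *     fLock;
};

OSDefineMetaClassAndStructors( PMStyleQueue, IOEventSource )

PMStyleQueue * PMStyleQueue::create( OSObject * inOwner, Action inAction )
{
    PMStyleQueue * me = OSTypeAlloc(PMStyleQueue);  // alloc via OSMetaClass
    if (me && !me->init(inOwner, inAction))
    {
        me->release();          // failed init: drop the half-built object
        me = 0;
    }
    return me;                  // fully initialized queue, or NULL
}

bool PMStyleQueue::init( OSObject * inOwner, Action inAction )
{
    if (!inAction || !IOEventSource::init(inOwner, (IOEventSourceAction) inAction))
        return false;
    queue_init(&fQueue);
    fLock = IOLockAlloc();
    return (fLock != 0);
}

void PMStyleQueue::free( void )
{
    if (fLock)
    {
        IOLockFree(fLock);
        fLock = 0;
    }
    IOEventSource::free();
}

void PMStyleQueue::enqueue( PMStyleWorkItem * item )
{
    IOLockLock(fLock);
    queue_enter(&fQueue, item, PMStyleWorkItem *, fChain);
    IOLockUnlock(fLock);
    if (workLoop) signalWorkAvailable();    // wake the work loop, as above
}

bool PMStyleQueue::checkForWork( void )
{
    Action            dqAction = (Action) action;
    PMStyleWorkItem * item;
    bool              more = false;

    IOLockLock(fLock);
    while (!queue_empty(&fQueue))
    {
        queue_remove_first(&fQueue, item, PMStyleWorkItem *, fChain);
        IOLockUnlock(fLock);                // drop the lock around the callout
        more |= (*dqAction)(owner, item, this);
        IOLockLock(fLock);
    }
    IOLockUnlock(fLock);
    return more;                            // true => keep the work loop running
}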
bool IOPMCompletionQueue::init( IOService * inOwner, Action inAction ) { - if (!inAction || !IOEventSource::init(inOwner, (IOEventSourceAction)inAction)) + if (!inAction || !IOEventSource::init(inOwner, (IOEventSourceAction)inAction)) return false; - queue_init(&fQueue); - return true; + queue_init(&fQueue); + return true; } bool IOPMCompletionQueue::queuePMRequest( IOPMRequest * request ) { bool more; - assert(request); + assert(request); // unblock dependent request more = request->detachNextRequest(); - queue_enter(&fQueue, request, IOPMRequest *, fCommandChain); + queue_enter(&fQueue, request, IOPMRequest *, fCommandChain); return more; } bool IOPMCompletionQueue::checkForWork( void ) { - Action dqAction = (Action) action; - IOPMRequest * request; - IOPMRequest * next; - IOService * target; - bool more = false; + Action dqAction = (Action) action; + IOPMRequest * request; + IOPMRequest * next; + IOService * target; + bool more = false; request = (IOPMRequest *) queue_first(&fQueue); while (!queue_end(&fQueue, (queue_entry_t) request)) { next = (IOPMRequest *) queue_next(&request->fCommandChain); - if (!request->isFreeBlocked()) + if (!request->isFreeBlocked()) { queue_remove(&fQueue, request, IOPMRequest *, fCommandChain); target = request->getTarget(); @@ -8431,13 +8333,13 @@ setPMProperty( OSDictionary * dict, const char * key, uint64_t value ) IOReturn IOServicePM::gatedSerialize( OSSerialize * s ) const { - OSDictionary * dict; - bool ok = false; + OSDictionary * dict; + bool ok = false; int powerClamp = -1; - int dictSize = 5; + int dictSize = 6; - if (IdleTimerPeriod) - dictSize += 4; + if (IdleTimerPeriod) + dictSize += 4; if (PMActions.parameter & kPMActionsFlagLimitPower) { @@ -8459,9 +8361,10 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s ) const else dict = OSDictionary::withCapacity(dictSize); - if (dict) - { + if (dict) + { setPMProperty(dict, "CurrentPowerState", CurrentPowerState); + setPMProperty(dict, "CapabilityFlags", CurrentCapabilityFlags); if (NumberOfPowerStates) setPMProperty(dict, "MaxPowerState", NumberOfPowerStates-1); if (DesiredPowerState != CurrentPowerState) @@ -8473,38 +8376,38 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s ) const if (powerClamp >= 0) setPMProperty(dict, "PowerClamp", powerClamp); - if (IdleTimerPeriod) - { + if (IdleTimerPeriod) + { AbsoluteTime now; AbsoluteTime delta; uint64_t nsecs; clock_get_uptime(&now); - // The idle timer period in milliseconds. - setPMProperty(dict, "IdleTimerPeriod", IdleTimerPeriod * 1000ULL); + // The idle timer period in milliseconds + setPMProperty(dict, "IdleTimerPeriod", NextIdleTimerPeriod * 1000ULL); - // The number of activity tickles recorded since device idle + // Number of tickles since the last idle timer expiration setPMProperty(dict, "ActivityTickles", ActivityTickleCount); if (AbsoluteTime_to_scalar(&DeviceActiveTimestamp)) { - // The number of milliseconds since the last activity tickle. + // Milliseconds since the last activity tickle delta = now; SUB_ABSOLUTETIME(&delta, &DeviceActiveTimestamp); absolutetime_to_nanoseconds(delta, &nsecs); setPMProperty(dict, "TimeSinceLastTickle", NS_TO_MS(nsecs)); } - if (AbsoluteTime_to_scalar(&IdleTimerStartTime)) + if (!IdleTimerStopped && AbsoluteTime_to_scalar(&IdleTimerStartTime)) { - // The number of milliseconds since the last device idle. 
+ // Idle timer elapsed time in milliseconds delta = now; SUB_ABSOLUTETIME(&delta, &IdleTimerStartTime); absolutetime_to_nanoseconds(delta, &nsecs); - setPMProperty(dict, "TimeSinceDeviceIdle", NS_TO_MS(nsecs)); + setPMProperty(dict, "IdleTimerElapsedTime", NS_TO_MS(nsecs)); } - } + } #if WORK_QUEUE_STATS if (gIOPMRootNode == Owner) @@ -8526,11 +8429,11 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s ) const dict->removeObject(gIOPMPowerClientAdvisoryTickle); } - ok = dict->serialize(s); - dict->release(); - } + ok = dict->serialize(s); + dict->release(); + } - return (ok ? kIOReturnSuccess : kIOReturnNoMemory); + return (ok ? kIOReturnSuccess : kIOReturnNoMemory); } bool IOServicePM::serialize( OSSerialize * s ) const @@ -8544,11 +8447,11 @@ bool IOServicePM::serialize( OSSerialize * s ) const ret = gatedSerialize(s); } else if (gIOPMWorkLoop) - { - ret = gIOPMWorkLoop->runAction( + { + ret = gIOPMWorkLoop->runAction( OSMemberFunctionCast(IOWorkLoop::Action, this, &IOServicePM::gatedSerialize), (OSObject *) this, (void *) s); - } + } return (kIOReturnSuccess == ret); } @@ -8598,52 +8501,3 @@ void IOServicePM::pmTrace( IOTimeStampConstant(code, name, (uintptr_t) regId, param1, param2); } -PMEventDetails* PMEventDetails::eventDetails(uint32_t type, - const char *ownerName, - uintptr_t ownerUnique, - const char *interestName, - uint8_t oldState, - uint8_t newState, - uint32_t result, - uint32_t elapsedTimeUS) { - - PMEventDetails *myself; - myself = new PMEventDetails; - - if(myself) { - myself->eventType = type; - myself->ownerName = ownerName; - myself->ownerUnique = ownerUnique; - myself->interestName = interestName; - myself->oldState = oldState; - myself->newState = newState; - myself->result = result; - myself->elapsedTimeUS = elapsedTimeUS; - - myself->eventClassifier = kIOPMEventClassDriverEvent; - } - - return myself; -} - - -PMEventDetails* PMEventDetails::eventDetails(uint32_t type, - const char *uuid, - uint32_t reason, - uint32_t result) { - - PMEventDetails *myself; - myself = new PMEventDetails; - - if(myself) { - myself->eventType = type; - myself->uuid = uuid; - myself->reason = reason; - myself->result = result; - - myself->eventClassifier = kIOPMEventClassSystemEvent; - } - - return myself; -} - diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h index 40baaa62c..313d8e3c5 100644 --- a/iokit/Kernel/IOServicePMPrivate.h +++ b/iokit/Kernel/IOServicePMPrivate.h @@ -55,7 +55,7 @@ enum { kIOPMRequestTypeRequestPowerStateOverride = 0x0E, kIOPMRequestTypeSetIdleTimerPeriod = 0x0F, kIOPMRequestTypeIgnoreIdleTimer = 0x10, - + /* Reply Types */ kIOPMRequestTypeReplyStart = 0x80, kIOPMRequestTypeAckPowerChange = 0x81, @@ -132,57 +132,16 @@ enum { kPMActionsFlagIsGraphicsDevice = 0x00000200, kPMActionsFlagIsAudioDevice = 0x00000400, kPMActionsFlagLimitPower = 0x00000800, - kPMActionsPCIBitNumberMask = 0x000000ff + kPMActionsPCIBitNumberMask = 0x000000ff }; //****************************************************************************** - -enum { - kIOPMEventClassSystemEvent = 0x00, - kIOPMEventClassDriverEvent = 0x1 -}; - -class PMEventDetails : public OSObject -{ - OSDeclareDefaultStructors( PMEventDetails ); - friend class IOServicePM; - friend class IOPMrootDomain; - friend class IOPMTimeline; -public: - static PMEventDetails *eventDetails(uint32_t type, - const char *ownerName, - uintptr_t ownerUnique, - const char *interestName, - uint8_t oldState, - uint8_t newState, - uint32_t result, - uint32_t elapsedTimeUS); - - static PMEventDetails 
*eventDetails(uint32_t type, - const char *uuid, - uint32_t reason, - uint32_t result); -private: - uint8_t eventClassifier; - uint32_t eventType; - const char *ownerName; - uintptr_t ownerUnique; - const char *interestName; - uint8_t oldState; - uint8_t newState; - uint32_t result; - uint32_t elapsedTimeUS; - - const char *uuid; - uint32_t reason; -}; - // Internal concise representation of IOPMPowerState struct IOPMPSEntry { - IOPMPowerFlags capabilityFlags; - IOPMPowerFlags outputPowerFlags; - IOPMPowerFlags inputPowerFlags; + IOPMPowerFlags capabilityFlags; + IOPMPowerFlags outputPowerFlags; + IOPMPowerFlags inputPowerFlags; uint32_t staticPower; uint32_t settleUpTime; uint32_t settleDownTime; @@ -204,7 +163,7 @@ class IOServicePM : public OSObject private: // Link IOServicePM objects on IOPMWorkQueue. queue_chain_t WorkChain; - + // Queue of IOPMRequest objects. queue_head_t RequestHead; @@ -227,6 +186,7 @@ private: // Settle time after changing power state. uint32_t SettleTimeUS; + uint32_t IdleTimerGeneration; // The flags describing current change note. IOPMPowerChangeFlags HeadNoteChangeFlags; @@ -245,7 +205,7 @@ private: // Connection attached to the changing parent. IOPowerConnection * HeadNoteParentConnection; - + // Power flags supplied by the changing parent. IOPMPowerFlags HeadNoteParentFlags; @@ -263,7 +223,7 @@ private: unsigned int StrictTreeOrder :1; unsigned int IdleTimerStopped :1; unsigned int AdjustPowerScheduled :1; - + unsigned int IsPreChange :1; unsigned int DriverCallBusy :1; unsigned int PCDFunctionOverride :1; @@ -274,6 +234,8 @@ private: // Time of last device activity. AbsoluteTime DeviceActiveTimestamp; + AbsoluteTime MaxPowerStateEntryTime; + AbsoluteTime MaxPowerStateExitTime; // Used to protect activity flag. IOLock * ActivityLock; @@ -281,6 +243,7 @@ private: // Idle timer's period in seconds. unsigned long IdleTimerPeriod; unsigned long IdleTimerMinPowerState; + unsigned long NextIdleTimerPeriod; AbsoluteTime IdleTimerStartTime; // Power state desired by a subclassed device object. @@ -356,7 +319,6 @@ private: uint32_t WaitReason; uint32_t SavedMachineState; - uint32_t RootDomainState; // Protected by PMLock - BEGIN struct { @@ -367,14 +329,12 @@ private: queue_head_t PMDriverCallQueue; OSSet * InsertInterestSet; OSSet * RemoveInterestSet; - - + // IOReporter Data uint32_t ReportClientCnt; void * ReportBuf; // Protected by PMLock - END - #if PM_VARS_SUPPORT IOPMprot * PMVars; #endif @@ -384,7 +344,7 @@ private: // Serialize IOServicePM state for debug output. 
IOReturn gatedSerialize( OSSerialize * s ) const; virtual bool serialize( OSSerialize * s ) const; - + // PM log and trace void pmPrint( uint32_t event, uintptr_t param1, uintptr_t param2 ) const; void pmTrace( uint32_t event, uintptr_t param1, uintptr_t param2 ) const; @@ -399,6 +359,7 @@ private: #define fIdleTimer pwrMgt->IdleTimer #define fWatchdogTimer pwrMgt->WatchdogTimer #define fSettleTimeUS pwrMgt->SettleTimeUS +#define fIdleTimerGeneration pwrMgt->IdleTimerGeneration #define fHeadNoteChangeFlags pwrMgt->HeadNoteChangeFlags #define fHeadNotePowerState pwrMgt->HeadNotePowerState #define fHeadNotePowerArrayEntry pwrMgt->HeadNotePowerArrayEntry @@ -424,9 +385,12 @@ private: #define fAdvisoryTickleUsed pwrMgt->AdvisoryTickleUsed #define fResetPowerStateOnWake pwrMgt->ResetPowerStateOnWake #define fDeviceActiveTimestamp pwrMgt->DeviceActiveTimestamp +#define fMaxPowerStateEntryTime pwrMgt->MaxPowerStateEntryTime +#define fMaxPowerStateExitTime pwrMgt->MaxPowerStateExitTime #define fActivityLock pwrMgt->ActivityLock #define fIdleTimerPeriod pwrMgt->IdleTimerPeriod #define fIdleTimerMinPowerState pwrMgt->IdleTimerMinPowerState +#define fNextIdleTimerPeriod pwrMgt->NextIdleTimerPeriod #define fIdleTimerStartTime pwrMgt->IdleTimerStartTime #define fDeviceDesire pwrMgt->DeviceDesire #define fDesiredPowerState pwrMgt->DesiredPowerState @@ -466,7 +430,6 @@ private: #define fAdvisoryTickled pwrMgt->AdvisoryTickled #define fWaitReason pwrMgt->WaitReason #define fSavedMachineState pwrMgt->SavedMachineState -#define fRootDomainState pwrMgt->RootDomainState #define fLockedFlags pwrMgt->LockedFlags #define fPMDriverCallQueue pwrMgt->PMDriverCallQueue #define fInsertInterestSet pwrMgt->InsertInterestSet @@ -476,11 +439,11 @@ private: #define fPMVars pwrMgt->PMVars #define fPMActions pwrMgt->PMActions -#define StateOrder(state) (((state) < fNumberOfPowerStates) \ - ? pwrMgt->PowerStates[(state)].stateOrder \ - : (state)) -#define StateMax(a,b) (StateOrder((a)) < StateOrder((b)) ? (b) : (a)) -#define StateMin(a,b) (StateOrder((a)) < StateOrder((b)) ? (a) : (b)) +#define StateOrder(state) (((state) < fNumberOfPowerStates) \ + ? pwrMgt->PowerStates[(state)].stateOrder \ + : (state)) +#define StateMax(a,b) (StateOrder((a)) < StateOrder((b)) ? (b) : (a)) +#define StateMin(a,b) (StateOrder((a)) < StateOrder((b)) ? (a) : (b)) #define kPowerStateZero (0) @@ -496,7 +459,7 @@ the ack timer is ticking every tenth of a second. // Max wait time in microseconds for kernel priority and capability clients // with async message handlers to acknowledge. 
-// +// #define kPriorityClientMaxWait (90 * 1000 * 1000) #define kCapabilityClientMaxWait (240 * 1000 * 1000) @@ -582,6 +545,8 @@ enum { extern const OSSymbol *gIOPMStatsApplicationResponseTimedOut; extern const OSSymbol *gIOPMStatsApplicationResponseCancel; extern const OSSymbol *gIOPMStatsApplicationResponseSlow; +extern const OSSymbol *gIOPMStatsApplicationResponsePrompt; +extern const OSSymbol *gIOPMStatsDriverPSChangeSlow; //****************************************************************************** // IOPMRequest diff --git a/iokit/Kernel/IOServicePrivate.h b/iokit/Kernel/IOServicePrivate.h index 4fc6f9170..465b8261a 100644 --- a/iokit/Kernel/IOServicePrivate.h +++ b/iokit/Kernel/IOServicePrivate.h @@ -62,6 +62,7 @@ enum { kIOServiceTermPhase1State = 0x00400000, kIOServiceTerm1WaiterState = 0x00200000, kIOServiceRecursing = 0x00100000, + kIOServiceNeedWillTerminate = 0x00080000, }; // notify state @@ -121,6 +122,7 @@ public: virtual bool disable(); virtual void enable( bool was ); virtual void wait(); + virtual bool init(); }; class _IOConfigThread : public OSObject diff --git a/iokit/Kernel/IOSharedDataQueue.cpp b/iokit/Kernel/IOSharedDataQueue.cpp index 5eb1c35ed..0ad0f3cde 100644 --- a/iokit/Kernel/IOSharedDataQueue.cpp +++ b/iokit/Kernel/IOSharedDataQueue.cpp @@ -75,26 +75,26 @@ Boolean IOSharedDataQueue::initWithCapacity(UInt32 size) { IODataQueueAppendix * appendix; vm_size_t allocSize; - + if (!super::init()) { return false; } - + _reserved = (ExpansionData *)IOMalloc(sizeof(struct ExpansionData)); if (!_reserved) { return false; } - + if (size > UINT32_MAX - DATA_QUEUE_MEMORY_HEADER_SIZE - DATA_QUEUE_MEMORY_APPENDIX_SIZE) { return false; } allocSize = round_page(size + DATA_QUEUE_MEMORY_HEADER_SIZE + DATA_QUEUE_MEMORY_APPENDIX_SIZE); - + if (allocSize < size) { return false; } - + dataQueue = (IODataQueueMemory *)IOMallocAligned(allocSize, PAGE_SIZE); if (dataQueue == 0) { return false; @@ -103,7 +103,7 @@ Boolean IOSharedDataQueue::initWithCapacity(UInt32 size) dataQueue->queueSize = size; dataQueue->head = 0; dataQueue->tail = 0; - + if (!setQueueSize(size)) { return false; } @@ -126,7 +126,7 @@ void IOSharedDataQueue::free() if (_reserved) { IOFree (_reserved, sizeof(struct ExpansionData)); _reserved = NULL; - } + } super::free(); } @@ -148,22 +148,22 @@ IODataQueueEntry * IOSharedDataQueue::peek() IODataQueueEntry *entry = 0; if (dataQueue && (dataQueue->head != dataQueue->tail)) { - IODataQueueEntry * head = 0; + IODataQueueEntry * head = 0; UInt32 headSize = 0; UInt32 headOffset = dataQueue->head; UInt32 queueSize = getQueueSize(); - + if (headOffset >= queueSize) { return NULL; } - head = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset); - headSize = head->size; + head = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset); + headSize = head->size; - // Check if there's enough room before the end of the queue for a header. + // Check if there's enough room before the end of the queue for a header. // If there is room, check if there's enough room to hold the header and // the data. 
- + if ((headOffset > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) || (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize) || (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headSize) || @@ -276,17 +276,17 @@ Boolean IOSharedDataQueue::dequeue(void *data, UInt32 *dataSize) if (dataQueue) { if (dataQueue->head != dataQueue->tail) { - IODataQueueEntry * head = 0; + IODataQueueEntry * head = 0; UInt32 headSize = 0; UInt32 headOffset = dataQueue->head; UInt32 queueSize = getQueueSize(); - + if (headOffset > queueSize) { return false; } - head = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset); - headSize = head->size; + head = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset); + headSize = head->size; // we wrapped around to beginning, so read from there // either there was not even room for the header @@ -316,7 +316,7 @@ Boolean IOSharedDataQueue::dequeue(void *data, UInt32 *dataSize) newHeadOffset = headOffset + entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE; } } - + if (entry) { if (data) { if (dataSize) { diff --git a/iokit/Kernel/IOSimpleReporter.cpp b/iokit/Kernel/IOSimpleReporter.cpp new file mode 100644 index 000000000..81c8232eb --- /dev/null +++ b/iokit/Kernel/IOSimpleReporter.cpp @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2012-2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include "IOReporterDefs.h" + +#define super IOReporter +OSDefineMetaClassAndStructors(IOSimpleReporter, IOReporter); + +/* static */ +IOSimpleReporter* +IOSimpleReporter::with(IOService *reportingService, + IOReportCategories categories, + IOReportUnits unit) +{ + IOSimpleReporter *reporter, *rval = NULL; + + // kprintf("%s\n", __func__); // can't IORLOG() from static + + reporter = new IOSimpleReporter; + if (!reporter) goto finish; + + + if (!reporter->initWith(reportingService, categories, unit)) { + goto finish; + } + + // success + rval = reporter; + +finish: + if (!rval) { + if (reporter) delete reporter; + } + + return rval; +} + +bool +IOSimpleReporter::initWith(IOService *reportingService, + IOReportCategories categories, + IOReportUnits unit) +{ + // fully specify the channel type for the superclass + IOReportChannelType channelType = { + .categories = categories, + .report_format = kIOReportFormatSimple, + .nelements = 1, + .element_idx = 0 + }; + + return super::init(reportingService, channelType, unit); +} + + +IOReturn +IOSimpleReporter::setValue(uint64_t channel_id, + int64_t value) +{ + IOReturn res = kIOReturnError; + IOSimpleReportValues simple_values; + int element_index = 0; + + lockReporter(); + + if (getFirstElementIndex(channel_id, &element_index) != kIOReturnSuccess) { + res = kIOReturnBadArgument; + goto finish; + } + + + if (copyElementValues(element_index, (IOReportElementValues *)&simple_values) != kIOReturnSuccess) { + res = kIOReturnBadArgument; + goto finish; + } + + simple_values.simple_value = value; + res = setElementValues(element_index, (IOReportElementValues *)&simple_values); + +finish: + unlockReporter(); + return res; +} + + +IOReturn +IOSimpleReporter::incrementValue(uint64_t channel_id, + int64_t increment) +{ + IOReturn res = kIOReturnError; + IOSimpleReportValues simple_values; + int element_index = 0; + + lockReporter(); + + if (getFirstElementIndex(channel_id, &element_index) != kIOReturnSuccess) { + res = kIOReturnBadArgument; + goto finish; + } + + if (copyElementValues(element_index, (IOReportElementValues *)&simple_values) != kIOReturnSuccess){ + res = kIOReturnBadArgument; + goto finish; + } + + simple_values.simple_value += increment; + + res = setElementValues(element_index, (IOReportElementValues *)&simple_values); + +finish: + unlockReporter(); + return res; +} + +int64_t +IOSimpleReporter::getValue(uint64_t channel_id) +{ + IOSimpleReportValues *values = NULL; + int64_t simple_value = (int64_t)kIOReportInvalidValue; + int index = 0; + + lockReporter(); + + if (getFirstElementIndex(channel_id, &index) == kIOReturnSuccess) { + + values = (IOSimpleReportValues *)getElementValues(index); + + if (values != NULL) + simple_value = values->simple_value; + } + + unlockReporter(); + return simple_value; +} + diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp index 4a218304d..787a69bf2 100644 --- a/iokit/Kernel/IOStartIOKit.cpp +++ b/iokit/Kernel/IOStartIOKit.cpp @@ -42,11 +42,15 @@ #include #include #include +#include #include #include "IOKitKernelInternal.h" +const OSSymbol * gIOProgressBackbufferKey; +OSSet * gIORemoveOnReadProperties; + extern "C" { extern void OSlibkernInit (void); @@ -153,6 +157,11 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) IOLibInit(); OSlibkernInit(); + gIOProgressBackbufferKey = OSSymbol::withCStringNoCopy(kIOProgressBackbufferKey); + gIORemoveOnReadProperties = OSSet::withObjects((const OSObject **) 
&gIOProgressBackbufferKey, 1); + + interruptAccountingInit(); + rootNub = new IOPlatformExpertDevice; if( rootNub && rootNub->initWithArgs( p1, p2, p3, p4)) { @@ -194,4 +203,17 @@ IORegistrySetOSBuildVersion(char * build_version) return; } +void +IORecordProgressBackbuffer(void * buffer, size_t size, uint32_t theme) +{ + IORegistryEntry * chosen; + if ((chosen = IORegistryEntry::fromPath(kIODeviceTreePlane ":/chosen"))) + { + chosen->setProperty(kIOProgressBackbufferKey, buffer, size); + chosen->setProperty(kIOProgressColorThemeKey, theme, 32); + + chosen->release(); + } +} + }; /* extern "C" */ diff --git a/iokit/Kernel/IOStateReporter.cpp b/iokit/Kernel/IOStateReporter.cpp new file mode 100644 index 000000000..0eaeb6f1d --- /dev/null +++ b/iokit/Kernel/IOStateReporter.cpp @@ -0,0 +1,888 @@ +/* + * Copyright (c) 2012-2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include "IOReporterDefs.h" + + +#define super IOReporter +OSDefineMetaClassAndStructors(IOStateReporter, IOReporter); + + +/* static */ +IOStateReporter* +IOStateReporter::with(IOService *reportingService, + IOReportCategories categories, + int nstates, + IOReportUnits unit/* = kIOReportUnitHWTicks*/) +{ + IOStateReporter *reporter, *rval = NULL; + + // kprintf("%s\n", __func__); // can't IORLOG() from static + + reporter = new IOStateReporter; + if (!reporter) goto finish; + + if (!reporter->initWith(reportingService, categories, nstates, unit)) { + goto finish; + } + + // success + rval = reporter; + +finish: + if (!rval) { + if (reporter) delete reporter; + } + + return rval; +} + +bool +IOStateReporter::initWith(IOService *reportingService, + IOReportCategories categories, + int16_t nstates, + IOReportUnits unit) +{ + bool success = false; + + IOReportChannelType channelType = { + .categories = categories, + .report_format = kIOReportFormatState, + .nelements = static_cast(nstates), + .element_idx = 0 + }; + + if(super::init(reportingService, channelType, unit) != true) { + IORLOG("ERROR super::initWith failed"); + success = false; + goto finish; + } + + _currentStates = NULL; + _lastUpdateTimes = NULL; + + success = true; + +finish: + return success; +} + + +void +IOStateReporter::free(void) +{ + if (_currentStates) { + PREFL_MEMOP_PANIC(_nChannels, int); + IOFree(_currentStates, (size_t)_nChannels * sizeof(int)); + } + if (_lastUpdateTimes) { + PREFL_MEMOP_PANIC(_nChannels, uint64_t); + IOFree(_lastUpdateTimes, (size_t)_nChannels * sizeof(uint64_t)); + } + + super::free(); +} + + +IOReturn +IOStateReporter::handleSwapPrepare(int newNChannels) +{ + IOReturn res = kIOReturnError; + size_t newCurStatesSize, newTSSize; + + //IORLOG("handleSwapPrepare (state) _nChannels before = %u", _nChannels); + + IOREPORTER_CHECK_CONFIG_LOCK(); + + if (_swapCurrentStates || _swapLastUpdateTimes) { + panic("IOStateReporter::_swap* already in use"); + } + + // new currentStates buffer + PREFL_MEMOP_FAIL(newNChannels, int); + newCurStatesSize = (size_t)newNChannels * sizeof(int); + _swapCurrentStates = (int*)IOMalloc(newCurStatesSize); + if (_swapCurrentStates == NULL) { + res = kIOReturnNoMemory; goto finish; + } + memset(_swapCurrentStates, -1, newCurStatesSize); // init w/"no state" + + // new timestamps buffer + PREFL_MEMOP_FAIL(newNChannels, uint64_t); + newTSSize = (size_t)newNChannels * sizeof(uint64_t); + _swapLastUpdateTimes = (uint64_t *)IOMalloc(newTSSize); + if (_swapLastUpdateTimes == NULL) { + res = kIOReturnNoMemory; goto finish; + } + memset(_swapLastUpdateTimes, 0, newTSSize); + + res = super::handleSwapPrepare(newNChannels); + +finish: + if (res) { + if (_swapCurrentStates) { + IOFree(_swapCurrentStates, newCurStatesSize); + _swapCurrentStates = NULL; + } + if (_swapLastUpdateTimes) { + IOFree(_swapLastUpdateTimes, newTSSize); + _swapLastUpdateTimes = NULL; + } + } + + return res; +} + +IOReturn +IOStateReporter::handleAddChannelSwap(uint64_t channelID, + const OSSymbol *symChannelName) +{ + IOReturn res = kIOReturnError; + int cnt; + int *tmpCurStates; + uint64_t *tmpTimestamps; + bool swapComplete = false; + + //IORLOG("IOStateReporter::handleSwap"); + + if (!_swapCurrentStates || !_swapLastUpdateTimes) { + IORLOG("IOReporter::handleSwap ERROR swap variables uninitialized!"); + goto finish; + } + + IOREPORTER_CHECK_CONFIG_LOCK(); + IOREPORTER_CHECK_LOCK(); + + // Copy any existing buffers + if (_currentStates) 
{ + PREFL_MEMOP_FAIL(_nChannels, int); + memcpy(_swapCurrentStates, _currentStates, + (size_t)_nChannels * sizeof(int)); + + if (!_lastUpdateTimes) { + panic("IOStateReporter::handleAddChannelSwap _lastUpdateTimes unset despite non-NULL _currentStates"); + } + PREFL_MEMOP_FAIL(_nChannels, uint64_t); + memcpy(_swapLastUpdateTimes, _lastUpdateTimes, + (size_t)_nChannels * sizeof(uint64_t)); + } + + // Update principal instance variables, keep old values in _swap* for cleanup + tmpCurStates = _currentStates; + _currentStates = _swapCurrentStates; + _swapCurrentStates = tmpCurStates; + + tmpTimestamps = _lastUpdateTimes; + _lastUpdateTimes = _swapLastUpdateTimes; + _swapLastUpdateTimes = tmpTimestamps; + + swapComplete = true; + + // subclass success + + // invoke superclass(es): base class updates _nChannels & _nElements + res = super::handleAddChannelSwap(channelID, symChannelName); + if (res) { + IORLOG("handleSwap(state) ERROR super::handleSwap failed!"); + goto finish; + } + + // Channel added successfully, initialize the new channel's state_ids to 0..nStates-1 + for (cnt = 0; cnt < _channelDimension; cnt++) { + handleSetStateID(channelID, cnt, (uint64_t)cnt); + } + +finish: + if (res && swapComplete) { + // unswap so the unused buffers get cleaned up + tmpCurStates = _currentStates; + _currentStates = _swapCurrentStates; + _swapCurrentStates = tmpCurStates; + + tmpTimestamps = _lastUpdateTimes; + _lastUpdateTimes = _swapLastUpdateTimes; + _swapLastUpdateTimes = tmpTimestamps; + } + + return res; +} + + +void +IOStateReporter::handleSwapCleanup(int swapNChannels) +{ + IOREPORTER_CHECK_CONFIG_LOCK(); + + super::handleSwapCleanup(swapNChannels); + + if (_swapCurrentStates) { + PREFL_MEMOP_PANIC(swapNChannels, int); + IOFree(_swapCurrentStates, (size_t)swapNChannels * sizeof(int)); + _swapCurrentStates = NULL; + } + if (_swapLastUpdateTimes) { + PREFL_MEMOP_PANIC(swapNChannels, uint64_t); + IOFree(_swapLastUpdateTimes, (size_t)swapNChannels * sizeof(uint64_t)); + _swapLastUpdateTimes = NULL; + } +} + + +IOReturn +IOStateReporter::_getStateIndices(uint64_t channel_id, + uint64_t state_id, + int *channel_index, + int *state_index) +{ + IOReturn res = kIOReturnError; + int cnt; + IOStateReportValues *values; + int element_index = 0; + + IOREPORTER_CHECK_LOCK(); + + if (getChannelIndices(channel_id, + channel_index, + &element_index) != kIOReturnSuccess) { + res = kIOReturnBadArgument; + + goto finish; + } + + for (cnt = 0; cnt < _channelDimension; cnt++) { + + values = (IOStateReportValues *)getElementValues(element_index + cnt); + + if (values == NULL) { + + res = kIOReturnError; + goto finish; + } + + if (values->state_id == state_id) { + *state_index = cnt; + res = kIOReturnSuccess; + goto finish; + } + } + + res = kIOReturnBadArgument; + +finish: + return res; +} + + +IOReturn +IOStateReporter::setChannelState(uint64_t channel_id, + uint64_t new_state_id) +{ + IOReturn res = kIOReturnError; + int channel_index, new_state_index; + uint64_t last_intransition = 0; + uint64_t prev_state_residency = 0; + + lockReporter(); + + if (_getStateIndices(channel_id, new_state_id, &channel_index, &new_state_index) == kIOReturnSuccess) { + res = handleSetStateByIndices(channel_index, new_state_index, + last_intransition, + prev_state_residency); + goto finish; + } + + res = kIOReturnBadArgument; + +finish: + unlockReporter(); + return res; +} + +IOReturn +IOStateReporter::setChannelState(uint64_t channel_id, + uint64_t new_state_id, + uint64_t last_intransition, + uint64_t prev_state_residency) +{ + 
return setChannelState(channel_id, new_state_id); +} + +IOReturn +IOStateReporter::overrideChannelState(uint64_t channel_id, + uint64_t state_id, + uint64_t time_in_state, + uint64_t intransitions, + uint64_t last_intransition /*=0*/) +{ + IOReturn res = kIOReturnError; + int channel_index, state_index; + + lockReporter(); + + if (_getStateIndices(channel_id, state_id, &channel_index, &state_index) == kIOReturnSuccess) { + + if (_lastUpdateTimes[channel_index]) { + panic("overrideChannelState() cannot be used after setChannelState()!\n"); + } + + res = handleOverrideChannelStateByIndices(channel_index, state_index, + time_in_state, intransitions, + last_intransition); + goto finish; + } + + res = kIOReturnBadArgument; + +finish: + unlockReporter(); + return res; +} + + +IOReturn +IOStateReporter::handleOverrideChannelStateByIndices(int channel_index, + int state_index, + uint64_t time_in_state, + uint64_t intransitions, + uint64_t last_intransition /*=0*/) +{ + IOReturn kerr, result = kIOReturnError; + IOStateReportValues state_values; + int element_index; + + if (channel_index < 0 || channel_index >= _nChannels) { + result = kIOReturnBadArgument; goto finish; + } + + if (channel_index < 0 || channel_index > (_nElements - state_index) + / _channelDimension) { + result = kIOReturnOverrun; goto finish; + } + element_index = channel_index * _channelDimension + state_index; + + kerr = copyElementValues(element_index,(IOReportElementValues*)&state_values); + if (kerr) { + result = kerr; goto finish; + } + + // last_intransition = 0 -> no current state ("residency summary only") + state_values.last_intransition = last_intransition; + state_values.intransitions = intransitions; + state_values.upticks = time_in_state; + + // determines current time for metadata + kerr = setElementValues(element_index, (IOReportElementValues *)&state_values); + if (kerr) { + result = kerr; goto finish; + } + + // success + result = kIOReturnSuccess; + +finish: + return result; +} + + +IOReturn +IOStateReporter::incrementChannelState(uint64_t channel_id, + uint64_t state_id, + uint64_t time_in_state, + uint64_t intransitions, + uint64_t last_intransition /*=0*/) +{ + IOReturn res = kIOReturnError; + int channel_index, state_index; + + lockReporter(); + + if (_getStateIndices(channel_id, state_id, &channel_index, &state_index) == kIOReturnSuccess) { + + if (_lastUpdateTimes[channel_index]) { + panic("incrementChannelState() cannot be used after setChannelState()!\n"); + } + + res = handleIncrementChannelStateByIndices(channel_index, state_index, + time_in_state, intransitions, + last_intransition); + goto finish; + } + + res = kIOReturnBadArgument; + +finish: + unlockReporter(); + return res; + +} + + +IOReturn +IOStateReporter::handleIncrementChannelStateByIndices(int channel_index, + int state_index, + uint64_t time_in_state, + uint64_t intransitions, + uint64_t last_intransition /*=0*/) +{ + IOReturn kerr, result = kIOReturnError; + IOStateReportValues state_values; + int element_index; + + if (channel_index < 0 || channel_index >= _nChannels) { + result = kIOReturnBadArgument; goto finish; + } + + if (channel_index < 0 || channel_index > (_nElements - state_index) + / _channelDimension) { + result = kIOReturnOverrun; goto finish; + } + element_index = channel_index * _channelDimension + state_index; + + kerr = copyElementValues(element_index,(IOReportElementValues*)&state_values); + if (kerr) { + result = kerr; + goto finish; + } + + state_values.last_intransition = last_intransition; + state_values.intransitions 
+= intransitions; + state_values.upticks += time_in_state; + + // determines current time for metadata + kerr = setElementValues(element_index, (IOReportElementValues *)&state_values); + if (kerr) { + result = kerr; + goto finish; + } + + // success + result = kIOReturnSuccess; + +finish: + return result; +} + + +IOReturn +IOStateReporter::setState(uint64_t new_state_id) +{ + uint64_t last_intransition = 0; + uint64_t prev_state_residency = 0; + IOReturn res = kIOReturnError; + IOStateReportValues *values; + int channel_index = 0, element_index = 0, new_state_index = 0; + int cnt; + + lockReporter(); + + if (_nChannels == 1) { + + for (cnt = 0; cnt < _channelDimension; cnt++) { + + new_state_index = element_index + cnt; + + values = (IOStateReportValues *)getElementValues(new_state_index); + + if (values == NULL) { + res = kIOReturnError; + goto finish; + } + + if (values->state_id == new_state_id) { + + res = handleSetStateByIndices(channel_index, new_state_index, + last_intransition, + prev_state_residency); + goto finish; + } + } + } + + res = kIOReturnBadArgument; + +finish: + unlockReporter(); + return res; +} + +IOReturn +IOStateReporter::setState(uint64_t new_state_id, + uint64_t last_intransition, + uint64_t prev_state_residency) +{ + return setState(new_state_id); +} + +IOReturn +IOStateReporter::setStateID(uint64_t channel_id, + int state_index, + uint64_t state_id) +{ + IOReturn res = kIOReturnError; + + lockReporter(); + + res = handleSetStateID(channel_id, state_index, state_id); + + unlockReporter(); + + return res; +} + + +IOReturn +IOStateReporter::handleSetStateID(uint64_t channel_id, + int state_index, + uint64_t state_id) +{ + IOReturn res = kIOReturnError; + IOStateReportValues state_values; + int element_index = 0; + + IOREPORTER_CHECK_LOCK(); + + if (getFirstElementIndex(channel_id, &element_index) == kIOReturnSuccess) { + + if (state_index >= _channelDimension) { + res = kIOReturnBadArgument; goto finish; + } + if (_nElements - state_index <= element_index) { + res = kIOReturnOverrun; goto finish; + } + element_index += state_index; + + if (copyElementValues(element_index, (IOReportElementValues *)&state_values) != kIOReturnSuccess) { + res = kIOReturnBadArgument; + goto finish; + } + + state_values.state_id = state_id; + + res = setElementValues(element_index, (IOReportElementValues *)&state_values); + } + + // FIXME: set a bit somewhere (reporter-wide?) 
that state_ids can no longer be + // assumed to be contiguous +finish: + return res; +} + +IOReturn +IOStateReporter::setStateByIndices(int channel_index, + int new_state_index) +{ + IOReturn res = kIOReturnError; + uint64_t last_intransition = 0; + uint64_t prev_state_residency = 0; + + lockReporter(); + + res = handleSetStateByIndices(channel_index, new_state_index, + last_intransition, prev_state_residency); + + unlockReporter(); + + return res; +} + +IOReturn +IOStateReporter::setStateByIndices(int channel_index, + int new_state_index, + uint64_t last_intransition, + uint64_t prev_state_residency) +{ + return setStateByIndices(channel_index, new_state_index); +} + +IOReturn +IOStateReporter::handleSetStateByIndices(int channel_index, + int new_state_index, + uint64_t last_intransition, + uint64_t prev_state_residency) +{ + IOReturn res = kIOReturnError; + + IOStateReportValues curr_state_values, new_state_values; + int curr_state_index = 0; + int curr_element_index, new_element_index; + uint64_t last_ch_update_time = 0; + uint64_t recordTime = mach_absolute_time(); + + IOREPORTER_CHECK_LOCK(); + + if (channel_index < 0 || channel_index >= _nChannels) { + res = kIOReturnBadArgument; goto finish; + } + + // if no timestamp provided, last_intransition = time of recording (now) + if (last_intransition == 0) { + last_intransition = recordTime; + } + + // First update target state if different than the current state + // _currentStates[] initialized to -1 to detect first state transition + curr_state_index = _currentStates[channel_index]; + if (new_state_index != curr_state_index) { + // fetch element data + if (channel_index < 0 || channel_index > (_nElements-new_state_index) + / _channelDimension) { + res = kIOReturnOverrun; goto finish; + } + new_element_index = channel_index*_channelDimension + new_state_index; + if (copyElementValues(new_element_index, + (IOReportElementValues *)&new_state_values)) { + res = kIOReturnBadArgument; + goto finish; + } + + // Update new state's transition info + new_state_values.intransitions += 1; + new_state_values.last_intransition = last_intransition; + + // and store the values + res = setElementValues(new_element_index, + (IOReportElementValues *)&new_state_values, + recordTime); + + if (res != kIOReturnSuccess) { + goto finish; + } + + _currentStates[channel_index] = new_state_index; + } + + /* Now update time spent in any previous state + If new_state_index = curr_state_index, this updates time in the + current state. If this is the channel's first state transition, + the last update time will be zero. + + Note: While setState() should never be called on a channel being + updated with increment/overrideChannelState(), that's another way + that the last update time might not exist. Regardless, if there + is no basis for determining time spent in previous state, there's + nothing to update! 
+ */ + last_ch_update_time = _lastUpdateTimes[channel_index]; + if (last_ch_update_time != 0) { + if (channel_index < 0 || channel_index > (_nElements-curr_state_index) + / _channelDimension) { + res = kIOReturnOverrun; goto finish; + } + curr_element_index = channel_index*_channelDimension + curr_state_index; + if (copyElementValues(curr_element_index, + (IOReportElementValues *)&curr_state_values)) { + res = kIOReturnBadArgument; + goto finish; + } + // compute the time spent in previous state, unless provided + if (prev_state_residency == 0) { + prev_state_residency = last_intransition - last_ch_update_time; + } + + curr_state_values.upticks += prev_state_residency; + + res = setElementValues(curr_element_index, + (IOReportElementValues*)&curr_state_values, + recordTime); + + if (res != kIOReturnSuccess) { + goto finish; + } + } + + // record basis for next "time in prior state" calculation + // (also arms a panic in override/incrementChannelState()) + _lastUpdateTimes[channel_index] = last_intransition; + +finish: + return res; +} + + +// blocks might make this slightly easier? +uint64_t +IOStateReporter::getStateInTransitions(uint64_t channel_id, + uint64_t state_id) +{ + return _getStateValue(channel_id, state_id, kInTransitions); +} + +uint64_t +IOStateReporter::getStateResidencyTime(uint64_t channel_id, + uint64_t state_id) +{ + return _getStateValue(channel_id, state_id, kResidencyTime); +} + +uint64_t +IOStateReporter::getStateLastTransitionTime(uint64_t channel_id, + uint64_t state_id) +{ + return _getStateValue(channel_id, state_id, kLastTransitionTime); +} + +uint64_t +IOStateReporter::_getStateValue(uint64_t channel_id, + uint64_t state_id, + enum valueSelector value) +{ + int channel_index = 0, element_index = 0, cnt; + IOStateReportValues *values = NULL; + uint64_t result = kIOReportInvalidValue; + + lockReporter(); + + if (getChannelIndices(channel_id, &channel_index, &element_index) == kIOReturnSuccess) { + + if (updateChannelValues(channel_index) == kIOReturnSuccess) { + + for (cnt = 0; cnt < _channelDimension; cnt++) { + + values = (IOStateReportValues *)getElementValues(element_index); + + if (state_id == values->state_id) { + + switch (value) { + case kInTransitions: + result = values->intransitions; + break; + case kResidencyTime: + result = values->upticks; + break; + case kLastTransitionTime: + result = values->last_intransition; + default: + break; + } + + break; + } + + element_index++; + } + } + } + + unlockReporter(); + return result; +} + + +uint64_t +IOStateReporter::getStateLastChannelUpdateTime(uint64_t channel_id) +{ + int channel_index; + uint64_t result = kIOReportInvalidValue; + + lockReporter(); + + if (getChannelIndex(channel_id, &channel_index) == kIOReturnSuccess) { + + result = _lastUpdateTimes[channel_index]; + } + + unlockReporter(); + + return result; +} + + +/* updateChannelValues() is called to refresh state before being + reported outside the reporter. In the case of IOStateReporter, + this is primarily an update to the "time in state" data. +*/ +IOReturn +IOStateReporter::updateChannelValues(int channel_index) +{ + IOReturn kerr, result = kIOReturnError; + + int state_index, element_idx; + uint64_t currentTime; + uint64_t last_ch_update_time; + uint64_t time_in_state; + IOStateReportValues state_values; + + IOREPORTER_CHECK_LOCK(); + + if (channel_index < 0 || channel_index >= _nChannels) { + result = kIOReturnBadArgument; goto finish; + } + + /* First check to see whether this channel has begun self- + calculation of time in state. 
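   (A channel begins that self-calculation with its first setState() or
   setStateByIndices() call, which records a non-zero timestamp in
   _lastUpdateTimes[channel_index]; until then the slot holds 0.)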
It's possible this channel + has yet to be initialized or that the driver is updating + the channel with override/incrementChannelState() which + never enable automatic time-in-state updates. In that case, + there is nothing to update and we return success. + */ + last_ch_update_time = _lastUpdateTimes[channel_index]; + if (last_ch_update_time == 0) { + result = kIOReturnSuccess; goto finish; + } + + // figure out the current state (if any) + state_index = _currentStates[channel_index]; + + // e.g. given 4 4-state channels, the boundary is ch[3].st[3] <- _elems[15] + if (channel_index < 0 || channel_index > (_nElements - state_index) + / _channelDimension) { + result = kIOReturnOverrun; goto finish; + } + element_idx = channel_index * _channelDimension + state_index; + + // get the current values + kerr = copyElementValues(element_idx,(IOReportElementValues*)&state_values); + if (kerr) { + result = kerr; goto finish; + } + + // calculate time in state + currentTime = mach_absolute_time(); + time_in_state = currentTime - last_ch_update_time; + state_values.upticks += time_in_state; + + // and store the values + kerr = setElementValues(element_idx, + (IOReportElementValues *)&state_values, + currentTime); + if (kerr) { + result = kerr; goto finish; + } + + // Record basis for next "prior time" calculation + _lastUpdateTimes[channel_index] = currentTime; + + + // success + result = kIOReturnSuccess; + +finish: + return result; +} diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp index dcc6e69e2..9f3587844 100644 --- a/iokit/Kernel/IOUserClient.cpp +++ b/iokit/Kernel/IOUserClient.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2012 Apple Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,6 +43,7 @@ #include #include #include +#include #if CONFIG_MACF @@ -372,7 +373,6 @@ public: }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - extern "C" { // functions called from osfmk/device/iokit_rpc.c @@ -825,6 +825,15 @@ IOReturn IOServiceMessageUserNotification::handler( void * ref, argSize = kIOUserNotifyMaxMessageSize; bcopy( messageArgument, data->messageArgument, argSize ); } + + // adjust message size for ipc restrictions + natural_t type; + type = pingMsg->notifyHeader.type; + type &= ~(kIOKitNoticationMsgSizeMask << kIOKitNoticationTypeSizeAdjShift); + type |= ((argSize & kIOKitNoticationMsgSizeMask) << kIOKitNoticationTypeSizeAdjShift); + pingMsg->notifyHeader.type = type; + argSize = (argSize + kIOKitNoticationMsgSizeMask) & ~kIOKitNoticationMsgSizeMask; + pingMsg->msgHdr.msgh_size = msgSize - pingMsg->notifyHeader.size + sizeof( IOServiceInterestContent64 ) - sizeof( data->messageArgument) @@ -944,6 +953,29 @@ static OSDictionary * CopyUserOnConsole(void) return (user); } +IOReturn IOUserClient::clientHasAuthorization( task_t task, + IOService * service ) +{ + proc_t p; + + p = (proc_t) get_bsdtask_info(task); + if (p) + { + uint64_t authorizationID; + + authorizationID = proc_uniqueid(p); + if (authorizationID) + { + if (service->getAuthorizationID() == authorizationID) + { + return (kIOReturnSuccess); + } + } + } + + return (kIOReturnNotPermitted); +} + IOReturn IOUserClient::clientHasPrivilege( void * securityToken, const char * privilegeName ) { @@ -958,10 +990,9 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken, if (!strncmp(privilegeName, kIOClientPrivilegeForeground, sizeof(kIOClientPrivilegeForeground))) { - /* is graphics access 
denied for current task? */ - if (proc_get_effective_task_policy(current_task(), TASK_POLICY_GPU_DENY) != 0) + if (task_is_gpu_denied(current_task())) return (kIOReturnNotPrivileged); - else + else return (kIOReturnSuccess); } @@ -1038,6 +1069,82 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken, return (kr); } +OSObject * IOUserClient::copyClientEntitlement( task_t task, + const char * entitlement ) +{ +#define MAX_ENTITLEMENTS_LEN (128 * 1024) + + proc_t p = NULL; + pid_t pid = 0; + char procname[MAXCOMLEN + 1] = ""; + size_t len = 0; + void *entitlements_blob = NULL; + char *entitlements_data = NULL; + OSObject *entitlements_obj = NULL; + OSDictionary *entitlements = NULL; + OSString *errorString = NULL; + OSObject *value = NULL; + + p = (proc_t)get_bsdtask_info(task); + if (p == NULL) + goto fail; + pid = proc_pid(p); + proc_name(pid, procname, (int)sizeof(procname)); + + if (cs_entitlements_blob_get(p, &entitlements_blob, &len) != 0) + goto fail; + + if (len <= offsetof(CS_GenericBlob, data)) + goto fail; + + /* + * Per , enforce a limit on the amount of XML + * we'll try to parse in the kernel. + */ + len -= offsetof(CS_GenericBlob, data); + if (len > MAX_ENTITLEMENTS_LEN) { + IOLog("failed to parse entitlements for %s[%u]: %lu bytes of entitlements exceeds maximum of %u\n", procname, pid, len, MAX_ENTITLEMENTS_LEN); + goto fail; + } + + /* + * OSUnserializeXML() expects a nul-terminated string, but that isn't + * what is stored in the entitlements blob. Copy the string and + * terminate it. + */ + entitlements_data = (char *)IOMalloc(len + 1); + if (entitlements_data == NULL) + goto fail; + memcpy(entitlements_data, ((CS_GenericBlob *)entitlements_blob)->data, len); + entitlements_data[len] = '\0'; + + entitlements_obj = OSUnserializeXML(entitlements_data, len + 1, &errorString); + if (errorString != NULL) { + IOLog("failed to parse entitlements for %s[%u]: %s\n", procname, pid, errorString->getCStringNoCopy()); + goto fail; + } + if (entitlements_obj == NULL) + goto fail; + + entitlements = OSDynamicCast(OSDictionary, entitlements_obj); + if (entitlements == NULL) + goto fail; + + /* Fetch the entitlement value from the dictionary. 
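   The object returned below is retained for the caller. A typical caller
   therefore looks like this sketch (the entitlement name is hypothetical):

     OSObject * obj = IOUserClient::copyClientEntitlement(task,
                          "com.example.driver-access");
     bool allowed = (obj == kOSBooleanTrue);
     if (obj) obj->release();   // caller owns the returned reference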
*/ + value = entitlements->getObject(entitlement); + if (value != NULL) + value->retain(); + +fail: + if (entitlements_data != NULL) + IOFree(entitlements_data, len + 1); + if (entitlements_obj != NULL) + entitlements_obj->release(); + if (errorString != NULL) + errorString->release(); + return value; +} + bool IOUserClient::init() { if (getPropertyTable() || super::init()) @@ -1396,6 +1503,17 @@ extern "C" { if( !(out = OSDynamicCast( cls, obj))) \ return( kIOReturnBadArgument ) +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* Routine io_server_version */ +kern_return_t is_io_server_version( + mach_port_t master_port, + uint64_t *version) +{ + *version = IOKIT_SERVER_VERSION; + return (kIOReturnSuccess); +} + /* Routine io_object_get_class */ kern_return_t is_io_object_get_class( io_object_t object, @@ -1561,11 +1679,12 @@ kern_return_t is_io_iterator_is_valid( return( kIOReturnSuccess ); } -/* Routine io_service_match_property_table */ -kern_return_t is_io_service_match_property_table( + +static kern_return_t internal_io_service_match_property_table( io_service_t _service, - io_string_t matching, - boolean_t *matches ) + const char * matching, + mach_msg_type_number_t matching_size, + boolean_t *matches) { CHECK( IOService, _service, service ); @@ -1573,8 +1692,8 @@ kern_return_t is_io_service_match_property_table( OSObject * obj; OSDictionary * dict; - obj = OSUnserializeXML( matching ); - + obj = matching_size ? OSUnserializeXML(matching, matching_size) + : OSUnserializeXML(matching); if( (dict = OSDynamicCast( OSDictionary, obj))) { *matches = service->passiveMatch( dict ); kr = kIOReturnSuccess; @@ -1587,6 +1706,16 @@ kern_return_t is_io_service_match_property_table( return( kr ); } +/* Routine io_service_match_property_table */ +kern_return_t is_io_service_match_property_table( + io_service_t service, + io_string_t matching, + boolean_t *matches ) +{ + return (internal_io_service_match_property_table(service, matching, 0, matches)); +} + + /* Routine io_service_match_property_table_ool */ kern_return_t is_io_service_match_property_table_ool( io_object_t service, @@ -1604,18 +1733,28 @@ kern_return_t is_io_service_match_property_table_ool( if( KERN_SUCCESS == kr) { // must return success after vm_map_copyout() succeeds - *result = is_io_service_match_property_table( service, - (char *) data, matches ); + *result = internal_io_service_match_property_table(service, + (const char *)data, matchingCnt, matches ); vm_deallocate( kernel_map, data, matchingCnt ); } return( kr ); } -/* Routine io_service_get_matching_services */ -kern_return_t is_io_service_get_matching_services( +/* Routine io_service_match_property_table_bin */ +kern_return_t is_io_service_match_property_table_bin( + io_object_t service, + io_struct_inband_t matching, + mach_msg_type_number_t matchingCnt, + boolean_t *matches) +{ + return (internal_io_service_match_property_table(service, matching, matchingCnt, matches)); +} + +static kern_return_t internal_io_service_get_matching_services( mach_port_t master_port, - io_string_t matching, + const char * matching, + mach_msg_type_number_t matching_size, io_iterator_t *existing ) { kern_return_t kr; @@ -1625,8 +1764,8 @@ kern_return_t is_io_service_get_matching_services( if( master_port != master_device_port) return( kIOReturnNotPrivileged); - obj = OSUnserializeXML( matching ); - + obj = matching_size ? 
OSUnserializeXML(matching, matching_size) + : OSUnserializeXML(matching); if( (dict = OSDynamicCast( OSDictionary, obj))) { *existing = IOService::getMatchingServices( dict ); kr = kIOReturnSuccess; @@ -1639,6 +1778,15 @@ kern_return_t is_io_service_get_matching_services( return( kr ); } +/* Routine io_service_get_matching_services */ +kern_return_t is_io_service_get_matching_services( + mach_port_t master_port, + io_string_t matching, + io_iterator_t *existing ) +{ + return (internal_io_service_get_matching_services(master_port, matching, 0, existing)); +} + /* Routine io_service_get_matching_services_ool */ kern_return_t is_io_service_get_matching_services_ool( mach_port_t master_port, @@ -1656,19 +1804,29 @@ kern_return_t is_io_service_get_matching_services_ool( if( KERN_SUCCESS == kr) { // must return success after vm_map_copyout() succeeds - *result = is_io_service_get_matching_services( master_port, - (char *) data, existing ); + *result = internal_io_service_get_matching_services(master_port, + (const char *) data, matchingCnt, existing); vm_deallocate( kernel_map, data, matchingCnt ); } return( kr ); } +/* Routine io_service_get_matching_services_bin */ +kern_return_t is_io_service_get_matching_services_bin( + mach_port_t master_port, + io_struct_inband_t matching, + mach_msg_type_number_t matchingCnt, + io_object_t *existing) +{ + return (internal_io_service_get_matching_services(master_port, matching, matchingCnt, existing)); +} -/* Routine io_service_get_matching_service */ -kern_return_t is_io_service_get_matching_service( + +static kern_return_t internal_io_service_get_matching_service( mach_port_t master_port, - io_string_t matching, + const char * matching, + mach_msg_type_number_t matching_size, io_service_t *service ) { kern_return_t kr; @@ -1678,8 +1836,8 @@ kern_return_t is_io_service_get_matching_service( if( master_port != master_device_port) return( kIOReturnNotPrivileged); - obj = OSUnserializeXML( matching ); - + obj = matching_size ? OSUnserializeXML(matching, matching_size) + : OSUnserializeXML(matching); if( (dict = OSDynamicCast( OSDictionary, obj))) { *service = IOService::copyMatchingService( dict ); kr = *service ? 
kIOReturnSuccess : kIOReturnNotFound; @@ -1692,6 +1850,15 @@ kern_return_t is_io_service_get_matching_service( return( kr ); } +/* Routine io_service_get_matching_service */ +kern_return_t is_io_service_get_matching_service( + mach_port_t master_port, + io_string_t matching, + io_service_t *service ) +{ + return (internal_io_service_get_matching_service(master_port, matching, 0, service)); +} + /* Routine io_service_get_matching_services_ool */ kern_return_t is_io_service_get_matching_service_ool( mach_port_t master_port, @@ -1709,19 +1876,29 @@ kern_return_t is_io_service_get_matching_service_ool( if( KERN_SUCCESS == kr) { // must return success after vm_map_copyout() succeeds - *result = is_io_service_get_matching_service( master_port, - (char *) data, service ); + *result = internal_io_service_get_matching_service(master_port, + (const char *) data, matchingCnt, service ); vm_deallocate( kernel_map, data, matchingCnt ); } return( kr ); } +/* Routine io_service_get_matching_service_bin */ +kern_return_t is_io_service_get_matching_service_bin( + mach_port_t master_port, + io_struct_inband_t matching, + mach_msg_type_number_t matchingCnt, + io_object_t *service) +{ + return (internal_io_service_get_matching_service(master_port, matching, matchingCnt, service)); +} static kern_return_t internal_io_service_add_notification( mach_port_t master_port, io_name_t notification_type, - io_string_t matching, + const char * matching, + size_t matching_size, mach_port_t port, void * reference, vm_size_t referenceSize, @@ -1735,7 +1912,6 @@ static kern_return_t internal_io_service_add_notification( IOReturn err; unsigned long int userMsgType; - if( master_port != master_device_port) return( kIOReturnNotPrivileged); @@ -1745,8 +1921,16 @@ static kern_return_t internal_io_service_add_notification( if( !(sym = OSSymbol::withCString( notification_type ))) err = kIOReturnNoResources; - if( !(dict = OSDynamicCast( OSDictionary, - OSUnserializeXML( matching )))) { + if (matching_size) + { + dict = OSDynamicCast(OSDictionary, OSUnserializeXML(matching, matching_size)); + } + else + { + dict = OSDynamicCast(OSDictionary, OSUnserializeXML(matching)); + } + + if (!dict) { err = kIOReturnBadArgument; continue; } @@ -1804,7 +1988,7 @@ kern_return_t is_io_service_add_notification( io_object_t * notification ) { return (internal_io_service_add_notification(master_port, notification_type, - matching, port, &reference[0], sizeof(io_async_ref_t), + matching, 0, port, &reference[0], sizeof(io_async_ref_t), false, notification)); } @@ -1819,10 +2003,43 @@ kern_return_t is_io_service_add_notification_64( io_object_t *notification ) { return (internal_io_service_add_notification(master_port, notification_type, - matching, wake_port, &reference[0], sizeof(io_async_ref64_t), + matching, 0, wake_port, &reference[0], sizeof(io_async_ref64_t), true, notification)); } +/* Routine io_service_add_notification_bin */ +kern_return_t is_io_service_add_notification_bin +( + mach_port_t master_port, + io_name_t notification_type, + io_struct_inband_t matching, + mach_msg_type_number_t matchingCnt, + mach_port_t wake_port, + io_async_ref_t reference, + mach_msg_type_number_t referenceCnt, + io_object_t *notification) +{ + return (internal_io_service_add_notification(master_port, notification_type, + matching, matchingCnt, wake_port, &reference[0], sizeof(io_async_ref_t), + false, notification)); +} + +/* Routine io_service_add_notification_bin_64 */ +kern_return_t is_io_service_add_notification_bin_64 +( + mach_port_t master_port, + 
io_name_t notification_type, + io_struct_inband_t matching, + mach_msg_type_number_t matchingCnt, + mach_port_t wake_port, + io_async_ref64_t reference, + mach_msg_type_number_t referenceCnt, + io_object_t *notification) +{ + return (internal_io_service_add_notification(master_port, notification_type, + matching, matchingCnt, wake_port, &reference[0], sizeof(io_async_ref64_t), + true, notification)); +} static kern_return_t internal_io_service_add_notification_ool( mach_port_t master_port, @@ -1846,7 +2063,7 @@ static kern_return_t internal_io_service_add_notification_ool( if( KERN_SUCCESS == kr) { // must return success after vm_map_copyout() succeeds *result = internal_io_service_add_notification( master_port, notification_type, - (char *) data, wake_port, reference, referenceSize, client64, notification ); + (char *) data, matchingCnt, wake_port, reference, referenceSize, client64, notification ); vm_deallocate( kernel_map, data, matchingCnt ); } @@ -2230,6 +2447,11 @@ kern_return_t is_io_registry_entry_get_property_bytes( CHECK( IORegistryEntry, registry_entry, entry ); +#if CONFIG_MACF + if (0 != mac_iokit_check_get_property(kauth_cred_get(), entry, property_name)) + return kIOReturnNotPermitted; +#endif + obj = entry->copyProperty(property_name); if( !obj) return( kIOReturnNoResources ); @@ -2287,6 +2509,11 @@ kern_return_t is_io_registry_entry_get_property( CHECK( IORegistryEntry, registry_entry, entry ); +#if CONFIG_MACF + if (0 != mac_iokit_check_get_property(kauth_cred_get(), entry, property_name)) + return kIOReturnNotPermitted; +#endif + obj = entry->copyProperty(property_name); if( !obj) return( kIOReturnNotFound ); @@ -2296,7 +2523,6 @@ kern_return_t is_io_registry_entry_get_property( obj->release(); return( kIOReturnNoMemory ); } - s->clearText(); if( obj->serialize( s )) { len = s->getLength(); @@ -2327,6 +2553,11 @@ kern_return_t is_io_registry_entry_get_property_recursively( CHECK( IORegistryEntry, registry_entry, entry ); +#if CONFIG_MACF + if (0 != mac_iokit_check_get_property(kauth_cred_get(), entry, property_name)) + return kIOReturnNotPermitted; +#endif + obj = entry->copyProperty( property_name, IORegistryEntry::getPlane( plane ), options); if( !obj) @@ -2338,8 +2569,6 @@ kern_return_t is_io_registry_entry_get_property_recursively( return( kIOReturnNoMemory ); } - s->clearText(); - if( obj->serialize( s )) { len = s->getLength(); *propertiesCnt = len; @@ -2354,13 +2583,50 @@ kern_return_t is_io_registry_entry_get_property_recursively( return( err ); } +#if CONFIG_MACF + +static kern_return_t +filteredProperties(IORegistryEntry *entry, OSDictionary *properties, OSDictionary **filteredp) +{ + kern_return_t err = 0; + OSDictionary *filtered = NULL; + OSCollectionIterator *iter = NULL; + OSSymbol *key; + OSObject *p; + kauth_cred_t cred = kauth_cred_get(); + + if (properties == NULL) + return kIOReturnUnsupported; + + if ((iter = OSCollectionIterator::withCollection(properties)) == NULL || + (filtered = OSDictionary::withCapacity(properties->getCapacity())) == NULL) { + err = kIOReturnNoMemory; + goto out; + } + + while ((p = iter->getNextObject()) != NULL) { + if ((key = OSDynamicCast(OSSymbol, p)) == NULL || + mac_iokit_check_get_property(cred, entry, key->getCStringNoCopy()) != 0) + continue; + filtered->setObject(key, properties->getObject(key)); + } + +out: + if (iter != NULL) + iter->release(); + *filteredp = filtered; + return err; +} + +#endif + /* Routine io_registry_entry_get_properties */ kern_return_t is_io_registry_entry_get_properties( io_object_t 
registry_entry, io_buf_ptr_t *properties, mach_msg_type_number_t *propertiesCnt ) { - kern_return_t err; + kern_return_t err = 0; vm_size_t len; CHECK( IORegistryEntry, registry_entry, entry ); @@ -2369,17 +2635,166 @@ kern_return_t is_io_registry_entry_get_properties( if( !s) return( kIOReturnNoMemory ); - s->clearText(); + if (!entry->serializeProperties(s)) + err = kIOReturnUnsupported; + +#if CONFIG_MACF + if (!err && mac_iokit_check_filter_properties(kauth_cred_get(), entry)) { + OSObject *propobj = OSUnserializeXML(s->text(), s->getLength()); + OSDictionary *filteredprops = NULL; + err = filteredProperties(entry, OSDynamicCast(OSDictionary, propobj), &filteredprops); + if (propobj) propobj->release(); + + if (!err) { + s->clearText(); + if (!filteredprops->serialize(s)) + err = kIOReturnUnsupported; + } + if (filteredprops != NULL) + filteredprops->release(); + } +#endif /* CONFIG_MACF */ + + if (!err) { + len = s->getLength(); + *propertiesCnt = len; + err = copyoutkdata( s->text(), len, properties ); + } + + s->release(); + return( err ); +} + +#if CONFIG_MACF + +struct GetPropertiesEditorRef +{ + kauth_cred_t cred; + IORegistryEntry * entry; + OSCollection * root; +}; + +static const OSMetaClassBase * +GetPropertiesEditor(void * reference, + OSSerialize * s, + OSCollection * container, + const OSSymbol * name, + const OSMetaClassBase * value) +{ + GetPropertiesEditorRef * ref = (typeof(ref)) reference; + + if (!ref->root) ref->root = container; + if (ref->root == container) + { + if (0 != mac_iokit_check_get_property(ref->cred, ref->entry, name->getCStringNoCopy())) + { + value = 0; + } + } + if (value) value->retain(); + return (value); +} + +#endif /* CONFIG_MACF */ + +/* Routine io_registry_entry_get_properties */ +kern_return_t is_io_registry_entry_get_properties_bin( + io_object_t registry_entry, + io_buf_ptr_t *properties, + mach_msg_type_number_t *propertiesCnt) +{ + kern_return_t err = kIOReturnSuccess; + vm_size_t len; + OSSerialize * s; + OSSerialize::Editor editor = 0; + void * editRef = 0; + + CHECK(IORegistryEntry, registry_entry, entry); + +#if CONFIG_MACF + GetPropertiesEditorRef ref; + if (mac_iokit_check_filter_properties(kauth_cred_get(), entry)) + { + editor = &GetPropertiesEditor; + editRef = &ref; + ref.cred = kauth_cred_get(); + ref.entry = entry; + ref.root = 0; + } +#endif + + s = OSSerialize::binaryWithCapacity(4096, editor, editRef); + if (!s) return (kIOReturnNoMemory); + + if (!entry->serializeProperties(s)) err = kIOReturnUnsupported; + + if (kIOReturnSuccess == err) + { + len = s->getLength(); + *propertiesCnt = len; + err = copyoutkdata(s->text(), len, properties); + } + s->release(); + + return (err); +} + +/* Routine io_registry_entry_get_property_bin */ +kern_return_t is_io_registry_entry_get_property_bin( + io_object_t registry_entry, + io_name_t plane, + io_name_t property_name, + uint32_t options, + io_buf_ptr_t *properties, + mach_msg_type_number_t *propertiesCnt ) +{ + kern_return_t err; + vm_size_t len; + OSObject * obj; + const OSSymbol * sym; + + CHECK( IORegistryEntry, registry_entry, entry ); + +#if CONFIG_MACF + if (0 != mac_iokit_check_get_property(kauth_cred_get(), entry, property_name)) + return kIOReturnNotPermitted; +#endif + + if ((kIORegistryIterateRecursively & options) && plane[0]) + { + obj = entry->copyProperty(property_name, + IORegistryEntry::getPlane(plane), options); + } + else + { + obj = entry->copyProperty(property_name); + } + + if( !obj) + return( kIOReturnNotFound ); + + sym = OSSymbol::withCString(property_name); + 
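    // "Remove on read" support: if the property name is listed in
    // gIORemoveOnReadProperties, the registry entry drops its copy once a
    // client has fetched it, so a second read returns kIOReturnNotFound.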
if (sym) + { + if (gIORemoveOnReadProperties->containsObject(sym)) entry->removeProperty(sym); + sym->release(); + } + + OSSerialize * s = OSSerialize::binaryWithCapacity(4096); + if( !s) { + obj->release(); + return( kIOReturnNoMemory ); + } - if( entry->serializeProperties( s )) { + if( obj->serialize( s )) { len = s->getLength(); *propertiesCnt = len; err = copyoutkdata( s->text(), len, properties ); - } else - err = kIOReturnUnsupported; + } else err = kIOReturnUnsupported; s->release(); + obj->release(); return( err ); } @@ -2417,10 +2832,15 @@ kern_return_t is_io_registry_entry_set_properties #if CONFIG_MACF else if (0 != mac_iokit_check_set_properties(kauth_cred_get(), registry_entry, obj)) + { res = kIOReturnNotPermitted; + } #endif else - res = entry->setProperties( obj ); + { + res = entry->setProperties( obj ); + } + if (obj) obj->release(); } else @@ -2512,6 +2932,35 @@ kern_return_t is_io_service_request_probe( return( service->requestProbe( options )); } +/* Routine io_service_get_authorization_id */ +kern_return_t is_io_service_get_authorization_id( + io_object_t _service, + uint64_t *authorization_id ) +{ + kern_return_t kr; + + CHECK( IOService, _service, service ); + + kr = IOUserClient::clientHasPrivilege( (void *) current_task(), + kIOClientPrivilegeAdministrator ); + if( kIOReturnSuccess != kr) + return( kr ); + + *authorization_id = service->getAuthorizationID(); + + return( kr ); +} + +/* Routine io_service_set_authorization_id */ +kern_return_t is_io_service_set_authorization_id( + io_object_t _service, + uint64_t authorization_id ) +{ + CHECK( IOService, _service, service ); + + return( service->setAuthorizationID( authorization_id ) ); +} + /* Routine io_service_open_ndr */ kern_return_t is_io_service_open_extended( io_object_t _service, @@ -2738,8 +3187,8 @@ kern_return_t is_io_connect_map_memory( io_object_t connect, uint32_t type, task_t task, - vm_address_t * mapAddr, - vm_size_t * mapSize, + uint32_t * mapAddr, + uint32_t * mapSize, uint32_t flags ) { IOReturn err; @@ -2845,7 +3294,7 @@ kern_return_t is_io_connect_unmap_memory( io_object_t connect, uint32_t type, task_t task, - vm_address_t mapAddr ) + uint32_t mapAddr ) { IOReturn err; mach_vm_address_t address; @@ -2930,6 +3379,7 @@ kern_return_t is_io_connect_method_var_output args.scalarOutput = scalar_output; args.scalarOutputCount = *scalar_outputCnt; + bzero(&scalar_output[0], *scalar_outputCnt * sizeof(scalar_output[0])); args.structureOutput = inband_output; args.structureOutputSize = *inband_outputCnt; args.structureOutputDescriptor = NULL; @@ -3022,6 +3472,7 @@ kern_return_t is_io_connect_method args.scalarOutput = scalar_output; args.scalarOutputCount = *scalar_outputCnt; + bzero(&scalar_output[0], *scalar_outputCnt * sizeof(scalar_output[0])); args.structureOutput = inband_output; args.structureOutputSize = *inband_outputCnt; @@ -3104,6 +3555,7 @@ kern_return_t is_io_connect_async_method args.scalarOutput = scalar_output; args.scalarOutputCount = *scalar_outputCnt; + bzero(&scalar_output[0], *scalar_outputCnt * sizeof(scalar_output[0])); args.structureOutput = inband_output; args.structureOutputSize = *inband_outputCnt; @@ -3147,6 +3599,7 @@ kern_return_t is_io_connect_method_scalarI_scalarO( mach_msg_type_number_t struct_outputCnt = 0; mach_vm_size_t ool_output_size = 0; + bzero(&_output[0], sizeof(_output)); for (i = 0; i < inputCount; i++) _input[i] = SCALAR64(input[i]); @@ -3177,6 +3630,7 @@ kern_return_t shim_io_connect_method_scalarI_scalarO( IOReturn err; err = kIOReturnBadArgument; + 
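    // Zero the scalar output array before doing anything else: the whole
    // array is copied back to user space even when the shimmed method fills
    // in fewer scalars or fails outright, so uninitialized kernel stack
    // must never leak through the unused slots.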
bzero(&_output[0], sizeof(_output)); do { if( inputCount != method->count0) @@ -3258,6 +3712,7 @@ kern_return_t is_io_async_method_scalarI_scalarO( io_scalar_inband64_t _output; io_async_ref64_t _reference; + bzero(&_output[0], sizeof(_output)); for (i = 0; i < referenceCnt; i++) _reference[i] = REF64(reference[i]); @@ -3404,6 +3859,7 @@ kern_return_t shim_io_async_method_scalarI_scalarO( IOReturn err; io_async_ref_t reference; + bzero(&_output[0], sizeof(_output)); for (i = 0; i < asyncReferenceCount; i++) reference[i] = REF32(asyncReference[i]); @@ -4190,8 +4646,6 @@ kern_return_t is_io_catalog_get_data( if ( !s ) return kIOReturnNoMemory; - s->clearText(); - kr = gIOCatalogue->serializeData(flag, s); if ( kr == kIOReturnSuccess ) { @@ -4367,8 +4821,7 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume if (kIOUCForegroundOnly & method->flags) { - /* is graphics access denied for current task? */ - if (proc_get_effective_task_policy(current_task(), TASK_POLICY_GPU_DENY) != 0) + if (task_is_gpu_denied(current_task())) return (kIOReturnNotPermitted); } @@ -4417,10 +4870,8 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume if (kIOUCForegroundOnly & method->flags) { - /* is graphics access denied for current task? */ - if (proc_get_effective_task_policy(current_task(), TASK_POLICY_GPU_DENY) != 0) + if (task_is_gpu_denied(current_task())) return (kIOReturnNotPermitted); - } switch (method->flags & kIOUCTypeMask) diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp index 047a01fe8..0789f66bb 100644 --- a/iokit/Kernel/IOWorkLoop.cpp +++ b/iokit/Kernel/IOWorkLoop.cpp @@ -144,11 +144,6 @@ bool IOWorkLoop::init() workToDo = false; } - if (!reserved) { - reserved = IONew(ExpansionData, 1); - reserved->options = 0; - } - IOStatisticsRegisterCounter(); if ( controlG == NULL ) { diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp index cfc365b51..ae122f4c1 100644 --- a/iokit/Kernel/RootDomainUserClient.cpp +++ b/iokit/Kernel/RootDomainUserClient.cpp @@ -47,17 +47,17 @@ OSDefineMetaClassAndStructors(RootDomainUserClient, IOUserClient) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -bool RootDomainUserClient::initWithTask(task_t owningTask, void *security_id, - UInt32 type, OSDictionary * properties) +bool RootDomainUserClient::initWithTask(task_t owningTask, void *security_id, + UInt32 type, OSDictionary * properties) { if (properties) - properties->setObject(kIOUserClientCrossEndianCompatibleKey, kOSBooleanTrue); + properties->setObject(kIOUserClientCrossEndianCompatibleKey, kOSBooleanTrue); if (!super::initWithTask(owningTask, security_id, type, properties)) - return false; + return false; fOwningTask = owningTask; - task_reference (fOwningTask); + task_reference (fOwningTask); return true; } @@ -78,8 +78,8 @@ IOReturn RootDomainUserClient::secureSleepSystem( uint32_t *return_code ) return secureSleepSystemOptions(NULL, 0, return_code); } -IOReturn RootDomainUserClient::secureSleepSystemOptions( - const void *inOptions, +IOReturn RootDomainUserClient::secureSleepSystemOptions( + const void *inOptions, IOByteCount inOptionsSize, uint32_t *returnCode) { @@ -92,18 +92,18 @@ IOReturn RootDomainUserClient::secureSleepSystemOptions( ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeLocalUser); local_priv = (kIOReturnSuccess == ret); - + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); admin_priv = (kIOReturnSuccess == ret); - - + 
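    // Sleep may be requested by the console (local) user or by an
    // administrator; both privileges are probed up front and either one
    // is accepted below.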
+ if (inOptions) { unserializedOptions = OSDynamicCast( OSDictionary, OSUnserializeXML((const char *)inOptions, inOptionsSize, &unserializeErrorString)); - + if (!unserializedOptions) { - IOLog("IOPMRootDomain SleepSystem unserialization failure: %s\n", + IOLog("IOPMRootDomain SleepSystem unserialization failure: %s\n", unserializeErrorString ? unserializeErrorString->getCStringNoCopy() : "Unknown"); } } @@ -115,21 +115,21 @@ IOReturn RootDomainUserClient::secureSleepSystemOptions( if (p) { fOwner->setProperty("SleepRequestedByPID", proc_pid(p), 32); } - - if (unserializedOptions) + + if (unserializedOptions) { // Publish Sleep Options in registry under root_domain - fOwner->setProperty( kRootDomainSleepOptionsKey, unserializedOptions); + fOwner->setProperty( kRootDomainSleepOptionsKey, unserializedOptions); *returnCode = fOwner->sleepSystemOptions( unserializedOptions ); - unserializedOptions->release(); + unserializedOptions->release(); } else { // No options // Clear any pre-existing options fOwner->removeProperty( kRootDomainSleepOptionsKey ); - *returnCode = fOwner->sleepSystemOptions( NULL ); + *returnCode = fOwner->sleepSystemOptions( NULL ); } } else { @@ -139,7 +139,7 @@ IOReturn RootDomainUserClient::secureSleepSystemOptions( return kIOReturnSuccess; } -IOReturn RootDomainUserClient::secureSetAggressiveness( +IOReturn RootDomainUserClient::secureSetAggressiveness( unsigned long type, unsigned long newLevel, int *return_code ) @@ -150,7 +150,7 @@ IOReturn RootDomainUserClient::secureSetAggressiveness( ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeLocalUser); local_priv = (kIOReturnSuccess == ret); - + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); admin_priv = (kIOReturnSuccess == ret); @@ -168,7 +168,7 @@ IOReturn RootDomainUserClient::secureSetMaintenanceWakeCalendar( { int admin_priv = 0; IOReturn ret = kIOReturnNotPrivileged; - + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); admin_priv = (kIOReturnSuccess == ret); @@ -185,7 +185,7 @@ IOReturn RootDomainUserClient::secureSetUserAssertionLevels( { int admin_priv = 0; IOReturn ret = kIOReturnNotPrivileged; - + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); admin_priv = (kIOReturnSuccess == ret); @@ -217,47 +217,24 @@ IOReturn RootDomainUserClient::secureGetSystemSleepType( IOReturn RootDomainUserClient::clientClose( void ) { detach(fOwner); - + if(fOwningTask) { task_deallocate(fOwningTask); fOwningTask = 0; - } - - return kIOReturnSuccess; -} - -IOReturn RootDomainUserClient::clientMemoryForType( - UInt32 type, - IOOptionBits *options, - IOMemoryDescriptor ** memory) -{ - if (!fOwner) - return kIOReturnNotReady; - - if (kPMRootDomainMapTraceBuffer == type) - { - *memory = fOwner->getPMTraceMemoryDescriptor(); - if (*memory) { - (*memory)->retain(); - *options = 0; - return kIOReturnSuccess; - } else { - return kIOReturnNotFound; - } - } - return kIOReturnUnsupported; + + return kIOReturnSuccess; } IOReturn RootDomainUserClient::externalMethod( - uint32_t selector, + uint32_t selector, IOExternalMethodArguments * arguments, - IOExternalMethodDispatch * dispatch __unused, + IOExternalMethodDispatch * dispatch __unused, OSObject * target __unused, void * reference __unused ) { - IOReturn ret = kIOReturnBadArgument; - + IOReturn ret = kIOReturnBadArgument; + switch (selector) { case kPMSetAggressiveness: @@ -270,7 +247,7 @@ IOReturn RootDomainUserClient::externalMethod( (int *)&arguments->scalarOutput[0]); } break; - + case kPMGetAggressiveness: if 
((1 == arguments->scalarInputCount) && (1 == arguments->scalarOutputCount)) @@ -280,12 +257,12 @@ IOReturn RootDomainUserClient::externalMethod( (unsigned long *)&arguments->scalarOutput[0]); } break; - + case kPMSleepSystem: if (1 == arguments->scalarOutputCount) { ret = this->secureSleepSystem( - (uint32_t *)&arguments->scalarOutput[0]); + (uint32_t *)&arguments->scalarOutput[0]); } break; @@ -323,16 +300,16 @@ IOReturn RootDomainUserClient::externalMethod( break; case kPMSetMaintenanceWakeCalendar: ret = this->secureSetMaintenanceWakeCalendar( - (IOPMCalendarStruct *)arguments->structureInput, + (IOPMCalendarStruct *)arguments->structureInput, (uint32_t *)&arguments->structureOutput); arguments->structureOutputSize = sizeof(uint32_t); break; - + case kPMSetUserAssertionLevels: ret = this->secureSetUserAssertionLevels( (uint32_t)arguments->scalarInput[0]); break; - + case kPMActivityTickle: if ( fOwner->checkSystemCanSustainFullWake() ) { @@ -355,6 +332,7 @@ IOReturn RootDomainUserClient::externalMethod( } break; +#if defined(__i386__) || defined(__x86_64__) case kPMSleepWakeWatchdogEnable: ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); if (ret == kIOReturnSuccess) @@ -367,6 +345,7 @@ IOReturn RootDomainUserClient::externalMethod( if (ret == kIOReturnSuccess) fOwner->sleepWakeDebugTrig(false); break; +#endif case kPMSetDisplayPowerOn: if (1 == arguments->scalarInputCount) @@ -376,55 +355,7 @@ IOReturn RootDomainUserClient::externalMethod( fOwner->setDisplayPowerOn((uint32_t)arguments->scalarInput[0]); } break; -/* - case kPMMethodCopySystemTimeline: - // intentional fallthrough - case kPMMethodCopyDetailedTimeline: - if (!arguments->structureOutputDescriptor) - { - // TODO: Force IOKit.framework to always send this data out - // of line; so I don't have to create a MemoryDescriptor here. - mem_size = arguments->structureOutputSize; - mem = IOMemoryDescriptor::withAddressRange( - (mach_vm_address_t)arguments->structureOutput, - (mach_vm_size_t)mem_size, - kIODirectionIn, current_task()); - } else { - mem_size = arguments->structureOutputDescriptorSize; - if (( mem = arguments->structureOutputDescriptor )) - mem->retain(); - } - - if (mem) - { - mem->prepare(kIODirectionNone); - - if (kPMMethodCopySystemTimeline == selector) { - arguments->scalarOutput[0] = fOwner->copySystemTimeline( - mem, &mem_size); - } - else - if (kPMMethodCopyDetailedTimeline == selector) { - arguments->scalarOutput[0] = fOwner->copyDetailedTimeline( - mem, &mem_size); - } - - if (arguments->structureOutputDescriptor) { - arguments->structureOutputDescriptorSize = mem_size; - } else { - arguments->structureOutputSize = mem_size; - } - - mem->release(); - - ret = kIOReturnSuccess; - } else { - ret = kIOReturnCannotWire; - } - - break; -*/ default: // bad selector return kIOReturnBadArgument; @@ -438,7 +369,7 @@ IOReturn RootDomainUserClient::externalMethod( * We maintain getTargetAndExternalMethod since it's an exported symbol, * and only for that reason. */ -IOExternalMethod * RootDomainUserClient::getTargetAndMethodForIndex( +IOExternalMethod * RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 index ) { // DO NOT EDIT @@ -448,6 +379,6 @@ IOExternalMethod * RootDomainUserClient::getTargetAndMethodForIndex( /* setPreventative * Does nothing. Exists only for exported symbol compatibility. 
*/ -void +void RootDomainUserClient::setPreventative(UInt32 on_off, UInt32 types_of_sleep) { return; } // DO NOT EDIT diff --git a/iokit/Kernel/RootDomainUserClient.h b/iokit/Kernel/RootDomainUserClient.h index 3dece8c25..9083d8772 100644 --- a/iokit/Kernel/RootDomainUserClient.h +++ b/iokit/Kernel/RootDomainUserClient.h @@ -48,23 +48,23 @@ class RootDomainUserClient : public IOUserClient friend class IOPMrootDomain; private: - IOPMrootDomain * fOwner; + IOPMrootDomain * fOwner; task_t fOwningTask; IOReturn secureSleepSystem( uint32_t *return_code ); - - IOReturn secureSleepSystemOptions( const void *inOptions, + + IOReturn secureSleepSystemOptions( const void *inOptions, IOByteCount inOptionsSize, uint32_t *returnCode); - IOReturn secureSetAggressiveness( unsigned long type, - unsigned long newLevel, + IOReturn secureSetAggressiveness( unsigned long type, + unsigned long newLevel, int *return_code ); IOReturn secureSetMaintenanceWakeCalendar( IOPMCalendarStruct *inCalendar, uint32_t *returnCode); - + IOReturn secureSetUserAssertionLevels(uint32_t assertionBitfield); IOReturn secureGetSystemSleepType( uint32_t *sleepType ); @@ -72,19 +72,17 @@ private: public: virtual IOReturn clientClose( void ); - - virtual IOReturn clientMemoryForType( UInt32 type, IOOptionBits *options, IOMemoryDescriptor **memory); - - virtual IOReturn externalMethod( uint32_t selector, + + virtual IOReturn externalMethod( uint32_t selector, IOExternalMethodArguments * arguments, - IOExternalMethodDispatch * dispatch, - OSObject * target, - void * reference ); + IOExternalMethodDispatch * dispatch, + OSObject * target, + void * reference ); virtual bool start( IOService * provider ); - virtual bool initWithTask(task_t owningTask, void *security_id, - UInt32 type, OSDictionary * properties); + virtual bool initWithTask(task_t owningTask, void *security_id, + UInt32 type, OSDictionary * properties); // Unused - retained for symbol compatibility void setPreventative(UInt32 on_off, UInt32 types_of_sleep); diff --git a/iokit/bsddev/IOKitBSDInit.cpp b/iokit/bsddev/IOKitBSDInit.cpp index ac28f446b..9b08bb834 100644 --- a/iokit/bsddev/IOKitBSDInit.cpp +++ b/iokit/bsddev/IOKitBSDInit.cpp @@ -38,6 +38,7 @@ extern "C" { #include #include #include +#include // how long to wait for matching root device, secs #if DEBUG @@ -473,6 +474,11 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize, } } + if( gIOKitDebug & kIOWaitQuietBeforeRoot ) { + IOLog( "Waiting for matching to complete\n" ); + IOService::getPlatform()->waitQuiet(); + } + if( true && matching) { OSSerialize * s = OSSerialize::withCapacity( 5 ); diff --git a/iokit/conf/MASTER b/iokit/conf/MASTER deleted file mode 100644 index a0558a788..000000000 --- a/iokit/conf/MASTER +++ /dev/null @@ -1,104 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -####################################################################### -# -# Master machine independent configuration file. -# -# Specific configuration files are created based on this and -# the machine specific master file using the doconf script. -# -# Any changes to the master configuration files will affect all -# other configuration files based upon it. -# -####################################################################### -# -# To build a configuration, execute "doconf ." 
-# Configurations are specified in the "Configurations:" section -# of the MASTER and MASTER.* files as follows: -# -# = [ ... ] -# -# Lines in the MASTER and MASTER.* files are selected based on -# the attribute selector list, found in a comment at the end of -# the line. This is a list of attributes separated by commas. -# The "!" operator selects the line if none of the attributes are -# specified. -# -# For example: -# -# selects a line if "foo" or "bar" are specified. -# selects a line if neither "foo" nor "bar" is -# specified. -# -# Lines with no attributes specified are selected for all -# configurations. -# -####################################################################### -# -# -# These are the default configurations that can be used by most sites. -# They are used internally by the Mach project. -# -# IOKIT = [iokitcpp debug] -# -####################################################################### -# SYSTEM SIZE CONFIGURATION (select exactly one) -# -# xlarge = extra large scale system configuration -# large = large scale system configuration -# medium = medium scale system configuration -# small = small scale system configuration -# xsmall = extra small scale system configuration -# bsmall = special extra small scale system configuration -# -ident IOKIT - -options HIBERNATION # system hibernation # -options KERNOBJC # Objective-C implementation # -options IOKITCPP # C++ implementation # -options IOKITSTATS # IOKit statistics # -options KDEBUG # kernel tracing # -options IST_KDEBUG # limited tracing # -options NO_KDEBUG # no kernel tracing # -options NETWORKING # kernel networking # -options CRYPTO # want crypto code # -options CONFIG_DTRACE # enable dtrace # -options VM_PRESSURE_EVENTS # - -options CONFIG_SLEEP # # - -#makeoptions LIBDRIVER = "libDriver_kern.o" # -#makeoptions LIBOBJC = "libkobjc.o" # - -# -# configurable kernel related resources -# -options CONFIG_MAX_THREADS=64 # -options CONFIG_MAX_THREADS=64 # -options CONFIG_MAX_THREADS=64 # - -# -# configurable kernel - use these options to strip strings from panic -# and printf calls. -# no_panic_str - saves around 50K of kernel footprint. -# no_printf_str - saves around 45K of kernel footprint. -# -options CONFIG_NO_PANIC_STRINGS # -options CONFIG_NO_PRINTF_STRINGS # -options CONFIG_NO_KPRINTF_STRINGS # - -# secure_kernel - secure kernel from user programs -options SECURE_KERNEL # - -options MACH_ASSERT # - -options DEVELOPMENT # -options DEBUG # - -options CONFIG_MEMORYSTATUS # -options CONFIG_JETSAM # -options CONFIG_FREEZE # diff --git a/iokit/conf/MASTER.x86_64 b/iokit/conf/MASTER.x86_64 deleted file mode 100644 index 72f503441..000000000 --- a/iokit/conf/MASTER.x86_64 +++ /dev/null @@ -1,26 +0,0 @@ -###################################################################### -# -# Standard Apple Mac OS Configurations: -# -------- ----- ------ --------------- -# -# RELEASE = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats vm_pressure_events memorystatus ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug mach_assert ] -# -# EMBEDDED = [ intel mach iokitcpp hibernation no_kextd bsmall crypto ] -# DEVELOPMENT = [ EMBEDDED development ] -# -###################################################################### - -machine "x86_64" # -cpu "x86_64" # - -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. 
-# -options CONFIG_MACF # Mandatory Access Control Framework - -options NO_KEXTD # - -options NO_NESTED_PMAP # diff --git a/iokit/conf/Makefile b/iokit/conf/Makefile index 25a42ef5e..76db9a7d8 100644 --- a/iokit/conf/Makefile +++ b/iokit/conf/Makefile @@ -6,20 +6,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) - -$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) - $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ - cd $(addsuffix /conf, $(TARGET)); \ - rm -f $(notdir $?); \ - cp $? .; \ - if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); +# Special handling for x86_64h which shares a MASTER config file with x86_64: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +DOCONF_ARCH_CONFIG_LC = x86_64 +else +DOCONF_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + +MASTERCONFDIR = $(SRCROOT)/config +DOCONFDEPS = $(addprefix $(MASTERCONFDIR)/, MASTER MASTER.$(DOCONF_ARCH_CONFIG_LC)) \ + $(addprefix $(SOURCE)/, Makefile.template Makefile.$(DOCONF_ARCH_CONFIG_LC) files files.$(DOCONF_ARCH_CONFIG_LC)) + +ifneq (,$(wildcard $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC))) +DOCONFDEPS += $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +endif + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile: $(SRCROOT)/SETUP/config/doconf $(OBJROOT)/SETUP/config $(DOCONFDEPS) + $(_v)$(MKDIR) $(TARGET)/$(CURRENT_KERNEL_CONFIG) + $(_v)$(SRCROOT)/SETUP/config/doconf -c -cpu $(DOCONF_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) -s $(SOURCE) -m $(MASTERCONFDIR) $(CURRENT_KERNEL_CONFIG); do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile $(_v)${MAKE} \ diff --git a/iokit/conf/Makefile.template b/iokit/conf/Makefile.template index 4a72c656f..ceffec084 100644 --- a/iokit/conf/Makefile.template +++ b/iokit/conf/Makefile.template @@ -64,27 +64,34 @@ COMP_SUBDIRS = %CFILES +%CXXFILES + %SFILES %MACHDEP -# -# OBJSDEPS is the set of files (defined in the machine dependent -# template if necessary) which all objects depend on (such as an -# in-line assembler expansion filter) -# -${OBJS}: ${OBJSDEPS} - -LDOBJS = $(OBJS) - -$(COMPONENT).filelist: $(LDOBJS) +# Rebuild if per-file overrides change +${OBJS}: $(firstword $(MAKEFILE_LIST)) + +# Rebuild if global compile flags change +$(COBJS): .CFLAGS +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) +$(CXXOBJS): .CXXFLAGS +.CXXFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KC++) $(CXXFLAGS) $(INCFLAGS) +$(SOBJS): .SFLAGS +.SFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) + +$(COMPONENT).filelist: $(OBJS) $(_v)for hib_file in ${HIB_FILES}; \ do \ $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ mv $${hib_file}__ $${hib_file} ; \ done @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${LDOBJS}; do \ + $(_v)( for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist diff --git a/iokit/conf/files b/iokit/conf/files index 90fbc0098..31173f88c 100644 --- 
a/iokit/conf/files +++ b/iokit/conf/files @@ -75,7 +75,15 @@ iokit/Kernel/IOSharedDataQueue.cpp optional iokitcpp # iokit/Tests/TestContainers.cpp optional iokitcpp # iokit/Tests/TestCollections.cpp optional iokitcpp -iokit/Kernel/IOStatistics.cpp optional iokitcpp +iokit/Kernel/IOStatistics.cpp optional iokitcpp +iokit/Kernel/IOInterruptAccounting.cpp optional iokitcpp + +iokit/Kernel/IOReporter.cpp optional iokitcpp +iokit/Kernel/IOSimpleReporter.cpp optional iokitcpp +iokit/Kernel/IOStateReporter.cpp optional iokitcpp +iokit/Kernel/IOHistogramReporter.cpp optional iokitcpp +iokit/Kernel/IOReportLegend.cpp optional iokitcpp + iokit/Kernel/IOStringFuncs.c standard diff --git a/libkern/Makefile b/libkern/Makefile index 6f52aee26..ad77bbce2 100644 --- a/libkern/Makefile +++ b/libkern/Makefile @@ -9,11 +9,13 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ libkern INSTINC_SUBDIRS_X86_64 = libkern +INSTINC_SUBDIRS_X86_64H = libkern INSTINC_SUBDIRS_ARM = libkern EXPINC_SUBDIRS = \ libkern EXPINC_SUBDIRS_X86_64 = libkern +EXPINC_SUBDIRS_X86_64H = libkern EXPINC_SUBDIRS_ARM = libkern COMP_SUBDIRS = conf diff --git a/libkern/OSKextLib.cpp b/libkern/OSKextLib.cpp index 9dbc988f8..175704eec 100644 --- a/libkern/OSKextLib.cpp +++ b/libkern/OSKextLib.cpp @@ -441,19 +441,20 @@ kmod_panic_dump(vm_offset_t * addr, unsigned int cnt) extern int kdb_printf(const char *format, ...) __printflike(1,2); OSKext::printKextsInBacktrace(addr, cnt, &kdb_printf, - /* takeLock? */ false); + /* takeLock? */ false, false); return; } /********************************************************************/ -void kmod_dump_log(vm_offset_t *addr, unsigned int cnt); +void kmod_dump_log(vm_offset_t *addr, unsigned int cnt, boolean_t doUnslide); void kmod_dump_log( vm_offset_t * addr, - unsigned int cnt) + unsigned int cnt, + boolean_t doUnslide) { - OSKext::printKextsInBacktrace(addr, cnt, &printf, /* lock? */ true); + OSKext::printKextsInBacktrace(addr, cnt, &printf, /* lock? 
*/ true, doUnslide); } /********************************************************************* diff --git a/libkern/c++/OSArray.cpp b/libkern/c++/OSArray.cpp index 92370e81f..61ed05f97 100644 --- a/libkern/c++/OSArray.cpp +++ b/libkern/c++/OSArray.cpp @@ -60,11 +60,15 @@ extern "C" { bool OSArray::initWithCapacity(unsigned int inCapacity) { - int size; + unsigned int size; if (!super::init()) return false; + // integer overflow check + if (inCapacity > (UINT_MAX / sizeof(const OSMetaClassBase*))) + return false; + size = sizeof(const OSMetaClassBase *) * inCapacity; array = (const OSMetaClassBase **) kalloc(size); if (!array) @@ -187,15 +191,21 @@ unsigned int OSArray::setCapacityIncrement(unsigned int increment) unsigned int OSArray::ensureCapacity(unsigned int newCapacity) { const OSMetaClassBase **newArray; - int oldSize, newSize; + unsigned int finalCapacity; + unsigned int oldSize, newSize; if (newCapacity <= capacity) return capacity; // round up - newCapacity = (((newCapacity - 1) / capacityIncrement) + 1) + finalCapacity = (((newCapacity - 1) / capacityIncrement) + 1) * capacityIncrement; - newSize = sizeof(const OSMetaClassBase *) * newCapacity; + + // integer overflow check + if ((finalCapacity < newCapacity) || (finalCapacity > (UINT_MAX / sizeof(const OSMetaClassBase*)))) + return capacity; + + newSize = sizeof(const OSMetaClassBase *) * finalCapacity; newArray = (const OSMetaClassBase **) kalloc(newSize); if (newArray) { @@ -207,7 +217,7 @@ unsigned int OSArray::ensureCapacity(unsigned int newCapacity) bzero(&newArray[capacity], newSize - oldSize); kfree(array, oldSize); array = newArray; - capacity = newCapacity; + capacity = finalCapacity; } return capacity; @@ -399,8 +409,8 @@ bool OSArray::serialize(OSSerialize *s) const if (!s->addXMLStartTag(this, "array")) return false; - for (unsigned i = 0; i < count; i++) { - if (!array[i]->serialize(s)) return false; + for (unsigned i = 0; i < count; i++) { + if (array[i] == NULL || !array[i]->serialize(s)) return false; } return s->addXMLEndTag("array"); diff --git a/libkern/c++/OSBoolean.cpp b/libkern/c++/OSBoolean.cpp index 55f4c86b2..7e67b4e9f 100644 --- a/libkern/c++/OSBoolean.cpp +++ b/libkern/c++/OSBoolean.cpp @@ -110,5 +110,7 @@ bool OSBoolean::isEqualTo(const OSMetaClassBase *obj) const bool OSBoolean::serialize(OSSerialize *s) const { + if (s->binary) return s->binarySerialize(this); + return s->addString(value ? 
"" : ""); } diff --git a/libkern/c++/OSData.cpp b/libkern/c++/OSData.cpp index 295193465..d43f67a13 100644 --- a/libkern/c++/OSData.cpp +++ b/libkern/c++/OSData.cpp @@ -56,12 +56,6 @@ extern int debug_container_malloc_size; #define ACCUMSIZE(s) #endif -struct OSData::ExpansionData -{ - DeallocFunction deallocFunction; - bool disableSerialization; -}; - bool OSData::initWithCapacity(unsigned int inCapacity) { if (!super::init()) @@ -224,24 +218,29 @@ unsigned int OSData::setCapacityIncrement(unsigned increment) unsigned int OSData::ensureCapacity(unsigned int newCapacity) { unsigned char * newData; + unsigned int finalCapacity; if (newCapacity <= capacity) return capacity; - newCapacity = (((newCapacity - 1) / capacityIncrement) + 1) + finalCapacity = (((newCapacity - 1) / capacityIncrement) + 1) * capacityIncrement; - newData = (unsigned char *) kalloc(newCapacity); - + // integer overflow check + if (finalCapacity < newCapacity) + return capacity; + + newData = (unsigned char *) kalloc(finalCapacity); + if ( newData ) { - bzero(newData + capacity, newCapacity - capacity); + bzero(newData + capacity, finalCapacity - capacity); if (data) { bcopy(data, newData, capacity); kfree(data, capacity); } - ACCUMSIZE( newCapacity - capacity ); + ACCUMSIZE( finalCapacity - capacity ); data = (void *) newData; - capacity = newCapacity; + capacity = finalCapacity; } return capacity; diff --git a/libkern/c++/OSDictionary.cpp b/libkern/c++/OSDictionary.cpp index 7329f3a4e..2f86e9a1d 100644 --- a/libkern/c++/OSDictionary.cpp +++ b/libkern/c++/OSDictionary.cpp @@ -66,8 +66,10 @@ bool OSDictionary::initWithCapacity(unsigned int inCapacity) if (!super::init()) return false; - int size = inCapacity * sizeof(dictEntry); + if (inCapacity > (UINT_MAX / sizeof(dictEntry))) + return false; + unsigned int size = inCapacity * sizeof(dictEntry); //fOptions |= kSort; dictionary = (dictEntry *) kalloc(size); @@ -276,15 +278,20 @@ unsigned int OSDictionary::setCapacityIncrement(unsigned int increment) unsigned int OSDictionary::ensureCapacity(unsigned int newCapacity) { dictEntry *newDict; - int oldSize, newSize; + unsigned int finalCapacity, oldSize, newSize; if (newCapacity <= capacity) return capacity; // round up - newCapacity = (((newCapacity - 1) / capacityIncrement) + 1) + finalCapacity = (((newCapacity - 1) / capacityIncrement) + 1) * capacityIncrement; - newSize = sizeof(dictEntry) * newCapacity; + + // integer overflow check + if (finalCapacity < newCapacity || (finalCapacity > (UINT_MAX / sizeof(dictEntry)))) + return capacity; + + newSize = sizeof(dictEntry) * finalCapacity; newDict = (dictEntry *) kalloc(newSize); if (newDict) { @@ -297,7 +304,7 @@ unsigned int OSDictionary::ensureCapacity(unsigned int newCapacity) kfree(dictionary, oldSize); dictionary = newDict; - capacity = newCapacity; + capacity = finalCapacity; } return capacity; diff --git a/libkern/c++/OSKext.cpp b/libkern/c++/OSKext.cpp index 2e15a06e7..05cd9653b 100644 --- a/libkern/c++/OSKext.cpp +++ b/libkern/c++/OSKext.cpp @@ -124,10 +124,9 @@ static void * GetAppleTEXTHashForKext(OSKext * theKext, OSDictionary *theInfoDic * Constants & Macros *********************************************************************/ -/* A typical Snow Leopard system has a bit under 120 kexts loaded. - * Use this number to create containers. +/* Use this number to create containers. */ -#define kOSKextTypicalLoadCount (120) +#define kOSKextTypicalLoadCount (150) /* Any kext will have at least 1 retain for the internal lookup-by-ID dict. 
* A loaded kext will no dependents or external retains will have 2 retains. @@ -304,16 +303,15 @@ kmod_info_t * kmod = NULL; static char * loaded_kext_paniclist = NULL; static uint32_t loaded_kext_paniclist_size = 0; -static uint32_t loaded_kext_paniclist_length = 0; - + AbsoluteTime last_loaded_timestamp; -static char last_loaded_str[2*KMOD_MAX_NAME]; +static char last_loaded_str_buf[2*KMOD_MAX_NAME]; static u_long last_loaded_strlen = 0; static void * last_loaded_address = NULL; static u_long last_loaded_size = 0; AbsoluteTime last_unloaded_timestamp; -static char last_unloaded_str[2*KMOD_MAX_NAME]; +static char last_unloaded_str_buf[2*KMOD_MAX_NAME]; static u_long last_unloaded_strlen = 0; static void * last_unloaded_address = NULL; static u_long last_unloaded_size = 0; @@ -360,8 +358,6 @@ static IOLock * sKextSummariesLock = NULL; void (*sLoadedKextSummariesUpdated)(void) = OSKextLoadedKextSummariesUpdated; OSKextLoadedKextSummaryHeader * gLoadedKextSummaries __attribute__((used)) = NULL; static size_t sLoadedKextSummariesAllocSize = 0; -OSKextLoadedKextSummaryHeader * sPrevLoadedKextSummaries = NULL; -static size_t sPrevLoadedKextSummariesAllocSize = 0; }; /********************************************************************* @@ -815,7 +811,7 @@ OSKext::removeKextBootstrap(void) // 04/18/11 - gab: // overwrite memory occupied by KLD segment with random data before // releasing it. - read_random((void *) seg_to_remove->vmaddr, seg_to_remove->vmsize); + read_frandom((void *) seg_to_remove->vmaddr, seg_to_remove->vmsize); ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize); } #else @@ -2117,7 +2113,7 @@ OSKext::setExecutable( result = true; goto finish; } - + if (infoDict->getObject(_kOSKextExecutableKey) || infoDict->getObject(_kOSKextMkextExecutableReferenceKey)) { @@ -2285,7 +2281,7 @@ OSKext::readMkextArchive(OSData * mkextData, "Mkext archive too small to be valid."); goto finish; } - + mkextHeader = (mkext_header *)mkextData->getBytesNoCopy(); if (MKEXT_GET_MAGIC(mkextHeader) != MKEXT_MAGIC || @@ -2309,8 +2305,6 @@ OSKext::readMkextArchive(OSData * mkextData, if (mkextVersion == MKEXT_VERS_2) { result = OSKext::readMkext2Archive(mkextData, NULL, checksumPtr); - } else if (mkextVersion == MKEXT_VERS_1) { - result = OSKext::readMkext1Archive(mkextData, checksumPtr); } else { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | @@ -2323,332 +2317,6 @@ finish: return result; } -/********************************************************************* -* Assumes magic, signature, version, length have been checked. -* -* Doesn't do as much bounds-checking as it should, but we're dropping -* mkext1 support from the kernel for SnowLeopard soon. -* -* Should keep track of all kexts created so far, and if we hit a -* fatal error halfway through, remove those kexts. If we've dropped -* an older version that had already been read, whoops! Might want to -* add a level of buffering? 
-*********************************************************************/ -/* static */ -OSReturn -OSKext::readMkext1Archive( - OSData * mkextData, - uint32_t * checksumPtr) -{ - OSReturn result = kOSReturnError; - uint32_t mkextLength; - mkext1_header * mkextHeader = 0; // do not free - void * mkextEnd = 0; // do not free - uint32_t mkextVersion; - uint8_t * crc_address = 0; - uint32_t checksum; - uint32_t numKexts = 0; - - OSData * infoDictDataObject = NULL; // must release - OSObject * parsedXML = NULL; // must release - OSDictionary * infoDict = NULL; // do not release - OSString * errorString = NULL; // must release - OSData * mkextExecutableInfo = NULL; // must release - OSKext * theKext = NULL; // must release - - mkextLength = mkextData->getLength(); - mkextHeader = (mkext1_header *)mkextData->getBytesNoCopy(); - mkextEnd = (char *)mkextHeader + mkextLength; - mkextVersion = OSSwapBigToHostInt32(mkextHeader->version); - - crc_address = (u_int8_t *)&mkextHeader->version; - checksum = mkext_adler32(crc_address, - (uintptr_t)mkextHeader + - OSSwapBigToHostInt32(mkextHeader->length) - (uintptr_t)crc_address); - - if (OSSwapBigToHostInt32(mkextHeader->adler32) != checksum) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "Kext archive has a bad checksum."); - result = kOSKextReturnBadData; - goto finish; - } - - if (checksumPtr) { - *checksumPtr = checksum; - } - - /* Check that the CPU type & subtype match that of the running kernel. */ - if (OSSwapBigToHostInt32(mkextHeader->cputype) != (UInt32)CPU_TYPE_ANY) { - if ((UInt32)_mh_execute_header.cputype != - OSSwapBigToHostInt32(mkextHeader->cputype)) { - - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "Kext archive doesn't contain software " - "for this computer's CPU type."); - result = kOSKextReturnArchNotFound; - goto finish; - } - } - - numKexts = OSSwapBigToHostInt32(mkextHeader->numkexts); - - for (uint32_t i = 0; i < numKexts; i++) { - - OSSafeReleaseNULL(infoDictDataObject); - OSSafeReleaseNULL(infoDict); - OSSafeReleaseNULL(mkextExecutableInfo); - OSSafeReleaseNULL(errorString); - OSSafeReleaseNULL(theKext); - - mkext_kext * kextEntry = &mkextHeader->kext[i]; - mkext_file * infoDictPtr = &kextEntry->plist; - mkext_file * executablePtr = &kextEntry->module; - if (kextEntry >= mkextEnd) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "Mkext file overrun."); - result = kOSKextReturnBadData; - goto finish; - } - - /* Note that we're pretty tolerant of errors in individual entries. - * As long as we can keep processing, we do. 
- */ - infoDictDataObject = OSKext::extractMkext1Entry( - mkextHeader, infoDictPtr); - if (!infoDictDataObject) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "Can't uncompress info dictionary " - "from mkext archive entry %d.", i); - continue; - } - - parsedXML = OSUnserializeXML( - (const char *)infoDictDataObject->getBytesNoCopy(), - &errorString); - if (parsedXML) { - infoDict = OSDynamicCast(OSDictionary, parsedXML); - } - if (!infoDict) { - const char * errorCString = "(unknown error)"; - - if (errorString && errorString->getCStringNoCopy()) { - errorCString = errorString->getCStringNoCopy(); - } else if (parsedXML) { - errorCString = "not a dictionary"; - } - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "Error: Can't read XML property list " - "for mkext archive entry %d: %s.", i, errorCString); - continue; - } - - theKext = new OSKext; - if (!theKext) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "Kext allocation failure."); - continue; - } - - /***** - * Prepare an entry to hold the mkext entry info for the - * compressed binary module, if there is one. If all four fields - * of the module entry are zero, there isn't one. - */ - if ((OSSwapBigToHostInt32(executablePtr->offset) || - OSSwapBigToHostInt32(executablePtr->compsize) || - OSSwapBigToHostInt32(executablePtr->realsize) || - OSSwapBigToHostInt32(executablePtr->modifiedsecs))) { - - MkextEntryRef entryRef; - - mkextExecutableInfo = OSData::withCapacity(sizeof(entryRef)); - if (!mkextExecutableInfo) { - panic("Error: Couldn't allocate data object " - "for mkext archive entry %d.\n", i); - } - - entryRef.mkext = (mkext_basic_header *)mkextHeader; - entryRef.fileinfo = (uint8_t *)executablePtr; - if (!mkextExecutableInfo->appendBytes(&entryRef, - sizeof(entryRef))) { - - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "Couldn't record executable info " - "for mkext archive entry %d.", i); - // we might hit a load error later but oh well - // xxx - should probably remove theKext - continue; - } - - } - - /* Init can fail because of a data/runtime error, or because the - * kext is a dup. Either way, we don't care here. - */ - if (!theKext->initWithMkext1Info(infoDict, mkextExecutableInfo, - mkextData)) { - - // theKext is released at the top of the loop or in the finish block - continue; - } - - /* If we got even one kext out of the mkext archive, - * we have successfully read the archive, in that we - * have data references into its mapped memory. - */ - result = kOSReturnSuccess; - } - -finish: - - OSSafeRelease(infoDictDataObject); - OSSafeRelease(parsedXML); - OSSafeRelease(errorString); - OSSafeRelease(mkextExecutableInfo); - OSSafeRelease(theKext); - - return result; -} - -/********************************************************************* -*********************************************************************/ -bool -OSKext::initWithMkext1Info( - OSDictionary * anInfoDict, - OSData * executableWrapper, - OSData * mkextData) -{ - bool result = false; - - // mkext1 doesn't allow for path (might stuff in info dict) - if (!setInfoDictionaryAndPath(anInfoDict, /* path */ NULL)) { - goto finish; - } - - if (!registerIdentifier()) { - goto finish; - } - - if (!setExecutable(executableWrapper, mkextData, true)) { - goto finish; - } - - result = true; - -finish: - - /* If we can't init, remove the kext from the lookup dictionary. - * This is safe to call in init because there's an implicit retain. 
- */ - if (!result) { - OSKext::removeKext(this, /* removePersonalities? */ false); - } - - return result; -} - -/********************************************************************* -* xxx - this should take the input data length -*********************************************************************/ -/* static */ -OSData * -OSKext::extractMkext1Entry( - const void * mkextFileBase, - const void * entry) -{ - OSData * result = NULL; - OSData * uncompressedData = NULL; // release on error - const char * errmsg = NULL; - - mkext_file * fileinfo; - uint8_t * uncompressedDataBuffer = 0; // do not free (panic on alloc. fail) - size_t uncompressed_size = 0; - kern_return_t kern_result; - - fileinfo = (mkext_file *)entry; - - size_t offset = OSSwapBigToHostInt32(fileinfo->offset); - size_t compressed_size = OSSwapBigToHostInt32(fileinfo->compsize); - size_t expected_size = OSSwapBigToHostInt32(fileinfo->realsize); - - // Add 1 for '\0' to terminate XML string (for plists) - // (we really should have the archive format include that). - size_t alloc_size = expected_size + 1; - time_t modifiedsecs = OSSwapBigToHostInt32(fileinfo->modifiedsecs); - - /* If these four fields are zero there's no file, but it's up to - * the calling context to decide if that's an error. - */ - if (offset == 0 && compressed_size == 0 && - expected_size == 0 && modifiedsecs == 0) { - goto finish; - } - - kern_result = kmem_alloc(kernel_map, - (vm_offset_t *)&uncompressedDataBuffer, - alloc_size); - if (kern_result != KERN_SUCCESS) { - panic(ALLOC_FAIL); - goto finish; - } - - uncompressedData = OSData::withBytesNoCopy(uncompressedDataBuffer, - alloc_size); - if (uncompressedData == NULL) { - /* No need to free uncompressedDataBuffer here, either. */ - panic(ALLOC_FAIL); - goto finish; - } - uncompressedData->setDeallocFunction(&osdata_kmem_free); - - /* Do the decompression if necessary. Note that even if the file isn't - * compressed, we want to make a copy so that we don't have the tie to - * the larger mkext file buffer any more. - * xxx - need to detect decompression overflow too - */ - if (compressed_size != 0) { - errmsg = "OSKext::uncompressMkext - " - "uncompressed file shorter than expected"; - uncompressed_size = decompress_lzss(uncompressedDataBuffer, - expected_size, - ((uint8_t *)mkextFileBase) + offset, - compressed_size); - if (uncompressed_size != expected_size) { - goto finish; - } - } else { - memcpy(uncompressedDataBuffer, - ((uint8_t *)mkextFileBase) + offset, - expected_size); - } - - // Add a terminating nul character in case the data is XML. - // (we really should have the archive format include that). - uncompressedDataBuffer[expected_size] = '\0'; - - result = uncompressedData; - errmsg = NULL; - -finish: - if (!result) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogArchiveFlag, - "%s", errmsg); - - if (uncompressedData) { - uncompressedData->release(); - } - } - return result; -} - /********************************************************************* * Assumes magic, signature, version, length have been checked. * xxx - need to add further bounds checking for each file entry @@ -2801,7 +2469,7 @@ OSKext::readMkext2Archive( infoDict = OSDynamicCast(OSDictionary, mkextInfoDictArray->getObject(i)); - + /* Create the kext for the entry, then release it, because the * kext system keeps them around until explicitly removed. * Any creation/registration failures are already logged for us. 
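[Editor's note] Both the deleted mkext1 reader ("Doesn't do as much bounds-checking as it should") and the surviving mkext2 reader ("xxx - need to add further bounds checking for each file entry") flag the same gap, and the z_alloc() hunk just below attacks the related overflow problem by widening to 64 bits before multiplying. The validation a container parser needs before dereferencing an (offset, size) entry is small; entryInBounds() is a hypothetical helper written for illustration, not code from this patch:

    #include <cstddef>
    #include <cstdint>

    static bool
    entryInBounds(size_t containerLen, uint32_t offset, uint32_t size)
    {
        // phrasing the test as "size > containerLen - offset" rather than
        // "offset + size > containerLen" cannot itself overflow
        if (offset > containerLen) return false;
        if (size > containerLen - offset) return false;
        return true;
    }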
@@ -2959,9 +2627,22 @@ z_alloc(void * notused __unused, u_int num_items, u_int size) { void * result = NULL; z_mem * zmem = NULL; - uint32_t total = num_items * size; - uint32_t allocSize = total + sizeof(zmem); + + uint64_t total = ((uint64_t)num_items) * ((uint64_t)size); + //Check for overflow due to multiplication + if (total > UINT32_MAX){ + panic("z_alloc(%p, %x, %x): overflow caused by %x * %x\n", + notused, num_items, size, num_items, size); + } + uint64_t allocSize64 = total + ((uint64_t)sizeof(zmem)); + //Check for overflow due to addition + if (allocSize64 > UINT32_MAX){ + panic("z_alloc(%p, %x, %x): overflow caused by %x + %lx\n", + notused, num_items, size, (uint32_t)total, sizeof(zmem)); + } + uint32_t allocSize = (uint32_t)allocSize64; + zmem = (z_mem *)kalloc(allocSize); if (!zmem) { goto finish; @@ -3186,7 +2867,7 @@ OSKext::loadFromMkext( Boolean delayAutounload = false; OSKextExcludeLevel startKextExcludeLevel = kOSKextExcludeNone; OSKextExcludeLevel startMatchingExcludeLevel = kOSKextExcludeAll; - + IORecursiveLockLock(sKextLock); if (logInfoOut) { @@ -3630,11 +3311,11 @@ OSKext::removeKext( } OSKextLog(aKext, - kOSKextLogProgressLevel | - kOSKextLogKextBookkeepingFlag, - "Removing kext %s.", - aKext->getIdentifierCString()); - + kOSKextLogProgressLevel | + kOSKextLogKextBookkeepingFlag, + "Removing kext %s.", + aKext->getIdentifierCString()); + sKextsByID->removeObject(aKext->getIdentifier()); result = kOSReturnSuccess; @@ -3992,9 +3673,6 @@ OSKext::getExecutable(void) extractedExecutable = extractMkext2FileData( MKEXT2_GET_ENTRY_DATA(fileinfo), "executable", compressedSize, fullSize); - } else if (mkextVersion == MKEXT_VERS_1) { - extractedExecutable = extractMkext1Entry( - mkextEntryRef->mkext, mkextEntryRef->fileinfo); } else { OSKextLog(this, kOSKextLogErrorLevel | kOSKextLogArchiveFlag, @@ -4202,7 +3880,16 @@ OSKext::copyUUID(void) header = (const kernel_mach_header_t *)theExecutable->getBytesNoCopy(); load_cmd = (const struct load_command *)&header[1]; - + + if (header->magic != MH_MAGIC_KERNEL) { + OSKextLog(NULL, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "%s: bad header %p", + __func__, + header); + goto finish; + } + for (i = 0; i < header->ncmds; i++) { if (load_cmd->cmd == LC_UUID) { uuid_cmd = (struct uuid_command *)load_cmd; @@ -4444,10 +4131,6 @@ OSKext::loadKextWithIdentifier( OSKext * theKext = NULL; // do not release OSDictionary * loadRequest = NULL; // must release const OSSymbol * kextIdentifierSymbol = NULL; // must release -#if CONFIG_MACF - int macCheckResult = 0; - kauth_cred_t cred = NULL; -#endif IORecursiveLockLock(sKextLock); @@ -4525,26 +4208,6 @@ OSKext::loadKextWithIdentifier( goto finish; } -#if CONFIG_MACF - if (current_task() != kernel_task) { - cred = kauth_cred_get_with_ref(); - macCheckResult = mac_kext_check_load(cred, kextIdentifier->getCStringNoCopy()); - kauth_cred_unref(&cred); - } - - if (macCheckResult != 0) { - result = kOSReturnError; - - OSKextLog(theKext, - kOSKextLogErrorLevel | - kOSKextLogLoadFlag, - "Failed to load kext %s (MAC policy error 0x%x).", - kextIdentifier->getCStringNoCopy(), macCheckResult); - - goto finish; - } -#endif - result = theKext->load(startOpt, startMatchingOpt, personalityNames); if (result != kOSReturnSuccess) { @@ -4662,6 +4325,26 @@ OSKext::load( getIdentifierCString()); goto loaded; } + +#if CONFIG_MACF + if (current_task() != kernel_task) { + int macCheckResult = 0; + kauth_cred_t cred = NULL; + + cred = kauth_cred_get_with_ref(); + macCheckResult = mac_kext_check_load(cred, 
getIdentifierCString()); + kauth_cred_unref(&cred); + + if (macCheckResult != 0) { + result = kOSReturnError; + OSKextLog(this, + kOSKextLogErrorLevel | kOSKextLogLoadFlag, + "Failed to load kext %s (MAC policy error 0x%x).", + getIdentifierCString(), macCheckResult); + goto finish; + } + } +#endif if (!sLoadEnabled) { OSKextLog(this, @@ -4710,8 +4393,7 @@ OSKext::load( kOSKextLogProgressLevel | kOSKextLogLoadFlag, "Loading kext %s.", getIdentifierCString()); - - + if (!sKxldContext) { kxldResult = kxld_create_context(&sKxldContext, &kern_allocate, &kxld_log_callback, /* Flags */ (KXLDFlags) 0, @@ -5511,10 +5193,18 @@ OSKext::jettisonLinkeditSegment(void) vm_size_t linkeditsize, kextsize; OSData * data = NULL; + /* 16K_XXX: To Remove */ + /* We don't currently guarantee alignment greater than 4KB for kext + * segments, so we cannot always jettison __LINKEDIT cleanly, so let + * it be for now. + */ + if (!TEST_PAGE_SIZE_4K) + return; + #if NO_KEXTD - /* We can free symbol tables for all embedded kexts because we don't - * support runtime kext linking. - */ + /* We can free symbol tables for all embedded kexts because we don't + * support runtime kext linking. + */ if (sKeepSymbols || !isExecutable() || !linkedExecutable || flags.jettisonLinkeditSeg) { #else if (sKeepSymbols || isLibrary() || !isExecutable() || !linkedExecutable || flags.jettisonLinkeditSeg) { @@ -6613,10 +6303,9 @@ OSKext::autounloadKext(OSKext * aKext) result = OSKext::removeKext(aKext); finish: - return result; } - + /********************************************************************* *********************************************************************/ void @@ -6634,20 +6323,7 @@ _OSKextConsiderUnloads( IORecursiveLockLock(sKextInnerLock); OSKext::flushNonloadedKexts(/* flushPrelinkedKexts */ true); - - IOLockLock(sKextSummariesLock); - - /* If there is an old kext summary, free that now. - */ - if (sPrevLoadedKextSummaries) { - kmem_free(kernel_map, (vm_offset_t)sPrevLoadedKextSummaries, - sPrevLoadedKextSummariesAllocSize); - sPrevLoadedKextSummaries = NULL; - sPrevLoadedKextSummariesAllocSize = 0; - } - - IOLockUnlock(sKextSummariesLock); - + /* If the system is powering down, don't try to unload anything. 
*/ if (sSystemSleep) { @@ -6655,9 +6331,8 @@ _OSKextConsiderUnloads( } OSKextLog(/* kext */ NULL, - kOSKextLogProgressLevel | - kOSKextLogLoadFlag, - "Checking for unused kexts to autounload."); + kOSKextLogProgressLevel | kOSKextLogLoadFlag, + "Checking for unused kexts to autounload."); /***** * Remove any request callbacks marked as stale, @@ -6699,8 +6374,8 @@ _OSKextConsiderUnloads( i = count - 1; do { OSKext * thisKext = OSDynamicCast(OSKext, - sLoadedKexts->getObject(i)); - didUnload = (kOSReturnSuccess == OSKext::autounloadKext(thisKext)); + sLoadedKexts->getObject(i)); + didUnload |= (kOSReturnSuccess == OSKext::autounloadKext(thisKext)); } while (i--); } } while (didUnload); @@ -7660,7 +7335,6 @@ OSKext::handleRequest( OSDictionary * requestDict = NULL; // do not release OSString * errorString = NULL; // must release - OSData * responseData = NULL; // must release OSObject * responseObject = NULL; // must release OSSerialize * serializer = NULL; // must release @@ -7673,8 +7347,6 @@ OSKext::handleRequest( OSKext * theKext = NULL; // do not release OSBoolean * boolArg = NULL; // do not release - bool hideTheSlide = false; - IORecursiveLockLock(sKextLock); if (responseOut) { @@ -7737,20 +7409,18 @@ OSKext::handleRequest( result = kOSKextReturnNotPrivileged; if (hostPriv == HOST_PRIV_NULL) { - if (sPrelinkBoot) { - hideTheSlide = true; - /* must be root to use these kext requests */ - if (predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress) || - predicate->isEqualTo(kKextRequestPredicateUnload) || - predicate->isEqualTo(kKextRequestPredicateStart) || - predicate->isEqualTo(kKextRequestPredicateStop) ) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | - kOSKextLogIPCFlag, - "Access Failure - must be root user."); - goto finish; - } - } + /* must be root to use these kext requests */ + if (predicate->isEqualTo(kKextRequestPredicateUnload) || + predicate->isEqualTo(kKextRequestPredicateStart) || + predicate->isEqualTo(kKextRequestPredicateStop) || + predicate->isEqualTo(kKextRequestPredicateGetKernelRequests) || + predicate->isEqualTo(kKextRequestPredicateSendResource) ) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Access Failure - must be root user."); + goto finish; + } } /* Get common args in anticipation of use. 
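[Editor's note] The "didUnload |=" change in _OSKextConsiderUnloads() above fixes a classic accumulator bug: the old assignment overwrote the flag on every iteration, so only the last kext examined decided whether the outer do/while made another pass. A self-contained illustration (tryUnload() is a stand-in, not the kernel routine):

    #include <cstdio>

    static bool tryUnload(int id) { return id == 2; }  // only kext 2 unloads

    int main()
    {
        bool didUnload = false;
        for (int i = 2; i >= 0; i--) {
            // old: didUnload = tryUnload(i);  // later failures erase the success
            didUnload |= tryUnload(i);         // patched: remember any success
        }
        // with '=', the failing i == 0 iteration would report no progress and
        // end the retry loop even though kext 2 was actually unloaded
        printf("didUnload = %d\n", didUnload);
        return 0;
    }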
@@ -7875,34 +7545,6 @@ OSKext::handleRequest( "Returning loaded kext info."); result = kOSReturnSuccess; } -#if !SECURE_KERNEL - } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress)) { - OSNumber * addressNum = NULL; // released as responseObject - unsigned long long unslid_addr = 0; - kernel_segment_command_t * textseg = getsegbyname("__TEXT"); - - if (!textseg) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | - kOSKextLogGeneralFlag | kOSKextLogIPCFlag, - "Can't find text segment for kernel load address."); - result = kOSReturnError; - goto finish; - } - - unslid_addr = VM_KERNEL_UNSLIDE(textseg->vmaddr); - - OSKextLog(/* kext */ NULL, - kOSKextLogDebugLevel | - kOSKextLogIPCFlag, - "Returning kernel load address 0x%llx.", - (unsigned long long) unslid_addr); - - addressNum = OSNumber::withNumber((long long unsigned int) unslid_addr, - 8 * sizeof(long long unsigned int)); - responseObject = addressNum; - result = kOSReturnSuccess; -#endif } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelRequests)) { /* Hand the current sKernelRequests array to the caller @@ -7953,19 +7595,7 @@ OSKext::handleRequest( "probable memory leak."); } - if (responseData && responseObject) { - OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | - kOSKextLogIPCFlag, - "Mistakenly generated both data & plist responses to user request " - "(returning only data)."); - } - - if (responseData && responseData->getLength() && responseOut) { - - response = (char *)responseData->getBytesNoCopy(); - responseLength = responseData->getLength(); - } else if (responseOut && responseObject) { + if (responseOut && responseObject) { serializer = OSSerialize::withCapacity(0); if (!serializer) { result = kOSKextReturnNoMemory; @@ -7974,8 +7604,7 @@ OSKext::handleRequest( if (!responseObject->serialize(serializer)) { OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | - kOSKextLogIPCFlag, + kOSKextLogGeneralFlag | kOSKextLogErrorLevel, "Failed to serialize response to request from user space."); result = kOSKextReturnSerialization; goto finish; @@ -8028,7 +7657,6 @@ finish: OSSafeRelease(parsedXML); OSSafeRelease(errorString); - OSSafeRelease(responseData); OSSafeRelease(responseObject); OSSafeRelease(serializer); OSSafeRelease(logInfoArray); @@ -8326,7 +7954,7 @@ OSKext::copyInfo(OSArray * infoKeys) /***** * OSKernelResource, OSBundleIsInterface, OSBundlePrelinked, OSBundleStarted. */ - if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleUUIDKey)) { + if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSKernelResourceKey)) { result->setObject(kOSKernelResourceKey, isKernelComponent() ? 
kOSBooleanTrue : kOSBooleanFalse); } @@ -9079,7 +8707,6 @@ static OSReturn _OSKextCreateRequest( { OSReturn result = kOSKextReturnNoMemory; OSDictionary * request = NULL; // must release on error - OSDictionary * args = NULL; // must release request = OSDictionary::withCapacity(2); if (!request) { @@ -9098,7 +8725,6 @@ finish: } else { *requestP = request; } - if (args) args->release(); return result; } @@ -10091,7 +9717,8 @@ OSKext::printKextsInBacktrace( vm_offset_t * addr, unsigned int cnt, int (* printf_func)(const char *fmt, ...), - bool lockFlag) + bool lockFlag, + bool doUnslide) { addr64_t summary_page = 0; addr64_t last_summary_page = 0; @@ -10124,17 +9751,17 @@ OSKext::printKextsInBacktrace( if (!summary->address) { continue; } - + if (!summaryIsInBacktrace(summary, addr, cnt)) { continue; } - + if (!found_kmod) { (*printf_func)(" Kernel Extensions in backtrace:\n"); found_kmod = true; } - printSummary(summary, printf_func); + printSummary(summary, printf_func, doUnslide); } finish: @@ -10203,20 +9830,28 @@ static void findSummaryUUID( *********************************************************************/ void OSKext::printSummary( OSKextLoadedKextSummary * summary, - int (* printf_func)(const char *fmt, ...)) + int (* printf_func)(const char *fmt, ...), + bool doUnslide) { kmod_reference_t * kmod_ref = NULL; uuid_string_t uuid; char version[kOSKextVersionMaxLength]; + uint64_t tmpAddr; if (!OSKextVersionGetString(summary->version, version, sizeof(version))) { strlcpy(version, "unknown version", sizeof(version)); } (void) uuid_unparse(summary->uuid, uuid); - + + if (doUnslide) { + tmpAddr = VM_KERNEL_UNSLIDE(summary->address); + } + else { + tmpAddr = summary->address; + } (*printf_func)(" %s(%s)[%s]@0x%llx->0x%llx\n", summary->name, version, uuid, - summary->address, summary->address + summary->size - 1); + tmpAddr, tmpAddr + summary->size - 1); /* print dependency info */ for (kmod_ref = (kmod_reference_t *) summary->reference_list; @@ -10244,8 +9879,14 @@ void OSKext::printSummary( /* locate UUID in gLoadedKextSummaries */ findSummaryUUID(rinfo->id, uuid); + if (doUnslide) { + tmpAddr = VM_KERNEL_UNSLIDE(rinfo->address); + } + else { + tmpAddr = rinfo->address; + } (*printf_func)(" dependency: %s(%s)[%s]@%p\n", - rinfo->name, rinfo->version, uuid, rinfo->address); + rinfo->name, rinfo->version, uuid, tmpAddr); } return; } @@ -10379,12 +10020,15 @@ compactIdentifier( /* identPlusVers must be at least 2*KMOD_MAX_NAME in length. */ static int assemble_identifier_and_version( - kmod_info_t * kmod_info, - char * identPlusVers); + kmod_info_t * kmod_info, + char * identPlusVers, + int bufSize); + static int assemble_identifier_and_version( - kmod_info_t * kmod_info, - char * identPlusVers) + kmod_info_t * kmod_info, + char * identPlusVers, + int bufSize) { int result = 0; @@ -10392,8 +10036,12 @@ assemble_identifier_and_version( result = strnlen(identPlusVers, KMOD_MAX_NAME - 1); identPlusVers[result++] = '\t'; // increment for real char identPlusVers[result] = '\0'; // don't increment for nul char - result = strlcat(identPlusVers, kmod_info->version, KMOD_MAX_NAME); - + result = strlcat(identPlusVers, kmod_info->version, bufSize); + if (result >= bufSize) { + identPlusVers[bufSize - 1] = '\0'; + result = bufSize - 1; + } + return result; } @@ -10401,18 +10049,16 @@ assemble_identifier_and_version( * Assumes sKextLock is held. 
*******************************************************************************/ /* static */ -uint32_t +int OSKext::saveLoadedKextPanicListTyped( const char * prefix, int invertFlag, int libsFlag, char * paniclist, - uint32_t list_size, - uint32_t * list_length_ptr) + uint32_t list_size) { - uint32_t result = 0; - int error = 0; - unsigned int count, i; + int result = -1; + unsigned int count, i; count = sLoadedKexts->getCount(); if (!count) { @@ -10424,9 +10070,10 @@ OSKext::saveLoadedKextPanicListTyped( OSObject * rawKext = sLoadedKexts->getObject(i); OSKext * theKext = OSDynamicCast(OSKext, rawKext); int match; - char identPlusVers[2*KMOD_MAX_NAME]; uint32_t identPlusVersLength; - + uint32_t tempLen; + char identPlusVers[2*KMOD_MAX_NAME]; + if (!rawKext) { printf("OSKext::saveLoadedKextPanicListTyped - " "NULL kext in loaded kext list; continuing\n"); @@ -10466,36 +10113,39 @@ OSKext::saveLoadedKextPanicListTyped( !pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)kmod_info))) { printf("kext scan stopped due to missing kmod_info page: %p\n", - kmod_info); - error = 1; + kmod_info); goto finish; } identPlusVersLength = assemble_identifier_and_version(kmod_info, - identPlusVers); + identPlusVers, + sizeof(identPlusVers)); if (!identPlusVersLength) { printf("error saving loaded kext info\n"); goto finish; } - /* Adding 1 for the newline. - */ - if (*list_length_ptr + identPlusVersLength + 1 >= list_size) { + /* make sure everything fits and we null terminate. + */ + tempLen = strlcat(paniclist, identPlusVers, list_size); + if (tempLen >= list_size) { + // panic list is full, keep it and null terminate + paniclist[list_size - 1] = 0x00; + result = 0; + goto finish; + } + tempLen = strlcat(paniclist, "\n", list_size); + if (tempLen >= list_size) { + // panic list is full, keep it and null terminate + paniclist[list_size - 1] = 0x00; + result = 0; goto finish; } - - *list_length_ptr = strlcat(paniclist, identPlusVers, list_size); - *list_length_ptr = strlcat(paniclist, "\n", list_size); - } while (i--); + result = 0; finish: - if (!error) { - if (*list_length_ptr + 1 <= list_size) { - result = list_size - (*list_length_ptr + 1); - } - } - + return result; } @@ -10507,51 +10157,52 @@ OSKext::saveLoadedKextPanicList(void) { char * newlist = NULL; uint32_t newlist_size = 0; - uint32_t newlist_length = 0; - - newlist_length = 0; + newlist_size = KEXT_PANICLIST_SIZE; newlist = (char *)kalloc(newlist_size); if (!newlist) { OSKextLog(/* kext */ NULL, - kOSKextLogErrorLevel | kOSKextLogGeneralFlag, - "Couldn't allocate kext panic log buffer."); + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "Couldn't allocate kext panic log buffer."); goto finish; } newlist[0] = '\0'; - + // non-"com.apple." kexts - if (!OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 1, - /* libs? */ -1, newlist, newlist_size, &newlist_length)) { + if (OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 1, + /* libs? */ -1, newlist, newlist_size) != 0) { goto finish; } // "com.apple." nonlibrary kexts - if (!OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 0, - /* libs? */ 0, newlist, newlist_size, &newlist_length)) { + if (OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 0, + /* libs? */ 0, newlist, newlist_size) != 0) { goto finish; } // "com.apple." library kexts - if (!OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 0, - /* libs? */ 1, newlist, newlist_size, &newlist_length)) { + if (OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? 
*/ 0, + /* libs? */ 1, newlist, newlist_size) != 0) { goto finish; } - + if (loaded_kext_paniclist) { kfree(loaded_kext_paniclist, loaded_kext_paniclist_size); } loaded_kext_paniclist = newlist; + newlist = NULL; loaded_kext_paniclist_size = newlist_size; - loaded_kext_paniclist_length = newlist_length; - + finish: + if (newlist) { + kfree(newlist, newlist_size); + } return; } - + /********************************************************************* * Assumes sKextLock is held. *********************************************************************/ @@ -10564,8 +10215,9 @@ OSKext::savePanicString(bool isLoading) return; // do not goto finish here b/c of lock } - len = assemble_identifier_and_version(kmod_info, - (isLoading) ? last_loaded_str : last_unloaded_str); + len = assemble_identifier_and_version( kmod_info, + (isLoading) ? last_loaded_str_buf : last_unloaded_str_buf, + (isLoading) ? sizeof(last_loaded_str_buf) : sizeof(last_unloaded_str_buf) ); if (!len) { printf("error saving unloaded kext info\n"); goto finish; @@ -10596,14 +10248,14 @@ OSKext::printKextPanicLists(int (*printf_func)(const char *fmt, ...)) if (last_loaded_strlen) { printf_func("last loaded kext at %llu: %.*s (addr %p, size %lu)\n", AbsoluteTime_to_scalar(&last_loaded_timestamp), - last_loaded_strlen, last_loaded_str, + last_loaded_strlen, last_loaded_str_buf, last_loaded_address, last_loaded_size); } if (last_unloaded_strlen) { printf_func("last unloaded kext at %llu: %.*s (addr %p, size %lu)\n", AbsoluteTime_to_scalar(&last_unloaded_timestamp), - last_unloaded_strlen, last_unloaded_str, + last_unloaded_strlen, last_unloaded_str_buf, last_unloaded_address, last_unloaded_size); } @@ -10612,7 +10264,9 @@ OSKext::printKextPanicLists(int (*printf_func)(const char *fmt, ...)) pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) loaded_kext_paniclist) && loaded_kext_paniclist[0]) { - printf_func("%.*s", loaded_kext_paniclist_length, loaded_kext_paniclist); + printf_func("%.*s", + strnlen(loaded_kext_paniclist, loaded_kext_paniclist_size), + loaded_kext_paniclist); } else { printf_func("(none)\n"); } @@ -10634,89 +10288,96 @@ OSKext::updateLoadedKextSummaries(void) size_t summarySize = 0; size_t size; u_int count; - u_int numKexts; + u_int maxKexts; u_int i, j; - + +#if DEVELOPMENT || DEBUG + if (IORecursiveLockHaveLock(sKextLock) == false) { + panic("sKextLock must be held"); + } +#endif + IOLockLock(sKextSummariesLock); - + count = sLoadedKexts->getCount(); - for (i = 0, numKexts = 0; i < count; ++i) { + for (i = 0, maxKexts = 0; i < count; ++i) { aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); - numKexts += (aKext && aKext->isExecutable()); + maxKexts += (aKext && aKext->isExecutable()); } - - if (!numKexts) goto finish; - - /* Calculate the size needed for the new summary headers. - */ + + if (!maxKexts) goto finish; + if (maxKexts < kOSKextTypicalLoadCount) maxKexts = kOSKextTypicalLoadCount; + + /* Calculate the size needed for the new summary headers. + */ size = sizeof(*gLoadedKextSummaries); - size += numKexts * sizeof(*gLoadedKextSummaries->summaries); + size += maxKexts * sizeof(*gLoadedKextSummaries->summaries); size = round_page(size); - - /* If the previous summary is large enough, use it (and be sure to make - * it writable). If it's too small, free it and allocate a new buffer. 
- */ - - if (sPrevLoadedKextSummariesAllocSize < size) { - if (sPrevLoadedKextSummaries) { - kmem_free(kernel_map, (vm_offset_t)sPrevLoadedKextSummaries, - sPrevLoadedKextSummariesAllocSize); - sPrevLoadedKextSummaries = NULL; - sPrevLoadedKextSummariesAllocSize = 0; - } - - result = kmem_alloc(kernel_map, - (vm_offset_t*)&summaryHeaderAlloc, size); + + if (gLoadedKextSummaries == NULL || sLoadedKextSummariesAllocSize < size) { + if (gLoadedKextSummaries) { + kmem_free(kernel_map, + (vm_offset_t)gLoadedKextSummaries, + sLoadedKextSummariesAllocSize); + gLoadedKextSummaries = NULL; + sLoadedKextSummariesAllocSize = 0; + } + result = kmem_alloc(kernel_map, + (vm_offset_t*)&summaryHeaderAlloc, + size); if (result != KERN_SUCCESS) goto finish; - summaryHeader = summaryHeaderAlloc; summarySize = size; - } else { - summaryHeader = sPrevLoadedKextSummaries; - summarySize = sPrevLoadedKextSummariesAllocSize; - + } + else { + summaryHeader = gLoadedKextSummaries; + summarySize = sLoadedKextSummariesAllocSize; + start = (vm_map_offset_t) summaryHeader; end = start + summarySize; - result = vm_map_protect(kernel_map, start, end, VM_PROT_DEFAULT, FALSE); + result = vm_map_protect(kernel_map, + start, + end, + VM_PROT_DEFAULT, + FALSE); if (result != KERN_SUCCESS) goto finish; } - - /* Populate the summary header. - */ - + + /* Populate the summary header. + */ + bzero(summaryHeader, summarySize); summaryHeader->version = kOSKextLoadedKextSummaryVersion; summaryHeader->entry_size = sizeof(OSKextLoadedKextSummary); - summaryHeader->numSummaries = numKexts; - - /* Populate each kext summary. - */ - + + /* Populate each kext summary. + */ + count = sLoadedKexts->getCount(); - for (i = 0, j = 0; i < count; ++i) { + for (i = 0, j = 0; i < count && j < maxKexts; ++i) { aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); - if (!aKext || !aKext->isExecutable()) continue; - + if (!aKext || !aKext->isExecutable()) { + continue; + } + aKext->updateLoadedKextSummary(&summaryHeader->summaries[j++]); + summaryHeader->numSummaries++; } - - /* Write protect the buffer and move it into place. - */ - + + /* Write protect the buffer and move it into place. + */ + start = (vm_map_offset_t) summaryHeader; end = start + summarySize; + result = vm_map_protect(kernel_map, start, end, VM_PROT_READ, FALSE); if (result != KERN_SUCCESS) goto finish; - - sPrevLoadedKextSummaries = gLoadedKextSummaries; - sPrevLoadedKextSummariesAllocSize = sLoadedKextSummariesAllocSize; - + gLoadedKextSummaries = summaryHeader; sLoadedKextSummariesAllocSize = summarySize; - summaryHeaderAlloc = NULL; - + /* Call the magic breakpoint function through a static function pointer so * the compiler can't optimize the function away. */ diff --git a/libkern/c++/OSMetaClass.cpp b/libkern/c++/OSMetaClass.cpp index fafd4547c..807eb4598 100644 --- a/libkern/c++/OSMetaClass.cpp +++ b/libkern/c++/OSMetaClass.cpp @@ -494,7 +494,6 @@ OSMetaClass::~OSMetaClass() /********************************************************************* * Empty overrides. 
*********************************************************************/ -void * OSMetaClass::operator new(__unused size_t size) { return 0; } void OSMetaClass::retain() const { } void OSMetaClass::release() const { } void OSMetaClass::release(__unused int when) const { } diff --git a/libkern/c++/OSObject.cpp b/libkern/c++/OSObject.cpp index 7da83e9a1..0dc95ed55 100644 --- a/libkern/c++/OSObject.cpp +++ b/libkern/c++/OSObject.cpp @@ -28,6 +28,7 @@ /* OSObject.cpp created by gvdl on Fri 1998-11-17 */ #include +#include #include #include #include @@ -241,14 +242,19 @@ void OSObject::release(int when) const bool OSObject::serialize(OSSerialize *s) const { - if (s->previouslySerialized(this)) return true; + char cstr[128]; + bool ok; - if (!s->addXMLStartTag(this, "string")) return false; + snprintf(cstr, sizeof(cstr), "%s is not serializable", getClassName(this)); - if (!s->addString(getClassName(this))) return false; - if (!s->addString(" is not serializable")) return false; - - return s->addXMLEndTag("string"); + OSString * str; + str = OSString::withCStringNoCopy(cstr); + if (!str) return false; + + ok = str->serialize(s); + str->release(); + + return (ok); } diff --git a/libkern/c++/OSOrderedSet.cpp b/libkern/c++/OSOrderedSet.cpp index 5a5fb83e4..1ba5e04dd 100644 --- a/libkern/c++/OSOrderedSet.cpp +++ b/libkern/c++/OSOrderedSet.cpp @@ -63,11 +63,14 @@ bool OSOrderedSet:: initWithCapacity(unsigned int inCapacity, OSOrderFunction inOrdering, void *inOrderingRef) { - int size; + unsigned int size; if (!super::init()) return false; + if (inCapacity > (UINT_MAX / sizeof(_Element))) + return false; + size = sizeof(_Element) * inCapacity; array = (_Element *) kalloc(size); if (!array) @@ -125,15 +128,19 @@ unsigned int OSOrderedSet::setCapacityIncrement(unsigned int increment) unsigned int OSOrderedSet::ensureCapacity(unsigned int newCapacity) { _Element *newArray; - int oldSize, newSize; + unsigned int finalCapacity, oldSize, newSize; if (newCapacity <= capacity) return capacity; // round up - newCapacity = (((newCapacity - 1) / capacityIncrement) + 1) + finalCapacity = (((newCapacity - 1) / capacityIncrement) + 1) * capacityIncrement; - newSize = sizeof(_Element) * newCapacity; + if ((finalCapacity < newCapacity) || + (finalCapacity > (UINT_MAX / sizeof(_Element)))) { + return capacity; + } + newSize = sizeof(_Element) * finalCapacity; newArray = (_Element *) kalloc(newSize); if (newArray) { @@ -145,7 +152,7 @@ unsigned int OSOrderedSet::ensureCapacity(unsigned int newCapacity) bzero(&newArray[capacity], newSize - oldSize); kfree(array, oldSize); array = newArray; - capacity = newCapacity; + capacity = finalCapacity; } return capacity; diff --git a/libkern/c++/OSRuntimeSupport.c b/libkern/c++/OSRuntimeSupport.c index 6ed505f73..2bb1d5f98 100644 --- a/libkern/c++/OSRuntimeSupport.c +++ b/libkern/c++/OSRuntimeSupport.c @@ -2,8 +2,10 @@ // exporting the mangled functions for loadable drivers compiled on older // systems. // Note that I have had to manually mangle the symbols names. 
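[Editor's note] On the manually mangled names below: under the Itanium C++ ABI, _ZN11OSMetaClassdlEPvm decodes to OSMetaClass::operator delete(void*, unsigned long) and the newly exported _ZN11OSMetaClassnwEm to OSMetaClass::operator new(unsigned long). A sketch of the C++ spelling that produces such a symbol, using a hypothetical stand-in class and assuming an LP64 target where size_t is unsigned long:

    // mangles to _ZN4DemonwEm on LP64 targets
    struct Demo {
        static void *operator new(unsigned long size);
    };
    // stub mirrors the kernel's: never meant to be reached through a
    // new-expression, it exists only to satisfy the linker
    void *Demo::operator new(unsigned long) { return nullptr; }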
-#if __GNUC__ >= 3 - void _ZN11OSMetaClassdlEPvm(void *mem, unsigned long size); - void _ZN11OSMetaClassdlEPvm(__attribute__((__unused__)) void *mem, __attribute__((__unused__)) unsigned long size) { } -#endif +void _ZN11OSMetaClassdlEPvm(void *mem, unsigned long size); +void *_ZN11OSMetaClassnwEm(unsigned long size); + +void _ZN11OSMetaClassdlEPvm(__attribute__((unused)) void *mem, __attribute__((__unused__)) unsigned long size) { } +void *_ZN11OSMetaClassnwEm(__attribute__((unused)) unsigned long size) { return (void *)0ULL; } + diff --git a/libkern/c++/OSSerialize.cpp b/libkern/c++/OSSerialize.cpp index 90a0b6054..909bc0a4c 100644 --- a/libkern/c++/OSSerialize.cpp +++ b/libkern/c++/OSSerialize.cpp @@ -36,6 +36,7 @@ __END_DECLS #include #include #include +#include #define super OSObject @@ -65,8 +66,17 @@ char * OSSerialize::text() const void OSSerialize::clearText() { - bzero((void *)data, capacity); - length = 1; + if (binary) + { + length = sizeof(kOSSerializeBinarySignature); + bzero(&data[length], capacity - length); + endCollection = true; + } + else + { + bzero((void *)data, capacity); + length = 1; + } tag = 0; tags->flushCollection(); } @@ -76,6 +86,8 @@ bool OSSerialize::previouslySerialized(const OSMetaClassBase *o) char temp[16]; OSString *tagString; + if (binary) return (binarySerialize(o)); + // look it up tagString = (OSString *)tags->getObject((const OSSymbol *) o); @@ -101,6 +113,11 @@ bool OSSerialize::previouslySerialized(const OSMetaClassBase *o) bool OSSerialize::addXMLStartTag(const OSMetaClassBase *o, const char *tagString) { + if (binary) + { + printf("class %s: xml serialize\n", o->getMetaClass()->getClassName()); + return (false); + } if (!addChar('<')) return false; if (!addString(tagString)) return false; @@ -124,6 +141,12 @@ bool OSSerialize::addXMLEndTag(const char *tagString) bool OSSerialize::addChar(const char c) { + if (binary) + { + printf("xml serialize\n"); + return (false); + } + // add char, possibly extending our capacity if (length >= capacity && length >=ensureCapacity(capacity+capacityIncrement)) return false; diff --git a/libkern/c++/OSSerializeBinary.cpp b/libkern/c++/OSSerializeBinary.cpp new file mode 100644 index 000000000..e939f0558 --- /dev/null +++ b/libkern/c++/OSSerializeBinary.cpp @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2014 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#include +#include +#include +#include + +#include + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#if 0 +#define DEBG(fmt, args...) { kprintf(fmt, args); } +#else +#define DEBG(fmt, args...) {} +#endif + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +OSSerialize *OSSerialize::binaryWithCapacity(unsigned int inCapacity, + Editor editor, void * reference) +{ + OSSerialize *me; + + if (inCapacity < sizeof(uint32_t)) return (0); + me = OSSerialize::withCapacity(inCapacity); + if (!me) return (0); + + me->binary = true; + me->endCollection = true; + me->editor = editor; + me->editRef = reference; + + bcopy(kOSSerializeBinarySignature, &me->data[0], sizeof(kOSSerializeBinarySignature)); + me->length = sizeof(kOSSerializeBinarySignature); + + return (me); +} + +bool OSSerialize::addBinary(const void * bits, size_t size) +{ + unsigned int newCapacity; + size_t alignSize; + + alignSize = ((size + 3) & ~3L); + newCapacity = length + alignSize; + if (newCapacity >= capacity) + { + newCapacity = (((newCapacity - 1) / capacityIncrement) + 1) * capacityIncrement; + if (newCapacity < ensureCapacity(newCapacity)) return (false); + } + + bcopy(bits, &data[length], size); + length += alignSize; + + return (true); +} + +bool OSSerialize::addBinaryObject(const OSMetaClassBase * o, uint32_t key, + const void * bits, size_t size) +{ + unsigned int newCapacity; + size_t alignSize; + OSNumber * tagNum; + + // build a tag + tagNum = OSNumber::withNumber(tag, 32); + tag++; + // add to tag dictionary + tags->setObject((const OSSymbol *) o, tagNum); + tagNum->release(); + + alignSize = ((size + sizeof(key) + 3) & ~3L); + newCapacity = length + alignSize; + if (newCapacity >= capacity) + { + newCapacity = (((newCapacity - 1) / capacityIncrement) + 1) * capacityIncrement; + if (newCapacity < ensureCapacity(newCapacity)) return (false); + } + + if (endCollection) + { + endCollection = false; + key |= kOSSerializeEndCollecton; + } + + bcopy(&key, &data[length], sizeof(key)); + bcopy(bits, &data[length + sizeof(key)], size); + length += alignSize; + + return (true); +} + +bool OSSerialize::binarySerialize(const OSMetaClassBase *o) +{ + OSDictionary * dict; + OSArray * array; + OSSet * set; + OSNumber * num; + OSSymbol * sym; + OSString * str; + OSData * data; + OSBoolean * boo; + + OSNumber * tagNum; + uint32_t i, key; + size_t len; + bool ok; + + tagNum = (OSNumber *)tags->getObject((const OSSymbol *) o); + // does it exist? 
+ if (tagNum) + { + key = (kOSSerializeObject | tagNum->unsigned32BitValue()); + if (endCollection) + { + endCollection = false; + key |= kOSSerializeEndCollecton; + } + ok = addBinary(&key, sizeof(key)); + return (ok); + } + + if ((dict = OSDynamicCast(OSDictionary, o))) + { + key = (kOSSerializeDictionary | dict->count); + ok = addBinaryObject(o, key, NULL, 0); + for (i = 0; ok && (i < dict->count);) + { + const OSSymbol * dictKey; + const OSMetaClassBase * dictValue; + const OSMetaClassBase * nvalue = 0; + + i++; + dictKey = dict->dictionary[i-1].key; + dictValue = dict->dictionary[i-1].value; + if (editor) + { + dictValue = nvalue = (*editor)(editRef, this, dict, dictKey, dictValue); + if (!dictValue) dictValue = dict; + } + ok = binarySerialize(dictKey); + if (!ok) break; + endCollection = (i == dict->count); + ok = binarySerialize(dictValue); + if (!ok) ok = dictValue->serialize(this); + if (nvalue) nvalue->release(); +// if (!ok) ok = binarySerialize(kOSBooleanFalse); + } + } + else if ((array = OSDynamicCast(OSArray, o))) + { + key = (kOSSerializeArray | array->count); + ok = addBinaryObject(o, key, NULL, 0); + for (i = 0; ok && (i < array->count);) + { + i++; + endCollection = (i == array->count); + ok = binarySerialize(array->array[i-1]); + if (!ok) ok = array->array[i-1]->serialize(this); +// if (!ok) ok = binarySerialize(kOSBooleanFalse); + } + } + else if ((set = OSDynamicCast(OSSet, o))) + { + key = (kOSSerializeSet | set->members->count); + ok = addBinaryObject(o, key, NULL, 0); + for (i = 0; ok && (i < set->members->count);) + { + i++; + endCollection = (i == set->members->count); + ok = binarySerialize(set->members->array[i-1]); + if (!ok) ok = set->members->array[i-1]->serialize(this); +// if (!ok) ok = binarySerialize(kOSBooleanFalse); + } + } + else if ((num = OSDynamicCast(OSNumber, o))) + { + key = (kOSSerializeNumber | num->size); + ok = addBinaryObject(o, key, &num->value, sizeof(num->value)); + } + else if ((boo = OSDynamicCast(OSBoolean, o))) + { + key = (kOSSerializeBoolean | (kOSBooleanTrue == boo)); + ok = addBinaryObject(o, key, NULL, 0); + } + else if ((sym = OSDynamicCast(OSSymbol, o))) + { + len = (sym->getLength() + 1); + key = (kOSSerializeSymbol | len); + ok = addBinaryObject(o, key, sym->getCStringNoCopy(), len); + } + else if ((str = OSDynamicCast(OSString, o))) + { + len = (str->getLength() + 0); + key = (kOSSerializeString | len); + ok = addBinaryObject(o, key, str->getCStringNoCopy(), len); + } + else if ((data = OSDynamicCast(OSData, o))) + { + len = data->getLength(); + if (data->reserved && data->reserved->disableSerialization) len = 0; + key = (kOSSerializeData | len); + ok = addBinaryObject(o, key, data->getBytesNoCopy(), len); + } + else return (false); + + return (ok); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#define setAtIndex(v, idx, o) \ + if (idx >= v##Capacity) \ + { \ + uint32_t ncap = v##Capacity + 64; \ + typeof(v##Array) nbuf = (typeof(v##Array)) kalloc(ncap * sizeof(o)); \ + if (!nbuf) ok = false; \ + if (v##Array) \ + { \ + bcopy(v##Array, nbuf, v##Capacity * sizeof(o)); \ + kfree(v##Array, v##Capacity * sizeof(o)); \ + } \ + v##Array = nbuf; \ + v##Capacity = ncap; \ + } \ + if (ok) v##Array[idx] = o; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +OSObject * +OSUnserializeBinary(const char *buffer, size_t bufferSize, OSString **errorString) +{ + OSObject ** objsArray; + uint32_t objsCapacity; + uint32_t objsIdx; + + OSObject 
** stackArray; + uint32_t stackCapacity; + uint32_t stackIdx; + + OSObject * result; + OSObject * parent; + OSDictionary * dict; + OSArray * array; + OSSet * set; + OSDictionary * newDict; + OSArray * newArray; + OSSet * newSet; + OSObject * o; + OSSymbol * sym; + + size_t bufferPos; + const uint32_t * next; + uint32_t key, len, wordLen; + bool end, newCollect, isRef; + unsigned long long value; + bool ok; + + if (errorString) *errorString = 0; + if (0 != strcmp(kOSSerializeBinarySignature, buffer)) return (NULL); + if (3 & ((uintptr_t) buffer)) return (NULL); + if (bufferSize < sizeof(kOSSerializeBinarySignature)) return (NULL); + bufferPos = sizeof(kOSSerializeBinarySignature); + next = (typeof(next)) (((uintptr_t) buffer) + bufferPos); + + DEBG("---------OSUnserializeBinary(%p)\n", buffer); + + objsArray = stackArray = NULL; + objsIdx = objsCapacity = 0; + stackIdx = stackCapacity = 0; + + result = 0; + parent = 0; + dict = 0; + array = 0; + set = 0; + sym = 0; + + ok = true; + while (ok) + { + bufferPos += sizeof(*next); + if (!(ok = (bufferPos <= bufferSize))) break; + key = *next++; + + len = (key & kOSSerializeDataMask); + wordLen = (len + 3) >> 2; + end = (0 != (kOSSerializeEndCollecton & key)); + DEBG("key 0x%08x: 0x%04x, %d\n", key, len, end); + + newCollect = isRef = false; + o = 0; newDict = 0; newArray = 0; newSet = 0; + + switch (kOSSerializeTypeMask & key) + { + case kOSSerializeDictionary: + o = newDict = OSDictionary::withCapacity(len); + newCollect = (len != 0); + break; + case kOSSerializeArray: + o = newArray = OSArray::withCapacity(len); + newCollect = (len != 0); + break; + case kOSSerializeSet: + o = newSet = OSSet::withCapacity(len); + newCollect = (len != 0); + break; + + case kOSSerializeObject: + if (len >= objsIdx) break; + o = objsArray[len]; + o->retain(); + isRef = true; + break; + + case kOSSerializeNumber: + bufferPos += sizeof(long long); + if (bufferPos > bufferSize) break; + value = next[1]; + value <<= 32; + value |= next[0]; + o = OSNumber::withNumber(value, len); + next += 2; + break; + + case kOSSerializeSymbol: + bufferPos += (wordLen * sizeof(uint32_t)); + if (bufferPos > bufferSize) break; + if (0 != ((const char *)next)[len-1]) break; + o = (OSObject *) OSSymbol::withCString((const char *) next); + next += wordLen; + break; + + case kOSSerializeString: + bufferPos += (wordLen * sizeof(uint32_t)); + if (bufferPos > bufferSize) break; + o = OSString::withStringOfLength((const char *) next, len); + next += wordLen; + break; + + case kOSSerializeData: + bufferPos += (wordLen * sizeof(uint32_t)); + if (bufferPos > bufferSize) break; + o = OSData::withBytes(next, len); + next += wordLen; + break; + + case kOSSerializeBoolean: + o = (len ? 
kOSBooleanTrue : kOSBooleanFalse); + break; + + default: + break; + } + + if (!(ok = (o != 0))) break; + + if (!isRef) + { + setAtIndex(objs, objsIdx, o); + if (!ok) break; + objsIdx++; + } + + if (dict) + { + if (sym) + { + DEBG("%s = %s\n", sym->getCStringNoCopy(), o->getMetaClass()->getClassName()); + if (o != dict) ok = dict->setObject(sym, o); + o->release(); + sym->release(); + sym = 0; + } + else + { + sym = OSDynamicCast(OSSymbol, o); + ok = (sym != 0); + } + } + else if (array) + { + ok = array->setObject(o); + o->release(); + } + else if (set) + { + ok = set->setObject(o); + o->release(); + } + else + { + assert(!parent); + result = o; + } + + if (!ok) break; + + if (newCollect) + { + if (!end) + { + stackIdx++; + setAtIndex(stack, stackIdx, parent); + if (!ok) break; + } + DEBG("++stack[%d] %p\n", stackIdx, parent); + parent = o; + dict = newDict; + array = newArray; + set = newSet; + end = false; + } + + if (end) + { + if (!stackIdx) break; + parent = stackArray[stackIdx]; + DEBG("--stack[%d] %p\n", stackIdx, parent); + stackIdx--; + set = 0; + dict = 0; + array = 0; + if (!(dict = OSDynamicCast(OSDictionary, parent))) + { + if (!(array = OSDynamicCast(OSArray, parent))) ok = (0 != (set = OSDynamicCast(OSSet, parent))); + } + } + } + DEBG("ret %p\n", result); + + if (objsCapacity) kfree(objsArray, objsCapacity * sizeof(*objsArray)); + if (stackCapacity) kfree(stackArray, stackCapacity * sizeof(*stackArray)); + + if (!ok && result) + { + result->release(); + result = 0; + } + return (result); +} \ No newline at end of file diff --git a/libkern/c++/OSSet.cpp b/libkern/c++/OSSet.cpp index 775253baf..0cb188567 100644 --- a/libkern/c++/OSSet.cpp +++ b/libkern/c++/OSSet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000, 2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,7 +77,7 @@ bool OSSet::initWithObjects(const OSObject *inObjects[], for ( unsigned int i = 0; i < inCount; i++ ) { // xx-review: no test here for failure of setObject() - if (members->getCount() < inCapacity) + if (members->getCount() < capacity) setObject(inObjects[i]); else return false; diff --git a/libkern/c++/OSString.cpp b/libkern/c++/OSString.cpp index f5095fa01..65032f5eb 100644 --- a/libkern/c++/OSString.cpp +++ b/libkern/c++/OSString.cpp @@ -87,6 +87,24 @@ bool OSString::initWithCString(const char *cString) return true; } +bool OSString::initWithStringOfLength(const char *cString, size_t inlength) +{ + if (!cString || !super::init()) + return false; + + length = inlength + 1; + string = (char *) kalloc(length); + if (!string) + return false; + + bcopy(cString, string, inlength); + string[inlength] = 0; + + ACCUMSIZE(length); + + return true; +} + bool OSString::initWithCStringNoCopy(const char *cString) { if (!cString || !super::init()) @@ -135,6 +153,20 @@ OSString *OSString::withCStringNoCopy(const char *cString) return me; } +OSString *OSString::withStringOfLength(const char *cString, size_t length) +{ + OSString *me = new OSString; + + if (me && !me->initWithStringOfLength(cString, length)) { + me->release(); + return 0; + } + + return me; +} + + + /* @@@ gvdl */ #if 0 OSString *OSString::stringWithFormat(const char *format, ...) 
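[Editor's note] The new OSSerializeBinary.cpp above streams every object as a 32-bit key, with the type in the high bits, the end-of-collection marker in the top bit, and a length or back-reference tag in the low 24 bits, followed by a payload padded to 4 bytes; that is exactly what addBinary()/addBinaryObject() compute with "(size + 3) & ~3" and what OSUnserializeBinary() consumes via kOSSerializeDataMask and "wordLen = (len + 3) >> 2". A userspace sketch of packing and unpacking one key; the numeric constants are assumptions based on the public OSSerializeBinary.h, not values copied from this patch:

    #include <cstdint>
    #include <cstdio>

    enum : uint32_t {
        kTypeMask = 0x7F000000, // assumed kOSSerializeTypeMask
        kDataMask = 0x00FFFFFF, // assumed kOSSerializeDataMask
        kEndFlag  = 0x80000000, // assumed kOSSerializeEndCollecton
        kString   = 0x09000000, // assumed kOSSerializeString
    };

    int main()
    {
        uint32_t key = kString | kEndFlag | 5;  // 5-byte string, closes its collection
        unsigned long pad = (5 + 3) & ~3UL;     // payload padded to 8 bytes, as in addBinary()
        printf("type 0x%08x len %u end %d pad %lu\n",
               key & kTypeMask, key & kDataMask, !!(key & kEndFlag), pad);
        return 0;
    }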
diff --git a/libkern/c++/OSSymbol.cpp b/libkern/c++/OSSymbol.cpp index c90107c1b..b2f5f94bd 100644 --- a/libkern/c++/OSSymbol.cpp +++ b/libkern/c++/OSSymbol.cpp @@ -30,9 +30,7 @@ #include #include -__BEGIN_DECLS -#include -__END_DECLS +#include #include #include @@ -465,7 +463,7 @@ void OSSymbol::initialize() pool = new OSSymbolPool; assert(pool); - if (!pool->init()) { + if (pool && !pool->init()) { delete pool; assert(false); }; diff --git a/libkern/c++/OSUnserializeXML.cpp b/libkern/c++/OSUnserializeXML.cpp index e5a692141..b694f44ec 100644 --- a/libkern/c++/OSUnserializeXML.cpp +++ b/libkern/c++/OSUnserializeXML.cpp @@ -2788,9 +2788,10 @@ OSObject* OSUnserializeXML(const char *buffer, OSString **errorString) { OSObject *object; - parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t)); - if ((!state) || (!buffer)) return 0; + if (!buffer) return 0; + parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t)); + if (!state) return 0; // just in case if (errorString) *errorString = NULL; @@ -2816,13 +2817,18 @@ OSUnserializeXML(const char *buffer, OSString **errorString) return object; } +#include + OSObject* OSUnserializeXML(const char *buffer, size_t bufferSize, OSString **errorString) { - if ((!buffer) || (!bufferSize)) return 0; + if (!buffer) return (0); + if (bufferSize < sizeof(kOSSerializeBinarySignature)) return (0); + + if (!strcmp(kOSSerializeBinarySignature, buffer)) return OSUnserializeBinary(buffer, bufferSize, errorString); // XML must be null terminated - if (buffer[bufferSize - 1] || strnlen(buffer, bufferSize) == bufferSize) return 0; + if (buffer[bufferSize - 1]) return 0; return OSUnserializeXML(buffer, errorString); } diff --git a/libkern/conf/MASTER b/libkern/conf/MASTER deleted file mode 100644 index edcb732a6..000000000 --- a/libkern/conf/MASTER +++ /dev/null @@ -1,89 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -####################################################################### -# -# Master machine independent configuration file. -# -# Specific configuration files are created based on this and -# the machine specific master file using the doconf script. -# -# Any changes to the master configuration files will affect all -# other configuration files based upon it. -# -####################################################################### -# -# To build a configuration, execute "doconf ." -# Configurations are specified in the "Configurations:" section -# of the MASTER and MASTER.* files as follows: -# -# = [ ... ] -# -# Lines in the MASTER and MASTER.* files are selected based on -# the attribute selector list, found in a comment at the end of -# the line. This is a list of attributes separated by commas. -# The "!" operator selects the line if none of the attributes are -# specified. -# -# For example: -# -# selects a line if "foo" or "bar" are specified. -# selects a line if neither "foo" nor "bar" is -# specified. -# -# Lines with no attributes specified are selected for all -# configurations. -# -####################################################################### -# -# Standard Mach Research Configurations: -# -------- ---- -------- --------------- -# -# These are the default configurations that can be used by most sites. -# They are used internally by the Mach project. 
-# -# LIBKERN = [libkerncpp debug] -# -####################################################################### -# -ident LIBKERN - -options HIBERNATION # system hibernation # -options KDEBUG # kernel tracing # -options IST_KDEBUG # limited tracing # -options NO_KDEBUG # no kernel tracing # -options GPROF # kernel profiling # -options LIBKERNCPP # C++ implementation # -options NETWORKING # kernel networking # -options CONFIG_DTRACE # dtrace support # -options VM_PRESSURE_EVENTS # -options CRYPTO # cryptographic routines # -options ALLCRYPTO # -options ZLIB # zlib support # -options IOKITSTATS # IOKit statistics # - -options CONFIG_NO_PANIC_STRINGS # -options CONFIG_NO_PRINTF_STRINGS # -options CONFIG_NO_KPRINTF_STRINGS # - -options IPSEC # IP security # - -options CONFIG_KXLD # kxld/runtime linking of kexts # - -options CONFIG_KEC_FIPS # Kernel External Components for FIPS compliance (KEC_FIPS) # - - -# Note that when adding this config option to an architecture, one MUST -# add the architecture to the preprocessor test at the beginning of -# libkern/kmod/cplus_{start.c,stop.c}. -options CONFIG_STATIC_CPPINIT # Static library initializes kext cpp runtime # - -# secure_kernel - secure kernel from user programs -options SECURE_KERNEL # - - -options DEBUG # -options MACH_ASSERT # diff --git a/libkern/conf/MASTER.x86_64 b/libkern/conf/MASTER.x86_64 deleted file mode 100644 index 0813228a0..000000000 --- a/libkern/conf/MASTER.x86_64 +++ /dev/null @@ -1,17 +0,0 @@ -###################################################################### -# -# RELEASE = [ intel mach libkerncpp hibernation networking config_dtrace crypto allcrypto zlib config_kxld iokitstats vm_pressure_events config_kec_fips ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug mach_assert ] -# -# EMBEDDED = [ intel mach libkerncpp hibernation networking crypto zlib ] -# DEVELOPMENT = [ EMBEDDED ] -# -###################################################################### - -machine "x86_64" # -cpu "x86_64" # - -options NO_NESTED_PMAP # - -options CONFIG_MACF # Mandatory Access Control Framework diff --git a/libkern/conf/Makefile b/libkern/conf/Makefile index 25a42ef5e..76db9a7d8 100644 --- a/libkern/conf/Makefile +++ b/libkern/conf/Makefile @@ -6,20 +6,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) - -$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) - $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ - cd $(addsuffix /conf, $(TARGET)); \ - rm -f $(notdir $?); \ - cp $? 
.; \ - if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); +# Special handling for x86_64h which shares a MASTER config file with x86_64: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +DOCONF_ARCH_CONFIG_LC = x86_64 +else +DOCONF_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + +MASTERCONFDIR = $(SRCROOT)/config +DOCONFDEPS = $(addprefix $(MASTERCONFDIR)/, MASTER MASTER.$(DOCONF_ARCH_CONFIG_LC)) \ + $(addprefix $(SOURCE)/, Makefile.template Makefile.$(DOCONF_ARCH_CONFIG_LC) files files.$(DOCONF_ARCH_CONFIG_LC)) + +ifneq (,$(wildcard $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC))) +DOCONFDEPS += $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +endif + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile: $(SRCROOT)/SETUP/config/doconf $(OBJROOT)/SETUP/config $(DOCONFDEPS) + $(_v)$(MKDIR) $(TARGET)/$(CURRENT_KERNEL_CONFIG) + $(_v)$(SRCROOT)/SETUP/config/doconf -c -cpu $(DOCONF_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) -s $(SOURCE) -m $(MASTERCONFDIR) $(CURRENT_KERNEL_CONFIG); do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile $(_v)${MAKE} \ diff --git a/libkern/conf/Makefile.template b/libkern/conf/Makefile.template index a9b7a9af7..5885a434c 100644 --- a/libkern/conf/Makefile.template +++ b/libkern/conf/Makefile.template @@ -37,6 +37,8 @@ uncompr.o_CWARNFLAGS_ADD = -Wno-cast-qual # warnings in bison-generated code OSUnserializeXML.cpo_CXXWARNFLAGS_ADD = -Wno-uninitialized +OSUnserializeXML.cpo_CXXWARNFLAGS_ADD += -Wno-unreachable-code +OSUnserialize.cpo_CXXWARNFLAGS_ADD += -Wno-unreachable-code # Runtime support functions don't interact well with LTO (9294679) stack_protector.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG) @@ -64,6 +66,8 @@ COMP_SUBDIRS = %CFILES +%CXXFILES + %SFILES %MACHDEP @@ -83,24 +87,31 @@ uncompr.o_CWARNFLAGS_ADD = -Wno-cast-qual # warnings in bison-generated code OSUnserializeXML.cpo_CXXWARNFLAGS_ADD = -Wno-uninitialized - -# -# OBJSDEPS is the set of files (defined in the machine dependent -# template if necessary) which all objects depend on (such as an -# in-line assembler expansion filter) -# -${OBJS}: ${OBJSDEPS} - -LDOBJS = $(OBJS) - -$(COMPONENT).filelist: $(LDOBJS) +OSUnserializeXML.cpo_CXXWARNFLAGS_ADD += -Wno-unreachable-code +OSUnserialize.cpo_CXXWARNFLAGS_ADD += -Wno-unreachable-code + +# Rebuild if per-file overrides change +${OBJS}: $(firstword $(MAKEFILE_LIST)) + +# Rebuild if global compile flags change +$(COBJS): .CFLAGS +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) +$(CXXOBJS): .CXXFLAGS +.CXXFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KC++) $(CXXFLAGS) $(INCFLAGS) +$(SOBJS): .SFLAGS +.SFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) + +$(COMPONENT).filelist: $(OBJS) $(_v)for hib_file in ${HIB_FILES}; \ do \ $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ mv $${hib_file}__ $${hib_file} ; \ done @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${LDOBJS}; do \ + $(_v)( for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist diff --git a/libkern/conf/files b/libkern/conf/files index 3eaf35007..bdf3cb6e0 100644 --- a/libkern/conf/files +++ b/libkern/conf/files @@ -7,7 +7,6 @@ OPTIONS/config_dtrace optional config_dtrace OPTIONS/hibernation optional hibernation 
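# The .CFLAGS/.CXXFLAGS/.SFLAGS stamp rules added to Makefile.template above
# make every object depend on the full compiler command line. The stamp
# target runs on every build, but the replacecontents helper (added under
# SETUP/ in this same patch) is expected to rewrite the file -- and so bump
# its mtime -- only when the recorded command line differs. A hedged shell
# sketch of that idiom, not the actual tool:
#
#   printf '%s ' cc $CFLAGS $INCFLAGS > .CFLAGS.new
#   cmp -s .CFLAGS.new .CFLAGS || mv .CFLAGS.new .CFLAGS
#
# Objects listed in $(COBJS) then recompile exactly when the flags change,
# rather than relying on a full clean build to pick up new flags.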
OPTIONS/networking optional networking OPTIONS/crypto optional crypto -OPTIONS/allcrypto optional allcrypto OPTIONS/zlib optional zlib # libkern @@ -36,8 +35,9 @@ libkern/c++/OSString.cpp optional libkerncpp libkern/c++/OSSymbol.cpp optional libkerncpp libkern/c++/OSUnserialize.cpp optional libkerncpp libkern/c++/OSUnserializeXML.cpp optional libkerncpp +libkern/c++/OSSerializeBinary.cpp optional libkerncpp -libkern/OSKextLib.cpp standard libkerncpp +libkern/OSKextLib.cpp optional libkerncpp libkern/mkext.c standard libkern/OSKextVersion.c standard @@ -66,11 +66,9 @@ libkern/zlib/uncompr.c optional zlib libkern/zlib/zutil.c optional zlib libkern/crypto/register_crypto.c optional crypto -libkern/crypto/corecrypto_sha2.c optional crypto allcrypto +libkern/crypto/corecrypto_sha2.c optional crypto_sha2 libkern/crypto/corecrypto_sha1.c optional crypto -libkern/crypto/corecrypto_sha1.c optional ipsec libkern/crypto/corecrypto_md5.c optional crypto -libkern/crypto/corecrypto_md5.c optional networking libkern/crypto/corecrypto_des.c optional crypto libkern/crypto/corecrypto_aes.c optional crypto libkern/crypto/corecrypto_aesxts.c optional crypto diff --git a/libkern/crypto/corecrypto_aes.c b/libkern/crypto/corecrypto_aes.c index f70fc0cb8..bec9846e1 100644 --- a/libkern/crypto/corecrypto_aes.c +++ b/libkern/crypto/corecrypto_aes.c @@ -57,7 +57,7 @@ aes_rval aes_encrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv return aes_good; } -#if defined (__i386__) || defined (__x86_64__) +#if defined (__i386__) || defined (__x86_64__) || defined (__arm64__) /* This does one block of ECB, using the CBC implementation - this allows the same context to be used for both CBC and ECB */ aes_rval aes_encrypt(const unsigned char *in_blk, unsigned char *out_blk, aes_encrypt_ctx cx[1]) { @@ -90,7 +90,7 @@ aes_rval aes_decrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv return aes_good; } -#if defined (__i386__) || defined (__x86_64__) +#if defined (__i386__) || defined (__x86_64__) || defined (__arm64__) /* This does one block of ECB, using the CBC implementation - this allows the same context to be used for both CBC and ECB */ aes_rval aes_decrypt(const unsigned char *in_blk, unsigned char *out_blk, aes_decrypt_ctx cx[1]) { diff --git a/libkern/gen/OSDebug.cpp b/libkern/gen/OSDebug.cpp index f5bbec683..7cb847108 100644 --- a/libkern/gen/OSDebug.cpp +++ b/libkern/gen/OSDebug.cpp @@ -50,7 +50,7 @@ extern vm_offset_t min_valid_stack_address(void); extern vm_offset_t max_valid_stack_address(void); // From osfmk/kmod.c -extern void kmod_dump_log(vm_offset_t *addr, unsigned int cnt); +extern void kmod_dump_log(vm_offset_t *addr, unsigned int cnt, boolean_t doUnslide); extern addr64_t kvtophys(vm_offset_t va); @@ -111,7 +111,7 @@
(unsigned long) VM_KERNEL_UNSLIDE(bt[4]), (unsigned long) VM_KERNEL_UNSLIDE(bt[5]), (unsigned long) VM_KERNEL_UNSLIDE(bt[6]), (unsigned long) VM_KERNEL_UNSLIDE(bt[7]), (unsigned long) VM_KERNEL_UNSLIDE(bt[8])); - kmod_dump_log((vm_offset_t *) &bt[2], cnt - 2); + kmod_dump_log((vm_offset_t *) &bt[2], cnt - 2, TRUE); } lck_mtx_unlock(sOSReportLock); } diff --git a/libkern/kxld/Makefile b/libkern/kxld/Makefile index 6b9ec5a38..59f788571 100644 --- a/libkern/kxld/Makefile +++ b/libkern/kxld/Makefile @@ -45,10 +45,12 @@ DYLIBDST=$(DSTROOT)/usr/lib/system ARCHIVEDST=$(DSTROOT)/usr/local/lib LIBKXLD_DYLIB=libkxld.dylib LIBKXLD_ARCHIVE=libkxld.a +LIBKXLD_DSYM=$(LIBKXLD_DYLIB).dSYM LIBKXLD_INSTALLNAME=/usr/lib/system/$(LIBKXLD_DYLIB) LIBKXLDOBJ_DYLIB=$(OBJROOT)/$(LIBKXLD_DYLIB) LIBKXLDOBJ_ARCHIVE=$(OBJROOT)/$(LIBKXLD_ARCHIVE) LIBKXLDSYM_DYLIB=$(SYMROOT)/$(LIBKXLD_DYLIB) +LIBKXLDSYM_DYLIB_DSYM=$(SYMROOT)/$(LIBKXLD_DSYM) LIBKXLDSYM_ARCHIVE=$(SYMROOT)/$(LIBKXLD_ARCHIVE) LIBKXLDDST_DYLIB=$(DYLIBDST)/$(LIBKXLD_DYLIB) LIBKXLDDST_ARCHIVE=$(ARCHIVEDST)/$(LIBKXLD_ARCHIVE) @@ -56,7 +58,14 @@ TESTSRC=$(SRCROOT)/tests TESTDST=./BUILD/tests # Flags -SDKROOT ?= / +ifdef SDKROOT + SDK_DIR := $(shell xcodebuild -version -sdk $(SDKROOT) Path) +endif + +ifeq ($(strip $(SDK_DIR)),) + SDK_DIR := / +endif + DEFINES = -DPRIVATE CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \ -Wwrite-strings -Wshorten-64-to-32 -Wshadow -Winit-self -Wpointer-arith \ @@ -68,16 +77,17 @@ LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLD_INSTALLNAME) \ -current_version $(CURRENT_VERSION) -lstdc++ INCLUDES=-I$(HDRSRC) -isystem $(EXTHDRSRC) -ifneq ($(SDKROOT),/) - CFLAGS += -isysroot $(SDKROOT) - LDFLAGS += -isysroot $(SDKROOT) +ifneq ($(SDK_DIR),/) + CFLAGS += -isysroot $(SDK_DIR) + LDFLAGS += -isysroot $(SDK_DIR) endif # Tools -CC = xcrun -sdk $(SDKROOT) clang +CC = xcrun -sdk $(SDK_DIR) clang CLANG_ANALYZER = clang --analyze -LIBTOOL = xcrun -sdk $(SDKROOT) libtool -STRIP = xcrun -sdk $(SDKROOT) strip +LIBTOOL = xcrun -sdk $(SDK_DIR) libtool +STRIP = xcrun -sdk $(SDK_DIR) strip +DSYMUTIL = xcrun -sdk $(SDK_DIR) dsymutil # Files HDR_NAMES=kxld.h kxld_types.h @@ -124,6 +134,7 @@ build: $(LIBKXLDSYM_$(PRODUCT_TYPE)) $(LIBKXLDSYM_DYLIB): $(LIBKXLDOBJ_DYLIB) @mkdir -p $(SYMROOT) install -c -m 644 $< $@ + $(DSYMUTIL) $@ -o $(LIBKXLDSYM_DYLIB_DSYM) $(LIBKXLDSYM_ARCHIVE): $(LIBKXLDOBJ_ARCHIVE) @mkdir -p $(SYMROOT) diff --git a/libkern/kxld/kxld_object.c b/libkern/kxld/kxld_object.c index 1995cb88c..d936c7853 100644 --- a/libkern/kxld/kxld_object.c +++ b/libkern/kxld/kxld_object.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012 Apple Inc. All rights reserved. + * Copyright (c) 2009-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -329,7 +329,13 @@ get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused, #if defined(__x86_64__) object->cputype = CPU_TYPE_X86_64; +/* FIXME: we need clang to provide a __x86_64h__ macro for the sub-type. Using + * __AVX2__ is a temporary solution until this is available. 
*/ +#if defined(__AVX2__) + object->cpusubtype = CPU_SUBTYPE_X86_64_H; +#else object->cpusubtype = CPU_SUBTYPE_X86_64_ALL; +#endif return KERN_SUCCESS; #else kxld_log(kKxldLogLinking, kKxldLogErr, @@ -374,6 +380,9 @@ get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused, case CPU_TYPE_ARM: object->cpusubtype = CPU_SUBTYPE_ARM_ALL; break; + case CPU_TYPE_ARM64: + object->cpusubtype = CPU_SUBTYPE_ARM64_ALL; + break; default: object->cpusubtype = 0; break; @@ -386,6 +395,7 @@ get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused, switch(object->cputype) { case CPU_TYPE_ARM: + case CPU_TYPE_ARM64: case CPU_TYPE_I386: case CPU_TYPE_X86_64: object->target_order = NX_LittleEndian; @@ -452,8 +462,7 @@ get_macho_slice_for_arch(KXLDObject *object, u_char *file, u_long size) /* Locate the Mach-O for the requested architecture */ - arch = NXFindBestFatArch(object->cputype, object->cpusubtype, archs, - fat->nfat_arch); + arch = NXFindBestFatArch(object->cputype, object->cpusubtype, archs, fat->nfat_arch); require_action(arch, finish, rval=KERN_FAILURE; kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogArchNotFound)); require_action(size >= arch->offset + arch->size, finish, @@ -485,6 +494,7 @@ get_macho_slice_for_arch(KXLDObject *object, u_char *file, u_long size) require_action(object->cputype == mach_hdr->cputype, finish, rval=KERN_FAILURE; kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + object->cpusubtype = mach_hdr->cpusubtype; /* */ rval = KERN_SUCCESS; finish: @@ -1478,7 +1488,8 @@ target_supports_protected_segments(const KXLDObject *object) { return (object->is_final_image && (object->cputype == CPU_TYPE_X86_64 || - object->cputype == CPU_TYPE_ARM)); + object->cputype == CPU_TYPE_ARM || + object->cputype == CPU_TYPE_ARM64)); } /******************************************************************************* diff --git a/libkern/kxld/kxld_reloc.c b/libkern/kxld/kxld_reloc.c index 5f41b511c..41e899eac 100644 --- a/libkern/kxld/kxld_reloc.c +++ b/libkern/kxld/kxld_reloc.c @@ -71,6 +71,9 @@ #if KXLD_USER_OR_ARM #include #endif +#if KXLD_USER_OR_ARM64 +#include +#endif #define KXLD_TARGET_NONE (u_int) 0x0 #define KXLD_TARGET_VALUE (u_int) 0x1 @@ -148,6 +151,19 @@ static kern_return_t arm_process_reloc(const KXLDRelocator *relocator, kxld_addr_t pair_target, boolean_t swap); #endif /* KXLD_USER_OR_ARM */ +#if KXLD_USER_OR_ARM64 +static boolean_t arm64_reloc_has_pair(u_int _type) + __attribute__((const)); +static u_int arm64_reloc_get_pair_type(u_int _prev_type) + __attribute__((const)); +static boolean_t arm64_reloc_has_got(u_int _type) + __attribute__((const)); +static kern_return_t arm64_process_reloc(const KXLDRelocator *relocator, + u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, + kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, kxld_addr_t target, + kxld_addr_t pair_target, boolean_t swap); +#endif /* KXLD_USER_OR_ARM64 */ + #if KXLD_USER_OR_ILP32 static kxld_addr_t get_pointer_at_addr_32(const KXLDRelocator *relocator, const u_char *data, u_long offset) @@ -227,6 +243,17 @@ kxld_relocator_init(KXLDRelocator *relocator, u_char *file, relocator->may_scatter = FALSE; break; #endif /* KXLD_USER_OR_ARM */ +#if KXLD_USER_OR_ARM64 + case CPU_TYPE_ARM64: + relocator->reloc_has_pair = arm64_reloc_has_pair; + relocator->reloc_get_pair_type = arm64_reloc_get_pair_type; + relocator->reloc_has_got = arm64_reloc_has_got; + relocator->process_reloc = arm64_process_reloc; + relocator->function_align = 0; + relocator->is_32_bit = 
FALSE; + relocator->may_scatter = FALSE; + break; +#endif /* KXLD_USER_OR_ARM64 */ default: rval = KERN_FAILURE; @@ -1533,3 +1560,116 @@ finish: } #endif /* KXLD_USER_OR_ARM */ + +#if KXLD_USER_OR_ARM64 +/******************************************************************************* +*******************************************************************************/ +boolean_t +arm64_reloc_has_pair(u_int _type) +{ + return (_type == ARM64_RELOC_SUBTRACTOR); +} + +/******************************************************************************* +*******************************************************************************/ +u_int +arm64_reloc_get_pair_type(u_int _prev_type __unused) +{ + if (_prev_type == ARM64_RELOC_SUBTRACTOR) { + return ARM64_RELOC_UNSIGNED; + } else { + return -1u; + } +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +arm64_reloc_has_got(u_int _type) +{ + return (_type == ARM64_RELOC_GOT_LOAD_PAGE21 || + _type == ARM64_RELOC_GOT_LOAD_PAGEOFF12); +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +arm64_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction, + u_int length, u_int pcrel, kxld_addr_t _base_pc __unused, kxld_addr_t _link_pc, + kxld_addr_t _link_disp __unused, u_int _type, kxld_addr_t _target, + kxld_addr_t _pair_target __unused, boolean_t swap) +{ + kern_return_t rval = KERN_FAILURE; + enum reloc_type_arm64 type = _type; + uint64_t target = _target; + uint64_t link_pc = (uint64_t) _link_pc; + uint64_t difference = 0; + int64_t displacement = 0; + uint32_t addend = 0; + + check(instruction); + require_action((length == 2 || length == 3), finish, rval=KERN_FAILURE); + + if (length == 2) { + uint32_t *instr32p = (uint32_t *) (void *) instruction; + uint32_t instr32 = *instr32p; + +#if !KERNEL + if (swap) instr32 = OSSwapInt32(instr32); +#endif + + switch (type) { + case ARM64_RELOC_BRANCH26: + require_action(pcrel, finish, rval=KERN_FAILURE); + addend = (instr32 & 0x03FFFFFF) << 2; + addend = SIGN_EXTEND(addend, 27); + displacement = (target - link_pc + addend); + difference = ABSOLUTE_VALUE(displacement); + displacement = (displacement >> 2); + require_action(difference < (128 * 1024 * 1024), finish, + rval = KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogRelocationOverflow)); + instr32 = (instr32 & 0xFC000000) | (displacement & 0x03FFFFFF); + break; + + default: + rval = KERN_FAILURE; + goto finish; + } + +#if !KERNEL + if (swap) instr32 = OSSwapInt32(instr32); +#endif + + *instr32p = instr32; + } else { /* length == 3 */ + uint64_t *instr64p = (uint64_t *) (void *) instruction; + uint64_t instr64 = *instr64p; + +#if !KERNEL + if (swap) instr64 = OSSwapInt64(instr64); +#endif + + switch (type) { + case ARM64_RELOC_UNSIGNED: + require_action(!pcrel, finish, rval=KERN_FAILURE); + instr64 += target; + break; + default: + rval = KERN_FAILURE; + goto finish; + } + +#if !KERNEL + if (swap) instr64 = OSSwapInt64(instr64); +#endif + + *instr64p = instr64; + } + + rval = KERN_SUCCESS; +finish: + return rval; +} + +#endif /* KXLD_USER_OR_ARM64 */ diff --git a/libkern/kxld/kxld_seg.c b/libkern/kxld/kxld_seg.c index 7160f7e7d..4ea424356 100644 --- a/libkern/kxld/kxld_seg.c +++ b/libkern/kxld/kxld_seg.c @@ -754,6 +754,10 @@ kxld_seg_finish_init(KXLDSeg *seg) } } + /* XXX Cross 
architecture linking will fail if the page size ever differs + * from 4096. (As of this writing, we're fine on i386, x86_64, arm, and + * arm64.) + */ seg->vmsize = round_page(maxaddr + maxsize - seg->base_addr); } diff --git a/libkern/libkern/Makefile b/libkern/libkern/Makefile index 222b91341..8b2f0606a 100644 --- a/libkern/libkern/Makefile +++ b/libkern/libkern/Makefile @@ -12,10 +12,12 @@ INSTINC_SUBDIRS = \ crypto INSTINC_SUBDIRS_X86_64 = \ i386 - +INSTINC_SUBDIRS_X86_64H = \ + i386 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} +EXPINC_SUBDIRS_X86_64H = ${INSTINC_SUBDIRS_X86_64H} DATAFILES = \ OSAtomic.h \ @@ -37,7 +39,8 @@ PRIVATE_DATAFILES = \ OSKextLibPrivate.h \ kext_request_keys.h \ mkext.h \ - prelink.h + prelink.h \ + OSSerializeBinary.h INSTALL_MI_LIST = \ OSByteOrder.h \ diff --git a/libkern/libkern/OSKextLib.h b/libkern/libkern/OSKextLib.h index 34e7544ac..8d752baf6 100644 --- a/libkern/libkern/OSKextLib.h +++ b/libkern/libkern/OSKextLib.h @@ -463,6 +463,7 @@ __BEGIN_DECLS * about itself. */ +#ifdef KERNEL /*! * @typedef OSKextLoadTag * @@ -482,6 +483,22 @@ __BEGIN_DECLS * @link OSKextReleaseKextWithLoadTag * OSKextReleaseKextWithLoadTag@/link. */ +#else +/*! + * @typedef OSKextLoadTag + * + * @abstract + * A unique identifier assigned to a loaded instance of a kext. + * + * @discussion + * If a kext is unloaded and later reloaded, the new instance + * has a different load tag. + * + * A kext can get its own load tag in the kmod_info_t + * structure passed into its module start routine, as the + * id field (cast to this type). + */ +#endif typedef uint32_t OSKextLoadTag; /*! @@ -493,7 +510,6 @@ typedef uint32_t OSKextLoadTag; */ #define kOSKextInvalidLoadTag ((OSKextLoadTag)(-1)) - #ifdef KERNEL /* Make these visible to kexts only and *not* the kernel. diff --git a/libkern/libkern/OSSerializeBinary.h b/libkern/libkern/OSSerializeBinary.h new file mode 100644 index 000000000..f6d5d3bfb --- /dev/null +++ b/libkern/libkern/OSSerializeBinary.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2014 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License.
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _OS_OSSERIALIZEBINARY_H +#define _OS_OSSERIALIZEBINARY_H + +enum +{ + kOSSerializeDictionary = 0x01000000U, + kOSSerializeArray = 0x02000000U, + kOSSerializeSet = 0x03000000U, + kOSSerializeNumber = 0x04000000U, + kOSSerializeSymbol = 0x08000000U, + kOSSerializeString = 0x09000000U, + kOSSerializeData = 0x0a000000U, + kOSSerializeBoolean = 0x0b000000U, + kOSSerializeObject = 0x0c000000U, + kOSSerializeTypeMask = 0x7F000000U, + kOSSerializeDataMask = 0x00FFFFFFU, + kOSSerializeEndCollecton = 0x80000000U, +}; + +#define kOSSerializeBinarySignature "\323\0\0" + + +#endif /* _OS_OSSERIALIZEBINARY_H */ diff --git a/libkern/libkern/OSTypes.h b/libkern/libkern/OSTypes.h index 1119f7303..ce87c6443 100644 --- a/libkern/libkern/OSTypes.h +++ b/libkern/libkern/OSTypes.h @@ -49,12 +49,12 @@ typedef unsigned long UInt32; #endif typedef unsigned long long UInt64; #if defined(__BIG_ENDIAN__) -typedef struct UnsignedWide { +typedef struct __attribute__((deprecated)) UnsignedWide { UInt32 hi; UInt32 lo; } UnsignedWide __attribute__((deprecated)); #elif defined(__LITTLE_ENDIAN__) -typedef struct UnsignedWide { +typedef struct __attribute__((deprecated)) UnsignedWide { UInt32 lo; UInt32 hi; } UnsignedWide __attribute__((deprecated)); diff --git a/libkern/libkern/c++/OSArray.h b/libkern/libkern/c++/OSArray.h index dce9f84e9..91cbd81bf 100644 --- a/libkern/libkern/c++/OSArray.h +++ b/libkern/libkern/c++/OSArray.h @@ -88,6 +88,7 @@ class OSSerialize; class OSArray : public OSCollection { friend class OSSet; + friend class OSSerialize; OSDeclareDefaultStructors(OSArray) diff --git a/libkern/libkern/c++/OSBoolean.h b/libkern/libkern/c++/OSBoolean.h index 039e8f609..4eb533ccf 100644 --- a/libkern/libkern/c++/OSBoolean.h +++ b/libkern/libkern/c++/OSBoolean.h @@ -63,6 +63,7 @@ class OSString; class OSBoolean : public OSObject { OSDeclareDefaultStructors(OSBoolean) + friend class OSSerialize; protected: bool value; diff --git a/libkern/libkern/c++/OSData.h b/libkern/libkern/c++/OSData.h index 11c3a3d10..5c499cc8c 100644 --- a/libkern/libkern/c++/OSData.h +++ b/libkern/libkern/c++/OSData.h @@ -74,6 +74,7 @@ class OSString; class OSData : public OSObject { OSDeclareDefaultStructors(OSData) + friend class OSSerialize; protected: void * data; @@ -81,7 +82,22 @@ protected: unsigned int capacity; unsigned int capacityIncrement; - struct ExpansionData; +#ifdef XNU_KERNEL_PRIVATE + /* Available within xnu source only */ +public: + typedef void (*DeallocFunction)(void * ptr, unsigned int length); +protected: + struct ExpansionData + { + DeallocFunction deallocFunction; + bool disableSerialization; + }; +#else +private: + typedef void (*DeallocFunction)(void * ptr, unsigned int length); +protected: + struct ExpansionData; +#endif /* Reserved for future use. 
(Internal use only) */ ExpansionData * reserved; @@ -720,8 +736,6 @@ public: #else private: #endif - // xxx - DO NOT USE - This interface may change - typedef void (*DeallocFunction)(void * ptr, unsigned int length); virtual void setDeallocFunction(DeallocFunction func); OSMetaClassDeclareReservedUsed(OSData, 0); diff --git a/libkern/libkern/c++/OSDictionary.h b/libkern/libkern/c++/OSDictionary.h index 74ec638e4..9bdba7ac2 100644 --- a/libkern/libkern/c++/OSDictionary.h +++ b/libkern/libkern/c++/OSDictionary.h @@ -113,6 +113,7 @@ class OSString; class OSDictionary : public OSCollection { OSDeclareDefaultStructors(OSDictionary) + friend class OSSerialize; protected: struct dictEntry { diff --git a/libkern/libkern/c++/OSKext.h b/libkern/libkern/c++/OSKext.h index a054c0281..815d85501 100644 --- a/libkern/libkern/c++/OSKext.h +++ b/libkern/libkern/c++/OSKext.h @@ -95,7 +95,7 @@ kern_return_t is_io_catalog_send_data( mach_msg_type_number_t inDataCount, kern_return_t * result); -void kmod_dump_log(vm_offset_t*, unsigned int); +void kmod_dump_log(vm_offset_t*, unsigned int, boolean_t); #endif /* XNU_KERNEL_PRIVATE */ @@ -175,7 +175,7 @@ class OSKext : public OSObject kern_return_t * result); friend void kmod_panic_dump(vm_offset_t*, unsigned int); - friend void kmod_dump_log(vm_offset_t*, unsigned int); + friend void kmod_dump_log(vm_offset_t*, unsigned int, boolean_t); friend void kext_dump_panic_lists(int (*printf_func)(const char * fmt, ...)); @@ -325,17 +325,6 @@ private: uint32_t compressedSize, uint32_t fullSize); - static OSReturn readMkext1Archive( - OSData * mkextData, - uint32_t * checksumPtr); - bool initWithMkext1Info( - OSDictionary * anInfoDict, - OSData * executableWrapper, - OSData * mkextData); - static OSData * extractMkext1Entry( - const void * mkextFileBase, - const void * entry); - /* Dependencies. */ virtual bool resolveDependencies( @@ -477,22 +466,23 @@ private: vm_offset_t * addr, unsigned int cnt, int (* printf_func)(const char *fmt, ...), - bool lockFlag); + bool lockFlag, + bool doUnslide); static boolean_t summaryIsInBacktrace( OSKextLoadedKextSummary * summary, vm_offset_t * addr, unsigned int cnt); static void printSummary( OSKextLoadedKextSummary * summary, - int (* printf_func)(const char *fmt, ...)); + int (* printf_func)(const char *fmt, ...), + bool doUnslide); - static uint32_t saveLoadedKextPanicListTyped( + static int saveLoadedKextPanicListTyped( const char * prefix, int invertFlag, int libsFlag, char * paniclist, - uint32_t list_size, - uint32_t * list_length_ptr); + uint32_t list_size); static void saveLoadedKextPanicList(void); void savePanicString(bool isLoading); static void printKextPanicLists(int (*printf_func)(const char *fmt, ...)); diff --git a/libkern/libkern/c++/OSMetaClass.h b/libkern/libkern/c++/OSMetaClass.h index a16e072e7..84c30e6ab 100644 --- a/libkern/libkern/c++/OSMetaClass.h +++ b/libkern/libkern/c++/OSMetaClass.h @@ -65,12 +65,8 @@ class OSOrderedSet; #else /* XNU_KERNEL_PRIVATE */ #include -#if TARGET_OS_EMBEDDED -#define APPLE_KEXT_VTABLE_PADDING 0 -#else /* TARGET_OS_EMBEDDED */ /*! 
@parseOnly */ #define APPLE_KEXT_VTABLE_PADDING 1 -#endif /* TARGET_OS_EMBEDDED */ #endif /* XNU_KERNEL_PRIVATE */ diff --git a/libkern/libkern/c++/OSNumber.h b/libkern/libkern/c++/OSNumber.h index 0b206784e..6502a3039 100644 --- a/libkern/libkern/c++/OSNumber.h +++ b/libkern/libkern/c++/OSNumber.h @@ -71,6 +71,7 @@ class OSNumber : public OSObject { OSDeclareDefaultStructors(OSNumber) + friend class OSSerialize; protected: unsigned long long value; diff --git a/libkern/libkern/c++/OSSerialize.h b/libkern/libkern/c++/OSSerialize.h index 51d822fc5..4d3d56fb6 100644 --- a/libkern/libkern/c++/OSSerialize.h +++ b/libkern/libkern/c++/OSSerialize.h @@ -32,6 +32,7 @@ #include +class OSCollection; class OSSet; class OSDictionary; @@ -76,9 +77,14 @@ class OSDictionary; * handle synchronization via defined member functions * for serializing properties. */ + +OSObject * +OSUnserializeBinary(const void *buffer, size_t bufferSize); + class OSSerialize : public OSObject { OSDeclareDefaultStructors(OSSerialize) + friend class OSBoolean; protected: char * data; // container for serialized data @@ -94,6 +100,26 @@ protected: /* Reserved for future use. (Internal use only) */ ExpansionData *reserved; +#ifdef XNU_KERNEL_PRIVATE +public: + typedef const OSMetaClassBase * (*Editor)(void * reference, + OSSerialize * s, + OSCollection * container, + const OSSymbol * name, + const OSMetaClassBase * value); +#else + typedef void * Editor; +#endif + +private: + bool binary; + bool endCollection; + Editor editor; + void * editRef; + + bool binarySerialize(const OSMetaClassBase *o); + bool addBinary(const void * data, size_t size); + bool addBinaryObject(const OSMetaClassBase * o, uint32_t key, const void * _bits, size_t size); public: @@ -115,6 +141,8 @@ public: */ static OSSerialize * withCapacity(unsigned int capacity); + static OSSerialize * binaryWithCapacity(unsigned int inCapacity, Editor editor = 0, void * reference = 0); + /*! * @function text * diff --git a/libkern/libkern/c++/OSSet.h b/libkern/libkern/c++/OSSet.h index 0e82f7a87..558f4d1da 100644 --- a/libkern/libkern/c++/OSSet.h +++ b/libkern/libkern/c++/OSSet.h @@ -85,6 +85,7 @@ class OSArray; class OSSet : public OSCollection { OSDeclareDefaultStructors(OSSet) + friend class OSSerialize; private: OSArray * members; diff --git a/libkern/libkern/c++/OSString.h b/libkern/libkern/c++/OSString.h index 2a5474027..29c8be084 100644 --- a/libkern/libkern/c++/OSString.h +++ b/libkern/libkern/c++/OSString.h @@ -180,6 +180,7 @@ public: */ static OSString * withCStringNoCopy(const char * cString); + static OSString * withStringOfLength(const char *cString, size_t length); /*! * @function initWithString @@ -246,6 +247,7 @@ public: */ virtual bool initWithCStringNoCopy(const char * cString); + bool initWithStringOfLength(const char *cString, size_t inlength); /*! 
* @function free diff --git a/libkern/libkern/c++/OSUnserialize.h b/libkern/libkern/c++/OSUnserialize.h index c01f07d93..0dbd2f45e 100644 --- a/libkern/libkern/c++/OSUnserialize.h +++ b/libkern/libkern/c++/OSUnserialize.h @@ -95,6 +95,9 @@ extern "C++" OSObject * OSUnserializeXML( size_t bufferSize, OSString ** errorString = 0); +extern "C++" OSObject * +OSUnserializeBinary(const char *buffer, size_t bufferSize, OSString **errorString); + #ifdef __APPLE_API_OBSOLETE extern OSObject* OSUnserialize(const char *buffer, OSString **errorString = 0); #endif /* __APPLE_API_OBSOLETE */ diff --git a/libkern/libkern/crypto/aes.h b/libkern/libkern/crypto/aes.h index dc7a16c6e..3015d8aec 100644 --- a/libkern/libkern/crypto/aes.h +++ b/libkern/libkern/crypto/aes.h @@ -73,7 +73,7 @@ aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); -#if defined (__i386__) || defined (__x86_64__) +#if defined (__i386__) || defined (__x86_64__) || defined (__arm64__) aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, aes_encrypt_ctx cx[1]); #endif @@ -85,7 +85,7 @@ aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); -#if defined (__i386__) || defined (__x86_64__) +#if defined (__i386__) || defined (__x86_64__) || defined (__arm64__) aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, aes_decrypt_ctx cx[1]); #endif diff --git a/libkern/libkern/kext_request_keys.h b/libkern/libkern/kext_request_keys.h index 66b50e6c2..36dd2ab3f 100644 --- a/libkern/libkern/kext_request_keys.h +++ b/libkern/libkern/kext_request_keys.h @@ -88,16 +88,6 @@ extern "C" { */ #define kKextRequestPredicateGetLoaded "Get Loaded Kext Info" -/* Predicate: Get Kernel Load Address - * Argument: None - * Response: OSNumber containing kernel load address. - * Op result: OSReturn indicating any errors in processing (see OSKextLib.h) - * - * Retrieves the base load address of the running kernel for use in generating - * debug symbols in user space. - */ -#define kKextRequestPredicateGetKernelLoadAddress "Get Kernel Load Address" - /* Predicate: Get All Load Requests * Argument: None * Response: A set of bundle identifiers of all requested kext loads.
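For the arm64_process_reloc() hunk in kxld_reloc.c above, the 128 MB bound on ARM64_RELOC_BRANCH26 follows from the instruction encoding: B/BL carry a signed 26-bit immediate counted in 4-byte words, so the reachable displacement is ±2^25 words, i.e. ±2^27 bytes. A small standalone restatement of that check (editorial sketch; the helper name is invented):

    #include <stdint.h>

    /* Sketch: why the patch requires ABSOLUTE_VALUE(displacement) to be
     * strictly less than 128 * 1024 * 1024 before packing a BRANCH26 field:
     * 2^25 words * 4 bytes/word = 2^27 bytes = 128 MiB in each direction. */
    static int
    branch26_in_range(int64_t displacement_bytes)
    {
        const int64_t limit = (int64_t)1 << 27;     /* 128 MiB */
        return (displacement_bytes > -limit && displacement_bytes < limit);
    }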
diff --git a/libkern/libkern/kxld_types.h b/libkern/libkern/kxld_types.h index 8e5fac9e7..1a9b7f3cc 100644 --- a/libkern/libkern/kxld_types.h +++ b/libkern/libkern/kxld_types.h @@ -62,6 +62,11 @@ #define KXLD_USER_OR_ARM 1 #endif +/* For arm64-specific linking code */ +#if (!KERNEL || __arm64__) + #define KXLD_USER_OR_ARM64 1 +#endif + /* For linking code specific to architectures that support common symbols */ #if (!KERNEL || __i386__) #define KXLD_USER_OR_COMMON 1 diff --git a/libkern/libkern/tree.h b/libkern/libkern/tree.h index 7865f359e..3a26162bd 100644 --- a/libkern/libkern/tree.h +++ b/libkern/libkern/tree.h @@ -334,9 +334,9 @@ struct name { \ #define RB_PLACEHOLDER NULL #define RB_ENTRY(type) \ struct { \ - struct type *rbe_parent; /* parent element */ \ struct type *rbe_left; /* left element */ \ struct type *rbe_right; /* right element */ \ + struct type *rbe_parent; /* parent element */ \ } #define RB_COLOR_MASK (uintptr_t)0x1 diff --git a/libsa/bootstrap.cpp b/libsa/bootstrap.cpp index c36e98fde..02e5694d0 100644 --- a/libsa/bootstrap.cpp +++ b/libsa/bootstrap.cpp @@ -131,9 +131,6 @@ private: void readPrelinkedExtensions( kernel_section_t * prelinkInfoSect); void readBooterExtensions(void); - OSReturn readMkextExtensions( - OSString * deviceTreeName, - OSData * deviceTreeData); OSReturn loadKernelComponentKexts(void); void loadKernelExternalComponents(void); @@ -256,7 +253,7 @@ KLDBootstrap::readPrelinkedExtensions( "Can't find prelinked kexts' text segment."); goto finish; } - + #if KASLR_KEXT_DEBUG unsigned long scratchSize; vm_offset_t scratchAddr; @@ -458,7 +455,6 @@ finish: /********************************************************************* *********************************************************************/ #define BOOTER_KEXT_PREFIX "Driver-" -#define BOOTER_MKEXT_PREFIX "DriversPackage-" typedef struct _DeviceTreeBuffer { uint32_t paddr; @@ -482,7 +478,7 @@ KLDBootstrap::readBooterExtensions(void) OSKextLog(/* kext */ NULL, kOSKextLogProgressLevel | kOSKextLogDirectoryScanFlag | kOSKextLogKextBookkeepingFlag, - "Reading startup extensions/mkexts from booter memory."); + "Reading startup extensions from booter memory."); booterMemoryMap = IORegistryEntry::fromPath( "/chosen/memory-map", gIODTPlane); @@ -520,7 +516,6 @@ KLDBootstrap::readBooterExtensions(void) while ( ( deviceTreeName = OSDynamicCast(OSString, keyIterator->getNextObject() ))) { - boolean_t isMkext = FALSE; const char * devTreeNameCString = deviceTreeName->getCStringNoCopy(); OSData * deviceTreeEntry = OSDynamicCast(OSData, propertyDict->getObject(deviceTreeName)); @@ -534,18 +529,10 @@ KLDBootstrap::readBooterExtensions(void) continue; } - /* Make sure it is either a kext or an mkext */ - if (!strncmp(devTreeNameCString, BOOTER_KEXT_PREFIX, - CONST_STRLEN(BOOTER_KEXT_PREFIX))) { - - isMkext = FALSE; - - } else if (!strncmp(devTreeNameCString, BOOTER_MKEXT_PREFIX, - CONST_STRLEN(BOOTER_MKEXT_PREFIX))) { - - isMkext = TRUE; - - } else { + /* Make sure it is a kext */ + if (strncmp(devTreeNameCString, + BOOTER_KEXT_PREFIX, + CONST_STRLEN(BOOTER_KEXT_PREFIX))) { continue; } @@ -568,7 +555,7 @@ KLDBootstrap::readBooterExtensions(void) OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogDirectoryScanFlag, - "Can't get virtual address for device tree mkext entry %s.", + "Can't get virtual address for device tree entry %s.", devTreeNameCString); goto finish; } @@ -590,16 +577,12 @@ KLDBootstrap::readBooterExtensions(void) } booterData->setDeallocFunction(osdata_phys_free); - if (isMkext) { - 
readMkextExtensions(deviceTreeName, booterData); - } else { - /* Create the kext for the entry, then release it, because the - * kext system keeps them around until explicitly removed. - * Any creation/registration failures are already logged for us. - */ - OSKext * newKext = OSKext::withBooterData(deviceTreeName, booterData); - OSSafeRelease(newKext); - } + /* Create the kext for the entry, then release it, because the + * kext system keeps them around until explicitly removed. + * Any creation/registration failures are already logged for us. + */ + OSKext * newKext = OSKext::withBooterData(deviceTreeName, booterData); + OSSafeRelease(newKext); booterMemoryMap->removeProperty(deviceTreeName); @@ -615,49 +598,6 @@ finish: return; } -/********************************************************************* -*********************************************************************/ -OSReturn -KLDBootstrap::readMkextExtensions( - OSString * deviceTreeName, - OSData * booterData) -{ - OSReturn result = kOSReturnError; - - uint32_t checksum; - IORegistryEntry * registryRoot = NULL; // do not release - OSData * checksumObj = NULL; // must release - - OSKextLog(/* kext */ NULL, - kOSKextLogStepLevel | - kOSKextLogDirectoryScanFlag | kOSKextLogArchiveFlag, - "Reading startup mkext archive from device tree entry %s.", - deviceTreeName->getCStringNoCopy()); - - /* If we successfully read the archive, - * then save the mkext's checksum in the IORegistry. - * assumes we'll only ever have one mkext to boot - */ - result = OSKext::readMkextArchive(booterData, &checksum); - if (result == kOSReturnSuccess) { - - OSKextLog(/* kext */ NULL, - kOSKextLogProgressLevel | - kOSKextLogArchiveFlag, - "Startup mkext archive has checksum 0x%x.", (int)checksum); - - registryRoot = IORegistryEntry::getRegistryRoot(); - assert(registryRoot); - checksumObj = OSData::withBytes((void *)&checksum, sizeof(checksum)); - assert(checksumObj); - if (checksumObj) { - registryRoot->setProperty(kOSStartupMkextCRC, checksumObj); - } - } - - return result; -} - /********************************************************************* *********************************************************************/ #define COM_APPLE "com.apple." diff --git a/libsa/conf/MASTER b/libsa/conf/MASTER deleted file mode 100644 index 5f90761b8..000000000 --- a/libsa/conf/MASTER +++ /dev/null @@ -1,77 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -####################################################################### -# -# Master machine independent configuration file. -# -# Specific configuration files are created based on this and -# the machine specific master file using the doconf script. -# -# Any changes to the master configuration files will affect all -# other configuration files based upon it. -# -####################################################################### -# -# To build a configuration, execute "doconf ." -# Configurations are specified in the "Configurations:" section -# of the MASTER and MASTER.* files as follows: -# -# = [ ... ] -# -# Lines in the MASTER and MASTER.* files are selected based on -# the attribute selector list, found in a comment at the end of -# the line. This is a list of attributes separated by commas. -# The "!" operator selects the line if none of the attributes are -# specified. -# -# For example: -# -# selects a line if "foo" or "bar" are specified. 
-# selects a line if neither "foo" nor "bar" is -# specified. -# -# Lines with no attributes specified are selected for all -# configurations. -# -####################################################################### -# -# Standard Mach Research Configurations: -# -------- ---- -------- --------------- -# -# These are the default configurations that can be used by most sites. -# They are used internally by the Mach project. -# -# LIBSA = [debug] -# -####################################################################### -# -ident LIBSA - -options KDEBUG # kernel tracing # -options IST_KDEBUG # limited tracing # -options NO_KDEBUG # no kernel tracing # - -options GPROF # kernel profiling # - -options CONFIG_NOLIBKLD # kernel linker # - -options MALLOC_RESET_GC # -options CONFIG_DTRACE # -options VM_PRESSURE_EVENTS # - -options CONFIG_NO_PANIC_STRINGS # -options CONFIG_NO_PRINTF_STRINGS # -options CONFIG_NO_KPRINTF_STRINGS # - -options CONFIG_KXLD # kxld/runtime linking of kexts # - -options DEVELOPMENT # dev kernel # - -# CONFIG_KEXT_BASEMENT - alloc post boot loaded kexts after prelinked kexts -# -options CONFIG_KEXT_BASEMENT # # - diff --git a/libsa/conf/MASTER.x86_64 b/libsa/conf/MASTER.x86_64 deleted file mode 100644 index c4edf238f..000000000 --- a/libsa/conf/MASTER.x86_64 +++ /dev/null @@ -1,15 +0,0 @@ -###################################################################### -# -# RELEASE = [ intel mach libkerncpp config_dtrace config_kxld vm_pressure_events config_kext_basement ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug ] -# -# EMBEDDED = [ intel mach libkerncpp ] -# DEVELOPMENT = [ EMBEDDED ] -# -###################################################################### - -machine "x86_64" # -cpu "x86_64" # - -options NO_NESTED_PMAP # diff --git a/libsa/conf/Makefile b/libsa/conf/Makefile index 25a42ef5e..76db9a7d8 100644 --- a/libsa/conf/Makefile +++ b/libsa/conf/Makefile @@ -6,20 +6,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) - -$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) - $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ - cd $(addsuffix /conf, $(TARGET)); \ - rm -f $(notdir $?); \ - cp $? 
.; \ - if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); +# Special handling for x86_64h which shares a MASTER config file with x86_64: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +DOCONF_ARCH_CONFIG_LC = x86_64 +else +DOCONF_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + +MASTERCONFDIR = $(SRCROOT)/config +DOCONFDEPS = $(addprefix $(MASTERCONFDIR)/, MASTER MASTER.$(DOCONF_ARCH_CONFIG_LC)) \ + $(addprefix $(SOURCE)/, Makefile.template Makefile.$(DOCONF_ARCH_CONFIG_LC) files files.$(DOCONF_ARCH_CONFIG_LC)) + +ifneq (,$(wildcard $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC))) +DOCONFDEPS += $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +endif + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile: $(SRCROOT)/SETUP/config/doconf $(OBJROOT)/SETUP/config $(DOCONFDEPS) + $(_v)$(MKDIR) $(TARGET)/$(CURRENT_KERNEL_CONFIG) + $(_v)$(SRCROOT)/SETUP/config/doconf -c -cpu $(DOCONF_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) -s $(SOURCE) -m $(MASTERCONFDIR) $(CURRENT_KERNEL_CONFIG); do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile $(_v)${MAKE} \ diff --git a/libsa/conf/Makefile.template b/libsa/conf/Makefile.template index 88f2eb5a3..940446104 100644 --- a/libsa/conf/Makefile.template +++ b/libsa/conf/Makefile.template @@ -41,26 +41,33 @@ COMP_SUBDIRS = %CFILES +%CXXFILES + %SFILES %MACHDEP -# -# OBJSDEPS is the set of files (defined in the machine dependent -# template if necessary) which all objects depend on (such as an -# in-line assembler expansion filter) -# -${OBJS}: ${OBJSDEPS} - -LDOBJS = $(OBJS) - -$(COMPONENT).filelist: $(LDOBJS) - $(_v)for kld_file in ${LDOBJS}; do \ +# Rebuild if per-file overrides change +${OBJS}: $(firstword $(MAKEFILE_LIST)) + +# Rebuild if global compile flags change +$(COBJS): .CFLAGS +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) +$(CXXOBJS): .CXXFLAGS +.CXXFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KC++) $(CXXFLAGS) $(INCFLAGS) +$(SOBJS): .SFLAGS +.SFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) + +$(COMPONENT).filelist: $(OBJS) + $(_v)for kld_file in ${OBJS}; do \ $(SEG_HACK) -n __KLD -o $${kld_file}__ $${kld_file} ; \ mv $${kld_file}__ $${kld_file} ; \ done @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${LDOBJS}; do \ + $(_v)( for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist diff --git a/libsa/printPlist b/libsa/printPlist deleted file mode 100644 index 8f38d2121..000000000 --- a/libsa/printPlist +++ /dev/null @@ -1,80 +0,0 @@ -void printPlist(OSObject * plist, UInt32 indent = 0) { - const OSMetaClass * typeID; - OSCollectionIterator * iterator; - OSString * key; - OSObject * value; - unsigned int i; - - if (!plist) { - IOLog("error! 
null plist\n"); - return; - } - - typeID = OSTypeIDInst(plist); - - if (typeID == OSTypeID(OSDictionary)) { - - IOLog("{\n"); - OSDictionary * dict = OSDynamicCast(OSDictionary, plist); - iterator = OSCollectionIterator::withCollection(dict); - while ( (key = OSDynamicCast(OSString, iterator->getNextObject())) ) { - for (i = 0; i < indent + 4; i++) { - IOLog(" "); - } - IOLog("%s = ", key->getCStringNoCopy()); - value = dict->getObject(key); - printPlist(value, indent + 4); - } - - for (i = 0; i < indent; i++) { - IOLog(" "); - } - IOLog("}\n"); - - } else if (typeID == OSTypeID(OSArray)) { - - IOLog("(\n"); - - OSArray * array = OSDynamicCast(OSArray, plist); - iterator = OSCollectionIterator::withCollection(array); - while ( (value = iterator->getNextObject()) ) { - for (i = 0; i < indent + 4; i++) { - IOLog(" "); - } - printPlist(value, indent + 4); - } - - for (i = 0; i < indent; i++) { - IOLog(" "); - } - IOLog(")\n"); - - } else if (typeID == OSTypeID(OSString) || typeID == OSTypeID(OSSymbol)) { - - OSString * string = OSDynamicCast(OSString, plist); - IOLog("\"%s\"\n", string->getCStringNoCopy()); - - } else if (typeID == OSTypeID(OSNumber)) { - - OSNumber * number = OSDynamicCast(OSNumber, plist); - UInt32 numberValue = number->unsigned32BitValue(); - IOLog("0x%lx (%ld base 10)\n", numberValue, numberValue); - - } else if (typeID == OSTypeID(OSBoolean)) { - - OSBoolean * boolObj = OSDynamicCast(OSBoolean, plist); - IOLog("%s\n", boolObj->isTrue() ? "true" : "false"); - - } else if (typeID == OSTypeID(OSData)) { - - IOLog("(binary data)\n"); - - } else { - - IOLog("(object of class %s)\n", plist->getMetaClass()->getClassName()); - - } - - IODelay(150000); - return; -} diff --git a/libsyscall/Libsyscall.xcconfig b/libsyscall/Libsyscall.xcconfig index 4fa5f1097..ddebd34a2 100644 --- a/libsyscall/Libsyscall.xcconfig +++ b/libsyscall/Libsyscall.xcconfig @@ -1,38 +1,35 @@ #include "/Makefiles/CoreOS/Xcode/BSD.xcconfig" + +#include "/AppleInternal/XcodeConfig/SimulatorSupport.xcconfig" +// Set INSTALL_PATH[sdk=macosx*] when SimulatorSupport.xcconfig is unavailable +INSTALL_PATH[sdk=macosx*] = $(INSTALL_PATH_ACTUAL) + BUILD_VARIANTS = normal -SUPPORTED_PLATFORMS = macosx iphoneos +SUPPORTED_PLATFORMS = macosx iphoneos iphoneosnano ONLY_ACTIVE_ARCH = NO +DEAD_CODE_STRIPPING = YES DEBUG_INFORMATION_FORMAT = dwarf-with-dsym -INSTALL_PATH = /usr/lib/system -INSTALL_PATH[sdk=iphoneos*] = /usr/lib/system -INSTALL_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/lib/system -INSTALL_PATH[sdk=macosx*] = /usr/lib/system -PUBLIC_HEADERS_FOLDER_PATH = /usr/include/mach -PUBLIC_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/include -PUBLIC_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/include -PUBLIC_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/include -PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include -PRIVATE_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/local/include -PRIVATE_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/local/include -PRIVATE_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/local/include -OS_PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/os -OS_PRIVATE_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/local/include/os +INSTALL_PATH_ACTUAL = /usr/lib/system +PUBLIC_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/include +PRIVATE_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/local/include +OS_PRIVATE_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/local/include/os EXECUTABLE_PREFIX = libsystem_ PRODUCT_NAME = kernel ALWAYS_SEARCH_USER_PATHS = NO ORDER_FILE[sdk=iphoneos*] 
= $(SDKROOT)/$(APPLE_INTERNAL_DIR)/OrderFiles/libsystem_kernel.order OTHER_CFLAGS = -fdollars-in-identifiers -no-cpp-precomp -fno-common -fno-stack-protector -momit-leaf-frame-pointer -DLIBSYSCALL_INTERFACE -D__DARWIN_VERS_1050=1 OTHER_CFLAGS[sdk=macosx*] = $(inherited) -DSYSCALL_PRE1050 -OTHER_CFLAGS[sdk=macosx*][arch=x86_64] = $(inherited) -DNO_SYSCALL_LEGACY +OTHER_CFLAGS[sdk=macosx*][arch=x86_64*] = $(inherited) -DNO_SYSCALL_LEGACY OTHER_CFLAGS[sdk=iphoneos*] = $(inherited) -DNO_SYSCALL_LEGACY GCC_PREPROCESSOR_DEFINITIONS = CF_OPEN_SOURCE CF_EXCLUDE_CSTD_HEADERS DEBUG _FORTIFY_SOURCE=0 -HEADER_SEARCH_PATHS = $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders $(PROJECT_DIR)/mach $(PROJECT_DIR)/os $(PROJECT_DIR)/wrappers $(PROJECT_DIR)/wrappers/string $(PROJECT_DIR)/wrappers/libproc $(PROJECT_DIR)/wrappers/libproc/spawn +HEADER_SEARCH_PATHS = $(PROJECT_DIR)/mach $(PROJECT_DIR)/os $(PROJECT_DIR)/wrappers $(PROJECT_DIR)/wrappers/string $(PROJECT_DIR)/wrappers/libproc $(PROJECT_DIR)/wrappers/libproc/spawn $(BUILT_PRODUCTS_DIR)/internal_hdr/include $(BUILT_PRODUCTS_DIR)/mig_hdr/local/include $(BUILT_PRODUCTS_DIR)/mig_hdr/include $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders WARNING_CFLAGS = -Wmost GCC_TREAT_WARNINGS_AS_ERRORS = YES GCC_WARN_ABOUT_MISSING_NEWLINE = YES CODE_SIGN_IDENTITY = - DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion) DYLIB_LDFLAGS = -umbrella System -all_load -Wl,-alias_list,$(SRCROOT)/Libsyscall.aliases +DYLIB_LDFLAGS[sdk=iphoneos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000 OTHER_LDFLAGS = INSTALLHDRS_SCRIPT_PHASE = YES INSTALLHDRS_COPY_PHASE = YES diff --git a/libsyscall/Libsyscall.xcodeproj/project.pbxproj b/libsyscall/Libsyscall.xcodeproj/project.pbxproj index f2e78e2e1..1bb73464b 100644 --- a/libsyscall/Libsyscall.xcodeproj/project.pbxproj +++ b/libsyscall/Libsyscall.xcodeproj/project.pbxproj @@ -30,6 +30,17 @@ name = Build; productName = Build; }; + BA4414A1183366E600AAE813 /* MIG headers */ = { + isa = PBXAggregateTarget; + buildConfigurationList = BA4414A2183366E700AAE813 /* Build configuration list for PBXAggregateTarget "MIG headers" */; + buildPhases = ( + BA4414A41833672200AAE813 /* Generate MIG Headers */, + ); + dependencies = ( + ); + name = "MIG headers"; + productName = "MIG headers"; + }; /* End PBXAggregateTarget section */ /* Begin PBXBuildFile section */ @@ -89,13 +100,33 @@ 24E4782712088267009A384D /* _libc_funcptr.c in Sources */ = {isa = PBXBuildFile; fileRef = 24E47824120881DF009A384D /* _libc_funcptr.c */; }; 291D3C281354FDD100D46061 /* mach_port.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C261354FDD100D46061 /* mach_port.c */; }; 291D3C291354FDD100D46061 /* mach_vm.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C271354FDD100D46061 /* mach_vm.c */; }; + 29A59AE2183B0DE000E8B896 /* renameat.c in Sources */ = {isa = PBXBuildFile; fileRef = 29A59AE1183B0DE000E8B896 /* renameat.c */; }; + 29A59AE6183B110C00E8B896 /* unlinkat.c in Sources */ = {isa = PBXBuildFile; fileRef = 29A59AE5183B110C00E8B896 /* unlinkat.c */; }; + 2BA88DCC1810A3CE00EB63F6 /* coalition.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BA88DCB1810A3CE00EB63F6 /* coalition.c */; }; + 374A36E314748F1300AAF39D /* varargs_wrappers.s in Sources */ = {isa = PBXBuildFile; fileRef = 374A36E214748EE400AAF39D /* varargs_wrappers.s */; }; 467DAFD4157E8AF200CE68F0 /* guarded_open_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */; }; + 4BDD5F1D1891AB2F004BF300 /* 
mach_approximate_time.c in Sources */ = {isa = PBXBuildFile; fileRef = 4BDD5F1B1891AB2F004BF300 /* mach_approximate_time.c */; }; + 4BDD5F1E1891AB2F004BF300 /* mach_approximate_time.s in Sources */ = {isa = PBXBuildFile; fileRef = 4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */; }; 729B7D0A15C8938C000E2501 /* carbon_delete.c in Sources */ = {isa = PBXBuildFile; fileRef = FB50F1B315AB7DE700F814BA /* carbon_delete.c */; }; - 7466C924170CBA53004557CC /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 72B1E6ED190723DB00FB3FA2 /* guarded_open_dprotected_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 72B1E6EC190723DB00FB3FA2 /* guarded_open_dprotected_np.c */; }; + 74119F46188F3B6A00C6F48F /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; + 7466C924170CBA53004557CC /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; + 746C7FEA18E48791008639D7 /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; + 74F3290B18EB269400B2B70E /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; }; + 7AE28FDF18AC41B1006A5626 /* csr.c in Sources */ = {isa = PBXBuildFile; fileRef = 7AE28FDE18AC41B1006A5626 /* csr.c */; }; + 9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */ = {isa = PBXBuildFile; fileRef = 906AA2D018F74CD1001C681A /* rename_ext.c */; }; A59CB95616669EFB00B064B3 /* stack_logging_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = A59CB95516669DB700B064B3 /* stack_logging_internal.h */; }; A59CB9581666A1A200B064B3 /* munmap.c in Sources */ = {isa = PBXBuildFile; fileRef = A59CB9571666A1A200B064B3 /* munmap.c */; }; + BA4414AA18336A5F00AAE813 /* mach in CopyFiles */ = {isa = PBXBuildFile; fileRef = BA4414A51833697C00AAE813 /* mach */; }; + BA4414AB18336A6400AAE813 /* servers in CopyFiles */ = {isa = PBXBuildFile; fileRef = BA4414A6183369A100AAE813 /* servers */; }; + BA4414AD18336A9300AAE813 /* mach in CopyFiles */ = {isa = PBXBuildFile; fileRef = BA4414A7183369C100AAE813 /* mach */; }; + BA4414B518336E3600AAE813 /* mach in Copy Files */ = {isa = PBXBuildFile; fileRef = BA4414A51833697C00AAE813 /* mach */; }; + BA4414B618336E3A00AAE813 /* servers in Copy Files */ = {isa = PBXBuildFile; fileRef = BA4414A6183369A100AAE813 /* servers */; }; + BA4414B818336E6F00AAE813 /* mach in CopyFiles */ = {isa = PBXBuildFile; fileRef = BA4414A7183369C100AAE813 /* mach */; }; C639F0E51741C25800A39F47 /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; }; + C6460B7C182025DF00F73CCA /* sfi.c in Sources */ = {isa = PBXBuildFile; fileRef = C6460B7B182025DF00F73CCA /* sfi.c */; }; C6AB38DB174202C10036DD9F /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; }; + C6BEE9181806840200D25AAB /* posix_sem_obsolete.c in Sources */ = {isa = PBXBuildFile; fileRef = C6BEE9171806840200D25AAB /* posix_sem_obsolete.c */; }; C6C40122174155E3000AE69F /* gethostuuid_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C6C40121174154D9000AE69F /* gethostuuid_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; C6C4012317415637000AE69F /* gethostuuid.c in Sources */ = 
{isa = PBXBuildFile; fileRef = C6C4012017415384000AE69F /* gethostuuid.c */; }; C6C401241741566D000AE69F /* gethostuuid_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C6C40121174154D9000AE69F /* gethostuuid_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; @@ -118,11 +149,12 @@ C6D3EFC816542C510052CF30 /* exc_catcher.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A091611F8E7A800E4693F /* exc_catcher.h */; }; C6D3EFC916542C510052CF30 /* _libkernel_init.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A08B211F8B05900E4693F /* _libkernel_init.h */; settings = {ATTRIBUTES = (Private, ); }; }; C6D3F03016542C980052CF30 /* dummy.c in Sources */ = {isa = PBXBuildFile; fileRef = C6D3F02F16542C980052CF30 /* dummy.c */; }; + C962B16C18DBA2C80031244A /* setpriority.c in Sources */ = {isa = PBXBuildFile; fileRef = C962B16B18DBA2C80031244A /* setpriority.c */; }; + C962B16E18DBB43F0031244A /* thread_act.c in Sources */ = {isa = PBXBuildFile; fileRef = C962B16D18DBB43F0031244A /* thread_act.c */; }; C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */; }; C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */; }; C9A3D6EB1672AD1000A5CAA3 /* tsd.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C9EE57F51669673D00337E4B /* tsd.h */; }; C9B6A5ED153795DE00749EBA /* alloc_once.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C1824F15338C0B00933F23 /* alloc_once.c */; }; - C9D9BD17114B00600000D8B9 /* vm_map_compat.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */; }; C9D9BD19114B00600000D8B9 /* clock_priv.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC5114B00600000D8B9 /* clock_priv.defs */; }; C9D9BD1A114B00600000D8B9 /* clock_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC6114B00600000D8B9 /* clock_reply.defs */; }; C9D9BD1B114B00600000D8B9 /* clock_sleep.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC7114B00600000D8B9 /* clock_sleep.c */; }; @@ -175,6 +207,7 @@ C9D9BD58114B00600000D8B9 /* thread_act.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD10114B00600000D8B9 /* thread_act.defs */; }; C9D9BD59114B00600000D8B9 /* vm_map.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD11114B00600000D8B9 /* vm_map.defs */; }; C9FD8508166D6BD400963B73 /* tsd.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C9EE57F51669673D00337E4B /* tsd.h */; }; + E4216C311822D404006F2632 /* mach_voucher.defs in Sources */ = {isa = PBXBuildFile; fileRef = E4216C301822D404006F2632 /* mach_voucher.defs */; }; E453AF351700FD3C00F2C94C /* getiopolicy_np.c in Sources */ = {isa = PBXBuildFile; fileRef = E453AF341700FD3C00F2C94C /* getiopolicy_np.c */; }; E453AF3617013CBF00F2C94C /* libproc.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C2B16F868ED0002AF25 /* libproc.h */; settings = {ATTRIBUTES = (Public, ); }; }; E453AF3717013CC200F2C94C /* libproc_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C2C16F868ED0002AF25 /* libproc_internal.h */; settings = {ATTRIBUTES = (Private, ); }; }; @@ -223,9 +256,68 @@ remoteGlobalIDString = 249C60FE1194747600ED73F3; remoteInfo = Libmach; }; + BA4414AE18336AF300AAE813 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 
BA4414A1183366E600AAE813; + remoteInfo = "MIG headers"; + }; + BA4414B218336D8D00AAE813 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = BA4414A1183366E600AAE813; + remoteInfo = "MIG headers"; + }; /* End PBXContainerItemProxy section */ /* Begin PBXCopyFilesBuildPhase section */ + BA4414A818336A1300AAE813 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = "$(PUBLIC_HEADERS_FOLDER_PATH)"; + dstSubfolderSpec = 0; + files = ( + BA4414AA18336A5F00AAE813 /* mach in CopyFiles */, + BA4414AB18336A6400AAE813 /* servers in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; + BA4414AC18336A7700AAE813 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = "$(PRIVATE_HEADERS_FOLDER_PATH)"; + dstSubfolderSpec = 0; + files = ( + BA4414AD18336A9300AAE813 /* mach in CopyFiles */, + 746C7FEA18E48791008639D7 /* vm_page_size.h in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; + BA4414B418336E1A00AAE813 /* Copy Files */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = "$(PUBLIC_HEADERS_FOLDER_PATH)"; + dstSubfolderSpec = 0; + files = ( + BA4414B518336E3600AAE813 /* mach in Copy Files */, + BA4414B618336E3A00AAE813 /* servers in Copy Files */, + ); + name = "Copy Files"; + runOnlyForDeploymentPostprocessing = 1; + }; + BA4414B718336E5600AAE813 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = "$(PRIVATE_HEADERS_FOLDER_PATH)"; + dstSubfolderSpec = 0; + files = ( + BA4414B818336E6F00AAE813 /* mach in CopyFiles */, + 74F3290B18EB269400B2B70E /* vm_page_size.h in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; C63F480B1654203800A1F78F /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 8; @@ -252,7 +344,6 @@ 030B179A135377B400DAD1F0 /* open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = open_dprotected_np.c; sourceTree = ""; }; 240D716711933ED300556E97 /* mach_install_mig.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = mach_install_mig.sh; sourceTree = ""; }; 2419382A12135FF6003CDE41 /* chmod.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = chmod.c; sourceTree = ""; }; - 2427FA821200BCF800EF7A1F /* compat-symlinks.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "compat-symlinks.sh"; sourceTree = ""; }; 242AB66511EBDC1200107336 /* errno.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = errno.c; sourceTree = ""; }; 24484A7311F51E9800E10CD2 /* string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = string.h; sourceTree = ""; }; 24484A7411F51E9800E10CD2 /* string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = string.c; sourceTree = ""; }; @@ -331,20 +422,37 @@ 24E47824120881DF009A384D /* _libc_funcptr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = _libc_funcptr.c; sourceTree = ""; }; 291D3C261354FDD100D46061 /* mach_port.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_port.c; sourceTree = ""; }; 291D3C271354FDD100D46061 /* mach_vm.c */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.c; path = mach_vm.c; sourceTree = ""; }; + 29A59AE1183B0DE000E8B896 /* renameat.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = renameat.c; sourceTree = ""; }; + 29A59AE5183B110C00E8B896 /* unlinkat.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = unlinkat.c; sourceTree = ""; }; + 2BA88DCB1810A3CE00EB63F6 /* coalition.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = coalition.c; sourceTree = ""; }; + 374A36E214748EE400AAF39D /* varargs_wrappers.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = varargs_wrappers.s; sourceTree = ""; }; + 37DDFB7614748713009D3355 /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = ""; }; 467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = guarded_open_np.c; sourceTree = ""; }; + 4BDD5F1B1891AB2F004BF300 /* mach_approximate_time.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_approximate_time.c; sourceTree = ""; }; + 4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mach_approximate_time.s; sourceTree = ""; }; + 72B1E6EC190723DB00FB3FA2 /* guarded_open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = guarded_open_dprotected_np.c; sourceTree = ""; }; 7466C923170CB99B004557CC /* vm_page_size.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = vm_page_size.h; sourceTree = ""; }; + 7AE28FDE18AC41B1006A5626 /* csr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = csr.c; sourceTree = ""; }; + 906AA2D018F74CD1001C681A /* rename_ext.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = rename_ext.c; sourceTree = ""; }; A59CB95516669DB700B064B3 /* stack_logging_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_logging_internal.h; sourceTree = ""; }; A59CB9571666A1A200B064B3 /* munmap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = munmap.c; sourceTree = ""; }; + BA4414A51833697C00AAE813 /* mach */ = {isa = PBXFileReference; lastKnownFileType = text; name = mach; path = mig_hdr/include/mach; sourceTree = BUILT_PRODUCTS_DIR; }; + BA4414A6183369A100AAE813 /* servers */ = {isa = PBXFileReference; lastKnownFileType = text; name = servers; path = mig_hdr/include/servers; sourceTree = BUILT_PRODUCTS_DIR; }; + BA4414A7183369C100AAE813 /* mach */ = {isa = PBXFileReference; lastKnownFileType = text; name = mach; path = mig_hdr/local/include/mach; sourceTree = BUILT_PRODUCTS_DIR; }; + BA5CDB4018AEBAD500E37982 /* __thread_selfusage.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __thread_selfusage.s; sourceTree = ""; }; C639F0E41741C09A00A39F47 /* gethostuuid.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = gethostuuid.h; sourceTree = ""; }; + C6460B7B182025DF00F73CCA /* sfi.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sfi.c; sourceTree = ""; }; + C6BEE9171806840200D25AAB /* posix_sem_obsolete.c */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = posix_sem_obsolete.c; sourceTree = ""; }; C6C4012017415384000AE69F /* gethostuuid.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = gethostuuid.c; sourceTree = ""; }; C6C40121174154D9000AE69F /* gethostuuid_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = gethostuuid_private.h; sourceTree = ""; }; C6D3F02E16542C510052CF30 /* libsystem_Libsyscall_headers_Sim.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsystem_Libsyscall_headers_Sim.a; sourceTree = BUILT_PRODUCTS_DIR; }; C6D3F02F16542C980052CF30 /* dummy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dummy.c; sourceTree = ""; }; + C962B16B18DBA2C80031244A /* setpriority.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = setpriority.c; sourceTree = ""; }; + C962B16D18DBB43F0031244A /* thread_act.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = thread_act.c; sourceTree = ""; }; C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm; path = __get_cpu_capabilities.s; sourceTree = ""; }; C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = init_cpu_capabilities.c; sourceTree = ""; }; C9C1824F15338C0B00933F23 /* alloc_once.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = alloc_once.c; sourceTree = ""; }; C9D9BCBF114B00600000D8B9 /* .open_source_exclude */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .open_source_exclude; sourceTree = ""; }; - C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = vm_map_compat.c; sourceTree = ""; }; C9D9BCC5114B00600000D8B9 /* clock_priv.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = clock_priv.defs; sourceTree = ""; }; C9D9BCC6114B00600000D8B9 /* clock_reply.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = clock_reply.defs; sourceTree = ""; }; C9D9BCC7114B00600000D8B9 /* clock_sleep.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = clock_sleep.c; sourceTree = ""; }; @@ -407,6 +515,7 @@ C9EE57F51669673D00337E4B /* tsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tsd.h; sourceTree = ""; }; D2AAC0630554660B00DB518D /* libsystem_kernel.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsystem_kernel.a; sourceTree = BUILT_PRODUCTS_DIR; }; E40C845216FAFB3F00C238DD /* Libsyscall.aliases */ = {isa = PBXFileReference; lastKnownFileType = text; path = Libsyscall.aliases; sourceTree = ""; }; + E4216C301822D404006F2632 /* mach_voucher.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = mach_voucher.defs; sourceTree = ""; }; E453AF341700FD3C00F2C94C /* getiopolicy_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getiopolicy_np.c; sourceTree = ""; }; E4D45C2116F856900002AF25 /* __commpage_gettimeofday.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = __commpage_gettimeofday.c; sourceTree = 
""; }; E4D45C2216F856900002AF25 /* __commpage_gettimeofday.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __commpage_gettimeofday.s; sourceTree = ""; }; @@ -462,6 +571,7 @@ 247A08B011F8AF1700E4693F /* wrappers */, 240D716611933ED300556E97 /* xcodescripts */, 1AB674ADFE9D54B511CA2CBB /* Products */, + BA4414B118336D6A00AAE813 /* Generated MIG headers */, ); name = mach; sourceTree = ""; @@ -506,6 +616,7 @@ C9D9BCF2114B00600000D8B9 /* mach_port.defs */, C9D9BCF3114B00600000D8B9 /* mach_traps.s */, 291D3C271354FDD100D46061 /* mach_vm.c */, + E4216C301822D404006F2632 /* mach_voucher.defs */, C9D9BCF4114B00600000D8B9 /* mach_vm.defs */, C9D9BCF6114B00600000D8B9 /* mig_allocate.c */, C9D9BCF7114B00600000D8B9 /* mig_deallocate.c */, @@ -525,6 +636,7 @@ 24484A7311F51E9800E10CD2 /* string.h */, 24484A7411F51E9800E10CD2 /* string.c */, C9D9BD0F114B00600000D8B9 /* task.defs */, + C962B16D18DBB43F0031244A /* thread_act.c */, C9D9BD10114B00600000D8B9 /* thread_act.defs */, C9D9BD11114B00600000D8B9 /* vm_map.defs */, 249C612C1194827D00ED73F3 /* dylib_link.c */, @@ -549,7 +661,6 @@ 24D1159911E6723E0063D54D /* create-syscalls.pl */, 24614EA111E7A2ED00E78584 /* compile-syscalls.pl */, 240D716711933ED300556E97 /* mach_install_mig.sh */, - 2427FA821200BCF800EF7A1F /* compat-symlinks.sh */, ); path = xcodescripts; sourceTree = ""; @@ -571,35 +682,47 @@ isa = PBXGroup; children = ( 248BA04A121C8EE4008C073F /* cancelable */, - 2419382912135FE1003CDE41 /* unix03 */, 24A7C6951200AF8A007669EB /* legacy */, E4D45C2916F868ED0002AF25 /* libproc */, E4D45C3B16FB20970002AF25 /* spawn */, E4D7E55216F8776300F92D8D /* string */, - 247A08B211F8B05900E4693F /* _libkernel_init.h */, - 247A08B311F8B05900E4693F /* _libkernel_init.c */, + 2419382912135FE1003CDE41 /* unix03 */, E4D45C2116F856900002AF25 /* __commpage_gettimeofday.c */, E4D45C2216F856900002AF25 /* __commpage_gettimeofday.s */, C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */, 24A7C5CB11FF973C007669EB /* _errno.h */, 24E47824120881DF009A384D /* _libc_funcptr.c */, + 247A08B311F8B05900E4693F /* _libkernel_init.c */, + 247A08B211F8B05900E4693F /* _libkernel_init.h */, FB50F1B315AB7DE700F814BA /* carbon_delete.c */, + 2BA88DCB1810A3CE00EB63F6 /* coalition.c */, + 7AE28FDE18AC41B1006A5626 /* csr.c */, + C6C40121174154D9000AE69F /* gethostuuid_private.h */, + C6C4012017415384000AE69F /* gethostuuid.c */, + C639F0E41741C09A00A39F47 /* gethostuuid.h */, E453AF341700FD3C00F2C94C /* getiopolicy_np.c */, 467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */, + 72B1E6EC190723DB00FB3FA2 /* guarded_open_dprotected_np.c */, C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */, 248BA07F121DA36B008C073F /* ioctl.c */, 248BA081121DA4F3008C073F /* kill.c */, E4D45C2316F856900002AF25 /* mach_absolute_time.s */, - 24B8C2611237F53900D36CC3 /* remove-counter.c */, + 4BDD5F1B1891AB2F004BF300 /* mach_approximate_time.c */, + 4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */, 030B179A135377B400DAD1F0 /* open_dprotected_np.c */, + C6BEE9171806840200D25AAB /* posix_sem_obsolete.c */, + 24B8C2611237F53900D36CC3 /* remove-counter.c */, 248AA966122C7CDA0085F5B1 /* rename.c */, + 29A59AE1183B0DE000E8B896 /* renameat.c */, + 906AA2D018F74CD1001C681A /* rename_ext.c */, 248AA964122C7C330085F5B1 /* rmdir.c */, 248BA090121DDD7F008C073F /* select-base.c */, + C962B16B18DBA2C80031244A /* setpriority.c */, + C6460B7B182025DF00F73CCA /* sfi.c */, 24B223B3121DFF12007DAEDE /* sigsuspend-base.c */, 248AA962122C7B2A0085F5B1 /* unlink.c */, - 
C6C40121174154D9000AE69F /* gethostuuid_private.h */, - C639F0E41741C09A00A39F47 /* gethostuuid.h */, - C6C4012017415384000AE69F /* gethostuuid.c */, + 29A59AE5183B110C00E8B896 /* unlinkat.c */, + 374A36E214748EE400AAF39D /* varargs_wrappers.s */, ); path = wrappers; sourceTree = ""; @@ -662,6 +785,7 @@ 24D1156F11E671B20063D54D /* __sigreturn.s */, 24D1157011E671B20063D54D /* __syscall.s */, 24D1157111E671B20063D54D /* __thread_selfid.s */, + BA5CDB4018AEBAD500E37982 /* __thread_selfusage.s */, 24D1157211E671B20063D54D /* __vfork.s */, 24D1157311E671B20063D54D /* custom.s */, 24D1157411E671B20063D54D /* SYS.h */, @@ -683,6 +807,7 @@ 24D1158A11E672270063D54D /* iPhoneOS */ = { isa = PBXGroup; children = ( + 37DDFB7514748713009D3355 /* arm64 */, 24D1158B11E672270063D54D /* arm */, ); path = iPhoneOS; @@ -730,6 +855,24 @@ path = x86_64; sourceTree = ""; }; + 37DDFB7514748713009D3355 /* arm64 */ = { + isa = PBXGroup; + children = ( + 37DDFB7614748713009D3355 /* syscall.map */, + ); + path = arm64; + sourceTree = ""; + }; + BA4414B118336D6A00AAE813 /* Generated MIG headers */ = { + isa = PBXGroup; + children = ( + BA4414A7183369C100AAE813 /* mach */, + BA4414A6183369A100AAE813 /* servers */, + BA4414A51833697C00AAE813 /* mach */, + ); + name = "Generated MIG headers"; + sourceTree = ""; + }; C9C1824E15338BEB00933F23 /* os */ = { isa = PBXGroup; children = ( @@ -743,7 +886,6 @@ isa = PBXGroup; children = ( C9D9BCBF114B00600000D8B9 /* .open_source_exclude */, - C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */, ); path = arm; sourceTree = ""; @@ -845,6 +987,7 @@ E453AF3917013F1B00F2C94C /* spawn_private.h in Headers */, E453AF3617013CBF00F2C94C /* libproc.h in Headers */, E453AF3717013CC200F2C94C /* libproc_internal.h in Headers */, + 74119F46188F3B6A00C6F48F /* vm_page_size.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -862,7 +1005,6 @@ C6C40122174155E3000AE69F /* gethostuuid_private.h in Headers */, C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */, C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */, - 7466C924170CBA53004557CC /* vm_page_size.h in Headers */, C9D9BD2C114B00600000D8B9 /* sync.h in Headers */, C9D9BD2F114B00600000D8B9 /* vm_task.h in Headers */, C9D9BD50114B00600000D8B9 /* key_defs.h in Headers */, @@ -878,6 +1020,7 @@ E4D45C4016FB20DC0002AF25 /* spawn_private.h in Headers */, E4D45C2F16F868ED0002AF25 /* libproc.h in Headers */, E4D45C3016F868ED0002AF25 /* libproc_internal.h in Headers */, + 7466C924170CBA53004557CC /* vm_page_size.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -890,7 +1033,6 @@ buildPhases = ( 249C61281194815000ED73F3 /* Sources */, 249C60FD1194747600ED73F3 /* Frameworks */, - 2427FA811200BCDA00EF7A1F /* Compat Symlinks */, ); buildRules = ( ); @@ -908,12 +1050,14 @@ buildPhases = ( C6D3EFB516542C510052CF30 /* Headers */, C6D3EFCA16542C510052CF30 /* CopyFiles */, - C6D3EFCC16542C510052CF30 /* Install MIG Headers */, + BA4414B418336E1A00AAE813 /* Copy Files */, + BA4414B718336E5600AAE813 /* CopyFiles */, C6D3EFCD16542C510052CF30 /* Sources */, ); buildRules = ( ); dependencies = ( + BA4414B318336D8D00AAE813 /* PBXTargetDependency */, ); name = Libsyscall_headers_Sim; productName = mach; @@ -926,7 +1070,8 @@ buildPhases = ( D2AAC0600554660B00DB518D /* Headers */, C63F480B1654203800A1F78F /* CopyFiles */, - 2487545E11629934000975E0 /* Install MIG Headers */, + BA4414A818336A1300AAE813 /* CopyFiles */, + BA4414AC18336A7700AAE813 /* CopyFiles */, D2AAC0610554660B00DB518D /* Sources */, D289988505E68E00004EDB86 /* 
Frameworks */, ); @@ -934,6 +1079,7 @@ ); dependencies = ( 242AB67911ED03ED00107336 /* PBXTargetDependency */, + BA4414AF18336AF300AAE813 /* PBXTargetDependency */, ); name = Libsyscall_static; productName = mach; @@ -964,6 +1110,7 @@ targets = ( 249C61101194755D00ED73F3 /* Build */, 24614EF311E7C98600E78584 /* Syscalls */, + BA4414A1183366E600AAE813 /* MIG headers */, D2AAC0620554660B00DB518D /* Libsyscall_static */, 249C60FE1194747600ED73F3 /* Libsyscall_dynamic */, C6D3EFB216542C510052CF30 /* Libsyscall_headers_Sim */, @@ -972,20 +1119,6 @@ /* End PBXProject section */ /* Begin PBXShellScriptBuildPhase section */ - 2427FA811200BCDA00EF7A1F /* Compat Symlinks */ = { - isa = PBXShellScriptBuildPhase; - buildActionMask = 8; - files = ( - ); - inputPaths = ( - ); - name = "Compat Symlinks"; - outputPaths = ( - ); - runOnlyForDeploymentPostprocessing = 1; - shellPath = /bin/sh; - shellScript = "\"$PROJECT_DIR\"/xcodescripts/compat-symlinks.sh"; - }; 24614EF211E7C98600E78584 /* Generate Syscalls */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; @@ -1015,28 +1148,14 @@ shellPath = /bin/sh; shellScript = "set -x\n[[ $ACTION == \"installhdrs\" ]] && exit 0\n\nmkdir -p $OBJROOT/UninstalledProducts\n\n$SRCROOT/xcodescripts/compile-syscalls.pl \\\n\t$OBJROOT/sys/stubs.list \\\n\t$BUILD_ROOT/syscalls.a"; }; - 2487545E11629934000975E0 /* Install MIG Headers */ = { - isa = PBXShellScriptBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - inputPaths = ( - ); - name = "Install MIG Headers"; - outputPaths = ( - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "\"$PROJECT_DIR\"/xcodescripts/mach_install_mig.sh"; - }; - C6D3EFCC16542C510052CF30 /* Install MIG Headers */ = { + BA4414A41833672200AAE813 /* Generate MIG Headers */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; files = ( ); inputPaths = ( ); - name = "Install MIG Headers"; + name = "Generate MIG Headers"; outputPaths = ( ); runOnlyForDeploymentPostprocessing = 0; @@ -1091,13 +1210,16 @@ C9D9BD53114B00600000D8B9 /* netname.defs in Sources */, C9D9BD57114B00600000D8B9 /* task.defs in Sources */, C9D9BD58114B00600000D8B9 /* thread_act.defs in Sources */, + 29A59AE6183B110C00E8B896 /* unlinkat.c in Sources */, C9D9BD59114B00600000D8B9 /* vm_map.defs in Sources */, C9D9BD1B114B00600000D8B9 /* clock_sleep.c in Sources */, + 29A59AE2183B0DE000E8B896 /* renameat.c in Sources */, C9D9BD1D114B00600000D8B9 /* error_codes.c in Sources */, C9D9BD1F114B00600000D8B9 /* exc_catcher_state_identity.c in Sources */, C9D9BD20114B00600000D8B9 /* exc_catcher_state.c in Sources */, C9D9BD21114B00600000D8B9 /* exc_catcher.c in Sources */, C9D9BD24114B00600000D8B9 /* fprintf_stderr.c in Sources */, + 72B1E6ED190723DB00FB3FA2 /* guarded_open_dprotected_np.c in Sources */, C9D9BD36114B00600000D8B9 /* mach_error_string.c in Sources */, C9D9BD37114B00600000D8B9 /* mach_error.c in Sources */, C9D9BD3B114B00600000D8B9 /* mach_init.c in Sources */, @@ -1119,12 +1241,12 @@ 24484A7511F6178E00E10CD2 /* string.c in Sources */, E453AF351700FD3C00F2C94C /* getiopolicy_np.c in Sources */, 2485235511582D8F0051B413 /* mach_legacy.c in Sources */, - C9D9BD17114B00600000D8B9 /* vm_map_compat.c in Sources */, 242AB66611EBDC1200107336 /* errno.c in Sources */, E4D45C2E16F868ED0002AF25 /* libproc.c in Sources */, 247A08C211F8BDC900E4693F /* _libkernel_init.c in Sources */, 24A7C5BC11FF8DA6007669EB /* accept.c in Sources */, 24A7C5BD11FF8DA6007669EB /* bind.c in Sources */, + 4BDD5F1D1891AB2F004BF300 /* 
mach_approximate_time.c in Sources */, C6C4012317415637000AE69F /* gethostuuid.c in Sources */, 24A7C5BF11FF8DA6007669EB /* getattrlist.c in Sources */, 24A7C5C011FF8DA6007669EB /* getpeername.c in Sources */, @@ -1132,33 +1254,42 @@ 24A7C5C211FF8DA6007669EB /* lchown.c in Sources */, 24A7C5C311FF8DA6007669EB /* listen.c in Sources */, 24A7C5C411FF8DA6007669EB /* recvfrom.c in Sources */, + C962B16E18DBB43F0031244A /* thread_act.c in Sources */, 24A7C5C511FF8DA6007669EB /* recvmsg.c in Sources */, 24A7C5C611FF8DA6007669EB /* sendmsg.c in Sources */, 24A7C5C711FF8DA6007669EB /* sendto.c in Sources */, 24A7C5C811FF8DA6007669EB /* setattrlist.c in Sources */, 24A7C5C911FF8DA6007669EB /* socketpair.c in Sources */, + 9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */, 2419382B12135FF6003CDE41 /* chmod.c in Sources */, 248BA01D121C56BF008C073F /* connect.c in Sources */, 248BA01F121C607E008C073F /* fchmod.c in Sources */, E4D45C3616F86BD80002AF25 /* posix_spawn.c in Sources */, + C962B16C18DBA2C80031244A /* setpriority.c in Sources */, 248BA04F121C8F06008C073F /* fcntl.c in Sources */, 248BA05C121C9649008C073F /* fcntl-cancel.c in Sources */, 248BA069121D9E27008C073F /* getrlimit.c in Sources */, + C6460B7C182025DF00F73CCA /* sfi.c in Sources */, 248BA080121DA36B008C073F /* ioctl.c in Sources */, + C6BEE9181806840200D25AAB /* posix_sem_obsolete.c in Sources */, 248BA082121DA4F3008C073F /* kill.c in Sources */, 248BA085121DA5E4008C073F /* kill.c in Sources */, + 2BA88DCC1810A3CE00EB63F6 /* coalition.c in Sources */, 248BA087121DA72D008C073F /* mmap.c in Sources */, + 7AE28FDF18AC41B1006A5626 /* csr.c in Sources */, 248BA089121DA8E0008C073F /* mprotect.c in Sources */, 248BA08B121DAC86008C073F /* msync.c in Sources */, 248BA08D121DB0E7008C073F /* munmap.c in Sources */, 248BA08F121DC545008C073F /* open.c in Sources */, 248BA093121DE369008C073F /* select.c in Sources */, 248BA095121DE565008C073F /* select-pre1050.c in Sources */, + 4BDD5F1E1891AB2F004BF300 /* mach_approximate_time.s in Sources */, 248BA0B3121DE760008C073F /* select-cancel.c in Sources */, 248BA0BE121DE902008C073F /* select.c in Sources */, 248BA0CD121DEBEF008C073F /* setrlimit.c in Sources */, 24B223B0121DFD36007DAEDE /* sigsuspend.c in Sources */, 24B223B2121DFE6D007DAEDE /* sigsuspend-cancel.c in Sources */, + E4216C311822D404006F2632 /* mach_voucher.defs in Sources */, 24B223B5121DFF29007DAEDE /* sigsuspend.c in Sources */, 248AA963122C7B2A0085F5B1 /* unlink.c in Sources */, 248AA965122C7C330085F5B1 /* rmdir.c in Sources */, @@ -1168,6 +1299,7 @@ C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */, 030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */, E4D45C3116F868ED0002AF25 /* proc_listpidspath.c in Sources */, + 374A36E314748F1300AAF39D /* varargs_wrappers.s in Sources */, 291D3C281354FDD100D46061 /* mach_port.c in Sources */, 291D3C291354FDD100D46061 /* mach_vm.c in Sources */, EE3F605A149A6D66003BAEBA /* getaudit.c in Sources */, @@ -1196,6 +1328,16 @@ target = 249C60FE1194747600ED73F3 /* Libsyscall_dynamic */; targetProxy = 249C61141194756A00ED73F3 /* PBXContainerItemProxy */; }; + BA4414AF18336AF300AAE813 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = BA4414A1183366E600AAE813 /* MIG headers */; + targetProxy = BA4414AE18336AF300AAE813 /* PBXContainerItemProxy */; + }; + BA4414B318336D8D00AAE813 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = BA4414A1183366E600AAE813 /* MIG headers */; + targetProxy = BA4414B218336D8D00AAE813 /* PBXContainerItemProxy 
*/; + }; /* End PBXTargetDependency section */ /* Begin XCBuildConfiguration section */ @@ -1204,10 +1346,7 @@ baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */; buildSettings = { COPY_PHASE_STRIP = NO; - INSTALL_PATH = /usr/local/lib/dyld; - "INSTALL_PATH[sdk=iphoneos*]" = /usr/local/lib/dyld; - "INSTALL_PATH[sdk=iphonesimulator*]" = "$(SDKROOT)/usr/local/lib/dyld"; - "INSTALL_PATH[sdk=macosx*]" = /usr/local/lib/dyld; + INSTALL_PATH_ACTUAL = /usr/local/lib/dyld; STRIP_INSTALLED_PRODUCT = NO; }; name = Release; @@ -1222,6 +1361,7 @@ GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_PARAMETER = YES; GCC_WARN_UNUSED_VARIABLE = YES; + OTHER_MIGFLAGS = "-novouchers"; }; name = Release; }; @@ -1234,6 +1374,7 @@ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; MAP_PLATFORM = "$(MAP_PLATFORM_$(PLATFORM_NAME))"; MAP_PLATFORM_iphoneos = iPhoneOS; + MAP_PLATFORM_iphoneosnano = iPhoneOS; MAP_PLATFORM_macosx = MacOSX; PRODUCT_NAME = Syscalls; STRIP_STYLE = debugging; @@ -1245,7 +1386,6 @@ baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */; buildSettings = { OTHER_LDFLAGS = "$(DYLIB_LDFLAGS)"; - STRIP_INSTALLED_PRODUCT = NO; VERSION_INFO_PREFIX = "___"; }; name = Release; @@ -1259,6 +1399,14 @@ }; name = Release; }; + BA4414A3183366E700AAE813 /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; C6D3F02D16542C510052CF30 /* Release */ = { isa = XCBuildConfiguration; baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */; @@ -1312,6 +1460,14 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + BA4414A2183366E700AAE813 /* Build configuration list for PBXAggregateTarget "MIG headers" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + BA4414A3183366E700AAE813 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; C6D3F02C16542C510052CF30 /* Build configuration list for PBXNativeTarget "Libsyscall_headers_Sim" */ = { isa = XCConfigurationList; buildConfigurations = ( diff --git a/libsyscall/Platforms/MacOSX/i386/syscall.map b/libsyscall/Platforms/MacOSX/i386/syscall.map index be628a466..60976ed6d 100644 --- a/libsyscall/Platforms/MacOSX/i386/syscall.map +++ b/libsyscall/Platforms/MacOSX/i386/syscall.map @@ -14,6 +14,7 @@ _fcntl ___fcntl_nocancel _fcntl$NOCANCEL$UNIX2003 ___fcntl_nocancel _fcntl$UNIX2003 ___fcntl _fstat$INODE64 ___fstat64 +_fstatat$INODE64 ___fstatat64 _fstatfs$INODE64 ___fstatfs64 _fsync ___fsync_nocancel _fsync$NOCANCEL$UNIX2003 ___fsync_nocancel @@ -39,6 +40,8 @@ _msync$NOCANCEL$UNIX2003 ___msync_nocancel _msync$UNIX2003 ___msync _open$NOCANCEL$UNIX2003 ___open_nocancel _open$UNIX2003 ___open +_openat$NOCANCEL ___openat_nocancel +_openat ___openat _poll ___poll_nocancel _poll$NOCANCEL$UNIX2003 ___poll_nocancel _poll$UNIX2003 ___poll diff --git a/libsyscall/Platforms/MacOSX/x86_64/syscall.map b/libsyscall/Platforms/MacOSX/x86_64/syscall.map index 2769c32a6..f606b2619 100644 --- a/libsyscall/Platforms/MacOSX/x86_64/syscall.map +++ b/libsyscall/Platforms/MacOSX/x86_64/syscall.map @@ -3,6 +3,7 @@ _aio_suspend$NOCANCEL ___aio_suspend_nocancel _close$NOCANCEL ___close_nocancel _connect$NOCANCEL ___connect_nocancel _fstat$INODE64 ___fstat64 +_fstatat$INODE64 ___fstatat64 _fstatfs$INODE64 ___fstatfs64 _fsync$NOCANCEL ___fsync_nocancel _getfsstat$INODE64 ___getfsstat64 @@ -12,6 
+13,7 @@ _msgsnd$NOCANCEL ___msgsnd_nocancel _msgsys ___msgsys _msync$NOCANCEL ___msync_nocancel _open$NOCANCEL ___open_nocancel +_openat$NOCANCEL ___openat_nocancel _poll$NOCANCEL ___poll_nocancel _pread$NOCANCEL ___pread_nocancel _pwrite$NOCANCEL ___pwrite_nocancel @@ -43,6 +45,7 @@ _mprotect ___mprotect _msgctl ___msgctl _msync ___msync _open ___open +_openat ___openat _recvfrom ___recvfrom _recvmsg ___recvmsg _semctl ___semctl diff --git a/libsyscall/custom/__thread_selfid.s b/libsyscall/custom/__thread_selfid.s index 2c4dd934c..d84a5305f 100644 --- a/libsyscall/custom/__thread_selfid.s +++ b/libsyscall/custom/__thread_selfid.s @@ -30,10 +30,10 @@ #if defined(__x86_64__) -__SYSCALL(___thread_selfid, thread_selfid, 1) +__SYSCALL(___thread_selfid, thread_selfid, 0) #elif defined(__i386__) -__SYSCALL_INT(___thread_selfid, thread_selfid, 1) +__SYSCALL_INT(___thread_selfid, thread_selfid, 0) #endif diff --git a/bsd/machine/_structs.h b/libsyscall/custom/__thread_selfusage.s similarity index 84% rename from bsd/machine/_structs.h rename to libsyscall/custom/__thread_selfusage.s index 509d5f618..064c5bad9 100644 --- a/bsd/machine/_structs.h +++ b/libsyscall/custom/__thread_selfusage.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,8 +25,15 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#if defined (__i386__) || defined (__x86_64__) -#include "i386/_structs.h" -#else -#error architecture not supported + +#include "SYS.h" + +#if defined(__x86_64__) + +__SYSCALL(___thread_selfusage, thread_selfusage, 0) + +#elif defined(__i386__) + +__SYSCALL_INT(___thread_selfusage, thread_selfusage, 0) + #endif diff --git a/libsyscall/custom/errno.c b/libsyscall/custom/errno.c index 640ed14fd..86f754a65 100644 --- a/libsyscall/custom/errno.c +++ b/libsyscall/custom/errno.c @@ -77,22 +77,3 @@ cerror(int err) _pthread_exit_if_canceled(err); return cerror_nocancel(err); } - -#if !TARGET_OS_EMBEDDED - -// Internal symbol no longer used by anybody in Libsystem but required for -// backwards compatibility with 3rd parties - -void -cthread_set_errno_self(int err, int nocancel) -{ - asm(".global $ld$hide$os10.9$_cthread_set_errno_self\n\t" - ".set $ld$hide$os10.9$_cthread_set_errno_self, _cthread_set_errno_self"); - if (nocancel) { - cerror_nocancel(err); - } else { - cerror(err); - } -} - -#endif diff --git a/libsyscall/mach/.gitignore b/libsyscall/mach/.gitignore new file mode 100644 index 000000000..f718d68d2 --- /dev/null +++ b/libsyscall/mach/.gitignore @@ -0,0 +1,3 @@ +*.pbxuser +*.perspectivev3 +build/ diff --git a/libsyscall/mach/err_mach_ipc.sub b/libsyscall/mach/err_mach_ipc.sub index d1e542fae..b0e82564c 100644 --- a/libsyscall/mach/err_mach_ipc.sub +++ b/libsyscall/mach/err_mach_ipc.sub @@ -42,8 +42,8 @@ static const char * const err_codes_mach_send[] = { /* 2 */ "(ipc/send) invalid data", /* 3 */ "(ipc/send) invalid destination port", /* 4 */ "(ipc/send) timed out", - /* 5 */ "(ipc/send) will notify", - /* 6 */ "(ipc/send) notify in progress", + /* 5 */ "(ipc/send) invalid voucher", + /* 6 */ "(ipc/send) unused error", /* 7 */ "(ipc/send) interrupted", /* 8 */ "(ipc/send) msg too small", /* 9 */ "(ipc/send) invalid reply port", @@ -51,13 +51,13 @@ static const char * const err_codes_mach_send[] = { /* 11 */ "(ipc/send) invalid notify port", /* 12 */ "(ipc/send) invalid memory", /* 13 */ "(ipc/send) no msg buffer", - /* 14 */ "(ipc/send) no notify possible", + /* 
14 */ "(ipc/send) msg too large", /* 15 */ "(ipc/send) invalid msg-type", /* 16 */ "(ipc/send) invalid msg-header", /* 17 */ "(ipc/send) invalid msg-trailer", - /* 18 */ "(ipc/send) DIPC transport failure", - /* 19 */ "(ipc/send) DIPC port migrated", - /* 20 */ "(ipc/send) DIPC resend failed", + /* 18 */ "(ipc/send) unused error", + /* 19 */ "(ipc/send) unused error", + /* 20 */ "(ipc/send) unused error", /* 21 */ "(ipc/send) out-of-line buffer too large", }; diff --git a/libsyscall/mach/mach/mach.h b/libsyscall/mach/mach/mach.h index a94230e0b..1d1db7ab3 100644 --- a/libsyscall/mach/mach/mach.h +++ b/libsyscall/mach/mach/mach.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -134,6 +134,104 @@ extern kern_return_t clock_sleep(mach_port_t, int, mach_timespec_t, mach_timespec_t *); + +/*! + * @group voucher_mach_msg Prototypes + */ + +#define VOUCHER_MACH_MSG_API_VERSION 20140205 + +/*! + * @typedef voucher_mach_msg_state_t + * + * @abstract + * Opaque object encapsulating state changed by voucher_mach_msg_adopt(). + */ +typedef struct voucher_mach_msg_state_s *voucher_mach_msg_state_t; + +/*! + * @const VOUCHER_MACH_MSG_STATE_UNCHANGED + * + * @discussion + * Constant indicating no state change occurred. + */ +#define VOUCHER_MACH_MSG_STATE_UNCHANGED ((voucher_mach_msg_state_t)~0ul) + +/*! + * @function voucher_mach_msg_set + * + * @abstract + * Change specified message header to contain current mach voucher with a + * COPY_SEND disposition. + * Does not change message if it already has non-zero MACH_MSGH_BITS_VOUCHER. + * + * @discussion + * Borrows reference to current thread voucher so message should be sent + * immediately (without intervening calls that might change that voucher). + * + * @param msg + * The message to modify. + * + * @result + * True if header was changed. + */ +extern boolean_t voucher_mach_msg_set(mach_msg_header_t *msg); + +/*! + * @function voucher_mach_msg_clear + * + * @abstract + * Removes changes made to specified message header by voucher_mach_msg_set() + * and any mach_msg() send operations (successful or not). + * If the message is not needed further, mach_msg_destroy() should be called + * instead. + * + * @discussion + * Not intended to be called if voucher_mach_msg_set() returned false. + * Releases reference to message mach voucher if an extra reference was + * acquired due to an unsuccessful send operation (pseudo-receive). + * + * @param msg + * The message to modify. + */ +extern void voucher_mach_msg_clear(mach_msg_header_t *msg); + +/*! + * @function voucher_mach_msg_adopt + * + * @abstract + * Adopt the voucher contained in the specified message on the current thread + * and return the previous thread voucher state. + * + * @discussion + * Ownership of the mach voucher in the message is transferred to the current + * thread and the message header voucher fields are cleared. + * + * @param msg + * The message to query and modify. + * + * @result + * The previous thread voucher state or VOUCHER_MACH_MSG_STATE_UNCHANGED if no + * state change occurred. + */ +extern voucher_mach_msg_state_t voucher_mach_msg_adopt(mach_msg_header_t *msg); + +/*! + * @function voucher_mach_msg_revert + * + * @abstract + * Restore thread voucher state previously modified by voucher_mach_msg_adopt(). + * + * @discussion + * Current thread voucher reference is released. 
+ * No change to thread voucher state if passed VOUCHER_MACH_MSG_STATE_UNCHANGED. + * + * @param state + * The thread voucher state to restore. + */ + +extern void voucher_mach_msg_revert(voucher_mach_msg_state_t state); + __END_DECLS #endif /* _MACH_H_ */ diff --git a/libsyscall/mach/mach/mach_init.h b/libsyscall/mach/mach/mach_init.h index 9816f1138..85e8319ab 100644 --- a/libsyscall/mach/mach/mach_init.h +++ b/libsyscall/mach/mach/mach_init.h @@ -59,6 +59,7 @@ #define _MACH_INIT_ 1 #include +#include #include #include @@ -97,24 +98,6 @@ extern mach_port_t bootstrap_port; #define MACH_PORTS_SLOTS_USED 3 -/* - * Globally interesting numbers. - * These macros assume vm_page_size is a power-of-2. - */ - -extern vm_size_t vm_page_size; -extern vm_size_t vm_page_mask; -extern int vm_page_shift; - -#define trunc_page(x) ((x) & (~(vm_page_size - 1))) -#define round_page(x) trunc_page((x) + (vm_page_size - 1)) - -/* - * Page-size rounding macros for the fixed-width VM types. - */ -#define mach_vm_trunc_page(x) ((mach_vm_offset_t)(x) & ~((signed)PAGE_MASK)) -#define mach_vm_round_page(x) (((mach_vm_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) - /* * fprintf_stderr uses vprintf_stderr_func to produce * error messages, this can be overridden by a user diff --git a/libsyscall/mach/mach/vm_page_size.h b/libsyscall/mach/mach/vm_page_size.h index 4d8f1c0b0..fd1a92c73 100644 --- a/libsyscall/mach/mach/vm_page_size.h +++ b/libsyscall/mach/mach/vm_page_size.h @@ -31,15 +31,37 @@ #include #include +#include __BEGIN_DECLS + /* + * Globally interesting numbers. + * These macros assume vm_page_size is a power-of-2. + */ +extern vm_size_t vm_page_size; +extern vm_size_t vm_page_mask; +extern int vm_page_shift; + +/* + * These macros assume vm_page_size is a power-of-2. + */ +#define trunc_page(x) ((x) & (~(vm_page_size - 1))) +#define round_page(x) trunc_page((x) + (vm_page_size - 1)) + +/* + * Page-size rounding macros for the fixed-width VM types. 
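/*
 * Editor's sketch, not part of the patch: these macros depend on
 * vm_page_size being a power of two. Masking off the low bits truncates to
 * a page boundary; adding (vm_page_size - 1) before masking rounds up. A
 * worked case assuming 4 KiB pages (vm_page_size == 0x1000):
 */
#include <assert.h>
#include <stdint.h>

static void
page_rounding_example(void)
{
	uintptr_t page_size = 0x1000;   /* assumed 4 KiB page */
	uintptr_t addr = 0x12345;

	uintptr_t truncated = addr & ~(page_size - 1);                 /* trunc_page */
	uintptr_t rounded = (addr + page_size - 1) & ~(page_size - 1); /* round_page */

	assert(truncated == 0x12000);
	assert(rounded == 0x13000);
}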
+ */ +#define mach_vm_trunc_page(x) ((mach_vm_offset_t)(x) & ~((signed)vm_page_mask)) +#define mach_vm_round_page(x) (((mach_vm_offset_t)(x) + vm_page_mask) & ~((signed)vm_page_mask)) + + extern vm_size_t vm_kernel_page_size __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); extern vm_size_t vm_kernel_page_mask __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); extern int vm_kernel_page_shift __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); #define trunc_page_kernel(x) ((x) & (~vm_kernel_page_mask)) -#define round_page_kernel(x) trunc_kernel_page((x) + vm_kernel_page_mask) +#define round_page_kernel(x) trunc_page_kernel((x) + vm_kernel_page_mask) __END_DECLS diff --git a/libsyscall/mach/mach_init.c b/libsyscall/mach/mach_init.c index 19e87120e..893a06e75 100644 --- a/libsyscall/mach/mach_init.c +++ b/libsyscall/mach/mach_init.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include "externs.h" @@ -68,13 +69,13 @@ mach_port_t mach_host_self_ = MACH_PORT_NULL; #endif extern mach_port_t _task_reply_port; -vm_size_t vm_kernel_page_size = KERNEL_PAGE_SIZE; -vm_size_t vm_kernel_page_mask = KERNEL_PAGE_MASK; -int vm_kernel_page_shift = KERNEL_PAGE_SHIFT; +vm_size_t vm_kernel_page_size = 0; +vm_size_t vm_kernel_page_mask = 0; +int vm_kernel_page_shift = 0; -vm_size_t vm_page_size = PAGE_SIZE; -vm_size_t vm_page_mask = PAGE_MASK; -int vm_page_shift = PAGE_SHIFT; +vm_size_t vm_page_size = 0; +vm_size_t vm_page_mask = 0; +int vm_page_shift = 0; int mach_init(void); int _mach_fork_child(void); @@ -87,7 +88,7 @@ extern void _init_cpu_capabilities(void); kern_return_t host_page_size(__unused host_t host, vm_size_t *out_page_size) { - *out_page_size = PAGE_SIZE; + *out_page_size = vm_kernel_page_size; return KERN_SUCCESS; } @@ -121,6 +122,24 @@ mach_init_doit(void) mach_task_self_ = task_self_trap(); _task_reply_port = mach_reply_port(); + if (vm_kernel_page_shift == 0) { +#ifdef _COMM_PAGE_KERNEL_PAGE_SHIFT + vm_kernel_page_shift = *(uint8_t*) _COMM_PAGE_KERNEL_PAGE_SHIFT; + vm_kernel_page_size = 1 << vm_kernel_page_shift; + vm_kernel_page_mask = vm_kernel_page_size - 1; +#else + vm_kernel_page_size = PAGE_SIZE; + vm_kernel_page_mask = PAGE_MASK; + vm_kernel_page_shift = PAGE_SHIFT; +#endif /* _COMM_PAGE_KERNEL_PAGE_SHIFT */ + } + + if (vm_page_shift == 0) { + vm_page_shift = vm_kernel_page_shift; + vm_page_size = 1 << vm_page_shift; + vm_page_mask = vm_page_size - 1; + } + _init_cpu_capabilities(); _pthread_set_self(0); } diff --git a/libsyscall/mach/mach_msg.c b/libsyscall/mach/mach_msg.c index 415d6a70a..676a5392c 100644 --- a/libsyscall/mach/mach_msg.c +++ b/libsyscall/mach/mach_msg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. 
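/*
 * Editor's sketch, not part of the patch: mach_init_doit() above now derives
 * the whole page geometry from the single page-shift byte published on the
 * comm page, instead of baking in compile-time PAGE_SIZE constants. The
 * derivation in isolation:
 */
#include <assert.h>
#include <stdint.h>

static void
page_geometry_from_shift(void)
{
	int shift = 12;   /* 4 KiB pages; e.g. 14 on 16 KiB-page hardware */

	uintptr_t size = (uintptr_t)1 << shift;
	uintptr_t mask = size - 1;

	assert(size == 4096 && mask == 0xfff);
}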
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -275,6 +275,7 @@ mach_msg_destroy(mach_msg_header_t *msg) */ mach_msg_destroy_port(msg->msgh_remote_port, MACH_MSGH_BITS_REMOTE(mbits)); + mach_msg_destroy_port(msg->msgh_voucher_port, MACH_MSGH_BITS_VOUCHER(mbits)); if (mbits & MACH_MSGH_BITS_COMPLEX) { mach_msg_base_t *base; @@ -382,16 +383,17 @@ mach_msg_server_once( mach_msg_return_t mr; kern_return_t kr; mach_port_t self = mach_task_self_; + voucher_mach_msg_state_t old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; - options &= ~(MACH_SEND_MSG|MACH_RCV_MSG); + options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_VOUCHER); trailer_alloc = REQUESTED_TRAILER_SIZE(options); - request_alloc = round_page(max_size + trailer_alloc); + request_alloc = (mach_msg_size_t)round_page(max_size + trailer_alloc); request_size = (options & MACH_RCV_LARGE) ? request_alloc : max_size + trailer_alloc; - reply_alloc = round_page((options & MACH_SEND_TRAILER) ? + reply_alloc = (mach_msg_size_t)round_page((options & MACH_SEND_TRAILER) ? (max_size + MAX_TRAILER_SIZE) : max_size); @@ -416,14 +418,14 @@ mach_msg_server_once( return kr; } - mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|options, + mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|MACH_RCV_VOUCHER|options, 0, request_size, rcv_name, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); if (!((mr == MACH_RCV_TOO_LARGE) && (options & MACH_RCV_LARGE))) break; - new_request_alloc = round_page(bufRequest->Head.msgh_size + + new_request_alloc = (mach_msg_size_t)round_page(bufRequest->Head.msgh_size + trailer_alloc); vm_deallocate(self, (vm_address_t) bufRequest, @@ -434,6 +436,8 @@ mach_msg_server_once( if (mr == MACH_MSG_SUCCESS) { /* we have a request message */ + old_state = voucher_mach_msg_adopt(&bufRequest->Head); + (void) (*demux)(&bufRequest->Head, &bufReply->Head); if (!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { @@ -476,6 +480,8 @@ mach_msg_server_once( } done_once: + voucher_mach_msg_revert(old_state); + (void)vm_deallocate(self, (vm_address_t) bufRequest, request_alloc); @@ -507,55 +513,60 @@ mach_msg_server( mach_msg_return_t mr; kern_return_t kr; mach_port_t self = mach_task_self_; + voucher_mach_msg_state_t old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; + boolean_t buffers_swapped = FALSE; - options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_OVERWRITE); + options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_VOUCHER|MACH_RCV_OVERWRITE); - reply_alloc = round_page((options & MACH_SEND_TRAILER) ? - (max_size + MAX_TRAILER_SIZE) : max_size); + reply_alloc = (mach_msg_size_t)round_page((options & MACH_SEND_TRAILER) ? + (max_size + MAX_TRAILER_SIZE) : max_size); kr = vm_allocate(self, - (vm_address_t *)&bufReply, - reply_alloc, - VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); - if (kr != KERN_SUCCESS) + (vm_address_t *)&bufReply, + reply_alloc, + VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); + if (kr != KERN_SUCCESS) return kr; request_alloc = 0; trailer_alloc = REQUESTED_TRAILER_SIZE(options); - new_request_alloc = round_page(max_size + trailer_alloc); + new_request_alloc = (mach_msg_size_t)round_page(max_size + trailer_alloc); request_size = (options & MACH_RCV_LARGE) ? 
- new_request_alloc : max_size + trailer_alloc; + new_request_alloc : max_size + trailer_alloc; for (;;) { if (request_alloc < new_request_alloc) { request_alloc = new_request_alloc; kr = vm_allocate(self, - (vm_address_t *)&bufRequest, - request_alloc, - VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); + (vm_address_t *)&bufRequest, + request_alloc, + VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); if (kr != KERN_SUCCESS) { vm_deallocate(self, - (vm_address_t)bufReply, - reply_alloc); + (vm_address_t)bufReply, + reply_alloc); return kr; } } - - mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|options, - 0, request_size, rcv_name, - MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); - + + mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|MACH_RCV_VOUCHER|options, + 0, request_size, rcv_name, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + while (mr == MACH_MSG_SUCCESS) { /* we have another request message */ + buffers_swapped = FALSE; + old_state = voucher_mach_msg_adopt(&bufRequest->Head); + (void) (*demux)(&bufRequest->Head, &bufReply->Head); if (!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { if (bufReply->RetCode == MIG_NO_REPLY) bufReply->Head.msgh_remote_port = MACH_PORT_NULL; else if ((bufReply->RetCode != KERN_SUCCESS) && - (bufRequest->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { + (bufRequest->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { /* destroy the request - but not the reply port */ bufRequest->Head.msgh_remote_port = MACH_PORT_NULL; mach_msg_destroy(&bufRequest->Head); @@ -576,65 +587,86 @@ mach_msg_server( mig_reply_error_t *bufTemp; mr = mach_msg( - &bufReply->Head, - (MACH_MSGH_BITS_REMOTE(bufReply->Head.msgh_bits) == - MACH_MSG_TYPE_MOVE_SEND_ONCE) ? - MACH_SEND_MSG|MACH_RCV_MSG|options : - MACH_SEND_MSG|MACH_RCV_MSG|MACH_SEND_TIMEOUT|options, - bufReply->Head.msgh_size, request_size, rcv_name, - MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + &bufReply->Head, + (MACH_MSGH_BITS_REMOTE(bufReply->Head.msgh_bits) == + MACH_MSG_TYPE_MOVE_SEND_ONCE) ? + MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_TIMEOUT|MACH_RCV_VOUCHER|options : + MACH_SEND_MSG|MACH_RCV_MSG|MACH_SEND_TIMEOUT|MACH_RCV_TIMEOUT|MACH_RCV_VOUCHER|options, + bufReply->Head.msgh_size, request_size, rcv_name, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); /* swap request and reply */ bufTemp = bufRequest; bufRequest = bufReply; bufReply = bufTemp; - + buffers_swapped = TRUE; } else { mr = mach_msg_overwrite( &bufReply->Head, (MACH_MSGH_BITS_REMOTE(bufReply->Head.msgh_bits) == MACH_MSG_TYPE_MOVE_SEND_ONCE) ? - MACH_SEND_MSG|MACH_RCV_MSG|options : - MACH_SEND_MSG|MACH_RCV_MSG|MACH_SEND_TIMEOUT|options, + MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_TIMEOUT|MACH_RCV_VOUCHER|options : + MACH_SEND_MSG|MACH_RCV_MSG|MACH_SEND_TIMEOUT|MACH_RCV_TIMEOUT|MACH_RCV_VOUCHER|options, bufReply->Head.msgh_size, request_size, rcv_name, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL, &bufRequest->Head, 0); } - + if ((mr != MACH_SEND_INVALID_DEST) && - (mr != MACH_SEND_TIMED_OUT)) + (mr != MACH_SEND_TIMED_OUT) && + (mr != MACH_RCV_TIMED_OUT)) { + + voucher_mach_msg_revert(old_state); + old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; + continue; + } } - if (bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) - mach_msg_destroy(&bufReply->Head); + /* + * Need to destroy the reply msg in case if there was a send timeout or + * invalid destination. The reply msg would be swapped with request msg + * if buffers_swapped is true, thus destroy request msg instead of + * reply msg in such cases. 
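/*
 * Editor's note, not part of the patch: condensed, the rule this comment
 * describes is that a combined send/receive exchanges the request and reply
 * buffers, so a reply that failed to send is reached through the pointer
 * currently named bufRequest. A schematic of that invariant (server_msg_t
 * stands in for mig_reply_error_t):
 */
typedef struct server_msg server_msg_t;

static server_msg_t *
unsent_reply(server_msg_t *request, server_msg_t *reply, int buffers_swapped)
{
	/* After a swap the un-sent reply lives in "request"; otherwise it is
	 * still in "reply". The real code then mach_msg_destroy()s it when
	 * the header carries MACH_MSGH_BITS_COMPLEX. */
	return buffers_swapped ? request : reply;
}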
+ */ + if (mr != MACH_RCV_TIMED_OUT) { + if (buffers_swapped) { + if (bufRequest->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) + mach_msg_destroy(&bufRequest->Head); + } else { + if (bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) + mach_msg_destroy(&bufReply->Head); + } + } + voucher_mach_msg_revert(old_state); + old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; - mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|options, - 0, request_size, rcv_name, - MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|MACH_RCV_VOUCHER|options, + 0, request_size, rcv_name, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); } /* while (mr == MACH_MSG_SUCCESS) */ - + if ((mr == MACH_RCV_TOO_LARGE) && (options & MACH_RCV_LARGE)) { - new_request_alloc = round_page(bufRequest->Head.msgh_size + - trailer_alloc); + new_request_alloc = (mach_msg_size_t)round_page(bufRequest->Head.msgh_size + + trailer_alloc); request_size = new_request_alloc; vm_deallocate(self, - (vm_address_t) bufRequest, - request_alloc); + (vm_address_t) bufRequest, + request_alloc); continue; } break; - } /* for(;;) */ + } /* for(;;) */ - (void)vm_deallocate(self, - (vm_address_t) bufRequest, - request_alloc); - (void)vm_deallocate(self, - (vm_address_t) bufReply, - reply_alloc); - return mr; + (void)vm_deallocate(self, + (vm_address_t) bufRequest, + request_alloc); + (void)vm_deallocate(self, + (vm_address_t) bufReply, + reply_alloc); + return mr; } /* @@ -661,10 +693,11 @@ mach_msg_server_importance( mach_port_t self = mach_task_self_; int retval = 1; uint64_t token; + voucher_mach_msg_state_t old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; - options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_OVERWRITE); + options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_VOUCHER|MACH_RCV_OVERWRITE); - reply_alloc = round_page((options & MACH_SEND_TRAILER) ? + reply_alloc = (mach_msg_size_t)round_page((options & MACH_SEND_TRAILER) ? (max_size + MAX_TRAILER_SIZE) : max_size); kr = vm_allocate(self, @@ -676,7 +709,7 @@ mach_msg_server_importance( request_alloc = 0; trailer_alloc = REQUESTED_TRAILER_SIZE(options); - new_request_alloc = round_page(max_size + trailer_alloc); + new_request_alloc = (mach_msg_size_t)round_page(max_size + trailer_alloc); request_size = (options & MACH_RCV_LARGE) ? 
new_request_alloc : max_size + trailer_alloc; @@ -696,14 +729,17 @@ mach_msg_server_importance( } } - mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|options, + mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|MACH_RCV_VOUCHER|options, 0, request_size, rcv_name, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); if (mr == MACH_MSG_SUCCESS) { /* we have another request message */ + old_state = voucher_mach_msg_adopt(&bufRequest->Head); + retval = proc_importance_assertion_begin_with_msg(&bufRequest->Head, NULL, &token); + (void) (*demux)(&bufRequest->Head, &bufReply->Head); if (!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { @@ -741,6 +777,10 @@ mach_msg_server_importance( (mr != MACH_SEND_TIMED_OUT)) { if (retval == 0) proc_importance_assertion_complete(token); + + voucher_mach_msg_revert(old_state); + old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; + continue; } mr = MACH_MSG_SUCCESS; @@ -750,10 +790,13 @@ mach_msg_server_importance( if (retval == 0) proc_importance_assertion_complete(token); + voucher_mach_msg_revert(old_state); + old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED; + } /* if (mr == MACH_MSG_SUCCESS) */ if ((mr == MACH_RCV_TOO_LARGE) && (options & MACH_RCV_LARGE)) { - new_request_alloc = round_page(bufRequest->Head.msgh_size + + new_request_alloc = (mach_msg_size_t)round_page(bufRequest->Head.msgh_size + trailer_alloc); request_size = new_request_alloc; vm_deallocate(self, @@ -775,3 +818,10 @@ mach_msg_server_importance( reply_alloc); return mr; } + +kern_return_t +mach_voucher_deallocate( + mach_voucher_t voucher) +{ + return mach_port_deallocate(mach_task_self(), voucher); +} diff --git a/libsyscall/mach/mach_port.c b/libsyscall/mach/mach_port.c index 2aadae90d..428ada4f2 100644 --- a/libsyscall/mach/mach_port.c +++ b/libsyscall/mach/mach_port.c @@ -395,6 +395,18 @@ mach_port_space_info( return (rv); } +kern_return_t +mach_port_space_basic_info( + ipc_space_t task, + ipc_info_space_basic_t *space_basic_info) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_port_space_basic_info(task, space_basic_info); + + return (rv); +} + kern_return_t mach_port_dnrequest_info( ipc_space_t task, diff --git a/libsyscall/mach/mach_vm.c b/libsyscall/mach/mach_vm.c index 1b6d7f98e..d31037398 100644 --- a/libsyscall/mach/mach_vm.c +++ b/libsyscall/mach/mach_vm.c @@ -79,7 +79,7 @@ mach_vm_deallocate( rv = _kernelrpc_mach_vm_deallocate(target, address, size); if (__syscall_logger) { - __syscall_logger(stack_logging_type_vm_deallocate, (uintptr_t)target, (uintptr_t)address, size, 0, 0); + __syscall_logger(stack_logging_type_vm_deallocate, (uintptr_t)target, (uintptr_t)address, (uintptr_t)size, 0, 0); } return (rv); diff --git a/bsd/i386/_structs.h b/libsyscall/mach/mach_voucher.defs similarity index 92% rename from bsd/i386/_structs.h rename to libsyscall/mach/mach_voucher.defs index 36e42cb24..fe6850ee2 100644 --- a/bsd/i386/_structs.h +++ b/libsyscall/mach/mach_voucher.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. 
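/*
 * Editor's sketch, not part of the patch: mach_voucher_deallocate() above
 * works because a voucher name is an ordinary port right in the caller's
 * IPC space, so releasing it is just a mach_port_deallocate(). Typical
 * caller-side cleanup after taking the voucher out of a received message
 * (the extern matches the definition added above; msgh_voucher_port is the
 * voucher-aware header field introduced in this release):
 */
#include <mach/mach.h>

extern kern_return_t mach_voucher_deallocate(mach_port_t voucher);

static void
drop_received_voucher(mach_msg_header_t *msg)
{
	mach_port_t voucher = msg->msgh_voucher_port;

	if (MACH_PORT_VALID(voucher)) {
		(void)mach_voucher_deallocate(voucher);
		msg->msgh_voucher_port = MACH_PORT_NULL;
	}
}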
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,5 +25,4 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - -#include +#include diff --git a/libsyscall/mach/string.c b/libsyscall/mach/string.c index 000a0f88f..e1555b49d 100644 --- a/libsyscall/mach/string.c +++ b/libsyscall/mach/string.c @@ -36,7 +36,7 @@ _mach_strlen(const char *str) const char *p; for (p = str; p; p++) { if (*p == '\0') { - return (p - str); + return (int)(p - str); } } /* NOTREACHED */ diff --git a/libsyscall/mach/thread_act.c b/libsyscall/mach/thread_act.c new file mode 100644 index 000000000..5726d1131 --- /dev/null +++ b/libsyscall/mach/thread_act.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2011 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
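/*
 * Editor's note, not part of the patch: the (int) cast added to
 * _mach_strlen() above makes a narrowing conversion explicit. On LP64 the
 * pointer difference has 64-bit ptrdiff_t type while the function returns
 * int, which otherwise trips -Wshorten-64-to-32 style warnings. The shape
 * of the fix in isolation:
 */
#include <stddef.h>

static int
length_as_int(const char *str)
{
	const char *p = str;

	while (*p != '\0') {
		p++;
	}
	/* p - str is ptrdiff_t; the cast documents the (short-string) narrowing */
	return (int)(p - str);
}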
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#undef _thread_act_user_ +#include + +extern void _pthread_clear_qos_tsd(mach_port_t port); + +kern_return_t +thread_policy(thread_act_t thr_act, policy_t policy, policy_base_t base, mach_msg_type_number_t baseCnt, boolean_t set_limit) +{ + kern_return_t kr; + + kr = _kernelrpc_thread_policy(thr_act, policy, base, baseCnt, set_limit); + if (kr == KERN_SUCCESS) { + _pthread_clear_qos_tsd(thr_act); + } else if (kr == KERN_POLICY_STATIC) { + kr = KERN_SUCCESS; + } + + return kr; +} + +kern_return_t +thread_policy_set(thread_act_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t policy_infoCnt) +{ + kern_return_t kr; + + kr = _kernelrpc_thread_policy_set(thread, flavor, policy_info, policy_infoCnt); + if (kr == KERN_SUCCESS) { + _pthread_clear_qos_tsd(thread); + } else if (kr == KERN_POLICY_STATIC) { + kr = KERN_SUCCESS; + } + + return kr; +} + +kern_return_t +thread_set_policy(thread_act_t thr_act, processor_set_t pset, policy_t policy, policy_base_t base, mach_msg_type_number_t baseCnt, policy_limit_t limit, mach_msg_type_number_t limitCnt) +{ + kern_return_t kr; + + kr = _kernelrpc_thread_set_policy(thr_act, pset, policy, base, baseCnt, limit, limitCnt); + if (kr == KERN_SUCCESS) { + _pthread_clear_qos_tsd(thr_act); + } else if (kr == KERN_POLICY_STATIC) { + kr = KERN_SUCCESS; + } + + return kr; +} diff --git a/libsyscall/wrappers/_libc_funcptr.c b/libsyscall/wrappers/_libc_funcptr.c index 8a2ba68c8..63eb09ca6 100644 --- a/libsyscall/wrappers/_libc_funcptr.c +++ b/libsyscall/wrappers/_libc_funcptr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,6 +28,7 @@ #include "_libkernel_init.h" extern _libkernel_functions_t _libkernel_functions; +extern void mig_os_release(void* ptr); __attribute__((visibility("hidden"))) void * @@ -70,3 +71,64 @@ _pthread_exit_if_canceled(int error) __attribute__((visibility("hidden"))) void _pthread_set_self(void *ptr __attribute__((__unused__))) {} + +__attribute__((visibility("hidden"))) +void +_pthread_clear_qos_tsd(mach_port_t thread_port) +{ + if (_libkernel_functions->version >= 3 && + _libkernel_functions->pthread_clear_qos_tsd) { + return _libkernel_functions->pthread_clear_qos_tsd(thread_port); + } +} + +/* + * mach/mach.h voucher_mach_msg API + */ + +static const struct _libkernel_voucher_functions + _libkernel_voucher_functions_empty; +static _libkernel_voucher_functions_t _libkernel_voucher_functions = + &_libkernel_voucher_functions_empty; + +kern_return_t +__libkernel_voucher_init(_libkernel_voucher_functions_t fns) +{ + _libkernel_voucher_functions = fns; + return KERN_SUCCESS; +} + +boolean_t +voucher_mach_msg_set(mach_msg_header_t *msg) +{ + if (_libkernel_voucher_functions->voucher_mach_msg_set) { + return _libkernel_voucher_functions->voucher_mach_msg_set(msg); + } + return 0; +} + +void +voucher_mach_msg_clear(mach_msg_header_t *msg) +{ + if (_libkernel_voucher_functions->voucher_mach_msg_clear) { + return _libkernel_voucher_functions->voucher_mach_msg_clear(msg); + } +} + +voucher_mach_msg_state_t +voucher_mach_msg_adopt(mach_msg_header_t *msg) +{ + if (_libkernel_voucher_functions->voucher_mach_msg_adopt) { + return _libkernel_voucher_functions->voucher_mach_msg_adopt(msg); + } + return VOUCHER_MACH_MSG_STATE_UNCHANGED; +} + +void +voucher_mach_msg_revert(voucher_mach_msg_state_t state) +{ + if 
(_libkernel_voucher_functions->voucher_mach_msg_revert) { + return _libkernel_voucher_functions->voucher_mach_msg_revert(state); + } +} + diff --git a/libsyscall/wrappers/_libkernel_init.h b/libsyscall/wrappers/_libkernel_init.h index f5bef896f..68a7067e4 100644 --- a/libsyscall/wrappers/_libkernel_init.h +++ b/libsyscall/wrappers/_libkernel_init.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Apple Inc. All rights reserved. + * Copyright (c) 2010-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,20 +30,59 @@ #define __LIBKERNEL_INIT_H #include +#include +#include +#include + +#ifndef __OS_VOUCHER_PRIVATE__ +// We cannot include the actual os/voucher_private.h definition of voucher_t +// here, as that also marks the voucher_XXX functions as exported, which causes +// a compile error when we attempt to mark them hidden in the .c file. +// +// The Libsystem init.c file does include os/voucher_private.h though, as well +// as this file, and this typedef causes an error if it is unguarded. +struct voucher_s; +typedef struct voucher_s *voucher_t; +#endif typedef const struct _libkernel_functions { - /* Structure version 1. Subsequent versions must only add pointers! */ + /* The following functions are included in version 1 of this structure */ unsigned long version; void* (*dlsym)(void*, const char*); void* (*malloc)(size_t); void (*free)(void*); void* (*realloc)(void*, size_t); void (*_pthread_exit_if_canceled)(int); + + /* The following functions are included in version 2 of this structure */ + void *reserved1; + void *reserved2; + void *reserved3; + void *reserved4; + void *reserved5; + + /* The following functions are included in version 3 of this structure */ + void (*pthread_clear_qos_tsd)(mach_port_t); + + /* Subsequent versions must only add pointers! */ } *_libkernel_functions_t; +typedef const struct _libkernel_voucher_functions { + /* The following functions are included in version 1 of this structure */ + unsigned long version; + boolean_t (*voucher_mach_msg_set)(mach_msg_header_t*); + void (*voucher_mach_msg_clear)(mach_msg_header_t*); + voucher_mach_msg_state_t (*voucher_mach_msg_adopt)(mach_msg_header_t*); + void (*voucher_mach_msg_revert)(voucher_mach_msg_state_t); + + /* Subsequent versions must only add pointers! */ +} *_libkernel_voucher_functions_t; + struct ProgramVars; /* forward reference */ void __libkernel_init(_libkernel_functions_t fns, const char *envp[], const char *apple[], const struct ProgramVars *vars); -#endif // __LIBKERNEL_INIT_H +kern_return_t __libkernel_voucher_init(_libkernel_voucher_functions_t fns); + +#endif // __LIBKERNEL_INIT_H diff --git a/libsyscall/wrappers/cancelable/fcntl-base.c b/libsyscall/wrappers/cancelable/fcntl-base.c index 7a9a6f970..7bbded99e 100644 --- a/libsyscall/wrappers/cancelable/fcntl-base.c +++ b/libsyscall/wrappers/cancelable/fcntl-base.c @@ -50,12 +50,14 @@ fcntl(int fd, int cmd, ...) case F_LOG2PHYS_EXT: case F_GETPATH: case F_GETPATH_MTMINFO: + case F_GETCODEDIR: case F_PATHPKG_CHECK: case F_OPENFROM: case F_UNLINKFROM: case F_ADDSIGS: case F_ADDFILESIGS: case F_FINDSIGS: + case F_TRANSCODEKEY: arg = va_arg(ap, void *); break; default: diff --git a/libsyscall/wrappers/coalition.c b/libsyscall/wrappers/coalition.c new file mode 100644 index 000000000..92a0bda27 --- /dev/null +++ b/libsyscall/wrappers/coalition.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved.
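_libkernel_init.h above grows the _libkernel_functions table with reserved slots and a version-3 entry, under the stated rule that subsequent versions may only append pointers. The safe way to consume such a table is to gate every call on both the advertised version and a non-NULL slot, exactly as the _pthread_clear_qos_tsd() trampoline in _libc_funcptr.c does. A small illustration of that dispatch pattern, using a hypothetical table and slot names:

#include <stddef.h>

/* A cut-down analogue of the versioned callback tables above (hypothetical). */
struct callbacks {
	unsigned long version;
	void (*v1_hello)(void);   /* present since version 1 */
	void (*v2_goodbye)(void); /* appended in version 2 */
};

static void
call_goodbye_if_available(const struct callbacks *cb)
{
	/* Guard on both the structure version and a non-NULL slot, so an
	 * older provider that never heard of v2_goodbye is handled safely. */
	if (cb->version >= 2 && cb->v2_goodbye != NULL)
		cb->v2_goodbye();
}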
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include + +/* Syscall entry points */ +int __coalition(uint32_t operation, uint64_t *cid, uint32_t flags); +int __coalition_info(uint32_t operation, uint64_t *cid, void *buffer, size_t *bufsize); + +int coalition_create(uint64_t *cid_out, uint32_t flags) +{ + return __coalition(COALITION_OP_CREATE, cid_out, flags); +} + +int coalition_terminate(uint64_t cid, uint32_t flags) +{ + return __coalition(COALITION_OP_TERMINATE, &cid, flags); +} + +int coalition_reap(uint64_t cid, uint32_t flags) +{ + return __coalition(COALITION_OP_REAP, &cid, flags); +} + +int coalition_info_resource_usage(uint64_t cid, struct coalition_resource_usage *cru, size_t sz) +{ + return __coalition_info(COALITION_INFO_RESOURCE_USAGE, &cid, cru, &sz); +} diff --git a/bsd/sys/_types/_pthread_key_t.h b/libsyscall/wrappers/csr.c similarity index 52% rename from bsd/sys/_types/_pthread_key_t.h rename to libsyscall/wrappers/csr.c index 5dd708529..2870bf97f 100644 --- a/bsd/sys/_types/_pthread_key_t.h +++ b/libsyscall/wrappers/csr.c @@ -1,19 +1,14 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2014 Apple Inc. All rights reserved. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -23,9 +18,26 @@ * Please see the License for the specific language governing rights and * limitations under the License.
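coalition.c above wraps the new __coalition()/__coalition_info() syscalls in a thin userland API. A hedged lifecycle sketch follows; these are private interfaces, so the zero flags values and the need for appropriate privileges are assumptions, and the prototypes are simply restated rather than taken from a public header.

#include <stdint.h>
#include <stdio.h>

/* Wrapper prototypes from the file above; the header that exports them
 * is private, so they are restated here. */
extern int coalition_create(uint64_t *cid_out, uint32_t flags);
extern int coalition_terminate(uint64_t cid, uint32_t flags);
extern int coalition_reap(uint64_t cid, uint32_t flags);

int
coalition_lifecycle_example(void)
{
	uint64_t cid = 0;

	/* Create a coalition; the kernel hands back its id. */
	if (coalition_create(&cid, 0) != 0) {
		perror("coalition_create");
		return -1;
	}

	/* Mark it terminating, then reap it once it has emptied. */
	if (coalition_terminate(cid, 0) != 0 || coalition_reap(cid, 0) != 0) {
		perror("coalition");
		return -1;
	}
	return 0;
}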
* - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _PTHREAD_KEY_T -#define _PTHREAD_KEY_T -typedef __darwin_pthread_key_t pthread_key_t; -#endif /* _PTHREAD_KEY_T */ + +#include +#include + +/* Syscall entry point */ +int __csrctl(csr_op_t op, void *buffer, size_t size); + +int csr_check(csr_config_t mask) +{ + return __csrctl(CSR_OP_CHECK, &mask, sizeof(csr_config_t)); +} + +int csr_get_active_config(csr_config_t *config) +{ + return __csrctl(CSR_OP_GET_ACTIVE_CONFIG, config, sizeof(csr_config_t)); +} + +int csr_get_pending_config(csr_config_t *config) +{ + return __csrctl(CSR_OP_GET_PENDING_CONFIG, config, sizeof(csr_config_t)); +} diff --git a/libsyscall/wrappers/getiopolicy_np.c b/libsyscall/wrappers/getiopolicy_np.c index 146fdac6d..583321622 100644 --- a/libsyscall/wrappers/getiopolicy_np.c +++ b/libsyscall/wrappers/getiopolicy_np.c @@ -22,8 +22,10 @@ */ #include #include +#include extern int __iopolicysys(int, struct _iopol_param_t *); +extern void _pthread_clear_qos_tsd(mach_port_t); int getiopolicy_np(int iotype, int scope) @@ -58,10 +60,17 @@ setiopolicy_np(int iotype, int scope, int policy) { /* kernel validates the indiv values, no need to repeat it */ struct _iopol_param_t iop_param; - + iop_param.iop_scope = scope; iop_param.iop_iotype = iotype; iop_param.iop_policy = policy; - return( __iopolicysys(IOPOL_CMD_SET, &iop_param)); + int rv = __iopolicysys(IOPOL_CMD_SET, &iop_param); + if (rv == -2) { + /* not an actual error but indication that __iopolicysys removed the thread QoS */ + _pthread_clear_qos_tsd(MACH_PORT_NULL); + rv = 0; + } + + return rv; } diff --git a/libsyscall/wrappers/guarded_open_dprotected_np.c b/libsyscall/wrappers/guarded_open_dprotected_np.c new file mode 100644 index 000000000..152fd147d --- /dev/null +++ b/libsyscall/wrappers/guarded_open_dprotected_np.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include + +int __guarded_open_dprotected_np(const char *path, + const guardid_t *guard, u_int guardflags, int flags, int dpclass, int dpflags, int mode); + +int +guarded_open_dprotected_np(const char *path, + const guardid_t *guard, u_int guardflags, int flags, int dpclass, int dpflags, ...) 
+{ + int mode = 0; + + if (flags & O_CREAT) { + va_list ap; + va_start(ap, dpflags); + mode = va_arg(ap, int); + va_end(ap); + } + return (__guarded_open_dprotected_np(path, guard, guardflags, flags, dpclass, dpflags, mode)); +} diff --git a/libsyscall/wrappers/libproc/libproc.c b/libsyscall/wrappers/libproc/libproc.c index fa0b8c16e..2f47dcaf4 100644 --- a/libsyscall/wrappers/libproc/libproc.c +++ b/libsyscall/wrappers/libproc/libproc.c @@ -29,7 +29,6 @@ #include #include #include -#define BUILD_LIBSYSCALL 1 #include #include @@ -102,12 +101,40 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, void *buffer, int buffersize) return(retval); } + +int +proc_pidoriginatorinfo(int flavor, void *buffer, int buffersize) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_PIDORIGINATORINFO, getpid(), flavor, 0, buffer, buffersize)) == -1) + return(0); + + return(retval); +} + int proc_pid_rusage(int pid, int flavor, rusage_info_t *buffer) { return (__proc_info(PROC_INFO_CALL_PIDRUSAGE, pid, flavor, 0, buffer, 0)); } +int +proc_setthread_cpupercent(uint8_t percentage, uint32_t ms_refill) +{ + uint32_t arg = 0; + + /* Pack percentage and refill into a 32-bit number to match existing kernel implementation */ + if ((percentage >= 100) || (ms_refill & ~0xffffffU)) { + errno = EINVAL; + return -1; + } + + arg = ((ms_refill << 8) | percentage); + + return (proc_rlimit_control(-1, RLIMIT_THREAD_CPULIMITS, (void *)(uintptr_t)arg)); +} + int proc_pidfdinfo(int pid, int fd, int flavor, void * buffer, int buffersize) { @@ -150,7 +177,7 @@ proc_name(int pid, void * buffer, uint32_t buffersize) } else { bcopy(&pbsd.pbi_comm, buffer, sizeof(pbsd.pbi_comm)); } - len = strlen(buffer); + len = (int)strlen(buffer); return(len); } return(0); } @@ -169,7 +196,7 @@ proc_regionfilename(int pid, uint64_t address, void * buffer, uint32_t buffersiz retval = proc_pidinfo(pid, PROC_PIDREGIONPATHINFO, (uint64_t)address, &reginfo, sizeof(struct proc_regionwithpathinfo)); if (retval != -1) { - len = strlen(&reginfo.prp_vip.vip_path[0]); + len = (int)strlen(&reginfo.prp_vip.vip_path[0]); if (len != 0) { if (len > MAXPATHLEN) len = MAXPATHLEN; @@ -208,7 +235,7 @@ proc_pidpath(int pid, void * buffer, uint32_t buffersize) retval = __proc_info(PROC_INFO_CALL_PIDINFO, pid, PROC_PIDPATHINFO, (uint64_t)0, buffer, buffersize); if (retval != -1) { - len = strlen(buffer); + len = (int)strlen(buffer); return(len); } return (0); } @@ -293,6 +320,16 @@ proc_get_dirty(pid_t pid, uint32_t *flags) return 0; } +int +proc_clear_dirty(pid_t pid, uint32_t flags) +{ + if (__proc_info(PROC_INFO_CALL_DIRTYCONTROL, pid, PROC_DIRTYCONTROL_CLEAR, flags, NULL, 0) == -1) { + return errno; + } + + return 0; +} + int proc_terminate(pid_t pid, int *sig) { @@ -337,7 +374,7 @@ proc_get_cpumon_params(pid_t pid, int *percentage, int *interval) if ((ret == 0) && (attr.ppattr_cpu_attr == PROC_POLICY_RSRCACT_NOTIFY_EXC)) { *percentage = attr.ppattr_cpu_percentage; - *interval = attr.ppattr_cpu_attr_interval; + *interval = (int)attr.ppattr_cpu_attr_interval; } else { *percentage = 0; *interval = 0; @@ -374,6 +411,56 @@ proc_disable_cpumon(pid_t pid) PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, 0)); } + +/* + * Turn on the CPU usage monitor using the supplied parameters, and make + * violations of the monitor fatal.
+ * + * Returns: 0 on success; + * -1 on failure and sets errno + */ +int +proc_set_cpumon_params_fatal(pid_t pid, int percentage, int interval) +{ + int current_percentage = 0; + int current_interval = 0; /* intervals are in seconds */ + int ret = 0; + + if ((percentage <= 0) || (interval <= 0)) { + errno = EINVAL; + return (-1); + } + + /* + * Do a simple query to see if CPU monitoring is + * already active. If either the percentage or the + * interval is nonzero, then CPU monitoring is + * already in use for this process. + */ + (void)proc_get_cpumon_params(pid, &current_percentage, &current_interval); + if (current_percentage || current_interval) + { + /* + * The CPU monitor appears to be active. + * We choose not to disturb those settings. + */ + errno = EBUSY; + return (-1); + } + + if ((ret = proc_set_cpumon_params(pid, percentage, interval)) != 0) { + /* Failed to activate the CPU monitor */ + return (ret); + } + + if ((ret = proc_rlimit_control(pid, RLIMIT_CPU_USAGE_MONITOR, CPUMON_MAKE_FATAL)) != 0) { + /* Failed to set termination, back out the CPU monitor settings. */ + (void)proc_disable_cpumon(pid); + } + + return (ret); +} + int proc_set_wakemon_params(pid_t pid, int rate_hz, int flags __unused) { @@ -431,141 +518,6 @@ proc_disable_wakemon(pid_t pid) } -#if TARGET_OS_EMBEDDED - -int -proc_setcpu_percentage(pid_t pid, int action, int percentage) -{ - proc_policy_cpuusage_attr_t attr; - - bzero(&attr, sizeof(proc_policy_cpuusage_attr_t)); - attr.ppattr_cpu_attr = action; - attr.ppattr_cpu_percentage = percentage; - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, (uint64_t)0) != -1) - return(0); - else - return(errno); -} - -int -proc_setcpu_deadline(pid_t pid, int action, uint64_t deadline) -{ - proc_policy_cpuusage_attr_t attr; - - bzero(&attr, sizeof(proc_policy_cpuusage_attr_t)); - attr.ppattr_cpu_attr = action; - attr.ppattr_cpu_attr_deadline = deadline; - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, (uint64_t)0) != -1) - return(0); - else - return(errno); - -} - - -int -proc_setcpu_percentage_withdeadline(pid_t pid, int action, int percentage, uint64_t deadline) -{ - proc_policy_cpuusage_attr_t attr; - - bzero(&attr, sizeof(proc_policy_cpuusage_attr_t)); - attr.ppattr_cpu_attr = action; - attr.ppattr_cpu_percentage = percentage; - attr.ppattr_cpu_attr_deadline = deadline; - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, (uint64_t)0) != -1) - return(0); - else - return(errno); -} - -int -proc_clear_cpulimits(pid_t pid) -{ - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_RESTORE, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, NULL, pid, (uint64_t)0) != -1) - return(0); - else - return(errno); - - -} - -int -proc_appstate(int pid, int * appstatep) -{ - int state; - - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_GET, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_STATE, (proc_policy_attribute_t*)&state, pid, (uint64_t)0) != -1) { - if (appstatep != NULL) - *appstatep = state; - return(0); - } else - return(errno); - -} - - -int -proc_setappstate(int pid, int appstate) -{ - int state = appstate; - - switch (state) { - case PROC_APPSTATE_NONE: - case PROC_APPSTATE_ACTIVE: - case
PROC_APPSTATE_INACTIVE: - case PROC_APPSTATE_BACKGROUND: - case PROC_APPSTATE_NONUI: - break; - default: - return(EINVAL); - } - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_STATE, (proc_policy_attribute_t*)&state, pid, (uint64_t)0) != -1) - return(0); - else - return(errno); -} - -int -proc_devstatusnotify(int devicestatus) -{ - int state = devicestatus; - - switch (devicestatus) { - case PROC_DEVSTATUS_SHORTTERM: - case PROC_DEVSTATUS_LONGTERM: - break; - default: - return(EINVAL); - } - - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_DEVSTATUS, (proc_policy_attribute_t*)&state, getpid(), (uint64_t)0) != -1) { - return(0); - } else - return(errno); - -} - -int -proc_pidbind(int pid, uint64_t threadid, int bind) -{ - int state = bind; - pid_t passpid = pid; - - switch (bind) { - case PROC_PIDBIND_CLEAR: - passpid = getpid(); /* ignore pid on clear */ - break; - case PROC_PIDBIND_SET: - break; - default: - return(EINVAL); - } - if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_PIDBIND, (proc_policy_attribute_t*)&state, passpid, threadid) != -1) - return(0); - else - return(errno); -} -#endif /* TARGET_OS_EMBEDDED */ /* Donate importance to adaptive processes from this process */ @@ -574,19 +526,11 @@ proc_donate_importance_boost() { int rval; -#if TARGET_OS_EMBEDDED - rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, - PROC_POLICY_ACTION_ENABLE, - PROC_POLICY_APPTYPE, - PROC_POLICY_IOS_DONATEIMP, - NULL, getpid(), (uint64_t)0); -#else /* TARGET_OS_EMBEDDED */ rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_SET, PROC_POLICY_BOOST, PROC_POLICY_IMP_DONATION, NULL, getpid(), 0); -#endif /* TARGET_OS_EMBEDDED */ if (rval == 0) return (0); @@ -606,6 +550,7 @@ proc_importance_bad_assertion(char *reason) { uint64_t important_boost_assertion_token = 0xfafafafafafafafa; uint64_t normal_boost_assertion_token = 0xfbfbfbfbfbfbfbfb; uint64_t non_boost_assertion_token = 0xfcfcfcfcfcfcfcfc; +uint64_t denap_boost_assertion_token = 0xfdfdfdfdfdfdfdfd; /* * Accept the boost on a message, or request another boost assertion @@ -625,9 +570,12 @@ proc_importance_assertion_begin_with_msg(mach_msg_header_t *msg, if (assertion_token == NULL) return (EINVAL); - - /* Is this a boosting message? */ - if ((msg->msgh_bits & MACH_MSGH_BITS_RAISEIMP) != 0) { + +#define LEGACYBOOSTMASK (MACH_MSGH_BITS_VOUCHER_MASK | MACH_MSGH_BITS_RAISEIMP) +#define LEGACYBOOSTED(m) (((m)->msgh_bits & LEGACYBOOSTMASK) == MACH_MSGH_BITS_RAISEIMP) + + /* Is this a legacy boosted message? */ + if (LEGACYBOOSTED(msg)) { /* * Have we accepted the implicit boost for this message yet? 
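The LEGACYBOOSTMASK test above only treats a message as boosted when MACH_MSGH_BITS_RAISEIMP is set and no voucher bits are present; once a voucher carries the importance, the raw bit alone no longer counts. Restated as a standalone helper, a sketch using only the msgh_bits macros shown above:

#include <mach/message.h>

/* Returns 1 for a message that carries the old-style importance boost
 * (RAISEIMP set, no voucher port in the header), 0 otherwise. */
static int
is_legacy_boosted(const mach_msg_header_t *msg)
{
	const mach_msg_bits_t mask =
	    MACH_MSGH_BITS_VOUCHER_MASK | MACH_MSGH_BITS_RAISEIMP;
	return (msg->msgh_bits & mask) == MACH_MSGH_BITS_RAISEIMP;
}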
@@ -640,21 +588,11 @@ proc_importance_assertion_begin_with_msg(mach_msg_header_t *msg, } /* Request an additional boost count */ - -#if TARGET_OS_EMBEDDED - rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, - PROC_POLICY_ACTION_ENABLE, - PROC_POLICY_APPTYPE, - PROC_POLICY_IOS_HOLDIMP, - NULL, getpid(), 0); -#else /* TARGET_OS_EMBEDDED */ rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_HOLD, PROC_POLICY_BOOST, PROC_POLICY_IMP_IMPORTANT, NULL, getpid(), 0); -#endif /* TARGET_OS_EMBEDDED */ - if (rval == 0) { *assertion_token = (uint64_t) &important_boost_assertion_token; return (0); @@ -683,21 +621,11 @@ proc_importance_assertion_complete(uint64_t assertion_token) return (0); if (assertion_token == (uint64_t) &important_boost_assertion_token) { - -#if TARGET_OS_EMBEDDED - rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, - PROC_POLICY_ACTION_ENABLE, - PROC_POLICY_APPTYPE, - PROC_POLICY_IOS_DROPIMP, - NULL, getpid(), 0); -#else /* TARGET_OS_EMBEDDED */ rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_DROP, PROC_POLICY_BOOST, PROC_POLICY_IMP_IMPORTANT, - NULL, getpid(), 0); -#endif /* TARGET_OS_EMBEDDED */ - + NULL, getpid(), 0); if (rval == 0) { return (0); } else if (errno == EOVERFLOW) { @@ -712,7 +640,36 @@ proc_importance_assertion_complete(uint64_t assertion_token) } } -#if !TARGET_OS_EMBEDDED +/* + * Accept the De-Nap boost on a message, or request another boost assertion + * if we have already accepted the implicit boost for this message. + * + * Interface is deprecated before it really got started - just a synonym + * for proc_importance_assertion_begin_with_msg() now. + */ +int +proc_denap_assertion_begin_with_msg(mach_msg_header_t *msg, + uint64_t *assertion_token) +{ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + return proc_importance_assertion_begin_with_msg(msg, NULL, assertion_token); +#pragma clang diagnostic pop +} + + +/* + * Drop a denap boost assertion. + * + * Interface is deprecated before it really got started - just a synonym + * for proc_importance_assertion_complete() now.
+ */ +int +proc_denap_assertion_complete(uint64_t assertion_token) +{ + return proc_importance_assertion_complete(assertion_token); +} + int proc_clear_vmpressure(pid_t pid) @@ -808,7 +765,6 @@ proc_suppress(__unused pid_t pid, __unused uint64_t *generation) #endif /* !TARGET_IPHONE_SIMULATOR */ -#endif /* !TARGET_OS_EMBEDDED */ diff --git a/libsyscall/wrappers/libproc/libproc.h b/libsyscall/wrappers/libproc/libproc.h index 5fda14853..9e98f1760 100644 --- a/libsyscall/wrappers/libproc/libproc.h +++ b/libsyscall/wrappers/libproc/libproc.h @@ -122,6 +122,7 @@ int proc_setpcontrol(const int control); int proc_track_dirty(pid_t pid, uint32_t flags); int proc_set_dirty(pid_t pid, bool dirty); int proc_get_dirty(pid_t pid, uint32_t *flags); +int proc_clear_dirty(pid_t pid, uint32_t flags); int proc_terminate(pid_t pid, int *sig); diff --git a/libsyscall/wrappers/libproc/libproc_internal.h b/libsyscall/wrappers/libproc/libproc_internal.h index f04e9e1a3..a39de570f 100644 --- a/libsyscall/wrappers/libproc/libproc_internal.h +++ b/libsyscall/wrappers/libproc/libproc_internal.h @@ -30,38 +30,6 @@ __BEGIN_DECLS -#if TARGET_OS_EMBEDDED - -#define PROC_SETCPU_ACTION_NONE 0 -#define PROC_SETCPU_ACTION_THROTTLE 1 -#define PROC_SETCPU_ACTION_SUSPEND 2 -#define PROC_SETCPU_ACTION_TERMINATE 3 -#define PROC_SETCPU_ACTION_NOTIFY 4 - -int proc_setcpu_percentage(pid_t pid, int action, int percentage) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); -int proc_setcpu_deadline(pid_t pid, int action, uint64_t deadline) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); -int proc_setcpu_percentage_withdeadline(pid_t pid, int action, int percentage, uint64_t deadline) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); -int proc_clear_cpulimits(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); - -#define PROC_APPSTATE_NONE 0 -#define PROC_APPSTATE_ACTIVE 1 -#define PROC_APPSTATE_BACKGROUND 2 -#define PROC_APPSTATE_NONUI 3 -#define PROC_APPSTATE_INACTIVE 4 - -int proc_setappstate(int pid, int appstate); -int proc_appstate(int pid, int * appstatep); - -#define PROC_DEVSTATUS_SHORTTERM 1 -#define PROC_DEVSTATUS_LONGTERM 2 - -int proc_devstatusnotify(int devicestatus); - -#define PROC_PIDBIND_CLEAR 0 -#define PROC_PIDBIND_SET 1 -int proc_pidbind(int pid, uint64_t threadid, int bind); - -#else /* TARGET_OS_EMBEDDED */ /* resume the process suspend due to low VM resource */ int proc_clear_vmpressure(pid_t pid); @@ -88,19 +56,27 @@ int proc_clear_delayidlesleep(void); int proc_disable_apptype(pid_t pid, int apptype); int proc_enable_apptype(pid_t pid, int apptype); -#endif /* TARGET_OS_EMBEDDED */ /* mark process as importance donating */ int proc_donate_importance_boost(void); +/* DEPRECATED: supported for backward compatibility only */ /* check the message for an importance boost and take an assertion on it */ int proc_importance_assertion_begin_with_msg(mach_msg_header_t *msg, - mach_msg_trailer_t *trailer, - uint64_t *assertion_token); + mach_msg_trailer_t *trailer, + uint64_t *assertion_token) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_10, __IPHONE_6_0, __IPHONE_8_0); +/* DEPRECATED: supported for backward compatibility only */ /* drop an assertion */ int proc_importance_assertion_complete(uint64_t assertion_handle); +/* check the message for a App De-Nap boost and take an assertion on it */ +int proc_denap_assertion_begin_with_msg(mach_msg_header_t *msg, + uint64_t *assertion_token); + +/* drop a de-nap assertion */ +int proc_denap_assertion_complete(uint64_t assertion_handle); + int 
proc_set_cpumon_params(pid_t pid, int percentage, int interval) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); int proc_get_cpumon_params(pid_t pid, int *percentage, int *interval) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); int proc_set_cpumon_defaults(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); @@ -111,6 +87,14 @@ int proc_get_wakemon_params(pid_t pid, int *rate_hz, int *flags) __OSX_AVAILABLE int proc_set_wakemon_defaults(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); int proc_disable_wakemon(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +int proc_set_cpumon_params_fatal(pid_t pid, int percentage, int interval) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + +/* request trace buffer collection */ +int proc_trace_log(pid_t pid, uint64_t uniqueid) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + +/* proc_info call to get the originator information */ +int proc_pidoriginatorinfo(int flavor, void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + #if !TARGET_IPHONE_SIMULATOR #define PROC_SUPPRESS_SUCCESS (0) diff --git a/libsyscall/wrappers/libproc/proc_listpidspath.c b/libsyscall/wrappers/libproc/proc_listpidspath.c index 57494deb7..c263c7f2c 100644 --- a/libsyscall/wrappers/libproc/proc_listpidspath.c +++ b/libsyscall/wrappers/libproc/proc_listpidspath.c @@ -211,33 +211,55 @@ check_process_vnodes(fdOpenInfoRef info, int pid) static int check_process_text(fdOpenInfoRef info, int pid) { - uint64_t a = 0; int status; + int buf_used; + struct proc_regionwithpathinfo rwpi; - while (1) { // for all memory regions - int buf_used; - struct proc_regionwithpathinfo rwpi; + if (info->flags & PROC_LISTPIDSPATH_PATH_IS_VOLUME) { - // processing next address - buf_used = proc_pidinfo(pid, PROC_PIDREGIONPATHINFO, a, &rwpi, sizeof(rwpi)); + // ask for first memory region that matches mountpoint + buf_used = proc_pidinfo(pid, PROC_PIDREGIONPATHINFO3, info->match_stat.st_dev, &rwpi, sizeof(rwpi)); if (buf_used <= 0) { if ((errno == ESRCH) || (errno == EINVAL)) { // if no more text information is available for this process. - break; + return 0; } return -1; } else if (buf_used < sizeof(rwpi)) { // if we didn't get enough information return -1; } - + status = check_file(info, &rwpi.prp_vip.vip_vi.vi_stat); if (status != 0) { // if error or match return status; } - - a = rwpi.prp_prinfo.pri_address + rwpi.prp_prinfo.pri_size; + } else { + uint64_t a = 0; + + while (1) { // for all memory regions + // processing next address + buf_used = proc_pidinfo(pid, PROC_PIDREGIONPATHINFO2, a, &rwpi, sizeof(rwpi)); + if (buf_used <= 0) { + if ((errno == ESRCH) || (errno == EINVAL)) { + // if no more text information is available for this process. 
+ break; + } + return -1; + } else if (buf_used < sizeof(rwpi)) { + // if we didn't get enough information + return -1; + } + + status = check_file(info, &rwpi.prp_vip.vip_vi.vi_stat); + if (status != 0) { + // if error or match + return status; + } + + a = rwpi.prp_prinfo.pri_address + rwpi.prp_prinfo.pri_size; + } } return 0; @@ -283,18 +305,18 @@ check_process_fds(fdOpenInfoRef info, int pid) } } - buf_used = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, info->fds, info->fds_size); + buf_used = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, info->fds, (int)info->fds_size); if (buf_used <= 0) { return -1; } if ((buf_used + sizeof(struct proc_fdinfo)) >= info->fds_size) { // if not enough room in the buffer for an extra fd - buf_used = info->fds_size + sizeof(struct proc_fdinfo); + buf_used = (int)(info->fds_size + sizeof(struct proc_fdinfo)); continue; } - info->fds_count = buf_used / sizeof(struct proc_fdinfo); + info->fds_count = (int)(buf_used / sizeof(struct proc_fdinfo)); break; } @@ -399,14 +421,14 @@ check_process_threads(fdOpenInfoRef info, int pid) } } - buf_used = proc_pidinfo(pid, PROC_PIDLISTTHREADS, 0, info->threads, info->thr_size); + buf_used = proc_pidinfo(pid, PROC_PIDLISTTHREADS, 0, info->threads, (int)info->thr_size); if (buf_used <= 0) { return -1; } if ((buf_used + sizeof(uint64_t)) >= info->thr_size) { // if not enough room in the buffer for an extra thread - buf_used = info->thr_size + sizeof(uint64_t); + buf_used = (int)(info->thr_size + sizeof(uint64_t)); continue; } @@ -443,10 +465,10 @@ check_process_threads(fdOpenInfoRef info, int pid) /* - * check_process - * check [process] current working and root directories - * check [process] text (memory) + * check_process_phase1 + * check [process] process-wide current working and root directories * check [process] open file descriptors + * check [process] per-thread current working and root directories * * in : pid * out : -1 if error @@ -454,7 +476,7 @@ check_process_threads(fdOpenInfoRef info, int pid) * 1 if match */ static int -check_process(fdOpenInfoRef info, int pid) +check_process_phase1(fdOpenInfoRef info, int pid) { int status; @@ -465,13 +487,6 @@ check_process(fdOpenInfoRef info, int pid) return status; } - // check process text (memory) - status = check_process_text(info, pid); - if (status != 0) { - // if error or match - return status; - } - // check open file descriptors status = check_process_fds(info, pid); if (status != 0) { @@ -489,6 +504,29 @@ check_process(fdOpenInfoRef info, int pid) return 0; } +/* + * check_process_phase2 + * check [process] text (memory) + * + * in : pid + * out : -1 if error + * 0 if no match + * 1 if match + */ +static int +check_process_phase2(fdOpenInfoRef info, int pid) +{ + int status; + + // check process text (memory) + status = check_process_text(info, pid); + if (status != 0) { + // if error or match + return status; + } + + return 0; +} /* * proc_listpidspath @@ -559,14 +597,14 @@ proc_listpidspath(uint32_t type, } } - buf_used = proc_listpids(type, typeinfo, info->pids, info->pids_size); + buf_used = proc_listpids(type, typeinfo, info->pids, (int)info->pids_size); if (buf_used <= 0) { goto done; } if ((buf_used + sizeof(int)) >= info->pids_size) { // if not enough room in the buffer for an extra pid - buf_used = info->pids_size + sizeof(int); + buf_used = (int)(info->pids_size + sizeof(int)); continue; } @@ -578,15 +616,46 @@ proc_listpidspath(uint32_t type, buf_used = 0; for (i = info->pids_count - 1; i >= 0; i--) { int pid; - int status; + int pstatus; + + pid = info->pids[i]; + 
if (pid == 0) { + continue; + } + + pstatus = check_process_phase1(info, pid); + if (pstatus != 1) { + // if not a match + continue; + } + + *buf_next++ = pid; + buf_used += sizeof(int); + + if (buf_used >= buffersize) { + // if we have filled the buffer + break; + } + } + + if (buf_used >= buffersize) { + // if we have filled the buffer + status = buf_used; + goto done; + } + + // do a more expensive search if we still have buffer space + for (i = info->pids_count - 1; i >= 0; i--) { + int pid; + int pstatus; pid = info->pids[i]; if (pid == 0) { continue; } - status = check_process(info, pid); - if (status != 1) { + pstatus = check_process_phase2(info, pid); + if (pstatus != 1) { // if not a match continue; } diff --git a/libsyscall/wrappers/mach_approximate_time.c b/libsyscall/wrappers/mach_approximate_time.c new file mode 100644 index 000000000..1e0a4e472 --- /dev/null +++ b/libsyscall/wrappers/mach_approximate_time.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include + +extern uint64_t mach_absolute_time(void); + +#if defined(__arm64__) || defined(__x86_64__) + +uint64_t mach_approximate_time(void) { + uint8_t supported = *((uint8_t *)_COMM_PAGE_APPROX_TIME_SUPPORTED); + if (supported) + { + return *((uint64_t *)_COMM_PAGE_APPROX_TIME); + } + return mach_absolute_time(); +} + +#endif diff --git a/libsyscall/wrappers/mach_approximate_time.s b/libsyscall/wrappers/mach_approximate_time.s new file mode 100644 index 000000000..7ef3336f7 --- /dev/null +++ b/libsyscall/wrappers/mach_approximate_time.s @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
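mach_approximate_time() above is a commpage read with a mach_absolute_time() fallback; a single 64-bit load is atomic on the 64-bit targets, which is why only the 32-bit assembly that follows needs the high/low consistency-check loop. A usage sketch follows; the mach_timebase_info conversion is standard, and declaring the function via extern (rather than a public header) is an assumption of this sketch.

#include <mach/mach_time.h>
#include <stdint.h>
#include <stdio.h>

extern uint64_t mach_approximate_time(void);

void
time_cheaply(void)
{
	mach_timebase_info_data_t tb;
	mach_timebase_info(&tb);

	uint64_t start = mach_approximate_time();
	/* ... work that only needs coarse timing ... */
	uint64_t elapsed = mach_approximate_time() - start;

	/* Convert mach ticks to nanoseconds. */
	uint64_t ns = elapsed * tb.numer / tb.denom;
	printf("approx elapsed: %llu ns\n", (unsigned long long)ns);
}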
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + +#if defined(__i386__) + + .text + .align 4, 0x90 + .globl _mach_approximate_time +_mach_approximate_time: + pushl %ebp // push a frame + movl %esp, %ebp + cmpb $0, _COMM_PAGE_APPROX_TIME_SUPPORTED // check commpage + // "is supported" flag. + je _approx_time_not_supported // if zero, fall through to + // absolute_time + // note the comment above for 32-bit ARM applies equally to 32-bit i386. + .align 4, 0x90 +_approx_time_consistency_check: + movl _COMM_PAGE_APPROX_TIME+4, %edx // load high + movl _COMM_PAGE_APPROX_TIME, %eax // load low + lfence // avoid predictive reads that + // could be invalid if + // interrupted + cmpl _COMM_PAGE_APPROX_TIME+4, %edx // load high and check if equal + // to the first read + jne _approx_time_consistency_check // if not, try again. + popl %ebp + ret + +_approx_time_not_supported: + popl %ebp + jmp _mach_absolute_time + +#endif diff --git a/libsyscall/wrappers/posix_sem_obsolete.c b/libsyscall/wrappers/posix_sem_obsolete.c new file mode 100644 index 000000000..396a1c6ed --- /dev/null +++ b/libsyscall/wrappers/posix_sem_obsolete.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* + * system call stubs are no longer generated for these from + * syscalls.master. Instead, provide simple stubs here. + */ + +int sem_destroy(sem_t *s __unused) +{ + errno = ENOSYS; + return -1; +} + +int sem_getvalue(sem_t * __restrict __unused s, int * __restrict __unused x) +{ + errno = ENOSYS; + return -1; +} + +int sem_init(sem_t * __unused s, int __unused x, unsigned int __unused y) +{ + errno = ENOSYS; + return -1; +} diff --git a/libsyscall/wrappers/rename_ext.c b/libsyscall/wrappers/rename_ext.c new file mode 100644 index 000000000..8c7762139 --- /dev/null +++ b/libsyscall/wrappers/rename_ext.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. 
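posix_sem_obsolete.c above turns the unnamed-semaphore entry points into ENOSYS stubs; named semaphores remain the working path on this platform. A short fallback sketch (the semaphore name is arbitrary):

#include <errno.h>
#include <fcntl.h>
#include <semaphore.h>
#include <stdio.h>

/* sem_init() for unnamed semaphores fails with ENOSYS, as the stubs above
 * show; sem_open() with a name is the supported route. */
sem_t *
make_semaphore(void)
{
	sem_t *s = sem_open("/example.sem", O_CREAT, 0644, 1);
	if (s == SEM_FAILED) {
		perror("sem_open");
		return NULL;
	}
	return s;
}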
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + #include + +/* + * XXXXX REMOVE AND REPLACE the definition of VFS_RENAME_FLAGS_MASK below with + * appropriate header file if/when defined in a header file. + */ +#define VFS_RENAME_FLAGS_MASK 0x00000001 + +void __inc_remove_counter(void); +int __rename_ext(const char *old, const char *new, int flags); + +int +rename_ext(const char *old, const char *new, unsigned int flags) +{ + if (!(flags & VFS_RENAME_FLAGS_MASK)) { + errno = EINVAL; + return -1; + } + + int res = __rename_ext(old, new, flags & VFS_RENAME_FLAGS_MASK); + if (res == 0) __inc_remove_counter(); + return res; +} diff --git a/libsyscall/wrappers/renameat.c b/libsyscall/wrappers/renameat.c new file mode 100644 index 000000000..727760f67 --- /dev/null +++ b/libsyscall/wrappers/renameat.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +void __inc_remove_counter(void); +int __renameat(int oldfd, const char *old, int newfd, const char *new); + +int +renameat(int oldfd, const char *old, int newfd, const char *new) +{ + int res = __renameat(oldfd, old, newfd, new); + if (res == 0) __inc_remove_counter(); + return res; +} diff --git a/bsd/sys/_types/_pthread_attr_t.h b/libsyscall/wrappers/setpriority.c similarity index 50% rename from bsd/sys/_types/_pthread_attr_t.h rename to libsyscall/wrappers/setpriority.c index 0f69ae97c..0ec881022 100644 --- a/bsd/sys/_types/_pthread_attr_t.h +++ b/libsyscall/wrappers/setpriority.c @@ -1,20 +1,15 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2014 Apple Computer, Inc. All rights reserved. 
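rename_ext() above rejects any call without the single defined flag bit and, like renameat() and unlinkat(), bumps the remove counter on success. A usage sketch; the patch does not spell out what the 0x1 bit means beyond "must be set", so no further semantics are claimed here.

#include <stdio.h>

/* Wrapper from above; flag bit 0x1 mirrors the VFS_RENAME_FLAGS_MASK
 * placeholder the wrapper itself defines. */
extern int rename_ext(const char *old, const char *new, unsigned int flags);

int
rename_with_flag(const char *from, const char *to)
{
	/* A zero flags argument is rejected with EINVAL before the syscall
	 * is even attempted, so the defined bit must be passed. */
	if (rename_ext(from, to, 0x1) != 0) {
		perror("rename_ext");
		return -1;
	}
	return 0;
}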
+ * + * @APPLE_LICENSE_HEADER_START@ * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,10 +17,29 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +extern int __setpriority(int which, id_t who, int prio); +extern void _pthread_clear_qos_tsd(mach_port_t); + +/* + * Stub function to account for special case return code from setpriority + * when called with PRIO_DARWIN_THREAD. */ -#ifndef _PTHREAD_ATTR_T -#define _PTHREAD_ATTR_T -typedef __darwin_pthread_attr_t pthread_attr_t; -#endif /* _PTHREAD_ATTR_T */ +int +setpriority(int which, id_t who, int prio) +{ + int rv = __setpriority(which, who, prio); + if (which == PRIO_DARWIN_THREAD && rv == -2) { + _pthread_clear_qos_tsd(MACH_PORT_NULL); + rv = 0; + } + + return rv; +} diff --git a/libsyscall/wrappers/sfi.c b/libsyscall/wrappers/sfi.c new file mode 100644 index 000000000..949f261b0 --- /dev/null +++ b/libsyscall/wrappers/sfi.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
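The setpriority() wrapper above converts the kernel's -2 "QoS removed" sentinel into success after clearing the thread's QoS TSD, the same convention __iopolicysys() uses in getiopolicy_np.c. A sketch of the PRIO_DARWIN_THREAD usage this wrapper fronts, using the constants from <sys/resource.h>:

#include <sys/resource.h>
#include <stdio.h>

/* Throttle the calling thread, then restore it. PRIO_DARWIN_THREAD acts
 * on the current thread only, so `who` must be 0. */
int
run_backgrounded(void (*work)(void))
{
	if (setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG) != 0) {
		perror("setpriority");
		return -1;
	}
	work();
	/* Priority 0 removes the background state again. */
	return setpriority(PRIO_DARWIN_THREAD, 0, 0);
}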
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +int system_set_sfi_window(uint64_t sfi_window_usec) +{ + return __sfi_ctl(SFI_CTL_OPERATION_SFI_SET_WINDOW, 0, sfi_window_usec, NULL); +} + +int system_get_sfi_window(uint64_t *sfi_window_usec) +{ + return __sfi_ctl(SFI_CTL_OPERATION_SFI_GET_WINDOW, 0, 0, sfi_window_usec); +} + +int sfi_set_class_offtime(sfi_class_id_t class_id, uint64_t offtime_usec) +{ + return __sfi_ctl(SFI_CTL_OPERATION_SET_CLASS_OFFTIME, class_id, offtime_usec, NULL); +} + +int sfi_get_class_offtime(sfi_class_id_t class_id, uint64_t *offtime_usec) +{ + return __sfi_ctl(SFI_CTL_OPERATION_GET_CLASS_OFFTIME, class_id, 0, offtime_usec); +} + +int sfi_process_set_flags(pid_t pid, uint32_t flags) +{ + return __sfi_pidctl(SFI_PIDCTL_OPERATION_PID_SET_FLAGS, pid, flags, NULL); +} + +int sfi_process_get_flags(pid_t pid, uint32_t *flags) +{ + return __sfi_pidctl(SFI_PIDCTL_OPERATION_PID_GET_FLAGS, pid, 0, flags); +} diff --git a/libsyscall/wrappers/spawn/posix_spawn.c b/libsyscall/wrappers/spawn/posix_spawn.c index 3d8369be0..d3bec4ede 100644 --- a/libsyscall/wrappers/spawn/posix_spawn.c +++ b/libsyscall/wrappers/spawn/posix_spawn.c @@ -39,9 +39,6 @@ #include #include -#if TARGET_OS_EMBEDDED -#include -#endif /* * posix_spawnattr_init @@ -117,12 +114,8 @@ posix_spawnattr_init(posix_spawnattr_t *attr) (*psattrp)->flags_padding = 0; (*psattrp)->int_padding = 0; - - /* - * The default value of this attribute shall be an no - * process control on resource starvation - */ - (*psattrp)->psa_apptype = 0; + /* Default is no new apptype requested */ + (*psattrp)->psa_apptype = POSIX_SPAWN_PROCESS_TYPE_DEFAULT; /* Jetsam related */ (*psattrp)->psa_jetsam_flags = 0; @@ -135,6 +128,12 @@ posix_spawnattr_init(posix_spawnattr_t *attr) /* Default is no MAC policy extensions. */ (*psattrp)->psa_mac_extensions = NULL; + + /* Default is to inherit parent's coalition */ + (*psattrp)->psa_coalitionid = 0; + + /* Default is no new clamp */ + (*psattrp)->psa_qos_clamp = POSIX_SPAWN_PROC_CLAMP_NONE; } return (err); @@ -1266,40 +1265,6 @@ posix_spawnattr_getcpumonitor(posix_spawnattr_t * __restrict attr, return (0); } -#if TARGET_OS_EMBEDDED -/* - * posix_spawnattr_setjetsam - * - * Description: Set jetsam attributes for the spawn attribute object - * referred to by 'attr'. 
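sfi.c above pairs a global window with per-class off-times. A configuration sketch follows; it assumes microsecond units (as the parameter names indicate), a privileged caller, that sfi_class_id_t is a 32-bit id from <sys/sfi.h>, and the example values are arbitrary.

#include <stdint.h>
#include <stdio.h>

/* Private SFI wrappers from above, restated since no public header exports them. */
extern int system_set_sfi_window(uint64_t sfi_window_usec);
extern int sfi_set_class_offtime(uint32_t class_id, uint64_t offtime_usec);

int
configure_sfi(uint32_t class_id)
{
	/* The window is global; each class gets its own off-time within it. */
	if (system_set_sfi_window(10000) != 0) {	/* 10 ms window */
		perror("system_set_sfi_window");
		return -1;
	}
	return sfi_set_class_offtime(class_id, 2000);	/* 2 ms off per window */
}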
- * - * Parameters: flags The flags value to set - * priority Relative jetsam priority - * high_water_mark Value in pages; resident page - * counts above this level can - * result in termination - * - * Returns: 0 Success - */ -int -posix_spawnattr_setjetsam(posix_spawnattr_t * __restrict attr, - short flags, int priority, int high_water_mark) -{ - _posix_spawnattr_t psattr; - - if (attr == NULL || *attr == NULL) - return EINVAL; - - psattr = *(_posix_spawnattr_t *)attr; - - psattr->psa_jetsam_flags = flags; - psattr->psa_jetsam_flags |= POSIX_SPAWN_JETSAM_SET; - psattr->psa_priority = priority; - psattr->psa_high_water_mark = high_water_mark; - - return (0); -} -#endif /* @@ -1379,8 +1344,9 @@ posix_spawnattr_getmacpolicyinfo_np(const posix_spawnattr_t * __restrict attr, if (extension == NULL) return ESRCH; *datap = (void *)(uintptr_t)extension->data; - if (datalenp != NULL) - *datalenp = extension->datalen; + if (datalenp != NULL) { + *datalenp = (size_t)extension->datalen; + } return 0; } @@ -1424,6 +1390,54 @@ posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict attr, return 0; } +int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict attr, uint64_t coalitionid) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) { + return EINVAL; + } + + psattr = *(_posix_spawnattr_t *)attr; + psattr->psa_coalitionid = coalitionid; + + return 0; +} + + +int posix_spawnattr_set_qos_clamp_np(const posix_spawnattr_t * __restrict attr, uint64_t qos_clamp) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) { + return EINVAL; + } + + if (qos_clamp >= POSIX_SPAWN_PROC_CLAMP_LAST) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + psattr->psa_qos_clamp = qos_clamp; + + return 0; +} + +int +posix_spawnattr_get_qos_clamp_np(const posix_spawnattr_t * __restrict attr, uint64_t * __restrict qos_clampp) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) { + return EINVAL; + } + + psattr = *(_posix_spawnattr_t *)attr; + *qos_clampp = psattr->psa_qos_clamp; + + return (0); +} + + /* * posix_spawn * diff --git a/libsyscall/wrappers/spawn/spawn_private.h b/libsyscall/wrappers/spawn/spawn_private.h index 4dd89b859..10a1b544c 100644 --- a/libsyscall/wrappers/spawn/spawn_private.h +++ b/libsyscall/wrappers/spawn/spawn_private.h @@ -39,10 +39,6 @@ int posix_spawnattr_setcpumonitor(posix_spawnattr_t * __restrict, uint64_t, uint int posix_spawnattr_getcpumonitor(posix_spawnattr_t * __restrict, uint64_t *, uint64_t *) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); int posix_spawnattr_setcpumonitor_default(posix_spawnattr_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_6_0); -#if TARGET_OS_EMBEDDED -int posix_spawnattr_setjetsam(posix_spawnattr_t * __restrict attr, - short flags, int priority, int high_water_mark) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); -#endif #define POSIX_SPAWN_IMPORTANCE_PORT_COUNT 32 int posix_spawnattr_set_importancewatch_port_np(posix_spawnattr_t * __restrict attr, @@ -52,4 +48,9 @@ int posix_spawnattr_set_importancewatch_port_np(posix_spawnattr_t * __restrict a int posix_spawnattr_getmacpolicyinfo_np(const posix_spawnattr_t * __restrict, const char *, void **, size_t *) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); int posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict, const char *, void *, size_t) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict, uint64_t) 
__OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + +int posix_spawnattr_set_qos_clamp_np(const posix_spawnattr_t * __restrict, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); +int posix_spawnattr_get_qos_clamp_np(const posix_spawnattr_t * __restrict, uint64_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0); + #endif /* !defined _SPAWN_PRIVATE_H_*/ diff --git a/libsyscall/wrappers/unlinkat.c b/libsyscall/wrappers/unlinkat.c new file mode 100644 index 000000000..265235199 --- /dev/null +++ b/libsyscall/wrappers/unlinkat.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +void __inc_remove_counter(void); +int __unlinkat(int fd, const char *path, int flag); + +int +unlinkat(int fd, const char *path, int flag) +{ + int res = __unlinkat(fd, path, flag); + if (res == 0) __inc_remove_counter(); + return res; +} diff --git a/libsyscall/wrappers/varargs_wrappers.s b/libsyscall/wrappers/varargs_wrappers.s new file mode 100644 index 000000000..bc6d6c3a4 --- /dev/null +++ b/libsyscall/wrappers/varargs_wrappers.s @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + diff --git a/libsyscall/xcodescripts/compat-symlinks.sh b/libsyscall/xcodescripts/compat-symlinks.sh deleted file mode 100755 index 78b504777..000000000 --- a/libsyscall/xcodescripts/compat-symlinks.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2010 Apple Inc. All rights reserved. -# -# @APPLE_OSREFERENCE_LICENSE_HEADER_START@ -# -# This file contains Original Code and/or Modifications of Original Code -# as defined in and that are subject to the Apple Public Source License -# Version 2.0 (the 'License'). 
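spawn_private.h above adds coalition and QoS-clamp attributes to posix_spawn. A hedged sketch of spawning a child with a clamp follows; the numeric clamp value is an assumption about the private POSIX_SPAWN_PROC_CLAMP_* constants, and the setter is restated via extern rather than pulled from the private header.

#include <spawn.h>
#include <stdint.h>
#include <sys/types.h>

/* Setter declared in spawn_private.h above; the value 1 standing in for a
 * utility-level clamp is a hypothetical assumption. */
extern int posix_spawnattr_set_qos_clamp_np(const posix_spawnattr_t * __restrict, uint64_t);
#define QOS_CLAMP_UTILITY 1	/* hypothetical stand-in */

int
spawn_clamped(pid_t *pid, const char *path, char *const argv[], char *const envp[])
{
	posix_spawnattr_t attr;
	int err = posix_spawnattr_init(&attr);
	if (err != 0)
		return err;

	/* Every thread in the child is capped at the clamped QoS level. */
	err = posix_spawnattr_set_qos_clamp_np(&attr, QOS_CLAMP_UTILITY);
	if (err == 0)
		err = posix_spawn(pid, path, NULL, &attr, argv, envp);

	posix_spawnattr_destroy(&attr);
	return err;
}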
You may not use this file except in -# compliance with the License. The rights granted to you under the License -# may not be used to create, or enable the creation or redistribution of, -# unlawful or unlicensed copies of an Apple operating system, or to -# circumvent, violate, or enable the circumvention or violation of, any -# terms of an Apple operating system software license agreement. -# -# Please obtain a copy of the License at -# http://www.opensource.apple.com/apsl/ and read it before using this file. -# -# The Original Code and all software distributed under the License are -# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER -# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, -# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. -# Please see the License for the specific language governing rights and -# limitations under the License. -# -# @APPLE_OSREFERENCE_LICENSE_HEADER_END@ -# - -if [ "x$ACTION" != "xinstallhdrs" ]; then - ln -sf libsystem_kernel.a "$DSTROOT/usr/local/lib/dyld/libsystem_mach.a" -fi diff --git a/libsyscall/xcodescripts/create-syscalls.pl b/libsyscall/xcodescripts/create-syscalls.pl index ca2700866..eace5b955 100755 --- a/libsyscall/xcodescripts/create-syscalls.pl +++ b/libsyscall/xcodescripts/create-syscalls.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl # -# Copyright (c) 2006-2012 Apple Inc. All rights reserved. +# Copyright (c) 2006-2014 Apple Inc. All rights reserved. # # @APPLE_OSREFERENCE_LICENSE_HEADER_START@ # @@ -135,17 +135,18 @@ my @Cancelable = qw/ accept access aio_suspend close connect connectx disconnectx - fcntl fdatasync fpathconf fstat fsync + faccessat fcntl fdatasync fpathconf fstat fstatat fsync getlogin ioctl - link lseek lstat + link linkat lseek lstat msgrcv msgsnd msync - open + open openat pathconf peeloff poll posix_spawn pread pwrite - read readv recvfrom recvmsg rename + read readv recvfrom recvmsg rename renameat + rename_ext __semwait_signal __sigwait - select sem_wait semop sendmsg sendto sigsuspend stat symlink sync - unlink + select sem_wait semop sendmsg sendto sigsuspend stat symlink symlinkat sync + unlink unlinkat wait4 waitid write writev /; @@ -240,6 +241,7 @@ sub checkForCustomStubs { if (!$$sym{is_private}) { foreach my $subarch (@Architectures) { (my $arch = $subarch) =~ s/arm(v.*)/arm/; + $arch =~ s/x86_64(.*)/x86_64/; $$sym{aliases}{$arch} = [] unless $$sym{aliases}{$arch}; push(@{$$sym{aliases}{$arch}}, $$sym{asm_sym}); } @@ -261,6 +263,7 @@ sub readAliases { my @a = (); for my $arch (@Architectures) { (my $new_arch = $arch) =~ s/arm(v.*)/arm/g; + $new_arch =~ s/x86_64(.*)/x86_64/g; push(@a, $new_arch) unless grep { $_ eq $new_arch } @a; } @@ -318,6 +321,7 @@ sub writeStubForSymbol { my @conditions; for my $subarch (@Architectures) { (my $arch = $subarch) =~ s/arm(v.*)/arm/; + $arch =~ s/x86_64(.*)/x86_64/; push(@conditions, "defined(__${arch}__)") unless grep { $_ eq $arch } @{$$symbol{except}}; } @@ -349,6 +353,7 @@ sub writeAliasesForSymbol { foreach my $subarch (@Architectures) { (my $arch = $subarch) =~ s/arm(v.*)/arm/; + $arch =~ s/x86_64(.*)/x86_64/; next unless scalar($$symbol{aliases}{$arch}); diff --git a/libsyscall/xcodescripts/mach_install_mig.sh b/libsyscall/xcodescripts/mach_install_mig.sh index ac66a4f2d..1ca67723d 100755 --- a/libsyscall/xcodescripts/mach_install_mig.sh +++ b/libsyscall/xcodescripts/mach_install_mig.sh @@ -30,29 +30,29 @@ # build inside OBJROOT cd $OBJROOT -# check if 
we're building for the simulator -if [ "$PLATFORM_NAME" = "iphonesimulator" ] ; then - DSTROOT="${DSTROOT}${SDKROOT}" -fi - MIG=`xcrun -sdk "$SDKROOT" -find mig` MIGCC=`xcrun -sdk "$SDKROOT" -find cc` export MIGCC MIG_DEFINES="-DLIBSYSCALL_INTERFACE" -MIG_HEADER_DST="$DSTROOT/usr/include/mach" -MIG_PRIVATE_HEADER_DST="$DSTROOT/usr/local/include/mach" -SERVER_HEADER_DST="$DSTROOT/usr/include/servers" -MACH_HEADER_DST="$DSTROOT/usr/include/mach" +MIG_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/include/mach" +MIG_PRIVATE_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/local/include/mach" +SERVER_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/include/servers" +MACH_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/include/mach" # from old Libsystem makefiles MACHINE_ARCH=`echo $ARCHS | cut -d' ' -f 1` -if [[ ( "$MACHINE_ARCH" = "x86_64" ) && `echo $ARCHS | wc -w` -gt 1 ]] +if [[ ( "$MACHINE_ARCH" = "arm64" || "$MACHINE_ARCH" = "x86_64" || "$MACHINE_ARCH" = "x86_64h" ) && `echo $ARCHS | wc -w` -gt 1 ]] then # MACHINE_ARCH needs to be a 32-bit arch to generate vm_map_internal.h correctly. MACHINE_ARCH=`echo $ARCHS | cut -d' ' -f 2` + if [[ ( "$MACHINE_ARCH" = "arm64" || "$MACHINE_ARCH" = "x86_64" || "$MACHINE_ARCH" = "x86_64h" ) && `echo $ARCHS | wc -w` -gt 1 ]] + then + # MACHINE_ARCH needs to be a 32-bit arch to generate vm_map_internal.h correctly. + MACHINE_ARCH=`echo $ARCHS | cut -d' ' -f 3` + fi fi SRC="$SRCROOT/mach" -MIG_INTERNAL_HEADER_DST="$DERIVED_SOURCES_DIR/mach" +MIG_INTERNAL_HEADER_DST="$BUILT_PRODUCTS_DIR/internal_hdr/include/mach" MIG_PRIVATE_DEFS_INCFLAGS="-I${SDKROOT}/System/Library/Frameworks/System.framework/PrivateHeaders" MIGS="clock.defs @@ -64,6 +64,7 @@ MIGS="clock.defs lock_set.defs mach_host.defs mach_port.defs + mach_voucher.defs processor.defs processor_set.defs task.defs @@ -74,7 +75,7 @@ MIGS_PRIVATE="" MIGS_DUAL_PUBLIC_PRIVATE="" -if [[ "$PLATFORM_NAME" = "iphoneos" || "$PLATFORM_NAME" = "iphonesimulator" ]] +if [[ "$PLATFORM_NAME" = "iphoneos" || "$PLATFORM_NAME" = "iphonesimulator" || "$PLATFORM_NAME" = "iphoneosnano" || "$PLATFORM_NAME" = "iphonenanosimulator" ]] then MIGS_PRIVATE="mach_vm.defs" else @@ -83,6 +84,7 @@ fi MIGS_INTERNAL="mach_port.defs mach_vm.defs + thread_act.defs vm_map.defs" SERVER_HDRS="key_defs.h @@ -96,7 +98,8 @@ MACH_HDRS="mach.h mach_interface.h port_obj.h sync.h - vm_task.h" + vm_task.h + vm_page_size.h" # install /usr/include/server headers mkdir -p $SERVER_HEADER_DST @@ -111,7 +114,7 @@ for hdr in $MACH_HDRS; do done # special case because we only have one to do here -$MIG -arch $MACHINE_ARCH -header "$SERVER_HEADER_DST/netname.h" $SRC/servers/netname.defs +$MIG -novouchers -arch $MACHINE_ARCH -header "$SERVER_HEADER_DST/netname.h" $SRC/servers/netname.defs # install /usr/include/mach mig headers @@ -119,14 +122,14 @@ mkdir -p $MIG_HEADER_DST for mig in $MIGS $MIGS_DUAL_PUBLIC_PRIVATE; do MIG_NAME=`basename $mig .defs` - $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig + $MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig done mkdir -p $MIG_PRIVATE_HEADER_DST for mig in $MIGS_PRIVATE $MIGS_DUAL_PUBLIC_PRIVATE; do MIG_NAME=`basename $mig .defs` - $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_PRIVATE_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $MIG_PRIVATE_DEFS_INCFLAGS $SRC/$mig + $MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_PRIVATE_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $MIG_PRIVATE_DEFS_INCFLAGS $SRC/$mig if [ ! 
-e "$MIG_HEADER_DST/$MIG_NAME.h" ]; then echo "#error $MIG_NAME.h unsupported." > "$MIG_HEADER_DST/$MIG_NAME.h" fi @@ -141,6 +144,6 @@ mkdir -p $MIG_INTERNAL_HEADER_DST for mig in $MIGS_INTERNAL; do MIG_NAME=`basename $mig .defs` - $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_INTERNAL_HEADER_DST/${MIG_NAME}_internal.h" $SRC/$mig + $MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_INTERNAL_HEADER_DST/${MIG_NAME}_internal.h" $SRC/$mig done diff --git a/makedefs/MakeInc.cmd b/makedefs/MakeInc.cmd index e69b52e0c..af01dedae 100644 --- a/makedefs/MakeInc.cmd +++ b/makedefs/MakeInc.cmd @@ -26,13 +26,15 @@ _v = @ _vstdout = > /dev/null endif +VERBOSE_GENERATED_MAKE_FRAGMENTS = NO + ifeq ($(VERBOSE),YES) XCRUN = /usr/bin/xcrun -verbose else XCRUN = /usr/bin/xcrun endif -SDKROOT ?= / +SDKROOT ?= macosx.internal HOST_SDKROOT ?= macosx HOST_SPARSE_SDKROOT ?= / @@ -64,7 +66,7 @@ ifeq ($(SDKVERSION),) export SDKVERSION := $(shell $(XCRUN) -sdk $(SDKROOT) -show-sdk-version) endif -ifeq ($(PLATFORM),iPhoneOS) +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) ifeq ($(HOST_SPARSE_SDKROOT),/) export HOST_SPARSE_SDKROOT := $(shell $(XCRUN) -sdk iphonehost.internal -show-sdk-path) endif @@ -119,7 +121,7 @@ ifeq ($(NMEDIT),) endif # Platform-specific tools -ifeq (iPhoneOS,$(PLATFORM)) +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) ifeq ($(EMBEDDED_DEVICE_MAP),) export EMBEDDED_DEVICE_MAP := $(shell $(XCRUN) -sdk $(SDKROOT) -find embedded_device_map) endif @@ -127,11 +129,21 @@ EDM_DBPATH = $(PLATFORMPATH)/usr/local/standalone/firmware/device_map.db endif # Scripts or tools we build ourselves +# +# setsegname - Rename segments in a Mach-O object file +# kextsymboltool - Create kext pseudo-kext Mach-O kexts binaries +# decomment - Strip out comments to detect whether a file is comments-only +# installfile - Atomically copy files, esp. 
when multiple architectures +# are trying to install the same target header +# replacecontents - Write contents to a file and update modtime *only* if +# contents differ +# SEG_HACK = $(OBJROOT)/SETUP/setsegname/setsegname KEXT_CREATE_SYMBOL_SET = $(OBJROOT)/SETUP/kextsymboltool/kextsymboltool DECOMMENT = $(OBJROOT)/SETUP/decomment/decomment NEWVERS = $(SRCROOT)/config/newvers.pl INSTALL = $(OBJROOT)/SETUP/installfile/installfile +REPLACECONTENTS = $(OBJROOT)/SETUP/replacecontents/replacecontents # Standard BSD tools RM = /bin/rm -f @@ -141,13 +153,15 @@ MV = /bin/mv LN = /bin/ln -fs CAT = /bin/cat MKDIR = /bin/mkdir -p +CHMOD = /bin/chmod FIND = /usr/bin/find XARGS = /usr/bin/xargs -TAR = /usr/bin/gnutar +PAX = /bin/pax BASENAME = /usr/bin/basename DIRNAME = /usr/bin/dirname TR = /usr/bin/tr TOUCH = /usr/bin/touch +SLEEP = /bin/sleep AWK = /usr/bin/awk SED = /usr/bin/sed ECHO = /bin/echo @@ -263,7 +277,15 @@ space := $(empty) $(empty) # Arithmetic # $(1) is the number to increment -NUM16 = x x x x x x x x x x x x x x x x -increment = $(words x $(wordlist 1,$(1),$(NUM16))) +NUM32 = x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x +increment = $(words x $(wordlist 1,$(1),$(NUM32))) +decrement = $(words $(wordlist 2,$(1),$(NUM32))) + +# Create a sequence from 1 to $(1) +# F(N) = if N > 0: return F(N-1) + "N" else: return "" +sequence = $(if $(wordlist 1,$(1),$(NUM32)),$(call sequence,$(call decrement,$(1))) $(1),) + +# Reverse a list of words in $(1) +reverse = $(if $(word 2,$(1)),$(call reverse,$(wordlist 2,$(words $(1)),$(1)))) $(word 1,$(1)) # vim: set ft=make: diff --git a/makedefs/MakeInc.def b/makedefs/MakeInc.def index 910cc0e46..6010d8dbb 100644 --- a/makedefs/MakeInc.def +++ b/makedefs/MakeInc.def @@ -1,6 +1,6 @@ # -*- mode: makefile;-*- # -# Copyright (C) 1999-2012 Apple Inc. All rights reserved. +# Copyright (C) 1999-2013 Apple Inc. All rights reserved. # # MakeInc.def contains global definitions for building, # linking, and installing files. 
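The word-list arithmetic defined above merits a quick illustration, since make has no native integer math: a number N is encoded as the first N words of NUM32, and counting words performs the add or subtract. A minimal standalone sketch of the same helpers (GNU make; the $(info ...) demo lines are illustrative and not part of the patch):

NUM32 = x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x
# Take the first $(1) placeholder words, prepend one more, count the result
increment = $(words x $(wordlist 1,$(1),$(NUM32)))
# Take words 2 through $(1) of the placeholder list, i.e. $(1)-1 words
decrement = $(words $(wordlist 2,$(1),$(NUM32)))
# Recurse down to zero, emitting each value on the way back up
sequence = $(if $(wordlist 1,$(1),$(NUM32)),$(call sequence,$(call decrement,$(1))) $(1),)
# Peel off the head until one word remains, then emit heads in reverse
reverse = $(if $(word 2,$(1)),$(call reverse,$(wordlist 2,$(words $(1)),$(1)))) $(word 1,$(1))

$(info $(call increment,3))   # prints 4
$(info $(call decrement,3))   # prints 2
$(info $(call sequence,4))    # prints 1 2 3 4
$(info $(call reverse,a b c)) # prints c b a

Because a value can only be as large as the placeholder list, these helpers top out at 32, which is exactly why the patch widens NUM16 to NUM32.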
@@ -9,7 +9,7 @@ # # Architecture Configuration options # -SUPPORTED_ARCH_CONFIGS := X86_64 +SUPPORTED_ARCH_CONFIGS := X86_64 X86_64H # # Kernel Configuration options @@ -21,12 +21,13 @@ SUPPORTED_KERNEL_CONFIGS = RELEASE DEVELOPMENT DEBUG PROFILE # SUPPORTED_X86_64_MACHINE_CONFIGS = NONE +SUPPORTED_X86_64H_MACHINE_CONFIGS = NONE # # Platform options # -SUPPORTED_PLATFORMS = MacOSX iPhoneOS iPhoneSimulator +SUPPORTED_PLATFORMS = MacOSX iPhoneOS iPhoneSimulator iPhoneOSNano iPhoneNanoSimulator # # Set up *_LC variables during recursive invocations @@ -58,9 +59,9 @@ COMPONENT_IMPORT_LIST = $(filter-out $(COMPONENT),$(COMPONENT_LIST)) # ifeq ($(PLATFORM),MacOSX) DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=$(SDKVERSION) -else ifeq ($(PLATFORM),iPhoneOS) +else ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) DEPLOYMENT_TARGET_FLAGS = -miphoneos-version-min=$(SDKVERSION) -else ifeq ($(PLATFORM),iPhoneSimulator) +else ifneq ($(filter iPhoneSimulator iPhoneNanoSimulator,$(PLATFORM)),) DEPLOYMENT_TARGET_FLAGS = else DEPLOYMENT_TARGET_FLAGS = @@ -71,7 +72,7 @@ endif # Standard defines list # DEFINES = -DAPPLE -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE \ - -DPRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(IDENT) \ + -DPRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(CONFIG_DEFINES) \ $(SEED_DEFINES) # @@ -88,7 +89,8 @@ CWARNFLAGS_STD = \ -Wall -Werror -Wno-format-y2k -Wextra -Wstrict-prototypes \ -Wmissing-prototypes -Wpointer-arith -Wreturn-type -Wcast-qual \ -Wwrite-strings -Wswitch -Wshadow -Wcast-align -Wchar-subscripts \ - -Winline -Wnested-externs -Wredundant-decls -Wextra-tokens + -Winline -Wnested-externs -Wredundant-decls -Wextra-tokens \ + -Wunreachable-code # Can be overridden in Makefile.template or Makefile.$arch export CWARNFLAGS ?= $(CWARNFLAGS_STD) @@ -100,7 +102,8 @@ endef CXXWARNFLAGS_STD = \ -Wall -Werror -Wno-format-y2k -Wextra -Wpointer-arith -Wreturn-type \ -Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wchar-subscripts \ - -Wredundant-decls -Wextra-tokens + -Wredundant-decls -Wextra-tokens \ + -Wunreachable-code # overloaded-virtual warnings are non-fatal (9000888) CXXWARNFLAGS_STD += -Wno-error=overloaded-virtual @@ -116,6 +119,7 @@ endef # Default ARCH_FLAGS, for use with compiler/linker/assembler/mig drivers ARCH_FLAGS_X86_64 = -arch x86_64 +ARCH_FLAGS_X86_64H = -arch x86_64h # @@ -128,7 +132,6 @@ endif # # Debug info # -DSYMKERNELSYSDIR = mach_kernel.sys.dSYM DSYMINFODIR = Contents DSYMKGMACROSDIR = Contents/Resources DSYMLLDBMACROSDIR = Contents/Resources/Python @@ -151,8 +154,9 @@ CFLAGS_DEBUG = CFLAGS_PROFILE = -pg CFLAGS_X86_64 = -Dx86_64 -DX86_64 -D__X86_64__ -DLP64 \ - -DPAGE_SIZE_FIXED -mkernel -msoft-float \ - -fno-limit-debug-info # Workaround for 11076603 + -DPAGE_SIZE_FIXED -mkernel -msoft-float + +CFLAGS_X86_64H = $(CFLAGS_X86_64) CFLAGS_RELEASEX86_64 = -O2 @@ -161,11 +165,18 @@ CFLAGS_DEVELOPMENTX86_64 = -O2 CFLAGS_DEBUGX86_64 = -O0 CFLAGS_PROFILEX86_64 = -O2 +CFLAGS_RELEASEX86_64H = -O2 +CFLAGS_DEVELOPMENTX86_64H = -O2 +# No space optimization for the DEBUG kernel for the benefit of gdb: +CFLAGS_DEBUGX86_64H = -O0 +CFLAGS_PROFILEX86_64H = -O2 + CFLAGS_RELEASEARM = -O2 CFLAGS_DEVELOPMENTARM = -O2 CFLAGS_DEBUGARM = -O0 CFLAGS_PROFILEARM = -O2 + CFLAGS = $(CFLAGS_GEN) \ $($(addsuffix $(CURRENT_MACHINE_CONFIG),MACHINE_FLAGS_$(CURRENT_ARCH_CONFIG)_)) \ $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ @@ -186,6 +197,7 @@ CXXFLAGS_GEN = -fapple-kext $(OTHER_CXXFLAGS) # For the moment, do not use gnu++11 #CXXFLAGS_ARM = -std=gnu++11 + +CXXFLAGS = 
$(CXXFLAGS_GEN) \ $($(addsuffix $(CURRENT_ARCH_CONFIG),CXXFLAGS_)) \ $($(addsuffix $(CURRENT_KERNEL_CONFIG),CXXFLAGS_)) @@ -200,7 +212,7 @@ S_KCC = $(CC) # # Default SFLAGS # -SFLAGS_GEN = -D__ASSEMBLER__ $(OTHER_CFLAGS) +SFLAGS_GEN = -D__ASSEMBLER__ -DASSEMBLER $(OTHER_CFLAGS) SFLAGS_RELEASE = SFLAGS_DEVELOPMENT = @@ -208,6 +220,7 @@ SFLAGS_DEBUG = SFLAGS_PROFILE = SFLAGS_X86_64 = $(CFLAGS_X86_64) +SFLAGS_X86_64H = $(CFLAGS_X86_64H) SFLAGS = $(SFLAGS_GEN) \ $($(addsuffix $(CURRENT_MACHINE_CONFIG),MACHINE_FLAGS_$(CURRENT_ARCH_CONFIG)_)) \ @@ -284,11 +297,17 @@ LDFLAGS_KERNEL_RELEASEX86_64 = \ # Define KERNEL_BASE_OFFSET so known at compile time: CFLAGS_X86_64 += -DKERNEL_BASE_OFFSET=$(KERNEL_BASE_OFFSET) +CFLAGS_X86_64H += -DKERNEL_BASE_OFFSET=$(KERNEL_BASE_OFFSET) LDFLAGS_KERNEL_DEBUGX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) LDFLAGS_KERNEL_DEVELOPMENTX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) LDFLAGS_KERNEL_PROFILEX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) +LDFLAGS_KERNEL_RELEASEX86_64H = $(LDFLAGS_KERNEL_RELEASEX86_64) +LDFLAGS_KERNEL_DEBUGX86_64H = $(LDFLAGS_KERNEL_RELEASEX86_64H) +LDFLAGS_KERNEL_DEVELOPMENTX86_64H = $(LDFLAGS_KERNEL_RELEASEX86_64H) +LDFLAGS_KERNEL_PROFILEX86_64H = $(LDFLAGS_KERNEL_RELEASEX86_64H) + LDFLAGS_KERNEL = $(LDFLAGS_KERNEL_GEN) \ $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ @@ -334,12 +353,15 @@ INCFLAGS = $(INCFLAGS_LOCAL) $(INCFLAGS_GEN) $(INCFLAGS_IMPORT) $(INCFLAGS_EXTE # # Default MIGFLAGS # -MIGFLAGS = $(DEFINES) $(INCFLAGS) $($(addsuffix $(CURRENT_ARCH_CONFIG),CFLAGS_)) $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ +MIGFLAGS = $(DEFINES) $(INCFLAGS) -novouchers $($(addsuffix $(CURRENT_ARCH_CONFIG),CFLAGS_)) $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ $(DEPLOYMENT_TARGET_FLAGS) # # Support for LLVM Link Time Optimization (LTO) # +# LTO can be explicitly enabled or disabled with BUILD_LTO=1|0 +# and defaults to enabled except for DEBUG kernels +# # CFLAGS_NOLTO_FLAG is needed on a per-file basis (for files # that deal poorly with LTO, or files that must be machine # code *.o files for xnu to build (i.e, setsegname runs on @@ -349,10 +371,20 @@ MIGFLAGS = $(DEFINES) $(INCFLAGS) $($(addsuffix $(CURRENT_ARCH_CONFIG),CFLAGS_)) # do not utilize an export list. For these configs to build, # we need to prevent the LTO logic from dead stripping them. 
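A pattern that recurs throughout these flag definitions deserves one concrete illustration: per-configuration values are selected not with conditional chains but by computing a variable name with addsuffix and expanding it. A minimal sketch of the idiom (GNU make; the values here are illustrative, not xnu's):

CFLAGS_RELEASE = -O2
CFLAGS_DEBUG = -O0
CURRENT_KERNEL_CONFIG = DEBUG
# $(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_) builds the name CFLAGS_DEBUG,
# and the outer $( ) expands that computed name to its value.
SELECTED_CFLAGS = $($(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_))
$(info $(SELECTED_CFLAGS))  # prints -O0

The same computed-name trick drives $(LTO_ENABLED_$(CURRENT_KERNEL_CONFIG)) in the block below, giving each kernel configuration its own LTO default while still letting an explicit BUILD_LTO=1|0 on the command line win.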
-ifeq ($(BUILD_LTO),1) +LTO_ENABLED_RELEASE = 1 +LTO_ENABLED_DEVELOPMENT = 1 +LTO_ENABLED_DEBUG = 0 + +ifneq ($(BUILD_LTO),) +USE_LTO = $(BUILD_LTO) +else +USE_LTO = $(LTO_ENABLED_$(CURRENT_KERNEL_CONFIG)) +endif + +ifeq ($(USE_LTO),1) CFLAGS_GEN += -flto CXXFLAGS_GEN += -flto -LDFLAGS_KERNEL_GEN += -Wl,-mllvm,-disable-simplify-libcalls -Wl,-object_path_lto,$(TARGET)/lto.o # -Wl,-mllvm -Wl,-disable-fp-elim +LDFLAGS_KERNEL_GEN += -Wl,-mllvm,-inline-threshold=125 -Wl,-object_path_lto,$(TARGET)/lto.o # -Wl,-mllvm -Wl,-disable-fp-elim LDFLAGS_NOSTRIP_FLAG = -rdynamic CFLAGS_NOLTO_FLAG = -fno-lto SUPPORTS_CTFCONVERT = 0 @@ -382,7 +414,8 @@ EXEC_INSTALL_FLAGS = -c -S -m 0755 # Header file destinations # ifeq ($(RC_ProjectName),xnu_headers_Sim) - HEADER_INSTALL_PREFIX = $(SDKROOT) + include $(MAKEFILEPATH)/../AppleInternal/Makefiles/Makefile.indigo_prefix + HEADER_INSTALL_PREFIX = $(INDIGO_PREFIX) else HEADER_INSTALL_PREFIX = endif @@ -418,7 +451,6 @@ SINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) $(SEED_DEFINES) KPINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) $(SEED_DEFINES) -DKERNEL_PRIVATE -DPRIVATE -DKERNEL -U_OPEN_SOURCE_ -U__OPEN_SOURCE__ KINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) $(SEED_DEFINES) -UKERNEL_PRIVATE -UPRIVATE -DKERNEL -D_OPEN_SOURCE_ -D__OPEN_SOURCE__ - # # Component Header file destinations # @@ -428,7 +460,7 @@ EXPDIR = EXPORT_HDRS/$(COMPONENT) # Strip Flags # STRIP_FLAGS_RELEASE = -S -x -STRIP_FLAGS_DEVELOPMENT = -S -x +STRIP_FLAGS_DEVELOPMENT = -S STRIP_FLAGS_DEBUG = -S STRIP_FLAGS_PROFILE = -S -x @@ -437,9 +469,13 @@ STRIP_FLAGS = $($(addsuffix $(CURRENT_KERNEL_CONFIG),STRIP_FLAGS_)) # # dsymutil flags # -DSYMUTIL_FLAGS_X86_64 = --arch=x86_64 +DSYMUTIL_FLAGS_GEN = --minimize -DSYMUTIL_FLAGS = $($(addsuffix $(CURRENT_ARCH_CONFIG),DSYMUTIL_FLAGS_)) +DSYMUTIL_FLAGS_X86_64 = --arch=x86_64 +DSYMUTIL_FLAGS_X86_64H = --arch=x86_64h + +DSYMUTIL_FLAGS = $(DSYMUTIL_FLAGS_GEN) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG),DSYMUTIL_FLAGS_)) # # Man Page destination # @@ -456,13 +492,41 @@ DEVELOPER_EXTRAS_DIR = /AppleInternal/CoreOS/xnu_debug # INSTALL_KERNEL_DIR = / +# +# new OS X install location +# +SYSTEM_LIBRARY_KERNELS_DIR = /System/Library/Kernels + +# +# File names in DSTROOT +# + +ifeq ($(PLATFORM),MacOSX) +KERNEL_FILE_NAME_PREFIX = kernel +else +KERNEL_FILE_NAME_PREFIX = mach +endif + +ifeq ($(CURRENT_MACHINE_CONFIG),NONE) +ifeq ($(CURRENT_KERNEL_CONFIG),RELEASE) +KERNEL_FILE_NAME = $(KERNEL_FILE_NAME_PREFIX) +KERNEL_LLDBBOOTSTRAP_NAME = $(KERNEL_FILE_NAME_PREFIX).py +else +KERNEL_FILE_NAME = $(KERNEL_FILE_NAME_PREFIX).$(CURRENT_KERNEL_CONFIG_LC) +KERNEL_LLDBBOOTSTRAP_NAME = $(KERNEL_FILE_NAME_PREFIX).py +endif +else +KERNEL_FILE_NAME = $(KERNEL_FILE_NAME_PREFIX).$(CURRENT_KERNEL_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +KERNEL_LLDBBOOTSTRAP_NAME = $(KERNEL_FILE_NAME_PREFIX)_$(CURRENT_KERNEL_CONFIG_LC).py +endif + # # System.kext pseudo-kext install location # INSTALL_EXTENSIONS_DIR = /System/Library/Extensions # -# KDK location for iOS +# KDK location # INSTALL_KERNEL_SYM_DIR = /System/Library/Extensions/KDK @@ -477,16 +541,18 @@ INSTALL_DTRACE_SCRIPTS_DIR = /usr/lib/dtrace # ifeq ($(RC_ProjectName),xnu_debug) INSTALL_KERNEL_DIR := $(DEVELOPER_EXTRAS_DIR) -DELETE_KERNEL_FRAMEWORK_AND_MISC = 1 +INSTALL_KERNEL_SYM_DIR := $(DEVELOPER_EXTRAS_DIR) +INSTALL_KERNEL_SYM_TO_KDK = 1 INSTALL_XNU_DEBUG_FILES = 1 -endif -ifeq ($(PLATFORM),iPhoneOS) -INSTALL_PRIMARY_KERNEL_LIKE_NON_PRIMARY = 1 +else ifneq ($(filter iPhoneOS 
iPhoneOSNano,$(PLATFORM)),) INSTALL_KERNEL_SYM_TO_KDK = 1 USE_BINARY_PLIST = 1 -else ifeq ($(PLATFORM),iPhoneSimulator) -INSTALL_SYSTEM_FRAMEWORK_SYMLINKS = 1 +else ifneq ($(filter iPhoneSimulator iPhoneNanoSimulator,$(PLATFORM)),) USE_BINARY_PLIST = 1 +else ifeq ($(PLATFORM),MacOSX) +INSTALL_KERNEL_DIR := $(SYSTEM_LIBRARY_KERNELS_DIR) +INSTALL_KERNEL_SYM_DIR := $(SYSTEM_LIBRARY_KERNELS_DIR) +INSTALL_KERNEL_SYM_TO_KDK = $(if $(filter YES,$(DWARF_DSYM_FILE_SHOULD_ACCOMPANY_PRODUCT)),1,0) endif # vim: set ft=make: diff --git a/makedefs/MakeInc.dir b/makedefs/MakeInc.dir index 8b4b92a99..9ecadbdf1 100644 --- a/makedefs/MakeInc.dir +++ b/makedefs/MakeInc.dir @@ -93,10 +93,11 @@ $(eval $(call RECURSIVE_BUILD_RULES_template,config_all,$(CONFIG_SUBDIRS),do_con # $(eval $(call RECURSIVE_BUILD_RULES_template,build_install_primary,$(INST_SUBDIRS),do_build_install_primary,1)) $(eval $(call RECURSIVE_BUILD_RULES_template,build_install_non_primary,$(INST_SUBDIRS),do_build_install_non_primary,1)) +$(eval $(call RECURSIVE_BUILD_RULES_template,config_install,$(CONFIG_SUBDIRS),do_config_install,1)) # -# Install Man Pages +# Install text files # -$(eval $(call RECURSIVE_BUILD_RULES_template,build_installman,$(INSTMAN_SUBDIRS),do_installman,)) +$(eval $(call RECURSIVE_BUILD_RULES_template,textfiles_install,$(INSTTEXTFILES_SUBDIRS),do_textfiles_install,)) # vim: set ft=make: diff --git a/makedefs/MakeInc.kernel b/makedefs/MakeInc.kernel index d44d218ca..9c06f04ea 100644 --- a/makedefs/MakeInc.kernel +++ b/makedefs/MakeInc.kernel @@ -28,73 +28,69 @@ endif STATIC_KMODS = $(SRCROOT)/kmods.a -# -# File names in DSTROOT -# - -KERNEL_FILE_NAME_SUFFIX = - -KERNEL_FILE_NAME = mach_kernel$(KERNEL_FILE_NAME_SUFFIX) - -ifeq ($(CURRENT_MACHINE_CONFIG),NONE) -ALT_KERNEL_FILE_NAME = mach$(KERNEL_FILE_NAME_SUFFIX).$(CURRENT_KERNEL_CONFIG_LC) -ALT_KERNEL_LLDBBOOTSTRAP_NAME = mach$(KERNEL_FILE_NAME_SUFFIX).py -else -ALT_KERNEL_FILE_NAME = mach$(KERNEL_FILE_NAME_SUFFIX).$(CURRENT_KERNEL_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) -ALT_KERNEL_LLDBBOOTSTRAP_NAME = mach$(KERNEL_FILE_NAME_SUFFIX)_$(CURRENT_KERNEL_CONFIG_LC).py -endif - # # Rules for the highly parallel "build" phase, where each build configuration # writes into their own $(TARGET) independent of other build configs # +# There are 3 primary build outputs: +# 1) $(KERNEL_FILE_NAME).unstripped (raw linked kernel, unstripped) +# 2) $(KERNEL_FILE_NAME) (stripped kernel, with optional CTF data) +# 3) $(KERNEL_FILE_NAME).dSYM (dSYM) +# -do_build_all:: do_build_mach_kernel +do_build_all:: do_build_kernel -.PHONY: do_build_mach_kernel +.PHONY: do_build_kernel -do_build_mach_kernel: $(TARGET)/mach_kernel $(TARGET)/mach_kernel.sys - $(_v)if [ $(CURRENT_MACHINE_CONFIG) != NONE ] ; then \ - $(LN) $(call function_convert_build_config_to_objdir,$(CURRENT_BUILD_CONFIG))/mach_kernel $(OBJROOT)/mach.$(CURRENT_KERNEL_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC); \ - fi; +do_build_kernel: $(TARGET)/$(KERNEL_FILE_NAME) $(TARGET)/$(KERNEL_FILE_NAME).unstripped + @: ifeq ($(BUILD_DSYM),1) -do_build_all:: do_build_mach_kernel_dSYM +do_build_all:: do_build_kernel_dSYM endif -.PHONY: do_build_mach_kernel_dSYM +.PHONY: do_build_kernel_dSYM -do_build_mach_kernel_dSYM: $(TARGET)/mach_kernel.sys.dSYM +do_build_kernel_dSYM: $(TARGET)/$(KERNEL_FILE_NAME).dSYM @: -$(TARGET)/mach_kernel: $(TARGET)/mach_kernel.sys - @echo STRIP mach_kernel +.LDFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(LD) $(LDFLAGS_KERNEL) $(LD_KERNEL_LIBS) +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) + 
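The .LDFLAGS and .CFLAGS stamp rules just added implement rebuild-on-flag-change: the targets are remade on every run, but replacecontents only bumps the stamp's modification time when the recorded command line actually differs, so anything depending on the stamp relinks or recompiles exactly when the flags change. A rough standalone equivalent using cmp and mv in place of replacecontents (hypothetical Makefile, not part of the patch; recipes indented with hard tabs):

.PHONY: ALWAYS
ALWAYS:

# Re-record the link command every run; update the stamp only on change
.LDFLAGS: ALWAYS
	@echo '$(LD) $(LDFLAGS)' > .LDFLAGS.tmp
	@if cmp -s .LDFLAGS.tmp .LDFLAGS; then rm -f .LDFLAGS.tmp; else mv .LDFLAGS.tmp .LDFLAGS; fi

# prog relinks when main.o changes or when the link flags change
prog: main.o .LDFLAGS
	$(LD) $(LDFLAGS) -o $@ main.o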
+$(TARGET)/$(KERNEL_FILE_NAME): $(TARGET)/$(KERNEL_FILE_NAME).unstripped + @echo STRIP $(@F) $(_v)$(STRIP) $(STRIP_FLAGS) $< -o $@ $(_v)$(RM) $@.ctfdata ifeq ($(DO_CTFMERGE),1) - @echo CTFMERGE mach_kernel + @echo CTFMERGE $(@F) $(_v)$(FIND) $(TARGET)/ -name \*.ctf -size +0 | \ $(XARGS) $(CTFMERGE) -l xnu -o $@ -Z $@.ctfdata || true endif $(_v)if [ -s $@.ctfdata ]; then \ - echo CTFINSERT mach_kernel; \ + echo CTFINSERT $(@F); \ $(CTFINSERT) $@ $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ $@.ctfdata -o $@; \ fi; + $(_v)$(LN) $(call function_convert_build_config_to_objdir,$(CURRENT_BUILD_CONFIG))/$(KERNEL_FILE_NAME) $(OBJROOT)/$(KERNEL_FILE_NAME) -$(TARGET)/mach_kernel.sys.dSYM: $(TARGET)/mach_kernel.sys - $(_v)echo DSYMUTIL mach_kernel.sys +$(TARGET)/$(KERNEL_FILE_NAME).dSYM: $(TARGET)/$(KERNEL_FILE_NAME).unstripped + $(_v)echo DSYMUTIL $(@F) $(_v)$(DSYMUTIL) $(DSYMUTIL_FLAGS) $< -o $@ + $(_v)$(MV) $@/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME).unstripped $@/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) $(_v)$(TOUCH) $@ -$(TARGET)/mach_kernel.sys: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST),$(component)/$(CURRENT_KERNEL_CONFIG)/$(component).filelist)) lastkernelconstructor.o $(SRCROOT)/config/version.c $(SRCROOT)/config/MasterVersion +$(TARGET)/$(KERNEL_FILE_NAME).unstripped: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST),$(component)/$(CURRENT_KERNEL_CONFIG)/$(component).filelist)) lastkernelconstructor.o $(SRCROOT)/config/version.c $(SRCROOT)/config/MasterVersion .LDFLAGS $(filter %/MakeInc.kernel,$(MAKEFILE_LIST)) $(_v)${MAKE} -f $(firstword $(MAKEFILE_LIST)) version.o - @echo LD mach_kernel.sys + @echo LD $(@F) $(_v)$(CAT) $(filter %.filelist,$+) < /dev/null > link.filelist - $(_v)$(LD) $(LDFLAGS_KERNEL) -filelist link.filelist version.o $(filter %.o,$+) `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \ - -o $@ $(LD_KERNEL_LIBS) + $(_v)$(LD) $(LDFLAGS_KERNEL) -filelist link.filelist version.o $(filter %.o,$+) -o $@ $(LD_KERNEL_LIBS) +-include version.d +version.o: .CFLAGS $(filter %/MakeInc.kernel,$(MAKEFILE_LIST)) version.o: $(OBJPATH)/version.c + ${C_RULE_0} ${C_RULE_1A}$< ${C_RULE_2} ${C_RULE_4} @@ -104,7 +100,10 @@ $(OBJPATH)/version.c: $(SRCROOT)/config/version.c $(NEWVERS) $(SRCROOT)/config/M $(_v)$(CP) $< $@ $(_v)$(NEWVERS) $(OBJPATH)/version.c > /dev/null; +-include lastkernelconstructor.d +lastkernelconstructor.o: .CFLAGS $(filter %/MakeInc.kernel,$(MAKEFILE_LIST)) lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c + ${C_RULE_0} ${C_RULE_1A}$< $(CFLAGS_NOLTO_FLAG) ${C_RULE_2} ${C_RULE_3} @@ -112,36 +111,22 @@ lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c $(_v)$(SEG_HACK) -s __DATA -n __LAST -o $@__ $@ $(_v)$(MV) $@__ $@ -# invalidate current kernel in $(SYMROOT) -do_build_setup:: - $(_v)$(TOUCH) $(OBJROOT)/.mach_kernel.timestamp - $(_v)$(TOUCH) $(OBJROOT)/.symbolset.timestamp - # # Install rules. Each build config is classified as "primary" (the first # config for an architecture) or "non-primary". Primary build configs # have the semantic of competing to *combine* single-architecture # files into a multi-architecture output in the DSTROOT, like -# $(DSTROOT)/mach_kernel, and consequently each primary build config +# $(DSTROOT)/$(KERNEL_FILE_NAME), and consequently each primary build config # has its install target run serially with respect to other primary # build configs. 
Non-primary build configs will never compete for # files in the DSTROOT or SYMROOT, and can be installed in parallel # with other non-primary configs (and even primary configs) # -do_build_install_primary:: do_install_kernel_framework - -ifeq ($(INSTALL_PRIMARY_KERNEL_LIKE_NON_PRIMARY),1) do_build_install_primary:: do_install_machine_specific_kernel ifeq ($(BUILD_DSYM),1) do_build_install_primary:: do_install_machine_specific_kernel_dSYM endif -else -do_build_install_primary:: do_install_mach_kernel -ifeq ($(BUILD_DSYM),1) -do_build_install_primary:: do_install_mach_kernel_dSYM -endif -endif do_build_install_non_primary:: do_install_machine_specific_kernel ifeq ($(BUILD_DSYM),1) @@ -157,97 +142,77 @@ endif ifeq ($(INSTALL_XNU_DEBUG_FILES),1) do_build_install_primary:: do_install_xnu_debug_files -ifeq ($(BUILD_DSYM),1) -do_build_install_primary:: do_install_xnu_debug_mach_kernel_dSYM -endif endif -.PHONY: do_install_mach_kernel do_install_mach_kernel_dSYM do_install_xnu_debug_files do_install_xnu_debug_mach_kernel_dSYM - -do_install_mach_kernel: $(DSTROOT)/$(INSTALL_KERNEL_DIR)/$(KERNEL_FILE_NAME) \ - $(SYMROOT)/$(KERNEL_FILE_NAME) - -do_install_mach_kernel_dSYM: \ - $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ - $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ - $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py \ - $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) - @: +.PHONY: do_install_xnu_debug_files do_install_xnu_debug_files: $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/README.DEBUG-kernel.txt @: -do_install_xnu_debug_mach_kernel_dSYM: \ - $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ - $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ - $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py \ - $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) - @: - # # If the timestamp indicates the DSTROOT kernel is out of # date, start over. Normal dependencies don't work because we can have # ( BUILDA, BUILDB, INSTALLB, INSTALLA ) in which case at INSTALLA time -# the timestamps would $(DSTROOT)/mach_kernel is not out of date compared +# the timestamps would show $(DSTROOT)/$(KERNEL_FILE_NAME) is not out of date compared # to BUILDA. 
So we maintain a separate file at the time make(1) # was run and use it to determine what actions to take # -$(DSTROOT)/$(INSTALL_KERNEL_DIR)/$(KERNEL_FILE_NAME): $(TARGET)/mach_kernel ALWAYS +$(DSTROOT)/$(INSTALL_KERNEL_DIR)/$(KERNEL_FILE_NAME): $(TARGET)/$(KERNEL_FILE_NAME) ALWAYS $(_v)$(MKDIR) $(dir $@) - $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ - echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ - $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ - cmdstatus=$$?; \ - else \ - echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ - $(LIPO) -create $@ $< -output $@; \ - cmdstatus=$$?; \ - fi; \ + $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ + echo INSTALL $(@F) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALL $(@F) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ + $(LIPO) -create $@ $< -output $@; \ + cmdstatus=$$?; \ + fi; \ exit $$cmdstatus -$(SYMROOT)/$(KERNEL_FILE_NAME): $(TARGET)/mach_kernel.sys ALWAYS +$(SYMROOT)/$(KERNEL_FILE_NAME): $(TARGET)/$(KERNEL_FILE_NAME).unstripped ALWAYS $(_v)$(MKDIR) $(dir $@) $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ - echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + echo INSTALLSYM $(@F) "($(CURRENT_ARCH_CONFIG_LC))"; \ $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ cmdstatus=$$?; \ else \ - echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + echo INSTALLSYM $(@F) "($(CURRENT_ARCH_CONFIG_LC))"; \ $(LIPO) -create $@ $< -output $@; \ cmdstatus=$$?; \ fi; \ exit $$cmdstatus -$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros: $(TARGET)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros $(_v)$(MKDIR) $(dir $@) - @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + @echo INSTALLMACROS $(@F) "($(CURRENT_ARCH_CONFIG_LC))" $(_v)$(CP) -r $< $(dir $@) $(_v)$(TOUCH) $@ -$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros/core/xnu_lldb_init.py +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_LLDBBOOTSTRAP_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_LLDBBOOTSTRAP_NAME): $(TARGET)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_LLDBBOOTSTRAP_NAME) $(_v)$(MKDIR) $(dir $@) - @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + @echo INSTALLMACROS $(@F) "($(CURRENT_ARCH_CONFIG_LC))" $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/README.DEBUG-kernel.txt: $(SRCROOT)/config/README.DEBUG-kernel.txt $(_v)$(MKDIR) $(dir $@) - @echo INSTALL $(notdir $@) + @echo INSTALL $(@F) $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ -$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMINFODIR)/Info.plist +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist 
$(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist: $(TARGET)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist $(_v)$(MKDIR) $(dir $@) - @echo INSTALLSYM dSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + @echo INSTALLSYM dSYM $(@F) "($(CURRENT_ARCH_CONFIG_LC))" $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ -$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME): $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMDWARFDIR)/mach_kernel.sys ALWAYS +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME): $(TARGET)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) ALWAYS $(_v)$(MKDIR) $(dir $@) $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ - echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC))"; \ + echo INSTALLSYM dSYM $(@F).dSYM "($(CURRENT_ARCH_CONFIG_LC))"; \ $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ cmdstatus=$$?; \ else \ - echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC))"; \ + echo INSTALLSYM dSYM $(@F).dSYM "($(CURRENT_ARCH_CONFIG_LC))"; \ $(LIPO) -create $@ $< -output $@; \ cmdstatus=$$?; \ fi; \ @@ -255,92 +220,33 @@ $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) $(DSTROO .PHONY: do_install_machine_specific_kernel do_install_machine_specific_kernel_dSYM -do_install_machine_specific_kernel: $(DSTROOT)/$(ALT_KERNEL_FILE_NAME) \ - $(SYMROOT)/$(ALT_KERNEL_FILE_NAME) +do_install_machine_specific_kernel: $(DSTROOT)/$(INSTALL_KERNEL_DIR)/$(KERNEL_FILE_NAME) \ + $(SYMROOT)/$(KERNEL_FILE_NAME) @: do_install_machine_specific_kernel_dSYM: \ - $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ - $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ - $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME) \ - $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME) + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_LLDBBOOTSTRAP_NAME) \ + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) @: .PHONY: do_install_machine_specific_KDK_dSYM do_install_machine_specific_KDK_dSYM: \ - $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME) \ - $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ - $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ - $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME) \ - $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME) + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_LLDBBOOTSTRAP_NAME) \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) @: -$(DSTROOT)/$(ALT_KERNEL_FILE_NAME): $(TARGET)/mach_kernel ALWAYS - $(_v)$(MKDIR) $(dir $@) - $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ 
]; then \ - echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ - $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ - cmdstatus=$$?; \ - else \ - echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ - $(LIPO) -create $@ $< -output $@; \ - cmdstatus=$$?; \ - fi; \ - exit $$cmdstatus - -$(SYMROOT)/$(ALT_KERNEL_FILE_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME): $(TARGET)/mach_kernel.sys ALWAYS - $(_v)$(MKDIR) $(dir $@) - $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ - echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ - $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ - cmdstatus=$$?; \ - else \ - echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ - $(LIPO) -create $@ $< -output $@; \ - cmdstatus=$$?; \ - fi; \ - exit $$cmdstatus - -$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMINFODIR)/Info.plist - $(_v)$(MKDIR) $(dir $@) - @echo INSTALLSYM dSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))" -ifeq ($(USE_BINARY_PLIST),1) - $(_v)$(PLUTIL) -convert binary1 -o $@ $< -else - $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ -endif - -$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros - $(_v)$(MKDIR) $(dir $@) - @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" - $(_v)$(CP) -r $< $(dir $@) - $(_v)$(TOUCH) $@ - -$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME): $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros/core/xnu_lldb_init.py - $(_v)$(MKDIR) $(dir $@) - @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))" - $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ - -$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME): $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMDWARFDIR)/mach_kernel.sys ALWAYS - $(_v)$(MKDIR) $(dir $@) - $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ - echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ - $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ - cmdstatus=$$?; \ - else \ - echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ - $(LIPO) -create $@ $< -output $@; \ - cmdstatus=$$?; \ - fi; \ - exit $$cmdstatus - # The $(RM) is needed so that the $(LN) doesn't dereference an existing # symlink during incremental builds and create a new symlink inside # the target of the existing symlink -do_installhdrs_mi:: +do_installhdrs_mi:: $(DSTROOT)/$(KRESDIR)/Info.plist $(_v)$(MKDIR) $(DSTROOT)/$(KINCFRAME) $(_v)$(MKDIR) $(DSTROOT)/$(KPINCDIR) + $(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR) $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/Versions/Current $(_v)$(LN) $(KINCVERS) $(DSTROOT)/$(KINCFRAME)/Versions/Current $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/Headers @@ -349,14 +255,9 @@ do_installhdrs_mi:: $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/PrivateHeaders $(_v)$(LN) 
Versions/Current/PrivateHeaders \ $(DSTROOT)/$(KINCFRAME)/PrivateHeaders -ifeq ($(INSTALL_SYSTEM_FRAMEWORK_SYMLINKS),1) - $(_v)$(MKDIR) $(DSTROOT)/$(SINCFRAME)/Versions - $(_v)$(RM) $(DSTROOT)/$(SINCFRAME)/Versions/Current - $(_v)$(LN) $(SINCVERS) $(DSTROOT)/$(SINCFRAME)/Versions/Current - $(_v)$(RM) $(DSTROOT)/$(SINCFRAME)/PrivateHeaders - $(_v)$(LN) Versions/Current/PrivateHeaders \ - $(DSTROOT)/$(SINCFRAME)/PrivateHeaders -endif + $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/Resources + $(_v)$(LN) Versions/Current/Resources \ + $(DSTROOT)/$(KINCFRAME)/Resources $(DSTROOT)/$(KRESDIR)/Info.plist: $(SOURCE)/EXTERNAL_HEADERS/Info.plist $(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR) @@ -366,25 +267,5 @@ ifeq ($(USE_BINARY_PLIST),1) $(_v)$(PLUTIL) -convert binary1 -o $@ $@ endif -.PHONY: do_install_kernel_framework - -do_install_kernel_framework: $(DSTROOT)/$(KRESDIR)/Info.plist - $(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR) - $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/Resources - $(_v)$(LN) Versions/Current/Resources \ - $(DSTROOT)/$(KINCFRAME)/Resources -ifeq ($(DELETE_KERNEL_FRAMEWORK_AND_MISC),1) - $(_v)$(FIND) $(DSTROOT)/$(KINCFRAME) \ - $(DSTROOT)/$(INSTALL_EXTENSIONS_DIR) \ - $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR) \ - $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR) \ - \( -type f -o -type l \) -exec $(RM) "{}" \; - $(_v)$(FIND) -d $(DSTROOT)/$(KINCFRAME) \ - $(DSTROOT)/$(INSTALL_EXTENSIONS_DIR) \ - $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR) \ - $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR) \ - \( -type d \) -exec $(RMDIR) "{}" \; -endif - print_exports: $(_v)printenv | sort diff --git a/makedefs/MakeInc.rule b/makedefs/MakeInc.rule index 06fc1bda5..a635cf4c7 100644 --- a/makedefs/MakeInc.rule +++ b/makedefs/MakeInc.rule @@ -81,6 +81,11 @@ $(3)_MKDIR: $$(_v)$$(MKDIR) ./$(3) $$(_v)$$(MKDIR) $(dir $(firstword $(1))) +# Rebuild if unifdef flags change +$(1): $(3)/.UNIFDEF_FLAGS +$(3)/.UNIFDEF_FLAGS: ALWAYS | $(3)_MKDIR + $$(_v)$$(REPLACECONTENTS) $$@ $$(UNIFDEF) $(4) + $(1): $(dir $(firstword $(1)))% : $(if $(2),%,$$(SOURCE)/%) | $(3)_MKDIR @echo INSTALLHDR $$* $$(_v)$$(UNIFDEF) $(4) $$< > ./$(3)/$$*.unifdef.$$$$$$$$; \ @@ -223,7 +228,7 @@ do_exporthdrs_md: $(EXPORT_MD_GEN_INC_FILES) $(EXPORT_MD_INC_FILES) # S_RULE_0=@echo AS $@ -S_RULE_1A=$(_v)${S_KCC} -c ${SFLAGS} -MD -MF $(@:o=d) -MP -DASSEMBLER ${$@_SFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} +S_RULE_1A=$(_v)${S_KCC} -c ${SFLAGS} -MD -MF $(@:o=d) -MP ${$@_SFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} S_RULE_1B=$( /dev/null || true; fi else @@ -321,6 +326,13 @@ INSTALL_MAN_FILES_LINKS = $(call function_generate_man_links_rules,$(INSTALL_MAN do_installman: $(INSTALL_MAN_FILES) $(INSTALL_MAN_FILES_LINKS) @: +.PHONY: do_textfiles_install + +# Do-nothing rule, since not all levels of the recursive hierarchy might implement this +# in their local Makefiles. Those that do will use a "::" rule to augment this. +do_textfiles_install:: do_installman + @: + .PHONY: do_build_setup # Do-nothing rule, since not all levels of the recursive hierarchy might implement this @@ -335,6 +347,11 @@ do_build_setup:: do_config_all:: @: --include Makedep +.PHONY: do_config_install + +# Do-nothing rule, since not all levels of the recursive hierarchy might implement this +# in their local Makefiles. Those that do will use a "::" rule to augment this. 
+do_config_install:: + @: # vim: set ft=make: diff --git a/makedefs/MakeInc.top b/makedefs/MakeInc.top index 052453c7d..e698cf2fa 100644 --- a/makedefs/MakeInc.top +++ b/makedefs/MakeInc.top @@ -34,9 +34,9 @@ include $(MakeInc_cmd) # # Default to current kernel architecture -ifeq ($(PLATFORM),iPhoneOS) +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) override DEFAULT_ARCH_CONFIG := ARM -else ifeq ($(PLATFORM),iPhoneSimulator) +else ifneq ($(filter iPhoneSimulator iPhoneNanoSimulator,$(PLATFORM)),) override DEFAULT_ARCH_CONFIG := X86_64 else override DEFAULT_ARCH_CONFIG := X86_64 @@ -58,7 +58,9 @@ endif ifeq ($(RC_ProjectName),xnu_debug) override DEFAULT_KERNEL_CONFIG := DEBUG -else ifeq ($(PLATFORM),iPhoneOS) +else ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) +override DEFAULT_KERNEL_CONFIG := DEVELOPMENT +else ifeq ($(PLATFORM),MacOSX) override DEFAULT_KERNEL_CONFIG := DEVELOPMENT else override DEFAULT_KERNEL_CONFIG := RELEASE @@ -75,6 +77,7 @@ endif override DEFAULT_I386_MACHINE_CONFIG := NONE override DEFAULT_X86_64_MACHINE_CONFIG := NONE +override DEFAULT_X86_64H_MACHINE_CONFIG := NONE # This is typically never specified (TARGET_CONFIGS is used) @@ -103,13 +106,33 @@ endif # default architecture configuration = system architecture where you are running make. +ifeq ($(PLATFORM),MacOSX) + +# Defaults for "make all_desktop" +ifeq ($(KERNEL_CONFIGS),DEFAULT) +KERNEL_CONFIGS_DESKTOP := RELEASE DEVELOPMENT +else +KERNEL_CONFIGS_DESKTOP := $(KERNEL_CONFIGS) +endif + +endif + ifndef TARGET_CONFIGS ifneq ($(PRODUCT_CONFIGS),) # generate TARGET_CONFIGS using KERNEL_CONFIGS and PRODUCT_CONFIGS TARGET_CONFIGS := $(foreach my_kernel_config,$(KERNEL_CONFIGS),$(foreach my_product_config,$(shell printf "%s" "$(PRODUCT_CONFIGS)" | $(TR) A-Z a-z),$(my_kernel_config) $(subst ;, ,$(call function_lookup_product,$(my_product_config))))) +else ifneq ($(filter %_release_embedded,$(MAKECMDGOALS)),) +# generate TARGET_CONFIGS for RELEASE kernel configs and products in the device map +TARGET_CONFIGS := $(foreach my_kernel_config,RELEASE,$(foreach my_arch_config,$(ARCH_CONFIGS_EMBEDDED),$(foreach my_product_config,$(DEVICEMAP_PRODUCTS_$(my_arch_config)),$(my_kernel_config) $(subst ;, ,$(call function_lookup_product,$(my_product_config)))))) +else ifneq ($(filter %_development_embedded,$(MAKECMDGOALS)),) +# generate TARGET_CONFIGS for DEVELOPMENT kernel configs and products in the device map +TARGET_CONFIGS := $(foreach my_kernel_config,DEVELOPMENT,$(foreach my_arch_config,$(ARCH_CONFIGS_EMBEDDED),$(foreach my_product_config,$(DEVICEMAP_PRODUCTS_$(my_arch_config)),$(my_kernel_config) $(subst ;, ,$(call function_lookup_product,$(my_product_config)))))) else ifneq ($(filter %_embedded,$(MAKECMDGOALS)),) # generate TARGET_CONFIGS for all kernel configs and products in the device map TARGET_CONFIGS := $(foreach my_kernel_config,$(KERNEL_CONFIGS_EMBEDDED),$(foreach my_arch_config,$(ARCH_CONFIGS_EMBEDDED),$(foreach my_product_config,$(DEVICEMAP_PRODUCTS_$(my_arch_config)),$(my_kernel_config) $(subst ;, ,$(call function_lookup_product,$(my_product_config)))))) +else ifneq ($(filter %_desktop,$(MAKECMDGOALS)),) +# generate TARGET_CONFIGS for all kernel configs for B&I +TARGET_CONFIGS := $(foreach my_kern_config, $(KERNEL_CONFIGS_DESKTOP), $(foreach my_arch_config, $(ARCH_CONFIGS), $(foreach my_machine_config, $(MACHINE_CONFIGS), $(my_kern_config) $(my_arch_config) $(my_machine_config)))) else # generate TARGET_CONFIGS using KERNEL_CONFIGS and ARCH_CONFIGS and MACHINE_CONFIGS (which defaults to "DEFAULT") 
TARGET_CONFIGS := $(foreach my_kern_config, $(KERNEL_CONFIGS), $(foreach my_arch_config, $(ARCH_CONFIGS), $(foreach my_machine_config, $(MACHINE_CONFIGS), $(my_kern_config) $(my_arch_config) $(my_machine_config)))) @@ -147,22 +170,17 @@ FIRST_BUILD_CONFIG = $(firstword $(BUILD_CONFIGS)) MEMORY_SIZE := $(shell /usr/sbin/sysctl -n hw.memsize) -# Embedded kernels use LTO by default. -ifeq ($(PLATFORM),iPhoneOS) -export BUILD_LTO := 1 -endif - -LARGE_BUILD_FOOTPRINT := $(BUILD_LTO) +# Assume LTO scaling by default, unless it is being explicitly passed on the command-line +LARGE_BUILD_FOOTPRINT := $(if $(BUILD_LTO),$(BUILD_LTO),1) ifeq ($(LARGE_BUILD_FOOTPRINT),1) RAM_PER_KERNEL_BUILD := 8589934592 -FLOCK_SIZE := $(shell if [ $(MEMORY_SIZE) -le $$((1 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 1; elif [ $(MEMORY_SIZE) -gt $$((8 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 8; else expr $(MEMORY_SIZE) / $(RAM_PER_KERNEL_BUILD); fi ) else RAM_PER_KERNEL_BUILD := 268435456 -FLOCK_SIZE := $(shell if [ $(MEMORY_SIZE) -le $$((2 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 2; elif [ $(MEMORY_SIZE) -gt $$((8 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 8; else expr $(MEMORY_SIZE) / $(RAM_PER_KERNEL_BUILD); fi ) endif -# $(warning Building $(FLOCK_SIZE) kernels in parallel) +KERNEL_BUILDS_IN_PARALLEL := $(shell if [ $(MEMORY_SIZE) -le $$((1 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 1; elif [ $(MEMORY_SIZE) -gt $$(($(SYSCTL_HW_PHYSICALCPU) * $(RAM_PER_KERNEL_BUILD))) ]; then echo $(SYSCTL_HW_PHYSICALCPU); else expr $(MEMORY_SIZE) / $(RAM_PER_KERNEL_BUILD); fi ) +# $(warning Building $(KERNEL_BUILDS_IN_PARALLEL) kernels in parallel) # # TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template @@ -177,8 +195,10 @@ endif # $(6) is which build configs to build in parallel # # Since building many configurations in parallel may overwhelm the system, -# we try to throttle behavior into more managable "flocks" of N configurations -# at once, by creating a dependency on all members of the previous flock. +# we try to throttle behavior into more manageable S "stripes" of N/S +# configurations by generating sequential dependencies between configs +# in each stripe. 
That ensures that only S kernel builds are occurring +# at once at any point in time define TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template @@ -187,9 +207,14 @@ $(1)_bootstrap_target_list = $$(addprefix $(1)_bootstrap_,$(6)) .PHONY: $$($(1)_bootstrap_target_list) -$$(eval $$(call _function_generate_flock_groupings,$(1),$$(wordlist 1,$(5),$(6)),$$(wordlist $(call increment,$(5)),$$(words $(6)),$(6)),,$(5))) +$(1)_generated_stripe_dependencies = $$(call _function_generate_stripe_groupings,$(1),$(5),$(call reverse,$(6))) +ifeq ($$(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) +$$(warning Generate makefile fragment: $$($(1)_generated_stripe_dependencies)) +endif +$$(eval $$($(1)_generated_stripe_dependencies)) + -$$($(1)_bootstrap_target_list): $(1)_bootstrap_% : $(1)_flock_dep_for_% $$(addsuffix _bootstrap_%,$(4)) $(3) +$$($(1)_bootstrap_target_list): $(1)_bootstrap_% : $(1)_stripe_dep_for_% $$(addsuffix _bootstrap_%,$(4)) $(3) $$(_v)$$(MKDIR) $${OBJROOT}/$$(call function_convert_build_config_to_objdir,$$(patsubst $(1)_bootstrap_%,%,$$@))$(2) $$(_v)$${MAKE} \ -C $${OBJROOT}/$$(call function_convert_build_config_to_objdir,$$(patsubst $(1)_bootstrap_%,%,$$@))$(2) \ @@ -211,42 +236,58 @@ $(1)_bootstrap: $$($(1)_bootstrap_target_list) endef # -# TOP_LEVEL_FLOCK_DEPENDENCY_template +# TOP_LEVEL_STRIPE_DEPENDENCY_template # # $(1) is the Makefile target we are building for -# $(2) are the members of the current flock -# $(3) is what the flock depends on. None of the build -# configs in $(2) will start building until all of -# $(3) are done building +# $(2) is the build config that must build first +# $(3) is the build config that must build after $(2) -define TOP_LEVEL_FLOCK_DEPENDENCY_template +define TOP_LEVEL_STRIPE_DEPENDENCY_template -.PHONY: $(addprefix $(1)_flock_dep_for_,$(2)) +.PHONY: $(1)_stripe_dep_for_$(3) -$(addprefix $(1)_flock_dep_for_,$(2)): $(addprefix $(1)_bootstrap_,$(3)) + $(1)_stripe_dep_for_$(3): $(if $(2),$(1)_bootstrap_$(2)) endef # $(1) is the Makefile target we are building for -# $(2) is the first flock (5 build configs) -# $(3) is the rest of the build configs -# $(4) is the build configs that the first flock depends on -# $(5) is the flock size -_function_generate_flock_groupings = $(if $(3), $(call _function_generate_flock_groupings,$(1),$(wordlist 1,$(5),$(3)),$(wordlist $(call increment,$(5)),$(words $(3)),$(3)),$(2),$(5))) $(call TOP_LEVEL_FLOCK_DEPENDENCY_template,$(1),$(2),$(4)) +# $(2) is the stripe size +# $(3) is the list of the build configs in the current group +# $(4) is the list of remaining build configs +_function_generate_stripe_groupings_recursive = $(foreach stripe_index,$(call sequence,$(2)),$(if $(word $(stripe_index),$(4)),$(call TOP_LEVEL_STRIPE_DEPENDENCY_template,$(1),$(word $(stripe_index),$(3)),$(word $(stripe_index),$(4))))) $(if $(word $(call increment,$(2)),$(4)),$(call _function_generate_stripe_groupings_recursive,$(1),$(2),$(wordlist 1,$(2),$(4)),$(wordlist $(call increment,$(2)),$(words $(4)),$(4)))) + + +# $(1) is the Makefile target we are building for +# $(2) is the stripe size +# $(3) is the list of the build configs +_function_generate_stripe_groupings = $(call _function_generate_stripe_groupings_recursive,$(1),$(2),,$(3)) # # Setup pass for build system tools # -generated_top_level_build_setup = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_setup,/..,,,$(FLOCK_SIZE),$(FIRST_BUILD_CONFIG)) -ifeq ($(VERBOSE),YES) +generated_top_level_build_setup = $(call 
TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_setup,/..,,,1,$(FIRST_BUILD_CONFIG)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_setup)) endif $(eval $(generated_top_level_build_setup)) .PHONY: setup +# invalidate current kernel in $(SYMROOT). Timestamp must be +1 from a previous kernel build setup: build_setup_bootstrap + $(_v)$(TOUCH) $(OBJROOT)/.mach_kernel.timestamp.new + $(_v)while [ \! $(OBJROOT)/.mach_kernel.timestamp.new -nt $(OBJROOT)/.mach_kernel.timestamp ]; do \ + $(SLEEP) 1; \ + $(TOUCH) $(OBJROOT)/.mach_kernel.timestamp.new; \ + done + $(_v)$(MV) $(OBJROOT)/.mach_kernel.timestamp.new $(OBJROOT)/.mach_kernel.timestamp + $(_v)$(TOUCH) $(OBJROOT)/.symbolset.timestamp.new + $(_v)while [ \! $(OBJROOT)/.symbolset.timestamp.new -nt $(OBJROOT)/.symbolset.timestamp ]; do \ + $(SLEEP) 1; \ + $(TOUCH) $(OBJROOT)/.symbolset.timestamp.new; \ + done + $(_v)$(MV) $(OBJROOT)/.symbolset.timestamp.new $(OBJROOT)/.symbolset.timestamp # # Install kernel header files @@ -260,7 +301,7 @@ exporthdrs: exporthdrs_mi exporthdrs_md # generated_top_level_build_exporthdrs_mi = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_exporthdrs_mi,,setup,,1,$(FIRST_BUILD_CONFIG)) -ifeq ($(VERBOSE),YES) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_exporthdrs_mi)) endif $(eval $(generated_top_level_build_exporthdrs_mi)) @@ -271,8 +312,8 @@ exporthdrs_mi: build_exporthdrs_mi_bootstrap # Install machine dependent kernel header files # -generated_top_level_build_exporthdrs_md = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_exporthdrs_md,,setup,,$(FLOCK_SIZE),$(PRIMARY_BUILD_CONFIGS)) -ifeq ($(VERBOSE),YES) +generated_top_level_build_exporthdrs_md = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_exporthdrs_md,,setup,,$(KERNEL_BUILDS_IN_PARALLEL),$(PRIMARY_BUILD_CONFIGS)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_exporthdrs_md)) endif $(eval $(generated_top_level_build_exporthdrs_md)) @@ -293,40 +334,56 @@ else installhdrs: installhdrs_mi installhdrs_md endif -.PHONY: installhdrs_embedded +.PHONY: installhdrs_embedded installhdrs_release_embedded installhdrs_development_embedded installhdrs_desktop + +installhdrs_embedded installhdrs_release_embedded installhdrs_desktop: installhdrs -installhdrs_embedded: installhdrs +installhdrs_development_embedded: # # Install machine independent header files # -generated_top_level_build_installhdrs_mi = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installhdrs_mi,,setup exporthdrs_mi,,1,$(FIRST_BUILD_CONFIG)) -ifeq ($(VERBOSE),YES) +generated_top_level_build_installhdrs_mi = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installhdrs_mi,,setup,build_exporthdrs_mi,1,$(FIRST_BUILD_CONFIG)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_installhdrs_mi)) endif $(eval $(generated_top_level_build_installhdrs_mi)) -installhdrs_mi: exporthdrs_mi build_installhdrs_mi_bootstrap +installhdrs_mi: build_installhdrs_mi_bootstrap # # Install machine dependent kernel header files # -generated_top_level_build_installhdrs_md = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installhdrs_md,,setup exporthdrs_md,,$(FLOCK_SIZE),$(PRIMARY_BUILD_CONFIGS)) -ifeq ($(VERBOSE),YES) +generated_top_level_build_installhdrs_md = $(call 
TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installhdrs_md,,setup,build_exporthdrs_md,$(KERNEL_BUILDS_IN_PARALLEL),$(PRIMARY_BUILD_CONFIGS)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_installhdrs_md)) endif $(eval $(generated_top_level_build_installhdrs_md)) -installhdrs_md: exporthdrs_md build_installhdrs_md_bootstrap +installhdrs_md: build_installhdrs_md_bootstrap + +# +# Install text files (man pages, dtrace scripts, etc.) +# + +generated_top_level_textfiles_install = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,textfiles_install,,setup,,1,$(FIRST_BUILD_CONFIG)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) +$(warning Generate makefile fragment: $(generated_top_level_textfiles_install)) +endif +$(eval $(generated_top_level_textfiles_install)) + +.PHONY: install_textfiles + +install_textfiles: textfiles_install_bootstrap # # Build all architectures for all Configuration/Architecture options # -generated_top_level_build_all = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_all,,setup exporthdrs,,$(FLOCK_SIZE),$(BUILD_CONFIGS)) -ifeq ($(VERBOSE),YES) +generated_top_level_build_all = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_all,,setup exporthdrs,,$(KERNEL_BUILDS_IN_PARALLEL),$(BUILD_CONFIGS)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_all)) endif $(eval $(generated_top_level_build_all)) @@ -339,66 +396,88 @@ build: build_all_bootstrap # Post-process build results # -generated_top_level_config_all = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,config_all,,setup,build_all,$(FLOCK_SIZE),$(BUILD_CONFIGS)) -ifeq ($(VERBOSE),YES) +generated_top_level_config_all = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,config_all,,setup,build_all,$(KERNEL_BUILDS_IN_PARALLEL),$(BUILD_CONFIGS)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_config_all)) endif $(eval $(generated_top_level_config_all)) -.PHONY: all +.PHONY: all config -all: config_all_bootstrap +all config: config_all_bootstrap -.PHONY: all_embedded +.PHONY: all_embedded all_release_embedded all_development_embedded all_desktop -all_embedded: all +all_embedded all_release_embedded all_development_embedded all_desktop: all # -# Install kernel and header files +# Install kernel files # generated_top_level_build_install_primary = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_install_primary,,setup,config_all,1,$(PRIMARY_BUILD_CONFIGS)) -ifeq ($(VERBOSE),YES) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_install_primary)) endif $(eval $(generated_top_level_build_install_primary)) -generated_top_level_build_install_non_primary = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_install_non_primary,,setup,config_all,$(FLOCK_SIZE),$(NON_PRIMARY_BUILD_CONFIGS)) -ifeq ($(VERBOSE),YES) +.PHONY: install_primary + +install_primary: build_install_primary_bootstrap + +generated_top_level_build_install_non_primary = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_install_non_primary,,setup,config_all,$(KERNEL_BUILDS_IN_PARALLEL),$(NON_PRIMARY_BUILD_CONFIGS)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_build_install_non_primary)) endif $(eval $(generated_top_level_build_install_non_primary)) +.PHONY: install_non_primary + 
+install_non_primary: build_install_non_primary_bootstrap + +generated_top_level_config_install = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,config_install,,setup,config_all,1,$(PRIMARY_BUILD_CONFIGS)) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) +$(warning Generate makefile fragment: $(generated_top_level_config_install)) +endif +$(eval $(generated_top_level_config_install)) + +.PHONY: install_config final_touch_config_timestamps + +install_config: config_install_bootstrap final_touch_config_timestamps + +# Tell the next build the latest timestamp of any potential file in DSTROOT/SYMROOT +final_touch_config_timestamps: config_install_bootstrap + $(_v)$(TOUCH) $(OBJROOT)/.symbolset.timestamp + +# +# Aggregate install targets, which install everything appropriate for the current build alias/make target +# .PHONY: install ifeq ($(RC_ProjectName),xnu_debug) - -install: build_install_primary_bootstrap build_install_non_primary_bootstrap +install: install_kernels else ifeq ($(RC_ProjectName),xnu_headers_Sim) install: installhdrs else -install: all installhdrs installman build_install_primary_bootstrap build_install_non_primary_bootstrap +install: installhdrs install_textfiles install_config install_kernels endif -.PHONY: install_embedded +.PHONY: install_embedded install_release_embedded install_development_embedded install_desktop -install_embedded: install +# By default, all kernel files, headers, text files, and pseudo-kexts are installed +install_embedded install_release_embedded install_desktop: install -# -# Install man pages -# +# These special configs only install the kernel files +install_development_embedded: install_kernels -generated_top_level_build_installman = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installman,,setup,,1,$(FIRST_BUILD_CONFIG)) -ifeq ($(VERBOSE),YES) -$(warning Generate makefile fragment: $(generated_top_level_build_installman)) -endif -$(eval $(generated_top_level_build_installman)) +.PHONY: install_kernels final_touch_kernel_timestamps -.PHONY: installman +install_kernels: build_install_primary_bootstrap build_install_non_primary_bootstrap final_touch_kernel_timestamps -installman: setup build_installman_bootstrap +# Tell the next build the latest timestamp of any potential file in DSTROOT/SYMROOT +final_touch_kernel_timestamps: build_install_primary_bootstrap build_install_non_primary_bootstrap + $(_v)$(TOUCH) $(OBJROOT)/.mach_kernel.timestamp # # Install source tree @@ -408,7 +487,8 @@ installman: setup build_installman_bootstrap installsrc: @echo INSTALLSRC $(SRCROOT) $(_v)$(MKDIR) $(SRCROOT) - $(_v)($(TAR) -c --mode go=r,+X --no-ignore-case --exclude .git --exclude .svn --exclude cscope.\* --exclude BUILD --exclude \*~ -f - .) | (cd $(SRCROOT) && $(TAR) --no-same-owner -xf -) + $(_v)$(FIND) -x . \! \( \( -name BUILD -o -name .svn -o -name .git -o -name cscope.\* -o -name \*~ \) -prune \) -print0 | $(PAX) -rw -p a -d0 $(SRCROOT) + $(_v)$(CHMOD) -R go+rX $(SRCROOT) # @@ -418,10 +498,16 @@ installsrc: clean: @: + @rm -f cscope.* 2> /dev/null + @rm -f $(OBJROOT)/cscope.genhdrs/* 2> /dev/null || true + @rm -f TAGS 2> /dev/null + # # Build source file list for cscope database and tags # +.PHONY: cscope.files + cscope.files: @echo "Building file list for cscope and tags" @find . -name '*.h' -type f | grep -v ^..BUILD > _cscope.files 2> /dev/null @@ -430,6 +516,7 @@ cscope.files: @find . -name '*.cpp' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null @find . 
-name '*.s' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null @find . -name '*.h.template' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @cat $(OBJROOT)/cscope.genhdrs/* >> _cscope.files 2> /dev/null || true @echo -k -q -c > cscope.files 2> /dev/null @sort -u < _cscope.files >> cscope.files 2> /dev/null @rm -f _cscope.files _cscope.files2 2> /dev/null @@ -453,6 +540,7 @@ tags: cscope.files TAGS: cscope.files @echo "Building etags" @-cat cscope.files | etags -l auto -S - 2> /dev/null + @rm -f cscope.files 2> /dev/null help: @cat README @@ -462,7 +550,7 @@ print_exports: generated_top_level_print_exports = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,print_exports,,,,1,$(FIRST_BUILD_CONFIG)) -ifeq ($(VERBOSE),YES) +ifeq ($(VERBOSE_GENERATED_MAKE_FRAGMENTS),YES) $(warning Generate makefile fragment: $(generated_top_level_print_exports)) endif $(eval $(generated_top_level_print_exports)) diff --git a/osfmk/Makefile b/osfmk/Makefile index 3fa6846f2..67a457ba4 100644 --- a/osfmk/Makefile +++ b/osfmk/Makefile @@ -8,6 +8,8 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ mach \ + atm \ + bank \ device \ default_pager \ mach_debug \ @@ -22,14 +24,21 @@ INSTINC_SUBDIRS = \ libsa \ kdp \ pmc \ - kperf + kperf \ + prng INSTINC_SUBDIRS_X86_64 = \ mach \ i386 \ x86_64 +INSTINC_SUBDIRS_X86_64H = \ + mach \ + i386 \ + x86_64 EXPINC_SUBDIRS = \ mach \ + atm \ + bank \ device \ default_pager \ mach_debug \ @@ -45,12 +54,17 @@ EXPINC_SUBDIRS = \ libsa \ console \ pmc \ - kperf + kperf \ + prng EXPINC_SUBDIRS_X86_64 = \ mach \ i386 \ x86_64 +EXPINC_SUBDIRS_X86_64H = \ + mach \ + i386 \ + x86_64 COMP_SUBDIRS = \ conf diff --git a/osfmk/UserNotification/KUNCUserNotifications.c b/osfmk/UserNotification/KUNCUserNotifications.c index 98ad8b928..710f6fa8e 100644 --- a/osfmk/UserNotification/KUNCUserNotifications.c +++ b/osfmk/UserNotification/KUNCUserNotifications.c @@ -62,7 +62,7 @@ struct UNDReply { }; #define UNDReply_lock(reply) lck_mtx_lock(&reply->lock) -#define UNDReply_unlock(reply) lck_mtx_lock(&reply->lock) +#define UNDReply_unlock(reply) lck_mtx_unlock(&reply->lock) extern lck_grp_t LockCompatGroup; diff --git a/osfmk/atm/Makefile b/osfmk/atm/Makefile new file mode 100644 index 000000000..ee54e0b3e --- /dev/null +++ b/osfmk/atm/Makefile @@ -0,0 +1,123 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + +include $(MakeInc_cmd) +include $(MakeInc_def) + +MIG_TYPES = \ + atm_types.defs + +MIG_DEFS = \ + atm_notification.defs + +MACH_PRIVATE_DEFS = + +# +# MIG-generated headers that are traditionally used by user +# level code. 
+# +MIG_USHDRS = + +MIG_UUHDRS = + +MIGINCLUDES = ${MIG_UUHDRS} ${MIG_USHDRS} + +DATAFILES = \ + atm_types.h \ + ${MIG_TYPES} \ + ${MIG_DEFS} + +INSTALL_MI_LIST = \ + ${DATAFILES} + +INSTALL_KF_MI_LIST = \ + ${DATAFILES} + +INSTALL_KF_MI_LCL_LIST = \ + ${DATAFILES} + +INSTALL_MI_GEN_LIST = + +INSTALL_MI_DIR = atm + +EXPORT_MI_LIST = \ + ${DATAFILES} + +EXPORT_MI_GEN_LIST = \ + ${MIGINCLUDES} + +EXPORT_MI_DIR = atm + +${MIGINCLUDES} : ${MIG_TYPES} + +${MIG_UUHDRS} : \ + %.h : %.defs + @echo MIG $@ + $(_v)$(MIG) $(MIGFLAGS) \ + -server /dev/null \ + -user /dev/null \ + -header $@ \ + $< + +${MIG_USHDRS} : \ + %_server.h : %.defs + @echo MIG $@ + $(_v)$(MIG) $(MIGFLAGS) \ + -server /dev/null \ + -user /dev/null \ + -header /dev/null \ + -sheader $@ \ + $< + +# +# Build path +# + +INCFLAGS_MAKEFILE= -I.. + +MIGKSFLAGS = -DMACH_KERNEL_PRIVATE -DKERNEL_SERVER=1 +MIGKUFLAGS = -DMACH_KERNEL_PRIVATE -DKERNEL_USER=1 -maxonstack 1024 +# +# MIG-generated headers that are traditionally used by kernel +# level code. +# +MIG_KUHDRS = \ + atm_notification.h + +MIG_KUSRC = \ + atm_notification_user.c + +MIG_KSHDRS = + +MIG_KSSRC = + +COMP_FILES = ${MIG_KUSRC} ${MIG_KSSRC} + +do_build_all:: $(COMP_FILES) + +${COMP_FILES} : ${MIG_TYPES} + +${MIG_KUSRC} : \ + %_user.c : %.defs + @echo MIG $@ + $(_v)${MIG} ${MIGFLAGS} ${MIGKUFLAGS} \ + -user $*_user.c \ + -header $*.h \ + -server /dev/null \ + -sheader /dev/null \ + $< + +${MIG_KSSRC}: \ + %_server.c : %.defs + @echo MIG $@ + $(_v)${MIG} ${MIGFLAGS} ${MIGKSFLAGS} \ + -user /dev/null \ + -header /dev/null \ + -server $*_server.c \ + -sheader $*_server.h \ + $< + +include $(MakeInc_rule) +include $(MakeInc_dir) diff --git a/osfmk/atm/atm.c b/osfmk/atm/atm.c new file mode 100644 index 000000000..f1ba71062 --- /dev/null +++ b/osfmk/atm/atm.c @@ -0,0 +1,1429 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_ATM_VALUES (2 * 4096) +#define MAX_TRACE_BUFFER_SIZE (0x40000000) /* Restrict to 1GB per task */ +#define MAX_MAILBOX_SIZE (8 * 4096) + +#define ATM_VALUE_TO_HANDLE(x) (CAST_DOWN(atm_voucher_id_t, (x))) +#define HANDLE_TO_ATM_VALUE(x) (CAST_DOWN(atm_value_t, (x))) + +#define ATM_MAX_HASH_TABLE_SIZE (256) +#define AID_HASH_MASK (0xFF) +#define AID_TO_HASH(x) ((x) & (AID_HASH_MASK)) + +#define ATM_LIST_DEAD_MAX 15 + +#define AID_ARRAY_COUNT_MAX (256) + +struct atm_value_hash atm_value_hash_table[ATM_MAX_HASH_TABLE_SIZE]; +extern int maxproc; + +/* Global flag to disable ATM. ATM get value and memory registration will return error. */ +boolean_t disable_atm = FALSE; + +#if DEVELOPMENT || DEBUG +queue_head_t atm_descriptors_list; +queue_head_t atm_values_list; +#endif + +ipc_voucher_attr_control_t voucher_attr_control; /* communication channel from ATM to voucher system */ +static zone_t atm_value_zone, atm_descriptors_zone, atm_link_objects_zone; + +static aid_t get_aid(); +static atm_value_t atm_value_alloc_init(); +static void atm_value_dealloc(atm_value_t atm_value); +static void atm_hash_table_init(); +static void atm_value_hash_table_insert(atm_value_t new_atm_value); +static void atm_value_hash_table_delete(atm_value_t atm_value); +static atm_value_t get_atm_value_from_aid(aid_t aid); +static void atm_value_get_ref(atm_value_t atm_value); +static kern_return_t atm_listener_insert(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, mailbox_offset_t mailbox_offset); +static void atm_listener_delete_all(atm_value_t atm_value); +static atm_task_descriptor_t atm_task_descriptor_alloc_init(mach_port_t trace_buffer,uint64_t buffer_size, void *mailbox_addr, uint64_t mailbox_array_size, __assert_only task_t task); +static void atm_descriptor_get_reference(atm_task_descriptor_t task_descriptor); +static void atm_task_descriptor_dealloc(atm_task_descriptor_t task_descriptor); +static mach_atm_subaid_t atm_get_min_sub_aid(atm_value_t atm_value); +static void +atm_get_min_sub_aid_array(aid_t *aid_array, mach_atm_subaid_t *subaid_array, uint32_t count) __unused; +static kern_return_t atm_value_unregister(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, mailbox_offset_t mailbox_offset); +static kern_return_t atm_listener_delete(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, mailbox_offset_t mailbox_offset); +static void atm_link_get_reference(atm_link_object_t link_object); +static void atm_link_dealloc(atm_link_object_t link_object); +kern_return_t atm_invoke_collection(atm_value_t atm_value, uint64_t sub_activity_id, uint32_t flags); +kern_return_t atm_send_user_notification(aid_t aid, uint64_t subaid, mach_port_t *buffers_array, uint64_t *sizes_array, mach_msg_type_number_t count, uint32_t flags); + +kern_return_t +atm_release_value( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_t value, + mach_voucher_attr_value_reference_t sync); + +kern_return_t +atm_get_value( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_recipe_command_t command, + mach_voucher_attr_value_handle_array_t prev_values, + mach_msg_type_number_t __assert_only prev_value_count, + mach_voucher_attr_content_t recipe, + mach_voucher_attr_content_size_t 
recipe_size, + mach_voucher_attr_value_handle_t *out_value, + ipc_voucher_t *out_value_voucher); + +kern_return_t +atm_extract_content( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_array_t values, + mach_msg_type_number_t value_count, + mach_voucher_attr_recipe_command_t *out_command, + mach_voucher_attr_content_t out_recipe, + mach_voucher_attr_content_size_t *in_out_recipe_size); + +kern_return_t +atm_command( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_array_t values, + mach_msg_type_number_t value_count, + mach_voucher_attr_command_t command, + mach_voucher_attr_content_t in_content, + mach_voucher_attr_content_size_t in_content_size, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *in_out_content_size); + +void +atm_release(ipc_voucher_attr_manager_t __assert_only manager); + +/* + * communication channel from voucher system to ATM + */ +struct ipc_voucher_attr_manager atm_manager = { + .ivam_release_value = atm_release_value, + .ivam_get_value = atm_get_value, + .ivam_extract_content = atm_extract_content, + .ivam_command = atm_command, + .ivam_release = atm_release, +}; + +#if DEVELOPMENT || DEBUG +decl_lck_mtx_data(, atm_descriptors_list_lock); +decl_lck_mtx_data(, atm_values_list_lock); + +lck_grp_t atm_dev_lock_grp; +lck_attr_t atm_dev_lock_attr; +lck_grp_attr_t atm_dev_lock_grp_attr; +#endif + +extern vm_map_t kernel_map; +/* + * Global aid. Incremented on each get_aid. + */ +aid_t global_aid; + +/* + * Lock group attributes for atm sub system. + */ +lck_grp_t atm_lock_grp; +lck_attr_t atm_lock_attr; +lck_grp_attr_t atm_lock_grp_attr; + + +/* + * Routine: atm_init + * Purpose: Initialize the atm subsystem. + * Returns: None. + */ +void +atm_init() +{ + kern_return_t kr = KERN_SUCCESS; + char temp_buf[20]; + + /* Disable atm if disable_atm present in device-tree properties or in boot-args */ + if ((PE_get_default("kern.disable_atm", temp_buf, sizeof(temp_buf))) || + (PE_parse_boot_argn("-disable_atm", temp_buf, sizeof(temp_buf)))) { + disable_atm = TRUE; + } + + /* setup zones for descriptors, values and link objects */ + atm_value_zone = zinit(sizeof(struct atm_value), + MAX_ATM_VALUES * sizeof(struct atm_value), + sizeof(struct atm_value), + "atm_values"); + + atm_descriptors_zone = zinit(sizeof(struct atm_task_descriptor), + MAX_ATM_VALUES * sizeof(struct atm_task_descriptor), + sizeof(struct atm_task_descriptor), + "atm_task_descriptors"); + + atm_link_objects_zone = zinit(sizeof(struct atm_link_object), + MAX_ATM_VALUES * sizeof(struct atm_link_object), + sizeof(struct atm_link_object), + "atm_link_objects"); + + /* Initialize atm lock group and lock attributes. */ + lck_grp_attr_setdefault(&atm_lock_grp_attr); + lck_grp_init(&atm_lock_grp, "atm_lock", &atm_lock_grp_attr); + lck_attr_setdefault(&atm_lock_attr); + + global_aid = 1; + atm_hash_table_init(); + +#if DEVELOPMENT || DEBUG + /* Initialize global atm development lock group and lock attributes. 
*/
+	lck_grp_attr_setdefault(&atm_dev_lock_grp_attr);
+	lck_grp_init(&atm_dev_lock_grp, "atm_dev_lock", &atm_dev_lock_grp_attr);
+	lck_attr_setdefault(&atm_dev_lock_attr);
+
+	lck_mtx_init(&atm_descriptors_list_lock, &atm_dev_lock_grp, &atm_dev_lock_attr);
+	lck_mtx_init(&atm_values_list_lock, &atm_dev_lock_grp, &atm_dev_lock_attr);
+
+	queue_init(&atm_descriptors_list);
+	queue_init(&atm_values_list);
+#endif
+
+	/* Register the ATM manager with the voucher subsystem. */
+	kr = ipc_register_well_known_mach_voucher_attr_manager(
+		&atm_manager,
+		0,
+		MACH_VOUCHER_ATTR_KEY_ATM,
+		&voucher_attr_control);
+	if (kr != KERN_SUCCESS)
+		panic("ATM subsystem initialization failed");
+
+	kprintf("ATM subsystem is initialized\n");
+	return;
+}
+
+
+/*
+ * ATM Resource Manager Routines.
+ */
+
+
+/*
+ * Routine: atm_release_value
+ * Purpose: Release a value if sync matches the sync count of the value.
+ * Returns: KERN_SUCCESS: on successful deletion.
+ *          KERN_FAILURE: if the sync value does not match.
+ */
+kern_return_t
+atm_release_value(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_t value,
+	mach_voucher_attr_value_reference_t sync)
+{
+	atm_value_t atm_value = ATM_VALUE_NULL;
+
+	assert(MACH_VOUCHER_ATTR_KEY_ATM == key);
+	assert(manager == &atm_manager);
+
+	atm_value = HANDLE_TO_ATM_VALUE(value);
+	if (atm_value == VAM_DEFAULT_VALUE) {
+		/* Return success for the default value. */
+		return KERN_SUCCESS;
+	}
+
+	if (atm_value->sync != sync) {
+		return KERN_FAILURE;
+	}
+
+	/* Deallocate the atm value. */
+	atm_value_hash_table_delete(atm_value);
+	atm_value_dealloc(atm_value);
+	return KERN_SUCCESS;
+}
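
For orientation, here is a minimal user-space sketch of how a client reaches the atm_get_value handler below: a MACH_VOUCHER_ATTR_ATM_CREATE recipe sent through host_create_mach_voucher makes the voucher system call back into this registered manager, which allocates a fresh aid. The recipe layout and the host_create_mach_voucher call are assumed from the public mach_voucher interfaces of this era; treat the snippet as an illustration under that assumption, not as part of the patch.

    #include <mach/mach.h>
    #include <mach/mach_voucher.h>

    /* Illustrative sketch: request a voucher carrying a new ATM activity id. */
    static mach_voucher_t
    create_atm_voucher(void)
    {
        mach_voucher_attr_recipe_data_t recipe = {
            .key              = MACH_VOUCHER_ATTR_KEY_ATM,
            .command          = MACH_VOUCHER_ATTR_ATM_CREATE,
            .previous_voucher = MACH_VOUCHER_NULL,
            .content_size     = 0,
        };
        mach_voucher_t voucher = MACH_VOUCHER_NULL;

        /* The kernel routes this recipe to atm_get_value() through the
         * voucher attribute manager registered in atm_init(). */
        kern_return_t kr = host_create_mach_voucher(mach_host_self(),
            (mach_voucher_attr_raw_recipe_array_t)&recipe,
            sizeof(recipe), &voucher);
        return (kr == KERN_SUCCESS) ? voucher : MACH_VOUCHER_NULL;
    }
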
+
+
+/*
+ * Routine: atm_get_value
+ */
+kern_return_t
+atm_get_value(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_recipe_command_t command,
+	mach_voucher_attr_value_handle_array_t prev_values,
+	mach_msg_type_number_t __assert_only prev_value_count,
+	mach_voucher_attr_content_t __unused recipe,
+	mach_voucher_attr_content_size_t __unused recipe_size,
+	mach_voucher_attr_value_handle_t *out_value,
+	ipc_voucher_t *out_value_voucher)
+{
+	atm_value_t atm_value = ATM_VALUE_NULL;
+	mach_voucher_attr_value_handle_t atm_handle;
+	atm_task_descriptor_t task_descriptor = ATM_TASK_DESCRIPTOR_NULL;
+	task_t task;
+	mailbox_offset_t mailbox_offset;
+	natural_t i;
+	kern_return_t kr = KERN_SUCCESS;
+
+	assert(MACH_VOUCHER_ATTR_KEY_ATM == key);
+	assert(manager == &atm_manager);
+
+	/* never an out voucher */
+	*out_value_voucher = IPC_VOUCHER_NULL;
+
+	if (disable_atm)
+		return KERN_NOT_SUPPORTED;
+
+	switch (command) {
+
+	case MACH_VOUCHER_ATTR_ATM_REGISTER:
+
+		for (i = 0; i < prev_value_count; i++) {
+			atm_handle = prev_values[i];
+			atm_value = HANDLE_TO_ATM_VALUE(atm_handle);
+
+			if (atm_value == VAM_DEFAULT_VALUE)
+				continue;
+
+			task = current_task();
+			task_descriptor = task->atm_context;
+			if (task_descriptor != ATM_TASK_DESCRIPTOR_NULL) {
+				if (recipe_size != sizeof(mailbox_offset_t)) {
+					kr = KERN_INVALID_ARGUMENT;
+					break;
+				}
+				memcpy(&mailbox_offset, recipe, sizeof(mailbox_offset_t));
+				if (mailbox_offset > task_descriptor->mailbox_array_size) {
+					kr = KERN_INVALID_ARGUMENT;
+					break;
+				}
+
+				kr = atm_listener_insert(atm_value, task_descriptor, mailbox_offset);
+				if (kr != KERN_SUCCESS) {
+					break;
+				}
+			}
+
+			/* Increment the sync value. */
+			lck_mtx_lock(&atm_value->listener_lock);
+			atm_value->sync++;
+			lck_mtx_unlock(&atm_value->listener_lock);
+
+			*out_value = atm_handle;
+			return kr;
+		}
+
+		*out_value = ATM_VALUE_TO_HANDLE(VAM_DEFAULT_VALUE);
+		break;
+
+	case MACH_VOUCHER_ATTR_ATM_CREATE:
+
+		/* Allocate a new atm value and check it before publishing it in the hash table. */
+		atm_value = atm_value_alloc_init();
+		if (atm_value == ATM_VALUE_NULL) {
+			return KERN_RESOURCE_SHORTAGE;
+		}
+		atm_value_hash_table_insert(atm_value);
+
+		*out_value = ATM_VALUE_TO_HANDLE(atm_value);
+		break;
+
+	case MACH_VOUCHER_ATTR_ATM_NULL:
+	default:
+		kr = KERN_INVALID_ARGUMENT;
+		break;
+	}
+
+	return kr;
+}
+
+
+/*
+ * Routine: atm_extract_content
+ * Purpose: Extract a set of aids from an array of voucher values.
+ * Returns: KERN_SUCCESS: on success.
+ *          KERN_FAILURE: one of the values is not present in the hash.
+ *          KERN_NO_SPACE: insufficient buffer provided to fill the array of aids.
+ */
+kern_return_t
+atm_extract_content(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_array_t values,
+	mach_msg_type_number_t value_count,
+	mach_voucher_attr_recipe_command_t *out_command,
+	mach_voucher_attr_content_t out_recipe,
+	mach_voucher_attr_content_size_t *in_out_recipe_size)
+{
+	atm_value_t atm_value;
+	mach_voucher_attr_value_handle_t atm_handle;
+	natural_t i;
+
+	assert(MACH_VOUCHER_ATTR_KEY_ATM == key);
+	assert(manager == &atm_manager);
+
+	for (i = 0; i < value_count; i++) {
+		atm_handle = values[i];
+		atm_value = HANDLE_TO_ATM_VALUE(atm_handle);
+		if (atm_value == VAM_DEFAULT_VALUE)
+			continue;
+
+		if (sizeof(aid_t) > *in_out_recipe_size) {
+			*in_out_recipe_size = 0;
+			return KERN_NO_SPACE;
+		}
+
+		memcpy(&out_recipe[0], &atm_value->aid, sizeof(aid_t));
+		*out_command = MACH_VOUCHER_ATTR_ATM_NULL;
+		*in_out_recipe_size = sizeof(aid_t);
+		return KERN_SUCCESS;
+	}
+
+	*in_out_recipe_size = 0;
+	return KERN_SUCCESS;
+}
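
The companion read path: once a voucher carries an ATM attribute, user space can recover the aid that atm_extract_content copies out. A hedged sketch using the public mach_voucher_extract_attr_content call, whose exact signature is assumed from the mach_voucher headers of this period:

    #include <mach/mach.h>
    #include <mach/mach_voucher.h>

    /* Illustrative sketch: read the 64-bit activity id back out of a voucher. */
    static uint64_t
    voucher_aid(mach_voucher_t voucher)
    {
        uint64_t aid = 0;
        mach_voucher_attr_content_size_t size = sizeof(aid);

        kern_return_t kr = mach_voucher_extract_attr_content(voucher,
            MACH_VOUCHER_ATTR_KEY_ATM,
            (mach_voucher_attr_content_t)&aid, &size);
        if (kr != KERN_SUCCESS || size != sizeof(aid))
            return 0;   /* aids start at 1 (global_aid), so 0 can signal "none" */
        return aid;
    }
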
+
+/*
+ * Routine: atm_command
+ * Purpose: Execute a command against a set of ATM values.
+ * Returns: KERN_SUCCESS: on successful execution of the command.
+ *          KERN_FAILURE: on failure.
+ */
+kern_return_t
+atm_command(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_array_t values,
+	mach_msg_type_number_t value_count,
+	mach_voucher_attr_command_t command,
+	mach_voucher_attr_content_t in_content,
+	mach_voucher_attr_content_size_t in_content_size,
+	mach_voucher_attr_content_t out_content,
+	mach_voucher_attr_content_size_t *out_content_size)
+{
+	assert(MACH_VOUCHER_ATTR_KEY_ATM == key);
+	assert(manager == &atm_manager);
+	atm_value_t atm_value = ATM_VALUE_NULL;
+	natural_t i = 0;
+	aid_t *aid_array = NULL;
+	mach_atm_subaid_t *subaid_array = NULL;
+	uint32_t aid_array_count = 0;
+	atm_task_descriptor_t task_descriptor = ATM_TASK_DESCRIPTOR_NULL;
+	task_t task;
+	uint32_t collection_flags = ATM_ACTION_LOGFAIL;
+	kern_return_t kr = KERN_SUCCESS;
+
+	switch (command) {
+	case ATM_ACTION_COLLECT:
+		collection_flags = ATM_ACTION_COLLECT;
+		/* Fall through */
+
+	case ATM_ACTION_LOGFAIL: {
+		mach_atm_subaid_t sub_aid = 0;
+
+		/* find the first non-default atm_value */
+		for (i = 0; i < value_count; i++) {
+			atm_value = HANDLE_TO_ATM_VALUE(values[i]);
+			if (atm_value != VAM_DEFAULT_VALUE)
+				break;
+		}
+
+		/* If no ATM value was found on the voucher stack, this call was made in error. */
+		if (atm_value == NULL) {
+			return KERN_FAILURE;
+		}
+		if (in_content == NULL || in_content_size < sizeof(mach_atm_subaid_t)) {
+			return KERN_INVALID_ARGUMENT;
+		}
+
+		sub_aid = *(mach_atm_subaid_t *)(void *)in_content;
+		*out_content_size = 0;
+		kr = atm_invoke_collection(atm_value, sub_aid, collection_flags);
+		break;
+	}
+
+	case ATM_FIND_MIN_SUB_AID:
+		if ((in_content_size/sizeof(aid_t)) > (*out_content_size/sizeof(mach_atm_subaid_t)))
+			return KERN_FAILURE;
+
+		aid_array_count = in_content_size / sizeof(aid_t);
+		if (aid_array_count > AID_ARRAY_COUNT_MAX)
+			return KERN_FAILURE;
+
+		aid_array = (aid_t *) kalloc(aid_array_count * sizeof(aid_t));
+		if (aid_array == NULL)
+			return KERN_NO_SPACE;
+
+		subaid_array = (mach_atm_subaid_t *) kalloc(aid_array_count * sizeof(mach_atm_subaid_t));
+		if (subaid_array == NULL) {
+			kfree(aid_array, aid_array_count * sizeof(aid_t));
+			return KERN_NO_SPACE;
+		}
+
+		memcpy(aid_array, in_content, aid_array_count * sizeof(aid_t));
+		atm_get_min_sub_aid_array(aid_array, subaid_array, aid_array_count);
+
+		memcpy(out_content, subaid_array, aid_array_count * sizeof(mach_atm_subaid_t));
+		*out_content_size = aid_array_count * sizeof(mach_atm_subaid_t);
+
+		kfree(aid_array, aid_array_count * sizeof(aid_t));
+		kfree(subaid_array, aid_array_count * sizeof(mach_atm_subaid_t));
+		kr = KERN_SUCCESS;
+
+		break;
+
+	case ATM_ACTION_UNREGISTER:
+		/* find the first non-default atm_value */
+		for (i = 0; i < value_count; i++) {
+			atm_value = HANDLE_TO_ATM_VALUE(values[i]);
+			if (atm_value != VAM_DEFAULT_VALUE)
+				break;
+		}
+
+		/* If no ATM value was found on the voucher stack, this call was made in error. */
+		if (atm_value == NULL) {
+			return KERN_FAILURE;
+		}
+		if (in_content == NULL || in_content_size != sizeof(mailbox_offset_t)) {
+			return KERN_INVALID_ARGUMENT;
+		}
+
+		mailbox_offset_t mailbox_offset;
+		memcpy(&mailbox_offset, in_content, sizeof(mailbox_offset_t));
+		task = current_task();
+		task_descriptor = task->atm_context;
+
+		kr = atm_value_unregister(atm_value, task_descriptor, mailbox_offset);
+
+		break;
+
+	default:
+		kr = KERN_INVALID_ARGUMENT;
+		break;
+	}
+
+	return kr;
+}
+
+
+void
+atm_release(
+	ipc_voucher_attr_manager_t __assert_only manager)
+{
+	assert(manager == &atm_manager);
+}
+
+
+/*
+ * Routine: atm_invoke_collection
+ * Purpose: Sends a notification with an array of memory buffers.
+ * Note: may block until the user daemon responds.
+ */
+kern_return_t
+atm_invoke_collection(
+	atm_value_t atm_value,
+	subaid_t sub_activity_id,
+	uint32_t flags)
+{
+	aid_t aid = atm_value->aid;
+	kern_return_t kr = KERN_SUCCESS;
+	uint32_t array_count = 0, i = 0, requestor_index = 0;
+	uint64_t *sizes_array = NULL;
+	atm_link_object_t link_object = NULL;
+	mach_port_t *mem_array = NULL;
+	boolean_t need_swap_first = FALSE;
+	atm_task_descriptor_t requesting_descriptor = current_task()->atm_context;
+
+	lck_mtx_lock(&atm_value->listener_lock);
+	array_count = atm_value->listener_count;
+	lck_mtx_unlock(&atm_value->listener_lock);
+
+	if (array_count == 0) {
+		return KERN_SUCCESS;
+	}
+
+	mem_array = kalloc(sizeof(mach_port_t) * array_count);
+	if (mem_array == NULL) {
+		return KERN_NO_SPACE;
+	}
+
+	sizes_array = kalloc(sizeof(uint64_t) * array_count);
+	if (sizes_array == NULL) {
+		kfree(mem_array, sizeof(mach_port_t) * array_count);
+		return KERN_NO_SPACE;
+	}
+
+	lck_mtx_lock(&atm_value->listener_lock);
+	queue_iterate(&atm_value->listeners, link_object, atm_link_object_t, listeners_element) {
+		if (i >= array_count) {
+			break;
+		}
+
+		if (!need_swap_first && requesting_descriptor == link_object->descriptor) {
+			assert(requesting_descriptor != NULL);
+			requestor_index = i;
+			need_swap_first = TRUE;
+		}
+
+		sizes_array[i] = link_object->descriptor->trace_buffer_size;
+		mem_array[i] = ipc_port_copy_send(link_object->descriptor->trace_buffer);
+		if (!IPC_PORT_VALID(mem_array[i])) {
+			mem_array[i] = NULL;
+		}
+		i++;
+	}
+	lck_mtx_unlock(&atm_value->listener_lock);
+
+	/*
+	 * Move the requesting task's buffer to the front, so the diagnostics
+	 * daemon can process its buffers first.
+	 */
+	if (need_swap_first && requestor_index != 0) {
+		assert(requestor_index < array_count);
+		mach_port_t tmp_port = mem_array[0];
+		uint64_t tmp_size = sizes_array[0];
+		mem_array[0] = mem_array[requestor_index];
+		sizes_array[0] = sizes_array[requestor_index];
+		mem_array[requestor_index] = tmp_port;
+		sizes_array[requestor_index] = tmp_size;
+	}
+
+	if (i > 0) {
+		kr = atm_send_user_notification(aid, sub_activity_id, mem_array, sizes_array, i, flags);
+	}
+
+	kfree(mem_array, sizeof(mach_port_t) * array_count);
+	kfree(sizes_array, sizeof(uint64_t) * array_count);
+
+	return kr;
+}
+
+/*
+ * Routine: atm_send_user_notification
+ * Purpose: Make an upcall to the user-space daemon if it is listening for ATM notifications.
+ * Returns: KERN_SUCCESS for successful delivery.
+ *          KERN_FAILURE if the port is dead or NULL.
+ */
+kern_return_t
+atm_send_user_notification(
+	aid_t aid,
+	subaid_t subaid,
+	mach_port_t *buffers_array,
+	uint64_t *sizes_array,
+	mach_msg_type_number_t count,
+	uint32_t flags)
+{
+	mach_port_t user_port;
+	int error;
+	error = host_get_atm_notification_port(host_priv_self(), &user_port);
+	if ((error != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
+		return KERN_FAILURE;
+	}
+
+	return atm_collect_trace_info(user_port, aid, subaid, flags, buffers_array, count, sizes_array, count);
+}
+
+/*
+ * Routine: atm_send_proc_inspect_notification
+ * Purpose: Make an upcall to the user-space daemon if it is listening for
+ *          trace notifications for per-process inspection.
+ * Returns: KERN_SUCCESS for successful delivery.
+ *          KERN_FAILURE if the port is dead or NULL.
+ */
+
+kern_return_t
+atm_send_proc_inspect_notification(
+	task_t task,
+	int32_t traced_pid,
+	uint64_t traced_uniqueid)
+{
+	mach_port_t user_port = MACH_PORT_NULL;
+	mach_port_t memory_port = MACH_PORT_NULL;
+	atm_task_descriptor_t task_descriptor = ATM_TASK_DESCRIPTOR_NULL;
+	uint64_t buffer_size = 0;
+	int error;
+
+	/* look for the requested memory in the target task */
+	if (!task)
+		return KERN_INVALID_TASK;
+
+	task_lock(task);
+	if (task->atm_context) {
+		task_descriptor = task->atm_context;
+		atm_descriptor_get_reference(task_descriptor);
+	}
+	task_unlock(task);
+
+	if (task_descriptor == ATM_TASK_DESCRIPTOR_NULL) {
+		return KERN_FAILURE;
+	}
+
+	memory_port = ipc_port_copy_send(task_descriptor->trace_buffer);
+	buffer_size = task_descriptor->trace_buffer_size;
+	atm_task_descriptor_dealloc(task_descriptor);
+
+	/* get the communication port */
+	error = host_get_atm_notification_port(host_priv_self(), &user_port);
+	if ((error != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
+		ipc_port_release_send(memory_port);
+		return KERN_FAILURE;
+	}
+
+	return atm_inspect_process_buffer(user_port, traced_pid, traced_uniqueid, buffer_size, memory_port);
+}
+
+/*
+ * Routine: atm_value_alloc_init
+ * Purpose: Allocates an atm value struct and initializes it.
+ * Returns: atm_value_t: on success, with one sync count held on the atm_value.
+ *          ATM_VALUE_NULL: on failure.
+ */
+static atm_value_t
+atm_value_alloc_init()
+{
+	atm_value_t new_atm_value = ATM_VALUE_NULL;
+
+	new_atm_value = (atm_value_t) zalloc(atm_value_zone);
+	if (new_atm_value == ATM_VALUE_NULL)
+		panic("Ran out of ATM value structures.\n");
+
+	new_atm_value->aid = get_aid();
+	queue_init(&new_atm_value->listeners);
+	new_atm_value->sync = 1;
+	new_atm_value->listener_count = 0;
+	new_atm_value->reference_count = 1;
+	lck_mtx_init(&new_atm_value->listener_lock, &atm_lock_grp, &atm_lock_attr);
+
+#if DEVELOPMENT || DEBUG
+	lck_mtx_lock(&atm_values_list_lock);
+	queue_enter(&atm_values_list, new_atm_value, atm_value_t, value_elt);
+	lck_mtx_unlock(&atm_values_list_lock);
+#endif
+	return new_atm_value;
+}
+
+
+/*
+ * Routine: get_aid
+ * Purpose: Increment the global aid counter and return it.
+ * Returns: aid
+ */
+static aid_t
+get_aid()
+{
+	aid_t aid;
+	aid = (aid_t)OSIncrementAtomic64((SInt64 *)&global_aid);
+	return aid;
+}
+
+
+/*
+ * Routine: atm_value_dealloc
+ * Purpose: Drops a reference on the atm value and deallocates it once the
+ *          count hits zero. Deletes all the listeners on deallocation.
+ * Returns: None.
+ */
+static void
+atm_value_dealloc(atm_value_t atm_value)
+{
+	lck_mtx_lock(&atm_value->listener_lock);
+
+	atm_value->reference_count--;
+	assert(atm_value->reference_count >= 0);
+
+	if (atm_value->reference_count > 0) {
+		lck_mtx_unlock(&atm_value->listener_lock);
+		return;
+	}
+
+	lck_mtx_unlock(&atm_value->listener_lock);
+
+	/* Free up the atm value and also remove all the listeners. */
+	atm_listener_delete_all(atm_value);
+
+	lck_mtx_destroy(&atm_value->listener_lock, &atm_lock_grp);
+
+#if DEVELOPMENT || DEBUG
+	lck_mtx_lock(&atm_values_list_lock);
+	queue_remove(&atm_values_list, atm_value, atm_value_t, value_elt);
+	lck_mtx_unlock(&atm_values_list_lock);
+#endif
+	zfree(atm_value_zone, atm_value);
+	return;
+}
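
The reference discipline above is worth spelling out: the hash table owns one reference (taken in atm_value_alloc_init), every lookup takes another, and the final drop tears down the listener list. A standalone sketch of the same pattern, using pthreads in place of lck_mtx purely for illustration; all names here are invented for the example:

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct refobj {
        pthread_mutex_t lock;
        int32_t refs;               /* starts at 1: the table's reference */
    };

    static void refobj_get(struct refobj *o)
    {
        pthread_mutex_lock(&o->lock);
        o->refs++;                  /* mirrors atm_value_get_ref() */
        pthread_mutex_unlock(&o->lock);
    }

    static void refobj_put(struct refobj *o)
    {
        pthread_mutex_lock(&o->lock);
        int32_t left = --o->refs;
        assert(left >= 0);
        pthread_mutex_unlock(&o->lock);
        if (left == 0)
            free(o);                /* atm_value_dealloc() additionally tears down listeners */
    }
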
+
+
+/*
+ * Routine: atm_hash_table_init
+ * Purpose: Initialize the atm aid hash table.
+ * Returns: None.
+ */
+static void
+atm_hash_table_init()
+{
+	int i;
+
+	for (i = 0; i < ATM_MAX_HASH_TABLE_SIZE; i++) {
+		queue_init(&atm_value_hash_table[i].hash_list);
+		lck_mtx_init(&atm_value_hash_table[i].hash_list_lock, &atm_lock_grp, &atm_lock_attr);
+	}
+}
+
+
+/*
+ * Routine: atm_value_hash_table_insert
+ * Purpose: Insert an atm value in the hash table.
+ * Returns: None.
+ */
+static void
+atm_value_hash_table_insert(atm_value_t new_atm_value)
+{
+	int hash_index;
+	atm_value_hash_t hash_list_head;
+	aid_t aid = new_atm_value->aid;
+
+	hash_index = AID_TO_HASH(aid);
+	hash_list_head = &atm_value_hash_table[hash_index];
+
+	lck_mtx_lock(&hash_list_head->hash_list_lock);
+	queue_enter(&hash_list_head->hash_list, new_atm_value, atm_value_t, vid_hash_elt);
+	lck_mtx_unlock(&hash_list_head->hash_list_lock);
+}
+
+
+/*
+ * Routine: atm_value_hash_table_delete
+ * Purpose: Delete the atm value from the hash table.
+ * Returns: None.
+ */
+static void
+atm_value_hash_table_delete(atm_value_t atm_value)
+{
+	int hash_index;
+	atm_value_hash_t hash_list_head;
+	aid_t aid = atm_value->aid;
+
+	hash_index = AID_TO_HASH(aid);
+	hash_list_head = &atm_value_hash_table[hash_index];
+
+	lck_mtx_lock(&hash_list_head->hash_list_lock);
+	queue_remove(&hash_list_head->hash_list, atm_value, atm_value_t, vid_hash_elt);
+	lck_mtx_unlock(&hash_list_head->hash_list_lock);
+}
+
+
+/*
+ * Routine: get_atm_value_from_aid
+ * Purpose: Search for a given aid in the atm value hash table and
+ *          return the atm value structure.
+ * Returns: The atm value structure if the aid is found.
+ *          ATM_VALUE_NULL: if the aid is not found in the atm value hash table.
+ */
+static atm_value_t
+get_atm_value_from_aid(aid_t aid)
+{
+	int hash_index;
+	atm_value_hash_t hash_list_head;
+	atm_value_t next;
+
+	hash_index = AID_TO_HASH(aid);
+	hash_list_head = &atm_value_hash_table[hash_index];
+
+	/* Lock the atm list and search for the aid. */
+	lck_mtx_lock(&hash_list_head->hash_list_lock);
+
+	queue_iterate(&hash_list_head->hash_list, next, atm_value_t, vid_hash_elt) {
+		if (next->aid == aid) {
+			/*
+			 * Aid found. Increase the ref count and return
+			 * the atm value structure.
+			 */
+			atm_value_get_ref(next);
+			lck_mtx_unlock(&hash_list_head->hash_list_lock);
+			return (next);
+		}
+	}
+	lck_mtx_unlock(&hash_list_head->hash_list_lock);
+	return ATM_VALUE_NULL;
+}
+
+
+/*
+ * Routine: atm_value_get_ref
+ * Purpose: Take a reference on the atm value.
+ * Returns: None.
+ */
+static void
+atm_value_get_ref(atm_value_t atm_value)
+{
+	lck_mtx_lock(&atm_value->listener_lock);
+	atm_value->reference_count++;
+	lck_mtx_unlock(&atm_value->listener_lock);
+}
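
The bucket scheme these routines share keeps lock contention low: AID_TO_HASH folds an aid into one of 256 buckets, and each bucket carries its own lock, so lookups of unrelated aids do not serialize on a single global lock. A self-contained sketch of the same idea; names and types are illustrative, not from the patch:

    #include <stdint.h>
    #include <stddef.h>
    #include <pthread.h>

    #define NBUCKETS 256
    #define BUCKET(aid) ((unsigned)((aid) & (NBUCKETS - 1)))  /* mirrors AID_TO_HASH */

    struct node { uint64_t aid; struct node *next; };

    struct bucket {
        pthread_mutex_t lock;   /* per-bucket lock, as in struct atm_value_hash */
        struct node *head;
    };

    static struct bucket table[NBUCKETS];

    static struct node *
    bucket_find(uint64_t aid)
    {
        struct bucket *b = &table[BUCKET(aid)];
        pthread_mutex_lock(&b->lock);
        struct node *n = b->head;
        while (n != NULL && n->aid != aid)
            n = n->next;
        /* a real lookup takes a reference here, before dropping the lock */
        pthread_mutex_unlock(&b->lock);
        return n;
    }
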
+
+
+/*
+ * Routine: atm_listener_insert
+ * Purpose: Insert a listener on an atm value.
+ * Returns: KERN_SUCCESS on success; if the task is already present as a
+ *          listener, its mailbox is replaced rather than inserted twice.
+ */
+static kern_return_t
+atm_listener_insert(
+	atm_value_t atm_value,
+	atm_task_descriptor_t task_descriptor,
+	mailbox_offset_t mailbox_offset)
+{
+	atm_link_object_t new_link_object;
+	atm_link_object_t next;
+	void *mailbox = (void *)((char *)task_descriptor->mailbox_kernel_addr + mailbox_offset);
+
+	new_link_object = (atm_link_object_t) zalloc(atm_link_objects_zone);
+	new_link_object->descriptor = task_descriptor;
+	new_link_object->reference_count = 1;
+	new_link_object->flags = 0;
+	new_link_object->mailbox = mailbox;
+
+	/* Get a reference on the task descriptor */
+	atm_descriptor_get_reference(task_descriptor);
+
+	/* Check if the task mailbox is already on the listener list */
+	lck_mtx_lock(&atm_value->listener_lock);
+	queue_iterate(&atm_value->listeners, next, atm_link_object_t, listeners_element) {
+		if (next->descriptor == task_descriptor) {
+			/*
+			 * Replace the mailbox with the new one; the old mailbox is on its
+			 * unregister path anyway. There is a race where get_min_sub_aid may
+			 * have cached the old mailbox just as this function replaces it;
+			 * that simply behaves as if the get-value call had happened after
+			 * get_min_sub_aid already completed.
+			 */
+			next->mailbox = mailbox;
+			lck_mtx_unlock(&atm_value->listener_lock);
+			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_GETVALUE_INFO, (ATM_VALUE_REPLACED))) | DBG_FUNC_NONE,
+				atm_value, atm_value->aid, mailbox_offset, 0, 0);
+
+			/* Drop the extra reference on the task descriptor taken by this function. */
+			atm_task_descriptor_dealloc(task_descriptor);
+			zfree(atm_link_objects_zone, new_link_object);
+			return KERN_SUCCESS;
+		}
+	}
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_GETVALUE_INFO, (ATM_VALUE_ADDED))) | DBG_FUNC_NONE,
+		atm_value, atm_value->aid, mailbox_offset, 0, 0);
+
+	queue_enter(&atm_value->listeners, new_link_object, atm_link_object_t, listeners_element);
+	atm_value->listener_count++;
+	lck_mtx_unlock(&atm_value->listener_lock);
+	return KERN_SUCCESS;
+}
+
+
+/*
+ * Routine: atm_listener_delete_all
+ * Purpose: Deletes all the listeners for an atm value.
+ * Returns: None.
+ */
+static void
+atm_listener_delete_all(atm_value_t atm_value)
+{
+	atm_link_object_t next;
+
+	while (!queue_empty(&atm_value->listeners)) {
+		queue_remove_first(&atm_value->listeners, next, atm_link_object_t, listeners_element);
+
+		/* Drops the reference on the link object */
+		atm_link_dealloc(next);
+	}
+}
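
atm_listener_delete below (and atm_get_min_sub_aid further down) rely on a deferred-free idiom: unlink victims onto a private list while the lock is held, then release references after the lock is dropped, because the release path may itself take other locks or sleep. A generic standalone sketch of that idiom, with invented names:

    #include <pthread.h>
    #include <stdlib.h>

    struct lnode { int key; struct lnode *next; };

    static void
    delete_matching(struct lnode **headp, pthread_mutex_t *lock, int key)
    {
        struct lnode *doomed = NULL;

        pthread_mutex_lock(lock);
        for (struct lnode **pp = headp; *pp != NULL; ) {
            struct lnode *n = *pp;
            if (n->key == key) {
                *pp = n->next;          /* unlink under the lock */
                n->next = doomed;       /* park on the private list */
                doomed = n;
            } else {
                pp = &n->next;
            }
        }
        pthread_mutex_unlock(lock);

        while (doomed != NULL) {        /* free outside the lock */
            struct lnode *n = doomed;
            doomed = n->next;
            free(n);
        }
    }
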
+
+
+/*
+ * Routine: atm_listener_delete
+ * Purpose: Deletes a listener for an atm value.
+ * Returns: KERN_SUCCESS on successful unregister.
+ *          KERN_INVALID_VALUE on finding a different mailbox.
+ *          KERN_FAILURE on failure.
+ */
+static kern_return_t
+atm_listener_delete(
+	atm_value_t atm_value,
+	atm_task_descriptor_t task_descriptor,
+	mailbox_offset_t mailbox_offset)
+{
+	queue_head_t free_listeners;
+	atm_link_object_t next, elem;
+	void *mailbox = (void *)((char *)task_descriptor->mailbox_kernel_addr + mailbox_offset);
+	kern_return_t kr = KERN_FAILURE;
+
+	queue_init(&free_listeners);
+
+	lck_mtx_lock(&atm_value->listener_lock);
+
+	next = (atm_link_object_t)(void *) queue_first(&atm_value->listeners);
+	while (!queue_end(&atm_value->listeners, (queue_entry_t)next)) {
+		elem = next;
+		next = (atm_link_object_t)(void *) queue_next(&next->listeners_element);
+
+		if (elem->descriptor == task_descriptor) {
+			if (elem->mailbox == mailbox) {
+				KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_UNREGISTER_INFO,
+					(ATM_VALUE_UNREGISTERED))) | DBG_FUNC_NONE,
+					atm_value, atm_value->aid, mailbox_offset, 0, 0);
+				queue_remove(&atm_value->listeners, elem, atm_link_object_t, listeners_element);
+				queue_enter(&free_listeners, elem, atm_link_object_t, listeners_element);
+				atm_value->listener_count--;
+				kr = KERN_SUCCESS;
+				break;
+			} else {
+				KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_UNREGISTER_INFO,
+					(ATM_VALUE_DIFF_MAILBOX))) | DBG_FUNC_NONE,
+					atm_value, atm_value->aid, 0, 0, 0);
+				kr = KERN_INVALID_VALUE;
+				break;
+			}
+		}
+	}
+	lck_mtx_unlock(&atm_value->listener_lock);
+
+	while (!queue_empty(&free_listeners)) {
+		queue_remove_first(&free_listeners, next, atm_link_object_t, listeners_element);
+
+		/* Drops the reference on the link object */
+		atm_link_dealloc(next);
+	}
+	return kr;
+}
+
+
+/*
+ * Routine: atm_task_descriptor_alloc_init
+ * Purpose: Allocate an atm task descriptor, initialize it, and take a reference.
+ * Returns: atm task descriptor: on success.
+ *          NULL: on error.
+ */
+static atm_task_descriptor_t
+atm_task_descriptor_alloc_init(
+	mach_port_t trace_buffer,
+	uint64_t buffer_size,
+	void * mailbox_addr,
+	uint64_t mailbox_array_size,
+	task_t __assert_only task)
+{
+	atm_task_descriptor_t new_task_descriptor;
+
+	new_task_descriptor = (atm_task_descriptor_t) zalloc(atm_descriptors_zone);
+
+	new_task_descriptor->trace_buffer = trace_buffer;
+	new_task_descriptor->trace_buffer_size = buffer_size;
+	new_task_descriptor->mailbox_array_size = mailbox_array_size;
+	new_task_descriptor->mailbox_kernel_addr = mailbox_addr;
+	new_task_descriptor->reference_count = 1;
+	new_task_descriptor->flags = 0;
+	lck_mtx_init(&new_task_descriptor->lock, &atm_lock_grp, &atm_lock_attr);
+
+#if DEVELOPMENT || DEBUG
+	new_task_descriptor->task = task;
+	lck_mtx_lock(&atm_descriptors_list_lock);
+	queue_enter(&atm_descriptors_list, new_task_descriptor, atm_task_descriptor_t, descriptor_elt);
+	lck_mtx_unlock(&atm_descriptors_list_lock);
+#endif
+
+	return new_task_descriptor;
+}
+
+
+/*
+ * Routine: atm_descriptor_get_reference
+ * Purpose: Take a reference on the task descriptor.
+ * Returns: None.
+ */
+static void
+atm_descriptor_get_reference(atm_task_descriptor_t task_descriptor)
+{
+	lck_mtx_lock(&task_descriptor->lock);
+	task_descriptor->reference_count++;
+	lck_mtx_unlock(&task_descriptor->lock);
+}
+
+
+/*
+ * Routine: atm_task_descriptor_dealloc
+ * Purpose: Drops a reference on the atm descriptor.
+ * Returns: None.
+ */
+static void
+atm_task_descriptor_dealloc(atm_task_descriptor_t task_descriptor)
+{
+	lck_mtx_lock(&task_descriptor->lock);
+	task_descriptor->reference_count--;
+	assert(task_descriptor->reference_count >= 0);
+	if (task_descriptor->reference_count > 0) {
+		lck_mtx_unlock(&task_descriptor->lock);
+		return;
+	}
+
+#if DEVELOPMENT || DEBUG
+	lck_mtx_lock(&atm_descriptors_list_lock);
+	queue_remove(&atm_descriptors_list, task_descriptor, atm_task_descriptor_t, descriptor_elt);
+	lck_mtx_unlock(&atm_descriptors_list_lock);
+#endif
+	mach_vm_deallocate(kernel_map, (mach_vm_address_t)task_descriptor->mailbox_kernel_addr,
+		task_descriptor->mailbox_array_size);
+	task_descriptor->mailbox_kernel_addr = NULL;
+	task_descriptor->mailbox_array_size = 0;
+	/* release the send right for the named memory entry */
+	ipc_port_release_send(task_descriptor->trace_buffer);
+	lck_mtx_unlock(&task_descriptor->lock);
+	lck_mtx_destroy(&task_descriptor->lock, &atm_lock_grp);
+	zfree(atm_descriptors_zone, task_descriptor);
+	return;
+}
+
+
+/*
+ * Routine: atm_link_get_reference
+ * Purpose: Take a reference on the atm link object.
+ * Returns: None.
+ */
+static void
+atm_link_get_reference(atm_link_object_t link_object)
+{
+	atm_link_object_reference_internal(link_object);
+}
+
+
+/*
+ * Routine: atm_link_dealloc
+ * Purpose: Drops a reference on the link object.
+ * Returns: None.
+ */
+static void
+atm_link_dealloc(atm_link_object_t link_object)
+{
+	if (0 < atm_link_object_release_internal(link_object)) {
+		return;
+	}
+
+	assert(link_object->reference_count == 0);
+
+	/* Drop the reference on the atm task descriptor. */
+	atm_task_descriptor_dealloc(link_object->descriptor);
+	zfree(atm_link_objects_zone, link_object);
+}
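
Before mapping user memory, atm_register_trace_memory below screens its arguments: the trace buffer must be non-empty, page-aligned, and capped at MAX_TRACE_BUFFER_SIZE, and the mailbox array must be page-aligned, bounded by MAX_MAILBOX_SIZE, and strictly smaller than the buffer. A compact sketch of the same checks, with illustrative stand-ins for the kernel constants (PAGE_SZ et al. are assumptions of the example, not the patch's names):

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SZ  4096ULL
    #define MAX_BUF  0x40000000ULL      /* 1GB cap, as MAX_TRACE_BUFFER_SIZE */
    #define MAX_MBOX (8ULL * 4096)      /* as MAX_MAILBOX_SIZE */

    static bool
    trace_args_ok(uint64_t buf_size, uint64_t mbox_size)
    {
        return buf_size != 0 &&
            (buf_size & (PAGE_SZ - 1)) == 0 &&  /* buffer is page aligned */
            buf_size <= MAX_BUF &&
            mbox_size != 0 &&
            mbox_size < buf_size &&             /* mailbox lives inside the buffer */
            mbox_size <= MAX_MBOX &&
            (mbox_size & (PAGE_SZ - 1)) == 0;   /* mailbox size is page aligned */
    }
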
+
+
+/*
+ * Routine: atm_register_trace_memory
+ * Purpose: Registers trace memory for a task.
+ * Returns: KERN_SUCCESS: on success.
+ *          KERN_FAILURE: on error.
+ */
+kern_return_t
+atm_register_trace_memory(
+	task_t task,
+	uint64_t trace_buffer_address,
+	uint64_t buffer_size,
+	uint64_t mailbox_array_size)
+{
+	atm_task_descriptor_t task_descriptor;
+	mach_port_t trace_buffer = MACH_PORT_NULL;
+	mach_vm_offset_t mailbox_kernel_ptr = 0;
+	kern_return_t kr = KERN_SUCCESS;
+
+	if (disable_atm)
+		return KERN_NOT_SUPPORTED;
+
+	if (task != current_task())
+		return KERN_INVALID_ARGUMENT;
+
+	if (task->atm_context != NULL
+	    || (void *)trace_buffer_address == NULL
+	    || buffer_size == 0
+	    || (buffer_size & PAGE_MASK) != 0
+	    || buffer_size > MAX_TRACE_BUFFER_SIZE
+	    || mailbox_array_size == 0
+	    || mailbox_array_size >= buffer_size
+	    || mailbox_array_size > MAX_MAILBOX_SIZE
+	    || mailbox_array_size & PAGE_MIN_MASK) {
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	vm_map_t map = current_map();
+	memory_object_size_t mo_size = (memory_object_size_t) buffer_size;
+	kr = mach_make_memory_entry_64(map,
+		&mo_size,
+		(mach_vm_offset_t)trace_buffer_address,
+		VM_PROT_READ,
+		&trace_buffer,
+		NULL);
+	if (kr != KERN_SUCCESS)
+		return kr;
+
+	kr = mach_vm_map(kernel_map,
+		&mailbox_kernel_ptr,
+		mailbox_array_size,
+		0,
+		VM_FLAGS_ANYWHERE,
+		trace_buffer,
+		0,
+		FALSE,
+		VM_PROT_READ,
+		VM_PROT_READ,
+		VM_INHERIT_NONE
+		);
+
+	if (kr != KERN_SUCCESS) {
+		ipc_port_release_send(trace_buffer);
+		return kr;
+	}
+
+	task_descriptor = atm_task_descriptor_alloc_init(trace_buffer, buffer_size, (void *)mailbox_kernel_ptr, mailbox_array_size, task);
+	if (task_descriptor == ATM_TASK_DESCRIPTOR_NULL) {
+		ipc_port_release_send(trace_buffer);
+		mach_vm_deallocate(kernel_map, (mach_vm_address_t)mailbox_kernel_ptr, mailbox_array_size);
+		return KERN_NO_SPACE;
+	}
+
+	task_lock(task);
+	if (task->atm_context == NULL) {
+		task->atm_context = task_descriptor;
+		kr = KERN_SUCCESS;
+	} else {
+		kr = KERN_FAILURE;
+	}
+	task_unlock(task);
+
+	if (kr != KERN_SUCCESS) {
+		/* undo the mapping and allocations since we failed to hook the descriptor to the task */
+		atm_task_descriptor_dealloc(task_descriptor);
+	}
+	return kr;
+}
+
+
+/*
+ * Routine: atm_get_min_sub_aid_array
+ * Purpose: For an array of aids, look up the atm values and fill in the minimum subaids.
+ * Returns: None.
+ */
+static void
+atm_get_min_sub_aid_array(
+	aid_t *aid_array,
+	mach_atm_subaid_t *subaid_array,
+	uint32_t count)
+{
+	atm_value_t atm_value;
+	uint32_t i;
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_CALLED))) | DBG_FUNC_START,
+		0, 0, 0, 0, 0);
+
+	for (i = 0; i < count; i++) {
+		atm_value = get_atm_value_from_aid(aid_array[i]);
+		if (atm_value == ATM_VALUE_NULL) {
+			subaid_array[i] = ATM_SUBAID32_MAX;
+			continue;
+		}
+		subaid_array[i] = atm_get_min_sub_aid(atm_value);
+		atm_value_dealloc(atm_value);
+	}
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_CALLED))) | DBG_FUNC_END,
+		count, 0, 0, 0, 0);
+
+}
+
+
+/*
+ * Routine: atm_get_min_sub_aid
+ * Purpose: Walk the list of listeners and get the min sub-aid for an activity id.
+ * Returns: Minimum sub-aid to keep.
+ * Note: Drops the listener lock before accessing the mailboxes, since a mailbox
+ *       read may page fault and take a long time. Also cleans out listeners whose
+ *       tasks are dead and whose atm_task_descriptors no longer hold useful data.
+ */
+static mach_atm_subaid_t
+atm_get_min_sub_aid(atm_value_t atm_value)
+{
+	int32_t i = 0, j, freed_count = 0, dead_but_not_freed = 0;
+	int32_t listener_count;
+	atm_subaid32_t min_subaid = ATM_SUBAID32_MAX, subaid, max_subaid;
+	atm_link_object_t *link_object_array = NULL;
+	atm_link_object_t next, elem;
+	queue_head_t free_listeners;
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_LINK_LIST))) | DBG_FUNC_START,
+		0, 0, 0, 0, 0);
+
+	lck_mtx_lock(&atm_value->listener_lock);
+	listener_count = atm_value->listener_count;
+	lck_mtx_unlock(&atm_value->listener_lock);
+
+	/* Separate the memory access from the locked iteration, since the memory read may fault. */
+	link_object_array = (atm_link_object_t *) kalloc(sizeof(atm_link_object_t) * listener_count);
+	if (link_object_array == NULL) {
+		return 0;
+	}
+
+	/* Iterate the list, take a ref on each link object, and store it in the array. */
+	lck_mtx_lock(&atm_value->listener_lock);
+	queue_iterate(&atm_value->listeners, next, atm_link_object_t, listeners_element) {
+		/* Additional listeners may have been added between allocating the array and iterating the list. */
+		if (i >= listener_count)
+			break;
+
+		/* Get a ref on the link object */
+		atm_link_get_reference(next);
+		link_object_array[i] = (atm_link_object_t)next;
+		i++;
+	}
+	lck_mtx_unlock(&atm_value->listener_lock);
+	j = i;
+
+	/* Iterate the array to find the min */
+	for (i = 0; i < j; i++) {
+		/* Ignore the min value of dead processes. */
+		if (link_object_array[i]->descriptor->flags == ATM_TASK_DEAD)
+			continue;
+		/* Dereference the mailbox to get the min subaid */
+		subaid = *((atm_subaid32_t *)link_object_array[i]->mailbox);
+		if (subaid < min_subaid)
+			min_subaid = subaid;
+	}
+
+	/*
+	 * Mark the link objects that can be freed, and release the ref taken above.
+	 * Link objects of dead tasks are marked free once the count of dead but
+	 * unfreed descriptors exceeds ATM_LIST_DEAD_MAX.
+	 */
+	for (i = j - 1; i >= 0; i--) {
+		if (link_object_array[i]->descriptor->flags == ATM_TASK_DEAD) {
+			if (dead_but_not_freed > ATM_LIST_DEAD_MAX) {
+				link_object_array[i]->flags = ATM_LINK_REMOVE;
+				freed_count++;
+			} else {
+				max_subaid = *(((atm_subaid32_t *)link_object_array[i]->mailbox) + 1);
+				if (max_subaid < min_subaid) {
+					link_object_array[i]->flags = ATM_LINK_REMOVE;
+					freed_count++;
+				} else {
+					dead_but_not_freed++;
+				}
+			}
+		}
+		atm_link_dealloc(link_object_array[i]);
+		link_object_array[i] = NULL;
+	}
+
+	/* Check that the number of live entries in the list is less than maxproc */
+	assert((j - (freed_count + dead_but_not_freed)) <= maxproc);
+
+	kfree(link_object_array, (sizeof(atm_link_object_t) * listener_count));
+
+	/* Remove the marked link objects from the list */
+	lck_mtx_lock(&atm_value->listener_lock);
+
+	queue_init(&free_listeners);
+	next = (atm_link_object_t)(void *) queue_first(&atm_value->listeners);
+	while (!queue_end(&atm_value->listeners, (queue_entry_t)next)) {
+		elem = next;
+		next = (atm_link_object_t)(void *) queue_next(&next->listeners_element);
+
+		if (elem->flags == ATM_LINK_REMOVE) {
+			queue_remove(&atm_value->listeners, elem, atm_link_object_t, listeners_element);
+			queue_enter(&free_listeners, elem, atm_link_object_t, listeners_element);
+			atm_value->listener_count--;
+		}
+	}
+	lck_mtx_unlock(&atm_value->listener_lock);
+
+	/* Free the link objects */
+	while (!queue_empty(&free_listeners)) {
+		queue_remove_first(&free_listeners, next, atm_link_object_t, listeners_element);
+
+		/* Drops the reference on the link object */
+		atm_link_dealloc(next);
+	}
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_LINK_LIST))) | DBG_FUNC_END,
+		j, freed_count, dead_but_not_freed, 0, 0);
+
+	/* explicitly upgrade uint32_t to 64 bit mach size */
+	return CAST_DOWN(mach_atm_subaid_t, min_subaid);
+}
+
+
+/*
+ * Routine: atm_value_unregister
+ * Purpose: Unregisters a process from an activity id.
+ * Returns: KERN_SUCCESS on successful unregister.
+ *          KERN_INVALID_VALUE on finding a different mailbox.
+ *          KERN_FAILURE on failure.
+ */
+static kern_return_t
+atm_value_unregister(
+	atm_value_t atm_value,
+	atm_task_descriptor_t task_descriptor,
+	mailbox_offset_t mailbox_offset)
+{
+	kern_return_t kr;
+
+	if (task_descriptor == ATM_TASK_DESCRIPTOR_NULL)
+		return KERN_INVALID_ARGUMENT;
+	if (mailbox_offset > task_descriptor->mailbox_array_size)
+		return KERN_INVALID_ARGUMENT;
+
+	kr = atm_listener_delete(atm_value, task_descriptor, mailbox_offset);
+	return kr;
+}
+
+void
+atm_task_descriptor_destroy(atm_task_descriptor_t task_descriptor)
+{
+	/* Mark the task dead in the task descriptor, making the descriptor eligible for cleanup. */
+	lck_mtx_lock(&task_descriptor->lock);
+	task_descriptor->flags = ATM_TASK_DEAD;
+	lck_mtx_unlock(&task_descriptor->lock);
+
+	atm_task_descriptor_dealloc(task_descriptor);
+}
diff --git a/osfmk/atm/atm_internal.h b/osfmk/atm/atm_internal.h
new file mode 100644
index 000000000..6fbc32b65
--- /dev/null
+++ b/osfmk/atm/atm_internal.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _ATM_ATM_INTERNAL_H_
+#define _ATM_ATM_INTERNAL_H_
+
+#include
+#include
+#include
+
+#ifdef MACH_KERNEL_PRIVATE
+
+#include
+#include
+#include
+#include
+
+/* Flags for the atm task descriptor */
+#define ATM_TASK_DEAD 0x1
+
+/* Default value for the ATM Voucher Attribute Manager */
+#define VAM_DEFAULT_VALUE NULL
+
+typedef mach_voucher_attr_value_handle_t atm_voucher_id_t;
+
+struct atm_task_descriptor {
+	decl_lck_mtx_data(,lock)		/* lock to protect the reference count */
+	mach_port_t trace_buffer;		/* named memory entry registered by the user */
+	uint64_t trace_buffer_size;		/* size of the registered trace_buffer */
+	uint64_t mailbox_array_size;		/* mailbox array size in bytes */
+	void * mailbox_kernel_addr;		/* kernel address where the mailbox is mapped */
+	uint32_t reference_count:31,
+		 flags:1;
#if DEVELOPMENT || DEBUG
+	task_t task;				/* task pointer for debugging purposes */
+	queue_chain_t descriptor_elt;		/* global chain of all descriptors */
+#endif
+};
+
+typedef struct atm_task_descriptor *atm_task_descriptor_t;
+#define ATM_TASK_DESCRIPTOR_NULL NULL
+
+struct atm_value {
+	aid_t aid;				/* activity id */
+	queue_head_t listeners;			/* list of listeners registered for this activity */
+	decl_lck_mtx_data( ,listener_lock)	/* lock to protect the listener list */
+	queue_chain_t vid_hash_elt;		/* next hash element in the global hash table */
+#if DEVELOPMENT || DEBUG
+	queue_chain_t value_elt;		/* global chain of all values */
+#endif
+	uint32_t sync;				/* count of references handed out to the voucher subsystem */
+	uint32_t listener_count;		/* number of listeners registered on the value */
+	int32_t reference_count;		/* use count on the atm value; 1 is taken by the global hash table */
+};
+
+typedef struct atm_value *atm_value_t;
+#define ATM_VALUE_NULL NULL
+
+/* Flags for atm link objects */
+#define ATM_LINK_REMOVE 0x1
+
+struct atm_link_object {
+	atm_task_descriptor_t descriptor;
+	void * mailbox;				/* kernel address of the mailbox slot registered by the user for this activity */
+	uint32_t reference_count;		/* reference count for the link object */
+	uint8_t flags;				/* flags used to mark for deletion from the listener list */
+	queue_chain_t listeners_element;	/* Head is atm_value->listeners.
*/ +}; + +typedef struct atm_link_object *atm_link_object_t; + +#define atm_link_object_reference_internal(elem) \ + (hw_atomic_add(&(elem)->reference_count, 1)) + +#define atm_link_object_release_internal(elem) \ + (hw_atomic_sub(&(elem)->reference_count, 1)) + +struct atm_value_hash { + queue_head_t hash_list; + decl_lck_mtx_data(, hash_list_lock) /* lock to protect bucket list. */ +}; + +typedef struct atm_value_hash *atm_value_hash_t; + +void atm_init(void); +void atm_task_descriptor_destroy(atm_task_descriptor_t task_descriptor); +kern_return_t atm_register_trace_memory(task_t task, uint64_t trace_buffer_address, uint64_t buffer_size, uint64_t mailbox_array_size); +kern_return_t atm_send_proc_inspect_notification(task_t task, int32_t traced_pid, uint64_t traced_uniqueid); + +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* _ATM_ATM_INTERNAL_H_ */ diff --git a/osfmk/atm/atm_notification.defs b/osfmk/atm/atm_notification.defs new file mode 100644 index 000000000..256f99f61 --- /dev/null +++ b/osfmk/atm/atm_notification.defs @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * Interface definition for the Activity Trace Manager facility. + */ + +subsystem +#if KERNEL_USER + KernelUser +#endif /* KERNEL_USER */ + atm_notification 11500; + +#include +#include +#include + +/* + * Routine: + */ +simpleroutine atm_collect_trace_info( + atm_port : mach_port_move_send_t; + activity_trace_id : atm_aid_t; + sub_activity_id : mach_atm_subaid_t; + flags : uint32_t; + memory_buffers : atm_memory_descriptor_array_t; + buffer_sizes : atm_memory_size_array_t + ); + +simpleroutine atm_inspect_process_buffer( + atm_port : mach_port_move_send_t; + proc_pid : uint32_t; + proc_uniqueid : uint64_t; + buffer_size : uint64_t; + trace_buffer : mach_port_t +); + +/* vim: set ft=c : */ diff --git a/osfmk/atm/atm_types.defs b/osfmk/atm/atm_types.defs new file mode 100644 index 000000000..e2654eb9b --- /dev/null +++ b/osfmk/atm/atm_types.defs @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Activity Trace Manager interface type declarations + */ + +#ifndef _ATM_ATM_TYPES_DEFS_ +#define _ATM_ATM_TYPES_DEFS_ + + +#include + +type aid_t = uint64_t; +type atm_aid_t = uint64_t; +type mach_atm_subaid_t = uint64_t; + +type atm_memory_descriptor_array_t = array[*:512] of mach_port_t; +type atm_memory_size_array_t = array[*:512] of uint64_t; + +import ; + +#endif /* _ATM_ATM_TYPES_DEFS_ */ +/* vim: set ft=c : */ diff --git a/osfmk/atm/atm_types.h b/osfmk/atm/atm_types.h new file mode 100644 index 000000000..2bd03c0fe --- /dev/null +++ b/osfmk/atm/atm_types.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _ATM_ATM_TYPES_H_ +#define _ATM_ATM_TYPES_H_ + +#include +#include + +#define MACH_VOUCHER_ATTR_ATM_NULL ((mach_voucher_attr_recipe_command_t)501) +#define MACH_VOUCHER_ATTR_ATM_CREATE ((mach_voucher_attr_recipe_command_t)510) +#define MACH_VOUCHER_ATTR_ATM_REGISTER ((mach_voucher_attr_recipe_command_t)511) + +typedef uint32_t atm_action_t; +#define ATM_ACTION_DISCARD 0x1 +#define ATM_ACTION_COLLECT 0x2 +#define ATM_ACTION_LOGFAIL 0x3 +#define ATM_FIND_MIN_SUB_AID 0x4 +#define ATM_ACTION_UNREGISTER 0x5 + +/* Deprecated. will be removed soon */ +typedef uint64_t aid_t; +typedef uint64_t subaid_t; +typedef uint64_t mailbox_offset_t; +#define SUB_AID_MAX (UINT64_MAX) + +typedef uint64_t atm_aid_t; +typedef uint32_t atm_subaid32_t; +typedef uint64_t mach_atm_subaid_t; /* Used for mach based apis. */ +typedef uint64_t atm_mailbox_offset_t; + + +typedef mach_port_t atm_memory_descriptor_t; +typedef atm_memory_descriptor_t *atm_memory_descriptor_array_t; +typedef uint64_t *atm_memory_size_array_t; + +#define ATM_SUBAID32_MAX (UINT32_MAX) + +#endif /* _ATM_ATM_TYPES_H_ */ diff --git a/osfmk/bank/Makefile b/osfmk/bank/Makefile new file mode 100644 index 000000000..adf9c6152 --- /dev/null +++ b/osfmk/bank/Makefile @@ -0,0 +1,119 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + +include $(MakeInc_cmd) +include $(MakeInc_def) + +MIG_TYPES = + +MIG_DEFS = + +MACH_PRIVATE_DEFS = + +# +# MIG-generated headers that are traditionally used by user +# level code. +# +MIG_USHDRS = + +MIG_UUHDRS = + +MIGINCLUDES = ${MIG_UUHDRS} ${MIG_USHDRS} + +DATAFILES = \ + bank_types.h \ + ${MIG_TYPES} \ + ${MIG_DEFS} + +INSTALL_MI_LIST = \ + ${DATAFILES} + +INSTALL_KF_MI_LIST = \ + ${DATAFILES} + +INSTALL_KF_MI_LCL_LIST = \ + ${DATAFILES} + +INSTALL_MI_GEN_LIST = + +INSTALL_MI_DIR = bank + +EXPORT_MI_LIST = \ + ${DATAFILES} + +EXPORT_MI_GEN_LIST = \ + ${MIGINCLUDES} + +EXPORT_MI_DIR = bank + +${MIGINCLUDES} : ${MIG_TYPES} + +${MIG_UUHDRS} : \ + %.h : %.defs + @echo MIG $@ + $(_v)$(MIG) $(MIGFLAGS) \ + -server /dev/null \ + -user /dev/null \ + -header $@ \ + $< + +${MIG_USHDRS} : \ + %_server.h : %.defs + @echo MIG $@ + $(_v)$(MIG) $(MIGFLAGS) \ + -server /dev/null \ + -user /dev/null \ + -header /dev/null \ + -sheader $@ \ + $< + +# +# Build path +# + +INCFLAGS_MAKEFILE= -I.. + +MIGKSFLAGS = -DMACH_KERNEL_PRIVATE -DKERNEL_SERVER=1 +MIGKUFLAGS = -DMACH_KERNEL_PRIVATE -DKERNEL_USER=1 -maxonstack 1024 +# +# MIG-generated headers that are traditionally used by kernel +# level code. +# +MIG_KUHDRS = + +MIG_KUSRC = + +MIG_KSHDRS = + +MIG_KSSRC = + +COMP_FILES = ${MIG_KUSRC} ${MIG_KSSRC} + +do_build_all:: $(COMP_FILES) + +${COMP_FILES} : ${MIG_TYPES} + +${MIG_KUSRC} : \ + %_user.c : %.defs + @echo MIG $@ + $(_v)${MIG} ${MIGFLAGS} ${MIGKUFLAGS} \ + -user $*_user.c \ + -header $*.h \ + -server /dev/null \ + -sheader /dev/null \ + $< + +${MIG_KSSRC}: \ + %_server.c : %.defs + @echo MIG $@ + $(_v)${MIG} ${MIGFLAGS} ${MIGKSFLAGS} \ + -user /dev/null \ + -header /dev/null \ + -server $*_server.c \ + -sheader $*_server.h \ + $< + +include $(MakeInc_rule) +include $(MakeInc_dir) diff --git a/osfmk/bank/bank.c b/osfmk/bank/bank.c new file mode 100644 index 000000000..e1fe60598 --- /dev/null +++ b/osfmk/bank/bank.c @@ -0,0 +1,985 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. 
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+static zone_t bank_task_zone, bank_account_zone;
+#define MAX_BANK_TASK (CONFIG_TASK_MAX)
+#define MAX_BANK_ACCOUNT (CONFIG_TASK_MAX + CONFIG_THREAD_MAX)
+
+#define BANK_ELEMENT_TO_HANDLE(x) (CAST_DOWN(bank_handle_t, (x)))
+#define HANDLE_TO_BANK_ELEMENT(x) (CAST_DOWN(bank_element_t, (x)))
+
+/* A macro is needed because bank_element_t is only 4-byte aligned on the
+ * release kernel, and a direct type cast gives a compilation error. */
+#define CAST_TO_BANK_TASK(x) ((bank_task_t)((void *)(x)))
+#define CAST_TO_BANK_ACCOUNT(x) ((bank_account_t)((void *)(x)))
+
+ipc_voucher_attr_control_t bank_voucher_attr_control; /* communication channel from BANK to the voucher system */
+
+#if DEVELOPMENT || DEBUG
+queue_head_t bank_tasks_list;
+queue_head_t bank_accounts_list;
+#endif
+
+static ledger_template_t bank_ledger_template = NULL;
+struct _bank_ledger_indices bank_ledgers = { -1 };
+
+static bank_task_t bank_task_alloc_init(void);
+static bank_account_t bank_account_alloc_init(bank_task_t bank_holder, bank_task_t bank_merchant);
+static bank_task_t get_bank_task_context(task_t task);
+static void bank_task_dealloc(bank_task_t bank_task, mach_voucher_attr_value_reference_t sync);
+static kern_return_t bank_account_dealloc_with_sync(bank_account_t bank_account, mach_voucher_attr_value_reference_t sync);
+static void bank_rollup_chit_to_tasks(ledger_t bill, bank_task_t bank_holder, bank_task_t bank_merchant);
+static void init_bank_ledgers(void);
+
+kern_return_t
+bank_release_value(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_t value,
+	mach_voucher_attr_value_reference_t sync);
+
+kern_return_t
+bank_get_value(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_recipe_command_t command,
+	mach_voucher_attr_value_handle_array_t prev_values,
+	mach_msg_type_number_t __assert_only prev_value_count,
+	mach_voucher_attr_content_t recipe,
+	mach_voucher_attr_content_size_t recipe_size,
+	mach_voucher_attr_value_handle_t *out_value,
+	ipc_voucher_t *out_value_voucher);
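An editorial aside, not part of the patch: CAST_TO_BANK_TASK()/CAST_TO_BANK_ACCOUNT() above hop through (void *) because bank_element_t is only 4-byte aligned on the release kernel, so a direct pointer cast fails the compiler's alignment check. A stand-alone illustration of the same detour, with invented names:

	struct elem { int type; };          /* stand-in for struct bank_element */
	struct holder { struct elem e; };   /* stand-in for struct bank_task    */

	static struct holder *
	cast_to_holder(struct elem *p)
	{
		/* (struct holder *)p can be rejected when the target type's
		 * alignment is stricter than the source's; bouncing through
		 * (void *) sidesteps the check, which is safe here because
		 * every element really is embedded first in its container. */
		return (struct holder *)(void *)p;
	}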
+
+kern_return_t
+bank_extract_content(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_array_t values,
+	mach_msg_type_number_t value_count,
+	mach_voucher_attr_recipe_command_t *out_command,
+	mach_voucher_attr_content_t out_recipe,
+	mach_voucher_attr_content_size_t *in_out_recipe_size);
+
+kern_return_t
+bank_command(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_array_t values,
+	mach_msg_type_number_t value_count,
+	mach_voucher_attr_command_t command,
+	mach_voucher_attr_content_t in_content,
+	mach_voucher_attr_content_size_t in_content_size,
+	mach_voucher_attr_content_t out_content,
+	mach_voucher_attr_content_size_t *in_out_content_size);
+
+void
+bank_release(ipc_voucher_attr_manager_t __assert_only manager);
+
+/*
+ * communication channel from the voucher system to BANK
+ */
+struct ipc_voucher_attr_manager bank_manager = {
+	.ivam_release_value   = bank_release_value,
+	.ivam_get_value       = bank_get_value,
+	.ivam_extract_content = bank_extract_content,
+	.ivam_command         = bank_command,
+	.ivam_release         = bank_release,
+};
+
+
+#if DEVELOPMENT || DEBUG
+decl_lck_mtx_data(, bank_tasks_list_lock);
+decl_lck_mtx_data(, bank_accounts_list_lock);
+
+lck_grp_t bank_dev_lock_grp;
+lck_attr_t bank_dev_lock_attr;
+lck_grp_attr_t bank_dev_lock_grp_attr;
+#endif
+
+/*
+ * Lock group attributes for the bank subsystem.
+ */
+lck_grp_t bank_lock_grp;
+lck_attr_t bank_lock_attr;
+lck_grp_attr_t bank_lock_grp_attr;
+
+/*
+ * Routine: bank_init
+ * Purpose: Initialize the BANK subsystem.
+ * Returns: None.
+ */
+void
+bank_init()
+{
+	kern_return_t kr = KERN_SUCCESS;
+	/* set up zones for bank_task and bank_account objects */
+	bank_task_zone = zinit(sizeof(struct bank_task),
+	       MAX_BANK_TASK * sizeof(struct bank_task),
+	       sizeof(struct bank_task),
+	       "bank_task");
+
+	bank_account_zone = zinit(sizeof(struct bank_account),
+	       MAX_BANK_ACCOUNT * sizeof(struct bank_account),
+	       sizeof(struct bank_account),
+	       "bank_account");
+
+	init_bank_ledgers();
+
+	/* Initialize the bank lock group and lock attributes. */
+	lck_grp_attr_setdefault(&bank_lock_grp_attr);
+	lck_grp_init(&bank_lock_grp, "bank_lock", &bank_lock_grp_attr);
+	lck_attr_setdefault(&bank_lock_attr);
+
+#if DEVELOPMENT || DEBUG
+	/* Initialize the global bank development lock group and lock attributes. */
+	lck_grp_attr_setdefault(&bank_dev_lock_grp_attr);
+	lck_grp_init(&bank_dev_lock_grp, "bank_dev_lock", &bank_dev_lock_grp_attr);
+	lck_attr_setdefault(&bank_dev_lock_attr);
+
+	lck_mtx_init(&bank_tasks_list_lock, &bank_dev_lock_grp, &bank_dev_lock_attr);
+	lck_mtx_init(&bank_accounts_list_lock, &bank_dev_lock_grp, &bank_dev_lock_attr);
+
+	queue_init(&bank_tasks_list);
+	queue_init(&bank_accounts_list);
+#endif
+
+	/* Register the bank manager with the voucher subsystem. */
+	kr = ipc_register_well_known_mach_voucher_attr_manager(
+		&bank_manager,
+		0,
+		MACH_VOUCHER_ATTR_KEY_BANK,
+		&bank_voucher_attr_control);
+	if (kr != KERN_SUCCESS)
+		panic("BANK subsystem initialization failed");
+
+	kprintf("BANK subsystem is initialized\n");
+	return;
+}
+
+
+/*
+ * BANK Resource Manager Routines.
+ */
+
+
+/*
+ * Routine: bank_release_value
+ * Purpose: Release a value if sync matches the made-ref count in the value.
+ * Returns: KERN_SUCCESS: on successful deletion.
+ *          KERN_FAILURE: if the sync value does not match.
+ */
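An editorial aside, not part of the patch: sync is the count of "made" references the voucher layer believes it handed out for this value, and release only proceeds while that still equals bt_made, i.e. no new voucher grabbed the value since sync was sampled. A hypothetical caller-side sketch of that contract (only bank_release_value, bank_manager and MACH_VOUCHER_ATTR_KEY_BANK come from the patch; the wrapper is invented):

	static void
	try_retire_value(mach_voucher_attr_value_handle_t handle, uint32_t made_seen)
	{
		kern_return_t kr = bank_release_value(&bank_manager,
		    MACH_VOUCHER_ATTR_KEY_BANK, handle, made_seen);
		if (kr == KERN_FAILURE) {
			/* bt_made advanced past made_seen (a racing CREATE or
			 * REDEEM); the voucher layer keeps the value alive and
			 * retries later with a newer sync. */
		}
	}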
+kern_return_t
+bank_release_value(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_t value,
+	mach_voucher_attr_value_reference_t sync)
+{
+	bank_task_t bank_task = BANK_TASK_NULL;
+	bank_element_t bank_element = BANK_ELEMENT_NULL;
+	bank_account_t bank_account = BANK_ACCOUNT_NULL;
+	kern_return_t kr = KERN_SUCCESS;
+
+	assert(MACH_VOUCHER_ATTR_KEY_BANK == key);
+	assert(manager == &bank_manager);
+
+	bank_element = HANDLE_TO_BANK_ELEMENT(value);
+	if (bank_element == BANK_DEFAULT_VALUE) {
+		/* Return success for the default value */
+		return KERN_SUCCESS;
+	}
+
+	if (bank_element->be_type == BANK_TASK) {
+		bank_task = CAST_TO_BANK_TASK(bank_element);
+
+		if (bank_task->bt_made != (int)sync) {
+			return KERN_FAILURE;
+		}
+
+		bank_task_made_release_num(bank_task, sync);
+		bank_task_dealloc(bank_task, sync);
+	} else if (bank_element->be_type == BANK_ACCOUNT) {
+		bank_account = CAST_TO_BANK_ACCOUNT(bank_element);
+		kr = bank_account_dealloc_with_sync(bank_account, sync);
+	} else {
+		panic("Bogus bank type: %d passed in bank_release_value\n", bank_element->be_type);
+	}
+
+	return kr;
+}
+
+
+/*
+ * Routine: bank_get_value
+ */
+kern_return_t
+bank_get_value(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_recipe_command_t command,
+	mach_voucher_attr_value_handle_array_t prev_values,
+	mach_msg_type_number_t prev_value_count,
+	mach_voucher_attr_content_t __unused recipe,
+	mach_voucher_attr_content_size_t __unused recipe_size,
+	mach_voucher_attr_value_handle_t *out_value,
+	ipc_voucher_t *out_value_voucher)
+{
+	bank_task_t bank_task = BANK_TASK_NULL;
+	bank_task_t bank_holder = BANK_TASK_NULL;
+	bank_task_t bank_merchant = BANK_TASK_NULL;
+	bank_element_t bank_element = BANK_ELEMENT_NULL;
+	bank_account_t bank_account = BANK_ACCOUNT_NULL;
+	bank_account_t old_bank_account = BANK_ACCOUNT_NULL;
+	mach_voucher_attr_value_handle_t bank_handle;
+	task_t task;
+	kern_return_t kr = KERN_SUCCESS;
+	mach_msg_type_number_t i;
+
+	assert(MACH_VOUCHER_ATTR_KEY_BANK == key);
+	assert(manager == &bank_manager);
+
+	/* never an out voucher */
+	*out_value_voucher = IPC_VOUCHER_NULL;
+
+	switch (command) {
+
+	case MACH_VOUCHER_ATTR_BANK_CREATE:
+
+		/* Get the bank context of the current task and take a reference on it. */
+		task = current_task();
+		bank_task = get_bank_task_context(task);
+		if (bank_task == BANK_TASK_NULL)
+			return KERN_RESOURCE_SHORTAGE;
+
+		bank_task_reference(bank_task);
+		bank_task_made_reference(bank_task);
+
+		*out_value = BANK_ELEMENT_TO_HANDLE(bank_task);
+		break;
+
+	case MACH_VOUCHER_ATTR_REDEEM:
+
+		for (i = 0; i < prev_value_count; i++) {
+			bank_handle = prev_values[i];
+			bank_element = HANDLE_TO_BANK_ELEMENT(bank_handle);
+
+			if (bank_element == BANK_DEFAULT_VALUE)
+				continue;
+
+			task = current_task();
+			if (bank_element->be_type == BANK_TASK) {
+				bank_holder = CAST_TO_BANK_TASK(bank_element);
+			} else if (bank_element->be_type == BANK_ACCOUNT) {
+				old_bank_account = CAST_TO_BANK_ACCOUNT(bank_element);
+				bank_holder = old_bank_account->ba_holder;
+			} else {
+				panic("Bogus bank type: %d passed in get_value\n", bank_element->be_type);
+			}
+
+			bank_merchant = get_bank_task_context(task);
+			if (bank_merchant == BANK_TASK_NULL)
+				return KERN_RESOURCE_SHORTAGE;
+
+			/* If redeeming for the holder task itself, just return the bank task. */
+			if (bank_holder == bank_merchant) {
+				bank_task_reference(bank_holder);
+				bank_task_made_reference(bank_holder);
+				*out_value = BANK_ELEMENT_TO_HANDLE(bank_holder);
+				return kr;
+			}
+
+			bank_account = bank_account_alloc_init(bank_holder, bank_merchant);
+			if (bank_account == BANK_ACCOUNT_NULL)
+				return KERN_RESOURCE_SHORTAGE;
+
+			*out_value = BANK_ELEMENT_TO_HANDLE(bank_account);
+			return kr;
+		}
+
+		*out_value = BANK_ELEMENT_TO_HANDLE(BANK_DEFAULT_VALUE);
+		break;
+	default:
+		kr = KERN_INVALID_ARGUMENT;
+		break;
+	}
+
+	return kr;
+}
+
+
+/*
+ * Routine: bank_extract_content
+ * Purpose: Extract a human-readable description of the first non-default
+ *          value in an array of voucher values.
+ * Returns: KERN_SUCCESS: on success.
+ *          KERN_FAILURE: one of the values is not present in the hash.
+ *          KERN_NO_SPACE: insufficient buffer provided to hold the content.
+ */
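An editorial aside, not part of the patch: user space can read this description back through voucher introspection. A sketch, assuming the mach_voucher_extract_attr_content() MIG routine and the MACH_VOUCHER_ATTR_KEY_BANK key are visible to the caller:

	#include <stdio.h>
	#include <mach/mach.h>

	static void
	print_bank_content(mach_voucher_t voucher)
	{
		char buf[512];
		mach_voucher_attr_content_size_t size = sizeof(buf);
		/* Receives the string built by bank_extract_content() below,
		 * e.g. " Bank Context for a pid 123". */
		if (mach_voucher_extract_attr_content(voucher,
		        MACH_VOUCHER_ATTR_KEY_BANK,
		        (mach_voucher_attr_content_t)buf, &size) == KERN_SUCCESS)
			printf("%.*s", (int)size, buf);
	}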
+kern_return_t
+bank_extract_content(
+	ipc_voucher_attr_manager_t __assert_only manager,
+	mach_voucher_attr_key_t __assert_only key,
+	mach_voucher_attr_value_handle_array_t values,
+	mach_msg_type_number_t value_count,
+	mach_voucher_attr_recipe_command_t *out_command,
+	mach_voucher_attr_content_t out_recipe,
+	mach_voucher_attr_content_size_t *in_out_recipe_size)
+{
+	bank_task_t bank_task = BANK_TASK_NULL;
+	bank_element_t bank_element = BANK_ELEMENT_NULL;
+	bank_account_t bank_account = BANK_ACCOUNT_NULL;
+	mach_voucher_attr_value_handle_t bank_handle;
+	char buf[MACH_VOUCHER_BANK_CONTENT_SIZE];
+	mach_msg_type_number_t i;
+
+	assert(MACH_VOUCHER_ATTR_KEY_BANK == key);
+	assert(manager == &bank_manager);
+
+	for (i = 0; i < value_count; i++) {
+		bank_handle = values[i];
+		bank_element = HANDLE_TO_BANK_ELEMENT(bank_handle);
+		if (bank_element == BANK_DEFAULT_VALUE)
+			continue;
+
+		if (MACH_VOUCHER_BANK_CONTENT_SIZE > *in_out_recipe_size) {
+			*in_out_recipe_size = 0;
+			return KERN_NO_SPACE;
+		}
+
+		if (bank_element->be_type == BANK_TASK) {
+			bank_task = CAST_TO_BANK_TASK(bank_element);
+			snprintf(buf, MACH_VOUCHER_BANK_CONTENT_SIZE,
+			         " Bank Context for a pid %d\n", bank_task->bt_pid);
+		} else if (bank_element->be_type == BANK_ACCOUNT) {
+			bank_account = CAST_TO_BANK_ACCOUNT(bank_element);
+			snprintf(buf, MACH_VOUCHER_BANK_CONTENT_SIZE,
+			         " Bank Account linking holder pid %d with merchant pid %d\n",
+			         bank_account->ba_holder->bt_pid,
+			         bank_account->ba_merchant->bt_pid);
+		} else {
+			panic("Bogus bank type: %d passed in bank_extract_content\n", bank_element->be_type);
+		}
+
+		memcpy(&out_recipe[0], buf, strlen(buf) + 1);
+		*out_command = MACH_VOUCHER_ATTR_BANK_NULL;
+		*in_out_recipe_size = (mach_voucher_attr_content_size_t)strlen(buf) + 1;
+		return KERN_SUCCESS;
+	}
+
+	return KERN_SUCCESS;
+}
+
+/*
+ * Routine: bank_command
+ * Purpose: Execute a command against a set of bank values.
+ * Returns: KERN_SUCCESS: on successful execution of the command.
+ *          KERN_FAILURE: on failure.
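+ *
+ * Editorial sketch (not in the original comment): user space would
+ * typically reach this through the mach_voucher_attr_command() MIG
+ * routine, assuming that interface, roughly:
+ *
+ *	int32_t pid;
+ *	mach_voucher_attr_content_size_t size = sizeof(pid);
+ *	kr = mach_voucher_attr_command(voucher, MACH_VOUCHER_ATTR_KEY_BANK,
+ *	        BANK_ORIGINATOR_PID, NULL, 0,
+ *	        (mach_voucher_attr_content_t)&pid, &size);
+ *
+ * which lands in the BANK_ORIGINATOR_PID case below and copies out the
+ * holder's pid.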
+ */ +kern_return_t +bank_command( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_array_t __unused values, + mach_msg_type_number_t __unused value_count, + mach_voucher_attr_command_t __unused command, + mach_voucher_attr_content_t __unused in_content, + mach_voucher_attr_content_size_t __unused in_content_size, + mach_voucher_attr_content_t __unused out_content, + mach_voucher_attr_content_size_t __unused *out_content_size) +{ + bank_task_t bank_task = BANK_TASK_NULL; + bank_element_t bank_element = BANK_ELEMENT_NULL; + bank_account_t bank_account = BANK_ACCOUNT_NULL; + mach_voucher_attr_value_handle_t bank_handle; + mach_msg_type_number_t i; + int32_t pid; + + assert(MACH_VOUCHER_ATTR_KEY_BANK == key); + assert(manager == &bank_manager); + + switch (command) { + case BANK_ORIGINATOR_PID: + + if ((sizeof(pid)) > *out_content_size) { + *out_content_size = 0; + return KERN_NO_SPACE; + } + + for (i = 0; i < value_count; i++) { + bank_handle = values[i]; + bank_element = HANDLE_TO_BANK_ELEMENT(bank_handle); + if (bank_element == BANK_DEFAULT_VALUE) + continue; + + if (bank_element->be_type == BANK_TASK) { + bank_task = CAST_TO_BANK_TASK(bank_element); + } else if (bank_element->be_type == BANK_ACCOUNT) { + bank_account = CAST_TO_BANK_ACCOUNT(bank_element); + bank_task = bank_account->ba_holder; + } else { + panic("Bogus bank type: %d passed in voucher_command\n", bank_element->be_type); + } + pid = bank_task->bt_pid; + + memcpy(&out_content[0], &pid, sizeof(pid)); + *out_content_size = (mach_voucher_attr_content_size_t)sizeof(pid); + return KERN_SUCCESS; + } + /* In the case of no value, return error KERN_INVALID_VALUE */ + *out_content_size = 0; + return KERN_INVALID_VALUE; + + break; + default: + return KERN_INVALID_ARGUMENT; + } + return KERN_SUCCESS; +} + + +void +bank_release( + ipc_voucher_attr_manager_t __assert_only manager) +{ + assert(manager == &bank_manager); +} + + + +/* + * Bank Internal Routines. + */ + +/* + * Routine: bank_task_alloc_init + * Purpose: Allocate and initialize a bank task structure. + * Returns: bank_task_t on Success. + * BANK_TASK_NULL: on Failure. + * Notes: Leaves the task and creditcard blank and has only 1 ref, + needs to take 1 extra ref after the task field is initialized. + */ +static bank_task_t +bank_task_alloc_init(void) +{ + bank_task_t new_bank_task; + + new_bank_task = (bank_task_t) zalloc(bank_task_zone); + if (new_bank_task == BANK_TASK_NULL) + return BANK_TASK_NULL; + + new_bank_task->bt_type = BANK_TASK; + new_bank_task->bt_refs = 1; + new_bank_task->bt_made = 0; + new_bank_task->bt_pid = 0; + new_bank_task->bt_creditcard = NULL; + queue_init(&new_bank_task->bt_accounts_to_pay); + queue_init(&new_bank_task->bt_accounts_to_charge); + lck_mtx_init(&new_bank_task->bt_acc_to_pay_lock, &bank_lock_grp, &bank_lock_attr); + lck_mtx_init(&new_bank_task->bt_acc_to_charge_lock, &bank_lock_grp, &bank_lock_attr); + +#if DEVELOPMENT || DEBUG + new_bank_task->bt_task = NULL; + lck_mtx_lock(&bank_tasks_list_lock); + queue_enter(&bank_tasks_list, new_bank_task, bank_task_t, bt_global_elt); + lck_mtx_unlock(&bank_tasks_list_lock); +#endif + return (new_bank_task); +} + +/* + * Routine: bank_account_alloc_init + * Purpose: Allocate and Initialize the bank account struct. + * Returns: bank_account_t : On Success. + * BANK_ACCOUNT_NULL: On Failure. 
+ */ +static bank_account_t +bank_account_alloc_init( + bank_task_t bank_holder, + bank_task_t bank_merchant) +{ + bank_account_t new_bank_account; + bank_account_t bank_account; + boolean_t entry_found = FALSE; + ledger_t new_ledger = ledger_instantiate(bank_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES); + + if (new_ledger == NULL) + return BANK_ACCOUNT_NULL; + + ledger_entry_setactive(new_ledger, bank_ledgers.cpu_time); + new_bank_account = (bank_account_t) zalloc(bank_account_zone); + if (new_bank_account == BANK_ACCOUNT_NULL) { + ledger_dereference(new_ledger); + return BANK_ACCOUNT_NULL; + } + + new_bank_account->ba_type = BANK_ACCOUNT; + new_bank_account->ba_refs = 1; + new_bank_account->ba_made = 1; + new_bank_account->ba_pid = 0; + new_bank_account->ba_bill = new_ledger; + new_bank_account->ba_merchant = bank_merchant; + new_bank_account->ba_holder = bank_holder; + + /* Iterate through accounts need to pay list to find the existing entry */ + lck_mtx_lock(&bank_holder->bt_acc_to_pay_lock); + queue_iterate(&bank_holder->bt_accounts_to_pay, bank_account, bank_account_t, ba_next_acc_to_pay) { + if (bank_account->ba_merchant != bank_merchant) + continue; + + entry_found = TRUE; + /* Take a made ref, since this value would be returned to voucher system. */ + bank_account_reference(bank_account); + bank_account_made_reference(bank_account); + break; + } + + if (!entry_found) { + + /* Create a linkage between the holder and the merchant task, Grab both the list locks before adding it to the list. */ + lck_mtx_lock(&bank_merchant->bt_acc_to_charge_lock); + + /* Add the account entry into Accounts need to pay account link list. */ + queue_enter(&bank_holder->bt_accounts_to_pay, new_bank_account, bank_account_t, ba_next_acc_to_pay); + + /* Add the account entry into Accounts need to charge account link list. */ + queue_enter(&bank_merchant->bt_accounts_to_charge, new_bank_account, bank_account_t, ba_next_acc_to_charge); + + lck_mtx_unlock(&bank_merchant->bt_acc_to_charge_lock); + } + + lck_mtx_unlock(&bank_holder->bt_acc_to_pay_lock); + + if (entry_found) { + ledger_dereference(new_ledger); + zfree(bank_account_zone, new_bank_account); + return bank_account; + } + + bank_task_reference(bank_holder); + bank_task_reference(bank_merchant); + +#if DEVELOPMENT || DEBUG + new_bank_account->ba_task = NULL; + lck_mtx_lock(&bank_accounts_list_lock); + queue_enter(&bank_accounts_list, new_bank_account, bank_account_t, ba_global_elt); + lck_mtx_unlock(&bank_accounts_list_lock); +#endif + + return (new_bank_account); +} + +/* + * Routine: get_bank_task_context + * Purpose: Get the bank context of the given task + * Returns: bank_task_t on Success. + * BANK_TASK_NULL: on Failure. + * Note: Initialize bank context if NULL. + */ +static bank_task_t +get_bank_task_context(task_t task) +{ + bank_task_t bank_task; + + if (task->bank_context) + return (task->bank_context); + + bank_task = bank_task_alloc_init(); + + /* Grab the task lock and check if we won the race. */ + task_lock(task); + if (task->bank_context) { + task_unlock(task); + if (bank_task != BANK_TASK_NULL) + bank_task_dealloc(bank_task, 1); + return (task->bank_context); + } else if (bank_task == BANK_TASK_NULL) { + task_unlock(task); + return BANK_TASK_NULL; + } + /* We won the race. Take a ref on the ledger and initialize bank task. 
*/ + bank_task->bt_creditcard = task->ledger; + bank_task->bt_pid = audit_token_pid_from_task(task); +#if DEVELOPMENT || DEBUG + bank_task->bt_task = task; +#endif + ledger_reference(task->ledger); + + task->bank_context = bank_task; + task_unlock(task); + + return (bank_task); +} + +/* + * Routine: bank_task_dealloc + * Purpose: Drops the reference on bank task. + * Returns: None. + */ +static void +bank_task_dealloc( + bank_task_t bank_task, + mach_voucher_attr_value_reference_t sync) +{ + assert(bank_task->bt_refs >= 0); + + if (bank_task_release_num(bank_task, sync) > (int)sync) + return; + + assert(bank_task->bt_refs == 0); + assert(queue_empty(&bank_task->bt_accounts_to_pay)); + assert(queue_empty(&bank_task->bt_accounts_to_charge)); + + ledger_dereference(bank_task->bt_creditcard); + lck_mtx_destroy(&bank_task->bt_acc_to_pay_lock, &bank_lock_grp); + lck_mtx_destroy(&bank_task->bt_acc_to_charge_lock, &bank_lock_grp); + +#if DEVELOPMENT || DEBUG + lck_mtx_lock(&bank_tasks_list_lock); + queue_remove(&bank_tasks_list, bank_task, bank_task_t, bt_global_elt); + lck_mtx_unlock(&bank_tasks_list_lock); +#endif + + zfree(bank_task_zone, bank_task); +} + +/* + * Routine: bank_account_dealloc_with_sync + * Purpose: Drop the reference on bank account if the sync matches. + * Returns: KERN_SUCCESS if sync matches. + * KERN_FAILURE on mismatch. + */ +static kern_return_t +bank_account_dealloc_with_sync( + bank_account_t bank_account, + mach_voucher_attr_value_reference_t sync) +{ + bank_task_t bank_holder = bank_account->ba_holder; + bank_task_t bank_merchant = bank_account->ba_merchant; + + /* Grab the acc to pay list lock and check the sync value */ + lck_mtx_lock(&bank_holder->bt_acc_to_pay_lock); + + if (bank_account->ba_made != (int)sync) { + lck_mtx_unlock(&bank_holder->bt_acc_to_pay_lock); + return KERN_FAILURE; + } + + bank_account_made_release_num(bank_account, sync); + + if (bank_account_release_num(bank_account, sync) > (int)sync) + panic("Sync and ref value did not match for bank account %p\n", bank_account); + + + /* Grab both the acc to pay and acc to charge locks */ + lck_mtx_lock(&bank_merchant->bt_acc_to_charge_lock); + + bank_rollup_chit_to_tasks(bank_account->ba_bill, bank_holder, bank_merchant); + + /* Remove the account entry from Accounts need to pay account link list. */ + queue_remove(&bank_holder->bt_accounts_to_pay, bank_account, bank_account_t, ba_next_acc_to_pay); + + /* Remove the account entry from Accounts need to charge account link list. */ + queue_remove(&bank_merchant->bt_accounts_to_charge, bank_account, bank_account_t, ba_next_acc_to_charge); + + lck_mtx_unlock(&bank_merchant->bt_acc_to_charge_lock); + lck_mtx_unlock(&bank_holder->bt_acc_to_pay_lock); + + ledger_dereference(bank_account->ba_bill); + + /* Drop the reference of bank holder and merchant */ + bank_task_dealloc(bank_holder, 1); + bank_task_dealloc(bank_merchant, 1); + +#if DEVELOPMENT || DEBUG + lck_mtx_lock(&bank_accounts_list_lock); + queue_remove(&bank_accounts_list, bank_account, bank_account_t, ba_global_elt); + lck_mtx_unlock(&bank_accounts_list_lock); +#endif + + zfree(bank_account_zone, bank_account); + return KERN_SUCCESS; +} + +/* + * Routine: bank_rollup_chit_to_tasks + * Purpose: Debit and Credit holder's and merchant's ledgers. + * Returns: None. 
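+ *
+ * Worked example (editorial, not in the original comment): if the chit
+ * records credit = 7ms and debit = 2ms of cpu_time, the code below nets
+ * +5ms into the holder's cpu_time_billed_to_me entry and +5ms into the
+ * merchant's cpu_time_billed_to_others entry.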
+ */
+static void
+bank_rollup_chit_to_tasks(
+	ledger_t bill,
+	bank_task_t bank_holder,
+	bank_task_t bank_merchant)
+{
+	ledger_amount_t credit;
+	ledger_amount_t debit;
+	kern_return_t ret;
+
+	ret = ledger_get_entries(bill, bank_ledgers.cpu_time, &credit, &debit);
+	if (ret != KERN_SUCCESS) {
+		return;
+	}
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (BANK_CODE(BANK_ACCOUNT_INFO, (BANK_SETTLE_CPU_TIME))) | DBG_FUNC_NONE,
+	                          bank_merchant->bt_pid, bank_holder->bt_pid, credit, debit, 0);
+#if CONFIG_BANK
+	ledger_credit(bank_holder->bt_creditcard, task_ledgers.cpu_time_billed_to_me, credit);
+	ledger_debit(bank_holder->bt_creditcard, task_ledgers.cpu_time_billed_to_me, debit);
+
+	ledger_credit(bank_merchant->bt_creditcard, task_ledgers.cpu_time_billed_to_others, credit);
+	ledger_debit(bank_merchant->bt_creditcard, task_ledgers.cpu_time_billed_to_others, debit);
+#endif
+}
+
+
+
+/*
+ * Routine: bank_task_destroy
+ * Purpose: Drop a reference on the bank task.
+ * Returns: None.
+ */
+void
+bank_task_destroy(bank_task_t bank_task)
+{
+	bank_task_dealloc(bank_task, 1);
+}
+
+/*
+ * Routine: init_bank_ledgers
+ * Purpose: Initialize the template for bank ledgers.
+ * Returns: None.
+ */
+static void
+init_bank_ledgers(void) {
+	ledger_template_t t;
+	int idx;
+
+	assert(bank_ledger_template == NULL);
+
+	if ((t = ledger_template_create("Bank ledger")) == NULL)
+		panic("couldn't create bank ledger template");
+
+	if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
+		panic("couldn't create cpu_time entry for bank ledger template");
+	}
+
+	bank_ledgers.cpu_time = idx;
+	bank_ledger_template = t;
+}
+
+/*
+ * Routine: bank_billed_time
+ * Purpose: Walk through the accounts-to-pay list and compute the current billed balance.
+ * Returns: balance.
+ */
+uint64_t
+bank_billed_time(bank_task_t bank_task)
+{
+	int64_t balance = 0;
+#ifdef CONFIG_BANK
+	bank_account_t bank_account;
+	int64_t temp = 0;
+#endif
+	if (bank_task == BANK_TASK_NULL) {
+		return balance;
+	}
+
+#ifdef CONFIG_BANK
+	lck_mtx_lock(&bank_task->bt_acc_to_pay_lock);
+
+	ledger_get_balance(bank_task->bt_creditcard, task_ledgers.cpu_time_billed_to_me, &temp);
+	balance += temp;
+
+	queue_iterate(&bank_task->bt_accounts_to_pay, bank_account, bank_account_t, ba_next_acc_to_pay) {
+		temp = 0;
+		ledger_get_balance(bank_account->ba_bill, bank_ledgers.cpu_time, &temp);
+		balance += temp;
+	}
+	lck_mtx_unlock(&bank_task->bt_acc_to_pay_lock);
+#endif
+	return (uint64_t)balance;
+}
+
+/*
+ * Routine: bank_serviced_time
+ * Purpose: Walk through the accounts-to-charge list and compute the current balance to be charged.
+ * Returns: balance.
+ */
+uint64_t
+bank_serviced_time(bank_task_t bank_task)
+{
+	int64_t balance = 0;
+#ifdef CONFIG_BANK
+	bank_account_t bank_account;
+	int64_t temp = 0;
+#endif
+	if (bank_task == BANK_TASK_NULL) {
+		return balance;
+	}
+
+#ifdef CONFIG_BANK
+	lck_mtx_lock(&bank_task->bt_acc_to_charge_lock);
+
+	ledger_get_balance(bank_task->bt_creditcard, task_ledgers.cpu_time_billed_to_others, &temp);
+	balance += temp;
+
+	queue_iterate(&bank_task->bt_accounts_to_charge, bank_account, bank_account_t, ba_next_acc_to_charge) {
+		temp = 0;
+		ledger_get_balance(bank_account->ba_bill, bank_ledgers.cpu_time, &temp);
+		balance += temp;
+	}
+	lck_mtx_unlock(&bank_task->bt_acc_to_charge_lock);
+#endif
+	return (uint64_t)balance;
+}
+
+/*
+ * Routine: bank_get_voucher_ledger
+ * Purpose: Get the bank ledger (chit) from the voucher.
+ * Returns: the bank ledger if a bank_account attribute is present in the voucher;
+ *          NULL if there is no attribute or the attribute is a bank_task.
+ */
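An editorial aside, not part of the patch: the returned chit is what per-thread CPU billing attaches to. The consuming path is roughly the thread voucher-adoption code of this release (treat this two-liner as an approximation, not the verbatim source):

	/* When a thread adopts a voucher: */
	ledger_t bankledger = bank_get_voucher_ledger(voucher); /* NULL => bill no one */
	bank_swap_thread_bank_ledger(thread, bankledger);       /* defined just below */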
+ledger_t
+bank_get_voucher_ledger(ipc_voucher_t voucher)
+{
+	bank_element_t bank_element = BANK_ELEMENT_NULL;
+	bank_account_t bank_account = BANK_ACCOUNT_NULL;
+	mach_voucher_attr_value_handle_t vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED];
+	mach_voucher_attr_value_handle_array_size_t val_count;
+	ledger_t bankledger = NULL;
+	kern_return_t kr;
+
+	val_count = MACH_VOUCHER_ATTR_VALUE_MAX_NESTED;
+	kr = mach_voucher_attr_control_get_values(bank_voucher_attr_control,
+	                                          voucher,
+	                                          vals,
+	                                          &val_count);
+
+	if (kr != KERN_SUCCESS)
+		return NULL;
+
+	if (val_count == 0)
+		return NULL;
+
+	bank_element = HANDLE_TO_BANK_ELEMENT(vals[0]);
+	if (bank_element == BANK_DEFAULT_VALUE)
+		return NULL;
+
+	if (bank_element->be_type == BANK_TASK) {
+		bankledger = NULL;
+	} else if (bank_element->be_type == BANK_ACCOUNT) {
+		bank_account = CAST_TO_BANK_ACCOUNT(bank_element);
+		bankledger = bank_account->ba_bill;
+	} else {
+		panic("Bogus bank type: %d passed in bank_get_voucher_ledger\n", bank_element->be_type);
+	}
+
+	return (bankledger);
+}
+
+/*
+ * Routine: bank_swap_thread_bank_ledger
+ * Purpose: Swap the bank ledger on the thread.
+ * Returns: None.
+ * Note: Should only be called for the current thread or a thread that has not yet started.
+ */
+void
+bank_swap_thread_bank_ledger(thread_t thread __unused, ledger_t new_ledger __unused)
+{
+#ifdef CONFIG_BANK
+	spl_t s;
+	processor_t processor;
+	ledger_t old_ledger = thread->t_bankledger;
+	int64_t ctime, effective_ledger_time_consumed = 0;
+	int64_t remainder = 0, consumed = 0;
+
+	if (old_ledger == NULL && new_ledger == NULL)
+		return;
+
+	assert((thread == current_thread() || thread->started == 0));
+
+	s = splsched();
+	thread_lock(thread);
+
+	/*
+	 * Calculation of the time elapsed by the thread before the voucher swap.
+	 * The timeline below shows all the variables used in the calculation.
+	 *
+	 *                   thread ledger
+	 *                      cpu_time
+	 *          |<-  consumed  ->|<-  remainder  ->|
+	 * timeline ----------------------------------------------------------->
+	 *          |                |                 |
+	 *   thread_dispatch       ctime          quantum end
+	 *
+	 *   |<- effective_ledger_time ->|
+	 * deduct_bank_ledger_time
+	 */
+
+	ctime = mach_absolute_time();
+	processor = thread->last_processor;
+	if (processor != NULL) {
+		if ((int64_t)processor->quantum_end > ctime)
+			remainder = (int64_t)processor->quantum_end - ctime;
+
+		consumed = thread->quantum_remaining - remainder;
+		effective_ledger_time_consumed = consumed - thread->t_deduct_bank_ledger_time;
+	}
+
+	thread->t_deduct_bank_ledger_time = consumed;
+
+	thread->t_bankledger = new_ledger;
+
+	thread_unlock(thread);
+	splx(s);
+
+	if (old_ledger != NULL)
+		ledger_credit(old_ledger,
+		              bank_ledgers.cpu_time,
+		              effective_ledger_time_consumed);
+#endif
+}
+
diff --git a/osfmk/bank/bank_internal.h b/osfmk/bank/bank_internal.h
new file mode 100644
index 000000000..155f794ac
--- /dev/null
+++ b/osfmk/bank/bank_internal.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License.
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _BANK_BANK_INTERNAL_H_ +#define _BANK_BANK_INTERNAL_H_ + +#include +#include + +#ifdef MACH_KERNEL_PRIVATE + +#include +#include +#include +#include +#include + +/* Default value for Voucher Attribute Manager for BANK */ +#define BANK_DEFAULT_VALUE NULL +typedef mach_voucher_attr_value_handle_t bank_handle_t; + +#define BANK_TASK 0 +#define BANK_ACCOUNT 1 + +struct bank_element { + int be_type; /* Type of element */ + int be_refs; /* Ref count */ + int be_made; /* Made refs for voucher, Actual ref is also taken for each Made ref */ + int32_t be_pid; /* Customer task's pid. */ +#if DEVELOPMENT || DEBUG + task_t be_task; /* Customer task, do not use it since ref is not taken on task */ +#endif +}; + +typedef struct bank_element * bank_element_t; +#define BANK_ELEMENT_NULL ((bank_element_t) 0) + +struct bank_task { + struct bank_element bt_elem; /* Bank element */ + ledger_t bt_creditcard; /* Ledger of the customer task */ + queue_head_t bt_accounts_to_pay; /* List of accounts worked for me and need to pay */ + queue_head_t bt_accounts_to_charge; /* List of accounts I did work and need to charge */ + decl_lck_mtx_data(, bt_acc_to_pay_lock) /* Lock to protect accounts to pay list */ + decl_lck_mtx_data(, bt_acc_to_charge_lock) /* Lock to protect accounts to charge list */ +#if DEVELOPMENT || DEBUG + queue_chain_t bt_global_elt; /* Element on the global bank task chain */ +#endif +}; + +#define bt_type bt_elem.be_type +#define bt_refs bt_elem.be_refs +#define bt_made bt_elem.be_made +#define bt_pid bt_elem.be_pid + +#if DEVELOPMENT || DEBUG +#define bt_task bt_elem.be_task +#endif + +typedef struct bank_task * bank_task_t; +#define BANK_TASK_NULL ((bank_task_t) 0) + +#define bank_task_reference(elem) \ + (OSAddAtomic(1, &(elem)->bt_refs)) + +#define bank_task_release(elem) \ + (OSAddAtomic(-1, &(elem)->bt_refs)) + +#define bank_task_release_num(elem, num) \ + (OSAddAtomic(-(num), &(elem)->bt_refs)) + +#define bank_task_made_reference(elem) \ + (OSAddAtomic(1, &(elem)->bt_made)) + +#define bank_task_made_release(elem) \ + (OSAddAtomic(-1, &(elem)->bt_made)) + +#define bank_task_made_release_num(elem, num) \ + (OSAddAtomic(-(num), &(elem)->bt_made)) + + +struct bank_account { + struct bank_element ba_elem; /* Bank element */ + ledger_t ba_bill; /* Temporary ledger i.e. 
chit */ + bank_task_t ba_merchant; /* Task who worked for me, who will charge me on behalf of */ + bank_task_t ba_holder; /* Credit Card task holder */ + queue_chain_t ba_next_acc_to_pay; /* Next account I need to pay to */ + queue_chain_t ba_next_acc_to_charge; /* Next account I need to charge to */ +#if DEVELOPMENT || DEBUG + queue_chain_t ba_global_elt; /* Element on the global account chain */ +#endif +}; + +#define ba_type ba_elem.be_type +#define ba_refs ba_elem.be_refs +#define ba_made ba_elem.be_made +#define ba_pid ba_elem.be_pid + +#if DEVELOPMENT || DEBUG +#define ba_task ba_elem.be_task +#endif + +typedef struct bank_account * bank_account_t; +#define BANK_ACCOUNT_NULL ((bank_account_t) 0) + +#define bank_account_reference(elem) \ + (OSAddAtomic(1, &(elem)->ba_refs)) + +#define bank_account_release(elem) \ + (OSAddAtomic(-1, &(elem)->ba_refs)) + +#define bank_account_release_num(elem, num) \ + (OSAddAtomic(-(num), &(elem)->ba_refs)) + +#define bank_account_made_reference(elem) \ + (OSAddAtomic(1, &(elem)->ba_made)) + +#define bank_account_made_release(elem) \ + (OSAddAtomic(-1, &(elem)->ba_made)) + +#define bank_account_made_release_num(elem, num) \ + (OSAddAtomic(-(num), &(elem)->ba_made)) + +struct _bank_ledger_indices { + int cpu_time; +}; + +extern struct _bank_ledger_indices bank_ledgers; + +extern void bank_init(void); +extern void bank_task_destroy(bank_task_t); +extern uint64_t bank_billed_time(bank_task_t bank_task); +extern uint64_t bank_serviced_time(bank_task_t bank_task); +extern ledger_t bank_get_voucher_ledger(ipc_voucher_t voucher); +extern void bank_swap_thread_bank_ledger(thread_t thread, ledger_t ledger); + +#endif /* MACH_KERNEL_PRIVATE */ +#endif /* _BANK_BANK_INTERNAL_H_ */ diff --git a/osfmk/bank/bank_types.h b/osfmk/bank/bank_types.h new file mode 100644 index 000000000..c0ce5720f --- /dev/null +++ b/osfmk/bank/bank_types.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _BANK_BANK_TYPES_H_ +#define _BANK_BANK_TYPES_H_ + +#include +#include + +#define MACH_VOUCHER_ATTR_BANK_NULL ((mach_voucher_attr_recipe_command_t)601) +#define MACH_VOUCHER_ATTR_BANK_CREATE ((mach_voucher_attr_recipe_command_t)610) + +#define MACH_VOUCHER_BANK_CONTENT_SIZE (500) + +typedef uint32_t bank_action_t; +#define BANK_ORIGINATOR_PID 0x1 + +#endif /* _BANK_BANK_TYPES_H_ */ diff --git a/osfmk/conf/MASTER b/osfmk/conf/MASTER deleted file mode 100644 index f3dd6fcc8..000000000 --- a/osfmk/conf/MASTER +++ /dev/null @@ -1,322 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -####################################################################### -# -# Master machine independent configuration file. -# -# Specific configuration files are created based on this and -# the machine specific master file using the doconf script. -# -# Any changes to the master configuration files will affect all -# other configuration files based upon it. -# -####################################################################### -# -# To build a configuration, execute "doconf ." -# Configurations are specified in the "Configurations:" section -# of the MASTER and MASTER.* files as follows: -# -# = [ ... ] -# -# Lines in the MASTER and MASTER.* files are selected based on -# the attribute selector list, found in a comment at the end of -# the line. This is a list of attributes separated by commas. -# The "!" operator selects the line if none of the attributes are -# specified. -# -# For example: -# -# selects a line if "foo" or "bar" are specified. -# selects a line if neither "foo" nor "bar" is -# specified. -# -# Lines with no attributes specified are selected for all -# configurations. -# -####################################################################### -# SYSTEM SIZE CONFIGURATION (select exactly one) -# -# xlarge = extra large scale system configuration -# large = large scale system configuration -# medium = medium scale system configuration -# small = small scale system configuration -# xsmall = extra small scale system configuration -# bsmall = special extra small scale system configuration -# -####################################################################### -# -# Basic compilation options. -# -# The MACH ident is passed to every kernel file compilation as -DMACH. -# This is useful in order to #ifdef code that is intended to be used in -# a MACH kernel. -# -ident MACH -############################################################################## -# -# MACH configuration options. -# -# TASK_SWAPPER enables code that manages demand for physical memory by -# forcibly suspending tasks when the demand exceeds supply. This -# option should be on. -# -options MACH_KERNEL -options MACH_PAGEMAP # -options MACH_LOAD -options MACH_RT -options TASK_SWAPPER # -pseudo-device test_device 1 -options ADVISORY_PAGEOUT -########################################################## -# -# conf/config.debug -# -# This defines configuration options that are normally used only during -# kernel code development and debugging. 
They add run-time error checks or -# statistics gathering, which will slow down the system -# -########################################################## -# -# MACH_ASSERT controls the assert() and ASSERT() macros, used to verify the -# consistency of various algorithms in the kernel. The performance impact -# of this option is significant. -# -options MACH_ASSERT # # -# -# MACH_DEBUG enables the mach_debug_server, a message interface used to -# retrieve or control various statistics. This interface may expose data -# structures that would not normally be allowed outside the kernel, and -# MUST NOT be enabled on a released configuration. -# Other options here enable information retrieval for specific subsystems -# -options MACH_DEBUG # # -options MACH_IPC_DEBUG # # -# -options MACH_VM_DEBUG # # -# -# MACH_MP_DEBUG control the possible dead locks that may occur by controlling -# that IPL level has been raised down to SPL0 after some calls to -# hardclock device driver. -# -options MACH_MP_DEBUG # # -# -# ZONE_DEBUG keeps track of all zalloc()ed elements to perform further -# operations on each element. -# -options ZONE_DEBUG # # - -options CONFIG_ZLEAKS # Live zone leak debugging # - -# -options ZONE_ALIAS_ADDR # # - - -# -# CONFIG_TASK_ZONE_INFO allows per-task zone information to be extracted -# Primarily useful for xnu debug and development. -# -options CONFIG_TASK_ZONE_INFO # -# -# CONFIG_DEBUGGER_FOR_ZONE_INFO restricts zone info so that it is only -# available when the kernel is being debugged. -# -options CONFIG_DEBUGGER_FOR_ZONE_INFO # -# -# XPR_DEBUG enables the gathering of data through the XPR macros inserted -# into various subsystems. This option is normally only enabled for -# specific performance or behavior studies, as the overhead in both -# code and data space is large. The data is normally retrieved through -# the kernel debugger (kdb) or by reading /dev/kmem. -# -options XPR_DEBUG # # -# -# MACH_LDEBUG controls the internal consistency checks and -# data gathering in the locking package. This also enables a debug-only -# version of simple-locks on uniprocessor machines. The code size and -# performance impact of this option is significant. -# -options MACH_LDEBUG # # - -# -# configuration option for full, partial, or no kernel debug event tracing -# -options KDEBUG # kernel tracing # -options IST_KDEBUG # limited tracing # -options NO_KDEBUG # no kernel tracing # - -# -# CONFIG_DTRACE enables code needed to support DTrace. Currently this is -# only used for delivery of traps/interrupts to DTrace. -# -options CONFIG_DTRACE # # - -# MACH_COUNTERS enables code that handles various counters in the system. -# -options MACH_COUNTERS # # - -# DEVELOPMENT define for development builds -options DEVELOPMENT # dev kernel # - -########################################################## -# -# This defines configuration options that are normally used only during -# kernel code development and performance characterization. They add run-time -# statistics gathering, which will slow down the system, -# -########################################################## -# -# MACH_IPC_STATS controls the collection of statistics in the MACH IPC -# subsystem. -# -#options MACH_IPC_STATS -# -# MACH_CO_INFO controls the collection of callout statistics. This -# information is retrieved via a mach_debug message, or though -# /dev/kmem. The runtime impact of the option is minimal. 
-# -#options MACH_CO_INFO -# -# MACH_CLUSTER_STATS controls the collection of various statistics concerning -# the effectiveness and behavior of the clustered pageout and pagein -# code. -# -#options MACH_CLUSTER_STATS -# -# MACH_SCTIMES enables optional code that can be used to measure the -# execution overhead of performing Mach traps with 1 through 6 -# arguments. -# -#options MACH_SCTIMES -# -# MACH_COUNTERS enables various code-path statistics. Most of these -# are accessed through the debugger. -# -options MACH_COUNTERS # # - -# -# configuration option for including cypto code -# -options CRYPTO # - -# HIBERNATION - include hibernation code -# -options HIBERNATION # # - -# CONFIG_SLEEP - include sleep power state code -# -options CONFIG_SLEEP # # - -# CONFIG_KEXT_BASEMENT - alloc post boot loaded kexts after prelinked kexts -# -options CONFIG_KEXT_BASEMENT # # - - -# -# configurable kernel related resources (CONFIG_THREAD_MAX needs to stay in -# sync with bsd/conf/MASTER until we fix the config system... todo XXX -# -options CONFIG_THREAD_MAX=2560 # -options CONFIG_THREAD_MAX=1536 # -options CONFIG_THREAD_MAX=1024 # - -options CONFIG_TASK_MAX=1024 # -options CONFIG_TASK_MAX=768 # -options CONFIG_TASK_MAX=512 # - -options CONFIG_ZONE_MAP_MIN=12582912 # -options CONFIG_ZONE_MAP_MIN=6291456 # -options CONFIG_ZONE_MAP_MIN=1048576 # - -# Sizes must be a power of two for the zhash to -# be able to just mask off bits instead of mod -options CONFIG_ZLEAK_ALLOCATION_MAP_NUM=16384 # -options CONFIG_ZLEAK_ALLOCATION_MAP_NUM=8192 # -options CONFIG_ZLEAK_TRACE_MAP_NUM=8192 # -options CONFIG_ZLEAK_TRACE_MAP_NUM=4096 # - -# -# configurable kernel - use these options to strip strings from panic -# and printf calls. -# no_panic_str - saves around 50K of kernel footprint. -# no_printf_str - saves around 45K of kernel footprint. 
-# -options CONFIG_NO_PANIC_STRINGS # -options CONFIG_NO_PRINTF_STRINGS # -options CONFIG_NO_KPRINTF_STRINGS # - -# support dynamic signing of code -# -options CONFIG_DYNAMIC_CODE_SIGNING # - -# vc_progress_white - make the progress gear white instead of black -options CONFIG_VC_PROGRESS_WHITE # - -# secure_kernel - secure kernel from user programs -options SECURE_KERNEL # - -# -# Context switched counters -# -options CONFIG_COUNTERS # - -# -# Timeshare scheduler implementations -# -options CONFIG_SCHED_TRADITIONAL # -options CONFIG_SCHED_PROTO # -options CONFIG_SCHED_GRRR # -options CONFIG_SCHED_FIXEDPRIORITY # -options CONFIG_SCHED_GRRR_CORE # - -options CONFIG_SCHED_IDLE_IN_PLACE # -options CONFIG_GZALLOC # -# -# enable per-process memory priority tracking -# -options CONFIG_MEMORYSTATUS # - -# -# enable jetsam - used on embedded -# -options CONFIG_JETSAM # - -# -# enable freezing of suspended processes - used on embedded -# -options CONFIG_FREEZE # - -options CHECK_CS_VALIDATION_BITMAP # - -# -# Enable dispatch of memory pressure events from the vm_pageout_garbage_collect thread -# -options VM_PRESSURE_EVENTS # - -# -# Enable inheritance of importance through specially marked mach ports and for file locks -# For now debug is enabled wherever inheritance is -# -options IMPORTANCE_INHERITANCE # -options IMPORTANCE_DEBUG # - -# Enable allocation of contiguous physical memory through vm_map_enter_cpm() -options VM_CPM # - -options CONFIG_SKIP_PRECISE_USER_KERNEL_TIME # - -options CONFIG_TELEMETRY # - -# -# Switch to disable cpu, wakeup and high memory watermark monitors -# -options CONFIG_NOMONITORS # - -# -# In-kernel tests -# -options CONFIG_IN_KERNEL_TESTS # - diff --git a/osfmk/conf/MASTER.x86_64 b/osfmk/conf/MASTER.x86_64 deleted file mode 100644 index 14ec832da..000000000 --- a/osfmk/conf/MASTER.x86_64 +++ /dev/null @@ -1,81 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. 
-# -###################################################################### -# -# Standard Apple MacOS X Configurations: -# -------- ---- -------- --------------- -# -# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_gzalloc config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events kperf kpc memorystatus config_kext_basement config_telemetry importance_inheritance dynamic_codesigning ] -# DEBUG = [ RELEASE osf_debug debug mach_assert task_zone_info ] -# -# EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ] -# EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ] -# DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_counters task_zone_info ] -# -###################################################################### -# -machine "x86_64" # -cpu "x86_64" # - -pseudo-device com 2 -pseudo-device vc 1 - -# choices for platform_bus are pci at386 sqt and kkt -makeoptions OSFMK_MACHINE = "x86_64" # -makeoptions CCONFIGFLAGS = "-g -O -fno-omit-frame-pointer" # -makeoptions CCONFIGFLAGS = "-O3" # -makeoptions RELOC = "00100000" # -makeoptions SYMADDR = "00780000" # - -options GDB # GNU kernel debugger # -options DEBUG # general debugging code # -options SHOW_SPACE # print size of structures # -options EVENTMETER # event meter support # -options FP_EMUL # floating point emulation # -options PC_SUPPORT # virtual PC support # -options PROFILE # kernel profiling # -options UXPR # user-level XPR package # -config mach_kernel swap generic # - -options GPROF # kgmon profiling # - -options EVENT # - -options MACH_BSD -options IOKIT # # -options MACH_PE # # - -options MACH_KDP # KDP # -options CONFIG_SERIAL_KDP # KDP over serial # -options KPERF # # -options KPC # # -options PAE -options X86_64 -options DISPATCH_COUNTS -options PAL_I386 - -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. -# -options CONFIG_MACF # Mandatory Access Control Framework -#options CONFIG_MACF_MACH # MACF applied to Mach services -options CONFIG_AUDIT # Kernel auditing - -# -# code decryption... 
used on i386 for DSMOS -# must be set in all the bsd/conf and osfmk/conf MASTER files -# -options CONFIG_CODE_DECRYPTION - -options CONFIG_MCA # Machine Check Architecture # -options CONFIG_VMX # Virtual Machine Extensions # -options CONFIG_MTRR # Memory Type Range Registers # - -options NO_NESTED_PMAP # -options CONFIG_NO_NESTED_PMAP # -options CONFIG_NESTED_PMAP # diff --git a/osfmk/conf/Makefile b/osfmk/conf/Makefile index 25a42ef5e..76db9a7d8 100644 --- a/osfmk/conf/Makefile +++ b/osfmk/conf/Makefile @@ -6,20 +6,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) - -$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) - $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ - cd $(addsuffix /conf, $(TARGET)); \ - rm -f $(notdir $?); \ - cp $? .; \ - if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); +# Special handling for x86_64h which shares a MASTER config file with x86_64: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +DOCONF_ARCH_CONFIG_LC = x86_64 +else +DOCONF_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + +MASTERCONFDIR = $(SRCROOT)/config +DOCONFDEPS = $(addprefix $(MASTERCONFDIR)/, MASTER MASTER.$(DOCONF_ARCH_CONFIG_LC)) \ + $(addprefix $(SOURCE)/, Makefile.template Makefile.$(DOCONF_ARCH_CONFIG_LC) files files.$(DOCONF_ARCH_CONFIG_LC)) + +ifneq (,$(wildcard $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC))) +DOCONFDEPS += $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +endif + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile: $(SRCROOT)/SETUP/config/doconf $(OBJROOT)/SETUP/config $(DOCONFDEPS) + $(_v)$(MKDIR) $(TARGET)/$(CURRENT_KERNEL_CONFIG) + $(_v)$(SRCROOT)/SETUP/config/doconf -c -cpu $(DOCONF_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) -s $(SOURCE) -m $(MASTERCONFDIR) $(CURRENT_KERNEL_CONFIG); do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile $(_v)${MAKE} \ diff --git a/osfmk/conf/Makefile.template b/osfmk/conf/Makefile.template index e8a23fd21..a1354f469 100644 --- a/osfmk/conf/Makefile.template +++ b/osfmk/conf/Makefile.template @@ -16,15 +16,16 @@ include $(MakeInc_def) # # XXX: CFLAGS # -CFLAGS+= -include meta_features.h -DMACH_KERNEL_PRIVATE +CFLAGS+= -include meta_features.h -DMACH_KERNEL_PRIVATE -DMACH_KERNEL # Objects that don't want -Wcast-align warning (8474835) OBJS_NO_CAST_ALIGN = \ + atm_notification_user.o \ model_dep.o \ chud_thread.o \ chud_thread_arm.o \ video_console.o \ - kdp.o \ + kern_stackshot.o \ kdp_udp.o \ kdp_machdep.o \ host.o \ @@ -52,7 +53,7 @@ OBJS_NO_CAST_ALIGN = \ startup64.o \ affinity.o \ sched_grrr.o \ - sched_fixedpriority.o \ + sched_proto.o \ stack.o \ task_policy.o \ wait_queue.o \ @@ -62,10 +63,24 @@ OBJS_NO_CAST_ALIGN = \ status.o \ machine_routines.o \ loose_ends.o \ - sleh.o - + fips_sha1.o \ + prng_yarrow.o \ + sha1mod.o \ + sleh.o \ + ccdigest_final_64be.o \ + ccdigest_init.o \ + ccdigest_update.o \ + cchmac_final.o \ + cchmac_init.o \ + ccsha1.o \ + + +# Objects that don't want -Wsign-compare warning 
(15294427) +OBJS_NO_SIGN_COMPARE = \ + atm_notification_user.o $(foreach file,$(OBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Wno-cast-align))) +$(foreach file,$(OBJS_NO_SIGN_COMPARE),$(eval $(call add_perfile_cflags,$(file),-Wno-sign-compare))) # # Do not provide CTF symbolic these files @@ -81,6 +96,7 @@ INCFLAGS_MAKEFILE= -I$(SOURCE)libsa # Directories for mig generated files # COMP_SUBDIRS = \ + atm \ default_pager \ device \ mach \ @@ -107,27 +123,34 @@ COMP_SUBDIRS = \ %CFILES +%CXXFILES + %SFILES %MACHDEP -# -# OBJSDEPS is the set of files (defined in the machine dependent -# template if necessary) which all objects depend on (such as an -# in-line assembler expansion filter) -# -${OBJS}: ${OBJSDEPS} - -LDOBJS = $(OBJS) - -$(COMPONENT).filelist: $(LDOBJS) assym.s +# Rebuild if per-file overrides change +${OBJS}: $(firstword $(MAKEFILE_LIST)) + +# Rebuild if global compile flags change +$(COBJS): .CFLAGS +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) +$(CXXOBJS): .CXXFLAGS +.CXXFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KC++) $(CXXFLAGS) $(INCFLAGS) +$(SOBJS): .SFLAGS +.SFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) + +$(COMPONENT).filelist: $(OBJS) $(_v)for hib_file in ${HIB_FILES}; \ do \ $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ mv $${hib_file}__ $${hib_file} ; \ done @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${LDOBJS}; do \ + $(_v)( for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist @@ -143,11 +166,15 @@ GENASSYM_LOCATION = $(CURRENT_ARCH_CONFIG_LC) ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64) GENASSYM_LOCATION = i386 +else ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +GENASSYM_LOCATION = i386 endif +-include genassym.d +genassym.o: .CFLAGS $(firstword $(MAKEFILE_LIST)) genassym.o: $(SOURCE_DIR)/$(COMPONENT)/$(GENASSYM_LOCATION)/genassym.c @echo GENASSYM $< - $(_v)${KCC} $(subst -flto,,${CFLAGS}) -MD -S -o ${@} ${INCFLAGS} $< + $(_v)${KCC} ${CFLAGS} ${CFLAGS_NOLTO_FLAG} -MD -S -o ${@} ${INCFLAGS} $< assym.s: genassym.o $(_v)sed -e '/#DEFINITION#/!d' -e 's/^.*#DEFINITION#//' -e 's/\$$//' -e 'p' -e 's/#//2' -e 's/[^A-Za-z0-9_]*\([A-Za-z0-9_]*\)/ \1_NUM/2' genassym.o > $@ diff --git a/osfmk/conf/files b/osfmk/conf/files index 2c44e99ff..d6313ca2e 100644 --- a/osfmk/conf/files +++ b/osfmk/conf/files @@ -29,12 +29,10 @@ OPTIONS/hibernation optional hibernation OPTIONS/crypto optional crypto -OPTIONS/dli optional dli +OPTIONS/encrypted_swap optional encrypted_swap OPTIONS/kdebug optional kdebug OPTIONS/mach_assert optional mach_assert OPTIONS/mach_debug optional mach_debug -OPTIONS/mach_machine_routines.h optional mach_machine_routines -OPTIONS/machine_timer_routines optional machine_timer_routines # OPTIONS/mach_cluster_stats optional mach_cluster_stats OPTIONS/mach_counters optional mach_counters @@ -47,15 +45,10 @@ OPTIONS/mach_ldebug optional mach_ldebug OPTIONS/mach_mp_debug optional mach_mp_debug OPTIONS/mach_pagemap optional mach_pagemap OPTIONS/mach_rt optional mach_rt -OPTIONS/advisory_pageout optional advisory_pageout OPTIONS/mach_vm_debug optional mach_vm_debug OPTIONS/mach_page_hash_stats optional mach_page_hash_stats OPTIONS/mig_debug optional mig_debug -OPTIONS/time_stamp optional time_stamp OPTIONS/xpr_debug optional xpr_debug -OPTIONS/bootstrap_symbols optional bootstrap_symbols -OPTIONS/fast_tas optional fast_tas -OPTIONS/power_save optional power_save OPTIONS/zone_debug optional zone_debug OPTIONS/zone_alias_addr optional 
zone_alias_addr OPTIONS/vm_cpm optional vm_cpm @@ -63,6 +56,7 @@ OPTIONS/task_swapper optional task_swapper OPTIONS/stack_usage optional stack_usage OPTIONS/importance_inheritance optional importance_inheritance OPTIONS/importance_debug optional importance_debug +OPTIONS/config_ecc_logging optional config_ecc_logging OPTIONS/config_dtrace optional config_dtrace OPTIONS/config_counters optional config_counters @@ -101,11 +95,13 @@ osfmk/default_pager/dp_memory_object.c standard ./UserNotification/UNDReplyServer.c standard osfmk/UserNotification/KUNCUserNotifications.c standard -osfmk/kdp/kdp.c optional mach_kdp +osfmk/kdp/kdp.c optional config_kdp_interactive_debugging +osfmk/kern/kern_stackshot.c standard osfmk/kdp/kdp_udp.c optional mach_kdp osfmk/kdp/kdp_serial.c optional config_serial_kdp osfmk/ipc/ipc_entry.c standard osfmk/ipc/ipc_hash.c standard +osfmk/ipc/ipc_importance.c optional importance_inheritance osfmk/ipc/ipc_init.c standard osfmk/ipc/ipc_kmsg.c standard osfmk/ipc/ipc_mqueue.c standard @@ -116,7 +112,7 @@ osfmk/ipc/ipc_pset.c standard osfmk/ipc/ipc_right.c standard osfmk/ipc/ipc_space.c standard osfmk/ipc/ipc_table.c standard -osfmk/ipc/ipc_labelh.c standard +osfmk/ipc/ipc_voucher.c standard osfmk/ipc/mach_debug.c standard osfmk/ipc/mach_kernelrpc.c standard osfmk/ipc/mach_msg.c standard @@ -128,8 +124,10 @@ osfmk/kern/audit_sessionport.c optional config_audit osfmk/kern/btlog.c standard osfmk/kern/clock.c standard osfmk/kern/clock_oldops.c standard +osfmk/kern/coalition.c optional config_coalitions osfmk/kern/counters.c standard osfmk/kern/debug.c standard +osfmk/kern/energy_perf.c standard osfmk/kern/exception.c standard osfmk/kern/extmod_statistics.c standard osfmk/kern/host.c standard @@ -142,6 +140,7 @@ osfmk/kern/ipc_misc.c standard osfmk/kern/ipc_sync.c standard osfmk/kern/ipc_tt.c standard osfmk/kern/kalloc.c standard +osfmk/kern/kern_ecc.c optional config_ecc_logging osfmk/kern/ledger.c standard osfmk/kern/locks.c standard osfmk/kern/machine.c standard @@ -152,13 +151,13 @@ osfmk/kern/printf.c standard osfmk/kern/priority.c standard osfmk/kern/processor.c standard osfmk/kern/processor_data.c standard -osfmk/kern/queue.c standard osfmk/kern/sched_average.c standard +osfmk/kern/sched_dualq.c optional config_sched_multiq osfmk/kern/sched_prim.c standard osfmk/kern/sched_proto.c optional config_sched_proto osfmk/kern/sched_grrr.c optional config_sched_grrr_core -osfmk/kern/sched_fixedpriority.c optional config_sched_fixedpriority -osfmk/kern/security.c optional config_macf +osfmk/kern/sched_multiq.c optional config_sched_multiq +osfmk/kern/sfi.c standard osfmk/kern/stack.c standard osfmk/kern/startup.c standard osfmk/kern/sync_lock.c standard @@ -196,14 +195,19 @@ osfmk/pmc/pmc.c standard ./mach/mach_notify_user.c standard ./mach/mach_port_server.c standard ./mach/mach_vm_server.c standard +./mach/mach_voucher_server.c standard +./mach/mach_voucher_attr_control_server.c standard ./mach/memory_object_server.c standard ./mach/memory_object_control_server.c standard ./mach/memory_object_default_server.c standard -./mach/memory_object_name_server.c standard ./mach/upl_server.c standard ./mach/audit_triggers_user.c standard ./mach/task_access_user.c standard ./mach/telemetry_notification_user.c optional config_telemetry +osfmk/bank/bank.c optional config_bank +osfmk/atm/atm.c optional config_atm +./atm/atm_notification_user.c optional config_atm +./mach/coalition_notification_user.c optional config_coalitions # # For now, no external pagers # @@ -215,11 +219,11 @@ 
osfmk/pmc/pmc.c standard ./mach/task_server.c standard ./mach/thread_act_server.c standard ./mach/vm32_map_server.c standard -./mach/security_server.c optional config_macf osfmk/vm/bsd_vm.c optional mach_bsd osfmk/vm/vm_compressor.c standard osfmk/vm/vm_compressor_pager.c standard +osfmk/vm/vm_phantom_cache.c optional config_phantom_cache osfmk/vm/default_freezer.c optional config_freeze osfmk/vm/device_vm.c standard osfmk/vm/memory_object.c standard @@ -283,3 +287,24 @@ osfmk/kern/kpc_common.c optional kpc osfmk/console/serial_general.c standard osfmk/kern/telemetry.c optional config_telemetry + +# Built-in corecrypto for early_random(): +osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c standard +osfmk/corecrypto/ccdigest/src/ccdigest_init.c standard +osfmk/corecrypto/ccdigest/src/ccdigest_update.c standard +osfmk/corecrypto/cchmac/src/cchmac.c standard +osfmk/corecrypto/cchmac/src/cchmac_init.c standard +osfmk/corecrypto/cchmac/src/cchmac_update.c standard +osfmk/corecrypto/cchmac/src/cchmac_final.c standard +osfmk/corecrypto/ccsha1/src/ccdigest_final_64be.c standard +osfmk/corecrypto/ccsha1/src/ccsha1_eay.c standard +osfmk/corecrypto/ccsha1/src/ccsha1_initial_state.c standard + +osfmk/prng/random.c standard +osfmk/prng/prng_yarrow.c standard +osfmk/prng/fips_sha1.c standard +osfmk/prng/YarrowCoreLib/port/smf.c standard +osfmk/prng/YarrowCoreLib/src/comp.c standard +osfmk/prng/YarrowCoreLib/src/prng.c standard +osfmk/prng/YarrowCoreLib/src/sha1mod.c standard +osfmk/prng/YarrowCoreLib/src/yarrowUtils.c standard diff --git a/osfmk/conf/files.x86_64 b/osfmk/conf/files.x86_64 index 168f91883..790657904 100644 --- a/osfmk/conf/files.x86_64 +++ b/osfmk/conf/files.x86_64 @@ -1,21 +1,8 @@ -OPTIONS/show_space optional show_space -OPTIONS/gdb optional gdb -OPTIONS/iplmeas optional iplmeas OPTIONS/fb optional fb -OPTIONS/config_nested_pmap optional config_nested_pmap -OPTIONS/config_no_nested_pmap optional config_no_nested_pmap - -#machdep/x86_64/unix_signal.c standard -#machdep/x86_64/unix_startup.c standard - OPTIONS/debug optional debug - OPTIONS/gprof optional gprof -OPTIONS/dynamic_num_nodes optional dynamic_num_nodes -OPTIONS/vtoc_compat optional vtoc_compat -OPTIONS/fddi optional fddi osfmk/vm/vm_apple_protect.c standard @@ -96,12 +83,10 @@ osfmk/i386/acpi.c standard osfmk/i386/mtrr.c optional config_mtrr -osfmk/console/i386/serial_console.c optional com device-driver +osfmk/console/i386/serial_console.c optional serial_console -osfmk/console/video_console.c optional vc device-driver -osfmk/console/i386/video_scroll.c optional vc device-driver - -osfmk/kern/etap_map.c optional etap device-driver +osfmk/console/video_console.c optional video_console +osfmk/console/i386/video_scroll.c optional video_console #osfmk/profiling/x86_64/profile-md.c optional gprof #osfmk/profiling/x86_64/profile-asm.s optional gprof @@ -124,6 +109,8 @@ osfmk/i386/ucode.c standard osfmk/i386/vmx/vmx_cpu.c optional config_vmx osfmk/i386/vmx/vmx_shims.c optional config_vmx +osfmk/kern/hv_support.c optional hypervisor + # DUMMIES TO FORCE GENERATION OF .h FILES #osfmk/OPTIONS/ln optional ln #osfmk/OPTIONS/eisa optional eisa @@ -137,3 +124,5 @@ osfmk/x86_64/kpc_x86.c optional kpc osfmk/i386/startup64.c standard osfmk/x86_64/idt64.s standard + +osfmk/i386/panic_hooks.c standard diff --git a/osfmk/console/art/ProgressBarEmptyLeftEndcap.png b/osfmk/console/art/ProgressBarEmptyLeftEndcap.png new file mode 100644 index 0000000000000000000000000000000000000000..d0debd13f536567f28bf2d45db619d24dea12512 GIT binary patch literal 
3764 [base85 payload omitted]

[GIT binary patch payloads omitted here: this hunk adds the boot progress-bar
artwork as new PNG files under osfmk/console/art/,
ProgressBar{Empty,Full}{LeftEndcap,Middle,RightEndcap}.png at 1x and @2x.
The diff header of the Objective-C generator tool that follows, which renders
these PNGs into C byte arrays, is garbled in this capture; its recoverable
body begins below.]

+// [start of this build comment lost] /tmp/xx.c; cc /tmp/xx.c -Wall; cat /tmp/xx.c
+
+/* The six header names below were lost in this capture; they are
+ * reconstructed from usage (NSBitmapImageRep, printf, malloc, assert). */
+#import <Foundation/Foundation.h>
+#import <AppKit/AppKit.h>
+#import <stdio.h>
+#import <stdlib.h>
+#include <strings.h>
+#import <assert.h>
+
+
+#define MAX_COLORS 256
+
+typedef struct {
+	uint8_t r;
+	uint8_t g;
+	uint8_t b;
+} pixel_t;
+
+static uint32_t clut_size = 0;
+static pixel_t clut[MAX_COLORS];
+
+static uint8_t
+lookup_color(uint8_t r, uint8_t g, uint8_t b)
+{
+	unsigned int i;
+
+	for (i = 0; i < clut_size; i++) {
+		if (clut[i].r == r &&
+		    clut[i].g == g &&
+		    clut[i].b == b) {
+			return i;
+		}
+	}
+	if (clut_size >= MAX_COLORS) {
+		printf("Image must have no more than 256 unique pixel colors\n");
+		exit(1);
+	}
+	clut[clut_size].r = r;
+	clut[clut_size].g = g;
+	clut[clut_size].b = b;
+
+	return (uint8_t)clut_size++;
+}
+
+void print_buffer(uint8_t * buffer, size_t width, size_t height, size_t row)
+{
+	printf("{");
+	for (int y = 0; y < height; y++)
+	{
+		printf("\n ");
+		for (int x = 0; x < width; x++)
+		{
+			printf("0x%02x,", buffer[x + y*row]);
+		}
+	}
+	printf("\n}");
+}
+
+int onefile(const char * filename, int w, int h)
+{
+	int size;
+	uint8_t color;
+
+	FILE *file;
+	if ((file = fopen(filename, "r")) == NULL) {
+		printf("ERROR!!! cannot open resource file [%s]\n", filename);
+		return 1;
+	}
+	fclose(file);
+
+	NSString* filePath = [NSString stringWithUTF8String:filename];
+	NSData* fileData = [NSData dataWithContentsOfFile:filePath];
+	NSBitmapImageRep* bitmapImageRep = [[NSBitmapImageRep alloc] initWithData:fileData];
+	NSSize imageSize = [bitmapImageRep size];
+
+	size_t image_length = (int)imageSize.width * (int)imageSize.height;
+	uint8_t* uncompressed_color_buffer = malloc(image_length);
+	uint8_t* uncompressed_alpha_buffer = malloc(image_length);
+
+	bzero(clut, sizeof(clut));
+
+	clut_size = 0;
+	size = 0;
+
+	for (int y = 0; y < imageSize.height; y++) {
+		for (int x = 0; x < imageSize.width; x++) {
+			NSUInteger pixel[4] = {};
+			[bitmapImageRep getPixel:pixel atX:x y:y];
+
+			color = lookup_color((uint8_t)pixel[0],
+			                     (uint8_t)pixel[1],
+			                     (uint8_t)pixel[2]);
+
+			assert(color <= 1);
+			uint8_t alpha = pixel[3];
+			assert((alpha != 0) == color);
+
+			alpha = 255 - alpha;
+
+			uncompressed_color_buffer[size] = color;
+			uncompressed_alpha_buffer[size] = alpha;
+			size++;
+		}
+	}
+
+	assert(clut_size == 2);
+	assert(clut[0].r == 0);
+	assert(clut[0].g == 0);
+	assert(clut[0].b == 0);
+	assert(clut[1].r == 0xff);
+	assert(clut[1].g == 0xff);
+	assert(clut[1].b == 0xff);
+
+	printf("\n");
+
+	assert(w <= imageSize.width);
+	assert(h <= imageSize.height);
+
+	print_buffer(uncompressed_alpha_buffer, w, h, imageSize.width);
+
+	if (uncompressed_color_buffer != NULL) {
+		free(uncompressed_color_buffer);
+	}
+	if (uncompressed_alpha_buffer != NULL) {
+		free(uncompressed_alpha_buffer);
+	}
+
+	return 0;
+}
+
+
+int main(int argc, char * argv[])
+{
+	printf("#include \n\n");	/* emitted header name lost in this capture */
+
+
+	printf("\nstatic const unsigned char progressmeter_leftcap1x[2][%d * %d] = {", 9, 18);
+	onefile("ProgressBarFullLeftEndCap.png", 9, 18);
+	printf(",");
+	onefile("ProgressBarEmptyLeftEndCap.png", 9, 18);
+	printf("};\n");
+
+	printf("\nstatic const unsigned char progressmeter_leftcap2x[2][4 * %d * %d] = {", 9, 18);
+	onefile("ProgressBarFullLeftEndCap@2x.png", 2*9, 2*18);
+	printf(",");
+	onefile("ProgressBarEmptyLeftEndCap@2x.png", 2*9, 2*18);
+	printf("};\n");
+
+	printf("\nstatic const unsigned char progressmeter_middle1x[2][%d * %d] = {", 1, 18);
+	onefile("ProgressBarFullMiddle.png", 1, 18);
+	printf(",");
+	onefile("ProgressBarEmptyMiddle.png", 1, 18);
+	printf("};\n");
+
+	printf("\nstatic const unsigned char progressmeter_middle2x[2][2 * %d * %d] = {", 1, 18);
+	onefile("ProgressBarFullMiddle@2x.png", 1, 2*18);
+	printf(",");
+	onefile("ProgressBarEmptyMiddle@2x.png", 1, 2*18);
+	printf("};\n");
+
+	printf("\nstatic const unsigned char progressmeter_rightcap1x[2][%d * %d] = {", 9, 18);
+	onefile("ProgressBarFullRightEndCap.png", 9, 18);
+	printf(",");
+	onefile("ProgressBarEmptyRightEndCap.png", 9, 18);
+	printf("};\n");
+
+	printf("\nstatic const unsigned char progressmeter_rightcap2x[2][4 * %d * %d] = {", 9, 18);
+	onefile("ProgressBarFullRightEndCap@2x.png", 2*9, 2*18);
+	printf(",");
+	onefile("ProgressBarEmptyRightEndCap@2x.png", 2*9, 2*18);
+	printf("};\n");
+
+
+}
+
diff --git a/osfmk/console/art/scalegear.c b/osfmk/console/art/scalegear.c
new file mode 100644
index 000000000..91051837b
--- /dev/null
+++ b/osfmk/console/art/scalegear.c
@@ -0,0 +1,53 @@
+//
+// cc scalegear.c -framework Accelerate -g -Wall
+#include <stdio.h>	/* header names lost in this capture; reconstructed from usage */
+#include <stdlib.h>
+#include <Accelerate/Accelerate.h>
+
+#include "../../../pexpert/pexpert/GearImage.h"
+
+int main(int argc, char * argv[])
+{
+	vImage_Buffer vs;
+	vImage_Buffer vd;
+	vImage_Error verr;
+	uint32_t i, data32;
+	uint8_t data8;
+
+	vs.width = kGearWidth * 2;
+	vs.height = kGearHeight * 2 * kGearFrames;
+	vs.rowBytes = vs.width * sizeof(uint32_t);
+	vs.data = malloc(vs.height * vs.rowBytes);
+
+	vd.width = 1.5 * vs.width;
+	vd.height = 1.5 * vs.height;
+	vd.rowBytes = vd.width * sizeof(uint32_t);
+	vd.data = malloc(vd.height * vd.rowBytes);
+
+	for (i = 0; i < vs.width * vs.height; i++)
+	{
+		data32 = gGearPict2x[i];
+		data32 = (0xFF000000 | (data32 << 16) | (data32 << 8) | data32);
+		((uint32_t *)vs.data)[i] = data32;
+	}
+
+	verr = vImageScale_ARGB8888(&vs, &vd, NULL, kvImageHighQualityResampling);
+
+	if (kvImageNoError != verr) exit(1);
+
+	printf("const unsigned char gGearPict3x[9*kGearFrames*kGearWidth*kGearHeight] = {");
+
+	for (i = 0; i < vd.width * vd.height; i++)
+	{
+		data32 = ((uint32_t *)vd.data)[i];
+		data8 = (0xFF & data32);
+		if (data32 != (0xFF000000 | (data8 << 16) | (data8 << 8) | data8)) exit(1);
+
+		if (0 == (15 & i)) printf("\n ");
+		printf("0x%02x,", data8);
+	}
+	printf("\n};\n");
+
+	exit(0);
+}
diff --git a/osfmk/console/i386/serial_console.c b/osfmk/console/i386/serial_console.c
index 7cac3d45d..1758e7a5a 100644
--- a/osfmk/console/i386/serial_console.c
+++ b/osfmk/console/i386/serial_console.c
@@ -28,6 +28,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/osfmk/console/panic_ui/README b/osfmk/console/panic_ui/README
deleted file mode 100644
index f8031a2ad..000000000
--- a/osfmk/console/panic_ui/README
+++ /dev/null
@@ -1,74 +0,0 @@
-Creating a Panic UI image (either the default or loadable)
-
-The key steps are: create an indexed image using the MacOS X system
-8 clut, saved in QuickTime uncompressed 256 color format. Run it
-through the genimage tool to create a C structure or a kernel
-loadable file. This code all has byte dependencies in it, therefore
-this all must be done on a PowerPC machine.
-
-
-===== Create the image
-
-Using an application like Photoshop, create an image to be used as the image
-displayed at panic time. Your selection of colors is limited to those found in
-the MacOS X system 8 clut; in the application you're using, make sure you are
-in "indexed mode" and that the supplied CLUT (appleClut8.act) has been selected.
-
-* The appleClut8.act is the default Mac OS X CLUT.
-
-Keep in mind the following.
-
-* There must be at least 20 lines at the bottom of the image reserved. This is
-used by the system for displaying extra panic information. There can be more than
-20 lines, but you'll have to provide this information when generating the image for
-the kernel.
-
-* You must determine the colors used by the font for displaying the panic information.
-There are foreground and background colors. The default foreground is "100% White".
-It is represented by a 24-bit value of 0xFFFFFF. The default background is
-"13% White, or Dark Gray". It is represented by a 24-bit value of 0x222222. To change
-the defaults, you'll have to provide this information when generating the image for
-the kernel.
-
-Save the completed image as a TIFF (still indexed off the CLUT).
-
-
-===== Convert the TIFF indexed image to QuickTime RAW
-
-Using the Preview application from 10.3.x, open the TIFF image. Use
-File:Export to save the TIFF image in QuickTime image format with
-options of "None" for compression and "256 Colors" for the depth.
-Quality should be "Best". The saved results should be a .qtif
-formatted RAW image.
-
-
-===== Generate an image for the kernel.
-
-To generate the default kernel panic image file "panic_image.c", in your working
-directory, build the program genimage:
-cc -o genimage genimage.c
-
-execute:
-
-genimage -i -n -fg <24-bit color> -bg <24-bit color>
-** options other than -i are optional.
-
-To generate a kernel loadable panic image file, build the qtif2kraw binary:
-cc -o qtif2kraw qtif2kraw.c
-
-execute:
-
-qtif2kraw -i -o -n -fg <24-bit color> -bg <24-bit color>
-** options other than -i and -o are optional.
-
-
-
-===== Other Info
-
-The reason an 8-bit image was chosen was because it is easy to convert to 24 or 16 bit colors.
-The system does not typically run in 8-bit mode. If the system is in 8-bit mode, then we have
-to check to see if the active CLUT is the same as the one that the image was created with. If the
-CLUTs are different, the image is converted to grayscale and the nearest matching gray in the active
-CLUT is used.
-
-
diff --git a/osfmk/console/panic_ui/appleclut8.h b/osfmk/console/panic_ui/appleclut8.h
deleted file mode 100644
index 0e499f5d5..000000000
--- a/osfmk/console/panic_ui/appleclut8.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// This bootClut was generated from appleClut8.act
-unsigned int appleClut8[256] = {
-// 00
-	0xFFFFFF, 0xFFFFCC, 0xFFFF99, 0xFFFF66, 0xFFFF33, 0xFFFF00, 0xFFCCFF, 0xFFCCCC,
-	0xFFCC99, 0xFFCC66, 0xFFCC33, 0xFFCC00, 0xFF99FF, 0xFF99CC, 0xFF9999, 0xFF9966,
-// 10
-	0xFF9933, 0xFF9900, 0xFF66FF, 0xFF66CC, 0xFF6699, 0xFF6666, 0xFF6633, 0xFF6600,
-	0xFF33FF, 0xFF33CC, 0xFF3399, 0xFF3366, 0xFF3333, 0xFF3300, 0xFF00FF, 0xFF00CC,
-// 20
-	0xFF0099, 0xFF0066, 0xFF0033, 0xFF0000, 0xCCFFFF, 0xCCFFCC, 0xCCFF99, 0xCCFF66,
-	0xCCFF33, 0xCCFF00, 0xCCCCFF, 0xCCCCCC, 0xCCCC99, 0xCCCC66, 0xCCCC33, 0xCCCC00,
-// 30
-	0xCC99FF, 0xCC99CC, 0xCC9999, 0xCC9966, 0xCC9933, 0xCC9900, 0xCC66FF, 0xCC66CC,
-	0xCC6699, 0xCC6666, 0xCC6633, 0xCC6600, 0xCC33FF, 0xCC33CC, 0xCC3399, 0xCC3366,
-// 40
-	0xCC3333, 0xCC3300, 0xCC00FF, 0xCC00CC, 0xCC0099, 0xCC0066, 0xCC0033, 0xCC0000,
-	0x99FFFF, 0x99FFCC, 0x99FF99, 0x99FF66, 0x99FF33, 0x99FF00, 0x99CCFF, 0x99CCCC,
-// 50
-	0x99CC99, 0x99CC66, 0x99CC33, 0x99CC00, 0x9999FF, 0x9999CC, 0x999999, 0x999966,
-	0x999933, 0x999900, 0x9966FF, 0x9966CC, 0x996699, 0x996666, 0x996633, 0x996600,
-// 60
-	0x9933FF, 0x9933CC, 0x993399, 0x993366, 0x993333, 0x993300, 0x9900FF, 0x9900CC,
-	0x990099, 0x990066, 0x990033, 0x990000, 0x66FFFF, 0x66FFCC, 0x66FF99, 0x66FF66,
-// 70
-	0x66FF33, 0x66FF00, 0x66CCFF, 0x66CCCC, 0x66CC99, 0x66CC66, 0x66CC33, 0x66CC00,
-	0x6699FF, 0x6699CC, 0x669999, 0x669966, 0x669933, 0x669900, 0x6666FF, 0x6666CC,
-// 80
-	0x666699, 0x666666, 0x666633, 0x666600, 0x6633FF, 0x6633CC, 0x663399, 0x663366,
-	0x663333, 0x663300, 0x6600FF, 0x6600CC, 0x660099, 0x660066, 0x660033, 0x660000,
-// 90
-	0x33FFFF, 0x33FFCC, 0x33FF99, 0x33FF66, 0x33FF33, 0x33FF00, 0x33CCFF, 0x33CCCC,
-	0x33CC99, 0x33CC66, 0x33CC33, 0x33CC00, 0x3399FF, 0x3399CC, 0x339999, 0x339966,
-// A0
-	0x339933, 0x339900, 0x3366FF, 0x3366CC, 0x336699, 0x336666, 0x336633, 0x336600,
-	0x3333FF, 0x3333CC, 0x333399, 0x333366, 0x333333, 0x333300, 0x3300FF, 0x3300CC,
-// B0
-	0x330099, 0x330066, 0x330033, 0x330000, 0x00FFFF, 0x00FFCC, 0x00FF99, 0x00FF66,
-	0x00FF33, 0x00FF00, 0x00CCFF, 0x00CCCC, 0x00CC99, 0x00CC66, 0x00CC33, 0x00CC00,
-// C0
-	0x0099FF, 0x0099CC, 0x009999, 0x009966, 0x009933, 0x009900, 0x0066FF, 0x0066CC,
-	0x006699, 0x006666, 0x006633, 0x006600, 0x0033FF, 0x0033CC, 0x003399, 0x003366,
-// D0
-	0x003333, 0x003300, 0x0000FF, 0x0000CC, 0x000099, 0x000066, 0x000033, 0xEE0000,
-	0xDD0000,
0xBB0000, 0xAA0000, 0x880000, 0x770000, 0x550000, 0x440000, 0x220000, -// E0 - 0x110000, 0x00EE00, 0x00DD00, 0x00BB00, 0x00AA00, 0x008800, 0x007700, 0x005500, - 0x004400, 0x002200, 0x001100, 0x0000EE, 0x0000DD, 0x0000BB, 0x0000AA, 0x000088, -// F0 - 0x000077, 0x000055, 0x000044, 0x000022, 0x000011, 0xEEEEEE, 0xDDDDDD, 0xBBBBBB, - 0xAAAAAA, 0x888888, 0x777777, 0x555555, 0x444444, 0x222222, 0x111111, 0x000000 -}; diff --git a/osfmk/console/panic_ui/generated_files/panic_image.c b/osfmk/console/panic_ui/generated_files/panic_image.c deleted file mode 100644 index 8933773fe..000000000 --- a/osfmk/console/panic_ui/generated_files/panic_image.c +++ /dev/null @@ -1,1953 +0,0 @@ -/* generated c file */ - -static const struct { - unsigned int pd_width; - unsigned int pd_height; - unsigned int bytes_per_pixel; /* 1: CLUT, 3:RGB, 4:RGBA */ - unsigned char image_pixel_data[0x880a]; -} panic_dialog = { - 472, 255, 1, -0xae,0x87,0xfd, 0x01,0x6c, 0x01,0x55, 0x80,0xbb,0xfd, 0x01,0x55, 0x01,0x6c, 0x06,0xfd, -0x01,0x6c, 0x01,0x55, 0x0b,0xfd, 0x01,0x41, 0x01,0x83, 0x24,0xfd, 0x01,0x83, 0x01,0x41, 0x80,0xa6,0xfd, -0x02,0x2b, 0x04,0xfd, 0x01,0x2b, 0x01,0x19, 0x30,0xfd, 0x01,0x2b, 0x01,0x00, 0x80,0xa9,0xfd, -0x01,0x55, 0x01,0x00, 0x04,0xfd, 0x01,0x19, 0x01,0x2b, 0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, -0x01,0x2b, 0x01,0x00, 0x0b,0xfd, 0x01,0x00, 0x01,0x41, 0x24,0xfd, 0x01,0x41, 0x01,0x00, 0x12,0xfd, -0x04,0x00, 0x01,0x07, 0x80,0x8f,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x83, 0x30,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x55, 0x01,0x6c, 0x22,0xfd, -0x01,0x6c, 0x01,0x55, 0x10,0xfd, 0x01,0x41, 0x01,0x83, 0x52,0xfd, 0x01,0x55, 0x01,0x6c, 0x17,0xfd, -0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, -0x01,0x2b, 0x01,0x00, 0x0b,0xfd, 0x01,0x00, 0x01,0x41, 0x1f,0xfd, 0x01,0x83, 0x01,0x41, 0x03,0xfd, -0x01,0x41, 0x01,0x00, 0x12,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0x41, 0x01,0x07, 0x01,0x00, 0x01,0x2b, -0x80,0x8f,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x19, 0x31,0xfd, 0x01,0x2b, -0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x22,0xfd, 0x01,0x2b, 0x01,0x00, 0x10,0xfd, 0x01,0x00, -0x01,0x41, 0x52,0xfd, 0x01,0x00, 0x01,0x2b, 0x17,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, -0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x0b,0xfd, 0x01,0x00, -0x01,0x41, 0x1f,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x12,0xfd, 0x01,0x00, -0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x80,0x8f,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x07, -0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x02,0x00, 0x01,0x19, -0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x83, 0x02,0xfd, -0x01,0x55, 0x05,0x00, 0x05,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x04,0x00, -0x01,0x83, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x02,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x83, -0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0xfd, 0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2e, -0x03,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x04,0x00, -0x04,0xfd, 0x01,0x19, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x19, 0x03,0x00, -0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x01,0x19, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x83, 
0x01,0x2b, 0x03,0x00, 0x02,0xfd, -0x01,0x19, 0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2e, 0x02,0x00, 0x01,0x2b, -0x01,0x83, 0x03,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x03,0x00, 0x01,0x55, 0x02,0xfd, -0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x09,0xfd, -0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x55, 0x05,0x00, 0x07,0xfd, 0x01,0x2b, 0x02,0x00, -0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x01,0xfd, 0x02,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, -0x01,0x00, 0x01,0x19, 0x02,0x00, 0x01,0x19, 0x06,0xfd, 0x04,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x00, -0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x01,0xfd, -0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, -0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x71,0xfd, -0x01,0x19, 0x02,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x83, 0x01,0x00, -0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0x83, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x02,0x00, 0x01,0x41, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x02,0x55, -0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0xfd, -0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x01,0x83, -0x06,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, -0x02,0x00, 0x01,0x41, 0x01,0x83, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x02,0x00, 0x01,0x41, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, -0x02,0xfd, 0x02,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x41, 0x07,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, -0x01,0x07, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0x83, -0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, -0x01,0x00, 0x01,0x83, 0x01,0x19, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, -0x01,0x2b, 
0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x06,0xfd, -0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x01,0x55, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, -0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x6c, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, -0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, -0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x74,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0x41, 0x01,0x07, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, -0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, -0x01,0x07, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x01,0x6c, 0x03,0xfd, 0x01,0x2b, -0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, -0x01,0x41, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x02,0x2b, -0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x04,0xfd, -0x02,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x02,0xfd, -0x01,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, -0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x55, -0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0x41, 0x01,0x2b, 0x01,0x00, -0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0xfd, -0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, -0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, -0x01,0x00, 0x01,0x07, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x02,0x2b, 0x01,0xfd, -0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, -0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, 0x01,0x07, 0x01,0x00, 0x06,0xfd, 0x04,0x00, 0x01,0x2e, -0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0x19, -0x01,0x00, 0x01,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x00, 0x01,0x83, -0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x75,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x05,0x00, 0x01,0x2b, 
0x01,0xfd, 0x06,0x00, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, -0x06,0x00, 0x02,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, -0x01,0x6c, 0x04,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, -0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, -0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, -0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, -0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0x2b, 0x01,0x19, -0x01,0x00, 0x01,0x83, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, -0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x06,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x2b, 0x01,0x2e, -0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x75,0xfd, -0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, -0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x05,0xfd, 0x01,0x2b, -0x01,0x00, 0x05,0xfd, 0x01,0x07, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, -0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x08,0xfd, 0x01,0x41, 0x01,0x00, -0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x00, -0x02,0x2b, 0x01,0x07, 0x02,0xfd, 0x02,0x2b, 0x03,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x6c, 0x04,0xfd, 0x02,0x2b, 0x01,0x87, 0x01,0x82, 0x01,0x7d, 0x02,0x2b, 0x01,0x74, -0x01,0x26, 0x01,0x00, 0x01,0x6e, 0x01,0x6d, 0x01,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, -0x01,0x2b, 0x01,0x6f, 0x01,0x71, 0x01,0x00, 0x01,0x2c, 0x01,0x7a, 0x01,0x7d, 0x01,0x58, 0x01,0x00, -0x01,0x5f, 0x01,0x8b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, -0x01,0x00, 
0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x07, 0x01,0x00, -0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x02,0x2b, 0x01,0x00, -0x01,0x55, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x41, 0x04,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, -0x01,0x07, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x2b, 0x01,0x07, 0x01,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x75,0xfd, 0x01,0x00, 0x01,0x2b, 0x04,0xfd, -0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0xfd, 0x01,0x07, -0x02,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x07, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, -0x02,0xfd, 0x01,0x2e, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x19, 0x02,0x00, -0x06,0xfd, 0x02,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, -0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, 0x01,0xfd, -0x01,0x6c, 0x01,0x41, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, -0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x07, 0x07,0xfd, 0x01,0x19, 0x02,0x00, 0x01,0x83, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, -0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0x6c, 0x02,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x08,0xfd, 0x02,0x00, 0x01,0x56, 0x01,0x7a, 0x01,0x15, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x22, -0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, -0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x02,0x00, 0x01,0x47, 0x01,0x59, 0x02,0x00, -0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0x64, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x00, -0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x55, 0x01,0x6c, 0x01,0xfd, -0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x41, 0x01,0x2b, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x19, 0x02,0x00, -0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x19, -0x02,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, -0x01,0x00, 0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, 0x06,0xfd, -0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x6c, 0x02,0x00, 
0x01,0x6c, 0x02,0xfd, 0x02,0x00, -0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0x55, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x75,0xfd, 0x01,0x00, -0x01,0x2b, 0x04,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x55, 0x02,0x00, 0x02,0x2b, -0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x07, -0x03,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x41, 0x04,0x00, 0x02,0xfd, 0x01,0x19, 0x02,0x00, 0x02,0x2b, -0x01,0x00, 0x06,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, -0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x04,0xfd, 0x01,0x41, 0x04,0x00, 0x01,0xfd, 0x01,0x6c, 0x03,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x02,0x00, 0x02,0x2b, -0x01,0x00, 0x01,0x55, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x19, 0x02,0x00, 0x06,0xfd, 0x01,0x83, -0x01,0x00, 0x01,0x2b, 0x04,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, -0x01,0x55, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x8b, 0x01,0x82, -0x01,0x79, 0x01,0x70, 0x01,0x59, 0x04,0x00, 0x02,0xfb, 0x01,0x03, 0x03,0x00, 0x01,0x03, 0x02,0xfb, -0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, -0x01,0x00, 0x01,0x2b, 0x03,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x03, 0x02,0x00, 0x01,0x03, 0x01,0x00, -0x01,0x2b, 0x01,0x70, 0x01,0x79, 0x01,0x44, 0x02,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x6c, 0x04,0x00, -0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x41, -0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, -0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x02,0x00, 0x02,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x83, -0x03,0x00, 0x01,0x2e, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x41, 0x04,0x00, 0x06,0xfd, 0x01,0x00, -0x01,0x41, 0x05,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x2e, -0x02,0xfd, 0x02,0x00, 0x03,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, -0x01,0x00, 0x80,0xf2,0xfd, 0x01,0x00, 0x01,0x41, 0x15,0xfd, 0x01,0x8b, 0x01,0x84, 0x01,0x7a, -0x01,0x6f, 0x1f,0xfb, 0x01,0x00, 0x01,0x22, 0x10,0xfb, 0x01,0x6f, 0x01,0x7a, 0x01,0x84, 0x01,0x8b, -0x81,0x87,0xfd, 0x01,0x19, 0x01,0x00, 0x13,0xfd, 0x01,0x8b, 0x01,0x80, 0x01,0x75, 0x23,0xfb, -0x01,0x00, 0x01,0x22, 0x14,0xfb, 0x01,0x75, 0x01,0x80, 0x01,0x8b, 0x81,0x84,0xfd, 0x01,0x2e, -0x01,0x41, 0x10,0xfd, 0x01,0x8b, 0x01,0x81, 0x01,0x75, 0x26,0xfb, 0x01,0x2b, 0x01,0x47, 0x17,0xfb, -0x01,0x75, 0x01,0x81, 0x01,0x8b, 0x81,0x91,0xfd, 0x01,0x86, 0x01,0x79, 0x45,0xfb, 0x01,0x79, -0x01,0x86, 0x81,0x8c,0xfd, 0x01,0x8b, 0x01,0x80, 0x01,0x72, 0x49,0xfb, 0x01,0x72, 0x01,0x80, -0x01,0x8b, 0x80,0xfe,0xfd, 0x01,0x55, 0x01,0x6c, 0x2e,0xfd, 0x01,0x55, 0x01,0x41, 0x3b,0xfd, -0x01,0x41, 0x01,0x83, 0x1a,0xfd, 0x01,0x8b, 0x01,0xfc, 0x01,0x6d, 0x12,0xfb, 0x01,0x22, 0x01,0x59, -0x39,0xfb, 0x01,0x6d, 0x01,0xfc, 0x01,0x8b, 0x0c,0xfd, 0x01,0x83, 0x01,0x41, 0x42,0xfd, 0x01,0x6c, -0x01,0x55, 0x80,0xaa,0xfd, 0x01,0x00, 0x01,0x2b, 0x2d,0xfd, 0x03,0x00, 0x01,0x2b, 0x3a,0xfd, -0x01,0x00, 0x01,0x41, 0x18,0xfd, 0x01,0x8b, 0x01,0x7b, 0x15,0xfb, 0x01,0x00, 0x01,0x22, 0x3c,0xfb, -0x01,0x7b, 0x01,0x8b, 0x0a,0xfd, 0x01,0x41, 0x01,0x00, 
0x12,0xfd, 0x04,0x00, 0x01,0x2e, 0x2b,0xfd, -0x01,0x2b, 0x01,0x00, 0x80,0xaa,0xfd, 0x01,0x00, 0x01,0x2b, 0x0e,0xfd, 0x01,0x6c, 0x01,0x55, -0x03,0xfd, 0x01,0x55, 0x01,0x6c, 0x17,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x3c,0xfd, 0x01,0x00, -0x01,0x41, 0x16,0xfd, 0x01,0x8b, 0x01,0x7c, 0x17,0xfb, 0x01,0x00, 0x01,0x22, 0x3e,0xfb, 0x01,0x7c, -0x01,0x8b, 0x03,0xfd, 0x01,0x83, 0x01,0x41, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x12,0xfd, 0x01,0x00, -0x01,0x07, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x10,0xfd, 0x01,0x55, 0x01,0x6c, 0x0f,0xfd, -0x01,0x6c, 0x01,0x55, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x0e,0xfd, 0x01,0x83, 0x01,0x41, 0x03,0xfd, -0x01,0x83, 0x01,0x41, 0x80,0x95,0xfd, 0x01,0x00, 0x01,0x2b, 0x0e,0xfd, 0x01,0x2b, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x17,0xfd, 0x01,0x41, 0x01,0x00, 0x3d,0xfd, 0x01,0x00, 0x01,0x41, -0x14,0xfd, 0x01,0x8b, 0x01,0x7d, 0x19,0xfb, 0x01,0x00, 0x01,0x22, 0x40,0xfb, 0x01,0x7d, 0x01,0x8b, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x12,0xfd, 0x01,0x00, 0x01,0x41, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x10,0xfd, 0x01,0x00, 0x01,0x2b, 0x0f,0xfd, 0x01,0x2b, 0x01,0x00, -0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x0e,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, -0x80,0x95,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x03,0x00, 0x02,0x2b, 0x03,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x55, -0x05,0xfd, 0x01,0x41, 0x04,0x00, 0x01,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0x00, 0x05,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x83, 0x03,0x00, -0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2e, 0x01,0x00, 0x01,0xfd, 0x01,0x83, -0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0x6c, -0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x83, 0x01,0x2b, 0x02,0x00, -0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x8b, 0x01,0x81, 0x01,0x14, -0x03,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x59, 0x04,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x02,0x2b, -0x02,0x00, 0x01,0x36, 0x03,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x01,0xfb, -0x01,0x36, 0x03,0x00, 0x01,0x03, 0x06,0xfb, 0x01,0x59, 0x04,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, -0x01,0x2b, 0x02,0x00, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x12, 0x03,0x00, 0x01,0x59, 0x02,0xfb, -0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0xfb, 0x01,0x59, 0x03,0x00, 0x01,0x03, 0x02,0xfb, 0x01,0x36, -0x03,0x00, 0x01,0x03, 0x02,0xfb, 0x01,0x2b, 0x03,0x00, 0x04,0xfb, 0x01,0x70, 0x01,0x81, 0x04,0x00, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x19, 0x03,0x00, -0x01,0x83, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x83, -0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x03,0x00, -0x01,0x41, 0x01,0xfd, 0x04,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x41, -0x03,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x04,0x00, 0x01,0xfd, 0x04,0x00, -0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x02,0x00, -0x01,0x19, 0x80,0x84,0xfd, 0x02,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2e, 0x01,0x00, 0x01,0x6c, -0x01,0xfd, 0x01,0x2b, 0x01,0x00, 
0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x02,0x00, 0x01,0x07, 0x01,0xfd, 0x02,0x00, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x41, 0x06,0xfd, -0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x00, 0x01,0x19, 0x01,0xfd, -0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x07, 0x02,0xfd, 0x01,0x6c, 0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x07, 0x02,0xfd, -0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x6c, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0x83, 0x01,0x00, 0x01,0x0e, 0x01,0x75, 0x01,0x22, 0x01,0x00, 0x01,0x2b, -0x02,0xfb, 0x01,0x47, 0x01,0xfb, 0x02,0x00, 0x02,0x59, 0x02,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x2b, -0x01,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x47, 0x01,0x59, 0x01,0x2b, 0x01,0x00, -0x01,0x22, 0x01,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x47, 0x06,0xfb, 0x02,0x00, 0x02,0x59, 0x02,0x00, -0x02,0xfb, 0x02,0x00, 0x01,0x22, 0x06,0xfb, 0x01,0x22, 0x02,0x00, 0x01,0x47, 0x01,0x59, 0x02,0x00, -0x02,0xfb, 0x02,0x00, 0x01,0x47, 0x02,0xfb, 0x02,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x47, -0x01,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x47, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x47, 0x01,0xfb, -0x01,0x47, 0x06,0xfb, 0x01,0x28, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, -0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, -0x06,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0x2e, 0x02,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x02,0xfd, -0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2e, 0x01,0x83, 0x01,0xfd, 0x02,0x00, 0x02,0xfd, 0x02,0x00, -0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x83, -0x01,0x6c, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, -0x01,0x83, 0x01,0x07, 0x01,0x00, 0x80,0x84,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, -0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, -0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x00, -0x01,0x2b, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, -0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x19, 0x01,0x83, 0x03,0xfd, 0x01,0x00, -0x01,0x2b, 0x01,0x41, 0x01,0x3a, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x02,0x00, 0x04,0xfb, 
0x01,0x47, -0x01,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, -0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x00, 0x01,0x22, -0x01,0xfb, 0x02,0x00, 0x01,0x12, 0x07,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x47, -0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, -0x01,0x12, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x12, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x12, -0x02,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x02,0x00, 0x01,0x12, 0x03,0xfb, 0x01,0x47, 0x01,0x00, -0x01,0x2b, 0x01,0x47, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0xfc, 0x01,0x8b, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, -0x01,0x07, 0x01,0x00, 0x06,0xfd, 0x04,0x00, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, -0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, 0x04,0xfd, 0x01,0x00, 0x01,0x2b, -0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, -0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x80,0x84,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, -0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, -0x01,0x2b, 0x07,0xfd, 0x01,0x83, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x05,0x00, -0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x41, 0x05,0x00, -0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x19, 0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x05,0x00, -0x01,0x2b, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x04,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x00, -0x01,0x22, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, -0x01,0x22, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x2b, 0x02,0x00, 0x01,0x36, -0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, -0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, -0x02,0xfb, 0x01,0x22, 0x05,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x59, 0x01,0x2b, 0x02,0x00, 0x01,0x36, -0x02,0xfb, 0x01,0x22, 0x02,0x00, 0x01,0x2b, 0x06,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0xfb, 0x01,0x72, -0x01,0x85, 0x01,0x3f, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x06,0x00, 0x06,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0x83, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, -0x01,0xfd, 0x01,0x83, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, -0x01,0x07, 0x04,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, -0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 
0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x41, 0x01,0x00, 0x80,0x84,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x19, -0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, -0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x09,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2e, 0x01,0x00, 0x01,0x6c, -0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x08,0xfd, 0x01,0x07, -0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x05,0xfb, 0x02,0x00, 0x04,0xfb, 0x01,0x47, -0x01,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, -0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x12, 0x03,0xfb, 0x01,0x00, 0x01,0x22, -0x03,0xfb, 0x01,0x59, 0x02,0x00, 0x05,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x47, -0x01,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, -0x01,0x03, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x36, -0x07,0xfb, 0x01,0x59, 0x02,0x00, 0x04,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, 0x01,0x22, -0x01,0x00, 0x03,0xfb, 0x01,0x2f, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, -0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, -0x02,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x41, 0x01,0xfd, -0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, -0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x80,0x84,0xfd, -0x02,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x07, 0x01,0x00, -0x01,0x83, 0x01,0x41, 0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x02,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x41, 0x01,0x6c, -0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0x55, -0x02,0xfd, 0x03,0x00, 0x03,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0x55, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x02,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x02,0x55, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x6c, 0x01,0x88, -0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 
-0x01,0x2b, 0x01,0xfb, 0x01,0x47, 0x01,0x36, 0x01,0xfb, 0x02,0x00, 0x02,0x59, 0x02,0x00, 0x02,0xfb, -0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x59, 0x01,0x47, -0x02,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x06,0xfb, -0x02,0x00, 0x02,0x59, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, -0x01,0x2b, 0x01,0x59, 0x01,0x47, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x03,0xfb, -0x02,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x47, 0x01,0x36, 0x01,0xfb, 0x01,0x22, 0x01,0x47, 0x01,0xfb, -0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x47, 0x01,0x22, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x03, -0x05,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x88, 0x02,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, 0x06,0xfd, -0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, -0x01,0x6c, 0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x03,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0x6c, -0x02,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x55, 0x01,0x00, -0x01,0x55, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, -0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x2b, -0x80,0x80,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x83, 0x03,0x00, -0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x02,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x83, -0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x2e, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x83, -0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x55, 0x02,0x00, -0x01,0x2b, 0x01,0x19, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x2e, -0x03,0x00, 0x01,0x15, 0x01,0x81, 0x02,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x12, -0x03,0x00, 0x01,0x12, 0x01,0xfb, 0x01,0x59, 0x04,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, -0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x03,0x00, 0x01,0x12, 0x01,0x00, 0x01,0x22, -0x01,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x36, 0x06,0xfb, 0x01,0x59, 0x04,0x00, 0x01,0x59, 0x02,0xfb, -0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x03,0xfb, -0x01,0x00, 0x01,0x22, 0x03,0xfb, 0x01,0x59, 0x01,0x2b, 0x03,0x00, 0x01,0x12, 0x01,0xfb, 0x01,0x2b, -0x03,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x47, 0x03,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x2b, 0x02,0x00, -0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0xfb, 0x01,0x81, 0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, -0x01,0x41, 0x04,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x07, -0x01,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, -0x02,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x41, 0x01,0x83, 
0x03,0x00, 0x01,0x2e, 0x02,0x00, 0x01,0xfd, -0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x2b, 0x02,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0x00, 0x01,0x41, -0x02,0xfd, 0x01,0x2b, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, -0x01,0x55, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x80,0xf7,0xfd, 0x01,0x8b, 0x01,0x7b, 0x44,0xfb, 0x01,0x22, 0x01,0x00, 0x2d,0xfb, -0x01,0x7b, 0x01,0x8b, 0x81,0x60,0xfd, 0x01,0x8b, 0x01,0x75, 0x45,0xfb, 0x01,0x22, 0x01,0x00, -0x2e,0xfb, 0x01,0x75, 0x01,0x8b, 0x81,0x5e,0xfd, 0x01,0x86, 0x01,0x70, 0x46,0xfb, 0x01,0x47, -0x01,0x2b, 0x2f,0xfb, 0x01,0x70, 0x01,0x86, 0x81,0x5c,0xfd, 0x01,0x82, 0x7b,0xfb, 0x01,0x82, -0x81,0x5a,0xfd, 0x01,0x7f, 0x7d,0xfb, 0x01,0x7f, 0x81,0x58,0xfd, 0x01,0x7d, 0x3d,0xfb, -0x01,0x82, 0x04,0xfd, 0x01,0x82, 0x3c,0xfb, 0x01,0x7d, 0x81,0x55,0xfd, 0x01,0x8b, 0x01,0x7c, -0x3d,0xfb, 0x08,0xfd, 0x3c,0xfb, 0x01,0x7c, 0x01,0x8b, 0x81,0x52,0xfd, 0x01,0x8b, 0x01,0x7b, -0x3d,0xfb, 0x0a,0xfd, 0x3c,0xfb, 0x01,0x7b, 0x01,0x8b, 0x81,0x50,0xfd, 0x01,0x8b, 0x01,0x7b, -0x3d,0xfb, 0x01,0x7e, 0x0a,0xfd, 0x01,0x7e, 0x3c,0xfb, 0x01,0x7b, 0x01,0x8b, 0x81,0x4f,0xfd, -0x01,0x7c, 0x3e,0xfb, 0x0c,0xfd, 0x3d,0xfb, 0x01,0x7c, 0x81,0x4e,0xfd, 0x01,0x7d, 0x3f,0xfb, -0x0c,0xfd, 0x3e,0xfb, 0x01,0x7d, 0x81,0x4c,0xfd, 0x01,0x7f, 0x40,0xfb, 0x0c,0xfd, 0x3f,0xfb, -0x01,0x7f, 0x81,0x4a,0xfd, 0x01,0x82, 0x41,0xfb, 0x0c,0xfd, 0x40,0xfb, 0x01,0x82, 0x81,0x48,0xfd, -0x01,0x86, 0x42,0xfb, 0x0c,0xfd, 0x41,0xfb, 0x01,0x86, 0x81,0x46,0xfd, 0x01,0x8b, 0x01,0x70, -0x42,0xfb, 0x0c,0xfd, 0x41,0xfb, 0x01,0x70, 0x01,0x8b, 0x81,0x44,0xfd, 0x01,0x8b, 0x01,0x75, -0x43,0xfb, 0x0c,0xfd, 0x42,0xfb, 0x01,0x75, 0x01,0x8b, 0x81,0x43,0xfd, 0x01,0x7b, 0x44,0xfb, -0x0c,0xfd, 0x43,0xfb, 0x01,0x7b, 0x81,0x42,0xfd, 0x01,0x81, 0x45,0xfb, 0x0c,0xfd, 0x44,0xfb, -0x01,0x81, 0x81,0x40,0xfd, 0x01,0x88, 0x46,0xfb, 0x0c,0xfd, 0x45,0xfb, 0x01,0x88, 0x81,0x3e,0xfd, -0x01,0x8b, 0x01,0x74, 0x46,0xfb, 0x0c,0xfd, 0x45,0xfb, 0x01,0x74, 0x01,0x8b, 0x80,0xf2,0xfd, -0x01,0x83, 0x01,0x41, 0x02,0xfd, 0x01,0x6c, 0x01,0x55, 0x26,0xfd, 0x01,0x55, 0x01,0x6c, 0x05,0xfd, -0x01,0x41, 0x01,0x83, 0x16,0xfd, 0x01,0xfc, 0x47,0xfb, 0x08,0xfd, 0x01,0x83, 0x01,0x41, 0x02,0xfd, -0x46,0xfb, 0x01,0xfc, 0x33,0xfd, 0x01,0x6c, 0x01,0x55, 0x29,0xfd, 0x01,0x41, 0x01,0x83, 0x76,0xfd, -0x01,0x07, 0x01,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x83, 0x10,0xfd, 0x01,0x00, 0x01,0x55, 0x01,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x26,0xfd, 0x01,0x00, 0x01,0x2b, 0x04,0xfd, -0x02,0x2b, 0x16,0xfd, 0x01,0x85, 0x48,0xfb, 0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x03, -0x01,0x2b, 0x33,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x04,0xfb, 0x01,0x2b, 0x01,0x00, 0x09,0xfb, -0x01,0x0d, 0x01,0x2b, 0x31,0xfd, 0x01,0x2b, 0x01,0x00, 0x29,0xfd, 0x01,0x00, 0x01,0x41, 0x76,0xfd, -0x01,0x2e, 0x01,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x11,0xfd, 0x01,0x2b, 0x01,0x6c, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x26,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x55, 0x01,0x2b, 0x16,0xfd, 0x01,0x8b, 0x01,0x72, 0x29,0xfb, 0x01,0x47, 0x01,0x36, -0x1d,0xfb, 0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x22, 0x01,0x03, 0x11,0xfb, 0x01,0x47, -0x01,0x36, 0x20,0xfb, 0x02,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x47, 0x02,0x00, 0x09,0xfb, 0x01,0x27, -0x01,0x12, 0x0a,0xfd, 0x01,0x6c, 0x01,0x55, 0x25,0xfd, 0x01,0x2b, 0x01,0x00, 0x0d,0xfd, 0x01,0x6c, -0x01,0x55, 
0x1a,0xfd, 0x01,0x00, 0x01,0x41, 0x77,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x2b, -0x01,0x07, 0x14,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x26,0xfd, 0x01,0x00, -0x01,0x2b, 0x1b,0xfd, 0x01,0xfc, 0x2a,0xfb, 0x01,0x2b, 0x01,0x00, 0x1d,0xfb, 0x08,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x13,0xfb, 0x01,0x2b, 0x01,0x00, 0x20,0xfb, 0x03,0x00, 0x03,0xfb, 0x01,0x2b, -0x02,0x00, 0x0a,0xfb, 0x01,0xfc, 0x0a,0xfd, 0x01,0x2b, 0x01,0x00, 0x25,0xfd, 0x01,0x2b, 0x01,0x00, -0x0d,0xfd, 0x01,0x2b, 0x01,0x00, 0x1a,0xfd, 0x01,0x00, 0x01,0x41, 0x77,0xfd, 0x01,0x2b, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, -0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x19, 0x02,0xfd, -0x06,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x19, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x55, 0x03,0x00, -0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0x19, 0x02,0x00, 0x01,0x19, -0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x19, 0x01,0x88, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, -0x01,0x00, 0x02,0x22, 0x01,0x00, 0x01,0x03, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x36, 0x03,0x00, -0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x04,0xfb, 0x01,0x12, 0x01,0x00, 0x01,0x59, -0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x36, 0x01,0xfb, -0x01,0x22, 0x03,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x02,0x8b, 0x01,0x13, -0x03,0x00, 0x01,0x59, 0x06,0xfb, 0x01,0x36, 0x03,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, 0x01,0xf7, -0x02,0x00, 0x01,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfb, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x13, -0x01,0x8b, 0x01,0x3c, 0x03,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x59, 0x03,0x00, 0x01,0x03, 0x02,0xfb, -0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x03, -0x01,0x00, 0x01,0x2b, 0x09,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x00, -0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x12, 0x03,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, -0x01,0x88, 0x01,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x03,0x00, -0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, -0x02,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x05,0x00, -0x01,0x41, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x19, 0x05,0xfd, -0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x02,0xfd, -0x01,0x00, 0x01,0x2e, 0x03,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x55, 0x6c,0xfd, -0x01,0x2e, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, -0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x04,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x02,0x00, 0x01,0x41, 
0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x19, -0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, -0x02,0xfd, 0x02,0x00, 0x01,0x07, 0x01,0xfd, 0x02,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, -0x02,0xfd, 0x01,0x41, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x75, 0x01,0xfb, 0x01,0x2b, -0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x01,0xfb, 0x01,0x59, -0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x22, 0x06,0xfb, -0x01,0x00, 0x01,0x03, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x47, -0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x02,0x00, -0x01,0x41, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, -0x01,0x59, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x02,0x00, -0x01,0x6c, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x2b, 0x01,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x63, -0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x85, 0x02,0x00, -0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, -0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x0a,0xfb, 0x01,0x00, 0x01,0x22, -0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x02,0x36, -0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x75, 0x01,0x8b, -0x02,0x00, 0x01,0x07, 0x01,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x00, -0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, -0x01,0x83, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x83, 0x01,0x00, 0x01,0x19, -0x05,0xfd, 0x02,0x00, 0x01,0x83, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x6c, -0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x02,0xfd, 0x02,0x00, 0x01,0x41, 0x01,0x83, -0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, -0x6d,0xfd, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0x41, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x03,0xfd, 0x01,0x83, -0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x41, -0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x02,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, -0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, -0x01,0x00, 0x03,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x03, 0x01,0x00, 0x03,0x22, -0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, -0x01,0x00, 
0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, -0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x7a, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0x41, 0x01,0x07, 0x01,0x00, 0x05,0xfb, 0x01,0x03, 0x01,0x00, 0x03,0xfb, 0x01,0x03, -0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, -0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x19, 0x02,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x2b, -0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x59, 0x0b,0xfb, -0x01,0x00, 0x01,0x22, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x02,0x2b, 0x01,0x22, 0x01,0x00, 0x06,0xfb, -0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x81, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, -0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x41, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, -0x01,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, -0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0x19, 0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x55, 0x6c,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x83, -0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x06,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, 0x03,0xfd, 0x02,0x00, 0x08,0xfd, 0x01,0x00, -0x01,0x2b, 0x03,0xfd, 0x05,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, -0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x04,0x00, 0x02,0xfb, -0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x05,0x00, 0x01,0x36, -0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x47, 0x01,0x12, 0x01,0x00, -0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, -0x01,0x82, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x06,0x00, 0x01,0x87, 0x04,0xfb, 0x01,0x2b, -0x01,0x00, 0x03,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x3e, 0x02,0xfd, 0x01,0x41, -0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, -0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x35, 0x04,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, -0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x0c,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0x47, -0x01,0x00, 0x01,0x36, 0x01,0x00, 0x01,0x47, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x47, 0x04,0x00, -0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x70, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 
0x01,0x41, 0x05,0x00, 0x01,0x19, -0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x05,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, -0x04,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, -0x05,0x00, 0x01,0x55, 0x6c,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, -0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x55, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x08,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, -0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x30, -0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x03, -0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x2b, 0x08,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x03, -0x02,0xfb, 0x01,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x70, 0x03,0xfb, -0x01,0x03, 0x01,0x00, 0x03,0xfb, 0x01,0x03, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x3e, 0x02,0xfd, -0x01,0x55, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x4a, 0x01,0x00, -0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x8b, 0x01,0x7e, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, -0x01,0x12, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x0c,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, -0x03,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, -0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x05,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x06,0xfd, -0x01,0x19, 0x01,0x00, 0x01,0x55, 0x08,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, -0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x00, -0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x72,0xfd, 0x02,0x00, -0x01,0x19, 0x03,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, -0x01,0x07, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, -0x01,0x6c, 0x01,0x55, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x09,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 
[several hundred removed lines of run-length-encoded image data elided: the hunk deletes a byte array of "count,value" pairs (e.g. 0x01,0x07 = one 0x07 byte; 0x6d,0xfd = 0x6d bytes of 0xfd), with what appear to be two-byte counts where the lead byte has its high bit set (e.g. 0x80,0xca,0xfd, 0x81,0x2f,0xfd); the original per-line "-" diff markers were lost to line-wrap damage in extraction]
0x01,0x00, 0x01,0x36, -0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, -0x01,0x36, 0x02,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x03, 0x04,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x36, -0x03,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x54, 0x06,0xfd, 0x01,0x83, -0x05,0x00, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x03, 0x04,0x00, 0x01,0x2b, -0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x03, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, -0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, -0x01,0xfb, 0x01,0xfa, 0x01,0x00, 0x01,0x55, 0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, -0x03,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, -0x01,0x19, 0x03,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0x00, 0x05,0xfd, 0x01,0x83, -0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0x41, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x2b, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x19, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x02,0x00, 0x01,0x19, 0x6c,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, -0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x07, 0x01,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x6c, -0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x6c, -0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, -0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x02,0x55, 0x01,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x54, 0x01,0x75, 0x02,0x00, 0x02,0xfb, 0x01,0x47, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, -0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x06,0xfb, 0x01,0x2b, -0x01,0x00, 0x02,0x59, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, -0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x01,0xfd, 0x01,0x87, 0x01,0x2b, 0x01,0x00, -0x02,0x59, 0x01,0x00, 0x01,0x03, 0x05,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0x2b, 0x01,0x59, 0x02,0xfb, -0x01,0x22, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0x47, -0x01,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, -0x01,0x22, 0x02,0x00, 0x01,0x47, 0x01,0x36, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x22, 0x01,0x00, -0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x03, -0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x2b, 0x01,0x87, 0x02,0x00, 0x06,0xfd, -0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x59, -0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, -0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, -0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x47, 0x02,0xfb, -0x01,0x00, 0x01,0x22, 
0x01,0x4d, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, -0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, -0x01,0x00, 0x01,0x6c, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x06,0xfd, 0x01,0x00, -0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x02,0xfd, -0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, -0x01,0x83, 0x01,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x02,0x2b, 0x03,0xfd, -0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x6b,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, -0x02,0x41, 0x01,0x00, 0x01,0x07, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, -0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0x80, 0x02,0x00, 0x01,0x12, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x00, -0x03,0x22, 0x01,0x00, 0x01,0x36, 0x05,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x02,0x00, 0x01,0xfb, -0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x8b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x02,0x00, 0x07,0xfb, 0x01,0x2b, 0x02,0x00, -0x01,0x59, 0x01,0xfb, 0x02,0x00, 0x02,0x22, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x12, -0x01,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, -0x01,0x22, 0x01,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x02,0x00, 0x03,0xfb, -0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x03,0x22, 0x01,0x00, 0x01,0x36, 0x01,0xfb, -0x01,0x00, 0x01,0x2b, 0x01,0x87, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x01,0x00, -0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x02,0x00, -0x01,0xfb, 0x02,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x59, -0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, -0x01,0x47, 0x05,0xfb, 0x01,0x00, 0x01,0x12, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, -0x07,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x00, 0x01,0x19, -0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, 0x01,0x07, 0x01,0x00, -0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x06,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0x00, -0x01,0x83, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x6b,0xfd, 0x01,0x41, 
0x01,0x00, 0x03,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, -0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, -0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x41, 0x01,0x8b, -0x01,0x59, 0x01,0x2b, 0x02,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, -0x05,0x00, 0x01,0x36, 0x05,0xfb, 0x05,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, -0x01,0x2b, 0x01,0x00, 0x01,0x71, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x05,0x00, 0x01,0x2b, -0x08,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0xfb, 0x06,0x00, 0x02,0xfb, 0x03,0x00, 0x01,0x59, 0x03,0xfb, -0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, -0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, -0x01,0x2b, 0x05,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x05,0x00, 0x01,0x2b, -0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, -0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x00, -0x06,0xfb, 0x03,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x41, -0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x01,0xfd, 0x06,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x07, -0x03,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, -0x01,0x00, 0x6b,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x09,0xfd, 0x01,0x2b, 0x01,0x00, -0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x04,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x05,0xfd, 0x01,0x41, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x75, -0x01,0xfb, 0x01,0x59, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x00, -0x0a,0xfb, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, -0x01,0xfb, 0x01,0x8b, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0xf7, 0x01,0x84, -0x0c,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x02,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0x00, -0x01,0x59, 0x02,0xfb, 
0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x12, 0x01,0x00, 0x02,0xfb, 0x01,0x22, -0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, -0x01,0x2b, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, -0x01,0x8b, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, -0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, -0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x47, 0x05,0xfb, 0x01,0x00, 0x01,0x28, 0x02,0x00, 0x01,0x83, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x19, -0x01,0xfd, 0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, -0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x6c,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x02,0x00, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0x55, -0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x02,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x07, -0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x2e, -0x01,0xfd, 0x01,0x32, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x02,0xfb, -0x01,0x59, 0x02,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x36, 0x01,0x47, 0x05,0xfb, 0x01,0x2b, 0x01,0x00, -0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, -0x01,0x00, 0x02,0xfb, 0x01,0x3e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, -0x01,0x41, 0x01,0x2b, 0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x41, 0x01,0x82, 0x01,0x59, -0x01,0x22, 0x05,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, -0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x12, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0x59, -0x02,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x01,0x22, 0x02,0x00, 0x02,0xfb, 0x01,0x22, -0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x47, 0x01,0x59, -0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x36, -0x01,0x47, 0x01,0x82, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x02,0x00, -0x01,0x41, 0x01,0x2b, 0x02,0x00, 0x01,0xfd, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x01,0xfb, -0x01,0x59, 0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x47, 0x01,0x59, 0x01,0x2b, 0x01,0x00, -0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x02,0x00, 0x01,0xfb, 0x01,0x2b, 0x02,0x00, -0x02,0xfb, 0x02,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x12, 0x02,0xfb, 0x01,0x00, 
0x01,0x32, 0x01,0x83, -0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, -0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x01,0x6c, -0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, -0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x02,0x00, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0x6c, 0x02,0x00, -0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, -0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, -0x01,0x6c, 0x01,0x55, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x6c,0xfd, -0x01,0x55, 0x03,0x00, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, -0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x05,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, -0x01,0x55, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x01,0x19, 0x01,0x00, 0x01,0x07, -0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0xfd, 0x01,0x83, 0x02,0x00, -0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x02,0x00, 0x02,0x2b, 0x01,0x00, 0x01,0x55, 0x01,0x2b, 0x03,0x00, -0x01,0x36, 0x02,0xfb, 0x01,0x36, 0x02,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x47, 0x04,0x00, 0x01,0x47, -0x06,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, -0x01,0x00, 0x02,0xfb, 0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, -0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x07, 0x03,0x00, 0x01,0x2b, 0x05,0xfb, -0x01,0x12, 0x03,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x22, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, -0x01,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0xfb, 0x01,0x59, 0x03,0x00, 0x01,0x12, 0x01,0x00, 0x02,0xfb, -0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, -0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x47, 0x04,0x00, 0x01,0x56, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, -0x01,0x7c, 0x02,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, -0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x36, 0x02,0x00, 0x02,0x2b, -0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x39, 0x01,0xfd, 0x01,0x83, -0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, 0x03,0x00, -0x01,0x55, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x2e, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x41, -0x04,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x07, 0x01,0x00, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0x00, 0x01,0x41, -0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x07, -0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x80,0xc8,0xfd, 0x01,0x79, 0x25,0xfb, 0x01,0x82, -0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x87, 0x38,0xfb, 0x01,0x87, 0x12,0xfd, 0x01,0x00, -0x01,0x2b, 0x26,0xfb, 0x01,0x79, 
0x81,0x29,0xfd, 0x01,0x86, 0x26,0xfb, 0x01,0x8b, 0x05,0xfd, -0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x6c, 0x08,0xfd, 0x01,0x87, 0x36,0xfb, 0x01,0x87, 0x0f,0xfd, -0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x47, 0x26,0xfb, 0x01,0x86, 0x81,0x2a,0xfd, 0x01,0x72, -0x25,0xfb, 0x01,0x7c, 0x05,0xfd, 0x01,0x6c, 0x01,0x07, 0x01,0x2b, 0x01,0x07, 0x01,0x83, 0x0a,0xfd, -0x01,0x8b, 0x34,0xfb, 0x01,0x8b, 0x10,0xfd, 0x01,0x6c, 0x01,0x07, 0x01,0x2b, 0x01,0x07, 0x01,0x66, -0x26,0xfb, 0x01,0x72, 0x81,0x2b,0xfd, 0x01,0x80, 0x26,0xfb, 0x01,0x87, 0x14,0xfd, 0x01,0x8b, -0x01,0x7e, 0x30,0xfb, 0x01,0x7e, 0x01,0x8b, 0x14,0xfd, 0x01,0x87, 0x27,0xfb, 0x01,0x80, 0x81,0x2b,0xfd, -0x01,0x8b, 0x01,0x6d, 0x26,0xfb, 0x01,0x8b, 0x15,0xfd, 0x01,0x8b, 0x2e,0xfb, 0x01,0x8b, 0x15,0xfd, -0x01,0x8b, 0x27,0xfb, 0x01,0x6d, 0x01,0x8b, 0x80,0xe8,0xfd, 0x01,0x83, 0x01,0x41, 0x42,0xfd, -0x01,0xfc, 0x27,0xfb, 0x01,0x8b, 0x16,0xfd, 0x01,0x89, 0x2a,0xfb, 0x01,0x89, 0x16,0xfd, 0x01,0x8b, -0x28,0xfb, 0x01,0xfc, 0x80,0xcf,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, -0x02,0xfd, 0x01,0x2b, 0x01,0x19, 0x11,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x01,0x2b, -0x0d,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x03,0xfd, 0x01,0x55, 0x01,0x2b, 0x29,0xfd, 0x01,0x8b, -0x0b,0xfb, 0x07,0x00, 0x01,0x2b, 0x15,0xfb, 0x01,0x8b, 0x17,0xfd, 0x01,0x80, 0x26,0xfb, 0x01,0x80, -0x17,0xfd, 0x01,0x8b, 0x29,0xfb, 0x01,0x8b, 0x80,0xcf,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x41, -0x01,0x19, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x41, 0x11,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, -0x01,0x41, 0x01,0x19, 0x0d,0xfd, 0x01,0x41, 0x02,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x18,0xfd, -0x01,0x55, 0x01,0x6c, 0x0f,0xfd, 0x01,0x6b, 0x01,0x3f, 0x0a,0xfb, 0x03,0x22, 0x02,0x00, 0x02,0x22, -0x01,0x36, 0x0f,0xfb, 0x01,0x22, 0x01,0x59, 0x05,0xfb, 0x01,0x8b, 0x17,0xfd, 0x01,0x8b, 0x01,0x87, -0x22,0xfb, 0x01,0x87, 0x01,0x8b, 0x17,0xfd, 0x01,0x8b, 0x29,0xfb, 0x01,0x7b, 0x80,0xcf,0xfd, -0x01,0x41, 0x01,0x00, 0x19,0xfd, 0x01,0x41, 0x01,0x00, 0x11,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x2e, -0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x18,0xfd, 0x01,0x00, 0x01,0x2b, 0x0f,0xfd, 0x01,0x2b, 0x01,0x00, -0x0d,0xfb, 0x01,0x00, 0x01,0x2b, 0x12,0xfb, 0x01,0x00, 0x01,0x22, 0x05,0xfb, 0x01,0x6e, 0x01,0x8b, -0x19,0xfd, 0x01,0x89, 0x1e,0xfb, 0x01,0x89, 0x19,0xfd, 0x01,0x8b, 0x01,0x6e, 0x29,0xfb, 0x01,0x8b, -0x80,0xcf,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x01,0x55, 0x03,0x00, 0x01,0x55, 0x06,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x19, 0x06,0xfd, 0x01,0x41, 0x01,0x00, -0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x03,0x00, -0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x55, -0x03,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x41, 0x01,0xfd, 0x04,0x00, 0x01,0x55, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x0b,0xfb, 0x01,0x00, -0x01,0x2b, 0x04,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x03, 0x02,0xfb, 0x01,0x12, 0x03,0x00, 0x01,0x36, -0x01,0xfb, 0x04,0x00, 0x02,0xfb, 0x01,0x03, 0x03,0x00, 0x01,0x7a, 0x1b,0xfd, 0x01,0x8b, 0x18,0xfb, -0x01,0x8b, 0x1b,0xfd, 0x01,0x8b, 0x2a,0xfb, 0x01,0x7c, 0x80,0xd1,0xfd, 0x01,0x19, 0x02,0x00, -0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, -0x01,0x07, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 
0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, -0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2e, 0x01,0x83, 0x01,0xfd, 0x02,0x00, 0x02,0xfd, -0x02,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x8b, 0x0c,0xfb, 0x01,0x00, 0x01,0x2b, -0x04,0xfb, 0x01,0x22, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x03, -0x01,0xfb, 0x02,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x36, -0x01,0xfb, 0x02,0x00, 0x01,0x8b, 0x1c,0xfd, 0x01,0x8b, 0x01,0x89, 0x01,0x7c, 0x10,0xfb, 0x01,0x7c, -0x01,0x89, 0x01,0x8b, 0x1c,0xfd, 0x01,0x8b, 0x2b,0xfb, 0x01,0x8b, 0x80,0xd2,0xfd, 0x01,0x6c, -0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0x41, -0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x41, 0x01,0x00, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, -0x01,0x07, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0x41, -0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, -0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, -0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x7d, 0x01,0xfb, 0x01,0x36, 0x04,0x22, 0x01,0x36, -0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x36, -0x04,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x02,0x00, 0x02,0x22, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, -0x01,0x8b, 0x20,0xfd, 0x04,0x8b, 0x04,0x89, 0x04,0x8b, 0x20,0xfd, 0x01,0x8b, 0x2b,0xfb, 0x01,0x7d, -0x80,0xd5,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, 0x05,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, -0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, -0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x07, 0x04,0x00, -0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x8b, 0x01,0x70, -0x01,0x2b, 0x04,0x00, 0x01,0x2b, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x04,0xfb, 0x01,0x22, 0x04,0x00, -0x02,0xfb, 0x01,0x47, 0x03,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x06,0x00, -0x02,0xfb, 0x01,0x8b, 0x4a,0xfd, 0x01,0x8b, 0x2b,0xfb, 0x01,0x70, 0x01,0x8b, 0x80,0xd6,0xfd, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x09,0xfd, -0x01,0x55, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x09,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, -0x03,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 
-0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x81, 0x0a,0xfb, 0x01,0x00, 0x01,0x2b, -0x03,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x47, -0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x02,0x00, 0x07,0xfb, 0x01,0x87, 0x48,0xfd, -0x01,0x87, 0x2c,0xfb, 0x01,0x81, 0x80,0xd2,0xfd, 0x01,0x55, 0x01,0x19, 0x01,0x6c, 0x01,0xfd, -0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, -0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x55, 0x01,0x6c, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0x41, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, -0x01,0x6c, 0x01,0x55, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2e, 0x02,0x00, 0x02,0xfd, -0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0x55, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, -0x01,0x41, 0x02,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x00, -0x01,0x2b, 0x03,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x8b, 0x01,0x75, 0x09,0xfb, -0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x36, 0x01,0x59, 0x01,0x2b, 0x01,0x00, -0x01,0x59, 0x01,0xfb, 0x01,0x12, 0x02,0x59, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, -0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x12, 0x02,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0x7a, -0x01,0x8b, 0x44,0xfd, 0x01,0x8b, 0x01,0x7a, 0x2c,0xfb, 0x01,0x75, 0x01,0x8b, 0x80,0xd2,0xfd, -0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, -0x04,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, -0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x05,0xfd, 0x01,0x41, 0x01,0x00, -0x04,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, -0x03,0x00, 0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x55, -0x02,0x00, 0x01,0x41, 0x01,0x83, 0x03,0x00, 0x01,0x2e, 0x02,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, -0x03,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x88, 0x09,0xfb, 0x01,0x00, 0x01,0x2b, -0x04,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x12, 0x02,0x00, 0x01,0xfb, 0x04,0x00, 0x01,0x59, 0x02,0xfb, -0x01,0x03, 0x02,0x00, 0x02,0xfb, 0x01,0x22, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, -0x01,0x82, 0x42,0xfd, 0x01,0x82, 0x2e,0xfb, 0x01,0x88, 0x81,0x38,0xfd, 0x01,0xfc, 0x2d,0xfb, -0x01,0x7c, 0x40,0xfd, 0x01,0x7c, 0x2e,0xfb, 0x01,0xfc, 0x81,0x39,0xfd, 0x01,0x8b, 0x01,0x72, -0x2e,0xfb, 0x01,0x85, 0x3c,0xfd, 0x01,0x85, 0x2f,0xfb, 0x01,0x72, 0x01,0x8b, 0x81,0x3a,0xfd, -0x01,0x85, 0x2f,0xfb, 0x01,0x6e, 0x01,0x8b, 0x38,0xfd, 0x01,0x8b, 0x01,0x6e, 0x30,0xfb, 0x01,0x85, -0x81,0x3c,0xfd, 0x01,0xfc, 0x31,0xfb, 0x01,0x8b, 0x34,0xfd, 0x01,0x8b, 0x32,0xfb, 0x01,0xfc, -0x81,0x3d,0xfd, 0x01,0x8b, 0x01,0x74, 0x31,0xfb, 0x01,0x73, 0x01,0x8b, 0x30,0xfd, 0x01,0x8b, -0x01,0x73, 0x32,0xfb, 0x01,0x74, 0x01,0x8b, 0x81,0x3e,0xfd, 0x01,0x88, 0x34,0xfb, 0x01,0x7c, -0x01,0x8b, 0x2a,0xfd, 0x01,0x8b, 0x01,0x7c, 0x35,0xfb, 0x01,0x88, 0x81,0x40,0xfd, 0x01,0x81, -0x35,0xfb, 0x01,0x7d, 0x01,0x8b, 0x26,0xfd, 0x01,0x8b, 0x01,0x7d, 0x36,0xfb, 0x01,0x81, 0x81,0x42,0xfd, -0x01,0x7b, 0x38,0xfb, 0x01,0x87, 0x20,0xfd, 0x01,0x87, 0x39,0xfb, 0x01,0x7b, 0x81,0x43,0xfd, 
-0x01,0x8b, 0x01,0x75, 0x3a,0xfb, 0x01,0x77, 0x01,0x80, 0x02,0x8b, 0x14,0xfd, 0x02,0x8b, 0x01,0x80, -0x01,0x77, 0x3b,0xfb, 0x01,0x75, 0x01,0x8b, 0x81,0x44,0xfd, 0x01,0x8b, 0x01,0x70, 0x3e,0xfb, -0x01,0x71, 0x01,0x7d, 0x01,0x84, 0x0c,0x8b, 0x01,0x84, 0x01,0x7d, 0x01,0x71, 0x3f,0xfb, 0x01,0x70, -0x01,0x8b, 0x81,0x46,0xfd, 0x01,0x86, 0x80,0x8f,0xfb, 0x01,0x86, 0x81,0x48,0xfd, -0x01,0x82, 0x80,0x8d,0xfb, 0x01,0x82, 0x81,0x4a,0xfd, 0x01,0x7f, 0x80,0x8b,0xfb, -0x01,0x7f, 0x81,0x4c,0xfd, 0x01,0x7d, 0x80,0x89,0xfb, 0x01,0x7d, 0x81,0x4e,0xfd, -0x01,0x7c, 0x80,0x87,0xfb, 0x01,0x7c, 0x81,0x4f,0xfd, 0x01,0x8b, 0x01,0x7b, 0x80,0x85,0xfb, -0x01,0x7b, 0x01,0x8b, 0x81,0x50,0xfd, 0x01,0x8b, 0x01,0x7b, 0x80,0x83,0xfb, 0x01,0x7b, -0x01,0x8b, 0x81,0x52,0xfd, 0x01,0x8b, 0x01,0x7c, 0x80,0x81,0xfb, 0x01,0x7c, 0x01,0x8b, -0x81,0x55,0xfd, 0x01,0x7d, 0x7f,0xfb, 0x01,0x7d, 0x81,0x58,0xfd, 0x01,0x7f, 0x7d,0xfb, -0x01,0x7f, 0x81,0x08,0xfd, 0x02,0x2b, 0x01,0x6c, 0x1f,0xfd, 0x01,0x07, 0x01,0x55, 0x0b,0xfd, -0x01,0x41, 0x01,0x55, 0x06,0xfd, 0x01,0x83, 0x0b,0x41, 0x01,0x83, 0x03,0xfd, 0x01,0x19, 0x01,0x55, -0x09,0xfd, 0x01,0x82, 0x02,0xfb, 0x01,0x22, 0x01,0x2b, 0x01,0x03, 0x01,0xfb, 0x01,0x47, 0x01,0x12, -0x0a,0xfb, 0x01,0x2b, 0x16,0xfb, 0x01,0x00, 0x01,0x03, 0x07,0xfb, 0x01,0x59, 0x0b,0x22, 0x01,0x59, -0x04,0xfb, 0x01,0x22, 0x01,0x59, 0x04,0xfb, 0x01,0x59, 0x01,0x2b, 0x05,0xfb, 0x02,0x22, 0x0a,0xfb, -0x02,0x47, 0x0e,0xfb, 0x01,0x03, 0x01,0x47, 0x0c,0xfb, 0x01,0x2b, 0x01,0x82, 0x3e,0xfd, 0x01,0x41, -0x02,0xfd, 0x01,0x2e, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x07, 0x01,0x55, 0x18,0xfd, 0x01,0x41, -0x01,0x55, 0x80,0x8e,0xfd, 0x01,0x83, 0x0c,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x41, -0x01,0x19, 0x01,0x6c, 0x01,0x2b, 0x1e,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x83, 0x0b,0xfd, 0x01,0x00, -0x01,0x6c, 0x02,0xfd, 0x01,0x83, 0x03,0xfd, 0x01,0x6c, 0x05,0x2b, 0x01,0x00, 0x05,0x2b, 0x01,0x6c, -0x03,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x04,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, -0x03,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x22, 0x01,0x2b, 0x0a,0xfb, 0x01,0x00, 0x08,0xfb, 0x01,0x2b, -0x05,0x00, 0x01,0x36, 0x07,0xfb, 0x01,0x47, 0x02,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x36, -0x02,0xfb, 0x01,0x47, 0x03,0x2b, 0x02,0x00, 0x01,0x2b, 0x02,0x00, 0x03,0x2b, 0x01,0x47, 0x04,0xfb, -0x01,0x00, 0x04,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0x12, 0x04,0xfb, 0x02,0x2b, 0x02,0xfb, -0x01,0x59, 0x01,0x22, 0x01,0x47, 0x05,0xfb, 0x01,0x2b, 0x01,0x12, 0x01,0xfb, 0x01,0x03, 0x01,0x2b, -0x01,0x22, 0x0a,0xfb, 0x01,0x00, 0x01,0x22, 0x0b,0xfb, 0x01,0x70, 0x01,0x00, 0x15,0xfd, 0x01,0x83, -0x01,0x41, 0x02,0xfd, 0x01,0x6c, 0x01,0x55, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x02,0xfd, -0x01,0x55, 0x08,0x41, 0x01,0x83, 0x14,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x2b, -0x01,0x41, 0x04,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x83, 0x08,0xfd, 0x01,0x83, 0x0f,0xfd, 0x01,0x00, -0x01,0x6c, 0x02,0xfd, 0x01,0x83, 0x7e,0xfd, 0x09,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, -0x0a,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x6c, 0x02,0x2b, 0x01,0x6c, 0x1e,0xfd, 0x01,0x2b, -0x06,0x00, 0x03,0xfd, 0x01,0x55, 0x08,0x00, 0x01,0x55, 0x08,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x2b, -0x04,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x6c, 0x03,0x2b, 0x01,0x00, -0x02,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x2b, 0x04,0xfb, 0x01,0x03, 0x03,0x2b, 0x06,0x00, 0x01,0x2b, -0x04,0xfb, 0x01,0x36, 0x01,0x22, 0x01,0x36, 0x01,0xfb, 0x02,0x00, 0x0a,0xfb, 0x01,0x47, 0x01,0x00, -0x01,0x59, 0x01,0x00, 0x01,0x59, 0x03,0xfb, 0x01,0x47, 0x02,0x22, 
0x01,0x00, 0x01,0x2b, 0x01,0x22, -0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x01,0x59, 0x04,0xfb, 0x01,0x59, 0x01,0x00, 0x04,0xfb, 0x01,0x59, -0x01,0x2b, 0x01,0x03, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x07,0x00, 0x01,0x2b, 0x01,0x47, 0x05,0xfb, -0x01,0x00, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0x22, 0x01,0x00, 0x01,0x03, 0x04,0xfb, 0x01,0x36, -0x01,0x00, 0x03,0x2b, 0x05,0x00, 0x03,0xfb, 0x01,0x03, 0x03,0x2b, 0x06,0x00, 0x01,0x2b, 0x11,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x07, 0x01,0x55, 0x01,0x07, 0x01,0x55, -0x02,0xfd, 0x01,0x00, 0x07,0x2b, 0x01,0x00, 0x01,0x2e, 0x14,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x2e, -0x06,0xfd, 0x01,0x2b, 0x06,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x09,0xfd, 0x01,0x55, -0x08,0x00, 0x01,0x55, 0x80,0x84,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x55, 0x04,0xfd, 0x01,0x2e, 0x01,0x19, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x83, -0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x1f,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x07, -0x01,0x2b, 0x06,0xfd, 0x01,0x00, 0x01,0x6c, 0x09,0xfd, 0x09,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x41, -0x05,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x02,0x41, 0x01,0x07, 0x01,0x2b, 0x01,0x2e, -0x01,0x22, 0x01,0x2b, 0x04,0x00, 0x02,0xfb, 0x05,0x22, 0x01,0x47, 0x01,0x00, 0x03,0xfb, 0x01,0x59, -0x07,0xfb, 0x01,0x00, 0x01,0x2b, 0x09,0xfb, 0x01,0x00, 0x03,0xfb, 0x02,0x2b, 0x04,0xfb, 0x01,0x2b, -0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x02,0xfb, -0x01,0x59, 0x01,0x22, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x03, 0x01,0xfb, 0x01,0x59, 0x01,0x00, -0x01,0x22, 0x06,0xfb, 0x01,0x2b, 0x01,0x22, 0x02,0xfb, 0x01,0x2b, 0x07,0xfb, 0x01,0x00, 0x01,0x2b, -0x01,0x03, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x05,0xfb, 0x01,0x36, 0x03,0x22, 0x01,0x00, 0x01,0x2b, -0x01,0x47, 0x05,0xfb, 0x03,0x22, 0x01,0x2e, 0x01,0x3a, 0x01,0x6c, 0x01,0x00, 0x03,0xfd, 0x01,0x83, -0x11,0xfd, 0x01,0x2e, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0x07, 0x01,0x19, 0x03,0xfd, -0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x00, 0x01,0x83, 0x0f,0xfd, 0x01,0x2b, 0x09,0x00, 0x01,0x2b, -0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x07, 0x01,0x2b, 0x05,0xfd, 0x01,0x2b, -0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x2e, 0x01,0x19, 0x05,0xfd, 0x01,0x00, 0x01,0x6c, 0x80,0x89,0xfd, -0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, -0x01,0x41, 0x01,0x00, 0x01,0x83, 0x01,0x2e, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x1f,0xfd, -0x01,0x55, 0x01,0x00, 0x02,0x55, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x02,0x2b, 0x01,0x07, -0x01,0x2b, 0x01,0x83, 0x02,0xfd, 0x01,0x83, 0x04,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x02,0xfd, 0x01,0x6c, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x41, 0x04,0x00, 0x01,0x2b, -0x02,0xfd, 0x06,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x2b, 0x01,0xfb, 0x01,0x00, 0x05,0xfb, 0x01,0x03, -0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x09,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x03, 0x08,0xfb, 0x01,0x2b, -0x01,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x47, 0x01,0x22, 0x03,0xfb, 0x01,0x2b, -0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, -0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x22, 0x01,0x2b, -0x01,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x06,0xfb, 0x04,0x2b, 0x01,0x00, 0x07,0xfb, 0x02,0x00, -0x04,0xfb, 0x01,0x00, 0x01,0x36, 0x04,0xfb, 0x01,0x47, 0x03,0xfb, 0x01,0x00, 0x03,0x22, 0x01,0x03, -0x05,0xfb, 0x01,0x81, 
0x01,0x15, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x15,0xfd, 0x01,0x2b, 0x01,0x07, -0x03,0xfd, 0x01,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x6c, 0x01,0x00, -0x15,0xfd, 0x01,0x00, 0x08,0xfd, 0x01,0x55, 0x01,0x00, 0x02,0x55, 0x03,0xfd, 0x01,0x00, 0x01,0x55, -0x06,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x02,0x2b, 0x01,0x07, -0x01,0x2b, 0x01,0x83, 0x02,0xfd, 0x01,0x83, 0x80,0x84,0xfd, 0x01,0x41, 0x01,0x00, 0x0b,0xfd, -0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x2b, 0x01,0x2e, 0x09,0xfd, 0x01,0x2b, -0x05,0x00, 0x05,0xfd, 0x01,0x19, 0x09,0x2b, 0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0x55, -0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2e, -0x01,0x19, 0x01,0x00, 0x01,0x2e, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x09,0x00, 0x02,0xfd, -0x01,0x83, 0x02,0x41, 0x01,0x2b, 0x01,0x07, 0x01,0x41, 0x01,0x6c, 0x01,0x00, 0x06,0xfd, 0x01,0x00, -0x02,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x88, 0x02,0x2b, 0x01,0xfb, 0x01,0x00, 0x04,0xfb, -0x01,0x22, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x08,0xfb, 0x01,0x59, 0x05,0x00, 0x01,0x03, -0x04,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0x59, -0x01,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x2b, 0x09,0x00, 0x01,0x22, 0x04,0xfb, 0x01,0x00, 0x01,0x47, -0x02,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x03, 0x04,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x22, -0x03,0x00, 0x01,0x03, 0x05,0xfb, 0x01,0x00, 0x01,0x12, 0x04,0xfb, 0x01,0x00, 0x01,0x22, 0x04,0xfb, -0x06,0x00, 0x02,0x2b, 0x01,0x22, 0x03,0xfb, 0x01,0x74, 0x01,0x88, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, -0x01,0x41, 0x01,0x00, 0x15,0xfd, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x01,0x07, 0x05,0xfd, -0x01,0x2b, 0x01,0x55, 0x05,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x19, 0x09,0x2b, 0x01,0x19, -0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x04,0xfd, -0x01,0x41, 0x01,0x00, 0x02,0x55, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x0c,0xfd, -0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2e, 0x01,0x19, 0x01,0x00, 0x01,0x2e, -0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x80,0x83,0xfd, 0x01,0x41, 0x01,0x00, 0x0a,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x10,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, -0x01,0x19, 0x09,0x2b, 0x01,0x19, 0x03,0xfd, 0x01,0x6c, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, -0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x01,0x2b, -0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, -0x01,0x2e, 0x01,0x2b, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x83, 0x02,0xfd, 0x01,0x00, -0x01,0x41, 0x01,0x07, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x2f, 0x01,0xfb, -0x01,0x00, 0x04,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x36, 0x06,0xfb, -0x01,0x36, 0x02,0x00, 0x01,0x03, 0x01,0x59, 0x01,0xfb, 0x01,0x47, 0x02,0x00, 0x03,0xfb, 0x02,0x2b, -0x01,0xfb, 0x01,0x00, 0x01,0xfb, 0x02,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x2b, 0x05,0xfb, 0x01,0x47, -0x01,0x00, 0x01,0x59, 0x08,0xfb, 0x01,0x59, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x47, -0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x02,0x2b, 0x01,0x22, 0x01,0x59, 0x01,0x00, 0x01,0xfb, 0x01,0x59, -0x01,0x00, 0x01,0x36, 0x04,0xfb, 0x01,0x00, 0x01,0x59, 0x04,0xfb, 0x01,0x00, 0x01,0x22, 0x08,0xfb, -0x01,0x2b, 0x01,0x22, 0x05,0xfb, 0x01,0xfc, 0x01,0x8b, 0x01,0xfd, 0x01,0x2b, 0x01,0x41, 
0x01,0xfd, -0x01,0x83, 0x01,0x00, 0x01,0x55, 0x13,0xfd, 0x01,0x6c, 0x01,0x00, 0x04,0xfd, 0x01,0x55, 0x01,0x00, -0x0c,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x19, 0x09,0x2b, 0x01,0x19, 0x03,0xfd, 0x01,0x55, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x04,0xfd, 0x01,0x6c, 0x03,0xfd, -0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x0b,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x41, -0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x01,0x2b, 0x01,0x55, 0x80,0x85,0xfd, -0x01,0x41, 0x01,0x00, 0x09,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x19, 0x05,0xfd, 0x01,0x41, 0x01,0x00, -0x10,0xfd, 0x01,0x2e, 0x01,0x2b, 0x18,0xfd, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x55, 0x01,0x41, -0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x6c, 0x01,0x41, 0x01,0x00, -0x03,0x41, 0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x41, -0x01,0x2b, 0x02,0x00, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, -0x04,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x69, 0x01,0x85, 0x01,0x00, 0x04,0xfb, 0x01,0x22, -0x01,0x00, 0x01,0x22, 0x02,0x00, 0x01,0x22, 0x05,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x22, 0x06,0xfb, -0x01,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x03, 0x01,0x00, -0x03,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0xfb, 0x01,0x03, 0x0b,0x00, 0x01,0x36, 0x02,0xfb, 0x02,0x2b, -0x03,0xfb, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x59, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0xfb, -0x01,0x2b, 0x01,0x22, 0x01,0x00, 0x01,0x12, 0x02,0xfb, 0x02,0x2b, 0x04,0xfb, 0x01,0x00, 0x04,0xfb, -0x01,0x59, 0x01,0x00, 0x06,0xfb, 0x01,0x36, 0x02,0x2b, 0x01,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x72, -0x01,0x85, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x13,0xfd, -0x02,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x07, 0x01,0x00, 0x12,0xfd, 0x01,0x2b, -0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x2e, 0x01,0x2b, 0x09,0xfd, 0x01,0x00, 0x01,0x2b, -0x0b,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x07, 0x01,0x00, 0x80,0x87,0xfd, 0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x6c, -0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x10,0xfd, 0x01,0x2b, 0x01,0x07, -0x17,0xfd, 0x02,0x2b, 0x08,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x05,0xfd, -0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x07,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x6c, 0x01,0xfd, 0x03,0x2b, -0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x03,0x2b, 0x01,0x00, -0x01,0x2b, 0x01,0x19, 0x01,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0xfc, 0x04,0xfb, 0x01,0x22, -0x01,0x2b, 0x01,0x22, 0x01,0x00, 0x01,0x59, 0x06,0xfb, 0x01,0x47, 0x01,0xfb, 0x03,0x00, 0x01,0x36, -0x02,0xfb, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x03,0xfb, -0x01,0x00, 0x01,0xfb, 0x01,0x00, 0x01,0x12, 0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x12, 0x02,0xfb, -0x01,0x36, 0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x06,0xfb, -0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x02,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x03, 0x09,0xfb, -0x01,0x2b, 0x01,0x00, 0x05,0xfb, 0x02,0x00, 0x01,0x12, 0x01,0x22, 0x03,0x00, 0x01,0x22, 0x01,0xfc, -0x01,0x8b, 0x05,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x05,0xfd, 0x01,0x2b, -0x02,0x00, 0x01,0x83, 0x09,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x05,0xfd, 0x01,0x41, 0x01,0x00, -0x01,0x83, 0x08,0xfd, 0x01,0x19, 0x01,0x00, 
0x01,0x6c, 0x11,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, -0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x02,0x2b, 0x0a,0xfd, 0x01,0x6c, -0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, -0x80,0x80,0xfd, 0x09,0x00, 0x03,0xfd, 0x01,0x83, 0x01,0x41, 0x01,0x19, 0x02,0x00, 0x01,0x2b, -0x01,0x83, 0x07,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x6c, 0x01,0x41, 0x01,0x2e, -0x09,0xfd, 0x01,0x00, 0x01,0x41, 0x15,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x09,0xfd, 0x01,0x41, -0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x07,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, -0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x41, -0x01,0x07, 0x01,0x2b, 0x01,0x41, 0x01,0x2e, 0x01,0x19, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, -0x01,0xfd, 0x01,0x88, 0x01,0x75, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x08,0xfb, 0x01,0x22, 0x01,0x2b, -0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x01,0x36, -0x01,0x47, 0x02,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x36, 0x02,0xfb, 0x01,0x36, -0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x05,0xfb, 0x01,0x03, 0x01,0x00, 0x02,0x59, -0x01,0xfb, 0x02,0x2b, 0x06,0xfb, 0x02,0x2b, 0x01,0x59, 0x02,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, -0x09,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x05,0xfb, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x22, -0x01,0x3c, 0x02,0x00, 0x01,0x83, 0x07,0xfd, 0x02,0x2b, 0x05,0xfd, 0x01,0x55, 0x01,0x2b, 0x01,0xfd, -0x01,0x19, 0x01,0x07, 0x08,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x83, 0x05,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x11,0xfd, 0x01,0x83, 0x01,0x00, -0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x04,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x83, 0x01,0x41, 0x01,0x19, 0x02,0x00, 0x01,0x2b, 0x01,0x83, -0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x80,0x87,0xfd, 0x01,0x55, 0x01,0x2b, -0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, 0x0b,0xfd, 0x01,0x2e, 0x05,0x00, 0x01,0x2b, 0x01,0x19, -0x04,0xfd, 0x08,0x00, 0x01,0x2b, 0x11,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, 0x0b,0xfd, -0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0x00, 0x04,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x02,0xfd, -0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x06,0x41, 0x01,0x2e, 0x01,0x6c, -0x01,0x2b, 0x04,0x00, 0x01,0x07, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x02,0xfd, -0x01,0x8b, 0x01,0x81, 0x01,0x70, 0x01,0x12, 0x01,0x00, 0x01,0x2b, 0x0a,0xfb, 0x02,0x00, 0x01,0x2b, -0x03,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x47, 0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x01,0x00, 0x03,0xfb, -0x01,0x22, 0x01,0x00, 0x05,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x02,0x00, -0x01,0x22, 0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x47, 0x01,0x36, 0x03,0x00, 0x01,0x47, 0x06,0xfb, -0x01,0x59, 0x02,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x12, 0x02,0x00, 0x01,0x2b, 0x01,0x59, 0x06,0xfb, -0x01,0x22, 0x01,0x2b, 0x02,0x00, 0x01,0x59, 0x06,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, -0x01,0x6c, 0x01,0x8b, 0x01,0xfd, 0x01,0x2b, 0x01,0x83, 0x05,0xfd, 0x01,0x2e, 0x01,0x00, 0x01,0x2b, -0x06,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, 0x01,0x2b, 0x01,0x19, 0x08,0xfd, 0x01,0x83, 0x01,0x07, -0x08,0xfd, 0x01,0x83, 0x01,0x2b, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x13,0xfd, -0x01,0x41, 0x04,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, 0x03,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, -0x01,0x2e, 
0x09,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, 0x0c,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, -0x02,0x00, 0x80,0xbe,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x83, 0x0d,0xfd, 0x01,0x83, 0x03,0x41, -0x01,0x6c, 0x05,0xfd, 0x01,0x2b, 0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x41, -0x01,0x55, 0x02,0xfd, 0x01,0x83, 0x01,0x41, 0x06,0x2b, 0x01,0x41, 0x01,0x83, 0x01,0x41, 0x04,0xfd, -0x01,0x2b, 0x01,0x2e, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x04,0xfd, 0x01,0x8b, 0x01,0x01, 0x01,0x22, -0x0c,0xfb, 0x01,0x36, 0x01,0x22, 0x01,0x36, 0x05,0xfb, 0x01,0x59, 0x01,0x2b, 0x01,0x59, 0x01,0xfb, -0x01,0x2b, 0x04,0x00, 0x01,0x36, 0x03,0xfb, 0x01,0x22, 0x02,0x00, 0x01,0x2b, 0x01,0x12, 0x04,0xfb, -0x01,0x22, 0x02,0x00, 0x02,0xfb, 0x01,0x36, 0x03,0xfb, 0x01,0x22, 0x01,0x47, 0x0c,0xfb, 0x01,0x59, -0x01,0x22, 0x09,0xfb, 0x01,0x47, 0x01,0x2b, 0x01,0x47, 0x09,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0x30, -0x01,0x8b, 0x0a,0xfd, 0x01,0x07, 0x01,0x41, 0x08,0xfd, 0x01,0x2e, 0x01,0x00, 0x01,0x2b, 0x1b,0xfd, -0x01,0x83, 0x1a,0xfd, 0x01,0x2b, 0x08,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x83, 0x1a,0xfd, 0x01,0x83, -0x03,0x41, 0x01,0x6c, 0x81,0x03,0xfd, 0x01,0x8b, 0x01,0x7c, 0x57,0xfb, 0x01,0x7c, 0x01,0x8b, -0x81,0x7f,0xfd, 0x01,0x8b, 0x01,0x7b, 0x53,0xfb, 0x01,0x7b, 0x01,0x8b, 0x81,0x83,0xfd, -0x01,0x8b, 0x01,0xfc, 0x01,0x6d, 0x4d,0xfb, 0x01,0x6d, 0x01,0xfc, 0x01,0x8b, 0x81,0x87,0xfd, -0x01,0x8b, 0x01,0x80, 0x01,0x72, 0x49,0xfb, 0x01,0x72, 0x01,0x80, 0x01,0x8b, 0x80,0xfd,0xfd, -0x01,0x19, 0x01,0xfd, 0x01,0x19, 0x02,0x55, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x83, 0x05,0xfd, -0x01,0x83, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x05,0xfd, 0x01,0x55, 0x03,0x41, 0x01,0x55, -0x01,0xfd, 0x04,0x41, 0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x02,0xfd, 0x06,0x41, 0x01,0x55, 0x04,0xfd, -0x01,0x41, 0x01,0x83, 0x0a,0xfd, 0x01,0x41, 0x01,0x19, 0x04,0xfd, 0x01,0x2b, 0x06,0xfd, 0x01,0x6c, -0x05,0xfd, 0x01,0x6c, 0x01,0x55, 0x14,0xfd, 0x01,0x19, 0x01,0x55, 0x22,0xfd, 0x01,0x86, 0x01,0x79, -0x1b,0xfb, 0x01,0x22, 0x0e,0xfb, 0x01,0x22, 0x02,0xfb, 0x01,0x12, 0x01,0x22, 0x01,0x00, 0x06,0xfb, -0x01,0x2b, 0x01,0x36, 0x0d,0xfb, 0x01,0x79, 0x01,0x86, 0x09,0xfd, 0x01,0x41, 0x01,0x55, 0x08,0xfd, -0x01,0x2b, 0x02,0xfd, 0x06,0x41, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x83, 0x1c,0xfd, 0x01,0x83, -0x01,0x55, 0x08,0xfd, 0x01,0x19, 0x04,0xfd, 0x01,0x6c, 0x01,0x2b, 0x08,0xfd, 0x01,0x07, 0x80,0xa7,0xfd, -0x01,0x19, 0x02,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x83, -0x02,0x00, 0x01,0x2b, 0x01,0x83, 0x03,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, -0x01,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, -0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x2b, 0x02,0x6c, -0x03,0x2b, 0x01,0x00, 0x03,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x19, 0x04,0xfd, 0x01,0x19, 0x01,0x2b, -0x06,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x00, 0x01,0x41, 0x17,0xfd, 0x01,0x00, -0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x8b, 0x01,0x00, 0x01,0x75, 0x18,0xfb, 0x01,0x00, -0x0e,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0x22, 0x04,0xfb, 0x01,0x59, -0x01,0x00, 0x01,0x59, 0x08,0xfb, 0x01,0x59, 0x01,0xfb, 0x01,0x75, 0x01,0x81, 0x01,0x8b, 0x0b,0xfd, -0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x83, 0x05,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, -0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x13,0xfd, 0x01,0x6c, 0x07,0xfd, -0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x83, 0x01,0x00, 0x03,0xfd, 0x01,0x55, 
0x01,0x00, -0x01,0x2b, 0x01,0x07, 0x07,0xfd, 0x01,0x00, 0x01,0x2e, 0x06,0xfd, 0x01,0x83, 0x01,0x55, 0x80,0x9d,0xfd, -0x01,0x83, 0x01,0x41, 0x04,0x2b, 0x01,0x55, 0x01,0x2e, 0x01,0x00, 0x03,0x41, 0x01,0x6c, 0x02,0xfd, -0x01,0x41, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x03,0xfd, -0x05,0x00, 0x01,0xfd, 0x05,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x6c, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x08,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x2b, 0x03,0x41, 0x01,0x00, 0x03,0x41, 0x03,0xfd, -0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x55, -0x01,0xfd, 0x02,0x2b, 0x08,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, 0x13,0xfd, 0x01,0x00, 0x04,0xfd, -0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x8b, 0x01,0x80, 0x01,0x75, 0x15,0xfb, -0x01,0x00, 0x0e,0xfb, 0x01,0x00, 0x03,0xfb, 0x01,0x12, 0x06,0xfb, 0x01,0x2b, 0x06,0x00, 0x03,0xfb, -0x01,0x75, 0x01,0x00, 0x01,0x2b, 0x01,0x7f, 0x09,0xfd, 0x01,0x55, 0x08,0x00, 0x01,0x55, 0x02,0xfd, -0x01,0x6c, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x6c, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, -0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x09,0xfd, 0x01,0x19, 0x01,0x2b, 0x08,0x00, 0x01,0x19, -0x06,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x19, -0x02,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0x6c, 0x07,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x2b, -0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x55, 0x80,0x96,0xfd, -0x01,0x6c, 0x02,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x19, 0x03,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x41, -0x01,0x6c, 0x02,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, -0x01,0x2b, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x02,0x41, -0x01,0x00, 0x02,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x0a,0xfd, 0x01,0x19, 0x01,0x2b, 0x01,0x00, -0x01,0x83, 0x01,0x19, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0xfd, -0x01,0x6c, 0x03,0x41, 0x01,0x07, 0x01,0x00, 0x02,0x2b, 0x07,0xfd, 0x02,0x2b, 0x06,0xfd, 0x01,0x41, -0x02,0x2b, 0x03,0x00, 0x01,0x41, 0x01,0xfd, 0x02,0x2b, 0x12,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x41, -0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0xf7, 0x01,0x2b, 0x01,0x00, -0x01,0x09, 0x01,0x6f, 0x03,0xfb, 0x01,0x59, 0x01,0xfb, 0x01,0x2b, 0x01,0x59, 0x02,0xfb, 0x01,0x36, -0x01,0x47, 0x06,0xfb, 0x01,0x00, 0x09,0xfb, 0x01,0x2b, 0x09,0x00, 0x01,0x2b, 0x04,0xfb, 0x01,0x22, -0x01,0x00, 0x01,0x59, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x7a, 0x01,0x84, 0x01,0x8b, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x2e, 0x01,0x19, 0x05,0xfd, 0x01,0x00, 0x01,0x6c, -0x07,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x02,0x41, 0x01,0x00, -0x02,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x0a,0xfd, 0x01,0x2e, 0x02,0x41, 0x01,0x83, 0x02,0xfd, -0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x07,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x19, 0x07,0xfd, 0x01,0x2b, -0x03,0x00, 0x02,0x2b, 0x08,0xfd, 0x06,0x00, 0x01,0x2b, 0x01,0x2e, 0x01,0x83, 0x03,0xfd, 0x01,0x41, -0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x80,0x96,0xfd, 0x01,0x83, 0x01,0x00, -0x02,0x2b, 0x01,0x00, 0x01,0x55, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0xfd, -0x01,0x83, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 
0x01,0x41, 0x01,0x55, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, -0x01,0xfd, 0x01,0x41, 0x01,0x2b, 0x02,0xfd, 0x04,0x00, 0x01,0x2b, 0x01,0xfd, 0x05,0x00, 0x03,0xfd, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, -0x0b,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0x2e, 0x01,0x55, 0x05,0x41, 0x01,0x55, 0x03,0xfd, 0x01,0x00, -0x01,0xfd, 0x01,0x6c, 0x04,0x2b, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, -0x07,0xfd, 0x01,0x07, 0x01,0x19, 0x02,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x01,0xfd, -0x01,0x00, 0x01,0x41, 0x11,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x02,0x2b, -0x04,0x00, 0x01,0x2b, 0x01,0x07, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x8b, 0x01,0x82, -0x01,0x08, 0x01,0x00, 0x01,0xfb, 0x02,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x03, 0x06,0xfb, 0x01,0x00, -0x01,0x03, 0x01,0x59, 0x0c,0xfb, 0x01,0x00, 0x08,0xfb, 0x01,0x36, 0x01,0x00, 0x02,0x36, 0x01,0x70, -0x01,0x79, 0x01,0x82, 0x01,0x00, 0x01,0x50, 0x06,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x00, -0x01,0x19, 0x04,0xfd, 0x02,0x2b, 0x01,0x07, 0x01,0x2b, 0x01,0x83, 0x02,0xfd, 0x01,0x83, 0x04,0xfd, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, -0x0f,0xfd, 0x01,0x2b, 0x01,0x19, 0x08,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x2b, -0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x41, 0x03,0x2b, 0x01,0x6c, 0x08,0xfd, 0x01,0x19, 0x01,0x2b, -0x05,0xfd, 0x01,0x6c, 0x01,0x00, 0x06,0xfd, 0x02,0x2b, 0x80,0x95,0xfd, 0x01,0x55, 0x01,0x00, -0x01,0x55, 0x01,0x19, 0x02,0x55, 0x01,0x00, 0x01,0x2b, 0x01,0x07, 0x01,0xfd, 0x01,0x00, 0x01,0x83, -0x03,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0x2b, 0x01,0x19, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x83, -0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x02,0xfd, 0x05,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, -0x01,0x00, 0x01,0x83, 0x01,0x55, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x04,0xfd, -0x01,0x00, 0x0a,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x06,0x00, 0x03,0xfd, -0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x02,0x00, -0x01,0x19, 0x06,0xfd, 0x01,0x2b, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, -0x01,0x00, 0x11,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, 0x01,0x41, -0x01,0x83, 0x01,0x00, 0x04,0xfd, 0x02,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x79, 0x01,0x71, 0x01,0x00, -0x01,0x7a, 0x01,0x73, 0x01,0x00, 0x01,0x47, 0x06,0xfb, 0x03,0x00, 0x01,0x2b, 0x01,0x47, 0x07,0xfb, -0x01,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x00, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x04,0xfb, 0x01,0x28, -0x01,0x00, 0x01,0x43, 0x01,0x49, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x0c,0xfd, -0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2e, 0x01,0x19, 0x01,0x00, 0x01,0x2e, -0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0x55, 0x01,0x00, 0x02,0xfd, -0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x0e,0xfd, 0x02,0x2b, 0x08,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x83, 0x0b,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x03,0x41, -0x01,0x83, 0x09,0xfd, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x06,0xfd, 0x01,0x83, 0x01,0x00, -0x01,0x83, 0x80,0x95,0xfd, 0x02,0x83, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x01,0x83, 0x01,0x00, -0x01,0x83, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x02,0x2b, 0x02,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, -0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, -0x02,0xfd, 
0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, -0x02,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x6c, 0x01,0xfd, 0x01,0x55, -0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x55, 0x01,0x83, 0x01,0x55, -0x01,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x03,0xfd, 0x01,0x41, 0x01,0x00, -0x05,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x19, 0x01,0x83, 0x01,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, -0x05,0xfd, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x55, -0x10,0xfd, 0x01,0x2b, 0x04,0xfd, 0x01,0x19, 0x01,0x2b, 0x07,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x83, -0x02,0x2b, 0x06,0xfd, 0x01,0x2b, 0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x83, 0x01,0x41, 0x01,0x00, -0x02,0x8b, 0x01,0x87, 0x01,0x82, 0x01,0x7d, 0x01,0x7a, 0x01,0x77, 0x01,0x00, 0x01,0x71, 0x01,0x5c, -0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x03,0xfb, 0x01,0x6d, 0x01,0x38, 0x01,0x00, 0x01,0x71, -0x01,0x74, 0x01,0x00, 0x01,0x7a, 0x01,0x7d, 0x01,0x00, 0x01,0x35, 0x02,0x8b, 0x02,0xfd, 0x01,0x6c, -0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x0b,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x03,0xfd, -0x01,0x41, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x01,0x2b, 0x01,0x55, -0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, -0x04,0xfd, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x6c, 0x06,0xfd, 0x01,0x00, 0x01,0x6c, 0x08,0xfd, -0x01,0x83, 0x01,0x00, 0x01,0x19, 0x0a,0xfd, 0x01,0x6c, 0x01,0x00, 0x0c,0xfd, 0x01,0x6c, 0x01,0x2b, -0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x04,0xfd, 0x01,0x2b, -0x01,0x07, 0x80,0x94,0xfd, 0x01,0x55, 0x06,0x00, 0x01,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0xfd, -0x01,0x83, 0x01,0x00, 0x01,0x2e, 0x01,0x2b, 0x01,0x6c, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x41, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, -0x01,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x07,0x00, 0x04,0xfd, 0x01,0x00, 0x05,0xfd, -0x01,0x83, 0x01,0x00, 0x03,0xfd, 0x01,0x07, 0x01,0x41, 0x01,0x2b, 0x01,0x07, 0x01,0x19, 0x01,0xfd, -0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x19, 0x01,0x2b, -0x03,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x00, -0x01,0x55, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x2e, -0x01,0x6c, 0x15,0xfd, 0x01,0x2b, 0x01,0x07, 0x07,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x01,0x2b, -0x0c,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, -0x04,0xfd, 0x01,0x2b, 0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x2e, 0x01,0x2b, 0x09,0xfd, -0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x01,0x55, 0x01,0x41, -0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0x00, -0x02,0xfd, 0x07,0x00, 0x04,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x83, 0x01,0x00, 0x07,0xfd, 0x01,0x00, -0x0a,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x09,0xfd, 0x01,0x07, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, -0x01,0x83, 0x07,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x83, 0x01,0x2e, 0x01,0x00, -0x01,0x55, 0x04,0xfd, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x07, 0x01,0x19, 0x03,0xfd, 0x01,0x41, -0x01,0x00, 0x80,0x95,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x07, 0x02,0xfd, -0x01,0x2e, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x07, 0x01,0x2e, 0x01,0x41, 
0x01,0x2b, 0x04,0xfd, -0x01,0x83, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0x41, 0x01,0x00, -0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, -0x01,0x19, 0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x04,0xfd, 0x02,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x41, -0x02,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x05,0xfd, 0x02,0x00, 0x01,0x83, 0x03,0xfd, -0x01,0x2b, 0x01,0x19, 0x05,0xfd, 0x01,0x6c, 0x01,0xfd, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, -0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x83, 0x01,0x2e, -0x12,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x07,0xfd, 0x01,0x00, 0x0f,0xfd, 0x01,0x07, 0x01,0x2b, -0x08,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x83, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, 0x02,0xfd, -0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x02,0x2b, 0x0a,0xfd, 0x01,0x6c, 0x01,0x2b, -0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x07,0xfd, -0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x04,0xfd, 0x01,0x00, -0x01,0x83, 0x04,0xfd, 0x02,0x2b, 0x07,0xfd, 0x01,0x00, 0x01,0x07, 0x0b,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x83, 0x07,0xfd, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x08,0xfd, 0x01,0x00, -0x01,0x2e, 0x0b,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x6c, 0x03,0xfd, 0x01,0x83, -0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x83, 0x80,0x8f,0xfd, 0x01,0x6c, 0x01,0x2b, -0x02,0x00, 0x01,0x83, 0x02,0xfd, 0x02,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x83, 0x01,0xfd, 0x01,0x41, -0x01,0x2b, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, -0x03,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x07,0xfd, -0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x00, -0x01,0x41, 0x02,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x07, -0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x19, 0x01,0x00, 0x07,0xfd, 0x01,0x41, -0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x03,0xfd, 0x01,0x6c, -0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x2e, 0x01,0x00, 0x05,0xfd, 0x01,0x6c, 0x01,0x00, -0x01,0x19, 0x10,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x55, 0x08,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, -0x01,0x55, 0x01,0x2e, 0x08,0xfd, 0x01,0x2b, 0x01,0x00, 0x09,0xfd, 0x01,0x00, 0x08,0xfd, 0x01,0x83, -0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x04,0xfd, -0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x83, 0x01,0x41, 0x01,0x19, 0x02,0x00, 0x01,0x2b, -0x01,0x83, 0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x07,0xfd, 0x01,0x00, 0x05,0xfd, -0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x83, -0x07,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x2b, 0x01,0x6c, 0x0a,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x6c, -0x05,0xfd, 0x01,0x55, 0x01,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x04,0xfd, 0x01,0x83, 0x03,0xfd, -0x01,0x2b, 0x01,0x07, 0x05,0xfd, 0x01,0x6c, 0x05,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x00, -0x05,0xfd, 0x01,0x83, 0x01,0xfd, 0x01,0x55, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x07, 0x80,0x90,0xfd, -0x01,0x19, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x02,0x2b, -0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x41, -0x05,0xfd, 0x01,0x00, 0x02,0xfd, 
0x01,0x00, 0x04,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, -0x05,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x2b, 0x01,0x83, 0x03,0xfd, 0x01,0x55, -0x01,0x2b, 0x01,0x41, 0x01,0x2b, 0x01,0x83, 0x01,0x41, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, -0x01,0xfd, 0x01,0x2b, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x07, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0x55, 0x08,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x83, 0x03,0xfd, 0x01,0x00, -0x01,0x19, 0x01,0xfd, 0x03,0x00, 0x01,0x2e, 0x06,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x0d,0xfd, -0x01,0x2b, 0x02,0x00, 0x01,0x6c, 0x09,0xfd, 0x01,0x2e, 0x06,0x00, 0x06,0xfd, 0x01,0x2e, 0x01,0x00, -0x01,0x2b, 0x0a,0xfd, 0x01,0x00, 0x09,0xfd, 0x01,0x41, 0x04,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, -0x03,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, 0x09,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, -0x0c,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0x00, 0x04,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x00, -0x07,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x2b, 0x01,0x83, 0x09,0xfd, 0x01,0x6c, 0x01,0x2b, 0x02,0x00, -0x0a,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x83, -0x01,0x2b, 0x04,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x6c, 0x02,0x00, 0x03,0x2b, 0x02,0x00, 0x06,0xfd, -0x01,0x55, 0x01,0x2b, 0x01,0x83, 0x07,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, 0x01,0x2b, 0x01,0x19, -0x80,0x8e,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, -0x03,0xfd, 0x02,0x2b, 0x02,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x19, 0x01,0x00, 0x01,0x19, 0x01,0x83, -0x06,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, -0x01,0x2b, 0x05,0xfd, 0x01,0x00, 0x09,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x83, 0x06,0xfd, 0x01,0x6c, -0x01,0x41, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, -0x07,0xfd, 0x01,0x41, 0x01,0x83, 0x0a,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x55, 0x06,0xfd, 0x01,0x55, -0x03,0xfd, 0x02,0x55, 0x08,0xfd, 0x01,0x55, 0x01,0x2b, 0x0d,0xfd, 0x01,0x55, 0x01,0x6c, 0x18,0xfd, -0x01,0x55, 0x01,0x6c, 0x0b,0xfd, 0x01,0x2b, 0x0e,0xfd, 0x01,0x2b, 0x08,0xfd, 0x01,0x83, 0x01,0x2b, -0x01,0x83, 0x1a,0xfd, 0x01,0x83, 0x03,0x41, 0x01,0x6c, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, -0x05,0xfd, 0x01,0x00, 0x09,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x83, 0x0e,0xfd, 0x01,0x83, 0x0b,0xfd, -0x01,0x19, 0x01,0x83, 0x04,0xfd, 0x01,0x6c, 0x01,0x83, 0x0e,0xfd, 0x01,0x83, 0x03,0x41, 0x13,0xfd, -0x01,0x2e, 0x01,0x00, 0x01,0x2b, 0x80,0x95,0xfd, 0x01,0x83, 0x3c,0xfd, 0x01,0x83, 0x01,0x6c, -0xb6,0xd5,0xfd -}; diff --git a/osfmk/console/panic_ui/generated_files/rendered_numbers.c b/osfmk/console/panic_ui/generated_files/rendered_numbers.c deleted file mode 100644 index c2d571ef9..000000000 --- a/osfmk/console/panic_ui/generated_files/rendered_numbers.c +++ /dev/null @@ -1,376 +0,0 @@ - /* generated c file */ - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x82]; -} num_0 = { -/* w */ 9, -/* h */ 11, -/* pixel_data */ -0x09,0xfd, -0x02,0xfd, 0x01,0x81, 0x01,0x2b, 0x02,0x00, 0x01,0x26, 0x02,0xfd, -0x02,0xfd, 0x01,0x2b, 0x01,0x01, 0x01,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x69, 0x01,0xfd, -0x02,0xfd, 0x01,0x00, 0x01,0x52, 0x02,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x03,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x03,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x03,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, -0x02,0xfd, 0x01,0x00, 0x01,0x52, 
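/* How these glyph tables read, inferred from the data (the generated file does
 * not say): each glyph row is a sequence of (count, CLUT-index) byte pairs
 * whose counts sum to num_w. E.g. the second row of num_0 above,
 *     0x02,0xfd, 0x01,0x81, 0x01,0x2b, 0x02,0x00, 0x01,0x26, 0x02,0xfd,
 * paints 2+1+1+2+1+2 = 9 pixels, matching num_w == 9. A decoder is a simple
 * pair loop (src and dst are placeholder names, not from this file):
 *     for (i = 0; i < sizeof num_0.num_pixel_data; i += 2)
 *         for (n = 0; n < src[i]; n++)
 *             *dst++ = src[i + 1];
 */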
0x02,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, -0x02,0xfd, 0x01,0x2b, 0x01,0x01, 0x01,0xfd, 0x01,0x69, 0x01,0x00, 0x01,0x69, 0x01,0xfd, -0x02,0xfd, 0x01,0x81, 0x01,0x2b, 0x02,0x00, 0x01,0x26, 0x02,0xfd, -0x09,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x54]; -} num_1 = { -/* w */ 7, -/* h */ 11, -/* pixel_data */ -0x07,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x2b, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x01,0xfd, 0x01,0x26, 0x01,0x52, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, -0x01,0xfd, 0x01,0x2b, 0x04,0x00, 0x01,0xfd, -0x07,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x5c]; -} num_2 = { -/* w */ 8, -/* h */ 11, -/* pixel_data */ -0x08,0xfd, -0x01,0xfd, 0x01,0x52, 0x03,0x00, 0x01,0x01, 0x02,0xfd, -0x01,0xfd, 0x01,0x52, 0x01,0x81, 0x01,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x12, 0x01,0xfd, -0x05,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, -0x04,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0xf9, 0x01,0xfd, -0x03,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x01, 0x03,0xfd, -0x02,0xfd, 0x01,0x00, 0x01,0x12, 0x04,0xfd, -0x01,0xfd, 0x01,0x01, 0x01,0x2b, 0x05,0xfd, -0x01,0xfd, 0x05,0x00, 0x01,0x01, 0x01,0xfd, -0x08,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x46]; -} num_3 = { -/* w */ 6, -/* h */ 11, -/* pixel_data */ -0x06,0xfd, -0x01,0xfd, 0x04,0x00, 0x01,0x52, -0x01,0xfd, 0x01,0x81, 0x02,0xfd, 0x02,0x00, -0x04,0xfd, 0x01,0x01, 0x01,0x00, -0x03,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x52, -0x01,0xfd, 0x01,0x52, 0x02,0x00, 0x01,0x2b, 0x01,0x81, -0x04,0xfd, 0x01,0x2b, 0x01,0x00, -0x04,0xfd, 0x01,0xf9, 0x01,0x00, -0x01,0xfd, 0x01,0x81, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, -0x01,0xfd, 0x04,0x00, 0x01,0x52, -0x06,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x64]; -} num_4 = { -/* w */ 9, -/* h */ 11, -/* pixel_data */ -0x09,0xfd, -0x05,0xfd, 0x02,0x00, 0x02,0xfd, -0x04,0xfd, 0x01,0x01, 0x02,0x00, 0x02,0xfd, -0x03,0xfd, 0x01,0x52, 0x01,0x2b, 0x01,0x01, 0x01,0x00, 0x02,0xfd, -0x03,0xfd, 0x01,0x00, 0x01,0x81, 0x01,0x01, 0x01,0x00, 0x02,0xfd, -0x02,0xfd, 0x01,0x01, 0x01,0xf9, 0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, -0x01,0xfd, 0x01,0x69, 0x01,0x00, 0x02,0xf9, 0x01,0x2b, 0x01,0x00, 0x01,0xf9, 0x01,0xfd, -0x01,0xfd, 0x01,0x69, 0x03,0x01, 0x02,0x00, 0x01,0x01, 0x01,0xfd, -0x05,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, -0x05,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, -0x09,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x58]; -} num_5 = { -/* w */ 7, -/* h */ 11, -/* pixel_data */ -0x07,0xfd, -0x01,0xfd, 0x01,0xf9, 0x04,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x04,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x04,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x01, 0x01,0x69, 0x02,0xfd, -0x01,0xfd, 0x01,0x81, 0x01,0xf9, 0x01,0x12, 0x01,0x00, 0x01,0x12, 0x01,0xfd, -0x04,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0xfd, -0x04,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0xfd, -0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0x12, 0x03,0x00, 0x01,0x81, 0x01,0xfd, -0x07,0xfd -}; - - -static const struct { - unsigned int num_w; - 
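/* An added observation, checked against the data: each num_pixel_data array is
 * sized exactly to its run stream -- num_6 below is an 8x11 glyph stored as
 * 57 (count, CLUT-index) pairs, i.e. 0x72 bytes. */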
unsigned int num_h; - unsigned char num_pixel_data[0x72]; -} num_6 = { -/* w */ 8, -/* h */ 11, -/* pixel_data */ -0x08,0xfd, -0x02,0xfd, 0x01,0x52, 0x03,0x00, 0x01,0x01, 0x01,0xfd, -0x01,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x52, 0x02,0xfd, 0x01,0x81, 0x01,0xfd, -0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x05,0xfd, -0x01,0xfd, 0x01,0x00, 0x02,0x01, 0x01,0x00, 0x01,0x2b, 0x01,0x69, 0x01,0xfd, -0x01,0xfd, 0x02,0x00, 0x01,0xf9, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0xf9, -0x01,0xfd, 0x02,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0xf9, -0x01,0xfd, 0x01,0x69, 0x01,0x00, 0x01,0x81, 0x01,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0xfd, -0x02,0xfd, 0x01,0x26, 0x03,0x00, 0x01,0x52, 0x01,0xfd, -0x08,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x4a]; -} num_7 = { -/* w */ 7, -/* h */ 11, -/* pixel_data */ -0x07,0xfd, -0x01,0xfd, 0x06,0x00, -0x05,0xfd, 0x01,0x2b, 0x01,0x01, -0x04,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0xfd, -0x04,0xfd, 0x01,0x00, 0x01,0xf9, 0x01,0xfd, -0x03,0xfd, 0x01,0x12, 0x01,0x00, 0x02,0xfd, -0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x69, 0x02,0xfd, -0x02,0xfd, 0x01,0x01, 0x01,0x00, 0x03,0xfd, -0x02,0xfd, 0x01,0x00, 0x01,0x12, 0x03,0xfd, -0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x81, 0x03,0xfd, -0x07,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x76]; -} num_8 = { -/* w */ 8, -/* h */ 11, -/* pixel_data */ -0x08,0xfd, -0x02,0xfd, 0x01,0x52, 0x03,0x00, 0x01,0x12, 0x01,0xfd, -0x02,0xfd, 0x01,0x00, 0x01,0x26, 0x01,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0xfd, -0x02,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0xfd, -0x02,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0x01, 0x01,0x00, 0x01,0x52, 0x01,0xfd, -0x02,0xfd, 0x01,0x12, 0x03,0x00, 0x01,0x52, 0x01,0xfd, -0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x02,0x81, 0x02,0x00, 0x01,0x81, -0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0xf9, -0x01,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0x81, 0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x81, -0x02,0xfd, 0x01,0x01, 0x03,0x00, 0x01,0xf9, 0x01,0xfd, -0x08,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x66]; -} num_9 = { -/* w */ 7, -/* h */ 11, -/* pixel_data */ -0x07,0xfd, -0x02,0xfd, 0x01,0x01, 0x02,0x00, 0x01,0x2b, 0x01,0xfd, -0x01,0xfd, 0x01,0x12, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x12, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x26, 0x01,0x00, -0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0xf9, 0x01,0x00, -0x01,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0x69, 0x01,0x81, 0x02,0x00, -0x02,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0x2b, 0x01,0x26, 0x01,0x00, -0x05,0xfd, 0x01,0x01, 0x01,0x2b, -0x01,0xfd, 0x01,0x81, 0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x52, -0x01,0xfd, 0x01,0x69, 0x03,0x00, 0x01,0x26, 0x01,0xfd, -0x07,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x76]; -} num_a = { -/* w */ 10, -/* h */ 11, -/* pixel_data */ -0x0a,0xfd, -0x04,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0x81, 0x03,0xfd, -0x04,0xfd, 0x02,0x00, 0x01,0x12, 0x03,0xfd, -0x03,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, -0x03,0xfd, 0x01,0x2b, 0x01,0x12, 0x01,0x69, 0x01,0x00, 0x01,0x52, 0x02,0xfd, -0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x81, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, -0x02,0xfd, 0x01,0x12, 0x01,0x00, 0x02,0x01, 0x02,0x00, 0x02,0xfd, -0x02,0xfd, 0x01,0x00, 0x01,0x12, 0x03,0xf9, 0x01,0x00, 
0x01,0x26, 0x01,0xfd, -0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0x2b, 0x01,0x01, 0x04,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x81, -0x0a,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x68]; -} num_b = { -/* w */ 7, -/* h */ 11, -/* pixel_data */ -0x07,0xfd, -0x01,0xfd, 0x04,0x00, 0x01,0x2b, 0x01,0x81, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x01, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x00, 0x01,0x01, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0x81, -0x01,0xfd, 0x04,0x00, 0x01,0xf9, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x26, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x01, 0x01,0x00, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, -0x01,0xfd, 0x05,0x00, 0x01,0x81, -0x07,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x5a]; -} num_c = { -/* w */ 9, -/* h */ 11, -/* pixel_data */ -0x09,0xfd, -0x03,0xfd, 0x01,0x01, 0x04,0x00, 0x01,0xf9, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x81, 0x03,0xfd, 0x01,0x69, -0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x69, 0x05,0xfd, -0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x06,0xfd, -0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x06,0xfd, -0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x06,0xfd, -0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x69, 0x05,0xfd, -0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x81, 0x02,0xfd, 0x01,0x81, 0x01,0x52, -0x03,0xfd, 0x01,0x01, 0x04,0x00, 0x01,0x52, -0x09,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x7c]; -} num_d = { -/* w */ 10, -/* h */ 11, -/* pixel_data */ -0x0a,0xfd, -0x01,0xfd, 0x05,0x00, 0x01,0x2b, 0x01,0x81, 0x02,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x81, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x00, 0x01,0x12, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, -0x01,0xfd, 0x05,0x00, 0x01,0x01, 0x01,0x81, 0x02,0xfd, -0x0a,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x48]; -} num_e = { -/* w */ 7, -/* h */ 11, -/* pixel_data */ -0x07,0xfd, -0x01,0xfd, 0x05,0x00, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, -0x01,0xfd, 0x04,0x00, 0x01,0x12, 0x01,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, -0x01,0xfd, 0x05,0x00, 0x01,0x52, -0x07,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x46]; -} num_f = { -/* w */ 6, -/* h */ 11, -/* pixel_data */ -0x06,0xfd, -0x01,0xfd, 0x05,0x00, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, -0x01,0xfd, 0x04,0x00, 0x01,0x12, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 
0x03,0xfd, -0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, -0x06,0xfd -}; - - -static const struct { - unsigned int num_w; - unsigned int num_h; - unsigned char num_pixel_data[0x2e]; -} num_colon = { -/* w */ 4, -/* h */ 11, -/* pixel_data */ -0x04,0xfd, -0x04,0xfd, -0x04,0xfd, -0x01,0xfd, 0x01,0x69, 0x01,0x01, 0x01,0xfd, -0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0xfd, -0x04,0xfd, -0x04,0xfd, -0x04,0xfd, -0x01,0xfd, 0x01,0x81, 0x01,0xf9, 0x01,0xfd, -0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0xfd, -0x04,0xfd -}; - - diff --git a/osfmk/console/panic_ui/genimage.c b/osfmk/console/panic_ui/genimage.c deleted file mode 100644 index 533e6b32b..000000000 --- a/osfmk/console/panic_ui/genimage.c +++ /dev/null @@ -1,1621 +0,0 @@ -/* converts a QT RAW image file into the c structure that the - * kernel panic ui system expects. - * - * to build: cc -o genimage genimage.c -*/ - -#include -#include -#include -#include -#include -#include - -int EncodeImage( - unsigned char * data, - int pixels, - unsigned char * fileArr ); -int decode_rle( - unsigned char * dataPtr, - unsigned int * quantity, - unsigned int * depth, - unsigned char ** value ); -int findIndexNearMatch( - unsigned int color24 ); -unsigned char findIndexMatch( - unsigned int color24 ); -int convert24toGrey( - unsigned char * data, - unsigned int size ); -int convert8toGrey( - unsigned char * data, - unsigned int size ); -int convert8bitIndexto24( - unsigned char * data, - int height, - int width, - unsigned char ** dout ); -int convert8bitIndexto8( - unsigned char * data, - int height, - int width, - unsigned char ** dout ); -int convert24to8bitIndex( - unsigned char * data, - int height, - int width, - unsigned char ** dout ); -unsigned int * CreateCLUTarry( - unsigned char * raw_clut ); -unsigned int * ReplaceCLUT( - char * iname ); -void GenerateCLUT( - char * oname ); -void WriteQTRawFile( - FILE * ostream, - unsigned char * data, - int height, - int width, - int depth, - unsigned int size ); -void CreateRawQTFont( - void ); -void CreateRawQTCLUT( - int type ); - -#define offsetof(type, field) ((size_t)(&((type *)0)->field)) - -struct panicimage { - unsigned int pd_sum; - unsigned int pd_dataSize; - unsigned int pd_tag; - unsigned short pd_width; - unsigned short pd_height; - unsigned char pd_depth; - unsigned char pd_info_height; - unsigned char pd_info_color[2]; - unsigned char data[]; -}; - - - - -void -usage( int type ) { -printf( -"\n" -"Usage:\n" -"\tgenimage -i <.qtif> [operands ...]\n\n" -"\tThe following operands are available\n\n" -"\t-h\t\tDisplay full help information\n" -"\t-i \tUse file containing QuickTime uncompressed raw image as\n" -"\t\t\tthe panic dialog (8 or 24 bit)\n" -"\t-o \tWrite the output as a compressed WHD RAW image suitable\n" -"\t\t\tfor loading into the kernel\n" -"\t-c \tUse file containing 256 RGB values for 8-bit indexed \n" -"\t\t\tlookups, overrides built-in appleClut8\n" -"\t-fg \tForeground color of font used for panic information in\n" -"\t\t\t24-bits, default 0xFFFFFF (100%% white)\n" -"\t-bg \tBackground color of font used for panic information in\n" -"\t\t\t24-bits, default 0x222222 (13%% white, dark gray)\n" -"\t-n \tNumber of lines that have been reserved to display the\n" -"\t\t\tpanic information, must be at least 20\n" -"\n\tThese are useful options for testing\n" -"\t-io \tUse to override the default C source filename\n" -"\t-bw\t\tConvert the input image to shades of gray\n" -"\t-n24\t\tConvert an image from 8 bit to 24 bit mode before\n" -"\t\t\tprocessing\n" -"\t-n8\t\tDon't convert an image 
from 24 bit to 8 bit mode before \n" -"\t\t\tprocessing, default is to convert\n" -"\t-qt \t(requires -i) Write QuickTime uncompressed raw .qtif\n" -"\t\t\tfile containing the input image in 8-bit format\n" -"\t-r\t\tCreate a QuickTime uncompressed image of the 8-bit\n" -"\t\t\tsystem CLUT named appleclut8.qtif \n" -"\t-f\t\tCreate a QuickTime uncompressed image of the 8x16\n" -"\t\t\tbit panic info font named font.qtif \n" -"\n\n" ); -if ( type > 0 ) -printf( -"\ -This utility is used to convert a panic dialog from .qtif format into\n\ -one that is suitable for the kernel to display. The .qtif image file\n\ -can be in either 24 or 8 bit mode, but must be in an uncompressed raw\n\ -format. 8 bit mode is preferred, as it requires no conversion to the\n\ -colors that are contained in the CLUT. If a color cannot be found in\n\ -the CLUT, it will be converted to the nearest gray. The default CLUT\n\ -is the same as the system CLUT. If needed, this can be overridden by\n\ -providing a new CLUT with the -c option.\n\ -\n\ -However, if you override the default CLUT, the panic UI may not appear\n\ -as you intended when the system is in 8 bit mode. Colors that are not\n\ -present in the active CLUT will be converted to the nearest gray.\n\ -\n\ -The panic dialog must have a number of lines reserved at the bottom for\n\ -displaying additional panic information. The minimum number of lines\n\ -is 20. The font used to display this information needs to have the\n\ -foreground and background colors defined. The defaults are full white\n\ -on dark gray. This can be changed by using the -fg and/or -bg options to\n\ -provide new 24 bit colors. These colors must be contained in the CLUT.\n\ -\n\ -There are two possible output results. The default is to create a C\n\ -source file named panic_image.c that contains the panic image in an 8 bit\n\ -modified RLE compressed format and the CLUT that was used to create the\n\ -image. The second possibility is to create a binary version of the same\n\ -information by using the -o option. 
This file can then be used to replace\n\ -the panic dialog that is currently active in the kernel by using\n\ -sysctl(KERN_PANIC_INFO).\n\ -\n\n"); -} - - -#include "appleclut8.h" -#include "../iso_font.c" - -struct QTHeader { - long idSize; /* total size of ImageDescription including extra data ( CLUTs and other per sequence data ) */ - long cType; /* 'raw '; what kind of codec compressed this data */ - long resvd1; /* reserved for Apple use */ - short resvd2; /* reserved for Apple use */ - short dataRefIndex; /* set to zero */ - short version; /* which version is this data */ - short revisionLevel; /* what version of that codec did this */ - long vendor; /* whose codec compressed this data */ - long temporalQuality; /* what was the temporal quality factor */ - long spatialQuality; /* what was the spatial quality factor */ - short width; /* how many pixels wide is this data */ - short height; /* how many pixels high is this data */ - long hRes; /* horizontal resolution */ - long vRes; /* vertical resolution */ - long dataSize; /* if known, the size of data for this image descriptor */ - short frameCount; /* number of frames this description applies to */ - char name[32]; /* name of codec ( in case not installed ) */ - short depth; /* what depth is this data (1-32) or ( 33-40 grayscale ) */ - short clutID; /* clut id or if 0 clut follows or -1 if no clut */ -} image_header; - -static unsigned int mismatchClut[256]; -static int nextmis = -1, neargrey = 0, cvt2grey = 0, exactmatch=0; -static int grey = 0, debug = 0, testfont = 0, testclut = 0; -static int convert = 8; // default is to convert image to 8 bit uncompressed .tgif -static unsigned char fg, bg; -unsigned int * panic_clut = NULL; -static char * clutin = NULL; - -union colors { - unsigned int c24; - unsigned char rgb[4]; - struct { - unsigned char dummy; - unsigned char red; - unsigned char green; - unsigned char blue; - } clut; -}; - -int -main( int argc, char *argv[] ) -{ - char *file = NULL; - char *out = NULL; - char *kraw = NULL; - char *qtraw = NULL; - char *clutout = NULL; - char *whdname = NULL; - FILE * stream, *out_stream; - unsigned char * data; - unsigned short width = 0, height = 0; - unsigned char depth = 0, lines = 20; - unsigned int i, pixels, sum, encodedSize, fg24= 0xFFFFFF, bg24=0x222222; - unsigned char *fileArr; - int chars_this_line, next, runindex; - - - // pull apart the arguments - for( next = 1; next < argc; next++ ) - { - if (strcmp(argv[next], "-i") == 0) // image file in raw QT uncompressed format (.qtif) - file = argv[++next]; - - else if (strcmp(argv[next], "-o") == 0) // output file for WHD image - kraw = argv[++next]; - else if (strcmp(argv[next], "-io") == 0) // output file for image - out = argv[++next]; - - else if (strcmp(argv[next], "-n") == 0) // numbers of reserved lines - lines = atoi(argv[++next]); - else if (strcmp(argv[next], "-fg") == 0) // foreground color in 24 bits - sscanf(argv[++next], "%i", &fg24); - else if (strcmp(argv[next], "-bg") == 0) // background color in 24 bits - sscanf(argv[++next], "%i", &bg24); - else if (strcmp(argv[next], "-c") == 0) // input file for clut - clutin = argv[++next]; - else if (strcmp(argv[next], "-h") == 0) // display more help - { usage(1); exit(1); } - - // useful testing options - else if (strcmp(argv[next], "-co") == 0) // output file for generating appleClut8.h array included in this file - clutout = argv[++next]; - else if (strcmp(argv[next], "-a8") == 0) // output file for testing system CLUT 8 in QT RAW (test) - testclut = 8; - else if 
(strcmp(argv[next], "-r") == 0) // output file for QT clut RAW (test) - testclut = 1; - else if (strcmp(argv[next], "-qt") == 0) // output file for QT RAW (test) - qtraw = argv[++next]; - else if (strcmp(argv[next], "-bw") == 0) // use only shades of grey (test) - grey = 1; - else if (strcmp(argv[next], "-n8") == 0) // don't convert to 8 by default (test) - convert = 0; - else if (strcmp(argv[next], "-n24") == 0) // convert to 8 to 24 (test) - convert = 24; - else if (strcmp(argv[next], "-f") == 0) // test font (test) - testfont = 1; - else if (strcmp(argv[next], "-w") == 0) // read WHD raw file and output 8 bit tqif - whdname = argv[++next]; - - else if (strcmp(argv[next], "-debug") == 0) // verbose - debug++; - } - - if (!(file || clutout || testfont || testclut || whdname) ) { - usage(0); - exit(1); - } - - printf("\n"); - - panic_clut = appleClut8; - - if ( clutin ) - { - panic_clut = ReplaceCLUT( clutin ); - printf("Built-in CLUT has been replaced with %s...\n", clutin); - } else - { - if ( whdname ) - printf("Using CLUT from %s...\n", whdname); - else - printf("Using Built-in CLUT...\n"); - } - - if ( clutout ) - { - GenerateCLUT( clutout ); - printf("Created C source file of %s...\n", clutout); - } - - fg = findIndexNearMatch(fg24); - bg = findIndexNearMatch(bg24); - - if ( testclut ) - CreateRawQTCLUT(testclut); - - if ( testfont ) - CreateRawQTFont(); - - // Begin to process the image - - if( file == NULL) - { - if ( whdname == NULL ) - { - if ( debug) - printf("No image file was processed...\n\n"); - exit(0); - } - } - - - printf("Verifing image file...\n"); - if ( file != NULL ) - { - stream = fopen(file, "r"); - if (!stream) { - fprintf(stderr, "Err: could not open .qtif image file.\n\n"); - exit(1); - } - - { - long hdr_off; - long hdr_type; - - fread((void *) &hdr_off, sizeof(long), 1, stream); - fread((void *) &hdr_type, sizeof(long), 1, stream); - - if ( hdr_type != 'idat' ) goto errQTimage; - - fseek(stream, hdr_off, SEEK_SET); - fread((void *) &hdr_off, sizeof(long), 1, stream); - fread((void *) &hdr_type, sizeof(long), 1, stream); - - if ( hdr_type != 'idsc' ) goto errQTimage; - - fread((void *) &image_header, sizeof(image_header), 1, stream); - if ( image_header.cType != 'raw ' ) goto errQTimage; - if (( image_header.depth != 8 ) && ( image_header.depth != 24 )) goto errQTimage; - - width = image_header.width; - height = image_header.height; - depth = image_header.depth; - - printf("Image info: width: %d height: %d depth: %d...\n", width, height, depth); - - if (!(width && height && depth)) { - fprintf(stderr,"Err: Invalid image file header (width, height, or depth is 0)\n"); - exit(1); - } - } - - if ( !(data = (char *)malloc(image_header.dataSize))) { - fprintf(stderr,"Err: Couldn't malloc file data (%ld bytes)... 
bailing.\n", image_header.dataSize); - exit(1); - } - - // Read the image data - fseek(stream, 8, SEEK_SET); - fread((void *) data, image_header.dataSize, 1, stream); - fclose( stream ); - - if ( kraw && image_header.depth == 24 ) - { - fprintf(stderr, "Err: The WHD raw file (%s) will not be created when input in is millions of colors\n", kraw); - kraw = NULL; - } - - pixels = image_header.dataSize; - - if ( image_header.depth == 24 ) - { - if ( grey == 1 ) - pixels = convert24toGrey( data, image_header.dataSize); - - if ( convert == 8 ) - { - printf("Converting image file to 8 bit...\n"); - pixels = convert24to8bitIndex( data, height, width, &data ); - image_header.dataSize = pixels; - depth = 1; - } else - depth = 3; - } else { - if ( grey == 1 ) - pixels = convert8toGrey( data, image_header.dataSize ); - - if ( convert == 24 ) - { - printf("Converting image file to 24 bit...\n"); - pixels = convert8bitIndexto24( data, height, width, &data ); - image_header.dataSize = pixels; - depth = 3; - } else - { - printf("Converting image file to 8 bit raw...\n"); - pixels = convert8bitIndexto8( data, height, width, &data ); - image_header.dataSize = pixels; - depth = 1; - } - } - - printf("Converted %d pixels%s...\n", pixels/depth, ((grey==1)?" to grayscale":"")); - if ( exactmatch > 0 ) - printf("Found %d color mathces in CLUT...\n", exactmatch); - if ( cvt2grey > 0 ) - printf("Converted %d colors to gray...\n", cvt2grey); - if ( neargrey > 0 ) - printf("Adjusted %d grays to best match...\n", neargrey); - if ( nextmis > 0 ) - printf("Total of %d seperate color mismatches...\n", nextmis); - } - else - { - unsigned int pixels_out; - struct panicimage image; - - stream = fopen(whdname, "r"); - if (!stream) { - fprintf(stderr, "Err: could not open WHD raw image file.\n\n"); - exit(1); - } - - fread(&image, sizeof(image), 1, stream); - - if ( image.pd_tag != 'RNMp' ) - goto errWHDimage; - - if ( image.pd_depth != 1 ) - goto errWHDimage; - - width = image.pd_width; - height = image.pd_height; - depth = image.pd_depth; - - printf("Image info: width: %d height: %d depth: %d...\n", image.pd_width, image.pd_height, image.pd_depth); - - if (!(width && height && depth)) { - fprintf(stderr,"Err: Invalid image file header (width, height, or depth is 0)\n"); - exit(1); - } - - if ( !(fileArr = (char *)malloc(image.pd_dataSize))) { - fprintf(stderr,"Err: Couldn't malloc file data (%ld bytes)... 
bailing.\n", image.pd_dataSize); - exit(1); - } - - /* read the data into a buffer */ - fread(fileArr, image.pd_dataSize, 1, stream); - fclose(stream); - - encodedSize = image.pd_dataSize - (256 * 3); - - for(sum=0,i=0; i= pixels ) - { - printf("Skipping encoding...\n"); - } - - for (sum=0,i=0; i= encodedSize) // this is the last element - break; - - if(chars_this_line >= 80) { - fprintf( out_stream, "\n"); - chars_this_line = 0; - } - } - - - if (debug) - { - printf("Encoded size = %d\n", encodedSize); - printf("Decoded size = %d\n", pixels); - } - - fprintf(out_stream, "\n\n"); - for ( i=0; i<256; i+=4) - { - union colors c; - - if ( (i % 16) == 0 ) fprintf(out_stream, "// %02X\n", i); - c.c24 = panic_clut[i+0]; - fprintf(out_stream, "\t0x%02X,0x%02X,0x%02X, ", c.clut.red, c.clut.green, c.clut.blue); - c.c24 = panic_clut[i+1]; - fprintf(out_stream, "0x%02X,0x%02X,0x%02X, ", c.clut.red, c.clut.green, c.clut.blue); - c.c24 = panic_clut[i+2]; - fprintf(out_stream, "0x%02X,0x%02X,0x%02X, ", c.clut.red, c.clut.green, c.clut.blue); - c.c24 = panic_clut[i+3]; - fprintf(out_stream, "0x%02X,0x%02X,0x%02X%s\n", c.clut.red, c.clut.green, c.clut.blue, ((i!=(256-4))?",":"")); - } - - fprintf(out_stream, "}\n"); - fprintf(out_stream, "};\n"); - - fclose( out_stream ); - -leaveOK: - printf("\n"); - return 0; - -errQTimage: - fprintf(stderr,"Err: Image must be in the QuickTime Raw Uncompressed Millions or 256 Colors format\n"); - exit(1); -errWHDimage: - fprintf(stderr,"Err: Image must be in the WHD Raw 256 Colors format\n"); - exit(1); -} - - - -#define RUN_MAX ((1<<20)-1) - -union RunData { - unsigned int i; - unsigned char c[4]; -}; - -unsigned int encode_rle( - unsigned char * fileArr, - unsigned int filePos, - unsigned int quantity, - union RunData * value, - int depth); - -int -compareruns( unsigned char * data, unsigned int * index, unsigned int max, union RunData * currP, int * depth ) -{ - unsigned int i = *index; - union RunData * nextP; - static int retc = 0; - - if ( currP == NULL || data == NULL ) - { - retc = 0; - goto Leave; - } - - if ( (*index+*depth) > max ) - { - *depth = 1; - retc = 0; - goto Leave; - } - - nextP = (union RunData *) &data[*index]; - - if ( retc == 1 ) - { - // check current data against current depth - switch ( *depth ) - { - case 1: - if ( nextP->c[0] == currP->c[0] ) - goto Leave; - break; - case 2: - if ( nextP->c[0] == currP->c[0] && - nextP->c[1] == currP->c[1] ) - goto Leave; - break; - case 3: - if ( nextP->c[0] == currP->c[0] && - nextP->c[1] == currP->c[1] && - nextP->c[2] == currP->c[2] ) - goto Leave; - break; - case 4: - if ( nextP->c[0] == currP->c[0] && - nextP->c[1] == currP->c[1] && - nextP->c[2] == currP->c[2] && - nextP->c[3] == currP->c[3] ) - goto Leave; - break; - } - - retc = 0; - goto Leave; - } - - // start of a new pattern match begine with depth = 1 - - if ( (*index+6) <= max ) - { - // We have at least 8 bytes left in the buffer starting from currP -#if 1 - nextP = (union RunData *) &data[*index+3]; - if ( nextP->c[0] == currP->c[0] && - nextP->c[1] == currP->c[1] && - nextP->c[2] == currP->c[2] && - nextP->c[3] == currP->c[3] ) - { - // check if they are all the same value - if ( currP->c[0] == currP->c[1] && - currP->c[1] == currP->c[2] && - currP->c[2] == currP->c[3] ) - { // if so, leave at depth = 1 - retc = 1; - *depth = 1; - goto Leave; - } - - if (debug>2) printf("Found 4 at %x\n", *index); - retc = 1; - *depth = 4; - *index += 3; - goto Leave; - } - - nextP = (union RunData *) &data[*index+2]; - if ( nextP->c[0] == currP->c[0] && - 
nextP->c[1] == currP->c[1] && - nextP->c[2] == currP->c[2] ) - { - // check if they are all the same value - if ( currP->c[0] == currP->c[1] && - currP->c[1] == currP->c[2] ) - { // if so, leave at depth = 1 - retc = 1; - *depth = 1; - goto Leave; - } - - if (debug>2) printf("Found 3 at %x\n", *index); - retc = 1; - *depth = 3; - *index += 2; - goto Leave; - } - - nextP = (union RunData *) &data[*index+1]; - if ( nextP->c[0] == currP->c[0] && - nextP->c[1] == currP->c[1] ) - { - // check if they are all the same value - if ( currP->c[0] == currP->c[1] ) - { // if so, leave at depth = 1 - retc = 1; - *depth = 1; - goto Leave; - } - - if (debug>2) printf("Found 2 at %x\n", *index); - retc = 1; - *depth = 2; - *index += 1; - goto Leave; - } - -#endif - nextP = (union RunData *) &data[*index]; - - } - - if ( nextP->c[0] == currP->c[0] ) - retc = 1; - else - retc = 0; - -Leave: - - if ( retc == 1 ) - *index += *depth; - - return retc; -} - -int -EncodeImage( unsigned char * data, int pixels, unsigned char * fileArr ) -{ - union RunData * currP, * norunP ; - int i, match, depth; - unsigned int filePos, run, nomatchrun; - - currP = NULL; - norunP = NULL; - nomatchrun = 0; - filePos = 0; // position in the file we're writing out - run = 1; - depth = 1; - - currP = (union RunData *)&data[0]; // start a new run - for (i=1; i 2 ) - { - unsigned char * p = (unsigned char *)norunP; - - if( nomatchrun ) - { - while (nomatchrun) - { - int cnt; - - cnt = (nomatchrun > 127) ? 127 : nomatchrun; - fileArr[filePos++] = cnt; - nomatchrun -= cnt; - - while ( cnt-- ) - fileArr[filePos++] = *p++; - } - } - - filePos += encode_rle(fileArr, filePos, run, currP, depth); - - norunP = NULL; - } - else - { - nomatchrun+=run; - } - - currP = (union RunData *)&data[i]; // start a new run - - if( norunP == NULL ) - { - nomatchrun = 0; - norunP = currP; - } - - depth = 1; // switch back to a single byte depth - run = 1; // thee is always at least one entry - i++; // point to next byte - } - } - - if( nomatchrun ) - { - unsigned char * p = (unsigned char *)norunP; - while (nomatchrun) - { - int cnt; - - cnt = (nomatchrun > 127) ? 127 : nomatchrun; - fileArr[filePos++] = cnt; - nomatchrun -= cnt; - - while ( cnt-- ) - fileArr[filePos++] = *p++; - } - } - - // write out any run that was in progress - if (run > 0) { - filePos += encode_rle(fileArr, filePos, run, currP, depth); - } - - return filePos; -} - -/* encode_rle applies a "modified-RLE encoding to a given image. The encoding works as follows: - - The quantity is described in the first byte. If the MSB is zero, then the next seven bits - are the quantity. If the MSB is set, bits 0-3 of the quantity are in the least significant bits. - If bit 5 is set, then the quantity is further described in the next byte, where an additional - 7 bits (4-10) worth of quantity will be found. If the MSB of this byte is set, then an additional - 7 bits (11-17) worth of quantity will be found in the next byte. This repeats until the MSB of - a quantity byte is zero, thus ending the chain. - - The value is described in the first byte. If the MSB is zero, then the value is in the next byte. - If the MSB is set, then bits 5/6 describe the number of value bytes following the quantity bytes. 
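   A worked example, added for illustration and derived from encode_length() and
   decode_rle() in this file: a depth-1 run of 300 bytes of value 0xFD encodes as
   0x9C 0x12 0xFD. The first byte 0x9C = 1001 1100 selects a single value byte
   (case 2 in the table below), sets the continuation bit, and holds q3-q0 = 0xC;
   the second byte 0x12 holds q10-q4, giving (0x12 << 4) | 0xC = 300; the value
   byte 0xFD follows.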
- - encodings are: (q = quantity, v = value, c = quantity continues) - - Byte 1 Byte 2 Byte 3 Byte 4 Byte 5 Byte 6 Byte 7 Byte 8 - case 1: [ 0 q6-q0 ] [ v7-v0 ] - case 2: [ 1 0 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] - case 3: [ 1 0 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] - case 4: [ 1 1 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] - case 5: [ 1 1 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] -*/ - -unsigned int -encode_length(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned int mask) -{ - unsigned char single_mask = 0x0F; - unsigned char double_mask = 0x7F; - unsigned int slots_used = 0; - - fileArr[filePos] = mask | (quantity & single_mask); // low bits (plus mask) - slots_used++; - - if (quantity >>= 4) - { - fileArr[filePos++] |= 0x10; // set length continuation bit - fileArr[filePos] = quantity & double_mask; - slots_used++; - - while (quantity >>= 7) - { - fileArr[filePos++] |= 0x80; // set length continuation bit - fileArr[filePos] = quantity & double_mask; - slots_used++; - } - } - - return slots_used; -} - - -unsigned int -encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, union RunData * value, int depth) -{ - unsigned char single_mask = 0x0F; - unsigned char double_mask = 0x7F; - unsigned char slots_used = 0; - - - switch ( depth ) - { - case 1: - slots_used += encode_length( fileArr, filePos, quantity, 0x80 ); - fileArr[filePos+slots_used++] = value->c[0]; - break; - - case 2: - slots_used += encode_length( fileArr, filePos, quantity, 0xA0 ); - fileArr[filePos+slots_used++] = value->c[0]; - fileArr[filePos+slots_used++] = value->c[1]; - break; - - case 3: - slots_used += encode_length( fileArr, filePos, quantity, 0xC0 ); - fileArr[filePos+slots_used++] = value->c[0]; - fileArr[filePos+slots_used++] = value->c[1]; - fileArr[filePos+slots_used++] = value->c[2]; - break; - - case 4: - slots_used += encode_length( fileArr, filePos, quantity, 0xE0 ); - fileArr[filePos+slots_used++] = value->c[0]; - fileArr[filePos+slots_used++] = value->c[1]; - fileArr[filePos+slots_used++] = value->c[2]; - fileArr[filePos+slots_used++] = value->c[3]; - break; - } - - return slots_used; -} - -int -decode_rle( unsigned char * dataPtr, unsigned int * quantity, unsigned int * depth, unsigned char ** value ) -{ - unsigned int mask; - int i, runlen, runsize; - - i = 0; - mask = dataPtr[i] & 0xF0; - - if ( mask & 0x80 ) - { - runsize = ((mask & 0x60) >> 5) + 1; - runlen = dataPtr[i++] & 0x0F; - - if ( mask & 0x10 ) - { - int shift = 4; - - do - { - mask = dataPtr[i] & 0x80; - runlen |= ((dataPtr[i++] & 0x7F) << shift); - shift+=7; - } while (mask); - } - } else - { - runlen = 1; - runsize = dataPtr[i++]; - } - - *depth = runsize; - *quantity = runlen; - *value = &dataPtr[i]; - - return i+runsize; -} - -int -findIndexNearMatch( unsigned int color24 ) -{ - union colors color8; - union colors clut8; - int isGrey = 0; - - color8.c24 = color24; - - if ( color8.clut.red == color8.clut.green && color8.clut.green == color8.clut.blue ) - isGrey = 1; - - if ( isGrey ) { - int i; - unsigned int bestIndex = 0, rel, bestMatch = -1; - - for (i=0; i<256; i++) - { - clut8.c24 = panic_clut[i]; - - if ( clut8.clut.red != clut8.clut.green || clut8.clut.green != clut8.clut.blue ) - continue; - - if ( clut8.clut.red > color8.clut.red) continue; - rel = abs(color8.clut.red - clut8.clut.red); - if ( rel < bestMatch ) { - bestMatch = rel; 
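/* grays brighter than the target were skipped by the continue above, so this records the nearest CLUT gray at or below the requested level */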
- bestIndex = i; - } - } - - return bestIndex; - } - - // we must have a non-grey color - return -1; -} - -unsigned int -color24toGrey( unsigned int color24 ) -{ - float R, G, B; - float Grey; - union colors c; - unsigned char grey8; - unsigned int grey24; - - c.c24 = color24; - - R = (c.clut.red & 0xFF) ; - G = (c.clut.green & 0xFF) ; - B = (c.clut.blue & 0xFF) ; - - Grey = (R*.30) + (G*.59) + (B*.11); - grey8 = (unsigned char) ( Grey + .5); - grey24 = (grey8<<16) | (grey8<<8) | grey8; - return grey24; -} - -int -convert24toGrey( unsigned char * data, unsigned int size ) -{ - float R, G, B; - float Grey; - unsigned int grey8; - int i24; - - - for ( i24=0; i24 c.rgb[2] && c.rgb[1] > c.rgb[3] ) - prim = 1; - else if ( c.rgb[2] > c.rgb[1] && c.rgb[2] > c.rgb[3] ) - prim = 2; - else if ( c.rgb[3] > c.rgb[1] && c.rgb[3] > c.rgb[2] ) - prim = 3; - else if ( c.rgb[1] == c.rgb[2] && c.rgb[1] == c.rgb[3] ) - prim = 0; // gray - else if ( c.rgb[1] == c.rgb[2] ) - prim = 0x12; // red green - else if ( c.rgb[1] == c.rgb[3] ) - prim = 0x13; // red blue - else if ( c.rgb[2] == c.rgb[3] ) - prim = 0x23; // green blue - else - printf("cannot tell color %06x\n", color24); - - last_c = color24; - last_p = prim; - - if ( prim == 0 || prim > 3 ) - { - last_co = -1; - return last_co; - } - -#if 0 - for (i=0; i<256; i++) - { - - break; - } -#endif - - return -1; -} - - -unsigned char -findIndexMatch( unsigned int color24 ) -{ - int i; - unsigned char ri; - static unsigned char last = 0; - -retry: - if ( panic_clut[last] == color24 ) - { - exactmatch++; - return last; - } - - for (i=0; i<256; i++) - { - if ( panic_clut[i] == color24 ) { - last = i; - exactmatch++; - return last; - } - } - - if ( nextmis == -1 ) { - for (i=0; i<256; i++) mismatchClut[i] = -1; - nextmis = 0; - } - - i = findIndexNearMatch(color24); - - if ( i == -1 ) // found a color that is not grey - { - unsigned int colormatch = findColor24NearMatch( color24 ); - - if ( colormatch == -1 ) // cannot convert color - { - cvt2grey++; - if (debug>1) printf("color %06X not matched at all\n", color24); - color24 = color24toGrey(color24); - if (debug>1) printf("now grey %06X\n", color24); - } - else - color24 = colormatch; - - goto retry; - } - - if (debug>1) printf("color %06X now matched at %x\n", color24, i); - - ri = i; - - neargrey++; - - // keep track of missed repeats - for ( i=0; i= 256) ) - { - fprintf(stderr,"Err: Too many color mismatches detected with this CLUT\n"); - exit(1); - } - - return ri; -} - -/* - * Convert 24 bit mode to 8 bit. 
We do not do any alignment conversions - */ - -int -convert24to8bitIndex( unsigned char * data, int height, int width, unsigned char ** dout ) -{ - unsigned int row, col, i, i24, i8, size; - unsigned char index; - unsigned char * ddata; - union colors color24; - - size = height * width; - - ddata = (unsigned char *) calloc( size, 1); - - for (i24=0,i8=0,row=0; row=0; j--) - { - if ( bits & 0x80) - fonts[row][i][j] = fg; - else - fonts[row][i][j] = bg; - bits <<= 1; - } - } - } - - WriteQTRawFile( ostream, (unsigned char *) fonts, 16, 256*8, 1, 16*256*8 ); - fclose(ostream); -} diff --git a/osfmk/console/panic_ui/images/panic_dialog.tiff b/osfmk/console/panic_ui/images/panic_dialog.tiff deleted file mode 100644 index f5c96eceb208c0d057adf11cbd17b8a8714d8042..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 156664 zcmeIbYm^(=bsmaBgF=&NOP~$Pij7uUltjof$?gUpO~M$CFeFD!dpHBlw4{+7#KDj~ zoQA;ep6;gjm>GfdBxlGO&Vw8duJds2ue_Y)B)|NKop{B0c*${A9N#?FO;(a0S&4I< zdmZH_&We|n@7w2`s#Apmy3j;7*<>BE3stA~dF}7K>g==kIe9Yi#Y7@8ocLtoj}s}} zIr!sch(FGEdC~WyxKFBk&?lTBeGg*iWBMMUob4u_Q@`N+phdI8y8ooQcNpgnYx-m& z@BBz&i%Q=4r1PWh{imHT=&(u9PbP3pd>VJVoTA+4aDUAC5@{scPbYtcMJUfD66vR3 z{bh+CVUGU#8z<;L`m4DA%`0;z-BaJavYbdK63H(l67{QyJCWG-N0$?cLe~6NPA@z& zJ63&eVP#?YxrN2CqfhM{J6S(9H_zupabn_7X|hb5r`+xk$fAmNH z>wo>v|M`c1_&@%~fBSFW``&;3uYd3d|K-2@{_p?i|NMKu_n-dLfA|mo{@?$*fA??y z?Z5dq|N3A5tAF(`|K-2<7yta9|L*Vpvw!wGzw=N3=|A}=zx~_)_#gk)Z+-W>zxkX0 z=pX&VfA|mn!QcP;zwsM?@9+Kkum7FD^J~BMtH1gyzw*n!{7b*|i@*4{{?_08o4@c2 zfBmoj{LlZ@zxr4H%3uD=f8j6u`9J^X{@kDavp@T@?e^{4AAWf2*3FymzklP#JMX;x z_Vw#;zIpZPl`C(&@%rnpz4q#>moK+kFT8N+(#4AxE}TDq?%dMS^Up6XE-cK?pFP`b z&dojh?CH~|PCfI?Pyh5czxj=CJpJ_7zy7ta&CGn|E7j@~Pdxtk$&-&g_ESIglRtUl z#FxMP=%dGvA3JvR=p&C*Dqs52!w(-h^3X%&a;bFq@S#IfQwI+oIIw^JzJ0~w+o`&aWRhIdO9F=utbFUCooY~e`eb$N z=)%(M*#5nfdrnR4J2m+0v889@S~$>7kv^t*k7T#>dZ}Kfm|8i*63*m-h-1drvK#9mfmgZo;2Cc$SVX&DK{ImSz?fnn!w08p@2rNA5AG z?7o^kR$rMt!uiFX$>N@g;*6PJ?7@s!*^5!)uqcCZpw^wPuhb*O zV$-ez7$;BmhU4tn@vwTfyz=C%SEWONT%NhOI6MC2?DE37rBkyY_i8ok4FEZLvUF_W z)H&A76UUC6J2!W_wEy^l%EZL}!;c<5I(h8aBpL{&LHj3M z)0EI|RxF^fs5BSM%)Y~q9-o{%-o1bw;k?Xv(Vh95`MH%NMV(r}I~eM56zkiw$L7w=E+fo=gS(3drto(Ne}{J$4;FV9r}pCyeuwt& zKCo{Bf0MX}|Dk=mi-!;H-ZwG1d*9?C{2ktXpoqVHMff3}sR{gnc8bpj@E(8r_U|kH z#G%Op`zuqA9GZCK*ztqM4jw$Z@6e&iM-Lu5e(3Q2;{N@H!@isavwec2#MkFeuRM2T z|3Sl8%QKYzvDx`E2v9t9K=VcdXy0InN9I))BFBL^EpR<@$EX|!dF~2gq zguuG`?$haYrwGQS7(iB3=Pt}P-3Ac!*Wm^>LeE~9>(WyBrn9BewTsOvuXADC1$`*$a7DMVfATQms~|U16BCmMipTb0pr1Ovf8yZb{gvbBRs>8RVuP;ci)Q61d{lB@ zcUE}!RY__a=6RS;mdC@qtvcwvK5_JDb!p+*IrMIoMf7oFPk#9kJGk{eH2n13GUme< z$4ws>-tv_%K0)vaH12)_nK0~4*Ux#`UcWTP;zaslZp^NjU43z&FXFg%#q8>f1AP(4 zwJT;Z`XY{N zSIn-yIM5eyT)Sd+^~Hg{h~wH7v#T!-^hF%ku9#hYaiA~axOTWc$?5y!PFW>;Sv z=!>{#?b_yP70=E?mH0e#h7);MwxB_bLyw#|nW6;`7`^cMeO~;s|M3t0)c3~!?x+6Z z|M|k1pZ|^1zxzw1ANoDM`&1V3o^6hOdv=NXFc0m7YU|Fi+4)mYADut*(9W;UJi6zQ zs3(8s*b_%*zEM3sw%EkJ{;}#;A9?J=(XpL-#>YiDV0`@8%(1cRV<(=P8KZja`0>Yg zj@hcjb*d1E1}zAe$DvuUIJ>lR@i9cU2Lbk;UO5fT#IlOrmmTT2&jrPOy# zn61!@MD%B@u!Euwr^0>)1G_Io}Vd+Gg#M~l> z;iFe!MTne zBg2MqZxjlQ<6GT98!5UYx#g>KIy1L$rY;=cyxi_m28-A8^*zmdfZai0eY3sF>BhO` zmD$tud~){G%4V$jYbzjuvf2}#WIUhPi#qL%?5lWB?b{zlePHUK&)aGX8{ga_vIpJY zeO>{ACG1iI_Ds@b20wOWVr^ke~6|BT7S5_?m5nFQJg46?Yl)97 zH|I{xE`P20*fO3RUpial9lU=Ke`$F*amAlDQ|>KpPqfTey+KK+pXWm3U>%OS0=^VPo8<^jF=;{n7duY|Hzl!7YLGg zd|{rv-nz1|_{6!D2G{|tEyMa182rxurp3MWv@uHvvd)0>zk16}f zPhG?+*xBW=6A(u)EiRaLsq)8AiEXZ7I(Z!2h5T2seeWy6SLc)8_58eaPMur&wtjM) z-%2D_d-meE__q^@%9E&Izv%gW4P(+@8^B#F$ZD} z#2kn@5OW~rK+J)d12G3;4#XUYIS_Lo=0MDWm;(?Ls(_P^vm!H0CY?S2PcI@we)rfz^_c 
diff --git a/osfmk/console/panic_ui/images/panic_dialogWHD.raw b/osfmk/console/panic_ui/images/panic_dialogWHD.raw
deleted file mode 100644
index ba7161d2bf532bf95279c6946e63b1b0c18feae8..0000000000000000000000000000000000000000
GIT binary patch
[deleted binary payload omitted: 120366 bytes]
zdVIb%%`>VUTrY`2G-3RViIL{Cd3MM)Vi``GG;wj*U}$_IHHeZ#5KM{4 zPmi2D_*umjVE3fx86NN=&hbN@$**{4pqM8Tyiq8)WMD5soU>o$5&}9cSb*Uz?vzs# z;2Br;g%E_&0|x-H_QZW(YZ!Xo|R+rJ<ktW= zy*-2%u#Lbz<#!Sae^`*JuPpnBZhj_k*l#9fL^m3^QcZ~$pPNh(`f5oOB7NO+T&e8l zJcBxTgecV?g9Vl^ew=GTlUdb+-x!5eT`9+K9oUHtWa+C((b~Ni@n9D|LHNAGAKEQ# ztsLJ-2=N#l;0lEDQYi6#VvlDxH&P`KTpAw{fHg~!D>GoN(@AsjJ)fTEw5+z*r|5s< z`5L%3=cf%)9G3lu<|C2#&DHLm_y@XY9>TB66SXm#)@5KC)<2Co99(Hcx>+5p<=>w7TJy)}vX^h)|*_OCozwGY0 zo*!6o-#y;|*v#lt_QBh@=hc9BhfVG6Hc;$1?b|!7*9T7ew>ySvJ95AMz@>j@)t{c_ zp97He?N$nZd1p2IkO@zkyV?7h2zUvK)8g zyCtk&XD+^;2jO_C3TzhSPJC1ABF4qP5?b=G61h#lS0$zDzpVsrH)P6BSMsq9jH_uc zZ(Eg`KTF`&FF2XhZE?M>uREnMZ{@$$+lf@JEPho7w+pEg6s0t8;s0(sxsRnTOCO8h zDy&W^mg2mzzuUo_a9Q|P7>*@=UX1{yJa6dVi3O)Ddkm`W!m4$rl;;ioUv4M!@ziD6 z<0-ZZ{j#K2{Wp;SR!8i>l_l?h99B?NcPj~ezY7$0`TpwsV9ir?H``F4vXuy zSy!j^n05|RpN{Un-3JQg;<` zBt%D6GUEWet#Lh@5K%XO8#j*(oA4x%mg0r*Z(t{5*4m$fRe z4yYVU(B?W+*h*IgahG$Ii|Dn~9Kzrdc!-r-Fm0C$a@>bwN6cmZaSG@NuBeuuE2?wI z$dziCPOm)yw3J7j5zB3rwp|cABleq@kR1fbJr{s*PZH+_4LOH^;^wnY8Yk+;kkXtZ zyY1Fs1Jqu{z?H3l;QddOV*>vc_uM0yaYeDoHQY3Ej9)aCV{Pzgo0nbe1hqyU`#XDQ z^jfWL7{X+#nE~Xv^4=B(XcvnFr40ox8_RGnzbKKX`(yJl&!8RA*VskKEf4Vkl_yTG zWgr5|Gh$V9P=h;x2!+-mwQmo4dk769&~l98y*5R*O!=C`f+3~h#wz4Q!e96E0p>^s%*v){^RejZd7Ivy=gTayE${aY<)p50+DsPu%wUsyF3Un+{ zfh$x?`&X&HK&fqDKFidbcLbIYsZVV_c4-cA{ZnJ!u%O?11?z`NB3yHbU3bkXBe;Rb zaDC1KnK$RpJL8y24C%Z?2_Pjiao8>W5T26?%w3!z8_RGnzc@jeaOuZb6K`P1gqJov zB8j)xki3ny3(L?ckvSKz9XO9r_9 zP4QO7GCM4OVwr3)p_+y)fCNm|NTBP6D|pDbmis$d47b_S9pvXDm}Dx((vj!h-ciLv zf8iY0(c=P@;O>6x`5)T{n|(a-QO%62Jt}zGsc6)dSVKK6^fyvnnH0#tm6vV^I#9@jw_CO zzYZ4MJTti+i_T&^)>njDCjxP z`eGZ~1I@NMw&r2D7Wt)x(nzZ%sKMc70gkov5_xO67;OVzP(>hxmT-kW!)3i-w)fg~ z#B8qXMylFhPFf}?sD53ByML>aug8iWAzFi60$&!=-{c*Y?LYT&m2xd2{<#bf|5m_O zOKga3)^!`%j>0;aRVoWDa834rGJd&D;Zaaae~l;xGUPGtVHOtao! 
z%qEtFRR5R+x{(t~C&W9B%s^dxdf6j1Ryq#o>!n$H^T`AQ`%@XKsCbl^OPS3x)Hka@N#b~_bP;%wik;>0SDwvjlW<2WxH_10T$h?*?lUre_iBub$*}El zog{T95tuBA3F43hW(E<|CZpksL6x&DwEyU9o}5>KQGj$b8-;shrR(+(0%MAwCChj_ zb+(0IJy7;or1xAX*(aeplwgY+7{%#8OgfQpFh_DUgB&P70nXx{j z7}UrDk9wm`m?S`|vsoe*!c@4>cq&_$jHl@gTE>#Jb4%(-Oh<2VIv0aZ!US97h#XV#g7I@OysWW7WDgUx*2B%sg z0P)<{7N*?z_$E3}rshx6!kkJA3<3B!F~@ZI>MEC#BxWU^cA)gqQpN!0gP~Cfo-Fm<4T!6M5@XD zge&7(Pb{lf|5xBs_<$`laGX?NM$gHgE*7iflJoRFf*bIN`yaQG~~V4Da?);R}abh>gLMj z@`_@oa|nfN)>p%I54B`zmP=%p%WsrR`nsoFxEBHit_MIu`dTy^X1gYq?1jj+;|kU0 z@)zMssz&Wx&sW_NS$M9eE^jHxVRy+KuG$Bb(jBJo&*ewTD!9#cu!-4>$H&sPh?{Q__9<%dXrVedB_ z2bt~G53_~ffom|guTyLb;@*hRss0mKgcymd8Iv!L0h}3K{b*dATni{eU!U0l^yrA~ z0Nr4RaA6VY@TrNXuf8hk7Gj)n&$Hp_A|))c5-P(u3embS2sjzJtsi0ZUAeGkqAt-7 zgFPD$j~i5J=*+k>u}q&BRVXy*bkqvCLiE&?WJ)z$pZ`}Y)^n*-Hx*)DJ@-&R(GLf6 z9Dmca8;u5ZK@m>lXgv6dD+a6wxUwE7TPknV&|)yY4qOqfe)zic0dNR|w@^;zDMq<0 zT1WBPBS_R89#3g0EDX4sV5y{*BlRP91?rCJQk2Q5Gc}0T{B|jQc;bDNNUXR>#Oq3= z1puvgWit8E$mY0jRx*C@0eZ#io^_?kOeF6IzY5~@I+TE3c9zRH)W>FY`qGQL z%VFM#uxY7umpsiMAxGFT8EO-Nn4WA?8ctTUSZO|_eTi4BlxWKHe3+}=hXf4Qq4bk`f~ufn>QE2>nl6g%d6mdkWb z$Rw`K;mWGttVNne&bP@|O4tL31*EKHT2AKmS0X2peb19QM}#vZGBMh$2o0;8UeK&& zb7E7(m~AyvqozxI_JI>s$QV)aQAy?dh5j%7(J*Yb&={IYF^u(3#zPvRHDyjp2)2V0%Yr`z4Id$>rKUZaI zc%S`lKBaAuvz&2tTs4yoaKmtjnzsIYBfOZ0NVw8NM_ilsHEo`Bw3`B zO_G27hNp~WN=U;Tt*b!OgK#uXBmLk?^q{fGt>I*A?oWWtcs z!&S@Tkiov;X(MMPgvjHopClp#XwLnSjfU-j6TnDc;_8JtuCTfOqjRJT@Uv*?C|F}E zu|o2N`?cqJ?=L)iA^8-9pf(x3$@EVrSYnr0-^A+Y9#})lA zM@W!OKH+?N@Py$N zx3qj-|6XrAoN?8#mq7Bf%T7eNf)pG|@^01j6%bi3k8A9;MS(DzlP$0ZvoLMr?Crq@ ziE2D-`f8NPsJm4gUuwzkqbEArxaK-gX}5N7Lrl}x!BlKFowA5287YFNyaVN%!If6g zm8HDr{=hS?sXDR9DFw{(TuwCurmy|7ewa>J#*~@L@G1WfWroS6GiC1I^mEj^o&+`@ zH~e`o{c=XfP3f1JN;IbYmt}_K^7E;5IsA)$j(XRVz-9qn_R=qZoh7@@S7ut5F5(8$ zM>AT_puLp^R{xB}O@pO>0mO!DDcTK5ExOF$3(nW)t4o~vBJ1C6KG>l@Y?}9$vwc6@ zx5Ca@4D)HfBK58(fz9Vi+zi*^rFYKD=)k%@2iH$}^{6*}39Rqy7<)5Zi-FVTB5qbp zzasUnCxOjEzag$KqnMYQi>SDUuy%IcEc6@VdTkSZ^EzR0v3)T+lfhFYv@NbPrzJL% z9j53{Ow1XcJw-TnR;QtOSMU3ZRB_#z=iqCJTsPZuwifs* zW^+QeE~eUZMsf9HJ~!K6Z#M5g2g+WWK9e)fMu7d*k8wPw8pSD#_K)Mc4-IoD3t!)(UQ96O!u#p4z{I;@u{7G3t4 z#N8Tk-Dn}$N&&kHz)5EZE!OR=LGIvSiDiJBot7mUooPcSn$h7itO6W7$Hp_@gDf|3 zdC%>obL}cP`q;N6iCQy>!jOaYm{Ptq;(AoLY9;Pw$Y!M|aGjjz(`sK~)h9VVF%iPv z65PDtJEogl1xM-hafQXGs7IekF1HR*dK7q0N2 z)-DhehU79KE-p{z38y;nYT?VQ(>KkO^S*z89lK6o633U41N*>Y9N%fU5;kt2J6J9j z5SN{sh}`^MqMUHG`;LqCfr}0gMWxf;wEzI4- zctGQ1Q*PSu<5CM;u|oqVlS*H);Qze&5%lpDE~((6`2!;ks%&)Oh$bA0(vCb!OreB8 z;%k$vKp}rp;N~EG|r1<2N38Zan+|jaQ4DmcCTm`NxU!b&;O}T^Ck4r6al_ExA zP-CMn77v>LkUnLZyA8HJeBv^1|I4@z&vR8J$HfH+#*opZp^u2s`(U{8NJE_xY3cV z8O_eu^;O$IY)hB1Rubyt%A6!08%4-J^fd$$!Vs?G;!>9fWn~t&avqczrFympxS$f? 
zrd-({RS8_}a89sI?$BgYGi3c$6NK{`_lB!MQR?d+`)A3Kq=kyJ9M>yG=Uo^%;m3{C!T>bFS5?A(~IOLRv#(oT$ z#}+D40S>me9eAAUfk$lM>wkeVa81XhdSc>gCzEQ>jl4(^xS}2ynWdDm!;{fFJM@ex;!RjIG+ zW0(>SBN5*Zy%txf#jqc?U2=T-ax|;)FpCRZnE~Ts#O+b7Fcju?N4Vk(l!2>tn>jIY zWm4n^8Rl&dntjt(jLc4!|Ck98OEwz1ge$ZCTi`%hV0@GWQs6ovE)$VP+$@vj_yF9_ zc$G|_#7{a8#$-48JE-V8h?-C3ow?zb$2o+b%y}>a;qL0Tn<2zOS;(?mT)RO5Ggum| zhZ+#5xB0xaHNqUq{fftI?ao~dV3^a?>UnDep{?A;`G{+9x*TR&llBHtL+ayG(0#(M zhMDU9Kmu?30vo*>D(v_q@o3h~fiX24e9ckUC$?Mt2A@0}W?8X+%n{>d$ULs=(ZY#} zlWx`W6Ex$R)O9tfQ6>3dON-2t_MM>|B7>jm8n+K~<+}ilb%nz?pED4UhNsrIZi}`<$pI+RwT0i4G%VB|& z-P{0zsB#`7YI*NhGB-GYfb+j6IDd{4pJOi9$6`@$HQn4!lWBcIKX|%m2tO6mKbvx| zd3M-l$!~+}%9)zE$z)vH!;`i@OXsnTD@~livav`aG zMv{3r*@T%&&OL$r|835!2Fi^*Zo86km2*&>Y2$p7;bUv-G+-@l}=@Ip%Q zg%Opw7N2xPDgK2TEqiVYr4JmPgp~L^nGg51HP%c z;h&MTq&x)97_$jQoDN4^|CTVa%5w4XsxYo*le5jW7Q```9Jp#ySx3=PeiRx-8WnZY zcYjIWlSFlJ3@Mr&c!Vo)b$reKx^LAsv#AZ12zU=y5@=4Y{IoRU8b(y2ReaL1Z8`}A zh3604+4m!vQo75}fyQ?!ft;zB;c7apd0rF}WdPOyXGk=X%7m*12VCPYT%6X%&MM)m zJGNC-fyBFhd0ikur%= zDDobD()T1$a{1|c@oFq*Inn;=M#-`oE4Z>ssNzd@q$a*FqJ}teHC(SdmrPczAJd47 zsUdi28N3bb-cV6?ojB0g@F;MNGBUrZ6X*n}vBDorFWq^DSmSr}J| zNZ?wE&QMz*%H@O3jrl{QBzmv7QXA2z96>w@e3(CitJz=q{J zabt&w{?ZQcCM*D0wol5>t+I~m^~b63^g+tEzEHHKk&bpzXr#VNsdj>AB>>L2!c>o* zfzMZ!@wUJfIwqn~W+Zi7d0r+6(F#cNzr-L%=4R8U{Cw?wJ~H9j6CIk69RTa{kp{@B zDUsekt{lGcnZS}|T{M%hb!rL?IWY*F+ITN;#T_kfUXzA)MP{@ zt`w&03{}reyDHp_l{8YBg&O8I{0yNoH$3oibB*wAxE|Hw1PFtBBg; zb1e&u{BE=kHE=`%FE$F33G{{a<=}&q-K?yGMAkwIMo9F34-!q>z9_O)?|KPbO~`N% zJk&A~?fP@YgDhjWU$4MQLUT3$YJF8|Y$1Ve^End&XcUk+r=ykd-j2v8ptS{ZtBmjekO@TNv7hW_0Irc6a{MHr09$(oS#aG- zR?IfgeL98Ba=D>5p6G$WLbv(2p* zG=4GXCws+U|En28g&UO6o~zPS+!0p~3|y(G+#8F_@`$_b;3r)e;z}VJuKwXiV2iz~ zoE^kGJh!s0!)V$4fZ4#l`|5}FIr^#B-mN9{egX}n8WazR#CA_qqckms36Zq}qOdOG zHv$9K*c(e+;TSB8;lcN>xxZA8)zlAjg4XSJ#MLdV8>QR9{Q$?Sl{Ed^Yfx)J)u9s8 zgx=ej)%~y3NU0@o2@@i#$8eaJb70`g>23ABNTh8?8Q1PP`&~oS^_BOrS5^AT00wOY zi%iUO+ravKhvm-W?tW77p-6pArIn_|Fd;IMxbn%`vnv*RW2vKDCs69^@=aD2;Tj)) z6tdP26H~d-nWx0o7M>ZS=*-reEeO9SNhdaR0-!Ogtn?TsgYfAoyW(JPEMpMFbxPI` z(;VC7qrHWzZ8JUhy{Z~Zg@v^}e7zIZ>Cz(C4BoWwGuh^Qb(zNI% zMDi_MH7fEzvrV#dRzHkvI7Y<2P^srDZrWwuGim)W67$24NXO+5dsNo2u(p04`0a!= zXAemMbKM(xLvhcYO*C2G`kiBC;9Ouh;(cl}h&Kf32HCoK_PQkVZ*NdW%JOX@Gtcwl z3HHKC7E+F1;!Tn+;=kHL=-rm1B`>Cqg>Tu`irI+*xlP{7 znaFl&*YYNc{rD$1BXLWdw$A=#U!wExHqJq9HMi6&_o-Q3w+3Yg8H#DAo^0pb{N`$W zp1-6j2U=6uI4tNyfUJqlneth;YyH=M}b zrS5Js@4R~fJXwklJ$Sl&vXXt^iaqS>PJO{~_}kVG7@KSk`&jvw9FEJBTj1o(a5yAp zd5{xfg5nS#a6H8+Ea9+XZOrK;E=HVlak)FJb^`(Zo?E^{EG`dsdKx~v?)C25;y8=M z@)jO!VmPwpdsupzYc!w`G8hCu*e&IsoU?y&aM7l)QO|;h`y|IhB6xp+llX9EgnrpD zTKhsA*C}%OB9J&Nz>F2dIxQxS@X``veA*0~1x`tedwy^gP|AP>s%jf8D4k<}2pvN9 za6(XWzOl>hpJN5=h~r-0i!+=F1jf(G#<-Fe`OmNik>N$P%lb1aWI+D|IWG5h&qV zHOBw0sKWKU-8-~Js?2VYQ3afFhVMb>qF85uj^O-JKXzWgzt&tN~^w9h5 z(V^r5t))AqCnFaD^EJb?-6^&h%;j?C!D&qEifpP~GWKx<$gZ4C;%QOePu zT@aU|?RvG2$b7TdXSJigy-tYf97ohiRqa)~lp9EFPy|xDC+E2O4s~se1mi~#->MY) zEa9XvwgqbtIGlB#Ih2Vq^gk#)^gw%kAQXd}`fB3e17N~|KRlPzo#JuM9eY(km;U49 zlh8@z;i2PnP|HNYl-&+vqz3b|f=2Pp&ylasyx+C^WYy4cT-5?r6Q}(BYL{~B$zYYj z{e}dtD%G~!00n-c+nu#m-5gMia%kV!V21}z?CwD)bcD}%nUaUPLKaZbcK;+~1&ATx z@1cmo1-t9)1T;d#?ujsZhYpR&|;E=0?@Y!xm6HR++C~ZKq^%>&YOO@i4x!!aC{D`{5opo~U12 zjfu34gISK2I8oR|LjlBJd&GX2cTnX9{V-^?e{8ry?EE^Y91geJJLi#jxD)Q|h3ymQ zeAHSZq7NVb)Ze0+0Q;a}iK2&=qBGY|9Y<~p?^b%%!ZtGApXoPZq_+K*Xw}c1E%9AJPgNoYqnmF^TbULLJpkW(Na){D1SJ>k`O`KnONG(;MH5K7bdgvLb z#xJ=18_bN47IXx30A}^`O7tpAg$AyL6ujD&%H>wdpqKqYZW4$%AjUePkv_|Ns#OME zFo>W+Si6S|y|->KpoZ7=6^}98;J8e+eXSTj1TL*i*TPUM**uluq=Yr9!TL=jCV0Lp^ zZiNgY@&T?%3sCmr1jiDjfs*Zfy*SCw*cd7f*o;Fe7|izJ?ZWYOSx8f;sE`H6!zV`v 
zJm6sYy|z*2AutA0R6yWzer(+_aOHOQV`Rn|wZdx;=eoOkQcdwpMsONA(QD}xeE8=X zL`MQg6&-M>B!|uS?$i4#`~^lHgUdq6h^ynDtKFAdPX;;v!}#n_rgET!4o8?nvETy! zqJ=u65JJd>A<|O4%$B%f$kO7YopMLW0mp6ffOgrvdHyWJN*!Y~a5=?E#RSInQtvj1 zLPvehll^&YupCg`<0&NZsU5yRx%QKp<^oSvQ_iF4H=%k~Epog_2rsO5_Dyz1^_(87 z>Wf(4UbXvjD`n85@Lt_4l7buCoIkWT21yB{LtHtrwm7kV`NU3gYcU^ZyT%@j>t-sM-h4 zLU;9IBU*D4ytw+Xd*B1uMe!!>W1of19H@QKUWG3Y>KDUYzMaHP@3Wb9Hl~j!Y``@B z?Y_bQJK2E0d$k6*JTy`w+~V!j(ifj6WWcMumOq<9r>n5a4!#cQPHzL*Qklli5&uZ9 zm?_f-y2u_+Z{bi-V|m%D8vj+8W_4}iynK;uFKB7y90ao3+XG);=ro#5Z}DX&i}zoZ z7ZW30lg|Y*%+jx88220(WBQ-BX3A5<`Sr^;T|u5hZX4gCn29Q+F?sCW;|$*s(aQ;5GohJ46Fl-A6F&WNPLPwR_8-^uCV-c7%;MO5Av39+ z2S#U5mOLjzqG7wFH#tQ_9+c5tjYGSNE5Ca~XAOw)FPk4lSVD5%iHvu8rNIT8BAIt~S62?t@e5h;11Q6;+8zYWPgska=DWbzl zMP8+0#`Fcw!F2UW5utm0V5iQFf&H48t&d2I}HaW$_#Z(jd8CJfooVv-3n zg|y5O=8u}c`fIen;(SLC7%_!TR-q^m-o6S#EC+nfqz6yT`nw2@d41XZDL1mSNVK1l?uaRL zvI@psQpnuLAR<-*7^j-pj_UYuERCov$s_sokp&UtDDVhKVb)f-5XQiDU+}6=7D$Ga z+QL2-LQuu=sY%qS5j=(wdv`fW+-O4fM@fP`86^3DwizKJI>@LsxeiC~w2T>|?nC8(OT6s7?Dve6A8-dsISgHBMrk z@yC8tzqqPfl2&1Sh)g!X!c{T;th(US8Kgu`OKMDdi_-q%g2=s}rSV`~VnG}Wp<~fE zGKCuR_6uUU3Z$yfwbDZu_ByeiIuiS0sHG5F8fdX`qq7jThT`cF@;W6fwb{FDf$uG5G@n{Dj%!rU5Z6PtBv4--AlISD&HgcIRJk| zvSoJzAON&=pWvGMO0dAD)xmzL(Hz6Du4j0C#S;*!$q}`xo)%mn7st0Y*MYuTU-Mv< zdyg>OBT*KrS$(~vo;@u}2cM*(TLKK0Q!_x3*)P(3mME!Z4z>% diff --git a/osfmk/console/panic_ui/images/rendered_numbers.tiff b/osfmk/console/panic_ui/images/rendered_numbers.tiff deleted file mode 100644 index 6deae082e5db6e3fe364200e69e24d3c14aab4eb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3218 zcmeHIS8o$R5S|SX5WIjz6TCz`ScpT&7=?go3B3ivh29A<|7goy6?9^|{I_2fmji|7csNgdZwG61|ef;?0!~6H|-g(}e zH?Lp6diC<60f!j~_pJ^zh+>2lwyayLb2Qojaa)`}VC{H*em!asB$W zYgeycxpMjPrAwZ7@#2LG=g*%zclPX=GoNrypFVZ!>sb$NSE?u%@@!~~`9H*vc;lc$As;lSEud13iZ|>YVb7s$;HEZU~ z88fC&pEhmk)G1RYPo6ljveLFEOsJ?BKYrY}^764`$BY?0deo?qBS(xFK781)Awvca zE-Nc79W-d*zySmL_wUy)k?7mEPoI*K-o1;9lYjXT1pz}Yko=5hdwltl&pO*8pFWVi zeVa_~-o1MDq)9`EcIlE#68-M{iSo^#=>PnK9_Oc?7sx{{ko=5hPdA$n{dlgwa(*;_ zqCbEC-_PIk{=fg+X@pbHKfzr4?0e|TFE(2f@cE^IHl&T`Usj!}N)}brr0P?3HL2R7 zk{&UN$6~z_v3Me0T-389(JP)PjuEw{uhbO(ITZf*6YyH84Btu>P!mzNfK3yhhp{U3 zd1zIez`l8*YL{ixK9Ex@)gj=UHKS#bBICbI$Hz)#|s zxB)6{7*|F>+6SOG7Gb|F6I6^MSbi=5{g*&i#4%n9@U@HFB#=L>#sFk?9Go*b#Tto$GfI}(1$2m!N&BO@02*~b(&X-d*&i+4S&jZ5+e zmR3+v#C2VMQ|tm|?hbJ2**hR{@d_NXE285#d{CC@b*2oyIw)6~Z(v4nMHkqZ-v^J> zxG?~pg)F|d1(1pN9RST}g5LrB9B}veEAAjzFB};p4;yLFYY}r+qmx-XkfLtXg5yGm ZyRk?A*nz*K*he@Bq!F@!?DG!~eFIZAZo2>g diff --git a/osfmk/console/panic_ui/images/rendered_numbersWHD.raw b/osfmk/console/panic_ui/images/rendered_numbersWHD.raw deleted file mode 100644 index 35290cd565b35a74d0ad11be0fb3d0a16b84e8c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1425 zcmc(f%Z-#5r#3>lF z$JBL2MANM+E~AUUihP@qknujGZV}ziEs`5+=S_xCUY@1!TRD(Vr?-_c);g~OkP?+s z(XL1edCl@pK)Hkim!O}r9QqlEV!*T1L()e#o`Jjt7m)+a)*+?sx3WZ)mdDk7Z2*3+ zIBVny00Ov$vvXM?hs|l`86JCaqRqgcYZV zSe$4N>l1)yM{+8Sn0q<*pv9R9^4f^vRAJ#fqt`BoA&{B=^Buqk11|Hv1GPn%M9yer zRMiq!XV>rEXlV~@HJRd=bMj3QBIElG(E8sUa9(`{qjtqM@^I8vas6roaj0FBUoa1e zVgYBq4@O#H0ALpW3i2|b%J&aIg68=@puYpyj#Z0G`o^@tYJT!V5^u7tb$98n#sc diff --git a/osfmk/console/panic_ui/qtif2kraw.c b/osfmk/console/panic_ui/qtif2kraw.c deleted file mode 100644 index 6627e7eaa..000000000 --- a/osfmk/console/panic_ui/qtif2kraw.c +++ /dev/null @@ -1,892 +0,0 @@ -/* converts a QT RAW 8-bit image file into format the kernel panic ui expects. 
- *
- * to build: cc -o qtif2kraw qtif2kraw.c
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-int EncodeImage( unsigned char * data, int pixels, unsigned char * fileArr );
-int findIndexNearMatch( unsigned int color24 );
-unsigned int findColor24NearMatch( unsigned int color24 );
-unsigned char findIndexMatch( unsigned int color24 );
-int convert8toGrey( unsigned char * data, unsigned int size );
-int convert8bitIndexto8( unsigned char * data, int height, int width, unsigned char ** dout );
-unsigned int * CreateCLUTarry( unsigned char * raw_clut );
-unsigned int * ReplaceCLUT( char * iname );
-
-#define offsetof(type, field) ((size_t)(&((type *)0)->field))
-
-struct panicimage {
-	unsigned int	pd_sum;
-	unsigned int	pd_dataSize;
-	unsigned int	pd_tag;
-	unsigned short	pd_width;
-	unsigned short	pd_height;
-	unsigned char	pd_depth;
-	unsigned char	pd_info_height;
-	unsigned char	pd_info_color[2];
-	unsigned char	data[];
-};
-
-
-void
-usage( int type ) {
-printf(
-"\n"
-"Usage:\n"
-"\tqtif2kraw -i <.qtif> -o <.kraw> [operands ...]\n\n"
-"\tThe following operands are available\n\n"
-"\t-h\t\tDisplay full help information\n"
-"\t-i <file>\tUse file containing QuickTime uncompressed raw image as\n"
-"\t\t\tthe panic dialog (8 bit only)\n"
-"\t-o <file>\tWrite the output as a compressed kernel RAW image suitable\n"
-"\t\t\tfor loading into the kernel\n"
-"\t-c <file>\tUse file containing 256 RGB values for 8-bit indexed\n"
-"\t\t\tlookups, overrides built-in appleClut8\n"
-"\t-fg <color>\tForeground color of font used for panic information in\n"
-"\t\t\t24-bits, default 0xFFFFFF (100%% white)\n"
-"\t-bg <color>\tBackground color of font used for panic information in\n"
-"\t\t\t24-bits, default 0x222222 (13%% white, dark gray)\n"
-"\t-n <lines>\tNumber of lines that have been reserved to display the\n"
-"\t\t\tpanic information, must be at least 20\n"
-"\n\n" );
-}
-
-
-#include "appleclut8.h"
-#include "../iso_font.c"
-
-struct QTHeader {
-	long	idSize;		/* total size of ImageDescription including extra data ( CLUTs and other per sequence data ) */
-	long	cType;		/* 'raw '; what kind of codec compressed this data */
-	long	resvd1;		/* reserved for Apple use */
-	short	resvd2;		/* reserved for Apple use */
-	short	dataRefIndex;	/* set to zero */
-	short	version;	/* which version is this data */
-	short	revisionLevel;	/* what version of that codec did this */
-	long	vendor;		/* whose codec compressed this data */
-	long	temporalQuality; /* what was the temporal quality factor */
-	long	spatialQuality;	/* what was the spatial quality factor */
-	short	width;		/* how many pixels wide is this data */
-	short	height;		/* how many pixels high is this data */
-	long	hRes;		/* horizontal resolution */
-	long	vRes;		/* vertical resolution */
-	long	dataSize;	/* if known, the size of data for this image descriptor */
-	short	frameCount;	/* number of frames this description applies to */
-	char	name[32];	/* name of codec ( in case not installed ) */
-	short	depth;		/* what depth is this data (1-32) or ( 33-40 grayscale ) */
-	short	clutID;		/* clut id or if 0 clut follows or -1 if no clut */
-} image_header;
-
-static unsigned int mismatchClut[256];
-static int nextmis = -1, neargrey = 0, cvt2grey = 0, exactmatch=0;
-static int grey = 0, debug = 0;
-static unsigned char fg, bg;
-unsigned int * panic_clut = NULL;
-static char * clutin = NULL;
-
-union colors {
-	unsigned int c24;
-	unsigned char rgb[4];
-	struct {
-		unsigned char dummy;
-		unsigned char red;
-		unsigned char green;
-		unsigned char blue;
-	} clut;
-};
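The struct panicimage definition above fixes the on-disk layout of the .kraw container this tool emits: a fixed header followed by pd_dataSize bytes of encoded pixels. As a minimal sketch of how a consumer could inspect that header (a hypothetical standalone helper, not part of the deleted tool; it assumes the struct panicimage and offsetof definitions above, and it does not verify the pd_sum checksum):

#include <stdio.h>

/* Sketch: read and print a .kraw header, assuming struct panicimage above. */
static int
dump_kraw_header(FILE *f)
{
	struct panicimage hdr;

	/* the flexible data[] member contributes nothing to the header size */
	if (fread(&hdr, offsetof(struct panicimage, data), 1, f) != 1)
		return -1;
	printf("tag=0x%08x %ux%u depth=%u dataSize=%u sum=0x%08x\n",
	    hdr.pd_tag, hdr.pd_width, hdr.pd_height,
	    hdr.pd_depth, hdr.pd_dataSize, hdr.pd_sum);
	return 0;
}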
-int
-main( int argc, char *argv[] )
-{
-	char	*file = NULL;
-	char	*kraw = NULL;
-	FILE	* stream;
-	unsigned char	* data;
-	unsigned short	width = 0, height = 0;
-	unsigned char	depth = 0, lines = 20;
-	unsigned int	i, pixels, sum, encodedSize, fg24= 0xFFFFFF, bg24=0x222222;
-	unsigned char	*fileArr;
-	int	next;
-
-
-	// pull apart the arguments
-	for( next = 1; next < argc; next++ )
-	{
-		if (strcmp(argv[next], "-i") == 0)	// image file in raw QT uncompressed format (.qtif)
-			file = argv[++next];
-
-		else if (strcmp(argv[next], "-o") == 0)	// output file for WHD image
-			kraw = argv[++next];
-
-		else if (strcmp(argv[next], "-n") == 0)	// number of reserved lines
-			lines = atoi(argv[++next]);
-		else if (strcmp(argv[next], "-fg") == 0)	// foreground color in 24 bits
-			sscanf(argv[++next], "%i", &fg24);
-		else if (strcmp(argv[next], "-bg") == 0)	// background color in 24 bits
-			sscanf(argv[++next], "%i", &bg24);
-		else if (strcmp(argv[next], "-c") == 0)	// input file for clut
-			clutin = argv[++next];
-		else if (strcmp(argv[next], "-h") == 0)	// display more help
-		{	usage(1); exit(1);	}
-
-		else if (strcmp(argv[next], "-debug") == 0)	// verbose
-			debug++;
-	}
-
-	if (!(file || kraw) ) {
-		usage(0);
-		exit(1);
-	}
-
-	printf("\n");
-
-	panic_clut = appleClut8;
-
-	if ( clutin )
-	{
-		panic_clut = ReplaceCLUT( clutin );
-		printf("Built-in CLUT has been replaced with %s...\n", clutin);
-	}
-
-	fg = findIndexNearMatch(fg24);
-	bg = findIndexNearMatch(bg24);
-
-	// Begin to process the image
-
-	if( file == NULL)
-	{
-		printf("No image file was processed...\n\n");
-		exit(0);
-	}
-
-
-	printf("Verifying image file...\n");
-	if ( file != NULL )
-	{
-		stream = fopen(file, "r");
-		if (!stream) {
-			fprintf(stderr, "Err: could not open .qtif image file.\n\n");
-			exit(1);
-		}
-
-		{
-			long	hdr_off;
-			long	hdr_type;
-			int	rc;
-
-			if ( ! fread((void *) &hdr_off, sizeof(long), 1, stream) ) goto errQTimage;
-			if ( ! fread((void *) &hdr_type, sizeof(long), 1, stream) ) goto errQTimage;
-
-			if ( hdr_type != 'idat' ) goto errQTimage;
-
-			if ( fseek(stream, hdr_off, SEEK_SET) ) goto errQTimage;
-			if ( ! fread((void *) &hdr_off, sizeof(long), 1, stream) ) goto errQTimage;
-			if ( ! fread((void *) &hdr_type, sizeof(long), 1, stream) ) goto errQTimage;
-
-			if ( hdr_type != 'idsc' ) goto errQTimage;
-
-			rc = fread((void *) &image_header, sizeof(image_header), 1, stream);
-			if ( !rc && !feof(stream) ) goto errQTimage;
-			if ( image_header.cType != 'raw ' ) goto errQTimage;
-			if ( image_header.depth != 8 ) goto errQTimage;
-
-
-			width = image_header.width;
-			height = image_header.height;
-			depth = image_header.depth;
-
-			printf("Image info: width: %d height: %d depth: %d...\n", width, height, depth);
-
-			if (!(width && height && depth)) {
-				fprintf(stderr,"Err: Invalid image file header (width, height, or depth is 0)\n");
-				exit(1);
-			}
-		}
-
-		if ( !(data = (char *)malloc(image_header.dataSize))) {
-			fprintf(stderr,"Err: Couldn't malloc file data (%ld bytes)... bailing.\n", image_header.dataSize);
-			exit(1);
-		}
-
-		// Read the image data
-		if ( fseek(stream, 8, SEEK_SET) ) goto errQTimage;
-		if ( ! fread((void *) data, image_header.dataSize, 1, stream) ) goto errQTimage;
-		fclose( stream );
-
-		pixels = image_header.dataSize;
-
-		if ( grey == 1 )
-			pixels = convert8toGrey( data, image_header.dataSize );
-
-		printf("Converting image file to 8 bit raw...\n");
-		pixels = convert8bitIndexto8( data, height, width, &data );
-		image_header.dataSize = pixels;
-		depth = 1;
-
-		printf("Converted %d pixels%s...\n", pixels/depth, ((grey==1)?" to grayscale":""));
-		if ( exactmatch > 0 )
-			printf("Found %d color matches in CLUT...\n", exactmatch);
-		if ( cvt2grey > 0 )
-			printf("Converted %d colors to gray...\n", cvt2grey);
-		if ( neargrey > 0 )
-			printf("Adjusted %d grays to best match...\n", neargrey);
-		if ( nextmis > 0 )
-			printf("Total of %d separate color mismatches...\n", nextmis);
-	}
-
-	printf("Encoding image file...\n");
-
-	if (!(fileArr = (unsigned char *) malloc(pixels))) {
-		fprintf(stderr,"Err: Couldn't malloc fileArr (%d pixels)... bailing.\n", pixels);
-		exit(1);
-	}
-
-	encodedSize = EncodeImage( data, pixels, fileArr );
-
-	if ( encodedSize >= pixels )
-	{
-		printf("Skipping encoding...\n");
-	}
-
-	for (sum=0,i=0; i<encodedSize; i++)
- ...
-
-	if ( (*index + *depth) > max )
-	{
-		*depth = 1;
-		retc = 0;
-		goto Leave;
-	}
-
-	nextP = (union RunData *) &data[*index];
-
-	if ( retc == 1 )
-	{
-		// check current data against current depth
-		switch ( *depth )
-		{
-		case 1:
-			if ( nextP->c[0] == currP->c[0] )
-				goto Leave;
-			break;
-		case 2:
-			if ( nextP->c[0] == currP->c[0] &&
-			     nextP->c[1] == currP->c[1] )
-				goto Leave;
-			break;
-		case 3:
-			if ( nextP->c[0] == currP->c[0] &&
-			     nextP->c[1] == currP->c[1] &&
-			     nextP->c[2] == currP->c[2] )
-				goto Leave;
-			break;
-		case 4:
-			if ( nextP->c[0] == currP->c[0] &&
-			     nextP->c[1] == currP->c[1] &&
-			     nextP->c[2] == currP->c[2] &&
-			     nextP->c[3] == currP->c[3] )
-				goto Leave;
-			break;
-		}
-
-		retc = 0;
-		goto Leave;
-	}
-
-	// start of a new pattern match begins with depth = 1
-
-	if ( (*index+6) <= max )
-	{
-		// We have at least 8 bytes left in the buffer starting from currP
-#if 1
-		nextP = (union RunData *) &data[*index+3];
-		if ( nextP->c[0] == currP->c[0] &&
-		     nextP->c[1] == currP->c[1] &&
-		     nextP->c[2] == currP->c[2] &&
-		     nextP->c[3] == currP->c[3] )
-		{
-			// check if they are all the same value
-			if ( currP->c[0] == currP->c[1] &&
-			     currP->c[1] == currP->c[2] &&
-			     currP->c[2] == currP->c[3] )
-			{	// if so, leave at depth = 1
-				retc = 1;
-				*depth = 1;
-				goto Leave;
-			}
-
-			if (debug>2) printf("Found 4 at %x\n", *index);
-			retc = 1;
-			*depth = 4;
-			*index += 3;
-			goto Leave;
-		}
-
-		nextP = (union RunData *) &data[*index+2];
-		if ( nextP->c[0] == currP->c[0] &&
-		     nextP->c[1] == currP->c[1] &&
-		     nextP->c[2] == currP->c[2] )
-		{
-			// check if they are all the same value
-			if ( currP->c[0] == currP->c[1] &&
-			     currP->c[1] == currP->c[2] )
-			{	// if so, leave at depth = 1
-				retc = 1;
-				*depth = 1;
-				goto Leave;
-			}
-
-			if (debug>2) printf("Found 3 at %x\n", *index);
-			retc = 1;
-			*depth = 3;
-			*index += 2;
-			goto Leave;
-		}
-
-		nextP = (union RunData *) &data[*index+1];
-		if ( nextP->c[0] == currP->c[0] &&
-		     nextP->c[1] == currP->c[1] )
-		{
-			// check if they are all the same value
-			if ( currP->c[0] == currP->c[1] )
-			{	// if so, leave at depth = 1
-				retc = 1;
-				*depth = 1;
-				goto Leave;
-			}
-
-			if (debug>2) printf("Found 2 at %x\n", *index);
-			retc = 1;
-			*depth = 2;
-			*index += 1;
-			goto Leave;
-		}
-
-#endif
-		nextP = (union RunData *) &data[*index];
-
-	}
-
-	if ( nextP->c[0] == currP->c[0] )
-		retc = 1;
-	else
-		retc = 0;
-
-Leave:
-
-	if ( retc == 1 )
-		*index += *depth;
-
-	return retc;
-}
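The matcher above only extends a run while the next *depth* bytes repeat the current pattern; when a new run starts, the widest repeating unit wins (a 4-byte pattern is tried first, then 3, then 2, and a pattern made of identical bytes collapses back to depth 1). The same selection rule in isolation (a hypothetical helper, not from the deleted file):

#include <string.h>

/* Sketch: width (1..4) of the repeating byte pattern at the start of buf,
 * widest first, as in the matcher above; 0 if nothing repeats. */
static int
pattern_depth(const unsigned char *buf, unsigned int len)
{
	int d, i;

	for (d = 4; d >= 2; d--) {
		if ((unsigned int)(2 * d) > len)
			continue;		/* need two full copies */
		if (memcmp(buf, buf + d, (size_t)d) != 0)
			continue;
		for (i = 1; i < d; i++)		/* all-equal bytes: depth 1 */
			if (buf[i] != buf[0])
				return d;
		return 1;
	}
	return (len >= 2 && buf[0] == buf[1]) ? 1 : 0;
}

For example, "ABABAB" yields depth 2, while "AAAA" yields depth 1.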
-int
-EncodeImage( unsigned char * data, int pixels, unsigned char * fileArr )
-{
-	union RunData * currP, * norunP ;
-	int i, depth;
-	unsigned int filePos, run, nomatchrun;
-
-	currP = NULL;
-	norunP = NULL;
-	nomatchrun = 0;
-	filePos = 0;	// position in the file we're writing out
-	run = 1;
-	depth = 1;
-
-	currP = (union RunData *)&data[0];	// start a new run
-	for (i=1; i<pixels; )
-	{
- ...
-		if ( run > 2 ) {
-			unsigned char * p = (unsigned char *)norunP;
-
-			if( nomatchrun ) {
-				while (nomatchrun) {
-					int cnt;
-
-					cnt = (nomatchrun > 127) ? 127 : nomatchrun;
-					fileArr[filePos++] = cnt;
-					nomatchrun -= cnt;
-
-					while ( cnt-- )
-						fileArr[filePos++] = *p++;
-				}
-			}
-
-			filePos += encode_rle(fileArr, filePos, run, currP, depth);
-
-			norunP = NULL;
-		} else {
-			nomatchrun+=run;
-		}
-
-		currP = (union RunData *)&data[i];	// start a new run
-
-		if( norunP == NULL ) {
-			nomatchrun = 0;
-			norunP = currP;
-		}
-
-		depth = 1;	// switch back to a single byte depth
-		run = 1;	// there is always at least one entry
-		i++;		// point to next byte
-	}
-	}
-
-	if( nomatchrun ) {
-		unsigned char * p = (unsigned char *)norunP;
-		while (nomatchrun) {
-			int cnt;
-
-			cnt = (nomatchrun > 127) ? 127 : nomatchrun;
-			fileArr[filePos++] = cnt;
-			nomatchrun -= cnt;
-
-			while ( cnt-- )
-				fileArr[filePos++] = *p++;
-		}
-	}
-
-	// write out any run that was in progress
-	if (run > 0) {
-		filePos += encode_rle(fileArr, filePos, run, currP, depth);
-	}
-
-	return filePos;
-}
-
-/* encode_rle applies a "modified-RLE" encoding to a given image. The encoding works as follows:
-
-	The quantity is described in the first byte. If the MSB is zero, then the next seven bits
-	are the quantity. If the MSB is set, bits 0-3 of the quantity are in the least significant bits.
-	If bit 5 is set, then the quantity is further described in the next byte, where an additional
-	7 bits (4-10) worth of quantity will be found. If the MSB of this byte is set, then an additional
-	7 bits (11-17) worth of quantity will be found in the next byte. This repeats until the MSB of
-	a quantity byte is zero, thus ending the chain.
-
-	The value is described in the first byte. If the MSB is zero, then the value is in the next byte.
-	If the MSB is set, then bits 5/6 describe the number of value bytes following the quantity bytes.
-
-	encodings are: (q = quantity, v = value, c = quantity continues)
-
-	        Byte 1            Byte 2        Byte 3        Byte 4      Byte 5    Byte 6    Byte 7    Byte 8
-	case 1: [ 0 q6-q0 ]       [ v7-v0 ]
-	case 2: [ 1 0 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ]
-	case 3: [ 1 0 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ]
-	case 4: [ 1 1 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ]
-	case 5: [ 1 1 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ]
-*/
-
-unsigned int
-encode_length(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned int mask)
-{
-	unsigned char single_mask = 0x0F;
-	unsigned char double_mask = 0x7F;
-	unsigned int slots_used = 0;
-
-	fileArr[filePos] = mask | (quantity & single_mask);	// low bits (plus mask)
-	slots_used++;
-
-	if (quantity >>= 4) {
-		fileArr[filePos++] |= 0x10;	// set length continuation bit
-		fileArr[filePos] = quantity & double_mask;
-		slots_used++;
-
-		while (quantity >>= 7) {
-			fileArr[filePos++] |= 0x80;	// set length continuation bit
-			fileArr[filePos] = quantity & double_mask;
-			slots_used++;
-		}
-	}
-
-	return slots_used;
-}
-
-
-unsigned int
-encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, union RunData * value, int depth)
-{
-	unsigned char slots_used = 0;
-
-
-	switch ( depth ) {
-	case 1:
-		slots_used += encode_length( fileArr, filePos, quantity, 0x80 );
-		fileArr[filePos+slots_used++] = value->c[0];
-		break;
-
-	case 2:
-		slots_used += encode_length( fileArr, filePos, quantity, 0xA0 );
-		fileArr[filePos+slots_used++] = value->c[0];
-		fileArr[filePos+slots_used++] = value->c[1];
-		break;
-
-	case 3:
-		slots_used += encode_length( fileArr, filePos, quantity, 0xC0 );
-		fileArr[filePos+slots_used++] = value->c[0];
-		fileArr[filePos+slots_used++] = value->c[1];
-		fileArr[filePos+slots_used++] = value->c[2];
-		break;
-
-	case 4:
-		slots_used += encode_length( fileArr, filePos, quantity, 0xE0 );
-		fileArr[filePos+slots_used++] = value->c[0];
-		fileArr[filePos+slots_used++] = value->c[1];
-		fileArr[filePos+slots_used++] = value->c[2];
-		fileArr[filePos+slots_used++] = value->c[3];
-		break;
-	}
-
-	return slots_used;
-}
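To make the length chain above concrete: a depth-1 run (mask 0x80) of 1000 copies of value 0xAB encodes as 0x98 0x3E 0xAB. 1000 is 0x3E8, so the low four bits (0x8) land in the first byte, bit 0x10 is set because quantity bits remain, and the next seven bits (0x3E) fill the second byte, whose own MSB stays clear to end the chain. A self-contained check of that arithmetic (a hypothetical test, mirroring encode_length/encode_rle above):

#include <assert.h>
#include <stdio.h>

/* Worked check of the length encoding above: run of 1000 x 0xAB at depth 1. */
int
main(void)
{
	unsigned int q = 1000;			/* 0x3E8 */
	unsigned char b0 = 0x80 | (q & 0x0F);	/* depth-1 mask + low 4 quantity bits */

	q >>= 4;				/* 0x3E of quantity left to encode */
	b0 |= 0x10;				/* continuation: a length byte follows */
	assert(b0 == 0x98 && (q & 0x7F) == 0x3E && (q >> 7) == 0);
	printf("encoded: %02X %02X AB\n", b0, q & 0x7F);	/* 98 3E AB */
	return 0;
}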
-int
-findIndexNearMatch( unsigned int color24 )
-{
-	union colors color8;
-	union colors clut8;
-	int isGrey = 0;
-
-	color8.c24 = color24;
-
-	if ( color8.clut.red == color8.clut.green && color8.clut.green == color8.clut.blue )
-		isGrey = 1;
-
-	if ( isGrey ) {
-		int i;
-		unsigned int bestIndex = 0, rel, bestMatch = -1;
-
-		for (i=0; i<256; i++) {
-			clut8.c24 = panic_clut[i];
-
-			if ( clut8.clut.red != clut8.clut.green || clut8.clut.green != clut8.clut.blue )
-				continue;
-
-			if ( clut8.clut.red > color8.clut.red) continue;
-			rel = abs(color8.clut.red - clut8.clut.red);
-			if ( rel < bestMatch ) {
-				bestMatch = rel;
-				bestIndex = i;
-			}
-		}
-
-		return bestIndex;
-	}
-
-	// we must have a non-grey color
-	return -1;
-}
-
-unsigned int
-color24toGrey( unsigned int color24 )
-{
-	float R, G, B;
-	float Grey;
-	union colors c;
-	unsigned char grey8;
-	unsigned int grey24;
-
-	c.c24 = color24;
-
-	R = (c.clut.red & 0xFF) ;
-	G = (c.clut.green & 0xFF) ;
-	B = (c.clut.blue & 0xFF) ;
-
-	Grey = (R*.30) + (G*.59) + (B*.11);
-	grey8 = (unsigned char) ( Grey + .5);
-	grey24 = (grey8<<16) | (grey8<<8) | grey8;
-	return grey24;
-}
-
-
-int
-convert8toGrey( unsigned char * data, unsigned int size )
-{
-	int i;
-	unsigned int c24;
-	union colors c;
-
-	for ( i=0; i<size; i++ )
- ...
-}
-
-unsigned int
-findColor24NearMatch( unsigned int color24 )
-{
- ...
-	if ( c.rgb[1] > c.rgb[2] && c.rgb[1] > c.rgb[3] )
-		prim = 1;
-	else if ( c.rgb[2] > c.rgb[1] && c.rgb[2] > c.rgb[3] )
-		prim = 2;
-	else if ( c.rgb[3] > c.rgb[1] && c.rgb[3] > c.rgb[2] )
-		prim = 3;
-	else if ( c.rgb[1] == c.rgb[2] && c.rgb[1] == c.rgb[3] )
-		prim = 0;	// gray
-	else if ( c.rgb[1] == c.rgb[2] )
-		prim = 0x12;	// red green
-	else if ( c.rgb[1] == c.rgb[3] )
-		prim = 0x13;	// red blue
-	else if ( c.rgb[2] == c.rgb[3] )
-		prim = 0x23;	// green blue
-	else
-		printf("cannot tell color %06x\n", color24);
-
-	last_c = color24;
-	last_p = prim;
-
-	if ( prim == 0 || prim > 3 )
-	{
-		last_co = -1;
-		return last_co;
-	}
-
-	return -1;
-}
-
-
-unsigned char
-findIndexMatch( unsigned int color24 )
-{
-	int i;
-	unsigned char ri;
-	static unsigned char last = 0;
-
-retry:
-	if ( panic_clut[last] == color24 )
-	{
-		exactmatch++;
-		return last;
-	}
-
-	for (i=0; i<256; i++)
-	{
-		if ( panic_clut[i] == color24 ) {
-			last = i;
-			exactmatch++;
-			return last;
-		}
-	}
-
-	if ( nextmis == -1 ) {
-		for (i=0; i<256; i++) mismatchClut[i] = -1;
-		nextmis = 0;
-	}
-
-	i = findIndexNearMatch(color24);
-
-	if ( i == -1 )	// found a color that is not grey
-	{
-		unsigned int colormatch = findColor24NearMatch( color24 );
-
-		if ( colormatch == -1 )	// cannot convert color
-		{
-			cvt2grey++;
-			if (debug>1) printf("color %06X not matched at all\n", color24);
-			color24 = color24toGrey(color24);
-			if (debug>1) printf("now grey %06X\n", color24);
-		}
-		else
-			color24 = colormatch;
-
-		goto retry;
-	}
-
-	if (debug>1) printf("color %06X now matched at %x\n", color24, i);
-
-	ri = i;
-
-	neargrey++;
-
-	// keep track of missed repeats
-	for ( i=0; i<nextmis; i++ )
- ...
-	if ( (nextmis >= 256) )
-	{
-		fprintf(stderr,"Err: Too many color mismatches detected with this CLUT\n");
-		exit(1);
-	}
-
-	return ri;
-}
-
-/*
- * Convert 8 bit mode to 8 bit; we have to strip off the alignment bytes
- */
-
-int
-convert8bitIndexto8( unsigned char * data, int height, int width, unsigned char ** dout )
-{
-	unsigned int row, col, i, i8, size, adj;
-	unsigned char index;
-	unsigned char * ddata;
-	union colors color24;
-
-	adj=(4-(width%4))%4;	// adjustment needed to strip off the word alignment padding
-	size = height * width;
-	ddata = (unsigned char *) calloc( size, 1);
-
-	for (i8=0,row=0; row<height; row++)
- ...
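Before moving on to setupdialog.c, a worked instance of the 30/59/11 luma weighting in color24toGrey above: 0x336699 has R=51, G=102, B=153, so Grey = 15.3 + 60.18 + 16.83 = 92.31, which rounds to 92 (0x5C) and yields 0x5C5C5C. Restated standalone (hypothetical; it uses bit shifts rather than the union above, which is equivalent for this purpose):

#include <stdio.h>

/* Standalone restatement of color24toGrey's weighting, checked by hand. */
int
main(void)
{
	unsigned int c = 0x336699;
	float grey = ((c >> 16 & 0xFF) * .30) + ((c >> 8 & 0xFF) * .59) + ((c & 0xFF) * .11);
	unsigned char g8 = (unsigned char)(grey + .5);

	printf("%06X -> %02X%02X%02X\n", c, g8, g8, g8);	/* 336699 -> 5C5C5C */
	return 0;
}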
diff --git a/osfmk/console/panic_ui/setupdialog.c b/osfmk/console/panic_ui/setupdialog.c
deleted file mode 100644
--- a/osfmk/console/panic_ui/setupdialog.c
+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define RUN_MAX 32767
-
-void create_numbers_file( FILE *stream, char *outfile );
-unsigned int encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned char value);
-
-void
-usage(void) {
-	printf("\nusage: setupdialog -i <image file> -oi <image output> -n <numbers file> -on <numbers output>\n");
-
-	printf("\nYou can supply a panic image file, a numbers file, or both. Input files\n");
-	printf("must be in RAW format where each pixel is represented by an index into the\n");
-	printf("MacOS X system CLUT. The first %d bytes must be the width, height, and depth\n", 3 * sizeof(short));
-	printf("(in that order, %d bytes each).\n", sizeof(short));
-
-	printf("\nThe output files are generated C structures in the format the panic ui code\n");
-	printf("expects (default output files are panic_image.c and rendered_numbers.c).\n\n");
-}
-
-int
-main( int argc, char *argv[] )
-{
-	int	next;
-	char	*file = NULL, *ptr, *out = NULL, *numsfile = NULL, *numsout = NULL;
-	FILE	* stream, *out_stream;
-	int	* data;
-	short	width = 0, height = 0, depth = 0;
-	char	word[2];
-	char	byte;
-	unsigned int	i, pixels, filePos;
-	int	err;
-	unsigned char	*fileArr;
-	unsigned char	nextP;
-	unsigned int	count;
-	int	currP;
-	int	fd;
-	int	pairs_this_line;
-
-
-	// pull apart the arguments
-	for( next = 1; next < argc; next++ )
-	{
-		if (strcmp(argv[next], "-i") == 0)	// image file (RAW/PICT?)
-			file = argv[++next];
-		else if (strcmp(argv[next], "-n") == 0)	// numbers/chars image file (RAW)
-			numsfile = argv[++next];
-		else if (strcmp(argv[next], "-oi") == 0)	// output file for image
-			out = argv[++next];
-		else if (strcmp(argv[next], "-on") == 0)	// output file for numbers
-			numsout = argv[++next];
-
-		/* perhaps we should just let the user specify the W/H rather than require the header */
-		/*
-		else if (strcmp(argv[next], "-w") == 0)	// image width (pixels)
-			width = strtoul(argv[++next], &ptr, 0);
-		else if (strcmp(argv[next], "-h") == 0)	// image height (pixels)
-			width = strtoul(argv[++next], &ptr, 0);
-		*/
-	}
-
-	if (!(numsfile || file)) {
-		usage();
-		exit(1);
-	}
-
-	if (!numsfile) {
-		printf("\nNo numbers file to process\n");
-	} else {
-		stream = fopen(numsfile, "r");
-		if (!stream) {
-			printf("bad nums infile.. bailing.\n");
-			exit(1);
-		}
-		create_numbers_file( stream, numsout );
-		fclose(stream);
-	}
-
-	if( file == NULL) {
-		printf("\nNo image file to process\n");
-		exit(1);
-	}
-
-	stream = fopen(file, "r");
-	if (!stream) {
-		printf("bad infile.. bailing.\n");
-		exit(1);
-	}
-
-	printf("\nReading image file...\n");
-
-	fread((void *) &width, sizeof(short), 1, stream);
-	printf("got width: %d\n", width);
-	fread((void *) &height, sizeof(short), 1, stream);
-	printf("got height: %d\n", height);
-	fread((void *) &depth, sizeof(short), 1, stream);
-	printf("got depth: %d\n", depth);
-
-	if (!(width && height && depth)) {
-		printf("Invalid image file header (width, height, or depth is 0)\n");
-		exit(1);
-	}
-
-	pixels = width * height;
-
-	if (!(fileArr = (unsigned char *) malloc(pixels))) {
-		printf("couldn't malloc fileArr (%d pixels)... bailing.\n", pixels);
-		exit(1);
-	}
-
-	currP = -1;
-	count = 0;
-	filePos = 0;	// position in the file we're writing out
-
-	for (i=0; i < pixels; i++) {
-		nextP = fgetc(stream);
-		count++;
-		if (nextP == currP) {
-			if (count >= RUN_MAX) {
-				filePos += encode_rle(fileArr, filePos, count, (unsigned char) currP);
-				count = 0;
-				currP = -1;
-			}
-		} else {
-			if (currP != -1) {
-				filePos += encode_rle(fileArr, filePos, count-1, (unsigned char) currP);
-			}
-			currP = nextP;	// start a new run
-			count = 1;
-		}
-	}
-
-	// write out any run that was in progress
-	if (count > 0) {
-		filePos += encode_rle(fileArr, filePos, count, (unsigned char) currP);
-	}
-
-	fclose( stream );
-
-	// now, generate the c file
-
-	if ( out == NULL)
-		out = "panic_image.c";
-	out_stream = fopen(out, "w");
-
-	if(out_stream == NULL) {
-		printf("couldn't open out file.. bailing\n");
-		exit(1);
-	}
-
-	pairs_this_line = 0;
-
-	fprintf( out_stream, "/* generated c file */\n\n");
-	fprintf( out_stream, "static const struct {\n");
-	fprintf( out_stream, " unsigned int pd_width;\n");
-	fprintf( out_stream, " unsigned int pd_height;\n");
-	fprintf( out_stream, " unsigned int bytes_per_pixel; /* 1: CLUT, 3:RGB, 4:RGBA */\n");
-	fprintf( out_stream, " unsigned char image_pixel_data[%#4.2x];\n", (filePos));
-
-	fprintf( out_stream, "} panic_dialog = {\n");
-	fprintf( out_stream, "\t%d, ", width);		/* panic dialog x */
-	fprintf( out_stream, "%d, ", height);		/* panic dialog y */
-	fprintf( out_stream, "1,\n");			/* bytes per pixel */
-
-	for( i=0; i < filePos;) {
-		fprintf( out_stream, "0x%.2x,0x%.2x", fileArr[i], fileArr[i+1]);
-		i+=2;
-		pairs_this_line++;
-
-		// if the first byte had a leading 1, this is a 3-byte encoding
-		if ((fileArr[i-2] >> 7) == 1) {
-			fprintf( out_stream, ",0x%.2x", fileArr[i++]);
-			pairs_this_line++;
-		}
-
-		if (i >= filePos)	// this is the last element
-			fprintf( out_stream, "\n};");
-		else fprintf( out_stream, ", ");
-
-		if(pairs_this_line > 8) {
-			fprintf( out_stream, "\n");
-			pairs_this_line = 0;
-		}
-	}
-
-
-	fclose( out_stream );
-
-	return 0;
-}
-
-
-/* Each number/char (0-f) has its own row in the pixmap array.
-   When done, these rows each contain an RLE character.
-   The image file is read row by row, so the individual characters
-   must be constructed in the same way. The numPos array tracks the
-   current position in each character's RLE array.
- */
-void
-create_numbers_file( FILE *stream, char *outfile )
-{
-	int err;
-	short height, depth, totalwidth;
-	int numbers = 17;
-	int width[17] = {9,7,8,6,9,7,8,7,8,7,10,7,9,10,7,6,4};
-	int numPos[17] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-
-	int **pixmap;
-	int row, col, item, line=0, currWidth;
-	int nextP, currP;
-	int count, currNum;
-
-	FILE *out_stream;
-
-	printf("\nReading numbers file...\n");
-	fread((void *) &totalwidth, sizeof(short), 1, stream);
-	printf("got width: %d\n", totalwidth);
-	fread((void *) &height, sizeof(short), 1, stream);
-	printf("got height: %d\n", height);
-	fread((void *) &depth, sizeof(short), 1, stream);
-	printf("got depth: %d\n", depth);
-
-	if (!(width && height && depth)) {
-		printf("Invalid numbers file header (width, height, or depth is 0)\n");
-		return;
-	}
-
-	// allocate array to hold each number's RLE encoding (20 = 2xwidest width[i] value, 17 = num chars)
-	pixmap = (int **) malloc( 17 * sizeof(int *) );
-	for( item=0; item<17; item++)
-		pixmap[item] = (int *) malloc( 2*width[item]*height*sizeof(int) );
-
-	currP = -1;
-	count = 0;
-	currWidth = 0;
-	currNum = 0;
-
-	for( row=0; row < height; row++) {
-		for( item=0; item < numbers; item++) {
-			count = 0;
-			currP = -1;	// start each character fresh
-			for( col=0; col < width[item]; col++) {
-				nextP = fgetc( stream );
-				if( nextP == currP) {
-					if( count == 127) {	// probably never executed given the small widths
-						pixmap[item][numPos[item]] = count;
-						pixmap[item][numPos[item]+1] = currP;
-						numPos[item]+=2;
-						count = 0;
-						currP = -1;
-					} else count++;		// add one to the current run
-				} else {
-					if( currP != -1) {
-						pixmap[item][numPos[item]] = count;	// currP was the end of the run
-						pixmap[item][numPos[item]+1] = currP;
-						numPos[item]+=2;
-					}
-					currP = nextP;	// start a new run
-					count = 1;
-				}
-			}
-			// write out any run that was in progress
-			if( count > 0) {
-				pixmap[item][numPos[item]] = count;
-				pixmap[item][numPos[item]+1] = currP;
-				numPos[item]+=2;
-			}
-		}
-	}
-
-	// now, generate the c file
-
-	if ( outfile == NULL)
-		outfile = "rendered_numbers.c";
-	out_stream = fopen(outfile, "w");
-
-	if(out_stream == NULL) {
-		printf("couldn't open numbers outfile.. bailing\n");
-		exit(1);
-	}
-
-	fprintf( out_stream, " /* generated c file */\n\n");
-
-	// iterate through all the numbers/chars
-	for( item=0; item<numbers; item++)
- ...
-			if(line >= width[item]) {
-				fprintf( out_stream, "\n");
-				line = 0;
-			}
-			col+=2;
-		}
-	}
-
-	fclose( out_stream );
-}
-
-
-/* encode_rle applies a "modified-RLE" encoding to a given image. The encoding works as follows:
-
-	The quantity and value will be described by either two or three bytes. If the
-	most significant bit of the first byte is a 0, then the next seven bits are
-	the quantity (run-length) and the following 8 bits are the value (index into
-	a clut, in this case). If the msb of the first byte is a 1, then the next 15 bits
-	are the quantity and the following 8 are the value. Visually, the two possible
-	encodings are: (q = quantity, v = value)
-
-	        Byte 1                     Byte 2                      Byte 3
-	case 1: [ 0 q6 q5 q4 q3 q2 q1 q0 ] [ v7 v6 v5 v4 v3 v2 v1 v0 ] [ ]
-	case 2: [ 1 q14 q13 q12 q11 q10 q9 q8 ] [ q7 q6 q5 q4 q3 q2 q1 q0 ] [ v7 v6 v5 v4 v3 v2 v1 v0 ]
-*/
-
-
-unsigned int
-encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned char value)
-{
-	unsigned char single_mask = 0x00;
-	unsigned char double_mask = 0x80;
-	unsigned char slots_used = 0;
-
-	if (quantity < 128) {
-		fileArr[filePos] = single_mask | quantity;
-		slots_used = 1;
-	} else {
-		fileArr[filePos] = double_mask | (quantity >> 8);	// high 7 bits (plus mask)
-		fileArr[filePos+1] = (unsigned char) quantity;		// low 8 bits
-		slots_used = 2;
-	}
-
-	fileArr[filePos+slots_used] = value;
-	slots_used++;
-
-	return slots_used;
-}
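Worked bytes for the two cases above: a run of 5 pixels of CLUT index 0x42 encodes as 0x05 0x42 (case 1), and a run of 300 (0x012C) as 0x81 0x2C 0x42 (case 2). A matching decoder sketch (a hypothetical helper mirroring encode_rle above):

#include <stddef.h>

/* Sketch: decode one (quantity, value) pair of the scheme above.
 * Returns bytes consumed; *qty and *val receive the run. */
static size_t
decode_rle(const unsigned char *in, unsigned int *qty, unsigned char *val)
{
	if (in[0] & 0x80) {			/* case 2: 15-bit quantity */
		*qty = ((in[0] & 0x7F) << 8) | in[1];
		*val = in[2];
		return 3;
	}
	*qty = in[0];				/* case 1: 7-bit quantity */
	*val = in[1];
	return 2;
}

RUN_MAX is 32767 because the 15-bit quantity field of case 2 tops out there.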
diff --git a/osfmk/console/panic_ui/systemCLUT.act b/osfmk/console/panic_ui/systemCLUT.act
deleted file mode 100644
index 0ad32f3ae5e6e8ce6d2b14c6cabaa2d1c0e42ea8..0000000000000000000000000000000000000000
GIT binary patch
[deleted binary payload omitted: 768 bytes]

diff --git a/osfmk/console/progress_meter_data.c b/osfmk/console/progress_meter_data.c
index 95443c3cb..a5ed2560a 100644
--- a/osfmk/console/progress_meter_data.c
+++ b/osfmk/console/progress_meter_data.c
@@ -28,380 +28,368 @@
 #define kProgressBarHeight (18)
 #define kProgressBarCapWidth (9)
-#define kProgressBarWidth (300 + 2 * kProgressBarCapWidth)
+#define kProgressBarWidth (234)
 
-static const unsigned char
-progressmeter_leftcap1x[2][kProgressBarCapWidth * kProgressBarHeight] = {
+static const unsigned char progressmeter_leftcap1x[2][9 * 18] = {
 {
- 0xff,0xff,0xff,0xff,0xff,0xe8,0xcc,0xbb,0xaa,
- 0xff,0xff,0xff,0xf3,0xbf,0xa9,0xa9,0xa9,0xa9,
- 0xff,0xff,0xeb,0xb0,0xa9,0xad,0xd4,0xec,0xfe,
- 0xff,0xf3,0xb0,0xa9,0xbf,0xf4,0xff,0xff,0xff,
- 0xff,0xbf,0xa9,0xc4,0xff,0xff,0xff,0xff,0xff,
- 0xe8,0xa9,0xb0,0xfb,0xff,0xff,0xff,0xff,0xff,
- 0xcc,0xa9,0xd5,0xff,0xff,0xff,0xff,0xff,0xff,
- 0xb5,0xa9,0xef,0xff,0xff,0xff,0xff,0xff,0xff,
- 0xac,0xa9,0xfc,0xff,0xff,0xff,0xff,0xff,0xff,
- 0xae,0xa9,0xfa,0xff,0xff,0xff,0xff,0xff,0xff,
0xb8,0xa9,0xef,0xff,0xff,0xff,0xff,0xff,0xff, - 0xcc,0xa9,0xd5,0xff,0xff,0xff,0xff,0xff,0xff, - 0xe8,0xa9,0xb0,0xfb,0xff,0xff,0xff,0xff,0xff, - 0xff,0xbf,0xa9,0xc5,0xff,0xff,0xff,0xff,0xff, - 0xff,0xf3,0xae,0xa9,0xc0,0xf4,0xff,0xff,0xff, - 0xff,0xff,0xeb,0xb0,0xa9,0xad,0xd4,0xec,0xff, - 0xff,0xff,0xff,0xf3,0xbd,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xe8,0xcc,0xbb,0xa9, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xfe,0xdc,0xc6,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2, + 0xdd,0xcc,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xc6,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xc6,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xdd,0xcc,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xfe,0xdc,0xc6,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, }, { - 0xff,0xff,0xff,0xff,0xff,0xe8,0xcc,0xbb,0xaa, - 0xff,0xff,0xff,0xf3,0xbf,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xeb,0xb0,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xf3,0xb0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xbf,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xe8,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xcc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xb5,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xac,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xae,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xb8,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xcc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xe8,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xbf,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xf3,0xae,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xeb,0xb0,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xf3,0xbd,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xe8,0xcc,0xbb,0xa9, -} -}; + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xfa,0x8b,0x41,0x33,0x33,0x33,0x33,0x33,0x33, + 0x8c,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x41,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x41,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x8c,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xfa,0x8b,0x41,0x33,0x33,0x33,0x33,0x33,0x33, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, +}}; -static const unsigned char -progressmeter_leftcap2x[2][4 * kProgressBarCapWidth * kProgressBarHeight] = { +static const unsigned char progressmeter_leftcap2x[2][4 * 9 * 18] = { { - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xe0,0xcc,0xbc,0xbb,0xaa, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xe4,0xc0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xec,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xfe,0xcf,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 
0xff,0xff,0xff,0xff,0xff,0xfa,0xc1,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb8,0xd4,0xeb,0xee,0xfe, - 0xff,0xff,0xff,0xff,0xfa,0xb8,0xa9,0xa9,0xa9,0xa9,0xa9,0xc4,0xf2,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xfe,0xc1,0xa9,0xa9,0xa9,0xa9,0xb4,0xec,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xcf,0xa9,0xa9,0xa9,0xa9,0xbf,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xeb,0xa9,0xa9,0xa9,0xa9,0xbf,0xfc,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xbd,0xa9,0xa9,0xa9,0xb4,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xe4,0xa9,0xa9,0xa9,0xa9,0xeb,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xbc,0xa9,0xa9,0xa9,0xc5,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xf4,0xa9,0xa9,0xa9,0xa9,0xf2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xdc,0xa9,0xa9,0xa9,0xbf,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xc8,0xa9,0xa9,0xa9,0xd7,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xb9,0xa9,0xa9,0xa9,0xea,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xb0,0xa9,0xa9,0xa9,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xac,0xa9,0xa9,0xa9,0xfc,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xac,0xa9,0xa9,0xa9,0xfc,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xb0,0xa9,0xa9,0xa9,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xb9,0xa9,0xa9,0xa9,0xea,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xc7,0xa9,0xa9,0xa9,0xd7,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xdc,0xa9,0xa9,0xa9,0xbf,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xf4,0xa9,0xa9,0xa9,0xa9,0xf2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xbc,0xa9,0xa9,0xa9,0xc5,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xe4,0xa9,0xa9,0xa9,0xa9,0xeb,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xbd,0xa9,0xa9,0xa9,0xb5,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xeb,0xa9,0xa9,0xa9,0xa9,0xbf,0xfc,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xcd,0xa9,0xa9,0xa9,0xa9,0xbf,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xfe,0xc0,0xa9,0xa9,0xa9,0xa9,0xb4,0xec,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xfa,0xb7,0xa9,0xa9,0xa9,0xa9,0xa9,0xc5,0xf3,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xf8,0xc0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbb,0xd4,0xec,0xef,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xfe,0xcd,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xeb,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xe3,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf4,0xdb,0xcb,0xbb,0xb9,0xa9, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xfe,0xe5,0xcf,0xc5,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2, + 0xff,0xf9,0xcd,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2, + 0xfe,0xcd,0xc2,0xc8,0xd2,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xe5,0xc2,0xc8,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xcf,0xc2,0xd1,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xc3,0xc2,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xc3,0xc2,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xcf,0xc2,0xd2,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xe5,0xc2,0xc7,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xfd,0xcd,0xc2,0xc8,0xd2,0xd8,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9, + 0xff,0xf9,0xcd,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2, + 0xff,0xff,0xfe,0xe4,0xce,0xc5,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, }, { - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xe0,0xcc,0xbc,0xbb,0xaa, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xe4,0xc0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xec,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xfe,0xcf,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xfa,0xc1,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xfa,0xb8,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xfe,0xc1,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xcf,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 
0xff,0xff,0xeb,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xbd,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xe4,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xf4,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xdc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xc8,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xb9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xb0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xac,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xac,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xb0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xb9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xc7,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xdc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xf4,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xe4,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xbd,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xeb,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xcd,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xfe,0xc0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xfa,0xb7,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xf8,0xc0,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xfe,0xcd,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xeb,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xe3,0xbc,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf4,0xdb,0xcb,0xbb,0xb9,0xa9, -} -}; - - - + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xfa,0xa7,0x60,0x3d,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xff,0xe9,0x59,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xfa,0x59,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xa8,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x60,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x38,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x38,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0x60,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xa7,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xf9,0x59,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xff,0xe9,0x59,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xff,0xff,0xfa,0xa6,0x5d,0x3c,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, +}}; -static const unsigned char -progressmeter_middle1x[2][1 * kProgressBarHeight] = { +static const unsigned char progressmeter_middle1x[2][1 * 18] = { { - 0xaa, - 0xa9, - 0xf6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc2, + 0xd9, + 0xd9, + 0xd9, + 0xd9, + 0xc2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xf6, - 0xa9, - 0xa9, }, { - 0xaa, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, }}; - -static const unsigned char -progressmeter_middle2x[2][2 * 1 * kProgressBarHeight] = { +static const unsigned char progressmeter_middle2x[2][2 * 1 * 18] = { { - 0xaa, - 0xa9, - 0xa9, - 0xa9, - 0xfe, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xa9, - 0xa9, - 0xa9, - 0xa9, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 
0xff, + 0xff, + 0xff, + 0xff, + 0xc2, + 0xc2, + 0xd9, + 0xd9, + 0xd9, + 0xd9, + 0xd9, + 0xd9, + 0xd9, + 0xd9, + 0xc2, + 0xc2, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, }, { - 0xaa, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, - 0xa9, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0x33, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, }}; -static const unsigned char -progressmeter_rightcap1x[2][kProgressBarCapWidth * kProgressBarHeight] = { +static const unsigned char progressmeter_rightcap1x[2][9 * 18] = { { - 0xa9,0xbb,0xcc,0xe8,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xbf,0xf3,0xff,0xff,0xff, - 0xfe,0xec,0xd4,0xad,0xa9,0xb0,0xeb,0xff,0xff, - 0xff,0xff,0xff,0xf4,0xbf,0xa9,0xb0,0xf3,0xff, - 0xff,0xff,0xff,0xff,0xff,0xc4,0xa9,0xbf,0xff, - 0xff,0xff,0xff,0xff,0xff,0xfb,0xb0,0xa9,0xe8, - 0xff,0xff,0xff,0xff,0xff,0xff,0xd5,0xa9,0xcc, - 0xff,0xff,0xff,0xff,0xff,0xff,0xef,0xa9,0xb5, - 0xff,0xff,0xff,0xff,0xff,0xff,0xfa,0xa9,0xac, - 0xff,0xff,0xff,0xff,0xff,0xff,0xfb,0xa9,0xac, - 0xff,0xff,0xff,0xff,0xff,0xff,0xef,0xa9,0xb5, - 0xff,0xff,0xff,0xff,0xff,0xff,0xd5,0xa9,0xcc, - 0xff,0xff,0xff,0xff,0xff,0xfb,0xb0,0xa9,0xe8, - 0xff,0xff,0xff,0xff,0xff,0xc5,0xa9,0xbf,0xff, - 0xff,0xff,0xff,0xf4,0xc1,0xa9,0xae,0xf3,0xff, - 0xff,0xec,0xd4,0xad,0xa9,0xb0,0xea,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xbd,0xf3,0xff,0xff,0xff, - 0xa9,0xbb,0xcc,0xe8,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc6,0xdc,0xfe, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xcc,0xde, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xc6, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xc6, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xcc,0xde, + 0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc6,0xdc,0xfe, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, }, { - 0xa9,0xbb,0xcc,0xe8,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xbf,0xf3,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xb0,0xeb,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb0,0xf3,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbf,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xe8, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcc, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb5, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xac, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xac, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb5, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcc, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xe8, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbf,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xae,0xf3,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xb0,0xea,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xbd,0xf3,0xff,0xff,0xff, - 
0xa9,0xbb,0xcc,0xe8,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0x33,0x33,0x33,0x33,0x33,0x33,0x41,0x8b,0xfa, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x92, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x41, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x41, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x91, + 0x33,0x33,0x33,0x33,0x33,0x33,0x41,0x8b,0xfa, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, }}; - -static const unsigned char -progressmeter_rightcap2x[2][4 * kProgressBarCapWidth * kProgressBarHeight] = { +static const unsigned char progressmeter_rightcap2x[2][4 * 9 * 18] = { { - 0xaa,0xbb,0xbc,0xcc,0xe0,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc0,0xe4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xec,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcf,0xfe,0xff,0xff,0xff,0xff,0xff,0xff, - 0xfe,0xee,0xeb,0xd4,0xb8,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc1,0xfb,0xff,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xf2,0xc5,0xa9,0xa9,0xa9,0xa9,0xa9,0xb8,0xfa,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xec,0xb4,0xa9,0xa9,0xa9,0xa9,0xc1,0xfe,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xbf,0xa9,0xa9,0xa9,0xa9,0xcf,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xfc,0xbf,0xa9,0xa9,0xa9,0xa9,0xeb,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xb4,0xa9,0xa9,0xa9,0xbd,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xeb,0xa9,0xa9,0xa9,0xa9,0xe4,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xc5,0xa9,0xa9,0xa9,0xbc,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf2,0xa9,0xa9,0xa9,0xa9,0xf4, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xbf,0xa9,0xa9,0xa9,0xdc, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xd7,0xa9,0xa9,0xa9,0xc8, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xea,0xa9,0xa9,0xa9,0xb9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xa9,0xa9,0xa9,0xb0, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xfc,0xa9,0xa9,0xa9,0xac, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xfc,0xa9,0xa9,0xa9,0xac, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xa9,0xa9,0xa9,0xb0, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xea,0xa9,0xa9,0xa9,0xb9, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xd7,0xa9,0xa9,0xa9,0xc8, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xc0,0xa9,0xa9,0xa9,0xdc, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf2,0xa9,0xa9,0xa9,0xa9,0xf4, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xc5,0xa9,0xa9,0xa9,0xbc,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xec,0xa9,0xa9,0xa9,0xa9,0xe4,0xff, - 
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xb5,0xa9,0xa9,0xa9,0xbc,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xfc,0xbf,0xa9,0xa9,0xa9,0xa9,0xeb,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf8,0xbf,0xa9,0xa9,0xa9,0xa9,0xcd,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xec,0xb4,0xa9,0xa9,0xa9,0xa9,0xc0,0xfe,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xf3,0xc5,0xa9,0xa9,0xa9,0xa9,0xa9,0xb7,0xfa,0xff,0xff,0xff,0xff, - 0xff,0xef,0xec,0xd4,0xbb,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc0,0xf8,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcd,0xfe,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xeb,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xe3,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xb9,0xbb,0xcb,0xdb,0xf4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc5,0xcf,0xe5,0xfe,0xff,0xff, + 0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xcd,0xf9,0xff, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xd2,0xc8,0xc2,0xcd,0xfd, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xc7,0xc2,0xe5, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd1,0xc2,0xcf, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xc2,0xc4, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xc2,0xc5, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd2,0xc2,0xcf, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xc7,0xc2,0xe5, + 0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd9,0xd8,0xd2,0xc7,0xc2,0xcd,0xfd, + 0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xcd,0xf9,0xff, + 0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc2,0xc5,0xce,0xe4,0xfe,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, }, { - 0xaa,0xbb,0xbc,0xcc,0xe0,0xf8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc0,0xe4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xec,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcf,0xfe,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc1,0xfb,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb8,0xfa,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc1,0xfe,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcf,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xeb,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbd,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xe4,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xf4, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xdc, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc8, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb9, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb0, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xac, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xac, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb0, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb9, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc8, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xdc, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xf4, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xe4,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xeb,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcd,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc0,0xfe,0xff,0xff,0xff, - 
0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xb7,0xfa,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xc0,0xf8,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xcd,0xfe,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xeb,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xa9,0xa9,0xa9,0xa9,0xa9,0xbc,0xe3,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, - 0xa9,0xb9,0xbb,0xcb,0xdb,0xf4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, -} }; + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x3d,0x60,0xa7,0xfa,0xff,0xff, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x59,0xe9,0xff, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x59,0xf9, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0xa8, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x60, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x39, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x3d, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x60, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0xa7, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x58,0xf9, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x59,0xe9,0xff, + 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x3c,0x5d,0xa6,0xfa,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, +}}; + static const unsigned char * progressmeter_leftcap[2][2] = { diff --git a/osfmk/console/serial_general.c b/osfmk/console/serial_general.c index 8551fd6b7..28d5737cd 100644 --- a/osfmk/console/serial_general.c +++ b/osfmk/console/serial_general.c @@ -32,7 +32,6 @@ * @APPLE_FREE_COPYRIGHT@ */ -#include #include #include #include diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index 3f96d8c42..e696d5c30 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -88,15 +88,12 @@ * */ -#include - #include #include #include #include #include -#include #include #include @@ -242,11 +239,12 @@ enum vt100state_e { } gc_vt100state = ESnormal; -#ifdef CONFIG_VC_PROGRESS_WHITE -enum { kProgressAcquireDelay = 0 /* secs */ }; -#else -enum { kProgressAcquireDelay = 5 /* secs */ }; -#endif +enum +{ + /* secs */ + kProgressAcquireDelay = 0, + kProgressReacquireDelay = 5, +}; static int8_t vc_rotate_matr[4][2][2] = { { { 1, 0 }, @@ -1351,6 +1349,33 @@ static int vc_rendered_char_size = 0; #define REN_MAX_DEPTH 32 static unsigned char vc_rendered_char[ISO_CHAR_HEIGHT * ((REN_MAX_DEPTH / 8) * ISO_CHAR_WIDTH)]; +static void +internal_set_progressmeter(int new_value); +static void +internal_enable_progressmeter(int new_value); + +enum +{ + kProgressMeterOff = FALSE, + kProgressMeterUser = TRUE, + kProgressMeterKernel = 3, +}; +enum +{ + kProgressMeterMax = 1024, + kProgressMeterEnd = 512, +}; + + +static boolean_t vc_progress_white = +#ifdef CONFIG_VC_PROGRESS_WHITE + TRUE; +#else /* !CONFIG_VC_PROGRESS_WHITE */ + FALSE; +#endif /* !CONFIG_VC_PROGRESS_WHITE */ + +static int vc_acquire_delay = kProgressAcquireDelay; + static void vc_clear_screen(unsigned int xx, unsigned int yy, unsigned int scrreg_top, unsigned int scrreg_bottom, int which) @@ -1815,7 +1840,8 @@ vc_update_color(int color, boolean_t fore) */ static vc_progress_element * vc_progress; -static const unsigned char * vc_progress_data[2]; +enum { kMaxProgressData = 3 }; +static const unsigned char * vc_progress_data[kMaxProgressData]; static const unsigned char * vc_progress_alpha; static boolean_t vc_progress_enable; static const unsigned char * vc_clut; @@ -1829,9 +1855,21 @@ static thread_call_data_t vc_progress_call; static boolean_t vc_needsave; static void * vc_saveunder; static vm_size_t vc_saveunder_len; -static int8_t vc_uiselect = 0; +static int8_t vc_uiscale = 1; decl_simple_lock_data(,vc_progress_lock) +static int vc_progress_withmeter = 3; +int vc_progressmeter_enable; +static int vc_progressmeter_drawn; +int vc_progressmeter_value; +static uint32_t vc_progressmeter_count; +static uint64_t vc_progressmeter_interval; +static uint64_t vc_progressmeter_deadline; +static thread_call_data_t vc_progressmeter_call; +static void * vc_progressmeter_backbuffer; +static boolean_t vc_progressmeter_hold; +static uint32_t vc_progressmeter_diskspeed = 256; + enum { kSave = 0x10, kDataIndexed = 0x20, @@ -1875,6 +1913,7 @@ static void vc_blit_rect_30(int x, int y, int bx, unsigned int * backBuffer, unsigned int flags); static void vc_progress_task( void * arg0, void * arg ); +static void vc_progressmeter_task( void * arg0, void * arg ); 
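The blitter hunks below replace the compile-time CONFIG_VC_PROGRESS_WHITE branches in vc_blit_rect_16/_32/_30 with a runtime vc_progress_white test, so a single kernel can fade the progress artwork toward either black or white. As a reference for the arithmetic, here is a minimal user-space sketch of the 32-bit case (an illustration of the blend only, not the kernel code; the 16-bit path does the same through the CLUT_MASK_*/CLUT_SHIFT_* macros, whose bodies carry their own shift operators, and the 30-bit path uses 10-bit channels):

#include <stdint.h>
#include <stdio.h>

/* 'data' is an 8-bit alpha sample from the progress image; 'back' is the
 * saved background pixel.  Scaling back by data/256 fades toward black;
 * adding (0xff - data) per channel fades toward white instead.  R and B
 * share one multiply by masking them into alternating bytes. */
static uint32_t blend32(uint32_t back, uint8_t data, int progress_white)
{
    uint32_t out = (((((back & 0x00ff00ffu) * data) + 0x00ff00ffu) >> 8) & 0x00ff00ffu)
                 | (((((back & 0x0000ff00u) * data) + 0x0000ff00u) >> 8) & 0x0000ff00u);
    if (progress_white)
        out += ((uint32_t)(0xff - data) << 16)
             | ((uint32_t)(0xff - data) << 8)
             |  (uint32_t)(0xff - data);
    return out;
}

int main(void)
{
    /* A mid-gray background under a half-transparent sample, both themes. */
    printf("black theme: %08x\n", blend32(0x00808080u, 0x80, 0));
    printf("white theme: %08x\n", blend32(0x00808080u, 0x80, 1));
    return 0;
}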
static void vc_blit_rect(int x, int y, int bx, int width, int height, @@ -2012,13 +2051,13 @@ static void vc_blit_rect_16( int x, int y, int bx, out = (((((back & MASK_R) * data) + MASK_R_8) >> 8) & MASK_R) | (((((back & MASK_G) * data) + MASK_G_8) >> 8) & MASK_G) | (((((back & MASK_B) * data) + MASK_B_8) >> 8) & MASK_B); -#ifdef CONFIG_VC_PROGRESS_WHITE - out += (((0xff - data) & CLUT_MASK_R) CLUT_SHIFT_R) - | (((0xff - data) & CLUT_MASK_G) CLUT_SHIFT_G) - | (((0xff - data) & CLUT_MASK_B) CLUT_SHIFT_B); -#endif - } else + if (vc_progress_white) out += (((0xff - data) & CLUT_MASK_R) CLUT_SHIFT_R) + | (((0xff - data) & CLUT_MASK_G) CLUT_SHIFT_G) + | (((0xff - data) & CLUT_MASK_B) CLUT_SHIFT_B); + } else if (kDataBack & flags) out = back; + else + out = data; *(dst + col) = out; } dst = (volatile unsigned short *) (((volatile char*)dst) + vinfo.v_rowbytes); @@ -2077,13 +2116,13 @@ static void vc_blit_rect_32(int x, int y, int bx, } else if (kDataAlpha & flags) { out = (((((back & 0x00ff00ff) * data) + 0x00ff00ff) >> 8) & 0x00ff00ff) | (((((back & 0x0000ff00) * data) + 0x0000ff00) >> 8) & 0x0000ff00); -#ifdef CONFIG_VC_PROGRESS_WHITE - out += ((0xff - data) << 16) - | ((0xff - data) << 8) - | (0xff - data); -#endif - } else + if (vc_progress_white) out += ((0xff - data) << 16) + | ((0xff - data) << 8) + | (0xff - data); + } else if (kDataBack & flags) out = back; + else + out = data; *(dst + col) = out; } dst = (volatile unsigned int *) (((volatile char*)dst) + vinfo.v_rowbytes); @@ -2145,13 +2184,13 @@ static void vc_blit_rect_30(int x, int y, int bx, exp = (((((exp & 0x3FF003FF) * data) + 0x0FF000FF) >> 8) & 0x3FF003FF) | (((((exp & 0x000FFC00) * data) + 0x0003FC00) >> 8) & 0x000FFC00); out = (unsigned int)exp; -#ifdef CONFIG_VC_PROGRESS_WHITE - out += ((0xFF - data) << 22) - | ((0xFF - data) << 12) - | ((0xFF - data) << 2); -#endif - } else + if (vc_progress_white) out += ((0xFF - data) << 22) + | ((0xFF - data) << 12) + | ((0xFF - data) << 2); + } else if (kDataBack & flags) out = back; + else + out = data; *(dst + col) = out; } dst = (volatile unsigned int *) (((volatile char*)dst) + vinfo.v_rowbytes); @@ -2160,6 +2199,14 @@ static void vc_blit_rect_30(int x, int y, int bx, } } +static void vc_clean_boot_graphics(void) +{ + // clean up possible FDE login graphics + vc_progress_set(FALSE, 0); + const unsigned char * + color = (typeof(color))(uintptr_t)(vc_progress_white ? 
0x00000000 : 0xBFBFBFBF); + vc_blit_rect(0, 0, 0, vinfo.v_width, vinfo.v_height, 0, 0, color, NULL, 0); +} /* * Routines to render the lzss image format @@ -2243,6 +2290,8 @@ vc_display_lzss_icon(uint32_t dst_x, uint32_t dst_y, uint32_t bytes_per_pixel = 4; uint32_t bytes_per_row = vinfo.v_rowbytes; + vc_clean_boot_graphics(); + image_start = (uint32_t *) (vinfo.v_baseaddr + (dst_y * bytes_per_row) + (dst_x * bytes_per_pixel)); lzss_image_state state = {0, 0, image_width, image_height, bytes_per_row, image_start, clut}; @@ -2311,6 +2360,8 @@ void vc_display_icon( vc_progress_element * desc, if( vc_progress_enable && vc_clut) { + vc_clean_boot_graphics(); + width = desc->width; height = desc->height; x = desc->dx; @@ -2327,20 +2378,20 @@ void vc_progress_initialize( vc_progress_element * desc, const unsigned char * data1x, const unsigned char * data2x, + const unsigned char * data3x, const unsigned char * clut ) { - uint64_t abstime; + uint64_t abstime; if( (!clut) || (!desc) || (!data1x)) return; vc_clut = clut; vc_clut8 = clut; - simple_lock_init(&vc_progress_lock, 0); - vc_progress = desc; vc_progress_data[0] = data1x; vc_progress_data[1] = data2x; + vc_progress_data[2] = data3x; if( 2 & vc_progress->flags) vc_progress_alpha = data1x + vc_progress->count * vc_progress->width * vc_progress->height; @@ -2348,9 +2399,13 @@ vc_progress_initialize( vc_progress_element * desc, vc_progress_alpha = NULL; thread_call_setup(&vc_progress_call, vc_progress_task, NULL); - clock_interval_to_absolutetime_interval(vc_progress->time, 1000 * 1000, &abstime); vc_progress_interval = (uint32_t)abstime; + + thread_call_setup(&vc_progressmeter_call, vc_progressmeter_task, NULL); + clock_interval_to_absolutetime_interval(1000 / 8, 1000 * 1000, &abstime); + vc_progressmeter_interval = (uint32_t)abstime; + } void @@ -2367,11 +2422,41 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay) unsigned int pdata32; unsigned int * buf32; - if( !vc_progress) + + if (kBootArgsFlagBlack & ((boot_args *) PE_state.bootArgs)->flags) return; + + if (1 & vc_progress_withmeter) + { + if (enable) internal_enable_progressmeter(kProgressMeterKernel); + + s = splhigh(); + simple_lock(&vc_progress_lock); + + if( vc_progress_enable != enable) { + vc_progress_enable = enable; + if( enable) + { + vc_progressmeter_count = 0; + clock_interval_to_deadline(vc_delay, + 1000 * 1000 * 1000 /*second scale*/, + &vc_progressmeter_deadline); + thread_call_enter_delayed(&vc_progressmeter_call, vc_progressmeter_deadline); + } + else thread_call_cancel(&vc_progressmeter_call); + } + + simple_unlock(&vc_progress_lock); + splx(s); + + if (!enable) internal_enable_progressmeter(kProgressMeterOff); return; + } + + + if(!vc_progress) return; if( enable) { - saveLen = (vc_progress->width << vc_uiselect) * (vc_progress->height << vc_uiselect) * vinfo.v_depth / 8; + saveLen = (vc_progress->width * vc_uiscale) * (vc_progress->height * vc_uiscale) * vinfo.v_depth / 8; saveBuf = kalloc( saveLen ); switch( vinfo.v_depth) { @@ -2450,44 +2535,85 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay) } +static void +vc_progressmeter_task(__unused void *arg0, __unused void *arg) +{ + spl_t s; + uint64_t interval; + + s = splhigh(); + simple_lock(&vc_progress_lock); + if (vc_progressmeter_enable) + { + uint32_t pos = (vc_progressmeter_count >> 13); + internal_set_progressmeter(pos); + if (pos < kProgressMeterEnd) + { + static uint16_t incr[8] = { 10000, 10000, 8192, 4096, 2048, 384, 384, 64 }; + vc_progressmeter_count += incr[(pos * 8) / kProgressMeterEnd]; + + 
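/* Editor's note, not part of the patch: pos is vc_progressmeter_count >> 13,
 * so each meter position costs 8192 counts, and incr[(pos * 8) / kProgressMeterEnd]
 * (index pos / 64, since kProgressMeterEnd is 512) shrinks the step from
 * 10000 counts per tick at the start to 64 near the end; the simulated
 * progress launches fast and decelerates toward kProgressMeterEnd, which is
 * half of kProgressMeterMax, while a tick is roughly 1/8 s scaled by
 * vc_progressmeter_diskspeed below.  Real values fed to vc_set_progressmeter()
 * are mapped over the full 0..kProgressMeterMax range instead. */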
interval = vc_progressmeter_interval; + interval = ((interval * 256) / vc_progressmeter_diskspeed); + + clock_deadline_for_periodic_event(interval, mach_absolute_time(), &vc_progressmeter_deadline); + thread_call_enter_delayed(&vc_progressmeter_call, vc_progressmeter_deadline); + } + } + simple_unlock(&vc_progress_lock); + splx(s); +} + +void vc_progress_setdiskspeed(uint32_t speed) +{ + vc_progressmeter_diskspeed = speed; +} + + static void vc_progress_task(__unused void *arg0, __unused void *arg) { - spl_t s; - int x, y, width, height; + spl_t s; + int x, y, width, height; const unsigned char * data; s = splhigh(); simple_lock(&vc_progress_lock); if( vc_progress_enable) { - - vc_progress_count++; - if( vc_progress_count >= vc_progress->count) { - vc_progress_count = 0; + + vc_progress_count++; + if( vc_progress_count >= vc_progress->count) { + vc_progress_count = 0; vc_progress_angle++; - } - - width = (vc_progress->width << vc_uiselect); - height = (vc_progress->height << vc_uiselect); - x = (vc_progress->dx << vc_uiselect); - y = (vc_progress->dy << vc_uiselect); - data = vc_progress_data[vc_uiselect]; - data += vc_progress_count * width * height; - if( 1 & vc_progress->flags) { - x += ((vinfo.v_width - width) / 2); - y += ((vinfo.v_height - height) / 2); } - vc_blit_rect( x, y, 0, - width, height, width, width, - data, vc_saveunder, - kDataAlpha - | (vc_progress_angle & kDataRotate) - | (vc_needsave ? kSave : 0) ); - vc_needsave = FALSE; - clock_deadline_for_periodic_event(vc_progress_interval, mach_absolute_time(), &vc_progress_deadline); - thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline); + width = (vc_progress->width * vc_uiscale); + height = (vc_progress->height * vc_uiscale); + x = (vc_progress->dx * vc_uiscale); + y = (vc_progress->dy * vc_uiscale); + data = vc_progress_data[vc_uiscale - 1]; + if (data) + { + data += vc_progress_count * width * height; + if( 1 & vc_progress->flags) { + x += ((vinfo.v_width - width) / 2); + y += ((vinfo.v_height - height) / 2); + } + + assert(((x + width) < (int)vinfo.v_width) && + ((y + height) < (int)vinfo.v_height)); + + vc_blit_rect( x, y, 0, + width, height, width, width, + data, vc_saveunder, + kDataAlpha + | (vc_progress_angle & kDataRotate) + | (vc_needsave ? 
kSave : 0) ); + vc_needsave = FALSE; + + clock_deadline_for_periodic_event(vc_progress_interval, mach_absolute_time(), &vc_progress_deadline); + thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline); + } } simple_unlock(&vc_progress_lock); splx(s); @@ -2505,12 +2631,12 @@ vc_progress_task(__unused void *arg0, __unused void *arg) static boolean_t gc_acquired = FALSE; static boolean_t gc_graphics_boot = FALSE; static boolean_t gc_desire_text = FALSE; +static boolean_t gc_paused_progress; static uint64_t lastVideoPhys = 0; static vm_offset_t lastVideoVirt = 0; static vm_size_t lastVideoSize = 0; static boolean_t lastVideoMapped = FALSE; - static void gc_pause( boolean_t pause, boolean_t graphics_now ) { @@ -2526,9 +2652,19 @@ gc_pause( boolean_t pause, boolean_t graphics_now ) simple_lock(&vc_progress_lock); - vc_progress_enable = gc_graphics_boot && !gc_desire_text && !pause; - if (vc_progress_enable) - thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline); + if (pause) + { + gc_paused_progress = vc_progress_enable; + vc_progress_enable = FALSE; + } + else vc_progress_enable = gc_paused_progress; + + if (vc_progress_enable) + { + if (1 & vc_progress_withmeter) thread_call_enter_delayed(&vc_progressmeter_call, vc_progressmeter_deadline); + else + thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline); + } simple_unlock(&vc_progress_lock); splx(s); @@ -2541,7 +2677,9 @@ vc_initialize(__unused struct vc_info * vinfo_p) vinfo.v_rows = vinfo.v_height / ISO_CHAR_HEIGHT; vinfo.v_columns = vinfo.v_width / ISO_CHAR_WIDTH; vinfo.v_rowscanbytes = ((vinfo.v_depth + 7) / 8) * vinfo.v_width; - vc_uiselect = (2 == vinfo.v_scale) ? 1 : 0; + vc_uiscale = vinfo.v_scale; + if (vc_uiscale > kMaxProgressData) vc_uiscale = kMaxProgressData; + else if (!vc_uiscale) vc_uiscale = 1; } void @@ -2609,7 +2747,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) { panic("initialize_screen: Strange framebuffer - addr = %08X\n", (uint32_t)boot_vinfo->v_baseAddr); } - new_vinfo.v_physaddr = (((uint64_t)fbppage) << 12) | (boot_vinfo->v_baseAddr & PAGE_MASK); /* Get the physical address */ + new_vinfo.v_physaddr = (((uint64_t)fbppage) << PAGE_SHIFT) | (boot_vinfo->v_baseAddr & PAGE_MASK); /* Get the physical address */ } if (boot_vinfo->v_length != 0) @@ -2652,9 +2790,9 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) { if (lastVideoVirt) /* Was the framebuffer mapped before? */ { -#if FALSE - if(lastVideoMapped) /* Was this not a special pre-VM mapping? */ -#endif + /* XXX why did this ever succeed? */ + /* TODO: Consider this. */ + if (!TEST_PAGE_SIZE_4K && lastVideoMapped) /* Was this not a special pre-VM mapping? 
*/ { pmap_remove(kernel_pmap, trunc_page_64(lastVideoVirt), round_page_64(lastVideoVirt + lastVideoSize)); /* Toss mappings */ @@ -2685,10 +2823,6 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) gc_ops.update_color = vc_update_color; gc_initialize(&vinfo); } - -#ifdef GRATEFULDEBUGGER - GratefulDebInit((bootBumbleC *)boot_vinfo); /* Re-initialize GratefulDeb */ -#endif /* GRATEFULDEBUGGER */ } graphics_now = gc_graphics_boot && !gc_desire_text; @@ -2706,7 +2840,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) case kPEAcquireScreen: if ( gc_acquired ) break; - vc_progress_set( graphics_now, kProgressAcquireDelay ); + vc_progress_set( graphics_now, vc_acquire_delay ); gc_enable( !graphics_now ); gc_acquired = TRUE; gc_desire_text = FALSE; @@ -2749,17 +2883,39 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) if ( gc_graphics_boot == FALSE ) break; vc_progress_set( FALSE, 0 ); - vc_enable_progressmeter( FALSE ); - + vc_acquire_delay = kProgressReacquireDelay; + vc_enable_progressmeter(FALSE); + vc_progress_white = TRUE; + vc_progress_withmeter &= ~1; vc_clut8 = NULL; -#ifdef GRATEFULDEBUGGER - GratefulDebInit(0); /* Stop grateful debugger */ -#endif /* GRATEFULDEBUGGER */ break; + + + case kPERefreshBootGraphics: + { + spl_t s; + boolean_t save; + + if (kBootArgsFlagBlack & ((boot_args *) PE_state.bootArgs)->flags) break; + + save = vc_progress_white; + vc_progress_white = (0 != (kBootArgsFlagBlackBg & ((boot_args *) PE_state.bootArgs)->flags)); + + internal_enable_progressmeter(kProgressMeterKernel); + + s = splhigh(); + simple_lock(&vc_progress_lock); + + vc_progressmeter_drawn = 0; + internal_set_progressmeter(vc_progressmeter_count >> 13); + + simple_unlock(&vc_progress_lock); + splx(s); + + internal_enable_progressmeter(kProgressMeterOff); + vc_progress_white = save; + } } -#ifdef GRATEFULDEBUGGER - if ( boot_vinfo ) GratefulDebInit((bootBumbleC *)boot_vinfo); /* Re initialize GratefulDeb */ -#endif /* GRATEFULDEBUGGER */ } void @@ -2802,6 +2958,11 @@ vcattach(void) { vm_initialized = TRUE; + vc_progress_white = (0 != ((kBootArgsFlagBlackBg | kBootArgsFlagLoginUI) + & ((boot_args *) PE_state.bootArgs)->flags)); + PE_parse_boot_argn("meter", &vc_progress_withmeter, sizeof(vc_progress_withmeter)); + simple_lock_init(&vc_progress_lock, 0); + if ( gc_graphics_boot == FALSE ) { long index; @@ -2830,106 +2991,124 @@ vcattach(void) } -int vc_progress_meter_enable; -int vc_progress_meter_value; - -static void * vc_progress_meter_backbuffer; -static int vc_progress_meter_drawn; - +// redraw progress meter between pixels x1, x2, position at x3 static void -vc_draw_progress_meter(int select, unsigned int flags, int x1, int x2) +vc_draw_progress_meter(unsigned int flags, int x1, int x2, int x3) { const unsigned char * data; int x, w; int ox, oy; + int endCapPos; + int onoff; + // 1 rounded fill, 0 square end + int style = (0 == (2 & vc_progress_withmeter)); - ox = ((vinfo.v_width - (kProgressBarWidth << vc_uiselect)) / 2); - oy = vinfo.v_height - (((vinfo.v_height / 2) - ((vc_progress->dy + kProgressBarHeight) << vc_uiselect)) / 2); + ox = ((vinfo.v_width - (kProgressBarWidth * vc_uiscale)) / 2); + oy = vinfo.v_height - (vinfo.v_height / 3) - ((kProgressBarHeight * vc_uiscale) / 2); if (kDataBack == flags) { // restore back bits vc_blit_rect(ox + x1, oy, x1, - x2, (kProgressBarHeight << vc_uiselect), 0, (kProgressBarWidth << vc_uiselect), - NULL, vc_progress_meter_backbuffer, flags); + x2, (kProgressBarHeight * vc_uiscale), 0, (kProgressBarWidth * vc_uiscale), + 
NULL, vc_progressmeter_backbuffer, flags); return; } for (x = x1; x < x2; x += w) { - if (x < (kProgressBarCapWidth << vc_uiselect)) + onoff = (x < x3); + endCapPos = ((style && onoff) ? x3 : (kProgressBarWidth * vc_uiscale)); + if (x < (kProgressBarCapWidth * vc_uiscale)) { - if (x2 < (kProgressBarCapWidth << vc_uiselect)) + if (x2 < (kProgressBarCapWidth * vc_uiscale)) w = x2 - x; else - w = (kProgressBarCapWidth << vc_uiselect) - x; - data = progressmeter_leftcap[vc_uiselect & 1][select & 1]; + w = (kProgressBarCapWidth * vc_uiscale) - x; + data = progressmeter_leftcap[vc_uiscale >= 2][onoff]; data += x; vc_blit_rect(ox + x, oy, x, w, - (kProgressBarHeight << vc_uiselect), - (kProgressBarCapWidth << vc_uiselect), - (kProgressBarWidth << vc_uiselect), - data, vc_progress_meter_backbuffer, flags); + (kProgressBarHeight * vc_uiscale), + (kProgressBarCapWidth * vc_uiscale), + (kProgressBarWidth * vc_uiscale), + data, vc_progressmeter_backbuffer, flags); } - else if (x < ((kProgressBarWidth - kProgressBarCapWidth) << vc_uiselect)) + else if (x < (endCapPos - (kProgressBarCapWidth * vc_uiscale))) { - if (x2 < ((kProgressBarWidth - kProgressBarCapWidth) << vc_uiselect)) + if (x2 < (endCapPos - (kProgressBarCapWidth * vc_uiscale))) w = x2 - x; else - w = ((kProgressBarWidth - kProgressBarCapWidth) << vc_uiselect) - x; - data = progressmeter_middle[vc_uiselect & 1][select & 1]; + w = (endCapPos - (kProgressBarCapWidth * vc_uiscale)) - x; + data = progressmeter_middle[vc_uiscale >= 2][onoff]; vc_blit_rect(ox + x, oy, x, w, - (kProgressBarHeight << vc_uiselect), + (kProgressBarHeight * vc_uiscale), 1, - (kProgressBarWidth << vc_uiselect), - data, vc_progress_meter_backbuffer, flags); + (kProgressBarWidth * vc_uiscale), + data, vc_progressmeter_backbuffer, flags); } else { - w = x2 - x; - data = progressmeter_rightcap[vc_uiselect & 1][select & 1]; - data += x - ((kProgressBarWidth - kProgressBarCapWidth) << vc_uiselect); + w = endCapPos - x; + data = progressmeter_rightcap[vc_uiscale >= 2][onoff]; + data += x - (endCapPos - (kProgressBarCapWidth * vc_uiscale)); vc_blit_rect(ox + x, oy, x, w, - (kProgressBarHeight << vc_uiselect), - (kProgressBarCapWidth << vc_uiselect), - (kProgressBarWidth << vc_uiselect), - data, vc_progress_meter_backbuffer, flags); + (kProgressBarHeight * vc_uiscale), + (kProgressBarCapWidth * vc_uiscale), + (kProgressBarWidth * vc_uiscale), + data, vc_progressmeter_backbuffer, flags); } } } -void -vc_enable_progressmeter(int new_value) +extern void IORecordProgressBackbuffer(void * buffer, size_t size, uint32_t theme); + +static void +internal_enable_progressmeter(int new_value) { - spl_t s; - void * new_buffer = NULL; + spl_t s; + void * new_buffer; + boolean_t stashBackbuffer; + stashBackbuffer = FALSE; + new_buffer = NULL; if (new_value) - new_buffer = kalloc((kProgressBarWidth << vc_uiselect) - * (kProgressBarHeight << vc_uiselect) * sizeof(int)); + { + new_buffer = kalloc((kProgressBarWidth * vc_uiscale) + * (kProgressBarHeight * vc_uiscale) * sizeof(int)); + } s = splhigh(); simple_lock(&vc_progress_lock); - if (gc_enabled || !gc_acquired || !gc_graphics_boot) - new_value = FALSE; + if (kProgressMeterUser == new_value) + { + if (gc_enabled || !gc_acquired || !gc_graphics_boot + || (kProgressMeterKernel == vc_progressmeter_enable)) new_value = vc_progressmeter_enable; + } - if (new_value != vc_progress_meter_enable) + if (new_value != vc_progressmeter_enable) { if (new_value) { - vc_progress_meter_backbuffer = new_buffer; - vc_draw_progress_meter(FALSE, kDataAlpha | 
kSave, 0, (kProgressBarWidth << vc_uiselect)); - vc_progress_meter_enable = TRUE; - new_buffer = NULL; - vc_progress_meter_drawn = 0; + if (kProgressMeterOff == vc_progressmeter_enable) + { + vc_progressmeter_backbuffer = new_buffer; + vc_draw_progress_meter(kDataAlpha | kSave, 0, (kProgressBarWidth * vc_uiscale), 0); + new_buffer = NULL; + vc_progressmeter_drawn = 0; + } + vc_progressmeter_enable = new_value; } - else if (vc_progress_meter_backbuffer) + else if (vc_progressmeter_backbuffer) { - vc_draw_progress_meter(0, kDataBack, 0, (kProgressBarWidth << vc_uiselect)); - new_buffer = vc_progress_meter_backbuffer; - vc_progress_meter_backbuffer = NULL; - vc_progress_meter_enable = FALSE; + if (kProgressMeterUser == vc_progressmeter_enable) + { + vc_draw_progress_meter(kDataBack, 0, (kProgressBarWidth * vc_uiscale), vc_progressmeter_drawn); + } + else stashBackbuffer = TRUE; + new_buffer = vc_progressmeter_backbuffer; + vc_progressmeter_backbuffer = NULL; + vc_progressmeter_enable = FALSE; } } @@ -2937,33 +3116,75 @@ vc_enable_progressmeter(int new_value) splx(s); if (new_buffer) - kfree(new_buffer, (kProgressBarWidth << vc_uiselect) - * (kProgressBarHeight << vc_uiselect) * sizeof(int)); + { + if (stashBackbuffer) IORecordProgressBackbuffer(new_buffer, + (kProgressBarWidth * vc_uiscale) + * (kProgressBarHeight * vc_uiscale) + * sizeof(int), + vc_progress_white); + kfree(new_buffer, (kProgressBarWidth * vc_uiscale) + * (kProgressBarHeight * vc_uiscale) * sizeof(int)); + } +} + +static void +internal_set_progressmeter(int new_value) +{ + int x1, x3; + int capRedraw; + // 1 rounded fill, 0 square end + int style = (0 == (2 & vc_progress_withmeter)); + + if ((new_value < 0) || (new_value > kProgressMeterMax)) return; + + if (vc_progressmeter_enable) + { + vc_progressmeter_value = new_value; + + capRedraw = (style ? (kProgressBarCapWidth * vc_uiscale) : 0); + x3 = (((kProgressBarWidth * vc_uiscale) - 2 * capRedraw) * vc_progressmeter_value) / kProgressMeterMax; + x3 += (2 * capRedraw); + + if (x3 > vc_progressmeter_drawn) + { + x1 = capRedraw; + if (x1 > vc_progressmeter_drawn) x1 = vc_progressmeter_drawn; + vc_draw_progress_meter(kDataAlpha, vc_progressmeter_drawn - x1, x3, x3); + } + else + { + vc_draw_progress_meter(kDataAlpha, x3 - capRedraw, vc_progressmeter_drawn, x3); + } + vc_progressmeter_drawn = x3; + } +} + +void +vc_enable_progressmeter(int new_value) +{ + if (kProgressMeterKernel == vc_progressmeter_enable) + { + vc_progressmeter_hold = new_value; + } + else + { + internal_enable_progressmeter(new_value ? 
kProgressMeterUser : kProgressMeterOff); + } } void vc_set_progressmeter(int new_value) { spl_t s; - int x2; - - if ((new_value < 0) | (new_value > 100)) - return; s = splhigh(); simple_lock(&vc_progress_lock); - if (vc_progress_meter_enable) + if (vc_progressmeter_enable && (kProgressMeterKernel != vc_progressmeter_enable)) { - vc_progress_meter_value = new_value; - x2 = ((kProgressBarWidth << vc_uiselect) * new_value) / 100; - if (x2 > vc_progress_meter_drawn) - vc_draw_progress_meter(TRUE, kDataAlpha, vc_progress_meter_drawn, x2); - else - vc_draw_progress_meter(FALSE, kDataAlpha, x2, vc_progress_meter_drawn); - vc_progress_meter_drawn = x2; + internal_set_progressmeter((new_value * kProgressMeterMax) / 100); } - + simple_unlock(&vc_progress_lock); splx(s); } diff --git a/osfmk/console/video_console.h b/osfmk/console/video_console.h index 158c140e6..5e38a23f0 100644 --- a/osfmk/console/video_console.h +++ b/osfmk/console/video_console.h @@ -91,6 +91,7 @@ typedef struct vc_progress_element vc_progress_element; void vc_progress_initialize( vc_progress_element * desc, const unsigned char * data1x, const unsigned char * data2x, + const unsigned char * data3x, const unsigned char * clut ); void vc_progress_set(boolean_t enable, uint32_t vc_delay); @@ -106,8 +107,9 @@ int vc_display_lzss_icon(uint32_t dst_x, uint32_t dst_y, extern void vc_enable_progressmeter(int new_value); extern void vc_set_progressmeter(int new_value); -extern int vc_progress_meter_enable; -extern int vc_progress_meter_value; +extern int vc_progressmeter_enable; +extern int vc_progressmeter_value; +extern void vc_progress_setdiskspeed(uint32_t speed); #ifdef __cplusplus diff --git a/osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c b/osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c new file mode 100644 index 000000000..f797d0378 --- /dev/null +++ b/osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c @@ -0,0 +1,416 @@ +/* + * ccdrbg_nisthmac.c + * corecrypto + * + * Created by John Hurley on 04/30/14. + * Copyright 2014 Apple, Inc. All rights reserved. + * + */ + +#include +#include +#include +#if !CC_KERNEL +#include +#endif + + +#if CC_KERNEL +#include +static int hmac_dbrg_error(int val, __unused const char *msg) { + return val; +} +#else +static int hmac_dbrg_error(int val, const char *msg) { + if (msg) { + char buffer[1024]; + snprintf(buffer, sizeof(buffer)-1, "Error: %s", msg); + cc_print(buffer, 0, NULL); + } + return val; +} +#endif + +// Test vectors at: +// http://csrc.nist.gov/groups/STM/cavp/#05 +// http://csrc.nist.gov/groups/STM/cavp/documents/drbg/drbgtestvectors.zip +// + +/* + This HMAC DBRG is described in: + + SP 800-90 A Rev. 1 (2nd Draft) + DRAFT Recommendation for Random Number Generation Using Deterministic Random Bit Generators + April 2014 + + SP 800-90A (revision 1), Recommendation for Random Number Generation Using Deterministic Random Bit Generators + http://csrc.nist.gov/publications/drafts/800-90/sp800_90a_r1_draft.pdf + + See in particular + - 10.1.2 HMAC_DRBG (p 45) + - B.2 HMAC_DRBGExample (p 83) + + We only support one security strength, 256 bits + In addition, we limit the personalization string to 20 bytes + Note that the example in B.2 is very limited, refer to §10.1.2 for more +*/ + + + +/* + The Get_entropy_input function is specified in pseudocode in [SP 800-90C] for various RBG constructions; + however, in general, the function has the following meaning: + Get_entropy_input: A function that is used to obtain entropy input. 
The function call is: + (status, entropy_input) = Get_entropy_input (min_entropy, min_ length, max_ length, prediction_resistance_request), + which requests a string of bits (entropy_input) with at least min_entropy bits of entropy. The length for the string + shall be equal to or greater than min_length bits, and less than or equal to max_length bits. The + prediction_resistance_request parameter indicates whether or not prediction resistance is to be provided during the request + (i.e., whether fresh entropy is required). A status code is also returned from the function. + */ + +/* + Check the validity of the input parameters. + 1. If (requested_instantiation_security_strength > 256), then Return (“Invalid + requested_instantiation_security_strength”, −1). + 2. If (len (personalization_string) > 160), then Return (“Personalization_string + too long”, −1) + Comment: Set the security_strength to one of the valid security strengths. + 3. If (requested_security_strength ≤ 112), then security_strength = 112 Else (requested_ security_strength ≤ 128), then security_strength = 128 Else (requested_ security_strength ≤ 192), then security_strength = 192 Else security_strength = 256. + Comment: Get the entropy_input and the nonce. + 4. min_entropy = 1.5 × security_strength. + 5. (status, entropy_input) = Get_entropy_input (min_entropy, 1000). + 6. If (status ≠ “Success”), then Return (status, −1). + */ + +/* + 1. highest_supported_security_strength = 256. + 2. Output block (outlen) = 256 bits. + 3. Required minimum entropy for the entropy input at instantiation = 3/2 security_strength (this includes the entropy required for the nonce). + 4. Seed length (seedlen) = 440 bits. + 5. Maximum number of bits per request (max_number_of_bits_per_request) = 7500 + bits. + 6. Reseed_interval (reseed_ interval) = 10,000 requests. + 7. Maximum length of the personalization string (max_personalization_string_length) = 160 bits. + 8. Maximum length of the entropy input (max _length) = 1000 bits. + */ + +// +// Defines below based on 10.1, Table 2: Definitions for Hash-Based DRBG Mechanisms (p 39) +// + +#define NH_MAX_SECURITY_STRENGTH 256 // in bits +#define NH_MAX_OUTPUT_BLOCK_SIZE (CCSHA512_OUTPUT_SIZE) // 512 bits, i.e. 64 bytes (CCSHA512_OUTPUT_SIZE) +#define NH_MAX_KEY_SIZE (CCSHA512_OUTPUT_SIZE) // 512 bits, i.e. 64 bytes (CCSHA512_OUTPUT_SIZE) +#define NH_REQUIRED_MIN_ENTROPY(s) (3*(s)/2) +#define NH_MAX_BYTES_PER_REQUEST (0xffff) // in bytes, 2^^16 +#define NH_RESEED_INTERVAL ((unsigned long)0xffffffffffff) // 2^^48 requests between reseeds +#define NH_MAX_PERSONALIZE_LEN (1024) // 1024 bytes +#define NH_MIN_ENTROPY_LEN (NH_MAX_SECURITY_STRENGTH/8) +#define NH_MAX_ENTROPY_LEN (0xffffffff) // in bytes, 2^^32 + +struct ccdrbg_nisthmac_state { + const struct ccdrbg_info *info; + size_t bytesLeft; + size_t reseed_counter; + size_t vsize; + size_t keysize; + uint8_t v[NH_MAX_OUTPUT_BLOCK_SIZE]; + uint8_t key[NH_MAX_KEY_SIZE]; +}; + +#ifdef DEBUGFOO +static void dumpState(const char *label, struct ccdrbg_nisthmac_state *state) { + cc_print(label, state->vsize, state->v); + cc_print(label, state->keysize, state->key); +} +#endif + +/* + NIST SP 800-90A, Rev. 1 HMAC_DRBG April 2014, p 46 + + HMAC_DRBG_Update (provided_data, K, V): + 1. provided_data: The data to be used. + 2. K: The current value of Key. + 3. V: The current value of V. + Output: + 1. K: The new value for Key. + 2. V: The new value for V. + + HMAC_DRBG Update Process: + + 1. K = HMAC (K, V || 0x00 || provided_data). + 2. V=HMAC(K,V). + 3. 
If (provided_data = Null), then return K and V. + 4. K = HMAC (K, V || 0x01 || provided_data). + 5. V=HMAC(K,V). + 6. Return K and V. + */ + +// was: unsigned long providedDataLength, const void *providedData + +/* + To handle the case where we have three strings that are concatenated, + we pass in three (ptr, len) pairs + */ + +static int hmac_dbrg_update(struct ccdrbg_state *drbg, + unsigned long daLen, const void *da, + unsigned long dbLen, const void *db, + unsigned long dcLen, const void *dc + ) +{ + struct ccdrbg_nisthmac_state *state = (struct ccdrbg_nisthmac_state *)drbg; + const struct ccdrbg_nisthmac_custom *custom = state->info->custom; + const struct ccdigest_info *di = custom->di; + + const unsigned char cZero = 0x00; + const unsigned char cOne = 0x01; + cchmac_ctx_decl(di->state_size, di->block_size, ctx); + + cchmac_init(di, ctx, state->keysize, state->key); + // 1. K = HMAC (K, V || 0x00 || provided_data). + cchmac_update(di, ctx, state->vsize, state->v); + cchmac_update(di, ctx, 1, &cZero); + if (da && daLen) cchmac_update(di, ctx, daLen, da); + if (db && dbLen) cchmac_update(di, ctx, dbLen, db); + if (dc && dcLen) cchmac_update(di, ctx, dcLen, dc); + cchmac_final(di, ctx, state->key); + + // 2. V=HMAC(K,V). + cchmac(di, state->keysize, state->key, state->vsize, state->v, state->v); + + // 3. If (provided_data = Null), then return K and V. + // One parameter must be non-empty, or return + if (!((da && daLen) || (db && dbLen) || (dc && dcLen))) + return 0; + + // 4. K = HMAC (K, V || 0x01 || provided_data). + cchmac_init(di, ctx, state->keysize, state->key); + cchmac_update(di, ctx, state->vsize, state->v); + cchmac_update(di, ctx, 1, &cOne); + if (da && daLen) cchmac_update(di, ctx, daLen, da); + if (db && dbLen) cchmac_update(di, ctx, dbLen, db); + if (dc && dcLen) cchmac_update(di, ctx, dcLen, dc); + cchmac_final(di, ctx, state->key); + + // 5. V=HMAC(K,V). + cchmac(di, state->keysize, state->key, state->vsize, state->v, state->v); + + return 0; +} + +/* + NIST SP 800-90A, Rev. 1 April 2014 B.2.2, p 84 + + HMAC_DRBG_Instantiate_algorithm (...): + Input: bitstring (entropy_input, personalization_string). + Output: bitstring (V, Key), integer reseed_counter. + + Process: + 1. seed_material = entropy_input || personalization_string. + 2. Set Key to outlen bits of zeros. + 3. Set V to outlen/8 bytes of 0x01. + 4. (Key, V) = HMAC_DRBG_Update (seed_material, Key, V). + 5. reseed_counter = 1. + 6. Return (V, Key, reseed_counter). +*/ + +// This version does not do memory allocation + +static int hmac_dbrg_instantiate_algorithm(struct ccdrbg_state *drbg, + unsigned long entropyLength, const void *entropy, + unsigned long nonceLength, const void *nonce, + unsigned long psLength, const void *ps) +{ + // TODO: The NIST code passes nonce (i.e. HMAC key) to generate, but cc interface isn't set up that way + + struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg; + + // 1. seed_material = entropy_input || nonce || personalization_string. + + // 2. Set Key to outlen bits of zeros. + cc_zero(state->keysize, state->key); + + // 3. Set V to outlen/8 bytes of 0x01. + CC_MEMSET(state->v, 0x01, state->vsize); + + // 4. (Key, V) = HMAC_DRBG_Update (seed_material, Key, V). + hmac_dbrg_update(drbg, entropyLength, entropy, nonceLength, nonce, psLength, ps); + + // 5. reseed_counter = 1. 
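+ // Note (per SP 800-90A): the counter starts at 1, not 0 -- it indexes the
+ // next generate request, and generate() refuses service once the counter
+ // exceeds the reseed interval (NH_RESEED_INTERVAL here).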
+ state->reseed_counter = 1; + + return 0; +} + +// In NIST terminology, the nonce is the HMAC key and ps is the personalization string + +static int init(const struct ccdrbg_info *info, struct ccdrbg_state *drbg, + unsigned long entropyLength, const void* entropy, + unsigned long nonceLength, const void* nonce, + unsigned long psLength, const void* ps) +{ + struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg; + const struct ccdrbg_nisthmac_custom *custom = NULL; + const struct ccdigest_info *di = NULL; + size_t security_strength; + size_t min_entropy; + + state->bytesLeft = 0; + state->info = info; + custom = state->info->custom; + di = custom->di; + state->vsize = di->output_size; // TODO: state_size? or output_size + state->keysize = di->output_size; // TODO: state size? + + security_strength = NH_MAX_SECURITY_STRENGTH; + + if (psLength > NH_MAX_PERSONALIZE_LEN) // "Personalization_string too long" + return hmac_dbrg_error(-1, "Personalization_string too long"); + + if (entropyLength > NH_MAX_ENTROPY_LEN) // Supplied too much entropy + return hmac_dbrg_error(-1, "Supplied too much entropy"); + + // 4. min_entropy = 1.5 × security_strength. + min_entropy = NH_REQUIRED_MIN_ENTROPY(security_strength); + + // 7. (V, Key, reseed_counter) = HMAC_DRBG_Instantiate_algorithm (entropy_input, personalization_string). + + hmac_dbrg_instantiate_algorithm(drbg, entropyLength, entropy, nonceLength, nonce, psLength, ps); + +#ifdef DEBUGFOO + dumpState("Init: ", state); +#endif + return 0; +} + +/* + 10.1.2.4 Reseeding an HMAC_DRBG Instantiation + Notes for the reseed function specified in Section 9.2: + The reseeding of an HMAC_DRBG instantiation requires a call to the Reseed_function specified in Section 9.2. + Process step 6 of that function calls the reseed algorithm specified in this section. The values for min_length + are provided in Table 2 of Section 10.1. + + The reseed algorithm: + Let HMAC_DRBG_Update be the function specified in Section 10.1.2.2. The following process or its equivalent + shall be used as the reseed algorithm for this DRBG mechanism (see step 6 of the reseed process in Section 9.2): + + HMAC_DRBG_Reseed_algorithm (working_state, entropy_input, additional_input): + 1. working_state: The current values for V, Key and reseed_counter (see Section 10.1.2.1). + 2. entropy_input: The string of bits obtained from the source of entropy input. + 3. additional_input: The additional input string received from the consuming application. + Note that the length of the additional_input string may be zero. + + Output: + 1. new_working_state: The new values for V, Key and reseed_counter. HMAC_DRBG Reseed Process: + 1. seed_material = entropy_input || additional_input. + 2. (Key, V) = HMAC_DRBG_Update (seed_material, Key, V). 3. reseed_counter = 1. + 4. Return V, Key and reseed_counter as the new_working_state. +*/ + +static int reseed(struct ccdrbg_state *drbg, + unsigned long entropyLength, const void *entropy, + unsigned long inputlen, const void *input) +{ + struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg; + + int rx = hmac_dbrg_update(drbg, entropyLength, entropy, inputlen, input, 0, NULL); + state->reseed_counter = 1; + +#ifdef DEBUGFOO + dumpState("Reseed: ", state); +#endif + return rx; +} + +/* + HMAC_DRBG_Generate_algorithm: + Input: bitstring (V, Key), integer (reseed_counter, requested_number_of_bits). + Output: string status, bitstring (pseudorandom_bits, V, Key), integer reseed_counter. + + Process: + 1. 
If (reseed_counter ≥ 10,000), then Return (“Reseed required”, Null, V, Key, reseed_counter). + 2. temp = Null. + 3. While (len (temp) < requested_no_of_bits) do: + 3.1 V = HMAC (Key, V). + 3.2 temp = temp || V. + 4. pseudorandom_bits = Leftmost (requested_no_of_bits) of temp. + 5. (Key, V) = HMAC_DRBG_Update (Null, Key, V). + 6. reseed_counter = reseed_counter + 1. + 7. Return (“Success”, pseudorandom_bits, V, Key, reseed_counter). +*/ + +static int generate(struct ccdrbg_state *drbg, unsigned long numBytes, void *outBytes, + unsigned long inputLen, const void *input) +{ + struct ccdrbg_nisthmac_state *state = (struct ccdrbg_nisthmac_state *)drbg; + const struct ccdrbg_nisthmac_custom *custom = state->info->custom; + const struct ccdigest_info *di = custom->di; + + if (numBytes > NH_MAX_BYTES_PER_REQUEST) + return hmac_dbrg_error(CCDRBG_STATUS_PARAM_ERROR, + "Requested too many bytes in one request"); + + // 1. If (reseed_counter > 2^^48), then Return (“Reseed required”, Null, V, Key, reseed_counter). + if (state->reseed_counter > NH_RESEED_INTERVAL) + return hmac_dbrg_error(CCDRBG_STATUS_NEED_RESEED, "Reseed required"); + + // 2. If additional_input ≠ Null, then (Key, V) = HMAC_DRBG_Update (additional_input, Key, V). + if (input && inputLen) + hmac_dbrg_update(drbg, inputLen, input, 0, NULL, 0, NULL); + + // hmac_dbrg_generate_algorithm + char *outPtr = (char *) outBytes; + while (numBytes > 0) { + if (!state->bytesLeft) { + // 5. V=HMAC(K,V). + cchmac(di, state->keysize, state->key, state->vsize, state->v, state->v); + state->bytesLeft = di->output_size;//di->output_size; state->vsize + } + size_t outLength = numBytes > state->bytesLeft ? state->bytesLeft : numBytes; + memcpy(outPtr, state->v, outLength); + state->bytesLeft -= outLength; + outPtr += outLength; + numBytes -= outLength; + } + + // 6. (Key, V) = HMAC_DRBG_Update (additional_input, Key, V). + hmac_dbrg_update(drbg, inputLen, input, 0, NULL, 0, NULL); + + // 7. reseed_counter = reseed_counter + 1. + state->reseed_counter++; + +#ifdef DEBUGFOO + dumpState("generate: ", state); +#endif + + return 0; +} + +static void done(struct ccdrbg_state *drbg) +{ + struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg; + cc_zero(sizeof(state->v), state->v); + cc_zero(sizeof(state->key), state->key); +} + +struct ccdrbg_info ccdrbg_nisthmac_info = { + .size = sizeof(struct ccdrbg_nisthmac_state) + sizeof(struct ccdrbg_nisthmac_custom), + .init = init, + .reseed = reseed, + .generate = generate, + .done = done, + .custom = NULL +}; + +/* This initializes an info object with the right options */ +void ccdrbg_factory_nisthmac(struct ccdrbg_info *info, const struct ccdrbg_nisthmac_custom *custom) +{ + info->size = sizeof(struct ccdrbg_nisthmac_state) + sizeof(struct ccdrbg_nisthmac_custom); + info->init = init; + info->generate = generate; + info->reseed = reseed; + info->done = done; + info->custom = custom; +}; + diff --git a/osfmk/corecrypto/ccdigest/src/ccdigest_init.c b/osfmk/corecrypto/ccdigest/src/ccdigest_init.c new file mode 100644 index 000000000..b9b3b8852 --- /dev/null +++ b/osfmk/corecrypto/ccdigest/src/ccdigest_init.c @@ -0,0 +1,17 @@ +/* + * ccdigest_init.c + * corecrypto + * + * Created by Michael Brouwer on 11/30/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. 
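A minimal caller-side sketch of the factory/init/generate/done flow defined by this file. Everything here is illustrative, not part of the patch: drbg_usage_sketch, the 1 KiB state buffer, and the zeroed seed buffers are hypothetical, ccsha256_di() is assumed to be available from ccsha2.h, and a real caller must supply genuine entropy of at least NH_REQUIRED_MIN_ENTROPY(256) = 384 bits (48 bytes) rather than zeros.

#include <corecrypto/ccdrbg.h>
#include <corecrypto/ccsha2.h>
#include <stdint.h>

static int drbg_usage_sketch(void)
{
    struct ccdrbg_info info;
    const struct ccdrbg_nisthmac_custom custom = { .di = ccsha256_di() };
    ccdrbg_factory_nisthmac(&info, &custom);

    uint8_t entropy[48] = { 0 };   /* placeholder; must be real entropy */
    uint8_t nonce[16]   = { 0 };   /* placeholder nonce */
    const char ps[]     = "example personalization";

    /* Caller provides the state; info.size says how much is needed.
     * (Alignment is glossed over for brevity in this sketch.) */
    uint8_t state_buf[1024];
    struct ccdrbg_state *drbg = (struct ccdrbg_state *)state_buf;
    if (info.size > sizeof(state_buf))
        return -1;

    uint8_t out[64];
    int rc = info.init(&info, drbg, sizeof(entropy), entropy,
                       sizeof(nonce), nonce, sizeof(ps) - 1, ps);
    if (rc == 0)
        rc = info.generate(drbg, sizeof(out), out, 0, NULL); /* no additional input */

    info.done(drbg);               /* zeroizes V and Key */
    return rc;
}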
+ * + */ + +#include +#include + +void ccdigest_init(const struct ccdigest_info *di, ccdigest_ctx_t ctx) { + ccdigest_copy_state(di, ccdigest_state_ccn(di, ctx), di->initial_state); + ccdigest_nbits(di, ctx) = 0; + ccdigest_num(di, ctx) = 0; +} diff --git a/osfmk/corecrypto/ccdigest/src/ccdigest_update.c b/osfmk/corecrypto/ccdigest/src/ccdigest_update.c new file mode 100644 index 000000000..1f8b9e54b --- /dev/null +++ b/osfmk/corecrypto/ccdigest/src/ccdigest_update.c @@ -0,0 +1,40 @@ +/* + * ccdigest_update.c + * corecrypto + * + * Created by Michael Brouwer on 11/30/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. + * + */ + +#include +#include + +void ccdigest_update(const struct ccdigest_info *di, ccdigest_ctx_t ctx, + unsigned long len, const void *data) { + char * data_ptr = (char *) data; + while (len > 0) { + if (ccdigest_num(di, ctx) == 0 && len > di->block_size) { + unsigned long nblocks = len / di->block_size; + di->compress(ccdigest_state(di, ctx), nblocks, data_ptr); + unsigned long nbytes = nblocks * di->block_size; + len -= nbytes; + data_ptr += nbytes; + ccdigest_nbits(di, ctx) += nbytes * 8; + } else { + unsigned long n = di->block_size - ccdigest_num(di, ctx); + if (len < n) + n = len; + CC_MEMCPY(ccdigest_data(di, ctx) + ccdigest_num(di, ctx), data_ptr, n); + /* typecast: less than block size, will always fit into an int */ + ccdigest_num(di, ctx) += (unsigned int)n; + len -= n; + data_ptr += n; + if (ccdigest_num(di, ctx) == di->block_size) { + di->compress(ccdigest_state(di, ctx), 1, ccdigest_data(di, ctx)); + ccdigest_nbits(di, ctx) += ccdigest_num(di, ctx) * 8; + ccdigest_num(di, ctx) = 0; + } + } + } +} diff --git a/osfmk/corecrypto/cchmac/src/cchmac.c b/osfmk/corecrypto/cchmac/src/cchmac.c new file mode 100644 index 000000000..28a9a2fe4 --- /dev/null +++ b/osfmk/corecrypto/cchmac/src/cchmac.c @@ -0,0 +1,20 @@ +/* + * cchmac.c + * corecrypto + * + * Created by Michael Brouwer on 12/7/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. + * + */ + +#include + +void cchmac(const struct ccdigest_info *di, + unsigned long key_len, const void *key, + unsigned long data_len, const void *data, unsigned char *mac) { + cchmac_di_decl(di, hc); + cchmac_init(di, hc, key_len, key); + cchmac_update(di, hc, data_len, data); + cchmac_final(di, hc, mac); + cchmac_di_clear(di, hc); +} diff --git a/osfmk/corecrypto/cchmac/src/cchmac_final.c b/osfmk/corecrypto/cchmac/src/cchmac_final.c new file mode 100644 index 000000000..6ac62eedf --- /dev/null +++ b/osfmk/corecrypto/cchmac/src/cchmac_final.c @@ -0,0 +1,21 @@ +/* + * cchmac_final.c + * corecrypto + * + * Created by Michael Brouwer on 12/7/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. + * + */ + +#include +#include + +void cchmac_final(const struct ccdigest_info *di, cchmac_ctx_t hc, + unsigned char *mac) { + ccdigest_final(di, cchmac_digest_ctx(di, hc), cchmac_data(di, hc)); + /* typecast: output size will alwys fit in an unsigned int */ + cchmac_num(di, hc) = (unsigned int)di->output_size; + cchmac_nbits(di, hc) = di->block_size * 8; + ccdigest_copy_state(di, cchmac_istate32(di, hc), cchmac_ostate32(di, hc)); + ccdigest_final(di, cchmac_digest_ctx(di, hc), mac); +} diff --git a/osfmk/corecrypto/cchmac/src/cchmac_init.c b/osfmk/corecrypto/cchmac/src/cchmac_init.c new file mode 100644 index 000000000..0e2db29e5 --- /dev/null +++ b/osfmk/corecrypto/cchmac/src/cchmac_init.c @@ -0,0 +1,59 @@ +/* + * cchmac_init.c + * corecrypto + * + * Created by Michael Brouwer on 12/7/10. 
+ * Copyright 2010,2011 Apple Inc. All rights reserved. + * + */ + +#include +#include +#include + +/* The HMAC_ transform looks like: + (K XOR opad || (K XOR ipad || text)) + Where K is a n byte key + ipad is the byte 0x36 repeated 64 times. + opad is the byte 0x5c repeated 64 times. + text is the data being protected. + */ +void cchmac_init(const struct ccdigest_info *di, cchmac_ctx_t hc, + unsigned long key_len, const void *key_data) { + const unsigned char *key = key_data; + + /* Set cchmac_data(di, hc) to key ^ opad. */ + unsigned long byte = 0; + if (key_len <= di->block_size) { + for (;byte < key_len; ++byte) { + cchmac_data(di, hc)[byte] = key[byte] ^ 0x5c; + } + } else { + /* Key is longer than di->block size, reset it to key=digest(key) */ + ccdigest_init(di, cchmac_digest_ctx(di, hc)); + ccdigest_update(di, cchmac_digest_ctx(di, hc), key_len, key); + ccdigest_final(di, cchmac_digest_ctx(di, hc), cchmac_data(di, hc)); + key_len = di->output_size; + for (;byte < key_len; ++byte) { + cchmac_data(di, hc)[byte] ^= 0x5c; + } + } + /* Fill remainder of cchmac_data(di, hc) with opad. */ + if (key_len < di->block_size) { + CC_MEMSET(cchmac_data(di, hc) + key_len, 0x5c, di->block_size - key_len); + } + + /* Set cchmac_ostate32(di, hc) to the state of the first round of the + outer digest. */ + ccdigest_copy_state(di, cchmac_ostate32(di, hc), di->initial_state); + di->compress(cchmac_ostate(di, hc), 1, cchmac_data(di, hc)); + + /* Set cchmac_data(di, hc) to key ^ ipad. */ + for (byte = 0; byte < di->block_size; ++byte) { + cchmac_data(di, hc)[byte] ^= (0x5c ^ 0x36); + } + ccdigest_copy_state(di, cchmac_istate32(di, hc), di->initial_state); + di->compress(cchmac_istate(di, hc), 1, cchmac_data(di, hc)); + cchmac_num(di, hc) = 0; + cchmac_nbits(di, hc) = di->block_size * 8; +} diff --git a/osfmk/corecrypto/cchmac/src/cchmac_update.c b/osfmk/corecrypto/cchmac/src/cchmac_update.c new file mode 100644 index 000000000..b6c5df262 --- /dev/null +++ b/osfmk/corecrypto/cchmac/src/cchmac_update.c @@ -0,0 +1,15 @@ +/* + * cchmac_update.c + * corecrypto + * + * Created by Michael Brouwer on 12/7/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. + * + */ + +#include + +void cchmac_update(const struct ccdigest_info *di, cchmac_ctx_t hc, + unsigned long data_len, const void *data) { + ccdigest_update(di, cchmac_digest_ctx(di, hc), data_len, data); +} diff --git a/osfmk/corecrypto/ccn/src/ccn_set.c b/osfmk/corecrypto/ccn/src/ccn_set.c new file mode 100644 index 000000000..bd95a27a7 --- /dev/null +++ b/osfmk/corecrypto/ccn/src/ccn_set.c @@ -0,0 +1,16 @@ +// +// ccn_set.c +// corecrypto +// +// Created by Fabrice Gautier on 2/17/12. +// Copyright (c) 2012 Apple, Inc. All rights reserved. +// + +#include + +#if !CCN_SET_ASM +void ccn_set(cc_size n, cc_unit *r, const cc_unit *s) +{ + CC_MEMCPY(r, s, ccn_sizeof_n(n)); +} +#endif diff --git a/osfmk/corecrypto/ccsha1/src/ccdigest_final_64be.c b/osfmk/corecrypto/ccsha1/src/ccdigest_final_64be.c new file mode 100644 index 000000000..68d2cd8aa --- /dev/null +++ b/osfmk/corecrypto/ccsha1/src/ccdigest_final_64be.c @@ -0,0 +1,41 @@ +/* + * ccdigest_final_64be.c + * corecrypto + * + * Created by Michael Brouwer on 12/1/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. 
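The one-shot cchmac() entry point above simply composes init/update/final over a caller-supplied digest descriptor. A small illustrative use, with a hypothetical function name and placeholder key/message; ccsha1_eay_di is the SHA-1 descriptor added later in this patch, and any ccdigest_info works in its place:

#include <corecrypto/cchmac.h>
#include <corecrypto/ccsha1.h>

static void hmac_sketch(void)
{
    const struct ccdigest_info *di = &ccsha1_eay_di;
    static const char key[] = "key";
    static const char msg[] = "The quick brown fox jumps over the lazy dog";
    unsigned char mac[CCSHA1_OUTPUT_SIZE];   /* 20 bytes for SHA-1 */

    /* (di, key_len, key, data_len, data, mac) -- lengths exclude the NULs */
    cchmac(di, sizeof(key) - 1, key, sizeof(msg) - 1, msg, mac);
    /* mac now holds the tag; cchmac() has already cleared its context. */
}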
+ * + */ + +#include +#include + +/* This can be used for SHA1, SHA256 and SHA224 */ +void ccdigest_final_64be(const struct ccdigest_info *di, ccdigest_ctx_t ctx, + unsigned char *digest) { + ccdigest_nbits(di, ctx) += ccdigest_num(di, ctx) * 8; + ccdigest_data(di, ctx)[ccdigest_num(di, ctx)++] = 0x80; + + /* If we don't have at least 8 bytes (for the length) left we need to add + a second block. */ + if (ccdigest_num(di, ctx) > 64 - 8) { + while (ccdigest_num(di, ctx) < 64) { + ccdigest_data(di, ctx)[ccdigest_num(di, ctx)++] = 0; + } + di->compress(ccdigest_state(di, ctx), 1, ccdigest_data(di, ctx)); + ccdigest_num(di, ctx) = 0; + } + + /* pad upto block_size minus 8 with 0s */ + while (ccdigest_num(di, ctx) < 64 - 8) { + ccdigest_data(di, ctx)[ccdigest_num(di, ctx)++] = 0; + } + + CC_STORE64_BE(ccdigest_nbits(di, ctx), ccdigest_data(di, ctx) + 64 - 8); + di->compress(ccdigest_state(di, ctx), 1, ccdigest_data(di, ctx)); + + /* copy output */ + for (unsigned int i = 0; i < di->output_size / 4; i++) { + CC_STORE32_BE(ccdigest_state_u32(di, ctx)[i], digest+(4*i)); + } +} diff --git a/osfmk/corecrypto/ccsha1/src/ccsha1_eay.c b/osfmk/corecrypto/ccsha1/src/ccsha1_eay.c new file mode 100644 index 000000000..84a9887a0 --- /dev/null +++ b/osfmk/corecrypto/ccsha1/src/ccsha1_eay.c @@ -0,0 +1,294 @@ +/* + * ccsha1_eay.c + * corecrypto + * + * Created by Fabrice Gautier on 12/6/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. + * + * Based on ssleay implementation. + * + */ + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. 
If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#define USE_SUPER_COOL_NEW_CCOID_T +#include +#include +#include + + +#ifndef SHA_LONG_LOG2 +#define SHA_LONG_LOG2 2 /* default to 32 bits */ +#endif + + +#define ROTATE(b, n) CC_ROLc(b, n) + +#define Xupdate(a,ix,ia,ib,ic,id) ( (a)=(ia^ib^ic^id), \ + ix=(a)=ROTATE((a),1) \ + ) + +#define MD32_REG_T uint32_t + +#define HOST_c2l(data, l) CC_LOAD32_BE(l, data); data+=4; + +#define K_00_19 0x5a827999 +#define K_20_39 0x6ed9eba1 +#define K_40_59 0x8f1bbcdc +#define K_60_79 0xca62c1d6 + +/* As pointed out by Wei Dai , F() below can be + * simplified to the code in F_00_19. Wei attributes these optimisations + * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel. + * #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) + * I've just become aware of another tweak to be made, again from Wei Dai, + * in F_40_59, (x&a)|(y&a) -> (x|y)&a + */ +#define F_00_19(b,c,d) ((((c) ^ (d)) & (b)) ^ (d)) +#define F_20_39(b,c,d) ((b) ^ (c) ^ (d)) +#define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d))) +#define F_60_79(b,c,d) F_20_39(b,c,d) + +#define BODY_00_15(i,a,b,c,d,e,f,xi) \ + (f)=xi+(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_16_19(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \ + Xupdate(f,xi,xa,xb,xc,xd); \ + (f)+=(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_20_31(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \ + Xupdate(f,xi,xa,xb,xc,xd); \ + (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_32_39(i,a,b,c,d,e,f,xa,xb,xc,xd) \ + Xupdate(f,xa,xa,xb,xc,xd); \ + (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_40_59(i,a,b,c,d,e,f,xa,xb,xc,xd) \ + Xupdate(f,xa,xa,xb,xc,xd); \ + (f)+=(e)+K_40_59+ROTATE((a),5)+F_40_59((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_60_79(i,a,b,c,d,e,f,xa,xb,xc,xd) \ + Xupdate(f,xa,xa,xb,xc,xd); \ + (f)=xa+(e)+K_60_79+ROTATE((a),5)+F_60_79((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#ifdef X +#undef X +#endif + +#ifndef MD32_XARRAY + /* + * Originally X was an array. As it's automatic it's natural + * to expect RISC compiler to accomodate at least part of it in + * the register bank, isn't it? 
Unfortunately not all compilers + * "find" this expectation reasonable:-( On order to make such + * compilers generate better code I replace X[] with a bunch of + * X0, X1, etc. See the function body below... + * + */ +# define X(i) XX##i +#else + /* + * However! Some compilers (most notably HP C) get overwhelmed by + * that many local variables so that we have to have the way to + * fall down to the original behavior. + */ +# define X(i) XX[i] +#endif + +static void sha1_compress(ccdigest_state_t s, unsigned long num, const void *buf) +{ + const unsigned char *data=buf; + register uint32_t A,B,C,D,E,T,l; +#ifndef MD32_XARRAY + uint32_t XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, + XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15; +#else + uint32_t XX[16]; +#endif + uint32_t *state=ccdigest_u32(s); + + A=state[0]; + B=state[1]; + C=state[2]; + D=state[3]; + E=state[4]; + + for (;;) + { + + HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l; + BODY_00_15( 0,A,B,C,D,E,T,X( 0)); HOST_c2l(data,l); X( 2)=l; + BODY_00_15( 1,T,A,B,C,D,E,X( 1)); HOST_c2l(data,l); X( 3)=l; + BODY_00_15( 2,E,T,A,B,C,D,X( 2)); HOST_c2l(data,l); X( 4)=l; + BODY_00_15( 3,D,E,T,A,B,C,X( 3)); HOST_c2l(data,l); X( 5)=l; + BODY_00_15( 4,C,D,E,T,A,B,X( 4)); HOST_c2l(data,l); X( 6)=l; + BODY_00_15( 5,B,C,D,E,T,A,X( 5)); HOST_c2l(data,l); X( 7)=l; + BODY_00_15( 6,A,B,C,D,E,T,X( 6)); HOST_c2l(data,l); X( 8)=l; + BODY_00_15( 7,T,A,B,C,D,E,X( 7)); HOST_c2l(data,l); X( 9)=l; + BODY_00_15( 8,E,T,A,B,C,D,X( 8)); HOST_c2l(data,l); X(10)=l; + BODY_00_15( 9,D,E,T,A,B,C,X( 9)); HOST_c2l(data,l); X(11)=l; + BODY_00_15(10,C,D,E,T,A,B,X(10)); HOST_c2l(data,l); X(12)=l; + BODY_00_15(11,B,C,D,E,T,A,X(11)); HOST_c2l(data,l); X(13)=l; + BODY_00_15(12,A,B,C,D,E,T,X(12)); HOST_c2l(data,l); X(14)=l; + BODY_00_15(13,T,A,B,C,D,E,X(13)); HOST_c2l(data,l); X(15)=l; + BODY_00_15(14,E,T,A,B,C,D,X(14)); + BODY_00_15(15,D,E,T,A,B,C,X(15)); + + BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13)); + BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14)); + BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15)); + BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0)); + + BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1)); + BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2)); + BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3)); + BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4)); + BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5)); + BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6)); + BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7)); + BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8)); + BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9)); + BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10)); + BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11)); + BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12)); + + BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13)); + BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14)); + BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15)); + BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0)); + BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1)); + BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2)); + BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3)); + BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4)); + + BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5)); + BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6)); + BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7)); + BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8)); + 
BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9)); + BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10)); + BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11)); + BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12)); + BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13)); + BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14)); + BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15)); + BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0)); + BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1)); + BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2)); + BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3)); + BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4)); + BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5)); + BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6)); + BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7)); + BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8)); + + BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9)); + BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10)); + BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11)); + BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12)); + BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13)); + BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14)); + BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15)); + BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0)); + BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1)); + BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2)); + BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3)); + BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4)); + BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5)); + BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6)); + BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7)); + BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8)); + BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9)); + BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10)); + BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11)); + BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12)); + + state[0]=(state[0]+E)&0xffffffff; + state[1]=(state[1]+T)&0xffffffff; + state[2]=(state[2]+A)&0xffffffff; + state[3]=(state[3]+B)&0xffffffff; + state[4]=(state[4]+C)&0xffffffff; + + if (--num <= 0) break; + + A=state[0]; + B=state[1]; + C=state[2]; + D=state[3]; + E=state[4]; + + } +} + +const struct ccdigest_info ccsha1_eay_di = { + .output_size = CCSHA1_OUTPUT_SIZE, + .state_size = CCSHA1_STATE_SIZE, + .block_size = CCSHA1_BLOCK_SIZE, + .oid_size = ccoid_sha1_len, + .oid = CC_DIGEST_OID_SHA1, + .initial_state = ccsha1_initial_state, + .compress = sha1_compress, + .final = ccdigest_final_64be, +}; diff --git a/osfmk/corecrypto/ccsha1/src/ccsha1_initial_state.c b/osfmk/corecrypto/ccsha1/src/ccsha1_initial_state.c new file mode 100644 index 000000000..2a5bc5e2d --- /dev/null +++ b/osfmk/corecrypto/ccsha1/src/ccsha1_initial_state.c @@ -0,0 +1,19 @@ +/* + * ccsha1_initial_state.c + * corecrypto + * + * Created by Fabrice Gautier on 12/7/10. + * Copyright 2010 Apple, Inc. All rights reserved. 
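+ * The five words below are the standard FIPS 180 SHA-1 initial hash
+ * values H0..H4; ccdigest_init() copies them into the chaining state
+ * before the first block is compressed.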
+ * + */ + +#include +#include + +const uint32_t ccsha1_initial_state[5] = { + 0x67452301, + 0xefcdab89, + 0x98badcfe, + 0x10325476, + 0xc3d2e1f0 +}; diff --git a/osfmk/default_pager/default_pager_internal.h b/osfmk/default_pager/default_pager_internal.h index 0aa9a4604..7ae7452a6 100644 --- a/osfmk/default_pager/default_pager_internal.h +++ b/osfmk/default_pager/default_pager_internal.h @@ -68,7 +68,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c index b861ccf82..205e612a2 100644 --- a/osfmk/default_pager/dp_backing_store.c +++ b/osfmk/default_pager/dp_backing_store.c @@ -101,7 +101,7 @@ int physical_transfer_cluster_count = 0; #define VM_SUPER_CLUSTER 0x40000 -#define VM_SUPER_PAGES (VM_SUPER_CLUSTER / PAGE_SIZE) +#define VM_SUPER_PAGES (VM_SUPER_CLUSTER / PAGE_MIN_SIZE) /* * 0 means no shift to pages, so == 1 page/cluster. 1 would mean @@ -1908,6 +1908,7 @@ ps_vstruct_reclaim( fault_info.cluster_size = VM_SUPER_CLUSTER; fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; fault_info.user_tag = 0; + fault_info.pmap_options = 0; fault_info.lo_offset = 0; fault_info.hi_offset = ptoa_32(vs->vs_size << vs->vs_clshift); fault_info.io_sync = reclaim_backing_store; @@ -2924,7 +2925,7 @@ pvs_cluster_read( int cl_index; unsigned int xfer_size; dp_offset_t orig_vs_offset; - dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT]; + dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT]; paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT]; struct clmap clmap; upl_t upl; diff --git a/osfmk/default_pager/dp_memory_object.c b/osfmk/default_pager/dp_memory_object.c index 44dc731ff..3ffa10e44 100644 --- a/osfmk/default_pager/dp_memory_object.c +++ b/osfmk/default_pager/dp_memory_object.c @@ -777,6 +777,9 @@ dp_memory_object_data_return( vs_lookup(mem_obj, vs); default_pager_total++; + + /* might be unreachable if VS_TRY_LOCK is, by definition, always true */ + __unreachable_ok_push if(!VS_TRY_LOCK(vs)) { /* the call below will not be done by caller when we have */ /* a synchronous interface */ @@ -793,6 +796,7 @@ dp_memory_object_data_return( upl_deallocate(upl); return KERN_SUCCESS; } + __unreachable_ok_pop if ((vs->vs_seqno != vs->vs_next_seqno++) || (vs->vs_readers) diff --git a/osfmk/device/device.defs b/osfmk/device/device.defs index 65f7686eb..133b63cd5 100644 --- a/osfmk/device/device.defs +++ b/osfmk/device/device.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2014 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -274,7 +274,7 @@ routine io_connect_set_properties( out result : kern_return_t ); -#if IOKIT_ALL_IPC || (__ILP32__ && !MAP_32B_METHODS) +#if IOKIT_ALL_IPC || __ILP32__ routine io_connect_method_scalarI_scalarO( connection : io_connect_t; in selector : uint32_t; @@ -420,7 +420,7 @@ routine io_service_match_property_table( out matches : boolean_t ); -#if IOKIT_ALL_IPC || (__ILP32__ && !MAP_32B_ASYNC_METHODS) +#if IOKIT_ALL_IPC || __ILP32__ routine io_async_method_scalarI_scalarO( connection : io_connect_t; in wake_port : mach_port_make_send_t; @@ -714,6 +714,79 @@ routine io_service_get_matching_service_ool( out service : io_object_t ); +routine io_service_get_authorization_id( + service : io_object_t; + out authorization_id : uint64_t + ); + +routine io_service_set_authorization_id( + service : io_object_t; + in authorization_id : uint64_t + ); + +/* */ + +routine io_server_version( + master_port : mach_port_t; + out version : uint64_t + ); + +routine io_registry_entry_get_properties_bin( + registry_entry : io_object_t; + out properties : io_buf_ptr_t, physicalcopy + ); + +routine io_registry_entry_get_property_bin( + registry_entry : io_object_t; + in plane : io_name_t; + in property_name : io_name_t; + in options : uint32_t; + out properties : io_buf_ptr_t, physicalcopy + ); + +routine io_service_get_matching_service_bin( + master_port : mach_port_t; + in matching : io_struct_inband_t; + out service : io_object_t + ); + +routine io_service_get_matching_services_bin( + master_port : mach_port_t; + in matching : io_struct_inband_t; + out existing : io_object_t + ); + +routine io_service_match_property_table_bin( + service : io_object_t; + in matching : io_struct_inband_t; + out matches : boolean_t + ); + +#if IOKIT_ALL_IPC || __ILP32__ +routine io_service_add_notification_bin( + master_port : mach_port_t; + in notification_type : io_name_t; + in matching : io_struct_inband_t; + in wake_port : mach_port_make_send_t; + in reference : io_async_ref_t; + out notification : io_object_t + ); +#else +skip; +#endif + +#if IOKIT_ALL_IPC || __LP64__ +routine FUNC_NAME(io_service_add_notification_bin)( + master_port : mach_port_t; + in notification_type : io_name_t; + in matching : io_struct_inband_t; + in wake_port : mach_port_make_send_t; + in reference : io_async_ref64_t; + out notification : io_object_t + ); +#else +skip; +#endif #endif /* IOKIT */ diff --git a/osfmk/device/device_types.h b/osfmk/device/device_types.h index f71249e12..e7466c05e 100644 --- a/osfmk/device/device_types.h +++ b/osfmk/device/device_types.h @@ -71,6 +71,11 @@ #include #include +#if PRIVATE +#define IOKIT_SERVER_VERSION 20140421 +#endif + + /* * IO buffer - out-of-line array of characters. */ @@ -111,7 +116,7 @@ typedef uint64_t io_scalar_inband64_t[16]; typedef uint64_t io_async_ref64_t[8]; #endif // __LP64__ -#ifdef MACH_KERNEL +#ifdef MACH_KERNEL_PRIVATE typedef struct IOObject * io_object_t; typedef io_object_t io_connect_t; diff --git a/osfmk/device/subrs.c b/osfmk/device/subrs.c index 99f8d5032..c9556819f 100644 --- a/osfmk/device/subrs.c +++ b/osfmk/device/subrs.c @@ -188,6 +188,7 @@ strcmp( */ // ARM implementation in ../arm/strncmp.s +// ARM64 implementation in ../arm64/strncmp.s int strncmp( const char *s1, @@ -284,7 +285,7 @@ strcpy( * to the "to" string. 
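 * (Note: strncpy NUL-pads the remainder of the destination and, when the
 * source is at least n bytes long, leaves the result unterminated;
 * strlcpy, further below, is the bounded variant that always terminates.)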
*/ -// ARM implementation in ../arm/strncpy.c +// ARM and ARM64 implementation in ../arm/strncpy.c char * strncpy( char *s1, @@ -380,6 +381,7 @@ atoi_term( */ // ARM implementation in ../arm/strnlen.s +// ARM64 implementation in ../arm64/strnlen.s size_t strnlen(const char *s, size_t max) { const char *es = s + max, *p = s; @@ -485,7 +487,7 @@ strlcat(char *dst, const char *src, size_t siz) * Returns strlen(src); if retval >= siz, truncation occurred. */ -// ARM implementation in ../arm/strlcpy.c +// ARM and ARM64 implementation in ../arm/strlcpy.c size_t strlcpy(char *dst, const char *src, size_t siz) { diff --git a/osfmk/i386/AT386/conf.c b/osfmk/i386/AT386/conf.c index 598a237c7..f90dac3ec 100644 --- a/osfmk/i386/AT386/conf.c +++ b/osfmk/i386/AT386/conf.c @@ -60,7 +60,6 @@ * Device switch for i386 AT bus. */ -#include #include #include diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index dd3baf946..fb9157b88 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -66,7 +66,6 @@ * Basic initialization for I386 - ISA bus machines. */ -#include #include @@ -99,6 +98,8 @@ #endif #include #include +#include + #include /* inb() */ #include @@ -211,6 +212,8 @@ machine_startup(void) machine_conf(); + panic_hooks_init(); + /* * Start the system. */ @@ -1030,6 +1033,8 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu pbtcpu = cn; } + panic_check_hook(); + PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms)); if (msg != NULL) { diff --git a/osfmk/i386/Diagnostics.c b/osfmk/i386/Diagnostics.c index 34acf876f..88a86c31b 100644 --- a/osfmk/i386/Diagnostics.c +++ b/osfmk/i386/Diagnostics.c @@ -199,23 +199,11 @@ diagCall64(x86_saved_state_t * state) pkes.pkg_cres[0][2] = ((uint64_t)c6h << 32) | c6l; pkes.pkg_cres[0][3] = ((uint64_t)c7h << 32) | c7l; - uint32_t cpumodel = cpuid_info()->cpuid_model; - boolean_t c8avail; - switch (cpumodel) { - case CPUID_MODEL_HASWELL_ULT: - c8avail = TRUE; - break; - default: - c8avail = FALSE; - break; - } uint64_t c8r = ~0ULL, c9r = ~0ULL, c10r = ~0ULL; - if (c8avail) { - rdmsr64_carefully(MSR_IA32_PKG_C8_RESIDENCY, &c8r); - rdmsr64_carefully(MSR_IA32_PKG_C9_RESIDENCY, &c9r); - rdmsr64_carefully(MSR_IA32_PKG_C10_RESIDENCY, &c10r); - } + rdmsr64_carefully(MSR_IA32_PKG_C8_RESIDENCY, &c8r); + rdmsr64_carefully(MSR_IA32_PKG_C9_RESIDENCY, &c9r); + rdmsr64_carefully(MSR_IA32_PKG_C10_RESIDENCY, &c10r); pkes.pkg_cres[0][4] = c8r; pkes.pkg_cres[0][5] = c9r; diff --git a/osfmk/i386/Makefile b/osfmk/i386/Makefile index f773a15b3..a7cbd51fc 100644 --- a/osfmk/i386/Makefile +++ b/osfmk/i386/Makefile @@ -9,6 +9,7 @@ include $(MakeInc_def) EXPORT_ONLY_FILES = \ apic.h \ asm.h \ + bit_routines.h \ cpu_number.h \ cpu_capabilities.h \ cpu_data.h \ @@ -28,6 +29,7 @@ EXPORT_ONLY_FILES = \ pal_native.h \ pal_routines.h \ pal_hibernate.h \ + panic_hooks.h \ pmCPU.h \ pmap.h \ proc_reg.h \ @@ -47,7 +49,7 @@ INSTALL_MD_LCL_LIST = cpu_capabilities.h INSTALL_KF_MD_LIST = asm.h cpuid.h eflags.h locks.h machine_routines.h proc_reg.h vmx.h -INSTALL_KF_MD_LCL_LIST = $(filter-out cpu_data.h pal_i386.h, $(EXPORT_ONLY_FILES)) +INSTALL_KF_MD_LCL_LIST = $(filter-out bit_routines.h cpu_data.h pal_i386.h, $(EXPORT_ONLY_FILES)) EXPORT_MD_LIST = ${EXPORT_ONLY_FILES} diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index fd73f24ff..e42d4aef2 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -71,6 +71,7 @@ extern void acpi_wake_prot(void); #endif extern kern_return_t IOCPURunPlatformQuiesceActions(void); 
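+/* Added below: gives platform drivers a chance to run their registered
+ * halt/restart callbacks before PE_halt_restart(); the hibernate
+ * power-off and restart paths in this file invoke it with kPEHaltCPU
+ * and kPERestartCPU respectively. */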
extern kern_return_t IOCPURunPlatformActiveActions(void); +extern kern_return_t IOCPURunPlatformHaltRestartActions(uint32_t message); extern void fpinit(void); @@ -115,12 +116,14 @@ acpi_hibernate(void *refcon) { // off HIBLOG("power off\n"); + IOCPURunPlatformHaltRestartActions(kPEHaltCPU); if (PE_halt_restart) (*PE_halt_restart)(kPEHaltCPU); } else if( mode == kIOHibernatePostWriteRestart ) { // restart HIBLOG("restart\n"); + IOCPURunPlatformHaltRestartActions(kPERestartCPU); if (PE_halt_restart) (*PE_halt_restart)(kPERestartCPU); } else @@ -293,6 +296,8 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) /* let the realtime clock reset */ rtc_sleep_wakeup(acpi_sleep_abstime); acpi_wake_postrebase_abstime = mach_absolute_time(); + assert(mach_absolute_time() >= acpi_sleep_abstime); + kdebug_enable = save_kdebug_enable; if (kdebug_enable == 0) { @@ -420,6 +425,7 @@ acpi_idle_kernel(acpi_sleep_callback func, void *refcon) kdebug_enable = save_kdebug_enable; } acpi_wake_postrebase_abstime = mach_absolute_time(); + assert(mach_absolute_time() >= acpi_idle_abstime); cpu_datap(master_cpu)->cpu_running = TRUE; KERNEL_DEBUG_CONSTANT( diff --git a/osfmk/i386/bit_routines.h b/osfmk/i386/bit_routines.h new file mode 100644 index 000000000..adf0d9c5e --- /dev/null +++ b/osfmk/i386/bit_routines.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (C) 1998 Apple Computer + * All Rights Reserved + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +/* + * Bit-mask manipulation routines + */ + +#ifndef _I386_BIT_ROUTINES_H_ +#define _I386_BIT_ROUTINES_H_ + +#ifdef MACH_KERNEL_PRIVATE + +/* + * General bit-lock routines. + */ + +#define bit_lock(bit,l) \ + __asm__ volatile(" jmp 1f \n \ + 0: btl %0, %1 \n \ + jb 0b \n \ + 1: lock \n \ + btsl %0,%1 \n \ + jb 0b" : \ + : \ + "r" (bit), "m" (*(volatile int *)(l)) : \ + "memory"); + +#define bit_unlock(bit,l) \ + __asm__ volatile(" lock \n \ + btrl %0,%1" : \ + : \ + "r" (bit), "m" (*(volatile int *)(l))); + +/* + * Set or clear individual bits in a long word. + * The locked access is needed only to lock access + * to the word, not to individual bits. + */ + +#define i_bit_set(bit,l) \ + __asm__ volatile(" lock \n \ + btsl %0,%1" : \ + : \ + "r" (bit), "m" (*(volatile int *)(l))); + +#define i_bit_clear(bit,l) \ + __asm__ volatile(" lock \n \ + btrl %0,%1" : \ + : \ + "r" (bit), "m" (*(volatile int *)(l))); + +static inline char xchgb(volatile char * cp, char new) +{ + register char old = new; + + __asm__ volatile (" xchgb %0,%2" : + "=q" (old) : + "0" (new), "m" (*(volatile char *)cp) : "memory"); + return (old); +} + +static inline void atomic_incl(volatile long * p, long delta) +{ + __asm__ volatile (" lock \n \ + add %0,%1" : \ + : \ + "r" (delta), "m" (*(volatile long *)p)); +} + +static inline void atomic_incs(volatile short * p, short delta) +{ + __asm__ volatile (" lock \n \ + addw %0,%1" : \ + : \ + "q" (delta), "m" (*(volatile short *)p)); +} + +static inline void atomic_incb(volatile char * p, char delta) +{ + __asm__ volatile (" lock \n \ + addb %0,%1" : \ + : \ + "q" (delta), "m" (*(volatile char *)p)); +} + +static inline void atomic_decl(volatile long * p, long delta) +{ + __asm__ volatile (" lock \n \ + sub %0,%1" : \ + : \ + "r" (delta), "m" (*(volatile long *)p)); +} + +static inline int atomic_decl_and_test(volatile long * p, long delta) +{ + uint8_t ret; + __asm__ volatile ( + " lock \n\t" + " sub %1,%2 \n\t" + " sete %0" + : "=qm" (ret) + : "r" (delta), "m" (*(volatile long *)p)); + return ret; +} + +static inline void atomic_decs(volatile short * p, short delta) +{ + __asm__ volatile (" lock \n \ + subw %0,%1" : \ + : \ + "q" (delta), "m" (*(volatile short *)p)); +} + +static inline void atomic_decb(volatile char * p, char delta) +{ + __asm__ volatile (" lock \n \ + subb %0,%1" : \ + : \ + "q" (delta), "m" (*(volatile char *)p)); +} + +static inline long atomic_getl(const volatile long * p) +{ + return (*p); +} + +static inline short atomic_gets(const volatile short * p) +{ + return (*p); +} + +static inline char atomic_getb(const volatile char * p) +{ + return (*p); +} + +static inline void atomic_setl(volatile long * p, long value) +{ + *p = value; +} + +static inline void atomic_sets(volatile short * p, short value) +{ + *p = value; +} + +static inline void atomic_setb(volatile char * p, char value) +{ + *p = value; +} + +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* _I386_BIT_ROUTINES_H_ */ diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c index 147951308..bc510068b 100644 --- a/osfmk/i386/bsd_i386.c +++ b/osfmk/i386/bsd_i386.c @@ -312,8 +312,6 @@ machdep_syscall(x86_saved_state_t *state) 
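The bit_routines.h primitives above are thin wrappers over lock-prefixed btsl/btrl instructions. A minimal sketch of the intended pattern, usable only from MACH_KERNEL_PRIVATE code; shared_map, MAP_LOCK_BIT, and mark_slot_busy are hypothetical names, not part of the patch:

#include <i386/bit_routines.h>

#define MAP_LOCK_BIT 31                 /* one bit of the word is the lock */

static volatile int shared_map;         /* hypothetical shared bitmap */

static void mark_slot_busy(int slot)    /* slot in [0, 30] */
{
    bit_lock(MAP_LOCK_BIT, &shared_map);    /* spin until we own the word */
    i_bit_set(slot, &shared_map);           /* atomic on its own, but the
                                             * lock serializes any compound
                                             * updates done alongside it */
    bit_unlock(MAP_LOCK_BIT, &shared_map);  /* locked btrl releases */
}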
default: panic("machdep_syscall: too many args"); } - if (current_thread()->funnel_lock) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax); @@ -354,11 +352,12 @@ machdep_syscall64(x86_saved_state_t *state) case 1: regs->rax = (*entry->routine.args64_1)(regs->rdi); break; + case 2: + regs->rax = (*entry->routine.args64_2)(regs->rdi, regs->rsi); + break; default: panic("machdep_syscall64: too many args"); } - if (current_thread()->funnel_lock) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%llu\n", regs->rax); @@ -394,7 +393,11 @@ mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap { if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args, trapp->mach_trap_u32_words * sizeof (int))) return KERN_INVALID_ARGUMENT; - trapp->mach_trap_arg_munge32(NULL, args); +#if CONFIG_REQUIRES_U32_MUNGING + trapp->mach_trap_arg_munge32(args); +#else +#error U32 mach traps on x86_64 kernel requires munging +#endif return KERN_SUCCESS; } @@ -485,6 +488,7 @@ mach_call_munger64(x86_saved_state_t *state) int call_number; int argc; mach_call_t mach_call; + struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; x86_saved_state64_t *regs; assert(is_saved_state64(state)); @@ -511,17 +515,23 @@ mach_call_munger64(x86_saved_state_t *state) /* NOTREACHED */ } argc = mach_trap_table[call_number].mach_trap_arg_count; + if (argc) { + int args_in_regs = MIN(6, argc); - if (argc > 6) { + memcpy(&args.arg1, ®s->rdi, args_in_regs * sizeof(syscall_arg_t)); + + if (argc > 6) { int copyin_count; - copyin_count = (argc - 6) * (int)sizeof(uint64_t); + assert(argc <= 9); + copyin_count = (argc - 6) * (int)sizeof(syscall_arg_t); - if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count)) { + if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&args.arg7, copyin_count)) { regs->rax = KERN_INVALID_ARGUMENT; - thread_exception_return(); - /* NOTREACHED */ + thread_exception_return(); + /* NOTREACHED */ + } } } @@ -529,7 +539,7 @@ mach_call_munger64(x86_saved_state_t *state) mach_kauth_cred_uthread_update(); #endif - regs->rax = (uint64_t)mach_call((void *)(®s->rdi)); + regs->rax = (uint64_t)mach_call((void *)&args); DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax); diff --git a/osfmk/i386/bsd_i386_native.c b/osfmk/i386/bsd_i386_native.c index 863344aa9..8e49e214d 100644 --- a/osfmk/i386/bsd_i386_native.c +++ b/osfmk/i386/bsd_i386_native.c @@ -157,28 +157,8 @@ thread_set_parent(thread_t parent, int pid) kern_return_t thread_fast_set_cthread_self(uint32_t self) { - thread_t thread = current_thread(); - pcb_t pcb = THREAD_TO_PCB(thread); - struct real_descriptor desc = { - .limit_low = 1, - .limit_high = 0, - .base_low = self & 0xffff, - .base_med = (self >> 16) & 0xff, - .base_high = (self >> 24) & 0xff, - .access = ACC_P|ACC_PL_U|ACC_DATA_W, - .granularity = SZ_32|SZ_G, - }; - - current_thread()->machine.cthread_self = (uint64_t) self; /* preserve old func too */ - - /* assign descriptor */ - mp_disable_preemption(); - pcb->cthread_desc = desc; - *ldt_desc_p(USER_CTHREAD) = desc; - saved_state32(pcb->iss)->gs = USER_CTHREAD; - mp_enable_preemption(); - - return (USER_CTHREAD); + machine_thread_set_tsd_base(current_thread(), self); + return (USER_CTHREAD); /* N.B.: not a kern_return_t! 
*/ } /* @@ -193,21 +173,7 @@ thread_fast_set_cthread_self(uint32_t self) kern_return_t thread_fast_set_cthread_self64(uint64_t self) { - pcb_t pcb = THREAD_TO_PCB(current_thread()); - cpu_data_t *cdp; - - /* check for canonical address, set 0 otherwise */ - if (!IS_USERADDR64_CANONICAL(self)) - self = 0ULL; - - pcb->cthread_self = self; - mp_disable_preemption(); - cdp = current_cpu_datap(); - if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || - (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) - wrmsr64(MSR_IA32_KERNEL_GS_BASE, self); - cdp->cpu_uber.cu_user_gs_base = self; - mp_enable_preemption(); + machine_thread_set_tsd_base(current_thread(), self); return (USER_CTHREAD); /* N.B.: not a kern_return_t! */ } diff --git a/osfmk/i386/commpage/commpage.c b/osfmk/i386/commpage/commpage.c index 35ee69cdf..6bea2bef7 100644 --- a/osfmk/i386/commpage/commpage.c +++ b/osfmk/i386/commpage/commpage.c @@ -285,11 +285,38 @@ commpage_init_cpu_capabilities( void ) uint64_t misc_enable = rdmsr64(MSR_IA32_MISC_ENABLE); setif(bits, kHasENFSTRG, (misc_enable & 1ULL) && (cpuid_leaf7_features() & - CPUID_LEAF7_FEATURE_ENFSTRG)); + CPUID_LEAF7_FEATURE_ERMS)); _cpu_capabilities = bits; // set kernel version for use by drivers etc } +/* initialize the approx_time_supported flag and set the approx time to 0. + * Called during initial commpage population. + */ +static void +commpage_mach_approximate_time_init(void) +{ + char *cp = commPagePtr32; + uint8_t supported; + +#ifdef CONFIG_MACH_APPROXIMATE_TIME + supported = 1; +#else + supported = 0; +#endif + if ( cp ) { + cp += (_COMM_PAGE_APPROX_TIME_SUPPORTED - _COMM_PAGE32_BASE_ADDRESS); + *(boolean_t *)cp = supported; + } + cp = commPagePtr64; + if ( cp ) { + cp += (_COMM_PAGE_APPROX_TIME_SUPPORTED - _COMM_PAGE32_START_ADDRESS); + *(boolean_t *)cp = supported; + } + commpage_update_mach_approximate_time(0); +} + + uint64_t _get_cpu_capabilities(void) { @@ -430,6 +457,7 @@ commpage_populate( void ) simple_lock_init(&commpage_active_cpus_lock, 0); commpage_update_active_cpus(); + commpage_mach_approximate_time_init(); rtc_nanotime_init_commpage(); } @@ -662,6 +690,47 @@ commpage_update_active_cpus(void) simple_unlock(&commpage_active_cpus_lock); } +/* + * update the commpage data for last known value of mach_absolute_time() + */ + +void +commpage_update_mach_approximate_time(uint64_t abstime) +{ +#ifdef CONFIG_MACH_APPROXIMATE_TIME + uint64_t saved_data; + char *cp; + + cp = commPagePtr32; + if ( cp ) { + cp += (_COMM_PAGE_APPROX_TIME - _COMM_PAGE32_BASE_ADDRESS); + saved_data = *(uint64_t *)cp; + if (saved_data < abstime) { + /* ignoring the success/fail return value assuming that + * if the value has been updated since we last read it, + * "someone" has a newer timestamp than us and ours is + * now invalid. */ + OSCompareAndSwap64(saved_data, abstime, (uint64_t *)cp); + } + } + cp = commPagePtr64; + if ( cp ) { + cp += (_COMM_PAGE_APPROX_TIME - _COMM_PAGE32_START_ADDRESS); + saved_data = *(uint64_t *)cp; + if (saved_data < abstime) { + /* ignoring the success/fail return value assuming that + * if the value has been updated since we last read it, + * "someone" has a newer timestamp than us and ours is + * now invalid. 
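+ * A single compare-and-swap attempt suffices here (no retry loop): the
+ * published value only moves forward, so losing the race means a
+ * timestamp at least as new as ours already landed.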
*/ + OSCompareAndSwap64(saved_data, abstime, (uint64_t *)cp); + } + } +#else +#pragma unused (abstime) +#endif +} + + extern user32_addr_t commpage_text32_location; extern user64_addr_t commpage_text64_location; diff --git a/osfmk/i386/commpage/commpage.h b/osfmk/i386/commpage/commpage.h index a39d47b46..a4ad20f0f 100644 --- a/osfmk/i386/commpage/commpage.h +++ b/osfmk/i386/commpage/commpage.h @@ -145,6 +145,7 @@ extern void commpage_set_memory_pressure(unsigned int pressure); extern void commpage_set_spin_count(unsigned int count); extern void commpage_sched_gen_inc(void); extern void commpage_update_active_cpus(void); +extern void commpage_update_mach_approximate_time(uint64_t abstime); extern uint32_t commpage_is_in_pfz32(uint32_t); extern uint32_t commpage_is_in_pfz64(addr64_t); diff --git a/osfmk/i386/cpu.c b/osfmk/i386/cpu.c index fc9fcc43e..2b6b6864b 100644 --- a/osfmk/i386/cpu.c +++ b/osfmk/i386/cpu.c @@ -185,8 +185,8 @@ cpu_machine_init( ml_init_interrupt(); #if CONFIG_VMX - /* for every CPU, get the VT specs */ - vmx_get_specs(); + /* initialize VMX for every CPU */ + vmx_cpu_init(); #endif } diff --git a/osfmk/i386/cpu_capabilities.h b/osfmk/i386/cpu_capabilities.h index 5205eb2e8..922faa5da 100644 --- a/osfmk/i386/cpu_capabilities.h +++ b/osfmk/i386/cpu_capabilities.h @@ -193,6 +193,10 @@ int _NumCPUs( void ) #define _COMM_PAGE_GTOD_GENERATION (_COMM_PAGE_START_ADDRESS+0x06c) /* used by gettimeofday() */ #define _COMM_PAGE_GTOD_NS_BASE (_COMM_PAGE_START_ADDRESS+0x070) /* used by gettimeofday() */ #define _COMM_PAGE_GTOD_SEC_BASE (_COMM_PAGE_START_ADDRESS+0x078) /* used by gettimeofday() */ +/* NOTE: APPROX_TIME must be aligned to 64-byte cache line size: */ +#define _COMM_PAGE_APPROX_TIME (_COMM_PAGE_START_ADDRESS+0x080) /* used by mach_approximate_time() */ +#define _COMM_PAGE_APPROX_TIME_SUPPORTED (_COMM_PAGE_START_ADDRESS+0x088) /* used by mach_approximate_time() */ + #define _COMM_PAGE_END (_COMM_PAGE_START_ADDRESS+0xfff) /* end of common page */ diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index e0bb1a7e4..f3b201e27 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,7 @@ struct cpu_cons_buffer; struct cpu_desc_table; struct mca_state; +struct prngContext; /* * Data structures embedded in per-cpu data: @@ -238,6 +240,7 @@ typedef struct cpu_data #if CONFIG_MCA struct mca_state *cpu_mca_state; /* State at MC fault */ #endif + struct prngContext *cpu_prng; /* PRNG's context */ int cpu_type; int cpu_subtype; int cpu_threadtype; diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c index b1c0b6104..2e87fae6a 100644 --- a/osfmk/i386/cpu_threads.c +++ b/osfmk/i386/cpu_threads.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #define DIVISOR_GUARD(denom) \ if ((denom) == 0) { \ diff --git a/osfmk/i386/cpu_topology.c b/osfmk/i386/cpu_topology.c index 76a9e8edf..a8517d148 100644 --- a/osfmk/i386/cpu_topology.c +++ b/osfmk/i386/cpu_topology.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include @@ -175,7 +175,7 @@ cpu_topology_sort(int ncpus) if (lcpup->lnum == 0) lprim = cpup->cpu_processor; - processor_meta_init(cpup->cpu_processor, lprim); + processor_set_primary(cpup->cpu_processor, lprim); } } } diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index fb171c4dc..2302c833b 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -28,7 +28,6 @@ /* * @OSF_COPYRIGHT@ */ -#include #include 
#include @@ -777,6 +776,7 @@ void cpuid_set_info(void) { i386_cpu_info_t *info_p = &cpuid_cpu_info; + boolean_t enable_x86_64h = TRUE; cpuid_set_generic_info(info_p); @@ -788,7 +788,24 @@ cpuid_set_info(void) panic("Unsupported CPU"); info_p->cpuid_cpu_type = CPU_TYPE_X86; - info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_ARCH1; + + if (!PE_parse_boot_argn("-enable_x86_64h", &enable_x86_64h, sizeof(enable_x86_64h))) { + boolean_t disable_x86_64h = FALSE; + + if (PE_parse_boot_argn("-disable_x86_64h", &disable_x86_64h, sizeof(disable_x86_64h))) { + enable_x86_64h = FALSE; + } + } + + if (enable_x86_64h && + ((info_p->cpuid_features & CPUID_X86_64_H_FEATURE_SUBSET) == CPUID_X86_64_H_FEATURE_SUBSET) && + ((info_p->cpuid_extfeatures & CPUID_X86_64_H_EXTFEATURE_SUBSET) == CPUID_X86_64_H_EXTFEATURE_SUBSET) && + ((info_p->cpuid_leaf7_features & CPUID_X86_64_H_LEAF7_FEATURE_SUBSET) == CPUID_X86_64_H_LEAF7_FEATURE_SUBSET)) { + info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_64_H; + } else { + info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_ARCH1; + } + /* Must be invoked after set_generic_info */ cpuid_set_cache_info(info_p); @@ -820,6 +837,8 @@ cpuid_set_info(void) DBG("cpuid_set_info():\n"); DBG(" core_count : %d\n", info_p->core_count); DBG(" thread_count : %d\n", info_p->thread_count); + DBG(" cpu_type: 0x%08x\n", info_p->cpuid_cpu_type); + DBG(" cpu_subtype: 0x%08x\n", info_p->cpuid_cpu_subtype); info_p->cpuid_model_string = ""; /* deprecated */ } @@ -895,6 +914,8 @@ extfeature_map[] = { {CPUID_EXTFEATURE_1GBPAGE, "1GBPAGE"}, {CPUID_EXTFEATURE_EM64T, "EM64T"}, {CPUID_EXTFEATURE_LAHF, "LAHF"}, + {CPUID_EXTFEATURE_LZCNT, "LZCNT"}, + {CPUID_EXTFEATURE_PREFETCHW, "PREFETCHW"}, {CPUID_EXTFEATURE_RDTSCP, "RDTSCP"}, {CPUID_EXTFEATURE_TSCI, "TSCI"}, {0, 0} @@ -902,7 +923,7 @@ extfeature_map[] = { }, leaf7_feature_map[] = { {CPUID_LEAF7_FEATURE_SMEP, "SMEP"}, - {CPUID_LEAF7_FEATURE_ENFSTRG, "ENFSTRG"}, + {CPUID_LEAF7_FEATURE_ERMS, "ERMS"}, {CPUID_LEAF7_FEATURE_RDWRFSGS, "RDWRFSGS"}, {CPUID_LEAF7_FEATURE_TSCOFF, "TSC_THREAD_OFFSET"}, {CPUID_LEAF7_FEATURE_BMI1, "BMI1"}, @@ -1096,6 +1117,9 @@ cpuid_init_vmm_info(i386_vmm_info_t *info_p) if (0 == strcmp(info_p->cpuid_vmm_vendor, CPUID_VMM_ID_VMWARE)) { /* VMware identification string: kb.vmware.com/kb/1009458 */ info_p->cpuid_vmm_family = CPUID_VMM_FAMILY_VMWARE; + } else if (0 == strcmp(info_p->cpuid_vmm_vendor, CPUID_VMM_ID_PARALLELS)) { + /* Parallels identification string */ + info_p->cpuid_vmm_family = CPUID_VMM_FAMILY_PARALLELS; } else { info_p->cpuid_vmm_family = CPUID_VMM_FAMILY_UNKNOWN; } diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index c114c6bce..cd37a55a2 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -44,7 +44,8 @@ #define CPUID_VID_INTEL "GenuineIntel" #define CPUID_VID_AMD "AuthenticAMD" -#define CPUID_VMM_ID_VMWARE "VMwareVMware" +#define CPUID_VMM_ID_VMWARE "VMwareVMware" +#define CPUID_VMM_ID_PARALLELS "Parallels\0\0\0" #define CPUID_STRING_UNKNOWN "Unknown CPU Typ" @@ -114,26 +115,22 @@ #define CPUID_FEATURE_XSAVE _HBit(26) /* XSAVE instructions */ #define CPUID_FEATURE_OSXSAVE _HBit(27) /* XGETBV/XSETBV instructions */ #define CPUID_FEATURE_AVX1_0 _HBit(28) /* AVX 1.0 instructions */ -#define CPUID_FEATURE_VMM _HBit(31) /* VMM (Hypervisor) present */ -#define CPUID_FEATURE_SEGLIM64 _HBit(11) /* 64-bit segment limit checking */ -#define CPUID_FEATURE_PCID _HBit(17) /* ASID-PCID support */ -#define CPUID_FEATURE_TSCTMR _HBit(24) /* TSC deadline timer */ -#define CPUID_FEATURE_AVX1_0 _HBit(28) /* AVX 1.0 instructions */ #define 
CPUID_FEATURE_F16C _HBit(29) /* Float16 convert instructions */ #define CPUID_FEATURE_RDRAND _HBit(30) /* RDRAND instruction */ +#define CPUID_FEATURE_VMM _HBit(31) /* VMM (Hypervisor) present */ /* * Leaf 7, subleaf 0 additional features. * Bits returned in %ebx to a CPUID request with {%eax,%ecx} of (0x7,0x0}: */ #define CPUID_LEAF7_FEATURE_RDWRFSGS _Bit(0) /* FS/GS base read/write */ -#define CPUID_LEAF7_FEATURE_SMEP _Bit(7) /* Supervisor Mode Execute Protect */ -#define CPUID_LEAF7_FEATURE_ENFSTRG _Bit(9) /* ENhanced Fast STRinG copy */ #define CPUID_LEAF7_FEATURE_TSCOFF _Bit(1) /* TSC thread offset */ #define CPUID_LEAF7_FEATURE_BMI1 _Bit(3) /* Bit Manipulation Instrs, set 1 */ #define CPUID_LEAF7_FEATURE_HLE _Bit(4) /* Hardware Lock Elision*/ #define CPUID_LEAF7_FEATURE_AVX2 _Bit(5) /* AVX2 Instructions */ +#define CPUID_LEAF7_FEATURE_SMEP _Bit(7) /* Supervisor Mode Execute Protect */ #define CPUID_LEAF7_FEATURE_BMI2 _Bit(8) /* Bit Manipulation Instrs, set 2 */ +#define CPUID_LEAF7_FEATURE_ERMS _Bit(9) /* Enhanced Rep Movsb/Stosb */ #define CPUID_LEAF7_FEATURE_INVPCID _Bit(10) /* INVPCID intruction, TDB */ #define CPUID_LEAF7_FEATURE_RTM _Bit(11) /* TBD */ @@ -149,6 +146,8 @@ #define CPUID_EXTFEATURE_EM64T _Bit(29) /* Extended Mem 64 Technology */ #define CPUID_EXTFEATURE_LAHF _HBit(0) /* LAFH/SAHF instructions */ +#define CPUID_EXTFEATURE_LZCNT _HBit(5) /* LZCNT instruction */ +#define CPUID_EXTFEATURE_PREFETCHW _HBit(8) /* PREFETCHW instruction */ /* * The CPUID_EXTFEATURE_XXX values define 64-bit values @@ -156,6 +155,26 @@ */ #define CPUID_EXTFEATURE_TSCI _Bit(8) /* TSC Invariant */ +/* + * CPUID_X86_64_H_FEATURE_SUBSET and CPUID_X86_64_H_LEAF7_FEATURE_SUBSET + * indicate the bitmask of features that must be present before the system + * is eligible to run the "x86_64h" "Haswell feature subset" slice. 
+ */ +#define CPUID_X86_64_H_FEATURE_SUBSET ( CPUID_FEATURE_FMA | \ + CPUID_FEATURE_SSE4_2 | \ + CPUID_FEATURE_MOVBE | \ + CPUID_FEATURE_POPCNT | \ + CPUID_FEATURE_AVX1_0 \ + ) + +#define CPUID_X86_64_H_EXTFEATURE_SUBSET ( CPUID_EXTFEATURE_LZCNT \ + ) + +#define CPUID_X86_64_H_LEAF7_FEATURE_SUBSET ( CPUID_LEAF7_FEATURE_BMI1 | \ + CPUID_LEAF7_FEATURE_AVX2 | \ + CPUID_LEAF7_FEATURE_BMI2 \ + ) + #define CPUID_CACHE_SIZE 16 /* Number of descriptor values */ #define CPUID_MWAIT_EXTENSION _Bit(0) /* enumeration of WMAIT extensions */ @@ -174,16 +193,15 @@ #define CPUID_MODEL_SANDYBRIDGE 0x2A #define CPUID_MODEL_JAKETOWN 0x2D #define CPUID_MODEL_IVYBRIDGE 0x3A -#ifdef PRIVATE #define CPUID_MODEL_IVYBRIDGE_EP 0x3E #define CPUID_MODEL_CRYSTALWELL 0x46 -#endif #define CPUID_MODEL_HASWELL 0x3C #define CPUID_MODEL_HASWELL_SVR 0x3F #define CPUID_MODEL_HASWELL_ULT 0x45 #define CPUID_VMM_FAMILY_UNKNOWN 0x0 #define CPUID_VMM_FAMILY_VMWARE 0x1 +#define CPUID_VMM_FAMILY_PARALLELS 0x2 #ifndef ASSEMBLER #include @@ -196,7 +214,7 @@ typedef enum { eax, ebx, ecx, edx } cpuid_register_t; static inline void cpuid(uint32_t *data) { - asm("cpuid" + __asm__ volatile ("cpuid" : "=a" (data[eax]), "=b" (data[ebx]), "=c" (data[ecx]), @@ -210,7 +228,7 @@ cpuid(uint32_t *data) static inline void do_cpuid(uint32_t selector, uint32_t *data) { - asm("cpuid" + __asm__ volatile ("cpuid" : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index d7b7056b2..fb0eca6f9 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -56,7 +56,6 @@ /* */ -#include #include #include @@ -138,32 +137,19 @@ extern void xrstor64o(void); #define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM)) -/* DRK: TODO replace opcodes with mnemonics when assembler support available */ - static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) { - __asm__ __volatile__(".short 0x010F\n\t.byte 0xD1" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0)); -} - -static inline void xsave(void *a) { - /* MOD 0x4, operand ECX 0x1 */ - __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x21" :: "a"(XMASK), "d"(0), "c" (a)); + __asm__ __volatile__("xsetbv" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0)); } -static inline void xrstor(void *a) { - /* MOD 0x5, operand ECX 0x1 */ - __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x29" :: "a"(XMASK), "d"(0), "c" (a)); +static inline void xsave(struct x86_fx_thread_state *a) { + __asm__ __volatile__("xsave %0" :"=m" (*a) : "a"(XMASK), "d"(0)); } -static inline void xsave64(void *a) { - /* Out of line call that executes in 64-bit mode on K32 */ - __asm__ __volatile__("call _xsave64o" :: "a"(XMASK), "d"(0), "c" (a)); -} - -static inline void xrstor64(void *a) { - /* Out of line call that executes in 64-bit mode on K32 */ - __asm__ __volatile__("call _xrstor64o" :: "a"(XMASK), "d"(0), "c" (a)); +static inline void xrstor(struct x86_fx_thread_state *a) { + __asm__ __volatile__("xrstor %0" :: "m" (*a), "a"(XMASK), "d"(0)); } +#if DEBUG static inline unsigned short fnstsw(void) { @@ -171,6 +157,7 @@ fnstsw(void) __asm__ volatile("fnstsw %0" : "=ma" (status)); return(status); } +#endif /* * Configure the initial FPU state presented to new threads. 
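The CPUID_X86_64_H_*_SUBSET masks above feed the cpuid_set_info() change earlier in this patch: the CPU reports CPU_SUBTYPE_X86_64_H only when every required bit in all three masks is present (and the -disable_x86_64h boot-arg was not given). The eligibility test is the standard all-bits-set idiom; a small sketch with placeholder masks rather than the real CPUID_* values:

    #include <stdint.h>
    #include <stdbool.h>

    /* True only if every bit of 'required' is set in 'present'. */
    static bool
    has_all(uint64_t present, uint64_t required)
    {
        return (present & required) == required;
    }

    /* Placeholder feature bits, for illustration only. */
    #define REQ_FMA  (1ULL << 12)
    #define REQ_AVX  (1ULL << 28)

    static bool
    eligible_for_x86_64h(uint64_t features)
    {
        /* A single missing bit disqualifies the whole subset. */
        return has_all(features, REQ_FMA | REQ_AVX);
    }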
@@ -287,13 +274,14 @@ init_fpu(void) static void * fp_state_alloc(void) { - void *ifps = zalloc(ifps_zone); + struct x86_fx_thread_state *ifps = zalloc(ifps_zone); #if DEBUG if (!(ALIGNED(ifps,64))) { panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size); } #endif + bzero(ifps, sizeof(*ifps)); return ifps; } @@ -444,13 +432,14 @@ fpu_set_fxstate( x86_float_state64_t *state; pcb_t pcb; size_t state_size = sizeof(struct x86_fx_thread_state); - boolean_t old_valid; + boolean_t old_valid, fresh_state = FALSE; + if (fp_kind == FP_NO) - return KERN_FAILURE; + return KERN_FAILURE; if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) && !ml_fpu_avx_enabled()) - return KERN_FAILURE; + return KERN_FAILURE; state = (x86_float_state64_t *)tstate; @@ -458,94 +447,101 @@ fpu_set_fxstate( pcb = THREAD_TO_PCB(thr_act); if (state == NULL) { - /* - * new FPU state is 'invalid'. - * Deallocate the fp state if it exists. - */ - simple_lock(&pcb->lock); + /* + * new FPU state is 'invalid'. + * Deallocate the fp state if it exists. + */ + simple_lock(&pcb->lock); ifps = pcb->ifps; pcb->ifps = 0; - simple_unlock(&pcb->lock); + simple_unlock(&pcb->lock); - if (ifps != 0) - fp_state_free(ifps); + if (ifps != 0) { + fp_state_free(ifps); + } } else { - /* - * Valid state. Allocate the fp state if there is none. - */ - new_ifps = 0; - Retry: - simple_lock(&pcb->lock); + /* + * Valid incoming state. Allocate the fp state if there is none. + */ + new_ifps = 0; + Retry: + simple_lock(&pcb->lock); ifps = pcb->ifps; - if (ifps == 0) { - if (new_ifps == 0) { - simple_unlock(&pcb->lock); - new_ifps = fp_state_alloc(); - goto Retry; + if (ifps == 0) { + if (new_ifps == 0) { + simple_unlock(&pcb->lock); + new_ifps = fp_state_alloc(); + goto Retry; + } + ifps = new_ifps; + new_ifps = 0; + pcb->ifps = ifps; + fresh_state = TRUE; + } + + /* + * now copy over the new data. + */ + + old_valid = ifps->fp_valid; + +#if DEBUG || DEVELOPMENT + if ((fresh_state == FALSE) && (old_valid == FALSE) && (thr_act != current_thread())) { + panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act); } - ifps = new_ifps; - new_ifps = 0; - pcb->ifps = ifps; - } - /* - * now copy over the new data. - */ - old_valid = ifps->fp_valid; - -#if DEBUG - if ((old_valid == FALSE) && (thr_act != current_thread())) { - panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act); - } #endif - /* - * Clear any reserved bits in the MXCSR to prevent a GPF - * when issuing an FXRSTOR. - */ + /* + * Clear any reserved bits in the MXCSR to prevent a GPF + * when issuing an FXRSTOR. 
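The Retry loop in fpu_set_fxstate() above is the classic allocate-outside-the-lock idiom: a state block cannot be allocated while the pcb simple lock is held, so the lock is dropped, the allocation done, and the test repeated under the lock in case a racing thread installed a block in the window (the loser frees its copy). A sketch of the same idiom with a pthread mutex standing in for the simple lock (struct and function names are illustrative, not kernel code):

    #include <pthread.h>
    #include <stdlib.h>

    struct pcb { pthread_mutex_t lock; void *ifps; };

    static void *
    get_or_install_state(struct pcb *pcb, size_t size)
    {
        void *fresh = NULL;
        for (;;) {
            pthread_mutex_lock(&pcb->lock);
            if (pcb->ifps == NULL && fresh != NULL) {
                pcb->ifps = fresh;          /* install our allocation */
                fresh = NULL;
            }
            void *cur = pcb->ifps;
            pthread_mutex_unlock(&pcb->lock);
            if (cur != NULL) {
                free(fresh);                /* lost the race: discard ours */
                return cur;
            }
            fresh = calloc(1, size);        /* allocate with the lock dropped */
            if (fresh == NULL)
                return NULL;
        }
    }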
+ */ - state->fpu_mxcsr &= mxcsr_capability_mask; + state->fpu_mxcsr &= mxcsr_capability_mask; - bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size); + bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size); - if (fpu_YMM_present) { - struct x86_avx_thread_state *iavx = (void *) ifps; - uint32_t fpu_nyreg = 0; + if (fpu_YMM_present) { + struct x86_avx_thread_state *iavx = (void *) ifps; + uint32_t fpu_nyreg = 0; - if (f == x86_AVX_STATE32) - fpu_nyreg = 8; - else if (f == x86_AVX_STATE64) - fpu_nyreg = 16; + if (f == x86_AVX_STATE32) + fpu_nyreg = 8; + else if (f == x86_AVX_STATE64) + fpu_nyreg = 16; - if (fpu_nyreg) { - x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; - bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG)); + if (fpu_nyreg) { + x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; + bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG)); + } + + iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32; + /* Sanitize XSAVE header */ + bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd)); + if (fpu_nyreg) + iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87); + else + iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87); + } else { + ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; } + ifps->fp_valid = old_valid; - iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32; - /* Sanitize XSAVE header */ - bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd)); - if (fpu_nyreg) - iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87); - else - iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87); - } - else - ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; - ifps->fp_valid = old_valid; - - if (old_valid == FALSE) { - boolean_t istate = ml_set_interrupts_enabled(FALSE); - ifps->fp_valid = TRUE; - set_ts(); - ml_set_interrupts_enabled(istate); - } - - simple_unlock(&pcb->lock); - - if (new_ifps != 0) - fp_state_free(new_ifps); + if (old_valid == FALSE) { + boolean_t istate = ml_set_interrupts_enabled(FALSE); + ifps->fp_valid = TRUE; + /* If altering the current thread's state, disable FPU */ + if (thr_act == current_thread()) + set_ts(); + + ml_set_interrupts_enabled(istate); + } + + simple_unlock(&pcb->lock); + + if (new_ifps != 0) + fp_state_free(new_ifps); } return KERN_SUCCESS; } @@ -962,7 +958,7 @@ fpSSEexterrflt(void) * Locking not needed on pcb->ifps, * since thread is running. */ - assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); + i386_exception(EXC_ARITHMETIC, EXC_I386_SSEEXTERR, ifps->fx_MXCSR); diff --git a/osfmk/i386/genassym.c b/osfmk/i386/genassym.c index e575adf97..287aa8bd0 100644 --- a/osfmk/i386/genassym.c +++ b/osfmk/i386/genassym.c @@ -54,7 +54,6 @@ * the rights to redistribute these changes. */ -#include #include /* @@ -63,7 +62,6 @@ #include #include #include -#include #include #include #include @@ -86,6 +84,9 @@ #include #include +#undef offsetof +#include + #if CONFIG_DTRACE #define NEED_DTRACE_DEFS #include <../bsd/sys/lockstat.h> @@ -104,16 +105,8 @@ * the values, but we cannot run anything on the target machine. 
*/ -#undef offsetof -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE)0)->MEMBER) - -#if 0 -#define DECLARE(SYM,VAL) \ - __asm("#DEFINITION#\t.set\t" SYM ",\t%0" : : "n" ((u_int)(VAL))) -#else #define DECLARE(SYM,VAL) \ __asm("#DEFINITION##define " SYM "\t%0" : : "n" ((u_int)(VAL))) -#endif int main( int argc, @@ -131,74 +124,74 @@ main( DECLARE("MAX_CPUS", MAX_CPUS); /* Simple Lock structure */ - DECLARE("SLOCK_ILK", offsetof(usimple_lock_t, interlock)); + DECLARE("SLOCK_ILK", offsetof(usimple_lock_data_t, interlock)); #if MACH_LDEBUG - DECLARE("SLOCK_TYPE", offsetof(usimple_lock_t, lock_type)); - DECLARE("SLOCK_PC", offsetof(usimple_lock_t, debug.lock_pc)); - DECLARE("SLOCK_THREAD", offsetof(usimple_lock_t, debug.lock_thread)); - DECLARE("SLOCK_DURATIONH",offsetof(usimple_lock_t, debug.duration[0])); - DECLARE("SLOCK_DURATIONL",offsetof(usimple_lock_t, debug.duration[1])); + DECLARE("SLOCK_TYPE", offsetof(usimple_lock_data_t, lock_type)); + DECLARE("SLOCK_PC", offsetof(usimple_lock_data_t, debug.lock_pc)); + DECLARE("SLOCK_THREAD", offsetof(usimple_lock_data_t, debug.lock_thread)); + DECLARE("SLOCK_DURATIONH",offsetof(usimple_lock_data_t, debug.duration[0])); + DECLARE("SLOCK_DURATIONL",offsetof(usimple_lock_data_t, debug.duration[1])); DECLARE("USLOCK_TAG", USLOCK_TAG); #endif /* MACH_LDEBUG */ /* Mutex structure */ - DECLARE("MUTEX_OWNER", offsetof(lck_mtx_t *, lck_mtx_owner)); - DECLARE("MUTEX_PTR", offsetof(lck_mtx_t *, lck_mtx_ptr)); - DECLARE("MUTEX_STATE", offsetof(lck_mtx_t *, lck_mtx_state)); + DECLARE("MUTEX_OWNER", offsetof(lck_mtx_t, lck_mtx_owner)); + DECLARE("MUTEX_PTR", offsetof(lck_mtx_t, lck_mtx_ptr)); + DECLARE("MUTEX_STATE", offsetof(lck_mtx_t, lck_mtx_state)); DECLARE("MUTEX_IND", LCK_MTX_TAG_INDIRECT); - DECLARE("MUTEX_PTR", offsetof(lck_mtx_t *, lck_mtx_ptr)); + DECLARE("MUTEX_PTR", offsetof(lck_mtx_t, lck_mtx_ptr)); DECLARE("MUTEX_ASSERT_OWNED", LCK_MTX_ASSERT_OWNED); DECLARE("MUTEX_ASSERT_NOTOWNED",LCK_MTX_ASSERT_NOTOWNED); - DECLARE("GRP_MTX_STAT_UTIL", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt)); - DECLARE("GRP_MTX_STAT_MISS", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt)); - DECLARE("GRP_MTX_STAT_WAIT", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt)); + DECLARE("GRP_MTX_STAT_UTIL", offsetof(lck_grp_t, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt)); + DECLARE("GRP_MTX_STAT_MISS", offsetof(lck_grp_t, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt)); + DECLARE("GRP_MTX_STAT_WAIT", offsetof(lck_grp_t, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt)); /* x86 only */ DECLARE("MUTEX_DESTROYED", LCK_MTX_TAG_DESTROYED); /* Per-mutex statistic element */ - DECLARE("MTX_ACQ_TSC", offsetof(lck_mtx_ext_t *, lck_mtx_stat)); + DECLARE("MTX_ACQ_TSC", offsetof(lck_mtx_ext_t, lck_mtx_stat)); /* Mutex group statistics elements */ - DECLARE("MUTEX_GRP", offsetof(lck_mtx_ext_t *, lck_mtx_grp)); + DECLARE("MUTEX_GRP", offsetof(lck_mtx_ext_t, lck_mtx_grp)); /* * The use of this field is somewhat at variance with the alias. 
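A note on the genassym.c conversion that begins above and continues through the rest of this hunk: every offsetof() now names the object type (lck_mtx_t, struct thread, cpu_data_t) instead of a pointer type, which is what lets the file drop its hand-rolled offsetof in favor of the standard one from <stddef.h>. genassym.c is compiled for the target but never executed; the DECLARE() macro emits each constant into the assembly output, from which the build scrapes a header of offsets for the .s sources. A self-contained sketch of the technique (the struct and symbol names are invented for illustration):

    #include <stddef.h>

    struct example_thread {
        long recover;
        long continuation;
        long kernel_stack;
    };

    /* Embed "#define SYM value" markers in the generated assembly; the
     * build greps them back out, so this file is compiled but never run. */
    #define DECLARE(SYM, VAL) \
        __asm("#DEFINITION##define " SYM "\t%0" : : "n" ((unsigned int)(VAL)))

    void
    declarations(void)
    {
        /* offsetof() takes the struct type itself, never a pointer type. */
        DECLARE("TH_RECOVER", offsetof(struct example_thread, recover));
        DECLARE("TH_KERNEL_STACK", offsetof(struct example_thread, kernel_stack));
    }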
*/ - DECLARE("GRP_MTX_STAT_DIRECT_WAIT", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt)); + DECLARE("GRP_MTX_STAT_DIRECT_WAIT", offsetof(lck_grp_t, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt)); - DECLARE("GRP_MTX_STAT_HELD_MAX", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max)); + DECLARE("GRP_MTX_STAT_HELD_MAX", offsetof(lck_grp_t, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max)); /* Reader writer lock types */ DECLARE("RW_SHARED", LCK_RW_TYPE_SHARED); DECLARE("RW_EXCL", LCK_RW_TYPE_EXCLUSIVE); - DECLARE("TH_RECOVER", offsetof(thread_t, recover)); - DECLARE("TH_CONTINUATION", offsetof(thread_t, continuation)); - DECLARE("TH_KERNEL_STACK", offsetof(thread_t, kernel_stack)); - DECLARE("TH_MUTEX_COUNT", offsetof(thread_t, mutex_count)); - DECLARE("TH_WAS_PROMOTED_ON_WAKEUP", offsetof(thread_t, was_promoted_on_wakeup)); - DECLARE("TH_IOTIER_OVERRIDE", offsetof(thread_t, iotier_override)); + DECLARE("TH_RECOVER", offsetof(struct thread, recover)); + DECLARE("TH_CONTINUATION", offsetof(struct thread, continuation)); + DECLARE("TH_KERNEL_STACK", offsetof(struct thread, kernel_stack)); + DECLARE("TH_MUTEX_COUNT", offsetof(struct thread, mutex_count)); + DECLARE("TH_WAS_PROMOTED_ON_WAKEUP", offsetof(struct thread, was_promoted_on_wakeup)); + DECLARE("TH_IOTIER_OVERRIDE", offsetof(struct thread, iotier_override)); - DECLARE("TH_SYSCALLS_MACH", offsetof(thread_t, syscalls_mach)); - DECLARE("TH_SYSCALLS_UNIX", offsetof(thread_t, syscalls_unix)); + DECLARE("TH_SYSCALLS_MACH", offsetof(struct thread, syscalls_mach)); + DECLARE("TH_SYSCALLS_UNIX", offsetof(struct thread, syscalls_unix)); - DECLARE("TASK_VTIMERS", offsetof(struct task *, vtimers)); + DECLARE("TASK_VTIMERS", offsetof(struct task, vtimers)); /* These fields are being added on demand */ - DECLARE("TH_TASK", offsetof(thread_t, task)); - DECLARE("TH_AST", offsetof(thread_t, ast)); - DECLARE("TH_MAP", offsetof(thread_t, map)); - DECLARE("TH_SPF", offsetof(thread_t, machine.specFlags)); - DECLARE("TH_PCB_ISS", offsetof(thread_t, machine.iss)); - DECLARE("TH_PCB_IDS", offsetof(thread_t, machine.ids)); - DECLARE("TH_PCB_FPS", offsetof(thread_t, machine.ifps)); + DECLARE("TH_TASK", offsetof(struct thread, task)); + DECLARE("TH_AST", offsetof(struct thread, ast)); + DECLARE("TH_MAP", offsetof(struct thread, map)); + DECLARE("TH_SPF", offsetof(struct thread, machine.specFlags)); + DECLARE("TH_PCB_ISS", offsetof(struct thread, machine.iss)); + DECLARE("TH_PCB_IDS", offsetof(struct thread, machine.ids)); + DECLARE("TH_PCB_FPS", offsetof(struct thread, machine.ifps)); #if NCOPY_WINDOWS > 0 - DECLARE("TH_COPYIO_STATE", offsetof(thread_t, machine.copyio_state)); + DECLARE("TH_COPYIO_STATE", offsetof(struct thread, machine.copyio_state)); DECLARE("WINDOWS_CLEAN", WINDOWS_CLEAN); #endif - DECLARE("TH_RWLOCK_COUNT", offsetof(thread_t, rwlock_count)); + DECLARE("TH_RWLOCK_COUNT", offsetof(struct thread, rwlock_count)); - DECLARE("MAP_PMAP", offsetof(vm_map_t, pmap)); + DECLARE("MAP_PMAP", offsetof(struct _vm_map, pmap)); #define IEL_SIZE (sizeof(struct i386_exception_link *)) DECLARE("IKS_SIZE", sizeof(struct x86_kernel_state)); @@ -206,40 +199,40 @@ main( /* * KSS_* are offsets from the top of the kernel stack (cpu_kernel_stack) */ - DECLARE("KSS_RBX", offsetof(struct x86_kernel_state *, k_rbx)); - DECLARE("KSS_RSP", offsetof(struct x86_kernel_state *, k_rsp)); - DECLARE("KSS_RBP", offsetof(struct x86_kernel_state *, k_rbp)); - DECLARE("KSS_R12", offsetof(struct x86_kernel_state *, k_r12)); - 
DECLARE("KSS_R13", offsetof(struct x86_kernel_state *, k_r13)); - DECLARE("KSS_R14", offsetof(struct x86_kernel_state *, k_r14)); - DECLARE("KSS_R15", offsetof(struct x86_kernel_state *, k_r15)); - DECLARE("KSS_RIP", offsetof(struct x86_kernel_state *, k_rip)); + DECLARE("KSS_RBX", offsetof(struct x86_kernel_state, k_rbx)); + DECLARE("KSS_RSP", offsetof(struct x86_kernel_state, k_rsp)); + DECLARE("KSS_RBP", offsetof(struct x86_kernel_state, k_rbp)); + DECLARE("KSS_R12", offsetof(struct x86_kernel_state, k_r12)); + DECLARE("KSS_R13", offsetof(struct x86_kernel_state, k_r13)); + DECLARE("KSS_R14", offsetof(struct x86_kernel_state, k_r14)); + DECLARE("KSS_R15", offsetof(struct x86_kernel_state, k_r15)); + DECLARE("KSS_RIP", offsetof(struct x86_kernel_state, k_rip)); - DECLARE("DS_DR0", offsetof(struct x86_debug_state32 *, dr0)); - DECLARE("DS_DR1", offsetof(struct x86_debug_state32 *, dr1)); - DECLARE("DS_DR2", offsetof(struct x86_debug_state32 *, dr2)); - DECLARE("DS_DR3", offsetof(struct x86_debug_state32 *, dr3)); - DECLARE("DS_DR4", offsetof(struct x86_debug_state32 *, dr4)); - DECLARE("DS_DR5", offsetof(struct x86_debug_state32 *, dr5)); - DECLARE("DS_DR6", offsetof(struct x86_debug_state32 *, dr6)); - DECLARE("DS_DR7", offsetof(struct x86_debug_state32 *, dr7)); - - DECLARE("DS64_DR0", offsetof(struct x86_debug_state64 *, dr0)); - DECLARE("DS64_DR1", offsetof(struct x86_debug_state64 *, dr1)); - DECLARE("DS64_DR2", offsetof(struct x86_debug_state64 *, dr2)); - DECLARE("DS64_DR3", offsetof(struct x86_debug_state64 *, dr3)); - DECLARE("DS64_DR4", offsetof(struct x86_debug_state64 *, dr4)); - DECLARE("DS64_DR5", offsetof(struct x86_debug_state64 *, dr5)); - DECLARE("DS64_DR6", offsetof(struct x86_debug_state64 *, dr6)); - DECLARE("DS64_DR7", offsetof(struct x86_debug_state64 *, dr7)); - - DECLARE("FP_VALID", offsetof(struct x86_fx_thread_state *,fp_valid)); - - DECLARE("SS_FLAVOR", offsetof(x86_saved_state_t *, flavor)); + DECLARE("DS_DR0", offsetof(struct x86_debug_state32, dr0)); + DECLARE("DS_DR1", offsetof(struct x86_debug_state32, dr1)); + DECLARE("DS_DR2", offsetof(struct x86_debug_state32, dr2)); + DECLARE("DS_DR3", offsetof(struct x86_debug_state32, dr3)); + DECLARE("DS_DR4", offsetof(struct x86_debug_state32, dr4)); + DECLARE("DS_DR5", offsetof(struct x86_debug_state32, dr5)); + DECLARE("DS_DR6", offsetof(struct x86_debug_state32, dr6)); + DECLARE("DS_DR7", offsetof(struct x86_debug_state32, dr7)); + + DECLARE("DS64_DR0", offsetof(struct x86_debug_state64, dr0)); + DECLARE("DS64_DR1", offsetof(struct x86_debug_state64, dr1)); + DECLARE("DS64_DR2", offsetof(struct x86_debug_state64, dr2)); + DECLARE("DS64_DR3", offsetof(struct x86_debug_state64, dr3)); + DECLARE("DS64_DR4", offsetof(struct x86_debug_state64, dr4)); + DECLARE("DS64_DR5", offsetof(struct x86_debug_state64, dr5)); + DECLARE("DS64_DR6", offsetof(struct x86_debug_state64, dr6)); + DECLARE("DS64_DR7", offsetof(struct x86_debug_state64, dr7)); + + DECLARE("FP_VALID", offsetof(struct x86_fx_thread_state,fp_valid)); + + DECLARE("SS_FLAVOR", offsetof(x86_saved_state_t, flavor)); DECLARE("SS_32", x86_SAVED_STATE32); DECLARE("SS_64", x86_SAVED_STATE64); -#define R_(x) offsetof(x86_saved_state_t *, ss_32.x) +#define R_(x) offsetof(x86_saved_state_t, ss_32.x) DECLARE("R32_CS", R_(cs)); DECLARE("R32_SS", R_(ss)); DECLARE("R32_DS", R_(ds)); @@ -261,7 +254,7 @@ main( DECLARE("R32_CR2", R_(cr2)); DECLARE("ISS32_SIZE", sizeof (x86_saved_state32_t)); -#define R64_(x) offsetof(x86_saved_state_t *, ss_64.x) +#define R64_(x) 
offsetof(x86_saved_state_t, ss_64.x) DECLARE("R64_FS", R64_(fs)); DECLARE("R64_GS", R64_(gs)); DECLARE("R64_R8", R64_(r8)); @@ -279,9 +272,6 @@ main( DECLARE("R64_RDX", R64_(rdx)); DECLARE("R64_RSI", R64_(rsi)); DECLARE("R64_RDI", R64_(rdi)); - DECLARE("R64_V_ARG6", R64_(v_arg6)); - DECLARE("R64_V_ARG7", R64_(v_arg7)); - DECLARE("R64_V_ARG8", R64_(v_arg8)); DECLARE("R64_CS", R64_(isf.cs)); DECLARE("R64_SS", R64_(isf.ss)); DECLARE("R64_RSP", R64_(isf.rsp)); @@ -294,7 +284,7 @@ main( DECLARE("ISS64_OFFSET", R64_(isf)); DECLARE("ISS64_SIZE", sizeof (x86_saved_state64_t)); -#define ISF64_(x) offsetof(x86_64_intr_stack_frame_t *, x) +#define ISF64_(x) offsetof(x86_64_intr_stack_frame_t, x) DECLARE("ISF64_TRAPNO", ISF64_(trapno)); DECLARE("ISF64_TRAPFN", ISF64_(trapfn)); DECLARE("ISF64_ERR", ISF64_(err)); @@ -349,109 +339,109 @@ main( DECLARE("SYSCALL_CS", SYSCALL_CS); DECLARE("CPU_THIS", - offsetof(cpu_data_t *, cpu_this)); + offsetof(cpu_data_t, cpu_this)); DECLARE("CPU_ACTIVE_THREAD", - offsetof(cpu_data_t *, cpu_active_thread)); + offsetof(cpu_data_t, cpu_active_thread)); DECLARE("CPU_ACTIVE_STACK", - offsetof(cpu_data_t *, cpu_active_stack)); + offsetof(cpu_data_t, cpu_active_stack)); DECLARE("CPU_KERNEL_STACK", - offsetof(cpu_data_t *, cpu_kernel_stack)); + offsetof(cpu_data_t, cpu_kernel_stack)); DECLARE("CPU_INT_STACK_TOP", - offsetof(cpu_data_t *, cpu_int_stack_top)); + offsetof(cpu_data_t, cpu_int_stack_top)); #if MACH_RT DECLARE("CPU_PREEMPTION_LEVEL", - offsetof(cpu_data_t *, cpu_preemption_level)); + offsetof(cpu_data_t, cpu_preemption_level)); #endif /* MACH_RT */ DECLARE("CPU_HIBERNATE", - offsetof(cpu_data_t *, cpu_hibernate)); + offsetof(cpu_data_t, cpu_hibernate)); DECLARE("CPU_INTERRUPT_LEVEL", - offsetof(cpu_data_t *, cpu_interrupt_level)); + offsetof(cpu_data_t, cpu_interrupt_level)); DECLARE("CPU_NESTED_ISTACK", - offsetof(cpu_data_t *, cpu_nested_istack)); + offsetof(cpu_data_t, cpu_nested_istack)); DECLARE("CPU_NUMBER_GS", - offsetof(cpu_data_t *,cpu_number)); + offsetof(cpu_data_t,cpu_number)); DECLARE("CPU_RUNNING", - offsetof(cpu_data_t *,cpu_running)); + offsetof(cpu_data_t,cpu_running)); DECLARE("CPU_PENDING_AST", - offsetof(cpu_data_t *,cpu_pending_ast)); + offsetof(cpu_data_t,cpu_pending_ast)); DECLARE("CPU_DESC_TABLEP", - offsetof(cpu_data_t *,cpu_desc_tablep)); + offsetof(cpu_data_t,cpu_desc_tablep)); DECLARE("CPU_DESC_INDEX", - offsetof(cpu_data_t *,cpu_desc_index)); + offsetof(cpu_data_t,cpu_desc_index)); DECLARE("CDI_GDT", - offsetof(cpu_desc_index_t *,cdi_gdt)); + offsetof(cpu_desc_index_t,cdi_gdt)); DECLARE("CDI_IDT", - offsetof(cpu_desc_index_t *,cdi_idt)); + offsetof(cpu_desc_index_t,cdi_idt)); DECLARE("CPU_PROCESSOR", - offsetof(cpu_data_t *,cpu_processor)); + offsetof(cpu_data_t,cpu_processor)); DECLARE("CPU_INT_STATE", - offsetof(cpu_data_t *, cpu_int_state)); + offsetof(cpu_data_t, cpu_int_state)); DECLARE("CPU_INT_EVENT_TIME", - offsetof(cpu_data_t *, cpu_int_event_time)); + offsetof(cpu_data_t, cpu_int_event_time)); DECLARE("CPU_TASK_CR3", - offsetof(cpu_data_t *, cpu_task_cr3)); + offsetof(cpu_data_t, cpu_task_cr3)); DECLARE("CPU_ACTIVE_CR3", - offsetof(cpu_data_t *, cpu_active_cr3)); + offsetof(cpu_data_t, cpu_active_cr3)); DECLARE("CPU_KERNEL_CR3", - offsetof(cpu_data_t *, cpu_kernel_cr3)); + offsetof(cpu_data_t, cpu_kernel_cr3)); DECLARE("CPU_TLB_INVALID", - offsetof(cpu_data_t *, cpu_tlb_invalid)); + offsetof(cpu_data_t, cpu_tlb_invalid)); DECLARE("CPU_TASK_MAP", - offsetof(cpu_data_t *, cpu_task_map)); + offsetof(cpu_data_t, cpu_task_map)); 
DECLARE("TASK_MAP_32BIT", TASK_MAP_32BIT); DECLARE("TASK_MAP_64BIT", TASK_MAP_64BIT); DECLARE("CPU_UBER_USER_GS_BASE", - offsetof(cpu_data_t *, cpu_uber.cu_user_gs_base)); + offsetof(cpu_data_t, cpu_uber.cu_user_gs_base)); DECLARE("CPU_UBER_ISF", - offsetof(cpu_data_t *, cpu_uber.cu_isf)); + offsetof(cpu_data_t, cpu_uber.cu_isf)); DECLARE("CPU_UBER_TMP", - offsetof(cpu_data_t *, cpu_uber.cu_tmp)); + offsetof(cpu_data_t, cpu_uber.cu_tmp)); DECLARE("CPU_NANOTIME", - offsetof(cpu_data_t *, cpu_nanotime)); + offsetof(cpu_data_t, cpu_nanotime)); DECLARE("CPU_DR7", - offsetof(cpu_data_t *, cpu_dr7)); + offsetof(cpu_data_t, cpu_dr7)); - DECLARE("hwIntCnt", offsetof(cpu_data_t *,cpu_hwIntCnt)); + DECLARE("hwIntCnt", offsetof(cpu_data_t,cpu_hwIntCnt)); DECLARE("CPU_ACTIVE_PCID", - offsetof(cpu_data_t *, cpu_active_pcid)); + offsetof(cpu_data_t, cpu_active_pcid)); DECLARE("CPU_PCID_COHERENTP", - offsetof(cpu_data_t *, cpu_pmap_pcid_coherentp)); + offsetof(cpu_data_t, cpu_pmap_pcid_coherentp)); DECLARE("CPU_PCID_COHERENTP_KERNEL", - offsetof(cpu_data_t *, cpu_pmap_pcid_coherentp_kernel)); + offsetof(cpu_data_t, cpu_pmap_pcid_coherentp_kernel)); DECLARE("CPU_PMAP_PCID_ENABLED", - offsetof(cpu_data_t *, cpu_pmap_pcid_enabled)); + offsetof(cpu_data_t, cpu_pmap_pcid_enabled)); #ifdef PCID_STATS DECLARE("CPU_PMAP_USER_RETS", - offsetof(cpu_data_t *, cpu_pmap_user_rets)); + offsetof(cpu_data_t, cpu_pmap_user_rets)); DECLARE("CPU_PMAP_PCID_PRESERVES", - offsetof(cpu_data_t *, cpu_pmap_pcid_preserves)); + offsetof(cpu_data_t, cpu_pmap_pcid_preserves)); DECLARE("CPU_PMAP_PCID_FLUSHES", - offsetof(cpu_data_t *, cpu_pmap_pcid_flushes)); + offsetof(cpu_data_t, cpu_pmap_pcid_flushes)); #endif DECLARE("CPU_TLB_INVALID", - offsetof(cpu_data_t *, cpu_tlb_invalid)); + offsetof(cpu_data_t, cpu_tlb_invalid)); DECLARE("CPU_TLB_INVALID_LOCAL", - offsetof(cpu_data_t *, cpu_tlb_invalid_local)); + offsetof(cpu_data_t, cpu_tlb_invalid_local)); DECLARE("CPU_TLB_INVALID_GLOBAL", - offsetof(cpu_data_t *, cpu_tlb_invalid_global)); + offsetof(cpu_data_t, cpu_tlb_invalid_global)); DECLARE("enaExpTrace", enaExpTrace); DECLARE("enaUsrFCall", enaUsrFCall); DECLARE("enaUsrPhyMp", enaUsrPhyMp); DECLARE("enaDiagSCs", enaDiagSCs); DECLARE("enaDiagEM", enaDiagEM); DECLARE("enaNotifyEM", enaNotifyEM); - DECLARE("dgLock", offsetof(struct diagWork *, dgLock)); - DECLARE("dgFlags", offsetof(struct diagWork *, dgFlags)); - DECLARE("dgMisc1", offsetof(struct diagWork *, dgMisc1)); - DECLARE("dgMisc2", offsetof(struct diagWork *, dgMisc2)); - DECLARE("dgMisc3", offsetof(struct diagWork *, dgMisc3)); - DECLARE("dgMisc4", offsetof(struct diagWork *, dgMisc4)); - DECLARE("dgMisc5", offsetof(struct diagWork *, dgMisc5)); + DECLARE("dgLock", offsetof(struct diagWork, dgLock)); + DECLARE("dgFlags", offsetof(struct diagWork, dgFlags)); + DECLARE("dgMisc1", offsetof(struct diagWork, dgMisc1)); + DECLARE("dgMisc2", offsetof(struct diagWork, dgMisc2)); + DECLARE("dgMisc3", offsetof(struct diagWork, dgMisc3)); + DECLARE("dgMisc4", offsetof(struct diagWork, dgMisc4)); + DECLARE("dgMisc5", offsetof(struct diagWork, dgMisc5)); DECLARE("INTEL_PTE_KERNEL", INTEL_PTE_VALID|INTEL_PTE_WRITE); DECLARE("PDESHIFT", PDESHIFT); @@ -462,11 +452,11 @@ main( (LINEAR_KERNEL_ADDRESS >> PDESHIFT) * sizeof(pt_entry_t)); - DECLARE("TSS_ESP0", offsetof(struct i386_tss *, esp0)); - DECLARE("TSS_SS0", offsetof(struct i386_tss *, ss0)); - DECLARE("TSS_LDT", offsetof(struct i386_tss *, ldt)); - DECLARE("TSS_PDBR", offsetof(struct i386_tss *, cr3)); - DECLARE("TSS_LINK", 
offsetof(struct i386_tss *, back_link)); + DECLARE("TSS_ESP0", offsetof(struct i386_tss, esp0)); + DECLARE("TSS_SS0", offsetof(struct i386_tss, ss0)); + DECLARE("TSS_LDT", offsetof(struct i386_tss, ldt)); + DECLARE("TSS_PDBR", offsetof(struct i386_tss, cr3)); + DECLARE("TSS_LINK", offsetof(struct i386_tss, back_link)); DECLARE("K_TASK_GATE", ACC_P|ACC_PL_K|ACC_TASK_GATE); DECLARE("K_TRAP_GATE", ACC_P|ACC_PL_K|ACC_TRAP_GATE); @@ -478,52 +468,52 @@ main( /* * usimple_lock fields */ - DECLARE("USL_INTERLOCK", offsetof(usimple_lock_t, interlock)); + DECLARE("USL_INTERLOCK", offsetof(usimple_lock_data_t, interlock)); DECLARE("INTSTACK_SIZE", INTSTACK_SIZE); - DECLARE("KADDR", offsetof(struct boot_args *, kaddr)); - DECLARE("KSIZE", offsetof(struct boot_args *, ksize)); - DECLARE("MEMORYMAP", offsetof(struct boot_args *, MemoryMap)); - DECLARE("DEVICETREEP", offsetof(struct boot_args *, deviceTreeP)); + DECLARE("KADDR", offsetof(struct boot_args, kaddr)); + DECLARE("KSIZE", offsetof(struct boot_args, ksize)); + DECLARE("MEMORYMAP", offsetof(struct boot_args, MemoryMap)); + DECLARE("DEVICETREEP", offsetof(struct boot_args, deviceTreeP)); DECLARE("RNT_TSC_BASE", - offsetof(pal_rtc_nanotime_t *, tsc_base)); + offsetof(pal_rtc_nanotime_t, tsc_base)); DECLARE("RNT_NS_BASE", - offsetof(pal_rtc_nanotime_t *, ns_base)); + offsetof(pal_rtc_nanotime_t, ns_base)); DECLARE("RNT_SCALE", - offsetof(pal_rtc_nanotime_t *, scale)); + offsetof(pal_rtc_nanotime_t, scale)); DECLARE("RNT_SHIFT", - offsetof(pal_rtc_nanotime_t *, shift)); + offsetof(pal_rtc_nanotime_t, shift)); DECLARE("RNT_GENERATION", - offsetof(pal_rtc_nanotime_t *, generation)); + offsetof(pal_rtc_nanotime_t, generation)); /* values from kern/timer.h */ #ifdef __LP64__ - DECLARE("TIMER_ALL", offsetof(struct timer *, all_bits)); + DECLARE("TIMER_ALL", offsetof(struct timer, all_bits)); #else - DECLARE("TIMER_LOW", offsetof(struct timer *, low_bits)); - DECLARE("TIMER_HIGH", offsetof(struct timer *, high_bits)); - DECLARE("TIMER_HIGHCHK", offsetof(struct timer *, high_bits_check)); + DECLARE("TIMER_LOW", offsetof(struct timer, low_bits)); + DECLARE("TIMER_HIGH", offsetof(struct timer, high_bits)); + DECLARE("TIMER_HIGHCHK", offsetof(struct timer, high_bits_check)); #endif DECLARE("TIMER_TSTAMP", - offsetof(struct timer *, tstamp)); + offsetof(struct timer, tstamp)); DECLARE("THREAD_TIMER", - offsetof(struct processor *, processor_data.thread_timer)); + offsetof(struct processor, processor_data.thread_timer)); DECLARE("KERNEL_TIMER", - offsetof(struct processor *, processor_data.kernel_timer)); + offsetof(struct processor, processor_data.kernel_timer)); DECLARE("SYSTEM_TIMER", - offsetof(struct thread *, system_timer)); + offsetof(struct thread, system_timer)); DECLARE("USER_TIMER", - offsetof(struct thread *, user_timer)); + offsetof(struct thread, user_timer)); DECLARE("SYSTEM_STATE", - offsetof(struct processor *, processor_data.system_state)); + offsetof(struct processor, processor_data.system_state)); DECLARE("USER_STATE", - offsetof(struct processor *, processor_data.user_state)); + offsetof(struct processor, processor_data.user_state)); DECLARE("IDLE_STATE", - offsetof(struct processor *, processor_data.idle_state)); + offsetof(struct processor, processor_data.idle_state)); DECLARE("CURRENT_STATE", - offsetof(struct processor *, processor_data.current_state)); + offsetof(struct processor, processor_data.current_state)); DECLARE("OnProc", OnProc); diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 4213489eb..0a7df1871 100644 --- 
a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -54,7 +54,6 @@ * the rights to redistribute these changes. */ -#include #include @@ -73,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -192,12 +192,19 @@ physmap_init(void) } * physmapL2 = ALLOCPAGES(NPHYSMAP); uint64_t i; - uint8_t phys_random_L3 = ml_early_random() & 0xFF; + uint8_t phys_random_L3 = early_random() & 0xFF; /* We assume NX support. Mark all levels of the PHYSMAP NX * to avoid granting executability via a single bit flip. */ - assert(cpuid_extfeatures() & CPUID_EXTFEATURE_XD); +#if DEVELOPMENT || DEBUG + uint32_t reg[4]; + do_cpuid(0x80000000, reg); + if (reg[eax] >= 0x80000001) { + do_cpuid(0x80000001, reg); + assert(reg[edx] & CPUID_EXTFEATURE_XD); + } +#endif /* DEVELOPMENT || DEBUG */ for(i = 0; i < NPHYSMAP; i++) { physmapL3[i + phys_random_L3] = @@ -344,27 +351,26 @@ vstart(vm_offset_t boot_args_start) kernelBootArgs, &kernelBootArgs->ksize, &kernelBootArgs->kaddr); - - postcode(VSTART_IDLE_PTS_INIT); - - Idle_PTs_init(); - - first_avail = (vm_offset_t)ID_MAP_VTOP(physfree); - - cpu = 0; - cpu_data_alloc(TRUE); - - /* * Setup boot args given the physical start address. + * Note: PE_init_platform needs to be called before Idle_PTs_init + * because access to the DeviceTree is required to read the + * random seed before generating a random physical map slide. */ kernelBootArgs = (boot_args *) ml_static_ptovirt(boot_args_start); DBG("i386_init(0x%lx) kernelBootArgs=%p\n", (unsigned long)boot_args_start, kernelBootArgs); - PE_init_platform(FALSE, kernelBootArgs); postcode(PE_INIT_PLATFORM_D); + + Idle_PTs_init(); + postcode(VSTART_IDLE_PTS_INIT); + + first_avail = (vm_offset_t)ID_MAP_VTOP(physfree); + + cpu = 0; + cpu_data_alloc(TRUE); } else { /* Switch to kernel's page tables (from the Boot PTs) */ set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4)); @@ -389,6 +395,11 @@ vstart(vm_offset_t boot_args_start) cpu_datap(cpu)->cpu_int_stack_top); } +void +pstate_trace(void) +{ +} + /* * Cpu initialization. Running virtual, but without MACH VM * set up. @@ -405,6 +416,11 @@ i386_init(void) postcode(I386_INIT_ENTRY); pal_i386_init(); + tsc_init(); + rtclock_early_init(); /* mach_absolute_time() now functional */ + + kernel_debug_string("i386_init"); + pstate_trace(); #if CONFIG_MCA /* Initialize machine-check handling */ @@ -420,8 +436,10 @@ i386_init(void) panic_init(); /* Init this in case we need debugger */ /* setup debugging output if one has been chosen */ + kernel_debug_string("PE_init_kprintf"); PE_init_kprintf(FALSE); + kernel_debug_string("kernel_early_bootstrap"); kernel_early_bootstrap(); if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags))) @@ -438,6 +456,7 @@ i386_init(void) } /* setup console output */ + kernel_debug_string("PE_init_printf"); PE_init_printf(FALSE); kprintf("version_variant = %s\n", version_variant); @@ -456,8 +475,9 @@ i386_init(void) /* * debug support for > 4G systems */ - if (!PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode))) - vm_himemory_mode = 0; + PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode)); + if (vm_himemory_mode != 0) + kprintf("himemory_mode: %d\n", vm_himemory_mode); if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn))) force_immediate_debugger_NMI = FALSE; @@ -476,8 +496,9 @@ i386_init(void) /* * VM initialization, after this we're using page tables... - * The maximum number of cpus must be set beforehand.
+ * The maximum number of cpus must be set beforehand. */ + kernel_debug_string("i386_vm_init"); i386_vm_init(maxmemtouse, IA32e, kernelBootArgs); /* create the console for verbose or pretty mode */ @@ -485,12 +506,15 @@ PE_init_platform(TRUE, kernelBootArgs); PE_create_console(); - tsc_init(); + kernel_debug_string("power_management_init"); power_management_init(); processor_bootstrap(); thread_bootstrap(); + pstate_trace(); + kernel_debug_string("machine_startup"); machine_startup(); + pstate_trace(); } static void diff --git a/osfmk/i386/i386_lock.s b/osfmk/i386/i386_lock.s index 70d117605..bceb1559d 100644 --- a/osfmk/i386/i386_lock.s +++ b/osfmk/i386/i386_lock.s @@ -36,7 +36,6 @@ */ #include -#include #include #include #include @@ -50,41 +49,6 @@ #include -/* - * When performance isn't the only concern, it's - * nice to build stack frames... - */ -#define BUILD_STACK_FRAMES (GPROF) - -#if BUILD_STACK_FRAMES - -/* Stack-frame-relative: */ -#define L_PC B_PC -#define L_ARG0 B_ARG0 -#define L_ARG1 B_ARG1 - -#define LEAF_ENTRY(name) \ - Entry(name); \ - FRAME; \ - MCOUNT - -#define LEAF_ENTRY2(n1,n2) \ - Entry(n1); \ - Entry(n2); \ - FRAME; \ - MCOUNT - -#define LEAF_RET \ - EMARF; \ - ret - -#else /* BUILD_STACK_FRAMES */ - -/* Stack-pointer-relative: */ -#define L_PC S_PC -#define L_ARG0 S_ARG0 -#define L_ARG1 S_ARG1 - #define LEAF_ENTRY(name) \ Entry(name) @@ -95,21 +59,16 @@ #define LEAF_RET \ ret -#endif /* BUILD_STACK_FRAMES */ - - /* Non-leaf routines always have a stack frame: */ #define NONLEAF_ENTRY(name) \ Entry(name); \ - FRAME; \ - MCOUNT + FRAME #define NONLEAF_ENTRY2(n1,n2) \ Entry(n1); \ Entry(n2); \ - FRAME; \ - MCOUNT + FRAME #define NONLEAF_RET \ EMARF; \ ret @@ -286,21 +245,13 @@ * word is loaded/stored to the pointer */ -#define HW_LOCK_REGISTER %rdi -#define LOAD_HW_LOCK_REGISTER -#define HW_LOCK_THREAD_REGISTER %rcx -#define LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER -#define HW_LOCK_MOV_WORD movq -#define HW_LOCK_EXAM_REGISTER %rax - /* * void hw_lock_init(hw_lock_t) * * Initialize a hardware lock. */ LEAF_ENTRY(hw_lock_init) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER) /* clear the lock */ + movq $0, (%rdi) /* clear the lock */ LEAF_RET @@ -310,8 +261,7 @@ LEAF_ENTRY(hw_lock_init) * Initialize a hardware byte lock. */ LEAF_ENTRY(hw_lock_byte_init) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - movb $0, (HW_LOCK_REGISTER) /* clear the lock */ + movb $0, (%rdi) /* clear the lock */ LEAF_RET /* @@ -321,15 +271,14 @@ LEAF_ENTRY(hw_lock_byte_init) * void hw_lock_lock(hw_lock_t) * * Acquire lock, spinning until it becomes available. * MACH_RT: also return with preemption disabled. */ LEAF_ENTRY(hw_lock_lock) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - LOAD_HW_LOCK_THREAD_REGISTER /* get thread pointer */ + mov %gs:CPU_ACTIVE_THREAD, %rcx /* get thread pointer */ PREEMPTION_DISABLE 1: - mov (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER /* lock locked? */ + mov (%rdi), %rax + test %rax,%rax /* lock locked?
*/ jne 3f /* branch if so */ - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ + lock; cmpxchg %rcx,(%rdi) /* try to acquire the HW lock */ jne 3f movl $1,%eax /* In case this was a timeout call */ LEAF_RET /* if yes, then nothing left to do */ @@ -345,14 +294,13 @@ LEAF_ENTRY(hw_lock_lock) */ LEAF_ENTRY(hw_lock_byte_lock) - LOAD_HW_LOCK_REGISTER /* Load lock pointer */ PREEMPTION_DISABLE movl $1, %ecx /* Set lock value */ 1: - movb (HW_LOCK_REGISTER), %al /* Load byte at address */ + movb (%rdi), %al /* Load byte at address */ testb %al,%al /* lock locked? */ jne 3f /* branch if so */ - lock; cmpxchg %cl,(HW_LOCK_REGISTER) /* attempt atomic compare exchange */ + lock; cmpxchg %cl,(%rdi) /* attempt atomic compare exchange */ jne 3f LEAF_RET /* if yes, then nothing left to do */ 3: @@ -367,8 +315,7 @@ LEAF_ENTRY(hw_lock_byte_lock) */ LEAF_ENTRY(hw_lock_to) 1: - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - LOAD_HW_LOCK_THREAD_REGISTER + mov %gs:CPU_ACTIVE_THREAD, %rcx /* * Attempt to grab the lock immediately @@ -376,10 +323,10 @@ LEAF_ENTRY(hw_lock_to) */ PREEMPTION_DISABLE - mov (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER /* lock locked? */ + mov (%rdi), %rax + test %rax,%rax /* lock locked? */ jne 2f /* branch if so */ - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ + lock; cmpxchg %rcx,(%rdi) /* try to acquire the HW lock */ jne 2f /* branch on failure */ movl $1,%eax LEAF_RET @@ -405,8 +352,8 @@ LEAF_ENTRY(hw_lock_to) mov $(INNER_LOOP_COUNT),%r9 5: PAUSE /* pause for hyper-threading */ - mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER /* spin checking lock value in cache */ - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER + mov (%rdi),%rax /* spin checking lock value in cache */ + test %rax,%rax je 6f /* zero => unlocked, try to grab it */ decq %r9 /* decrement inner loop count */ jnz 5b /* time to check for timeout? */ @@ -429,8 +376,8 @@ LEAF_ENTRY(hw_lock_to) * Here to try to grab the lock that now appears to be free * after contention. */ - LOAD_HW_LOCK_THREAD_REGISTER - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ + mov %gs:CPU_ACTIVE_THREAD, %rcx + lock; cmpxchg %rcx,(%rdi) /* try to acquire the HW lock */ jne 4b /* no - spin again */ movl $1,%eax /* yes */ pop %r9 @@ -443,8 +390,7 @@ LEAF_ENTRY(hw_lock_to) * MACH_RT: release preemption level. */ LEAF_ENTRY(hw_lock_unlock) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER) /* clear the lock */ + movq $0, (%rdi) /* clear the lock */ PREEMPTION_ENABLE LEAF_RET @@ -456,8 +402,7 @@ LEAF_ENTRY(hw_lock_unlock) */ LEAF_ENTRY(hw_lock_byte_unlock) - LOAD_HW_LOCK_REGISTER /* Load lock pointer */ - movb $0, (HW_LOCK_REGISTER) /* Clear the lock byte */ + movb $0, (%rdi) /* Clear the lock byte */ PREEMPTION_ENABLE LEAF_RET @@ -466,14 +411,13 @@ LEAF_ENTRY(hw_lock_byte_unlock) * MACH_RT: returns with preemption disabled on success. 
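With the HW_LOCK_* register aliases gone, the hw_lock fastpath above reads directly: load the lock word, bail out if it is non-zero, otherwise lock cmpxchg the current thread pointer into it, acquiring the lock and recording the owner in a single atomic step. Roughly the same fastpath in C, as a sketch only (preemption control, the PAUSE spin loop, and the hw_lock_to timeout are omitted; names are illustrative):

    #include <stdint.h>
    #include <stdbool.h>

    /* Lock word: 0 when free, owning thread pointer when held. */
    static bool
    hw_lock_try_c(volatile uintptr_t *lock, uintptr_t self)
    {
        if (*lock != 0)                 /* cheap read before the cmpxchg */
            return false;
        uintptr_t expected = 0;
        return __atomic_compare_exchange_n(lock, &expected, self,
                                           false, __ATOMIC_ACQUIRE,
                                           __ATOMIC_RELAXED);
    }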
*/ LEAF_ENTRY(hw_lock_try) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - LOAD_HW_LOCK_THREAD_REGISTER + mov %gs:CPU_ACTIVE_THREAD, %rcx PREEMPTION_DISABLE - mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER + mov (%rdi),%rax + test %rax,%rax jne 1f - lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ + lock; cmpxchg %rcx,(%rdi) /* try to acquire the HW lock */ jne 1f movl $1,%eax /* success */ @@ -490,9 +434,8 @@ LEAF_ENTRY(hw_lock_try) * N.B. Racy, of course. */ LEAF_ENTRY(hw_lock_held) - LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ - mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER /* check lock value */ - test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER + mov (%rdi),%rax /* check lock value */ + test %rax,%rax movl $1,%ecx cmovne %ecx,%eax /* 0 => unlocked, 1 => locked */ LEAF_RET @@ -523,11 +466,6 @@ LEAF_ENTRY(hw_lock_held) * register and examined */ -#define LCK_RW_REGISTER %rdi -#define LOAD_LCK_RW_REGISTER -#define LCK_RW_FLAGS_REGISTER %eax -#define LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER - #define RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE) /* * void lck_rw_lock_shared(lck_rw_t *) @@ -536,16 +474,15 @@ LEAF_ENTRY(hw_lock_held) Entry(lck_rw_lock_shared) mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ incl TH_RWLOCK_COUNT(%rcx) /* Increment count before atomic CAS */ - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ + mov (%rdi), %eax /* Load state bitfield and interlock */ testl $(RW_LOCK_SHARED_MASK), %eax /* Eligible for fastpath? */ jne 3f movl %eax, %ecx /* original value in %eax for cmpxchgl */ incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f #if CONFIG_DTRACE @@ -557,9 +494,9 @@ Entry(lck_rw_lock_shared) LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point) ret /* - Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER + Fall thru when patched, counting on lock pointer in %rdi */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, %rdi) #endif ret 2: @@ -576,9 +513,8 @@ Entry(lck_rw_lock_shared) * */ Entry(lck_rw_try_lock_shared) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ + mov (%rdi), %eax /* Load state bitfield and interlock */ testl $(LCK_RW_INTERLOCK), %eax jne 2f testl $(RW_TRY_LOCK_SHARED_MASK), %eax @@ -587,7 +523,7 @@ Entry(lck_rw_try_lock_shared) movl %eax, %ecx /* original value in %eax for cmpxchgl */ incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ @@ -603,8 +539,8 @@ Entry(lck_rw_try_lock_shared) */ LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, %rdi) #endif movl $1, %eax /* return TRUE */ ret @@ -622,21 +558,20 @@ Entry(lck_rw_try_lock_shared) * */ Entry(lck_rw_grab_shared) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state 
bitfield and interlock */ + mov (%rdi), %eax /* Load state bitfield and interlock */ testl $(LCK_RW_INTERLOCK), %eax jne 5f testl $(RW_LOCK_EXCLUSIVE_HELD), %eax jne 3f 2: - movl %eax, %ecx /* original value in %eax for cmpxchgl */ - incl %ecx /* Increment reader refcount */ + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 4f - movl $1, %eax /* return success */ + movl $1, %eax /* return success */ ret 3: testl $(LCK_RW_SHARED_MASK), %eax @@ -644,7 +579,7 @@ Entry(lck_rw_grab_shared) testl $(LCK_RW_PRIV_EXCL), %eax je 2b 4: - xorl %eax, %eax /* return failure */ + xorl %eax, %eax /* return failure */ ret 5: PAUSE @@ -661,16 +596,15 @@ Entry(lck_rw_grab_shared) Entry(lck_rw_lock_exclusive) mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ incl TH_RWLOCK_COUNT(%rcx) /* Increment count before atomic CAS */ - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + mov (%rdi), %eax /* Load state bitfield, interlock and shared count */ testl $(RW_LOCK_EXCLUSIVE_MASK), %eax /* Eligible for fastpath? */ jne 3f /* no, go slow */ movl %eax, %ecx /* original value in %eax for cmpxchgl */ orl $(LCK_RW_WANT_WRITE), %ecx lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f #if CONFIG_DTRACE @@ -681,8 +615,8 @@ Entry(lck_rw_lock_exclusive) */ LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, %rdi) #endif ret 2: @@ -702,18 +636,17 @@ Entry(lck_rw_lock_exclusive) * Returns FALSE if the lock is not held on return. 
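The lck_rw_lock_shared() fastpath above works because the whole reader-writer state, the interlock, want-write and want-upgrade bits plus the reader count, lives in one 32-bit word: taking a read hold is a single cmpxchg that bumps the count, provided no exclusion bit is set. A sketch with illustrative bit positions (the real LCK_RW_* values live in the lock headers):

    #include <stdint.h>
    #include <stdbool.h>

    #define RW_INTERLOCK    0x00800000u     /* illustrative bit values */
    #define RW_WANT_UPGRADE 0x01000000u
    #define RW_WANT_WRITE   0x02000000u
    #define RW_SHARED_MASK  (RW_INTERLOCK | RW_WANT_UPGRADE | RW_WANT_WRITE)

    /* One CAS takes a read hold: the reader count occupies the low bits. */
    static bool
    rw_shared_fastpath(volatile uint32_t *state)
    {
        uint32_t old = *state;
        if (old & RW_SHARED_MASK)
            return false;               /* fall back to the slow path */
        return __atomic_compare_exchange_n(state, &old, old + 1,
                                           false, __ATOMIC_ACQUIRE,
                                           __ATOMIC_RELAXED);
    }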
*/ Entry(lck_rw_try_lock_exclusive) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + mov (%rdi), %eax /* Load state bitfield, interlock and shared count */ testl $(LCK_RW_INTERLOCK), %eax jne 2f testl $(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax - jne 3f /* can't get it */ + jne 3f /* can't get it */ - movl %eax, %ecx /* original value in %eax for cmpxchgl */ + movl %eax, %ecx /* original value in %eax for cmpxchgl */ orl $(LCK_RW_WANT_WRITE), %ecx lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ @@ -729,8 +662,8 @@ Entry(lck_rw_try_lock_exclusive) */ LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, %rdi) #endif movl $1, %eax /* return TRUE */ ret @@ -763,9 +696,8 @@ Entry(lck_rw_try_lock_exclusive) * set RW_WANT_UPGRADE and get rid of the read count we hold */ Entry(lck_rw_lock_shared_to_exclusive) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + mov (%rdi), %eax /* Load state bitfield, interlock and shared count */ testl $(LCK_RW_INTERLOCK), %eax jne 7f testl $(LCK_RW_WANT_UPGRADE), %eax @@ -775,7 +707,7 @@ Entry(lck_rw_lock_shared_to_exclusive) orl $(LCK_RW_WANT_UPGRADE), %ecx /* ask for WANT_UPGRADE */ decl %ecx /* and shed our read count */ lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 7f /* we now own the WANT_UPGRADE */ testl $(LCK_RW_SHARED_MASK), %ecx /* check to see if all of the readers are drained */ @@ -790,8 +722,8 @@ Entry(lck_rw_lock_shared_to_exclusive) */ LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in %rdi */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, %rdi) #endif movl $1, %eax /* return success */ ret @@ -804,7 +736,7 @@ Entry(lck_rw_lock_shared_to_exclusive) andl $(~LCK_W_WAITING), %ecx /* so clear the wait indicator */ 3: lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 7f mov %eax, %esi /* put old flags as second arg */ @@ -829,9 +761,8 @@ rwl_release_error_str: * */ Entry(lck_rw_done) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax /* Load state bitfield, interlock and reader count */ testl $(LCK_RW_INTERLOCK), %eax jne 7f /* wait for interlock to clear */ @@ -869,7 +800,7 @@ Entry(lck_rw_done) andl $(~LCK_R_WAITING), %ecx 6: lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 7f mov %eax,%esi /* old flags in %rsi */ @@ -891,9 +822,8 @@ Entry(lck_rw_done) * */ Entry(lck_rw_lock_exclusive_to_shared) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax /* Load state bitfield, interlock and reader count */ testl 
$(LCK_RW_INTERLOCK), %eax jne 6f /* wait for interlock to clear */ @@ -922,7 +852,7 @@ Entry(lck_rw_lock_exclusive_to_shared) andl $(~LCK_R_WAITING), %ecx 5: lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 6f mov %eax,%esi @@ -939,9 +869,8 @@ Entry(lck_rw_lock_exclusive_to_shared) * */ Entry(lck_rw_grab_want) - LOAD_LCK_RW_REGISTER 1: - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax /* Load state bitfield, interlock and reader count */ testl $(LCK_RW_INTERLOCK), %eax jne 3f /* wait for interlock to clear */ testl $(LCK_RW_WANT_WRITE), %eax /* want_write has been grabbed by someone else */ @@ -950,7 +879,7 @@ Entry(lck_rw_grab_want) movl %eax, %ecx /* original value in %eax for cmpxchgl */ orl $(LCK_RW_WANT_WRITE), %ecx lock - cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + cmpxchgl %ecx, (%rdi) /* Attempt atomic exchange */ jne 2f /* we now own want_write */ movl $1, %eax /* return success */ @@ -969,8 +898,7 @@ Entry(lck_rw_grab_want) * */ Entry(lck_rw_held_read_or_upgrade) - LOAD_LCK_RW_REGISTER - LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + mov (%rdi), %eax andl $(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax ret @@ -1014,61 +942,42 @@ Entry(lck_rw_held_read_or_upgrade) #define M_PTR MUTEX_PTR #define M_STATE MUTEX_STATE -#define LMTX_ARG0 %rdi -#define LMTX_ARG1 %rsi -#define LMTX_REG_ORIG %rdi -#define LMTX_REG %rdx -#define LMTX_A_REG %rax -#define LMTX_A_REG32 %eax -#define LMTX_C_REG %rcx -#define LMTX_C_REG32 %ecx -#define LMTX_RET_REG %rax -#define LMTX_RET_REG32 %eax -#define LMTX_LGROUP_REG %r10 -#define LMTX_SSTATE_REG %r11 -#define LOAD_LMTX_REG(arg) mov %rdi, %rdx -#define LMTX_CHK_EXTENDED cmp LMTX_REG, LMTX_REG_ORIG -#define LMTX_ASSERT_OWNED cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1 #define LMTX_ENTER_EXTENDED \ - mov M_PTR(LMTX_REG), LMTX_REG ; \ - xor LMTX_SSTATE_REG, LMTX_SSTATE_REG ; \ - mov MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG ; \ + mov M_PTR(%rdx), %rdx ; \ + xor %r11, %r11 ; \ + mov MUTEX_GRP(%rdx), %r10 ; \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incq GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG) - -#define LMTX_EXIT_EXTENDED - -#define LMTX_CHK_EXTENDED_EXIT + incq GRP_MTX_STAT_UTIL(%r10) #if LOG_FIRST_MISS_ALONE #define LMTX_UPDATE_MISS \ - test $1, LMTX_SSTATE_REG ; \ + test $1, %r11 ; \ jnz 11f ; \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) ; \ - or $1, LMTX_SSTATE_REG ; \ + incl GRP_MTX_STAT_MISS(%r10) ; \ + or $1, %r11 ; \ 11: #else #define LMTX_UPDATE_MISS \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) + incl GRP_MTX_STAT_MISS(%r10) #endif #if LOG_FIRST_MISS_ALONE #define LMTX_UPDATE_WAIT \ - test $2, LMTX_SSTATE_REG ; \ + test $2, %r11 ; \ jnz 11f ; \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) ; \ - or $2, LMTX_SSTATE_REG ; \ + incl GRP_MTX_STAT_WAIT(%r10) ; \ + or $2, %r11 ; \ 11: #else #define LMTX_UPDATE_WAIT \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) + incl GRP_MTX_STAT_WAIT(%r10) #endif @@ -1080,42 +989,42 @@ Entry(lck_rw_held_read_or_upgrade) */ #define LMTX_UPDATE_DIRECT_WAIT \ LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG) + incl GRP_MTX_STAT_DIRECT_WAIT(%r10) #define LMTX_CALLEXT1(func_name) \ - LMTX_CHK_EXTENDED ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - push LMTX_LGROUP_REG ; \ - push LMTX_SSTATE_REG ; \ -12: push LMTX_REG_ORIG ; \ - push 
LMTX_REG ; \ - mov LMTX_REG, LMTX_ARG0 ; \ + push %r10 ; \ + push %r11 ; \ +12: push %rdi ; \ + push %rdx ; \ + mov %rdx, %rdi ; \ call EXT(func_name) ; \ - pop LMTX_REG ; \ - pop LMTX_REG_ORIG ; \ - LMTX_CHK_EXTENDED ; \ + pop %rdx ; \ + pop %rdi ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG ; \ + pop %r11 ; \ + pop %r10 ; \ 12: #define LMTX_CALLEXT2(func_name, reg) \ - LMTX_CHK_EXTENDED ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - push LMTX_LGROUP_REG ; \ - push LMTX_SSTATE_REG ; \ -12: push LMTX_REG_ORIG ; \ - push LMTX_REG ; \ - mov reg, LMTX_ARG1 ; \ - mov LMTX_REG, LMTX_ARG0 ; \ + push %r10 ; \ + push %r11 ; \ +12: push %rdi ; \ + push %rdx ; \ + mov reg, %rsi ; \ + mov %rdx, %rdi ; \ call EXT(func_name) ; \ - pop LMTX_REG ; \ - pop LMTX_REG_ORIG ; \ - LMTX_CHK_EXTENDED ; \ + pop %rdx ; \ + pop %rdi ; \ + cmp %rdx, %rdi ; \ je 12f ; \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG ; \ + pop %r11 ; \ + pop %r10 ; \ 12: @@ -1137,33 +1046,33 @@ Entry(lck_rw_held_read_or_upgrade) */ NONLEAF_ENTRY(lck_mtx_assert) - LOAD_LMTX_REG(B_ARG0) /* Load lock address */ - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG /* Load current thread */ + mov %rdi, %rdx /* Load lock address */ + mov %gs:CPU_ACTIVE_THREAD, %rax /* Load current thread */ - mov M_STATE(LMTX_REG), LMTX_C_REG32 - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex? */ + mov M_STATE(%rdx), %ecx + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ jne 0f - mov M_PTR(LMTX_REG), LMTX_REG /* If so, take indirection */ + mov M_PTR(%rdx), %rdx /* If so, take indirection */ 0: - mov M_OWNER(LMTX_REG), LMTX_C_REG /* Load owner */ - LMTX_ASSERT_OWNED + mov M_OWNER(%rdx), %rcx /* Load owner */ + cmp $(MUTEX_ASSERT_OWNED), %rsi jne 2f /* Assert ownership? */ - cmp LMTX_A_REG, LMTX_C_REG /* Current thread match? */ + cmp %rax, %rcx /* Current thread match? */ jne 3f /* no, go panic */ - testl $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(LMTX_REG) + testl $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(%rdx) je 3f 1: /* yes, we own it */ NONLEAF_RET 2: - cmp LMTX_A_REG, LMTX_C_REG /* Current thread match? */ + cmp %rax, %rcx /* Current thread match? 
*/ jne 1b /* No, return */ ALIGN_STACK() - LOAD_PTR_ARG1(LMTX_REG) + LOAD_PTR_ARG1(%rdx) LOAD_STRING_ARG0(mutex_assert_owned_str) jmp 4f 3: ALIGN_STACK() - LOAD_PTR_ARG1(LMTX_REG) + LOAD_PTR_ARG1(%rdx) LOAD_STRING_ARG0(mutex_assert_not_owned_str) 4: CALL_PANIC() @@ -1171,7 +1080,7 @@ NONLEAF_ENTRY(lck_mtx_assert) lck_mtx_destroyed: ALIGN_STACK() - LOAD_PTR_ARG1(LMTX_REG) + LOAD_PTR_ARG1(%rdx) LOAD_STRING_ARG0(mutex_interlock_destroyed_str) CALL_PANIC() @@ -1198,70 +1107,69 @@ mutex_interlock_destroyed_str: * lck_mtx_convert_spin() */ NONLEAF_ENTRY(lck_mtx_lock_spin_always) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ jmp Llmls_avoid_check NONLEAF_ENTRY(lck_mtx_lock_spin) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ CHECK_PREEMPTION_LEVEL() Llmls_avoid_check: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 /* is the interlock or mutex held */ + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ jnz Llmls_slow -Llmls_try: /* no - can't be INDIRECT, DESTROYED or locked */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32 +Llmls_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_SPIN_MSK), %ecx PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne Llmls_busy_disabled - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of interlock */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of interlock */ #if MACH_LDEBUG - test LMTX_A_REG, LMTX_A_REG + test %rax, %rax jz 1f - incl TH_MUTEX_COUNT(LMTX_A_REG) /* lock statistic */ + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ 1: #endif /* MACH_LDEBUG */ - LMTX_CHK_EXTENDED_EXIT /* return with the interlock held and preemption disabled */ leave #if CONFIG_DTRACE LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, %rdx) #endif ret Llmls_slow: - test $M_ILOCKED_MSK, LMTX_C_REG32 /* is the interlock held */ - jz Llml_contended /* no, must have been the mutex */ + test $M_ILOCKED_MSK, %ecx /* is the interlock held */ + jz Llml_contended /* no, must have been the mutex */ - cmp $(MUTEX_DESTROYED), LMTX_C_REG32 /* check to see if its marked destroyed */ + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex */ - jne Llmls_loop /* no... must be interlocked */ + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex */ + jne Llmls_loop /* no... 
must be interlocked */ LMTX_ENTER_EXTENDED - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_SPIN_MSK), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx + test $(M_SPIN_MSK), %ecx jz Llmls_loop1 LMTX_UPDATE_MISS /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */ Llmls_loop: PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx Llmls_loop1: - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx jz Llmls_try - test $(M_MLOCKED_MSK), LMTX_C_REG32 - jnz Llml_contended /* mutex owned by someone else, go contend for it */ + test $(M_MLOCKED_MSK), %ecx + jnz Llml_contended /* mutex owned by someone else, go contend for it */ jmp Llmls_loop Llmls_busy_disabled: @@ -1271,87 +1179,86 @@ Llmls_busy_disabled: NONLEAF_ENTRY(lck_mtx_lock) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ CHECK_PREEMPTION_LEVEL() - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 /* is the interlock or mutex held */ + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ jnz Llml_slow -Llml_try: /* no - can't be INDIRECT, DESTROYED or locked */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 +Llml_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne Llml_busy_disabled - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ #if MACH_LDEBUG - test LMTX_A_REG, LMTX_A_REG + test %rax, %rax jz 1f - incl TH_MUTEX_COUNT(LMTX_A_REG) /* lock statistic */ + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ 1: #endif /* MACH_LDEBUG */ - testl $(M_WAITERS_MSK), M_STATE(LMTX_REG) + testl $(M_WAITERS_MSK), M_STATE(%rdx) jz Llml_finish LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) Llml_finish: - andl $(~M_ILOCKED_MSK), M_STATE(LMTX_REG) + andl $(~M_ILOCKED_MSK), M_STATE(%rdx) PREEMPTION_ENABLE - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ jne 2f leave #if CONFIG_DTRACE LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, %rdx) #endif ret 2: - LMTX_EXIT_EXTENDED leave #if CONFIG_DTRACE LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, %rdx) #endif ret Llml_slow: - test $M_ILOCKED_MSK, LMTX_C_REG32 /* is the interlock held */ - jz Llml_contended /* no, must have been the mutex */ + test $M_ILOCKED_MSK, %ecx /* is the interlock held */ + jz Llml_contended /* no, must have been the mutex */ - cmp $(MUTEX_DESTROYED), LMTX_C_REG32 /* check to see if its marked destroyed */ + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex? 
*/ - jne Llml_loop /* no... must be interlocked */ + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ + jne Llml_loop /* no... must be interlocked */ LMTX_ENTER_EXTENDED - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_SPIN_MSK), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx + test $(M_SPIN_MSK), %ecx jz Llml_loop1 LMTX_UPDATE_MISS /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */ Llml_loop: PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx Llml_loop1: - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx jz Llml_try - test $(M_MLOCKED_MSK), LMTX_C_REG32 - jnz Llml_contended /* mutex owned by someone else, go contend for it */ + test $(M_MLOCKED_MSK), %ecx + jnz Llml_contended /* mutex owned by someone else, go contend for it */ jmp Llml_loop Llml_busy_disabled: @@ -1360,60 +1267,60 @@ Llml_busy_disabled: Llml_contended: - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ je 0f LMTX_UPDATE_MISS 0: LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86) - test LMTX_RET_REG, LMTX_RET_REG + test %rax, %rax jz Llml_acquired /* acquired mutex, interlock held and preemption disabled */ - cmp $1, LMTX_RET_REG /* check for direct wait status */ + cmp $1, %rax /* check for direct wait status */ je 2f - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ je 2f LMTX_UPDATE_DIRECT_WAIT 2: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK), %ecx jnz 6f - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK), LMTX_C_REG32 /* try to take the interlock */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK), %ecx /* try to take the interlock */ PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne 5f - test $(M_MLOCKED_MSK), LMTX_C_REG32 /* we've got the interlock and */ + test $(M_MLOCKED_MSK), %ecx /* we've got the interlock and */ jnz 3f - or $(M_MLOCKED_MSK), LMTX_C_REG32 /* the mutex is free... grab it directly */ - mov LMTX_C_REG32, M_STATE(LMTX_REG) + or $(M_MLOCKED_MSK), %ecx /* the mutex is free... 
grab it directly */ + mov %ecx, M_STATE(%rdx) - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ #if MACH_LDEBUG - test LMTX_A_REG, LMTX_A_REG + test %rax, %rax jz 1f - incl TH_MUTEX_COUNT(LMTX_A_REG) /* lock statistic */ + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ 1: #endif /* MACH_LDEBUG */ Llml_acquired: - testl $(M_WAITERS_MSK), M_STATE(LMTX_REG) + testl $(M_WAITERS_MSK), M_STATE(%rdx) jnz 1f - mov M_OWNER(LMTX_REG), LMTX_A_REG - mov TH_WAS_PROMOTED_ON_WAKEUP(LMTX_A_REG), LMTX_A_REG32 - test LMTX_A_REG32, LMTX_A_REG32 + mov M_OWNER(%rdx), %rax + mov TH_WAS_PROMOTED_ON_WAKEUP(%rax), %eax + test %eax, %eax jz Llml_finish 1: LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) jmp Llml_finish 3: /* interlock held, mutex busy */ - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ je 4f LMTX_UPDATE_WAIT 4: @@ -1427,64 +1334,63 @@ Llml_acquired: NONLEAF_ENTRY(lck_mtx_try_lock_spin_always) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ jmp Llmts_avoid_check NONLEAF_ENTRY(lck_mtx_try_lock_spin) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ Llmts_avoid_check: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 /* is the interlock or mutex held */ + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ jnz Llmts_slow -Llmts_try: /* no - can't be INDIRECT, DESTROYED or locked */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG +Llmts_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_SPIN_MSK), %rcx PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne Llmts_busy_disabled - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ #if MACH_LDEBUG - test LMTX_A_REG, LMTX_A_REG + test %rax, %rax jz 1f - incl TH_MUTEX_COUNT(LMTX_A_REG) /* lock statistic */ + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ 1: #endif /* MACH_LDEBUG */ - LMTX_CHK_EXTENDED_EXIT leave #if CONFIG_DTRACE - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG above */ - LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx above */ + LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, %rdx) #endif - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ ret Llmts_slow: - test $(M_ILOCKED_MSK), LMTX_C_REG32 /* is the interlock held */ + test $(M_ILOCKED_MSK), %ecx /* is the interlock held */ jz Llmts_fail /* no, must be held as a mutex */ - cmp $(MUTEX_DESTROYED), LMTX_C_REG32 /* check to see if its marked destroyed */ + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex? */ + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? 
*/ jne Llmts_loop1 LMTX_ENTER_EXTENDED Llmts_loop: PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx Llmts_loop1: - test $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32 + test $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx jnz Llmts_fail - test $(M_ILOCKED_MSK), LMTX_C_REG32 + test $(M_ILOCKED_MSK), %ecx jz Llmts_try jmp Llmts_loop @@ -1495,68 +1401,66 @@ Llmts_busy_disabled: NONLEAF_ENTRY(lck_mtx_try_lock) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ - mov M_STATE(LMTX_REG), LMTX_C_REG32 - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 /* is the interlock or mutex held */ + mov M_STATE(%rdx), %ecx + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* is the interlock or mutex held */ jnz Llmt_slow -Llmt_try: /* no - can't be INDIRECT, DESTROYED or locked */ - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 +Llmt_try: /* no - can't be INDIRECT, DESTROYED or locked */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne Llmt_busy_disabled - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ #if MACH_LDEBUG - test LMTX_A_REG, LMTX_A_REG + test %rax, %rax jz 1f - incl TH_MUTEX_COUNT(LMTX_A_REG) /* lock statistic */ + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ 1: #endif /* MACH_LDEBUG */ - LMTX_CHK_EXTENDED_EXIT - - test $(M_WAITERS_MSK), LMTX_C_REG32 + test $(M_WAITERS_MSK), %ecx jz 0f LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) 0: - andl $(~M_ILOCKED_MSK), M_STATE(LMTX_REG) + andl $(~M_ILOCKED_MSK), M_STATE(%rdx) PREEMPTION_ENABLE leave #if CONFIG_DTRACE - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ /* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */ LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, LMTX_REG) + /* inherit lock pointer in %rdx from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, %rdx) #endif - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ ret Llmt_slow: - test $(M_ILOCKED_MSK), LMTX_C_REG32 /* is the interlock held */ + test $(M_ILOCKED_MSK), %ecx /* is the interlock held */ jz Llmt_fail /* no, must be held as a mutex */ - cmp $(MUTEX_DESTROYED), LMTX_C_REG32 /* check to see if its marked destroyed */ + cmp $(MUTEX_DESTROYED), %ecx /* check to see if its marked destroyed */ je lck_mtx_destroyed - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex? */ + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? 
*/ jne Llmt_loop LMTX_ENTER_EXTENDED Llmt_loop: PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx Llmt_loop1: - test $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32 + test $(M_MLOCKED_MSK | M_SPIN_MSK), %ecx jnz Llmt_fail - test $(M_ILOCKED_MSK), LMTX_C_REG32 + test $(M_ILOCKED_MSK), %ecx jz Llmt_try jmp Llmt_loop @@ -1567,36 +1471,35 @@ Llmt_busy_disabled: Llmt_fail: Llmts_fail: - LMTX_CHK_EXTENDED /* is this an extended mutex */ + cmp %rdx, %rdi /* is this an extended mutex */ je 0f LMTX_UPDATE_MISS - LMTX_EXIT_EXTENDED 0: - xor LMTX_RET_REG, LMTX_RET_REG + xor %rax, %rax NONLEAF_RET NONLEAF_ENTRY(lck_mtx_convert_spin) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ - mov M_STATE(LMTX_REG), LMTX_C_REG32 - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex? */ + mov M_STATE(%rdx), %ecx + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? */ jne 0f - mov M_PTR(LMTX_REG), LMTX_REG /* If so, take indirection */ - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_PTR(%rdx), %rdx /* If so, take indirection */ + mov M_STATE(%rdx), %ecx 0: - test $(M_MLOCKED_MSK), LMTX_C_REG32 /* already owned as a mutex, just return */ + test $(M_MLOCKED_MSK), %ecx /* already owned as a mutex, just return */ jnz 2f - test $(M_WAITERS_MSK), LMTX_C_REG32 /* are there any waiters? */ + test $(M_WAITERS_MSK), %ecx /* are there any waiters? */ jz 1f LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx 1: - and $(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG32 /* convert from spin version to mutex */ - or $(M_MLOCKED_MSK), LMTX_C_REG32 - mov LMTX_C_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ + and $(~(M_ILOCKED_MSK | M_SPIN_MSK)), %ecx /* convert from spin version to mutex */ + or $(M_MLOCKED_MSK), %ecx + mov %ecx, M_STATE(%rdx) /* since I own the interlock, I don't need an atomic update */ PREEMPTION_ENABLE 2: @@ -1605,58 +1508,58 @@ NONLEAF_ENTRY(lck_mtx_convert_spin) NONLEAF_ENTRY(lck_mtx_unlock) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov %rdi, %rdx /* fetch lock pointer */ Llmu_entry: - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx Llmu_prim: - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex? */ + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? 
*/ je Llmu_ext Llmu_chktype: - test $(M_MLOCKED_MSK), LMTX_C_REG32 /* check for full mutex */ + test $(M_MLOCKED_MSK), %ecx /* check for full mutex */ jz Llmu_unlock Llmu_mutex: - test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ + test $(M_ILOCKED_MSK), %rcx /* have to wait for interlock to clear */ jnz Llmu_busy - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - and $(~M_MLOCKED_MSK), LMTX_C_REG32 /* drop mutex */ - or $(M_ILOCKED_MSK), LMTX_C_REG32 /* pick up interlock */ + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + and $(~M_MLOCKED_MSK), %ecx /* drop mutex */ + or $(M_ILOCKED_MSK), %ecx /* pick up interlock */ PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne Llmu_busy_disabled /* branch on failure to spin loop */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne Llmu_busy_disabled /* branch on failure to spin loop */ Llmu_unlock: - xor LMTX_A_REG, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) - mov LMTX_C_REG, LMTX_A_REG /* keep original state in %ecx for later evaluation */ - and $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG + xor %rax, %rax + mov %rax, M_OWNER(%rdx) + mov %rcx, %rax /* keep original state in %ecx for later evaluation */ + and $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), %rax - test $(M_WAITERS_MSK), LMTX_A_REG32 + test $(M_WAITERS_MSK), %eax jz 2f - dec LMTX_A_REG32 /* decrement waiter count */ + dec %eax /* decrement waiter count */ 2: - mov LMTX_A_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ + mov %eax, M_STATE(%rdx) /* since I own the interlock, I don't need an atomic update */ #if MACH_LDEBUG /* perform lock statistics after drop to prevent delay */ - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - test LMTX_A_REG, LMTX_A_REG + mov %gs:CPU_ACTIVE_THREAD, %rax + test %rax, %rax jz 1f - decl TH_MUTEX_COUNT(LMTX_A_REG) /* lock statistic */ + decl TH_MUTEX_COUNT(%rax) /* lock statistic */ 1: #endif /* MACH_LDEBUG */ - test $(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG32 + test $(M_PROMOTED_MSK | M_WAITERS_MSK), %ecx jz 3f - LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG) + LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, %rcx) 3: PREEMPTION_ENABLE - LMTX_CHK_EXTENDED + cmp %rdx, %rdi jne 4f leave @@ -1664,8 +1567,8 @@ Llmu_unlock: /* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */ LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, LMTX_REG) + /* inherit lock pointer in %rdx from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %rdx) #endif ret 4: @@ -1674,8 +1577,8 @@ Llmu_unlock: /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */ LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point) ret - /* inherit lock pointer in LMTX_REG from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG) + /* inherit lock pointer in %rdx from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, %rdx) #endif ret @@ -1684,88 +1587,88 @@ Llmu_busy_disabled: PREEMPTION_ENABLE Llmu_busy: PAUSE - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx jmp Llmu_mutex Llmu_ext: - mov M_PTR(LMTX_REG), LMTX_REG - mov M_OWNER(LMTX_REG), LMTX_A_REG - mov %gs:CPU_ACTIVE_THREAD, LMTX_C_REG - CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG) - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_PTR(%rdx), %rdx + mov M_OWNER(%rdx), %rax + mov %gs:CPU_ACTIVE_THREAD, %rcx + CHECK_UNLOCK(%rcx, %rax) + mov M_STATE(%rdx), 
%ecx jmp Llmu_chktype LEAF_ENTRY(lck_mtx_ilk_unlock) - LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ + mov %rdi, %rdx /* fetch lock pointer - no indirection here */ - andl $(~M_ILOCKED_MSK), M_STATE(LMTX_REG) + andl $(~M_ILOCKED_MSK), M_STATE(%rdx) - PREEMPTION_ENABLE /* need to re-enable preemption */ + PREEMPTION_ENABLE /* need to re-enable preemption */ LEAF_RET LEAF_ENTRY(lck_mtx_lock_grab_mutex) - LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ + mov %rdi, %rdx /* fetch lock pointer - no indirection here */ - mov M_STATE(LMTX_REG), LMTX_C_REG32 + mov M_STATE(%rdx), %ecx - test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 /* can't have the mutex yet */ + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx /* can't have the mutex yet */ jnz 3f - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_MLOCKED_MSK), %ecx PREEMPTION_DISABLE lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ jne 2f /* branch on failure to spin loop */ - mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG - mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov %gs:CPU_ACTIVE_THREAD, %rax + mov %rax, M_OWNER(%rdx) /* record owner of mutex */ #if MACH_LDEBUG - test LMTX_A_REG, LMTX_A_REG + test %rax, %rax jz 1f - incl TH_MUTEX_COUNT(LMTX_A_REG) /* lock statistic */ + incl TH_MUTEX_COUNT(%rax) /* lock statistic */ 1: #endif /* MACH_LDEBUG */ - mov $1, LMTX_RET_REG /* return success */ + mov $1, %rax /* return success */ LEAF_RET 2: PREEMPTION_ENABLE 3: - xor LMTX_RET_REG, LMTX_RET_REG /* return failure */ + xor %rax, %rax /* return failure */ LEAF_RET LEAF_ENTRY(lck_mtx_lock_mark_destroyed) - LOAD_LMTX_REG(L_ARG0) + mov %rdi, %rdx 1: - mov M_STATE(LMTX_REG), LMTX_C_REG32 - cmp $(MUTEX_IND), LMTX_C_REG32 /* Is this an indirect mutex? */ + mov M_STATE(%rdx), %ecx + cmp $(MUTEX_IND), %ecx /* Is this an indirect mutex? 
*/ jne 2f - movl $(MUTEX_DESTROYED), M_STATE(LMTX_REG) /* convert to destroyed state */ + movl $(MUTEX_DESTROYED), M_STATE(%rdx) /* convert to destroyed state */ jmp 3f 2: - test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ + test $(M_ILOCKED_MSK), %rcx /* have to wait for interlock to clear */ jnz 5f PREEMPTION_DISABLE - mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ - or $(M_ILOCKED_MSK), LMTX_C_REG32 + mov %rcx, %rax /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK), %ecx lock - cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ - jne 4f /* branch on failure to spin loop */ - movl $(MUTEX_DESTROYED), M_STATE(LMTX_REG) /* convert to destroyed state */ + cmpxchg %ecx, M_STATE(%rdx) /* atomic compare and exchange */ + jne 4f /* branch on failure to spin loop */ + movl $(MUTEX_DESTROYED), M_STATE(%rdx) /* convert to destroyed state */ PREEMPTION_ENABLE 3: - LEAF_RET /* return with M_ILOCKED set */ + LEAF_RET /* return with M_ILOCKED set */ 4: PREEMPTION_ENABLE 5: @@ -1875,42 +1778,6 @@ _mp_enable_preemption_no_check_less_than_zero: #endif /* MACH_RT */ LEAF_RET - -LEAF_ENTRY(i_bit_set) - lock - bts %edi,(%rsi) - LEAF_RET - -LEAF_ENTRY(i_bit_clear) - lock - btr %edi,(%rsi) - LEAF_RET - - -LEAF_ENTRY(bit_lock) -1: - lock - bts %edi,(%rsi) - jb 1b - LEAF_RET - - -LEAF_ENTRY(bit_lock_try) - lock - bts %edi,(%rsi) - jb bit_lock_failed - movl $1, %eax - LEAF_RET -bit_lock_failed: - xorl %eax,%eax - LEAF_RET - -LEAF_ENTRY(bit_unlock) - lock - btr %edi,(%rsi) - LEAF_RET - - /* * Atomic primitives, prototyped in kern/simple_lock.h */ @@ -1922,7 +1789,7 @@ LEAF_ENTRY(hw_atomic_add) 1: #endif movl %esi, %eax /* Load addend */ - lock xaddl %eax, (%rdi) /* Atomic exchange and add */ + lock xaddl %eax, (%rdi) /* Atomic exchange and add */ addl %esi, %eax /* Calculate result */ LEAF_RET @@ -1935,7 +1802,7 @@ LEAF_ENTRY(hw_atomic_sub) #endif negl %esi movl %esi, %eax - lock xaddl %eax, (%rdi) /* Atomic exchange and add */ + lock xaddl %eax, (%rdi) /* Atomic exchange and add */ addl %esi, %eax /* Calculate result */ LEAF_RET @@ -1998,6 +1865,6 @@ LEAF_ENTRY(hw_atomic_and_noret) ud2 1: #endif - lock andl %esi, (%rdi) /* Atomic OR */ + lock andl %esi, (%rdi) /* Atomic OR */ LEAF_RET diff --git a/osfmk/i386/i386_timer.c b/osfmk/i386/i386_timer.c index 55c460461..ae6eb1029 100644 --- a/osfmk/i386/i386_timer.c +++ b/osfmk/i386/i386_timer.c @@ -254,21 +254,6 @@ __unused void *arg) timer_resync_deadlines(); } -/* N.B.: Max leeway values assume 1GHz timebase */ -timer_coalescing_priority_params_t tcoal_prio_params = -{ - /* Deadline scale values for each thread attribute */ - 0, -5, 3, 3, 3, - /* Maximum leeway in abstime for each thread attribute */ - 0ULL, 100*NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC, - /* Deadline scale values for each latency QoS tier */ - {3, 2, 1, -2, -15, -15}, - /* Maximum leeway in abstime for each latency QoS Tier*/ - {1*NSEC_PER_MSEC, 5*NSEC_PER_MSEC, 20*NSEC_PER_MSEC, 75*NSEC_PER_MSEC, - 10*NSEC_PER_SEC, 10*NSEC_PER_SEC}, - /* Signifies that the tier requires rate-limiting */ - {FALSE, FALSE, FALSE, FALSE, TRUE, TRUE} -}; #define TIMER_RESORT_THRESHOLD_ABSTIME (50 * NSEC_PER_MSEC) #if TCOAL_PRIO_STATS @@ -278,111 +263,6 @@ int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl; #define TCOAL_PRIO_STAT(x) #endif -/* Select timer coalescing window based on per-task quality-of-service hints */ -static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax, boolean_t 
*pratelimited) { - uint32_t latency_qos; - boolean_t adjusted = FALSE; - task_t ctask = t->task; - - if (ctask) { - latency_qos = proc_get_effective_task_policy(ctask, TASK_POLICY_LATENCY_QOS); - - assert(latency_qos <= NUM_LATENCY_QOS_TIERS); - - if (latency_qos) { - *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1]; - *tmax = tcoal_prio_params.latency_qos_ns_max[latency_qos - 1]; - *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1]; - adjusted = TRUE; - } - } - return adjusted; -} - -/* Adjust timer deadlines based on priority of the thread and the - * urgency value provided at timeout establishment. With this mechanism, - * timers are no longer necessarily sorted in order of soft deadline - * on a given timer queue, i.e. they may be differentially skewed. - * In the current scheme, this could lead to fewer pending timers - * processed than is technically possible when the HW deadline arrives. - */ -static void -timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited) { - int16_t tpri = cthread->sched_pri; - - if ((urgency & TIMER_CALL_USER_MASK) != 0) { - if (tpri >= BASEPRI_RTQUEUES || - urgency == TIMER_CALL_USER_CRITICAL) { - *tshift = tcoal_prio_params.timer_coalesce_rt_shift; - *tmax = tcoal_prio_params.timer_coalesce_rt_ns_max; - TCOAL_PRIO_STAT(rt_tcl); - } else if ((urgency == TIMER_CALL_USER_BACKGROUND) || - proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG)) { - /* Determine if timer should be subjected to a lower QoS */ - if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) { - if (*tmax > tcoal_prio_params.timer_coalesce_bg_ns_max) { - return; - } else { - *pratelimited = FALSE; - } - } - *tshift = tcoal_prio_params.timer_coalesce_bg_shift; - *tmax = tcoal_prio_params.timer_coalesce_bg_ns_max; - TCOAL_PRIO_STAT(bg_tcl); - } else if (tpri >= MINPRI_KERNEL) { - *tshift = tcoal_prio_params.timer_coalesce_kt_shift; - *tmax = tcoal_prio_params.timer_coalesce_kt_ns_max; - TCOAL_PRIO_STAT(kt_tcl); - } else if (cthread->sched_mode == TH_MODE_FIXED) { - *tshift = tcoal_prio_params.timer_coalesce_fp_shift; - *tmax = tcoal_prio_params.timer_coalesce_fp_ns_max; - TCOAL_PRIO_STAT(fp_tcl); - } else if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) { - TCOAL_PRIO_STAT(qos_tcl); - } else if (cthread->sched_mode == TH_MODE_TIMESHARE) { - *tshift = tcoal_prio_params.timer_coalesce_ts_shift; - *tmax = tcoal_prio_params.timer_coalesce_ts_ns_max; - TCOAL_PRIO_STAT(ts_tcl); - } else { - TCOAL_PRIO_STAT(nc_tcl); - } - } else if (urgency == TIMER_CALL_SYS_BACKGROUND) { - *tshift = tcoal_prio_params.timer_coalesce_bg_shift; - *tmax = tcoal_prio_params.timer_coalesce_bg_ns_max; - TCOAL_PRIO_STAT(bg_tcl); - } else { - *tshift = tcoal_prio_params.timer_coalesce_kt_shift; - *tmax = tcoal_prio_params.timer_coalesce_kt_ns_max; - TCOAL_PRIO_STAT(kt_tcl); - } -} - -int timer_user_idle_level; - -uint64_t -timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited) -{ - int32_t tcs_shift = 0; - uint64_t tcs_ns_max = 0; - uint64_t adjval; - uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK); - - if (mach_timer_coalescing_enabled && - (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) { - timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_ns_max, pratelimited); - - if (tcs_shift >= 0) - adjval = MIN((deadline - now) >> tcs_shift, tcs_ns_max); - else - adjval = MIN((deadline - now) << (-tcs_shift), tcs_ns_max); - /* Apply 
adjustments derived from "user idle level" heuristic */ - adjval += (adjval * timer_user_idle_level) >> 7; - return adjval; - } else { - return 0; - } -} - boolean_t timer_resort_threshold(uint64_t skew) { if (skew >= TIMER_RESORT_THRESHOLD_ABSTIME) @@ -391,29 +271,6 @@ timer_resort_threshold(uint64_t skew) { return FALSE; } -int -ml_timer_get_user_idle_level(void) { - return timer_user_idle_level; -} - -kern_return_t ml_timer_set_user_idle_level(int ilevel) { - boolean_t do_reeval = FALSE; - - if ((ilevel < 0) || (ilevel > 128)) - return KERN_INVALID_ARGUMENT; - - if (ilevel < timer_user_idle_level) { - do_reeval = TRUE; - } - - timer_user_idle_level = ilevel; - - if (do_reeval) - ml_timer_evaluate(); - - return KERN_SUCCESS; -} - /* * Return the local timer queue for a running processor * else return the boot processor's timer queue. @@ -518,3 +375,29 @@ timer_call_nosync_cpu(int cpu, void (*fn)(void *), void *arg) mp_cpus_call(cpu_to_cpumask(cpu), NOSYNC, fn, arg); } + +static timer_coalescing_priority_params_ns_t tcoal_prio_params_init = +{ + .idle_entry_timer_processing_hdeadline_threshold_ns = 5000ULL * NSEC_PER_USEC, + .interrupt_timer_coalescing_ilat_threshold_ns = 30ULL * NSEC_PER_USEC, + .timer_resort_threshold_ns = 50 * NSEC_PER_MSEC, + .timer_coalesce_rt_shift = 0, + .timer_coalesce_bg_shift = -5, + .timer_coalesce_kt_shift = 3, + .timer_coalesce_fp_shift = 3, + .timer_coalesce_ts_shift = 3, + .timer_coalesce_rt_ns_max = 0ULL, + .timer_coalesce_bg_ns_max = 100 * NSEC_PER_MSEC, + .timer_coalesce_kt_ns_max = 1 * NSEC_PER_MSEC, + .timer_coalesce_fp_ns_max = 1 * NSEC_PER_MSEC, + .timer_coalesce_ts_ns_max = 1 * NSEC_PER_MSEC, + .latency_qos_scale = {3, 2, 1, -2, -15, -15}, + .latency_qos_ns_max ={1 * NSEC_PER_MSEC, 5 * NSEC_PER_MSEC, 20 * NSEC_PER_MSEC, + 75 * NSEC_PER_MSEC, 10000 * NSEC_PER_MSEC, 10000 * NSEC_PER_MSEC}, + .latency_tier_rate_limited = {FALSE, FALSE, FALSE, FALSE, TRUE, TRUE}, +}; + +timer_coalescing_priority_params_ns_t * timer_call_get_priority_params(void) +{ + return &tcoal_prio_params_init; +} diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index d5c3db7f0..4ec9f5cbe 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -54,7 +54,6 @@ * the rights to redistribute these changes. 
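*/

The timer_call_slop() logic deleted above reduces to one formula (its per-tier tunables survive as tcoal_prio_params_init at the bottom of the i386_timer.c diff, exported via timer_call_get_priority_params()): scale the time remaining until the deadline by a signed power-of-two shift, clamp to a per-tier maximum, then inflate the result by up to 100% according to the 0..128 "user idle level". A self-contained C sketch of that computation; the function name is illustrative, the arithmetic is taken directly from the deleted code:

    #include <stdint.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Leeway = remaining time scaled by 2^-shift (negative shifts scale up),
     * clamped to ns_max, then inflated by idle_level/128. */
    static uint64_t
    timer_slop_sketch(uint64_t deadline, uint64_t now,
                      int32_t shift, uint64_t ns_max, uint32_t idle_level)
    {
        uint64_t adjval;

        if (deadline <= now)
            return 0;                       /* already due: no coalescing */

        if (shift >= 0)
            adjval = MIN((deadline - now) >> shift, ns_max);
        else
            adjval = MIN((deadline - now) << (-shift), ns_max);

        /* idle_level runs 0..128, so this adds between 0% and 100% */
        adjval += (adjval * idle_level) >> 7;
        return adjval;
    }

With the background-tier values from tcoal_prio_params_init (shift -5, 100ms cap), a timer 2ms out gets 2ms << 5 = 64ms of slop at idle level 0, growing to 128ms at the maximum idle level of 128, since the idle inflation is applied after the clamp.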
-#include #include @@ -101,6 +100,7 @@ vm_offset_t vm_kernel_top; vm_offset_t vm_kernel_stext; vm_offset_t vm_kernel_etext; vm_offset_t vm_kernel_slide; +vm_offset_t vm_hib_base; vm_offset_t vm_kext_base = VM_MIN_KERNEL_AND_KEXT_ADDRESS; vm_offset_t vm_kext_top = VM_MIN_KERNEL_ADDRESS; @@ -259,6 +259,7 @@ i386_vm_init(uint64_t maxmem, sHIB = segHIBB; eHIB = segHIBB + segSizeHIB; + vm_hib_base = sHIB; /* Zero-padded from ehib to stext if text is 2M-aligned */ stext = segTEXTB; lowGlo.lgStext = stext; diff --git a/osfmk/i386/lapic.h b/osfmk/i386/lapic.h index 54a97c7a5..ca9db2dbf 100644 --- a/osfmk/i386/lapic.h +++ b/osfmk/i386/lapic.h @@ -228,6 +228,7 @@ typedef uint32_t lapic_timer_count_t; #define LAPIC_CMCI_INTERRUPT 0x9 #define LAPIC_PMC_SW_INTERRUPT 0x8 #define LAPIC_PM_INTERRUPT 0x7 +#define LAPIC_KICK_INTERRUPT 0x6 #define LAPIC_PMC_SWI_VECTOR (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_PMC_SW_INTERRUPT) #define LAPIC_TIMER_VECTOR (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT) diff --git a/osfmk/i386/lapic_native.c b/osfmk/i386/lapic_native.c index e9983aa79..b8b62f6c2 100644 --- a/osfmk/i386/lapic_native.c +++ b/osfmk/i386/lapic_native.c @@ -811,6 +811,10 @@ lapic_interrupt(int interrupt_num, x86_saved_state_t *state) #endif /* CONFIG_COUNTERS */ } break; + case LAPIC_KICK_INTERRUPT: + _lapic_end_of_interrupt(); + retval = 1; + break; } return retval; diff --git a/osfmk/i386/lock.h b/osfmk/i386/lock.h index d353be3c7..935742195 100644 --- a/osfmk/i386/lock.h +++ b/osfmk/i386/lock.h @@ -69,216 +69,7 @@ #ifndef _I386_LOCK_H_ #define _I386_LOCK_H_ -#include - -#ifdef __APPLE_API_PRIVATE - -#ifdef MACH_KERNEL_PRIVATE - -#include -#include -#include -#include - -#include -#include - -typedef lck_rw_t lock_t; - -extern unsigned int LockTimeOutTSC; /* Lock timeout in TSC ticks */ -extern unsigned int LockTimeOut; /* Lock timeout in absolute time */ - - -#if defined(__GNUC__) - -/* - * General bit-lock routines. - */ - -#define bit_lock(bit,l) \ - __asm__ volatile(" jmp 1f \n \ - 0: btl %0, %1 \n \ - jb 0b \n \ - 1: lock \n \ - btsl %0,%1 \n \ - jb 0b" : \ - : \ - "r" (bit), "m" (*(volatile int *)(l)) : \ - "memory"); - -#define bit_unlock(bit,l) \ - __asm__ volatile(" lock \n \ - btrl %0,%1" : \ - : \ - "r" (bit), "m" (*(volatile int *)(l))); - -/* - * Set or clear individual bits in a long word. - * The locked access is needed only to lock access - * to the word, not to individual bits. 
- */ - -#define i_bit_set(bit,l) \ - __asm__ volatile(" lock \n \ - btsl %0,%1" : \ - : \ - "r" (bit), "m" (*(volatile int *)(l))); - -#define i_bit_clear(bit,l) \ - __asm__ volatile(" lock \n \ - btrl %0,%1" : \ - : \ - "r" (bit), "m" (*(volatile int *)(l))); - -static inline char xchgb(volatile char * cp, char new); - -static inline void atomic_incl(volatile long * p, long delta); -static inline void atomic_incs(volatile short * p, short delta); -static inline void atomic_incb(volatile char * p, char delta); - -static inline void atomic_decl(volatile long * p, long delta); -static inline void atomic_decs(volatile short * p, short delta); -static inline void atomic_decb(volatile char * p, char delta); - -static inline long atomic_getl(const volatile long * p); -static inline short atomic_gets(const volatile short * p); -static inline char atomic_getb(const volatile char * p); - -static inline void atomic_setl(volatile long * p, long value); -static inline void atomic_sets(volatile short * p, short value); -static inline void atomic_setb(volatile char * p, char value); - -static inline char xchgb(volatile char * cp, char new) -{ - register char old = new; - - __asm__ volatile (" xchgb %0,%2" : - "=q" (old) : - "0" (new), "m" (*(volatile char *)cp) : "memory"); - return (old); -} - -static inline void atomic_incl(volatile long * p, long delta) -{ - __asm__ volatile (" lock \n \ - add %0,%1" : \ - : \ - "r" (delta), "m" (*(volatile long *)p)); -} - -static inline void atomic_incs(volatile short * p, short delta) -{ - __asm__ volatile (" lock \n \ - addw %0,%1" : \ - : \ - "q" (delta), "m" (*(volatile short *)p)); -} - -static inline void atomic_incb(volatile char * p, char delta) -{ - __asm__ volatile (" lock \n \ - addb %0,%1" : \ - : \ - "q" (delta), "m" (*(volatile char *)p)); -} - -static inline void atomic_decl(volatile long * p, long delta) -{ - __asm__ volatile (" lock \n \ - sub %0,%1" : \ - : \ - "r" (delta), "m" (*(volatile long *)p)); -} - -static inline int atomic_decl_and_test(volatile long * p, long delta) -{ - uint8_t ret; - __asm__ volatile ( - " lock \n\t" - " sub %1,%2 \n\t" - " sete %0" - : "=qm" (ret) - : "r" (delta), "m" (*(volatile long *)p)); - return ret; -} - -static inline void atomic_decs(volatile short * p, short delta) -{ - __asm__ volatile (" lock \n \ - subw %0,%1" : \ - : \ - "q" (delta), "m" (*(volatile short *)p)); -} - -static inline void atomic_decb(volatile char * p, char delta) -{ - __asm__ volatile (" lock \n \ - subb %0,%1" : \ - : \ - "q" (delta), "m" (*(volatile char *)p)); -} - -static inline long atomic_getl(const volatile long * p) -{ - return (*p); -} - -static inline short atomic_gets(const volatile short * p) -{ - return (*p); -} - -static inline char atomic_getb(const volatile char * p) -{ - return (*p); -} - -static inline void atomic_setl(volatile long * p, long value) -{ - *p = value; -} - -static inline void atomic_sets(volatile short * p, short value) -{ - *p = value; -} - -static inline void atomic_setb(volatile char * p, char value) -{ - *p = value; -} - - -#else /* !defined(__GNUC__) */ - -extern void i_bit_set( - int index, - void *addr); - -extern void i_bit_clear( - int index, - void *addr); - -extern void bit_lock( - int index, - void *addr); - -extern void bit_unlock( - int index, - void *addr); - -/* - * All other routines defined in __GNUC__ case lack - * definitions otherwise. 
- XXX - */ - -#endif /* !defined(__GNUC__) */ - -extern void kernel_preempt_check (void); - -#endif /* MACH_KERNEL_PRIVATE */ - -#endif /* __APLE_API_PRIVATE */ +#warning This header is deprecated. Use <kern/locks.h> instead. #endif /* _I386_LOCK_H_ */ diff --git a/osfmk/i386/locks.h b/osfmk/i386/locks.h index 368b34141..22e1a01f9 100644 --- a/osfmk/i386/locks.h +++ b/osfmk/i386/locks.h @@ -226,4 +226,10 @@ typedef struct __lck_rw_t__ lck_rw_t; #endif #endif +#ifdef MACH_KERNEL_PRIVATE + +extern void kernel_preempt_check (void); + +#endif /* MACH_KERNEL_PRIVATE */ + #endif /* _I386_LOCKS_H_ */ diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c index 69b83f1c3..4dd253e01 100644 --- a/osfmk/i386/locks_i386.c +++ b/osfmk/i386/locks_i386.c @@ -63,7 +63,6 @@ #include -#include #include #include #include @@ -126,6 +125,7 @@ decl_simple_lock_data(extern , printf_lock) decl_simple_lock_data(extern , panic_lock) #endif /* USLOCK_DEBUG */ +extern unsigned int not_in_kdp; /* * We often want to know the addresses of the callers @@ -290,6 +290,19 @@ lck_spin_try_lock( return((boolean_t)usimple_lock_try((usimple_lock_t) lck)); } +/* + * Routine: lck_spin_is_acquired + * NOT SAFE: To be used only by kernel debugger to avoid deadlock. + * Returns: TRUE if lock is acquired. + */ +boolean_t +lck_spin_is_acquired(lck_spin_t *lck) { + if (not_in_kdp) { + panic("panic: spinlock acquired check done outside of kernel debugger"); + } + return (lck->interlock != 0)? TRUE : FALSE; +} + /* * Initialize a usimple_lock. * @@ -311,7 +324,7 @@ usimple_lock_init( volatile uint32_t spinlock_owner_cpu = ~0; volatile usimple_lock_t spinlock_timed_out; -static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) { +uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) { uint64_t deadline; uint32_t i; @@ -688,126 +701,6 @@ usl_trace( #endif /* USLOCK_DEBUG */ -/* - * Routine: lock_alloc - * Function: - * Allocate a lock for external users who cannot - * hard-code the structure definition into their - * objects. - * For now just use kalloc, but a zone is probably - * warranted. - */ -lock_t * -lock_alloc( - boolean_t can_sleep, - unsigned short tag, - unsigned short tag1) -{ - lock_t *l; - - if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0) - lock_init(l, can_sleep, tag, tag1); - return(l); -} - -/* - * Routine: lock_free - * Function: - * Free a lock allocated for external users. - * For now just use kfree, but a zone is probably - * warranted. - */ -void -lock_free( - lock_t *l) -{ - kfree(l, sizeof(lock_t)); -} - - -/* - * Routine: lock_init - * Function: - * Initialize a lock; required before use. - * Note that clients declare the "struct lock" - * variables and then initialize them, rather - * than getting a new one from this module. - */ -void -lock_init( - lock_t *l, - boolean_t can_sleep, - __unused unsigned short tag, - __unused unsigned short tag1) -{ - hw_lock_byte_init(&l->lck_rw_interlock); - l->lck_rw_want_write = FALSE; - l->lck_rw_want_upgrade = FALSE; - l->lck_rw_shared_count = 0; - l->lck_rw_can_sleep = can_sleep; - l->lck_rw_tag = tag; - l->lck_rw_priv_excl = 1; - l->lck_r_waiting = l->lck_w_waiting = 0; -} - - -/* - * Sleep locks. These use the same data structure and algorithm - * as the spin locks, but the process sleeps while it is waiting - * for the lock. These work on uniprocessor systems. 
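- */

The inline helpers deleted from i386/lock.h above (i_bit_set, bit_lock, atomic_incl and friends) and the lock_alloc/lock_write wrapper layer removed here and just below are superseded by the hw_atomic_* and lck_* primitives seen elsewhere in this patch. For reference, the same operations in portable C11 atomics; an illustrative sketch, not the kernel's actual replacement API:

    #include <stdatomic.h>

    /* i_bit_set / i_bit_clear: one-bit locked read-modify-write. */
    static void i_bit_set_c11(int bit, _Atomic int *l)   { atomic_fetch_or(l, 1 << bit); }
    static void i_bit_clear_c11(int bit, _Atomic int *l) { atomic_fetch_and(l, ~(1 << bit)); }

    /* bit_lock: spin until this CPU flips the bit from 0 to 1, which is
     * exactly what the deleted bts/jb loop does. */
    static void bit_lock_c11(int bit, _Atomic int *l)
    {
        while (atomic_fetch_or(l, 1 << bit) & (1 << bit))
            ;   /* bit already set by another owner: keep spinning */
    }

    static void bit_unlock_c11(int bit, _Atomic int *l)
    {
        atomic_fetch_and(l, ~(1 << bit));
    }

    /* atomic_incl / atomic_decl: locked add and subtract. */
    static void atomic_incl_c11(_Atomic long *p, long delta) { atomic_fetch_add(p, delta); }
    static void atomic_decl_c11(_Atomic long *p, long delta) { atomic_fetch_sub(p, delta); }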
- -#define DECREMENTER_TIMEOUT 1000000 - -void -lock_write( - register lock_t * l) -{ - lck_rw_lock_exclusive(l); -} - -void -lock_done( - register lock_t * l) -{ - (void) lck_rw_done(l); -} - -void -lock_read( - register lock_t * l) -{ - lck_rw_lock_shared(l); -} - - -/* - * Routine: lock_read_to_write - * Function: - * Improves a read-only lock to one with - * write permission. If another reader has - * already requested an upgrade to a write lock, - * no lock is held upon return. - * - * Returns FALSE if the upgrade *failed*. - */ - -boolean_t -lock_read_to_write( - register lock_t * l) -{ - return lck_rw_lock_shared_to_exclusive(l); -} - -void -lock_write_to_read( - register lock_t * l) -{ - lck_rw_lock_exclusive_to_shared(l); -} - - - /* * Routine: lck_rw_alloc_init */ @@ -1184,21 +1077,9 @@ lck_rw_done_gen( { lck_rw_t *fake_lck; lck_rw_type_t lock_type; - thread_t thread = current_thread(); + thread_t thread; uint32_t rwlock_count; - /* Check if dropping the lock means that we need to unpromote */ - rwlock_count = thread->rwlock_count--; -#if MACH_LDEBUG - if (rwlock_count == 0) { - panic("rw lock count underflow for thread %p", thread); - } -#endif - if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { - /* sched_flags checked without lock, but will be rechecked while clearing */ - lck_rw_clear_promotion(thread); - } - /* * prior_lock state is a snapshot of the 1st word of the * lock in question... we'll fake up a pointer to it @@ -1219,6 +1100,19 @@ lck_rw_done_gen( else lock_type = LCK_RW_TYPE_EXCLUSIVE; + /* Check if dropping the lock means that we need to unpromote */ + thread = current_thread(); + rwlock_count = thread->rwlock_count--; +#if MACH_LDEBUG + if (rwlock_count == 0) { + panic("rw lock count underflow for thread %p", thread); + } +#endif + if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + /* sched_flags checked without lock, but will be rechecked while clearing */ + lck_rw_clear_promotion(thread); + } + #if CONFIG_DTRACE LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 
0 : 1); #endif @@ -1889,7 +1783,9 @@ lck_mtx_unlock_wakeup_x86 ( thread->sched_flags &= ~TH_SFLAG_PROMOTED; - if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { + if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) { + /* Thread still has a RW lock promotion */ + } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE, thread->sched_pri, DEPRESSPRI, 0, mutex, 0); @@ -1950,8 +1846,8 @@ lck_mtx_lock_acquire_x86( thread_lock(thread); if (thread->sched_pri < priority) { - /* Do not promote into the realtime priority band */ - assert(priority <= MAXPRI_KERNEL); + /* Do not promote past promotion ceiling */ + assert(priority <= MAXPRI_PROMOTE); set_sched_pri(thread, priority); } if (mutex->lck_mtx_promoted == 0) { @@ -2090,8 +1986,8 @@ lck_mtx_lock_wait_x86 ( if (priority < BASEPRI_DEFAULT) priority = BASEPRI_DEFAULT; - /* Do not promote into the realtime priority band */ - priority = MIN(priority, MAXPRI_KERNEL); + /* Do not promote past promotion ceiling */ + priority = MIN(priority, MAXPRI_PROMOTE); if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri) mutex->lck_mtx_pri = priority; @@ -2099,18 +1995,20 @@ lck_mtx_lock_wait_x86 ( if ( (holder = (thread_t)mutex->lck_mtx_owner) && holder->sched_pri < mutex->lck_mtx_pri ) { - /* Assert that we're not altering the priority of a - * MAXPRI_KERNEL or RT prio band thread - */ - assert(holder->sched_pri < MAXPRI_KERNEL); s = splsched(); thread_lock(holder); + /* holder priority may have been bumped by another thread + * before thread_lock was taken + */ if (holder->sched_pri < mutex->lck_mtx_pri) { KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE, holder->sched_pri, priority, thread_tid(holder), mutex, 0); - + /* Assert that we're not altering the priority of a + * thread above the MAXPRI_PROMOTE band + */ + assert(holder->sched_pri < MAXPRI_PROMOTE); set_sched_pri(holder, priority); if (mutex->lck_mtx_promoted == 0) { diff --git a/osfmk/i386/machdep_call.c b/osfmk/i386/machdep_call.c index 454eb1b02..04bd5fb0d 100644 --- a/osfmk/i386/machdep_call.c +++ b/osfmk/i386/machdep_call.c @@ -52,8 +52,13 @@ const machdep_call_t machdep_call_table[] = { MACHDEP_BSD_CALL_ROUTINE(i386_get_ldt,3), }; const machdep_call_t machdep_call_table64[] = { +#if HYPERVISOR + MACHDEP_CALL_ROUTINE64(hv_task_trap,2), + MACHDEP_CALL_ROUTINE64(hv_thread_trap,2), +#else MACHDEP_CALL_ROUTINE(kern_invalid,0), MACHDEP_CALL_ROUTINE(kern_invalid,0), +#endif MACHDEP_CALL_ROUTINE(kern_invalid,0), MACHDEP_CALL_ROUTINE64(thread_fast_set_cthread_self64,1), MACHDEP_CALL_ROUTINE(kern_invalid,0), diff --git a/osfmk/i386/machdep_call.h b/osfmk/i386/machdep_call.h index 3b6d9fbe9..29a25f4d7 100644 --- a/osfmk/i386/machdep_call.h +++ b/osfmk/i386/machdep_call.h @@ -41,6 +41,7 @@ typedef union { kern_return_t (*args_1)(uint32_t); kern_return_t (*args64_1)(uint64_t); kern_return_t (*args_2)(uint32_t,uint32_t); + kern_return_t (*args64_2)(uint64_t,uint64_t); kern_return_t (*args_3)(uint32_t,uint32_t,uint32_t); kern_return_t (*args_4)(uint32_t,uint32_t,uint32_t,uint32_t); kern_return_t (*args_var)(uint32_t,...); @@ -68,6 +69,11 @@ extern const machdep_call_t machdep_call_table64[]; extern int machdep_call_count; +#if HYPERVISOR +extern kern_return_t hv_task_trap(uint64_t,uint64_t); +extern kern_return_t hv_thread_trap(uint64_t,uint64_t); +#endif + extern kern_return_t thread_fast_set_cthread_self(uint32_t); extern kern_return_t thread_fast_set_cthread_self64(uint64_t); 
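extern kern_return_t thread_set_user_ldt(uint32_t,uint32_t,uint32_t);

The hv_task_trap/hv_thread_trap entries added to machdep_call_table64 above are the first users of the new args64_2 union member: machdep calls are dispatched through a function-pointer union selected by the entry's declared argument count. A minimal sketch of that dispatch shape, with hypothetical stand-in types (the kernel's real dispatcher lives in the machine-dependent syscall path, outside this hunk):

    #include <stdint.h>

    typedef int kern_return_t;              /* stand-in for the kernel's type */
    #define KERN_INVALID_ARGUMENT 4         /* stand-in value */

    typedef union {
        kern_return_t (*args64_1)(uint64_t);
        kern_return_t (*args64_2)(uint64_t, uint64_t);  /* member added above */
    } machdep_routine_t;

    typedef struct {
        machdep_routine_t routine;
        int               nargs;            /* from MACHDEP_CALL_ROUTINE64(fn, n) */
    } machdep_entry_t;

    /* Pick the union member that matches the registered argument count. */
    static kern_return_t
    machdep_call64(const machdep_entry_t *e, uint64_t a0, uint64_t a1)
    {
        switch (e->nargs) {
        case 1:  return e->routine.args64_1(a0);
        case 2:  return e->routine.args64_2(a0, a1);    /* e.g. hv_task_trap */
        default: return KERN_INVALID_ARGUMENT;
        }
    }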
diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index d958d8c2e..85999aef6 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -37,9 +37,10 @@ #include #include #include +#include #include #include -#include +#include #include #include #include @@ -54,6 +55,7 @@ #if KPC #include #endif +#include #if DEBUG #define DBG(x...) kprintf("DBG: " x) @@ -66,6 +68,7 @@ extern void wakeup(void *); static int max_cpus_initialized = 0; unsigned int LockTimeOut; +unsigned int TLBTimeOut; unsigned int LockTimeOutTSC; unsigned int MutexSpin; uint64_t LastDebuggerEntryAllowance; @@ -277,7 +280,7 @@ boolean_t ml_set_interrupts_enabled(boolean_t enable) __asm__ volatile("sti;nop"); if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) - __asm__ volatile ("int $0xff"); + __asm__ volatile ("int %0" :: "N" (T_PREEMPT)); } else { if (istate) @@ -308,20 +311,25 @@ void ml_cause_interrupt(void) panic("ml_cause_interrupt not defined yet on Intel"); } +/* + * TODO: transition users of this to kernel_thread_start_priority + * ml_thread_policy is an unsupported KPI + */ void ml_thread_policy( thread_t thread, __unused unsigned policy_id, unsigned policy_info) { if (policy_info & MACHINE_NETWORK_WORKLOOP) { - spl_t s = splsched(); + thread_precedence_policy_data_t info; + __assert_only kern_return_t kret; - thread_lock(thread); + info.importance = 1; - set_priority(thread, thread->priority + 1); - - thread_unlock(thread); - splx(s); + kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, + (thread_policy_t)&info, + THREAD_PRECEDENCE_POLICY_COUNT); + assert(kret == KERN_SUCCESS); } } @@ -490,6 +498,12 @@ ml_processor_register( /* fix the CPU id */ this_cpu_datap->cpu_id = cpu_id; + /* allocate and initialize other per-cpu structures */ + if (!boot_cpu) { + mp_cpus_call_cpu_init(cpunum); + prng_cpu_init(cpunum); + } + /* output arg */ *processor_out = this_cpu_datap->cpu_processor; @@ -619,6 +633,20 @@ ml_init_lock_timeout(void) LockTimeOut = (uint32_t) abstime; LockTimeOutTSC = (uint32_t) tmrCvt(abstime, tscFCvtn2t); + /* + * TLBTimeOut dictates the TLB flush timeout period. It defaults to + * LockTimeOut but can be overridden separately. In particular, a + * zero value inhibits the timeout-panic and cuts a trace event instead + * - see pmap_flush_tlbs(). 
+ */ + if (PE_parse_boot_argn("tlbto_us", &slto, sizeof (slto))) { + default_timeout_ns = slto * NSEC_PER_USEC; + nanoseconds_to_absolutetime(default_timeout_ns, &abstime); + TLBTimeOut = (uint32_t) abstime; + } else { + TLBTimeOut = LockTimeOut; + } + if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) { if (mtxspin > USEC_PER_SEC>>4) mtxspin = USEC_PER_SEC>>4; @@ -799,3 +827,38 @@ boolean_t ml_timer_forced_evaluation(void) { return ml_timer_evaluation_in_progress; } + +/* 32-bit right-rotate n bits */ +static inline uint32_t ror32(uint32_t val, const unsigned int n) +{ + __asm__ volatile("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n)); + return val; +} + +void +ml_entropy_collect(void) +{ + uint32_t tsc_lo, tsc_hi; + uint32_t *ep; + + assert(cpu_number() == master_cpu); + + /* update buffer pointer cyclically */ + if (EntropyData.index_ptr - EntropyData.buffer == ENTROPY_BUFFER_SIZE) + ep = EntropyData.index_ptr = EntropyData.buffer; + else + ep = EntropyData.index_ptr++; + + rdtsc_nofence(tsc_lo, tsc_hi); + *ep = ror32(*ep, 9) ^ tsc_lo; +} + +void +ml_gpu_stat_update(uint64_t gpu_ns_delta) { + current_thread()->machine.thread_gpu_ns += gpu_ns_delta; +} + +uint64_t +ml_gpu_stat(thread_t t) { + return t->machine.thread_gpu_ns; +} diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index eeb451caf..eabfb5c87 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -78,7 +78,9 @@ void ml_install_interrupt_handler( IOInterruptHandler handler, void *refCon); -void ml_get_timebase(unsigned long long *timestamp); +void ml_entropy_collect(void); + +uint64_t ml_get_timebase(); void ml_init_lock_timeout(void); void ml_init_delay_spin_threshold(int); @@ -120,25 +122,6 @@ extern void ml_cpu_down(void); void bzero_phys_nc( addr64_t phys_address, uint32_t length); -#define NUM_LATENCY_QOS_TIERS (6) -typedef struct { - int32_t timer_coalesce_rt_shift; - int32_t timer_coalesce_bg_shift; - int32_t timer_coalesce_kt_shift; - int32_t timer_coalesce_fp_shift; - int32_t timer_coalesce_ts_shift; - - uint64_t timer_coalesce_rt_ns_max; - uint64_t timer_coalesce_bg_ns_max; - uint64_t timer_coalesce_kt_ns_max; - uint64_t timer_coalesce_fp_ns_max; - uint64_t timer_coalesce_ts_ns_max; - - uint32_t latency_qos_scale[NUM_LATENCY_QOS_TIERS]; - uint64_t latency_qos_ns_max[NUM_LATENCY_QOS_TIERS]; - boolean_t latency_tier_rate_limited[NUM_LATENCY_QOS_TIERS]; -} timer_coalescing_priority_params_t; -extern timer_coalescing_priority_params_t tcoal_prio_params; extern uint32_t interrupt_timer_coalescing_enabled; extern uint32_t idle_entry_timer_processing_hdeadline_threshold; @@ -348,9 +331,9 @@ void timer_queue_expire_local(void*); void timer_queue_expire_rescan(void*); void ml_timer_evaluate(void); boolean_t ml_timer_forced_evaluation(void); -int ml_timer_get_user_idle_level(void); -kern_return_t ml_timer_set_user_idle_level(int); +void ml_gpu_stat_update(uint64_t); +uint64_t ml_gpu_stat(thread_t); boolean_t ml_recent_wake(void); #endif /* XNU_KERNEL_PRIVATE */ #endif /* _I386_MACHINE_ROUTINES_H_ */ diff --git a/osfmk/i386/machine_task.c b/osfmk/i386/machine_task.c index 3c2d93ffd..342f123f5 100644 --- a/osfmk/i386/machine_task.c +++ b/osfmk/i386/machine_task.c @@ -58,6 +58,10 @@ #include #include +#if HYPERVISOR +#include +#endif + extern zone_t ids_zone; kern_return_t @@ -240,6 +244,13 @@ machine_task_terminate(task_t task) user_ldt_t user_ldt; void *task_debug; +#if HYPERVISOR + if (task->hv_task_target) { + hv_callbacks.task_destroy(task->hv_task_target); + 
task->hv_task_target = NULL; + } +#endif + user_ldt = task->i386_ldt; if (user_ldt != 0) { task->i386_ldt = 0; diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index 8bba95cda..708404a98 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -152,6 +152,8 @@ extern void act_machine_switch_pcb(thread_t old, thread_t new); #define FULL_SLAVE_INIT (NULL) #define FAST_SLAVE_INIT ((void *)(uintptr_t)1) -uint64_t ml_early_random(void); void cpu_pmc_control(void *); + +extern void pstate_trace(void); + #endif /* _I386_MISC_PROTOS_H_ */ diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 50c755b54..ec0cda52c 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -50,12 +50,14 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -167,10 +169,11 @@ volatile long NMI_count = 0; extern void NMI_cpus(void); static void mp_cpus_call_init(void); -static void mp_cpus_call_cpu_init(void); static void mp_cpus_call_action(void); static void mp_call_PM(void); +static boolean_t mp_cpus_call_wait_timeout = FALSE; + char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init /* PAL-related routines */ @@ -239,7 +242,7 @@ smp_init(void) DBGLOG_CPU_INIT(master_cpu); mp_cpus_call_init(); - mp_cpus_call_cpu_init(); + mp_cpus_call_cpu_init(master_cpu); if (PE_parse_boot_argn("TSC_sync_margin", &TSC_sync_margin, sizeof(TSC_sync_margin))) { @@ -487,6 +490,9 @@ MP_EVENT_NAME_DECL(); #endif /* MP_DEBUG */ +/* + * Note: called with NULL state when polling for TLB flush and cross-calls. + */ int cpu_signal_handler(x86_saved_state_t *regs) { @@ -509,7 +515,7 @@ cpu_signal_handler(x86_saved_state_t *regs) do { #if MACH_KDP - if (i_bit(MP_KDP, my_word) && regs != NULL) { + if (i_bit(MP_KDP, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_KDP); i_bit_clear(MP_KDP, my_word); /* Ensure that the i386_kernel_state at the base of the @@ -529,10 +535,6 @@ cpu_signal_handler(x86_saved_state_t *regs) DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH); i_bit_clear(MP_TLB_FLUSH, my_word); pmap_update_interrupt(); - } else if (i_bit(MP_AST, my_word)) { - DBGLOG(cpu_handle,my_cpu,MP_AST); - i_bit_clear(MP_AST, my_word); - ast_check(cpu_to_processor(my_cpu)); } else if (i_bit(MP_RENDEZVOUS, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS); i_bit_clear(MP_RENDEZVOUS, my_word); @@ -554,15 +556,24 @@ cpu_signal_handler(x86_saved_state_t *regs) i_bit_clear(MP_CALL_PM, my_word); mp_call_PM(); } + if (regs == NULL) { + /* Called to poll only for cross-calls and TLB flush */ + break; + } else if (i_bit(MP_AST, my_word)) { + DBGLOG(cpu_handle,my_cpu,MP_AST); + i_bit_clear(MP_AST, my_word); + ast_check(cpu_to_processor(my_cpu)); + } } while (*my_word); return 0; } +extern void kprintf_break_lock(void); static int NMIInterruptHandler(x86_saved_state_t *regs) { - void *stackptr; + void *stackptr; if (panic_active() && !panicDebugging) { if (pmsafe_debug) @@ -577,24 +588,46 @@ NMIInterruptHandler(x86_saved_state_t *regs) __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); if (cpu_number() == debugger_cpu) - goto NMExit; + goto NMExit; if (spinlock_timed_out) { char pstr[192]; snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu); panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs); + } else if 
(mp_cpus_call_wait_timeout) { + char pstr[192]; + snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, this CPU timed-out during cross-call\n", cpu_number()); + panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs); } else if (pmap_tlb_flush_timeout == TRUE) { char pstr[128]; snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid); panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs); - } + } #if MACH_KDP if (pmsafe_debug && !kdp_snapshot) pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); current_cpu_datap()->cpu_NMI_acknowledged = TRUE; i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals); - mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active()); + if (pmap_tlb_flush_timeout || + spinlock_timed_out || + mp_cpus_call_wait_timeout || + panic_active()) { + mp_kdp_wait(FALSE, TRUE); + } else if (virtualized && (debug_boot_arg & DB_NMI)) { + /* + * Under a VMM with the debug boot-arg set, drop into kdp. + * Since an NMI is involved, there's a risk of contending with + * a panic. And side-effects of NMIs may result in entry into, + * and continuing from, the debugger being unreliable. + */ + kprintf_break_lock(); + kprintf("Debugger entry requested by NMI\n"); + kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0); + printf("Debugger entry requested by NMI\n"); + } else { + mp_kdp_wait(FALSE, FALSE); + } if (pmsafe_debug && !kdp_snapshot) pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); #endif @@ -769,14 +802,14 @@ i386_active_cpus(void) * Helper function called when busy-waiting: panic if too long * a TSC-based time has elapsed since the start of the spin. */ -static void -mp_spin_timeout_check(uint64_t tsc_start, const char *msg) +static boolean_t +mp_spin_timeout(uint64_t tsc_start) { uint64_t tsc_timeout; cpu_pause(); if (machine_timeout_suspended()) - return; + return FALSE; /* * The timeout is 4 * the spinlock timeout period @@ -785,8 +818,38 @@ mp_spin_timeout_check(uint64_t tsc_start, const char *msg) */ tsc_timeout = disable_serial_output ? (uint64_t) LockTimeOutTSC << 2 : (uint64_t) LockTimeOutTSC << 4; - if (rdtsc64() > tsc_start + tsc_timeout) - panic("%s: spin timeout", msg); + return (rdtsc64() > tsc_start + tsc_timeout); +} + +/* + * Helper function to take a spinlock while ensuring that incoming IPIs + * are still serviced if interrupts are masked while we spin.
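
The reworked mp_spin_timeout() above returns a predicate instead of panicking, so each spin site can decide what to report when the deadline passes. The underlying pattern is simply a TSC-denominated deadline wrapped around cpu_pause(); a minimal sketch, with rdtsc64() and cpu_pause() assumed as the kernel helpers of the same names:

	#include <stdint.h>

	extern uint64_t rdtsc64(void);	/* kernel TSC read; stub when testing */
	extern void cpu_pause(void);	/* kernel spin hint; stub when testing */

	/* Busy-wait for *flag with a TSC deadline; returns 0 on timeout. */
	static int
	spin_until(volatile int *flag, uint64_t tsc_timeout)
	{
		uint64_t tsc_start = rdtsc64();

		while (!*flag) {
			cpu_pause();
			if (rdtsc64() > tsc_start + tsc_timeout)
				return 0;	/* caller decides: panic, NMI, retry */
		}
		return 1;
	}
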
+ */ +static boolean_t +mp_safe_spin_lock(usimple_lock_t lock) +{ + if (ml_get_interrupts_enabled()) { + simple_lock(lock); + return TRUE; + } else { + uint64_t tsc_spin_start = rdtsc64(); + while (!simple_lock_try(lock)) { + cpu_signal_handler(NULL); + if (mp_spin_timeout(tsc_spin_start)) { + uint32_t lock_cpu; + uintptr_t lowner = (uintptr_t) + lock->interlock.lock_data; + spinlock_timed_out = lock; + lock_cpu = spinlock_timeout_NMI(lowner); + panic("mp_safe_spin_lock() timed out," + " lock: %p, owner thread: 0x%lx," + " current_thread: %p, owner on CPU 0x%x", + lock, lowner, + current_thread(), lock_cpu); + } + } + return FALSE; + } } /* @@ -822,8 +885,8 @@ mp_rendezvous_action(void) /* poll for pesky tlb flushes if interrupts disabled */ if (!intrs_enabled) handle_pending_TLB_flushes(); - mp_spin_timeout_check(tsc_spin_start, - "mp_rendezvous_action() entry"); + if (mp_spin_timeout(tsc_spin_start)) + panic("mp_rendezvous_action() entry"); } /* action function */ @@ -836,8 +899,8 @@ mp_rendezvous_action(void) while (mp_rv_exit < mp_rv_ncpus) { if (!intrs_enabled) handle_pending_TLB_flushes(); - mp_spin_timeout_check(tsc_spin_start, - "mp_rendezvous_action() exit"); + if (mp_spin_timeout(tsc_spin_start)) + panic("mp_rendezvous_action() exit"); } /* teardown function */ @@ -867,7 +930,7 @@ mp_rendezvous(void (*setup_func)(void *), } /* obtain rendezvous lock */ - simple_lock(&mp_rv_lock); + (void) mp_safe_spin_lock(&mp_rv_lock); /* set static function pointers */ mp_rv_setup_func = setup_func; @@ -883,7 +946,7 @@ mp_rendezvous(void (*setup_func)(void *), * signal other processors, which will call mp_rendezvous_action() * with interrupts disabled */ - simple_lock(&x86_topo_lock); + (void) mp_safe_spin_lock(&x86_topo_lock); mp_rv_ncpus = i386_active_cpus(); i386_signal_cpus(MP_RENDEZVOUS, ASYNC); simple_unlock(&x86_topo_lock); @@ -898,7 +961,8 @@ mp_rendezvous(void (*setup_func)(void *), */ tsc_spin_start = rdtsc64(); while (mp_rv_complete < mp_rv_ncpus) { - mp_spin_timeout_check(tsc_spin_start, "mp_rendezvous()"); + if (mp_spin_timeout(tsc_spin_start)) + panic("mp_rendezvous() timeout"); } /* Tidy up */ @@ -956,7 +1020,7 @@ typedef struct { void (*func)(void *,void *); /* routine to call */ void *arg0; /* routine's 1st arg */ void *arg1; /* routine's 2nd arg */ - volatile long *countp; /* completion counter */ + cpumask_t *maskp; /* completion response mask */ } mp_call_t; @@ -979,12 +1043,28 @@ mp_call_head_lock(mp_call_queue_t *cqp) return intrs_enabled; } +void +mp_cpus_NMIPI(cpumask_t cpu_mask) { + unsigned int cpu, cpu_bit; + uint64_t deadline; + + for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { + if (cpu_mask & cpu_bit) + cpu_NMI_interrupt(cpu); + } + deadline = mach_absolute_time() + (LockTimeOut); + while (mach_absolute_time() < deadline) + cpu_pause(); +} + +#if MACH_ASSERT static inline boolean_t mp_call_head_is_locked(mp_call_queue_t *cqp) { return !ml_get_interrupts_enabled() && hw_lock_held((hw_lock_t)&cqp->lock); } +#endif static inline void mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled) @@ -1050,20 +1130,16 @@ mp_cpus_call_init(void) } /* - * Called by each processor to add call buffers to the free list + * Called at processor registration to add call buffers to the free list * and to initialize the per-cpu call queue. - * Also called but ignored on slave processors on re-start/wake. 
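
mp_safe_spin_lock() above is the key deadlock-avoidance piece in this file: a CPU spinning with interrupts masked keeps servicing cross-calls and TLB flushes by polling cpu_signal_handler() with a NULL register state. A condensed sketch of that shape, using the patch's own identifiers with the timeout/NMI/panic path elided:

	/* Condensed shape of mp_safe_spin_lock(); error handling omitted. */
	static void
	safe_spin_lock_sketch(usimple_lock_t lock)
	{
		if (ml_get_interrupts_enabled()) {
			simple_lock(lock);	/* IPIs are serviced normally */
		} else {
			while (!simple_lock_try(lock)) {
				/*
				 * Poll for cross-calls and TLB flushes on each
				 * failed attempt, so a remote rendezvous that
				 * needs this CPU cannot deadlock against us.
				 */
				cpu_signal_handler(NULL);
			}
		}
	}
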
*/ -static void -mp_cpus_call_cpu_init(void) +void +mp_cpus_call_cpu_init(int cpu) { int i; - mp_call_queue_t *cqp = &mp_cpus_call_head[cpu_number()]; + mp_call_queue_t *cqp = &mp_cpus_call_head[cpu]; mp_call_t *callp; - if (cqp->queue.next != NULL) - return; /* restart/wake case: called already */ - simple_lock_init(&cqp->lock, 0); queue_init(&cqp->queue); for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) { @@ -1071,7 +1147,7 @@ mp_cpus_call_cpu_init(void) mp_call_free(callp); } - DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number()); + DBG("mp_cpus_call_init(%d) done\n", cpu); } /* @@ -1097,12 +1173,12 @@ mp_cpus_call_action(void) mp_call_head_unlock(cqp, intrs_enabled); KERNEL_DEBUG_CONSTANT( TRACE_MP_CPUS_CALL_ACTION, - call.func, call.arg0, call.arg1, call.countp, 0); + call.func, call.arg0, call.arg1, call.maskp, 0); call.func(call.arg0, call.arg1); (void) mp_call_head_lock(cqp); } - if (call.countp != NULL) - atomic_incl(call.countp, 1); + if (call.maskp != NULL) + i_bit_set(cpu_number(), call.maskp); } mp_call_head_unlock(cqp, intrs_enabled); } @@ -1139,8 +1215,8 @@ mp_cpus_call( static void mp_cpus_call_wait(boolean_t intrs_enabled, - long mp_cpus_signals, - volatile long *mp_cpus_calls) + cpumask_t cpus_called, + cpumask_t *cpus_responded) { mp_call_queue_t *cqp; uint64_t tsc_spin_start; @@ -1148,14 +1224,22 @@ mp_cpus_call_wait(boolean_t intrs_enabled, cqp = &mp_cpus_call_head[cpu_number()]; tsc_spin_start = rdtsc64(); - while (*mp_cpus_calls < mp_cpus_signals) { + while (*cpus_responded != cpus_called) { if (!intrs_enabled) { /* Sniffing w/o locking */ if (!queue_empty(&cqp->queue)) mp_cpus_call_action(); - handle_pending_TLB_flushes(); + cpu_signal_handler(NULL); + } + if (mp_spin_timeout(tsc_spin_start)) { + cpumask_t cpus_unresponsive; + + mp_cpus_call_wait_timeout = TRUE; + cpus_unresponsive = cpus_called & ~(*cpus_responded); + mp_cpus_NMIPI(cpus_unresponsive); + panic("mp_cpus_call_wait() timeout, cpus: 0x%lx", + cpus_unresponsive); } - mp_spin_timeout_check(tsc_spin_start, "mp_cpus_call_wait()"); } } @@ -1174,9 +1258,10 @@ mp_cpus_call1( boolean_t call_self = FALSE; cpumask_t cpus_called = 0; cpumask_t cpus_notcalled = 0; - long mp_cpus_signals = 0; - volatile long mp_cpus_calls = 0; + cpumask_t cpus_responded = 0; + long cpus_call_count = 0; uint64_t tsc_spin_start; + boolean_t topo_lock; KERNEL_DEBUG_CONSTANT( TRACE_MP_CPUS_CALL | DBG_FUNC_START, @@ -1196,23 +1281,30 @@ mp_cpus_call1( /* * Queue the call for each non-local requested cpu. - * The topo lock is not taken. Instead we sniff the cpu_running state - * and then re-check it after taking the call lock. A cpu being taken - * offline runs the action function after clearing the cpu_running. + * This is performed under the topo lock to prevent changes to + * cpus online state and to prevent concurrent rendezvouses -- + * although an exception is made if we're calling only the master + * processor since that always remains active. Note: this exception + * is expected for longterm timer nosync cross-calls to the master cpu. 
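
Replacing the completion counter with a response mask is what makes the new timeout diagnostics possible: the initiator can compute exactly which CPUs never answered, NMI them to capture backtraces, and then panic with their identities. A sketch under the patch's cpumask_t convention (cpumask_t is already volatile per mp.h; the deadline handling is simplified):

	extern void mp_cpus_NMIPI(cpumask_t cpus);	/* added by this patch */

	static void
	wait_for_responses_sketch(cpumask_t cpus_called,
	    cpumask_t *cpus_responded, uint64_t tsc_timeout)
	{
		uint64_t tsc_start = rdtsc64();

		while (*cpus_responded != cpus_called) {
			cpu_pause();
			if (rdtsc64() > tsc_start + tsc_timeout) {
				/* name the stragglers and NMI them first */
				cpumask_t stuck =
				    cpus_called & ~(*cpus_responded);
				mp_cpus_NMIPI(stuck);
				panic("cross-call timeout, cpus: 0x%lx",
				    (unsigned long)stuck);
			}
		}
	}
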
*/ - mp_disable_preemption(); /* interrupts may be enabled */ - tsc_spin_start = rdtsc64(); + mp_disable_preemption(); + intrs_enabled = ml_get_interrupts_enabled(); + topo_lock = (cpus != cpu_to_cpumask(master_cpu)); + if (topo_lock) { + ml_set_interrupts_enabled(FALSE); + (void) mp_safe_spin_lock(&x86_topo_lock); + } for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) { if (((cpu_to_cpumask(cpu) & cpus) == 0) || !cpu_datap(cpu)->cpu_running) continue; + tsc_spin_start = rdtsc64(); if (cpu == (cpu_t) cpu_number()) { /* * We don't IPI ourself and if calling asynchronously, * we defer our call until we have signalled all others. */ call_self = TRUE; - cpus_called |= cpu_to_cpumask(cpu); if (mode == SYNC && action_func != NULL) { KERNEL_DEBUG_CONSTANT( TRACE_MP_CPUS_CALL_LOCAL, @@ -1227,57 +1319,57 @@ mp_cpus_call1( */ mp_call_t *callp = NULL; mp_call_queue_t *cqp = &mp_cpus_call_head[cpu]; + boolean_t intrs_inner; queue_call: if (callp == NULL) callp = mp_call_alloc(); - intrs_enabled = mp_call_head_lock(cqp); - if (!cpu_datap(cpu)->cpu_running) { - mp_call_head_unlock(cqp, intrs_enabled); - continue; - } + intrs_inner = mp_call_head_lock(cqp); if (mode == NOSYNC) { if (callp == NULL) { cpus_notcalled |= cpu_to_cpumask(cpu); - mp_call_head_unlock(cqp, intrs_enabled); + mp_call_head_unlock(cqp, intrs_inner); KERNEL_DEBUG_CONSTANT( TRACE_MP_CPUS_CALL_NOBUF, cpu, 0, 0, 0, 0); continue; } - callp->countp = NULL; + callp->maskp = NULL; } else { if (callp == NULL) { - mp_call_head_unlock(cqp, intrs_enabled); + mp_call_head_unlock(cqp, intrs_inner); KERNEL_DEBUG_CONSTANT( TRACE_MP_CPUS_CALL_NOBUF, cpu, 0, 0, 0, 0); - if (!intrs_enabled) { + if (!intrs_inner) { /* Sniffing w/o locking */ if (!queue_empty(&cqp->queue)) mp_cpus_call_action(); handle_pending_TLB_flushes(); } - mp_spin_timeout_check( - tsc_spin_start, - "mp_cpus_call1()"); + if (mp_spin_timeout(tsc_spin_start)) + panic("mp_cpus_call1() timeout"); goto queue_call; } - callp->countp = &mp_cpus_calls; + callp->maskp = &cpus_responded; } callp->func = action_func; callp->arg0 = arg0; callp->arg1 = arg1; mp_call_enqueue_locked(cqp, callp); - mp_cpus_signals++; + cpus_call_count++; cpus_called |= cpu_to_cpumask(cpu); i386_signal_cpu(cpu, MP_CALL, ASYNC); - mp_call_head_unlock(cqp, intrs_enabled); + mp_call_head_unlock(cqp, intrs_inner); if (mode == SYNC) { - mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls); + mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded); } } } + if (topo_lock) { + simple_unlock(&x86_topo_lock); + ml_set_interrupts_enabled(intrs_enabled); + } /* Call locally if mode not SYNC */ if (mode != SYNC && call_self ) { @@ -1295,12 +1387,14 @@ mp_cpus_call1( mp_enable_preemption(); /* For ASYNC, now wait for all signaled cpus to complete their calls */ - if (mode == ASYNC) { - mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls); - } + if (mode == ASYNC) + mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded); out: - cpu = (cpu_t) mp_cpus_signals + (call_self ? 
1 : 0); + if (call_self){ + cpus_called |= cpu_to_cpumask(cpu); + cpus_call_count++; + } if (cpus_calledp) *cpus_calledp = cpus_called; @@ -1309,9 +1403,9 @@ out: KERNEL_DEBUG_CONSTANT( TRACE_MP_CPUS_CALL | DBG_FUNC_END, - cpu, cpus_called, cpus_notcalled, 0, 0); + cpus_call_count, cpus_called, cpus_notcalled, 0, 0); - return cpu; + return (cpu_t) cpus_call_count; } @@ -1374,6 +1468,30 @@ mp_broadcast( lck_mtx_unlock(&mp_bc_lock); } +void +mp_cpus_kick(cpumask_t cpus) +{ + cpu_t cpu; + boolean_t intrs_enabled = FALSE; + + intrs_enabled = ml_set_interrupts_enabled(FALSE); + mp_safe_spin_lock(&x86_topo_lock); + + for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) { + if ((cpu == (cpu_t) cpu_number()) + || ((cpu_to_cpumask(cpu) & cpus) == 0) + || (!cpu_datap(cpu)->cpu_running)) + { + continue; + } + + lapic_send_ipi(cpu, LAPIC_VECTOR(KICK)); + } + + simple_unlock(&x86_topo_lock); + ml_set_interrupts_enabled(intrs_enabled); +} + void i386_activate_cpu(void) { @@ -1707,11 +1825,8 @@ slave_machine_init(void *param) * Cold start */ clock_init(); - cpu_machine_init(); /* Interrupts enabled hereafter */ - mp_cpus_call_cpu_init(); - } else { - cpu_machine_init(); /* Interrupts enabled hereafter */ } + cpu_machine_init(); /* Interrupts enabled hereafter */ } #undef cpu_number diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h index 1e5a13dc9..4e333ce7d 100644 --- a/osfmk/i386/mp.h +++ b/osfmk/i386/mp.h @@ -75,7 +75,7 @@ #include #include #include -#include +#include __BEGIN_DECLS @@ -107,6 +107,7 @@ extern volatile boolean_t force_immediate_debugger_NMI; extern volatile boolean_t pmap_tlb_flush_timeout; extern volatile usimple_lock_t spinlock_timed_out; extern volatile uint32_t spinlock_owner_cpu; +extern uint32_t spinlock_timeout_NMI(uintptr_t thread_addr); extern uint64_t LastDebuggerEntryAllowance; @@ -146,7 +147,7 @@ typedef enum {KDP_XCPU_NONE = 0xffff, KDP_CURRENT_LCPU = 0xfffe} kdp_cpu_t; #endif typedef uint32_t cpu_t; -typedef uint32_t cpumask_t; +typedef volatile long cpumask_t; static inline cpumask_t cpu_to_cpumask(cpu_t cpu) { @@ -156,6 +157,9 @@ cpu_to_cpumask(cpu_t cpu) #define CPUMASK_SELF cpu_to_cpumask(cpu_number()) #define CPUMASK_OTHERS (CPUMASK_ALL & ~CPUMASK_SELF) +/* Initialation routing called at processor registration */ +extern void mp_cpus_call_cpu_init(int cpu); + /* * Invoke a function (possibly NULL) on a set of cpus specified by a mask. * The mask may include the local cpu. @@ -182,6 +186,10 @@ extern cpu_t mp_cpus_call1( cpumask_t *cpus_calledp, cpumask_t *cpus_notcalledp); +extern void mp_cpus_NMIPI(cpumask_t cpus); + +/* Interrupt a set of cpus, forcing an exit out of non-root mode */ +extern void mp_cpus_kick(cpumask_t cpus); /* * Power-management-specific SPI to: * - register a callout function, and diff --git a/osfmk/i386/mp_desc.c b/osfmk/i386/mp_desc.c index d4fd11af1..4b4306ad8 100644 --- a/osfmk/i386/mp_desc.c +++ b/osfmk/i386/mp_desc.c @@ -67,7 +67,7 @@ #include #include -#include +#include #include #include #include @@ -647,6 +647,16 @@ cpu_data_alloc(boolean_t is_boot_cpu) real_ncpus++; simple_unlock(&ncpus_lock); + /* + * Before this cpu has been assigned a real thread context, + * we give it a fake, unique, non-zero thread id which the locking + * primitives use as their lock value. + * Note that this does not apply to the boot processor, cpu 0, which + * transitions to a thread context well before other processors are + * started. 
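
mp_cpus_kick() and mp_cpus_NMIPI() above both walk a cpumask_t the same way: one bit per CPU index, skipping the caller and offline CPUs. A sketch of that walk, with send_ipi() standing in for lapic_send_ipi() or cpu_NMI_interrupt():

	static void
	for_each_cpu_in_mask_sketch(cpumask_t cpus, void (*send_ipi)(int cpu))
	{
		int cpu;
		cpumask_t cpu_bit;

		for (cpu = 0, cpu_bit = 1; cpu < (int) real_ncpus;
		    cpu++, cpu_bit <<= 1) {
			if ((cpus & cpu_bit) == 0)
				continue;		/* not requested */
			if (cpu == (int) cpu_number() ||
			    !cpu_datap(cpu)->cpu_running)
				continue;		/* self or offline */
			send_ipi(cpu);
		}
	}
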
+ */ + cdp->cpu_active_thread = (thread_t) (uintptr_t) cdp->cpu_number; + cdp->cpu_nanotime = &pal_rtc_nanotime_info; kprintf("cpu_data_alloc(%d) %p desc_table: %p " diff --git a/osfmk/i386/mp_native.c b/osfmk/i386/mp_native.c index c013a149b..f517f173a 100644 --- a/osfmk/i386/mp_native.c +++ b/osfmk/i386/mp_native.c @@ -33,6 +33,7 @@ #include #include #include +#include /* PAL-related routines */ void i386_cpu_IPI(int cpu); diff --git a/osfmk/i386/panic_hooks.c b/osfmk/i386/panic_hooks.c new file mode 100644 index 000000000..113031cfa --- /dev/null +++ b/osfmk/i386/panic_hooks.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include "panic_hooks.h" + +#include +#include +#include +#include +#include + +#include "pmap.h" + +struct panic_hook { + uint32_t magic1; + queue_chain_t chain; + thread_t thread; + panic_hook_fn_t hook_fn; + uint32_t magic2; +}; + +typedef char check1_[sizeof(struct panic_hook) + <= sizeof(panic_hook_t) ? 1 : -1]; +typedef char check2_[PAGE_SIZE == 4096 ? 
1 : -1]; + +static hw_lock_data_t panic_hooks_lock; +static queue_head_t panic_hooks; +static uint8_t panic_dump_buf[8192]; + +#define PANIC_HOOK_MAGIC1 0x4A1C400C +#define PANIC_HOOK_MAGIC2 0xC004C1A4 + +void panic_hooks_init(void) +{ + hw_lock_init(&panic_hooks_lock); + queue_init(&panic_hooks); +} + +void panic_hook(panic_hook_t *hook_, panic_hook_fn_t hook_fn) +{ + struct panic_hook *hook = (struct panic_hook *)hook_; + + hook->magic1 = PANIC_HOOK_MAGIC1; + hook->magic2 = PANIC_HOOK_MAGIC2; + hook->hook_fn = hook_fn; + hook->thread = current_thread(); + + hw_lock_lock(&panic_hooks_lock); + queue_enter(&panic_hooks, hook, struct panic_hook *, chain); + hw_lock_unlock(&panic_hooks_lock); +} + +void panic_unhook(panic_hook_t *hook_) +{ + struct panic_hook *hook = (struct panic_hook *)hook_; + + hw_lock_lock(&panic_hooks_lock); + queue_remove(&panic_hooks, hook, struct panic_hook *, chain); + hw_lock_unlock(&panic_hooks_lock); +} + +void panic_check_hook(void) +{ + struct panic_hook *hook; + thread_t thread = current_thread(); + uint32_t count = 0; + + queue_iterate(&panic_hooks, hook, struct panic_hook *, chain) { + if (++count > 1024 + || !kvtophys((vm_offset_t)hook) + || !kvtophys((vm_offset_t)hook + sizeof (*hook) - 1) + || hook->magic1 != PANIC_HOOK_MAGIC1 + || hook->magic2 != PANIC_HOOK_MAGIC2 + || !kvtophys((vm_offset_t)hook->hook_fn)) + return; + + if (hook->thread == thread) { + hook->hook_fn((panic_hook_t *)hook); + return; + } + } +} + +/* + * addr should be page aligned and len should be multiple of page + * size. This will currently only work if each page can be compressed + * to no more than 4095 bytes. + * + * Remember the debug buffer isn't very big so don't try and dump too + * much. + */ +void panic_dump_mem(const void *addr, int len) +{ + void *scratch = panic_dump_buf + 4096; + + for (; len > 0; addr = (uint8_t *)addr + PAGE_SIZE, len -= PAGE_SIZE) { + if (!kvtophys((vm_offset_t)addr)) + continue; + + // 4095 is multiple of 3 -- see below + int n = WKdm_compress_new((WK_word *)addr, (WK_word *)(void *)panic_dump_buf, + scratch, 4095); + + if (n == -1) + return; // Give up + + kdb_log("%p: ", addr); + + // Dump out base64 + static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz0123456789+/"; + + // Pad to multiple of 3 + switch (n % 3) { + case 1: + panic_dump_buf[n++] = 0; + case 2: + panic_dump_buf[n++] = 0; + } + + uint8_t *p = panic_dump_buf; + while (n) { + uint8_t c; + + c = p[0] >> 2; + consdebug_log(base64_table[c]); + + c = (p[0] << 4 | p[1] >> 4) & 0x3f; + consdebug_log(base64_table[c]); + + c = (p[1] << 2 | p[2] >> 6) & 0x3f; + consdebug_log(base64_table[c]); + + c = p[2] & 0x3f; + consdebug_log(base64_table[c]); + + p += 3; + n -= 3; + } + + consdebug_log('\n'); + } +} + +bool panic_phys_range_before(const void *addr, uint64_t *pphys, + panic_phys_range_t *range) +{ + *pphys = kvtophys((vm_offset_t)addr); + + const boot_args *args = PE_state.bootArgs; + + if (!kvtophys((vm_offset_t)args)) + return FALSE; + + const EfiMemoryRange *r = PHYSMAP_PTOV((uintptr_t)args->MemoryMap), *closest = NULL; + const uint32_t size = args->MemoryMapDescriptorSize; + const uint32_t count = args->MemoryMapSize / size; + + if (count > 1024) // Sanity check + return FALSE; + + for (uint32_t i = 0; i < count; ++i, r = (EfiMemoryRange *)(void *)((uint8_t *)r + size)) { + if (r->PhysicalStart + r->NumberOfPages * PAGE_SIZE > *pphys) + continue; + + if (!closest || r->PhysicalStart > closest->PhysicalStart) + closest = r; + } + + if (!closest) + return 
FALSE; + + range->type = closest->Type; + range->phys_start = closest->PhysicalStart; + range->len = closest->NumberOfPages * PAGE_SIZE; + + return TRUE; +} diff --git a/osfmk/i386/gdb_defs.h b/osfmk/i386/panic_hooks.h similarity index 63% rename from osfmk/i386/gdb_defs.h rename to osfmk/i386/panic_hooks.h index c039f2ee8..92905ebb4 100644 --- a/osfmk/i386/gdb_defs.h +++ b/osfmk/i386/panic_hooks.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,43 +25,37 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * @OSF_COPYRIGHT@ - * - */ -#ifndef _I386_GDB_DEFS_H_ -#define _I386_GDB_DEFS_H_ +#ifndef PANICHOOKS_H_ +#define PANICHOOKS_H_ -/* - * GDB DEPENDENT DEFINITIONS - * - * The following definitions match data descriptions in the gdb source file - * gdb/include/AT386/tm.h. They cannot be independently modified. - */ +#if XNU_KERNEL_PRIVATE + +#include +#include typedef struct { - unsigned int eax; - unsigned int ecx; - unsigned int edx; - unsigned int ebx; - unsigned int esp; - unsigned int ebp; - unsigned int esi; - unsigned int edi; - unsigned int eip; - unsigned int efl; - unsigned int cs; - unsigned int ss; - unsigned int ds; - unsigned int es; - unsigned int fs; - unsigned int gs; - unsigned int reason; -} kgdb_regs_t; + uint64_t opaque[6]; +} panic_hook_t; + +typedef void (*panic_hook_fn_t)(panic_hook_t *); + +void panic_hooks_init(void); +void panic_check_hook(void); + +void panic_hook(panic_hook_t *hook, panic_hook_fn_t hook_fn); +void panic_unhook(panic_hook_t *hook); +void panic_dump_mem(const void *addr, int len); + +typedef struct panic_phys_range { + uint32_t type; + uint64_t phys_start; + uint64_t len; +} panic_phys_range_t; -#define NUM_REGS 16 -#define REGISTER_BYTES (NUM_REGS * 4) +bool panic_phys_range_before(const void *addr, uint64_t *pphys, + panic_phys_range_t *range); -#endif /* _I386_GDB_DEFS_H_ */ +#endif // XNU_KERNEL_PRIVATE +#endif // PANICHOOKS_H_ diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c index cc5b22787..dce925524 100644 --- a/osfmk/i386/pcb.c +++ b/osfmk/i386/pcb.c @@ -106,6 +106,10 @@ #include #endif +#if HYPERVISOR +#include +#endif + /* * Maps state flavor to number of words in the state: */ @@ -208,6 +212,18 @@ ml_kperf_cswitch(thread_t old, thread_t new) } #endif +#if HYPERVISOR +static inline void +ml_hv_cswitch(thread_t old, thread_t new) +{ + if (old->hv_thread_target) + hv_callbacks.preempt(old->hv_thread_target); + + if (new->hv_thread_target) + hv_callbacks.dispatch(new->hv_thread_target); +} +#endif + /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. 
Setting these bit patterns @@ -268,29 +284,8 @@ dr7_is_valid(uint32_t *dr7) return (TRUE); } -static inline void -set_live_debug_state32(cpu_data_t *cdp, x86_debug_state32_t *ds) -{ - __asm__ volatile ("movl %0,%%db0" : :"r" (ds->dr0)); - __asm__ volatile ("movl %0,%%db1" : :"r" (ds->dr1)); - __asm__ volatile ("movl %0,%%db2" : :"r" (ds->dr2)); - __asm__ volatile ("movl %0,%%db3" : :"r" (ds->dr3)); - cdp->cpu_dr7 = ds->dr7; -} - extern void set_64bit_debug_regs(x86_debug_state64_t *ds); -static inline void -set_live_debug_state64(cpu_data_t *cdp, x86_debug_state64_t *ds) -{ - /* - * We need to enter 64-bit mode in order to set the full - * width of these registers - */ - set_64bit_debug_regs(ds); - cdp->cpu_dr7 = ds->dr7; -} - boolean_t debug_state_is_valid32(x86_debug_state32_t *ds) { @@ -382,6 +377,13 @@ set_debug_state64(thread_t thread, x86_debug_state64_t *ds) ids = zalloc(ids_zone); bzero(ids, sizeof *ids); +#if HYPERVISOR + if (thread->hv_thread_target) { + hv_callbacks.volatile_state(thread->hv_thread_target, + HV_DEBUG_STATE); + } +#endif + simple_lock(&pcb->lock); /* make sure it wasn't already alloc()'d elsewhere */ if (pcb->ids == NULL) { @@ -508,6 +510,10 @@ machine_switch_context( */ act_machine_switch_pcb(old, new); +#if HYPERVISOR + ml_hv_cswitch(old, new); +#endif + return(Switch_context(old, continuation, new)); } @@ -636,6 +642,9 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts) ts->ds = USER_DS; ts->es = USER_DS; + /* Set GS to CTHREAD only if's been established */ + ts->gs = thread->machine.cthread_self ? USER_CTHREAD : NULL_SEG; + /* Check segment selectors are safe */ if (!valid_user_segment_selectors(ts->cs, ts->ss, @@ -1857,6 +1866,10 @@ machine_stack_handoff(thread_t old, PMAP_SWITCH_CONTEXT(old, new, cpu_number()); act_machine_switch_pcb(old, new); +#if HYPERVISOR + ml_hv_cswitch(old, new); +#endif + machine_set_current_thread(new); return; diff --git a/osfmk/i386/pcb_native.c b/osfmk/i386/pcb_native.c index 1c4e9ebfe..6f200efcc 100644 --- a/osfmk/i386/pcb_native.c +++ b/osfmk/i386/pcb_native.c @@ -97,6 +97,10 @@ #include #include +#if HYPERVISOR +#include +#endif + #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \ extern char assert_is_16byte_multiple_sizeof_ ## _type_ \ [(sizeof(_type_) % 16) == 0 ? 1 : -1] @@ -110,7 +114,6 @@ ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); extern zone_t iss_zone; /* zone for saved_state area */ extern zone_t ids_zone; /* zone for debug_state area */ -extern void *get_bsduthreadarg(thread_t); void act_machine_switch_pcb(__unused thread_t old, thread_t new) { @@ -389,7 +392,7 @@ machine_thread_create( pcb->cthread_self = 0; pcb->uldt_selector = 0; - + pcb->thread_gpu_ns = 0; /* Ensure that the "cthread" descriptor describes a valid * segment. 
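
pcb_native.c above and panic_hooks.c earlier both lean on the same pre-C11 compile-time assertion: an array type whose size collapses to -1, and therefore fails the build, when the checked predicate is false. A generic self-contained sketch (the struct and names are illustrative):

	/* Pre-C11 static assertion: a negative array size is a compile error. */
	#define STATIC_ASSERT_SKETCH(name, expr) \
		typedef char static_assert_ ## name[(expr) ? 1 : -1]

	struct sample_pcb { unsigned long long a, b; };	/* illustrative type */
	STATIC_ASSERT_SKETCH(pcb_is_16b,
	    (sizeof (struct sample_pcb) % 16) == 0);
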
*/ @@ -411,6 +414,13 @@ machine_thread_destroy( { register pcb_t pcb = THREAD_TO_PCB(thread); +#if HYPERVISOR + if (thread->hv_thread_target) { + hv_callbacks.thread_destroy(thread->hv_thread_target); + thread->hv_thread_target = NULL; + } +#endif + if (pcb->ifps != 0) fpu_free(pcb->ifps); if (pcb->iss != 0) { @@ -422,3 +432,66 @@ machine_thread_destroy( pcb->ids = NULL; } } + +kern_return_t +machine_thread_set_tsd_base( + thread_t thread, + mach_vm_offset_t tsd_base) +{ + + if (thread->task == kernel_task) { + return KERN_INVALID_ARGUMENT; + } + + if (thread_is_64bit(thread)) { + /* check for canonical address, set 0 otherwise */ + if (!IS_USERADDR64_CANONICAL(tsd_base)) + tsd_base = 0ULL; + } else { + if (tsd_base > UINT32_MAX) + tsd_base = 0ULL; + } + + pcb_t pcb = THREAD_TO_PCB(thread); + pcb->cthread_self = tsd_base; + + if (!thread_is_64bit(thread)) { + /* Set up descriptor for later use */ + struct real_descriptor desc = { + .limit_low = 1, + .limit_high = 0, + .base_low = tsd_base & 0xffff, + .base_med = (tsd_base >> 16) & 0xff, + .base_high = (tsd_base >> 24) & 0xff, + .access = ACC_P|ACC_PL_U|ACC_DATA_W, + .granularity = SZ_32|SZ_G, + }; + + pcb->cthread_desc = desc; + saved_state32(pcb->iss)->gs = USER_CTHREAD; + } + + /* For current thread, make the TSD base active immediately */ + if (thread == current_thread()) { + + if (thread_is_64bit(thread)) { + cpu_data_t *cdp; + + mp_disable_preemption(); + cdp = current_cpu_datap(); + if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || + (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) + wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base); + cdp->cpu_uber.cu_user_gs_base = tsd_base; + mp_enable_preemption(); + } else { + + /* assign descriptor */ + mp_disable_preemption(); + *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc; + mp_enable_preemption(); + } + } + + return KERN_SUCCESS; +} diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index 1efffe69c..94a005637 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -677,18 +677,6 @@ machine_run_count(uint32_t count) saved_run_count = count; } -boolean_t -machine_processor_is_inactive(processor_t processor) -{ - int cpu = processor->cpu_id; - - if (pmDispatch != NULL - && pmDispatch->pmIsCPUUnAvailable != NULL) - return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu))); - else - return(FALSE); -} - processor_t machine_choose_processor(processor_set_t pset, processor_t preferred) diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index bccf937c6..eb9e7e297 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -70,7 +70,6 @@ #ifndef ASSEMBLER -#include #include #include @@ -79,7 +78,7 @@ #include #include #include -#include +#include #include #include @@ -385,8 +384,6 @@ static inline void * PHYSMAP_PTOV_check(void *paddr) { #endif /*__x86_64__ */ -typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ - /* changed by other processors */ #include /* @@ -463,8 +460,8 @@ extern pmap_memory_region_t pmap_memory_regions[]; #include static inline void -set_dirbase(pmap_t tpmap, __unused thread_t thread) { - int ccpu = cpu_number(); +set_dirbase(pmap_t tpmap, __unused thread_t thread, int my_cpu) { + int ccpu = my_cpu; cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3; cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map; /* @@ -587,16 +584,16 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr #include -#define PMAP_ACTIVATE_MAP(map, thread) { \ +#define PMAP_ACTIVATE_MAP(map, thread, my_cpu) { \ register pmap_t tpmap; \ \ tpmap = 
vm_map_pmap(map); \ - set_dirbase(tpmap, thread); \ + set_dirbase(tpmap, thread, my_cpu); \ } #if defined(__x86_64__) -#define PMAP_DEACTIVATE_MAP(map, thread) \ - pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE); +#define PMAP_DEACTIVATE_MAP(map, thread, ccpu) \ + pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, ccpu) == (get_cr3_raw() & 0xFFF)) : TRUE); #else #define PMAP_DEACTIVATE_MAP(map, thread) #endif @@ -605,8 +602,8 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr \ pmap_assert(ml_get_interrupts_enabled() == FALSE); \ if (old_th->map != new_th->map) { \ - PMAP_DEACTIVATE_MAP(old_th->map, old_th); \ - PMAP_ACTIVATE_MAP(new_th->map, new_th); \ + PMAP_DEACTIVATE_MAP(old_th->map, old_th, my_cpu); \ + PMAP_ACTIVATE_MAP(new_th->map, new_th, my_cpu); \ } \ } @@ -626,9 +623,9 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr spl_t spl; \ \ spl = splhigh(); \ - PMAP_DEACTIVATE_MAP(th->map, th); \ + PMAP_DEACTIVATE_MAP(th->map, th, my_cpu); \ th->map = new_map; \ - PMAP_ACTIVATE_MAP(th->map, th); \ + PMAP_ACTIVATE_MAP(th->map, th, my_cpu); \ splx(spl); \ } #endif @@ -694,6 +691,7 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr (((vm_offset_t) (VA)) <= vm_max_kernel_address)) +#define pmap_compressed(pmap) ((pmap)->stats.compressed) #define pmap_resident_count(pmap) ((pmap)->stats.resident_count) #define pmap_resident_max(pmap) ((pmap)->stats.resident_max) #define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr) diff --git a/osfmk/i386/pmap_common.c b/osfmk/i386/pmap_common.c index 4bf41195e..ec05a97e2 100644 --- a/osfmk/i386/pmap_common.c +++ b/osfmk/i386/pmap_common.c @@ -194,6 +194,35 @@ compute_pmap_gc_throttle(void *arg __unused) } +void +pmap_lock_phys_page(ppnum_t pn) +{ + int pai; + + pai = ppn_to_pai(pn); + + if (IS_MANAGED_PAGE(pai)) { + LOCK_PVH(pai); + } else + simple_lock(&phys_backup_lock); +} + + +void +pmap_unlock_phys_page(ppnum_t pn) +{ + int pai; + + pai = ppn_to_pai(pn); + + if (IS_MANAGED_PAGE(pai)) { + UNLOCK_PVH(pai); + } else + simple_unlock(&phys_backup_lock); +} + + + __private_extern__ void pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) { if (pmap_pagetable_corruption_incidents > 0) { diff --git a/osfmk/i386/pmap_internal.h b/osfmk/i386/pmap_internal.h index 6ca95d7b1..6227e50f7 100644 --- a/osfmk/i386/pmap_internal.h +++ b/osfmk/i386/pmap_internal.h @@ -34,6 +34,8 @@ #include #include #include +#include +#include /* * pmap locking @@ -236,12 +238,13 @@ typedef struct pv_hashed_entry { //#define PV_DEBUG 1 /* uncomment to enable some PV debugging code */ #ifdef PV_DEBUG -#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized"); +#define CHK_NPVHASH() if(0 == npvhashmask) panic("npvhash uninitialized"); #else #define CHK_NPVHASH(x) #endif -#define NPVHASH 4095 /* MUST BE 2^N - 1 */ +#define NPVHASHBUCKETS (4096) +#define NPVHASHMASK ((NPVHASHBUCKETS) - 1) /* MUST BE 2^N - 1 */ #define PV_HASHED_LOW_WATER_MARK_DEFAULT 5000 #define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT 2000 #define PV_HASHED_ALLOC_CHUNK_INITIAL 2000 @@ -256,13 +259,14 @@ extern uint32_t pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark; #define LOCK_PV_HASH(hash) lock_hash_hash(hash) #define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash) -extern uint32_t npvhash; +extern uint32_t npvhashmask; extern pv_hashed_entry_t *pv_hash_table; /* hash lists */ 
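
The NPVHASH to NPVHASHBUCKETS/NPVHASHMASK split above makes the power-of-two contract explicit: with 4096 buckets, masking with 4095 gives the same result as a modulo but without the divide. A self-contained sketch of the pvhashidx()-style bucket computation (names assumed for illustration):

	#include <stdint.h>

	#define NBUCKETS_SKETCH 4096u			/* must be a power of two */
	#define NBUCKETS_MASK   (NBUCKETS_SKETCH - 1)	/* 0xFFF */

	/* Mix a pmap pointer and a page number into a bucket index. */
	static inline uint32_t
	hash_bucket_sketch(uintptr_t pmap, uint64_t va_page)
	{
		return ((uint32_t)pmap ^ (uint32_t)va_page) & NBUCKETS_MASK;
	}
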
extern pv_hashed_entry_t pv_hashed_free_list; extern pv_hashed_entry_t pv_hashed_kern_free_list; decl_simple_lock_data(extern, pv_hashed_free_list_lock) decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock) decl_simple_lock_data(extern, pv_hash_table_lock) +decl_simple_lock_data(extern, phys_backup_lock) extern zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry * structures */ @@ -477,9 +481,10 @@ extern unsigned int inuse_ptepages_count; static inline uint32_t pvhashidx(pmap_t pmap, vm_map_offset_t va) { - return ((uint32_t)(uintptr_t)pmap ^ + uint32_t hashidx = ((uint32_t)(uintptr_t)pmap ^ ((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) & - npvhash; + npvhashmask; + return hashidx; } diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c index 7994bfbd7..ba50c3320 100644 --- a/osfmk/i386/pmap_x86_common.c +++ b/osfmk/i386/pmap_x86_common.c @@ -25,6 +25,9 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + +#include + #include #include #include @@ -1228,6 +1231,14 @@ pmap_remove_options( if (s64 < e64 && rdtsc64() >= deadline) { PMAP_UNLOCK(map) + /* TODO: Rapid release/reacquisition can defeat + * the "backoff" intent here; either consider a + * fair spinlock, or a scheme whereby each lock + * attempt marks the processor as within a spinlock + * acquisition, and scan CPUs here to determine + * if a backoff is necessary, to avoid sacrificing + * performance in the common case. + */ PMAP_LOCK(map) deadline = rdtsc64() + max_preemption_latency_tsc; } @@ -1391,7 +1402,7 @@ pmap_page_protect_options( * This removal is only being done so we can send this page to * the compressor; therefore it mustn't affect total task footprint. */ - pmap_ledger_credit(pmap, task_ledgers.phys_compressed, PAGE_SIZE); + pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE); } else { pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE); } @@ -1492,7 +1503,16 @@ phys_attribute_clear( int pai; pmap_t pmap; char attributes = 0; - + boolean_t is_internal, is_reusable; + + if ((bits & PHYS_MODIFIED) && + (options & PMAP_OPTIONS_NOFLUSH) && + arg == NULL) { + panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): " + "should not clear 'modified' without flushing TLBs\n", + pn, bits, options, arg); + } + pmap_intr_assert(); assert(pn != vm_page_fictitious_addr); if (pn == vm_page_guard_addr) @@ -1524,31 +1544,91 @@ phys_attribute_clear( * There are some mappings. */ + is_internal = IS_INTERNAL_PAGE(pai); + is_reusable = IS_REUSABLE_PAGE(pai); + pv_e = (pv_hashed_entry_t)pv_h; do { vm_map_offset_t va; + char pte_bits; pmap = pv_e->pmap; va = pv_e->va; + pte_bits = 0; + + if (bits) { + pte = pmap_pte(pmap, va); + /* grab ref/mod bits from this PTE */ + pte_bits = (*pte & (PHYS_MODIFIED | + PHYS_REFERENCED)); + /* propagate to page's global attributes */ + attributes |= pte_bits; + /* which bits to clear for this PTE? */ + pte_bits &= bits; + } /* * Clear modify and/or reference bits. */ - pte = pmap_pte(pmap, va); - attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); - pmap_update_pte(pte, bits, 0); - /* Ensure all processors using this translation - * invalidate this TLB entry. The invalidation *must* - * follow the PTE update, to ensure that the TLB - * shadow of the 'D' bit (in particular) is - * synchronized with the updated PTE. 
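
The phys_attribute_clear() rework around this point harvests the referenced/modified bits from each PTE first, clears only the bits that were actually set and requested, and skips the TLB shootdown entirely when nothing changed. A sketch of that read-filter-clear-flush ordering; the bit values and flush hook below are illustrative stand-ins, and the kernel performs the PTE update atomically:

	#include <stdint.h>

	#define PTE_REF 0x20	/* x86 'A' (accessed) bit, 1 << 5 */
	#define PTE_MOD 0x40	/* x86 'D' (dirty) bit, 1 << 6 */

	extern void tlb_flush_sketch(void);	/* stands in for PMAP_UPDATE_TLBS */

	static uint64_t
	harvest_and_clear_sketch(volatile uint64_t *pte, uint64_t bits_to_clear)
	{
		/* grab ref/mod from the PTE first... */
		uint64_t pte_bits = *pte & (PTE_REF | PTE_MOD);

		/* ...then clear only what the caller asked for and is set */
		pte_bits &= bits_to_clear;
		if (pte_bits) {
			*pte &= ~pte_bits;
			tlb_flush_sketch();	/* must follow the PTE write */
		}
		return pte_bits;	/* propagate into the page's attributes */
	}
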
- */ - if (options & PMAP_OPTIONS_NOFLUSH) { - if (arg) - PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE, (pmap_flush_context *)arg); - } else - PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); + if (pte_bits) { + pmap_update_pte(pte, bits, 0); + + /* Ensure all processors using this translation + * invalidate this TLB entry. The invalidation + * *must* follow the PTE update, to ensure that + * the TLB shadow of the 'D' bit (in particular) + * is synchronized with the updated PTE. + */ + if (! (options & PMAP_OPTIONS_NOFLUSH)) { + /* flush TLBS now */ + PMAP_UPDATE_TLBS(pmap, + va, + va + PAGE_SIZE); + } else if (arg) { + /* delayed TLB flush: add "pmap" info */ + PMAP_UPDATE_TLBS_DELAYED( + pmap, + va, + va + PAGE_SIZE, + (pmap_flush_context *)arg); + } else { + /* no TLB flushing at all */ + } + } + + /* update pmap "reusable" stats */ + if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) && + is_reusable && + pmap != kernel_pmap) { + /* one less "reusable" */ + assert(pmap->stats.reusable > 0); + OSAddAtomic(-1, &pmap->stats.reusable); + if (is_internal) { + /* one more "internal" */ + OSAddAtomic(+1, &pmap->stats.internal); + PMAP_STATS_PEAK(pmap->stats.internal); + } else { + /* one more "external" */ + OSAddAtomic(+1, &pmap->stats.external); + PMAP_STATS_PEAK(pmap->stats.external); + } + } else if ((options & PMAP_OPTIONS_SET_REUSABLE) && + !is_reusable && + pmap != kernel_pmap) { + /* one more "reusable" */ + OSAddAtomic(+1, &pmap->stats.reusable); + PMAP_STATS_PEAK(pmap->stats.reusable); + if (is_internal) { + /* one less "internal" */ + assert(pmap->stats.internal > 0); + OSAddAtomic(-1, &pmap->stats.internal); + } else { + /* one less "external" */ + assert(pmap->stats.external > 0); + OSAddAtomic(-1, &pmap->stats.external); + } + } pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink); @@ -1561,6 +1641,13 @@ phys_attribute_clear( pmap_phys_attributes[pai] |= attributes; pmap_phys_attributes[pai] &= (~bits); + /* update this page's "reusable" status */ + if (options & PMAP_OPTIONS_CLEAR_REUSABLE) { + pmap_phys_attributes[pai] &= ~PHYS_REUSABLE; + } else if (options & PMAP_OPTIONS_SET_REUSABLE) { + pmap_phys_attributes[pai] |= PHYS_REUSABLE; + } + UNLOCK_PVH(pai); PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END, @@ -1742,162 +1829,6 @@ pmap_map_bd( return(virt); } -void -pmap_reusable( - pmap_t pmap, - addr64_t s64, - addr64_t e64, - boolean_t reusable) -{ - pt_entry_t *pde; - pt_entry_t *spte, *epte; - addr64_t l64; - uint64_t deadline; - - pmap_intr_assert(); - - if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) - return; - - PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_START, - pmap, - (uint32_t) (s64 >> 32), s64, - (uint32_t) (e64 >> 32), e64); - - PMAP_LOCK(pmap); - - deadline = rdtsc64() + max_preemption_latency_tsc; - - while (s64 < e64) { - l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1); - if (l64 > e64) - l64 = e64; - pde = pmap_pde(pmap, s64); - - if (pde && (*pde & INTEL_PTE_VALID)) { - if (*pde & INTEL_PTE_PS) { - /* superpage: not supported */ - } else { - spte = pmap_pte(pmap, - (s64 & ~(pde_mapped_size - 1))); - spte = &spte[ptenum(s64)]; - epte = &spte[intel_btop(l64 - s64)]; - pmap_reusable_range(pmap, s64, spte, epte, - reusable); - } - } - s64 = l64; - - if (s64 < e64 && rdtsc64() >= deadline) { - PMAP_UNLOCK(pmap); - PMAP_LOCK(pmap); - deadline = rdtsc64() + max_preemption_latency_tsc; - } - } - - PMAP_UNLOCK(pmap); - - PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_END, - pmap, reusable, 0, 0, 0); -} - -void -pmap_reusable_range( - pmap_t 
pmap, - vm_map_offset_t start_vaddr, - pt_entry_t *spte, - pt_entry_t *epte, - boolean_t reusable) -{ - pt_entry_t *cpte; - int num_external, num_internal, num_reusable; - ppnum_t pai; - pmap_paddr_t pa; - vm_map_offset_t vaddr; - - num_external = 0; - num_internal = 0; - num_reusable = 0; - - for (cpte = spte, vaddr = start_vaddr; - cpte < epte; - cpte++, vaddr += PAGE_SIZE_64) { - - pa = pte_to_pa(*cpte); - if (pa == 0) - continue; - - pai = pa_index(pa); - - LOCK_PVH(pai); - - pa = pte_to_pa(*cpte); - if (pa == 0) { - UNLOCK_PVH(pai); - continue; - } - if (reusable) { - /* we want to set "reusable" */ - if (IS_REUSABLE_PAGE(pai)) { - /* already reusable: no change */ - } else { - pmap_phys_attributes[pai] |= PHYS_REUSABLE; - /* one more "reusable" */ - num_reusable++; - if (IS_INTERNAL_PAGE(pai)) { - /* one less "internal" */ - num_internal--; - } else { - /* one less "external" */ - num_external--; - } - } - } else { - /* we want to clear "reusable" */ - if (IS_REUSABLE_PAGE(pai)) { - pmap_phys_attributes[pai] &= ~PHYS_REUSABLE; - /* one less "reusable" */ - num_reusable--; - if (IS_INTERNAL_PAGE(pai)) { - /* one more "internal" */ - num_internal++; - } else { - /* one more "external" */ - num_external++; - } - } else { - /* already not reusable: no change */ - } - } - - UNLOCK_PVH(pai); - - } /* for loop */ - - /* - * Update the counts - */ - if (pmap != kernel_pmap) { - if (num_external) { - OSAddAtomic(num_external, &pmap->stats.external); - PMAP_STATS_PEAK(pmap->stats.external); - } - assert(pmap->stats.external >= 0); - if (num_internal) { - OSAddAtomic(num_internal, &pmap->stats.internal); - PMAP_STATS_PEAK(pmap->stats.internal); - } - assert(pmap->stats.internal >= 0); - if (num_reusable) { - OSAddAtomic(num_reusable, &pmap->stats.reusable); - PMAP_STATS_PEAK(pmap->stats.reusable); - } - assert(pmap->stats.reusable >= 0); - } - - return; -} - unsigned int pmap_query_resident( pmap_t pmap, @@ -1965,3 +1896,13 @@ pmap_query_resident( return result; } + +#if MACH_ASSERT +void +pmap_set_process( + __unused pmap_t pmap, + __unused int pid, + __unused char *procname) +{ +} +#endif /* MACH_ASSERT */ diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index 1ba613a21..22ce11999 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -177,8 +177,6 @@ #define PMAP_PCID_PRESERVE (1ULL << 63) #define PMAP_PCID_MASK (0xFFF) -#define RDRAND_RAX .byte 0x48, 0x0f, 0xc7, 0xf0 - #ifndef ASSEMBLER #include @@ -389,6 +387,9 @@ static inline void invlpg(uintptr_t addr) #define rdtsc(lo,hi) \ __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi)) +#define rdtsc_nofence(lo,hi) \ + __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi)) + #define write_tsc(lo,hi) wrmsr(0x10, lo, hi) #define rdpmc(counter,lo,hi) \ @@ -546,17 +547,25 @@ __END_DECLS #define MSR_IA32_MC0_ADDR 0x402 #define MSR_IA32_MC0_MISC 0x403 -#define MSR_IA32_VMX_BASE 0x480 -#define MSR_IA32_VMX_BASIC MSR_IA32_VMX_BASE -#define MSR_IA32_VMXPINBASED_CTLS MSR_IA32_VMX_BASE+1 -#define MSR_IA32_PROCBASED_CTLS MSR_IA32_VMX_BASE+2 -#define MSR_IA32_VMX_EXIT_CTLS MSR_IA32_VMX_BASE+3 -#define MSR_IA32_VMX_ENTRY_CTLS MSR_IA32_VMX_BASE+4 -#define MSR_IA32_VMX_MISC MSR_IA32_VMX_BASE+5 -#define MSR_IA32_VMX_CR0_FIXED0 MSR_IA32_VMX_BASE+6 -#define MSR_IA32_VMX_CR0_FIXED1 MSR_IA32_VMX_BASE+7 -#define MSR_IA32_VMX_CR4_FIXED0 MSR_IA32_VMX_BASE+8 -#define MSR_IA32_VMX_CR4_FIXED1 MSR_IA32_VMX_BASE+9 +#define MSR_IA32_VMX_BASE 0x480 +#define MSR_IA32_VMX_BASIC MSR_IA32_VMX_BASE +#define MSR_IA32_VMX_PINBASED_CTLS MSR_IA32_VMX_BASE+1 
+#define MSR_IA32_VMX_PROCBASED_CTLS MSR_IA32_VMX_BASE+2 +#define MSR_IA32_VMX_EXIT_CTLS MSR_IA32_VMX_BASE+3 +#define MSR_IA32_VMX_ENTRY_CTLS MSR_IA32_VMX_BASE+4 +#define MSR_IA32_VMX_MISC MSR_IA32_VMX_BASE+5 +#define MSR_IA32_VMX_CR0_FIXED0 MSR_IA32_VMX_BASE+6 +#define MSR_IA32_VMX_CR0_FIXED1 MSR_IA32_VMX_BASE+7 +#define MSR_IA32_VMX_CR4_FIXED0 MSR_IA32_VMX_BASE+8 +#define MSR_IA32_VMX_CR4_FIXED1 MSR_IA32_VMX_BASE+9 +#define MSR_IA32_VMX_VMCS_ENUM MSR_IA32_VMX_BASE+10 +#define MSR_IA32_VMX_PROCBASED_CTLS2 MSR_IA32_VMX_BASE+11 +#define MSR_IA32_VMX_EPT_VPID_CAP MSR_IA32_VMX_BASE+12 +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS MSR_IA32_VMX_BASE+13 +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS MSR_IA32_VMX_BASE+14 +#define MSR_IA32_VMX_TRUE_VMEXIT_CTLS MSR_IA32_VMX_BASE+15 +#define MSR_IA32_VMX_TRUE_VMENTRY_CTLS MSR_IA32_VMX_BASE+16 +#define MSR_IA32_VMX_VMFUNC MSR_IA32_VMX_BASE+17 #define MSR_IA32_DS_AREA 0x600 diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 415ee92fd..d0a2ed840 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -39,7 +39,6 @@ * the cpu clock counted by the timestamp MSR. */ -#include #include @@ -72,8 +71,6 @@ #include #define UI_CPUFREQ_ROUNDING_FACTOR 10000000 -int rtclock_config(void); - int rtclock_init(void); uint64_t tsc_rebase_abs_time = 0; @@ -108,19 +105,6 @@ _absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nan *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC); } -/* - * Configure the real-time clock device. Return success (1) - * or failure (0). - */ - -int -rtclock_config(void) -{ - /* nothing to do */ - return (1); -} - - /* * Nanotime/mach_absolutime_time * ----------------------------- @@ -271,7 +255,7 @@ rtc_sleep_wakeup( uint64_t base) { /* Set fixed configuration for lapic timers */ - rtc_timer->config(); + rtc_timer->rtc_config(); /* * Reset nanotime. @@ -281,6 +265,17 @@ rtc_sleep_wakeup( rtc_nanotime_init(base); } +/* + * rtclock_early_init() is called very early at boot to + * establish mach_absolute_time() and set it to zero. + */ +void +rtclock_early_init(void) +{ + assert(tscFreq); + rtc_set_timescale(tscFreq); +} + /* * Initialize the real-time clock device. * In addition, various variables used to support the clock are initialized. @@ -295,7 +290,6 @@ rtclock_init(void) if (cpu_number() == master_cpu) { assert(tscFreq); - rtc_set_timescale(tscFreq); /* * Adjust and set the exported cpu speed. 
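
On the expanded VMX MSR table above: each *_CTLS capability MSR packs its allowed settings as two 32-bit halves, where the low half lists control bits that must be 1 and the high half those that may be 1. A decoding sketch (rdmsr64() as in the kernel; the struct and names are illustrative):

	#include <stdint.h>

	extern uint64_t rdmsr64(uint32_t msr);	/* kernel MSR accessor */

	typedef struct {
		uint32_t must_be_1;	/* low half: allowed 0-settings */
		uint32_t may_be_1;	/* high half: allowed 1-settings */
	} vmx_ctls_sketch_t;

	static vmx_ctls_sketch_t
	decode_ctls_msr_sketch(uint32_t msr)
	{
		uint64_t image = rdmsr64(msr);
		vmx_ctls_sketch_t c;

		c.must_be_1 = (uint32_t)(image & 0xFFFFFFFF);
		c.may_be_1  = (uint32_t)(image >> 32);
		return c;
	}
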
@@ -316,7 +310,7 @@ rtclock_init(void) } /* Set fixed configuration for lapic timers */ - rtc_timer->config(); + rtc_timer->rtc_config(); rtc_timer_start(); return (1); @@ -338,9 +332,6 @@ rtc_set_timescale(uint64_t cycles) cycles <<= 1; } - if ( shift != 0 ) - printf("Slow TSC, rtc_nanotime.shift == %d\n", shift); - rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles); rntp->shift = shift; @@ -361,8 +352,12 @@ rtc_set_timescale(uint64_t cycles) static uint64_t rtc_export_speed(uint64_t cyc_per_sec) { + pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info; uint64_t cycles; + if (rntp->shift != 0 ) + printf("Slow TSC, rtc_nanotime.shift == %d\n", rntp->shift); + /* Round: */ cycles = ((cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2)) / UI_CPUFREQ_ROUNDING_FACTOR) @@ -468,8 +463,7 @@ rtclock_intr( */ uint64_t -setPop( - uint64_t time) +setPop(uint64_t time) { uint64_t now; uint64_t pop; @@ -478,10 +472,10 @@ setPop( if (time == 0 || time == EndOfAllTime ) { time = EndOfAllTime; now = 0; - pop = rtc_timer->set(0, 0); + pop = rtc_timer->rtc_set(0, 0); } else { now = rtc_nanotime_read(); /* The time in nanoseconds */ - pop = rtc_timer->set(time, now); + pop = rtc_timer->rtc_set(time, now); } /* Record requested and actual deadlines set */ @@ -515,15 +509,6 @@ absolutetime_to_microtime( _absolutetime_to_microtime(abstime, secs, microsecs); } -void -absolutetime_to_nanotime( - uint64_t abstime, - clock_sec_t *secs, - clock_nsec_t *nanosecs) -{ - _absolutetime_to_nanotime(abstime, secs, nanosecs); -} - void nanotime_to_absolutetime( clock_sec_t secs, diff --git a/osfmk/i386/rtclock_native.c b/osfmk/i386/rtclock_native.c index 436877ae3..13cde8e79 100644 --- a/osfmk/i386/rtclock_native.c +++ b/osfmk/i386/rtclock_native.c @@ -29,7 +29,6 @@ * @OSF_COPYRIGHT@ */ -#include #include @@ -95,6 +94,13 @@ rtc_lapic_set_timer(uint64_t deadline, uint64_t now) } else { lapic_set_timer(FALSE, one_shot, divide_by_1, 0); } + + KERNEL_DEBUG_CONSTANT( + DECR_SET_APIC_DEADLINE | DBG_FUNC_NONE, + now, deadline, + set, LAPIC_READ(TIMER_CURRENT_COUNT), + 0); + return set; } diff --git a/osfmk/i386/rtclock_protos.h b/osfmk/i386/rtclock_protos.h index b467df170..469a04cf5 100644 --- a/osfmk/i386/rtclock_protos.h +++ b/osfmk/i386/rtclock_protos.h @@ -61,11 +61,13 @@ extern void rtclock_intr(x86_saved_state_t *regs); * Timer control. 
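
rtc_set_timescale() above builds a 32.32 fixed-point factor, scale = (NSEC_PER_SEC << 32) / tscFreq, pre-doubling sub-GHz TSC frequencies via 'shift' so the factor still fits. TSC deltas then convert to nanoseconds with multiplies and shifts and no division; a self-contained sketch of that arithmetic:

	#include <stdint.h>

	/*
	 * ns = ((delta << shift) * scale) >> 32, computed without needing
	 * a 128-bit intermediate: split the delta into high and low halves.
	 */
	static uint64_t
	tsc_delta_to_ns_sketch(uint64_t tsc_delta, uint32_t scale,
	    uint32_t shift)
	{
		tsc_delta <<= shift;	/* compensates for a slow (sub-GHz) TSC */
		return ((tsc_delta >> 32) * scale) +
		    (((tsc_delta & 0xFFFFFFFFULL) * scale) >> 32);
	}
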
*/ typedef struct { - void (*config)(void); - uint64_t (*set) (uint64_t, uint64_t); + void (*rtc_config)(void); + uint64_t (*rtc_set) (uint64_t, uint64_t); } rtc_timer_t; extern rtc_timer_t *rtc_timer; extern void rtc_timer_init(void); +extern void rtclock_early_init(void); + #endif /* _I386_RTCLOCK_PROTOS_H_ */ diff --git a/osfmk/i386/simple_lock.h b/osfmk/i386/simple_lock.h index 563c17739..ce1708ab9 100644 --- a/osfmk/i386/simple_lock.h +++ b/osfmk/i386/simple_lock.h @@ -73,6 +73,9 @@ #include #include +extern unsigned int LockTimeOutTSC; /* Lock timeout in TSC ticks */ +extern unsigned int LockTimeOut; /* Lock timeout in absolute time */ + #if MACH_LDEBUG #define USLOCK_DEBUG 1 #else diff --git a/osfmk/i386/startup64.c b/osfmk/i386/startup64.c index e0c687a2b..ddf892451 100644 --- a/osfmk/i386/startup64.c +++ b/osfmk/i386/startup64.c @@ -34,7 +34,6 @@ #include #include -#include #include #include @@ -155,9 +154,6 @@ dump_frame64(x86_saved_state64_t *sp) kprintf("sp->r14: 0x%016llx\n", sp->r14); kprintf("sp->r15: 0x%016llx\n", sp->r15); kprintf("sp->cr2: 0x%016llx\n", sp->cr2); - kprintf("sp->v_arg8: 0x%016llx\n", sp->v_arg8); - kprintf("sp->v_arg7: 0x%016llx\n", sp->v_arg7); - kprintf("sp->v_arg6: 0x%016llx\n", sp->v_arg6); kprintf("sp->r9: 0x%016llx\n", sp->r9); kprintf("sp->r8: 0x%016llx\n", sp->r8); kprintf("sp->r10: 0x%016llx\n", sp->r10); diff --git a/osfmk/i386/thread.h b/osfmk/i386/thread.h index 91d684b8d..b75e36a99 100644 --- a/osfmk/i386/thread.h +++ b/osfmk/i386/thread.h @@ -71,7 +71,7 @@ #include #include -#include +#include #include #include @@ -130,7 +130,7 @@ struct machine_thread { uint32_t specFlags; #define OnProc 0x1 #define CopyIOActive 0x2 /* Checked to ensure DTrace actions do not re-enter copyio(). */ - + uint64_t thread_gpu_ns; #if NCOPY_WINDOWS > 0 struct { user_addr_t user_base; diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index e15f40b05..0cedaa19d 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -93,6 +93,7 @@ #include #endif #include +#include #include @@ -128,6 +129,7 @@ extern boolean_t dtrace_tally_fault(user_addr_t); #endif extern boolean_t pmap_smep_enabled; +extern boolean_t pmap_smap_enabled; void thread_syscall_return( @@ -256,9 +258,6 @@ kprint_state(x86_saved_state64_t *saved_state) kprintf(" r10 0x%llx\n", saved_state->r10); kprintf(" r8 0x%llx\n", saved_state->r8); kprintf(" r9 0x%llx\n", saved_state->r9); - kprintf(" v_arg6 0x%llx\n", saved_state->v_arg6); - kprintf(" v_arg7 0x%llx\n", saved_state->v_arg7); - kprintf(" v_arg8 0x%llx\n", saved_state->v_arg8); kprintf(" cr2 0x%llx\n", saved_state->cr2); kprintf("real cr2 0x%lx\n", get_cr2()); @@ -408,8 +407,9 @@ interrupt(x86_saved_state_t *state) * Handle local APIC interrupts * else call platform expert for devices. */ - if (!lapic_interrupt(interrupt_num, state)) + if (!lapic_interrupt(interrupt_num, state)) { PE_incoming_interrupt(interrupt_num); + } if (__improbable(get_preemption_level() != ipl)) { panic("Preemption level altered by interrupt vector 0x%x: initial 0x%x, final: 0x%x\n", interrupt_num, ipl, get_preemption_level()); @@ -469,6 +469,9 @@ interrupt(x86_saved_state_t *state) } } + if (cnum == master_cpu) + ml_entropy_collect(); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, interrupt_num, 0, 0, 0, 0); @@ -539,20 +542,11 @@ kernel_trap( } else *myast &= ~AST_CHUD_ALL; - /* - * Is there a hook? 
- */ - perfCallback fn = perfTrapHook; - if (__improbable(fn != NULL)) { - if (fn(type, NULL, 0, 0) == KERN_SUCCESS) { - /* - * If it succeeds, we are done... - */ - return; - } - } #if CONFIG_DTRACE + /* + * Is there a DTrace hook? + */ if (__improbable(tempDTraceTrapHook != NULL)) { if (tempDTraceTrapHook(type, state, lo_spp, 0) == KERN_SUCCESS) { /* @@ -642,6 +636,7 @@ kernel_trap( set_cr3_raw(map->pmap->pm_cr3); return; } + } #endif } @@ -707,10 +702,8 @@ kernel_trap( if (code & T_PF_WRITE) prot |= VM_PROT_WRITE; -#if PAE if (code & T_PF_EXECUTE) prot |= VM_PROT_EXECUTE; -#endif result = vm_fault(map, vm_map_trunc_page(vaddr, @@ -843,7 +836,7 @@ panic_trap(x86_saved_state64_t *regs) "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n" "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n" "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n" - "Fault CR2: 0x%016llx, Error code: 0x%016llx, Fault CPU: 0x%x%s%s%s\n", + "Fault CR2: 0x%016llx, Error code: 0x%016llx, Fault CPU: 0x%x%s%s%s%s\n", regs->isf.rip, regs->isf.trapno, trapname, cr0, cr2, cr3, cr4, regs->rax, regs->rbx, regs->rcx, regs->rdx, @@ -854,7 +847,8 @@ panic_trap(x86_saved_state64_t *regs) regs->isf.ss & 0xFFFF,regs->cr2, regs->isf.err, regs->isf.cpu, virtualized ? " VMM" : "", potential_kernel_NX_fault ? " Kernel NX fault" : "", - potential_smep_fault ? " SMEP/User NX fault" : ""); + potential_smep_fault ? " SMEP/User NX fault" : "", + ""); /* * This next statement is not executed, * but it's needed to stop the compiler using tail call optimization @@ -956,11 +950,14 @@ user_trap( return; /* If it succeeds, we are done... */ } +#if CONFIG_DTRACE /* * DTrace does not consume all user traps, only INT_3's for now. * Avoid needlessly calling tempDTraceTrapHook here, and let the * INT_3 case handle them. */ +#endif + DEBUG_KPRINT_SYSCALL_MASK(1, "user_trap: type=0x%x(%s) err=0x%x cr2=%p rip=%p\n", type, trap_type[type], err, (void *)(long) vaddr, (void *)(long) rip); @@ -1075,10 +1072,8 @@ user_trap( if (err & T_PF_WRITE) prot |= VM_PROT_WRITE; -#if PAE if (__improbable(err & T_PF_EXECUTE)) prot |= VM_PROT_EXECUTE; -#endif kret = vm_fault(thread->map, vm_map_trunc_page(vaddr, PAGE_MASK), @@ -1177,10 +1172,11 @@ i386_exception( } -/* Synchronize a thread's i386_kernel_state (if any) with the given - * i386_saved_state_t obtained from the trap/IPI handler; called in +/* Synchronize a thread's x86_kernel_state (if any) with the given + * x86_saved_state_t obtained from the trap/IPI handler; called in * kernel_trap() prior to entering the debugger, and when receiving - * an "MP_KDP" IPI. + * an "MP_KDP" IPI. Called with null saved_state if an incoming IPI + * was detected from the kernel while spinning with interrupts masked. */ void @@ -1191,7 +1187,7 @@ sync_iss_to_iks(x86_saved_state_t *saved_state) boolean_t record_active_regs = FALSE; /* The PAL may have a special way to sync registers */ - if( saved_state->flavor == THREAD_STATE_NONE ) + if (saved_state && saved_state->flavor == THREAD_STATE_NONE) pal_get_kern_regs( saved_state ); if ((kstack = current_thread()->kernel_stack) != 0) { @@ -1200,7 +1196,8 @@ sync_iss_to_iks(x86_saved_state_t *saved_state) iks = STACK_IKS(kstack); /* Did we take the trap/interrupt in kernel mode? 
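
With the PAE conditionals gone (see the trap.h hunk below), execute faults are always decoded from the hardware error code. The mapping to a VM protection mask is a direct bit translation; a self-contained sketch with local stand-ins for the T_PF_* and VM_PROT_* values:

	#define T_PF_WRITE_SKETCH   0x2		/* bits as in osfmk/i386/trap.h */
	#define T_PF_EXECUTE_SKETCH 0x10

	typedef unsigned int vm_prot_sketch_t;
	#define PROT_READ_SKETCH  0x1
	#define PROT_WRITE_SKETCH 0x2
	#define PROT_EXEC_SKETCH  0x4

	static vm_prot_sketch_t
	fault_prot_sketch(unsigned long err)
	{
		vm_prot_sketch_t prot = PROT_READ_SKETCH;

		if (err & T_PF_WRITE_SKETCH)
			prot |= PROT_WRITE_SKETCH;
		if (err & T_PF_EXECUTE_SKETCH)	/* NX decode, no longer PAE-only */
			prot |= PROT_EXEC_SKETCH;
		return prot;
	}
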
*/ - if (regs == USER_REGS64(current_thread())) + if (saved_state == NULL || /* NULL => polling in kernel */ + regs == USER_REGS64(current_thread())) record_active_regs = TRUE; else { iks->k_rbx = regs->rbx; diff --git a/osfmk/i386/trap.h b/osfmk/i386/trap.h index d5c14fe4f..5eed5e2d1 100644 --- a/osfmk/i386/trap.h +++ b/osfmk/i386/trap.h @@ -109,10 +109,8 @@ #define T_PF_WRITE 0x2 /* write access */ #define T_PF_USER 0x4 /* from user state */ -#ifdef PAE #define T_PF_RSVD 0x8 /* reserved bit set to 1 */ #define T_PF_EXECUTE 0x10 /* instruction fetch when NX */ -#endif #if !defined(ASSEMBLER) && defined(MACH_KERNEL) diff --git a/osfmk/i386/tsc.c b/osfmk/i386/tsc.c index f79a8fcb2..ebf7f5993 100644 --- a/osfmk/i386/tsc.c +++ b/osfmk/i386/tsc.c @@ -35,7 +35,6 @@ * factors needed by other parts of the system. */ -#include #include @@ -180,11 +179,7 @@ tsc_init(void) busFreq = EFI_FSB_frequency(); switch (cpuid_cpufamily()) { - case CPUFAMILY_INTEL_HASWELL: - case CPUFAMILY_INTEL_IVYBRIDGE: - case CPUFAMILY_INTEL_SANDYBRIDGE: - case CPUFAMILY_INTEL_WESTMERE: - case CPUFAMILY_INTEL_NEHALEM: { + default: { uint64_t msr_flex_ratio; uint64_t msr_platform_info; @@ -210,7 +205,8 @@ tsc_init(void) break; } - default: { + case CPUFAMILY_INTEL_MEROM: + case CPUFAMILY_INTEL_PENRYN: { uint64_t prfsts; prfsts = rdmsr64(IA32_PERF_STS); diff --git a/osfmk/i386/vmx/vmx_cpu.c b/osfmk/i386/vmx/vmx_cpu.c index 27c1b3fc9..0b0b36580 100644 --- a/osfmk/i386/vmx/vmx_cpu.c +++ b/osfmk/i386/vmx/vmx_cpu.c @@ -41,7 +41,9 @@ int vmx_use_count = 0; boolean_t vmx_exclusive = FALSE; -decl_simple_lock_data(static,vmx_use_count_lock) + +lck_grp_t *vmx_lck_grp = NULL; +lck_mtx_t *vmx_lck_mtx = NULL; /* ----------------------------------------------------------------------------- vmx_is_available() @@ -64,6 +66,7 @@ vmxon_is_enabled(void) (rdmsr64(MSR_IA32_FEATURE_CONTROL) & MSR_IA32_FEATCTL_VMXON)); } +#if MACH_ASSERT /* ----------------------------------------------------------------------------- vmx_is_cr0_valid() Is CR0 valid for executing VMXON on this CPU? @@ -86,8 +89,10 @@ vmx_is_cr4_valid(vmx_specs_t *specs) return (0 == ((~cr4 & specs->cr4_fixed_0)|(cr4 & ~specs->cr4_fixed_1))); } +#endif + static void -vmx_init(void) +vmx_enable(void) { uint64_t msr_image; @@ -104,6 +109,18 @@ vmx_init(void) (msr_image | MSR_IA32_FEATCTL_VMXON | MSR_IA32_FEATCTL_LOCK)); + + set_cr4(get_cr4() | CR4_VMXE); +} + +void +vmx_init() +{ + vmx_lck_grp = lck_grp_alloc_init("vmx", LCK_GRP_ATTR_NULL); + assert(vmx_lck_grp); + + vmx_lck_mtx = lck_mtx_alloc_init(vmx_lck_grp, LCK_ATTR_NULL); + assert(vmx_lck_mtx); } /* ----------------------------------------------------------------------------- @@ -114,20 +131,13 @@ vmx_init(void) the remainder of the vmx_specs_t uninitialized. 
-------------------------------------------------------------------------- */ void -vmx_get_specs() +vmx_cpu_init() { vmx_specs_t *specs = &current_cpu_datap()->cpu_vmx.specs; - uint64_t msr_image; - - /* this is called once for every CPU, but the lock doesn't care :-) */ - simple_lock_init(&vmx_use_count_lock, 0); - vmx_init(); + vmx_enable(); - /* - * if we have read the data on boot, we won't read it - * again on wakeup, otherwise *bad* things will happen - */ + /* if we have read the data on boot, we won't read it again on wakeup */ if (specs->initialized) return; else @@ -138,50 +148,16 @@ vmx_get_specs() if (!specs->vmx_present) return; -#define bitfield(x,f) ((x >> f##_BIT) & f##_MASK) - /* Obtain and decode VMX general capabilities */ - msr_image = rdmsr64(MSR_IA32_VMX_BASIC); - specs->vmcs_id = (uint32_t)(msr_image & VMX_VCR_VMCS_REV_ID); - specs->vmcs_mem_type = bitfield(msr_image, VMX_VCR_VMCS_MEM_TYPE) != 0; - specs->vmcs_size = bitfield(msr_image, VMX_VCR_VMCS_SIZE); - - /* Obtain allowed settings for pin-based execution controls */ - msr_image = rdmsr64(MSR_IA32_VMXPINBASED_CTLS); - specs->pin_exctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); - specs->pin_exctls_1 = (uint32_t)(msr_image >> 32); - - /* Obtain allowed settings for processor-based execution controls */ - msr_image = rdmsr64(MSR_IA32_PROCBASED_CTLS); - specs->proc_exctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); - specs->proc_exctls_1 = (uint32_t)(msr_image >> 32); - - /* Obtain allowed settings for VM-exit controls */ - msr_image = rdmsr64(MSR_IA32_VMX_EXIT_CTLS); - specs->exit_ctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); - specs->exit_ctls_1 = (uint32_t)(msr_image >> 32); - - /* Obtain allowed settings for VM-entry controls */ - msr_image = rdmsr64(MSR_IA32_VMX_ENTRY_CTLS); - specs->enter_ctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); - specs->enter_ctls_0 = (uint32_t)(msr_image >> 32); - - /* Obtain and decode miscellaneous capabilities */ - msr_image = rdmsr64(MSR_IA32_VMX_MISC); - specs->act_halt = bitfield(msr_image, VMX_VCR_ACT_HLT) != 0; - specs->act_shutdown = bitfield(msr_image, VMX_VCR_ACT_SHUTDOWN) != 0; - specs->act_SIPI = bitfield(msr_image, VMX_VCR_ACT_SIPI) != 0; - specs->act_CSTATE = bitfield(msr_image, VMX_VCR_ACT_CSTATE) != 0; - specs->cr3_targs = bitfield(msr_image, VMX_VCR_CR3_TARGS); - specs->max_msrs = (uint32_t)(512 * (1 + bitfield(msr_image, VMX_VCR_MAX_MSRS))); - specs->mseg_id = (uint32_t)bitfield(msr_image, VMX_VCR_MSEG_ID); - +#define rdmsr_mask(msr, mask) (uint32_t)(rdmsr64(msr) & (mask)) + specs->vmcs_id = rdmsr_mask(MSR_IA32_VMX_BASIC, VMX_VCR_VMCS_REV_ID); + /* Obtain VMX-fixed bits in CR0 */ - specs->cr0_fixed_0 = (uint32_t)rdmsr64(MSR_IA32_VMX_CR0_FIXED0) & 0xFFFFFFFF; - specs->cr0_fixed_1 = (uint32_t)rdmsr64(MSR_IA32_VMX_CR0_FIXED1) & 0xFFFFFFFF; + specs->cr0_fixed_0 = rdmsr_mask(MSR_IA32_VMX_CR0_FIXED0, 0xFFFFFFFF); + specs->cr0_fixed_1 = rdmsr_mask(MSR_IA32_VMX_CR0_FIXED1, 0xFFFFFFFF); /* Obtain VMX-fixed bits in CR4 */ - specs->cr4_fixed_0 = (uint32_t)rdmsr64(MSR_IA32_VMX_CR4_FIXED0) & 0xFFFFFFFF; - specs->cr4_fixed_1 = (uint32_t)rdmsr64(MSR_IA32_VMX_CR4_FIXED1) & 0xFFFFFFFF; + specs->cr4_fixed_0 = rdmsr_mask(MSR_IA32_VMX_CR4_FIXED0, 0xFFFFFFFF); + specs->cr4_fixed_1 = rdmsr_mask(MSR_IA32_VMX_CR4_FIXED1, 0xFFFFFFFF); } /* ----------------------------------------------------------------------------- @@ -195,8 +171,6 @@ vmx_on(void *arg __unused) addr64_t vmxon_region_paddr; int result; - vmx_init(); - assert(cpu->specs.vmx_present); if (NULL == cpu->vmxon_region) @@ -206,15 +180,17 @@ 
vmx_on(void *arg __unused) /* * Enable VMX operation. */ - set_cr4(get_cr4() | CR4_VMXE); - - assert(vmx_is_cr0_valid(&cpu->specs)); - assert(vmx_is_cr4_valid(&cpu->specs)); + if (FALSE == cpu->specs.vmx_on) { + assert(vmx_is_cr0_valid(&cpu->specs)); + assert(vmx_is_cr4_valid(&cpu->specs)); - result = __vmxon(vmxon_region_paddr); + result = __vmxon(vmxon_region_paddr); + + if (result != VMX_SUCCEED) { + panic("vmx_on: unexpected return %d from __vmxon()", result); + } - if (result != VMX_SUCCEED) { - panic("vmx_on: unexpected return %d from __vmxon()", result); + cpu->specs.vmx_on = TRUE; } } @@ -225,16 +201,19 @@ vmx_on(void *arg __unused) static void vmx_off(void *arg __unused) { + vmx_cpu_t *cpu = &current_cpu_datap()->cpu_vmx; int result; - /* Tell the CPU to release the VMXON region */ - result = __vmxoff(); + if (TRUE == cpu->specs.vmx_on) { + /* Tell the CPU to release the VMXON region */ + result = __vmxoff(); - if (result != VMX_SUCCEED) { - panic("vmx_off: unexpected return %d from __vmxoff()", result); + if (result != VMX_SUCCEED) { + panic("vmx_off: unexpected return %d from __vmxoff()", result); + } + + cpu->specs.vmx_on = FALSE; } - - set_cr4(get_cr4() & ~CR4_VMXE); } /* ----------------------------------------------------------------------------- @@ -282,10 +261,10 @@ static boolean_t vmx_globally_available(void) { unsigned int i; - + unsigned int ncpus = ml_get_max_cpus(); boolean_t available = TRUE; - for (i=0; i<real_ncpus; i++) { + for (i=0; i<ncpus; i++) { vmx_cpu_t *cpu = &cpu_datap(i)->cpu_vmx; if (!cpu->specs.vmx_present) @@ -304,31 +283,33 @@ int host_vmxon(boolean_t exclusive) { int error; - boolean_t do_it = FALSE; /* do the cpu sync outside of the area holding the lock */ + + assert(0 == get_preemption_level()); if (!vmx_globally_available()) return VMX_UNSUPPORTED; - simple_lock(&vmx_use_count_lock); + lck_mtx_lock(vmx_lck_mtx); - if (vmx_exclusive) { + if (vmx_exclusive || (exclusive && vmx_use_count)) { error = VMX_INUSE; } else { - vmx_use_count++; - if (vmx_use_count == 1) /* was turned off before */ - do_it = TRUE; - vmx_exclusive = exclusive; + if (0 == vmx_use_count) { + vmx_allocate_vmxon_regions(); + vmx_exclusive = exclusive; + vmx_use_count = 1; + mp_cpus_call(CPUMASK_ALL, ASYNC, vmx_on, NULL); + + } else { + vmx_use_count++; + } VMX_KPRINTF("VMX use count: %d\n", vmx_use_count); error = VMX_OK; } - simple_unlock(&vmx_use_count_lock); + lck_mtx_unlock(vmx_lck_mtx); - if (do_it) { - vmx_allocate_vmxon_regions(); - mp_rendezvous(NULL, vmx_on, NULL, NULL); - } return error; } @@ -339,24 +320,21 @@ host_vmxon(boolean_t exclusive) void host_vmxoff() { - boolean_t do_it = FALSE; /* do the cpu sync outside of the area holding the lock */ + assert(0 == get_preemption_level()); - simple_lock(&vmx_use_count_lock); + lck_mtx_lock(vmx_lck_mtx); - if (vmx_use_count) { - vmx_use_count--; + if (1 == vmx_use_count) { vmx_exclusive = FALSE; - if (!vmx_use_count) - do_it = TRUE; - } - - simple_unlock(&vmx_use_count_lock); - - if (do_it) { - mp_rendezvous(NULL, vmx_off, NULL, NULL); + vmx_use_count = 0; + mp_cpus_call(CPUMASK_ALL, ASYNC, vmx_off, NULL); vmx_free_vmxon_regions(); + } else { + vmx_use_count--; } + lck_mtx_unlock(vmx_lck_mtx); + VMX_KPRINTF("VMX use count: %d\n", vmx_use_count); } @@ -369,6 +347,7 @@ void vmx_suspend() { VMX_KPRINTF("vmx_suspend\n"); + if (vmx_use_count) vmx_off(NULL); } @@ -381,7 +360,33 @@ void vmx_resume() { VMX_KPRINTF("vmx_resume\n"); - vmx_init(); /* init VMX on CPU #0 */ + + vmx_enable(); + if (vmx_use_count) vmx_on(NULL); } + +/* ----------------------------------------------------------------------------- + 
vmx_hv_support() + Determine if the VMX feature set is sufficient for kernel HV support. + -------------------------------------------------------------------------- */ +boolean_t +vmx_hv_support() +{ + if (!vmx_is_available()) + return FALSE; + +#define CHK(msr, shift, mask) if (!VMX_CAP(msr, shift, mask)) return FALSE; + + /* 'EPT' and 'Unrestricted Mode' are part of the secondary processor-based + * VM-execution controls */ + CHK(MSR_IA32_VMX_BASIC, 0, VMX_BASIC_TRUE_CTLS) + CHK(MSR_IA32_VMX_TRUE_PROCBASED_CTLS, 32, VMX_TRUE_PROCBASED_SECONDARY_CTLS) + + /* if we have these, check for 'EPT' and 'Unrestricted Mode' */ + CHK(MSR_IA32_VMX_PROCBASED_CTLS2, 32, VMX_PROCBASED_CTLS2_EPT) + CHK(MSR_IA32_VMX_PROCBASED_CTLS2, 32, VMX_PROCBASED_CTLS2_UNRESTRICTED) + + return TRUE; +} diff --git a/osfmk/i386/vmx/vmx_cpu.h b/osfmk/i386/vmx/vmx_cpu.h index bb9f5ad51..2abe13d50 100644 --- a/osfmk/i386/vmx/vmx_cpu.h +++ b/osfmk/i386/vmx/vmx_cpu.h @@ -40,37 +40,8 @@ typedef struct vmx_specs { boolean_t initialized; /* the specs have already been read */ boolean_t vmx_present; /* VMX feature available and enabled */ - uint32_t vmcs_id; /* VMCS revision identifier */ - uint8_t vmcs_mem_type; /* VMCS memory type, (see enum above) */ - uint16_t vmcs_size; /* VMCS region size in bytes */ - boolean_t act_halt; /* HLT activity state supported */ - boolean_t act_shutdown; /* shutdown activity state supported */ - boolean_t act_SIPI; /* wait-for-SIPI activity supported */ - boolean_t act_CSTATE; /* C-state activity state supported */ - uint8_t cr3_targs; /* CR3 target values supported */ - uint32_t max_msrs; /* max MSRs to load/store on VMX transition */ - uint32_t mseg_id; /* MSEG revision identifier for SMI */ - /* - * Allowed settings for these controls are specified by - * a pair of bitfields: 0-settings contain 0 bits - * corresponding to controls thay may be 0; 1-settings - * contain 1 bits corresponding to controls that may be 1. 
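vmx_hv_support() reads each required capability out of the allowed-1 half of the corresponding VMX control MSR. A user-space model of the same checks, using the bit positions from the vmx_cpu.h hunk below; rdmsr64() is faked here, since actually reading MSRs requires ring 0:

#include <stdint.h>
#include <stdio.h>

/* bit positions copied from the vmx_cpu.h hunk below */
#define VMX_BASIC_TRUE_CTLS               (1ull << 55)
#define VMX_TRUE_PROCBASED_SECONDARY_CTLS (1ull << 31)
#define VMX_PROCBASED_CTLS2_EPT           (1ull << 1)
#define VMX_PROCBASED_CTLS2_UNRESTRICTED  (1ull << 7)

/* stand-in for rdmsr64(); a real check must read the MSR in the kernel */
static uint64_t fake_rdmsr64(uint32_t msr) { (void)msr; return ~0ull; }

/* allowed-1 settings of a control MSR live in its high 32 bits,
 * which is why the kernel passes shift == 32 for the control checks */
#define VMX_CAP(msr, shift, mask) \
    (fake_rdmsr64(msr) & ((mask) << (shift)))

int main(void)
{
    int ok = VMX_CAP(0x48B /* MSR_IA32_VMX_PROCBASED_CTLS2 */, 32,
                     VMX_PROCBASED_CTLS2_EPT) != 0;
    printf("EPT allowed: %d\n", ok);
    return 0;
}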
- */ - uint32_t pin_exctls_0; /* allowed 0s pin-based controls */ - uint32_t pin_exctls_1; /* allowed 1s pin-based controls */ - - uint32_t proc_exctls_0; /* allowed 0s proc-based controls */ - uint32_t proc_exctls_1; /* allowed 1s proc-based controls */ - - uint32_t sec_exctls_0; /* allowed 0s 2ndary proc-based ctrls */ - uint32_t sec_exctls_1; /* allowed 1s 2ndary proc-based ctrls */ - - uint32_t exit_ctls_0; /* allowed 0s VM-exit controls */ - uint32_t exit_ctls_1; /* allowed 1s VM-exit controls */ - - uint32_t enter_ctls_0; /* allowed 0s VM-entry controls */ - uint32_t enter_ctls_1; /* allowed 1s VM-entry controls */ - + boolean_t vmx_on; /* VMX is active */ + uint32_t vmcs_id; /* VMCS revision identifier */ /* * Fixed control register bits are specified by a pair of * bitfields: 0-settings contain 0 bits corresponding to @@ -89,10 +60,20 @@ typedef struct vmx_cpu { void *vmxon_region; /* the logical address of the VMXON region page */ } vmx_cpu_t; -void vmx_get_specs(void); +void vmx_init(void); +void vmx_cpu_init(void); void vmx_resume(void); void vmx_suspend(void); +#define VMX_BASIC_TRUE_CTLS (1ull << 55) +#define VMX_TRUE_PROCBASED_SECONDARY_CTLS (1ull << 31) +#define VMX_PROCBASED_CTLS2_EPT (1ull << 1) +#define VMX_PROCBASED_CTLS2_UNRESTRICTED (1ull << 7) + +#define VMX_CAP(msr, shift, mask) (rdmsr64(msr) & ((mask) << (shift))) + +boolean_t vmx_hv_support(void); + /* * __vmxoff -- Leave VMX Operation * diff --git a/osfmk/i386/xpr.h b/osfmk/i386/xpr.h index 9221068a7..9ffc12c8f 100644 --- a/osfmk/i386/xpr.h +++ b/osfmk/i386/xpr.h @@ -62,8 +62,5 @@ * Machine dependent module for the XPR tracing facility. */ -#include -#include - #define XPR_TIMESTAMP (0) diff --git a/osfmk/ipc/Makefile b/osfmk/ipc/Makefile index a8dcf06ad..8aa306ba6 100644 --- a/osfmk/ipc/Makefile +++ b/osfmk/ipc/Makefile @@ -13,7 +13,8 @@ EXPORT_ONLY_FILES = \ ipc_types.h EXPORT_PRIVATE_FILES = \ - ipc_port.h + ipc_port.h \ + ipc_voucher.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/osfmk/ipc/ipc_entry.c b/osfmk/ipc/ipc_entry.c index 80eb3d662..aabb15ada 100644 --- a/osfmk/ipc/ipc_entry.c +++ b/osfmk/ipc/ipc_entry.c @@ -99,7 +99,6 @@ ipc_entry_lookup( assert(is_active(space)); - index = MACH_PORT_INDEX(name); if (index < space->is_table_size) { entry = &space->is_table[index]; @@ -115,69 +114,127 @@ ipc_entry_lookup( return entry; } + /* - * Routine: ipc_entry_get + * Routine: ipc_entries_hold * Purpose: - * Tries to allocate an entry out of the space. + * Verifies that there are at least 'entries_needed' + * free list members * Conditions: * The space is write-locked and active throughout. * An object may be locked. Will not allocate memory. * Returns: - * KERN_SUCCESS A free entry was found. + * KERN_SUCCESS Free entries were found. * KERN_NO_SPACE No entry allocated. */ kern_return_t -ipc_entry_get( +ipc_entries_hold( + ipc_space_t space, + uint32_t entries_needed) +{ + + ipc_entry_t table; + mach_port_index_t next_free = 0; + uint32_t i; + + assert(is_active(space)); + + table = &space->is_table[0]; + + for (i = 0; i < entries_needed; i++) { + next_free = table[next_free].ie_next; + if (next_free == 0) { + return KERN_NO_SPACE; + } + assert(next_free < space->is_table_size); + assert(table[next_free].ie_object == IO_NULL); + } + return KERN_SUCCESS; +} + +/* + * Routine: ipc_entry_claim + * Purpose: + * Take formal ownership of a held entry. + * Conditions: + * The space is write-locked and active throughout. + * An object may be locked. Will not allocate memory. 
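ipc_entries_hold() above only proves that enough free entries exist: it walks the free list rooted at table slot 0 without dequeuing anything, so a later claim cannot fail. A self-contained model of that walk; the field names mirror the ipc_entry ones named above, everything else is invented for illustration:

#include <assert.h>
#include <stdio.h>

#define TABLE_SIZE 8

struct entry {
    unsigned ie_next;    /* next free slot index; 0 terminates the list */
    void    *ie_object;  /* NULL while on the free list */
};

/* returns 1 if at least 'needed' free entries exist, without
 * taking any of them off the list (mirrors ipc_entries_hold) */
static int entries_hold(struct entry *table, unsigned size, unsigned needed)
{
    unsigned next_free = 0, i;
    for (i = 0; i < needed; i++) {
        next_free = table[next_free].ie_next;
        if (next_free == 0)
            return 0;                 /* KERN_NO_SPACE */
        assert(next_free < size);
        assert(table[next_free].ie_object == NULL);
    }
    return 1;                         /* KERN_SUCCESS */
}

int main(void)
{
    struct entry t[TABLE_SIZE] = {{0, NULL}};
    t[0].ie_next = 1; t[1].ie_next = 2; t[2].ie_next = 0; /* two free slots */
    printf("hold 2: %d, hold 3: %d\n",
           entries_hold(t, TABLE_SIZE, 2), entries_hold(t, TABLE_SIZE, 3));
    return 0;
}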
+ * + * Note: The returned entry must be marked as modified before + * releasing the space lock + */ + +kern_return_t +ipc_entry_claim( ipc_space_t space, mach_port_name_t *namep, ipc_entry_t *entryp) { + ipc_entry_t entry; ipc_entry_t table; mach_port_index_t first_free; - ipc_entry_t free_entry; + mach_port_gen_t gen; + mach_port_name_t new_name; - assert(is_active(space)); + table = &space->is_table[0]; - { - table = space->is_table; - first_free = table->ie_next; + first_free = table->ie_next; + assert(first_free != 0); - if (first_free == 0) - return KERN_NO_SPACE; + entry = &table[first_free]; + table->ie_next = entry->ie_next; + space->is_table_free--; - assert(first_free < space->is_table_size); - free_entry = &table[first_free]; - table->ie_next = free_entry->ie_next; - } + assert(table->ie_next < space->is_table_size); /* * Initialize the new entry. We need only * increment the generation number and clear ie_request. */ - { - mach_port_name_t new_name; - mach_port_gen_t gen; + gen = IE_BITS_NEW_GEN(entry->ie_bits); + entry->ie_bits = gen; + entry->ie_request = IE_REQ_NONE; - gen = IE_BITS_NEW_GEN(free_entry->ie_bits); - free_entry->ie_bits = gen; - free_entry->ie_request = IE_REQ_NONE; + /* + * The new name can't be MACH_PORT_NULL because index + * is non-zero. It can't be MACH_PORT_DEAD because + * the table isn't allowed to grow big enough. + * (See comment in ipc/ipc_table.h.) + */ + new_name = MACH_PORT_MAKE(first_free, gen); + assert(MACH_PORT_VALID(new_name)); + *namep = new_name; + *entryp = entry; - /* - * The new name can't be MACH_PORT_NULL because index - * is non-zero. It can't be MACH_PORT_DEAD because - * the table isn't allowed to grow big enough. - * (See comment in ipc/ipc_table.h.) - */ - new_name = MACH_PORT_MAKE(first_free, gen); - assert(MACH_PORT_VALID(new_name)); - *namep = new_name; - } + return KERN_SUCCESS; +} - assert(free_entry->ie_object == IO_NULL); +/* + * Routine: ipc_entry_get + * Purpose: + * Tries to allocate an entry out of the space. + * Conditions: + * The space is write-locked and active throughout. + * An object may be locked. Will not allocate memory. + * Returns: + * KERN_SUCCESS A free entry was found. + * KERN_NO_SPACE No entry allocated. + */ - *entryp = free_entry; - return KERN_SUCCESS; +kern_return_t +ipc_entry_get( + ipc_space_t space, + mach_port_name_t *namep, + ipc_entry_t *entryp) +{ + kern_return_t kr; + + kr = ipc_entries_hold(space, 1); + if (KERN_SUCCESS != kr) + return kr; + + return ipc_entry_claim(space, namep, entryp); } /* @@ -306,7 +363,8 @@ ipc_entry_alloc_name( table[free_index].ie_next = table[next_index].ie_next; - + space->is_table_free--; + /* mark the previous entry modified - reconstructing the name */ ipc_entry_modified(space, MACH_PORT_MAKE(free_index, @@ -376,6 +434,7 @@ ipc_entry_dealloc( entry->ie_bits &= IE_BITS_GEN_MASK; entry->ie_next = table->ie_next; table->ie_next = index; + space->is_table_free++; } else { /* * Nothing to do. 
The entry does not match @@ -675,6 +734,7 @@ ipc_entry_grow_table( space->is_table = table; space->is_table_size = size; space->is_table_next = nits; + space->is_table_free += size - osize; is_done_growing(space); is_write_unlock(space); diff --git a/osfmk/ipc/ipc_entry.h b/osfmk/ipc/ipc_entry.h index 592a31c73..4a34f110d 100644 --- a/osfmk/ipc/ipc_entry.h +++ b/osfmk/ipc/ipc_entry.h @@ -139,6 +139,17 @@ extern ipc_entry_t ipc_entry_lookup( ipc_space_t space, mach_port_name_t name); +/* Hold a number of entries in a locked space */ +extern kern_return_t ipc_entries_hold( + ipc_space_t space, + natural_t count); + +/* claim and initialize a held entry in a locked space */ +extern kern_return_t ipc_entry_claim( + ipc_space_t space, + mach_port_name_t *namep, + ipc_entry_t *entryp); + /* Allocate an entry in a space */ extern kern_return_t ipc_entry_get( ipc_space_t space, diff --git a/osfmk/ipc/ipc_hash.c b/osfmk/ipc/ipc_hash.c index 491111b51..a8f79a647 100644 --- a/osfmk/ipc/ipc_hash.c +++ b/osfmk/ipc/ipc_hash.c @@ -65,7 +65,6 @@ #include #include -#include #include #include #include diff --git a/osfmk/ipc/ipc_importance.c b/osfmk/ipc/ipc_importance.c new file mode 100644 index 000000000..a8aa9c400 --- /dev/null +++ b/osfmk/ipc/ipc_importance.c @@ -0,0 +1,3266 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +extern int proc_pid(void *); +extern int proc_selfpid(void); +extern uint64_t proc_uniqueid(void *p); +extern char *proc_name_address(void *p); + +/* + * Globals for delayed boost drop processing. 
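The is_table_free counter these ipc_entry.c hunks thread through claim, dealloc, and grow lets the space answer "how many free slots remain?" in O(1) instead of walking the free list: claims decrement it, deallocations increment it, and growth credits all newly added slots. A toy model of that bookkeeping (structure and names invented):

#include <assert.h>
#include <stdio.h>

struct space {
    unsigned table_size;
    unsigned table_free;
};

static void claim_entry(struct space *s)   { assert(s->table_free > 0); s->table_free--; }
static void dealloc_entry(struct space *s) { s->table_free++; }

static void grow_table(struct space *s, unsigned nsize)
{
    assert(nsize > s->table_size);
    s->table_free += nsize - s->table_size;  /* all new slots start free */
    s->table_size  = nsize;
}

int main(void)
{
    struct space s = { 4, 3 };
    claim_entry(&s);      /* free: 2 */
    grow_table(&s, 8);    /* free: 2 + 4 */
    dealloc_entry(&s);    /* free: 7 */
    printf("size=%u free=%u\n", s.table_size, s.table_free);
    return 0;
}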
+ */ +static queue_head_t ipc_importance_delayed_drop_queue; +static thread_call_t ipc_importance_delayed_drop_call; +static uint64_t ipc_importance_delayed_drop_timestamp; +static boolean_t ipc_importance_delayed_drop_call_requested = FALSE; + +#define DENAP_DROP_TARGET (1000 * NSEC_PER_MSEC) /* optimum denap delay */ +#define DENAP_DROP_SKEW (100 * NSEC_PER_MSEC) /* request skew for wakeup */ +#define DENAP_DROP_LEEWAY (2 * DENAP_DROP_SKEW) /* specified wakeup leeway */ + +#define DENAP_DROP_DELAY (DENAP_DROP_TARGET + DENAP_DROP_SKEW) +#define DENAP_DROP_FLAGS (THREAD_CALL_DELAY_SYS_NORMAL | THREAD_CALL_DELAY_LEEWAY) + +/* + * Importance Voucher Attribute Manager + */ + +static lck_spin_t ipc_importance_lock_data; /* single lock for now */ + + +#define ipc_importance_lock_init() \ + lck_spin_init(&ipc_importance_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define ipc_importance_lock_destroy() \ + lck_spin_destroy(&ipc_importance_lock_data, &ipc_lck_grp) +#define ipc_importance_lock() \ + lck_spin_lock(&ipc_importance_lock_data) +#define ipc_importance_lock_try() \ + lck_spin_try_lock(&ipc_importance_lock_data) +#define ipc_importance_unlock() \ + lck_spin_unlock(&ipc_importance_lock_data) +#define ipc_importance_sleep(elem) lck_spin_sleep(&ipc_importance_lock_data, \ + LCK_SLEEP_DEFAULT, \ + (event_t)(elem), \ + THREAD_UNINT) +#define ipc_importance_wakeup(elem) thread_wakeup((event_t)(elem)) + +#if IIE_REF_DEBUG +#define incr_ref_counter(x) (hw_atomic_add(&(x), 1)) + +static inline +uint32_t ipc_importance_reference_internal(ipc_importance_elem_t elem) +{ + incr_ref_counter(elem->iie_refs_added); + return (hw_atomic_add(&elem->iie_bits, 1) & IIE_REFS_MASK); +} + +static inline +uint32_t ipc_importance_release_internal(ipc_importance_elem_t elem) +{ + incr_ref_counter(elem->iie_refs_dropped); + return (hw_atomic_sub(&elem->iie_bits, 1) & IIE_REFS_MASK); +} + +static inline +uint32_t ipc_importance_task_reference_internal(ipc_importance_task_t task_imp) +{ + uint32_t out; + out = ipc_importance_reference_internal(&task_imp->iit_elem); + incr_ref_counter(task_imp->iit_elem.iie_task_refs_added); + return out; +} + +static inline +uint32_t ipc_importance_task_release_internal(ipc_importance_task_t task_imp) +{ + uint32_t out; + + assert(1 < IIT_REFS(task_imp)); + incr_ref_counter(task_imp->iit_elem.iie_task_refs_dropped); + out = ipc_importance_release_internal(&task_imp->iit_elem); + return out; +} + +static inline +void ipc_importance_counter_init(ipc_importance_elem_t elem) +{ + + elem->iie_refs_added = 0; + elem->iie_refs_dropped = 0; + elem->iie_kmsg_refs_added = 0; + elem->iie_kmsg_refs_inherited = 0; + elem->iie_kmsg_refs_coalesced = 0; + elem->iie_kmsg_refs_dropped = 0; + elem->iie_task_refs_added = 0; + elem->iie_task_refs_added_inherit_from = 0; + elem->iie_task_refs_added_transition = 0; + elem->iie_task_refs_self_added = 0; + elem->iie_task_refs_inherited = 0; + elem->iie_task_refs_coalesced = 0; + elem->iie_task_refs_dropped = 0; +} +#else +#define incr_ref_counter(x) +#endif + +#if DEVELOPMENT || DEBUG +static queue_head_t global_iit_alloc_queue; +#endif + +/* TODO: remove this variable when interactive daemon audit is complete */ +boolean_t ipc_importance_interactive_receiver = FALSE; + +static zone_t ipc_importance_task_zone; +static zone_t ipc_importance_inherit_zone; + +static ipc_voucher_attr_control_t ipc_importance_control; + +/* + * Routine: ipc_importance_kmsg_link + * Purpose: + * Link the kmsg onto the appropriate propagation chain. 
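ipc_importance_reference_internal() above keeps the reference count packed in the low bits of the shared iie_bits word and updates it with a single atomic add, masking off the non-count bits on return. A portable C11 model of that pattern; the 24-bit count width is an assumption for illustration, not the kernel's actual IIE_REFS_MASK:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* assumed 24-bit count; type/flag bits could live in the top byte */
#define REFS_MASK 0x00FFFFFFu

static _Atomic uint32_t bits = 1;   /* starts with one reference */

static uint32_t ref(void)
{
    /* returns the new count, like hw_atomic_add(...) & IIE_REFS_MASK */
    return (atomic_fetch_add(&bits, 1) + 1) & REFS_MASK;
}

static uint32_t unref(void)
{
    return (atomic_fetch_sub(&bits, 1) - 1) & REFS_MASK;
}

int main(void)
{
    printf("after ref: %u, after unref: %u\n", ref(), unref());
    return 0;
}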
+ * If the element is a task importance, we link directly + * on its propagation chain. Otherwise, we link onto the + * destination task of the inherit. + * Conditions: + * Importance lock held. + * Caller is donating an importance elem reference to the kmsg. + */ +static void +ipc_importance_kmsg_link( + ipc_kmsg_t kmsg, + ipc_importance_elem_t elem) +{ + ipc_importance_elem_t link_elem; + + assert(IIE_NULL == kmsg->ikm_importance); + + link_elem = (IIE_TYPE_INHERIT == IIE_TYPE(elem)) ? + (ipc_importance_elem_t)((ipc_importance_inherit_t)elem)->iii_to_task : + elem; + + queue_enter(&link_elem->iie_kmsgs, kmsg, ipc_kmsg_t, ikm_inheritance); + kmsg->ikm_importance = elem; +} + +/* + * Routine: ipc_importance_kmsg_unlink + * Purpose: + * Unlink the kmsg from its current propagation chain. + * If the element is a task importance, we unlink directly + * from its propagation chain. Otherwise, we unlink from the + * destination task of the inherit. + * Returns: + * The reference to the importance element it was linked on. + * Conditions: + * Importance lock held. + * Caller is responsible for dropping reference on returned elem. + */ +static ipc_importance_elem_t +ipc_importance_kmsg_unlink( + ipc_kmsg_t kmsg) +{ + ipc_importance_elem_t elem = kmsg->ikm_importance; + + if (IIE_NULL != elem) { + ipc_importance_elem_t unlink_elem; + + unlink_elem = (IIE_TYPE_INHERIT == IIE_TYPE(elem)) ? + (ipc_importance_elem_t)((ipc_importance_inherit_t)elem)->iii_to_task : + elem; + + queue_remove(&unlink_elem->iie_kmsgs, kmsg, ipc_kmsg_t, ikm_inheritance); + kmsg->ikm_importance = IIE_NULL; + } + return elem; +} + +/* + * Routine: ipc_importance_inherit_link + * Purpose: + * Link the inherit onto the appropriate propagation chain. + * If the element is a task importance, we link directly + * on its propagation chain. Otherwise, we link onto the + * destination task of the inherit. + * Conditions: + * Importance lock held. + * Caller is donating an elem importance reference to the inherit. + */ +static void +ipc_importance_inherit_link( + ipc_importance_inherit_t inherit, + ipc_importance_elem_t elem) +{ + ipc_importance_elem_t link_elem; + + assert(IIE_NULL == inherit->iii_from_elem); + link_elem = (IIE_TYPE_INHERIT == IIE_TYPE(elem)) ? + (ipc_importance_elem_t)((ipc_importance_inherit_t)elem)->iii_to_task : + elem; + + queue_enter(&link_elem->iie_inherits, inherit, + ipc_importance_inherit_t, iii_inheritance); + inherit->iii_from_elem = elem; +} + +/* + * Routine: ipc_importance_inherit_unlink + * Purpose: + * Unlink the inherit from its current propagation chain. + * If the element is a task importance, we unlink directly + * from its propagation chain. Otherwise, we unlink from the + * destination task of the inherit. + * Returns: + * The reference to the importance element it was linked on. + * Conditions: + * Importance lock held. + * Caller is responsible for dropping reference on returned elem. + */ +static ipc_importance_elem_t +ipc_importance_inherit_unlink( + ipc_importance_inherit_t inherit) +{ + ipc_importance_elem_t elem = inherit->iii_from_elem; + + if (IIE_NULL != elem) { + ipc_importance_elem_t unlink_elem; + + unlink_elem = (IIE_TYPE_INHERIT == IIE_TYPE(elem)) ? 
+ (ipc_importance_elem_t)((ipc_importance_inherit_t)elem)->iii_to_task : + elem; + + queue_remove(&unlink_elem->iie_inherits, inherit, + ipc_importance_inherit_t, iii_inheritance); + inherit->iii_from_elem = IIE_NULL; + } + return elem; +} + +/* + * Routine: ipc_importance_reference + * Purpose: + * Add a reference to the importance element. + * Conditions: + * Caller must hold a reference on the element. + */ +void +ipc_importance_reference(ipc_importance_elem_t elem) +{ + assert(0 < IIE_REFS(elem)); + ipc_importance_reference_internal(elem); +} + +/* + * Routine: ipc_importance_release_locked + * Purpose: + * Release a reference on an importance attribute value, + * unlinking and deallocating the attribute if the last reference. + * Conditions: + * Entered with importance lock held, leaves with it unlocked. + */ +static void +ipc_importance_release_locked(ipc_importance_elem_t elem) +{ + assert(0 < IIE_REFS(elem)); + + if (0 < ipc_importance_release_internal(elem)) { + +#if DEVELOPMENT || DEBUG + ipc_importance_inherit_t temp_inherit; + ipc_importance_task_t link_task; + ipc_kmsg_t temp_kmsg; + uint32_t expected = 0; + + if (0 < elem->iie_made) + expected++; + + link_task = (IIE_TYPE_INHERIT == IIE_TYPE(elem)) ? + ((ipc_importance_inherit_t)elem)->iii_to_task : + (ipc_importance_task_t)elem; + + queue_iterate(&link_task->iit_kmsgs, temp_kmsg, ipc_kmsg_t, ikm_inheritance) + if (temp_kmsg->ikm_importance == elem) + expected++; + queue_iterate(&link_task->iit_inherits, temp_inherit, + ipc_importance_inherit_t, iii_inheritance) + if (temp_inherit->iii_from_elem == elem) + expected++; + + if (IIE_REFS(elem) < expected) + panic("ipc_importance_release_locked (%p)", elem); +#endif + ipc_importance_unlock(); + return; + } + + /* last ref */ + /* can't get to no refs if we contribute to something else's importance */ + assert(queue_empty(&elem->iie_kmsgs)); + assert(queue_empty(&elem->iie_inherits)); + + switch (IIE_TYPE(elem)) { + + /* just a "from" task reference to drop */ + case IIE_TYPE_TASK: + { + ipc_importance_task_t task_elem; + + task_elem = (ipc_importance_task_t)elem; + assert(TASK_NULL == task_elem->iit_task); + +#if DEVELOPMENT || DEBUG + queue_remove(&global_iit_alloc_queue, task_elem, ipc_importance_task_t, iit_allocation); +#endif + + ipc_importance_unlock(); + + zfree(ipc_importance_task_zone, task_elem); + break; + } + + /* dropping an inherit element */ + case IIE_TYPE_INHERIT: + { + ipc_importance_inherit_t inherit; + ipc_importance_elem_t from_elem; + ipc_importance_task_t to_task; + + + inherit = (ipc_importance_inherit_t)elem; + to_task = inherit->iii_to_task; + assert(IIT_NULL != to_task); + assert(!inherit->iii_donating); + + /* unlink and release the inherit */ + assert(ipc_importance_task_is_any_receiver_type(to_task)); + from_elem = ipc_importance_inherit_unlink(inherit); + assert(IIE_NULL != from_elem); + ipc_importance_release_locked(from_elem); + /* unlocked on return */ + + ipc_importance_task_release(to_task); + + zfree(ipc_importance_inherit_zone, inherit); + break; + } + } +} + +/* + * Routine: ipc_importance_release + * Purpose: + * Release a reference on an importance attribute value, + * unlinking and deallocating the attribute if the last reference. + * Conditions: + * nothing locked on entrance, nothing locked on exit. + * May block. 
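ipc_importance_release_locked() above is entered with the importance lock held and always leaves with it dropped, so the final zfree() never runs under the lock. A minimal pthread model of that enter-locked, leave-unlocked release shape (all names invented):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

struct elem { int refs; };

/* called with 'lock' held; returns with it dropped in every path */
static void release_locked(struct elem *e)
{
    if (--e->refs > 0) {
        pthread_mutex_unlock(&lock);   /* not the last ref: just unlock */
        return;
    }
    pthread_mutex_unlock(&lock);       /* last ref: unlock, then free */
    free(e);
}

int main(void)
{
    struct elem *e = malloc(sizeof *e);
    e->refs = 1;
    pthread_mutex_lock(&lock);
    release_locked(e);                 /* returns with the lock dropped */
    printf("released\n");
    return 0;
}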
+ */ +void +ipc_importance_release(ipc_importance_elem_t elem) +{ + if (IIE_NULL == elem) + return; + + ipc_importance_lock(); + ipc_importance_release_locked(elem); + /* unlocked */ +} + +/* + * Routine: ipc_importance_task_reference + + + * Purpose: + * Retain a reference on a task importance attribute value. + * Conditions: + * nothing locked on entrance, nothing locked on exit. + * caller holds a reference already. + */ +void +ipc_importance_task_reference(ipc_importance_task_t task_elem) +{ + if (IIT_NULL == task_elem) + return; +#if IIE_REF_DEBUG + incr_ref_counter(task_elem->iit_elem.iie_task_refs_added); +#endif + ipc_importance_reference(&task_elem->iit_elem); +} + +/* + * Routine: ipc_importance_task_release + * Purpose: + * Release a reference on a task importance attribute value, + * unlinking and deallocating the attribute if the last reference. + * Conditions: + * nothing locked on entrance, nothing locked on exit. + * May block. + */ +void +ipc_importance_task_release(ipc_importance_task_t task_elem) +{ + if (IIT_NULL == task_elem) + return; + + ipc_importance_lock(); +#if IIE_REF_DEBUG + incr_ref_counter(task_elem->iit_elem.iie_task_refs_dropped); +#endif + ipc_importance_release_locked(&task_elem->iit_elem); + /* unlocked */ +} + +/* + * Routine: ipc_importance_task_release_locked + * Purpose: + * Release a reference on a task importance attribute value, + * unlinking and deallocating the attribute if the last reference. + * Conditions: + * importance lock held on entry, nothing locked on exit. + * May block. + */ +static void +ipc_importance_task_release_locked(ipc_importance_task_t task_elem) +{ + if (IIT_NULL == task_elem) { + ipc_importance_unlock(); + return; + } +#if IIE_REF_DEBUG + incr_ref_counter(task_elem->iit_elem.iie_task_refs_dropped); +#endif + ipc_importance_release_locked(&task_elem->iit_elem); + /* unlocked */ +} + +/* + * Routines for importance donation/inheritance/boosting + */ + + +/* + * External importance assertions are managed by the process in userspace + * Internal importance assertions are the responsibility of the kernel + * Assertions are changed from internal to external via task_importance_externalize_assertion + */ + +/* + * Routine: ipc_importance_task_check_transition + * Purpose: + * Increase or decrement the internal task importance counter of the + * specified task and determine if propagation and a task policy + * update is required. + * + * If it is already enqueued for a policy update, steal it from that queue + * (as we are reversing that update before it happens). + * + * Conditions: + * Called with the importance lock held. + * It is the caller's responsibility to perform the propagation of the + * transition and/or policy changes by checking the return value. + */ +static boolean_t +ipc_importance_task_check_transition( + ipc_importance_task_t task_imp, + iit_update_type_t type, + uint32_t delta) +{ + + task_t target_task = task_imp->iit_task; + boolean_t boost = (IIT_UPDATE_HOLD == type); + boolean_t before_boosted, after_boosted; + + if (!ipc_importance_task_is_any_receiver_type(task_imp)) + return FALSE; + +#if IMPORTANCE_DEBUG + int target_pid = (TASK_NULL != target_task) ? audit_token_pid_from_task(target_task) : -1; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (((boost) ? 
IMP_HOLD : IMP_DROP) | TASK_POLICY_INTERNAL))) | DBG_FUNC_START, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_EXTERN(task_imp), 0); +#endif + + /* snapshot the effective boosting status before making any changes */ + before_boosted = (task_imp->iit_assertcnt > 0); + + /* Adjust the assertcnt appropriately */ + if (boost) { + task_imp->iit_assertcnt += delta; +#if IMPORTANCE_DEBUG + DTRACE_BOOST6(send_boost, task_t, target_task, int, target_pid, + task_t, current_task(), int, proc_selfpid(), int, delta, int, task_imp->iit_assertcnt); +#endif + } else { + // assert(delta <= task_imp->iit_assertcnt); + if (delta > task_imp->iit_assertcnt - IIT_EXTERN(task_imp)) { + /* TODO: Turn this back into a panic */ + if (target_task != TASK_NULL) { + printf("Over-release of kernel-internal importance assertions for pid %d (%s), " + "dropping %d assertion(s) but task only has %d remaining (%d external).\n", + audit_token_pid_from_task(target_task), + (target_task->bsd_info == NULL) ? "" : proc_name_address(target_task->bsd_info), + delta, + task_imp->iit_assertcnt, + IIT_EXTERN(task_imp)); + } + task_imp->iit_assertcnt = IIT_EXTERN(task_imp); + } else { + task_imp->iit_assertcnt -= delta; + } +#if IMPORTANCE_DEBUG + // This covers both legacy and voucher-based importance. + DTRACE_BOOST4(drop_boost, task_t, target_task, int, target_pid, int, delta, int, task_imp->iit_assertcnt); +#endif + } + +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (((boost) ? IMP_HOLD : IMP_DROP) | TASK_POLICY_INTERNAL))) | DBG_FUNC_END, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_EXTERN(task_imp), 0); +#endif + + /* did the change result in an effective donor status change? */ + after_boosted = (task_imp->iit_assertcnt > 0); + + if (after_boosted != before_boosted) { + + /* + * If the task importance is already on an update queue, we just reversed the need for a + * pending policy update. If the queue is any queue other than the delayed-drop-queue, pull it + * off that queue and release the reference it got going onto the update queue. If it is + * the delayed-drop-queue we leave it in place in case it comes back into the drop state + * before its time delay is up. + * + * We still need to propagate the change downstream to reverse the assertcnt effects, + * but we no longer need to update this task's boost policy state. + * + * Otherwise, mark it as needing a policy update. + */ + assert(0 == task_imp->iit_updatepolicy); + if (NULL != task_imp->iit_updateq) { + if (&ipc_importance_delayed_drop_queue != task_imp->iit_updateq) { + queue_remove(task_imp->iit_updateq, task_imp, ipc_importance_task_t, iit_updates); + task_imp->iit_updateq = NULL; + ipc_importance_task_release_internal(task_imp); /* can't be last ref */ + } + } else { + task_imp->iit_updatepolicy = 1; + } + return TRUE; + } + + return FALSE; +} + + +/* + * Routine: ipc_importance_task_propagate_helper + * Purpose: + * Increase or decrement the internal task importance counter of all + * importance tasks inheriting from the specified one. If this causes + * that importance task to change state, add it to the list of tasks + * to do a policy update against. + * Conditions: + * Called with the importance lock held. + * It is the caller's responsibility to iterate down the generated list + * and propagate any subsequent assertion changes from there. 
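ipc_importance_task_check_transition() above only reports TRUE when the assertion count crosses zero in either direction; changes that stay on the same side of zero propagate no policy update. A simplified standalone model of that edge detection; note the real code clamps an over-release to IIT_EXTERN() rather than to zero as done here:

#include <stdint.h>
#include <stdio.h>

struct task_imp { uint32_t assertcnt; };

/* returns nonzero only when the count crosses the zero boundary */
static int check_transition(struct task_imp *t, int hold, uint32_t delta)
{
    int before = (t->assertcnt > 0);
    if (hold)
        t->assertcnt += delta;
    else  /* simplified clamp; the kernel clamps to the external count */
        t->assertcnt = (delta > t->assertcnt) ? 0 : t->assertcnt - delta;
    return (t->assertcnt > 0) != before;
}

int main(void)
{
    struct task_imp t = { 0 };
    printf("0->1: %d\n", check_transition(&t, 1, 1));  /* edge */
    printf("1->2: %d\n", check_transition(&t, 1, 1));  /* no edge */
    printf("2->0: %d\n", check_transition(&t, 0, 2));  /* edge */
    return 0;
}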
+ */ +static void +ipc_importance_task_propagate_helper( + ipc_importance_task_t task_imp, + iit_update_type_t type, + queue_t propagation) +{ + ipc_importance_task_t temp_task_imp; + + /* + * iterate the downstream kmsgs, adjust their boosts, + * and capture the next task to adjust for each message + */ + + ipc_kmsg_t temp_kmsg; + + queue_iterate(&task_imp->iit_kmsgs, temp_kmsg, ipc_kmsg_t, ikm_inheritance) { + mach_msg_header_t *hdr = temp_kmsg->ikm_header; + mach_port_delta_t delta; + ipc_port_t port; + + /* toggle the kmsg importance bit as a barrier to parallel adjusts */ + if (IIT_UPDATE_HOLD == type) { + if (MACH_MSGH_BITS_RAISED_IMPORTANCE(hdr->msgh_bits)) { + continue; + } + + /* mark the message as now carrying importance */ + hdr->msgh_bits |= MACH_MSGH_BITS_RAISEIMP; + delta = 1; + } else { + if (!MACH_MSGH_BITS_RAISED_IMPORTANCE(hdr->msgh_bits)) { + continue; + } + + /* clear the message as now carrying importance */ + hdr->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; + delta = -1; + } + + /* determine the task importance to adjust as result (if any) */ + port = (ipc_port_t) hdr->msgh_remote_port; + assert(IP_VALID(port)); + ip_lock(port); + temp_task_imp = IIT_NULL; + if (!ipc_port_importance_delta_internal(port, &delta, &temp_task_imp)) { + ip_unlock(port); + } + + /* no task importance to adjust associated with the port? */ + if (IIT_NULL == temp_task_imp) { + continue; + } + + /* hold a reference on temp_task_imp */ + + /* Adjust the task assertions and determine if an edge was crossed */ + if (ipc_importance_task_check_transition(temp_task_imp, type, 1)) { + incr_ref_counter(task_imp->iit_elem.iie_task_refs_added_transition); + queue_enter(propagation, temp_task_imp, ipc_importance_task_t, iit_props); + /* reference donated */ + } else { + ipc_importance_task_release_internal(temp_task_imp); + } + } + + /* + * iterate the downstream importance inherits + * and capture the next task importance to boost for each + */ + ipc_importance_inherit_t temp_inherit; + + queue_iterate(&task_imp->iit_inherits, temp_inherit, ipc_importance_inherit_t, iii_inheritance) { + uint32_t assertcnt = III_EXTERN(temp_inherit); + + temp_task_imp = temp_inherit->iii_to_task; + assert(IIT_NULL != temp_task_imp); + + if (IIT_UPDATE_HOLD == type) { + /* if no undropped externcnts in the inherit, nothing to do */ + if (0 == assertcnt) { + assert(temp_inherit->iii_donating == FALSE); + continue; + } + + /* nothing to do if the inherit is already donating (forced donation) */ + if (temp_inherit->iii_donating) { + continue; + } + + /* mark it donating and contribute to the task externcnts */ + temp_inherit->iii_donating = TRUE; + temp_task_imp->iit_externcnt += temp_inherit->iii_externcnt; + temp_task_imp->iit_externdrop += temp_inherit->iii_externdrop; + + } else { + /* if no contributing assertions, move on */ + if (0 == assertcnt) { + assert(temp_inherit->iii_donating == FALSE); + continue; + } + + /* nothing to do if the inherit is not donating */ + if (!temp_inherit->iii_donating) { + continue; + } + + /* mark it no longer donating */ + temp_inherit->iii_donating = FALSE; + + /* remove the contribution the inherit made to the to-task */ + assert(IIT_EXTERN(temp_task_imp) >= III_EXTERN(temp_inherit)); + assert(temp_task_imp->iit_externcnt >= temp_inherit->iii_externcnt); + assert(temp_task_imp->iit_externdrop >= temp_inherit->iii_externdrop); + temp_task_imp->iit_externcnt -= temp_inherit->iii_externcnt; + temp_task_imp->iit_externdrop -= temp_inherit->iii_externdrop; + + } + + /* Adjust the task 
assertions and determine if an edge was crossed */ + assert(ipc_importance_task_is_any_receiver_type(temp_task_imp)); + if (ipc_importance_task_check_transition(temp_task_imp, type, assertcnt)) { + ipc_importance_task_reference(temp_task_imp); + incr_ref_counter(task_imp->iit_elem.iie_task_refs_added_transition); + queue_enter(propagation, temp_task_imp, ipc_importance_task_t, iit_props); + } + } +} + +/* + * Routine: ipc_importance_task_process_updates + * Purpose: + * Process the queue of task importances and apply the policy + * update called for. Only process tasks in the queue with an + * update timestamp less than the supplied max. + * Conditions: + * Called and returns with importance locked. + * May drop importance lock and block temporarily. + */ +static void +ipc_importance_task_process_updates( + queue_t supplied_queue, + boolean_t boost, + uint64_t max_timestamp) +{ + ipc_importance_task_t task_imp; + queue_head_t second_chance; + queue_t queue = supplied_queue; + + /* + * This queue will hold the tasks we couldn't trylock on first pass. + * By using a second (private) queue, we guarantee all tasks that get + * entered on this queue have a timestamp under the maximum. + */ + queue_init(&second_chance); + + /* process any resulting policy updates */ + retry: + while(!queue_empty(queue)) { + task_t target_task; + struct task_pend_token pend_token = {}; + + task_imp = (ipc_importance_task_t)queue_first(queue); + assert(0 == task_imp->iit_updatepolicy); + assert(queue == task_imp->iit_updateq); + + /* if timestamp is too big, we're done */ + if (task_imp->iit_updatetime > max_timestamp) { + break; + } + + /* we were given a reference on each task in the queue */ + + /* remove it from the supplied queue */ + queue_remove(queue, task_imp, ipc_importance_task_t, iit_updates); + task_imp->iit_updateq = NULL; + + target_task = task_imp->iit_task; + + /* Is it well on the way to exiting? */ + if (TASK_NULL == target_task) { + ipc_importance_task_release_locked(task_imp); + /* importance unlocked */ + ipc_importance_lock(); + continue; + } + + /* Has the update been reversed on the hysteresis queue? */ + if (0 < task_imp->iit_assertcnt && + queue == &ipc_importance_delayed_drop_queue) { + ipc_importance_task_release_locked(task_imp); + /* importance unlocked */ + ipc_importance_lock(); + continue; + } + + /* + * Can we get the task lock out-of-order? + * If not, stick this back on the second-chance queue. + */ + if (!task_lock_try(target_task)) { + boolean_t should_wait_lock = (queue == &second_chance); + task_imp->iit_updateq = &second_chance; + + /* + * If we're already processing second-chances on + * tasks, keep this task on the front of the queue. + * We will wait for the task lock before coming + * back and trying again, and we have a better + * chance of re-acquiring the lock if we come back + * to it right away. + */ + if (should_wait_lock){ + task_reference(target_task); + queue_enter_first(&second_chance, task_imp, + ipc_importance_task_t, iit_updates); + } else { + queue_enter(&second_chance, task_imp, + ipc_importance_task_t, iit_updates); + } + ipc_importance_unlock(); + + if (should_wait_lock) { + task_lock(target_task); + task_unlock(target_task); + task_deallocate(target_task); + } + + ipc_importance_lock(); + continue; + } + + /* is it going away? 
*/ + if (!target_task->active) { + task_unlock(target_task); + ipc_importance_task_release_locked(task_imp); + /* importance unlocked */ + ipc_importance_lock(); + continue; + } + + /* take a task reference for while we don't have the importance lock */ + task_reference(target_task); + + /* count the transition */ + if (boost) + task_imp->iit_transitions++; + + ipc_importance_unlock(); + + /* apply the policy adjust to the target task (while it is still locked) */ + task_update_boost_locked(target_task, boost, &pend_token); + + /* complete the policy update with the task unlocked */ + ipc_importance_task_release(task_imp); + task_unlock(target_task); + task_policy_update_complete_unlocked(target_task, THREAD_NULL, &pend_token); + task_deallocate(target_task); + + ipc_importance_lock(); + } + + /* If there are tasks we couldn't update the first time, try again */ + if (!queue_empty(&second_chance)) { + queue = &second_chance; + goto retry; + } +} + + +/* + * Routine: ipc_importance_task_delayed_drop_scan + * Purpose: + * The thread call routine to scan the delayed drop queue, + * requesting all updates with a deadline up to the last target + * for the thread-call (which is DENAP_DROP_SKEW beyond the first + * thread's optimum delay). + * Conditions: + * Nothing locked + */ +static void +ipc_importance_task_delayed_drop_scan( + __unused void *arg1, + __unused void *arg2) +{ + ipc_importance_lock(); + + /* process all queued task drops with timestamps up to TARGET(first)+SKEW */ + ipc_importance_task_process_updates(&ipc_importance_delayed_drop_queue, + FALSE, + ipc_importance_delayed_drop_timestamp); + + /* importance lock may have been temporarily dropped */ + + /* If there are any entries left in the queue, re-arm the call here */ + if (!queue_empty(&ipc_importance_delayed_drop_queue)) { + ipc_importance_task_t task_imp; + uint64_t deadline; + uint64_t leeway; + + task_imp = (ipc_importance_task_t)queue_first(&ipc_importance_delayed_drop_queue); + + nanoseconds_to_absolutetime(DENAP_DROP_DELAY, &deadline); + deadline += task_imp->iit_updatetime; + ipc_importance_delayed_drop_timestamp = deadline; + + nanoseconds_to_absolutetime(DENAP_DROP_LEEWAY, &leeway); + + thread_call_enter_delayed_with_leeway( + ipc_importance_delayed_drop_call, + NULL, + deadline, + leeway, + DENAP_DROP_FLAGS); + } else { + ipc_importance_delayed_drop_call_requested = FALSE; + } + ipc_importance_unlock(); +} + +/* + * Routine: ipc_importance_task_delayed_drop + * Purpose: + * Queue the specified task importance for delayed policy + * update to drop its boost. + * Conditions: + * Called with the importance lock held. + */ +static void +ipc_importance_task_delayed_drop(ipc_importance_task_t task_imp) +{ + uint64_t timestamp = mach_absolute_time(); /* no mach_approximate_time() in kernel */ + + assert(ipc_importance_delayed_drop_call != NULL); + + /* + * If still on an update queue from a previous change, + * remove it first (and use that reference). Otherwise, take + * a new reference for the delay drop update queue. 
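The delayed-drop machinery arms one thread call for the whole queue: the deadline is the first entry's update time plus DENAP_DROP_DELAY, and DENAP_DROP_LEEWAY gives the scheduler room to coalesce nearby wakeups into a single pass. A sketch of that arithmetic using the constants defined earlier in this file, with absolute-time units simplified to plain nanoseconds:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC     1000000ull
#define DENAP_DROP_TARGET (1000 * NSEC_PER_MSEC)  /* optimum denap delay */
#define DENAP_DROP_SKEW   (100 * NSEC_PER_MSEC)   /* request skew for wakeup */
#define DENAP_DROP_LEEWAY (2 * DENAP_DROP_SKEW)   /* specified wakeup leeway */
#define DENAP_DROP_DELAY  (DENAP_DROP_TARGET + DENAP_DROP_SKEW)

int main(void)
{
    /* sample enqueue time of the first (oldest) queue entry */
    uint64_t first_update_time = 5000 * NSEC_PER_MSEC;

    /* one wakeup covers every drop whose target falls before deadline */
    uint64_t deadline = first_update_time + DENAP_DROP_DELAY;

    printf("fire at %llu ns, with %llu ns of leeway\n",
           (unsigned long long)deadline,
           (unsigned long long)DENAP_DROP_LEEWAY);
    return 0;
}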
+ */ + if (NULL != task_imp->iit_updateq) { + queue_remove(task_imp->iit_updateq, task_imp, + ipc_importance_task_t, iit_updates); + } else { + ipc_importance_task_reference_internal(task_imp); + } + + task_imp->iit_updateq = &ipc_importance_delayed_drop_queue; + task_imp->iit_updatetime = timestamp; + + queue_enter(&ipc_importance_delayed_drop_queue, task_imp, + ipc_importance_task_t, iit_updates); + + /* request the delayed thread-call if not already requested */ + if (!ipc_importance_delayed_drop_call_requested) { + uint64_t deadline; + uint64_t leeway; + + nanoseconds_to_absolutetime(DENAP_DROP_DELAY, &deadline); + deadline += task_imp->iit_updatetime; + ipc_importance_delayed_drop_timestamp = deadline; + + nanoseconds_to_absolutetime(DENAP_DROP_LEEWAY, &leeway); + + ipc_importance_delayed_drop_call_requested = TRUE; + thread_call_enter_delayed_with_leeway( + ipc_importance_delayed_drop_call, + NULL, + deadline, + leeway, + DENAP_DROP_FLAGS); + } +} + + +/* + * Routine: ipc_importance_task_propagate_assertion_locked + * Purpose: + * Propagate the importance transition type to every item. + * If this causes a boost to be applied, determine if that + * boost should propagate downstream. + * Conditions: + * Called with the importance lock held. + */ +static void +ipc_importance_task_propagate_assertion_locked( + ipc_importance_task_t task_imp, + iit_update_type_t type, + boolean_t update_task_imp) +{ + boolean_t boost = (IIT_UPDATE_HOLD == type); + ipc_importance_task_t temp_task_imp; + queue_head_t propagate; + queue_head_t updates; + + queue_init(&updates); + queue_init(&propagate); + + /* + * If we're going to update the policy for the provided task, + * enqueue it on the propagate queue itself. Otherwise, only + * enqueue downstream things. + */ + if (update_task_imp) { + queue_enter(&propagate, task_imp, ipc_importance_task_t, iit_props); + } else { + ipc_importance_task_propagate_helper(task_imp, type, &propagate); + } + + /* + * for each item on the propagation list, propagate any change downstream, + * adding new tasks to propagate further if they transitioned as well. 
+ */ + while (!queue_empty(&propagate)) { + boolean_t need_update; + + queue_remove_first(&propagate, temp_task_imp, ipc_importance_task_t, iit_props); + assert(IIT_NULL != temp_task_imp); + + /* only propagate for receivers not already marked as a donor */ + if (!ipc_importance_task_is_marked_donor(temp_task_imp) && + ipc_importance_task_is_marked_receiver(temp_task_imp)) { + ipc_importance_task_propagate_helper(temp_task_imp, type, &propagate); + } + + /* if we have a policy update to apply, enqueue a reference for later processing */ + need_update = (0 != temp_task_imp->iit_updatepolicy); + temp_task_imp->iit_updatepolicy = 0; + if (need_update && TASK_NULL != temp_task_imp->iit_task) { + if (NULL == temp_task_imp->iit_updateq) { + temp_task_imp->iit_updatetime = 0; + temp_task_imp->iit_updateq = &updates; + ipc_importance_task_reference_internal(temp_task_imp); + if (boost) { + queue_enter(&updates, temp_task_imp, + ipc_importance_task_t, iit_updates); + } else { + queue_enter_first(&updates, temp_task_imp, + ipc_importance_task_t, iit_updates); + } + } else { + /* Must already be on the AppNap hysteresis queue */ + assert(&ipc_importance_delayed_drop_queue); + assert(ipc_importance_task_is_marked_denap_receiver(temp_task_imp)); + } + } + } + + /* apply updates to task (may drop importance lock) */ + if (!queue_empty(&updates)) { + ipc_importance_task_process_updates(&updates, boost, 0); + } +} + +/* + * Routine: ipc_importance_task_hold_internal_assertion_locked + * Purpose: + * Increment the assertion count on the task importance. + * If this results in a boost state change in that task, + * prepare to update task policy for this task AND, if + * not just waking out of App Nap, all down-stream + * tasks that have a similar transition through inheriting + * this update. + * Conditions: + * importance locked on entry and exit. + * May temporarily drop importance lock and block. + */ +static kern_return_t +ipc_importance_task_hold_internal_assertion_locked(ipc_importance_task_t task_imp, uint32_t count) +{ + if (ipc_importance_task_check_transition(task_imp, IIT_UPDATE_HOLD, count)) { + ipc_importance_task_propagate_assertion_locked(task_imp, IIT_UPDATE_HOLD, TRUE); + } + return KERN_SUCCESS; +} + +/* + * Routine: ipc_importance_task_drop_internal_assertion_locked + * Purpose: + * Decrement the assertion count on the task importance. + * If this results in a boost state change in that task, + * prepare to update task policy for this task AND, if + * not just waking out of App Nap, all down-stream + * tasks that have a similar transition through inheriting + * this update. + * Conditions: + * importance locked on entry and exit. + * May temporarily drop importance lock and block. + */ +static kern_return_t +ipc_importance_task_drop_internal_assertion_locked(ipc_importance_task_t task_imp, uint32_t count) +{ + if (ipc_importance_task_check_transition(task_imp, IIT_UPDATE_DROP, count)) { + ipc_importance_task_propagate_assertion_locked(task_imp, IIT_UPDATE_DROP, TRUE); + } + return KERN_SUCCESS; +} + +/* + * Routine: ipc_importance_task_hold_internal_assertion + * Purpose: + * Increment the assertion count on the task importance. + * If this results in a 0->1 change in that count, + * prepare to update task policy for this task AND + * (potentially) all down-stream tasks that have a + * similar transition through inheriting this update. + * Conditions: + * Nothing locked + * May block after dropping importance lock. 
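ipc_importance_task_propagate_assertion_locked() drains a work queue rather than recursing: each dequeued element may enqueue the further elements it boosts, so arbitrarily deep importance chains are walked iteratively with bounded stack depth. A small standalone model of that worklist shape over a toy inheritance graph (all data invented):

#include <stdio.h>

#define MAX 16
static int worklist[MAX], head, tail;

static void push(int n) { worklist[tail++ % MAX] = n; }
static int  pop(void)   { return worklist[head++ % MAX]; }

/* edges[n] lists nodes inheriting importance from n; -1 terminates */
static int edges[4][2] = { {1, 2}, {3, -1}, {-1, -1}, {-1, -1} };
static int boosted[4];

int main(void)
{
    push(0);                           /* seed with the updated task */
    while (head != tail) {
        int n = pop();
        for (int i = 0; i < 2 && edges[n][i] >= 0; i++) {
            int m = edges[n][i];
            if (!boosted[m]) {         /* 0->1 edge: propagate further */
                boosted[m] = 1;
                push(m);
            }
        }
    }
    printf("boosted: %d %d %d\n", boosted[1], boosted[2], boosted[3]);
    return 0;
}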
+ */ +int +ipc_importance_task_hold_internal_assertion(ipc_importance_task_t task_imp, uint32_t count) +{ + int ret = KERN_SUCCESS; + + if (ipc_importance_task_is_any_receiver_type(task_imp)) { + ipc_importance_lock(); + ret = ipc_importance_task_hold_internal_assertion_locked(task_imp, count); + ipc_importance_unlock(); + } + return ret; +} + +/* + * Routine: ipc_importance_task_drop_internal_assertion + * Purpose: + * Decrement the assertion count on the task importance. + * If this results in a X->0 change in that count, + * prepare to update task policy for this task AND + * all down-stream tasks that have a similar transition + * through inheriting this drop update. + * Conditions: + * Nothing locked on entry. + * May block after dropping importance lock. + */ +kern_return_t +ipc_importance_task_drop_internal_assertion(ipc_importance_task_t task_imp, uint32_t count) +{ + kern_return_t ret = KERN_SUCCESS; + + if (ipc_importance_task_is_any_receiver_type(task_imp)) { + ipc_importance_lock(); + ret = ipc_importance_task_drop_internal_assertion_locked(task_imp, count); + ipc_importance_unlock(); + } + return ret; +} + +/* + * Routine: ipc_importance_task_hold_file_lock_assertion + * Purpose: + * Increment the file lock assertion count on the task importance. + * If this results in a 0->1 change in that count, + * prepare to update task policy for this task AND + * (potentially) all down-stream tasks that have a + * similar transition through inheriting this update. + * Conditions: + * Nothing locked + * May block after dropping importance lock. + */ +kern_return_t +ipc_importance_task_hold_file_lock_assertion(ipc_importance_task_t task_imp, uint32_t count) +{ + kern_return_t ret = KERN_SUCCESS; + + if (ipc_importance_task_is_any_receiver_type(task_imp)) { + ipc_importance_lock(); + ret = ipc_importance_task_hold_internal_assertion_locked(task_imp, count); + if (KERN_SUCCESS == ret) { + task_imp->iit_filelocks += count; + } + ipc_importance_unlock(); + } + return ret; +} + +/* + * Routine: ipc_importance_task_drop_file_lock_assertion + * Purpose: + * Decrement the assertion count on the task importance. + * If this results in a X->0 change in that count, + * prepare to update task policy for this task AND + * all down-stream tasks that have a similar transition + * through inheriting this drop update. + * Conditions: + * Nothing locked on entry. + * May block after dropping importance lock. + */ +kern_return_t +ipc_importance_task_drop_file_lock_assertion(ipc_importance_task_t task_imp, uint32_t count) +{ + kern_return_t ret = KERN_SUCCESS; + + if (ipc_importance_task_is_any_receiver_type(task_imp)) { + ipc_importance_lock(); + if (count <= task_imp->iit_filelocks) { + task_imp->iit_filelocks -= count; + ret = ipc_importance_task_drop_internal_assertion_locked(task_imp, count); + } else { + ret = KERN_INVALID_ARGUMENT; + } + ipc_importance_unlock(); + } + return ret; +} + +/* + * Routine: ipc_importance_task_hold_legacy_external_assertion + * Purpose: + * Increment the external assertion count on the task importance. + * This cannot result in an 0->1 transition, as the caller must + * already hold an external boost. + * Conditions: + * Nothing locked on entry. + * May block after dropping importance lock. + * A queue of task importance structures is returned + * by ipc_importance_task_hold_assertion_locked(). Each + * needs to be updated (outside the importance lock hold). 
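The file-lock wrappers above layer a dedicated iit_filelocks count over the ordinary internal assertion, so an over-release of file-lock boosts is rejected up front with KERN_INVALID_ARGUMENT instead of corrupting the shared count. A simplified model without the locking; the field names mirror the ones above, the logic is reduced to the counting:

#include <stdint.h>
#include <stdio.h>

struct task_imp { uint32_t assertcnt; uint32_t filelocks; };

static int hold_file_lock(struct task_imp *t, uint32_t count)
{
    t->assertcnt += count;           /* internal hold */
    t->filelocks += count;           /* remember it was a file-lock boost */
    return 0;                        /* KERN_SUCCESS */
}

static int drop_file_lock(struct task_imp *t, uint32_t count)
{
    if (count > t->filelocks)
        return 1;                    /* KERN_INVALID_ARGUMENT */
    t->filelocks -= count;
    t->assertcnt -= count;           /* internal drop */
    return 0;
}

int main(void)
{
    struct task_imp t = {0, 0};
    hold_file_lock(&t, 1);
    printf("drop ok=%d, over-release rejected=%d\n",
           drop_file_lock(&t, 1), drop_file_lock(&t, 1));
    return 0;
}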
+ */ +kern_return_t +ipc_importance_task_hold_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count) +{ + task_t target_task; + uint32_t target_assertcnt; + uint32_t target_externcnt; + uint32_t target_legacycnt; + + kern_return_t ret; + + ipc_importance_lock(); + target_task = task_imp->iit_task; + +#if IMPORTANCE_DEBUG + int target_pid = (TASK_NULL != target_task) ? audit_token_pid_from_task(target_task) : -1; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | TASK_POLICY_EXTERNAL))) | DBG_FUNC_START, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_LEGACY_EXTERN(task_imp), 0); +#endif + + if (IIT_LEGACY_EXTERN(task_imp) == 0) { + /* Only allowed to take a new boost assertion when holding an external boost */ + /* save data for diagnostic printf below */ + target_assertcnt = task_imp->iit_assertcnt; + target_externcnt = IIT_EXTERN(task_imp); + target_legacycnt = IIT_LEGACY_EXTERN(task_imp); + ret = KERN_FAILURE; + count = 0; + } else { + assert(ipc_importance_task_is_any_receiver_type(task_imp)); + assert(0 < task_imp->iit_assertcnt); + assert(0 < IIT_EXTERN(task_imp)); + task_imp->iit_assertcnt += count; + task_imp->iit_externcnt += count; + task_imp->iit_legacy_externcnt += count; + ret = KERN_SUCCESS; + } + ipc_importance_unlock(); + +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | TASK_POLICY_EXTERNAL))) | DBG_FUNC_END, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_LEGACY_EXTERN(task_imp), 0); + // This covers the legacy case where a task takes an extra boost. + DTRACE_BOOST5(receive_boost, task_t, target_task, int, target_pid, int, proc_selfpid(), int, count, int, task_imp->iit_assertcnt); +#endif + + if (KERN_FAILURE == ret && target_task != TASK_NULL) { + printf("BUG in process %s[%d]: " + "attempt to acquire an additional legacy external boost assertion without holding an existing legacy external assertion. " + "(%d total, %d external, %d legacy-external)\n", + proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task), + target_assertcnt, target_externcnt, target_legacycnt); + } + + return(ret); +} + +/* + * Routine: ipc_importance_task_drop_legacy_external_assertion + * Purpose: + * Drop the legacy external assertion count on the task and + * reflect that change to total external assertion count and + * then onto the internal importance count. + * + * If this results in a X->0 change in the internal, + * count, prepare to update task policy for this task AND + * all down-stream tasks that have a similar transition + * through inheriting this update. + * Conditions: + * Nothing locked on entry. + */ +kern_return_t +ipc_importance_task_drop_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count) +{ + int ret = KERN_SUCCESS; + task_t target_task; + uint32_t target_assertcnt; + uint32_t target_externcnt; + uint32_t target_legacycnt; + + if (count > 1) { + return KERN_INVALID_ARGUMENT; + } + + ipc_importance_lock(); + target_task = task_imp->iit_task; + +#if IMPORTANCE_DEBUG + int target_pid = (TASK_NULL != target_task) ? 
audit_token_pid_from_task(target_task) : -1; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | TASK_POLICY_EXTERNAL))) | DBG_FUNC_START, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_LEGACY_EXTERN(task_imp), 0); +#endif + + if (count > IIT_LEGACY_EXTERN(task_imp)) { + /* Process over-released its boost count - save data for diagnostic printf */ + /* TODO: If count > 1, we should clear out as many external assertions as there are left. */ + target_assertcnt = task_imp->iit_assertcnt; + target_externcnt = IIT_EXTERN(task_imp); + target_legacycnt = IIT_LEGACY_EXTERN(task_imp); + ret = KERN_FAILURE; + } else { + /* + * decrement legacy external count from the top level and reflect + * into internal for this and all subsequent updates. + */ + assert(ipc_importance_task_is_any_receiver_type(task_imp)); + assert(IIT_EXTERN(task_imp) >= count); + + task_imp->iit_legacy_externdrop += count; + task_imp->iit_externdrop += count; + + /* reset extern counters (if appropriate) */ + if (IIT_LEGACY_EXTERN(task_imp) == 0) { + if (IIT_EXTERN(task_imp) != 0) { + task_imp->iit_externcnt -= task_imp->iit_legacy_externcnt; + task_imp->iit_externdrop -= task_imp->iit_legacy_externdrop; + } else { + task_imp->iit_externcnt = 0; + task_imp->iit_externdrop = 0; + } + task_imp->iit_legacy_externcnt = 0; + task_imp->iit_legacy_externdrop = 0; + } + + /* reflect the drop to the internal assertion count (and effect any importance change) */ + if (ipc_importance_task_check_transition(task_imp, IIT_UPDATE_DROP, count)) { + ipc_importance_task_propagate_assertion_locked(task_imp, IIT_UPDATE_DROP, TRUE); + } + ret = KERN_SUCCESS; + } + +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | TASK_POLICY_EXTERNAL))) | DBG_FUNC_END, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_LEGACY_EXTERN(task_imp), 0); +#endif + + ipc_importance_unlock(); + + /* delayed printf for user-supplied data failures */ + if (KERN_FAILURE == ret && TASK_NULL != target_task) { + printf("BUG in process %s[%d]: over-released legacy external boost assertions (%d total, %d external, %d legacy-external)\n", + proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task), + target_assertcnt, target_externcnt, target_legacycnt); + } + + return(ret); +} + + + +/* Transfer an assertion to legacy userspace responsibility */ +static kern_return_t +ipc_importance_task_externalize_legacy_assertion(ipc_importance_task_t task_imp, uint32_t count, __unused int sender_pid) +{ + task_t target_task; + + assert(IIT_NULL != task_imp); + target_task = task_imp->iit_task; + + if (TASK_NULL == target_task || + !ipc_importance_task_is_any_receiver_type(task_imp)) { + return KERN_FAILURE; + } + +#if IMPORTANCE_DEBUG + int target_pid = audit_token_pid_from_task(target_task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_START, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_EXTERN(task_imp), 0); +#endif + + ipc_importance_lock(); + /* assert(task_imp->iit_assertcnt >= IIT_EXTERN(task_imp) + count); */ + assert(IIT_EXTERN(task_imp) >= IIT_LEGACY_EXTERN(task_imp)); + task_imp->iit_legacy_externcnt += count; + task_imp->iit_externcnt += count; + ipc_importance_unlock(); + +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_END, + proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_LEGACY_EXTERN(task_imp), 0); 
+ // This is the legacy boosting path + DTRACE_BOOST5(receive_boost, task_t, target_task, int, target_pid, int, sender_pid, int, count, int, IIT_LEGACY_EXTERN(task_imp)); +#endif /* IMPORTANCE_DEBUG */ + + return(KERN_SUCCESS); +} + +/* + * Routine: ipc_importance_task_update_live_donor + * Purpose: + * Read the live donor status and update the live_donor bit/propagate the change in importance. + * Conditions: + * Nothing locked on entrance, nothing locked on exit. + * + * TODO: Need tracepoints around this function... + */ +void +ipc_importance_task_update_live_donor(ipc_importance_task_t task_imp) +{ + uint32_t task_live_donor; + boolean_t before_donor; + boolean_t after_donor; + task_t target_task; + + assert(task_imp != NULL); + + /* + * Nothing to do if the task is not marked as expecting + * live donor updates. + */ + if (!ipc_importance_task_is_marked_live_donor(task_imp)) { + return; + } + + ipc_importance_lock(); + + /* If the task got disconnected on the way here, no use (or ability) adjusting live donor status */ + target_task = task_imp->iit_task; + if (TASK_NULL == target_task) { + ipc_importance_unlock(); + return; + } + before_donor = ipc_importance_task_is_marked_donor(task_imp); + + /* snapshot task live donor status - may change, but another call will accompany the change */ + task_live_donor = target_task->effective_policy.t_live_donor; + +#if IMPORTANCE_DEBUG + int target_pid = audit_token_pid_from_task(target_task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_DONOR_CHANGE, IMP_DONOR_UPDATE_LIVE_DONOR_STATE)) | DBG_FUNC_START, + target_pid, task_imp->iit_donor, task_live_donor, before_donor, 0); +#endif + + /* update the task importance live donor status based on the task's value */ + task_imp->iit_donor = task_live_donor; + + after_donor = ipc_importance_task_is_marked_donor(task_imp); + + /* Has the effectiveness of being a donor changed as a result of this update? */ + if (before_donor != after_donor) { + iit_update_type_t type; + + /* propagate assertions without updating the current task policy (already handled) */ + if (0 == before_donor) { + task_imp->iit_transitions++; + type = IIT_UPDATE_HOLD; + } else { + type = IIT_UPDATE_DROP; + } + ipc_importance_task_propagate_assertion_locked(task_imp, type, FALSE); + } + +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_DONOR_CHANGE, IMP_DONOR_UPDATE_LIVE_DONOR_STATE)) | DBG_FUNC_END, + target_pid, task_imp->iit_donor, task_live_donor, after_donor, 0); +#endif + + ipc_importance_unlock(); +} + + +/* + * Routine: ipc_importance_task_mark_donor + * Purpose: + * Set the task importance donor flag. + * Conditions: + * Nothing locked on entrance, nothing locked on exit. + * + * This is only called while the task is being constructed, + * so no need to update task policy or propagate downstream. + */ +void +ipc_importance_task_mark_donor(ipc_importance_task_t task_imp, boolean_t donating) +{ + assert(task_imp != NULL); + + ipc_importance_lock(); + + int old_donor = task_imp->iit_donor; + + task_imp->iit_donor = (donating ? 
1 : 0); + + if (task_imp->iit_donor > 0 && old_donor == 0) + task_imp->iit_transitions++; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_DONOR_CHANGE, IMP_DONOR_INIT_DONOR_STATE)) | DBG_FUNC_NONE, + audit_token_pid_from_task(task_imp->iit_task), donating, + old_donor, task_imp->iit_donor, 0); + + ipc_importance_unlock(); +} + +/* + * Routine: ipc_importance_task_is_marked_donor + * Purpose: + * Query the donor flag for the given task importance. + * Conditions: + * May be called without taking the importance lock. + * In that case, donor status can change so you must + * check only once for each donation event. + */ +boolean_t +ipc_importance_task_is_marked_donor(ipc_importance_task_t task_imp) +{ + if (IIT_NULL == task_imp) { + return FALSE; + } + return (0 != task_imp->iit_donor); +} + +/* + * Routine: ipc_importance_task_mark_live_donor + * Purpose: + * Indicate that the task is eligible for live donor updates. + * Conditions: + * Nothing locked on entrance, nothing locked on exit. + * + * This is only called while the task is being constructed. + */ +void +ipc_importance_task_mark_live_donor(ipc_importance_task_t task_imp, boolean_t live_donating) +{ + assert(task_imp != NULL); + + ipc_importance_lock(); + task_imp->iit_live_donor = (live_donating ? 1 : 0); + ipc_importance_unlock(); +} + +/* + * Routine: ipc_importance_task_is_marked_live_donor + * Purpose: + * Query the live donor and donor flags for the given task importance. + * Conditions: + * May be called without taking the importance lock. + * In that case, donor status can change so you must + * check only once for each donation event. + */ +boolean_t +ipc_importance_task_is_marked_live_donor(ipc_importance_task_t task_imp) +{ + if (IIT_NULL == task_imp) { + return FALSE; + } + return (0 != task_imp->iit_live_donor); +} + +/* + * Routine: ipc_importance_task_is_donor + * Purpose: + * Query the full donor status for the given task importance. + * Conditions: + * May be called without taking the importance lock. + * In that case, donor status can change so you must + * check only once for each donation event. + */ +boolean_t +ipc_importance_task_is_donor(ipc_importance_task_t task_imp) +{ + if (IIT_NULL == task_imp) { + return FALSE; + } + return (ipc_importance_task_is_marked_donor(task_imp) || + (ipc_importance_task_is_marked_receiver(task_imp) && + task_imp->iit_assertcnt > 0)); +} + +/* + * Routine: ipc_importance_task_is_never_donor + * Purpose: + * Query if a given task can ever donate importance. + * Conditions: + * May be called without taking the importance lock. + * Condition is permanent for a given task. + */ +boolean_t +ipc_importance_task_is_never_donor(ipc_importance_task_t task_imp) +{ + if (IIT_NULL == task_imp) { + return FALSE; + } + return (!ipc_importance_task_is_marked_donor(task_imp) && + !ipc_importance_task_is_marked_live_donor(task_imp) && + !ipc_importance_task_is_marked_receiver(task_imp)); +} + +/* + * Routine: ipc_importance_task_mark_receiver + * Purpose: + * Update the task importance receiver flag. + * Conditions: + * Nothing locked on entrance, nothing locked on exit. + * This can only be invoked before the task is discoverable, + * so no worries about atomicity(?)
+ */ +void +ipc_importance_task_mark_receiver(ipc_importance_task_t task_imp, boolean_t receiving) +{ + assert(task_imp != NULL); + + ipc_importance_lock(); + if (receiving) { + assert(task_imp->iit_assertcnt == 0); + assert(task_imp->iit_externcnt == 0); + assert(task_imp->iit_externdrop == 0); + assert(task_imp->iit_denap == 0); + task_imp->iit_receiver = 1; /* task can receive importance boost */ + } else if (task_imp->iit_receiver) { + assert(task_imp->iit_denap == 0); + if (task_imp->iit_assertcnt != 0 || IIT_EXTERN(task_imp) != 0) { + panic("disabling imp_receiver on task with pending importance boosts!"); + } + task_imp->iit_receiver = 0; + } + ipc_importance_unlock(); +} + + +/* + * Routine: ipc_importance_task_is_marked_receiver + * Purpose: + * Query the receiver flag for the given task importance. + * Conditions: + * May be called without taking the importance lock as + * the receiver flag can never change after task init. + */ +boolean_t +ipc_importance_task_is_marked_receiver(ipc_importance_task_t task_imp) +{ + return (IIT_NULL != task_imp && 0 != task_imp->iit_receiver); +} + + +/* + * Routine: ipc_importance_task_mark_denap_receiver + * Purpose: + * Update the task importance de-nap receiver flag. + * Conditions: + * Nothing locked on entrance, nothing locked on exit. + * This can only be invoked before the task is discoverable, + * so no worries about atomicity(?) + */ +void +ipc_importance_task_mark_denap_receiver(ipc_importance_task_t task_imp, boolean_t denap) +{ + assert(task_imp != NULL); + + ipc_importance_lock(); + if (denap) { + assert(task_imp->iit_assertcnt == 0); + assert(task_imp->iit_externcnt == 0); + assert(task_imp->iit_receiver == 0); + task_imp->iit_denap = 1; /* task can receive de-nap boost */ + } else if (task_imp->iit_denap) { + assert(task_imp->iit_receiver == 0); + if (0 < task_imp->iit_assertcnt || 0 < IIT_EXTERN(task_imp)) { + panic("disabling de-nap on task with pending de-nap boosts!"); + } + task_imp->iit_denap = 0; + } + ipc_importance_unlock(); +} + + +/* + * Routine: ipc_importance_task_is_marked_denap_receiver + * Purpose: + * Query the de-nap receiver flag for the given task importance. + * Conditions: + * May be called without taking the importance lock as + * the de-nap flag can never change after task init. + */ +boolean_t +ipc_importance_task_is_marked_denap_receiver(ipc_importance_task_t task_imp) +{ + return (IIT_NULL != task_imp && 0 != task_imp->iit_denap); +} + +/* + * Routine: ipc_importance_task_is_denap_receiver + * Purpose: + * Query the full de-nap receiver status for the given task importance. + * For now, that is simply whether the receiver flag is set. + * Conditions: + * May be called without taking the importance lock as + * the de-nap receiver flag can never change after task init. + */ +boolean_t +ipc_importance_task_is_denap_receiver(ipc_importance_task_t task_imp) +{ + return (ipc_importance_task_is_marked_denap_receiver(task_imp)); +} + +/* + * Routine: ipc_importance_task_is_any_receiver_type + * Purpose: + * Query if the task is marked to receive boosts - either + * importance or denap. + * Conditions: + * May be called without taking the importance lock as both + * the importance and de-nap receiver flags can never change + * after task init.
+ */ +boolean_t +ipc_importance_task_is_any_receiver_type(ipc_importance_task_t task_imp) +{ + return (ipc_importance_task_is_marked_receiver(task_imp) || + ipc_importance_task_is_marked_denap_receiver(task_imp)); +} + +#if 0 /* currently unused */ + +/* + * Routine: ipc_importance_inherit_reference + * Purpose: + * Add a reference to the inherit importance element. + * Conditions: + * Caller must hold a reference on the inherit element. + */ +static inline void +ipc_importance_inherit_reference(ipc_importance_inherit_t inherit) +{ + ipc_importance_reference(&inherit->iii_elem); +} +#endif /* currently unused */ + +/* + * Routine: ipc_importance_inherit_release_locked + * Purpose: + * Release a reference on an inherit importance attribute value, + * unlinking and deallocating the attribute if the last reference. + * Conditions: + * Entered with importance lock held, leaves with it unlocked. + */ +static inline void +ipc_importance_inherit_release_locked(ipc_importance_inherit_t inherit) +{ + ipc_importance_release_locked(&inherit->iii_elem); +} + +#if 0 /* currently unused */ +/* + * Routine: ipc_importance_inherit_release + * Purpose: + * Release a reference on an inherit importance attribute value, + * unlinking and deallocating the attribute if the last reference. + * Conditions: + * nothing locked on entrance, nothing locked on exit. + * May block. + */ +void +ipc_importance_inherit_release(ipc_importance_inherit_t inherit) +{ + if (III_NULL != inherit) + ipc_importance_release(&inherit->iii_elem); +} +#endif /* 0 currently unused */ + +/* + * Routine: ipc_importance_for_task + * Purpose: + * Create a reference for the specified task's base importance + * element. If the base importance element doesn't exist, make it and + * bind it to the active task. If the task is inactive, there isn't + * any need to return a new reference. + * Conditions: + * If made is true, a "made" reference is returned (for donating to + * the voucher system). Otherwise an internal reference is returned. + * + * Nothing locked on entry. May block. + */ +ipc_importance_task_t +ipc_importance_for_task(task_t task, boolean_t made) +{ + ipc_importance_task_t task_elem; + boolean_t first_pass = TRUE; + + assert(TASK_NULL != task); + + retry: + /* No use returning anything for inactive task */ + if (!task->active) + return IIT_NULL; + + ipc_importance_lock(); + task_elem = task->task_imp_base; + if (IIT_NULL != task_elem) { + /* Add a made reference (borrowing active task ref to do it) */ + if (made) { + if (0 == task_elem->iit_made++) { + assert(IIT_REFS_MAX > IIT_REFS(task_elem)); + ipc_importance_task_reference_internal(task_elem); + } + } else { + assert(IIT_REFS_MAX > IIT_REFS(task_elem)); + ipc_importance_task_reference_internal(task_elem); + } + ipc_importance_unlock(); + return task_elem; + } + ipc_importance_unlock(); + + if (!first_pass) + return IIT_NULL; + first_pass = FALSE; + + /* Need to make one - may race with others (be prepared to drop) */ + task_elem = (ipc_importance_task_t)zalloc(ipc_importance_task_zone); + if (IIT_NULL == task_elem) + goto retry; + + task_elem->iit_bits = IIE_TYPE_TASK | 2; /* one for task, one for return/made */ + task_elem->iit_made = (made) ?
1 : 0; + task_elem->iit_task = task; /* take actual ref when we're sure */ + task_elem->iit_updateq = NULL; + task_elem->iit_receiver = 0; + task_elem->iit_denap = 0; + task_elem->iit_donor = 0; + task_elem->iit_live_donor = 0; + task_elem->iit_updatepolicy = 0; + task_elem->iit_reserved = 0; + task_elem->iit_filelocks = 0; + task_elem->iit_updatetime = 0; + task_elem->iit_transitions = 0; + task_elem->iit_assertcnt = 0; + task_elem->iit_externcnt = 0; + task_elem->iit_externdrop = 0; + task_elem->iit_legacy_externcnt = 0; + task_elem->iit_legacy_externdrop = 0; +#if IIE_REF_DEBUG + ipc_importance_counter_init(&task_elem->iit_elem); +#endif + queue_init(&task_elem->iit_kmsgs); + queue_init(&task_elem->iit_inherits); + + ipc_importance_lock(); + if (!task->active) { + ipc_importance_unlock(); + zfree(ipc_importance_task_zone, task_elem); + return IIT_NULL; + } + + /* did we lose the race? */ + if (IIT_NULL != task->task_imp_base) { + ipc_importance_unlock(); + zfree(ipc_importance_task_zone, task_elem); + goto retry; + } + + /* we won the race */ + task->task_imp_base = task_elem; + task_reference(task); +#if DEVELOPMENT || DEBUG + queue_enter(&global_iit_alloc_queue, task_elem, ipc_importance_task_t, iit_allocation); + task_importance_update_owner_info(task); +#endif + ipc_importance_unlock(); + + return task_elem; +} + +#if DEVELOPMENT || DEBUG +void task_importance_update_owner_info(task_t task) { + + if (task != TASK_NULL && task->task_imp_base != IIT_NULL) { + ipc_importance_task_t task_elem = task->task_imp_base; + + task_elem->iit_bsd_pid = audit_token_pid_from_task(task); + if (task->bsd_info) { + strncpy(&task_elem->iit_procname[0], proc_name_address(task->bsd_info), 16); + task_elem->iit_procname[16] = '\0'; + } else { + strncpy(&task_elem->iit_procname[0], "unknown", 16); + } + } +} +#endif + +/* + * Routine: ipc_importance_reset_locked + * Purpose: + * Reset a task's IPC importance (the task is going away or exec'ing) + * + * Remove the donor bit and legacy externalized assertions from the + * current task importance and see if that wipes out downstream donations. + * Conditions: + * importance lock held. 
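+ *
+ * [Editor's note - illustrative, not part of the original change]
+ * The live boost counts are maintained as monotonic hold/drop pairs,
+ * mirroring the IIE_EXTERN() definition in the header, so for a task
+ * importance t:
+ *
+ *	IIT_EXTERN(t) == t->iit_externcnt - t->iit_externdrop
+ *	IIT_LEGACY_EXTERN(t) == t->iit_legacy_externcnt - t->iit_legacy_externdrop
+ *
+ * The reset below subtracts the legacy pair out of the external pair
+ * before zeroing it, leaving any non-legacy external boosts intact.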
+ */ + +static void +ipc_importance_reset_locked(ipc_importance_task_t task_imp, boolean_t donor) +{ + boolean_t before_donor, after_donor; + + /* remove the donor bit, live-donor bit and externalized boosts */ + before_donor = ipc_importance_task_is_donor(task_imp); + if (donor) { + task_imp->iit_donor = 0; + } + assert(IIT_LEGACY_EXTERN(task_imp) <= IIT_EXTERN(task_imp)); + assert(task_imp->iit_legacy_externcnt <= task_imp->iit_externcnt); + assert(task_imp->iit_legacy_externdrop <= task_imp->iit_externdrop); + task_imp->iit_externcnt -= task_imp->iit_legacy_externcnt; + task_imp->iit_externdrop -= task_imp->iit_legacy_externdrop; + + /* assert(IIT_LEGACY_EXTERN(task_imp) <= task_imp->iit_assertcnt); */ + if (IIT_LEGACY_EXTERN(task_imp) < task_imp->iit_assertcnt) { + task_imp->iit_assertcnt -= IIT_LEGACY_EXTERN(task_imp); + } else { + assert(IIT_LEGACY_EXTERN(task_imp) == task_imp->iit_assertcnt); + task_imp->iit_assertcnt = 0; + } + task_imp->iit_legacy_externcnt = 0; + task_imp->iit_legacy_externdrop = 0; + after_donor = ipc_importance_task_is_donor(task_imp); + +#if DEVELOPMENT || DEBUG + if (task_imp->iit_assertcnt > 0 && task_imp->iit_live_donor) { + printf("Live donor task %s[%d] still has %d importance assertions after reset\n", + task_imp->iit_procname, task_imp->iit_bsd_pid, task_imp->iit_assertcnt); + } +#endif + + /* propagate a downstream drop if there was a change in donor status */ + if (after_donor != before_donor) { + ipc_importance_task_propagate_assertion_locked(task_imp, IIT_UPDATE_DROP, FALSE); + } +} + +/* + * Routine: ipc_importance_reset + * Purpose: + * Reset a task's IPC importance + * + * The task is being reset, although staying around. Arrange to have the + * external state of the task reset from the importance. + * Conditions: + * importance lock not held. + */ + +void +ipc_importance_reset(ipc_importance_task_t task_imp, boolean_t donor) +{ + if (IIT_NULL == task_imp) { + return; + } + ipc_importance_lock(); + ipc_importance_reset_locked(task_imp, donor); + ipc_importance_unlock(); +} + +/* + * Routine: ipc_importance_disconnect_task + * Purpose: + * Disconnect a task from its importance. + * + * Clear the task pointer from the importance and drop the + * reference the task held on the importance object. Before + * doing that, reset the effects the current task holds on + * the importance and see if that wipes out downstream donations. + * + * We allow the upstream boosts to continue to affect downstream + * even though the local task is being effectively pulled from + * the chain. + * Conditions: + * Nothing locked. + */ +void +ipc_importance_disconnect_task(task_t task) +{ + ipc_importance_task_t task_imp; + + task_lock(task); + ipc_importance_lock(); + task_imp = task->task_imp_base; + + /* did somebody beat us to it? */ + if (IIT_NULL == task_imp) { + ipc_importance_unlock(); + task_unlock(task); + return; + } + + /* disconnect the task from this importance */ + assert(task_imp->iit_task == task); + task_imp->iit_task = TASK_NULL; + task->task_imp_base = IIT_NULL; + task_unlock(task); + + /* reset the effects the current task holds on the importance */ + ipc_importance_reset_locked(task_imp, TRUE); + + ipc_importance_task_release_locked(task_imp); + /* importance unlocked */ + + /* deallocate the task now that the importance is unlocked */ + task_deallocate(task); +} + +/* + * Routine: ipc_importance_send + * Purpose: + * Post the importance voucher attribute [if sent] or a static + * importance boost depending upon options and conditions.
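+ *
+ * [Editor's sketch - hypothetical senders, not part of the original
+ * change] The option bits tested below give a sender roughly three
+ * behaviors:
+ *
+ *	mach_msg(hdr, MACH_SEND_MSG, ...);				donate iff sender is a donor
+ *	mach_msg(hdr, MACH_SEND_MSG|MACH_SEND_NOIMPORTANCE, ...);	never donate
+ *	mach_msg(hdr, MACH_SEND_MSG|MACH_SEND_IMPORTANCE, ...);	force a static boost
+ *
+ * where the forced static boost form is presumed restricted to
+ * privileged senders.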
+ * Conditions: + * Destination port locked on entry and exit, may be dropped during the call. + * Returns: + * A boolean identifying if the port lock was temporarily dropped. + */ +boolean_t +ipc_importance_send( + ipc_kmsg_t kmsg, + mach_msg_option_t option) +{ + ipc_port_t port = (ipc_port_t) kmsg->ikm_header->msgh_remote_port; + boolean_t port_lock_dropped = FALSE; + ipc_importance_elem_t elem; + task_t task; + ipc_importance_task_t task_imp; + kern_return_t kr; + + + assert(IP_VALID(port)); + + /* If no donation to be made, return quickly */ + if ((port->ip_impdonation == 0) || + (option & MACH_SEND_NOIMPORTANCE) != 0) { + return port_lock_dropped; + } + + task = current_task(); + + /* If forced sending a static boost, go update the port */ + if ((option & MACH_SEND_IMPORTANCE) != 0) { + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_RAISEIMP; + goto portupdate; + } + + task_imp = task->task_imp_base; + assert(IIT_NULL != task_imp); + + /* If the sender can never donate importance, nothing to do */ + if (ipc_importance_task_is_never_donor(task_imp)) { + return port_lock_dropped; + } + + elem = IIE_NULL; + + /* If importance receiver and passing a voucher, look for importance in there */ + if (IP_VALID(kmsg->ikm_voucher) && + ipc_importance_task_is_marked_receiver(task_imp)) { + mach_voucher_attr_value_handle_t vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED]; + mach_voucher_attr_value_handle_array_size_t val_count; + ipc_voucher_t voucher; + + assert(ip_kotype(kmsg->ikm_voucher) == IKOT_VOUCHER); + voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject; + + /* check to see if the voucher has an importance attribute */ + val_count = MACH_VOUCHER_ATTR_VALUE_MAX_NESTED; + kr = mach_voucher_attr_control_get_values(ipc_importance_control, voucher, + vals, &val_count); + assert(KERN_SUCCESS == kr); + + /* + * Only use importance associated with our task (either directly + * or through an inherit that donates to our task). + */ + if (0 < val_count) { + ipc_importance_elem_t check_elem; + + check_elem = (ipc_importance_elem_t)vals[0]; + assert(IIE_NULL != check_elem); + if (IIE_TYPE_INHERIT == IIE_TYPE(check_elem)) { + ipc_importance_inherit_t inherit; + inherit = (ipc_importance_inherit_t) check_elem; + if (inherit->iii_to_task == task_imp) { + elem = check_elem; + } + } else if (check_elem == (ipc_importance_elem_t)task_imp) { + elem = check_elem; + } + } + } + + /* If we haven't found an importance attribute to send yet, use the task's */ + if (IIE_NULL == elem) { + elem = (ipc_importance_elem_t)task_imp; + } + + /* take a reference for the message to hold */ + ipc_importance_reference_internal(elem); + + /* acquire the importance lock while trying to hang on to port lock */ + if (!ipc_importance_lock_try()) { + port_lock_dropped = TRUE; + ip_unlock(port); + ipc_importance_lock(); + } + + /* link kmsg onto the donor element propagation chain */ + ipc_importance_kmsg_link(kmsg, elem); + /* elem reference transferred to kmsg */ + + incr_ref_counter(elem->iie_kmsg_refs_added); + + /* If the sender isn't currently a donor, no need to apply boost */ + if (!ipc_importance_task_is_donor(task_imp)) { + ipc_importance_unlock(); + + /* re-acquire port lock, if needed */ + if (TRUE == port_lock_dropped) + ip_lock(port); + + return port_lock_dropped; + } + + /* Mark the fact that we are (currently) donating through this message */ + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_RAISEIMP; + + /* + * If we need to relock the port, do it with the importance still locked.
+ * This assures we get to add the importance boost through the port to + * the task BEFORE anyone else can attempt to undo that operation because + * the sender lost donor status. + */ + if (TRUE == port_lock_dropped) { + ip_lock(port); + } + ipc_importance_unlock(); + + portupdate: + +#if IMPORTANCE_DEBUG + if (kdebug_enable) { + mach_msg_max_trailer_t *dbgtrailer = (mach_msg_max_trailer_t *) + ((vm_offset_t)kmsg->ikm_header + round_msg(kmsg->ikm_header->msgh_size)); + unsigned int sender_pid = dbgtrailer->msgh_audit.val[5]; + mach_msg_id_t imp_msgh_id = kmsg->ikm_header->msgh_id; + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_SEND)) | DBG_FUNC_START, + audit_token_pid_from_task(task), sender_pid, imp_msgh_id, 0, 0); + } +#endif /* IMPORTANCE_DEBUG */ + + /* adjust port boost count (with port locked) */ + if (TRUE == ipc_port_importance_delta(port, 1)) { + port_lock_dropped = TRUE; + ip_lock(port); + } + return port_lock_dropped; +} + +/* + * Routine: ipc_importance_inherit_from + * Purpose: + * Create a "made" reference for an importance attribute representing + * an inheritance between the sender of a message (if linked) and the + * current task importance. If the message is not linked, a static + * boost may be created, based on the boost state of the message. + * + * Any transfer from kmsg linkage to inherit linkage must be atomic. + * + * If the task is inactive, there isn't any need to return a new reference. + * Conditions: + * Nothing locked on entry. May block. + */ +static ipc_importance_inherit_t +ipc_importance_inherit_from(ipc_kmsg_t kmsg) +{ + ipc_importance_task_t task_imp = IIT_NULL; + ipc_importance_elem_t from_elem = kmsg->ikm_importance; + ipc_importance_elem_t elem; + task_t task_self = current_task(); + + ipc_port_t port = kmsg->ikm_header->msgh_remote_port; + ipc_importance_inherit_t inherit = III_NULL; + ipc_importance_inherit_t alloc = III_NULL; + ipc_importance_inherit_t temp_inherit; + boolean_t cleared_self_donation = FALSE; + boolean_t donating; + uint32_t depth = 1; + + /* The kmsg must have an importance donor or static boost to proceed */ + if (IIE_NULL == kmsg->ikm_importance && + !MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits)) { + return III_NULL; + } + + /* + * No need to set up an inherit linkage if the dest isn't a receiver + * of one type or the other. + */ + if (!ipc_importance_task_is_any_receiver_type(task_self->task_imp_base)) { + ipc_importance_lock(); + goto out_locked; + } + + /* Grab a reference on the importance of the destination */ + task_imp = ipc_importance_for_task(task_self, FALSE); + + ipc_importance_lock(); + + if (IIT_NULL == task_imp) { + goto out_locked; + } + + incr_ref_counter(task_imp->iit_elem.iie_task_refs_added_inherit_from); + + /* If message is already associated with an inherit... */ + if (IIE_TYPE_INHERIT == IIE_TYPE(from_elem)) { + ipc_importance_inherit_t from_inherit = (ipc_importance_inherit_t)from_elem; + + /* already targeting our task? - just use it */ + if (from_inherit->iii_to_task == task_imp) { + /* clear self-donation if not also present in inherit */ + if (!from_inherit->iii_donating && + MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits)) { + kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; + cleared_self_donation = TRUE; + } + inherit = from_inherit; + + } else if (III_DEPTH_MAX == III_DEPTH(from_inherit)) { + ipc_importance_task_t to_task; + ipc_importance_elem_t unlinked_from; + + /* + * Chain too long. 
Switch to looking + * directly at the from_inherit's to-task + * as our source of importance. + */ + to_task = from_inherit->iii_to_task; + ipc_importance_task_reference(to_task); + from_elem = (ipc_importance_elem_t)to_task; + depth = III_DEPTH_RESET | 1; + + /* Fixup the kmsg linkage to reflect change */ + unlinked_from = ipc_importance_kmsg_unlink(kmsg); + assert(unlinked_from == (ipc_importance_elem_t)from_inherit); + ipc_importance_kmsg_link(kmsg, from_elem); + ipc_importance_inherit_release_locked(from_inherit); + /* importance unlocked */ + ipc_importance_lock(); + + } else { + /* inheriting from an inherit */ + depth = from_inherit->iii_depth + 1; + } + } + + /* + * Don't allow a task to inherit from itself (would keep it permanently + * boosted even if all other donors to the task went away). + */ + + if (from_elem == (ipc_importance_elem_t)task_imp) { + goto out_locked; + } + + /* + * But if the message isn't associated with any linked source, it is + * intended to be permanently boosting (static boost from kernel). + * In that case DO let the process permanently boost itself. + */ + if (IIE_NULL == from_elem) { + assert(MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits)); + ipc_importance_task_reference_internal(task_imp); + from_elem = (ipc_importance_elem_t)task_imp; + } + + /* + * Now that we have the from_elem figured out, + * check to see if we already have an inherit for this pairing + */ + while (III_NULL == inherit) { + queue_iterate(&from_elem->iie_inherits, temp_inherit, + ipc_importance_inherit_t, iii_inheritance) { + if (temp_inherit->iii_to_task == task_imp && + temp_inherit->iii_depth == depth) { + inherit = temp_inherit; + break; + } + } + + /* Do we have to allocate a new inherit? */ + if (III_NULL == inherit) { + if (III_NULL != alloc) { + break; + } + + /* allocate space */ + ipc_importance_unlock(); + alloc = (ipc_importance_inherit_t) + zalloc(ipc_importance_inherit_zone); + ipc_importance_lock(); + } + } + + /* snapshot the donating status while we have importance locked */ + donating = MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits); + + if (III_NULL != inherit) { + /* We found one, piggyback on that */ + assert(0 < III_REFS(inherit)); + assert(0 < IIE_REFS(inherit->iii_from_elem)); + assert(inherit->iii_externcnt >= inherit->iii_made); + + /* add in a made reference */ + if (0 == inherit->iii_made++) { + assert(III_REFS_MAX > III_REFS(inherit)); + ipc_importance_inherit_reference_internal(inherit); + } + + /* Reflect the inherit's change of status into the task boosts */ + if (0 == III_EXTERN(inherit)) { + assert(!inherit->iii_donating); + inherit->iii_donating = donating; + if (donating) { + task_imp->iit_externcnt += inherit->iii_externcnt; + task_imp->iit_externdrop += inherit->iii_externdrop; + } + } else { + assert(donating == inherit->iii_donating); + } + + /* add in an external reference for this use of the inherit */ + inherit->iii_externcnt++; + if (donating) { + task_imp->iit_externcnt++; + } + } else { + /* initialize the previously allocated space */ + inherit = alloc; + inherit->iii_bits = IIE_TYPE_INHERIT | 1; + inherit->iii_made = 1; + inherit->iii_externcnt = 1; + inherit->iii_externdrop = 0; + inherit->iii_depth = depth; + inherit->iii_to_task = task_imp; + inherit->iii_from_elem = IIE_NULL; + queue_init(&inherit->iii_kmsgs); + queue_init(&inherit->iii_inherits); + + /* If donating, reflect that in the task externcnt */ + if (donating) { + inherit->iii_donating = TRUE; + task_imp->iit_externcnt++; + } else { +
inherit->iii_donating = FALSE; + } + + /* + * Chain our new inherit on the element it inherits from. + * The new inherit takes our reference on from_elem. + */ + ipc_importance_inherit_link(inherit, from_elem); + +#if IIE_REF_DEBUG + ipc_importance_counter_init(&inherit->iii_elem); + from_elem->iie_kmsg_refs_inherited++; + task_imp->iit_elem.iie_task_refs_inherited++; +#endif + } + + out_locked: + /* + * for those paths that came straight here: snapshot the donating status + * (this should match previous snapshot for other paths). + */ + donating = MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits); + + /* unlink the kmsg inheritance (if any) */ + elem = ipc_importance_kmsg_unlink(kmsg); + assert(elem == from_elem); + + /* If we didn't create a new inherit, we have some resources to release */ + if (III_NULL == inherit || inherit != alloc) { + if (IIE_NULL != from_elem) { + if (III_NULL != inherit) { + incr_ref_counter(from_elem->iie_kmsg_refs_coalesced); + } else { + incr_ref_counter(from_elem->iie_kmsg_refs_dropped); + } + ipc_importance_release_locked(from_elem); + /* importance unlocked */ + } else { + ipc_importance_unlock(); + } + + if (IIT_NULL != task_imp) { + if (III_NULL != inherit) { + incr_ref_counter(task_imp->iit_elem.iie_task_refs_coalesced); + } + ipc_importance_task_release(task_imp); + } + + if (III_NULL != alloc) + zfree(ipc_importance_inherit_zone, alloc); + } else { + /* from_elem and task_imp references transferred to new inherit */ + ipc_importance_unlock(); + } + + /* decrement port boost count */ + if (donating) { + ip_lock(port); + if (III_NULL != inherit) { + /* task assertions transferred to inherit, just adjust port count */ + ipc_port_impcount_delta(port, -1, IP_NULL); + ip_unlock(port); + } else { + /* drop importance from port and destination task */ + if (ipc_port_importance_delta(port, -1) == FALSE) { + ip_unlock(port); + } + } + } else if (cleared_self_donation) { + ip_lock(port); + /* drop cleared donation from port and destination task */ + if (ipc_port_importance_delta(port, -1) == FALSE) { + ip_unlock(port); + } + } + + if (III_NULL != inherit) { + /* have an associated importance attr, even if currently not donating */ + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_RAISEIMP; + } else { + /* we won't have an importance attribute associated with our message */ + kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; + } + + return inherit; +} + +/* + * Routine: ipc_importance_receive + * Purpose: + * Process importance attributes in a received message. + * + * If an importance voucher attribute was sent, transform + * that into an attribute value reflecting the inheritance + * from the sender to the receiver. + * + * If a static boost is received (or the voucher isn't on + * a voucher-based boost), export a static boost. + * Conditions: + * Nothing locked. + */ +void +ipc_importance_receive( + ipc_kmsg_t kmsg, + mach_msg_option_t option) +{ + unsigned int sender_pid = ((mach_msg_max_trailer_t *) + ((vm_offset_t)kmsg->ikm_header + + round_msg(kmsg->ikm_header->msgh_size)))->msgh_audit.val[5]; + task_t task_self = current_task(); + int impresult = -1; + + /* convert to a voucher with an inherit importance attribute? 
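+ *
+ * [Editor's sketch - not part of the original change] The recipe
+ * array built below is conceptually two steps:
+ *
+ *	recipe[0] = { .key = MACH_VOUCHER_ATTR_KEY_ALL,
+ *	              .command = MACH_VOUCHER_ATTR_COPY,
+ *	              .previous_voucher = sent_voucher };	<- clone other attributes
+ *	recipe[1] = { .key = MACH_VOUCHER_ATTR_KEY_IMPORTANCE,
+ *	              .command = MACH_VOUCHER_ATTR_SET_VALUE_HANDLE,
+ *	              .content = handle of the new inherit };	<- splice in inheritance
+ *
+ * yielding the receive-side voucher that carries the inherit attribute.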
*/ + if ((option & MACH_RCV_VOUCHER) != 0) { + uint8_t recipes[2 * sizeof(ipc_voucher_attr_recipe_data_t) + + sizeof(mach_voucher_attr_value_handle_t)]; + ipc_voucher_attr_raw_recipe_array_size_t recipe_size = 0; + ipc_voucher_attr_recipe_t recipe = (ipc_voucher_attr_recipe_t)recipes; + ipc_voucher_t recv_voucher; + mach_voucher_attr_value_handle_t handle; + ipc_importance_inherit_t inherit; + kern_return_t kr; + + /* set up recipe to copy the old voucher */ + if (IP_VALID(kmsg->ikm_voucher)) { + ipc_voucher_t sent_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject; + + recipe->key = MACH_VOUCHER_ATTR_KEY_ALL; + recipe->command = MACH_VOUCHER_ATTR_COPY; + recipe->previous_voucher = sent_voucher; + recipe->content_size = 0; + recipe_size += sizeof(*recipe); + } + + /* + * create an inheritance attribute from the kmsg (may be NULL) + * transferring any boosts from the kmsg linkage through the + * port directly to the new inheritance object. + */ + inherit = ipc_importance_inherit_from(kmsg); + handle = (mach_voucher_attr_value_handle_t)inherit; + + assert(IIE_NULL == kmsg->ikm_importance); + + /* replace the importance attribute with the handle we created */ + /* our made reference on the inherit is donated to the voucher */ + recipe = (ipc_voucher_attr_recipe_t)&recipes[recipe_size]; + recipe->key = MACH_VOUCHER_ATTR_KEY_IMPORTANCE; + recipe->command = MACH_VOUCHER_ATTR_SET_VALUE_HANDLE; + recipe->previous_voucher = IPC_VOUCHER_NULL; + recipe->content_size = sizeof(mach_voucher_attr_value_handle_t); + *(mach_voucher_attr_value_handle_t *)(void *)recipe->content = handle; + recipe_size += sizeof(*recipe) + sizeof(mach_voucher_attr_value_handle_t); + + kr = ipc_voucher_attr_control_create_mach_voucher(ipc_importance_control, + recipes, + recipe_size, + &recv_voucher); + assert(KERN_SUCCESS == kr); + + /* swap the voucher port (and set voucher bits in case it didn't already exist) */ + kmsg->ikm_header->msgh_bits |= (MACH_MSG_TYPE_MOVE_SEND << 16); + ipc_port_release_send(kmsg->ikm_voucher); + kmsg->ikm_voucher = convert_voucher_to_port(recv_voucher); + if (III_NULL != inherit) + impresult = 2; + + } else { /* Don't want a voucher */ + + /* got linked importance? have to drop */ + if (IIE_NULL != kmsg->ikm_importance) { + ipc_importance_elem_t elem; + + ipc_importance_lock(); + elem = ipc_importance_kmsg_unlink(kmsg); +#if IIE_REF_DEBUG + elem->iie_kmsg_refs_dropped++; +#endif + ipc_importance_release_locked(elem); + /* importance unlocked */ + } + + /* With kmsg unlinked, can safely examine message importance attribute.
*/ + if (MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits)) { + ipc_importance_task_t task_imp = task_self->task_imp_base; + ipc_port_t port = kmsg->ikm_header->msgh_remote_port; + + /* defensive deduction for release builds lacking the assert */ + ip_lock(port); + ipc_port_impcount_delta(port, -1, IP_NULL); + ip_unlock(port); + + /* will user accept legacy responsibility for the importance boost */ + if (KERN_SUCCESS == ipc_importance_task_externalize_legacy_assertion(task_imp, 1, sender_pid)) { + impresult = 1; + } else { + /* The importance boost never applied to task (clear the bit) */ + kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; + impresult = 0; + } + } + } + +#if IMPORTANCE_DEBUG + if (-1 < impresult) + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_DELV)) | DBG_FUNC_NONE, + sender_pid, audit_token_pid_from_task(task_self), + kmsg->ikm_header->msgh_id, impresult, 0); + if (impresult == 2) { + /* + * This probe only covers the new voucher-based path. Legacy importance + * will trigger the probe in ipc_importance_task_externalize_legacy_assertion() + * above and have impresult==1 here. + */ + DTRACE_BOOST5(receive_boost, task_t, task_self, int, audit_token_pid_from_task(task_self), int, sender_pid, int, 1, int, task_self->task_imp_base->iit_assertcnt); + } +#endif /* IMPORTANCE_DEBUG */ +} + +/* + * Routine: ipc_importance_unreceive + * Purpose: + * Undo receive of importance attributes in a message. + * + * Conditions: + * Nothing locked. + */ +void +ipc_importance_unreceive( + ipc_kmsg_t kmsg, + mach_msg_option_t __unused option) +{ + /* importance should already be in the voucher and out of the kmsg */ + assert(IIE_NULL == kmsg->ikm_importance); + + /* See if there is a legacy boost to be dropped from receiver */ + if (MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits)) { + ipc_importance_task_t task_imp; + + kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; + task_imp = current_task()->task_imp_base; + if (!IP_VALID(kmsg->ikm_voucher) && IIT_NULL != task_imp) { + ipc_importance_task_drop_legacy_external_assertion(task_imp, 1); + } + /* + * ipc_kmsg_copyout_dest() will consume the voucher + * and any contained importance. + */ + } +} + +/* + * Routine: ipc_importance_clean + * Purpose: + * Clean up importance state in a kmsg that is being cleaned. + * Unlink the importance chain if one was set up, and drop + * the reference this kmsg held on the donor. Then check to + * see if importance was carried to the port, and remove that if + * needed. + * Conditions: + * Nothing locked. + */ +void +ipc_importance_clean( + ipc_kmsg_t kmsg) +{ + ipc_port_t port; + + /* Is the kmsg still linked?
If so, remove that first */ + if (IIE_NULL != kmsg->ikm_importance) { + ipc_importance_elem_t elem; + + ipc_importance_lock(); + elem = ipc_importance_kmsg_unlink(kmsg); + assert(IIE_NULL != elem); + ipc_importance_release_locked(elem); + /* importance unlocked */ + } + + /* See if there is a legacy importance boost to be dropped from port */ + if (MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits)) { + kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; + port = kmsg->ikm_header->msgh_remote_port; + if (IP_VALID(port)) { + ip_lock(port); + /* inactive ports already had their importance boosts dropped */ + if (!ip_active(port) || + ipc_port_importance_delta(port, -1) == FALSE) { + ip_unlock(port); + } + } + } +} + +void +ipc_importance_assert_clean(__assert_only ipc_kmsg_t kmsg) +{ + assert(IIE_NULL == kmsg->ikm_importance); + assert(!MACH_MSGH_BITS_RAISED_IMPORTANCE(kmsg->ikm_header->msgh_bits)); +} + +/* + * IPC Importance Attribute Manager definition + */ + +static kern_return_t +ipc_importance_release_value( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_value_handle_t value, + mach_voucher_attr_value_reference_t sync); + +static kern_return_t +ipc_importance_get_value( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_recipe_command_t command, + mach_voucher_attr_value_handle_array_t prev_values, + mach_voucher_attr_value_handle_array_size_t prev_value_count, + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t content_size, + mach_voucher_attr_value_handle_t *out_value, + ipc_voucher_t *out_value_voucher); + +static kern_return_t +ipc_importance_extract_content( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_value_handle_array_t values, + mach_voucher_attr_value_handle_array_size_t value_count, + mach_voucher_attr_recipe_command_t *out_command, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *in_out_content_size); + +static kern_return_t +ipc_importance_command( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_value_handle_array_t values, + mach_msg_type_number_t value_count, + mach_voucher_attr_command_t command, + mach_voucher_attr_content_t in_content, + mach_voucher_attr_content_size_t in_content_size, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *out_content_size); + +static void +ipc_importance_manager_release( + ipc_voucher_attr_manager_t manager); + +struct ipc_voucher_attr_manager ipc_importance_manager = { + .ivam_release_value = ipc_importance_release_value, + .ivam_get_value = ipc_importance_get_value, + .ivam_extract_content = ipc_importance_extract_content, + .ivam_command = ipc_importance_command, + .ivam_release = ipc_importance_manager_release, +}; + +#define IMPORTANCE_ASSERT_KEY(key) assert(MACH_VOUCHER_ATTR_KEY_IMPORTANCE == (key)) +#define IMPORTANCE_ASSERT_MANAGER(manager) assert(&ipc_importance_manager == (manager)) + +/* + * Routine: ipc_importance_release_value [Voucher Attribute Manager Interface] + * Purpose: + * Release what the voucher system believes is the last "made" reference + * on an importance attribute value handle. The sync parameter is used to + * avoid races with new made references concurrently being returned to the + * voucher system in other threads. + * Conditions: + * Nothing locked on entry. May block. 
+ */ +static kern_return_t +ipc_importance_release_value( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_t value, + mach_voucher_attr_value_reference_t sync) +{ + ipc_importance_elem_t elem; + + IMPORTANCE_ASSERT_MANAGER(manager); + IMPORTANCE_ASSERT_KEY(key); + assert(0 < sync); + + elem = (ipc_importance_elem_t)value; + + ipc_importance_lock(); + + /* Any outstanding made refs? */ + if (sync != elem->iie_made) { + assert(sync < elem->iie_made); + ipc_importance_unlock(); + return KERN_FAILURE; + } + + /* clear made */ + elem->iie_made = 0; + + /* + * If there are pending external boosts represented by this attribute, + * drop them from the appropriate task + */ + if (IIE_TYPE_INHERIT == IIE_TYPE(elem)) { + ipc_importance_inherit_t inherit = (ipc_importance_inherit_t)elem; + + assert(inherit->iii_externcnt >= inherit->iii_externdrop); + + if (inherit->iii_donating) { + ipc_importance_task_t imp_task = inherit->iii_to_task; + uint32_t assertcnt = III_EXTERN(inherit); + + assert(ipc_importance_task_is_any_receiver_type(imp_task)); + assert(imp_task->iit_externcnt >= inherit->iii_externcnt); + assert(imp_task->iit_externdrop >= inherit->iii_externdrop); + imp_task->iit_externcnt -= inherit->iii_externcnt; + imp_task->iit_externdrop -= inherit->iii_externdrop; + inherit->iii_externcnt = 0; + inherit->iii_externdrop = 0; + inherit->iii_donating = FALSE; + + /* adjust the internal assertions - and propagate if needed */ + if (ipc_importance_task_check_transition(imp_task, IIT_UPDATE_DROP, assertcnt)) { + ipc_importance_task_propagate_assertion_locked(imp_task, IIT_UPDATE_DROP, TRUE); + } + } else { + inherit->iii_externcnt = 0; + inherit->iii_externdrop = 0; + } + } + + /* drop the made reference on elem */ + ipc_importance_release_locked(elem); + /* returns unlocked */ + + return KERN_SUCCESS; +} + + +/* + * Routine: ipc_importance_get_value [Voucher Attribute Manager Interface] + * Purpose: + * Convert command and content data into a reference on a [potentially new] + * attribute value. The importance attribute manager will only allow the + * caller to get a value for the current task's importance, or to redeem + * an importance attribute from an existing voucher. + * Conditions: + * Nothing locked on entry. May block.
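+ *
+ * [Editor's sketch - hypothetical userspace recipe, not from the
+ * original change] A client could mint a voucher carrying its own
+ * importance with something like:
+ *
+ *	mach_voucher_attr_recipe_data_t r = {
+ *		.key = MACH_VOUCHER_ATTR_KEY_IMPORTANCE,
+ *		.command = MACH_VOUCHER_ATTR_IMPORTANCE_SELF,
+ *	};
+ *	kern_return_t kr = host_create_mach_voucher(mach_host_self(),
+ *	    (mach_voucher_attr_raw_recipe_array_t)&r, sizeof(r), &voucher);
+ *
+ * which would land in the MACH_VOUCHER_ATTR_IMPORTANCE_SELF case below.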
+ */ +static kern_return_t +ipc_importance_get_value( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_recipe_command_t command, + mach_voucher_attr_value_handle_array_t prev_values, + mach_voucher_attr_value_handle_array_size_t prev_value_count, + mach_voucher_attr_content_t __unused content, + mach_voucher_attr_content_size_t content_size, + mach_voucher_attr_value_handle_t *out_value, + ipc_voucher_t *out_value_voucher) +{ + ipc_importance_elem_t elem; + task_t self; + + IMPORTANCE_ASSERT_MANAGER(manager); + IMPORTANCE_ASSERT_KEY(key); + + if (0 != content_size) + return KERN_INVALID_ARGUMENT; + + /* never an out voucher */ + + switch (command) { + + case MACH_VOUCHER_ATTR_REDEEM: + + /* redeem of previous values is the value */ + if (0 < prev_value_count) { + elem = (ipc_importance_elem_t)prev_values[0]; + assert(IIE_NULL != elem); + + ipc_importance_lock(); + assert(0 < elem->iie_made); + elem->iie_made++; + ipc_importance_unlock(); + + *out_value = prev_values[0]; + return KERN_SUCCESS; + } + + /* redeem of default is default */ + *out_value = 0; + *out_value_voucher = IPC_VOUCHER_NULL; + return KERN_SUCCESS; + + case MACH_VOUCHER_ATTR_IMPORTANCE_SELF: + self = current_task(); + + elem = (ipc_importance_elem_t)ipc_importance_for_task(self, TRUE); + /* made reference added (or IIE_NULL which isn't referenced) */ + + *out_value = (mach_voucher_attr_value_handle_t)elem; + *out_value_voucher = IPC_VOUCHER_NULL; + return KERN_SUCCESS; + + default: + /* + * every other command is unknown + * + * Specifically, there is no mechanism provided to construct an + * importance attribute for a task/process from just a pid or + * task port. It has to be copied (or redeemed) from a previous + * voucher that has it. + */ + return KERN_INVALID_ARGUMENT; + } +} + +/* + * Routine: ipc_importance_extract_content [Voucher Attribute Manager Interface] + * Purpose: + * Extract meaning from the attribute value present in a voucher. While + * the real goal is to provide commands and data that can reproduce the + * voucher's value "out of thin air", this isn't possible with importance + * attribute values. Instead, return debug info to help track down dependencies. + * Conditions: + * Nothing locked on entry. May block. + */ +static kern_return_t +ipc_importance_extract_content( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_array_t values, + mach_voucher_attr_value_handle_array_size_t value_count, + mach_voucher_attr_recipe_command_t *out_command, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *in_out_content_size) +{ + mach_voucher_attr_content_size_t size = 0; + ipc_importance_elem_t elem; + unsigned int i; + + IMPORTANCE_ASSERT_MANAGER(manager); + IMPORTANCE_ASSERT_KEY(key); + + /* the first non-default value provides the data */ + for (i = 0; i < value_count ; i++) { + elem = (ipc_importance_elem_t)values[i]; + if (IIE_NULL == elem) + continue; + + snprintf((char *)out_content, *in_out_content_size, "Importance for pid "); + size = (mach_voucher_attr_content_size_t)strlen((char *)out_content); + + for(;;) { + ipc_importance_inherit_t inherit = III_NULL; + ipc_importance_task_t task_imp; + task_t task; + int task_pid; + + if (IIE_TYPE_TASK == IIE_TYPE(elem)) { + task_imp = (ipc_importance_task_t)elem; + task = task_imp->iit_task; + task_pid = (TASK_NULL != task) ? 
+ audit_token_pid_from_task(task) : -1; + snprintf((char *)out_content + size, *in_out_content_size - size, "%d", task_pid); + } else { + inherit = (ipc_importance_inherit_t)elem; + task_imp = inherit->iii_to_task; + task = task_imp->iit_task; + task_pid = (TASK_NULL != task) ? + audit_token_pid_from_task(task) : -1; + snprintf((char *)out_content + size, *in_out_content_size - size, + "%d (%d of %d boosts) %s from pid ", task_pid, + III_EXTERN(inherit), inherit->iii_externcnt, + (inherit->iii_donating) ? "donated" : "linked"); + } + + size = (mach_voucher_attr_content_size_t)strlen((char *)out_content); + + if (III_NULL == inherit) + break; + + elem = inherit->iii_from_elem; + } + size++; /* account for NULL */ + } + *out_command = MACH_VOUCHER_ATTR_NOOP; /* cannot be used to regenerate value */ + *in_out_content_size = size; + return KERN_SUCCESS; +} + +/* + * Routine: ipc_importance_command [Voucher Attribute Manager Interface] + * Purpose: + * Run commands against the importance attribute value found in a voucher. + * No such commands are currently supported. + * Conditions: + * Nothing locked on entry. May block. + */ +static kern_return_t +ipc_importance_command( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_array_t values, + mach_msg_type_number_t value_count, + mach_voucher_attr_command_t command, + mach_voucher_attr_content_t in_content, + mach_voucher_attr_content_size_t in_content_size, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *out_content_size) +{ + ipc_importance_inherit_t inherit; + ipc_importance_task_t to_task; + uint32_t refs, *outrefsp; + mach_msg_type_number_t i; + uint32_t externcnt; + + IMPORTANCE_ASSERT_MANAGER(manager); + IMPORTANCE_ASSERT_KEY(key); + + if (in_content_size != sizeof(refs) || + (*out_content_size != 0 && *out_content_size != sizeof(refs))) { + return KERN_INVALID_ARGUMENT; + } + refs = *(uint32_t *)(void *)in_content; + outrefsp = (*out_content_size != 0) ? (uint32_t *)(void *)out_content : NULL; + + if (MACH_VOUCHER_IMPORTANCE_ATTR_DROP_EXTERNAL != command) { + return KERN_NOT_SUPPORTED; + } + + /* the first non-default value of the apropos type provides the data */ + inherit = III_NULL; + for (i = 0; i < value_count; i++) { + ipc_importance_elem_t elem = (ipc_importance_elem_t)values[i]; + + if (IIE_NULL != elem && IIE_TYPE_INHERIT == IIE_TYPE(elem)) { + inherit = (ipc_importance_inherit_t)elem; + break; + } + } + if (III_NULL == inherit) { + return KERN_INVALID_ARGUMENT; + } + + ipc_importance_lock(); + + if (0 == refs) { + if (NULL != outrefsp) { + *outrefsp = III_EXTERN(inherit); + } + ipc_importance_unlock(); + return KERN_SUCCESS; + } + + /* Enough external references left to drop? 
*/ + if (III_EXTERN(inherit) < refs) { + ipc_importance_unlock(); + return KERN_FAILURE; + } + + to_task = inherit->iii_to_task; + assert(ipc_importance_task_is_any_receiver_type(to_task)); + + /* re-base external and internal counters at the inherit and the to-task (if apropos) */ + if (inherit->iii_donating) { + assert(IIT_EXTERN(to_task) >= III_EXTERN(inherit)); + assert(to_task->iit_externcnt >= inherit->iii_externcnt); + assert(to_task->iit_externdrop >= inherit->iii_externdrop); + inherit->iii_externdrop += refs; + to_task->iit_externdrop += refs; + externcnt = III_EXTERN(inherit); + if (0 == externcnt) { + inherit->iii_donating = FALSE; + to_task->iit_externcnt -= inherit->iii_externcnt; + to_task->iit_externdrop -= inherit->iii_externdrop; + + + /* Start AppNap delay hysteresis - even if not the last boost for the task. */ + if (ipc_importance_delayed_drop_call != NULL && + ipc_importance_task_is_marked_denap_receiver(to_task)) { + ipc_importance_task_delayed_drop(to_task); + } + + /* drop task assertions associated with the dropped boosts */ + if (ipc_importance_task_check_transition(to_task, IIT_UPDATE_DROP, refs)) { + ipc_importance_task_propagate_assertion_locked(to_task, IIT_UPDATE_DROP, TRUE); + /* may have dropped and retaken importance lock */ + } + } else { + /* assert(to_task->iit_assertcnt >= refs + externcnt); */ + /* defensive deduction in case of assertcnt underflow */ + if (to_task->iit_assertcnt > refs + externcnt) { + to_task->iit_assertcnt -= refs; + } else { + to_task->iit_assertcnt = externcnt; + } + } + } else { + inherit->iii_externdrop += refs; + externcnt = III_EXTERN(inherit); + } + + /* capture result (if requested) */ + if (NULL != outrefsp) { + *outrefsp = externcnt; + } + + ipc_importance_unlock(); + return KERN_SUCCESS; +} + +/* + * Routine: ipc_importance_manager_release [Voucher Attribute Manager Interface] + * Purpose: + * Release the Voucher system's reference on the IPC importance attribute + * manager. + * Conditions: + * As this can only occur after the manager drops the Attribute control + * reference granted back at registration time, and that reference is never + * dropped, this should never be called. + */ +static void +ipc_importance_manager_release( + ipc_voucher_attr_manager_t __assert_only manager) +{ + IMPORTANCE_ASSERT_MANAGER(manager); + panic("Voucher importance manager released"); +} + +/* + * Routine: ipc_importance_init + * Purpose: + * Initialize the IPC importance manager. + * Conditions: + * Zones and Vouchers are already initialized. 
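+ *
+ * [Editor's note - illustrative arithmetic, example limits hypothetical]
+ * Each zone below is sized for (task_max + thread_max) * 2 elements;
+ * e.g. if task_max were 1024 and thread_max 2560, that would allow
+ * (1024 + 2560) * 2 = 7168 importance elements of each kind.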
+ */ +void +ipc_importance_init(void) +{ + natural_t ipc_importance_max = (task_max + thread_max) * 2; + char temp_buf[26]; + kern_return_t kr; + + if (PE_parse_boot_argn("imp_interactive_receiver", temp_buf, sizeof(temp_buf))) { + ipc_importance_interactive_receiver = TRUE; + } + + ipc_importance_task_zone = zinit(sizeof(struct ipc_importance_task), + ipc_importance_max * sizeof(struct ipc_importance_task), + sizeof(struct ipc_importance_task), + "ipc task importance"); + zone_change(ipc_importance_task_zone, Z_NOENCRYPT, TRUE); + + ipc_importance_inherit_zone = zinit(sizeof(struct ipc_importance_inherit), + ipc_importance_max * sizeof(struct ipc_importance_inherit), + sizeof(struct ipc_importance_inherit), + "ipc importance inherit"); + zone_change(ipc_importance_inherit_zone, Z_NOENCRYPT, TRUE); + + +#if DEVELOPMENT || DEBUG + queue_init(&global_iit_alloc_queue); +#endif + + /* initialize global locking */ + ipc_importance_lock_init(); + + kr = ipc_register_well_known_mach_voucher_attr_manager(&ipc_importance_manager, + (mach_voucher_attr_value_handle_t)0, + MACH_VOUCHER_ATTR_KEY_IMPORTANCE, + &ipc_importance_control); + if (KERN_SUCCESS != kr) + printf("Voucher importance manager register returned %d\n", kr); +} + +/* + * Routine: ipc_importance_thread_call_init + * Purpose: + * Initialize the IPC importance code dependent upon + * thread-call support being available. + * Conditions: + * Thread-call mechanism is already initialized. + */ +void +ipc_importance_thread_call_init(void) +{ + /* initialize delayed drop queue and thread-call */ + queue_init(&ipc_importance_delayed_drop_queue); + ipc_importance_delayed_drop_call = + thread_call_allocate(ipc_importance_task_delayed_drop_scan, NULL); + if (NULL == ipc_importance_delayed_drop_call) { + panic("ipc_importance_thread_call_init"); + } +} + +/* + * Routine: task_importance_list_pids + * Purpose: list pids to which the given task is donating importance. + * Conditions: To be called only from kdp stackshot code. + * Will panic the system otherwise.
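+ *
+ * [Editor's sketch - hypothetical stackshot-path caller, not part of
+ * the original change]
+ *
+ *	int pids[32];
+ *	int n = task_importance_list_pids(task,
+ *	    TASK_IMP_LIST_DONATING_PIDS, pids, 32);
+ *	-- pids[0..n-1] now hold pids receiving donations from task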
+ */ +extern int +task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int max_count) +{ + if (lck_spin_is_acquired(&ipc_importance_lock_data) || + max_count < 1 || + task->task_imp_base == IIT_NULL || + pid_list == NULL || + flags != TASK_IMP_LIST_DONATING_PIDS) { + return 0; + } + unsigned int pidcount = 0; + task_t temp_task; + ipc_importance_task_t task_imp = task->task_imp_base; + ipc_kmsg_t temp_kmsg; + ipc_importance_inherit_t temp_inherit; + ipc_importance_elem_t elem; + int target_pid; + + queue_iterate(&task_imp->iit_inherits, temp_inherit, ipc_importance_inherit_t, iii_inheritance) { + /* check space in buffer */ + if (pidcount >= max_count) + break; + target_pid = -1; + + if (temp_inherit->iii_donating) { + +#if DEVELOPMENT || DEBUG + target_pid = temp_inherit->iii_to_task->iit_bsd_pid; +#else + temp_task = temp_inherit->iii_to_task->iit_task; + if (temp_task != TASK_NULL) { + target_pid = audit_token_pid_from_task(temp_task); + } +#endif + } + + if (target_pid != -1) { + pid_list[pidcount++] = target_pid; + } + + } + + queue_iterate(&task_imp->iit_kmsgs, temp_kmsg, ipc_kmsg_t, ikm_inheritance) { + if (pidcount >= max_count) + break; + target_pid = -1; + elem = temp_kmsg->ikm_importance; + temp_task = TASK_NULL; + + if (elem == IIE_NULL) { + continue; + } + + if (!(temp_kmsg->ikm_header && MACH_MSGH_BITS_RAISED_IMPORTANCE(temp_kmsg->ikm_header->msgh_bits))) { + continue; + } + + if (IIE_TYPE_TASK == IIE_TYPE(elem) && + (((ipc_importance_task_t)elem)->iit_task != TASK_NULL)) { + target_pid = audit_token_pid_from_task(((ipc_importance_task_t)elem)->iit_task); + } else { + temp_inherit = (ipc_importance_inherit_t)elem; +#if DEVELOPMENT || DEBUG + target_pid = temp_inherit->iii_to_task->iit_bsd_pid; +#else + temp_task = temp_inherit->iii_to_task->iit_task; + if (temp_task != TASK_NULL) { + target_pid = audit_token_pid_from_task(temp_task); + } +#endif + } + + if (target_pid != -1) { + pid_list[pidcount++] = target_pid; + } + } + + return pidcount; +} + diff --git a/osfmk/ipc/ipc_importance.h b/osfmk/ipc/ipc_importance.h new file mode 100644 index 000000000..f811faf16 --- /dev/null +++ b/osfmk/ipc/ipc_importance.h @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _IPC_IPC_IMPORTANCE_H_ +#define _IPC_IPC_IMPORTANCE_H_ + +#include +#include +#include +#include +#include + +/* + * IPC Importance - All definitions are MACH_KERNEL_PRIVATE + */ +#ifdef MACH_KERNEL_PRIVATE + +#include +#include + +/* + * IPC Importance Value Element + * + * This element represents a single task's (base) importance, + * or in the case of inherited importance, the inheritance + * linkage from the source to the destination task. In the + * inheritance case, this source can be a base importance or + * another inherited importance. + * + * Whenever the task importance is adjusted, it walks the + * list of IPC-related items it influences (ports and downstream + * tasks doing work on its behalf) and makes adjustments to + * their importance attributes accordingly. + * + */ + +struct ipc_importance_elem { + uint32_t iie_bits; /* type and refs */ + mach_voucher_attr_value_reference_t iie_made; /* references given to vouchers */ + queue_head_t iie_kmsgs; /* list of kmsgs inheriting from this */ + queue_head_t iie_inherits; /* list of inherit elems hung off this */ + uint32_t iie_externcnt; /* number of externalized boosts */ + uint32_t iie_externdrop; /* number of those dropped already */ +#define IIE_REF_DEBUG 0 +#if IIE_REF_DEBUG + uint32_t iie_refs_added; /* all refs added via all means */ + uint32_t iie_refs_dropped; /* all refs dropped via all means */ + uint32_t iie_kmsg_refs_added; /* all refs added by kmsgs taking a ref */ + uint32_t iie_kmsg_refs_inherited; /* kmsg refs consumed by a new inherit */ + uint32_t iie_kmsg_refs_coalesced; /* kmsg refs coalesced into an existing inherit */ + uint32_t iie_kmsg_refs_dropped; /* kmsg refs dropped by not accepting msg importance */ + uint32_t iie_task_refs_added; /* refs added by a task reference call */ + uint32_t iie_task_refs_added_inherit_from; /* task references added by inherit from */ + uint32_t iie_task_refs_added_transition; /* task references added by imp transition code */ + uint32_t iie_task_refs_self_added; /* task refs added by self-boost */ + uint32_t iie_task_refs_inherited; /* task refs consumed by a new inherit */ + uint32_t iie_task_refs_coalesced; /* task refs coalesced into an existing inherit */ + uint32_t iie_task_refs_dropped; /* all refs dropped via all task means */ +#endif +}; + +#define IIE_TYPE_MASK 0x80000000 /* Just the high bit for now */ +#define IIE_TYPE_TASK 0x00000000 /* Element is a task element */ +#define IIE_TYPE_INHERIT 0x80000000 /* Element inherits from a previous element */ +#define IIE_TYPE(e) ((e)->iie_bits & IIE_TYPE_MASK) + +#define IIE_REFS_MASK 0x7FFFFFFF /* Mask to extract references */ +#define IIE_REFS_MAX 0x7FFFFFFF +#define IIE_REFS(e) ((e)->iie_bits & IIE_REFS_MASK) + +#define IIE_EXTERN(e) ((e)->iie_externcnt - (e)->iie_externdrop) + +#if !IIE_REF_DEBUG +#define ipc_importance_reference_internal(elem) \ + (hw_atomic_add(&(elem)->iie_bits, 1) & IIE_REFS_MASK) + +#define ipc_importance_release_internal(elem) \ + (hw_atomic_sub(&(elem)->iie_bits, 1) & IIE_REFS_MASK) +#endif + +struct ipc_importance_task { + struct ipc_importance_elem iit_elem; /* common element parts */ + task_t iit_task; /* task associated with */ + queue_t iit_updateq; /* queue chained on for task policy updates */ + queue_chain_t iit_updates; /* link on update chain */ + queue_chain_t iit_props; /* link on propagation chain */ + uint64_t iit_updatetime; /* timestamp of our last policy update request */ + uint64_t iit_transitions;/* total number of
boost transitions (lifetime) */ + uint32_t iit_assertcnt; /* net number of boost assertions (internal, external and legacy) */ + uint32_t iit_legacy_externcnt; /* Legacy external boost count */ + uint32_t iit_legacy_externdrop; /* Legacy external boost drop count */ + uint32_t iit_receiver:1, /* the task can receive importance boost */ + iit_denap:1, /* the task can be awoken from App Nap */ + iit_donor:1, /* the task always sends boosts regardless of boost status */ + iit_live_donor:1, /* the task temporarily sends boosts regardless of boost status */ + iit_updatepolicy:1, /* enqueue for policy update at the end of propagation */ + iit_reserved:3, /* reserved for future use */ + iit_filelocks:24; /* number of file lock boosts */ +#if DEVELOPMENT || DEBUG + char iit_procname[20]; /* name of proc */ + uint32_t iit_bsd_pid; /* pid of proc creating this iit */ + queue_chain_t iit_allocation; /* link on global iit allocation chain */ +#endif + +}; +#define iit_bits iit_elem.iie_bits +#define iit_made iit_elem.iie_made +#define iit_kmsgs iit_elem.iie_kmsgs +#define iit_inherits iit_elem.iie_inherits +#define iit_externcnt iit_elem.iie_externcnt +#define iit_externdrop iit_elem.iie_externdrop + +#define IIT_REFS_MAX IIE_REFS_MAX +#define IIT_REFS(t) IIE_REFS(&(t)->iit_elem) +#define IIT_EXTERN(t) IIE_EXTERN(&(t)->iit_elem) +#define IIT_LEGACY_EXTERN(t) ((t)->iit_legacy_externcnt - (t)->iit_legacy_externdrop) + +#if !IIE_REF_DEBUG +#define ipc_importance_task_reference_internal(task_imp) \ + (ipc_importance_reference_internal(&(task_imp)->iit_elem)) + +#define ipc_importance_task_release_internal(task_imp) \ + (assert(1 < IIT_REFS(task_imp)), ipc_importance_release_internal(&(task_imp)->iit_elem)) +#endif + +typedef int iit_update_type_t; +#define IIT_UPDATE_HOLD ((iit_update_type_t)1) +#define IIT_UPDATE_DROP ((iit_update_type_t)2) + +struct ipc_importance_inherit { + struct ipc_importance_elem iii_elem; /* common element parts */ + boolean_t iii_donating; /* is this donating importance */ + uint32_t iii_depth; /* nesting depth */ + ipc_importance_task_t iii_to_task; /* donating to */ + ipc_importance_elem_t iii_from_elem; /* other elem contributing */ + queue_chain_t iii_inheritance; /* inherited from link */ +}; +#define iii_bits iii_elem.iie_bits +#define iii_made iii_elem.iie_made +#define iii_kmsgs iii_elem.iie_kmsgs +#define iii_inherits iii_elem.iie_inherits +#define iii_externcnt iii_elem.iie_externcnt +#define iii_externdrop iii_elem.iie_externdrop +#define III_REFS_MAX IIE_REFS_MAX +#define III_REFS(i) IIE_REFS(&(i)->iii_elem) +#define III_EXTERN(i) IIE_EXTERN(&(i)->iii_elem) + +#define III_DEPTH_RESET 0x80000000 +#define III_DEPTH_MASK 0x000000FF +#define III_DEPTH(i) ((i)->iii_depth & III_DEPTH_MASK) +#define III_DEPTH_MAX 32 /* maximum inherit->inherit chain depth */ + +#define ipc_importance_inherit_reference_internal(inherit) \ + (ipc_importance_reference_internal(&(inherit)->iii_elem)) + +__BEGIN_DECLS + +/* add a reference to an importance attribute */ +extern void ipc_importance_reference(ipc_importance_elem_t elem); + +/* release an importance attribute reference */ +extern void ipc_importance_release(ipc_importance_elem_t elem); + +/* retain a task importance attribute reference */ +extern void ipc_importance_task_reference(ipc_importance_task_t task_elem); + +/* release a task importance attribute reference */ +extern void ipc_importance_task_release(ipc_importance_task_t task_imp); + +/* reset the influence of the task on the importance */ +extern void
ipc_importance_reset(ipc_importance_task_t task_imp, boolean_t donor); + +extern ipc_importance_task_t ipc_importance_for_task(task_t task, boolean_t made); +extern void ipc_importance_disconnect_task(task_t task); + +extern boolean_t ipc_importance_task_is_donor(ipc_importance_task_t task_imp); +extern boolean_t ipc_importance_task_is_never_donor(ipc_importance_task_t task_imp); +extern boolean_t ipc_importance_task_is_marked_donor(ipc_importance_task_t task_imp); +extern boolean_t ipc_importance_task_is_marked_live_donor(ipc_importance_task_t task_imp); + +extern void ipc_importance_task_mark_donor(ipc_importance_task_t task_imp, boolean_t donating); +extern void ipc_importance_task_mark_live_donor(ipc_importance_task_t task_imp, boolean_t live_donating); +extern void ipc_importance_task_update_live_donor(ipc_importance_task_t task_imp); + +extern boolean_t ipc_importance_task_is_marked_receiver(ipc_importance_task_t task_imp); +extern void ipc_importance_task_mark_receiver(ipc_importance_task_t task_imp, boolean_t receiving); + +extern boolean_t ipc_importance_task_is_denap_receiver(ipc_importance_task_t task_imp); +extern boolean_t ipc_importance_task_is_marked_denap_receiver(ipc_importance_task_t task_imp); +extern void ipc_importance_task_mark_denap_receiver(ipc_importance_task_t task_imp, boolean_t receiving); + +extern boolean_t ipc_importance_task_is_any_receiver_type(ipc_importance_task_t task_imp); + +extern kern_return_t ipc_importance_task_hold_internal_assertion(ipc_importance_task_t task_imp, uint32_t count); +extern kern_return_t ipc_importance_task_drop_internal_assertion(ipc_importance_task_t task_imp, uint32_t count); + +extern kern_return_t ipc_importance_task_hold_file_lock_assertion(ipc_importance_task_t task_imp, uint32_t count); +extern kern_return_t ipc_importance_task_drop_file_lock_assertion(ipc_importance_task_t task_imp, uint32_t count); + +extern kern_return_t ipc_importance_task_hold_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count); +extern kern_return_t ipc_importance_task_drop_legacy_external_assertion(ipc_importance_task_t task_imp, uint32_t count); + +/* prepare importance attributes for sending */ +extern boolean_t ipc_importance_send( + ipc_kmsg_t kmsg, + mach_msg_option_t option); + +/* receive importance attributes from message */ +extern void ipc_importance_receive( + ipc_kmsg_t kmsg, + mach_msg_option_t option); + +/* undo receive of importance attributes from message */ +extern void ipc_importance_unreceive( + ipc_kmsg_t kmsg, + mach_msg_option_t option); + +/* clean importance attributes out of destroyed message */ +extern void ipc_importance_clean(ipc_kmsg_t kmsg); + +/* assert a message is clean w.r.t. 
importance attributes */ +extern void ipc_importance_assert_clean(ipc_kmsg_t kmsg); + +/* initialize the ipc importance subsystem */ +extern void ipc_importance_init(void); + +/* initialize the ipc importance delayed calls */ +extern void ipc_importance_thread_call_init(void); + +#if DEVELOPMENT || DEBUG +extern void task_importance_update_owner_info(task_t task); +#endif + +#if XNU_KERNEL_PRIVATE +#define TASK_IMP_LIST_DONATING_PIDS 0x1 +extern int task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int max_count); +#endif + +__END_DECLS + +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* _IPC_IPC_IMPORTANCE_H_ */ diff --git a/osfmk/ipc/ipc_init.c b/osfmk/ipc/ipc_init.c index 879748a89..2211d1b18 100644 --- a/osfmk/ipc/ipc_init.c +++ b/osfmk/ipc/ipc_init.c @@ -79,6 +79,7 @@ #include #include +#include #include #include #include @@ -100,9 +101,10 @@ #include #include #include +#include +#include #include /* NDR_record */ -#include vm_map_t ipc_kernel_map; vm_size_t ipc_kernel_map_size = 1024 * 1024; @@ -190,17 +192,6 @@ ipc_bootstrap(void) "ipc kmsgs"); zone_change(ipc_kmsg_zone, Z_CALLERACCT, FALSE); -#if CONFIG_MACF_MACH - ipc_labelh_zone = - zinit(sizeof(struct ipc_labelh), - ipc_port_max * sizeof(struct ipc_labelh), - sizeof(struct ipc_labelh), - "label handles"); - /* cant charge callers for label allocations (port refs passed) */ - zone_change(ipc_labelh_zone, Z_CALLERACCT, FALSE); - -#endif - /* create special spaces */ kr = ipc_space_create_special(&ipc_space_kernel); @@ -217,6 +208,11 @@ ipc_bootstrap(void) #endif mig_init(); ipc_table_init(); + ipc_voucher_init(); + +#if IMPORTANCE_INHERITANCE + ipc_importance_init(); +#endif semaphore_init(); mk_timer_init(); @@ -270,4 +266,20 @@ ipc_init(void) } ipc_host_init(); + +} + + +/* + * Routine: ipc_thread_call_init + * Purpose: + * Initialize IPC logic that needs thread call support + */ + +void +ipc_thread_call_init(void) +{ +#if IMPORTANCE_INHERITANCE + ipc_importance_thread_call_init(); +#endif } diff --git a/osfmk/ipc/ipc_init.h b/osfmk/ipc/ipc_init.h index 30e916a28..777c7e482 100644 --- a/osfmk/ipc/ipc_init.h +++ b/osfmk/ipc/ipc_init.h @@ -123,7 +123,10 @@ extern int ipc_pset_max; /* IPC initialization needed before creation of kernel task */ extern void ipc_bootstrap(void); -/* Remaining IPC initialization */ +/* Remaining IPC initialization (not thread based) */ extern void ipc_init(void); +/* IPC initialization dependent on thread call support */ +extern void ipc_thread_call_init(void); + #endif /* _IPC_IPC_INIT_H_ */ diff --git a/osfmk/ipc/ipc_kmsg.c b/osfmk/ipc/ipc_kmsg.c index 70ae8f7ef..1005a37d9 100644 --- a/osfmk/ipc/ipc_kmsg.c +++ b/osfmk/ipc/ipc_kmsg.c @@ -111,6 +111,7 @@ #include #include #include +#include #include @@ -134,9 +135,9 @@ typedef struct { mach_msg_bits_t msgh_bits; mach_msg_size_t msgh_size; - uint32_t msgh_remote_port; - uint32_t msgh_local_port; - mach_msg_size_t msgh_reserved; + mach_port_name_t msgh_remote_port; + mach_port_name_t msgh_local_port; + mach_port_name_t msgh_voucher_port; mach_msg_id_t msgh_id; } mach_msg_legacy_header_t; @@ -167,6 +168,7 @@ typedef union #pragma pack() #define LEGACY_HEADER_SIZE_DELTA ((mach_msg_size_t)(sizeof(mach_msg_header_t) - sizeof(mach_msg_legacy_header_t))) + // END LP64 fixes @@ -473,13 +475,18 @@ ipc_msg_print_untyped64( } #define DEBUG_IPC_KMSG_PRINT(kmsg,string) \ + __unreachable_ok_push \ if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { \ ipc_kmsg_print64(kmsg, string); \ - } + } \ + __unreachable_ok_pop + #define 
DEBUG_IPC_MSG_BODY_PRINT(body,size) \ + __unreachable_ok_push \ if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { \ ipc_msg_body_print64(body,size);\ - } + } \ + __unreachable_ok_pop #else /* !DEBUG_MSGS_K64 */ #define DEBUG_IPC_KMSG_PRINT(kmsg,string) #define DEBUG_IPC_MSG_BODY_PRINT(body,size) @@ -611,7 +618,7 @@ ipc_kmsg_alloc( max_expanded_size = msg_and_trailer_size + max_desc; } else - max_expanded_size = msg_and_trailer_size; + max_expanded_size = msg_and_trailer_size; if (max_expanded_size < IKM_SAVED_MSG_SIZE) max_expanded_size = IKM_SAVED_MSG_SIZE; /* round up for ikm_cache */ @@ -664,12 +671,11 @@ ipc_kmsg_free( mach_msg_size_t size = kmsg->ikm_size; ipc_port_t port; -#if CONFIG_MACF_MACH - if (kmsg->ikm_sender != NULL) { - task_deallocate(kmsg->ikm_sender); - kmsg->ikm_sender = NULL; - } -#endif + assert(!IP_VALID(kmsg->ikm_voucher)); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC,MACH_IPC_KMSG_FREE) | DBG_FUNC_NONE, + VM_KERNEL_ADDRPERM((uintptr_t)kmsg), + 0, 0, 0, 0); /* * Check to see if the message is bound to the port. If so, @@ -992,6 +998,9 @@ ipc_kmsg_clean_partial( ipc_object_t object; mach_msg_bits_t mbits = kmsg->ikm_header->msgh_bits; + /* deal with importance chain while we still have dest and voucher references */ + ipc_importance_clean(kmsg); + object = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; assert(IO_VALID(object)); ipc_object_destroy_dest(object, MACH_MSGH_BITS_REMOTE(mbits)); @@ -1000,6 +1009,13 @@ if (IO_VALID(object)) ipc_object_destroy(object, MACH_MSGH_BITS_LOCAL(mbits)); + object = (ipc_object_t) kmsg->ikm_voucher; + if (IO_VALID(object)) { + assert(MACH_MSGH_BITS_VOUCHER(mbits) == MACH_MSG_TYPE_MOVE_SEND); + ipc_object_destroy(object, MACH_MSG_TYPE_PORT_SEND); + kmsg->ikm_voucher = IP_NULL; + } + if (paddr) { (void) vm_deallocate(ipc_kernel_copy_map, paddr, length); } @@ -1023,6 +1039,9 @@ ipc_kmsg_clean( ipc_object_t object; mach_msg_bits_t mbits; + /* deal with importance chain while we still have dest and voucher references */ + ipc_importance_clean(kmsg); + mbits = kmsg->ikm_header->msgh_bits; object = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; if (IO_VALID(object)) @@ -1032,6 +1051,13 @@ object = (ipc_object_t) kmsg->ikm_header->msgh_local_port; if (IO_VALID(object)) ipc_object_destroy(object, MACH_MSGH_BITS_LOCAL(mbits)); + object = (ipc_object_t) kmsg->ikm_voucher; + if (IO_VALID(object)) { + assert(MACH_MSGH_BITS_VOUCHER(mbits) == MACH_MSG_TYPE_MOVE_SEND); + ipc_object_destroy(object, MACH_MSG_TYPE_PORT_SEND); + kmsg->ikm_voucher = IP_NULL; + } + if (mbits & MACH_MSGH_BITS_COMPLEX) { mach_msg_body_t *body; @@ -1039,13 +1065,6 @@ body = (mach_msg_body_t *) (kmsg->ikm_header + 1); ipc_kmsg_clean_body(kmsg, body->msgh_descriptor_count, (mach_msg_descriptor_t *)(body + 1)); } - -#if CONFIG_MACF_MACH - if (kmsg->ikm_sender != NULL) { - task_deallocate(kmsg->ikm_sender); - kmsg->ikm_sender = NULL; - } -#endif } /* @@ -1151,12 +1170,15 @@ ipc_kmsg_get( #if defined(__LP64__) size += LEGACY_HEADER_SIZE_DELTA; #endif + /* unreachable if !DEBUG */ + __unreachable_ok_push if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { unsigned int j; for (j=0; j<sizeof(legacy_base.header); j++) { kprintf("%02x\n", ((unsigned char*)&legacy_base.header)[j]); } } + __unreachable_ok_pop kmsg->ikm_header->msgh_bits = legacy_base.header.msgh_bits; kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(legacy_base.header.msgh_remote_port); kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(legacy_base.header.msgh_local_port); - kmsg->ikm_header->msgh_reserved = legacy_base.header.msgh_reserved; + kmsg->ikm_header->msgh_voucher_port = legacy_base.header.msgh_voucher_port;
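The LEGACY_HEADER_SIZE_DELTA adjustment above exists because the in-kernel LP64 header carries 64-bit port pointers where the user wire format carries 32-bit names. A stand-alone model (local struct definitions mirroring the packed layouts shown earlier, not the real Mach headers) shows where the 8 bytes come from:

    #include <stdint.h>
    #include <stdio.h>

    #pragma pack(4)
    typedef struct {            /* in-kernel LP64 header (modeled) */
        uint32_t bits, size;
        uint64_t remote_port, local_port;   /* ipc_port_t pointers */
        uint32_t voucher_port, id;
    } kernel_header_t;

    typedef struct {            /* 32-bit user wire format (modeled) */
        uint32_t bits, size;
        uint32_t remote_port, local_port;   /* mach_port_name_t */
        uint32_t voucher_port, id;
    } legacy_header_t;
    #pragma pack()

    int main(void)
    {
        /* Prints 8 on LP64, matching LEGACY_HEADER_SIZE_DELTA. */
        printf("delta = %zu\n", sizeof(kernel_header_t) - sizeof(legacy_header_t));
        return 0;
    }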
kmsg->ikm_header->msgh_id = legacy_base.header.msgh_id; DEBUG_KPRINT_SYSCALL_IPC("ipc_kmsg_get header:\n" @@ -1175,13 +1197,13 @@ ipc_kmsg_get( " bits: 0x%.8x\n" " remote_port: %p\n" " local_port: %p\n" - " reserved: 0x%.8x\n" + " voucher_port: 0x%.8x\n" " id: %.8d\n", kmsg->ikm_header->msgh_size, kmsg->ikm_header->msgh_bits, kmsg->ikm_header->msgh_remote_port, kmsg->ikm_header->msgh_local_port, - kmsg->ikm_header->msgh_reserved, + kmsg->ikm_header->msgh_voucher_port, kmsg->ikm_header->msgh_id); if (copyinmsg(msg_addr, (char *)(kmsg->ikm_header + 1), size - (mach_msg_size_t)sizeof(mach_msg_header_t))) { @@ -1189,6 +1211,8 @@ ipc_kmsg_get( return MACH_SEND_INVALID_DATA; } + /* unreachable if !DEBUG */ + __unreachable_ok_push if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { kprintf("body: size: %lu\n", (size - sizeof(mach_msg_header_t))); @@ -1198,6 +1222,7 @@ ipc_kmsg_get( kprintf("%.4x\n",((uint32_t *)(kmsg->ikm_header + 1))[i]); } } + __unreachable_ok_pop DEBUG_IPC_KMSG_PRINT(kmsg, "ipc_kmsg_get()"); /* @@ -1218,18 +1243,7 @@ ipc_kmsg_get( (unsigned int)kmsg->ikm_header->msgh_local_port, 0); #endif -#if CONFIG_MACF_MACH - /* XXX - why do we zero sender labels here instead of in mach_msg()? */ - task_t cur = current_task(); - if (cur) { - task_reference(cur); - kmsg->ikm_sender = cur; - } else - trailer->msgh_labels.sender = 0; -#else trailer->msgh_labels.sender = 0; -#endif - *kmsgp = kmsg; return MACH_MSG_SUCCESS; } @@ -1328,9 +1342,6 @@ ipc_kmsg_get_from_kernel( trailer->msgh_labels.sender = 0; -#if CONFIG_MACF_MACH - kmsg->ikm_sender = NULL; -#endif *kmsgp = kmsg; return MACH_MSG_SUCCESS; } @@ -1383,6 +1394,22 @@ ipc_kmsg_send( assert(IP_VALID(port)); ip_lock(port); +#if IMPORTANCE_INHERITANCE +retry: +#endif /* IMPORTANCE_INHERITANCE */ + /* + * Can't deliver to a dead port. + * However, we can pretend it got sent + * and was then immediately destroyed. + */ + if (!ip_active(port)) { + ip_unlock(port); + ip_release(port); /* JMM - Future: release right, not just ref */ + kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL; + ipc_kmsg_destroy(kmsg); + return MACH_MSG_SUCCESS; + } + if (port->ip_receiver == ipc_space_kernel) { /* @@ -1410,60 +1437,16 @@ ipc_kmsg_send( /* fall thru with reply - same options */ } -#if IMPORTANCE_INHERITANCE - retry: -#endif /* IMPORTANCE_INHERITANCE */ - - /* - * Can't deliver to a dead port. - * However, we can pretend it got sent - * and was then immediately destroyed. - */ - if (!ip_active(port)) { - /* - * We can't let ipc_kmsg_destroy deallocate - * the port right, because we might end up - * in an infinite loop trying to deliver - * a send-once notification. - */ - ip_unlock(port); - ip_release(port); - kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL; - ipc_kmsg_destroy(kmsg); - return MACH_MSG_SUCCESS; - } - #if IMPORTANCE_INHERITANCE /* * Need to see if this message needs importance donation and/or - * propagation. That routine can drop the port lock. If it does - * we'll have to revalidate the destination. + * propagation. That routine can drop the port lock temporarily. + * If it does we'll have to revalidate the destination. 
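The donation decision itself moves into ipc_importance_send() below; from user space the visible knobs are the send options this block tests. A hedged user-space sketch of the opt-out path (MACH_SEND_NOIMPORTANCE suppresses the boost even when the destination port accepts them; 'dest' is assumed to be a valid send right):

    #include <mach/mach.h>

    /* Send a trivial message while declining to donate importance. */
    static kern_return_t send_no_boost(mach_port_t dest)
    {
        mach_msg_header_t h = {
            .msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0),
            .msgh_size = sizeof(h),
            .msgh_remote_port = dest,
            .msgh_local_port = MACH_PORT_NULL,  /* no reply expected */
            .msgh_id = 200,
        };
        return mach_msg(&h, MACH_SEND_MSG | MACH_SEND_NOIMPORTANCE,
                        sizeof(h), 0, MACH_PORT_NULL,
                        MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
    }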
*/ - if ((did_importance == FALSE) && - (port->ip_impdonation != 0) && - ((option & MACH_SEND_NOIMPORTANCE) == 0) && - (((option & MACH_SEND_IMPORTANCE) != 0) || - (task_is_importance_donor(current_task())))) { - + if (did_importance == FALSE) { did_importance = TRUE; - kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_RAISEIMP; - -#if IMPORTANCE_DEBUG - if (kdebug_enable) { - mach_msg_max_trailer_t *dbgtrailer = (mach_msg_max_trailer_t *) - ((vm_offset_t)kmsg->ikm_header + round_msg(kmsg->ikm_header->msgh_size)); - sender_pid = dbgtrailer->msgh_audit.val[5]; - imp_msgh_id = kmsg->ikm_header->msgh_id; - - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_SEND)) | DBG_FUNC_START, - audit_token_pid_from_task(current_task()), sender_pid, imp_msgh_id, 0, 0); - } -#endif /* IMPORTANCE_DEBUG */ - - if (ipc_port_importance_delta(port, 1) == TRUE) { - ip_lock(port); - goto retry; - } + if (ipc_importance_send(kmsg, option)) + goto retry; } #endif /* IMPORTANCE_INHERITANCE */ @@ -1486,6 +1469,7 @@ ipc_kmsg_send( case MACH_SEND_TIMED_OUT: case MACH_SEND_NO_BUFFER: case MACH_SEND_INTERRUPTED: + case MACH_SEND_INVALID_DEST: /* * We still have the kmsg and its * reference on the port. But we @@ -1502,21 +1486,9 @@ ipc_kmsg_send( * to the old owner(s)). */ importance_cleared = 1; - ip_lock(port); - if (ipc_port_importance_delta(port, -1) == FALSE) - ip_unlock(port); + ipc_importance_clean(kmsg); break; - case MACH_SEND_INVALID_DEST: - /* - * In the case that the receive right has - * gone away, the assertion count for the - * message we were trying to enqueue was - * already subtracted from the destination - * task (as part of port destruction). - */ - break; - case MACH_MSG_SUCCESS: default: break; @@ -1533,6 +1505,7 @@ ipc_kmsg_send( * as a successful delivery (like we do for an inactive port). 
*/ if (error == MACH_SEND_INVALID_DEST) { + ip_release(port); /* JMM - Future: release right, not just ref */ kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL; ipc_kmsg_destroy(kmsg); return MACH_MSG_SUCCESS; @@ -1570,13 +1543,13 @@ ipc_kmsg_put( " bits: 0x%.8x\n" " remote_port: %p\n" " local_port: %p\n" - " reserved: 0x%.8x\n" + " voucher_port: 0x%.8x\n" " id: %.8d\n", kmsg->ikm_header->msgh_size, kmsg->ikm_header->msgh_bits, kmsg->ikm_header->msgh_remote_port, kmsg->ikm_header->msgh_local_port, - kmsg->ikm_header->msgh_reserved, + kmsg->ikm_header->msgh_voucher_port, kmsg->ikm_header->msgh_id); #if defined(__LP64__) @@ -1588,13 +1561,13 @@ ipc_kmsg_put( mach_msg_size_t msg_size = kmsg->ikm_header->msgh_size; mach_port_name_t remote_port = CAST_MACH_PORT_TO_NAME(kmsg->ikm_header->msgh_remote_port); mach_port_name_t local_port = CAST_MACH_PORT_TO_NAME(kmsg->ikm_header->msgh_local_port); - mach_msg_size_t reserved = kmsg->ikm_header->msgh_reserved; + mach_port_name_t voucher_port = kmsg->ikm_header->msgh_voucher_port; mach_msg_id_t id = kmsg->ikm_header->msgh_id; legacy_header->msgh_id = id; - legacy_header->msgh_reserved = reserved; - legacy_header->msgh_local_port = local_port; - legacy_header->msgh_remote_port = remote_port; + legacy_header->msgh_local_port = local_port; + legacy_header->msgh_remote_port = remote_port; + legacy_header->msgh_voucher_port = voucher_port; legacy_header->msgh_size = msg_size - LEGACY_HEADER_SIZE_DELTA; legacy_header->msgh_bits = bits; @@ -1603,6 +1576,8 @@ ipc_kmsg_put( } #endif + /* unreachable if !DEBUG */ + __unreachable_ok_push if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { kprintf("ipc_kmsg_put header+body: %d\n", (size)); uint32_t i; @@ -1612,6 +1587,7 @@ ipc_kmsg_put( } kprintf("type: %d\n", ((mach_msg_type_descriptor_t *)(((mach_msg_base_t *)kmsg->ikm_header)+1))->type); } + __unreachable_ok_pop if (copyoutmsg((const char *) kmsg->ikm_header, msg_addr, size)) mr = MACH_RCV_INVALID_DATA; else @@ -1668,21 +1644,31 @@ ipc_kmsg_put_to_kernel( mach_msg_return_t ipc_kmsg_copyin_header( - mach_msg_header_t *msg, + ipc_kmsg_t kmsg, ipc_space_t space, mach_msg_option_t *optionp) { + mach_msg_header_t *msg = kmsg->ikm_header; mach_msg_bits_t mbits = msg->msgh_bits & MACH_MSGH_BITS_USER; mach_port_name_t dest_name = CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port); mach_port_name_t reply_name = CAST_MACH_PORT_TO_NAME(msg->msgh_local_port); + mach_port_name_t voucher_name = MACH_PORT_NULL; kern_return_t kr; mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits); mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits); - ipc_object_t dest_port, reply_port; - ipc_entry_t dest_entry, reply_entry; - ipc_port_t dest_soright, reply_soright; + mach_msg_type_name_t voucher_type = MACH_MSGH_BITS_VOUCHER(mbits); + ipc_object_t dest_port = IO_NULL; + ipc_object_t reply_port = IO_NULL; + ipc_port_t dest_soright = IP_NULL; + ipc_port_t reply_soright = IP_NULL; + ipc_port_t voucher_soright = IP_NULL; ipc_port_t release_port = IP_NULL; + ipc_port_t voucher_port = IP_NULL; + ipc_port_t voucher_release_port = IP_NULL; + ipc_entry_t dest_entry = IE_NULL; + ipc_entry_t reply_entry = IE_NULL; + ipc_entry_t voucher_entry = IE_NULL; #if IMPORTANCE_INHERITANCE int assertcnt = 0; @@ -1702,119 +1688,116 @@ ipc_kmsg_copyin_header( !MACH_MSG_TYPE_PORT_ANY_SEND(reply_type))) return MACH_SEND_INVALID_HEADER; - reply_soright = IP_NULL; /* in case we go to invalid dest early */ + if (!MACH_PORT_VALID(dest_name)) + return MACH_SEND_INVALID_DEST; 
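From the sender's side, the voucher slot that the copyin code below validates is filled in the user header. A hedged user-space sketch (it assumes 'voucher' names a true voucher port, i.e. a send right whose kernel object is IKOT_VOUCHER; anything else fails the checks below with MACH_SEND_INVALID_VOUCHER):

    #include <mach/mach.h>

    /* Send a message that carries a voucher by copied send right. */
    static kern_return_t send_with_voucher(mach_port_t dest, mach_port_t voucher)
    {
        mach_msg_header_t h = {
            .msgh_bits = MACH_MSGH_BITS_SET(MACH_MSG_TYPE_COPY_SEND, 0,
                                            MACH_MSG_TYPE_COPY_SEND, 0),
            .msgh_size = sizeof(h),
            .msgh_remote_port = dest,
            .msgh_local_port = MACH_PORT_NULL,
            .msgh_voucher_port = voucher, /* only MOVE_SEND/COPY_SEND accepted */
            .msgh_id = 100,
        };
        return mach_msg(&h, MACH_SEND_MSG, sizeof(h), 0,
                        MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
    }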
is_write_lock(space); - if (!is_active(space)) - goto invalid_dest; - - if (!MACH_PORT_VALID(dest_name)) - goto invalid_dest; + if (!is_active(space)) { + is_write_unlock(space); + return MACH_SEND_INVALID_DEST; + } + /* space locked and active */ -#if CONFIG_MACF_MACH /* - * We do the port send check here instead of in ipc_kmsg_send() - * because copying the header involves copying the port rights too - * and we need to do the send check before anything is actually copied. + * If there is a voucher specified, make sure the disposition is + * valid and the entry actually refers to a voucher port. Don't + * actually copy in until we validate destination and reply. */ - dest_entry = ipc_entry_lookup(space, dest_name); - if (dest_entry != IE_NULL) { - int error = 0; - ipc_port_t port = (ipc_port_t) dest_entry->ie_object; - if (port == IP_NULL) - goto invalid_dest; - ip_lock(port); - if (ip_active(port)) { - task_t self = current_task(); - tasklabel_lock(self); - error = mac_port_check_send(&self->maclabel, - &port->ip_label); - tasklabel_unlock(self); + if (voucher_type != MACH_MSGH_BITS_ZERO) { + + voucher_name = msg->msgh_voucher_port; + + if (voucher_name == MACH_PORT_DEAD || + (voucher_type != MACH_MSG_TYPE_MOVE_SEND && + voucher_type != MACH_MSG_TYPE_COPY_SEND)) { + is_write_unlock(space); + return MACH_SEND_INVALID_VOUCHER; + } + + if (voucher_name != MACH_PORT_NULL) { + voucher_entry = ipc_entry_lookup(space, voucher_name); + if (voucher_entry == IE_NULL || + (voucher_entry->ie_bits & MACH_PORT_TYPE_SEND) == 0 || + io_kotype(voucher_entry->ie_object) != IKOT_VOUCHER) { + is_write_unlock(space); + return MACH_SEND_INVALID_VOUCHER; + } + } else { + voucher_type = MACH_MSG_TYPE_MOVE_SEND; } - ip_unlock(port); - if (error != 0) - goto invalid_dest; } -#endif - if (dest_name == reply_name) { - mach_port_name_t name = dest_name; + /* + * Handle combinations of validating destination and reply; along + * with copying in destination, reply, and voucher in an atomic way. + */ + + if (dest_name == voucher_name) { /* - * Destination and reply ports are the same! - * This is a little tedious to make atomic, because - * there are 25 combinations of dest_type/reply_type. - * However, most are easy. If either is move-sonce, - * then there must be an error. If either are - * make-send or make-sonce, then we must be looking - * at a receive right so the port can't die. - * The hard cases are the combinations of - * copy-send and make-send. + * If the destination name is the same as the voucher name, + * the voucher_entry must already be known. Either that or + * the destination name is MACH_PORT_NULL (i.e. invalid). */ - - dest_entry = ipc_entry_lookup(space, name); - if (dest_entry == IE_NULL) + dest_entry = voucher_entry; + if (dest_entry == IE_NULL) { goto invalid_dest; + } - reply_entry = dest_entry; - assert(reply_type != 0); /* because name not null */ - - if (!ipc_right_copyin_check(space, name, reply_entry, reply_type)) - goto invalid_reply; + /* + * Make sure a future copyin of the reply port will succeed. + * Once we start copying in the dest/voucher pair, we can't + * back out. + */ + if (MACH_PORT_VALID(reply_name)) { + assert(reply_type != 0); /* because reply_name not null */ - if ((dest_type == MACH_MSG_TYPE_MOVE_SEND_ONCE) || - (reply_type == MACH_MSG_TYPE_MOVE_SEND_ONCE)) { - /* - * Why must there be an error? To get a valid - * destination, this entry must name a live - * port (not a dead name or dead port). However - * a successful move-sonce will destroy a - * live entry. 
Therefore the other copyin, - * whatever it is, would fail. We've already - * checked for reply port errors above, - * so report a destination error. - */ + /* It is just WRONG if dest, voucher, and reply are all the same. */ + if (voucher_name == reply_name) { + goto invalid_reply; + } + reply_entry = ipc_entry_lookup(space, reply_name); + if (reply_entry == IE_NULL) { + goto invalid_reply; + } + assert(dest_entry != reply_entry); /* names are not equal */ + if (!ipc_right_copyin_check(space, reply_name, reply_entry, reply_type)) { + goto invalid_reply; + } + } + /* + * Do the joint copyin of the dest disposition and + * voucher disposition from the one entry/port. We + * already validated that the voucher copyin would + * succeed (above). So, any failure in combining + * the copyins can be blamed on the destination. + */ + kr = ipc_right_copyin_two(space, dest_name, dest_entry, + dest_type, voucher_type, + &dest_port, &dest_soright, + &release_port); + if (kr != KERN_SUCCESS) { + assert(kr != KERN_INVALID_CAPABILITY); goto invalid_dest; - } else if ((dest_type == MACH_MSG_TYPE_MAKE_SEND) || - (dest_type == MACH_MSG_TYPE_MAKE_SEND_ONCE) || - (reply_type == MACH_MSG_TYPE_MAKE_SEND) || - (reply_type == MACH_MSG_TYPE_MAKE_SEND_ONCE)) { - -#if IMPORTANCE_INHERITANCE - kr = ipc_right_copyin(space, name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - &assertcnt, - links); - assert(assertcnt == 0); -#else - kr = ipc_right_copyin(space, name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - links); -#endif /* IMPORTANCE_INHERITANCE */ - - if (kr != KERN_SUCCESS) - goto invalid_dest; - - /* - * Either dest or reply needs a receive right. - * We know the receive right is there, because - * of the copyin_check and copyin calls. Hence - * the port is not in danger of dying. If dest - * used the receive right, then the right needed - * by reply (and verified by copyin_check) will - * still be there. - */ - - assert(IO_VALID(dest_port)); - assert(dest_soright == IP_NULL); + } + voucher_port = (ipc_port_t)dest_port; + /* + * could not have been one of these dispositions, + * validated the port was a true kernel voucher port above, + * AND was successfully able to copyin both dest and voucher. + */ + assert(dest_type != MACH_MSG_TYPE_MAKE_SEND); + assert(dest_type != MACH_MSG_TYPE_MAKE_SEND_ONCE); + assert(dest_type != MACH_MSG_TYPE_MOVE_SEND_ONCE); + + /* + * Perform the delayed reply right copyin (guaranteed success). + */ + if (reply_entry != IE_NULL) { #if IMPORTANCE_INHERITANCE - kr = ipc_right_copyin(space, name, reply_entry, + kr = ipc_right_copyin(space, reply_name, reply_entry, reply_type, TRUE, &reply_port, &reply_soright, &release_port, @@ -1822,271 +1805,203 @@ ipc_kmsg_copyin_header( links); assert(assertcnt == 0); #else - kr = ipc_right_copyin(space, name, reply_entry, + kr = ipc_right_copyin(space, reply_name, reply_entry, reply_type, TRUE, &reply_port, &reply_soright, &release_port, links); #endif /* IMPORTANCE_INHERITANCE */ - assert(kr == KERN_SUCCESS); - assert(reply_port == dest_port); - assert(reply_entry->ie_bits & MACH_PORT_TYPE_RECEIVE); - assert(reply_soright == IP_NULL); - } else if ((dest_type == MACH_MSG_TYPE_COPY_SEND) && - (reply_type == MACH_MSG_TYPE_COPY_SEND)) { - /* - * To make this atomic, just do one copy-send, - * and dup the send right we get out. 
- */ - -#if IMPORTANCE_INHERITANCE - kr = ipc_right_copyin(space, name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - &assertcnt, - links); - assert(assertcnt == 0); -#else - kr = ipc_right_copyin(space, name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - links); -#endif /* IMPORTANCE_INHERITANCE */ - - if (kr != KERN_SUCCESS) - goto invalid_dest; - - assert(dest_entry->ie_bits & MACH_PORT_TYPE_SEND); - assert(dest_soright == IP_NULL); + } + } else { + if (dest_name == reply_name) { /* - * It's OK if the port we got is dead now, - * so reply_port is IP_DEAD, because the msg - * won't go anywhere anyway. + * Destination and reply ports are the same! + * This is very similar to the case where the + * destination and voucher ports were the same + * (except the reply port disposition is not + * previously validated). */ + dest_entry = ipc_entry_lookup(space, dest_name); + if (dest_entry == IE_NULL) { + goto invalid_dest; + } + reply_entry = dest_entry; + assert(reply_type != 0); /* because name not null */ - reply_port = (ipc_object_t) - ipc_port_copy_send((ipc_port_t) dest_port); - reply_soright = IP_NULL; - } else if ((dest_type == MACH_MSG_TYPE_MOVE_SEND) && - (reply_type == MACH_MSG_TYPE_MOVE_SEND)) { - /* - * This is an easy case. Just use our - * handy-dandy special-purpose copyin call - * to get two send rights for the price of one. + /* + * Do the joint copyin of the dest disposition and + * reply disposition from the one entry/port. */ - - kr = ipc_right_copyin_two(space, name, dest_entry, + kr = ipc_right_copyin_two(space, dest_name, dest_entry, + dest_type, reply_type, &dest_port, &dest_soright, &release_port); - if (kr != KERN_SUCCESS) + if (kr == KERN_INVALID_CAPABILITY) { + goto invalid_reply; + } else if (kr != KERN_SUCCESS) { goto invalid_dest; - - /* the entry might need to be deallocated */ - if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) { - ipc_entry_dealloc(space, name, dest_entry); - dest_entry = IE_NULL; } - reply_port = dest_port; - reply_soright = IP_NULL; + + } else { - ipc_port_t soright; + /* + * Handle destination and reply independently, as + * they are independent entries (even if the entries + * refer to the same port). + * + * This can be the tough case to make atomic. + * + * The difficult problem is serializing with port death. + * The bad case is when dest_port dies after its copyin, + * reply_port dies before its copyin, and dest_port dies before + * reply_port. Then the copyins operated as if dest_port was + * alive and reply_port was dead, which shouldn't have happened + * because they died in the other order. + * + * Note that it is easy for a user task to tell if + * a copyin happened before or after a port died. + * If a port dies before copyin, a dead-name notification + * is generated and the dead name's urefs are incremented, + * and if the copyin happens first, a port-deleted + * notification is generated. + * + * Even so, avoiding that potentially detectable race is too + * expensive - and no known code cares about it. So, we just + * do the expedient thing and copy them in one after the other. 
+ */ - assert(((dest_type == MACH_MSG_TYPE_COPY_SEND) && - (reply_type == MACH_MSG_TYPE_MOVE_SEND)) || - ((dest_type == MACH_MSG_TYPE_MOVE_SEND) && - (reply_type == MACH_MSG_TYPE_COPY_SEND))); + dest_entry = ipc_entry_lookup(space, dest_name); + if (dest_entry == IE_NULL) { + goto invalid_dest; + } + assert(dest_entry != voucher_entry); /* - * To make this atomic, just do a move-send, - * and dup the send right we get out. + * Make sure reply port entry is valid before dest copyin. */ + if (MACH_PORT_VALID(reply_name)) { + if (reply_name == voucher_name) { + goto invalid_reply; + } + reply_entry = ipc_entry_lookup(space, reply_name); + if (reply_entry == IE_NULL) { + goto invalid_reply; + } + assert(dest_entry != reply_entry); /* names are not equal */ + assert(reply_type != 0); /* because reply_name not null */ + + if (!ipc_right_copyin_check(space, reply_name, reply_entry, reply_type)) { + goto invalid_reply; + } + } + /* + * copyin the destination. + */ #if IMPORTANCE_INHERITANCE - kr = ipc_right_copyin(space, name, dest_entry, - MACH_MSG_TYPE_MOVE_SEND, FALSE, - &dest_port, &soright, + kr = ipc_right_copyin(space, dest_name, dest_entry, + dest_type, FALSE, + &dest_port, &dest_soright, &release_port, &assertcnt, links); assert(assertcnt == 0); #else - kr = ipc_right_copyin(space, name, dest_entry, - MACH_MSG_TYPE_MOVE_SEND, FALSE, - &dest_port, &soright, + kr = ipc_right_copyin(space, dest_name, dest_entry, + dest_type, FALSE, + &dest_port, &dest_soright, &release_port, links); #endif /* IMPORTANCE_INHERITANCE */ - - if (kr != KERN_SUCCESS) + if (kr != KERN_SUCCESS) { goto invalid_dest; - - /* the entry might need to be deallocated */ - - if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) { - ipc_entry_dealloc(space, name, dest_entry); - dest_entry = IE_NULL; } + assert(IO_VALID(dest_port)); + assert(!IP_VALID(release_port)); /* - * It's OK if the port we got is dead now, - * so reply_port is IP_DEAD, because the msg - * won't go anywhere anyway. + * Copyin the pre-validated reply right. + * It's OK if the reply right has gone dead in the meantime. */ - - reply_port = (ipc_object_t) - ipc_port_copy_send((ipc_port_t) dest_port); - - if (dest_type == MACH_MSG_TYPE_MOVE_SEND) { - dest_soright = soright; - reply_soright = IP_NULL; - } else { - dest_soright = IP_NULL; - reply_soright = soright; - } - } - } else if (!MACH_PORT_VALID(reply_name)) { - /* - * No reply port! This is an easy case - * to make atomic. Just copyin the destination. 
- */ - - dest_entry = ipc_entry_lookup(space, dest_name); - if (dest_entry == IE_NULL) - goto invalid_dest; - + if (MACH_PORT_VALID(reply_name)) { #if IMPORTANCE_INHERITANCE - kr = ipc_right_copyin(space, dest_name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - &assertcnt, - links); - assert(assertcnt == 0); + kr = ipc_right_copyin(space, reply_name, reply_entry, + reply_type, TRUE, + &reply_port, &reply_soright, + &release_port, + &assertcnt, + links); + assert(assertcnt == 0); #else - kr = ipc_right_copyin(space, dest_name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - links); + kr = ipc_right_copyin(space, reply_name, reply_entry, + reply_type, TRUE, + &reply_port, &reply_soright, + &release_port, + links); #endif /* IMPORTANCE_INHERITANCE */ - - if (kr != KERN_SUCCESS) - goto invalid_dest; - - /* the entry might need to be deallocated */ - if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) { - ipc_entry_dealloc(space, dest_name, dest_entry); - dest_entry = IE_NULL; + assert(kr == KERN_SUCCESS); + } else { + /* convert invalid name to equivalent ipc_object type */ + reply_port = (ipc_object_t)CAST_MACH_NAME_TO_PORT(reply_name); + } } - reply_port = (ipc_object_t)CAST_MACH_NAME_TO_PORT(reply_name); - reply_soright = IP_NULL; - } else { /* - * This is the tough case to make atomic. - * The difficult problem is serializing with port death. - * At the time we copyin dest_port, it must be alive. - * If reply_port is alive when we copyin it, then - * we are OK, because we serialize before the death - * of both ports. Assume reply_port is dead at copyin. - * Then if dest_port dies/died after reply_port died, - * we are OK, because we serialize between the death - * of the two ports. So the bad case is when dest_port - * dies after its copyin, reply_port dies before its - * copyin, and dest_port dies before reply_port. Then - * the copyins operated as if dest_port was alive - * and reply_port was dead, which shouldn't have happened - * because they died in the other order. - * - * Note that it is easy for a user task to tell if - * a copyin happened before or after a port died. - * For example, suppose both dest and reply are - * send-once rights (types are both move-sonce) and - * both rights have dead-name requests registered. - * If a port dies before copyin, a dead-name notification - * is generated and the dead name's urefs are incremented, - * and if the copyin happens first, a port-deleted - * notification is generated. - * - * Note that although the entries are different, - * dest_port and reply_port might still be the same. - * - * JMM - The code to handle this was too expensive and, anyway, - * we intend to separate the dest lookup from the reply copyin - * by a wide margin, so the user will have to learn to deal! - * I will be making the change soon in rdar://problem/6275821. + * Finally can copyin the voucher right now that dest and reply + * are fully copied in (guaranteed success). 
*/ - - dest_entry = ipc_entry_lookup(space, dest_name); - if (dest_entry == IE_NULL) - goto invalid_dest; - - reply_entry = ipc_entry_lookup(space, reply_name); - if (reply_entry == IE_NULL) - goto invalid_reply; - - assert(dest_entry != reply_entry); /* names are not equal */ - assert(reply_type != 0); /* because reply_name not null */ - - if (!ipc_right_copyin_check(space, reply_name, reply_entry, - reply_type)) - goto invalid_reply; - -#if IMPORTANCE_INHERITANCE - kr = ipc_right_copyin(space, dest_name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - &assertcnt, - links); - assert(assertcnt == 0); -#else - kr = ipc_right_copyin(space, dest_name, dest_entry, - dest_type, FALSE, - &dest_port, &dest_soright, - &release_port, - links); -#endif /* IMPORTANCE_INHERITANCE */ - - if (kr != KERN_SUCCESS) - goto invalid_dest; - - assert(IO_VALID(dest_port)); - + if (IE_NULL != voucher_entry) { #if IMPORTANCE_INHERITANCE - kr = ipc_right_copyin(space, reply_name, reply_entry, - reply_type, TRUE, - &reply_port, &reply_soright, - &release_port, - &assertcnt, - links); - assert(assertcnt == 0); + kr = ipc_right_copyin(space, voucher_name, voucher_entry, + voucher_type, FALSE, + (ipc_object_t *)&voucher_port, + &voucher_soright, + &voucher_release_port, + &assertcnt, + links); + assert(assertcnt == 0); #else - kr = ipc_right_copyin(space, reply_name, reply_entry, - reply_type, TRUE, - &reply_port, &reply_soright, - &release_port, - links); + kr = ipc_right_copyin(space, voucher_name, voucher_entry, + voucher_type, FALSE, + (ipc_object_t *)&voucher_port, + &voucher_soright, + &voucher_release_port, + links); #endif /* IMPORTANCE_INHERITANCE */ - - assert(kr == KERN_SUCCESS); - - /* the entries might need to be deallocated */ - - if (IE_BITS_TYPE(reply_entry->ie_bits) == MACH_PORT_TYPE_NONE) { - ipc_entry_dealloc(space, reply_name, reply_entry); - reply_entry = IE_NULL; + assert(KERN_SUCCESS == kr); + assert(IP_VALID(voucher_port)); + assert(ip_active(voucher_port)); } + } - if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) { - ipc_entry_dealloc(space, dest_name, dest_entry); - dest_entry = IE_NULL; - } + /* the entry(s) might need to be deallocated */ + assert(IE_NULL != dest_entry); + if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) { + ipc_entry_dealloc(space, dest_name, dest_entry); + dest_entry = IE_NULL; + } + if (dest_entry != reply_entry && IE_NULL != reply_entry && + IE_BITS_TYPE(reply_entry->ie_bits) == MACH_PORT_TYPE_NONE) { + ipc_entry_dealloc(space, reply_name, reply_entry); + reply_entry = IE_NULL; + } + if (dest_entry != voucher_entry && IE_NULL != voucher_entry && + IE_BITS_TYPE(voucher_entry->ie_bits) == MACH_PORT_TYPE_NONE) { + ipc_entry_dealloc(space, voucher_name, voucher_entry); + voucher_entry = IE_NULL; + } + + /* + * No room to store voucher port in in-kernel msg header, + * so we store it back in the kmsg itself. 
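Since the in-kernel header has no spare field, only the voucher's disposition survives in msgh_bits while the right itself rides in the kmsg. The three 5-bit disposition fields can be modeled stand-alone (mask values as in the 10.10-era mach/message.h; the constants 19, 21 and 17 are COPY_SEND, MAKE_SEND_ONCE and MOVE_SEND):

    #include <stdint.h>
    #include <stdio.h>

    /* Local model of the msgh_bits disposition layout. */
    #define BITS_REMOTE(b)  ((b) & 0x0000001fU)
    #define BITS_LOCAL(b)   (((b) & 0x00001f00U) >> 8)
    #define BITS_VOUCHER(b) (((b) & 0x001f0000U) >> 16)
    #define BITS_SET(r, l, v) \
        ((uint32_t)(r) | ((uint32_t)(l) << 8) | ((uint32_t)(v) << 16))

    int main(void)
    {
        uint32_t bits = BITS_SET(19, 21, 17);
        printf("dest=%u reply=%u voucher=%u\n",
               BITS_REMOTE(bits), BITS_LOCAL(bits), BITS_VOUCHER(bits));
        return 0;  /* prints: dest=19 reply=21 voucher=17 */
    }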
+ */ + if (IP_VALID(voucher_port)) { + assert(ip_active(voucher_port)); + kmsg->ikm_voucher = voucher_port; + voucher_type = MACH_MSG_TYPE_MOVE_SEND; } dest_type = ipc_object_copyin_type(dest_type); @@ -2114,7 +2029,6 @@ ipc_kmsg_copyin_header( if (needboost == FALSE) ip_unlock(dport); #else - ipc_port_request_sparm(dport, dest_name, dest_entry->ie_request); ip_unlock(dport); #endif /* IMPORTANCE_INHERITANCE */ @@ -2139,19 +2053,22 @@ ipc_kmsg_copyin_header( ipc_port_t dport = (ipc_port_t)dest_port; /* dport still locked from above */ - if (ipc_port_importance_delta(dport, 1) == FALSE) + if (ipc_port_importance_delta(dport, 1) == FALSE) { ip_unlock(dport); + } } #endif /* IMPORTANCE_INHERITANCE */ - if (dest_soright != IP_NULL) + if (dest_soright != IP_NULL) { ipc_notify_port_deleted(dest_soright, dest_name); - - if (reply_soright != IP_NULL) + } + if (reply_soright != IP_NULL) { ipc_notify_port_deleted(reply_soright, reply_name); - - msg->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) | - MACH_MSGH_BITS(dest_type, reply_type)); + } + if (voucher_soright != IP_NULL) { + ipc_notify_port_deleted(voucher_soright, voucher_name); + } + msg->msgh_bits = MACH_MSGH_BITS_SET(dest_type, reply_type, voucher_type, mbits); msg->msgh_remote_port = (ipc_port_t)dest_port; msg->msgh_local_port = (ipc_port_t)reply_port; @@ -2163,6 +2080,8 @@ ipc_kmsg_copyin_header( if (release_port != IP_NULL) ip_release(release_port); + if (voucher_release_port != IP_NULL) + ip_release(voucher_release_port); return MACH_MSG_SUCCESS; @@ -2177,6 +2096,9 @@ invalid_reply: if (release_port != IP_NULL) ip_release(release_port); + assert(voucher_port == IP_NULL); + assert(voucher_soright == IP_NULL); + return MACH_SEND_INVALID_REPLY; invalid_dest: @@ -2193,6 +2115,9 @@ invalid_dest: if (reply_soright != IP_NULL) ipc_notify_port_deleted(reply_soright, reply_name); + assert(voucher_port == IP_NULL); + assert(voucher_soright == IP_NULL); + return MACH_SEND_INVALID_DEST; } @@ -2565,6 +2490,8 @@ ipc_kmsg_copyin_body( */ daddr = NULL; for (i = 0; i < dsc_count; i++) { + mach_msg_size_t size; + daddr = naddr; /* make sure the descriptor fits in the message */ @@ -2594,8 +2521,6 @@ ipc_kmsg_copyin_body( } switch (daddr->type.type) { - mach_msg_size_t size; - case MACH_MSG_OOL_DESCRIPTOR: case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: size = (is_task_64bit) ? 
@@ -2657,11 +2582,14 @@ ipc_kmsg_copyin_body( /* user_addr = just after base as it was copied in */ user_addr = (mach_msg_descriptor_t *)((vm_offset_t)kmsg->ikm_header + sizeof(mach_msg_base_t)); - /* Shift the mach_msg_base_t down to make for dsc_count*16bytes of descriptors */ + + /* Shift the mach_msg_base_t down to make room for dsc_count*16bytes of descriptors */ if(descriptor_size != 16*dsc_count) { vm_offset_t dsc_adjust = 16*dsc_count - descriptor_size; + memmove((char *)(((vm_offset_t)kmsg->ikm_header) - dsc_adjust), kmsg->ikm_header, sizeof(mach_msg_base_t)); kmsg->ikm_header = (mach_msg_header_t *)((vm_offset_t)kmsg->ikm_header - dsc_adjust); + /* Update the message size for the larger in-kernel representation */ kmsg->ikm_header->msgh_size += (mach_msg_size_t)dsc_adjust; } @@ -2750,24 +2678,33 @@ ipc_kmsg_copyin( kmsg->ikm_header->msgh_bits &= MACH_MSGH_BITS_USER; - mr = ipc_kmsg_copyin_header(kmsg->ikm_header, space, optionp); + mr = ipc_kmsg_copyin_header(kmsg, space, optionp); if (mr != MACH_MSG_SUCCESS) return mr; - - DEBUG_KPRINT_SYSCALL_IPC("ipc_kmsg_copyin header:\n%.8x\n%.8x\n%p\n%p\n%.8x\n%.8x\n", - kmsg->ikm_header->msgh_size, - kmsg->ikm_header->msgh_bits, - kmsg->ikm_header->msgh_remote_port, - kmsg->ikm_header->msgh_local_port, - kmsg->ikm_header->msgh_reserved, - kmsg->ikm_header->msgh_id); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC,MACH_IPC_MSG_SEND) | DBG_FUNC_NONE, + VM_KERNEL_ADDRPERM((uintptr_t)kmsg), + (uintptr_t)kmsg->ikm_header->msgh_bits, + (uintptr_t)kmsg->ikm_header->msgh_id, + VM_KERNEL_ADDRPERM((uintptr_t)unsafe_convert_port_to_voucher(kmsg->ikm_voucher)), + 0); + + DEBUG_KPRINT_SYSCALL_IPC("ipc_kmsg_copyin header:\n%.8x\n%.8x\n%p\n%p\n%p\n%.8x\n", + kmsg->ikm_header->msgh_size, + kmsg->ikm_header->msgh_bits, + kmsg->ikm_header->msgh_remote_port, + kmsg->ikm_header->msgh_local_port, + kmsg->ikm_voucher, + kmsg->ikm_header->msgh_id); if ((kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) == 0) return MACH_MSG_SUCCESS; mr = ipc_kmsg_copyin_body( kmsg, space, map); + /* unreachable if !DEBUG */ + __unreachable_ok_push if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { kprintf("body:\n"); @@ -2777,6 +2714,7 @@ ipc_kmsg_copyin( kprintf("%.4x\n",((uint32_t *)(kmsg->ikm_header + 1))[i]); } } + __unreachable_ok_pop return mr; } @@ -3129,9 +3067,11 @@ ipc_kmsg_copyin_from_kernel_legacy( mach_msg_return_t ipc_kmsg_copyout_header( - mach_msg_header_t *msg, - ipc_space_t space) + ipc_kmsg_t kmsg, + ipc_space_t space, + mach_msg_option_t option) { + mach_msg_header_t *msg = kmsg->ikm_header; mach_msg_bits_t mbits = msg->msgh_bits; ipc_port_t dest = (ipc_port_t) msg->msgh_remote_port; @@ -3147,89 +3087,154 @@ ipc_kmsg_copyout_header( { mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits); mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits); - ipc_port_t reply = (ipc_port_t) msg->msgh_local_port; - ipc_port_t release_port = IP_NULL; + mach_msg_type_name_t voucher_type = MACH_MSGH_BITS_VOUCHER(mbits); + ipc_port_t reply = msg->msgh_local_port; + ipc_port_t release_reply_port = IP_NULL; mach_port_name_t dest_name, reply_name; + ipc_port_t voucher = kmsg->ikm_voucher; + ipc_port_t release_voucher_port = IP_NULL; + mach_port_name_t voucher_name; + + uint32_t entries_held = 0; + boolean_t need_write_lock = FALSE; + kern_return_t kr; + + /* + * Reserve any potentially needed entries in the target space. + * We'll free any unused before unlocking the space. 
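The reservation loop that follows is a reserve-then-grow pattern: hold every table entry the copyout might need before touching anything, and if the hold fails, grow the table (which drops the space lock in the kernel, hence the retry) and try again. A toy stand-alone model of just that control flow (the names here are illustrative, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    struct space { unsigned free_entries; };

    static bool entries_hold(struct space *s, unsigned want)
    {
        return s->free_entries >= want;  /* the kernel also marks them claimed */
    }

    static bool grow_table(struct space *s)
    {
        s->free_entries += 8;            /* pretend the table grew */
        return true;                     /* false would model space exhaustion */
    }

    static bool reserve(struct space *s, unsigned entries_held)
    {
        while (!entries_hold(s, entries_held)) {
            if (!grow_table(s))
                return false;  /* MACH_RCV_HEADER_ERROR | MACH_MSG_IPC_SPACE */
            /* lock was dropped and retaken while growing: retry the hold */
        }
        return true;
    }

    int main(void)
    {
        struct space s = { 1 };
        /* One entry each for the reply right and the voucher right. */
        printf("reserved: %d\n", reserve(&s, 2));
        return 0;
    }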
+ */ if (IP_VALID(reply)) { - ipc_entry_t entry; - kern_return_t kr; + entries_held++; + need_write_lock = TRUE; + } + if (IP_VALID(voucher)) { + assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND); - /* - * Get reply port entry (if none, skip to dest port - * copyout). This may require growing the space. - */ + if ((option & MACH_RCV_VOUCHER) != 0) + entries_held++; + need_write_lock = TRUE; + } + + if (need_write_lock) { is_write_lock(space); - for (;;) { + while(entries_held) { if (!is_active(space)) { is_write_unlock(space); return (MACH_RCV_HEADER_ERROR| MACH_MSG_IPC_SPACE); } - - if ((reply_type != MACH_MSG_TYPE_PORT_SEND_ONCE) && - ipc_right_reverse(space, (ipc_object_t) reply, - &reply_name, &entry)) { - /* reply port is locked and active */ - assert(entry->ie_bits & - MACH_PORT_TYPE_SEND_RECEIVE); + + kr = ipc_entries_hold(space, entries_held); + if (KERN_SUCCESS == kr) break; - } - ip_lock(reply); - if (!ip_active(reply)) { - ip_unlock(reply); - ip_lock(dest); - is_write_unlock(space); + kr = ipc_entry_grow_table(space, ITS_SIZE_NONE); + if (KERN_SUCCESS != kr) + return(MACH_RCV_HEADER_ERROR| + MACH_MSG_IPC_SPACE); + /* space was unlocked and relocked - retry */ + } - release_port = reply; - reply = IP_DEAD; - reply_name = MACH_PORT_DEAD; - goto copyout_dest; - } + /* Handle reply port. */ + if (IP_VALID(reply)) { + ipc_entry_t entry; - reply_name = CAST_MACH_PORT_TO_NAME(reply); - kr = ipc_entry_get(space, &reply_name, &entry); - if (kr != KERN_SUCCESS) { - ip_unlock(reply); - - /* space is locked */ - kr = ipc_entry_grow_table(space, - ITS_SIZE_NONE); - if (kr != KERN_SUCCESS) { - return (MACH_RCV_HEADER_ERROR| - MACH_MSG_IPC_SPACE); + /* Is there already an entry we can use? */ + if ((reply_type != MACH_MSG_TYPE_PORT_SEND_ONCE) && + ipc_right_reverse(space, (ipc_object_t) reply, &reply_name, &entry)) { + /* reply port is locked and active */ + assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE); + } else { + ip_lock(reply); + if (!ip_active(reply)) { + ip_unlock(reply); + + release_reply_port = reply; + reply = IP_DEAD; + reply_name = MACH_PORT_DEAD; + goto done_with_reply; } - /* space is locked again; start over */ - - continue; + + /* claim a held entry for the reply port */ + assert(entries_held > 0); + entries_held--; + ipc_entry_claim(space, &reply_name, &entry); + assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE); + assert(entry->ie_object == IO_NULL); + entry->ie_object = (ipc_object_t) reply; } - assert(IE_BITS_TYPE(entry->ie_bits) == - MACH_PORT_TYPE_NONE); - assert(entry->ie_object == IO_NULL); - entry->ie_object = (ipc_object_t) reply; - break; - } + /* space and reply port are locked and active */ + ip_reference(reply); /* hold onto the reply port */ + + kr = ipc_right_copyout(space, reply_name, entry, + reply_type, TRUE, (ipc_object_t) reply); + assert(kr == KERN_SUCCESS); + /* reply port is unlocked */ + } else + reply_name = CAST_MACH_PORT_TO_NAME(reply); - /* space and reply port are locked and active */ + done_with_reply: - ip_reference(reply); /* hold onto the reply port */ + /* Handle voucher port. 
*/ + if (voucher_type != MACH_MSGH_BITS_ZERO) { + assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND); - kr = ipc_right_copyout(space, reply_name, entry, - reply_type, TRUE, (ipc_object_t) reply); + if (!IP_VALID(voucher)) { + if ((option & MACH_RCV_VOUCHER) == 0) { + voucher_type = MACH_MSGH_BITS_ZERO; + } + voucher_name = MACH_PORT_NULL; + goto done_with_voucher; + } + + /* clear voucher from its hiding place back in the kmsg */ + kmsg->ikm_voucher = IP_NULL; + + if ((option & MACH_RCV_VOUCHER) != 0) { + ipc_entry_t entry; + + if (ipc_right_reverse(space, (ipc_object_t) voucher, + &voucher_name, &entry)) { + /* voucher port locked */ + assert(entry->ie_bits & MACH_PORT_TYPE_SEND); + } else { + assert(entries_held > 0); + entries_held--; + ipc_entry_claim(space, &voucher_name, &entry); + assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE); + assert(entry->ie_object == IO_NULL); + entry->ie_object = (ipc_object_t) voucher; + ip_lock(voucher); + } + /* space is locked and active */ + + assert(ip_active(voucher)); + assert(ip_kotype(voucher) == IKOT_VOUCHER); + kr = ipc_right_copyout(space, voucher_name, entry, + MACH_MSG_TYPE_MOVE_SEND, TRUE, + (ipc_object_t) voucher); + /* voucher port is unlocked */ + } else { + voucher_type = MACH_MSGH_BITS_ZERO; + release_voucher_port = voucher; + voucher_name = MACH_PORT_NULL; + } + } else { + voucher_name = msg->msgh_voucher_port; + } - /* reply port is unlocked */ - assert(kr == KERN_SUCCESS); + done_with_voucher: ip_lock(dest); is_write_unlock(space); } else { /* - * No reply port! This is an easy case. + * No reply or voucher port! This is an easy case. * We only need to have the space locked * when locking the destination. */ @@ -3244,6 +3249,16 @@ ipc_kmsg_copyout_header( is_read_unlock(space); reply_name = CAST_MACH_PORT_TO_NAME(reply); + + if (voucher_type != MACH_MSGH_BITS_ZERO) { + assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND); + if ((option & MACH_RCV_VOUCHER) == 0) { + voucher_type = MACH_MSGH_BITS_ZERO; + } + voucher_name = MACH_PORT_NULL; + } else { + voucher_name = msg->msgh_voucher_port; + } } /* @@ -3293,8 +3308,6 @@ ipc_kmsg_copyout_header( * is done correctly. 
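A receiver only gets the voucher right if it asked for it; otherwise the code above clears the disposition and releases the right. A hedged user-space sketch of the opt-in (a standard mach_msg() receive; the message body is omitted for brevity and 'rcv_port' is assumed to be a receive right):

    #include <mach/mach.h>
    #include <stdio.h>

    /* Receive one message, accepting the voucher if one was sent. */
    static int receive_with_voucher(mach_port_t rcv_port)
    {
        struct {
            mach_msg_header_t header;
            mach_msg_trailer_t trailer;
        } msg;

        kern_return_t kr = mach_msg(&msg.header,
                                    MACH_RCV_MSG | MACH_RCV_VOUCHER,
                                    0, sizeof(msg), rcv_port,
                                    MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
        if (kr != MACH_MSG_SUCCESS)
            return -1;

        /* A send right to the voucher (or MACH_PORT_NULL) lands here. */
        printf("voucher name: 0x%x\n", msg.header.msgh_voucher_port);
        return 0;
    }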
*/ - copyout_dest: - if (ip_active(dest)) { ipc_object_copyout_dest(space, (ipc_object_t) dest, dest_type, &dest_name); @@ -3323,16 +3336,41 @@ ipc_kmsg_copyout_header( if (IP_VALID(reply)) ip_release(reply); - if (IP_VALID(release_port)) - ip_release(release_port); + if (IP_VALID(release_reply_port)) { + if (reply_type == MACH_MSG_TYPE_PORT_SEND_ONCE) + ipc_port_release_sonce(release_reply_port); + else + ipc_port_release_send(release_reply_port); + } + + if (IP_VALID(release_voucher_port)) + ipc_port_release_send(release_voucher_port); + - msg->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) | - MACH_MSGH_BITS(reply_type, dest_type)); + if ((option & MACH_RCV_VOUCHER) != 0) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_MSG_RECV) | DBG_FUNC_NONE, + VM_KERNEL_ADDRPERM((uintptr_t)kmsg), + (uintptr_t)kmsg->ikm_header->msgh_bits, + (uintptr_t)kmsg->ikm_header->msgh_id, + VM_KERNEL_ADDRPERM((uintptr_t)unsafe_convert_port_to_voucher(voucher)), + 0); + } else { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_MSG_RECV_VOUCHER_REFUSED) | DBG_FUNC_NONE, + VM_KERNEL_ADDRPERM((uintptr_t)kmsg), + (uintptr_t)kmsg->ikm_header->msgh_bits, + (uintptr_t)kmsg->ikm_header->msgh_id, + VM_KERNEL_ADDRPERM((uintptr_t)unsafe_convert_port_to_voucher(voucher)), + 0); + } + + msg->msgh_bits = MACH_MSGH_BITS_SET(reply_type, dest_type, + voucher_type, mbits); msg->msgh_local_port = CAST_MACH_NAME_TO_PORT(dest_name); msg->msgh_remote_port = CAST_MACH_NAME_TO_PORT(reply_name); + msg->msgh_voucher_port = voucher_name; } - return MACH_MSG_SUCCESS; + return MACH_MSG_SUCCESS; } /* @@ -3871,11 +3909,12 @@ ipc_kmsg_copyout( ipc_kmsg_t kmsg, ipc_space_t space, vm_map_t map, - mach_msg_body_t *slist) + mach_msg_body_t *slist, + mach_msg_option_t option) { mach_msg_return_t mr; - mr = ipc_kmsg_copyout_header(kmsg->ikm_header, space); + mr = ipc_kmsg_copyout_header(kmsg, space, option); if (mr != MACH_MSG_SUCCESS) { return mr; } @@ -3920,13 +3959,28 @@ ipc_kmsg_copyout_pseudo( mach_msg_bits_t mbits = kmsg->ikm_header->msgh_bits; ipc_object_t dest = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; ipc_object_t reply = (ipc_object_t) kmsg->ikm_header->msgh_local_port; + ipc_object_t voucher = (ipc_object_t) kmsg->ikm_voucher; mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits); mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits); + mach_msg_type_name_t voucher_type = MACH_MSGH_BITS_VOUCHER(mbits); + mach_port_name_t voucher_name = kmsg->ikm_header->msgh_voucher_port; mach_port_name_t dest_name, reply_name; mach_msg_return_t mr; assert(IO_VALID(dest)); +#if 0 + /* + * If we did this here, it looks like we wouldn't need the undo logic + * at the end of ipc_kmsg_send() in the error cases. Not sure which + * would be more elegant to keep. 
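
The companion change at the end of the function is the header-bits rewrite: the returned msgh_bits now packs three dispositions (destination, reply, voucher) instead of two. The MACH_MSGH_BITS_SET macro used there has roughly the following shape; the authoritative definition and mask values live in osfmk/mach/message.h, so treat this as a sketch:

    /* remote disposition in bits 0-4, local in 8-12, voucher in 16-20 */
    #define MSGH_BITS_SET_SKETCH(remote, local, voucher, other)        \
            (((remote) & MACH_MSGH_BITS_REMOTE_MASK) |                 \
             (((local) << 8) & MACH_MSGH_BITS_LOCAL_MASK) |            \
             (((voucher) << 16) & MACH_MSGH_BITS_VOUCHER_MASK) |       \
             ((other) & ~MACH_MSGH_BITS_PORTS_MASK))

So a receiver that asked for MACH_RCV_VOUCHER sees MACH_MSG_TYPE_MOVE_SEND in the voucher byte, while a refused voucher leaves that byte MACH_MSGH_BITS_ZERO.
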
+ */ + ipc_importance_clean(kmsg); +#else + /* just assert it is already clean */ + ipc_importance_assert_clean(kmsg); +#endif + mr = (ipc_kmsg_copyout_object(space, dest, dest_type, &dest_name) | ipc_kmsg_copyout_object(space, reply, reply_type, &reply_name)); @@ -3934,6 +3988,14 @@ ipc_kmsg_copyout_pseudo( kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(dest_name); kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(reply_name); + if (IO_VALID(voucher)) { + assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND); + + kmsg->ikm_voucher = IP_NULL; + mr |= ipc_kmsg_copyout_object(space, voucher, voucher_type, &voucher_name); + kmsg->ikm_header->msgh_voucher_port = voucher_name; + } + if (mbits & MACH_MSGH_BITS_COMPLEX) { mr |= ipc_kmsg_copyout_body(kmsg, space, map, slist); } @@ -3958,18 +4020,25 @@ ipc_kmsg_copyout_dest( mach_msg_bits_t mbits; ipc_object_t dest; ipc_object_t reply; + ipc_object_t voucher; mach_msg_type_name_t dest_type; mach_msg_type_name_t reply_type; - mach_port_name_t dest_name, reply_name; + mach_msg_type_name_t voucher_type; + mach_port_name_t dest_name, reply_name, voucher_name; mbits = kmsg->ikm_header->msgh_bits; dest = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; reply = (ipc_object_t) kmsg->ikm_header->msgh_local_port; + voucher = (ipc_object_t) kmsg->ikm_voucher; + voucher_name = kmsg->ikm_header->msgh_voucher_port; dest_type = MACH_MSGH_BITS_REMOTE(mbits); reply_type = MACH_MSGH_BITS_LOCAL(mbits); + voucher_type = MACH_MSGH_BITS_VOUCHER(mbits); assert(IO_VALID(dest)); + ipc_importance_assert_clean(kmsg); + io_lock(dest); if (io_active(dest)) { ipc_object_copyout_dest(space, dest, dest_type, &dest_name); @@ -3986,10 +4055,19 @@ ipc_kmsg_copyout_dest( } else reply_name = CAST_MACH_PORT_TO_NAME(reply); - kmsg->ikm_header->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) | - MACH_MSGH_BITS(reply_type, dest_type)); + if (IO_VALID(voucher)) { + assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND); + + kmsg->ikm_voucher = IP_NULL; + ipc_object_destroy((ipc_object_t)voucher, voucher_type); + voucher_name = MACH_PORT_NULL; + } + + kmsg->ikm_header->msgh_bits = MACH_MSGH_BITS_SET(reply_type, dest_type, + voucher_type, mbits); kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(dest_name); kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(reply_name); + kmsg->ikm_header->msgh_voucher_port = voucher_name; if (mbits & MACH_MSGH_BITS_COMPLEX) { mach_msg_body_t *body; @@ -4326,8 +4404,9 @@ ipc_kmsg_copyout_to_kernel_legacy( } #endif /* IKM_SUPPORT_LEGACY */ + mach_msg_trailer_size_t -ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space, +ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space __unused, mach_msg_option_t option, thread_t thread, mach_port_seqno_t seqno, boolean_t minimal_trailer, mach_vm_offset_t context) @@ -4353,17 +4432,7 @@ ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space, if (MACH_RCV_TRAILER_ELEMENTS(option) >= MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AV)){ -#if CONFIG_MACF_MACH - if (kmsg->ikm_sender != NULL && - IP_VALID(kmsg->ikm_header->msgh_remote_port) && - mac_port_check_method(kmsg->ikm_sender, - &kmsg->ikm_sender->maclabel, - &kmsg->ikm_header->msgh_remote_port->ip_label, - kmsg->ikm_header->msgh_id) == 0) - trailer->msgh_ad = 1; - else -#endif - trailer->msgh_ad = 0; + trailer->msgh_ad = 0; } /* @@ -4373,34 +4442,9 @@ ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space, */ if (option & MACH_RCV_TRAILER_ELEMENTS (MACH_RCV_TRAILER_LABELS)) { -#if CONFIG_MACF_MACH - if (kmsg->ikm_sender != NULL) { - 
ipc_labelh_t lh = kmsg->ikm_sender->label; - kern_return_t kr; - - ip_lock(lh->lh_port); - lh->lh_port->ip_mscount++; - lh->lh_port->ip_srights++; - ip_reference(lh->lh_port); - ip_unlock(lh->lh_port); - - kr = ipc_object_copyout(space, (ipc_object_t)lh->lh_port, - MACH_MSG_TYPE_PORT_SEND, 0, - &trailer->msgh_labels.sender); - if (kr != KERN_SUCCESS) { - ip_release(lh->lh_port); - trailer->msgh_labels.sender = 0; - } - } else { - trailer->msgh_labels.sender = 0; - } -#else - (void)space; trailer->msgh_labels.sender = 0; -#endif } - done: return trailer->msgh_trailer_size; diff --git a/osfmk/ipc/ipc_kmsg.h b/osfmk/ipc/ipc_kmsg.h index 5e742e007..89fa46523 100644 --- a/osfmk/ipc/ipc_kmsg.h +++ b/osfmk/ipc/ipc_kmsg.h @@ -93,15 +93,15 @@ * of the message. */ -struct ipc_labelh; - struct ipc_kmsg { - struct ipc_kmsg *ikm_next; - struct ipc_kmsg *ikm_prev; - ipc_port_t ikm_prealloc; /* port we were preallocated from */ - mach_msg_size_t ikm_size; - struct ipc_labelh *ikm_sender; - mach_msg_header_t *ikm_header; + mach_msg_size_t ikm_size; + struct ipc_kmsg *ikm_next; /* next message on port/discard queue */ + struct ipc_kmsg *ikm_prev; /* prev message on port/discard queue */ + mach_msg_header_t *ikm_header; + ipc_port_t ikm_prealloc; /* port we were preallocated from */ + ipc_port_t ikm_voucher; /* voucher port carried */ + struct ipc_importance_elem *ikm_importance;/* inherited from */ + queue_chain_t ikm_inheritance;/* inherited from link */ }; #if defined(__i386__) || defined(__arm__) @@ -150,7 +150,8 @@ MACRO_END MACRO_BEGIN \ (kmsg)->ikm_size = (size); \ (kmsg)->ikm_prealloc = IP_NULL; \ - (kmsg)->ikm_sender = NULL; \ + (kmsg)->ikm_voucher = IP_NULL; \ + (kmsg)->ikm_importance = IIE_NULL; \ assert((kmsg)->ikm_prev = (kmsg)->ikm_next = IKM_BOGUS); \ MACRO_END @@ -313,7 +314,7 @@ extern void ipc_kmsg_put_to_kernel( /* Copyin port rights in the header of a message */ extern mach_msg_return_t ipc_kmsg_copyin_header( - mach_msg_header_t *msg, + ipc_kmsg_t kmsg, ipc_space_t space, mach_msg_option_t *optionp); @@ -335,8 +336,9 @@ extern mach_msg_return_t ipc_kmsg_copyin_from_kernel_legacy( /* Copyout port rights in the header of a message */ extern mach_msg_return_t ipc_kmsg_copyout_header( - mach_msg_header_t *msg, - ipc_space_t space); + ipc_kmsg_t kmsg, + ipc_space_t space, + mach_msg_option_t option); /* Copyout a port right returning a name */ extern mach_msg_return_t ipc_kmsg_copyout_object( @@ -350,7 +352,8 @@ extern mach_msg_return_t ipc_kmsg_copyout( ipc_kmsg_t kmsg, ipc_space_t space, vm_map_t map, - mach_msg_body_t *slist); + mach_msg_body_t *slist, + mach_msg_option_t option); /* Copyout port rights and out-of-line memory from the body of a message */ extern mach_msg_return_t ipc_kmsg_copyout_body( diff --git a/osfmk/ipc/ipc_labelh.c b/osfmk/ipc/ipc_labelh.c deleted file mode 100644 index 9b14d07d5..000000000 --- a/osfmk/ipc/ipc_labelh.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 2005-2006 SPARTA, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include - -#if CONFIG_MACF_MACH -zone_t ipc_labelh_zone; - -/* - * Create a new label handle in the task described by the specified space. - * The specified label is used in the label handle. The associated port - * name is copied out to namep and the task is granted send and receive rights. - */ -kern_return_t -labelh_new_user(ipc_space_t space, struct label *inl, mach_port_name_t *namep) -{ - kern_return_t kr; - ipc_labelh_t lh; - ipc_entry_t entry; - ipc_port_t port; - - if (space == IS_NULL || space->is_task == NULL) - return (KERN_INVALID_TASK); - - /* XXX - perform entrypoint check here? */ - - /* JMM - redo as port allocation, kobject set, and then copyout */ - assert(!CONFIG_MACF_MACH); - - /* - * Note: the calling task will have a receive right for the port. - * This is different from label handles that reference tasks - * where the kernel holds the receive right and the caller only - * gets a send right. 
- */ - kr = ipc_port_alloc(space, namep, &port); - if (kr != KERN_SUCCESS) - return (kr); - ip_reference(port); /* ipc_port_alloc() does not add a reference */ - - /* Convert right to MACH_PORT_TYPE_SEND_RECEIVE */ - port->ip_mscount++; - port->ip_srights++; - is_write_lock(space); - /* XXX - must validate space is still active and unwind if not */ - entry = ipc_entry_lookup(space, *namep); - if (entry != IE_NULL) - entry->ie_bits |= MACH_PORT_TYPE_SEND; - is_write_unlock(space); - - /* Allocate new label handle, insert port and label. */ - lh = (ipc_labelh_t)zalloc(ipc_labelh_zone); - lh_lock_init(lh); - lh->lh_port = port; - lh->lh_label = *inl; - lh->lh_type = LABELH_TYPE_USER; - lh->lh_references = 1; /* unused for LABELH_TYPE_USER */ - - /* Must call ipc_kobject_set() with port unlocked. */ - ip_unlock(lh->lh_port); - ipc_kobject_set(lh->lh_port, (ipc_kobject_t)lh, IKOT_LABELH); - - return (KERN_SUCCESS); -} - -kern_return_t -mac_label_new(ipc_space_t space, mach_port_name_t *namep, labelstr_t labelstr) -{ - struct label inl; - kern_return_t kr; - - mac_task_label_init(&inl); - if (mac_task_label_internalize(&inl, labelstr)) - return (KERN_INVALID_ARGUMENT); - - kr = labelh_new_user(space, &inl, namep); - if (kr != KERN_SUCCESS) { - mac_task_label_destroy(&inl); - return (kr); - } - - return (KERN_SUCCESS); -} - -/* - * This function should be used to allocate label handles - * that are stored in other kernel objects, such as tasks. - * They must be released along with that object. - * The caller gets one reference, which can be applied to either the - * port or the ipc_label_t structure itself. - */ -ipc_labelh_t -labelh_new(int canblock) -{ - ipc_labelh_t lh; - - lh = (ipc_labelh_t)zalloc_canblock(ipc_labelh_zone, canblock); - lh_lock_init(lh); - lh->lh_port = ipc_port_alloc_kernel(); - lh->lh_type = LABELH_TYPE_KERN; - lh->lh_references = 1; - ipc_kobject_set(lh->lh_port, (ipc_kobject_t)lh, IKOT_LABELH); - - return (lh); -} - -/* - * Call with old label handle locked. - * Returned label handle is unlocked. - */ -ipc_labelh_t -labelh_duplicate(ipc_labelh_t old) -{ - ipc_labelh_t lh; - - lh = labelh_new(0); - ip_lock(lh->lh_port); - mac_task_label_init(&lh->lh_label); - mac_task_label_copy(&old->lh_label, &lh->lh_label); - ip_unlock(lh->lh_port); - return (lh); -} - -/* - * Call with old label handle locked. - * Returned label handle is locked. - */ -ipc_labelh_t -labelh_modify(ipc_labelh_t old) -{ - ipc_labelh_t lh; - - /* - * A label handle may only have a single reference. - * If there are no other references this is a no-op. - * Otherwise, make a copy we can write to and return it. - */ - if (old->lh_references == 1) - return (old); - lh = labelh_duplicate(old); - lh_release(old); - lh_check_unlock(old); - lh_lock(lh); - return (lh); -} - -/* - * Add or drop a reference on an (unlocked) label handle. - */ -ipc_labelh_t -labelh_reference(ipc_labelh_t lh) -{ - lh_lock(lh); - lh_reference(lh); - lh_unlock(lh); - return (lh); -} - -/* - * Release a reference on an (unlocked) label handle. - */ -void -labelh_release(ipc_labelh_t lh) -{ - lh_lock(lh); - lh_release(lh); - lh_check_unlock(lh); -} - -/* - * Deallocate space associated with the label handle backed by the - * specified port. For kernel-allocated label handles the - * label handle reference count should be 0. For user-allocated - * handles the ref count is not used (it was initialized to 1). 
- */ -void -labelh_destroy(ipc_port_t port) -{ - ipc_labelh_t lh = (ipc_labelh_t) port->ip_kobject; - - mac_task_label_destroy(&lh->lh_label); - lh_lock_destroy(lh); - zfree(ipc_labelh_zone, (vm_offset_t)lh); -} -#else -kern_return_t -mac_label_new(__unused ipc_space_t space, - __unused mach_port_name_t *namep, - __unused labelstr_t labelstr) -{ - return KERN_FAILURE; -} -#endif /* MAC_MACH */ diff --git a/osfmk/ipc/ipc_labelh.h b/osfmk/ipc/ipc_labelh.h deleted file mode 100644 index 7126ff162..000000000 --- a/osfmk/ipc/ipc_labelh.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 2005-2006 SPARTA, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#ifndef _IPC_LABELH_H_ -#define _IPC_LABELH_H_ - -#include -#include -#include - -#if CONFIG_MACF_MACH -typedef struct ipc_labelh -{ - natural_t lh_references; - int lh_type; - struct label lh_label; - ipc_port_t lh_port; - decl_lck_mtx_data(, lh_lock_data) -} *ipc_labelh_t; - -#define LABELH_TYPE_KERN 0 -#define LABELH_TYPE_USER 1 - -void labelh_destroy(ipc_port_t port); -ipc_labelh_t labelh_duplicate(ipc_labelh_t old); -ipc_labelh_t labelh_modify(ipc_labelh_t old); -ipc_labelh_t labelh_new(int canblock); -kern_return_t labelh_new_user(ipc_space_t, struct label *, mach_port_name_t *); -void labelh_release(ipc_labelh_t lh); -ipc_labelh_t labelh_reference(ipc_labelh_t lh); - -#define lh_reference(lh) ((lh)->lh_references++) -#define lh_release(lh) \ -MACRO_BEGIN \ - assert((lh)->lh_references > 0); \ - (lh)->lh_references--; \ -MACRO_END - -extern zone_t ipc_labelh_zone; - -#define lh_lock_init(lh) lck_mtx_init(&(lh)->lh_lock_data, &ipc_lck_grp, &ipc_lck_attr) -#define lh_lock_destroy(lh) lck_mtx_destroy(&(lh)->lh_lock_data, &ipc_lck_grp) -#define lh_lock(lh) lck_mtx_lock(&(lh)->lh_lock_data) -#define lh_unlock(lh) lck_mtx_unlock(&(lh)->lh_lock_data) - -/* - * Check the number of references the label handle has left. - * If there are 0 references and this is a kernel-allocated - * label handle, deallocate the associated port. The - * storage space for the label handle will be deallocated - * as part of the port destruction. User-allocated label - * handles are destroyed along with their ports. - */ -#define lh_check_unlock(lh) \ -MACRO_BEGIN \ - _VOLATILE_ natural_t _refs = (lh)->lh_references; \ - \ - lh_unlock(lh); \ - if (_refs == 0 && (lh)->lh_type == LABELH_TYPE_KERN) \ - ipc_port_dealloc_kernel((lh)->lh_port); \ -MACRO_END - -#endif /* MAC_MACH */ -#endif /* _IPC_LABELH_H_ */ diff --git a/osfmk/ipc/ipc_mqueue.c b/osfmk/ipc/ipc_mqueue.c index 4bcb66adc..1a98bab1f 100644 --- a/osfmk/ipc/ipc_mqueue.c +++ b/osfmk/ipc/ipc_mqueue.c @@ -94,10 +94,6 @@ #include #endif -#if CONFIG_MACF_MACH -#include -#endif - int ipc_mqueue_full; /* address is event for queue space */ int ipc_mqueue_rcv; /* address is event for message arrival */ @@ -724,11 +720,6 @@ ipc_mqueue_receive_on_thread( wait_result_t wresult; uint64_t deadline; spl_t s; -#if CONFIG_MACF_MACH - ipc_labelh_t lh; - task_t task; - int rc; -#endif s = splsched(); imq_lock(mqueue); @@ -798,21 +789,6 @@ ipc_mqueue_receive_on_thread( */ ipc_mqueue_select_on_thread(port_mq, option, max_size, thread); imq_unlock(port_mq); -#if CONFIG_MACF_MACH - if (thread->task != TASK_NULL && - thread->ith_kmsg != NULL && - thread->ith_kmsg->ikm_sender != NULL) { - lh = thread->ith_kmsg->ikm_sender->label; - tasklabel_lock(thread->task); - ip_lock(lh->lh_port); - rc = mac_port_check_receive(&thread->task->maclabel, - &lh->lh_label); - ip_unlock(lh->lh_port); - tasklabel_unlock(thread->task); - if (rc) - thread->ith_state = MACH_RCV_INVALID_DATA; - } -#endif splx(s); return THREAD_NOT_WAITING; @@ -827,21 +803,6 @@ ipc_mqueue_receive_on_thread( if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) { ipc_mqueue_select_on_thread(mqueue, option, max_size, thread); imq_unlock(mqueue); -#if CONFIG_MACF_MACH - if (thread->task != TASK_NULL && - thread->ith_kmsg != NULL && - thread->ith_kmsg->ikm_sender != NULL) { - lh = thread->ith_kmsg->ikm_sender->label; - tasklabel_lock(thread->task); - ip_lock(lh->lh_port); - rc = mac_port_check_receive(&thread->task->maclabel, - &lh->lh_label); - ip_unlock(lh->lh_port); - tasklabel_unlock(thread->task); - if (rc) - 
thread->ith_state = MACH_RCV_INVALID_DATA; - } -#endif splx(s); return THREAD_NOT_WAITING; } diff --git a/osfmk/ipc/ipc_object.c b/osfmk/ipc/ipc_object.c index 054b7aec6..49b7e4690 100644 --- a/osfmk/ipc/ipc_object.c +++ b/osfmk/ipc/ipc_object.c @@ -83,6 +83,7 @@ #include #include +#include #include #include #include @@ -92,7 +93,6 @@ #include #include #include -#include #include @@ -341,9 +341,6 @@ ipc_object_alloc( ipc_port_t port = (ipc_port_t)object; bzero((char *)port, sizeof(*port)); -#if CONFIG_MACF_MACH - mac_port_label_init(&port->ip_label); -#endif } else if (otype == IOT_PORT_SET) { ipc_pset_t pset = (ipc_pset_t)object; @@ -412,9 +409,6 @@ ipc_object_alloc_name( ipc_port_t port = (ipc_port_t)object; bzero((char *)port, sizeof(*port)); -#if CONFIG_MACF_MACH - mac_port_label_init(&port->ip_label); -#endif } else if (otype == IOT_PORT_SET) { ipc_pset_t pset = (ipc_pset_t)object; @@ -461,7 +455,6 @@ ipc_object_copyin_type( switch (msgt_name) { case MACH_MSG_TYPE_MOVE_RECEIVE: - case MACH_MSG_TYPE_COPY_RECEIVE: return MACH_MSG_TYPE_PORT_RECEIVE; case MACH_MSG_TYPE_MOVE_SEND_ONCE: @@ -473,6 +466,10 @@ ipc_object_copyin_type( case MACH_MSG_TYPE_COPY_SEND: return MACH_MSG_TYPE_PORT_SEND; + case MACH_MSG_TYPE_DISPOSE_RECEIVE: + case MACH_MSG_TYPE_DISPOSE_SEND: + case MACH_MSG_TYPE_DISPOSE_SEND_ONCE: + /* fall thru */ default: return MACH_MSG_TYPE_PORT_NONE; } @@ -544,8 +541,8 @@ ipc_object_copyin( } #if IMPORTANCE_INHERITANCE - if (assertcnt > 0 && current_task()->imp_receiver != 0) { - task_importance_drop_internal_assertion(current_task(), assertcnt); + if (0 < assertcnt && ipc_importance_task_is_any_receiver_type(current_task()->task_imp_base)) { + ipc_importance_task_drop_internal_assertion(current_task()->task_imp_base, assertcnt); } #endif /* IMPORTANCE_INHERITANCE */ @@ -866,7 +863,7 @@ ipc_object_copyout_name( #if IMPORTANCE_INHERITANCE int assertcnt = 0; - task_t task = TASK_NULL; + ipc_importance_task_t task_imp = IIT_NULL; #endif /* IMPORTANCE_INHERITANCE */ assert(IO_VALID(object)); @@ -923,11 +920,12 @@ ipc_object_copyout_name( if (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE) { ipc_port_t port = (ipc_port_t)object; - if ((space->is_task != TASK_NULL) && - (space->is_task->imp_receiver != 0)) { - assertcnt = port->ip_impcount; - task = space->is_task; - task_reference(task); + if (space->is_task != TASK_NULL) { + task_imp = space->is_task->task_imp_base; + if (ipc_importance_task_is_any_receiver_type(task_imp)) { + assertcnt = port->ip_impcount; + ipc_importance_task_reference(task_imp); + } } /* take port out of limbo */ @@ -947,10 +945,9 @@ ipc_object_copyout_name( /* * Add the assertions to the task that we captured before */ - if (task != TASK_NULL) { - if (assertcnt > 0) - task_importance_hold_internal_assertion(task, assertcnt); - task_deallocate(task); + if (task_imp != IIT_NULL) { + ipc_importance_task_hold_internal_assertion(task_imp, assertcnt); + ipc_importance_task_release(task_imp); } #endif /* IMPORTANCE_INHERITANCE */ @@ -1102,31 +1099,6 @@ ipc_object_rename( return kr; } -/* - * Get a label out of a port, to be used by a kernel call - * that takes a security label as a parameter. In this case, we want - * to use the label stored in the label handle and not the label on its - * port. - * - * The port should be locked for this call. The lock protecting - * label handle contents should not be necessary, as they can only - * be modified when a label handle with one reference is a task label. - * User allocated label handles can never be modified. 
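
Back in the ipc_object_copyin_type() hunks above, the mapping from a caller-supplied disposition to the right actually captured is now total and slightly stricter. Restated as a standalone function (the hunk elides the unchanged middle cases, which are filled in here):

    #include <mach/message.h>

    mach_msg_type_name_t
    copyin_type(mach_msg_type_name_t disposition)
    {
            switch (disposition) {
            case MACH_MSG_TYPE_MOVE_RECEIVE:
                    return MACH_MSG_TYPE_PORT_RECEIVE;
            case MACH_MSG_TYPE_MOVE_SEND_ONCE:
            case MACH_MSG_TYPE_MAKE_SEND_ONCE:
                    return MACH_MSG_TYPE_PORT_SEND_ONCE;
            case MACH_MSG_TYPE_MOVE_SEND:
            case MACH_MSG_TYPE_MAKE_SEND:
            case MACH_MSG_TYPE_COPY_SEND:
                    return MACH_MSG_TYPE_PORT_SEND;
            default:
                    /* COPY_RECEIVE and the DISPOSE_* names land here */
                    return MACH_MSG_TYPE_PORT_NONE;
            }
    }

The behavioral note is the dropped case: MACH_MSG_TYPE_COPY_RECEIVE no longer yields a receive right, falling to PORT_NONE together with the new MACH_MSG_TYPE_DISPOSE_* dispositions.
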
- */ -#if CONFIG_MACF_MACH -struct label *io_getlabel (ipc_object_t objp) -{ - ipc_port_t port = (ipc_port_t)objp; - - assert(io_otype(objp) == IOT_PORT); - - if (ip_kotype(port) == IKOT_LABELH) - return &((ipc_labelh_t) port->ip_kobject)->lh_label; - else - return &port->ip_label; -} -#endif - /* * Check whether the object is a port if so, free it. But * keep track of that fact. diff --git a/osfmk/ipc/ipc_object.h b/osfmk/ipc/ipc_object.h index e040b4be3..ef967b055 100644 --- a/osfmk/ipc/ipc_object.h +++ b/osfmk/ipc/ipc_object.h @@ -76,8 +76,9 @@ #include #include -#include +#include #include +#include #include #include #include diff --git a/osfmk/ipc/ipc_port.c b/osfmk/ipc/ipc_port.c index f8f96739a..e3e727ba8 100644 --- a/osfmk/ipc/ipc_port.c +++ b/osfmk/ipc/ipc_port.c @@ -74,7 +74,6 @@ #include #include -#include #include #include #include @@ -88,13 +87,13 @@ #include #include #include +#include #include #include -decl_lck_mtx_data(, ipc_port_multiple_lock_data) -lck_mtx_ext_t ipc_port_multiple_lock_data_ext; +decl_lck_spin_data(, ipc_port_multiple_lock_data) ipc_port_timestamp_t ipc_port_timestamp_data; int ipc_portbt; @@ -202,6 +201,7 @@ ipc_port_request_alloc( if (port->ip_sprequests == 0) { port->ip_sprequests = 1; #if IMPORTANCE_INHERITANCE + /* TODO: Live importance support in send-possible */ if (port->ip_impdonation != 0 && port->ip_spimportant == 0 && (task_is_importance_donor(current_task()))) { @@ -610,7 +610,6 @@ ipc_port_init( port->ip_spimportant = 0; port->ip_impdonation = 0; port->ip_tempowner = 0; - port->ip_taskptr = 0; port->ip_guarded = 0; port->ip_strict_guard = 0; @@ -666,14 +665,6 @@ ipc_port_alloc( /* unlock space after init */ is_write_unlock(space); -#if CONFIG_MACF_MACH - task_t issuer = current_task(); - tasklabel_lock2 (issuer, space->is_task); - mac_port_label_associate(&issuer->maclabel, &space->is_task->maclabel, - &port->ip_label); - tasklabel_unlock2 (issuer, space->is_task); -#endif - *namep = name; *portp = port; @@ -722,14 +713,6 @@ ipc_port_alloc_name( ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX); #endif /* MACH_ASSERT */ -#if CONFIG_MACF_MACH - task_t issuer = current_task(); - tasklabel_lock2 (issuer, space->is_task); - mac_port_label_associate(&issuer->maclabel, &space->is_task->maclabel, - &port->ip_label); - tasklabel_unlock2 (issuer, space->is_task); -#endif - *portp = port; return KERN_SUCCESS; @@ -764,8 +747,9 @@ ipc_port_spnotify( #if IMPORTANCE_INHERITANCE if (port->ip_spimportant != 0) { port->ip_spimportant = 0; - port->ip_impcount--; - dropassert = TRUE; + if (ipc_port_impcount_delta(port, -1, IP_NULL) == -1) { + dropassert = TRUE; + } } #endif /* IMPORTANCE_INHERITANCE */ @@ -812,9 +796,9 @@ revalidate: ip_unlock(port); out: #if IMPORTANCE_INHERITANCE - if ((dropassert == TRUE) && (current_task()->imp_receiver != 0)) { - /* drop internal assertion and no task lock held */ - task_importance_drop_internal_assertion(current_task(), 1); + if (dropassert == TRUE && ipc_importance_task_is_any_receiver_type(current_task()->task_imp_base)) { + /* drop internal assertion */ + ipc_importance_task_drop_internal_assertion(current_task()->task_imp_base, 1); } #endif /* IMPORTANCE_INHERITANCE */ return; @@ -877,7 +861,7 @@ ipc_port_destroy( ipc_kmsg_t kmsg; #if IMPORTANCE_INHERITANCE - task_t release_imp_task = TASK_NULL; + ipc_importance_task_t release_imp_task = IIT_NULL; thread_t self = current_thread(); boolean_t top = (self->ith_assertions == 0); natural_t assertcnt = 0; @@ -893,18 +877,16 @@ ipc_port_destroy( pdrequest = 
port->ip_pdrequest; #if IMPORTANCE_INHERITANCE - /* determine how may assertions to drop and from whom */ + /* determine how many assertions to drop and from whom */ if (port->ip_tempowner != 0) { assert(top); - if (port->ip_taskptr != 0) { - release_imp_task = port->ip_imp_task; - port->ip_imp_task = TASK_NULL; - port->ip_taskptr = 0; + release_imp_task = port->ip_imp_task; + if (IIT_NULL != release_imp_task) { + port->ip_imp_task = IIT_NULL; assertcnt = port->ip_impcount; } /* Otherwise, nothing to drop */ } else { - assert(port->ip_taskptr == 0); assertcnt = port->ip_impcount; if (pdrequest != IP_NULL) /* mark in limbo for the journey */ @@ -934,6 +916,7 @@ ipc_port_destroy( port->ip_object.io_bits &= ~IO_BITS_ACTIVE; port->ip_timestamp = ipc_port_timestamp(); + nsrequest = port->ip_nsrequest; /* * If the port has a preallocated message buffer and that buffer @@ -959,7 +942,6 @@ ipc_port_destroy( } /* throw away no-senders request */ - nsrequest = port->ip_nsrequest; if (nsrequest != IP_NULL) ipc_notify_send_once(nsrequest); /* consumes ref */ @@ -976,25 +958,22 @@ ipc_port_destroy( drop_assertions: #if IMPORTANCE_INHERITANCE - if (release_imp_task != TASK_NULL) { + if (release_imp_task != IIT_NULL) { if (assertcnt > 0) { assert(top); self->ith_assertions = 0; - assert(release_imp_task->imp_receiver != 0); - task_importance_drop_internal_assertion(release_imp_task, assertcnt); + assert(ipc_importance_task_is_any_receiver_type(release_imp_task)); + ipc_importance_task_drop_internal_assertion(release_imp_task, assertcnt); } - task_deallocate(release_imp_task); + ipc_importance_task_release(release_imp_task); } else if (assertcnt > 0) { if (top) { self->ith_assertions = 0; - release_imp_task = current_task(); - if (release_imp_task->imp_receiver != 0) { - task_importance_drop_internal_assertion(release_imp_task, assertcnt); + release_imp_task = current_task()->task_imp_base; + if (ipc_importance_task_is_any_receiver_type(release_imp_task)) { + ipc_importance_task_drop_internal_assertion(release_imp_task, assertcnt); } - } else { - /* the port chain we are enqueued on should cover our assertions */ - assert(assertcnt <= self->ith_assertions); } } #endif /* IMPORTANCE_INHERITANCE */ @@ -1030,8 +1009,8 @@ ipc_port_check_circularity( ipc_port_t base; #if IMPORTANCE_INHERITANCE - task_t task = TASK_NULL; - task_t release_task = TASK_NULL; + ipc_importance_task_t imp_task = IIT_NULL; + ipc_importance_task_t release_imp_task = IIT_NULL; int assertcnt = 0; #endif /* IMPORTANCE_INHERITANCE */ @@ -1131,15 +1110,14 @@ ipc_port_check_circularity( /* must have been in limbo or still bound to a task */ assert(port->ip_tempowner != 0); - if (port->ip_taskptr != 0) { - /* - * We delayed dropping assertions from a specific task. - * Cache that info now (we'll drop assertions and the - * task reference below). - */ - release_task = port->ip_imp_task; - port->ip_imp_task = TASK_NULL; - port->ip_taskptr = 0; + /* + * We delayed dropping assertions from a specific task. + * Cache that info now (we'll drop assertions and the + * task reference below). 
+ */ + release_imp_task = port->ip_imp_task; + if (IIT_NULL != release_imp_task) { + port->ip_imp_task = IIT_NULL; } assertcnt = port->ip_impcount; @@ -1191,9 +1169,11 @@ ipc_port_check_circularity( */ if (ip_active(base) && (assertcnt > 0)) { if (base->ip_tempowner != 0) { - if (base->ip_taskptr != 0) + if (IIT_NULL != base->ip_imp_task) { /* specified tempowner task */ - task = base->ip_imp_task; + imp_task = base->ip_imp_task; + assert(ipc_importance_task_is_any_receiver_type(imp_task)); + } /* otherwise don't boost current task */ } else if (base->ip_receiver_name != MACH_PORT_NULL) { @@ -1201,14 +1181,13 @@ ipc_port_check_circularity( /* only spaces with boost-accepting tasks */ if (space->is_task != TASK_NULL && - space->is_task->imp_receiver != 0) - task = space->is_task; + ipc_importance_task_is_any_receiver_type(space->is_task->task_imp_base)) + imp_task = space->is_task->task_imp_base; } /* take reference before unlocking base */ - if (task != TASK_NULL) { - assert(task->imp_receiver != 0); - task_reference(task); + if (imp_task != IIT_NULL) { + ipc_importance_task_reference(imp_task); } } #endif /* IMPORTANCE_INHERITANCE */ @@ -1220,20 +1199,20 @@ ipc_port_check_circularity( * Transfer assertions now that the ports are unlocked. * Avoid extra overhead if transferring to/from the same task. */ - boolean_t transfer_assertions = (task != release_task) ? TRUE : FALSE; + boolean_t transfer_assertions = (imp_task != release_imp_task) ? TRUE : FALSE; - if (task != TASK_NULL) { + if (imp_task != IIT_NULL) { if (transfer_assertions) - task_importance_hold_internal_assertion(task, assertcnt); - task_deallocate(task); - task = TASK_NULL; + ipc_importance_task_hold_internal_assertion(imp_task, assertcnt); + ipc_importance_task_release(imp_task); + imp_task = IIT_NULL; } - if (release_task != TASK_NULL) { + if (release_imp_task != IIT_NULL) { if (transfer_assertions) - task_importance_drop_internal_assertion(release_task, assertcnt); - task_deallocate(release_task); - release_task = TASK_NULL; + ipc_importance_task_drop_internal_assertion(release_imp_task, assertcnt); + ipc_importance_task_release(release_imp_task); + release_imp_task = IIT_NULL; } #endif /* IMPORTANCE_INHERITANCE */ @@ -1241,7 +1220,94 @@ ipc_port_check_circularity( } /* - * Routine: ipc_port_importance_delta + * Routine: ipc_port_impcount_delta + * Purpose: + * Adjust only the importance count associated with a port. + * If there are any adjustments to be made to receiver task, + * those are handled elsewhere. + * + * For now, be defensive during deductions to make sure the + * impcount for the port doesn't underflow zero. This will + * go away when the port boost addition is made atomic (see + * note in ipc_port_importance_delta()). + * Conditions: + * The port is referenced and locked. + * Nothing else is locked. 
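
The body that follows implements a single rule: additions always apply, deductions are clamped so ip_impcount cannot underflow, and the return value is the delta actually applied. As a standalone model in plain C (the 24-bit ip_impcount bitfield stands in as an int):

    /* Model of the clamped importance-count update. */
    static int
    clamped_delta(int *impcount, int delta)
    {
            if (delta >= 0) {              /* boosts simply accumulate */
                    *impcount += delta;
                    return delta;
            }
            if (*impcount >= -delta) {     /* enough boosts to cover the drop */
                    *impcount += delta;
                    return delta;
            }
            delta = -*impcount;            /* over-release: truncate the drop */
            *impcount = 0;
            return delta;                  /* the (negative) amount applied */
    }

With *impcount == 1, clamped_delta(&c, -3) returns -1 and leaves the count at zero, which is exactly the "dropping 3 assertion(s) but port only has 1 remaining" situation the DEVELOPMENT/DEBUG printf below reports.
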
+ */ +mach_port_delta_t +ipc_port_impcount_delta( + ipc_port_t port, + mach_port_delta_t delta, + ipc_port_t __unused base) +{ + mach_port_delta_t absdelta; + + if (!ip_active(port)) { + return 0; + } + + /* adding/doing nothing is easy */ + if (delta >= 0) { + port->ip_impcount += delta; + return delta; + } + + absdelta = 0 - delta; + //assert(port->ip_impcount >= absdelta); + /* if we have enough to deduct, we're done */ + if (port->ip_impcount >= absdelta) { + port->ip_impcount -= absdelta; + return delta; + } + +#if DEVELOPMENT || DEBUG + if (port->ip_receiver_name != MACH_PORT_NULL) { + task_t target_task = port->ip_receiver->is_task; + ipc_importance_task_t target_imp = target_task->task_imp_base; + const char *target_procname; + int target_pid; + + if (target_imp != IIT_NULL) { + target_procname = target_imp->iit_procname; + target_pid = target_imp->iit_bsd_pid; + } else { + target_procname = "unknown"; + target_pid = -1; + } + printf("Over-release of importance assertions for port 0x%x receiver pid %d (%s), " + "dropping %d assertion(s) but port only has %d remaining.\n", + port->ip_receiver_name, + target_imp->iit_bsd_pid, target_imp->iit_procname, + absdelta, port->ip_impcount); + + } else if (base != IP_NULL) { + task_t target_task = base->ip_receiver->is_task; + ipc_importance_task_t target_imp = target_task->task_imp_base; + const char *target_procname; + int target_pid; + + if (target_imp != IIT_NULL) { + target_procname = target_imp->iit_procname; + target_pid = target_imp->iit_bsd_pid; + } else { + target_procname = "unknown"; + target_pid = -1; + } + printf("Over-release of importance assertions for port %p " + "enqueued on port 0x%x with receiver pid %d (%s), " + "dropping %d assertion(s) but port only has %d remaining.\n", + port, base->ip_receiver_name, + target_imp->iit_bsd_pid, target_imp->iit_procname, + absdelta, port->ip_impcount); + } +#endif + delta = 0 - port->ip_impcount; + port->ip_impcount = 0; + return delta; +} + +/* + * Routine: ipc_port_importance_delta_internal * Purpose: * Adjust the importance count through the given port. * If the port is in transit, apply the delta throughout @@ -1257,15 +1323,17 @@ ipc_port_check_circularity( #if IMPORTANCE_INHERITANCE boolean_t -ipc_port_importance_delta( +ipc_port_importance_delta_internal( ipc_port_t port, - mach_port_delta_t delta) + mach_port_delta_t *deltap, + ipc_importance_task_t *imp_task) { ipc_port_t next, base; - task_t task = TASK_NULL; boolean_t dropped = FALSE; - if (delta == 0) + *imp_task = IIT_NULL; + + if (*deltap == 0) return FALSE; base = port; @@ -1293,10 +1361,24 @@ ipc_port_importance_delta( /* unlock down to the base, adding a boost at each level */ for (;;) { - port->ip_impcount += delta; + /* + * JMM TODO - because of the port unlock to grab the multiple lock + * above, a subsequent drop of importance could race and beat + * the "previous" increase - causing the port impcount to go + * negative briefly. The defensive deduction performed by + * ipc_port_impcount_delta() defeats that, and therefore can + * cause an importance leak once the increase finally arrives. + * + * Need to rework the importance delta logic to be more like + * ipc_importance_inherit_from() where it locks all it needs in + * one pass to avoid any lock drops - to keep that race from + * ever occuring. 
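
The structural answer to that TODO is already half present: the chain walk above only accumulates the clamped delta and reports the task at the base of the chain, while the task-level assertion work is done by the wrapper re-added in the next hunk, after every port lock is gone. The wrapper's logic, condensed:

    /* port arrives locked; 'dropped' records whether that lock was released */
    dropped = ipc_port_importance_delta_internal(port, &delta, &imp_task);
    if (IIT_NULL == imp_task)
            return dropped;
    if (!dropped) {
            dropped = TRUE;
            ip_unlock(port);   /* never enter the task layer holding a port lock */
    }
    if (delta > 0)
            ipc_importance_task_hold_internal_assertion(imp_task, delta);
    else
            ipc_importance_task_drop_internal_assertion(imp_task, -delta);
    ipc_importance_task_release(imp_task);
    return dropped;
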
+ */ + *deltap = ipc_port_impcount_delta(port, *deltap, base); - if (port == base) + if (port == base) { break; + } /* port is in transit */ assert(port->ip_tempowner == 0); @@ -1308,8 +1390,8 @@ ipc_port_importance_delta( /* find the task (if any) to boost according to the base */ if (ip_active(base)) { if (base->ip_tempowner != 0) { - if (base->ip_taskptr != 0) - task = base->ip_imp_task; + if (IIT_NULL != base->ip_imp_task) + *imp_task = base->ip_imp_task; /* otherwise don't boost */ } else if (base->ip_receiver_name != MACH_PORT_NULL) { @@ -1317,8 +1399,9 @@ ipc_port_importance_delta( /* only spaces with boost-accepting tasks */ if (space->is_task != TASK_NULL && - space->is_task->imp_receiver != 0) - task = space->is_task; + ipc_importance_task_is_any_receiver_type(space->is_task->task_imp_base)) { + *imp_task = space->is_task->task_imp_base; + } } } @@ -1326,24 +1409,62 @@ ipc_port_importance_delta( * Only the base is locked. If we have to hold or drop task * importance assertions, we'll have to drop that lock as well. */ - if (task != TASK_NULL) { + if (*imp_task != IIT_NULL) { /* take a reference before unlocking base */ - assert(task->imp_receiver != 0); - task_reference(task); + ipc_importance_task_reference(*imp_task); + } + if (dropped == TRUE) { ip_unlock(base); - dropped = TRUE; + } - if (delta > 0) - task_importance_hold_internal_assertion(task, delta); - else - task_importance_drop_internal_assertion(task, -delta); + return dropped; +} +#endif /* IMPORTANCE_INHERITANCE */ - task_deallocate(task); - } else if (dropped == TRUE) { - ip_unlock(base); +/* + * Routine: ipc_port_importance_delta + * Purpose: + * Adjust the importance count through the given port. + * If the port is in transit, apply the delta throughout + * the chain. + * + * If there is a task at the base of the chain that wants/needs + * to be adjusted, apply the delta. + * Conditions: + * The port is referenced and locked on entry. + * Nothing else is locked. + * The lock may be dropped on exit. + * Returns TRUE if lock was dropped. + */ +#if IMPORTANCE_INHERITANCE + +boolean_t +ipc_port_importance_delta( + ipc_port_t port, + mach_port_delta_t delta) +{ + ipc_importance_task_t imp_task = IIT_NULL; + boolean_t dropped; + + dropped = ipc_port_importance_delta_internal(port, &delta, &imp_task); + + if (IIT_NULL == imp_task) + return dropped; + + if (!dropped) { + dropped = TRUE; + ip_unlock(port); } + assert(ipc_importance_task_is_any_receiver_type(imp_task)); + + if (delta > 0) + ipc_importance_task_hold_internal_assertion(imp_task, delta); + else + ipc_importance_task_drop_internal_assertion(imp_task, -delta); + + ipc_importance_task_release(imp_task); return dropped; } #endif /* IMPORTANCE_INHERITANCE */ @@ -1527,15 +1648,16 @@ ipc_port_release_send( ip_lock(port); + assert(port->ip_srights > 0); + port->ip_srights--; + if (!ip_active(port)) { ip_unlock(port); ip_release(port); return; } - assert(port->ip_srights > 0); - - if (--port->ip_srights == 0 && + if (port->ip_srights == 0 && port->ip_nsrequest != IP_NULL) { nsrequest = port->ip_nsrequest; port->ip_nsrequest = IP_NULL; @@ -1689,18 +1811,6 @@ ipc_port_alloc_special( ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX); #endif /* MACH_ASSERT */ -#if CONFIG_MACF_MACH - /* Currently, ipc_port_alloc_special is used for two things: - * - Reply ports for messages from the kernel - * - Ports for communication with the kernel (e.g. 
task ports) - * Since both of these would typically be labelled as kernel objects, - * we will use a new entry point for this purpose, as current_task() - * is often wrong (i.e. not kernel_task) or null. - */ - mac_port_label_init(&port->ip_label); - mac_port_label_associate_kernel(&port->ip_label, space == ipc_space_reply); -#endif - return port; } @@ -1763,11 +1873,6 @@ ipc_port_finalize( #if MACH_ASSERT ipc_port_track_dealloc(port); #endif /* MACH_ASSERT */ - -#if CONFIG_MACF_MACH - /* Port label should have been initialized after creation. */ - mac_port_label_destroy(&port->ip_label); -#endif } #if MACH_ASSERT diff --git a/osfmk/ipc/ipc_port.h b/osfmk/ipc/ipc_port.h index 169189f63..ec1f3efc9 100644 --- a/osfmk/ipc/ipc_port.h +++ b/osfmk/ipc/ipc_port.h @@ -83,6 +83,7 @@ #include #include +#include #include #include @@ -127,7 +128,7 @@ struct ipc_port { union { ipc_kobject_t kobject; - task_t imp_task; + ipc_importance_task_t imp_task; uintptr_t alias; } kdata; @@ -144,10 +145,9 @@ struct ipc_port { ip_spimportant:1, /* ... at least one is importance donating */ ip_impdonation:1, /* port supports importance donation */ ip_tempowner:1, /* dont give donations to current receiver */ - ip_taskptr:1, /* ... instead give them to a specified task */ ip_guarded:1, /* port guarded (use context value as guard) */ ip_strict_guard:1, /* Strict guarding; Prevents user manipulation of context values directly */ - ip_reserved:1, + ip_reserved:2, ip_impcount:24; /* number of importance donations in nested queue */ mach_vm_address_t ip_context; @@ -162,10 +162,6 @@ struct ipc_port { uintptr_t ip_callstack[IP_CALLSTACK_MAX]; /* stack trace */ unsigned long ip_spares[IP_NSPARES]; /* for debugging */ #endif /* MACH_ASSERT */ - -#if CONFIG_MACF_MACH - struct label ip_label; -#endif }; @@ -264,20 +260,7 @@ extern lck_attr_t ipc_lck_attr; * when it is taken. 
*/ -#if 1 -decl_lck_mtx_data(extern,ipc_port_multiple_lock_data) -extern lck_mtx_ext_t ipc_port_multiple_lock_data_ext; - -#define ipc_port_multiple_lock_init() \ - lck_mtx_init_ext(&ipc_port_multiple_lock_data, &ipc_port_multiple_lock_data_ext, &ipc_lck_grp, &ipc_lck_attr) - -#define ipc_port_multiple_lock() \ - lck_mtx_lock(&ipc_port_multiple_lock_data) - -#define ipc_port_multiple_unlock() \ - lck_mtx_unlock(&ipc_port_multiple_lock_data) -#else -lck_spin_t ipc_port_multiple_lock_data; +extern lck_spin_t ipc_port_multiple_lock_data; #define ipc_port_multiple_lock_init() \ lck_spin_init(&ipc_port_multiple_lock_data, &ipc_lck_grp, &ipc_lck_attr) @@ -287,7 +270,6 @@ lck_spin_t ipc_port_multiple_lock_data; #define ipc_port_multiple_unlock() \ lck_spin_unlock(&ipc_port_multiple_lock_data) -#endif /* * The port timestamp facility provides timestamps @@ -452,7 +434,21 @@ ipc_port_check_circularity( ipc_port_t dest); #if IMPORTANCE_INHERITANCE -/* Apply an importance delta to a port */ +/* apply importance delta to port only */ +extern mach_port_delta_t +ipc_port_impcount_delta( + ipc_port_t port, + mach_port_delta_t delta, + ipc_port_t base); + +/* apply importance delta to port, and return task importance for update */ +extern boolean_t +ipc_port_importance_delta_internal( + ipc_port_t port, + mach_port_delta_t *delta, + ipc_importance_task_t *imp_task); + +/* Apply an importance delta to a port and reflect change in receiver task */ extern boolean_t ipc_port_importance_delta( ipc_port_t port, diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c index 3a47ddae9..533ee3f08 100644 --- a/osfmk/ipc/ipc_pset.c +++ b/osfmk/ipc/ipc_pset.c @@ -430,7 +430,8 @@ filt_machport( * provided, just force a MACH_RCV_TOO_LARGE to detect the * name of the port and sizeof the waiting message. 
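
This knote receive path is why filt_machport (next hunk) starts passing MACH_RCV_VOUCHER through: a kqueue-based receiver can have the kernel deliver a queued message, voucher right included, directly into its buffer. In this era of the kernel the EVFILT_MACHPORT ident is expected to name a port set; a user-space registration sketch (hypothetical wrapper, error handling elided):

    #include <sys/event.h>
    #include <mach/mach.h>
    #include <stdint.h>

    int
    watch_portset(int kq, mach_port_t pset, void *buf, size_t bufsize)
    {
            struct kevent64_s ev;

            EV_SET64(&ev, pset, EVFILT_MACHPORT, EV_ADD | EV_ENABLE,
                MACH_RCV_MSG | MACH_RCV_VOUCHER,  /* fflags: receive options */
                0, 0,
                (uint64_t)(uintptr_t)buf,         /* ext[0]: receive buffer */
                (uint64_t)bufsize);               /* ext[1]: buffer size */

            return kevent64(kq, &ev, 1, NULL, 0, 0, NULL);
    }
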
*/ - option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_LARGE_IDENTITY|MACH_RCV_TRAILER_MASK); + option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_LARGE_IDENTITY| + MACH_RCV_TRAILER_MASK|MACH_RCV_VOUCHER); if (option & MACH_RCV_MSG) { self->ith_msg_addr = (mach_vm_address_t) kn->kn_ext[0]; size = (mach_msg_size_t)kn->kn_ext[1]; diff --git a/osfmk/ipc/ipc_right.c b/osfmk/ipc/ipc_right.c index f4e102b47..205f91ca7 100644 --- a/osfmk/ipc/ipc_right.c +++ b/osfmk/ipc/ipc_right.c @@ -86,6 +86,7 @@ #include #include #include +#include #include /* Allow IPC to generate mach port guard exceptions */ @@ -1689,10 +1690,6 @@ ipc_right_copyin_check( { ipc_entry_bits_t bits; ipc_port_t port; -#if CONFIG_MACF_MACH - task_t self = current_task(); - int rc = 0; -#endif bits= entry->ie_bits; assert(is_active(space)); @@ -1701,54 +1698,21 @@ ipc_right_copyin_check( case MACH_MSG_TYPE_MAKE_SEND: if ((bits & MACH_PORT_TYPE_RECEIVE) == 0) return FALSE; - -#if CONFIG_MACF_MACH - port = (ipc_port_t) entry->ie_object; - ip_lock(port); - tasklabel_lock(self); - rc = mac_port_check_make_send(&self->maclabel, &port->ip_label); tasklabel_unlock(self); - ip_unlock(port); - if (rc) - return FALSE; -#endif break; case MACH_MSG_TYPE_MAKE_SEND_ONCE: if ((bits & MACH_PORT_TYPE_RECEIVE) == 0) return FALSE; - -#if CONFIG_MACF_MACH - port = (ipc_port_t) entry->ie_object; - ip_lock(port); - tasklabel_lock(self); - rc = mac_port_check_make_send_once(&self->maclabel, &port->ip_label); - tasklabel_unlock(self); - ip_unlock(port); - if (rc) - return FALSE; -#endif break; case MACH_MSG_TYPE_MOVE_RECEIVE: if ((bits & MACH_PORT_TYPE_RECEIVE) == 0) return FALSE; - -#if CONFIG_MACF_MACH - port = (ipc_port_t) entry->ie_object; - ip_lock(port); - tasklabel_lock(self); - rc = mac_port_check_move_receive(&self->maclabel, &port->ip_label); - tasklabel_unlock(self); - ip_unlock(port); - if (rc) - return FALSE; -#endif break; case MACH_MSG_TYPE_COPY_SEND: case MACH_MSG_TYPE_MOVE_SEND: case MACH_MSG_TYPE_MOVE_SEND_ONCE: { - boolean_t active; if (bits & MACH_PORT_TYPE_DEAD_NAME) break; @@ -1759,35 +1723,12 @@ ipc_right_copyin_check( port = (ipc_port_t) entry->ie_object; assert(port != IP_NULL); - ip_lock(port); - active = ip_active(port); -#if CONFIG_MACF_MACH - tasklabel_lock(self); - switch (msgt_name) { - case MACH_MSG_TYPE_COPY_SEND: - rc = mac_port_check_copy_send(&self->maclabel, - &port->ip_label); - break; - case MACH_MSG_TYPE_MOVE_SEND: - rc = mac_port_check_move_send(&self->maclabel, - &port->ip_label); - break; - case MACH_MSG_TYPE_MOVE_SEND_ONCE: - rc = mac_port_check_move_send_once(&self->maclabel, - &port->ip_label); - break; - default: - panic("ipc_right_copyin_check: strange rights"); - } - tasklabel_unlock(self); - if (rc) { - ip_unlock(port); - return FALSE; - } -#endif - ip_unlock(port); - - if (!active) { + /* + * active status peek to avoid checks that will be skipped + * on copyin for dead ports. Lock not held, so will not be + * atomic (but once dead, there's no going back). 
+ */ + if (!ip_active(port)) { break; } @@ -1849,11 +1790,7 @@ ipc_right_copyin( { ipc_entry_bits_t bits; ipc_port_t port; -#if CONFIG_MACF_MACH - task_t self = current_task(); - int rc; -#endif - + *releasep = IP_NULL; #if IMPORTANCE_INHERITANCE @@ -1878,16 +1815,6 @@ ipc_right_copyin( assert(port->ip_receiver_name == name); assert(port->ip_receiver == space); -#if CONFIG_MACF_MACH - tasklabel_lock(self); - rc = mac_port_check_make_send(&self->maclabel, &port->ip_label); - tasklabel_unlock(self); - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } -#endif - port->ip_mscount++; port->ip_srights++; ip_reference(port); @@ -1911,16 +1838,6 @@ ipc_right_copyin( assert(port->ip_receiver_name == name); assert(port->ip_receiver == space); -#if CONFIG_MACF_MACH - tasklabel_lock(self); - rc = mac_port_check_make_send_once(&self->maclabel, &port->ip_label); - tasklabel_unlock(self); - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } -#endif - port->ip_sorights++; ip_reference(port); ip_unlock(port); @@ -1944,17 +1861,6 @@ ipc_right_copyin( assert(port->ip_receiver_name == name); assert(port->ip_receiver == space); -#if CONFIG_MACF_MACH - tasklabel_lock(self); - rc = mac_port_check_move_receive(&self->maclabel, - &port->ip_label); - tasklabel_unlock(self); - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } -#endif - if (bits & MACH_PORT_TYPE_SEND) { assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_RECEIVE); @@ -1990,7 +1896,7 @@ ipc_right_copyin( * destination port (see ipc_port_check_circularity()). */ if (port->ip_tempowner == 0) { - assert(port->ip_taskptr == 0); + assert(IIT_NULL == port->ip_imp_task); /* ports in limbo have to be tempowner */ port->ip_tempowner = 1; @@ -2027,16 +1933,6 @@ ipc_right_copyin( } /* port is locked and active */ -#if CONFIG_MACF_MACH - tasklabel_lock(self); - rc = mac_port_check_copy_send(&self->maclabel, &port->ip_label); - tasklabel_unlock(self); - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } -#endif - if ((bits & MACH_PORT_TYPE_SEND) == 0) { assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_ONCE); assert(port->ip_sorights > 0); @@ -2079,17 +1975,6 @@ ipc_right_copyin( } /* port is locked and active */ -#if CONFIG_MACF_MACH - tasklabel_lock (self); - rc = mac_port_check_copy_send (&self->maclabel, &port->ip_label); - tasklabel_unlock (self); - if (rc) - { - ip_unlock (port); - return KERN_NO_ACCESS; - } -#endif - if ((bits & MACH_PORT_TYPE_SEND) == 0) { assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_ONCE); assert(port->ip_sorights > 0); @@ -2155,17 +2040,6 @@ ipc_right_copyin( } /* port is locked and active */ -#if CONFIG_MACF_MACH - tasklabel_lock (self); - rc = mac_port_check_copy_send (&self->maclabel, &port->ip_label); - tasklabel_unlock (self); - if (rc) - { - ip_unlock (port); - return KERN_NO_ACCESS; - } -#endif - if ((bits & MACH_PORT_TYPE_SEND_ONCE) == 0) { assert(bits & MACH_PORT_TYPE_SEND); assert(port->ip_srights > 0); @@ -2312,7 +2186,7 @@ ipc_right_copyin_undo( } /* - * Routine: ipc_right_copyin_two + * Routine: ipc_right_copyin_two_move_sends * Purpose: * Like ipc_right_copyin with MACH_MSG_TYPE_MOVE_SEND * and deadok == FALSE, except that this moves two @@ -2324,9 +2198,9 @@ ipc_right_copyin_undo( * KERN_SUCCESS Acquired an object. * KERN_INVALID_RIGHT Name doesn't denote correct right. 
*/ - +static kern_return_t -ipc_right_copyin_two( +ipc_right_copyin_two_move_sends( ipc_space_t space, mach_port_name_t name, ipc_entry_t entry, @@ -2338,10 +2212,6 @@ ipc_right_copyin_two( mach_port_urefs_t urefs; ipc_port_t port; ipc_port_t request = IP_NULL; -#if CONFIG_MACF_MACH - task_t self = current_task(); - int rc; -#endif *releasep = IP_NULL; @@ -2365,16 +2235,6 @@ ipc_right_copyin_two( } /* port is locked and active */ -#if CONFIG_MACF_MACH - tasklabel_lock(self); - rc = mac_port_check_copy_send(&self->maclabel, &port->ip_label); - tasklabel_unlock(self); - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } -#endif - assert(port->ip_srights > 0); if (urefs == 2) { @@ -2418,6 +2278,188 @@ ipc_right_copyin_two( return KERN_INVALID_RIGHT; } + +/* + * Routine: ipc_right_copyin_two + * Purpose: + * Like ipc_right_copyin with two dispositions, + * each of which results in a send or send-once right, + * and deadok = FALSE. + * Conditions: + * The space is write-locked and active. + * The object is returned with two refs/rights. + * Returns: + * KERN_SUCCESS Acquired an object. + * KERN_INVALID_RIGHT Name doesn't denote correct right(s). + * KERN_INVALID_CAPABILITY Name doesn't denote correct right for msgt_two. + */ +kern_return_t +ipc_right_copyin_two( + ipc_space_t space, + mach_port_name_t name, + ipc_entry_t entry, + mach_msg_type_name_t msgt_one, + mach_msg_type_name_t msgt_two, + ipc_object_t *objectp, + ipc_port_t *sorightp, + ipc_port_t *releasep) +{ + queue_head_t links_data; + queue_t links = &links_data; + kern_return_t kr; + +#if IMPORTANCE_INHERITANCE + int assertcnt = 0; +#endif + + queue_init(links); + + assert(MACH_MSG_TYPE_PORT_ANY_SEND(msgt_one)); + assert(MACH_MSG_TYPE_PORT_ANY_SEND(msgt_two)); + + + /* + * Pre-validate the second disposition is possible all by itself. + */ + if (!ipc_right_copyin_check(space, name, entry, msgt_two)) { + return KERN_INVALID_CAPABILITY; + } + + /* + * This is a little tedious to make atomic, because + * there are 25 combinations of valid dispositions. + * However, most are easy. + */ + + /* + * If either is move-sonce, then there must be an error. + */ + if (msgt_one == MACH_MSG_TYPE_MOVE_SEND_ONCE || + msgt_two == MACH_MSG_TYPE_MOVE_SEND_ONCE) { + return KERN_INVALID_RIGHT; + } + + if ((msgt_one == MACH_MSG_TYPE_MAKE_SEND) || + (msgt_one == MACH_MSG_TYPE_MAKE_SEND_ONCE) || + (msgt_two == MACH_MSG_TYPE_MAKE_SEND) || + (msgt_two == MACH_MSG_TYPE_MAKE_SEND_ONCE)) { + /* + * One of the dispositions needs a receive right. + * + * If the copyin below succeeds, we know the receive + * right is there (because the pre-validation of + * the second disposition already succeeded in our + * caller). + * + * Hence the port is not in danger of dying. + */ + ipc_object_t object_two; + +#if IMPORTANCE_INHERITANCE + kr = ipc_right_copyin(space, name, entry, + msgt_one, FALSE, + objectp, sorightp, releasep, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, name, entry, + msgt_one, FALSE, + objectp, sorightp, releasep, + links); +#endif /* IMPORTANCE_INHERITANCE */ + if (kr != KERN_SUCCESS) { + return kr; + } + + assert(IO_VALID(*objectp)); + assert(*sorightp == IP_NULL); + assert(*releasep == IP_NULL); + + /* + * Now copyin the second (previously validated) + * disposition. The result can't be a dead port, + * as no valid disposition can make us lose our + * receive right. 
+ */ +#if IMPORTANCE_INHERITANCE + kr = ipc_right_copyin(space, name, entry, + msgt_two, FALSE, + &object_two, sorightp, releasep, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, name, entry, + msgt_two, FALSE, + &object_two, sorightp, releasep, + links); +#endif /* IMPORTANCE_INHERITANCE */ + assert(kr == KERN_SUCCESS); + assert(*sorightp == IP_NULL); + assert(*releasep == IP_NULL); + assert(object_two == *objectp); + assert(entry->ie_bits & MACH_PORT_TYPE_RECEIVE); + + } else if ((msgt_one == MACH_MSG_TYPE_MOVE_SEND) && + (msgt_two == MACH_MSG_TYPE_MOVE_SEND)) { + /* + * This is an easy case. Just use our + * handy-dandy special-purpose copyin call + * to get two send rights for the price of one. + */ + kr = ipc_right_copyin_two_move_sends(space, name, entry, + objectp, sorightp, + releasep); + if (kr != KERN_SUCCESS) { + return kr; + } + + } else { + mach_msg_type_name_t msgt_name; + + /* + * Must be either a single move-send and a + * copy-send, or two copy-send dispositions. + * Use the disposition with the greatest side + * effects for the actual copyin - then just + * duplicate the send right you get back. + */ + if (msgt_one == MACH_MSG_TYPE_MOVE_SEND || + msgt_two == MACH_MSG_TYPE_MOVE_SEND) { + msgt_name = MACH_MSG_TYPE_MOVE_SEND; + } else { + msgt_name = MACH_MSG_TYPE_COPY_SEND; + } + +#if IMPORTANCE_INHERITANCE + kr = ipc_right_copyin(space, name, entry, + msgt_name, FALSE, + objectp, sorightp, releasep, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, name, entry, + msgt_name, FALSE, + objectp, sorightp, releasep, + links); +#endif /* IMPORTANCE_INHERITANCE */ + if (kr != KERN_SUCCESS) { + return kr; + } + + /* + * Copy the right we got back. If it is dead now, + * that's OK. Neither right will be usable to send + * a message anyway. 
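
Viewed from above, the new ipc_right_copyin_two() collapses the 25 possible disposition pairs into three paths once the early checks have rejected any MACH_MSG_TYPE_MOVE_SEND_ONCE (both dispositions are already asserted to be send-like). An equivalent standalone classifier:

    #include <mach/message.h>

    typedef enum {
            COPYIN_TWO_RECEIVE_RIGHT, /* any make-* requires a receive right */
            COPYIN_TWO_MOVE_BOTH,     /* move + move: one copyin, both rights */
            COPYIN_TWO_COPY_AND_DUP   /* copyin the stronger one, duplicate it */
    } copyin_two_path_t;

    static copyin_two_path_t
    copyin_two_path(mach_msg_type_name_t one, mach_msg_type_name_t two)
    {
            if (one == MACH_MSG_TYPE_MAKE_SEND || one == MACH_MSG_TYPE_MAKE_SEND_ONCE ||
                two == MACH_MSG_TYPE_MAKE_SEND || two == MACH_MSG_TYPE_MAKE_SEND_ONCE)
                    return COPYIN_TWO_RECEIVE_RIGHT;

            if (one == MACH_MSG_TYPE_MOVE_SEND && two == MACH_MSG_TYPE_MOVE_SEND)
                    return COPYIN_TWO_MOVE_BOTH;

            return COPYIN_TWO_COPY_AND_DUP;
    }

The receive-right path needs no recheck because the pre-validation of msgt_two plus the first successful copyin together prove the receive right is present, so the port cannot die between the two copyins.
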
+ */ + (void)ipc_port_copy_send((ipc_port_t)*objectp); + } + + assert(queue_empty(links)); + + return KERN_SUCCESS; +} + + /* * Routine: ipc_right_copyout * Purpose: @@ -2453,9 +2495,6 @@ ipc_right_copyout( { ipc_entry_bits_t bits; ipc_port_t port; -#if CONFIG_MACF_MACH - int rc; -#endif bits = entry->ie_bits; @@ -2472,19 +2511,6 @@ ipc_right_copyout( assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_NONE); assert(port->ip_sorights > 0); -#if CONFIG_MACF_MACH - if (space->is_task) { - tasklabel_lock(space->is_task); - rc = mac_port_check_hold_send_once(&space->is_task->maclabel, - &port->ip_label); - tasklabel_unlock(space->is_task); - - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } - } -#endif /* transfer send-once right and ref to entry */ ip_unlock(port); @@ -2495,20 +2521,6 @@ ipc_right_copyout( case MACH_MSG_TYPE_PORT_SEND: assert(port->ip_srights > 0); -#if CONFIG_MACF_MACH - if (space->is_task) { - tasklabel_lock(space->is_task); - rc = mac_port_check_hold_send(&space->is_task->maclabel, - &port->ip_label); - tasklabel_unlock(space->is_task); - - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } - } -#endif - if (bits & MACH_PORT_TYPE_SEND) { mach_port_urefs_t urefs = IE_BITS_UREFS(bits); @@ -2569,20 +2581,6 @@ ipc_right_copyout( assert(port->ip_receiver_name == MACH_PORT_NULL); dest = port->ip_destination; -#if CONFIG_MACF_MACH - if (space->is_task) { - tasklabel_lock(space->is_task); - rc = mac_port_check_hold_receive(&space->is_task->maclabel, - &port->ip_label); - tasklabel_unlock(space->is_task); - - if (rc) { - ip_unlock(port); - return KERN_NO_ACCESS; - } - } -#endif - port->ip_receiver_name = name; port->ip_receiver = space; @@ -2619,8 +2617,7 @@ ipc_right_copyout( * getting enqueued. */ ip_lock(dest); - assert(dest->ip_impcount >= assertcnt); - dest->ip_impcount -= assertcnt; + ipc_port_impcount_delta(dest, 0 - assertcnt, IP_NULL); ip_unlock(dest); #endif /* IMPORTANCE_INHERITANCE */ ip_release(dest); diff --git a/osfmk/ipc/ipc_right.h b/osfmk/ipc/ipc_right.h index b844f26fa..b42be8d21 100644 --- a/osfmk/ipc/ipc_right.h +++ b/osfmk/ipc/ipc_right.h @@ -203,11 +203,13 @@ extern void ipc_right_copyin_undo( ipc_object_t object, ipc_port_t soright); -/* Copyin two send rights from a space */ +/* Copyin a pair of dispositions from a space */ extern kern_return_t ipc_right_copyin_two( ipc_space_t space, mach_port_name_t name, ipc_entry_t entry, + mach_msg_type_name_t msgt_one, + mach_msg_type_name_t msgt_two, ipc_object_t *objectp, ipc_port_t *sorightp, ipc_port_t *releasep); diff --git a/osfmk/ipc/ipc_space.c b/osfmk/ipc/ipc_space.c index b7152e720..7d0305e32 100644 --- a/osfmk/ipc/ipc_space.c +++ b/osfmk/ipc/ipc_space.c @@ -164,6 +164,7 @@ ipc_space_create( is_lock_init(space); space->is_bits = 2; /* 2 refs, active, not growing */ space->is_table_size = new_size; + space->is_table_free = new_size - 1; space->is_table = table; space->is_table_next = initial+1; space->is_task = NULL; @@ -332,6 +333,7 @@ ipc_space_terminate( it_entries_free(space->is_table_next-1, table); space->is_table_size = 0; + space->is_table_free = 0; /* * Because the space is now dead, diff --git a/osfmk/ipc/ipc_space.h b/osfmk/ipc/ipc_space.h index b98eba367..ff7588ed0 100644 --- a/osfmk/ipc/ipc_space.h +++ b/osfmk/ipc/ipc_space.h @@ -84,7 +84,7 @@ #if MACH_KERNEL_PRIVATE #include #include -#include +#include #include #include #include @@ -114,6 +114,7 @@ struct ipc_space { lck_spin_t is_lock_data; ipc_space_refs_t is_bits; /* holds refs, active, growing */ ipc_entry_num_t is_table_size; /* 
current size of table */ + ipc_entry_num_t is_table_free; /* count of free elements */ ipc_entry_t is_table; /* an array of entries */ task_t is_task; /* associated task */ struct ipc_table_size *is_table_next; /* info for larger table */ diff --git a/osfmk/ipc/ipc_table.c b/osfmk/ipc/ipc_table.c index 1add1f5b3..67086488c 100644 --- a/osfmk/ipc/ipc_table.c +++ b/osfmk/ipc/ipc_table.c @@ -129,7 +129,7 @@ ipc_table_fill( index++; } } - if (incrsize < (PAGE_SIZE << 3)) + if (incrsize < (vm_size_t)(PAGE_SIZE << 3)) incrsize <<= 1; } } diff --git a/osfmk/ipc/ipc_types.h b/osfmk/ipc/ipc_types.h index b601a2e69..26cd73edc 100644 --- a/osfmk/ipc/ipc_types.h +++ b/osfmk/ipc/ipc_types.h @@ -72,6 +72,16 @@ typedef struct ipc_kmsg *ipc_kmsg_t; typedef void (*mach_msg_continue_t)(mach_msg_return_t); /* after wakeup */ #define MACH_MSG_CONTINUE_NULL ((mach_msg_continue_t) 0) +typedef struct ipc_importance_elem *ipc_importance_elem_t; +#define IIE_NULL ((ipc_importance_elem_t)0) + +typedef struct ipc_importance_task *ipc_importance_task_t; +#define IIT_NULL ((ipc_importance_task_t)0) + +typedef struct ipc_importance_inherit *ipc_importance_inherit_t; +#define III_NULL ((ipc_importance_inherit_t)0) + + #else /* MACH_KERNEL_PRIVATE */ struct ipc_object ; diff --git a/osfmk/ipc/ipc_voucher.c b/osfmk/ipc/ipc_voucher.c new file mode 100644 index 000000000..36e77dfd5 --- /dev/null +++ b/osfmk/ipc/ipc_voucher.c @@ -0,0 +1,2977 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +/* + * Sysctl variable; enable and disable tracing of voucher contents + */ +uint32_t ipc_voucher_trace_contents = 0; + +static zone_t ipc_voucher_zone; +static zone_t ipc_voucher_attr_control_zone; + +/* + * Voucher hash table + */ +#define IV_HASH_BUCKETS 127 +#define IV_HASH_BUCKET(x) ((x) % IV_HASH_BUCKETS) + +static queue_head_t ivht_bucket[IV_HASH_BUCKETS]; +static lck_spin_t ivht_lock_data; +static uint32_t ivht_count = 0; + +#define ivht_lock_init() \ + lck_spin_init(&ivht_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define ivht_lock_destroy() \ + lck_spin_destroy(&ivht_lock_data, &ipc_lck_grp) +#define ivht_lock() \ + lck_spin_lock(&ivht_lock_data) +#define ivht_lock_try() \ + lck_spin_try_lock(&ivht_lock_data) +#define ivht_unlock() \ + lck_spin_unlock(&ivht_lock_data) + +/* + * Global table of resource manager registrations + * + * NOTE: For now, limited to well-known resource managers + * eventually, will include dynamic allocations requiring + * table growth and hashing by key. + */ +static iv_index_t ivgt_keys_in_use = MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN; +static ipc_voucher_global_table_element iv_global_table[MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN]; +static lck_spin_t ivgt_lock_data; + +#define ivgt_lock_init() \ + lck_spin_init(&ivgt_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define ivgt_lock_destroy() \ + lck_spin_destroy(&ivgt_lock_data, &ipc_lck_grp) +#define ivgt_lock() \ + lck_spin_lock(&ivgt_lock_data) +#define ivgt_lock_try() \ + lck_spin_try_lock(&ivgt_lock_data) +#define ivgt_unlock() \ + lck_spin_unlock(&ivgt_lock_data) + +ipc_voucher_t iv_alloc(iv_index_t entries); +void iv_dealloc(ipc_voucher_t iv, boolean_t unhash); + +static inline iv_refs_t +iv_reference(ipc_voucher_t iv) +{ + iv_refs_t refs; + + refs = hw_atomic_add(&iv->iv_refs, 1); + return refs; +} + +static inline void +iv_release(ipc_voucher_t iv) +{ + iv_refs_t refs; + + assert(0 < iv->iv_refs); + refs = hw_atomic_sub(&iv->iv_refs, 1); + if (0 == refs) + iv_dealloc(iv, TRUE); +} + +/* + * freelist helper macros + */ +#define IV_FREELIST_END ((iv_index_t) 0) + +/* + * Attribute value hashing helper macros + */ +#define IV_HASH_END UINT32_MAX +#define IV_HASH_VAL(sz, val) \ + (((val) >> 3) % (sz)) + +static inline iv_index_t +iv_hash_value( + iv_index_t key_index, + mach_voucher_attr_value_handle_t value) +{ + ipc_voucher_attr_control_t ivac; + + ivac = iv_global_table[key_index].ivgte_control; + assert(IVAC_NULL != ivac); + return IV_HASH_VAL(ivac->ivac_init_table_size, value); +} + +/* + * Convert a key to an index. This key-index is used to both index + * into the voucher table of attribute cache indexes and also the + * table of resource managers by key. + * + * For now, well-known keys have a one-to-one mapping of indexes + * into these tables. But as time goes on, that may not always + * be the case (sparse use over time). This isolates the code from + * having to change in these cases - yet still lets us keep a densely + * packed set of tables. 
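+ *
+ * Under the current one-to-one mapping the conversion is just an
+ * off-by-one shift, since well-known keys are numbered from 1 while
+ * table indexes start at 0; e.g. a hypothetical well-known key with
+ * value 2 lands at index 1, while MACH_VOUCHER_ATTR_KEY_ALL and
+ * out-of-range keys map to IV_UNUSED_KEYINDEX.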
+ */ +static inline iv_index_t +iv_key_to_index(mach_voucher_attr_key_t key) +{ + if (MACH_VOUCHER_ATTR_KEY_ALL == key || + MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN < key) + return IV_UNUSED_KEYINDEX; + return (iv_index_t)key - 1; +} + +static inline mach_voucher_attr_key_t +iv_index_to_key(iv_index_t key_index) +{ + if (MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN > key_index) + return iv_global_table[key_index].ivgte_key; + return MACH_VOUCHER_ATTR_KEY_NONE; + +} + +static void ivace_release(iv_index_t key_index, iv_index_t value_index); +static void ivace_lookup_values(iv_index_t key_index, iv_index_t value_index, + mach_voucher_attr_value_handle_array_t values, + mach_voucher_attr_value_handle_array_size_t *count); + +static iv_index_t iv_lookup(ipc_voucher_t, iv_index_t); + + +static void ivgt_lookup(iv_index_t, + boolean_t, + ipc_voucher_attr_manager_t *, + ipc_voucher_attr_control_t *); + + +#if defined(MACH_VOUCHER_ATTR_KEY_USER_DATA) || defined(MACH_VOUCHER_ATTR_KEY_TEST) +void user_data_attr_manager_init(void); +#endif + +void +ipc_voucher_init(void) +{ + natural_t ipc_voucher_max = (task_max + thread_max) * 2; + natural_t attr_manager_max = MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN; + iv_index_t i; + + ipc_voucher_zone = zinit(sizeof(struct ipc_voucher), + ipc_voucher_max * sizeof(struct ipc_voucher), + sizeof(struct ipc_voucher), + "ipc vouchers"); + zone_change(ipc_voucher_zone, Z_NOENCRYPT, TRUE); + + ipc_voucher_attr_control_zone = zinit(sizeof(struct ipc_voucher_attr_control), + attr_manager_max * sizeof(struct ipc_voucher_attr_control), + sizeof(struct ipc_voucher_attr_control), + "ipc voucher attr controls"); + zone_change(ipc_voucher_attr_control_zone, Z_NOENCRYPT, TRUE); + + /* initialize voucher hash */ + ivht_lock_init(); + for (i = 0; i < IV_HASH_BUCKETS; i++) + queue_init(&ivht_bucket[i]); + + /* initialize global table locking */ + ivgt_lock_init(); + +#if defined(MACH_VOUCHER_ATTR_KEY_USER_DATA) || defined(MACH_VOUCHER_ATTR_KEY_TEST) + user_data_attr_manager_init(); +#endif +} + +ipc_voucher_t +iv_alloc(iv_index_t entries) +{ + ipc_voucher_t iv; + iv_index_t i; + + + iv = (ipc_voucher_t)zalloc(ipc_voucher_zone); + if (IV_NULL == iv) + return IV_NULL; + + iv->iv_refs = 1; + iv->iv_sum = 0; + iv->iv_hash = 0; + iv->iv_port = IP_NULL; + + if (entries > IV_ENTRIES_INLINE) { + iv_entry_t table; + + /* TODO - switch to ipc_table method of allocation */ + table = (iv_entry_t) kalloc(sizeof(*table) * entries); + if (IVE_NULL == table) { + zfree(ipc_voucher_zone, iv); + return IV_NULL; + } + iv->iv_table = table; + iv->iv_table_size = entries; + } else { + iv->iv_table = iv->iv_inline_table; + iv->iv_table_size = IV_ENTRIES_INLINE; + } + + /* initialize the table entries */ + for (i=0; i < iv->iv_table_size; i++) + iv->iv_table[i] = IV_UNUSED_VALINDEX; + + return (iv); +} + +/* + * Routine: iv_set + * Purpose: + * Set the voucher's value index for a given key index. + * Conditions: + * This is only called during voucher creation, as + * they are immutable once references are distributed. + */ +static void +iv_set(ipc_voucher_t iv, + iv_index_t key_index, + iv_index_t value_index) +{ + assert(key_index < iv->iv_table_size); + iv->iv_table[key_index] = value_index; +} + +void +iv_dealloc(ipc_voucher_t iv, boolean_t unhash) +{ + ipc_port_t port = iv->iv_port; + natural_t i; + + /* + * Do we have to remove it from the hash? 
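+	 * Callers dropping the last reference on a voucher that made it
+	 * into the hash (iv_release) pass unhash == TRUE; iv_dedup()
+	 * passes FALSE for a freshly built voucher that never entered
+	 * the hash, either because it matched an existing voucher or
+	 * because all of its entries were defaults.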
+ */ + if (unhash) { + ivht_lock(); + assert(0 == iv->iv_refs); + assert(IV_HASH_BUCKETS > iv->iv_hash); + queue_remove(&ivht_bucket[iv->iv_hash], iv, ipc_voucher_t, iv_hash_link); + ivht_count--; + ivht_unlock(); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC,MACH_IPC_VOUCHER_DESTROY) | DBG_FUNC_NONE, + VM_KERNEL_ADDRPERM((uintptr_t)iv), 0, ivht_count, 0, 0); + + } else + assert(0 == --iv->iv_refs); + + /* + * if a port was allocated for this voucher, + * it must not have any remaining send rights, + * because the port's reference on the voucher + * is gone. We can just discard it now. + */ + if (IP_VALID(port)) { + assert(ip_active(port)); + assert(port->ip_srights == 0); + + ipc_port_dealloc_kernel(port); + } + + /* release the attribute references held by this voucher */ + for (i = 0; i < iv->iv_table_size; i++) { + ivace_release(i, iv->iv_table[i]); +#if MACH_ASSERT + iv_set(iv, i, ~0); +#endif + } + + if (iv->iv_table != iv->iv_inline_table) + kfree(iv->iv_table, + iv->iv_table_size * sizeof(*iv->iv_table)); + + zfree(ipc_voucher_zone, iv); +} + +/* + * Routine: iv_lookup + * Purpose: + * Find the voucher's value index for a given key_index + * Conditions: + * Vouchers are immutable, so no locking required to do + * a lookup. + */ +static inline iv_index_t +iv_lookup(ipc_voucher_t iv, iv_index_t key_index) +{ + if (key_index < iv->iv_table_size) + return iv->iv_table[key_index]; + return IV_UNUSED_VALINDEX; +} + +/* + * Routine: unsafe_convert_port_to_voucher + * Purpose: + * Unsafe conversion of a port to a voucher. + * Intended only for use by trace and debugging + * code. Consumes nothing, validates very little, + * produces an unreferenced voucher, which you + * MAY NOT use as a voucher, only log as an + * address. + * Conditions: + * Caller has a send-right reference to port. + * Port may or may not be locked. + */ +uintptr_t +unsafe_convert_port_to_voucher( + ipc_port_t port) +{ + if (IP_VALID(port)) { + uintptr_t voucher = (uintptr_t) port->ip_kobject; + + /* + * No need to lock because we have a reference on the + * port, and if it is a true voucher port, that reference + * keeps the voucher bound to the port (and active). + */ + if (ip_kotype(port) == IKOT_VOUCHER) + return (voucher); + } + return (uintptr_t)IV_NULL; +} + +/* + * Routine: convert_port_to_voucher + * Purpose: + * Convert from a port to a voucher. + * Doesn't consume the port [send-right] ref; + * produces a voucher ref, which may be null. + * Conditions: + * Caller has a send-right reference to port. + * Port may or may not be locked. + */ +ipc_voucher_t +convert_port_to_voucher( + ipc_port_t port) +{ + if (IP_VALID(port)) { + ipc_voucher_t voucher = (ipc_voucher_t) port->ip_kobject; + + /* + * No need to lock because we have a reference on the + * port, and if it is a true voucher port, that reference + * keeps the voucher bound to the port (and active). + */ + if (ip_kotype(port) != IKOT_VOUCHER) + return IV_NULL; + + assert(ip_active(port)); + + ipc_voucher_reference(voucher); + return (voucher); + } + return IV_NULL; +} + +/* + * Routine: convert_port_name_to_voucher + * Purpose: + * Convert from a port name in the current space to a voucher. + * Produces a voucher ref, which may be null. + * Conditions: + * Nothing locked. 
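+ *
+ * Hypothetical caller pattern (a sketch; name is any send right
+ * name for a voucher port in the current space):
+ *
+ *	ipc_voucher_t iv = convert_port_name_to_voucher(name);
+ *	if (IV_NULL != iv) {
+ *		... use the voucher ...
+ *		ipc_voucher_release(iv);	release the produced ref
+ *	}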
+ */ + +ipc_voucher_t +convert_port_name_to_voucher( + mach_port_name_t voucher_name) +{ + ipc_voucher_t iv; + kern_return_t kr; + ipc_port_t port; + + if (MACH_PORT_VALID(voucher_name)) { + kr = ipc_port_translate_send(current_space(), voucher_name, &port); + if (KERN_SUCCESS != kr) + return IV_NULL; + + iv = convert_port_to_voucher(port); + ip_unlock(port); + return iv; + } + return IV_NULL; +} + + +void +ipc_voucher_reference(ipc_voucher_t voucher) +{ + iv_refs_t refs; + + if (IPC_VOUCHER_NULL == voucher) + return; + + refs = iv_reference(voucher); + assert(1 < refs); +} + +void +ipc_voucher_release(ipc_voucher_t voucher) +{ + if (IPC_VOUCHER_NULL != voucher) + iv_release(voucher); +} + +/* + * Routine: ipc_voucher_notify + * Purpose: + * Called whenever the Mach port system detects no-senders + * on the voucher port. + * + * Each time the send-right count goes positive, a no-senders + * notification is armed (and a voucher reference is donated). + * So, each notification that comes in must release a voucher + * reference. If more send rights have been added since it + * fired (asynchronously), they will be protected by a different + * reference hold. + */ +void +ipc_voucher_notify(mach_msg_header_t *msg) +{ + mach_no_senders_notification_t *notification = (void *)msg; + ipc_port_t port = notification->not_header.msgh_remote_port; + ipc_voucher_t iv; + + assert(ip_active(port)); + assert(IKOT_VOUCHER == ip_kotype(port)); + iv = (ipc_voucher_t)port->ip_kobject; + + ipc_voucher_release(iv); +} + +/* + * Convert a voucher to a port. + */ +ipc_port_t +convert_voucher_to_port(ipc_voucher_t voucher) +{ + ipc_port_t port, send; + + if (IV_NULL == voucher) + return (IP_NULL); + + assert(0 < voucher->iv_refs); + + /* create a port if needed */ + port = voucher->iv_port; + if (!IP_VALID(port)) { + port = ipc_port_alloc_kernel(); + assert(IP_VALID(port)); + ipc_kobject_set_atomically(port, (ipc_kobject_t) voucher, IKOT_VOUCHER); + + /* If we lose the race, deallocate and pick up the other guy's port */ + if (!OSCompareAndSwapPtr(IP_NULL, port, &voucher->iv_port)) { + ipc_port_dealloc_kernel(port); + port = voucher->iv_port; + assert(ip_kotype(port) == IKOT_VOUCHER); + assert(port->ip_kobject == (ipc_kobject_t)voucher); + } + } + + ip_lock(port); + assert(ip_active(port)); + send = ipc_port_make_send_locked(port); + + if (1 == port->ip_srights) { + ipc_port_t old_notify; + + /* transfer our ref to the port, and arm the no-senders notification */ + assert(IP_NULL == port->ip_nsrequest); + ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify); + /* port unlocked */ + assert(IP_NULL == old_notify); + } else { + /* piggyback on the existing port reference, so consume ours */ + ip_unlock(port); + ipc_voucher_release(voucher); + } + return (send); +} + +#define ivace_reset_data(ivace_elem, next_index) { \ + (ivace_elem)->ivace_value = 0xDEADC0DEDEADC0DE; \ + (ivace_elem)->ivace_refs = 0; \ + (ivace_elem)->ivace_made = 0; \ + (ivace_elem)->ivace_free = TRUE; \ + (ivace_elem)->ivace_releasing = FALSE; \ + (ivace_elem)->ivace_layered = 0; \ + (ivace_elem)->ivace_index = IV_HASH_END; \ + (ivace_elem)->ivace_next = (next_index); \ +} + +#define ivace_copy_data(ivace_src_elem, ivace_dst_elem) { \ + (ivace_dst_elem)->ivace_value = (ivace_src_elem)->ivace_value; \ + (ivace_dst_elem)->ivace_refs = (ivace_src_elem)->ivace_refs; \ + (ivace_dst_elem)->ivace_made = (ivace_src_elem)->ivace_made; \ + (ivace_dst_elem)->ivace_free = (ivace_src_elem)->ivace_free; \ + 
(ivace_dst_elem)->ivace_layered = (ivace_src_elem)->ivace_layered; \ + (ivace_dst_elem)->ivace_releasing = (ivace_src_elem)->ivace_releasing; \ + (ivace_dst_elem)->ivace_index = (ivace_src_elem)->ivace_index; \ + (ivace_dst_elem)->ivace_next = (ivace_src_elem)->ivace_next; \ +} + +ipc_voucher_attr_control_t +ivac_alloc(iv_index_t key_index) +{ + ipc_voucher_attr_control_t ivac; + ivac_entry_t table; + natural_t i; + + + ivac = (ipc_voucher_attr_control_t)zalloc(ipc_voucher_attr_control_zone); + if (IVAC_NULL == ivac) + return IVAC_NULL; + + ivac->ivac_refs = 1; + ivac->ivac_is_growing = FALSE; + ivac->ivac_port = IP_NULL; + + /* start with just the inline table */ + table = (ivac_entry_t) kalloc(IVAC_ENTRIES_MIN * sizeof(ivac_entry)); + ivac->ivac_table = table; + ivac->ivac_table_size = IVAC_ENTRIES_MIN; + ivac->ivac_init_table_size = IVAC_ENTRIES_MIN; + for (i = 0; i < ivac->ivac_table_size; i++) { + ivace_reset_data(&table[i], i+1); + } + + /* the default table entry is never on freelist */ + table[0].ivace_next = IV_HASH_END; + table[0].ivace_free = FALSE; + table[i-1].ivace_next = IV_FREELIST_END; + ivac->ivac_freelist = 1; + ivac_lock_init(ivac); + ivac->ivac_key_index = key_index; + return (ivac); +} + + +void +ivac_dealloc(ipc_voucher_attr_control_t ivac) +{ + ipc_voucher_attr_manager_t ivam = IVAM_NULL; + iv_index_t key_index = ivac->ivac_key_index; + ipc_port_t port = ivac->ivac_port; + natural_t i; + + /* + * If the control is in the global table, we + * have to remove it from there before we (re)confirm + * that the reference count is still zero. + */ + ivgt_lock(); + if (ivac->ivac_refs > 0) { + ivgt_unlock(); + return; + } + + /* take it out of the global table */ + if (iv_global_table[key_index].ivgte_control == ivac) { + ivam = iv_global_table[key_index].ivgte_manager; + iv_global_table[key_index].ivgte_manager = IVAM_NULL; + iv_global_table[key_index].ivgte_control = IVAC_NULL; + iv_global_table[key_index].ivgte_key = MACH_VOUCHER_ATTR_KEY_NONE; + } + ivgt_unlock(); + + /* release the reference held on the resource manager */ + if (IVAM_NULL != ivam) + (ivam->ivam_release)(ivam); + + /* + * if a port was allocated for this voucher, + * it must not have any remaining send rights, + * because the port's reference on the voucher + * is gone. We can just discard it now. + */ + if (IP_VALID(port)) { + assert(ip_active(port)); + assert(port->ip_srights == 0); + + ipc_port_dealloc_kernel(port); + } + + /* + * the resource manager's control reference and all references + * held by the specific value caches are gone, so free the + * table. + */ +#ifdef MACH_DEBUG + for (i = 0; i < ivac->ivac_table_size; i++) + if (ivac->ivac_table[i].ivace_refs != 0) + panic("deallocing a resource manager with live refs to its attr values\n"); +#endif + kfree(ivac->ivac_table, ivac->ivac_table_size * sizeof(*ivac->ivac_table)); + ivac_lock_destroy(ivac); + zfree(ipc_voucher_attr_control_zone, ivac); +} + +void +ipc_voucher_attr_control_reference(ipc_voucher_attr_control_t control) +{ + ivac_reference(control); +} + +void +ipc_voucher_attr_control_release(ipc_voucher_attr_control_t control) +{ + ivac_release(control); +} + +/* + * Routine: convert_port_to_voucher_attr_control reference + * Purpose: + * Convert from a port to a voucher attribute control. + * Doesn't consume the port ref; produces a voucher ref, + * which may be null. + * Conditions: + * Nothing locked. 
+ */ +ipc_voucher_attr_control_t +convert_port_to_voucher_attr_control( + ipc_port_t port) +{ + if (IP_VALID(port)) { + ipc_voucher_attr_control_t ivac = (ipc_voucher_attr_control_t) port->ip_kobject; + + /* + * No need to lock because we have a reference on the + * port, and if it is a true voucher control port, + * that reference keeps the voucher bound to the port + * (and active). + */ + if (ip_kotype(port) != IKOT_VOUCHER_ATTR_CONTROL) + return IVAC_NULL; + + assert(ip_active(port)); + + ivac_reference(ivac); + return (ivac); + } + return IVAC_NULL; +} + +void +ipc_voucher_attr_control_notify(mach_msg_header_t *msg) +{ + mach_no_senders_notification_t *notification = (void *)msg; + ipc_port_t port = notification->not_header.msgh_remote_port; + ipc_voucher_attr_control_t ivac; + + assert(IKOT_VOUCHER_ATTR_CONTROL == ip_kotype(port)); + ip_lock(port); + assert(ip_active(port)); + + /* if no new send rights, drop a control reference */ + if (port->ip_mscount == notification->not_count) { + ivac = (ipc_voucher_attr_control_t)port->ip_kobject; + ip_unlock(port); + + ivac_release(ivac); + } + ip_unlock(port); +} + +/* + * Convert a voucher attr control to a port. + */ +ipc_port_t +convert_voucher_attr_control_to_port(ipc_voucher_attr_control_t control) +{ + ipc_port_t port, send; + + if (IVAC_NULL == control) + return (IP_NULL); + + /* create a port if needed */ + port = control->ivac_port; + if (!IP_VALID(port)) { + port = ipc_port_alloc_kernel(); + assert(IP_VALID(port)); + if (OSCompareAndSwapPtr(IP_NULL, port, &control->ivac_port)) { + ip_lock(port); + ipc_kobject_set_atomically(port, (ipc_kobject_t) control, IKOT_VOUCHER_ATTR_CONTROL); + } else { + ipc_port_dealloc_kernel(port); + port = control->ivac_port; + ip_lock(port); + assert(ip_kotype(port) == IKOT_VOUCHER_ATTR_CONTROL); + assert(port->ip_kobject == (ipc_kobject_t)control); + } + } else + ip_lock(port); + + assert(ip_active(port)); + send = ipc_port_make_send_locked(port); + + if (1 == port->ip_srights) { + ipc_port_t old_notify; + + /* transfer our ref to the port, and arm the no-senders notification */ + assert(IP_NULL == port->ip_nsrequest); + ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify); + assert(IP_NULL == old_notify); + ip_unlock(port); + } else { + /* piggyback on the existing port reference, so consume ours */ + ip_unlock(port); + ivac_release(control); + } + return (send); +} + +/* + * Look up the values for a given pair. + */ +static void +ivace_lookup_values( + iv_index_t key_index, + iv_index_t value_index, + mach_voucher_attr_value_handle_array_t values, + mach_voucher_attr_value_handle_array_size_t *count) +{ + ipc_voucher_attr_control_t ivac; + ivac_entry_t ivace; + + if (IV_UNUSED_VALINDEX == value_index || + MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN <= key_index) { + *count = 0; + return; + } + + ivac = iv_global_table[key_index].ivgte_control; + assert(IVAC_NULL != ivac); + + /* + * Get the entry and then the linked values. + */ + ivac_lock(ivac); + assert(value_index < ivac->ivac_table_size); + ivace = &ivac->ivac_table[value_index]; + + /* + * TODO: support chained values (for effective vouchers). 
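+	 *
+	 * Until then, a non-default value_index yields exactly one
+	 * handle: the entry's ivace_value goes into values[0] and
+	 * *count is set to 1 below.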
+ */ + assert(ivace->ivace_refs > 0); + values[0] = ivace->ivace_value; + ivac_unlock(ivac); + *count = 1; +} + +/* + * ivac_grow_table - Allocate a bigger table of attribute values + * + * Conditions: ivac is locked on entry and again on return + */ +static void +ivac_grow_table(ipc_voucher_attr_control_t ivac) +{ + iv_index_t i = 0; + + /* NOTE: do not modify *_table and *_size values once set */ + ivac_entry_t new_table = NULL, old_table = NULL; + iv_index_t new_size, old_size; + + if (ivac->ivac_is_growing) { + ivac_sleep(ivac); + return; + } + + ivac->ivac_is_growing = 1; + if (ivac->ivac_table_size >= IVAC_ENTRIES_MAX) { + panic("Cannot grow ipc space beyond IVAC_ENTRIES_MAX. Some process is leaking vouchers"); + } + + old_size = ivac->ivac_table_size; + ivac_unlock(ivac); + + /* + * if initial size is not leading to page aligned allocations, + * set new_size such that new_size * sizeof(ivac_entry) is page aligned. + */ + + if ((old_size * sizeof(ivac_entry)) & PAGE_MASK){ + new_size = (iv_index_t)round_page((old_size * sizeof(ivac_entry)))/(sizeof (ivac_entry)); + } else { + new_size = old_size * 2; + } + + assert(new_size > old_size); + new_table = kalloc(sizeof(ivac_entry) * new_size); + if (!new_table){ + panic("Failed to grow ivac table to size %d\n", new_size); + return; + } + + /* setup the free list for new entries */ + for (i = old_size; i < new_size; i++) { + ivace_reset_data(&new_table[i], i+1); + } + + ivac_lock(ivac); + + for (i = 0; i < ivac->ivac_table_size; i++){ + ivace_copy_data(&ivac->ivac_table[i], &new_table[i]); + } + + old_table = ivac->ivac_table; + + ivac->ivac_table = new_table; + ivac->ivac_table_size = new_size; + + /* adding new free entries at head of freelist */ + ivac->ivac_table[new_size - 1].ivace_next = ivac->ivac_freelist; + ivac->ivac_freelist = old_size; + ivac->ivac_is_growing = 0; + ivac_wakeup(ivac); + + if (old_table){ + ivac_unlock(ivac); + kfree(old_table, old_size * sizeof(ivac_entry)); + ivac_lock(ivac); + } +} + +/* + * ivace_reference_by_index + * + * Take an additional reference on the + * cached value. It is assumed the caller already holds a + * reference to the same cached key-value pair. + */ +static void +ivace_reference_by_index( + iv_index_t key_index, + iv_index_t val_index) +{ + ipc_voucher_attr_control_t ivac; + ivac_entry_t ivace; + + if (IV_UNUSED_VALINDEX == val_index) + return; + + ivgt_lookup(key_index, FALSE, NULL, &ivac); + assert(IVAC_NULL != ivac); + + ivac_lock(ivac); + assert(val_index < ivac->ivac_table_size); + ivace = &ivac->ivac_table[val_index]; + + assert(0xdeadc0dedeadc0de != ivace->ivace_value); + assert(0 < ivace->ivace_refs); + assert(!ivace->ivace_free); + ivace->ivace_refs++; + ivac_unlock(ivac); +} + + +/* + * Look up the values for a given pair. + * + * Consumes a reference on the passed voucher control. + * Either it is donated to a newly-created value cache + * or it is released (if we piggy back on an existing + * value cache entry). 
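+ *
+ * Hypothetical caller pattern (a sketch; it mirrors the use in
+ * ipc_replace_voucher_value() later in this file):
+ *
+ *	ivgt_lookup(key_index, TRUE, &ivam, &ivac);	take an ivac ref
+ *	val_index = ivace_reference_by_value(ivac, value);	consumes it
+ *	iv_set(voucher, key_index, val_index);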
+ */ +static iv_index_t +ivace_reference_by_value( + ipc_voucher_attr_control_t ivac, + mach_voucher_attr_value_handle_t value) +{ + ivac_entry_t ivace = IVACE_NULL; + iv_index_t hash_index; + iv_index_t index; + + if (IVAC_NULL == ivac) { + return IV_UNUSED_VALINDEX; + } + + ivac_lock(ivac); +restart: + hash_index = IV_HASH_VAL(ivac->ivac_init_table_size, value); + index = ivac->ivac_table[hash_index].ivace_index; + while (index != IV_HASH_END) { + assert(index < ivac->ivac_table_size); + ivace = &ivac->ivac_table[index]; + assert(!ivace->ivace_free); + + if (ivace->ivace_value == value) + break; + + assert(ivace->ivace_next != index); + index = ivace->ivace_next; + } + + /* found it? */ + if (index != IV_HASH_END) { + /* only add reference on non-default value */ + if (IV_UNUSED_VALINDEX != index) { + ivace->ivace_refs++; + ivace->ivace_made++; + } + + ivac_unlock(ivac); + ivac_release(ivac); + return index; + } + + /* insert new entry in the table */ + index = ivac->ivac_freelist; + if (IV_FREELIST_END == index) { + /* freelist empty */ + ivac_grow_table(ivac); + goto restart; + } + + /* take the entry off the freelist */ + ivace = &ivac->ivac_table[index]; + ivac->ivac_freelist = ivace->ivace_next; + + /* initialize the new entry */ + ivace->ivace_value = value; + ivace->ivace_refs = 1; + ivace->ivace_made = 1; + ivace->ivace_free = FALSE; + + /* insert the new entry in the proper hash chain */ + ivace->ivace_next = ivac->ivac_table[hash_index].ivace_index; + ivac->ivac_table[hash_index].ivace_index = index; + ivac_unlock(ivac); + + /* donated passed in ivac reference to new entry */ + + return index; +} + +/* + * Release a reference on the given pair. + * + * Conditions: called with nothing locked, as it may cause + * callouts and/or messaging to the resource + * manager. + */ +static void ivace_release( + iv_index_t key_index, + iv_index_t value_index) +{ + ipc_voucher_attr_control_t ivac; + ipc_voucher_attr_manager_t ivam; + mach_voucher_attr_value_handle_t value; + mach_voucher_attr_value_reference_t made; + mach_voucher_attr_key_t key; + iv_index_t hash_index; + ivac_entry_t ivace; + kern_return_t kr; + + /* cant release the default value */ + if (IV_UNUSED_VALINDEX == value_index) + return; + + ivgt_lookup(key_index, FALSE, &ivam, &ivac); + assert(IVAC_NULL != ivac); + assert(IVAM_NULL != ivam); + + ivac_lock(ivac); + assert(value_index < ivac->ivac_table_size); + ivace = &ivac->ivac_table[value_index]; + + assert(0 < ivace->ivace_refs); + + if (0 < --ivace->ivace_refs) { + ivac_unlock(ivac); + return; + } + + key = iv_index_to_key(key_index); + assert(MACH_VOUCHER_ATTR_KEY_NONE != key); + + /* + * if last return reply is still pending, + * let it handle this later return when + * the previous reply comes in. + */ + if (ivace->ivace_releasing) { + ivac_unlock(ivac); + return; + } + + /* claim releasing */ + ivace->ivace_releasing = TRUE; + value = ivace->ivace_value; + + redrive: + assert(value == ivace->ivace_value); + assert(!ivace->ivace_free); + made = ivace->ivace_made; + ivac_unlock(ivac); + + /* callout to manager's release_value */ + kr = (ivam->ivam_release_value)(ivam, key, value, made); + + /* recalculate entry address as table may have changed */ + ivac_lock(ivac); + ivace = &ivac->ivac_table[value_index]; + assert(value == ivace->ivace_value); + + /* + * new made values raced with this return. If the + * manager OK'ed the prior release, we have to start + * the made numbering over again (pretend the race + * didn't happen). 
If the entry has zero refs again, + * re-drive the release. + */ + if (ivace->ivace_made != made) { + assert(made < ivace->ivace_made); + + if (KERN_SUCCESS == kr) + ivace->ivace_made -= made; + + if (0 == ivace->ivace_refs) + goto redrive; + + ivace->ivace_releasing = FALSE; + ivac_unlock(ivac); + return; + } else { + /* + * If the manager returned FAILURE, someone took a + * reference on the value but has not yet updated the ivace; + * release the lock and return, since the thread that took + * the new reference will update the ivace and will hold a + * non-zero reference on the value. + */ + if (KERN_SUCCESS != kr) { + ivace->ivace_releasing = FALSE; + ivac_unlock(ivac); + return; + } + } + + assert(0 == ivace->ivace_refs); + + /* + * going away - remove entry from its hash + * If it's at the head of the hash bucket list (common), unchain + * at the head. Otherwise walk the chain until the next points + * at this entry, and remove it from the list there. + */ + hash_index = iv_hash_value(key_index, value); + if (ivac->ivac_table[hash_index].ivace_index == value_index) { + ivac->ivac_table[hash_index].ivace_index = ivace->ivace_next; + } else { + hash_index = ivac->ivac_table[hash_index].ivace_index; + assert(IV_HASH_END != hash_index); + while (ivac->ivac_table[hash_index].ivace_next != value_index) { + hash_index = ivac->ivac_table[hash_index].ivace_next; + assert(IV_HASH_END != hash_index); + } + ivac->ivac_table[hash_index].ivace_next = ivace->ivace_next; + } + + /* Put this entry on the freelist */ + ivace->ivace_value = 0xdeadc0dedeadc0de; + ivace->ivace_releasing = FALSE; + ivace->ivace_free = TRUE; + ivace->ivace_made = 0; + ivace->ivace_next = ivac->ivac_freelist; + ivac->ivac_freelist = value_index; + ivac_unlock(ivac); + + /* release the reference this value held on its cache control */ + ivac_release(ivac); + + return; +} + + +/* + * ivgt_lookup + * + * Lookup an entry in the global table from the context of a manager + * registration. Adds a reference to the control to keep the results + * around (if needed). + * + * Because of the calling point, we can't be sure the manager is + * [fully] registered yet. So, we must hold the global table lock + * during the lookup to synchronize with in-parallel registrations + * (and possible table growth). + */ +static void +ivgt_lookup(iv_index_t key_index, + boolean_t take_reference, + ipc_voucher_attr_manager_t *manager, + ipc_voucher_attr_control_t *control) +{ + ipc_voucher_attr_control_t ivac; + + if (key_index < MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN) { + ivgt_lock(); + if (NULL != manager) + *manager = iv_global_table[key_index].ivgte_manager; + ivac = iv_global_table[key_index].ivgte_control; + if (IVAC_NULL != ivac) { + assert(key_index == ivac->ivac_key_index); + if (take_reference) { + assert(NULL != control); + ivac_reference(ivac); + } + } + ivgt_unlock(); + if (NULL != control) + *control = ivac; + } else { + if (NULL != manager) + *manager = IVAM_NULL; + if (NULL != control) + *control = IVAC_NULL; + } +} + +/* + * Routine: ipc_replace_voucher_value + * Purpose: + * Replace the (voucher, key) value with the results of + * running the supplied command through the resource + * manager's get-value callback. + * Conditions: + * Nothing locked (may invoke user-space repeatedly). + * Caller holds references on voucher and previous voucher.
+ */ +static kern_return_t +ipc_replace_voucher_value( + ipc_voucher_t voucher, + mach_voucher_attr_key_t key, + mach_voucher_attr_recipe_command_t command, + ipc_voucher_t prev_voucher, + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t content_size) +{ + mach_voucher_attr_value_handle_t previous_vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED]; + mach_voucher_attr_value_handle_array_size_t previous_vals_count; + mach_voucher_attr_value_handle_t new_value; + ipc_voucher_t new_value_voucher; + ipc_voucher_attr_manager_t ivam; + ipc_voucher_attr_control_t ivac; + iv_index_t prev_val_index; + iv_index_t save_val_index; + iv_index_t val_index; + iv_index_t key_index; + kern_return_t kr; + + /* + * Get the manager for this key_index. + * Returns a reference on the control. + */ + key_index = iv_key_to_index(key); + ivgt_lookup(key_index, TRUE, &ivam, &ivac); + if (IVAM_NULL == ivam) + return KERN_INVALID_ARGUMENT; + + /* save the current value stored in the forming voucher */ + save_val_index = iv_lookup(voucher, key_index); + + /* + * Get the previous value(s) for this key creation. + * If a previous voucher is specified, they come from there. + * Otherwise, they come from the intermediate values already + * in the forming voucher. + */ + prev_val_index = (IV_NULL != prev_voucher) ? + iv_lookup(prev_voucher, key_index) : + save_val_index; + ivace_lookup_values(key_index, prev_val_index, + previous_vals, &previous_vals_count); + + /* Call out to resource manager to get new value */ + new_value_voucher = IV_NULL; + kr = (ivam->ivam_get_value)( + ivam, key, command, + previous_vals, previous_vals_count, + content, content_size, + &new_value, &new_value_voucher); + if (KERN_SUCCESS != kr) { + ivac_release(ivac); + return kr; + } + + /* TODO: value insertion from returned voucher */ + if (IV_NULL != new_value_voucher) + iv_release(new_value_voucher); + + /* + * Find or create a slot in the table associated + * with this attribute value. The ivac reference + * is transferred to a new value, or consumed if + * we find a matching existing value. + */ + val_index = ivace_reference_by_value(ivac, new_value); + iv_set(voucher, key_index, val_index); + + /* + * release saved old value from the newly forming voucher + * This is saved until the end to avoid churning the + * release logic in cases where the same value is returned + * as was there before. + */ + ivace_release(key_index, save_val_index); + + return KERN_SUCCESS; +} + +/* + * Routine: ipc_directly_replace_voucher_value + * Purpose: + * Replace the value with the value-handle + * supplied directly by the attribute manager. + * Conditions: + * Nothing locked. + * Caller holds references on voucher. + * A made reference to the value-handle is donated by the caller. + */ +static kern_return_t +ipc_directly_replace_voucher_value( + ipc_voucher_t voucher, + mach_voucher_attr_key_t key, + mach_voucher_attr_value_handle_t new_value) +{ + ipc_voucher_attr_manager_t ivam; + ipc_voucher_attr_control_t ivac; + iv_index_t save_val_index; + iv_index_t val_index; + iv_index_t key_index; + + /* + * Get the manager for this key_index. + * Returns a reference on the control. + */ + key_index = iv_key_to_index(key); + ivgt_lookup(key_index, TRUE, &ivam, &ivac); + if (IVAM_NULL == ivam) + return KERN_INVALID_ARGUMENT; + + /* save the current value stored in the forming voucher */ + save_val_index = iv_lookup(voucher, key_index); + + /* + * Find or create a slot in the table associated + * with this attribute value. 
The ivac reference + * is transferred to a new value, or consumed if + * we find a matching existing value. + */ + val_index = ivace_reference_by_value(ivac, new_value); + iv_set(voucher, key_index, val_index); + + /* + * release saved old value from the newly forming voucher + * This is saved until the end to avoid churning the + * release logic in cases where the same value is returned + * as was there before. + */ + ivace_release(key_index, save_val_index); + + return KERN_SUCCESS; +} + +static kern_return_t +ipc_execute_voucher_recipe_command( + ipc_voucher_t voucher, + mach_voucher_attr_key_t key, + mach_voucher_attr_recipe_command_t command, + ipc_voucher_t prev_iv, + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t content_size, + boolean_t key_priv) +{ + iv_index_t prev_val_index; + iv_index_t val_index; + kern_return_t kr; + + switch (command) { + + /* + * MACH_VOUCHER_ATTR_COPY + * Copy the attribute(s) from the previous voucher to the new + * one. A wildcard key is an acceptable value - indicating a + * desire to copy all the attribute values from the previous + * voucher. + */ + case MACH_VOUCHER_ATTR_COPY: + + /* no recipe data on a copy */ + if (0 < content_size) + return KERN_INVALID_ARGUMENT; + + /* nothing to copy from? - done */ + if (IV_NULL == prev_iv) + return KERN_SUCCESS; + + if (MACH_VOUCHER_ATTR_KEY_ALL == key) { + iv_index_t limit, j; + + /* reconcile possible difference in voucher sizes */ + limit = (prev_iv->iv_table_size < voucher->iv_table_size) ? + prev_iv->iv_table_size : + voucher->iv_table_size; + + /* wildcard matching */ + for (j = 0; j < limit; j++) { + /* release old value being replaced */ + val_index = iv_lookup(voucher, j); + ivace_release(j, val_index); + + /* replace with reference to prev voucher's value */ + prev_val_index = iv_lookup(prev_iv, j); + ivace_reference_by_index(j, prev_val_index); + iv_set(voucher, j, prev_val_index); + } + } else { + iv_index_t key_index; + + /* copy just one key */ + key_index = iv_key_to_index(key); + if (ivgt_keys_in_use < key_index) + return KERN_INVALID_ARGUMENT; + + /* release old value being replaced */ + val_index = iv_lookup(voucher, key_index); + ivace_release(key_index, val_index); + + /* replace with reference to prev voucher's value */ + prev_val_index = iv_lookup(prev_iv, key_index); + ivace_reference_by_index(key_index, prev_val_index); + iv_set(voucher, key_index, prev_val_index); + } + break; + + /* + * MACH_VOUCHER_ATTR_REMOVE + * Remove the attribute(s) from the under construction voucher. + * A wildcard key is an acceptable value - indicating a desire + * to remove all the attribute values set up so far in the voucher. + * If a previous voucher is specified, only remove the value it + * it matches the value in the previous voucher. + */ + case MACH_VOUCHER_ATTR_REMOVE: + /* no recipe data on a remove */ + if (0 < content_size) + return KERN_INVALID_ARGUMENT; + + if (MACH_VOUCHER_ATTR_KEY_ALL == key) { + iv_index_t limit, j; + + /* reconcile possible difference in voucher sizes */ + limit = (IV_NULL == prev_iv) ? voucher->iv_table_size : + ((prev_iv->iv_table_size < voucher->iv_table_size) ? 
+ prev_iv->iv_table_size : voucher->iv_table_size); + + /* wildcard matching */ + for (j = 0; j < limit; j++) { + val_index = iv_lookup(voucher, j); + + /* If not matched in previous, skip */ + if (IV_NULL != prev_iv) { + prev_val_index = iv_lookup(prev_iv, j); + if (val_index != prev_val_index) + continue; + } + /* release and clear */ + ivace_release(j, val_index); + iv_set(voucher, j, IV_UNUSED_VALINDEX); + } + } else { + iv_index_t key_index; + + /* copy just one key */ + key_index = iv_key_to_index(key); + if (ivgt_keys_in_use < key_index) + return KERN_INVALID_ARGUMENT; + + val_index = iv_lookup(voucher, key_index); + + /* If not matched in previous, skip */ + if (IV_NULL != prev_iv) { + prev_val_index = iv_lookup(prev_iv, key_index); + if (val_index != prev_val_index) + break; + } + + /* release and clear */ + ivace_release(key_index, val_index); + iv_set(voucher, key_index, IV_UNUSED_VALINDEX); + } + break; + + /* + * MACH_VOUCHER_ATTR_SET_VALUE_HANDLE + * Use key-privilege to set a value handle for the attribute directly, + * rather than triggering a callback into the attribute manager to + * interpret a recipe to generate the value handle. + */ + case MACH_VOUCHER_ATTR_SET_VALUE_HANDLE: + if (key_priv) { + mach_voucher_attr_value_handle_t new_value; + + if (sizeof(mach_voucher_attr_value_handle_t) != content_size) + return KERN_INVALID_ARGUMENT; + + new_value = *(mach_voucher_attr_value_handle_t *)(void *)content; + kr = ipc_directly_replace_voucher_value(voucher, + key, + new_value); + if (KERN_SUCCESS != kr) + return kr; + } else + return KERN_INVALID_CAPABILITY; + break; + + /* + * MACH_VOUCHER_ATTR_REDEEM + * Redeem the attribute(s) from the previous voucher for a possibly + * new value in the new voucher. A wildcard key is an acceptable value, + * indicating a desire to redeem all the values. + */ + case MACH_VOUCHER_ATTR_REDEEM: + + if (MACH_VOUCHER_ATTR_KEY_ALL == key) { + iv_index_t limit, j; + + /* reconcile possible difference in voucher sizes */ + if (IV_NULL != prev_iv) + limit = (prev_iv->iv_table_size < voucher->iv_table_size) ? + prev_iv->iv_table_size : + voucher->iv_table_size; + else + limit = voucher->iv_table_size; + + /* wildcard matching */ + for (j = 0; j < limit; j++) { + mach_voucher_attr_key_t j_key; + + j_key = iv_index_to_key(j); + + /* skip non-existent managers */ + if (MACH_VOUCHER_ATTR_KEY_NONE == j_key) + continue; + + /* get the new value from redeem (skip empty previous) */ + kr = ipc_replace_voucher_value(voucher, + j_key, + command, + prev_iv, + content, + content_size); + if (KERN_SUCCESS != kr) + return kr; + } + break; + } + /* fall thru for single key redemption */ + + /* + * DEFAULT: + * Replace the current value for the pair with whatever + * value the resource manager returns for the command and recipe + * combination provided. + */ + default: + kr = ipc_replace_voucher_value(voucher, + key, + command, + prev_iv, + content, + content_size); + if (KERN_SUCCESS != kr) + return kr; + + break; + } + return KERN_SUCCESS; +} + +/* + * Routine: iv_checksum + * Purpose: + * Compute the voucher sum. This is more position- + * relevant than many other checksums - important for + * vouchers (arrays of low, oft-reused, indexes). 
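+ *
+ * For each table entry v, the loop below effectively computes
+ * (a sketch of the arithmetic; iv_index_t is 32 bits wide here):
+ *
+ *	c = (c << 3) | (c >> 29);	rotate left 3
+ *	c = ~c;				invert
+ *	if (v) c += v;			fold in non-default entries
+ *
+ * while *emptyp records whether every entry held the default
+ * (zero) index, which lets iv_dedup() turn an all-default voucher
+ * into IV_NULL.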
+ */ +static inline iv_index_t +iv_checksum(ipc_voucher_t voucher, boolean_t *emptyp) +{ + iv_index_t c = 0; + + boolean_t empty = TRUE; + if (0 < voucher->iv_table_size) { + iv_index_t i = voucher->iv_table_size - 1; + + do { + iv_index_t v = voucher->iv_table[i]; + c = c << 3 | c >> (32 - 3); /* rotate */ + c = ~c; /* invert */ + if (0 < v) { + c += v; /* add in */ + empty = FALSE; + } + } while (0 < i--); + } + *emptyp = empty; + return c; +} + +/* + * Routine: iv_dedup + * Purpose: + * See if the set of values represented by this new voucher + * already exist in another voucher. If so return a reference + * to the existing voucher and deallocate the voucher provided. + * Otherwise, insert this one in the hash and return it. + * Conditions: + * A voucher reference is donated on entry. + * Returns: + * A voucher reference (may be different than on entry). + */ +static ipc_voucher_t +iv_dedup(ipc_voucher_t new_iv) +{ + boolean_t empty; + iv_index_t sum; + iv_index_t hash; + ipc_voucher_t iv; + + sum = iv_checksum(new_iv, &empty); + + /* If all values are default, that's the empty (NULL) voucher */ + if (empty) { + iv_dealloc(new_iv, FALSE); + return IV_NULL; + } + + hash = IV_HASH_BUCKET(sum); + + ivht_lock(); + queue_iterate(&ivht_bucket[hash], iv, ipc_voucher_t, iv_hash_link) { + assert(iv->iv_hash == hash); + + /* if not already deallocating and sums match... */ + if (0 < iv->iv_refs && iv->iv_sum == sum) { + iv_refs_t refs; + iv_index_t i; + + assert(iv->iv_table_size <= new_iv->iv_table_size); + + /* and common entries match... */ + for (i = 0; i < iv->iv_table_size; i++) + if (iv->iv_table[i] != new_iv->iv_table[i]) + break; + if (i < iv->iv_table_size) + continue; + + /* and all extra entries in new one are unused... */ + while (i < new_iv->iv_table_size) + if (new_iv->iv_table[i++] != IV_UNUSED_VALINDEX) + break; + if (i < new_iv->iv_table_size) + continue; + + /* ... we found a match... */ + + /* can we get a ref before it hits 0 + * + * This is thread safe. The reference is just an atomic + * add. If the reference count is zero when we adjust it, + * no other thread can have a reference to the voucher. + * The dealloc code requires holding the ivht_lock, so + * the voucher cannot be yanked out from under us. + */ + refs = iv_reference(iv); + if (1 == refs) { + /* drats! going away. Put back to zero */ + iv->iv_refs = 0; + continue; + } + + ivht_unlock(); + + /* referenced previous, so deallocate the new one */ + iv_dealloc(new_iv, FALSE); + return iv; + } + } + + /* add the new voucher to the hash, and return it */ + new_iv->iv_sum = sum; + new_iv->iv_hash = hash; + queue_enter(&ivht_bucket[hash], new_iv, ipc_voucher_t, iv_hash_link); + ivht_count++; + ivht_unlock(); + + /* + * This code is disabled for KDEBUG_LEVEL_IST and KDEBUG_LEVEL_NONE + */ +#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) + if (kdebug_enable & ~KDEBUG_ENABLE_PPT) { + uintptr_t voucher_addr = VM_KERNEL_ADDRPERM((uintptr_t)new_iv); + uintptr_t attr_tracepoints_needed = 0; + + if (ipc_voucher_trace_contents) { + /* + * voucher_contents sizing is a bit more constrained + * than might be obvious. + * + * This is typically a uint8_t typed array. However, + * we want to access it as a uintptr_t to efficiently + * copyout the data in tracepoints. + * + * This constrains the size to uintptr_t bytes, and + * adds a minimimum alignment requirement equivalent + * to a uintptr_t. + * + * Further constraining the size is the fact that it + * is copied out 4 uintptr_t chunks at a time. 
We do + * NOT want to run off the end of the array and copyout + * random stack data. + * + * So the minimum size is 4 * sizeof(uintptr_t), and + * the minimum alignment is uintptr_t aligned. + */ + +#define PAYLOAD_PER_TRACEPOINT (4 * sizeof(uintptr_t)) +#define PAYLOAD_SIZE 1024 + + _Static_assert(PAYLOAD_SIZE % PAYLOAD_PER_TRACEPOINT == 0, "size invariant violated"); + + mach_voucher_attr_raw_recipe_array_size_t payload_size = PAYLOAD_SIZE; + uintptr_t payload[PAYLOAD_SIZE / sizeof(uintptr_t)]; + kern_return_t kr; + + kr = mach_voucher_extract_all_attr_recipes(new_iv, (mach_voucher_attr_raw_recipe_array_t)payload, &payload_size); + if (KERN_SUCCESS == kr) { + attr_tracepoints_needed = (payload_size + PAYLOAD_PER_TRACEPOINT - 1) / PAYLOAD_PER_TRACEPOINT; + + /* + * To prevent leaking data from the stack, we + * need to zero data to the end of a tracepoint + * payload. + */ + size_t remainder = payload_size % PAYLOAD_PER_TRACEPOINT; + if (remainder) { + bzero((uint8_t*)payload + payload_size, + PAYLOAD_PER_TRACEPOINT - remainder); + } + } + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC,MACH_IPC_VOUCHER_CREATE) | DBG_FUNC_NONE, + voucher_addr, + new_iv->iv_table_size, ivht_count, payload_size, 0); + + uintptr_t index = 0; + while (attr_tracepoints_needed--) { + KERNEL_DEBUG_CONSTANT1(MACHDBG_CODE(DBG_MACH_IPC,MACH_IPC_VOUCHER_CREATE_ATTR_DATA) | DBG_FUNC_NONE, + payload[index], + payload[index+1], + payload[index+2], + payload[index+3], + voucher_addr); + index += 4; + } + } else { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC,MACH_IPC_VOUCHER_CREATE) | DBG_FUNC_NONE, + voucher_addr, + new_iv->iv_table_size, ivht_count, 0, 0); + } + } +#endif /* KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD */ + + return new_iv; +} + +/* + * Routine: ipc_create_mach_voucher + * Purpose: + * Create a new mach voucher and initialize it with the + * value(s) created by having the appropriate resource + * managers interpret the supplied recipe commands and + * data. + * Conditions: + * Nothing locked (may invoke user-space repeatedly). + * Caller holds references on previous vouchers. + * Previous vouchers are passed as voucher indexes. + */ +kern_return_t +ipc_create_mach_voucher( + ipc_voucher_attr_raw_recipe_array_t recipes, + ipc_voucher_attr_raw_recipe_array_size_t recipe_size, + ipc_voucher_t *new_voucher) +{ + ipc_voucher_attr_recipe_t sub_recipe; + ipc_voucher_attr_recipe_size_t recipe_used = 0; + ipc_voucher_t voucher; + kern_return_t kr = KERN_SUCCESS; + + /* if nothing to do ... 
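+	 *    (zero recipe_size), hand back the NULL voucher.
+	 *
+	 * Otherwise, each sub-recipe in the array is a variable-length
+	 * record: a fixed ipc_voucher_attr_recipe header followed by
+	 * content_size bytes of inline content,
+	 *
+	 *	[ key | command | previous_voucher | content_size | content... ]
+	 *
+	 * so the parse loop below first checks that a full header fits,
+	 * then that the declared content fits, and advances by
+	 * sizeof(*sub_recipe) + sub_recipe->content_size each time
+	 * (a sketch of the layout; the field names are those used below).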
*/ + if (0 == recipe_size) { + *new_voucher = IV_NULL; + return KERN_SUCCESS; + } + + /* allocate a voucher */ + voucher = iv_alloc(ivgt_keys_in_use); + if (IV_NULL == voucher) + return KERN_RESOURCE_SHORTAGE; + + /* iterate over the recipe items */ + while (0 < recipe_size - recipe_used) { + + if (recipe_size - recipe_used < sizeof(*sub_recipe)) { + kr = KERN_INVALID_ARGUMENT; + break; + } + + /* find the next recipe */ + sub_recipe = (ipc_voucher_attr_recipe_t)(void *)&recipes[recipe_used]; + if (recipe_size - recipe_used - sizeof(*sub_recipe) < sub_recipe->content_size) { + kr = KERN_INVALID_ARGUMENT; + break; + } + recipe_used += sizeof(*sub_recipe) + sub_recipe->content_size; + + kr = ipc_execute_voucher_recipe_command(voucher, + sub_recipe->key, + sub_recipe->command, + sub_recipe->previous_voucher, + sub_recipe->content, + sub_recipe->content_size, + FALSE); + if (KERN_SUCCESS != kr) + break; + } + + if (KERN_SUCCESS == kr) { + *new_voucher = iv_dedup(voucher); + } else { + iv_dealloc(voucher, FALSE); + *new_voucher = IV_NULL; + } + return kr; +} + +/* + * Routine: ipc_voucher_attr_control_create_mach_voucher + * Purpose: + * Create a new mach voucher and initialize it with the + * value(s) created by having the appropriate resource + * managers interpret the supplied recipe commands and + * data. + * + * The resource manager control's privilege over its + * particular key value is reflected on to the execution + * code, allowing internal commands (like setting a + * key value handle directly, rather than having to + * create a recipe, that will generate a callback just + * to get the value. + * + * Conditions: + * Nothing locked (may invoke user-space repeatedly). + * Caller holds references on previous vouchers. + * Previous vouchers are passed as voucher indexes. + */ +kern_return_t +ipc_voucher_attr_control_create_mach_voucher( + ipc_voucher_attr_control_t control, + ipc_voucher_attr_raw_recipe_array_t recipes, + ipc_voucher_attr_raw_recipe_array_size_t recipe_size, + ipc_voucher_t *new_voucher) +{ + mach_voucher_attr_key_t control_key; + ipc_voucher_attr_recipe_t sub_recipe; + ipc_voucher_attr_recipe_size_t recipe_used = 0; + ipc_voucher_t voucher = IV_NULL; + kern_return_t kr = KERN_SUCCESS; + + if (IPC_VOUCHER_ATTR_CONTROL_NULL == control) + return KERN_INVALID_CAPABILITY; + + /* if nothing to do ... 
*/ + if (0 == recipe_size) { + *new_voucher = IV_NULL; + return KERN_SUCCESS; + } + + /* allocate new voucher */ + voucher = iv_alloc(ivgt_keys_in_use); + if (IV_NULL == voucher) + return KERN_RESOURCE_SHORTAGE; + + control_key = iv_index_to_key(control->ivac_key_index); + + /* iterate over the recipe items */ + while (0 < recipe_size - recipe_used) { + + if (recipe_size - recipe_used < sizeof(*sub_recipe)) { + kr = KERN_INVALID_ARGUMENT; + break; + } + + /* find the next recipe */ + sub_recipe = (ipc_voucher_attr_recipe_t)(void *)&recipes[recipe_used]; + if (recipe_size - recipe_used - sizeof(*sub_recipe) < sub_recipe->content_size) { + kr = KERN_INVALID_ARGUMENT; + break; + } + recipe_used += sizeof(*sub_recipe) + sub_recipe->content_size; + + kr = ipc_execute_voucher_recipe_command(voucher, + sub_recipe->key, + sub_recipe->command, + sub_recipe->previous_voucher, + sub_recipe->content, + sub_recipe->content_size, + (sub_recipe->key == control_key)); + if (KERN_SUCCESS != kr) + break; + } + + if (KERN_SUCCESS == kr) { + *new_voucher = iv_dedup(voucher); + } else { + *new_voucher = IV_NULL; + iv_dealloc(voucher, FALSE); + } + return kr; +} + +/* + * ipc_register_well_known_mach_voucher_attr_manager + * + * Register the resource manager responsible for a given key value. + */ +kern_return_t +ipc_register_well_known_mach_voucher_attr_manager( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_value_handle_t default_value, + mach_voucher_attr_key_t key, + ipc_voucher_attr_control_t *control) +{ + ipc_voucher_attr_control_t new_control; + iv_index_t key_index; + iv_index_t hash_index; + + if (IVAM_NULL == manager) + return KERN_INVALID_ARGUMENT; + + key_index = iv_key_to_index(key); + if (IV_UNUSED_KEYINDEX == key_index) + return KERN_INVALID_ARGUMENT; + + new_control = ivac_alloc(key_index); + if (IVAC_NULL == new_control) + return KERN_RESOURCE_SHORTAGE; + + /* insert the default value into slot 0 */ + new_control->ivac_table[IV_UNUSED_VALINDEX].ivace_value = default_value; + new_control->ivac_table[IV_UNUSED_VALINDEX].ivace_refs = IVACE_REFS_MAX; + new_control->ivac_table[IV_UNUSED_VALINDEX].ivace_made = IVACE_REFS_MAX; + assert(IV_HASH_END == new_control->ivac_table[IV_UNUSED_VALINDEX].ivace_next); + + ivgt_lock(); + if (IVAM_NULL != iv_global_table[key_index].ivgte_manager) { + ivgt_unlock(); + ivac_release(new_control); + return KERN_INVALID_ARGUMENT; + } + + /* fill in the global table slot for this key */ + iv_global_table[key_index].ivgte_manager = manager; + iv_global_table[key_index].ivgte_control = new_control; + iv_global_table[key_index].ivgte_key = key; + + /* insert the default value into the hash (in case it is returned later) */ + hash_index = iv_hash_value(key_index, default_value); + assert(IV_HASH_END == new_control->ivac_table[hash_index].ivace_index); + new_control->ivac_table[hash_index].ivace_index = IV_UNUSED_VALINDEX; + + ivgt_unlock(); + + /* return the reference on the new cache control to the caller */ + *control = new_control; + + return KERN_SUCCESS; +} + +/* + * Routine: mach_voucher_extract_attr_content + * Purpose: + * Extract the content for a given pair. + * + * If a value other than the default is present for this + * pair, we need to contact the resource + * manager to extract the content/meaning of the value(s) + * present. Otherwise, return success (but no data). + * + * Conditions: + * Nothing locked - as it may upcall to user-space. + * The caller holds a reference on the voucher. 
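+ *
+ * On entry *in_out_size is the capacity of the content buffer;
+ * on return it is the number of content bytes produced. A
+ * default (unset) value for the key short-circuits to
+ * KERN_SUCCESS with *in_out_size = 0 and no manager callout.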
+ */ +kern_return_t +mach_voucher_extract_attr_content( + ipc_voucher_t voucher, + mach_voucher_attr_key_t key, + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t *in_out_size) +{ + mach_voucher_attr_value_handle_t vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED]; + mach_voucher_attr_value_handle_array_size_t vals_count; + mach_voucher_attr_recipe_command_t command; + ipc_voucher_attr_manager_t manager; + iv_index_t value_index; + iv_index_t key_index; + kern_return_t kr; + + + if (IV_NULL == voucher) + return KERN_INVALID_ARGUMENT; + + key_index = iv_key_to_index(key); + + value_index = iv_lookup(voucher, key_index); + if (IV_UNUSED_VALINDEX == value_index) { + *in_out_size = 0; + return KERN_SUCCESS; + } + + /* + * Get the manager for this key_index. The + * existence of a non-default value for this + * slot within our voucher will keep the + * manager referenced during the callout. + */ + ivgt_lookup(key_index, FALSE, &manager, NULL); + assert(IVAM_NULL != manager); + + /* + * Get the value(s) to pass to the manager + * for this value_index. + */ + ivace_lookup_values(key_index, value_index, + vals, &vals_count); + assert(0 < vals_count); + + /* callout to manager */ + + kr = (manager->ivam_extract_content)(manager, key, + vals, vals_count, + &command, + content, in_out_size); + return kr; +} + +/* + * Routine: mach_voucher_extract_attr_recipe + * Purpose: + * Extract a recipe for a given <voucher, key> pair. + * + * If a value other than the default is present for this + * <voucher, key> pair, we need to contact the resource + * manager to extract the content/meaning of the value(s) + * present. Otherwise, return success (but no data). + * + * Conditions: + * Nothing locked - as it may upcall to user-space. + * The caller holds a reference on the voucher. + */ +kern_return_t +mach_voucher_extract_attr_recipe( + ipc_voucher_t voucher, + mach_voucher_attr_key_t key, + mach_voucher_attr_raw_recipe_t raw_recipe, + mach_voucher_attr_raw_recipe_size_t *in_out_size) +{ + mach_voucher_attr_value_handle_t vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED]; + mach_voucher_attr_value_handle_array_size_t vals_count; + ipc_voucher_attr_manager_t manager; + mach_voucher_attr_recipe_t recipe; + iv_index_t value_index; + iv_index_t key_index; + kern_return_t kr; + + + if (IV_NULL == voucher) + return KERN_INVALID_ARGUMENT; + + key_index = iv_key_to_index(key); + + value_index = iv_lookup(voucher, key_index); + if (IV_UNUSED_VALINDEX == value_index) { + *in_out_size = 0; + return KERN_SUCCESS; + } + + if (*in_out_size < sizeof(*recipe)) + return KERN_NO_SPACE; + + recipe = (mach_voucher_attr_recipe_t)(void *)raw_recipe; + recipe->key = key; + recipe->command = MACH_VOUCHER_ATTR_NOOP; + recipe->previous_voucher = MACH_VOUCHER_NAME_NULL; + recipe->content_size = *in_out_size - sizeof(*recipe); + + /* + * Get the manager for this key_index. The + * existence of a non-default value for this + * slot within our voucher will keep the + * manager referenced during the callout. + */ + ivgt_lookup(key_index, FALSE, &manager, NULL); + assert(IVAM_NULL != manager); + + /* + * Get the value(s) to pass to the manager + * for this value_index. 
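+	 *
+	 * Note: recipe->content_size was primed above with the space
+	 * remaining after the recipe header, so the manager callout
+	 * below can only shrink it; the assert on the way out checks
+	 * that it never grew.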
+ */ + ivace_lookup_values(key_index, value_index, + vals, &vals_count); + assert(0 < vals_count); + + /* callout to manager */ + kr = (manager->ivam_extract_content)(manager, key, + vals, vals_count, + &recipe->command, + recipe->content, &recipe->content_size); + if (KERN_SUCCESS == kr) { + assert(*in_out_size - sizeof(*recipe) >= recipe->content_size); + *in_out_size = sizeof(*recipe) + recipe->content_size; + } + + return kr; +} + + + +/* + * Routine: mach_voucher_extract_all_attr_recipes + * Purpose: + * Extract all the (non-default) contents for a given voucher, + * building up a recipe that could be provided to a future + * voucher creation call. + * Conditions: + * Nothing locked (may invoke user-space). + * Caller holds a reference on the supplied voucher. + */ +kern_return_t +mach_voucher_extract_all_attr_recipes( + ipc_voucher_t voucher, + mach_voucher_attr_raw_recipe_array_t recipes, + mach_voucher_attr_raw_recipe_array_size_t *in_out_size) +{ + mach_voucher_attr_recipe_size_t recipe_size = *in_out_size; + mach_voucher_attr_recipe_size_t recipe_used = 0; + iv_index_t key_index; + + if (IV_NULL == voucher) + return KERN_INVALID_ARGUMENT; + + for (key_index = 0; key_index < voucher->iv_table_size; key_index++) { + mach_voucher_attr_value_handle_t vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED]; + mach_voucher_attr_value_handle_array_size_t vals_count; + mach_voucher_attr_content_size_t content_size; + ipc_voucher_attr_manager_t manager; + mach_voucher_attr_recipe_t recipe; + mach_voucher_attr_key_t key; + iv_index_t value_index; + kern_return_t kr; + + /* don't output anything for a default value */ + value_index = iv_lookup(voucher, key_index); + if (IV_UNUSED_VALINDEX == value_index) + continue; + + if (recipe_size - recipe_used < sizeof(*recipe)) + return KERN_NO_SPACE; + + recipe = (mach_voucher_attr_recipe_t)(void *)&recipes[recipe_used]; + content_size = recipe_size - recipe_used - sizeof(*recipe); + + /* + * Get the manager for this key_index. The + * existence of a non-default value for this + * slot within our voucher will keep the + * manager referenced during the callout. + */ + ivgt_lookup(key_index, FALSE, &manager, NULL); + assert(IVAM_NULL != manager); + + /* + * Get the value(s) to pass to the manager + * for this value_index. + */ + ivace_lookup_values(key_index, value_index, + vals, &vals_count); + assert(0 < vals_count); + + key = iv_index_to_key(key_index); + + recipe->key = key; + recipe->command = MACH_VOUCHER_ATTR_NOOP; + recipe->content_size = content_size; + + /* callout to manager */ + kr = (manager->ivam_extract_content)(manager, key, + vals, vals_count, + &recipe->command, + recipe->content, &recipe->content_size); + if (KERN_SUCCESS != kr) + return kr; + + assert(recipe->content_size <= content_size); + recipe_used += sizeof(*recipe) + recipe->content_size; + } + + *in_out_size = recipe_used; + return KERN_SUCCESS; +} + +/* + * Routine: mach_voucher_debug_info + * Purpose: + * Extract all the (non-default) contents for a given mach port name, + * building up a recipe that could be provided to a future + * voucher creation call. + * Conditions: + * Nothing locked (may invoke user-space). + * Caller may not hold a reference on the supplied voucher. 
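+ *
+ *		A sketch of a debugging caller (the buffer size is an
+ *		illustrative assumption; on RELEASE kernels the call
+ *		returns KERN_NOT_SUPPORTED):
+ *
+ *			uint8_t recipes[1024];
+ *			mach_msg_type_number_t size = sizeof(recipes);
+ *
+ *			kr = mach_voucher_debug_info(mach_task_self(),
+ *				voucher_name, recipes, &size);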
+ */ +#if !(DEVELOPMENT || DEBUG) +kern_return_t +mach_voucher_debug_info( + ipc_space_t __unused space, + mach_port_name_t __unused voucher_name, + mach_voucher_attr_raw_recipe_array_t __unused recipes, + mach_voucher_attr_raw_recipe_array_size_t __unused *in_out_size) +{ + return KERN_NOT_SUPPORTED; +} +#else +kern_return_t +mach_voucher_debug_info( + ipc_space_t space, + mach_port_name_t voucher_name, + mach_voucher_attr_raw_recipe_array_t recipes, + mach_voucher_attr_raw_recipe_array_size_t *in_out_size) +{ + ipc_voucher_t voucher = IPC_VOUCHER_NULL; + kern_return_t kr; + ipc_port_t port = MACH_PORT_NULL; + + if (!MACH_PORT_VALID(voucher_name)) { + return KERN_INVALID_ARGUMENT; + } + + kr = ipc_port_translate_send(space, voucher_name, &port); + if (KERN_SUCCESS != kr) + return KERN_INVALID_ARGUMENT; + + voucher = convert_port_to_voucher(port); + ip_unlock(port); + + if (voucher) { + kr = mach_voucher_extract_all_attr_recipes(voucher, recipes, in_out_size); + ipc_voucher_release(voucher); + return kr; + } + + return KERN_FAILURE; +} +#endif + +/* + * Routine: mach_voucher_attr_command + * Purpose: + * Invoke an attribute-specific command through this voucher. + * + * The voucher layout, membership, etc... is not altered + * through the execution of this command. + * + * Conditions: + * Nothing locked - as it may upcall to user-space. + * The caller holds a reference on the voucher. + */ +kern_return_t +mach_voucher_attr_command( + ipc_voucher_t voucher, + mach_voucher_attr_key_t key, + mach_voucher_attr_command_t command, + mach_voucher_attr_content_t in_content, + mach_voucher_attr_content_size_t in_content_size, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *out_content_size) +{ + mach_voucher_attr_value_handle_t vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED]; + mach_voucher_attr_value_handle_array_size_t vals_count; + ipc_voucher_attr_manager_t manager; + ipc_voucher_attr_control_t control; + iv_index_t value_index; + iv_index_t key_index; + kern_return_t kr; + + + if (IV_NULL == voucher) + return KERN_INVALID_ARGUMENT; + + key_index = iv_key_to_index(key); + + /* + * Get the manager for this key_index. + * Allowing commands against the default value + * for an attribute means that we have to hold a + * reference on the attribute manager control + * to keep the manager around during the command + * execution. + */ + ivgt_lookup(key_index, TRUE, &manager, &control); + assert(IVAM_NULL != manager); + + /* + * Get the values for this <voucher, key> pair + * to pass to the attribute manager. It is still + * permissible to execute a command against the + * default value (empty value array). + */ + value_index = iv_lookup(voucher, key_index); + ivace_lookup_values(key_index, value_index, + vals, &vals_count); + + /* callout to manager */ + kr = (manager->ivam_command)(manager, key, + vals, vals_count, + command, + in_content, in_content_size, + out_content, out_content_size); + + /* release reference on control */ + ivac_release(control); + + return kr; +} + +/* + * Routine: mach_voucher_attr_control_get_values + * Purpose: + * For a given voucher, get the value handle associated with the + * specified attribute manager. 
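+ *
+ *		A minimal sketch of an in-kernel caller:
+ *
+ *			mach_voucher_attr_value_handle_t vals[MACH_VOUCHER_ATTR_VALUE_MAX_NESTED];
+ *			mach_voucher_attr_value_handle_array_size_t count = MACH_VOUCHER_ATTR_VALUE_MAX_NESTED;
+ *
+ *			kr = mach_voucher_attr_control_get_values(control,
+ *				voucher, vals, &count);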
+ */ +kern_return_t +mach_voucher_attr_control_get_values( + ipc_voucher_attr_control_t control, + ipc_voucher_t voucher, + mach_voucher_attr_value_handle_array_t out_values, + mach_voucher_attr_value_handle_array_size_t *in_out_size) +{ + iv_index_t key_index, value_index; + + if (IPC_VOUCHER_ATTR_CONTROL_NULL == control) + return KERN_INVALID_CAPABILITY; + + if (IV_NULL == voucher) + return KERN_INVALID_ARGUMENT; + + if (0 == *in_out_size) + return KERN_SUCCESS; + + key_index = control->ivac_key_index; + + assert(0 < voucher->iv_refs); + value_index = iv_lookup(voucher, key_index); + ivace_lookup_values(key_index, value_index, + out_values, in_out_size); + return KERN_SUCCESS; +} + + +/* + * Routine: mach_voucher_attr_control_create_mach_voucher + * Purpose: + * Create a new mach voucher and initialize it by processing the + * supplied recipe(s). + * + * Coming in on the attribute control port denotes special privileges + * over the key associated with the control port. + * + * Coming in from user-space, each recipe item will have a previous + * recipe port name that needs to be converted to a voucher. Because + * we can't rely on the port namespace to hold a reference on each + * previous voucher port for the duration of processing that command, + * we have to convert the name to a voucher reference and release it + * after the command processing is done. + */ +kern_return_t +mach_voucher_attr_control_create_mach_voucher( + ipc_voucher_attr_control_t control, + mach_voucher_attr_raw_recipe_array_t recipes, + mach_voucher_attr_raw_recipe_size_t recipe_size, + ipc_voucher_t *new_voucher) +{ + mach_voucher_attr_key_t control_key; + mach_voucher_attr_recipe_t sub_recipe; + mach_voucher_attr_recipe_size_t recipe_used = 0; + ipc_voucher_t voucher = IV_NULL; + kern_return_t kr = KERN_SUCCESS; + + if (IPC_VOUCHER_ATTR_CONTROL_NULL == control) + return KERN_INVALID_CAPABILITY; + + /* if nothing to do ... */ + if (0 == recipe_size) { + *new_voucher = IV_NULL; + return KERN_SUCCESS; + } + + /* allocate new voucher */ + voucher = iv_alloc(ivgt_keys_in_use); + if (IV_NULL == voucher) + return KERN_RESOURCE_SHORTAGE; + + control_key = iv_index_to_key(control->ivac_key_index); + + /* iterate over the recipe items */ + while (0 < recipe_size - recipe_used) { + ipc_voucher_t prev_iv; + + if (recipe_size - recipe_used < sizeof(*sub_recipe)) { + kr = KERN_INVALID_ARGUMENT; + break; + } + + /* find the next recipe */ + sub_recipe = (mach_voucher_attr_recipe_t)(void *)&recipes[recipe_used]; + if (recipe_size - recipe_used - sizeof(*sub_recipe) < sub_recipe->content_size) { + kr = KERN_INVALID_ARGUMENT; + break; + } + recipe_used += sizeof(*sub_recipe) + sub_recipe->content_size; + + /* convert voucher port name (current space) into a voucher reference */ + prev_iv = convert_port_name_to_voucher(sub_recipe->previous_voucher); + if (MACH_PORT_NULL != sub_recipe->previous_voucher && IV_NULL == prev_iv) { + kr = KERN_INVALID_CAPABILITY; + break; + } + + kr = ipc_execute_voucher_recipe_command(voucher, + sub_recipe->key, + sub_recipe->command, + prev_iv, + sub_recipe->content, + sub_recipe->content_size, + (sub_recipe->key == control_key)); + ipc_voucher_release(prev_iv); + + if (KERN_SUCCESS != kr) + break; + } + + if (KERN_SUCCESS == kr) { + *new_voucher = iv_dedup(voucher); + } else { + *new_voucher = IV_NULL; + iv_dealloc(voucher, FALSE); + } + return kr; +} + +/* + * Routine: host_create_mach_voucher + * Purpose: + * Create a new mach voucher and initialize it by processing the + * supplied recipe(s). 
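+ *
+ *		For example, a user-space caller might submit a single
+ *		user-data recipe like this (sizes and content are
+ *		illustrative, not required by the interface):
+ *
+ *			uint8_t buf[sizeof(mach_voucher_attr_recipe_data_t) + 4];
+ *			mach_voucher_attr_recipe_t r = (mach_voucher_attr_recipe_t)buf;
+ *			mach_port_t voucher;
+ *
+ *			r->key = MACH_VOUCHER_ATTR_KEY_USER_DATA;
+ *			r->command = MACH_VOUCHER_ATTR_USER_DATA_STORE;
+ *			r->previous_voucher = MACH_VOUCHER_NULL;
+ *			r->content_size = 4;
+ *			memcpy(r->content, "data", 4);
+ *
+ *			kr = host_create_mach_voucher(mach_host_self(),
+ *				buf, sizeof(buf), &voucher);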
+ * + * Coming in from user-space, each recipe item will have a previous + * recipe port name that needs to be converted to a voucher. Because + * we can't rely on the port namespace to hold a reference on each + * previous voucher port for the duration of processing that command, + * we have to convert the name to a voucher reference and release it + * after the command processing is done. + */ +kern_return_t +host_create_mach_voucher( + host_t host, + mach_voucher_attr_raw_recipe_array_t recipes, + mach_voucher_attr_raw_recipe_size_t recipe_size, + ipc_voucher_t *new_voucher) +{ + mach_voucher_attr_recipe_t sub_recipe; + mach_voucher_attr_recipe_size_t recipe_used = 0; + ipc_voucher_t voucher = IV_NULL; + kern_return_t kr = KERN_SUCCESS; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + /* if nothing to do ... */ + if (0 == recipe_size) { + *new_voucher = IV_NULL; + return KERN_SUCCESS; + } + + /* allocate new voucher */ + voucher = iv_alloc(ivgt_keys_in_use); + if (IV_NULL == voucher) + return KERN_RESOURCE_SHORTAGE; + + /* iterate over the recipe items */ + while (0 < recipe_size - recipe_used) { + ipc_voucher_t prev_iv; + + if (recipe_size - recipe_used < sizeof(*sub_recipe)) { + kr = KERN_INVALID_ARGUMENT; + break; + } + + /* find the next recipe */ + sub_recipe = (mach_voucher_attr_recipe_t)(void *)&recipes[recipe_used]; + if (recipe_size - recipe_used - sizeof(*sub_recipe) < sub_recipe->content_size) { + kr = KERN_INVALID_ARGUMENT; + break; + } + recipe_used += sizeof(*sub_recipe) + sub_recipe->content_size; + + /* convert voucher port name (current space) into a voucher reference */ + prev_iv = convert_port_name_to_voucher(sub_recipe->previous_voucher); + if (MACH_PORT_NULL != sub_recipe->previous_voucher && IV_NULL == prev_iv) { + kr = KERN_INVALID_CAPABILITY; + break; + } + + kr = ipc_execute_voucher_recipe_command(voucher, + sub_recipe->key, + sub_recipe->command, + prev_iv, + sub_recipe->content, + sub_recipe->content_size, + FALSE); + ipc_voucher_release(prev_iv); + + if (KERN_SUCCESS != kr) + break; + } + + if (KERN_SUCCESS == kr) { + *new_voucher = iv_dedup(voucher); + } else { + *new_voucher = IV_NULL; + iv_dealloc(voucher, FALSE); + } + return kr; +} + +/* + * Routine: host_register_well_known_mach_voucher_attr_manager + * Purpose: + * Register the user-level resource manager responsible for a given + * key value. + * Conditions: + * The manager port passed in has to be converted/wrapped + * in an ipc_voucher_attr_manager_t structure, which is then + * passed to the internal variant. We have a generic ipc voucher manager + * type that implements a MIG proxy out to user-space just for + * this purpose. + */ +kern_return_t +host_register_well_known_mach_voucher_attr_manager( + host_t host, + mach_voucher_attr_manager_t __unused manager, + mach_voucher_attr_value_handle_t __unused default_value, + mach_voucher_attr_key_t __unused key, + ipc_voucher_attr_control_t __unused *control) +{ + if (HOST_NULL == host) + return KERN_INVALID_HOST; + +#if 1 + return KERN_NOT_SUPPORTED; +#else + /* + * Allocate a mig_voucher_attr_manager_t that provides the + * MIG proxy functions for the three manager callbacks and + * store the port right in there. + * + * If the user-space manager dies, we'll detect it on our + * next upcall, and clean up the proxy at that point. 
+ */ + mig_voucher_attr_manager_t proxy; + kern_return_t kr; + + proxy = mvam_alloc(manager); + + kr = ipc_register_well_known_mach_voucher_attr_manager(&proxy->mvam_manager, + default_value, + key, + control); + if (KERN_SUCCESS != kr) + mvam_release(proxy); + + return kr; +#endif +} + +/* + * Routine: host_register_mach_voucher_attr_manager + * Purpose: + * Register the user-space resource manager and return a + * dynamically allocated key. + * Conditions: + * Wrap the supplied port with the MIG proxy ipc + * voucher resource manager, and then call the internal + * variant. + */ +kern_return_t +host_register_mach_voucher_attr_manager( + host_t host, + mach_voucher_attr_manager_t __unused manager, + mach_voucher_attr_value_handle_t __unused default_value, + mach_voucher_attr_key_t __unused *key, + ipc_voucher_attr_control_t __unused *control) +{ + if (HOST_NULL == host) + return KERN_INVALID_HOST; + + return KERN_NOT_SUPPORTED; +} + + +#if defined(MACH_VOUCHER_ATTR_KEY_USER_DATA) || defined(MACH_VOUCHER_ATTR_KEY_TEST) + +/* + * Build-in a simple User Data Resource Manager + */ +#define USER_DATA_MAX_DATA (16*1024) + +struct user_data_value_element { + mach_voucher_attr_value_reference_t e_made; + mach_voucher_attr_content_size_t e_size; + iv_index_t e_sum; + iv_index_t e_hash; + queue_chain_t e_hash_link; + uint8_t e_data[]; +}; + +typedef struct user_data_value_element *user_data_element_t; + +/* + * User Data Voucher Hash Table + */ +#define USER_DATA_HASH_BUCKETS 127 +#define USER_DATA_HASH_BUCKET(x) ((x) % USER_DATA_HASH_BUCKETS) + +static queue_head_t user_data_bucket[USER_DATA_HASH_BUCKETS]; +static lck_spin_t user_data_lock_data; + +#define user_data_lock_init() \ + lck_spin_init(&user_data_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define user_data_lock_destroy() \ + lck_spin_destroy(&user_data_lock_data, &ipc_lck_grp) +#define user_data_lock() \ + lck_spin_lock(&user_data_lock_data) +#define user_data_lock_try() \ + lck_spin_try_lock(&user_data_lock_data) +#define user_data_unlock() \ + lck_spin_unlock(&user_data_lock_data) + +static kern_return_t +user_data_release_value( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_value_handle_t value, + mach_voucher_attr_value_reference_t sync); + +static kern_return_t +user_data_get_value( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_recipe_command_t command, + mach_voucher_attr_value_handle_array_t prev_values, + mach_voucher_attr_value_handle_array_size_t prev_value_count, + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t content_size, + mach_voucher_attr_value_handle_t *out_value, + ipc_voucher_t *out_value_voucher); + +static kern_return_t +user_data_extract_content( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_value_handle_array_t values, + mach_voucher_attr_value_handle_array_size_t value_count, + mach_voucher_attr_recipe_command_t *out_command, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *in_out_content_size); + +static kern_return_t +user_data_command( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_key_t key, + mach_voucher_attr_value_handle_array_t values, + mach_msg_type_number_t value_count, + mach_voucher_attr_command_t command, + mach_voucher_attr_content_t in_content, + mach_voucher_attr_content_size_t in_content_size, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *out_content_size); + +static void 
+user_data_release( + ipc_voucher_attr_manager_t manager); + +struct ipc_voucher_attr_manager user_data_manager = { + .ivam_release_value = user_data_release_value, + .ivam_get_value = user_data_get_value, + .ivam_extract_content = user_data_extract_content, + .ivam_command = user_data_command, + .ivam_release = user_data_release, +}; + +ipc_voucher_attr_control_t user_data_control; +ipc_voucher_attr_control_t test_control; + +#if defined(MACH_VOUCHER_ATTR_KEY_USER_DATA) && defined(MACH_VOUCHER_ATTR_KEY_TEST) +#define USER_DATA_ASSERT_KEY(key) \ + assert(MACH_VOUCHER_ATTR_KEY_USER_DATA == (key) || \ + MACH_VOUCHER_ATTR_KEY_TEST == (key)); +#elif defined(MACH_VOUCHER_ATTR_KEY_USER_DATA) +#define USER_DATA_ASSERT_KEY(key) assert(MACH_VOUCHER_ATTR_KEY_USER_DATA == (key)) +#else +#define USER_DATA_ASSERT_KEY(key) assert(MACH_VOUCHER_ATTR_KEY_TEST == (key)) +#endif + +/* + * Routine: user_data_release_value + * Purpose: + * Release a made reference on a specific value managed by + * this voucher attribute manager. + * Conditions: + * Must remove the element associated with this value from + * the hash if this is the last known made reference. + */ +static kern_return_t +user_data_release_value( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_t value, + mach_voucher_attr_value_reference_t sync) +{ + user_data_element_t elem; + iv_index_t hash; + + assert (&user_data_manager == manager); + USER_DATA_ASSERT_KEY(key); + + elem = (user_data_element_t)value; + hash = elem->e_hash; + + user_data_lock(); + if (sync == elem->e_made) { + queue_remove(&user_data_bucket[hash], elem, user_data_element_t, e_hash_link); + user_data_unlock(); + kfree(elem, sizeof(*elem) + elem->e_size); + return KERN_SUCCESS; + } + assert(sync < elem->e_made); + user_data_unlock(); + + return KERN_FAILURE; +} + +/* + * Routine: user_data_checksum + * Purpose: + * Provide a rudimentary checksum for the data presented + * to these voucher attribute managers. + */ +static iv_index_t +user_data_checksum( + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t content_size) +{ + mach_voucher_attr_content_size_t i; + iv_index_t cksum = 0; + + for(i = 0; i < content_size; i++, content++) { + cksum = (cksum << 8) ^ (cksum + *(unsigned char *)content); + } + + return (~cksum); +} + +/* + * Routine: user_data_dedup + * Purpose: + * See if the content represented by this request already exists + * in another user data element. If so, return a made reference + * to the existing element. Otherwise, create a new element and + * return that (after inserting it in the hash). + * Conditions: + * Nothing locked. + * Returns: + * A made reference on the user_data_element_t + */ +static user_data_element_t +user_data_dedup( + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t content_size) +{ + iv_index_t sum; + iv_index_t hash; + user_data_element_t elem; + user_data_element_t alloc = NULL; + + sum = user_data_checksum(content, content_size); + hash = USER_DATA_HASH_BUCKET(sum); + + retry: + user_data_lock(); + queue_iterate(&user_data_bucket[hash], elem, user_data_element_t, e_hash_link) { + assert(elem->e_hash == hash); + + /* if sums match... */ + if (elem->e_sum == sum && elem->e_size == content_size) { + iv_index_t i; + + /* and all data matches */ + for (i = 0; i < content_size; i++) + if (elem->e_data[i] != content[i]) + break; + if (i < content_size) + continue; + + /* ... we found a match... 
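+			 * bump its made count while still holding the
+			 * lock, and free any element we had speculatively
+			 * allocated on an earlier pass.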
*/ + + elem->e_made++; + user_data_unlock(); + + if (NULL != alloc) + kfree(alloc, sizeof(*alloc) + content_size); + + return elem; + } + } + + if (NULL == alloc) { + user_data_unlock(); + + alloc = (user_data_element_t)kalloc(sizeof(*alloc) + content_size); + alloc->e_made = 1; + alloc->e_size = content_size; + alloc->e_sum = sum; + alloc->e_hash = hash; + memcpy(alloc->e_data, content, content_size); + goto retry; + } + + queue_enter(&user_data_bucket[hash], alloc, user_data_element_t, e_hash_link); + user_data_unlock(); + + return alloc; +} + +static kern_return_t +user_data_get_value( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_recipe_command_t command, + mach_voucher_attr_value_handle_array_t prev_values, + mach_voucher_attr_value_handle_array_size_t prev_value_count, + mach_voucher_attr_content_t content, + mach_voucher_attr_content_size_t content_size, + mach_voucher_attr_value_handle_t *out_value, + ipc_voucher_t *out_value_voucher) +{ + user_data_element_t elem; + + assert (&user_data_manager == manager); + USER_DATA_ASSERT_KEY(key); + + /* never an out voucher */ + *out_value_voucher = IPC_VOUCHER_NULL; + + switch (command) { + + case MACH_VOUCHER_ATTR_REDEEM: + + /* redeem of previous values is the value */ + if (0 < prev_value_count) { + elem = (user_data_element_t)prev_values[0]; + assert(0 < elem->e_made); + elem->e_made++; + *out_value = prev_values[0]; + return KERN_SUCCESS; + } + + /* redeem of default is default */ + *out_value = 0; + return KERN_SUCCESS; + + case MACH_VOUCHER_ATTR_USER_DATA_STORE: + if (USER_DATA_MAX_DATA < content_size) + return KERN_RESOURCE_SHORTAGE; + + /* empty is the default */ + if (0 == content_size) { + *out_value = 0; + return KERN_SUCCESS; + } + + elem = user_data_dedup(content, content_size); + *out_value = (mach_voucher_attr_value_handle_t)elem; + return KERN_SUCCESS; + + default: + /* every other command is unknown */ + return KERN_INVALID_ARGUMENT; + } +} + +static kern_return_t +user_data_extract_content( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_array_t values, + mach_voucher_attr_value_handle_array_size_t value_count, + mach_voucher_attr_recipe_command_t *out_command, + mach_voucher_attr_content_t out_content, + mach_voucher_attr_content_size_t *in_out_content_size) +{ + mach_voucher_attr_content_size_t size = 0; + user_data_element_t elem; + unsigned int i; + + assert (&user_data_manager == manager); + USER_DATA_ASSERT_KEY(key); + + /* concatenate the stored data items */ + for (i = 0; i < value_count ; i++) { + elem = (user_data_element_t)values[i]; + assert(USER_DATA_MAX_DATA >= elem->e_size); + + if (size + elem->e_size > *in_out_content_size) + return KERN_NO_SPACE; + + memcpy(&out_content[size], elem->e_data, elem->e_size); + size += elem->e_size; + } + *out_command = MACH_VOUCHER_ATTR_BITS_STORE; + *in_out_content_size = size; + return KERN_SUCCESS; +} + +static kern_return_t +user_data_command( + ipc_voucher_attr_manager_t __assert_only manager, + mach_voucher_attr_key_t __assert_only key, + mach_voucher_attr_value_handle_array_t __unused values, + mach_msg_type_number_t __unused value_count, + mach_voucher_attr_command_t __unused command, + mach_voucher_attr_content_t __unused in_content, + mach_voucher_attr_content_size_t __unused in_content_size, + mach_voucher_attr_content_t __unused out_content, + mach_voucher_attr_content_size_t __unused *out_content_size) +{ + 
assert (&user_data_manager == manager); + USER_DATA_ASSERT_KEY(key); + return KERN_FAILURE; +} + +static void +user_data_release( + ipc_voucher_attr_manager_t manager) +{ + if (manager != &user_data_manager) + return; + + panic("Voucher user-data manager released"); +} + +static int user_data_manager_inited = 0; + +void +user_data_attr_manager_init() +{ + kern_return_t kr; + +#if defined(MACH_VOUCHER_ATTR_KEY_USER_DATA) + if ((user_data_manager_inited & 0x1) != 0x1) { + kr = ipc_register_well_known_mach_voucher_attr_manager(&user_data_manager, + (mach_voucher_attr_value_handle_t)0, + MACH_VOUCHER_ATTR_KEY_USER_DATA, + &user_data_control); + if (KERN_SUCCESS != kr) + printf("Voucher user-data manager register(USER-DATA) returned %d", kr); + else + user_data_manager_inited |= 0x1; + } +#endif +#if defined(MACH_VOUCHER_ATTR_KEY_TEST) + if ((user_data_manager_inited & 0x2) != 0x2) { + kr = ipc_register_well_known_mach_voucher_attr_manager(&user_data_manager, + (mach_voucher_attr_value_handle_t)0, + MACH_VOUCHER_ATTR_KEY_TEST, + &test_control); + if (KERN_SUCCESS != kr) + printf("Voucher user-data manager register(TEST) returned %d", kr); + else + user_data_manager_inited |= 0x2; + } +#endif +#if defined(MACH_VOUCHER_ATTR_KEY_USER_DATA) || defined(MACH_VOUCHER_ATTR_KEY_TEST) + int i; + + for (i=0; i < USER_DATA_HASH_BUCKETS; i++) + queue_init(&user_data_bucket[i]); + + user_data_lock_init(); +#endif +} + +#endif /* MACH_DEBUG */ diff --git a/osfmk/ipc/ipc_voucher.h b/osfmk/ipc/ipc_voucher.h new file mode 100644 index 000000000..a83695fac --- /dev/null +++ b/osfmk/ipc/ipc_voucher.h @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _IPC_IPC_VOUCHER_H_ +#define _IPC_IPC_VOUCHER_H_ + +#include <mach/mach_types.h> +#include <mach/mach_voucher_types.h> +#include <mach/boolean.h> +#include <ipc/ipc_types.h> + +#ifdef MACH_KERNEL_PRIVATE + +#include <kern/queue.h> +#include <kern/locks.h> +#include <kern/simple_lock.h> + +/* locking */ +extern lck_grp_t ipc_lck_grp; +extern lck_attr_t ipc_lck_attr; + +extern void ipc_voucher_init(void); + +/* some shorthand for longer types */ +typedef mach_voucher_attr_value_handle_t iv_value_handle_t; +typedef mach_voucher_attr_value_reference_t iv_value_refs_t; + +typedef natural_t iv_refs_t; + +typedef natural_t iv_index_t; +#define IV_UNUSED_VALINDEX ((iv_index_t) 0) +#define IV_UNUSED_KEYINDEX ((iv_index_t) ~0) + +typedef iv_index_t *iv_entry_t; +#define IVE_NULL ((iv_entry_t) 0) + +#define IV_ENTRIES_INLINE MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN + +/* + * IPC Voucher + * + * Vouchers are a reference counted immutable (once-created) set of + * indexes to particular resource manager attribute values + * (which themselves are reference counted). + */ +struct ipc_voucher { + iv_index_t iv_hash; /* checksum hash */ + iv_index_t iv_sum; /* checksum of values */ + iv_refs_t iv_refs; /* reference count */ + iv_index_t iv_table_size; /* size of the voucher table */ + iv_index_t iv_inline_table[IV_ENTRIES_INLINE]; + iv_entry_t iv_table; /* table of voucher attr entries */ + ipc_port_t iv_port; /* port representing the voucher */ + queue_chain_t iv_hash_link; /* link on hash chain */ +}; + +#define IV_NULL IPC_VOUCHER_NULL + + +/* + * Voucher Attribute Cache Control Object + * + * This is where the Voucher system stores its caches/references to + * returned resource manager attribute values. Each value only appears + * once in the table. If a value is returned more than once by the + * resource manager, the voucher system will increase the reference + * on the previous value. + * + * The voucher itself contains one entry per key that indexes into + * this table. + * + * A voucher that does not have an explicit index for a given key + * is assumed to have a reference on slot zero - which is where the + * voucher system stores the default value for the given attribute + * (specified at the time of resource manager registration). + * + * The ivace_releasing field limits the entry to a single concurrent + * return. Without it, a previous release's reply might still be + * working its way back to the voucher code, and a subsequent get- + * value could return the same value as was previously returned. If + * the resource manager already knew that, it would return a failure + * on the return, and all is well. We just treat the additional made + * references on the value as we normally would. However, if the resource + * manager accepted the return, and the get-value response raced the + * release's reply, the newly made references will look like an extension + * of the old value's cache lifetime, rather than a new one. Dropping + * that new lifetime's references to zero would result in a second + * release callback to the resource manager - this time with the wrong + * "made" reference count. We avoid the race with this flag. 
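+ *
+ * Concretely: a release for a value with three made references is
+ * sent to the manager; before its reply lands, a get-value callout
+ * returns the same value handle and new made references are taken.
+ * Without the flag, those new references would be folded into the
+ * old cache lifetime, and dropping them to zero would fire a second
+ * release callback with a stale made count.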
+ */ + +struct ivac_entry_s { + iv_value_handle_t ivace_value; + iv_value_refs_t ivace_layered:1, /* layered effective entry */ + ivace_releasing:1, /* release in progress */ + ivace_free:1, /* on freelist */ + ivace_refs:29; /* reference count */ + union { + iv_value_refs_t ivaceu_made; /* made count (non-layered) */ + iv_index_t ivaceu_layer; /* next effective layer (layered) */ + } ivace_u; + iv_index_t ivace_next; /* hash or freelist */ + iv_index_t ivace_index; /* hash head (independent) */ +}; +typedef struct ivac_entry_s ivac_entry; +typedef ivac_entry *ivac_entry_t; + +#define ivace_made ivace_u.ivaceu_made +#define ivace_layer ivace_u.ivaceu_layer + +#define IVACE_NULL ((ivac_entry_t) 0); + +#define IVACE_REFS_MAX ((1 << 29) - 1) + +#define IVAC_ENTRIES_MIN 512 +#define IVAC_ENTRIES_MAX 524288 + +struct ipc_voucher_attr_control { + iv_refs_t ivac_refs; + boolean_t ivac_is_growing; /* is the table being grown */ + ivac_entry_t ivac_table; /* table of voucher attr value entries */ + iv_index_t ivac_table_size; /* size of the attr value table */ + iv_index_t ivac_init_table_size; /* size of the attr value table */ + iv_index_t ivac_freelist; /* index of the first free element */ + ipc_port_t ivac_port; /* port for accessing the cache control */ + lck_spin_t ivac_lock_data; + iv_index_t ivac_key_index; /* key index for this value */ +}; +typedef ipc_voucher_attr_control_t iv_attr_control_t; + +#define IVAC_NULL IPC_VOUCHER_ATTR_CONTROL_NULL + +extern ipc_voucher_attr_control_t ivac_alloc(iv_index_t); + +#define ivac_lock_init(ivac) \ + lck_spin_init(&(ivac)->ivac_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define ivac_lock_destroy(ivac) \ + lck_spin_destroy(&(ivac)->ivac_lock_data, &ipc_lck_grp) +#define ivac_lock(ivac) \ + lck_spin_lock(&(ivac)->ivac_lock_data) +#define ivac_lock_try(ivac) \ + lck_spin_try_lock(&(ivac)->ivac_lock_data) +#define ivac_unlock(ivac) \ + lck_spin_unlock(&(ivac)->ivac_lock_data) +#define ivac_sleep(ivac) lck_spin_sleep(&(ivac)->ivac_lock_data, \ + LCK_SLEEP_DEFAULT, \ + (event_t)(ivac), \ + THREAD_UNINT) +#define ivac_wakeup(ivac) thread_wakeup((event_t)(ivac)) + +extern void ivac_dealloc(ipc_voucher_attr_control_t ivac); + +static inline void +ivac_reference(ipc_voucher_attr_control_t ivac) +{ + (void)hw_atomic_add(&ivac->ivac_refs, 1); +} + +static inline void +ivac_release(ipc_voucher_attr_control_t ivac) +{ + iv_refs_t refs; + + if (IVAC_NULL == ivac) + return; + + refs = hw_atomic_sub(&ivac->ivac_refs, 1); + if (refs == 0) + ivac_dealloc(ivac); +} + +#define IVAM_NULL IPC_VOUCHER_ATTR_MANAGER_NULL + +/* + * IPC voucher Resource Manager table element + * + * Information Associated with a specific registration of + * a voucher resource manager. + * + * NOTE: For now, this table is indexed directly by the key. In the future, + * it will have to be growable and sparse by key. When that is implemented + * the index will be independent from the key (but there will be a hash to + * find the index by key). + */ +typedef struct ipc_voucher_global_table_element { + ipc_voucher_attr_manager_t ivgte_manager; + ipc_voucher_attr_control_t ivgte_control; + mach_voucher_attr_key_t ivgte_key; +} ipc_voucher_global_table_element; + +typedef ipc_voucher_global_table_element *ipc_voucher_global_table_element_t; + +#endif /* MACH_KERNEL_PRIVATE */ + +/* + * IPC voucher attribute recipe + * + * In-kernel recipe format with an ipc_voucher_t pointer for the previous + * voucher reference. 
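+ *
+ * Raw recipe arrays are parsed as variable-size entries, each
+ * consuming
+ *
+ *	sizeof(ipc_voucher_attr_recipe_data_t) + content_size
+ *
+ * bytes, so an array with two entries carrying 8 and 0 content
+ * bytes occupies 2 * sizeof(ipc_voucher_attr_recipe_data_t) + 8
+ * bytes in all.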
+ */ +#pragma pack(1) +typedef struct ipc_voucher_attr_recipe_data { + mach_voucher_attr_key_t key; + mach_voucher_attr_recipe_command_t command; + ipc_voucher_t previous_voucher; + mach_voucher_attr_content_size_t content_size; + uint8_t content[]; +} ipc_voucher_attr_recipe_data_t; +typedef ipc_voucher_attr_recipe_data_t *ipc_voucher_attr_recipe_t; +typedef mach_msg_type_number_t ipc_voucher_attr_recipe_size_t; + +typedef uint8_t *ipc_voucher_attr_raw_recipe_t; +typedef ipc_voucher_attr_raw_recipe_t ipc_voucher_attr_raw_recipe_array_t; +typedef mach_msg_type_number_t ipc_voucher_attr_raw_recipe_size_t; +typedef mach_msg_type_number_t ipc_voucher_attr_raw_recipe_array_size_t; + +#pragma pack() + +/* + * In-kernel Resource Manager Definition + * + * In-kernel resource managers are defined by a v-table like structure for + * the three callouts supported by a resource manager (and release function). + * + * There is a single in-kernel resource manager that represents all the + * outside kernel managers (and reflects the calls through MIG to user-space). + */ + +typedef kern_return_t (*ipc_voucher_attr_manager_release_value_t)(ipc_voucher_attr_manager_t, + mach_voucher_attr_key_t, + mach_voucher_attr_value_handle_t, + mach_voucher_attr_value_reference_t); + +typedef kern_return_t (*ipc_voucher_attr_manager_get_value_t)(ipc_voucher_attr_manager_t, + mach_voucher_attr_key_t, + mach_voucher_attr_recipe_command_t, + mach_voucher_attr_value_handle_array_t, + mach_voucher_attr_value_handle_array_size_t, + mach_voucher_attr_content_t, + mach_voucher_attr_content_size_t, + mach_voucher_attr_value_handle_t *, + ipc_voucher_t *); + +typedef kern_return_t (*ipc_voucher_attr_manager_extract_content_t)(ipc_voucher_attr_manager_t, + mach_voucher_attr_key_t, + mach_voucher_attr_value_handle_array_t, + mach_voucher_attr_value_handle_array_size_t, + mach_voucher_attr_recipe_command_t *, + mach_voucher_attr_content_t, + mach_voucher_attr_content_size_t *); + +typedef kern_return_t (*ipc_voucher_attr_manager_command_t)(ipc_voucher_attr_manager_t, + mach_voucher_attr_key_t, + mach_voucher_attr_value_handle_array_t, + mach_voucher_attr_value_handle_array_size_t, + mach_voucher_attr_command_t, + mach_voucher_attr_content_t, + mach_voucher_attr_content_size_t, + mach_voucher_attr_content_t, + mach_voucher_attr_content_size_t *); + +typedef void (*ipc_voucher_attr_manager_release_t)(ipc_voucher_attr_manager_t); + +struct ipc_voucher_attr_manager { + ipc_voucher_attr_manager_release_value_t ivam_release_value; + ipc_voucher_attr_manager_get_value_t ivam_get_value; + ipc_voucher_attr_manager_extract_content_t ivam_extract_content; + ipc_voucher_attr_manager_command_t ivam_command; + ipc_voucher_attr_manager_release_t ivam_release; +}; + +__BEGIN_DECLS + +/* DEBUG/TRACE Convert from a port to a voucher */ +extern uintptr_t unsafe_convert_port_to_voucher( + ipc_port_t port); + +/* Convert from a port to a voucher */ +extern ipc_voucher_t convert_port_to_voucher( + ipc_port_t port); + +/* Convert from a port name to an ipc_voucher */ +extern ipc_voucher_t convert_port_name_to_voucher( + mach_port_name_t name); + +/* add a reference to the specified voucher */ +extern void ipc_voucher_reference( + ipc_voucher_t voucher); + +/* drop the voucher reference picked up above */ +extern void ipc_voucher_release( + ipc_voucher_t voucher); + +/* deliver voucher notifications */ +extern void ipc_voucher_notify( + mach_msg_header_t *msg); + +/* Convert from a voucher to a port */ +extern ipc_port_t convert_voucher_to_port( + 
ipc_voucher_t voucher); + +/* convert from a voucher attribute control to a port */ +extern ipc_port_t convert_voucher_attr_control_to_port( + ipc_voucher_attr_control_t control); + +/* add a reference to the specified voucher */ +extern void ipc_voucher_attr_control_reference( + ipc_voucher_attr_control_t control); + +/* drop the reference picked up above */ +extern void ipc_voucher_attr_control_release( + ipc_voucher_attr_control_t control); + +/* deliver voucher control notifications */ +extern void ipc_voucher_attr_control_notify( + mach_msg_header_t *msg); + +/* convert from a port to a voucher attribute control */ +extern ipc_voucher_attr_control_t convert_port_to_voucher_attr_control( + ipc_port_t port); + +/* + * In-kernel equivalents to the user syscalls + */ +extern kern_return_t +ipc_create_mach_voucher( + ipc_voucher_attr_raw_recipe_array_t recipes, + ipc_voucher_attr_raw_recipe_array_size_t recipe_size, + ipc_voucher_t *new_voucher); + +extern kern_return_t +ipc_voucher_attr_control_create_mach_voucher( + ipc_voucher_attr_control_t control, + ipc_voucher_attr_raw_recipe_array_t recipes, + ipc_voucher_attr_raw_recipe_array_size_t recipe_size, + ipc_voucher_t *new_voucher); + +extern kern_return_t +ipc_register_well_known_mach_voucher_attr_manager( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_value_handle_t default_value, + mach_voucher_attr_key_t key, + ipc_voucher_attr_control_t *control); + + +extern kern_return_t +ipc_register_mach_voucher_attr_manager( + ipc_voucher_attr_manager_t manager, + mach_voucher_attr_value_handle_t default_value, + mach_voucher_attr_key_t *key, + ipc_voucher_attr_control_t *control); + +__END_DECLS + +#endif /* _IPC_IPC_VOUCHER_H_ */ diff --git a/osfmk/ipc/mach_debug.c b/osfmk/ipc/mach_debug.c index c97e27bcf..67c303afe 100644 --- a/osfmk/ipc/mach_debug.c +++ b/osfmk/ipc/mach_debug.c @@ -290,6 +290,56 @@ mach_port_space_info( } #endif /* MACH_IPC_DEBUG */ +/* + * Routine: mach_port_space_basic_info + * Purpose: + * Returns basic information about an IPC space. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Returned information. + * KERN_FAILURE The call is not supported. + * KERN_INVALID_TASK The space is dead. 
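+ *
+ *		A sketch of a caller (the task's own space port is an
+ *		illustrative choice):
+ *
+ *			ipc_info_space_basic_t info;
+ *
+ *			kr = mach_port_space_basic_info(mach_task_self(), &info);
+ *
+ *		Table utilization is then info.iisb_table_inuse out of
+ *		info.iisb_table_size entries.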
+ */ + +#if !MACH_IPC_DEBUG +kern_return_t +mach_port_space_basic_info( + __unused ipc_space_t space, + __unused ipc_info_space_basic_t *infop) +{ + return KERN_FAILURE; +} +#else +kern_return_t +mach_port_space_basic_info( + ipc_space_t space, + ipc_info_space_basic_t *infop) +{ + if (space == IS_NULL) + return KERN_INVALID_TASK; + + + is_read_lock(space); + if (!is_active(space)) { + is_read_unlock(space); + return KERN_INVALID_TASK; + } + + /* get the basic space info */ + infop->iisb_genno_mask = MACH_PORT_NGEN(MACH_PORT_DEAD); + infop->iisb_table_size = space->is_table_size; + infop->iisb_table_next = space->is_table_next->its_size; + infop->iisb_table_inuse = space->is_table_size - space->is_table_free - 1; + infop->iisb_reserved[0] = 0; + infop->iisb_reserved[1] = 0; + + is_read_unlock(space); + + return KERN_SUCCESS; +} +#endif /* MACH_IPC_DEBUG */ + /* * Routine: mach_port_dnrequest_info * Purpose: @@ -431,7 +481,7 @@ mach_port_kobject( ip_unlock(port); if (0 != kaddr && is_ipc_kobject(*typep)) - *addrp = VM_KERNEL_ADDRPERM(VM_KERNEL_UNSLIDE(kaddr)); + *addrp = VM_KERNEL_UNSLIDE_OR_PERM(kaddr); else *addrp = 0; diff --git a/osfmk/ipc/mach_msg.c b/osfmk/ipc/mach_msg.c index af1ce5928..a12c70919 100644 --- a/osfmk/ipc/mach_msg.c +++ b/osfmk/ipc/mach_msg.c @@ -85,7 +85,6 @@ #include #include #include -#include #include #include #include @@ -104,6 +103,7 @@ #include #include #include +#include #include #include @@ -312,54 +312,25 @@ mach_msg_receive_results(void) if (copyout((char *) &self->ith_msize, msg_addr + offsetof(mach_msg_user_header_t, msgh_size), sizeof(mach_msg_size_t))) - mr = MACH_RCV_INVALID_DATA; - goto out; + mr = MACH_RCV_INVALID_DATA; + } else { + + /* discard importance in message */ + ipc_importance_clean(kmsg); + + if (msg_receive_error(kmsg, msg_addr, option, seqno, space) + == MACH_RCV_INVALID_DATA) + mr = MACH_RCV_INVALID_DATA; } - - if (msg_receive_error(kmsg, msg_addr, option, seqno, space) - == MACH_RCV_INVALID_DATA) - mr = MACH_RCV_INVALID_DATA; } - goto out; + return mr; } #if IMPORTANCE_INHERITANCE - if ((kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_RAISEIMP) != 0) { - __unused int impresult; - int sender_pid = -1; -#if IMPORTANCE_DEBUG - sender_pid = ((mach_msg_max_trailer_t *) - ((vm_offset_t)kmsg->ikm_header + round_msg(kmsg->ikm_header->msgh_size)))->msgh_audit.val[5]; -#endif /* IMPORTANCE_DEBUG */ - ipc_port_t port = kmsg->ikm_header->msgh_remote_port; - task_t task_self = current_task(); - - ip_lock(port); - assert(port->ip_impcount > 0); - port->ip_impcount--; - ip_unlock(port); - - if (task_self->imp_receiver == 0) { - /* - * The task was never ready to receive importance boost, remove msghbit. - * This can happen when a receive right (which has donor messages) is copied - * out to a non-imp_receiver task (we don't clear the bits on the messages, - * but we did't transfer any boost counts either). 
- */ - kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; - impresult = 0; - } else { - /* user will accept responsibility for the importance boost */ - task_importance_externalize_assertion(task_self, 1, sender_pid); - impresult = 1; - } -#if IMPORTANCE_DEBUG - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_DELV)) | DBG_FUNC_NONE, - sender_pid, audit_token_pid_from_task(task_self), - kmsg->ikm_header->msgh_id, impresult, 0); -#endif /* IMPORTANCE_DEBUG */ - } + /* adopt/transform any importance attributes carried in the message */ + ipc_importance_receive(kmsg, option); + #endif /* IMPORTANCE_INHERITANCE */ trailer_size = ipc_kmsg_add_trailer(kmsg, space, option, self, seqno, FALSE, @@ -375,13 +346,16 @@ mach_msg_receive_results(void) mach_msg_body_t *slist; slist = ipc_kmsg_get_scatter(msg_addr, slist_size, kmsg); - mr = ipc_kmsg_copyout(kmsg, space, map, slist); + mr = ipc_kmsg_copyout(kmsg, space, map, slist, option); ipc_kmsg_free_scatter(slist, slist_size); } else { - mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL); + mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL, option); } if (mr != MACH_MSG_SUCCESS) { + /* already received importance, so have to undo that here */ + ipc_importance_unreceive(kmsg, option); + if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { if (ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size + trailer_size) == MACH_RCV_INVALID_DATA) @@ -392,13 +366,13 @@ mach_msg_receive_results(void) == MACH_RCV_INVALID_DATA) mr = MACH_RCV_INVALID_DATA; } - goto out; + } else { + mr = ipc_kmsg_put(msg_addr, + kmsg, + kmsg->ikm_header->msgh_size + + trailer_size); } - mr = ipc_kmsg_put(msg_addr, - kmsg, - kmsg->ikm_header->msgh_size + - trailer_size); - out: + return mr; } diff --git a/osfmk/ipc/mach_port.c b/osfmk/ipc/mach_port.c index 39f1a6696..d8e489eb5 100644 --- a/osfmk/ipc/mach_port.c +++ b/osfmk/ipc/mach_port.c @@ -95,11 +95,12 @@ #include #include #include -#include #include #include -#include +#if IMPORTANCE_INHERITANCE +#include +#endif /* * Forward declarations @@ -1717,11 +1718,21 @@ void mach_port_get_status_helper( statusp->mps_pdrequest = port->ip_pdrequest != IP_NULL; statusp->mps_nsrequest = port->ip_nsrequest != IP_NULL; statusp->mps_flags = 0; - statusp->mps_flags |= ((port->ip_impdonation) ? MACH_PORT_STATUS_FLAG_IMP_DONATION:0); - statusp->mps_flags |= ((port->ip_tempowner) ? MACH_PORT_STATUS_FLAG_TEMPOWNER:0); - statusp->mps_flags |= ((port->ip_taskptr) ? MACH_PORT_STATUS_FLAG_TASKPTR:0); - statusp->mps_flags |= ((port->ip_guarded) ? MACH_PORT_STATUS_FLAG_GUARDED:0); - statusp->mps_flags |= ((port->ip_strict_guard) ? 
MACH_PORT_STATUS_FLAG_STRICT_GUARD:0); + if (port->ip_impdonation) { + statusp->mps_flags |= MACH_PORT_STATUS_FLAG_IMP_DONATION; + if (port->ip_tempowner) { + statusp->mps_flags |= MACH_PORT_STATUS_FLAG_TEMPOWNER; + if (IIT_NULL != port->ip_imp_task) { + statusp->mps_flags |= MACH_PORT_STATUS_FLAG_TASKPTR; + } + } + } + if (port->ip_guarded) { + statusp->mps_flags |= MACH_PORT_STATUS_FLAG_GUARDED; + if (port->ip_strict_guard) { + statusp->mps_flags |= MACH_PORT_STATUS_FLAG_STRICT_GUARD; + } + } return; } @@ -1894,20 +1905,27 @@ mach_port_set_attributes( if (!MACH_PORT_VALID(name)) return KERN_INVALID_RIGHT; - task_t release_imp_task = TASK_NULL; + ipc_importance_task_t release_imp_task = IIT_NULL; natural_t assertcnt = 0; kr = ipc_port_translate_receive(space, name, &port); if (kr != KERN_SUCCESS) return kr; - /* port is locked and active */ + /* + * don't allow temp-owner importance donation if user + * associated it with a kobject already (timer, host_notify target). + */ + if (is_ipc_kobject(ip_kotype(port))) { + ip_unlock(port); + return KERN_INVALID_ARGUMENT; + } + if (port->ip_tempowner != 0) { - if (port->ip_taskptr != 0) { + if (IIT_NULL != port->ip_imp_task) { release_imp_task = port->ip_imp_task; - port->ip_taskptr = 0; - port->ip_imp_task = TASK_NULL; + port->ip_imp_task = IIT_NULL; assertcnt = port->ip_impcount; } } else { @@ -1920,23 +1938,27 @@ mach_port_set_attributes( #if IMPORTANCE_INHERITANCE /* drop assertions from previous destination task */ - if (release_imp_task != TASK_NULL) { - assert(release_imp_task->imp_receiver != 0); + if (release_imp_task != IIT_NULL) { + assert(ipc_importance_task_is_any_receiver_type(release_imp_task)); if (assertcnt > 0) - task_importance_drop_internal_assertion(release_imp_task, assertcnt); - task_deallocate(release_imp_task); + ipc_importance_task_drop_internal_assertion(release_imp_task, assertcnt); + ipc_importance_task_release(release_imp_task); } else if (assertcnt > 0) { - release_imp_task = current_task(); - if (release_imp_task->imp_receiver != 0) - task_importance_drop_internal_assertion(release_imp_task, assertcnt); + release_imp_task = current_task()->task_imp_base; + if (release_imp_task != IIT_NULL && + ipc_importance_task_is_any_receiver_type(release_imp_task)) { + ipc_importance_task_drop_internal_assertion(release_imp_task, assertcnt); + } } #else - if (release_imp_task != TASK_NULL) - task_deallocate(release_imp_task); + if (release_imp_task != IIT_NULL) + ipc_importance_task_release(release_imp_task); #endif /* IMPORTANCE_INHERITANCE */ break; + #if IMPORTANCE_INHERITANCE + case MACH_PORT_DENAP_RECEIVER: case MACH_PORT_IMPORTANCE_RECEIVER: if (!MACH_PORT_VALID(name)) return KERN_INVALID_RIGHT; @@ -1944,8 +1966,17 @@ mach_port_set_attributes( kr = ipc_port_translate_receive(space, name, &port); if (kr != KERN_SUCCESS) return kr; - /* port is locked and active */ + /* + * don't allow importance donation if user associated + * it with a kobject already (timer, host_notify target). 
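+	 *
+	 * A typical caller simply marks a receive right it owns; no
+	 * content argument is needed (a sketch):
+	 *
+	 *	mach_port_set_attributes(mach_task_self(), name,
+	 *		MACH_PORT_IMPORTANCE_RECEIVER, NULL, 0);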
+ */ + if (is_ipc_kobject(ip_kotype(port))) { + ip_unlock(port); + return KERN_INVALID_ARGUMENT; + } + + /* port is locked and active */ port->ip_impdonation = 1; ip_unlock(port); @@ -2313,6 +2344,12 @@ mach_port_construct( goto cleanup; } + if (options->flags & MPO_DENAP_RECEIVER) { + kr = mach_port_set_attributes(space, *name, MACH_PORT_DENAP_RECEIVER, NULL, 0); + if (kr != KERN_SUCCESS) + goto cleanup; + } + if (options->flags & MPO_INSERT_SEND_RIGHT) { kr = ipc_object_copyin(space, *name, MACH_MSG_TYPE_MAKE_SEND, (ipc_object_t *)&port); if (kr != KERN_SUCCESS) @@ -2459,180 +2496,3 @@ mach_port_unguard( return kr; } -/* - * Get a (new) label handle representing the given port's port label. - */ -#if CONFIG_MACF_MACH -kern_return_t -mach_get_label( - ipc_space_t space, - mach_port_name_t name, - mach_port_name_t *outlabel) -{ - ipc_entry_t entry; - ipc_port_t port; - struct label outl; - kern_return_t kr; - int dead; - - if (!MACH_PORT_VALID(name)) - return KERN_INVALID_NAME; - - /* Lookup the port name in the task's space. */ - kr = ipc_right_lookup_write(space, name, &entry); - if (kr != KERN_SUCCESS) - return kr; - - port = (ipc_port_t) entry->ie_object; - dead = ipc_right_check(space, port, name, entry); - if (dead) { - is_write_unlock(space); - ip_release(port); - return KERN_INVALID_RIGHT; - } - /* port is now locked */ - - is_write_unlock(space); - /* Make sure we are not dealing with a label handle. */ - if (ip_kotype(port) == IKOT_LABELH) { - /* already is a label handle! */ - ip_unlock(port); - return KERN_INVALID_ARGUMENT; - } - - /* Copy the port label and stash it in a new label handle. */ - mac_port_label_init(&outl); - mac_port_label_copy(&port->ip_label, &outl); - kr = labelh_new_user(space, &outl, outlabel); - ip_unlock(port); - - return KERN_SUCCESS; -} -#else -kern_return_t -mach_get_label( - __unused ipc_space_t space, - __unused mach_port_name_t name, - __unused mach_port_name_t *outlabel) -{ - return KERN_INVALID_ARGUMENT; -} -#endif - -/* - * also works on label handles - */ -#if CONFIG_MACF_MACH -kern_return_t -mach_get_label_text( - ipc_space_t space, - mach_port_name_t name, - labelstr_t policies, - labelstr_t outlabel) -{ - ipc_entry_t entry; - ipc_port_t port; - kern_return_t kr; - struct label *l; - int dead; - - if (space == IS_NULL || space->is_task == NULL) - return KERN_INVALID_TASK; - - if (!MACH_PORT_VALID(name)) - return KERN_INVALID_NAME; - - kr = ipc_right_lookup_write(space, name, &entry); - if (kr != KERN_SUCCESS) - return kr; - - port = (ipc_port_t)entry->ie_object; - dead = ipc_right_check(space, port, name, entry); - if (dead) { - is_write_unlock(space); - ip_release(port); - return KERN_INVALID_RIGHT; - } - /* object (port) is now locked */ - - is_write_unlock (space); - l = io_getlabel(entry->ie_object); - - mac_port_label_externalize(l, policies, outlabel, 512, 0); - - io_unlocklabel(entry->ie_object); - io_unlock(entry->ie_object); - return KERN_SUCCESS; -} -#else -kern_return_t -mach_get_label_text( - __unused ipc_space_t space, - __unused mach_port_name_t name, - __unused labelstr_t policies, - __unused labelstr_t outlabel) -{ - return KERN_INVALID_ARGUMENT; -} -#endif - - -#if CONFIG_MACF_MACH -kern_return_t -mach_set_port_label( - ipc_space_t space, - mach_port_name_t name, - labelstr_t labelstr) -{ - ipc_entry_t entry; - kern_return_t kr; - struct label inl; - ipc_port_t port; - int rc; - - if (space == IS_NULL || space->is_task == NULL) - return KERN_INVALID_TASK; - - if (!MACH_PORT_VALID(name)) - return KERN_INVALID_NAME; - - 
mac_port_label_init(&inl); - rc = mac_port_label_internalize(&inl, labelstr); - if (rc) - return KERN_INVALID_ARGUMENT; - - kr = ipc_right_lookup_write(space, name, &entry); - if (kr != KERN_SUCCESS) - return kr; - - if (io_otype(entry->ie_object) != IOT_PORT) { - is_write_unlock(space); - return KERN_INVALID_RIGHT; - } - - port = (ipc_port_t) entry->ie_object; - ip_lock(port); - - tasklabel_lock(space->is_task); - rc = mac_port_check_label_update(&space->is_task->maclabel, - &port->ip_label, &inl); - tasklabel_unlock(space->is_task); - if (rc) - kr = KERN_NO_ACCESS; - else - mac_port_label_copy(&inl, &port->ip_label); - - ip_unlock(port); - is_write_unlock(space); - return kr; -} -#else -kern_return_t -mach_set_port_label( - ipc_space_t space __unused, - mach_port_name_t name __unused, - labelstr_t labelstr __unused) -{ - return KERN_INVALID_ARGUMENT; -} -#endif diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c index 817b94829..a397a9843 100644 --- a/osfmk/kdp/kdp.c +++ b/osfmk/kdp/kdp.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -51,8 +50,6 @@ #include #include -extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */ - #define DO_ALIGN 1 /* align all packet data accesses */ #define KDP_TEST_HARNESS 0 @@ -124,34 +121,6 @@ int reattach_wait = 0; int noresume_on_disconnect = 0; extern unsigned int return_on_panic; -typedef struct thread_snapshot *thread_snapshot_t; -typedef struct task_snapshot *task_snapshot_t; - -extern int -machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); -extern int -machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); -extern int -proc_pid(void *p); -extern uint64_t -proc_uniqueid(void *p); -extern uint64_t -proc_was_throttled(void *p); -extern uint64_t -proc_did_throttle(void *p); - -extern void -proc_name_kdp(task_t task, char *buf, int size); - -extern void -kdp_snapshot_postflight(void); - -static int -pid_from_task(task_t task); - -static uint64_t -proc_uniqueid_from_task(task_t task); - kdp_error_t kdp_set_breakpoint_internal( mach_vm_address_t address @@ -162,13 +131,6 @@ kdp_remove_breakpoint_internal( mach_vm_address_t address ); - -int -kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced); - -boolean_t kdp_copyin(pmap_t, uint64_t, void *, size_t); -extern void bcopy_phys(addr64_t, addr64_t, vm_size_t); - boolean_t kdp_packet( unsigned char *pkt, @@ -1033,481 +995,6 @@ kdp_reboot( return (TRUE); // no, not really, we won't return } -#define MAX_FRAMES 1000 - -static int pid_from_task(task_t task) -{ - int pid = -1; - - if (task->bsd_info) - pid = proc_pid(task->bsd_info); - - return pid; -} - -static uint64_t -proc_uniqueid_from_task(task_t task) -{ - uint64_t uniqueid = ~(0ULL); - - if (task->bsd_info) - uniqueid = proc_uniqueid(task->bsd_info); - - return uniqueid; -} - -static uint64_t -proc_was_throttled_from_task(task_t task) -{ - uint64_t was_throttled = 0; - - if (task->bsd_info) - was_throttled = proc_was_throttled(task->bsd_info); - - return was_throttled; -} - -static uint64_t -proc_did_throttle_from_task(task_t task) -{ - uint64_t did_throttle = 0; - - if (task->bsd_info) - did_throttle = proc_did_throttle(task->bsd_info); - - return did_throttle; -} - -boolean_t -kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) { - size_t rem = size; - char *kvaddr = dest; - - while (rem) { -
ppnum_t upn = pmap_find_phys(p, uaddr); - uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK); - uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr); - uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK); - uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK); - size_t cur_size = (uint32_t) MIN(src_rem, dst_rem); - cur_size = MIN(cur_size, rem); - - if (upn && pmap_valid_page(upn) && phys_dest) { - bcopy_phys(phys_src, phys_dest, cur_size); - } - else - break; - uaddr += cur_size; - kvaddr += cur_size; - rem -= cur_size; - } - return (rem == 0); -} - - -static void -kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap) -{ - unsigned int pages_reclaimed; - unsigned int pages_wanted; - kern_return_t kErr; - - processor_t processor; - vm_statistics64_t stat; - vm_statistics64_data_t host_vm_stat; - - processor = processor_list; - stat = &PROCESSOR_DATA(processor, vm_stat); - host_vm_stat = *stat; - - if (processor_count > 1) { - simple_lock(&processor_list_lock); - - while ((processor = processor->processor_list) != NULL) { - stat = &PROCESSOR_DATA(processor, vm_stat); - host_vm_stat.compressions += stat->compressions; - host_vm_stat.decompressions += stat->decompressions; - } - - simple_unlock(&processor_list_lock); - } - - memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC; - memio_snap->free_pages = vm_page_free_count; - memio_snap->active_pages = vm_page_active_count; - memio_snap->inactive_pages = vm_page_inactive_count; - memio_snap->purgeable_pages = vm_page_purgeable_count; - memio_snap->wired_pages = vm_page_wire_count; - memio_snap->speculative_pages = vm_page_speculative_count; - memio_snap->throttled_pages = vm_page_throttled_count; - memio_snap->busy_buffer_count = count_busy_buffers(); - memio_snap->filebacked_pages = vm_page_external_count; - memio_snap->compressions = (uint32_t)host_vm_stat.compressions; - memio_snap->decompressions = (uint32_t)host_vm_stat.decompressions; - memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT; - kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted); - if ( ! kErr ) { - memio_snap->pages_wanted = (uint32_t)pages_wanted; - memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed; - memio_snap->pages_wanted_reclaimed_valid = 1; - } else { - memio_snap->pages_wanted = 0; - memio_snap->pages_reclaimed = 0; - memio_snap->pages_wanted_reclaimed_valid = 0; - } -} - - - -/* - * Method for grabbing timer values safely, in the sense that no infinite loop will occur - * Certain flavors of the timer_grab function, which would seem to be the thing to use, - * can loop infinitely if called while the timer is in the process of being updated. - * Unfortunately, it is (rarely) possible to get inconsistent top and bottom halves of - * the timer using this method. This seems insoluble, since stackshot runs in a context - * where the timer might be half-updated, and has no way of yielding control just long - * enough to finish the update. 
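- * On LP64 configurations the 64-bit all_bits load below is a single atomic
- * read, so the value can be returned directly; 32-bit configurations read
- * the high and low halves separately, which is where the (rare)
- * inconsistency can slip in.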
- */ - -static uint64_t safe_grab_timer_value(struct timer *t) -{ -#if defined(__LP64__) - return t->all_bits; -#else - uint64_t time = t->high_bits; /* endian independent grab */ - time = (time << 32) | t->low_bits; - return time; -#endif -} - -int -kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced) -{ - char *tracepos = (char *) tracebuf; - char *tracebound = tracepos + tracebuf_size; - uint32_t tracebytes = 0; - int error = 0; - - task_t task = TASK_NULL; - thread_t thread = THREAD_NULL; - thread_snapshot_t tsnap = NULL; - unsigned framesize = 2 * sizeof(vm_offset_t); - - queue_head_t *task_list = &tasks; - boolean_t is_active_list = TRUE; - - boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0); - boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0); - boolean_t save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0); - boolean_t save_userframes_p = ((trace_flags & STACKSHOT_SAVE_KERNEL_FRAMES_ONLY) == 0); - - if(trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) { - if(tracepos + sizeof(struct mem_and_io_snapshot) > tracebound) { - error = -1; - goto error_exit; - } - kdp_mem_and_io_snapshot((struct mem_and_io_snapshot *)tracepos); - tracepos += sizeof(struct mem_and_io_snapshot); - } - -walk_list: - queue_iterate(task_list, task, task_t, tasks) { - if ((task == NULL) || !ml_validate_nofault((vm_offset_t) task, sizeof(struct task))) - goto error_exit; - - int task_pid = pid_from_task(task); - uint64_t task_uniqueid = proc_uniqueid_from_task(task); - boolean_t task64 = task_has_64BitAddr(task); - - if (!task->active) { - /* - * Not interested in terminated tasks without threads, and - * at the moment, stackshot can't handle a task without a name. - */ - if (queue_empty(&task->threads) || task_pid == -1) { - continue; - } - } - - /* Trace everything, unless a process was specified */ - if ((pid == -1) || (pid == task_pid)) { - task_snapshot_t task_snap; - uint32_t uuid_info_count = 0; - mach_vm_address_t uuid_info_addr = 0; - boolean_t have_map = (task->map != NULL) && - (ml_validate_nofault((vm_offset_t)(task->map), sizeof(struct _vm_map))); - boolean_t have_pmap = have_map && (task->map->pmap != NULL) && - (ml_validate_nofault((vm_offset_t)(task->map->pmap), sizeof(struct pmap))); - uint64_t shared_cache_base_address = 0; - - if (have_pmap && task->active && save_loadinfo_p && task_pid > 0) { - // Read the dyld_all_image_infos struct from the task memory to get UUID array count and location - if (task64) { - struct user64_dyld_all_image_infos task_image_infos; - if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user64_dyld_all_image_infos))) { - uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount; - uuid_info_addr = task_image_infos.uuidArray; - } - } else { - struct user32_dyld_all_image_infos task_image_infos; - if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user32_dyld_all_image_infos))) { - uuid_info_count = task_image_infos.uuidArrayCount; - uuid_info_addr = task_image_infos.uuidArray; - } - } - - // If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating - // this data structure), we zero the uuid_info_count so that we won't even try to save load info - // for this task. 
- if (!uuid_info_addr) { - uuid_info_count = 0; - } - } - - if (have_pmap && save_kextloadinfo_p && task_pid == 0) { - if (ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) { - uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */ - } - } - - if (tracepos + sizeof(struct task_snapshot) > tracebound) { - error = -1; - goto error_exit; - } - - task_snap = (task_snapshot_t) tracepos; - task_snap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC; - task_snap->pid = task_pid; - task_snap->uniqueid = task_uniqueid; - task_snap->nloadinfos = uuid_info_count; - /* Add the BSD process identifiers */ - if (task_pid != -1) - proc_name_kdp(task, task_snap->p_comm, sizeof(task_snap->p_comm)); - else - task_snap->p_comm[0] = '\0'; - task_snap->ss_flags = 0; - if (task64) - task_snap->ss_flags |= kUser64_p; - if (task64 && task_pid == 0) - task_snap->ss_flags |= kKernel64_p; - if (!task->active) - task_snap->ss_flags |= kTerminatedSnapshot; - if(task->pidsuspended) task_snap->ss_flags |= kPidSuspended; - if(task->frozen) task_snap->ss_flags |= kFrozen; - - if (task->effective_policy.darwinbg == 1) { - task_snap->ss_flags |= kTaskDarwinBG; - } - - if (task->effective_policy.t_sup_active == 1) - task_snap->ss_flags |= kTaskIsSuppressed; - - task_snap->latency_qos = (task->effective_policy.t_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ? - LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.t_latency_qos); - - task_snap->suspend_count = task->suspend_count; - task_snap->task_size = have_pmap ? pmap_resident_count(task->map->pmap) : 0; - task_snap->faults = task->faults; - task_snap->pageins = task->pageins; - task_snap->cow_faults = task->cow_faults; - - task_snap->user_time_in_terminated_threads = task->total_user_time; - task_snap->system_time_in_terminated_threads = task->total_system_time; - /* - * The throttling counters are maintained as 64-bit counters in the proc - * structure. However, we reserve 32-bits (each) for them in the task_snapshot - * struct to save space and since we do not expect them to overflow 32-bits. If we - * find these values overflowing in the future, the fix would be to simply - * upgrade these counters to 64-bit in the task_snapshot struct - */ - task_snap->was_throttled = (uint32_t) proc_was_throttled_from_task(task); - task_snap->did_throttle = (uint32_t) proc_did_throttle_from_task(task); - - if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region, - sizeof(struct vm_shared_region))) { - struct vm_shared_region *sr = task->shared_region; - - shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping; - } - if (!shared_cache_base_address - || !kdp_copyin(task->map->pmap, shared_cache_base_address, task_snap->shared_cache_identifier, sizeof(task_snap->shared_cache_identifier))) { - memset(task_snap->shared_cache_identifier, 0x0, sizeof(task_snap->shared_cache_identifier)); - } - if (task->shared_region) { - /* - * No refcounting here, but we are in debugger - * context, so that should be safe. - */ - task_snap->shared_cache_slide = task->shared_region->sr_slide_info.slide; - } else { - task_snap->shared_cache_slide = 0; - } - - tracepos += sizeof(struct task_snapshot); - - if (task_pid > 0 && uuid_info_count > 0) { - uint32_t uuid_info_size = (uint32_t)(task64 ? 
sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info)); - uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size; - - if (tracepos + uuid_info_array_size > tracebound) { - error = -1; - goto error_exit; - } - - // Copy in the UUID info array - // It may be nonresident, in which case just fix up nloadinfos to 0 in the task_snap - if (have_pmap && !kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size)) - task_snap->nloadinfos = 0; - else - tracepos += uuid_info_array_size; - } else if (task_pid == 0 && uuid_info_count > 0) { - uint32_t uuid_info_size = (uint32_t)sizeof(kernel_uuid_info); - uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size; - kernel_uuid_info *output_uuids; - - if (tracepos + uuid_info_array_size > tracebound) { - error = -1; - goto error_exit; - } - - output_uuids = (kernel_uuid_info *)tracepos; - - do { - - if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) { - /* Kernel UUID not found or inaccessible */ - task_snap->nloadinfos = 0; - break; - } - - output_uuids[0].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext); - memcpy(&output_uuids[0].imageUUID, kernel_uuid, sizeof(uuid_t)); - - if (ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]), - gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) { - uint32_t kexti; - - for (kexti=0 ; kexti < gLoadedKextSummaries->numSummaries; kexti++) { - output_uuids[1+kexti].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address); - memcpy(&output_uuids[1+kexti].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t)); - } - - tracepos += uuid_info_array_size; - } else { - /* kext summary invalid, but kernel UUID was copied */ - task_snap->nloadinfos = 1; - tracepos += uuid_info_size; - break; - } - } while(0); - } - - queue_iterate(&task->threads, thread, thread_t, task_threads){ - uint64_t tval; - - if ((thread == NULL) || !ml_validate_nofault((vm_offset_t) thread, sizeof(struct thread))) - goto error_exit; - - if (((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound)) { - error = -1; - goto error_exit; - } - if (!save_userframes_p && thread->kernel_stack == 0) - continue; - - /* Populate the thread snapshot header */ - tsnap = (thread_snapshot_t) tracepos; - tsnap->thread_id = thread_tid(thread); - tsnap->state = thread->state; - tsnap->priority = thread->priority; - tsnap->sched_pri = thread->sched_pri; - tsnap->sched_flags = thread->sched_flags; - tsnap->wait_event = VM_KERNEL_UNSLIDE(thread->wait_event); - tsnap->continuation = VM_KERNEL_UNSLIDE(thread->continuation); - tval = safe_grab_timer_value(&thread->user_timer); - tsnap->user_time = tval; - tval = safe_grab_timer_value(&thread->system_timer); - if (thread->precise_user_kernel_time) { - tsnap->system_time = tval; - } else { - tsnap->user_time += tval; - tsnap->system_time = 0; - } - tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC; - tracepos += sizeof(struct thread_snapshot); - tsnap->ss_flags = 0; - - if (thread->effective_policy.darwinbg) { - tsnap->ss_flags |= kThreadDarwinBG; - } - - if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) { - uint64_t dqkeyaddr = thread_dispatchqaddr(thread); - if (dqkeyaddr != 0) { - uint64_t dqaddr = 0; - if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 
8 : 4)) && (dqaddr != 0)) { - uint64_t dqserialnumaddr = dqaddr + dispatch_offset; - uint64_t dqserialnum = 0; - if (kdp_copyin(task->map->pmap, dqserialnumaddr, &dqserialnum, (task64 ? 8 : 4))) { - tsnap->ss_flags |= kHasDispatchSerial; - *(uint64_t *)tracepos = dqserialnum; - tracepos += 8; - } - } - } - } -/* Call through to the machine specific trace routines - * Frames are added past the snapshot header. - */ - tracebytes = 0; - if (thread->kernel_stack != 0) { -#if defined(__LP64__) - tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE); - tsnap->ss_flags |= kKernel64_p; - framesize = 16; -#else - tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE); - framesize = 8; -#endif - } - tsnap->nkern_frames = tracebytes/framesize; - tracepos += tracebytes; - tracebytes = 0; - /* Trace user stack, if any */ - if (save_userframes_p && task->active && thread->task->map != kernel_map) { - /* 64-bit task? */ - if (task_has_64BitAddr(thread->task)) { - tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE); - tsnap->ss_flags |= kUser64_p; - framesize = 16; - } - else { - tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE); - framesize = 8; - } - } - tsnap->nuser_frames = tracebytes/framesize; - tracepos += tracebytes; - tracebytes = 0; - } - } - } - - if (is_active_list) { - is_active_list = FALSE; - task_list = &terminated_tasks; - goto walk_list; - } - -error_exit: - /* Release stack snapshot wait indicator */ - kdp_snapshot_postflight(); - - *pbytesTraced = (uint32_t)(tracepos - (char *) tracebuf); - - return error; -} - static boolean_t kdp_readioport( kdp_pkt_t *pkt, @@ -1663,3 +1150,4 @@ kdp_dumpinfo( return (TRUE); } + diff --git a/osfmk/kdp/kdp_internal.h b/osfmk/kdp/kdp_internal.h index 337dad81d..f73d34582 100644 --- a/osfmk/kdp/kdp_internal.h +++ b/osfmk/kdp/kdp_internal.h @@ -203,3 +203,4 @@ kdp_machine_msr64_read(kdp_readmsr64_req_t *, caddr_t /* data */, uint16_t /* lc int kdp_machine_msr64_write(kdp_writemsr64_req_t *, caddr_t /* data */, uint16_t /* lcpu */); + diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c index e51c48286..9b2bc17ec 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -71,6 +71,12 @@ #include #include +extern unsigned int not_in_kdp; +extern int kdp_snapshot; +extern void do_stackshot(void); + +#ifdef CONFIG_KDP_INTERACTIVE_DEBUGGING + extern int inet_aton(const char *, struct kdp_in_addr *); /* in libkern */ extern char *inet_ntoa_r(struct kdp_in_addr ina, char *buf, size_t buflen); /* in libkern */ @@ -82,12 +88,6 @@ extern char *inet_ntoa_r(struct kdp_in_addr ina, char *buf, extern int kdp_getc(void); extern int reattach_wait; -/* only used by IONetworkingFamily */ -typedef uint32_t (*kdp_link_t)(void); -typedef boolean_t (*kdp_mode_t)(boolean_t); -void kdp_register_link(kdp_link_t link, kdp_mode_t mode); -void kdp_unregister_link(kdp_link_t link, kdp_mode_t mode); - static u_short ip_id; /* ip packet ctr, for ids */ /* @(#)udp_usrreq.c 2.2 88/05/23 4.0NFSSRC SMI; from UCB 7.1 6/5/86 */ @@ -247,7 +247,7 @@ static const char volatile int kdp_flag = 0; -static kdp_send_t kdp_en_send_pkt; +kdp_send_t kdp_en_send_pkt; static kdp_receive_t kdp_en_recv_pkt; static kdp_link_t kdp_en_linkstatus; static kdp_mode_t kdp_en_setmode; @@ -305,7 +305,6 @@ static unsigned int panic_block = 0; volatile unsigned int kdp_trigger_core_dump = 0; __private_extern__ volatile unsigned int flag_kdp_trigger_reboot = 0; -extern unsigned int 
not_in_kdp; extern unsigned int disableConsoleOutput; @@ -333,42 +332,12 @@ char kdp_kernelversion_string[256]; static boolean_t gKDPDebug = FALSE; #define KDP_DEBUG(...) if (gKDPDebug) printf(__VA_ARGS__); -int kdp_snapshot = 0; -static int stack_snapshot_ret = 0; -static unsigned stack_snapshot_bytes_traced = 0; - -static void *stack_snapshot_buf; -static uint32_t stack_snapshot_bufsize; -static int stack_snapshot_pid; -static uint32_t stack_snapshot_flags; -static uint32_t stack_snapshot_dispatch_offset; - -static unsigned int old_debugger; - #define SBLOCKSZ (2048) uint64_t kdp_dump_start_time = 0; uint64_t kdp_min_superblock_dump_time = ~1ULL; uint64_t kdp_max_superblock_dump_time = 0; uint64_t kdp_superblock_dump_time = 0; uint64_t kdp_superblock_dump_start_time = 0; - -void -kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, - uint32_t flags, uint32_t dispatch_offset); - -void -kdp_snapshot_postflight(void); - -extern int -kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, - uint32_t flags, uint32_t dispatch_offset, uint32_t *pbytesTraced); - -int -kdp_stack_snapshot_geterror(void); - -int -kdp_stack_snapshot_bytes_traced(void); - static thread_call_t kdp_timer_call; @@ -499,46 +468,6 @@ kdp_unregister_send_receive( kdp_en_recv_pkt = NULL; } -/* Cache stack snapshot parameters in preparation for a trace */ -void -kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset) -{ - stack_snapshot_pid = pid; - stack_snapshot_buf = tracebuf; - stack_snapshot_bufsize = tracebuf_size; - stack_snapshot_flags = flags; - stack_snapshot_dispatch_offset = dispatch_offset; - kdp_snapshot++; - /* Mark this debugger as active, since the polled mode driver that - * ordinarily does this may not be enabled (yet), or since KDB may be - * the primary debugger. - */ - old_debugger = current_debugger; - if (old_debugger != KDP_CUR_DB) { - current_debugger = KDP_CUR_DB; - } -} - -void -kdp_snapshot_postflight(void) -{ - kdp_snapshot--; - if ((kdp_en_send_pkt == NULL) || (old_debugger == KDB_CUR_DB)) - current_debugger = old_debugger; -} - -int -kdp_stack_snapshot_geterror(void) -{ - return stack_snapshot_ret; -} - -int -kdp_stack_snapshot_bytes_traced(void) -{ - return stack_snapshot_bytes_traced; -} - static void kdp_schedule_debugger_reentry(unsigned interval) { uint64_t deadline;; @@ -1371,29 +1300,14 @@ kdp_send_exception( } } -void -kdp_raise_exception( +static void +kdp_debugger_loop( unsigned int exception, unsigned int code, unsigned int subcode, - void *saved_state -) + void *saved_state) { int index; - unsigned int initial_not_in_kdp = not_in_kdp; - - not_in_kdp = 0; - /* Was a system trace requested ? 
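- * (kdp_snapshot is raised by kdp_snapshot_preflight(); when it is set we
- * take the stackshot and return without entering the debugger loop.)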
*/ - if (kdp_snapshot && (!panic_active()) && (panic_caller == 0)) { - stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid, - stack_snapshot_buf, stack_snapshot_bufsize, - stack_snapshot_flags, stack_snapshot_dispatch_offset, - &stack_snapshot_bytes_traced); - not_in_kdp = initial_not_in_kdp; - return; - } - - disable_preemption(); if (saved_state == 0) printf("kdp_raise_exception with NULL state\n"); @@ -1437,7 +1351,7 @@ kdp_raise_exception( kdp_panic_dump(); if (!(kdp_flag & DBG_POST_CORE)) - goto exit_raise_exception; + goto exit_debugger_loop; } again: @@ -1496,13 +1410,9 @@ kdp_raise_exception( if (reattach_wait == 1) goto again; -exit_raise_exception: +exit_debugger_loop: if (kdp_en_setmode) (*kdp_en_setmode)(FALSE); /* link cleanup */ - - not_in_kdp = initial_not_in_kdp; - - enable_preemption(); } void @@ -2062,8 +1972,8 @@ kdp_panic_dump(void) if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP)) { kdb_printf_unbuffered("Transmitting panic log, please wait: "); kdp_send_crashdump_data(KDP_DATA, corename_str, - debug_buf_ptr - debug_buf, - debug_buf); + debug_buf_ptr - debug_buf_addr, + debug_buf_addr); kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0)); printf("Please file a bug report on this panic, if possible.\n"); goto panic_dump_exit; @@ -2163,7 +2073,8 @@ kdp_serial_setmode(boolean_t active) } -static void kdp_serial_callout(__unused void *arg, kdp_event_t event) +static void +kdp_serial_callout(__unused void *arg, kdp_event_t event) { /* When we stop KDP, set the bit to re-initialize the console serial port * the next time we send/receive a KDP packet. We don't do it on @@ -2205,7 +2116,7 @@ kdp_init(void) debug_log_init(); -#if defined(__x86_64__) || defined(__arm__) +#if defined(__x86_64__) || defined(__arm__) || defined(__arm64__) if (vm_kernel_slide) { char KASLR_stext[19]; strlcat(kdp_kernelversion_string, "; stext=", sizeof(kdp_kernelversion_string)); @@ -2251,3 +2162,70 @@ kdp_init(void) #endif /* CONFIG_SERIAL_KDP */ } + +#else /* CONFIG_KDP_INTERACTIVE_DEBUGGING */ +void +kdp_init(void) +{ +} +#endif /* CONFIG_KDP_INTERACTIVE_DEBUGGING */ + +#if defined(__arm64__) || !CONFIG_KDP_INTERACTIVE_DEBUGGING +static void +panic_spin_forever() +{ + kdb_printf("\nPlease go to https://panic.apple.com to report this panic\n"); + for (;;) { } +} +#endif + +void +kdp_raise_exception( + unsigned int exception, + unsigned int code, + unsigned int subcode, + void *saved_state +) +{ + unsigned int initial_not_in_kdp = not_in_kdp; + + not_in_kdp = 0; + /* Was a system trace requested ? */ + if (kdp_snapshot && (!panic_active()) && (panic_caller == 0)) { + do_stackshot(); + not_in_kdp = initial_not_in_kdp; + return; + } + + +#if CONFIG_KDP_INTERACTIVE_DEBUGGING + + if (current_debugger != KDP_CUR_DB) { + kdb_printf("\nDebugger not configured. Hanging.\n"); + for (;;) { } + } + + disable_preemption(); + + kdp_debugger_loop(exception, code, subcode, saved_state); + not_in_kdp = initial_not_in_kdp; + enable_preemption(); +#else /* CONFIG_KDP_INTERACTIVE_DEBUGGING */ + assert(current_debugger != KDP_CUR_DB); + + /* + * If kernel debugging is enabled via boot-args, but KDP debugging + * is not compiled into the kernel, spin here waiting for debugging + * via another method. Why here? Because we want to have watchdog + * disabled (via KDP callout) while sitting waiting to be debugged. 
+ */ + panic_spin_forever(); + + (void)exception; + (void)code; + (void)subcode; + (void)saved_state; +#endif /* CONFIG_KDP_INTERACTIVE_DEBUGGING */ +} + + diff --git a/osfmk/kdp/kdp_udp.h b/osfmk/kdp/kdp_udp.h index 49bb1ab23..6c587d274 100644 --- a/osfmk/kdp/kdp_udp.h +++ b/osfmk/kdp/kdp_udp.h @@ -29,6 +29,8 @@ * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. */ +#ifndef __KDP_UDP_H +#define __KDP_UDP_H #include #include /* OSSwap functions */ @@ -61,3 +63,13 @@ typedef struct kdp_ether_header ether_header_t; #define ntohl(x) OSSwapBigToHostInt32(x) #define htons(x) OSSwapHostToBigInt16(x) #define htonl(x) OSSwapHostToBigInt32(x) + +/* + * IONetworkingFamily only. + */ +typedef uint32_t (*kdp_link_t)(void); +typedef boolean_t (*kdp_mode_t)(boolean_t); +void kdp_register_link(kdp_link_t link, kdp_mode_t mode); +void kdp_unregister_link(kdp_link_t link, kdp_mode_t mode); + +#endif /* __KDP_UDP_H */ diff --git a/osfmk/kdp/ml/i386/kdp_x86_common.c b/osfmk/kdp/ml/i386/kdp_x86_common.c index ff05a8e5e..6d4581287 100644 --- a/osfmk/kdp/ml/i386/kdp_x86_common.c +++ b/osfmk/kdp/ml/i386/kdp_x86_common.c @@ -66,7 +66,7 @@ boolean_t kdp_read_io; boolean_t kdp_trans_off; -static addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); +addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); int kern_dump_pmap_traverse_preflight_callback(vm_map_offset_t start, vm_map_offset_t end, @@ -77,7 +77,7 @@ int kern_dump_pmap_traverse_send_callback(vm_map_offset_t start, pmap_t kdp_pmap = 0; -static addr64_t +addr64_t kdp_vtophys( pmap_t pmap, addr64_t va) @@ -88,7 +88,7 @@ kdp_vtophys( pp = pmap_find_phys(pmap, va); if(!pp) return 0; - pa = ((addr64_t)pp << 12) | (va & 0x0000000000000FFFULL); + pa = ((addr64_t)pp << PAGE_SHIFT) | (va & PAGE_MASK); return(pa); } diff --git a/osfmk/kdp/ml/x86_64/kdp_machdep.c b/osfmk/kdp/ml/x86_64/kdp_machdep.c index d8587db3d..1f35a37bf 100644 --- a/osfmk/kdp/ml/x86_64/kdp_machdep.c +++ b/osfmk/kdp/ml/x86_64/kdp_machdep.c @@ -57,6 +57,9 @@ extern cpu_type_t cpuid_cputype(void); extern cpu_subtype_t cpuid_cpusubtype(void); +extern vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr); +extern void machine_trace_thread_clear_validation_cache(void); + void print_saved_state(void *); void kdp_call(void); int kdp_getc(void); @@ -517,7 +520,8 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra uint32_t init_eip = 0; uint32_t prevsp = 0; uint32_t framesize = 2 * sizeof(vm_offset_t); - + vm_offset_t kern_virt_addr = 0; + if (user_p) { x86_saved_state32_t *iss32; @@ -559,18 +563,27 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra break; } - if (kdp_machine_vm_read((mach_vm_address_t)(stackptr + RETURN_OFFSET), (caddr_t) tracebuf, sizeof(*tracebuf)) != sizeof(*tracebuf)) { + kern_virt_addr = machine_trace_thread_get_kva(stackptr + RETURN_OFFSET); + + if (!kern_virt_addr) { break; } + + *tracebuf = *(uint32_t *)kern_virt_addr; tracebuf++; prevsp = stackptr; - if (kdp_machine_vm_read((mach_vm_address_t)stackptr, (caddr_t) &stackptr, sizeof(stackptr)) != sizeof(stackptr)) { + kern_virt_addr = machine_trace_thread_get_kva(stackptr); + + if (!kern_virt_addr) { *tracebuf++ = 0; break; } + + stackptr = *(uint32_t *)kern_virt_addr; } + machine_trace_thread_clear_validation_cache(); kdp_pmap = 0; return (uint32_t) (((char *) tracebuf) - tracepos); @@ -595,6 +608,7 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf addr64_t init_rip = 0; addr64_t 
prevsp = 0; unsigned framesize = 2 * sizeof(addr64_t); + vm_offset_t kern_virt_addr = 0; if (user_p) { x86_saved_state64_t *iss64; @@ -625,7 +639,7 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf break; } - if (stackptr & 0x0000003) { + if (stackptr & 0x0000007) { break; } @@ -633,21 +647,30 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf break; } - if (machine_read64(stackptr + RETURN_OFFSET64, (caddr_t) tracebuf, sizeof(addr64_t)) != sizeof(addr64_t)) { + kern_virt_addr = machine_trace_thread_get_kva(stackptr + RETURN_OFFSET64); + + if (!kern_virt_addr) { break; } + + *tracebuf = *(uint64_t *)kern_virt_addr; if (!user_p) *tracebuf = VM_KERNEL_UNSLIDE(*tracebuf); tracebuf++; prevsp = stackptr; - if (machine_read64(stackptr, (caddr_t) &stackptr, sizeof(addr64_t)) != sizeof(addr64_t)) { + kern_virt_addr = machine_trace_thread_get_kva(stackptr); + + if (!kern_virt_addr) { *tracebuf++ = 0; break; } + + stackptr = *(uint64_t *)kern_virt_addr; } + machine_trace_thread_clear_validation_cache(); kdp_pmap = NULL; return (uint32_t) (((char *) tracebuf) - tracepos); diff --git a/osfmk/kern/Makefile b/osfmk/kern/Makefile index fa484a3c9..fcf236084 100644 --- a/osfmk/kern/Makefile +++ b/osfmk/kern/Makefile @@ -6,20 +6,26 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -DATAFILES = +DATAFILES = \ + exc_resource.h -PRIVATE_DATAFILES = exc_resource.h +PRIVATE_DATAFILES = \ + ecc.h \ + exc_resource.h -EXPORT_ONLY_FILES = \ +EXPORT_FILES = \ affinity.h \ assert.h \ audit_sessionport.h \ call_entry.h \ clock.h \ + coalition.h \ cpu_number.h \ cpu_data.h \ debug.h \ + energy_perf.h \ extmod_statistics.h \ + hv_support.h \ ipc_mig.h \ ipc_misc.h \ kalloc.h \ @@ -37,6 +43,7 @@ EXPORT_ONLY_FILES = \ processor.h \ queue.h \ sched_prim.h \ + sfi.h \ simple_lock.h \ startup.h \ task.h \ @@ -47,11 +54,15 @@ EXPORT_ONLY_FILES = \ wait_queue.h \ zalloc.h -INSTALL_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LIST = ${DATAFILES} + +INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} debug.h + +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} ${EXPORT_FILES} INSTALL_MI_DIR = kern -EXPORT_MI_LIST = ${DATAFILES} ${EXPORT_ONLY_FILES} +EXPORT_MI_LIST = ${PRIVATE_DATAFILES} ${EXPORT_FILES} EXPORT_MI_DIR = kern diff --git a/osfmk/kern/affinity.c b/osfmk/kern/affinity.c index 3af3c4b10..8e6b3ee5a 100644 --- a/osfmk/kern/affinity.c +++ b/osfmk/kern/affinity.c @@ -79,6 +79,10 @@ static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread); * kern.affinity_sets_enabled - disables hinting if cleared * kern.affinity_sets_mapping - controls cache distribution policy * See bsd/kern_sysctl.c + * + * Affinity sets are not used on embedded, which typically only + * has a single pset, and last-processor affinity is + * more important than pset affinity. 
*/ boolean_t affinity_sets_enabled = TRUE; int affinity_sets_mapping = 1; diff --git a/osfmk/kern/affinity.h b/osfmk/kern/affinity.h index 167bfd2d8..98f46cae7 100644 --- a/osfmk/kern/affinity.h +++ b/osfmk/kern/affinity.h @@ -35,7 +35,6 @@ #include #include -#include /* * An affinity set object represents a set of threads identified by the user diff --git a/osfmk/kern/assert.h b/osfmk/kern/assert.h index c704dca5f..17d625506 100644 --- a/osfmk/kern/assert.h +++ b/osfmk/kern/assert.h @@ -73,7 +73,7 @@ __BEGIN_DECLS extern void Assert( const char *file, int line, - const char *expression); + const char *expression) __attribute__((noinline)); #if CONFIG_NO_PANIC_STRINGS #define Assert(file, line, ex) (Assert)("", line, "") @@ -84,7 +84,7 @@ __END_DECLS #if MACH_ASSERT #define assert(ex) \ - ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) + (__builtin_expect(!!((long)(ex)), 1L) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) #define assert_static(x) assert(x) #define __assert_only diff --git a/osfmk/kern/ast.c b/osfmk/kern/ast.c index ee197f7f6..3a3caa03a 100644 --- a/osfmk/kern/ast.c +++ b/osfmk/kern/ast.c @@ -65,9 +65,6 @@ * */ -#include -#include - #include #include #include @@ -77,6 +74,7 @@ #include #include #include +#include #if CONFIG_TELEMETRY #include #endif @@ -141,7 +139,7 @@ ast_taken( if (reasons & AST_PREEMPT) { counter(c_ast_taken_block++); thread_block_reason(THREAD_CONTINUE_NULL, NULL, - AST_PREEMPT | AST_URGENT); + reasons & AST_PREEMPTION); } reasons &= ~AST_PREEMPTION; @@ -198,27 +196,35 @@ ast_taken( #if CONFIG_TELEMETRY if (reasons & AST_TELEMETRY_ALL) { - boolean_t interrupted_userspace; + boolean_t interrupted_userspace = FALSE; + boolean_t is_windowed = FALSE; assert((reasons & AST_TELEMETRY_ALL) != AST_TELEMETRY_ALL); /* only one is valid at a time */ interrupted_userspace = (reasons & AST_TELEMETRY_USER) ? TRUE : FALSE; + is_windowed = ((reasons & AST_TELEMETRY_WINDOWED) ? TRUE : FALSE); thread_ast_clear(thread, AST_TELEMETRY_ALL); - telemetry_ast(thread, interrupted_userspace); + telemetry_ast(thread, interrupted_userspace, is_windowed); } #endif ml_set_interrupts_enabled(FALSE); + if (reasons & AST_SFI) { + sfi_ast(thread); + } + /* - * Check for preemption. + * Check for preemption. Conditions may have changed from when the AST_PREEMPT was originally set. */ + thread_lock(thread); if (reasons & AST_PREEMPT) - reasons = csw_check(current_processor()); + reasons = csw_check(current_processor(), reasons & AST_QUANTUM); + thread_unlock(thread); if ( (reasons & AST_PREEMPT) && wait_queue_assert_possible(thread) ) { counter(c_ast_taken_block++); - thread_block_reason((thread_continue_t)thread_exception_return, NULL, AST_PREEMPT); + thread_block_reason((thread_continue_t)thread_exception_return, NULL, reasons & AST_PREEMPTION); } } } @@ -235,8 +241,6 @@ ast_check( { thread_t thread = processor->active_thread; - processor->current_pri = thread->sched_pri; - processor->current_thmode = thread->sched_mode; if ( processor->state == PROCESSOR_RUNNING || processor->state == PROCESSOR_SHUTDOWN ) { ast_t preempt; @@ -251,7 +255,14 @@ ast_check( /* * Context switch check. 
*/ - if ((preempt = csw_check(processor)) != AST_NONE) + thread_lock(thread); + + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class = sfi_thread_classify(thread); + + if ((preempt = csw_check(processor, AST_NONE)) != AST_NONE) ast_on(preempt); + thread_unlock(thread); } } diff --git a/osfmk/kern/ast.h b/osfmk/kern/ast.h index 272396cbf..28886bb2e 100644 --- a/osfmk/kern/ast.h +++ b/osfmk/kern/ast.h @@ -63,11 +63,9 @@ #ifndef _KERN_AST_H_ #define _KERN_AST_H_ -#include #include #include -#include #include #include @@ -80,6 +78,29 @@ */ typedef uint32_t ast_t; +/* + * When returning from interrupt/trap context to kernel mode, + * the pending ASTs are masked with AST_URGENT to determine if + * ast_taken(AST_PREEMPTION) should be called, for instance to + * effect preemption of a kernel thread by a realtime thread. + * This is also done when re-enabling preemption or re-enabling + * interrupts, since an AST may have been set while preemption + * was disabled, and it should take effect as soon as possible. + * + * When returning from interrupt/trap/syscall context to user + * mode, any and all ASTs that are pending should be handled. + * + * If a thread context switches, only ASTs not in AST_PER_THREAD + * remain active. The per-thread ASTs are stored in the thread_t + * and re-enabled when the thread context switches back. + * + * Typically the preemption ASTs are set as a result of threads + * becoming runnable, threads changing priority, or quantum + * expiration. If a thread becomes runnable and is chosen + * to run on another processor, cause_ast_check() may be called + * to IPI that processor and request csw_check() be run there. + */ + /* * Bits for reasons */ @@ -102,8 +123,11 @@ typedef uint32_t ast_t; #define AST_CHUD 0x400 #define AST_CHUD_URGENT 0x800 #define AST_GUARD 0x1000 -#define AST_TELEMETRY_USER 0x2000 -#define AST_TELEMETRY_KERNEL 0x4000 +#define AST_TELEMETRY_USER 0x2000 /* telemetry sample requested on interrupt from userspace */ +#define AST_TELEMETRY_KERNEL 0x4000 /* telemetry sample requested on interrupt from kernel */ +#define AST_TELEMETRY_WINDOWED 0x8000 /* telemetry sample meant for the window buffer */ + +#define AST_SFI 0x10000 /* Evaluate if SFI wait is needed before return to userspace */ #define AST_NONE 0x00 #define AST_ALL (~AST_NONE) @@ -112,7 +136,7 @@ typedef uint32_t ast_t; #define AST_PREEMPTION (AST_PREEMPT | AST_QUANTUM | AST_URGENT) #define AST_CHUD_ALL (AST_CHUD_URGENT|AST_CHUD) -#define AST_TELEMETRY_ALL (AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL) +#define AST_TELEMETRY_ALL (AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL | AST_TELEMETRY_WINDOWED) #ifdef MACHINE_AST /* @@ -147,7 +171,7 @@ extern ast_t *ast_pending(void); #define MACHINE_AST_PER_THREAD 0 #endif -#define AST_PER_THREAD (AST_APC | AST_BSD | AST_MACF | MACHINE_AST_PER_THREAD | AST_LEDGER | AST_GUARD | AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL) +#define AST_PER_THREAD (AST_APC | AST_BSD | AST_MACF | MACHINE_AST_PER_THREAD | AST_LEDGER | AST_GUARD | AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL | AST_TELEMETRY_WINDOWED) /* * ast_pending(), ast_on(), ast_off(), ast_context(), and ast_propagate() * assume splsched. 
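The ast.h hunks above widen the reason mask with AST_TELEMETRY_WINDOWED and AST_SFI, and ast.c now passes only the preemption-class bits through to thread_block_reason(). A minimal standalone sketch of how those masks compose and are tested; the constants follow the header's bit assignments, and the helper name is illustrative, not part of xnu:

#include <assert.h>
#include <stdint.h>

typedef uint32_t ast_t;

#define AST_NONE        0x00
#define AST_PREEMPT     0x01
#define AST_QUANTUM     0x02
#define AST_URGENT      0x04
#define AST_SFI         0x10000
#define AST_PREEMPTION  (AST_PREEMPT | AST_QUANTUM | AST_URGENT)

/* Illustrative helper: keep only the preemption-class bits, mirroring
 * thread_block_reason(..., reasons & AST_PREEMPTION) in the hunk above. */
static ast_t
ast_preemption_bits(ast_t reasons)
{
	return reasons & AST_PREEMPTION;
}

int
main(void)
{
	ast_t reasons = AST_QUANTUM | AST_SFI;

	/* AST_QUANTUM is preemption-class; AST_SFI is handled separately
	 * by sfi_ast() before the preemption check. */
	assert(ast_preemption_bits(reasons) == AST_QUANTUM);
	assert(ast_preemption_bits(AST_SFI) == AST_NONE);
	return 0;
}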
diff --git a/osfmk/kern/bsd_kern.c b/osfmk/kern/bsd_kern.c index 8d4f388ed..3fc7a40a7 100644 --- a/osfmk/kern/bsd_kern.c +++ b/osfmk/kern/bsd_kern.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -60,9 +59,10 @@ boolean_t current_thread_aborted(void); void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); kern_return_t get_signalact(task_t , thread_t *, int); int get_vmsubmap_entries(vm_map_t, vm_object_offset_t, vm_object_offset_t); -void syscall_exit_funnelcheck(void); -int fill_task_rusage_v2(task_t task, struct rusage_info_v2 *ri); - +int fill_task_rusage(task_t task, rusage_info_current *ri); +int fill_task_io_rusage(task_t task, rusage_info_current *ri); +int fill_task_qos_rusage(task_t task, rusage_info_current *ri); +void fill_task_billed_usage(task_t task, rusage_info_current *ri); /* * @@ -340,6 +340,34 @@ uint64_t get_task_resident_size(task_t task) return((uint64_t)pmap_resident_count(map->pmap) * PAGE_SIZE_64); } +uint64_t get_task_compressed(task_t task) +{ + vm_map_t map; + + map = (task == kernel_task) ? kernel_map: task->map; + return((uint64_t)pmap_compressed(map->pmap) * PAGE_SIZE_64); +} + +uint64_t get_task_resident_max(task_t task) +{ + vm_map_t map; + + map = (task == kernel_task) ? kernel_map: task->map; + return((uint64_t)pmap_resident_max(map->pmap) * PAGE_SIZE_64); +} + +uint64_t get_task_purgeable_size(task_t task) +{ + vm_map_t map; + mach_vm_size_t volatile_virtual_size; + mach_vm_size_t volatile_resident_size; + mach_vm_size_t volatile_pmap_size; + + map = (task == kernel_task) ? kernel_map: task->map; + vm_map_query_volatile(map, &volatile_virtual_size, &volatile_resident_size, &volatile_pmap_size); + + return((uint64_t)volatile_resident_size); +} /* * */ @@ -372,6 +400,19 @@ uint64_t get_task_phys_footprint_max(task_t task) return 0; } +uint64_t get_task_cpu_time(task_t task) +{ + kern_return_t ret; + ledger_amount_t credit, debit; + + ret = ledger_get_entries(task->ledger, task_ledgers.cpu_time, &credit, &debit); + if (KERN_SUCCESS == ret) { + return (credit - debit); + } + + return 0; +} + /* * */ @@ -757,31 +798,19 @@ get_numthreads(task_t task) return(task->thread_count); } -void -syscall_exit_funnelcheck(void) -{ - thread_t thread; - - thread = current_thread(); - - if (thread->funnel_lock) - panic("syscall exit with funnel held\n"); -} - - /* * Gather the various pieces of info about the designated task, * and collect it all into a single rusage_info. 
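 * (This routine covers the CPU, wakeup, and power counters; the I/O and
 * QoS pieces are filled in by fill_task_io_rusage and fill_task_qos_rusage,
 * added further down.)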
*/ int -fill_task_rusage_v2(task_t task, struct rusage_info_v2 *ri) +fill_task_rusage(task_t task, rusage_info_current *ri) { struct task_power_info powerinfo; assert(task != TASK_NULL); task_lock(task); - task_power_info_locked(task, &powerinfo); + task_power_info_locked(task, &powerinfo, NULL); ri->ri_pkg_idle_wkups = powerinfo.task_platform_idle_wakeups; ri->ri_interrupt_wkups = powerinfo.task_interrupt_wakeups; ri->ri_user_time = powerinfo.total_user; @@ -799,3 +828,63 @@ fill_task_rusage_v2(task_t task, struct rusage_info_v2 *ri) task_unlock(task); return (0); } + +void +fill_task_billed_usage(task_t task __unused, rusage_info_current *ri) +{ +#if CONFIG_BANK + ri->ri_billed_system_time = bank_billed_time(task->bank_context); + ri->ri_serviced_system_time = bank_serviced_time(task->bank_context); +#else + ri->ri_billed_system_time = 0; + ri->ri_serviced_system_time = 0; +#endif +} + +int +fill_task_io_rusage(task_t task, rusage_info_current *ri) +{ + assert(task != TASK_NULL); + task_lock(task); + + if (task->task_io_stats) { + ri->ri_diskio_bytesread = task->task_io_stats->disk_reads.size; + ri->ri_diskio_byteswritten = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size); + } else { + /* I/O Stats unavailable */ + ri->ri_diskio_bytesread = 0; + ri->ri_diskio_byteswritten = 0; + } + task_unlock(task); + return (0); +} + +int +fill_task_qos_rusage(task_t task, rusage_info_current *ri) +{ + thread_t thread; + + assert(task != TASK_NULL); + task_lock(task); + + /* Rollup Qos time of all the threads to task */ + queue_iterate(&task->threads, thread, thread_t, task_threads) { + if (thread->options & TH_OPT_IDLE_THREAD) + continue; + + thread_mtx_lock(thread); + thread_update_qos_cpu_time(thread, TRUE); + thread_mtx_unlock(thread); + + } + ri->ri_cpu_time_qos_default = task->cpu_time_qos_stats.cpu_time_qos_default; + ri->ri_cpu_time_qos_maintenance = task->cpu_time_qos_stats.cpu_time_qos_maintenance; + ri->ri_cpu_time_qos_background = task->cpu_time_qos_stats.cpu_time_qos_background; + ri->ri_cpu_time_qos_utility = task->cpu_time_qos_stats.cpu_time_qos_utility; + ri->ri_cpu_time_qos_legacy = task->cpu_time_qos_stats.cpu_time_qos_legacy; + ri->ri_cpu_time_qos_user_initiated = task->cpu_time_qos_stats.cpu_time_qos_user_initiated; + ri->ri_cpu_time_qos_user_interactive = task->cpu_time_qos_stats.cpu_time_qos_user_interactive; + + task_unlock(task); + return (0); +} diff --git a/osfmk/kern/btlog.c b/osfmk/kern/btlog.c index 50fc5991e..53a01e86d 100644 --- a/osfmk/kern/btlog.c +++ b/osfmk/kern/btlog.c @@ -31,6 +31,7 @@ #include #include #include +#include #include /* @@ -73,6 +74,7 @@ struct btlog { btlog_recordindex_t freelist; }; +extern boolean_t vm_kernel_ready; extern boolean_t kmem_alloc_ready; #define lookup_btrecord(btlog, index) \ @@ -92,7 +94,7 @@ btlog_create(size_t numrecords, kern_return_t ret; size_t btrecord_size; - if (!kmem_alloc_ready) + if (vm_kernel_ready && !kmem_alloc_ready) return NULL; if (numrecords > BTLOG_MAX_RECORDS) @@ -118,8 +120,13 @@ btlog_create(size_t numrecords, /* since rounding to a page size might hold more, recalculate */ numrecords = MIN(BTLOG_MAX_RECORDS, (buffersize_needed - sizeof(btlog_t))/btrecord_size); - - ret = kmem_alloc(kernel_map, &buffer, buffersize_needed); + + if (kmem_alloc_ready) { + ret = kmem_alloc(kernel_map, &buffer, buffersize_needed); + } else { + buffer = (vm_address_t)pmap_steal_memory(buffersize_needed); + ret = KERN_SUCCESS; + } if (ret != KERN_SUCCESS) return NULL; diff --git a/osfmk/kern/clock.c 
b/osfmk/kern/clock.c index 740f16340..1bd578496 100644 --- a/osfmk/kern/clock.c +++ b/osfmk/kern/clock.c @@ -33,7 +33,6 @@ #include -#include #include #include #include @@ -409,9 +408,11 @@ clock_set_calendar_microtime( clock_sec_t sys; clock_usec_t microsys; clock_sec_t newsecs; + clock_usec_t newmicrosecs; spl_t s; - newsecs = (microsecs < 500*USEC_PER_SEC)? secs: secs + 1; + newsecs = secs; + newmicrosecs = microsecs; s = splclock(); clock_lock(); @@ -447,7 +448,7 @@ clock_set_calendar_microtime( /* * Set the new value for the platform clock. */ - PESetGMTTimeOfDay(newsecs); + PESetUTCTimeOfDay(newsecs, newmicrosecs); splx(s); @@ -473,10 +474,12 @@ clock_set_calendar_microtime( void clock_initialize_calendar(void) { - clock_sec_t sys, secs = PEGetGMTTimeOfDay(); - clock_usec_t microsys, microsecs = 0; + clock_sec_t sys, secs; + clock_usec_t microsys, microsecs; spl_t s; + PEGetUTCTimeOfDay(&secs, &microsecs); + s = splclock(); clock_lock(); @@ -1023,3 +1026,4 @@ clock_track_calend_nowait(void) } #endif /* CONFIG_DTRACE */ + diff --git a/osfmk/kern/clock.h b/osfmk/kern/clock.h index b2e4b77bf..a7641adb6 100644 --- a/osfmk/kern/clock.h +++ b/osfmk/kern/clock.h @@ -122,11 +122,6 @@ extern void machine_delay_until(uint64_t interval, extern uint32_t hz_tick_interval; -extern void absolutetime_to_nanotime( - uint64_t abstime, - clock_sec_t *secs, - clock_nsec_t *nanosecs); - extern void nanotime_to_absolutetime( clock_sec_t secs, clock_nsec_t nanosecs, diff --git a/osfmk/kern/clock_oldops.c b/osfmk/kern/clock_oldops.c index 13a9fa1c6..cdb1bf670 100644 --- a/osfmk/kern/clock_oldops.c +++ b/osfmk/kern/clock_oldops.c @@ -39,7 +39,6 @@ #include -#include #include #include #include @@ -128,8 +127,6 @@ static kern_return_t clock_sleep_internal( sleep_type_t sleep_type, mach_timespec_t *sleep_time); -int rtclock_config(void); - int rtclock_init(void); kern_return_t rtclock_gettime( @@ -141,7 +138,7 @@ kern_return_t rtclock_getattr( mach_msg_type_number_t *count); struct clock_ops sysclk_ops = { - rtclock_config, rtclock_init, + NULL, rtclock_init, rtclock_gettime, rtclock_getattr, }; diff --git a/osfmk/kern/coalition.c b/osfmk/kern/coalition.c new file mode 100644 index 000000000..df8126bb6 --- /dev/null +++ b/osfmk/kern/coalition.c @@ -0,0 +1,743 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include /* for TASK_CHUNK */ +#include +#include + +#include + +#include +#include +#include + +#include + +/* defined in task.c */ +extern ledger_template_t task_ledger_template; + +/* + * Coalition zone needs limits. We expect there will be as many coalitions as + * tasks (same order of magnitude), so use the task zone's limits. + * */ +#define CONFIG_COALITION_MAX CONFIG_TASK_MAX +#define COALITION_CHUNK TASK_CHUNK + +int unrestrict_coalition_syscalls; + +lck_attr_t coalitions_lck_attr; +lck_grp_t coalitions_lck_grp; +lck_grp_attr_t coalitions_lck_grp_attr; + +/* coalitions_list_lock protects coalition_count, coalitions queue, next_coalition_id. */ +decl_lck_mtx_data(static,coalitions_list_lock); +static uint64_t coalition_count; +static uint64_t coalition_next_id = 1; +static queue_head_t coalitions; + +coalition_t default_coalition; + +zone_t coalition_zone; + +struct coalition { + uint64_t id; /* monotonically increasing */ + + ledger_t ledger; + uint64_t bytesread; + uint64_t byteswritten; + uint64_t gpu_time; + + /* + * Count the length of time this coalition had at least one active task. + * This can be a 'denominator' to turn e.g. cpu_time to %cpu. + * */ + uint64_t last_became_nonempty_time; + uint64_t time_nonempty; + + uint64_t task_count; /* Count of tasks that have started in this coalition */ + uint64_t dead_task_count; /* Count of tasks that have exited in this coalition; subtract from task_count to get count of "active" */ + queue_head_t tasks; /* List of active tasks in the coalition */ + + queue_chain_t coalitions; /* global list of coalitions */ + + decl_lck_mtx_data(,lock) /* Coalition lock. */ + + uint32_t ref_count; /* Number of references to the memory containing this struct */ + uint32_t active_count; /* Number of members of (tasks in) the coalition, plus vouchers referring to the coalition */ + + unsigned int privileged : 1; /* Members of this coalition may create and manage coalitions and may posix_spawn processes into selected coalitions */ + + /* ast? */ + + /* voucher */ + + /* state of the coalition */ + unsigned int termrequested : 1; /* launchd has requested termination when coalition becomes empty */ + unsigned int terminated : 1; /* coalition became empty and spawns are now forbidden */ + unsigned int reaped : 1; /* reaped, invisible to userspace, but waiting for ref_count to go to zero */ + unsigned int notified : 1; /* no-more-processes notification was sent via special port */ +}; + +#define coalition_lock(c) do{ lck_mtx_lock(&c->lock); }while(0) +#define coalition_unlock(c) do{ lck_mtx_unlock(&c->lock); }while(0) + +static void +coalition_notify_user(uint64_t id, uint32_t flags) +{ + mach_port_t user_port; + kern_return_t kr; + + kr = host_get_coalition_port(host_priv_self(), &user_port); + if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) { + return; + } + + coalition_notification(user_port, id, flags); +} + +/* + * coalition_find_by_id_internal + * Returns: Coalition object with specified id, NOT referenced. + * If not found, returns COALITION_NULL. + * Condition: coalitions_list_lock must be LOCKED. 
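+ * The returned pointer is only stable while that lock is held, since no
+ * reference is taken on the coalition here.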
+ */ +static coalition_t +coalition_find_by_id_internal(uint64_t coal_id) +{ + if (coal_id == 0) { + return COALITION_NULL; + } + + lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED); + coalition_t coal; + queue_iterate(&coalitions, coal, coalition_t, coalitions) { + if (coal->id == coal_id) { + return coal; + } + } + return COALITION_NULL; +} + +kern_return_t +coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_usage *cru_out) +{ + kern_return_t kr; + ledger_amount_t credit, debit; + + ledger_t sum_ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES); + if (sum_ledger == LEDGER_NULL) { + return KERN_RESOURCE_SHORTAGE; + } + + coalition_lock(coal); + + /* + * Start with the coalition's ledger, which holds the totals from all + * the dead tasks. + */ + ledger_rollup(sum_ledger, coal->ledger); + uint64_t bytesread = coal->bytesread; + uint64_t byteswritten = coal->byteswritten; + uint64_t gpu_time = coal->gpu_time; + + /* + * Add to that all the active tasks' ledgers. Tasks cannot deallocate + * out from under us, since we hold the coalition lock. + */ + task_t task; + queue_iterate(&coal->tasks, task, task_t, coalition_tasks) { + ledger_rollup(sum_ledger, task->ledger); + bytesread += task->task_io_stats->disk_reads.size; + byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; + gpu_time += task_gpu_utilisation(task); + } + + /* collect information from the coalition itself */ + cru_out->tasks_started = coal->task_count; + cru_out->tasks_exited = coal->dead_task_count; + + uint64_t time_nonempty = coal->time_nonempty; + uint64_t last_became_nonempty_time = coal->last_became_nonempty_time; + + coalition_unlock(coal); + + /* Copy the totals out of sum_ledger */ + kr = ledger_get_entries(sum_ledger, task_ledgers.cpu_time, + &credit, &debit); + if (kr != KERN_SUCCESS) { + credit = 0; + } + cru_out->cpu_time = credit; + + kr = ledger_get_entries(sum_ledger, task_ledgers.interrupt_wakeups, + &credit, &debit); + if (kr != KERN_SUCCESS) { + credit = 0; + } + cru_out->interrupt_wakeups = credit; + + kr = ledger_get_entries(sum_ledger, task_ledgers.platform_idle_wakeups, + &credit, &debit); + if (kr != KERN_SUCCESS) { + credit = 0; + } + cru_out->platform_idle_wakeups = credit; + + cru_out->bytesread = bytesread; + cru_out->byteswritten = byteswritten; + cru_out->gpu_time = gpu_time; + + ledger_dereference(sum_ledger); + sum_ledger = LEDGER_NULL; + + if (last_became_nonempty_time) { + time_nonempty += mach_absolute_time() - last_became_nonempty_time; + } + absolutetime_to_nanoseconds(time_nonempty, &cru_out->time_nonempty); + + return KERN_SUCCESS; +} + +/* + * coalition_create_internal + * Returns: New coalition object, referenced for the caller and unlocked. + * Condition: coalitions_list_lock must be UNLOCKED. + */ +kern_return_t +coalition_create_internal(coalition_t *out, boolean_t privileged) +{ + struct coalition *new_coal = (struct coalition *)zalloc(coalition_zone); + if (new_coal == COALITION_NULL) { + return KERN_RESOURCE_SHORTAGE; + } + bzero(new_coal, sizeof(*new_coal)); + + new_coal->ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES); + if (new_coal->ledger == NULL) { + zfree(coalition_zone, new_coal); + return KERN_RESOURCE_SHORTAGE; + } + + /* One for caller, one for coalitions list */ + new_coal->ref_count = 2; + + new_coal->privileged = privileged ? 
TRUE : FALSE; + + lck_mtx_init(&new_coal->lock, &coalitions_lck_grp, &coalitions_lck_attr); + queue_init(&new_coal->tasks); + + lck_mtx_lock(&coalitions_list_lock); + new_coal->id = coalition_next_id++; + coalition_count++; + queue_enter(&coalitions, new_coal, coalition_t, coalitions); + lck_mtx_unlock(&coalitions_list_lock); + +#if COALITION_DEBUG + printf("%s: new coal id %llu\n", __func__, new_coal->id); +#endif + + *out = new_coal; + return KERN_SUCCESS; +} + +/* + * coalition_release + * Condition: coalition must be UNLOCKED. + * */ +void +coalition_release(coalition_t coal) +{ + boolean_t do_dealloc = FALSE; + + /* TODO: This can be done with atomics. */ + coalition_lock(coal); + coal->ref_count--; + if (coal->ref_count == 0) { + do_dealloc = TRUE; + } +#if COALITION_DEBUG + uint32_t rc = coal->ref_count; +#endif /* COALITION_DEBUG */ + + coalition_unlock(coal); + +#if COALITION_DEBUG + printf("%s: coal %llu ref_count-- -> %u%s\n", __func__, coal->id, rc, + do_dealloc ? ", will deallocate now" : ""); +#endif /* COALITION_DEBUG */ + + if (do_dealloc) { + assert(coal->termrequested); + assert(coal->terminated); + assert(coal->active_count == 0); + assert(coal->reaped); + + ledger_dereference(coal->ledger); + lck_mtx_destroy(&coal->lock, &coalitions_lck_grp); + zfree(coalition_zone, coal); + } +} + +/* + * coalition_find_by_id + * Returns: Coalition object with specified id, referenced. + * Condition: coalitions_list_lock must be UNLOCKED. + */ +coalition_t +coalition_find_by_id(uint64_t cid) +{ + if (cid == 0) { + return COALITION_NULL; + } + + lck_mtx_lock(&coalitions_list_lock); + + coalition_t coal = coalition_find_by_id_internal(cid); + if (coal == COALITION_NULL) { + lck_mtx_unlock(&coalitions_list_lock); + return COALITION_NULL; + } + + coalition_lock(coal); + + if (coal->reaped) { + coalition_unlock(coal); + lck_mtx_unlock(&coalitions_list_lock); + return COALITION_NULL; + } + + if (coal->ref_count == 0) { + panic("resurrecting coalition %p id %llu, active_count = %u\n", + coal, coal->id, coal->active_count); + } + coal->ref_count++; +#if COALITION_DEBUG + uint32_t rc = coal->ref_count; +#endif + + coalition_unlock(coal); + lck_mtx_unlock(&coalitions_list_lock); + +#if COALITION_DEBUG + printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc); +#endif + return coal; +} + +/* + * coalition_find_and_activate_by_id + * Returns: Coalition object with specified id, referenced, and activated. + * Condition: coalitions_list_lock must be UNLOCKED. + * This is the function to use when putting a 'new' thing into a coalition, + * like posix_spawn of an XPC service by launchd. + * See also coalition_extend_active. 
+ */ +coalition_t +coalition_find_and_activate_by_id(uint64_t cid) +{ + if (cid == 0) { + return COALITION_NULL; + } + + lck_mtx_lock(&coalitions_list_lock); + + coalition_t coal = coalition_find_by_id_internal(cid); + if (coal == COALITION_NULL) { + lck_mtx_unlock(&coalitions_list_lock); + return COALITION_NULL; + } + + coalition_lock(coal); + + if (coal->reaped || coal->terminated) { + /* Too late to put something new into this coalition, it's + * already on its way out the door */ + coalition_unlock(coal); + lck_mtx_unlock(&coalitions_list_lock); + return COALITION_NULL; + } + + if (coal->ref_count == 0) { + panic("resurrecting coalition %p id %llu, active_count = %u\n", + coal, coal->id, coal->active_count); + } + + coal->ref_count++; + coal->active_count++; + +#if COALITION_DEBUG + uint32_t rc = coal->ref_count; + uint32_t ac = coal->active_count; +#endif + + coalition_unlock(coal); + lck_mtx_unlock(&coalitions_list_lock); + +#if COALITION_DEBUG + printf("%s: coal %llu ref_count++ -> %u, active_count++ -> %u\n", + __func__, coal->id, rc, ac); +#endif + return coal; +} + +uint64_t +coalition_id(coalition_t coal) +{ + return coal->id; +} + +uint64_t +task_coalition_id(task_t task) +{ + return task->coalition->id; +} + +boolean_t +coalition_is_privileged(coalition_t coal) +{ + return coal->privileged || unrestrict_coalition_syscalls; +} + +boolean_t +task_is_in_privileged_coalition(task_t task) +{ + return task->coalition->privileged || unrestrict_coalition_syscalls; +} + +/* + * coalition_get_ledger + * Returns: Coalition's ledger, NOT referenced. + * Condition: Caller must have a coalition reference. + */ +ledger_t +coalition_get_ledger(coalition_t coal) +{ + return coal->ledger; +} + +/* + * This is the function to use when you already hold an activation on the + * coalition, and want to extend it to a second activation owned by a new + * object, like when a task in the coalition calls fork(). This is analogous + * to taking a second reference when you already hold one. + * See also coalition_find_and_activate_by_id. + */ +kern_return_t +coalition_extend_active(coalition_t coal) +{ + coalition_lock(coal); + + if (coal->reaped) { + panic("cannot make a reaped coalition active again"); + } + + if (coal->terminated) { + coalition_unlock(coal); + return KERN_TERMINATED; + } + + assert(coal->active_count > 0); + coal->active_count++; + + coalition_unlock(coal); + return KERN_SUCCESS; +} + +void +coalition_remove_active(coalition_t coal) +{ + coalition_lock(coal); + + assert(!coal->reaped); + assert(coal->active_count > 0); + + coal->active_count--; + + boolean_t do_notify = FALSE; + uint64_t notify_id = 0; + uint32_t notify_flags = 0; + if (coal->termrequested && coal->active_count == 0) { + /* We only notify once, when active_count reaches zero. + * We just decremented, so if it reached zero, we mustn't have + * notified already. + */ + assert(!coal->terminated); + coal->terminated = TRUE; + + assert(!coal->notified); + + coal->notified = TRUE; + do_notify = TRUE; + notify_id = coal->id; + notify_flags = 0; + } + + coalition_unlock(coal); + + if (do_notify) { + coalition_notify_user(notify_id, notify_flags); + } +} + +/* Used for kernel_task, launchd, launchd's early boot tasks... 
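+ * These tasks must always end up in a coalition, so a failure to adopt
+ * into the default coalition is treated as fatal (see the panic below).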
*/ +kern_return_t +coalition_default_adopt_task(task_t task) +{ + kern_return_t kr; + kr = coalition_adopt_task(default_coalition, task); + if (kr != KERN_SUCCESS) { + panic("failed to adopt task %p into default coalition: %d", task, kr); + } + return kr; +} + +/* + * coalition_adopt_task + * Condition: Coalition must be referenced and unlocked. Will fail if coalition + * is already terminated. + */ +kern_return_t +coalition_adopt_task(coalition_t coal, task_t task) +{ + if (task->coalition) { + return KERN_ALREADY_IN_SET; + } + + coalition_lock(coal); + + if (coal->reaped || coal->terminated) { + coalition_unlock(coal); + return KERN_TERMINATED; + } + + coal->active_count++; + + coal->ref_count++; + task->coalition = coal; + + queue_enter(&coal->tasks, task, task_t, coalition_tasks); + coal->task_count++; + + if(coal->task_count < coal->dead_task_count) { + panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id); + } + + /* If moving from 0->1 active tasks */ + if (coal->task_count - coal->dead_task_count == 1) { + coal->last_became_nonempty_time = mach_absolute_time(); + } + +#if COALITION_DEBUG + uint32_t rc = coal->ref_count; +#endif + + coalition_unlock(coal); + +#if COALITION_DEBUG + if (rc) { + printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc); + } +#endif + return KERN_SUCCESS; +} + +/* + * coalition_remove_task + * Condition: task must be referenced and UNLOCKED; task's coalition must be UNLOCKED + */ +kern_return_t +coalition_remove_task(task_t task) +{ + coalition_t coal = task->coalition; + assert(coal); + + coalition_lock(coal); + + queue_remove(&coal->tasks, task, task_t, coalition_tasks); + coal->dead_task_count++; + + if(coal->task_count < coal->dead_task_count) { + panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id); + } + + /* If moving from 1->0 active tasks */ + if (coal->task_count - coal->dead_task_count == 0) { + uint64_t last_time_nonempty = mach_absolute_time() - coal->last_became_nonempty_time; + coal->last_became_nonempty_time = 0; + coal->time_nonempty += last_time_nonempty; + } + + ledger_rollup(coal->ledger, task->ledger); + coal->bytesread += task->task_io_stats->disk_reads.size; + coal->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; + coal->gpu_time += task_gpu_utilisation(task); + + coalition_unlock(coal); + + coalition_remove_active(coal); + return KERN_SUCCESS; +} + +/* + * coalition_request_terminate_internal + * Condition: Coalition must be referenced and UNLOCKED. + */ +kern_return_t +coalition_request_terminate_internal(coalition_t coal) +{ + if (coal == default_coalition) { + return KERN_DEFAULT_SET; + } + + coalition_lock(coal); + + if (coal->reaped) { + coalition_unlock(coal); + return KERN_INVALID_NAME; + } + + if (coal->terminated || coal->termrequested) { + coalition_unlock(coal); + return KERN_TERMINATED; + } + + coal->termrequested = TRUE; + + boolean_t do_notify = FALSE; + uint64_t note_id = 0; + uint32_t note_flags = 0; + + if (coal->active_count == 0) { + /* + * We only notify once, when active_count reaches zero. + * Nothing was decremented here, so if the count is already zero, + * no notification can have been sent yet: sending one requires + * termrequested, which is only now being set.
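+ *
+ * (Illustrative cross-reference, not part of the original text: the
+ * matching notification site is coalition_remove_active() above, which
+ * decrements active_count and notifies if it reaches zero after
+ * termrequested has been set. Exactly one of the two paths ever calls
+ * coalition_notify_user() for a given coalition.)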
+ */ + assert(!coal->terminated); + coal->terminated = TRUE; + + assert(!coal->notified); + + coal->notified = TRUE; + do_notify = TRUE; + note_id = coal->id; + note_flags = 0; + } + + coalition_unlock(coal); + + if (do_notify) { + coalition_notify_user(note_id, note_flags); + } + + return KERN_SUCCESS; +} + +/* + * coalition_reap_internal + * Condition: Coalition must be referenced and UNLOCKED. + */ +kern_return_t +coalition_reap_internal(coalition_t coal) +{ + if (coal == default_coalition) { + return KERN_DEFAULT_SET; + } + + coalition_lock(coal); + if (coal->reaped) { + coalition_unlock(coal); + return KERN_TERMINATED; + } + if (!coal->terminated) { + coalition_unlock(coal); + return KERN_FAILURE; + } + assert(coal->termrequested); + if (coal->active_count > 0) { + coalition_unlock(coal); + return KERN_FAILURE; + } + + coal->reaped = TRUE; + + /* Caller, launchd, and coalitions list should each have a reference */ + assert(coal->ref_count > 2); + + coalition_unlock(coal); + + lck_mtx_lock(&coalitions_list_lock); + coalition_count--; + queue_remove(&coalitions, coal, coalition_t, coalitions); + lck_mtx_unlock(&coalitions_list_lock); + + /* Release the list's reference and launchd's reference. */ + coalition_release(coal); + coalition_release(coal); + + return KERN_SUCCESS; +} + +void +coalition_init(void) +{ + coalition_zone = zinit( + sizeof(struct coalition), + CONFIG_COALITION_MAX * sizeof(struct coalition), + COALITION_CHUNK * sizeof(struct coalition), + "coalitions"); + zone_change(coalition_zone, Z_NOENCRYPT, TRUE); + queue_init(&coalitions); + + if (!PE_parse_boot_argn("unrestrict_coalition_syscalls", &unrestrict_coalition_syscalls, + sizeof (unrestrict_coalition_syscalls))) { + unrestrict_coalition_syscalls = 0; + } + + lck_grp_attr_setdefault(&coalitions_lck_grp_attr); + lck_grp_init(&coalitions_lck_grp, "coalition", &coalitions_lck_grp_attr); + lck_attr_setdefault(&coalitions_lck_attr); + lck_mtx_init(&coalitions_list_lock, &coalitions_lck_grp, &coalitions_lck_attr); + + init_task_ledgers(); + + kern_return_t kr = coalition_create_internal(&default_coalition, TRUE); + if (kr != KERN_SUCCESS) { + panic("%s: could not create default coalition: %d", __func__, kr); + } + /* "Leak" our reference to the global object */ +} + diff --git a/osfmk/kern/coalition.h b/osfmk/kern/coalition.h new file mode 100644 index 000000000..38bdefd58 --- /dev/null +++ b/osfmk/kern/coalition.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _KERN_COALITION_H_ +#define _KERN_COALITION_H_ + +#ifdef XNU_KERNEL_PRIVATE + +void coalition_init(void); + +/* These may return: + * KERN_ALREADY_IN_SET task is already in a coalition (maybe this one, maybe a different one) + * KERN_TERMINATED coalition is already terminated (so it may not adopt any more tasks) + */ +kern_return_t coalition_adopt_task(coalition_t coal, task_t task); +kern_return_t coalition_default_adopt_task(task_t task); + +/* Currently, no error conditions. If task is not already in a coalition, + * KERN_SUCCESS is returned because removing it did not fail. + */ +kern_return_t coalition_remove_task(task_t task); + +uint64_t coalition_id(coalition_t coal); +uint64_t task_coalition_id(task_t task); + +/* Returns with a reference, or COALITION_NULL. + * There is no coalition with id 0. + */ +coalition_t coalition_find_by_id(uint64_t coal_id); + +/* Returns with a reference and an activation, or COALITION_NULL. + * There is no coalition with id 0. + */ +coalition_t coalition_find_and_activate_by_id(uint64_t coal_id); + +/* This may return: + * KERN_TERMINATED coalition is terminated + * This will panic if the coalition is already reaped, which implies + * that it wasn't active. + */ +kern_return_t coalition_extend_active(coalition_t coal); + +void coalition_remove_active(coalition_t coal); + +void coalition_release(coalition_t coal); + +/* + * The following functions are to be used by the syscall wrapper + * in bsd/kern/kern_proc.c, after it has verified the caller's privilege. + */ + +/* This may return: + * KERN_DEFAULT_SET The default coalition, which contains the kernel, may + * not be terminated. + * KERN_TERMINATED The coalition was already reaped. + * KERN_FAILURE The coalition was not empty or has never been terminated. + */ +kern_return_t coalition_reap_internal(coalition_t coal); + +/* This may return: + * KERN_DEFAULT_SET The default coalition, which contains the kernel, may + * not be terminated. + * KERN_TERMINATED The coalition was already terminated (or even reaped) + * KERN_INVALID_NAME The coalition was already reaped. + */ +kern_return_t coalition_request_terminate_internal(coalition_t coal); + +/* This may return: + * KERN_RESOURCE_SHORTAGE Unable to allocate kernel resources for a + * new coalition. + */ +kern_return_t coalition_create_internal(coalition_t *out, boolean_t privileged); + +boolean_t coalition_is_privileged(coalition_t coal); +boolean_t task_is_in_privileged_coalition(task_t task); + +/* This struct is also defined in bsd/sys/coalition.h. Keep in sync. 
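+ *
+ * A minimal consumer sketch (hypothetical syscall-wrapper code; only
+ * the struct below and coalition_resource_usage_internal() are defined
+ * in this header):
+ *
+ *	struct coalition_resource_usage cru;
+ *	if (coalition_resource_usage_internal(coal, &cru) == KERN_SUCCESS) {
+ *		// e.g. copy cru.cpu_time, cru.bytesread out to user space
+ *	}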
*/ +struct coalition_resource_usage { + uint64_t tasks_started; + uint64_t tasks_exited; + uint64_t time_nonempty; + uint64_t cpu_time; + uint64_t interrupt_wakeups; + uint64_t platform_idle_wakeups; + uint64_t bytesread; + uint64_t byteswritten; + uint64_t gpu_time; +}; + +kern_return_t coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_usage *cru_out); + +ledger_t coalition_get_ledger(coalition_t coal); + +#endif /* XNU_KERNEL_PRIVATE */ + +#endif /* _KERN_COALITION_H */ diff --git a/osfmk/kern/debug.c b/osfmk/kern/debug.c index 7ee11d7a9..6dc10f748 100644 --- a/osfmk/kern/debug.c +++ b/osfmk/kern/debug.c @@ -59,7 +59,6 @@ #include #include -#include #include #include #include @@ -67,10 +66,11 @@ #include #include #include +#include #include #include #include -#if !MACH_KDP +#if !(MACH_KDP && CONFIG_KDP_INTERACTIVE_DEBUGGING) #include #endif @@ -88,6 +88,10 @@ #include #include +#if (defined(__arm64__) || defined(NAND_PANIC_DEVICE)) && !defined(LEGACY_PANIC_LOGS) +#include /* For gPanicBase */ +#endif + unsigned int halt_in_debugger = 0; unsigned int switch_debugger = 0; unsigned int current_debugger = 0; @@ -111,9 +115,17 @@ unsigned long panic_caller; #define DEBUG_BUF_SIZE (3 * PAGE_SIZE) +/* debug_buf is directly linked with iBoot panic region for ARM64 targets */ +#if (defined(__arm64__) || defined(NAND_PANIC_DEVICE)) && !defined(LEGACY_PANIC_LOGS) +char *debug_buf_addr = NULL; +char *debug_buf_ptr = NULL; +unsigned int debug_buf_size = 0; +#else char debug_buf[DEBUG_BUF_SIZE]; +__used char *debug_buf_addr = debug_buf; char *debug_buf_ptr = debug_buf; unsigned int debug_buf_size = sizeof(debug_buf); +#endif static char model_name[64]; unsigned char *kernel_uuid; @@ -140,7 +152,7 @@ typedef struct pasc pasc_t; #undef Assert #endif -void +void __attribute__((noinline)) Assert( const char *file, int line, @@ -209,8 +221,20 @@ debug_log_init(void) { if (debug_buf_size != 0) return; +#if (defined(__arm64__) || defined(NAND_PANIC_DEVICE)) && !defined(LEGACY_PANIC_LOGS) + if (!gPanicBase) { + printf("debug_log_init: Error!! gPanicBase is still not initialized\n"); + return; + } + /* Shift debug buf start location and size by 8 bytes for magic header and crc value */ + debug_buf_addr = (char*)gPanicBase + 8; + debug_buf_ptr = debug_buf_addr; + debug_buf_size = gPanicSize - 8; +#else + debug_buf_addr = debug_buf; debug_buf_ptr = debug_buf; debug_buf_size = sizeof(debug_buf); +#endif } #if defined(__i386__) || defined(__x86_64__) @@ -234,12 +258,14 @@ void _consume_panic_args(int a __unused, ...) 
panic("panic"); } +extern unsigned int write_trace_on_panic; + static spl_t panic_prologue(const char *str) { spl_t s; - if (kdebug_enable) { + if (write_trace_on_panic && kdebug_enable) { if (get_preemption_level() == 0 && !ml_at_interrupt_context()) { ml_set_interrupts_enabled(TRUE); kdbg_dump_trace_to_file("/var/tmp/panic.trace"); @@ -395,7 +421,7 @@ void debug_putc(char c) { if ((debug_buf_size != 0) && - ((debug_buf_ptr-debug_buf) < (int)debug_buf_size)) { + ((debug_buf_ptr-debug_buf_addr) < (int)debug_buf_size)) { *debug_buf_ptr=c; debug_buf_ptr++; } @@ -500,6 +526,12 @@ void panic_display_kernel_aslr(void) { } } +void panic_display_hibb(void) { +#if defined(__i386__) || defined (__x86_64__) + kdb_printf("__HIB text base: %p\n", (void *) vm_hib_base); +#endif +} + static void panic_display_uptime(void) { uint64_t uptime; absolutetime_to_nanoseconds(mach_absolute_time(), &uptime); @@ -524,6 +556,7 @@ __private_extern__ void panic_display_system_configuration(void) { kdb_printf("\nKernel version:\n%s\n",version); panic_display_kernel_uuid(); panic_display_kernel_aslr(); + panic_display_hibb(); panic_display_pal_info(); panic_display_model_name(); panic_display_uptime(); @@ -581,6 +614,17 @@ __private_extern__ void panic_display_zprint() } } +#if CONFIG_ECC_LOGGING +__private_extern__ void panic_display_ecc_errors() +{ + uint32_t count = ecc_log_get_correction_count(); + + if (count > 0) { + kdb_printf("ECC Corrections:%u\n", count); + } +} +#endif /* CONFIG_ECC_LOGGING */ + #if CONFIG_ZLEAKS extern boolean_t panic_include_ztrace; extern struct ztrace* top_ztrace; @@ -612,7 +656,7 @@ __private_extern__ void panic_display_ztrace(void) } #endif /* CONFIG_ZLEAKS */ -#if !MACH_KDP +#if ! (MACH_KDP && CONFIG_KDP_INTERACTIVE_DEBUGGING) static struct kdp_ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}}; /* XXX ugly forward declares to stop warnings */ @@ -664,22 +708,11 @@ void kdp_unregister_send_receive(__unused void *send, __unused void *receive) {} -void -kdp_snapshot_preflight(__unused int pid, __unused void * tracebuf, - __unused uint32_t tracebuf_size, __unused uint32_t options) +void kdp_register_link(__unused kdp_link_t link, __unused kdp_mode_t mode) {} -int -kdp_stack_snapshot_geterror(void) -{ - return -1; -} - -int -kdp_stack_snapshot_bytes_traced(void) -{ - return 0; -} +void kdp_unregister_link(__unused kdp_link_t link, __unused kdp_mode_t mode) +{} #endif diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h index a61206294..85acc47fd 100644 --- a/osfmk/kern/debug.h +++ b/osfmk/kern/debug.h @@ -40,6 +40,11 @@ #ifdef __APPLE_API_PRIVATE #ifdef __APPLE_API_UNSTABLE +/* This value must always match IO_NUM_PRIORITIES defined in thread_info.h */ +#define STACKSHOT_IO_NUM_PRIORITIES 4 +/* This value must always match MAXTHREADNAMESIZE used in bsd */ +#define STACKSHOT_MAX_THREAD_NAME_SIZE 64 + struct thread_snapshot { uint32_t snapshot_magic; uint32_t nkern_frames; @@ -50,10 +55,37 @@ struct thread_snapshot { uint64_t user_time; uint64_t system_time; int32_t state; - int32_t priority; // static priority - int32_t sched_pri; // scheduled (current) priority - int32_t sched_flags; // scheduler flags + int32_t priority; /* static priority */ + int32_t sched_pri; /* scheduled (current) priority */ + int32_t sched_flags; /* scheduler flags */ char ss_flags; + char ts_qos; + char io_tier; + + /* + * I/O Statistics + * XXX: These fields must be together + */ + uint64_t disk_reads_count; + uint64_t disk_reads_size; + uint64_t disk_writes_count; + uint64_t disk_writes_size; + uint64_t 
io_priority_count[STACKSHOT_IO_NUM_PRIORITIES]; + uint64_t io_priority_size[STACKSHOT_IO_NUM_PRIORITIES]; + uint64_t paging_count; + uint64_t paging_size; + uint64_t non_paging_count; + uint64_t non_paging_size; + uint64_t data_count; + uint64_t data_size; + uint64_t metadata_count; + uint64_t metadata_size; + /* XXX: I/O Statistics end */ + + uint64_t voucher_identifier; /* obfuscated voucher identifier */ + uint64_t total_syscalls; + char pth_name[STACKSHOT_MAX_THREAD_NAME_SIZE]; + } __attribute__ ((packed)); struct task_snapshot { @@ -65,13 +97,17 @@ struct task_snapshot { uint8_t shared_cache_identifier[16]; uint64_t shared_cache_slide; uint32_t nloadinfos; - int suspend_count; - int task_size; // pages - int faults; // number of page faults - int pageins; // number of actual pageins - int cow_faults; // number of copy-on-write faults + int suspend_count; + int task_size; /* pages */ + int faults; /* number of page faults */ + int pageins; /* number of actual pageins */ + int cow_faults; /* number of copy-on-write faults */ uint32_t ss_flags; - /* We restrict ourselves to a statically defined + uint64_t p_start_sec; /* from the bsd proc struct */ + uint64_t p_start_usec; /* from the bsd proc struct */ + + /* + * We restrict ourselves to a statically defined * (current as of 2009) length for the * p_comm string, due to scoping issues (osfmk/bsd and user/kernel * binary compatibility). @@ -80,6 +116,28 @@ struct task_snapshot { uint32_t was_throttled; uint32_t did_throttle; uint32_t latency_qos; + /* + * I/O Statistics + * XXX: These fields must be together. + */ + uint64_t disk_reads_count; + uint64_t disk_reads_size; + uint64_t disk_writes_count; + uint64_t disk_writes_size; + uint64_t io_priority_count[STACKSHOT_IO_NUM_PRIORITIES]; + uint64_t io_priority_size[STACKSHOT_IO_NUM_PRIORITIES]; + uint64_t paging_count; + uint64_t paging_size; + uint64_t non_paging_count; + uint64_t non_paging_size; + uint64_t data_count; + uint64_t data_size; + uint64_t metadata_count; + uint64_t metadata_size; + /* XXX: I/O Statistics end */ + + uint32_t donating_pid_count; + } __attribute__ ((packed)); struct micro_snapshot { @@ -173,28 +231,36 @@ enum generic_snapshot_flags { kTaskIsForeground = 0x400, kTaskIsBoosted = 0x800, kTaskIsSuppressed = 0x1000, - kTaskIsTimerThrottled = 0x2000 /* deprecated */ + kTaskIsTimerThrottled = 0x2000, /* deprecated */ + kTaskIsImpDonor = 0x4000, + kTaskIsLiveImpDonor = 0x8000 }; enum thread_snapshot_flags { - kHasDispatchSerial = 0x4, + kHasDispatchSerial = 0x4, kStacksPCOnly = 0x8, /* Stack traces have no frame pointers. 
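 *
 * (Illustrative note, not in the original header: these values are
 * bit flags tested against thread_snapshot.ss_flags by stackshot
 * consumers, e.g.
 *	if (tsnap->ss_flags & kThreadDarwinBG)
 *		handle_darwinbg(tsnap);		// hypothetical consumer
 * where tsnap points to a parsed thread_snapshot record.)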
*/ - kThreadDarwinBG = 0x10 /* Thread is darwinbg */ + kThreadDarwinBG = 0x10, /* Thread is darwinbg */ + kThreadIOPassive = 0x20, /* Thread uses passive IO */ + kThreadSuspended = 0x40 /* Thread is suspended */ }; #define VM_PRESSURE_TIME_WINDOW 5 /* seconds */ enum { - STACKSHOT_GET_DQ = 0x01, - STACKSHOT_SAVE_LOADINFO = 0x02, - STACKSHOT_GET_GLOBAL_MEM_STATS = 0x04, - STACKSHOT_SAVE_KEXT_LOADINFO = 0x08, - STACKSHOT_GET_MICROSTACKSHOT = 0x10, - STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE = 0x20, - STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE = 0x40, - STACKSHOT_SET_MICROSTACKSHOT_MARK = 0x80, - STACKSHOT_SAVE_KERNEL_FRAMES_ONLY = 0x100, - STACKSHOT_GET_BOOT_PROFILE = 0x200, + STACKSHOT_GET_DQ = 0x01, + STACKSHOT_SAVE_LOADINFO = 0x02, + STACKSHOT_GET_GLOBAL_MEM_STATS = 0x04, + STACKSHOT_SAVE_KEXT_LOADINFO = 0x08, + STACKSHOT_GET_MICROSTACKSHOT = 0x10, + STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE = 0x20, + STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE = 0x40, + STACKSHOT_SET_MICROSTACKSHOT_MARK = 0x80, + STACKSHOT_SAVE_KERNEL_FRAMES_ONLY = 0x100, + STACKSHOT_GET_BOOT_PROFILE = 0x200, + STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS = 0x400, + STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE = 0x800, + STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE = 0x1000, + STACKSHOT_SAVE_IMP_DONATION_PIDS = 0x2000 }; #define STACKSHOT_THREAD_SNAPSHOT_MAGIC 0xfeedface @@ -208,7 +274,10 @@ enum { #ifdef KERNEL_PRIVATE extern unsigned int systemLogDiags; +#if (!defined(__arm64__) && !defined(NAND_PANIC_DEVICE)) || defined(LEGACY_PANIC_LOGS) extern char debug_buf[]; +#endif +extern char *debug_buf_addr; extern unsigned int debug_boot_arg; extern unsigned char *kernel_uuid; extern char kernel_uuid_string[]; @@ -262,9 +331,13 @@ void unpackA(char *inbuf, uint32_t length); void panic_display_system_configuration(void); void panic_display_zprint(void); void panic_display_kernel_aslr(void); +void panic_display_hibb(void); #if CONFIG_ZLEAKS void panic_display_ztrace(void); #endif /* CONFIG_ZLEAKS */ +#if CONFIG_ECC_LOGGING +void panic_display_ecc_errors(void); +#endif /* CONFIG_ECC_LOGGING */ #endif /* MACH_KERNEL_PRIVATE */ #define DB_HALT 0x1 @@ -335,6 +408,8 @@ enum { #endif /* KERNEL_PRIVATE */ +#ifdef KERNEL + __BEGIN_DECLS extern void panic(const char *string, ...) __printflike(1,2); @@ -362,8 +437,13 @@ void panic_context(unsigned int reason, void *ctx, const char *string, ...); (panic)(# ex "@" PANIC_LOCATION, ## __VA_ARGS__) #endif /* CONFIGS_NO_PANIC_STRINGS */ +#ifdef KERNEL_PRIVATE void populate_model_name(char *); unsigned panic_active(void); +#endif + __END_DECLS +#endif /* KERNEL */ + #endif /* _KERN_DEBUG_H_ */ diff --git a/bsd/dev/i386/memmove.c b/osfmk/kern/ecc.h similarity index 61% rename from bsd/dev/i386/memmove.c rename to osfmk/kern/ecc.h index 58ca278a7..e45247044 100644 --- a/bsd/dev/i386/memmove.c +++ b/osfmk/kern/ecc.h @@ -1,5 +1,12 @@ +#if !defined(_KERN_ECC_H) +#define _KERN_ECC_H + +#include + +__BEGIN_DECLS + /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,36 +32,26 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* Copyright (c) 1991,1993 NeXT Computer, Inc. All rights reserved. - * - * File: machdep/ppc/libc/memmove.c - * History: - * - * Fixed sleep integration problem. sleep was not properly - * handling thread states of THREAD_INTERRUPTED and - * THREAD_MUST_TERMINATE, so callers of sleep were getting - * confused and many times looping.
This fixes the (in)famous - * unkillable gdb problem, the PB (and other processes) don't - * terminate, and more. Removed debugging kprintf left in - * bcopy code - * - */ -#include +#define ECC_EVENT_INFO_DATA_ENTRIES 8 +struct ecc_event { + uint8_t id; // ID of memory (e.g. L2C), platform-specific + uint8_t count; // Of uint64_t's used, starting at index 0 + uint64_t data[ECC_EVENT_INFO_DATA_ENTRIES] __attribute__((aligned(8))); // Event-specific data +}; + -#if 0 -void *memcpy(void *dst, const void *src, unsigned int ulen) -{ - bcopy(src, dst, ulen); - return dst; -} -#endif /* 0 */ +#ifdef KERNEL_PRIVATE +extern kern_return_t ecc_log_record_event(const struct ecc_event *ev); +extern boolean_t ecc_log_prefer_panic(void); +#endif -void * -memmove(void *dst, const void *src, size_t ulen) -{ - bcopy(src, dst, ulen); - return dst; -} +#ifdef XNU_KERNEL_PRIVATE +extern void ecc_log_init(void); +extern kern_return_t ecc_log_get_next_event(struct ecc_event *ev); +extern uint32_t ecc_log_get_correction_count(void); +#endif +__END_DECLS +#endif /* !defined(_KERN_ECC_H) */ diff --git a/osfmk/kern/energy_perf.c b/osfmk/kern/energy_perf.c new file mode 100644 index 000000000..f44a09a73 --- /dev/null +++ b/osfmk/kern/energy_perf.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +void gpu_describe(__unused gpu_descriptor_t gdesc) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ENERGY_PERF, 1), gdesc->gpu_id, gdesc->gpu_max_domains, 0, 0, 0); +} + +uint64_t gpu_accumulate_time(__unused uint32_t scope, __unused uint32_t gpu_id, __unused uint32_t gpu_domain, __unused uint64_t gpu_accumulated_ns, __unused uint64_t gpu_tstamp_ns) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ENERGY_PERF, 2), scope, gpu_id, gpu_domain, gpu_accumulated_ns, gpu_tstamp_ns); + ml_gpu_stat_update(gpu_accumulated_ns); + return 0; +} + +static uint64_t io_rate_update_cb_default(__unused uint64_t io_rate_flags, __unused uint64_t read_ops_delta, __unused uint64_t write_ops_delta, __unused uint64_t read_bytes_delta, __unused uint64_t write_bytes_delta) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_ENERGY_PERF, 3), io_rate_flags, read_ops_delta, write_ops_delta, read_bytes_delta, write_bytes_delta); + return 0; +} + +io_rate_update_callback_t io_rate_update_cb = io_rate_update_cb_default; + +void io_rate_update_register(io_rate_update_callback_t io_rate_update_cb_new) { + if (io_rate_update_cb_new != NULL) { + io_rate_update_cb = io_rate_update_cb_new; + } else { + io_rate_update_cb = io_rate_update_cb_default; + } +} + +uint64_t io_rate_update(uint64_t io_rate_flags, uint64_t read_ops_delta, uint64_t write_ops_delta, uint64_t read_bytes_delta, uint64_t write_bytes_delta) { + return io_rate_update_cb(io_rate_flags, read_ops_delta, write_ops_delta, read_bytes_delta, write_bytes_delta); +} diff --git a/osfmk/kern/energy_perf.h b/osfmk/kern/energy_perf.h new file mode 100644 index 000000000..b7e2e1b94 --- /dev/null +++ b/osfmk/kern/energy_perf.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * Interfaces for non-kernel managed devices to inform the kernel of their + * energy and performance relevant activity and resource utilisation, typically + * on a per-thread or task basis. 
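+ *
+ * A minimal registration sketch (hypothetical performance-controller
+ * callback; only io_rate_update_register() and
+ * io_rate_update_callback_t come from this header):
+ *
+ *	static uint64_t my_io_cb(uint64_t flags, uint64_t r_ops,
+ *	    uint64_t w_ops, uint64_t r_bytes, uint64_t w_bytes)
+ *	{
+ *		// fold the deltas into low-overhead per-CPU counters
+ *		return 0;
+ *	}
+ *	...
+ *	io_rate_update_register(my_io_cb);	// NULL restores the default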
+ */ + +#ifndef _KERN_ENERGY_PERF_H_ +#define _KERN_ENERGY_PERF_H_ + +#include + +#ifdef KERNEL +__BEGIN_DECLS + +typedef struct { + uint32_t gpu_id; + uint32_t gpu_max_domains; +} gpu_descriptor; + +typedef gpu_descriptor *gpu_descriptor_t; +/* The GPU is expected to describe itself with this interface prior to reporting + * resource usage. + */ +void gpu_describe(gpu_descriptor_t); + +#define GPU_SCOPE_CURRENT_THREAD (0x1) +#define GPU_SCOPE_MISC (0x2) + +/* GPU utilisation update for the current thread. */ +uint64_t gpu_accumulate_time(uint32_t scope, uint32_t gpu_id, uint32_t gpu_domain, uint64_t gpu_accumulated_ns, uint64_t gpu_tstamp_ns); + +/* Interfaces for the block storage driver to advise the perf. controller of + * recent IOs + */ + +/* Target medium for this set of IOs. Updates can occur in parallel if + * multiple devices exist, hence consumers must synchronize internally, ideally + * in a low-overhead fashion such as per-CPU counters, as this may be invoked + * within the IO path. + */ + +#define IO_MEDIUM_ROTATING (0x0ULL) +#define IO_MEDIUM_SOLID_STATE (0x1ULL) + +/* As there are several priority bands whose nature is evolving, we rely on the + * block storage driver to classify non-performance-critical IOs as "low" + * priority. Separate updates are expected for low/high priority IOs. + */ + +#define IO_PRIORITY_LOW (0x1ULL << 8) + +/* Reserved for estimates of bursts of future IOs; could possibly benefit from + * a time horizon, but it's unclear if it will be specifiable by any layer with + * reasonable accuracy + */ +#define IO_PRIORITY_PREDICTIVE (0x1ULL << 16) + +uint64_t io_rate_update( + uint64_t io_rate_flags, /* Rotating/NAND, IO priority level */ + uint64_t read_ops_delta, + uint64_t write_ops_delta, + uint64_t read_bytes_delta, + uint64_t write_bytes_delta); + +typedef uint64_t (*io_rate_update_callback_t) (uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); + +void io_rate_update_register(io_rate_update_callback_t); + +__END_DECLS +#endif /* KERNEL */ + +#endif /* _KERN_ENERGY_PERF_H_ */ diff --git a/osfmk/kern/exc_resource.h b/osfmk/kern/exc_resource.h index a48f12608..336c41327 100644 --- a/osfmk/kern/exc_resource.h +++ b/osfmk/kern/exc_resource.h @@ -63,7 +63,8 @@ #define RESOURCE_TYPE_MEMORY 3 /* RESOURCE_TYPE_CPU flavors */ -#define FLAVOR_CPU_MONITOR 1 +#define FLAVOR_CPU_MONITOR 1 +#define FLAVOR_CPU_MONITOR_FATAL 2 /* * RESOURCE_TYPE_CPU exception code & subcode. @@ -74,7 +75,7 @@ * code: * +-----------------------------------------------+ * |[63:61] RESOURCE |[60:58] FLAVOR_CPU_ |[57:32] | - * |_TYPE_CPU |MONITOR |Unused | + * |_TYPE_CPU |MONITOR[_FATAL] |Unused | * +-----------------------------------------------+ * |[31:7] Interval (sec) | [6:0] CPU limit (%)| * +-----------------------------------------------+ diff --git a/osfmk/kern/exception.c b/osfmk/kern/exception.c index d8128145d..7d9cb9342 100644 --- a/osfmk/kern/exception.c +++ b/osfmk/kern/exception.c @@ -102,6 +102,12 @@ kern_return_t exception_deliver( struct exception_action *excp, lck_mtx_t *mutex); +static kern_return_t +check_exc_receiver_dependancy( + exception_type_t exception, + struct exception_action *excp, + lck_mtx_t *mutex); + #ifdef MACH_BSD kern_return_t bsd_exception( exception_type_t exception, @@ -296,6 +302,42 @@ exception_deliver( }/* switch */ } +/* + * Routine: check_exc_receiver_dependancy + * Purpose: + * Verify that the port destined for receiving this exception is not + * on the current task. This would cause hang in kernel for + * EXC_CRASH primarily. 
Note: If the port is transferred + * between check and delivery, a deadlock may occur. + * + * Conditions: + * Nothing locked and no resources held. + * Called from an exception context. + * Returns: + * KERN_SUCCESS if it's OK to send the exception message. + */ +kern_return_t +check_exc_receiver_dependancy( + exception_type_t exception, + struct exception_action *excp, + lck_mtx_t *mutex) +{ + kern_return_t retval = KERN_SUCCESS; + + if (excp == NULL || exception != EXC_CRASH) + return retval; + + task_t task = current_task(); + lck_mtx_lock(mutex); + ipc_port_t xport = excp[exception].port; + if ( IP_VALID(xport) + && ip_active(xport) + && task->itk_space == xport->ip_receiver) + retval = KERN_FAILURE; + lck_mtx_unlock(mutex); + return retval; +} + /* * Routine: exception * Purpose: @@ -329,27 +371,37 @@ exception_triage( * Try to raise the exception at the activation level. */ mutex = &thread->mutex; - kr = exception_deliver(thread, exception, code, codeCnt, thread->exc_actions, mutex); - if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) - goto out; + if (KERN_SUCCESS == check_exc_receiver_dependancy(exception, thread->exc_actions, mutex)) + { + kr = exception_deliver(thread, exception, code, codeCnt, thread->exc_actions, mutex); + if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) + goto out; + } /* * Maybe the task level will handle it. */ task = current_task(); mutex = &task->lock; - kr = exception_deliver(thread, exception, code, codeCnt, task->exc_actions, mutex); - if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) - goto out; + if (KERN_SUCCESS == check_exc_receiver_dependancy(exception, task->exc_actions, mutex)) + { + kr = exception_deliver(thread, exception, code, codeCnt, task->exc_actions, mutex); + if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) + goto out; + } /* * How about at the host level? */ host_priv = host_priv_self(); mutex = &host_priv->lock; - kr = exception_deliver(thread, exception, code, codeCnt, host_priv->exc_actions, mutex); - if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) - goto out; + + if (KERN_SUCCESS == check_exc_receiver_dependancy(exception, host_priv->exc_actions, mutex)) + { + kr = exception_deliver(thread, exception, code, codeCnt, host_priv->exc_actions, mutex); + if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) + goto out; + } /* * Nobody handled it, terminate the task.
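For orientation, a condensed sketch of the pattern the exception_triage() hunks above apply at each of the three levels (thread, task, host). The wrapper below is illustrative only, not part of the patch; check_exc_receiver_dependancy() and exception_deliver() are the routines shown in this diff:

/* Illustrative helper, not in the patch: one delivery attempt per level. */
static kern_return_t
try_exception_level(thread_t thread, exception_type_t exception,
	mach_exception_data_t code, mach_msg_type_number_t codeCnt,
	struct exception_action *actions, lck_mtx_t *mutex)
{
	/* Skip a level whose EXC_CRASH port loops back into the crashing
	 * task itself; delivering there could deadlock in the kernel. */
	if (check_exc_receiver_dependancy(exception, actions, mutex) != KERN_SUCCESS)
		return KERN_FAILURE;	/* caller falls through to the next level */
	return exception_deliver(thread, exception, code, codeCnt, actions, mutex);
}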
diff --git a/osfmk/kern/extmod_statistics.h b/osfmk/kern/extmod_statistics.h index 5bf20066a..4aa18c42a 100644 --- a/osfmk/kern/extmod_statistics.h +++ b/osfmk/kern/extmod_statistics.h @@ -34,7 +34,7 @@ */ #ifndef _KERN_EXTMOD_STATISTICS_H_ -#define _KERN_EXTMODE_STATISTICS_H_ +#define _KERN_EXTMOD_STATISTICS_H_ #include #include diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c index 8d0806589..c534a5c57 100644 --- a/osfmk/kern/hibernate.c +++ b/osfmk/kern/hibernate.c @@ -106,6 +106,8 @@ hibernate_setup(IOHibernateImageHeader * header, kern_return_t retval = KERN_SUCCESS; hibernate_create_paddr_map(); + + hibernate_reset_stats(); if (vmflush && (COMPRESSED_PAGER_IS_ACTIVE || dp_isssd)) { @@ -164,4 +166,3 @@ hibernate_teardown(hibernate_page_list_t * page_list, } return (KERN_SUCCESS); } - diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index e61673978..0fa13c974 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -933,8 +933,6 @@ host_set_special_port( { if (host_priv == HOST_PRIV_NULL || id <= HOST_MAX_SPECIAL_KERNEL_PORT || id > HOST_MAX_SPECIAL_PORT ) { - if (IP_VALID(port)) - ipc_port_release_send(port); return KERN_INVALID_ARGUMENT; } diff --git a/osfmk/kern/host_notify.c b/osfmk/kern/host_notify.c index 1ca87dcaf..83826c191 100644 --- a/osfmk/kern/host_notify.c +++ b/osfmk/kern/host_notify.c @@ -100,7 +100,7 @@ host_request_notification( lck_mtx_lock(&host_notify_lock); ip_lock(port); - if (!ip_active(port) || ip_kotype(port) != IKOT_NONE) { + if (!ip_active(port) || port->ip_tempowner || ip_kotype(port) != IKOT_NONE) { ip_unlock(port); lck_mtx_unlock(&host_notify_lock); @@ -167,10 +167,11 @@ host_notify_all( send_queue.next->prev = &send_queue; send_queue.prev->next = &send_queue; - msg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0); + msg->msgh_bits = + MACH_MSGH_BITS_SET(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0, 0, 0); msg->msgh_local_port = MACH_PORT_NULL; + msg->msgh_voucher_port = MACH_PORT_NULL; msg->msgh_id = host_notify_replyid[notify_type]; - msg->msgh_reserved = 0; while ((entry = (host_notify_t)dequeue(&send_queue)) != NULL) { ipc_port_t port; diff --git a/osfmk/kern/hv_support.c b/osfmk/kern/hv_support.c new file mode 100644 index 000000000..9d032d2d0 --- /dev/null +++ b/osfmk/kern/hv_support.c @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include + +#if defined(__x86_64__) && CONFIG_VMX +#include +#endif + +#include + +int hv_support_available = 0; + +/* callbacks for tasks/threads with associated hv objects */ +hv_callbacks_t hv_callbacks = { + .dispatch = NULL, /* thread is being dispatched for execution */ + .preempt = NULL, /* thread is being preempted */ + .thread_destroy = NULL, /* thread is being destroyed */ + .task_destroy = NULL, /* task is being destroyed */ + .volatile_state = NULL, /* thread state is becoming volatile */ + .memory_pressure = NULL /* memory pressure notification */ +}; + +/* trap tables for hv_*_trap syscalls */ +static hv_trap_table_t hv_trap_table[] = { + [HV_TASK_TRAP] = { + .traps = NULL, + .trap_count = 0 + }, + [HV_THREAD_TRAP] = { + .traps = NULL, + .trap_count = 0 + } +}; + +static int hv_callbacks_enabled = 0; +static int hv_mp_notify_enabled = 0; +static int hv_mp_notify_destroy = 0; +static lck_grp_t *hv_support_lck_grp = NULL; +static lck_mtx_t *hv_support_lck_mtx = NULL; +static thread_t hv_mp_notify_thread = THREAD_NULL; +static void hv_mp_notify(void); + +/* hv_support boot initialization */ +void +hv_support_init(void) { +#if defined(__x86_64__) && CONFIG_VMX + hv_support_available = vmx_hv_support(); +#endif + + hv_support_lck_grp = lck_grp_alloc_init("hv_support", LCK_GRP_ATTR_NULL); + assert(hv_support_lck_grp); + + hv_support_lck_mtx = lck_mtx_alloc_init(hv_support_lck_grp, LCK_ATTR_NULL); + assert(hv_support_lck_mtx); +} + +/* returns true if hv_support is available on this machine */ +int +hv_get_support(void) { + return hv_support_available; +} + +/* associate an hv object with the current task */ +void +hv_set_task_target(void *target) { + current_task()->hv_task_target = target; +} + +/* associate an hv object with the current thread */ +void +hv_set_thread_target(void *target) { + current_thread()->hv_thread_target = target; +} + +/* get hv object associated with the current task */ +void* +hv_get_task_target(void) { + return current_task()->hv_task_target; +} + +/* get hv object associated with the current thread */ +void* +hv_get_thread_target(void) { + return current_thread()->hv_thread_target; +} + +/* test if a given thread state may be volatile between dispatch + and preemption */ +int +hv_get_volatile_state(hv_volatile_state_t state) { + int is_volatile = 0; + +#if (defined(__x86_64__)) + if (state == HV_DEBUG_STATE) { + is_volatile = (current_thread()->machine.ids != NULL); + } +#endif + + return is_volatile; +} + +/* memory pressure monitor thread */ +static void +hv_mp_notify(void) { + while (1) { + mach_vm_pressure_monitor(TRUE, 0, NULL, NULL); + + lck_mtx_lock(hv_support_lck_mtx); + if (hv_mp_notify_destroy == 1) { + hv_mp_notify_destroy = 0; + hv_mp_notify_enabled = 0; + lck_mtx_unlock(hv_support_lck_mtx); + break; + } else { + hv_callbacks.memory_pressure(NULL); + } + lck_mtx_unlock(hv_support_lck_mtx); + } + + thread_deallocate(current_thread()); +} + +/* subscribe to memory pressure notifications */ +kern_return_t +hv_set_mp_notify(void) { + kern_return_t kr; + + lck_mtx_lock(hv_support_lck_mtx); + if (hv_callbacks_enabled == 0) { + lck_mtx_unlock(hv_support_lck_mtx); + return KERN_FAILURE; + } + + if (hv_mp_notify_enabled == 1) { + hv_mp_notify_destroy = 0; + lck_mtx_unlock(hv_support_lck_mtx); + return KERN_SUCCESS; + } + + kr = 
kernel_thread_start((thread_continue_t) &hv_mp_notify, NULL, + &hv_mp_notify_thread); + + if (kr == KERN_SUCCESS) { + hv_mp_notify_enabled = 1; + } + lck_mtx_unlock(hv_support_lck_mtx); + + return kr; +} + +/* unsubscribe from memory pressure notifications */ +void +hv_release_mp_notify(void) { + lck_mtx_lock(hv_support_lck_mtx); + if (hv_mp_notify_enabled == 1) { + hv_mp_notify_destroy = 1; + } + lck_mtx_unlock(hv_support_lck_mtx); +} + +/* register a list of trap handlers for the hv_*_trap syscalls */ +kern_return_t +hv_set_traps(hv_trap_type_t trap_type, const hv_trap_t *traps, + unsigned trap_count) +{ + hv_trap_table_t *trap_table = &hv_trap_table[trap_type]; + kern_return_t kr = KERN_FAILURE; + + lck_mtx_lock(hv_support_lck_mtx); + if (trap_table->trap_count == 0) { + trap_table->traps = traps; + OSMemoryBarrier(); + trap_table->trap_count = trap_count; + kr = KERN_SUCCESS; + } + lck_mtx_unlock(hv_support_lck_mtx); + + return kr; +} + +/* release hv_*_trap traps */ +void +hv_release_traps(hv_trap_type_t trap_type) { + hv_trap_table_t *trap_table = &hv_trap_table[trap_type]; + + lck_mtx_lock(hv_support_lck_mtx); + trap_table->trap_count = 0; + OSMemoryBarrier(); + trap_table->traps = NULL; + lck_mtx_unlock(hv_support_lck_mtx); +} + +/* register callbacks for certain task/thread events for tasks/threads with + associated hv objects */ +kern_return_t +hv_set_callbacks(hv_callbacks_t callbacks) { + kern_return_t kr = KERN_FAILURE; + + lck_mtx_lock(hv_support_lck_mtx); + if (hv_callbacks_enabled == 0) { + hv_callbacks = callbacks; + hv_callbacks_enabled = 1; + kr = KERN_SUCCESS; + } + lck_mtx_unlock(hv_support_lck_mtx); + + return kr; +} + +/* release callbacks for task/thread events */ +void +hv_release_callbacks(void) { + lck_mtx_lock(hv_support_lck_mtx); + hv_callbacks = (hv_callbacks_t) { + .dispatch = NULL, + .preempt = NULL, + .thread_destroy = NULL, + .task_destroy = NULL, + .volatile_state = NULL, + .memory_pressure = NULL + }; + + hv_callbacks_enabled = 0; + lck_mtx_unlock(hv_support_lck_mtx); +} + +/* dispatch hv_task_trap/hv_thread_trap syscalls to trap handlers, + fail for invalid index or absence of trap handlers, trap handler is + responsible for validating targets */ +#define HV_TRAP_DISPATCH(type, index, target, argument)\ + ((__probable(index < hv_trap_table[type].trap_count)) ? \ + hv_trap_table[type].traps[index](target, argument) \ + : KERN_INVALID_ARGUMENT) + +kern_return_t hv_task_trap(uint64_t index, uint64_t arg) { + return HV_TRAP_DISPATCH(HV_TASK_TRAP, index, hv_get_task_target(), arg); +} + +kern_return_t hv_thread_trap(uint64_t index, uint64_t arg) { + return HV_TRAP_DISPATCH(HV_THREAD_TRAP, index, hv_get_thread_target(), arg); +} diff --git a/osfmk/kern/hv_support.h b/osfmk/kern/hv_support.h new file mode 100644 index 000000000..485654f70 --- /dev/null +++ b/osfmk/kern/hv_support.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _KERN_HV_SUPPORT_H_ +#define _KERN_HV_SUPPORT_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include + +typedef enum { + HV_DEBUG_STATE +} hv_volatile_state_t; + +typedef enum { + HV_TASK_TRAP = 0, + HV_THREAD_TRAP = 1 +} hv_trap_type_t; + +typedef kern_return_t (*hv_trap_t) (void *thread_target, uint64_t arg); +typedef void (*hv_callback_0_t)(void *target); +typedef void (*hv_callback_1_t)(void *target, int argument); + +typedef struct { + const hv_trap_t *traps; + unsigned trap_count; +} hv_trap_table_t; + +typedef struct { + hv_callback_0_t dispatch; + hv_callback_0_t preempt; + hv_callback_0_t thread_destroy; + hv_callback_0_t task_destroy; + hv_callback_1_t volatile_state; + hv_callback_0_t memory_pressure; +} hv_callbacks_t; + +extern hv_callbacks_t hv_callbacks; +extern int hv_support_available; + +extern void hv_support_init(void); +extern int hv_get_support(void); +extern void hv_set_task_target(void *target); +extern void hv_set_thread_target(void *target); +extern void *hv_get_task_target(void); +extern void *hv_get_thread_target(void); +extern int hv_get_volatile_state(hv_volatile_state_t state); +extern kern_return_t hv_set_mp_notify(void); +extern void hv_release_mp_notify(void); +extern kern_return_t hv_set_traps(hv_trap_type_t trap_type, + const hv_trap_t *traps, unsigned trap_count); +extern void hv_release_traps(hv_trap_type_t trap_type); +extern kern_return_t hv_set_callbacks(hv_callbacks_t callbacks); +extern void hv_release_callbacks(void) ; +extern kern_return_t hv_task_trap(uint64_t index, uint64_t arg); +extern kern_return_t hv_thread_trap(uint64_t index, uint64_t arg); + +#if defined(__cplusplus) +} +#endif + +#endif /* _KERN_HV_SUPPORT_H_ */ diff --git a/osfmk/kern/ipc_host.c b/osfmk/kern/ipc_host.c index a0a7ccef5..73cc4d4d3 100644 --- a/osfmk/kern/ipc_host.c +++ b/osfmk/kern/ipc_host.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include #include @@ -563,10 +562,15 @@ host_set_exception_ports( return KERN_INVALID_ARGUMENT; } } - /* Cannot easily check "new_flavor", but that just means that - * the flavor in the generated exception message might be garbage: - * GIGO + + /* + * Check the validity of the thread_state_flavor by calling the + * VALID_THREAD_STATE_FLAVOR architecture dependent macro defined in + * osfmk/mach/ARCHITECTURE/thread_status.h */ + if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor)) + return (KERN_INVALID_ARGUMENT); + host_lock(host_priv); for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { @@ -706,9 +710,8 @@ 
host_swap_exception_ports( } } - /* Cannot easily check "new_flavor", but that just means that - * the flavor in the generated exception message might be garbage: - * GIGO */ + if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor)) + return (KERN_INVALID_ARGUMENT); host_lock(host_priv); diff --git a/osfmk/kern/ipc_kobject.c b/osfmk/kern/ipc_kobject.c index bb224e8f0..6fe3e7b92 100644 --- a/osfmk/kern/ipc_kobject.c +++ b/osfmk/kern/ipc_kobject.c @@ -72,9 +72,7 @@ #include #include -#include #include -#include #include #include @@ -97,10 +95,11 @@ #include #include #include -#include #include #include #include +#include +#include #if VM32_SUPPORT #include #endif @@ -131,7 +130,7 @@ #include #include #include -#include +#include #include #include @@ -160,20 +159,16 @@ typedef struct { #endif } mig_hash_t; -#define MAX_MIG_ENTRIES 1024 +#define MAX_MIG_ENTRIES 1031 #define MIG_HASH(x) (x) #ifndef max #define max(a,b) (((a) > (b)) ? (a) : (b)) #endif /* max */ -mig_hash_t mig_buckets[MAX_MIG_ENTRIES]; -int mig_table_max_displ; -mach_msg_size_t mig_reply_size; - -#if CONFIG_MACF -#include -#endif +static mig_hash_t mig_buckets[MAX_MIG_ENTRIES]; +static int mig_table_max_displ; +static mach_msg_size_t mig_reply_size = sizeof(mig_reply_error_t); @@ -188,7 +183,6 @@ const struct mig_subsystem *mig_e[] = { (const struct mig_subsystem *)&processor_subsystem, (const struct mig_subsystem *)&processor_set_subsystem, (const struct mig_subsystem *)&is_iokit_subsystem, - (const struct mig_subsystem *)&memory_object_name_subsystem, (const struct mig_subsystem *)&lock_set_subsystem, (const struct mig_subsystem *)&task_subsystem, (const struct mig_subsystem *)&thread_act_subsystem, @@ -197,6 +191,8 @@ const struct mig_subsystem *mig_e[] = { #endif (const struct mig_subsystem *)&UNDReply_subsystem, (const struct mig_subsystem *)&default_pager_object_subsystem, + (const struct mig_subsystem *)&mach_voucher_subsystem, + (const struct mig_subsystem *)&mach_voucher_attr_control_subsystem, #if XK_PROXY (const struct mig_subsystem *)&do_uproxy_xk_uproxy_subsystem, @@ -207,10 +203,6 @@ const struct mig_subsystem *mig_e[] = { #if MCMSG && iPSC860 (const struct mig_subsystem *)&mcmsg_info_subsystem, #endif /* MCMSG && iPSC860 */ - -#if CONFIG_MACF - (const struct mig_subsystem *)&security_subsystem, -#endif }; void @@ -224,7 +216,6 @@ mig_init(void) range = mig_e[i]->end - mig_e[i]->start; if (!mig_e[i]->start || range < 0) panic("the msgh_ids in mig_e[] aren't valid!"); - mig_reply_size = max(mig_reply_size, mig_e[i]->maxsize); for (j = 0; j < range; j++) { if (mig_e[i]->routine[j].stub_routine) { @@ -328,10 +319,10 @@ ipc_kobject_server( OutP->Head.msgh_size = sizeof(mig_reply_error_t); OutP->Head.msgh_bits = - MACH_MSGH_BITS(MACH_MSGH_BITS_LOCAL(InP->msgh_bits), 0); + MACH_MSGH_BITS_SET(MACH_MSGH_BITS_LOCAL(InP->msgh_bits), 0, 0, 0); OutP->Head.msgh_remote_port = InP->msgh_local_port; - OutP->Head.msgh_local_port = MACH_PORT_NULL; - OutP->Head.msgh_reserved = (mach_msg_size_t)InP->msgh_id; /* useful for debug */ + OutP->Head.msgh_local_port = MACH_PORT_NULL; + OutP->Head.msgh_voucher_port = MACH_PORT_NULL; OutP->Head.msgh_id = InP->msgh_id + 100; #undef InP @@ -390,6 +381,17 @@ ipc_kobject_server( } *destp = IP_NULL; + /* + * Destroy voucher. The kernel MIG servers never take ownership + * of vouchers sent in messages. Swallow any such rights here. 
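+ *
+ * (Illustrative aside, not in the original comment: a kernel routine
+ * that did want to keep the voucher would need its own send right,
+ * e.g. something like
+ *	ipc_port_t v = ipc_port_copy_send(request->ikm_voucher);
+ * taken before this point; the right carried with the message itself
+ * is always released below.)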
+ */ + if (IP_VALID(request->ikm_voucher)) { + assert(MACH_MSG_TYPE_PORT_SEND == + MACH_MSGH_BITS_VOUCHER(request->ikm_header->msgh_bits)); + ipc_port_release_send(request->ikm_voucher); + request->ikm_voucher = IP_NULL; + } + if (!(reply->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) && ((mig_reply_error_t *) reply->ikm_header)->RetCode != KERN_SUCCESS) kr = ((mig_reply_error_t *) reply->ikm_header)->RetCode; @@ -465,11 +467,6 @@ ipc_kobject_set( { ip_lock(port); ipc_kobject_set_atomically(port, kobject, type); - -#if CONFIG_MACF_MACH - mac_port_label_update_kobject (&port->ip_label, type); -#endif - ip_unlock(port); } @@ -519,12 +516,6 @@ ipc_kobject_destroy( host_notify_port_destroy(port); break; -#if CONFIG_MACF_MACH - case IKOT_LABELH: - labelh_destroy(port); - break; -#endif - default: break; } @@ -541,6 +532,14 @@ ipc_kobject_notify( ((mig_reply_error_t *) reply_header)->RetCode = MIG_NO_REPLY; switch (request_header->msgh_id) { case MACH_NOTIFY_NO_SENDERS: + if (ip_kotype(port) == IKOT_VOUCHER) { + ipc_voucher_notify(request_header); + return TRUE; + } + if (ip_kotype(port) == IKOT_VOUCHER_ATTR_CONTROL) { + ipc_voucher_attr_control_notify(request_header); + return TRUE; + } if(ip_kotype(port) == IKOT_NAMED_ENTRY) { ip_lock(port); diff --git a/osfmk/kern/ipc_kobject.h b/osfmk/kern/ipc_kobject.h index d5bfe7895..557c7a0c0 100644 --- a/osfmk/kern/ipc_kobject.h +++ b/osfmk/kern/ipc_kobject.h @@ -124,11 +124,13 @@ typedef natural_t ipc_kobject_type_t; #define IKOT_FILEPORT 34 #define IKOT_LABELH 35 #define IKOT_TASK_RESUME 36 +#define IKOT_VOUCHER 37 +#define IKOT_VOUCHER_ATTR_CONTROL 38 /* * Add new entries here and adjust IKOT_UNKNOWN. * Please keep ipc/ipc_object.c:ikot_print_array up to date. */ -#define IKOT_UNKNOWN 37 /* magic catchall */ +#define IKOT_UNKNOWN 39 /* magic catchall */ #define IKOT_MAX_TYPE (IKOT_UNKNOWN+1) /* # of IKOT_ types */ diff --git a/osfmk/kern/ipc_mig.c b/osfmk/kern/ipc_mig.c index 632275883..6219ddfbc 100644 --- a/osfmk/kern/ipc_mig.c +++ b/osfmk/kern/ipc_mig.c @@ -557,7 +557,7 @@ mach_msg_overwrite( return MACH_RCV_TOO_LARGE; } - mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL); + mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL, option); if (mr != MACH_MSG_SUCCESS) { if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { ipc_kmsg_put_to_kernel(msg, kmsg, diff --git a/osfmk/kern/ipc_tt.c b/osfmk/kern/ipc_tt.c index 23a582401..9a870298f 100644 --- a/osfmk/kern/ipc_tt.c +++ b/osfmk/kern/ipc_tt.c @@ -144,16 +144,9 @@ ipc_task_init( task->itk_nself = nport; task->itk_resume = IP_NULL; /* Lazily allocated on-demand */ task->itk_sself = ipc_port_make_send(kport); + task->itk_debug_control = IP_NULL; task->itk_space = space; -#if CONFIG_MACF_MACH - if (parent) - mac_task_label_associate(parent, task, &parent->maclabel, - &task->maclabel, &kport->ip_label); - else - mac_task_label_associate_kernel(task, &task->maclabel, &kport->ip_label); -#endif - if (parent == TASK_NULL) { ipc_port_t port; @@ -344,6 +337,9 @@ ipc_task_terminate( if (IP_VALID(task->itk_task_access)) ipc_port_release_send(task->itk_task_access); + if (IP_VALID(task->itk_debug_control)) + ipc_port_release_send(task->itk_debug_control); + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) if (IP_VALID(task->itk_registered[i])) ipc_port_release_send(task->itk_registered[i]); @@ -407,7 +403,12 @@ ipc_task_reset( old_exc_actions[i] = IP_NULL; } }/* for */ - + + if (IP_VALID(task->itk_debug_control)) { + ipc_port_release_send(task->itk_debug_control); + } + task->itk_debug_control = 
IP_NULL; + itk_unlock(task); /* release the naked send rights */ @@ -924,6 +925,10 @@ task_get_special_port( port = ipc_port_copy_send(task->itk_task_access); break; + case TASK_DEBUG_CONTROL_PORT: + port = ipc_port_copy_send(task->itk_debug_control); + break; + default: itk_unlock(task); return KERN_INVALID_ARGUMENT; @@ -983,6 +988,11 @@ task_set_special_port( whichp = &task->itk_task_access; break; + case TASK_DEBUG_CONTROL_PORT: + whichp = &task->itk_debug_control; + break; + + default: return KERN_INVALID_ARGUMENT; }/* switch */ @@ -1000,13 +1010,6 @@ task_set_special_port( return KERN_NO_ACCESS; } -#if CONFIG_MACF_MACH - if (mac_task_check_service(current_task(), task, "set_special_port")) { - itk_unlock(task); - return KERN_NO_ACCESS; - } -#endif - old = *whichp; *whichp = port; itk_unlock(task); @@ -1644,7 +1647,7 @@ thread_set_exception_ports( * VALID_THREAD_STATE_FLAVOR architecture dependent macro defined in * osfmk/mach/ARCHITECTURE/thread_status.h */ - if (!VALID_THREAD_STATE_FLAVOR(new_flavor)) + if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor)) return (KERN_INVALID_ARGUMENT); thread_mtx_lock(thread); @@ -1713,6 +1716,14 @@ task_set_exception_ports( } } + /* + * Check the validity of the thread_state_flavor by calling the + * VALID_THREAD_STATE_FLAVOR architecture dependent macro defined in + * osfmk/mach/ARCHITECTURE/thread_status.h + */ + if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor)) + return (KERN_INVALID_ARGUMENT); + itk_lock(task); if (task->itk_self == IP_NULL) { @@ -1809,6 +1820,9 @@ thread_swap_exception_ports( } } + if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor)) + return (KERN_INVALID_ARGUMENT); + thread_mtx_lock(thread); if (!thread->active) { @@ -1907,6 +1921,9 @@ task_swap_exception_ports( } } + if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor)) + return (KERN_INVALID_ARGUMENT); + itk_lock(task); if (task->itk_self == IP_NULL) { diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index a5febf94c..5a799ee77 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -72,7 +72,6 @@ #include #include #include -#include #include #include #include diff --git a/osfmk/kern/kalloc.h b/osfmk/kern/kalloc.h index 41fc204a3..5a3808a2d 100644 --- a/osfmk/kern/kalloc.h +++ b/osfmk/kern/kalloc.h @@ -75,8 +75,6 @@ __END_DECLS #ifdef MACH_KERNEL_PRIVATE -#include - extern void kalloc_init(void); extern void kalloc_fake_zone_init( int ); diff --git a/osfmk/kern/kern_ecc.c b/osfmk/kern/kern_ecc.c new file mode 100644 index 000000000..8821a0962 --- /dev/null +++ b/osfmk/kern/kern_ecc.c @@ -0,0 +1,134 @@ +/* +* Copyright (c) 2013 Apple Inc. All rights reserved. +* +* @APPLE_OSREFERENCE_LICENSE_HEADER_START@ +* +* This file contains Original Code and/or Modifications of Original Code +* as defined in and that are subject to the Apple Public Source License +* Version 2.0 (the 'License'). You may not use this file except in +* compliance with the License. The rights granted to you under the License +* may not be used to create, or enable the creation or redistribution of, +* unlawful or unlicensed copies of an Apple operating system, or to +* circumvent, violate, or enable the circumvention or violation of, any +* terms of an Apple operating system software license agreement. +* +* Please obtain a copy of the License at +* http://www.opensource.apple.com/apsl/ and read it before using this file. 
+* +* The Original Code and all software distributed under the License are +* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +* Please see the License for the specific language governing rights and +* limitations under the License. +* +* @APPLE_OSREFERENCE_LICENSE_HEADER_END@ +*/ +#include +#include +#include +#include +#include +#include +#include + +/* + * ECC data. Not really KPCs, but this still seems like the + * best home for this code. + * + * Circular buffer of events. When we fill up, drop data. + */ +#define ECC_EVENT_BUFFER_COUNT 5 +struct ecc_event ecc_data[ECC_EVENT_BUFFER_COUNT]; +static uint32_t ecc_data_next_read; +static uint32_t ecc_data_next_write; +static boolean_t ecc_data_empty = TRUE; // next read == next write : empty or full? +static boolean_t ecc_prefer_panic = TRUE; +static lck_grp_t *ecc_data_lock_group; +static lck_spin_t ecc_data_lock; +static uint32_t ecc_correction_count; + +void +ecc_log_init() +{ + ecc_prefer_panic = !PE_reboot_on_panic(); + ecc_data_lock_group = lck_grp_alloc_init("ecc-data", NULL); + lck_spin_init(&ecc_data_lock, ecc_data_lock_group, NULL); + OSMemoryBarrier(); +} + +boolean_t +ecc_log_prefer_panic(void) +{ + OSMemoryBarrier(); + return ecc_prefer_panic; +} + +uint32_t +ecc_log_get_correction_count() +{ + return ecc_correction_count; +} + +kern_return_t +ecc_log_record_event(const struct ecc_event *ev) +{ + spl_t x; + + if (ev->count > ECC_EVENT_INFO_DATA_ENTRIES) { + panic("Count of %u on ecc event is too large.", (unsigned)ev->count); + } + + x = splhigh(); + lck_spin_lock(&ecc_data_lock); + + ecc_correction_count++; + + if (ecc_data_next_read == ecc_data_next_write && !ecc_data_empty) { + lck_spin_unlock(&ecc_data_lock); + splx(x); + return KERN_FAILURE; + } + + bcopy(ev, &ecc_data[ecc_data_next_write], sizeof(*ev)); + ecc_data_next_write++; + ecc_data_next_write %= ECC_EVENT_BUFFER_COUNT; + ecc_data_empty = FALSE; + + lck_spin_unlock(&ecc_data_lock); + splx(x); + + return KERN_SUCCESS; +} + + +kern_return_t +ecc_log_get_next_event(struct ecc_event *ev) +{ + spl_t x; + + x = splhigh(); + lck_spin_lock(&ecc_data_lock); + + if (ecc_data_empty) { + assert(ecc_data_next_write == ecc_data_next_read); + + lck_spin_unlock(&ecc_data_lock); + splx(x); + return KERN_FAILURE; + } + + bcopy(&ecc_data[ecc_data_next_read], ev, sizeof(*ev)); + ecc_data_next_read++; + ecc_data_next_read %= ECC_EVENT_BUFFER_COUNT; + + if (ecc_data_next_read == ecc_data_next_write) { + ecc_data_empty = TRUE; + } + + lck_spin_unlock(&ecc_data_lock); + splx(x); + + return KERN_SUCCESS; +} diff --git a/osfmk/kern/kern_stackshot.c b/osfmk/kern/kern_stackshot.c new file mode 100644 index 000000000..cff2dbb75 --- /dev/null +++ b/osfmk/kern/kern_stackshot.c @@ -0,0 +1,837 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
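
The ecc_data ring above keeps an explicit empty flag because next_read == next_write is ambiguous between the full and empty states, and leaving one slot unused instead would shrink an already tiny five-entry buffer. A user-space model of the same bookkeeping, with illustrative names that are not from the patch:

    #include <stdbool.h>

    #define SLOTS 5

    static int ring[SLOTS];
    static unsigned next_read, next_write;
    static bool ring_empty = true;      /* next read == next write: empty or full? */

    static bool ring_put(int v)
    {
        if (next_read == next_write && !ring_empty)
            return false;               /* full: drop the event, as ecc_log_record_event does */
        ring[next_write] = v;
        next_write = (next_write + 1) % SLOTS;
        ring_empty = false;
        return true;
    }

    static bool ring_get(int *v)
    {
        if (ring_empty)
            return false;
        *v = ring[next_read];
        next_read = (next_read + 1) % SLOTS;
        if (next_read == next_write)
            ring_empty = true;
        return true;
    }
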
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#ifdef IMPORTANCE_INHERITANCE +#include +#endif +#include +#include +#include + +#include +#include + +#include +#include + +#include /* bcopy */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern unsigned int not_in_kdp; + +/* + * TODO: Even hackier than the other pieces. This should really + * be moved off of kdp_pmap, and we should probably separate + * machine_trace_thread out of the kdp code. + */ +extern pmap_t kdp_pmap; +extern addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); + +int kdp_snapshot = 0; +static int stack_snapshot_ret = 0; +static unsigned stack_snapshot_bytes_traced = 0; + +static void *stack_snapshot_buf; +static uint32_t stack_snapshot_bufsize; +int stack_snapshot_pid; +static uint32_t stack_snapshot_flags; +static uint32_t stack_snapshot_dispatch_offset; +static unsigned int old_debugger; + +void do_stackshot(void); +void kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, + uint32_t flags, uint32_t dispatch_offset); +void kdp_snapshot_postflight(void); +static int kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, + uint32_t flags, uint32_t dispatch_offset, uint32_t *pbytesTraced); +int kdp_stack_snapshot_geterror(void); +int kdp_stack_snapshot_bytes_traced(void); +int kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced); +static int pid_from_task(task_t task); +static uint64_t proc_uniqueid_from_task(task_t task); +static void kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap); +static boolean_t kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size); +static uint64_t proc_was_throttled_from_task(task_t task); + +extern int proc_pid(void *p); +extern uint64_t proc_uniqueid(void *p); +extern uint64_t proc_was_throttled(void *p); +extern uint64_t proc_did_throttle(void *p); +static uint64_t proc_did_throttle_from_task(task_t task); +extern void proc_name_kdp(task_t task, char *buf, int size); +extern int proc_threadname_kdp(void *uth, char *buf, size_t size); +extern void proc_starttime_kdp(void *p, uint64_t *tv_sec, uint64_t *tv_usec); + +extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */ +extern void bcopy_phys(addr64_t, addr64_t, vm_size_t); +extern int machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); +extern int machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, 
boolean_t user_p); + +/* Validates that the given address is both a valid page and has + * default caching attributes for the current kdp_pmap. Returns + * 0 if the address is invalid, and a kernel virtual address for + * the given address if it is valid. + */ +vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr); + +/* Clears caching information used by the above validation routine + * (in case the kdp_pmap has been changed or cleared). + */ +void machine_trace_thread_clear_validation_cache(void); + +#define MAX_FRAMES 1000 + +typedef struct thread_snapshot *thread_snapshot_t; +typedef struct task_snapshot *task_snapshot_t; + +#if CONFIG_KDP_INTERACTIVE_DEBUGGING +extern kdp_send_t kdp_en_send_pkt; +#endif + +/* + * Globals to support machine_trace_thread_get_kva. + */ +static vm_offset_t prev_target_page = 0; +static vm_offset_t prev_target_kva = 0; +static boolean_t validate_next_addr = TRUE; + + +/* + * Method for grabbing timer values safely, in the sense that no infinite loop will occur + * Certain flavors of the timer_grab function, which would seem to be the thing to use, + * can loop infinitely if called while the timer is in the process of being updated. + * Unfortunately, it is (rarely) possible to get inconsistent top and bottom halves of + * the timer using this method. This seems insoluble, since stackshot runs in a context + * where the timer might be half-updated, and has no way of yielding control just long + * enough to finish the update. + */ + +static uint64_t safe_grab_timer_value(struct timer *t) +{ +#if defined(__LP64__) + return t->all_bits; +#else + uint64_t time = t->high_bits; /* endian independent grab */ + time = (time << 32) | t->low_bits; + return time; +#endif +} + +/* Cache stack snapshot parameters in preparation for a trace */ +void +kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset) +{ + stack_snapshot_pid = pid; + stack_snapshot_buf = tracebuf; + stack_snapshot_bufsize = tracebuf_size; + stack_snapshot_flags = flags; + stack_snapshot_dispatch_offset = dispatch_offset; + kdp_snapshot++; + /* Mark this debugger as active, since the polled mode driver that + * ordinarily does this may not be enabled (yet), or since KDB may be + * the primary debugger. 
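
safe_grab_timer_value() trades consistency for guaranteed termination: two plain loads can never spin the way the looping timer_grab() flavors can, but on 32-bit kernels they may pair a fresh high word with a stale low word. A standalone model of that non-looping read, using a hypothetical struct that mirrors the high/low split:

    #include <stdint.h>

    struct split_timer {
        volatile uint32_t high_bits;
        volatile uint32_t low_bits;
    };

    /* Always terminates; if the timer is written between the two loads,
     * the reassembled value can mix halves of the old and new times. */
    static uint64_t grab_timer(const struct split_timer *t)
    {
        uint64_t time = t->high_bits;   /* endian independent grab */
        return (time << 32) | t->low_bits;
    }
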
+ */ + old_debugger = current_debugger; + if (old_debugger != KDP_CUR_DB) { + current_debugger = KDP_CUR_DB; + } +} + +void +kdp_snapshot_postflight(void) +{ + kdp_snapshot--; +#if CONFIG_KDP_INTERACTIVE_DEBUGGING + if ( + (kdp_en_send_pkt == NULL) || (old_debugger == KDB_CUR_DB)) + current_debugger = old_debugger; +#else + current_debugger = old_debugger; +#endif +} + +int +kdp_stack_snapshot_geterror(void) +{ + return stack_snapshot_ret; +} + +int +kdp_stack_snapshot_bytes_traced(void) +{ + return stack_snapshot_bytes_traced; +} + +static int +kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced) +{ + char *tracepos = (char *) tracebuf; + char *tracebound = tracepos + tracebuf_size; + uint32_t tracebytes = 0; + int error = 0, i; + + task_t task = TASK_NULL; + thread_t thread = THREAD_NULL; + unsigned framesize = 2 * sizeof(vm_offset_t); + + queue_head_t *task_list = &tasks; + boolean_t is_active_list = TRUE; + + boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0); + boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0); + boolean_t save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0); + boolean_t save_userframes_p = ((trace_flags & STACKSHOT_SAVE_KERNEL_FRAMES_ONLY) == 0); + boolean_t save_donating_pids_p = ((trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0); + + if(trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) { + if(tracepos + sizeof(struct mem_and_io_snapshot) > tracebound) { + error = -1; + goto error_exit; + } + kdp_mem_and_io_snapshot((struct mem_and_io_snapshot *)tracepos); + tracepos += sizeof(struct mem_and_io_snapshot); + } + + +walk_list: + queue_iterate(task_list, task, task_t, tasks) { + if ((task == NULL) || !ml_validate_nofault((vm_offset_t) task, sizeof(struct task))) + goto error_exit; + + int task_pid = pid_from_task(task); + uint64_t task_uniqueid = proc_uniqueid_from_task(task); + boolean_t task64 = task_has_64BitAddr(task); + + if (!task->active) { + /* + * Not interested in terminated tasks without threads, and + * at the moment, stackshot can't handle a task without a name. 
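
The walk_list label set up above drives a two-pass iteration: the body runs once over the active task list, then a flag flip near the end of kdp_stackshot retargets it at terminated_tasks and jumps back. The shape of that pattern on a hypothetical singly linked list:

    #include <stdbool.h>
    #include <stddef.h>

    struct node { struct node *next; int value; };

    static void visit(int v) { (void)v; /* per-task work would go here */ }

    static void walk_both(struct node *active, struct node *terminated)
    {
        struct node *list = active;
        bool is_active_list = true;

    walk_list:
        for (struct node *n = list; n != NULL; n = n->next)
            visit(n->value);

        if (is_active_list) {           /* second pass over the other list */
            is_active_list = false;
            list = terminated;
            goto walk_list;
        }
    }
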
+ */ + if (queue_empty(&task->threads) || task_pid == -1) { + continue; + } + } + + /* Trace everything, unless a process was specified */ + if ((pid == -1) || (pid == task_pid)) { + task_snapshot_t task_snap; + thread_snapshot_t tsnap = NULL; + uint32_t uuid_info_count = 0; + mach_vm_address_t uuid_info_addr = 0; + boolean_t have_map = (task->map != NULL) && + (ml_validate_nofault((vm_offset_t)(task->map), sizeof(struct _vm_map))); + boolean_t have_pmap = have_map && (task->map->pmap != NULL) && + (ml_validate_nofault((vm_offset_t)(task->map->pmap), sizeof(struct pmap))); + uint64_t shared_cache_base_address = 0; + + if (have_pmap && task->active && save_loadinfo_p && task_pid > 0) { + // Read the dyld_all_image_infos struct from the task memory to get UUID array count and location + if (task64) { + struct user64_dyld_all_image_infos task_image_infos; + if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user64_dyld_all_image_infos))) { + uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount; + uuid_info_addr = task_image_infos.uuidArray; + } + } else { + struct user32_dyld_all_image_infos task_image_infos; + if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user32_dyld_all_image_infos))) { + uuid_info_count = task_image_infos.uuidArrayCount; + uuid_info_addr = task_image_infos.uuidArray; + } + } + + // If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating + // this data structure), we zero the uuid_info_count so that we won't even try to save load info + // for this task. + if (!uuid_info_addr) { + uuid_info_count = 0; + } + } + + if (have_pmap && save_kextloadinfo_p && task_pid == 0) { + if (ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) { + uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */ + } + } + + if (tracepos + sizeof(struct task_snapshot) > tracebound) { + error = -1; + goto error_exit; + } + + task_snap = (task_snapshot_t) tracepos; + task_snap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC; + task_snap->pid = task_pid; + task_snap->uniqueid = task_uniqueid; + task_snap->nloadinfos = uuid_info_count; + task_snap->donating_pid_count = 0; + + /* Add the BSD process identifiers */ + if (task_pid != -1) + proc_name_kdp(task, task_snap->p_comm, sizeof(task_snap->p_comm)); + else + task_snap->p_comm[0] = '\0'; + task_snap->ss_flags = 0; + if (task64) + task_snap->ss_flags |= kUser64_p; + if (task64 && task_pid == 0) + task_snap->ss_flags |= kKernel64_p; + if (!task->active) + task_snap->ss_flags |= kTerminatedSnapshot; + if(task->pidsuspended) task_snap->ss_flags |= kPidSuspended; + if(task->frozen) task_snap->ss_flags |= kFrozen; + + if (task->effective_policy.darwinbg == 1) { + task_snap->ss_flags |= kTaskDarwinBG; + } + + if (task->requested_policy.t_role == TASK_FOREGROUND_APPLICATION) { + task_snap->ss_flags |= kTaskIsForeground; + } + + if (task->requested_policy.t_boosted == 1) { + task_snap->ss_flags |= kTaskIsBoosted; + } + + if (task->effective_policy.t_sup_active == 1) + task_snap->ss_flags |= kTaskIsSuppressed; +#if IMPORTANCE_INHERITANCE + if (task->task_imp_base) { + if (task->task_imp_base->iit_donor) { + task_snap->ss_flags |= kTaskIsImpDonor; +} + + if (task->task_imp_base->iit_live_donor) { + task_snap->ss_flags |= kTaskIsLiveImpDonor; + } + } +#endif + + task_snap->latency_qos = (task->effective_policy.t_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ? 
+ LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.t_latency_qos); + + task_snap->suspend_count = task->suspend_count; + task_snap->task_size = have_pmap ? pmap_resident_count(task->map->pmap) : 0; + task_snap->faults = task->faults; + task_snap->pageins = task->pageins; + task_snap->cow_faults = task->cow_faults; + + task_snap->user_time_in_terminated_threads = task->total_user_time; + task_snap->system_time_in_terminated_threads = task->total_system_time; + /* + * The throttling counters are maintained as 64-bit counters in the proc + * structure. However, we reserve 32-bits (each) for them in the task_snapshot + * struct to save space and since we do not expect them to overflow 32-bits. If we + * find these values overflowing in the future, the fix would be to simply + * upgrade these counters to 64-bit in the task_snapshot struct + */ + task_snap->was_throttled = (uint32_t) proc_was_throttled_from_task(task); + task_snap->did_throttle = (uint32_t) proc_did_throttle_from_task(task); + + /* fetch some useful BSD info: */ + task_snap->p_start_sec = task_snap->p_start_usec = 0; + proc_starttime_kdp(task->bsd_info, &task_snap->p_start_sec, &task_snap->p_start_usec); + if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region, + sizeof(struct vm_shared_region))) { + struct vm_shared_region *sr = task->shared_region; + + shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping; + } + if (!shared_cache_base_address + || !kdp_copyin(task->map->pmap, shared_cache_base_address + offsetof(struct _dyld_cache_header, uuid), task_snap->shared_cache_identifier, sizeof(task_snap->shared_cache_identifier))) { + memset(task_snap->shared_cache_identifier, 0x0, sizeof(task_snap->shared_cache_identifier)); + } + if (task->shared_region) { + /* + * No refcounting here, but we are in debugger + * context, so that should be safe. 
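
Every record the stackshot emits is appended under the same guard used for the task snapshot above: test tracepos plus the record size against tracebound before writing, then advance tracepos past the bytes consumed. The idiom reduced to a hypothetical helper that is not in the patch:

    #include <string.h>

    static int append_record(char **pos, char *bound, const void *rec, size_t size)
    {
        if (*pos + size > bound)
            return -1;                  /* record would overflow the snapshot buffer */
        memcpy(*pos, rec, size);
        *pos += size;                   /* cursor now points at the next free byte */
        return 0;
    }
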
+ */ + task_snap->shared_cache_slide = task->shared_region->sr_slide_info.slide; + } else { + task_snap->shared_cache_slide = 0; + } + + /* I/O Statistics */ + assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES); + + if (task->task_io_stats) { + task_snap->disk_reads_count = task->task_io_stats->disk_reads.count; + task_snap->disk_reads_size = task->task_io_stats->disk_reads.size; + task_snap->disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count); + task_snap->disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size); + for(i = 0; i < IO_NUM_PRIORITIES; i++) { + task_snap->io_priority_count[i] = task->task_io_stats->io_priority[i].count; + task_snap->io_priority_size[i] = task->task_io_stats->io_priority[i].size; + } + task_snap->paging_count = task->task_io_stats->paging.count; + task_snap->paging_size = task->task_io_stats->paging.size; + task_snap->non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count); + task_snap->non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size); + task_snap->metadata_count = task->task_io_stats->metadata.count; + task_snap->metadata_size = task->task_io_stats->metadata.size; + task_snap->data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count); + task_snap->data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size); + } else { + /* zero from disk_reads_count to end of structure */ + memset(&task_snap->disk_reads_count, 0, offsetof(struct task_snapshot, metadata_size) - offsetof(struct task_snapshot, disk_reads_count)); + } + tracepos += sizeof(struct task_snapshot); + + if (task_pid > 0 && uuid_info_count > 0) { + uint32_t uuid_info_size = (uint32_t)(task64 ? 
sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info)); + uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size; + + if (tracepos + uuid_info_array_size > tracebound) { + error = -1; + goto error_exit; + } + + // Copy in the UUID info array + // It may be nonresident, in which case just fix up nloadinfos to 0 in the task_snap + if (have_pmap && !kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size)) + task_snap->nloadinfos = 0; + else + tracepos += uuid_info_array_size; + } else if (task_pid == 0 && uuid_info_count > 0) { + uint32_t uuid_info_size = (uint32_t)sizeof(kernel_uuid_info); + uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size; + kernel_uuid_info *output_uuids; + + if (tracepos + uuid_info_array_size > tracebound) { + error = -1; + goto error_exit; + } + + output_uuids = (kernel_uuid_info *)tracepos; + + do { + + if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) { + /* Kernel UUID not found or inaccessible */ + task_snap->nloadinfos = 0; + break; + } + + output_uuids[0].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext); + memcpy(&output_uuids[0].imageUUID, kernel_uuid, sizeof(uuid_t)); + + if (ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]), + gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) { + uint32_t kexti; + + for (kexti=0 ; kexti < gLoadedKextSummaries->numSummaries; kexti++) { + output_uuids[1+kexti].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address); + memcpy(&output_uuids[1+kexti].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t)); + } + + tracepos += uuid_info_array_size; + } else { + /* kext summary invalid, but kernel UUID was copied */ + task_snap->nloadinfos = 1; + tracepos += uuid_info_size; + break; + } + } while(0); + } + + if (save_donating_pids_p) { + task_snap->donating_pid_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS, (int *)tracepos, (unsigned int)((tracebound - tracepos)/sizeof(int))); + tracepos += sizeof(int) * task_snap->donating_pid_count; + } + + queue_iterate(&task->threads, thread, thread_t, task_threads){ + uint64_t tval; + + if ((thread == NULL) || !ml_validate_nofault((vm_offset_t) thread, sizeof(struct thread))) + goto error_exit; + + if (((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound)) { + error = -1; + goto error_exit; + } + if (!save_userframes_p && thread->kernel_stack == 0) + continue; + + /* Populate the thread snapshot header */ + tsnap = (thread_snapshot_t) tracepos; + tsnap->thread_id = thread_tid(thread); + tsnap->state = thread->state; + tsnap->priority = thread->priority; + tsnap->sched_pri = thread->sched_pri; + tsnap->sched_flags = thread->sched_flags; + tsnap->wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event); + tsnap->continuation = VM_KERNEL_UNSLIDE(thread->continuation); + tval = safe_grab_timer_value(&thread->user_timer); + tsnap->user_time = tval; + tval = safe_grab_timer_value(&thread->system_timer); + if (thread->precise_user_kernel_time) { + tsnap->system_time = tval; + } else { + tsnap->user_time += tval; + tsnap->system_time = 0; + } + tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC; + bzero(&tsnap->pth_name, STACKSHOT_MAX_THREAD_NAME_SIZE); + proc_threadname_kdp(thread->uthread, &tsnap->pth_name[0], STACKSHOT_MAX_THREAD_NAME_SIZE); + tracepos += sizeof(struct thread_snapshot); + tsnap->ss_flags = 0; + /* I/O Statistics */ + 
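
Both the task and thread snapshot paths zero their I/O counters with one memset whose length is the difference of two offsetof() values (the task variant appears above; the thread variant follows just below). The same technique on a self-contained struct with made-up fields:

    #include <stddef.h>
    #include <string.h>

    struct stats {
        long id;                        /* left untouched */
        long reads;                     /* first member cleared */
        long writes;
        long paging;
        long metadata;                  /* clearing stops just before this member */
    };

    static void clear_io_fields(struct stats *s)
    {
        memset(&s->reads, 0,
               offsetof(struct stats, metadata) - offsetof(struct stats, reads));
    }
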
assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES); + if (thread->thread_io_stats) { + tsnap->disk_reads_count = thread->thread_io_stats->disk_reads.count; + tsnap->disk_reads_size = thread->thread_io_stats->disk_reads.size; + tsnap->disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count); + tsnap->disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size); + for(i = 0; i < IO_NUM_PRIORITIES; i++) { + tsnap->io_priority_count[i] = thread->thread_io_stats->io_priority[i].count; + tsnap->io_priority_size[i] = thread->thread_io_stats->io_priority[i].size; + } + tsnap->paging_count = thread->thread_io_stats->paging.count; + tsnap->paging_size = thread->thread_io_stats->paging.size; + tsnap->non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count); + tsnap->non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size); + tsnap->metadata_count = thread->thread_io_stats->metadata.count; + tsnap->metadata_size = thread->thread_io_stats->metadata.size; + tsnap->data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count); + tsnap->data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size); + } else { + /* zero from disk_reads_count to end of structure */ + memset(&tsnap->disk_reads_count, 0, + offsetof(struct thread_snapshot, metadata_size) - offsetof(struct thread_snapshot, disk_reads_count)); + } + + if (thread->effective_policy.darwinbg) { + tsnap->ss_flags |= kThreadDarwinBG; + } + + tsnap->io_tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO); + if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) { + tsnap->ss_flags |= kThreadIOPassive; + } + + if (thread->suspend_count > 0) { + tsnap->ss_flags |= kThreadSuspended; + } + if (IPC_VOUCHER_NULL != thread->ith_voucher) { + tsnap->voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher); + } + + tsnap->ts_qos = thread->effective_policy.thep_qos; + tsnap->total_syscalls = thread->syscalls_mach + thread->syscalls_unix; + + if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) { + uint64_t dqkeyaddr = thread_dispatchqaddr(thread); + if (dqkeyaddr != 0) { + uint64_t dqaddr = 0; + if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) { + uint64_t dqserialnumaddr = dqaddr + dispatch_offset; + uint64_t dqserialnum = 0; + if (kdp_copyin(task->map->pmap, dqserialnumaddr, &dqserialnum, (task64 ? 8 : 4))) { + tsnap->ss_flags |= kHasDispatchSerial; + *(uint64_t *)tracepos = dqserialnum; + tracepos += 8; + } + } + } + } +/* Call through to the machine specific trace routines + * Frames are added past the snapshot header. + */ + tracebytes = 0; + if (thread->kernel_stack != 0) { +#if defined(__LP64__) + tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE); + tsnap->ss_flags |= kKernel64_p; + framesize = 16; +#else + tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE); + framesize = 8; +#endif + } + tsnap->nkern_frames = tracebytes/framesize; + tracepos += tracebytes; + tracebytes = 0; + /* Trace user stack, if any */ + if (save_userframes_p && task->active && thread->task->map != kernel_map) { + /* 64-bit task? 
*/ + if (task_has_64BitAddr(thread->task)) { + tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE); + tsnap->ss_flags |= kUser64_p; + framesize = 16; + } + else { + tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE); + framesize = 8; + } + } + tsnap->nuser_frames = tracebytes/framesize; + tracepos += tracebytes; + tracebytes = 0; + } + + if (!save_userframes_p && tsnap == NULL) { + /* + * No thread info is collected due to lack of kernel frames. + * Remove information about this task also + */ + tracepos = (char *)task_snap; + } + } + } + + if (is_active_list) { + is_active_list = FALSE; + task_list = &terminated_tasks; + goto walk_list; + } + +error_exit: + /* Release stack snapshot wait indicator */ + kdp_snapshot_postflight(); + + *pbytesTraced = (uint32_t)(tracepos - (char *) tracebuf); + + return error; +} + +static int pid_from_task(task_t task) +{ + int pid = -1; + + if (task->bsd_info) + pid = proc_pid(task->bsd_info); + + return pid; +} + +static uint64_t +proc_uniqueid_from_task(task_t task) +{ + uint64_t uniqueid = ~(0ULL); + + if (task->bsd_info) + uniqueid = proc_uniqueid(task->bsd_info); + + return uniqueid; +} + +static uint64_t +proc_was_throttled_from_task(task_t task) +{ + uint64_t was_throttled = 0; + + if (task->bsd_info) + was_throttled = proc_was_throttled(task->bsd_info); + + return was_throttled; +} + +static uint64_t +proc_did_throttle_from_task(task_t task) +{ + uint64_t did_throttle = 0; + + if (task->bsd_info) + did_throttle = proc_did_throttle(task->bsd_info); + + return did_throttle; +} + +static void +kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap) +{ + unsigned int pages_reclaimed; + unsigned int pages_wanted; + kern_return_t kErr; + + processor_t processor; + vm_statistics64_t stat; + vm_statistics64_data_t host_vm_stat; + + processor = processor_list; + stat = &PROCESSOR_DATA(processor, vm_stat); + host_vm_stat = *stat; + + if (processor_count > 1) { + /* + * processor_list may be in the process of changing as we are + * attempting a stackshot. Ordinarily it will be lock protected, + * but it is not safe to lock in the context of the debugger. + * Fortunately we never remove elements from the processor list, + * and only add to the end of the list, so we SHOULD be able + * to walk it. If we ever want to truly tear down processors, + * this will have to change. + */ + while ((processor = processor->processor_list) != NULL) { + stat = &PROCESSOR_DATA(processor, vm_stat); + host_vm_stat.compressions += stat->compressions; + host_vm_stat.decompressions += stat->decompressions; + } + } + + memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC; + memio_snap->free_pages = vm_page_free_count; + memio_snap->active_pages = vm_page_active_count; + memio_snap->inactive_pages = vm_page_inactive_count; + memio_snap->purgeable_pages = vm_page_purgeable_count; + memio_snap->wired_pages = vm_page_wire_count; + memio_snap->speculative_pages = vm_page_speculative_count; + memio_snap->throttled_pages = vm_page_throttled_count; + memio_snap->busy_buffer_count = count_busy_buffers(); + memio_snap->filebacked_pages = vm_page_pageable_external_count; + memio_snap->compressions = (uint32_t)host_vm_stat.compressions; + memio_snap->decompressions = (uint32_t)host_vm_stat.decompressions; + memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT; + kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted); + + if ( ! 
kErr ) { + memio_snap->pages_wanted = (uint32_t)pages_wanted; + memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed; + memio_snap->pages_wanted_reclaimed_valid = 1; + } else { + memio_snap->pages_wanted = 0; + memio_snap->pages_reclaimed = 0; + memio_snap->pages_wanted_reclaimed_valid = 0; + } +} + +boolean_t +kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) +{ + size_t rem = size; + char *kvaddr = dest; + + while (rem) { + ppnum_t upn = pmap_find_phys(p, uaddr); + uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK); + uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr); + uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK); + uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK); + size_t cur_size = (uint32_t) MIN(src_rem, dst_rem); + cur_size = MIN(cur_size, rem); + + if (upn && pmap_valid_page(upn) && phys_dest) { + bcopy_phys(phys_src, phys_dest, cur_size); + } + else + break; + uaddr += cur_size; + kvaddr += cur_size; + rem -= cur_size; + } + return (rem == 0); +} + +void +do_stackshot() +{ + stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid, + stack_snapshot_buf, stack_snapshot_bufsize, + stack_snapshot_flags, stack_snapshot_dispatch_offset, + &stack_snapshot_bytes_traced); + +} + +/* + * A fantastical routine that tries to be fast about returning + * translations. Caches the last page we found a translation + * for, so that we can be quick about multiple queries to the + * same page. It turns out this is exactly the workflow + * machine_trace_thread and its relatives tend to throw at us. + * + * Please zero the nasty global this uses after a bulk lookup; + * this isn't safe across a switch of the kdp_pmap or changes + * to a pmap. + * + * This also means that if zero is a valid KVA, we are + * screwed. Sucks to be us. Fortunately, this should never + * happen. + */ +vm_offset_t +machine_trace_thread_get_kva(vm_offset_t cur_target_addr) +{ + unsigned cur_wimg_bits; + vm_offset_t cur_target_page; + vm_offset_t cur_phys_addr; + vm_offset_t kern_virt_target_addr; + + cur_target_page = atop(cur_target_addr); + + if ((cur_target_page != prev_target_page) || validate_next_addr) { + /* + * Alright; it wasn't our previous page. So + * we must validate that there is a page + * table entry for this address under the + * current kdp_pmap, and that it has default + * cache attributes (otherwise it may not be + * safe to access it). + */ + cur_phys_addr = kdp_vtophys(kdp_pmap ? kdp_pmap : kernel_pmap, cur_target_addr); + + if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) { + return 0; + } + + cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr)); + + if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) { + return 0; + } + +#if __x86_64__ + kern_virt_target_addr = (vm_offset_t) PHYSMAP_PTOV(cur_phys_addr); +#else +#error Oh come on... 
we should really unify the physical -> kernel virtual interface +#endif + prev_target_page = cur_target_page; + prev_target_kva = (kern_virt_target_addr & ~PAGE_MASK); + validate_next_addr = FALSE; + return kern_virt_target_addr; + } else { + /* We found a translation, so stash this page */ + kern_virt_target_addr = prev_target_kva + (cur_target_addr & PAGE_MASK); + return kern_virt_target_addr; + } +} + +void +machine_trace_thread_clear_validation_cache(void) +{ + validate_next_addr = TRUE; +} + diff --git a/osfmk/kern/kern_types.h b/osfmk/kern/kern_types.h index b7fc7ec5c..6c19ab4dc 100644 --- a/osfmk/kern/kern_types.h +++ b/osfmk/kern/kern_types.h @@ -171,7 +171,7 @@ typedef int wait_timeout_urgency_t; #define TIMEOUT_URGENCY_LEEWAY 0x20 /* don't ignore provided leeway value */ #define TIMEOUT_URGENCY_FIRST_AVAIL 0x40 /* first available bit outside of urgency mask/leeway */ - +#define TIMEOUT_URGENCY_RATELIMITED 0x80 #ifdef KERNEL_PRIVATE #ifdef MACH_KERNEL_PRIVATE @@ -200,6 +200,11 @@ typedef struct grrr_run_queue *grrr_run_queue_t; typedef struct grrr_group *grrr_group_t; #define GRRR_GROUP_NULL ((grrr_group_t) 0) +#if defined(CONFIG_SCHED_MULTIQ) +typedef struct sched_group *sched_group_t; +#define SCHED_GROUP_NULL ((sched_group_t) 0) +#endif /* defined(CONFIG_SCHED_MULTIQ) */ + #else /* MACH_KERNEL_PRIVATE */ struct wait_queue_set ; diff --git a/osfmk/kern/kpc.h b/osfmk/kern/kpc.h index 4f4df0921..ddfc4ad06 100644 --- a/osfmk/kern/kpc.h +++ b/osfmk/kern/kpc.h @@ -29,7 +29,7 @@ #ifndef __KERN_KPC_H__ #define __KERN_KPC_H__ -/** kernel interfaces to KPC PMC infrastructure **/ +/* Kernel interfaces to KPC PMC infrastructure. */ #include @@ -37,15 +37,21 @@ #define KPC_CLASS_FIXED (0) #define KPC_CLASS_CONFIGURABLE (1) #define KPC_CLASS_POWER (2) +#define KPC_CLASS_RAWPMU (3) -#define KPC_CLASS_FIXED_MASK (1<t_chud |= TASK_KPC_FORCED_ALL_CTRS; + else + task->t_chud &= ~TASK_KPC_FORCED_ALL_CTRS; + task_unlock(task); +} + +static boolean_t +kpc_task_get_forced_all_ctrs(task_t task) +{ + assert(task); + return task->t_chud & TASK_KPC_FORCED_ALL_CTRS ? TRUE : FALSE; +} + +int +kpc_force_all_ctrs(task_t task, int val) +{ + int ret = 0; + boolean_t new_state = val ? TRUE : FALSE; + boolean_t old_state = kpc_get_force_all_ctrs(); + + /* + * Refuse to do the operation if the counters are already forced by + * another task. + */ + if (kpc_get_force_all_ctrs() && !kpc_task_get_forced_all_ctrs(task)) + return EACCES; + + /* nothing to do if the state is not changing */ + if (old_state == new_state) + return 0; + + /* do the architecture specific work */ + if ((ret = kpc_force_all_ctrs_arch(task, val)) != 0) + return ret; + + /* notify the power manager */ + if (pm_handler) + pm_handler( new_state ? FALSE : TRUE ); + + /* update the task bits */ + kpc_task_set_forced_all_ctrs(task, val); + + /* update the internal state */ + force_all_ctrs = val; + + return 0; +} + +int +kpc_get_force_all_ctrs(void) +{ + return force_all_ctrs; +} + +boolean_t +kpc_register_pm_handler(void (*handler)(boolean_t)) +{ + if (!pm_handler) { + pm_handler = handler; + } + + /* Notify machine-dependent code. Reserved PMCs could change. */ + kpc_force_all_ctrs_arch(TASK_NULL, force_all_ctrs); + + return force_all_ctrs ? 
FALSE : TRUE; +} + +boolean_t +kpc_multiple_clients(void) +{ + return pm_handler != NULL; +} + +boolean_t +kpc_controls_fixed_counters(void) +{ + return !pm_handler || force_all_ctrs; +} + uint32_t kpc_get_running(void) { @@ -169,6 +261,9 @@ kpc_get_config_count(uint32_t classes) if( classes & KPC_CLASS_CONFIGURABLE_MASK ) count += kpc_configurable_config_count(); + if( (classes & KPC_CLASS_RAWPMU_MASK) && !kpc_multiple_clients() ) + count += kpc_rawpmu_config_count(); + return count; } @@ -189,6 +284,19 @@ kpc_get_config(uint32_t classes, kpc_config_t *current_config) count += kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK); } + if( classes & KPC_CLASS_RAWPMU_MASK ) + { + // Client shouldn't ask for config words that aren't available. + // Most likely, they'd misinterpret the returned buffer if we + // allowed this. + if( kpc_multiple_clients() ) + { + return EPERM; + } + kpc_get_rawpmu_config(&current_config[count]); + count += kpc_get_config_count(KPC_CLASS_RAWPMU_MASK); + } + return 0; } @@ -197,6 +305,12 @@ kpc_set_config(uint32_t classes, kpc_config_t *configv) { struct kpc_config_remote mp_config; + // Don't allow RAWPMU configuration when sharing counters. + if( (classes & KPC_CLASS_RAWPMU_MASK) && kpc_multiple_clients() ) + { + return EPERM; + } + lck_mtx_lock(&kpc_config_lock); mp_config.classes = classes; @@ -361,4 +475,3 @@ int kpc_get_actionid(uint32_t classes, uint32_t *val) return 0; } - diff --git a/osfmk/kern/ledger.c b/osfmk/kern/ledger.c index 13146ce6f..95a5d89c6 100644 --- a/osfmk/kern/ledger.c +++ b/osfmk/kern/ledger.c @@ -29,7 +29,7 @@ * @OSF_COPYRIGHT@ */ -#include +#include #include #include #include @@ -54,23 +54,24 @@ #define LF_CALLED_BACK 0x1000 /* callback was called for balance in deficit */ #define LF_WARNED 0x2000 /* callback was called for balance warning */ #define LF_TRACKING_MAX 0x4000 /* track max balance over user-specified time */ +#define LF_PANIC_ON_NEGATIVE 0x8000 /* panic if it goes negative */ /* Determine whether a ledger entry exists and has been initialized and active */ #define ENTRY_VALID(l, e) \ (((l) != NULL) && ((e) >= 0) && ((e) < (l)->l_size) && \ (((l)->l_entries[e].le_flags & LF_ENTRY_ACTIVE) == LF_ENTRY_ACTIVE)) +#define ASSERT(a) assert(a) + #ifdef LEDGER_DEBUG int ledger_debug = 0; -#define ASSERT(a) assert(a) #define lprintf(a) if (ledger_debug) { \ printf("%lld ", abstime_to_nsecs(mach_absolute_time() / 1000000)); \ printf a ; \ } #else #define lprintf(a) -#define ASSERT(a) #endif struct ledger_callback { @@ -746,6 +747,12 @@ ledger_check_new_balance(ledger_t ledger, int entry) } } } + + if ((le->le_flags & LF_PANIC_ON_NEGATIVE) && + (le->le_credit < le->le_debit)) { + panic("ledger_check_new_balance(%p,%d): negative ledger %p balance:%lld\n", + ledger, entry, le, le->le_credit - le->le_debit); + } } /* @@ -773,6 +780,34 @@ ledger_credit(ledger_t ledger, int entry, ledger_amount_t amount) return (KERN_SUCCESS); } +/* Add all of one ledger's values into another. + * They must have been created from the same template. + * This is not done atomically. Another thread (if not otherwise synchronized) + * may see bogus values when comparing one entry to another. + * As each entry's credit & debit are modified one at a time, the warning/limit + * may spuriously trip, or spuriously fail to trip, or another thread (if not + * otherwise synchronized) may see a bogus balance. 
+ */ +kern_return_t +ledger_rollup(ledger_t to_ledger, ledger_t from_ledger) +{ + int i; + struct ledger_entry *from_le, *to_le; + + assert(to_ledger->l_template == from_ledger->l_template); + + for (i = 0; i < to_ledger->l_size; i++) { + if (ENTRY_VALID(from_ledger, i) && ENTRY_VALID(to_ledger, i)) { + from_le = &from_ledger->l_entries[i]; + to_le = &to_ledger->l_entries[i]; + OSAddAtomic64(from_le->le_credit, &to_le->le_credit); + OSAddAtomic64(from_le->le_debit, &to_le->le_debit); + } + } + + return (KERN_SUCCESS); +} + /* * Zero the balance of a ledger by adding to its credit or debit, whichever is smaller. * Note that some clients of ledgers (notably, task wakeup statistics) require that @@ -921,6 +956,22 @@ ledger_track_maximum(ledger_template_t template, int entry, return (KERN_SUCCESS); } +kern_return_t +ledger_panic_on_negative(ledger_template_t template, int entry) +{ + template_lock(template); + + if ((entry < 0) || (entry >= template->lt_cnt)) { + template_unlock(template); + return (KERN_INVALID_VALUE); + } + + template->lt_entries[entry].et_flags |= LF_PANIC_ON_NEGATIVE; + + template_unlock(template); + + return (KERN_SUCCESS); +} /* * Add a callback to be executed when the resource goes into deficit. */ @@ -1377,6 +1428,36 @@ ledger_get_entries(ledger_t ledger, int entry, ledger_amount_t *credit, return (KERN_SUCCESS); } +kern_return_t +ledger_reset_callback_state(ledger_t ledger, int entry) +{ + struct ledger_entry *le; + + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_ARGUMENT); + + le = &ledger->l_entries[entry]; + + flag_clear(&le->le_flags, LF_CALLED_BACK); + + return (KERN_SUCCESS); +} + +kern_return_t +ledger_disable_panic_on_negative(ledger_t ledger, int entry) +{ + struct ledger_entry *le; + + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_ARGUMENT); + + le = &ledger->l_entries[entry]; + + flag_clear(&le->le_flags, LF_PANIC_ON_NEGATIVE); + + return (KERN_SUCCESS); +} + kern_return_t ledger_get_balance(ledger_t ledger, int entry, ledger_amount_t *balance) { diff --git a/osfmk/kern/ledger.h b/osfmk/kern/ledger.h index 17dcf329a..d29f07d27 100644 --- a/osfmk/kern/ledger.h +++ b/osfmk/kern/ledger.h @@ -96,6 +96,8 @@ extern kern_return_t ledger_set_callback(ledger_template_t template, int entry, ledger_callback_t callback, const void *param0, const void *param1); extern kern_return_t ledger_track_maximum(ledger_template_t template, int entry, int period_in_secs); +extern kern_return_t ledger_panic_on_negative(ledger_template_t template, + int entry); extern int ledger_key_lookup(ledger_template_t template, const char *key); /* value of entry type */ @@ -127,6 +129,10 @@ extern kern_return_t ledger_get_entries(ledger_t ledger, int entry, ledger_amount_t *credit, ledger_amount_t *debit); extern kern_return_t ledger_get_balance(ledger_t ledger, int entry, ledger_amount_t *balance); +extern kern_return_t ledger_reset_callback_state(ledger_t ledger, int entry); +extern kern_return_t ledger_disable_panic_on_negative(ledger_t ledger, int entry); + +extern kern_return_t ledger_rollup(ledger_t to_ledger, ledger_t from_ledger); extern void ledger_ast(thread_t thread); extern void set_astledger(thread_t thread); diff --git a/osfmk/kern/lock.h b/osfmk/kern/lock.h index 8366e26a6..27aa5d017 100644 --- a/osfmk/kern/lock.h +++ b/osfmk/kern/lock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
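
ledger_rollup() above sums each entry with one atomic add per field, so an individual credit or debit never tears, but the (credit, debit) pair is not moved atomically, which is exactly the caveat its comment spells out. A reduced model of that tradeoff in C11 atomics:

    #include <stdatomic.h>

    struct entry { _Atomic long long credit, debit; };

    /* Each field is added atomically, but a reader that looks between the
     * two adds can see credit already rolled up while debit is not. */
    static void rollup(struct entry *to, struct entry *from, int n)
    {
        for (int i = 0; i < n; i++) {
            atomic_fetch_add(&to[i].credit, atomic_load(&from[i].credit));
            atomic_fetch_add(&to[i].debit, atomic_load(&from[i].debit));
        }
    }
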
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * @@ -66,73 +66,9 @@ #ifndef _KERN_LOCK_H_ #define _KERN_LOCK_H_ -#include -#include -#include +#include <kern/locks.h> -__BEGIN_DECLS - -#ifndef MACH_KERNEL_PRIVATE - -typedef struct __lock__ lock_t; - -#else /* MACH_KERNEL_PRIVATE */ - -extern void lock_init( - lock_t *lock, - boolean_t can_sleep, - unsigned short tag0, - unsigned short tag1); - -#endif /* MACH_KERNEL_PRIVATE */ - -extern lock_t *lock_alloc( - boolean_t can_sleep, - unsigned short tag0, - unsigned short tag1); - -extern void lock_free( - lock_t *lock); - -extern void lock_write( - lock_t *lock); - -extern void lock_read( - lock_t *lock); - -extern void lock_done( - lock_t *lock); - -extern void lock_write_to_read( - lock_t *lock); - -#define lock_read_done(l) lock_done(l) -#define lock_write_done(l) lock_done(l) - -extern boolean_t lock_read_to_write( - lock_t *lock); - - -/* Sleep, unlocking and then relocking a usimple_lock in the process */ -extern wait_result_t thread_sleep_usimple_lock( - event_t event, - usimple_lock_t lock, - wait_interrupt_t interruptible); - -/* Sleep, unlocking and then relocking a write lock in the process */ -extern wait_result_t thread_sleep_lock_write( - event_t event, - lock_t *lock, - wait_interrupt_t interruptible); -__END_DECLS - -#ifdef MACH_KERNEL_PRIVATE - -extern wait_result_t thread_sleep_fast_usimple_lock( - event_t event, - simple_lock_t lock, - wait_interrupt_t interruptible); -#endif /* MACH_KERNEL_PRIVATE */ +#warning This header is deprecated. Use <kern/locks.h> instead. #endif /* _KERN_LOCK_H_ */ diff --git a/osfmk/kern/locks.c b/osfmk/kern/locks.c index 91e60ec74..87189b89f 100644 --- a/osfmk/kern/locks.c +++ b/osfmk/kern/locks.c @@ -495,6 +495,7 @@ lck_mtx_sleep( wait_interrupt_t interruptible) { wait_result_t res; + thread_t thread = current_thread(); KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START, (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0); @@ -502,6 +503,16 @@ if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) panic("Invalid lock sleep action %x\n", lck_sleep_action); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + /* + * We overload the RW lock promotion to give us a priority ceiling + * during the time that this thread is asleep, so that when it + * is re-awakened (and not yet contending on the mutex), it is + * runnable at a reasonably high priority. + */ + thread->rwlock_count++; + } + res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { lck_mtx_unlock(lck); @@ -517,6 +528,13 @@ if (lck_sleep_action & LCK_SLEEP_UNLOCK) lck_mtx_unlock(lck); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + /* sched_flags checked without lock, but will be rechecked while clearing */ + lck_rw_clear_promotion(thread); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0); return res; @@ -535,6 +553,7 @@ lck_mtx_sleep_deadline( uint64_t deadline) { wait_result_t res; + thread_t thread = current_thread(); KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START, (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0); @@ -542,6 +561,13 @@ if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) panic("Invalid lock sleep action %x\n", lck_sleep_action); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + /* + * See lck_mtx_sleep(). 
+ */ + thread->rwlock_count++; + } + res = assert_wait_deadline(event, interruptible, deadline); if (res == THREAD_WAITING) { lck_mtx_unlock(lck); @@ -557,6 +583,13 @@ lck_mtx_sleep_deadline( if (lck_sleep_action & LCK_SLEEP_UNLOCK) lck_mtx_unlock(lck); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + /* sched_flags checked without lock, but will be rechecked while clearing */ + lck_rw_clear_promotion(thread); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0); return res; @@ -600,6 +633,9 @@ lck_mtx_lock_wait ( if (priority < BASEPRI_DEFAULT) priority = BASEPRI_DEFAULT; + /* Do not promote past promotion ceiling */ + priority = MIN(priority, MAXPRI_PROMOTE); + thread_lock(holder); if (mutex->lck_mtx_pri == 0) holder->promotions++; @@ -609,10 +645,6 @@ lck_mtx_lock_wait ( KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, holder->sched_pri, priority, holder, lck, 0); - /* This can potentially elevate the holder into the realtime - * priority band; the implementation in locks_i386.c enforces a - * MAXPRI_KERNEL ceiling. - */ set_sched_pri(holder, priority); } thread_unlock(holder); @@ -693,7 +725,8 @@ lck_mtx_lock_acquire( KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, thread->sched_pri, priority, 0, lck, 0); - + /* Do not promote past promotion ceiling */ + assert(priority <= MAXPRI_PROMOTE); set_sched_pri(thread, priority); } thread_unlock(thread); @@ -749,7 +782,10 @@ lck_mtx_unlock_wakeup ( if ( --thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED) ) { thread->sched_flags &= ~TH_SFLAG_PROMOTED; - if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { + + if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) { + /* Thread still has a RW lock promotion */ + } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE, thread->sched_pri, DEPRESSPRI, 0, lck, 0); @@ -871,10 +907,23 @@ lck_rw_sleep( { wait_result_t res; lck_rw_type_t lck_rw_type; - + thread_t thread = current_thread(); + if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) panic("Invalid lock sleep action %x\n", lck_sleep_action); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + /* + * Although we are dropping the RW lock, the intent in most cases + * is that this thread remains as an observer, since it may hold + * some secondary resource, but must yield to avoid deadlock. In + * this situation, make sure that the thread is boosted to the + * RW lock ceiling while blocked, so that it can re-acquire the + * RW lock at that priority. 
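
lck_mtx_sleep(), lck_mtx_sleep_deadline() and lck_rw_sleep() now all bracket the block with the same counter discipline: increment rwlock_count before sleeping, and on wakeup decrement it, clearing the RW promotion only when the count returns to zero while TH_SFLAG_RW_PROMOTED is still set. The bracket reduced to its shape, as a model rather than the kernel API:

    struct th { int rwlock_count; unsigned sched_flags; };
    #define TH_RW_PROMOTED 0x1u

    static void clear_promotion(struct th *t)
    {
        t->sched_flags &= ~TH_RW_PROMOTED;  /* stands in for lck_rw_clear_promotion() */
    }

    static void sleep_promoted(struct th *t, void (*block)(void))
    {
        t->rwlock_count++;              /* hold the priority ceiling while asleep */
        block();                        /* assert_wait + thread_block in the patch */
        if ((t->rwlock_count-- == 1) && (t->sched_flags & TH_RW_PROMOTED))
            clear_promotion(t);         /* last reference drops the promotion */
    }
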
+ */ + thread->rwlock_count++; + } + res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { lck_rw_type = lck_rw_done(lck); @@ -892,6 +941,17 @@ lck_rw_sleep( if (lck_sleep_action & LCK_SLEEP_UNLOCK) (void)lck_rw_done(lck); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + /* sched_flags checked without lock, but will be rechecked while clearing */ + + /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */ + assert(lck_sleep_action & LCK_SLEEP_UNLOCK); + + lck_rw_clear_promotion(thread); + } + } + return res; } @@ -909,10 +969,15 @@ lck_rw_sleep_deadline( { wait_result_t res; lck_rw_type_t lck_rw_type; + thread_t thread = current_thread(); if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) panic("Invalid lock sleep action %x\n", lck_sleep_action); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + thread->rwlock_count++; + } + res = assert_wait_deadline(event, interruptible, deadline); if (res == THREAD_WAITING) { lck_rw_type = lck_rw_done(lck); @@ -930,6 +995,17 @@ lck_rw_sleep_deadline( if (lck_sleep_action & LCK_SLEEP_UNLOCK) (void)lck_rw_done(lck); + if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { + if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + /* sched_flags checked without lock, but will be rechecked while clearing */ + + /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */ + assert(lck_sleep_action & LCK_SLEEP_UNLOCK); + + lck_rw_clear_promotion(thread); + } + } + return res; } @@ -1104,178 +1180,3 @@ host_lockgroup_info( return(KERN_SUCCESS); } -/* - * Compatibility module - */ - -extern lck_rw_t *lock_alloc_EXT( boolean_t can_sleep, unsigned short tag0, unsigned short tag1); -extern void lock_done_EXT(lck_rw_t *lock); -extern void lock_free_EXT(lck_rw_t *lock); -extern void lock_init_EXT(lck_rw_t *lock, boolean_t can_sleep, unsigned short tag0, unsigned short tag1); -extern void lock_read_EXT(lck_rw_t *lock); -extern boolean_t lock_read_to_write_EXT(lck_rw_t *lock); -extern void lock_write_EXT(lck_rw_t *lock); -extern void lock_write_to_read_EXT(lck_rw_t *lock); -extern wait_result_t thread_sleep_lock_write_EXT( - event_t event, lck_rw_t *lock, wait_interrupt_t interruptible); - -extern void usimple_lock_EXT(lck_spin_t *lock); -extern void usimple_lock_init_EXT(lck_spin_t *lock, unsigned short tag); -extern unsigned int usimple_lock_try_EXT(lck_spin_t *lock); -extern void usimple_unlock_EXT(lck_spin_t *lock); -extern wait_result_t thread_sleep_usimple_lock_EXT(event_t event, lck_spin_t *lock, wait_interrupt_t interruptible); - - -lck_mtx_t* mutex_alloc_EXT(__unused unsigned short tag); -void mutex_free_EXT(lck_mtx_t *mutex); -void mutex_init_EXT(lck_mtx_t *mutex, __unused unsigned short tag); -wait_result_t thread_sleep_mutex_EXT(event_t event, lck_mtx_t *mutex, wait_interrupt_t interruptible); -wait_result_t thread_sleep_mutex_deadline_EXT(event_t event, lck_mtx_t *mutex, uint64_t deadline, wait_interrupt_t interruptible); - -lck_rw_t * -lock_alloc_EXT( - __unused boolean_t can_sleep, - __unused unsigned short tag0, - __unused unsigned short tag1) -{ - return( lck_rw_alloc_init( &LockCompatGroup, LCK_ATTR_NULL)); -} - -void -lock_done_EXT( - lck_rw_t *lock) -{ - (void) lck_rw_done(lock); -} - -void -lock_free_EXT( - lck_rw_t *lock) -{ - lck_rw_free(lock, &LockCompatGroup); -} - -void -lock_init_EXT( - lck_rw_t *lock, - __unused boolean_t 
can_sleep, - __unused unsigned short tag0, - __unused unsigned short tag1) -{ - lck_rw_init(lock, &LockCompatGroup, LCK_ATTR_NULL); -} - -void -lock_read_EXT( - lck_rw_t *lock) -{ - lck_rw_lock_shared( lock); -} - -boolean_t -lock_read_to_write_EXT( - lck_rw_t *lock) -{ - return( lck_rw_lock_shared_to_exclusive(lock)); -} - -void -lock_write_EXT( - lck_rw_t *lock) -{ - lck_rw_lock_exclusive(lock); -} - -void -lock_write_to_read_EXT( - lck_rw_t *lock) -{ - lck_rw_lock_exclusive_to_shared(lock); -} - -wait_result_t -thread_sleep_lock_write_EXT( - event_t event, - lck_rw_t *lock, - wait_interrupt_t interruptible) -{ - return( lck_rw_sleep(lock, LCK_SLEEP_EXCLUSIVE, event, interruptible)); -} - -void -usimple_lock_EXT( - lck_spin_t *lock) -{ - lck_spin_lock(lock); -} - -void -usimple_lock_init_EXT( - lck_spin_t *lock, - __unused unsigned short tag) -{ - lck_spin_init(lock, &LockCompatGroup, LCK_ATTR_NULL); -} - -unsigned int -usimple_lock_try_EXT( - lck_spin_t *lock) -{ - return(lck_spin_try_lock(lock)); -} - -void -usimple_unlock_EXT( - lck_spin_t *lock) -{ - lck_spin_unlock(lock); -} - -wait_result_t -thread_sleep_usimple_lock_EXT( - event_t event, - lck_spin_t *lock, - wait_interrupt_t interruptible) -{ - return( lck_spin_sleep(lock, LCK_SLEEP_DEFAULT, event, interruptible)); -} -lck_mtx_t * -mutex_alloc_EXT( - __unused unsigned short tag) -{ - return(lck_mtx_alloc_init(&LockCompatGroup, LCK_ATTR_NULL)); -} - -void -mutex_free_EXT( - lck_mtx_t *mutex) -{ - lck_mtx_free(mutex, &LockCompatGroup); -} - -void -mutex_init_EXT( - lck_mtx_t *mutex, - __unused unsigned short tag) -{ - lck_mtx_init(mutex, &LockCompatGroup, LCK_ATTR_NULL); -} - -wait_result_t -thread_sleep_mutex_EXT( - event_t event, - lck_mtx_t *mutex, - wait_interrupt_t interruptible) -{ - return( lck_mtx_sleep(mutex, LCK_SLEEP_DEFAULT, event, interruptible)); -} - -wait_result_t -thread_sleep_mutex_deadline_EXT( - event_t event, - lck_mtx_t *mutex, - uint64_t deadline, - wait_interrupt_t interruptible) -{ - return( lck_mtx_sleep_deadline(mutex, LCK_SLEEP_DEFAULT, event, interruptible, deadline)); -} diff --git a/osfmk/kern/locks.h b/osfmk/kern/locks.h index dbf40e76b..425c8dc7e 100644 --- a/osfmk/kern/locks.h +++ b/osfmk/kern/locks.h @@ -58,8 +58,9 @@ typedef unsigned int lck_sleep_action_t; #define LCK_SLEEP_SHARED 0x02 /* Reclaim the lock in shared mode (RW only) */ #define LCK_SLEEP_EXCLUSIVE 0x04 /* Reclaim the lock in exclusive mode (RW only) */ #define LCK_SLEEP_SPIN 0x08 /* Reclaim the lock in spin mode (mutex only) */ +#define LCK_SLEEP_PROMOTED_PRI 0x10 /* Sleep at a promoted priority */ -#define LCK_SLEEP_MASK 0x0f /* Valid actions */ +#define LCK_SLEEP_MASK 0x1f /* Valid actions */ #ifdef MACH_KERNEL_PRIVATE @@ -263,8 +264,10 @@ extern wait_result_t lck_spin_sleep_deadline( #ifdef KERNEL_PRIVATE -extern boolean_t lck_spin_try_lock( - lck_spin_t *lck); +extern boolean_t lck_spin_try_lock( lck_spin_t *lck); + +/* NOT SAFE: To be used only by kernel debugger to avoid deadlock. 
*/ +extern boolean_t lck_spin_is_acquired( lck_spin_t *lck); struct _lck_mtx_ext_; extern void lck_mtx_init_ext(lck_mtx_t *lck, struct _lck_mtx_ext_ *lck_ext, diff --git a/osfmk/kern/machine.c b/osfmk/kern/machine.c index c0caa5511..3deffa426 100644 --- a/osfmk/kern/machine.c +++ b/osfmk/kern/machine.c @@ -79,7 +79,6 @@ #include #include #include -#include #include #include #include @@ -95,6 +94,10 @@ #endif #include +#if CONFIG_DTRACE +extern void (*dtrace_cpu_state_changed_hook)(int, boolean_t); +#endif + /* * Exported variables: */ @@ -122,10 +125,7 @@ processor_up( init_ast_check(processor); pset = processor->processor_set; pset_lock(pset); - if (++pset->online_processor_count == 1) { - pset_pri_init_hint(pset, processor); - pset_count_init_hint(pset, processor); - } + ++pset->online_processor_count; enqueue_tail(&pset->active_queue, (queue_entry_t)processor); processor->state = PROCESSOR_RUNNING; (void)hw_atomic_add(&processor_avail_count, 1); @@ -133,7 +133,13 @@ processor_up( pset_unlock(pset); ml_cpu_up(); splx(s); + +#if CONFIG_DTRACE + if (dtrace_cpu_state_changed_hook) + (*dtrace_cpu_state_changed_hook)(processor->cpu_id, TRUE); +#endif } +#include kern_return_t host_reboot( @@ -204,7 +210,9 @@ processor_shutdown( */ while (processor->state == PROCESSOR_DISPATCHING) { pset_unlock(pset); + splx(s); delay(1); + s = splsched(); pset_lock(pset); } @@ -255,6 +263,11 @@ processor_doshutdown( assert(processor->state == PROCESSOR_SHUTDOWN); +#if CONFIG_DTRACE + if (dtrace_cpu_state_changed_hook) + (*dtrace_cpu_state_changed_hook)(processor->cpu_id, FALSE); +#endif + ml_cpu_down(); #if HIBERNATION @@ -267,10 +280,7 @@ processor_doshutdown( pset = processor->processor_set; pset_lock(pset); processor->state = PROCESSOR_OFF_LINE; - if (--pset->online_processor_count == 0) { - pset_pri_init_hint(pset, PROCESSOR_NULL); - pset_count_init_hint(pset, PROCESSOR_NULL); - } + --pset->online_processor_count; (void)hw_atomic_sub(&processor_avail_count, 1); commpage_update_active_cpus(); SCHED(processor_queue_shutdown)(processor); diff --git a/osfmk/kern/mk_sp.c b/osfmk/kern/mk_sp.c index 1d1a6a5e4..f902e916c 100644 --- a/osfmk/kern/mk_sp.c +++ b/osfmk/kern/mk_sp.c @@ -59,97 +59,6 @@ #include #include -/* - * thread_policy_common: - * - * Set scheduling policy & priority for thread. 
- */ -static kern_return_t -thread_policy_common( - thread_t thread, - integer_t policy, - integer_t priority) -{ - spl_t s; - - if ( thread == THREAD_NULL || - invalid_policy(policy) ) - return(KERN_INVALID_ARGUMENT); - - if (thread->static_param) - return (KERN_SUCCESS); - - if ((policy == POLICY_TIMESHARE) - && !SCHED(supports_timeshare_mode)()) - policy = TH_MODE_FIXED; - - s = splsched(); - thread_lock(thread); - - if ( (thread->sched_mode != TH_MODE_REALTIME) && - (thread->saved_mode != TH_MODE_REALTIME) ) { - if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) { - boolean_t oldmode = thread->sched_mode == TH_MODE_TIMESHARE; - - if (policy == POLICY_TIMESHARE && !oldmode) { - thread->sched_mode = TH_MODE_TIMESHARE; - - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { - sched_share_incr(); - - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_incr(); - } - } - else - if (policy != POLICY_TIMESHARE && oldmode) { - thread->sched_mode = TH_MODE_FIXED; - - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_decr(); - - sched_share_decr(); - } - } - } - else { - if (policy == POLICY_TIMESHARE) - thread->saved_mode = TH_MODE_TIMESHARE; - else - thread->saved_mode = TH_MODE_FIXED; - } - - if (priority >= thread->max_priority) - priority = thread->max_priority - thread->task_priority; - else - if (priority >= MINPRI_KERNEL) - priority -= MINPRI_KERNEL; - else - if (priority >= MINPRI_RESERVED) - priority -= MINPRI_RESERVED; - else - priority -= BASEPRI_DEFAULT; - - priority += thread->task_priority; - - if (priority > thread->max_priority) - priority = thread->max_priority; - else - if (priority < MINPRI) - priority = MINPRI; - - thread->importance = priority - thread->task_priority; - - - set_priority(thread, priority); - } - - thread_unlock(thread); - splx(s); - - return (KERN_SUCCESS); -} /* * thread_set_policy @@ -175,6 +84,9 @@ thread_set_policy( pset == PROCESSOR_SET_NULL || pset != &pset0) return (KERN_INVALID_ARGUMENT); + if (invalid_policy(policy)) + return(KERN_INVALID_ARGUMENT); + thread_mtx_lock(thread); switch (policy) { @@ -253,7 +165,10 @@ thread_set_policy( return (result); } - result = thread_policy_common(thread, policy, bas); + /* Note that we do not pass on max priority. 
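+	 * (Presumably thread_set_mode_and_absolute_pri() re-clamps the
+	 * base priority against thread->max_priority itself, as the
+	 * removed thread_policy_common() did -- an inference, not
+	 * something this patch states.)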
*/ + if (result == KERN_SUCCESS) { + result = thread_set_mode_and_absolute_pri(thread, policy, bas); + } thread_mtx_unlock(thread); diff --git a/osfmk/kern/mk_timer.c b/osfmk/kern/mk_timer.c index fdb6174dd..b83339877 100644 --- a/osfmk/kern/mk_timer.c +++ b/osfmk/kern/mk_timer.c @@ -167,10 +167,12 @@ mk_timer_expire( timer->is_armed = FALSE; simple_unlock(&timer->lock); - msg.header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); + msg.header.msgh_bits = + MACH_MSGH_BITS_SET(MACH_MSG_TYPE_COPY_SEND, 0, 0, 0); msg.header.msgh_remote_port = port; msg.header.msgh_local_port = MACH_PORT_NULL; - msg.header.msgh_reserved = msg.header.msgh_id = 0; + msg.header.msgh_voucher_port = MACH_PORT_NULL; + msg.header.msgh_id = 0; msg.unused[0] = msg.unused[1] = msg.unused[2] = 0; diff --git a/osfmk/kern/printf.c b/osfmk/kern/printf.c index 8772a0251..2ce720c57 100644 --- a/osfmk/kern/printf.c +++ b/osfmk/kern/printf.c @@ -156,10 +156,8 @@ #include #include -#include #include #include -#include #include #include #include diff --git a/osfmk/kern/priority.c b/osfmk/kern/priority.c index d3de06bba..0b64ac8aa 100644 --- a/osfmk/kern/priority.c +++ b/osfmk/kern/priority.c @@ -56,11 +56,11 @@ /* */ /* - * File: clock_prim.c + * File: priority.c * Author: Avadis Tevanian, Jr. * Date: 1986 * - * Clock primitives. + * Priority related scheduler bits. */ #include @@ -76,6 +76,10 @@ #include #include +#ifdef CONFIG_MACH_APPROXIMATE_TIME +#include /* for commpage_update_mach_approximate_time */ +#endif + /* * thread_quantum_expire: * @@ -94,6 +98,8 @@ thread_quantum_expire( ast_t preempt; uint64_t ctime; + assert(processor == current_processor()); + SCHED_STATS_QUANTUM_TIMER_EXPIRATION(processor); /* @@ -103,8 +109,17 @@ thread_quantum_expire( * thread, we must credit the ledger before taking the thread lock. The ledger * pointers are only manipulated by the thread itself at the ast boundary. */ - ledger_credit(thread->t_ledger, task_ledgers.cpu_time, thread->current_quantum); - ledger_credit(thread->t_threadledger, thread_ledgers.cpu_time, thread->current_quantum); + ledger_credit(thread->t_ledger, task_ledgers.cpu_time, thread->quantum_remaining); + ledger_credit(thread->t_threadledger, thread_ledgers.cpu_time, thread->quantum_remaining); +#ifdef CONFIG_BANK + if (thread->t_bankledger) { + ledger_credit(thread->t_bankledger, bank_ledgers.cpu_time, + (thread->quantum_remaining - thread->t_deduct_bank_ledger_time)); + } + thread->t_deduct_bank_ledger_time = 0; +#endif + + ctime = mach_absolute_time(); thread_lock(thread); @@ -112,8 +127,11 @@ thread_quantum_expire( * We've run up until our quantum expiration, and will (potentially) * continue without re-entering the scheduler, so update this now. */ - thread->last_run_time = processor->quantum_end; + thread->last_run_time = ctime; +#ifdef CONFIG_MACH_APPROXIMATE_TIME + commpage_update_mach_approximate_time(ctime); +#endif /* * Check for fail-safe trip. 
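 *	In effect (paraphrasing the hunk below): a realtime or
 *	fixed-priority thread that has computed past
 *	max_unsafe_computation without blocking is demoted to
 *	timeshare until safe_release --
 *
 *		if (ctime - thread->computation_epoch
 *		        + thread->computation_metered > max_unsafe_computation)
 *			sched_thread_mode_demote(thread, TH_SFLAG_FAILSAFE);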
*/ @@ -122,31 +140,18 @@ thread_quantum_expire( !(thread->options & TH_OPT_SYSTEM_CRITICAL)) { uint64_t new_computation; - new_computation = processor->quantum_end - thread->computation_epoch; + new_computation = ctime - thread->computation_epoch; new_computation += thread->computation_metered; if (new_computation > max_unsafe_computation) { KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_FAILSAFE)|DBG_FUNC_NONE, (uintptr_t)thread->sched_pri, (uintptr_t)thread->sched_mode, 0, 0, 0); - if (thread->sched_mode == TH_MODE_REALTIME) { - thread->priority = DEPRESSPRI; - } - - thread->saved_mode = thread->sched_mode; - - if (SCHED(supports_timeshare_mode)) { - sched_share_incr(); - thread->sched_mode = TH_MODE_TIMESHARE; - } else { - /* XXX handle fixed->fixed case */ - thread->sched_mode = TH_MODE_FIXED; - } + thread->safe_release = ctime + sched_safe_duration; - thread->safe_release = processor->quantum_end + sched_safe_duration; - thread->sched_flags |= TH_SFLAG_FAILSAFE; + sched_thread_mode_demote(thread, TH_SFLAG_FAILSAFE); } } - + /* * Recompute scheduled priority if appropriate. */ @@ -167,7 +172,6 @@ thread_quantum_expire( processor->timeslice--; thread_quantum_init(thread); - thread->last_quantum_refill_time = processor->quantum_end; /* Reload precise timing global policy to thread-local policy */ thread->precise_user_kernel_time = use_precise_user_kernel_time(thread); @@ -178,42 +182,47 @@ thread_quantum_expire( */ if (!thread->precise_user_kernel_time) { timer_switch(PROCESSOR_DATA(processor, current_state), - processor->quantum_end, + ctime, PROCESSOR_DATA(processor, current_state)); timer_switch(PROCESSOR_DATA(processor, thread_timer), - processor->quantum_end, + ctime, PROCESSOR_DATA(processor, thread_timer)); } - ctime = mach_absolute_time(); - processor->quantum_end = ctime + thread->current_quantum; + processor->quantum_end = ctime + thread->quantum_remaining; timer_call_enter1(&processor->quantum_timer, thread, - processor->quantum_end, TIMER_CALL_SYS_CRITICAL); + processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); /* * Context switch check. */ - if ((preempt = csw_check(processor)) != AST_NONE) + if ((preempt = csw_check(processor, AST_QUANTUM)) != AST_NONE) ast_on(preempt); - else { - processor_set_t pset = processor->processor_set; - - pset_lock(pset); - - pset_pri_hint(pset, processor, processor->current_pri); - pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor)); - - pset_unlock(pset); - } thread_unlock(thread); -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) sched_traditional_consider_maintenance(ctime); -#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ +} + +/* + * sched_set_thread_base_priority: + * + * Set the base priority of the thread + * and reset its scheduled priority. + * + * Called with the thread locked. + */ +void +sched_set_thread_base_priority(thread_t thread, int priority) +{ + thread->priority = priority; + SCHED(compute_priority)(thread, FALSE); } -#if defined(CONFIG_SCHED_TRADITIONAL) + +#if defined(CONFIG_SCHED_TIMESHARE_CORE) void sched_traditional_quantum_expire(thread_t thread __unused) @@ -278,39 +287,21 @@ static struct shift_data sched_decay_shifts[SCHED_DECAY_TICKS] = { * * Calculate the timesharing priority based upon usage and load. 
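 *	In effect (see do_priority_computation() below):
 *
 *		pri = base_pri - (sched_usage >> pri_shift)
 *
 *	clamped to [MINPRI_USER, MAXPRI_KERNEL].  Worked example with
 *	made-up numbers: base 31, sched_usage 0x40000, pri_shift 14
 *	gives 31 - 16 = 15.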
*/ +extern int sched_pri_decay_band_limit; -#define do_priority_computation(thread, pri) \ - MACRO_BEGIN \ - (pri) = (thread)->priority /* start with base priority */ \ - - ((thread)->sched_usage >> (thread)->pri_shift); \ - if ((pri) < MINPRI_USER) \ - (pri) = MINPRI_USER; \ - else \ - if ((pri) > MAXPRI_KERNEL) \ - (pri) = MAXPRI_KERNEL; \ - MACRO_END +static int do_priority_computation(thread_t th) { + register int priority = th->priority /* start with base priority */ + - (th->sched_usage >> th->pri_shift); + if (priority < MINPRI_USER) + priority = MINPRI_USER; + else + if (priority > MAXPRI_KERNEL) + priority = MAXPRI_KERNEL; -#endif - -/* - * set_priority: - * - * Set the base priority of the thread - * and reset its scheduled priority. - * - * Called with the thread locked. - */ -void -set_priority( - register thread_t thread, - register int priority) -{ - thread->priority = priority; - SCHED(compute_priority)(thread, FALSE); + return priority; } -#if defined(CONFIG_SCHED_TRADITIONAL) /* * compute_priority: @@ -328,14 +319,13 @@ compute_priority( { register int priority; - if ( !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) && - (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || - override_depress ) ) { - if (thread->sched_mode == TH_MODE_TIMESHARE) - do_priority_computation(thread, priority); - else - priority = thread->priority; + if (thread->sched_mode == TH_MODE_TIMESHARE) + priority = do_priority_computation(thread); + else + priority = thread->priority; + if ((!(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) || (priority > thread->sched_pri)) && + (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || override_depress)) { set_sched_pri(thread, priority); } } @@ -357,8 +347,17 @@ compute_my_priority( { register int priority; - do_priority_computation(thread, priority); + priority = do_priority_computation(thread); assert(thread->runq == PROCESSOR_NULL); + + if (priority != thread->sched_pri) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_DECAY_PRIORITY)|DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), + thread->priority, + thread->sched_pri, + priority, + 0); + } thread->sched_pri = priority; } @@ -398,7 +397,7 @@ update_priority( thread->sched_stamp += ticks; if (sched_use_combined_fgbg_decay) thread->pri_shift = sched_combined_fgbg_pri_shift; - else if (thread->max_priority <= MAXPRI_THROTTLE) + else if (thread->sched_flags & TH_SFLAG_THROTTLED) thread->pri_shift = sched_background_pri_shift; else thread->pri_shift = sched_pri_shift; @@ -451,24 +450,9 @@ update_priority( /* * Check for fail-safe release. 
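 *	Once mach_absolute_time() passes thread->safe_release, the
 *	TH_SFLAG_FAILSAFE demotion is dropped and the saved mode is
 *	restored -- now centralized in sched_thread_mode_undemote()
 *	rather than open-coded here.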
*/ - if ( (thread->sched_flags & TH_SFLAG_FAILSAFE) && - mach_absolute_time() >= thread->safe_release ) { - if (thread->saved_mode != TH_MODE_TIMESHARE) { - if (thread->saved_mode == TH_MODE_REALTIME) { - thread->priority = BASEPRI_RTQUEUES; - } - - thread->sched_mode = thread->saved_mode; - thread->saved_mode = TH_MODE_NONE; - - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) - sched_share_decr(); - - if (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)) - set_sched_pri(thread, thread->priority); - } - - thread->sched_flags &= ~TH_SFLAG_FAILSAFE; + if ((thread->sched_flags & TH_SFLAG_FAILSAFE) && + mach_absolute_time() >= thread->safe_release) { + sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE); } @@ -480,7 +464,7 @@ update_priority( !(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) ) { register int new_pri; - do_priority_computation(thread, new_pri); + new_pri = do_priority_computation(thread); if (new_pri != thread->sched_pri) { boolean_t removed = thread_run_queue_remove(thread); @@ -510,4 +494,254 @@ update_priority( return; } -#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + +#if MACH_ASSERT +/* sched_mode == TH_MODE_TIMESHARE controls whether a thread has a timeshare count when it has a run count */ + +void sched_share_incr(thread_t thread) { + assert((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN); + assert(thread->sched_mode == TH_MODE_TIMESHARE); + assert(thread->SHARE_COUNT == 0); + thread->SHARE_COUNT++; + (void)hw_atomic_add(&sched_share_count, 1); +} + +void sched_share_decr(thread_t thread) { + assert((thread->state & (TH_RUN|TH_IDLE)) != TH_RUN || thread->sched_mode != TH_MODE_TIMESHARE); + assert(thread->SHARE_COUNT == 1); + (void)hw_atomic_sub(&sched_share_count, 1); + thread->SHARE_COUNT--; +} + +/* TH_SFLAG_THROTTLED controls whether a thread has a background count when it has a run count and a share count */ + +void sched_background_incr(thread_t thread) { + assert((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN); + assert(thread->sched_mode == TH_MODE_TIMESHARE); + assert((thread->sched_flags & TH_SFLAG_THROTTLED) == TH_SFLAG_THROTTLED); + + assert(thread->BG_COUNT == 0); + thread->BG_COUNT++; + int val = hw_atomic_add(&sched_background_count, 1); + assert(val >= 0); + + /* Always do the background change while holding a share count */ + assert(thread->SHARE_COUNT == 1); +} + +void sched_background_decr(thread_t thread) { + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN && thread->sched_mode == TH_MODE_TIMESHARE) + assert((thread->sched_flags & TH_SFLAG_THROTTLED) != TH_SFLAG_THROTTLED); + assert(thread->BG_COUNT == 1); + int val = hw_atomic_sub(&sched_background_count, 1); + thread->BG_COUNT--; + assert(val >= 0); + assert(thread->BG_COUNT == 0); + + /* Always do the background change while holding a share count */ + assert(thread->SHARE_COUNT == 1); +} + + +void +assert_thread_sched_count(thread_t thread) { + /* Only 0 or 1 are acceptable values */ + assert(thread->BG_COUNT == 0 || thread->BG_COUNT == 1); + assert(thread->SHARE_COUNT == 0 || thread->SHARE_COUNT == 1); + + /* BG is only allowed when you already have a share count */ + if (thread->BG_COUNT == 1) + assert(thread->SHARE_COUNT == 1); + if (thread->SHARE_COUNT == 0) + assert(thread->BG_COUNT == 0); + + if ((thread->state & (TH_RUN|TH_IDLE)) != TH_RUN || + (thread->sched_mode != TH_MODE_TIMESHARE)) + assert(thread->SHARE_COUNT == 0); + + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN && + (thread->sched_mode == TH_MODE_TIMESHARE)) + assert(thread->SHARE_COUNT == 1); + 
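+	/* A BG count is legal only for a running, timeshare, throttled thread: */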
+ if ((thread->state & (TH_RUN|TH_IDLE)) != TH_RUN || + (thread->sched_mode != TH_MODE_TIMESHARE) || + !(thread->sched_flags & TH_SFLAG_THROTTLED)) + assert(thread->BG_COUNT == 0); + + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN && + (thread->sched_mode == TH_MODE_TIMESHARE) && + (thread->sched_flags & TH_SFLAG_THROTTLED)) + assert(thread->BG_COUNT == 1); +} + +#endif /* MACH_ASSERT */ + +/* + * Set the thread's true scheduling mode + * Called with thread mutex and thread locked + * The thread has already been removed from the runqueue. + * + * (saved_mode is handled before this point) + */ +void +sched_set_thread_mode(thread_t thread, sched_mode_t new_mode) +{ + assert_thread_sched_count(thread); + + sched_mode_t old_mode = thread->sched_mode; + + thread->sched_mode = new_mode; + + switch (new_mode) { + case TH_MODE_FIXED: + case TH_MODE_REALTIME: + if (old_mode == TH_MODE_TIMESHARE) { + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { + if (thread->sched_flags & TH_SFLAG_THROTTLED) + sched_background_decr(thread); + + sched_share_decr(thread); + } + } + break; + + case TH_MODE_TIMESHARE: + if (old_mode != TH_MODE_TIMESHARE) { + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { + sched_share_incr(thread); + + if (thread->sched_flags & TH_SFLAG_THROTTLED) + sched_background_incr(thread); + } + } + break; + + default: + panic("unexpected mode: %d", new_mode); + break; + } + + assert_thread_sched_count(thread); +} + +/* + * Demote the true scheduler mode to timeshare (called with the thread locked) + */ +void +sched_thread_mode_demote(thread_t thread, uint32_t reason) +{ + assert(reason & TH_SFLAG_DEMOTED_MASK); + assert((thread->sched_flags & reason) != reason); + assert_thread_sched_count(thread); + + if (thread->policy_reset) + return; + + if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) { + /* Another demotion reason is already active */ + thread->sched_flags |= reason; + return; + } + + assert(thread->saved_mode == TH_MODE_NONE); + + boolean_t removed = thread_run_queue_remove(thread); + + if (thread->sched_mode == TH_MODE_REALTIME) + thread->priority = DEPRESSPRI; + + thread->sched_flags |= reason; + + thread->saved_mode = thread->sched_mode; + + sched_set_thread_mode(thread, TH_MODE_TIMESHARE); + + if (removed) + thread_setrun(thread, SCHED_TAILQ); + + assert_thread_sched_count(thread); +} + +/* + * Un-demote the true scheduler mode back to the saved mode (called with the thread locked) + */ +void +sched_thread_mode_undemote(thread_t thread, uint32_t reason) +{ + assert(reason & TH_SFLAG_DEMOTED_MASK); + assert((thread->sched_flags & reason) == reason); + assert(thread->saved_mode != TH_MODE_NONE); + assert(thread->sched_mode == TH_MODE_TIMESHARE); + assert(thread->policy_reset == 0); + + assert_thread_sched_count(thread); + + thread->sched_flags &= ~reason; + + if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) { + /* Another demotion reason is still active */ + return; + } + + boolean_t removed = thread_run_queue_remove(thread); + + sched_set_thread_mode(thread, thread->saved_mode); + + thread->saved_mode = TH_MODE_NONE; + + if (thread->sched_mode == TH_MODE_REALTIME) { + thread->priority = BASEPRI_RTQUEUES; + } + + SCHED(compute_priority)(thread, FALSE); + + if (removed) + thread_setrun(thread, SCHED_TAILQ); +} + +/* + * Set the thread to be categorized as 'background' + * Called with thread mutex and thread lock held + * + * TODO: Eventually, 'background' should be a true sched_mode. 
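+ *
+ * Illustrative (assumed) call site, e.g. from the task policy engine;
+ * only the locking order is taken from the comment above:
+ *
+ *	thread_mtx_lock(thread);
+ *	s = splsched(); thread_lock(thread);
+ *	sched_set_thread_throttled(thread, TRUE);	-- mark background
+ *	thread_unlock(thread); splx(s);
+ *	thread_mtx_unlock(thread);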
+ */ +void +sched_set_thread_throttled(thread_t thread, boolean_t wants_throttle) +{ + if (thread->policy_reset) + return; + + assert(((thread->sched_flags & TH_SFLAG_THROTTLED) ? TRUE : FALSE) != wants_throttle); + + assert_thread_sched_count(thread); + + /* + * When backgrounding a thread, iOS has the semantic that + * realtime and fixed priority threads should be demoted + * to timeshare background threads. + * + * On OSX, realtime and fixed priority threads don't lose their mode. + */ + + if (wants_throttle) { + thread->sched_flags |= TH_SFLAG_THROTTLED; + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN && thread->sched_mode == TH_MODE_TIMESHARE) { + sched_background_incr(thread); + } + + assert_thread_sched_count(thread); + + } else { + thread->sched_flags &= ~TH_SFLAG_THROTTLED; + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN && thread->sched_mode == TH_MODE_TIMESHARE) { + sched_background_decr(thread); + } + + assert_thread_sched_count(thread); + + } + + assert_thread_sched_count(thread); +} + diff --git a/osfmk/kern/processor.c b/osfmk/kern/processor.c index 709019b9f..355b1b1dc 100644 --- a/osfmk/kern/processor.c +++ b/osfmk/kern/processor.c @@ -158,9 +158,12 @@ processor_init( processor->current_thmode = TH_MODE_NONE; processor->cpu_id = cpu_id; timer_call_setup(&processor->quantum_timer, thread_quantum_expire, processor); + processor->quantum_end = UINT64_MAX; processor->deadline = UINT64_MAX; processor->timeslice = 0; - processor->processor_meta = PROCESSOR_META_NULL; + processor->processor_primary = processor; /* no SMT relationship known at this point */ + processor->processor_secondary = NULL; + processor->is_SMT = FALSE; processor->processor_self = IP_NULL; processor_data_init(processor); processor->processor_list = NULL; @@ -187,21 +190,26 @@ } void -processor_meta_init( +processor_set_primary( processor_t processor, processor_t primary) { - processor_meta_t pmeta = primary->processor_meta; - - if (pmeta == PROCESSOR_META_NULL) { - pmeta = kalloc(sizeof (*pmeta)); - - queue_init(&pmeta->idle_queue); - - pmeta->primary = primary; + assert(processor->processor_primary == primary || processor->processor_primary == processor); + /* Re-adjust primary pointer for this (possibly) secondary processor */ + processor->processor_primary = primary; + + assert(primary->processor_secondary == NULL || primary->processor_secondary == processor); + if (primary != processor) { + /* Link primary to secondary, assumes a 2-way SMT model * We'll need to move to a queue if any future architecture * requires otherwise. 
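+		 * Resulting topology for an (illustrative) 2-way SMT pair,
+		 * cpu0 primary and cpu1 secondary, after both calls:
+		 *
+		 *	cpu0->processor_primary == cpu0, cpu0->processor_secondary == cpu1
+		 *	cpu1->processor_primary == cpu0, cpu1->processor_secondary == NULL
+		 *	cpu0->is_SMT == cpu1->is_SMT == TRUE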
+ */ + assert(processor->processor_secondary == NULL); + primary->processor_secondary = processor; + /* Mark both processors as SMT siblings */ + primary->is_SMT = TRUE; + processor->is_SMT = TRUE; } - - processor->processor_meta = pmeta; } processor_set_t @@ -221,6 +229,12 @@ processor_set_t pset_create( pset_node_t node) { +#if defined(CONFIG_SCHED_MULTIQ) + /* multiq scheduler is not currently compatible with multiple psets */ + if (sched_groups_enabled) + return processor_pset(master_processor); +#endif /* defined(CONFIG_SCHED_MULTIQ) */ + processor_set_t *prev, pset = kalloc(sizeof (*pset)); if (pset != PROCESSOR_SET_NULL) { @@ -255,9 +269,8 @@ pset_init( queue_init(&pset->active_queue); queue_init(&pset->idle_queue); + queue_init(&pset->idle_secondary_queue); pset->online_processor_count = 0; - pset_pri_init_hint(pset, PROCESSOR_NULL); - pset_count_init_hint(pset, PROCESSOR_NULL); pset->cpu_set_low = pset->cpu_set_hi = 0; pset->cpu_set_count = 0; pset->pending_AST_cpu_mask = 0; diff --git a/osfmk/kern/processor.h b/osfmk/kern/processor.h index a634c2594..922e20aa2 100644 --- a/osfmk/kern/processor.h +++ b/osfmk/kern/processor.h @@ -74,9 +74,11 @@ #include #include #include -#include +#include +#include #include #include +#include #include #include @@ -84,8 +86,7 @@ struct processor_set { queue_head_t active_queue; /* active processors */ queue_head_t idle_queue; /* idle processors */ - - processor_t low_pri, low_count; + queue_head_t idle_secondary_queue; /* idle secondary processors */ int online_processor_count; @@ -94,12 +95,15 @@ struct processor_set { decl_simple_lock_data(,sched_lock) /* lock for above */ -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_MULTIQ) struct run_queue pset_runq; /* runq for this processor set */ +#endif + +#if defined(CONFIG_SCHED_TRADITIONAL) int pset_runq_bound_count; /* # of threads in runq bound to any processor in pset */ #endif - + /* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */ uint32_t pending_AST_cpu_mask; @@ -127,18 +131,11 @@ extern queue_head_t tasks, terminated_tasks, threads; /* Terminated tasks are O extern int tasks_count, terminated_tasks_count, threads_count; decl_lck_mtx_data(extern,tasks_threads_lock) -struct processor_meta { - queue_head_t idle_queue; - processor_t primary; -}; - -typedef struct processor_meta *processor_meta_t; -#define PROCESSOR_META_NULL ((processor_meta_t) 0) - struct processor { queue_chain_t processor_queue;/* idle/active queue link, * MUST remain the first element */ int state; /* See below */ + boolean_t is_SMT; struct thread *active_thread, /* thread running on processor */ *next_thread, /* next thread when dispatched */ @@ -148,6 +145,7 @@ struct processor { int current_pri; /* priority of current thread */ sched_mode_t current_thmode; /* sched mode of current thread */ + sfi_class_id_t current_sfi_class; /* SFI class of current thread */ int cpu_id; /* platform numeric id */ timer_call_data_t quantum_timer; /* timer for quantum expiration */ @@ -157,15 +155,21 @@ struct processor { uint64_t deadline; /* current deadline */ int timeslice; /* quanta before timeslice ends */ -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_MULTIQ) struct run_queue runq; /* runq for this processor */ +#endif + +#if defined(CONFIG_SCHED_TRADITIONAL) int runq_bound_count; /* # of threads bound to this 
processor */ #endif #if defined(CONFIG_SCHED_GRRR) struct grrr_run_queue grrr_runq; /* Group Ratio Round-Robin runq */ #endif - processor_meta_t processor_meta; + processor_t processor_primary; /* pointer to primary processor for + * secondary SMT processors, or a pointer + * to ourselves for primaries or non-SMT */ + processor_t processor_secondary; struct ipc_port * processor_self; /* port for operations */ processor_t processor_list; /* all existing processors */ @@ -185,61 +189,45 @@ extern boolean_t sched_stats_active; /* * Processor state is accessed by locking the scheduling lock * for the assigned processor set. - */ + * + * -------------------- SHUTDOWN + * / ^ ^ + * _/ | \ + * OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING + * \_________________^ ^ ^______/ / + * \__________________/ + * + * Most of these state transitions are externally driven as a + * directive (for instance telling an IDLE processor to start + * coming out of the idle state to run a thread). However these + * are typically paired with a handshake by the processor itself + * to indicate that it has completed a transition of indeterminate + * length (for example, the DISPATCHING->RUNNING or START->RUNNING + * transitions must occur on the processor itself). + * + * The boot processor has some special cases, and skips the START state, + * since it has already bootstrapped and is ready to context switch threads. + * + * When a processor is in DISPATCHING or RUNNING state, the current_pri, + * current_thmode, and deadline fields should be set, so that other + * processors can evaluate if it is an appropriate candidate for preemption. +*/ #define PROCESSOR_OFF_LINE 0 /* Not available */ #define PROCESSOR_SHUTDOWN 1 /* Going off-line */ #define PROCESSOR_START 2 /* Being started */ -#define PROCESSOR_INACTIVE 3 /* Inactive (unavailable) */ +/* 3 Formerly Inactive (unavailable) */ #define PROCESSOR_IDLE 4 /* Idle (available) */ #define PROCESSOR_DISPATCHING 5 /* Dispatching (idle -> active) */ #define PROCESSOR_RUNNING 6 /* Normal execution */ extern processor_t current_processor(void); -extern processor_t cpu_to_processor( - int cpu); - /* Lock macros */ #define pset_lock(p) simple_lock(&(p)->sched_lock) #define pset_unlock(p) simple_unlock(&(p)->sched_lock) #define pset_lock_init(p) simple_lock_init(&(p)->sched_lock, 0) -/* Update hints */ - -#define pset_pri_hint(ps, p, pri) \ -MACRO_BEGIN \ - if ((p) != (ps)->low_pri) { \ - if ((pri) < (ps)->low_pri->current_pri) \ - (ps)->low_pri = (p); \ - else \ - if ((ps)->low_pri->state < PROCESSOR_IDLE) \ - (ps)->low_pri = (p); \ - } \ -MACRO_END - -#define pset_count_hint(ps, p, cnt) \ -MACRO_BEGIN \ - if ((p) != (ps)->low_count) { \ - if ((cnt) < SCHED(processor_runq_count)((ps)->low_count)) \ - (ps)->low_count = (p); \ - else \ - if ((ps)->low_count->state < PROCESSOR_IDLE) \ - (ps)->low_count = (p); \ - } \ -MACRO_END - -#define pset_pri_init_hint(ps, p) \ -MACRO_BEGIN \ - (ps)->low_pri = (p); \ -MACRO_END - -#define pset_count_init_hint(ps, p) \ -MACRO_BEGIN \ - (ps)->low_count = (p); \ -MACRO_END - - extern void processor_bootstrap(void); extern void processor_init( @@ -247,7 +235,7 @@ extern void processor_init( int cpu_id, processor_set_t processor_set); -extern void processor_meta_init( +extern void processor_set_primary( processor_t processor, processor_t primary); @@ -279,13 +267,12 @@ extern kern_return_t processor_info_count( extern void machine_run_count( uint32_t count); -extern boolean_t machine_processor_is_inactive( - processor_t processor); 
- extern processor_t machine_choose_processor( processor_set_t pset, processor_t processor); +#define next_pset(p) (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets) + #else /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS @@ -300,4 +287,11 @@ __END_DECLS #endif /* MACH_KERNEL_PRIVATE */ +#ifdef KERNEL_PRIVATE +__BEGIN_DECLS +extern processor_t cpu_to_processor(int cpu); +__END_DECLS + +#endif /* KERNEL_PRIVATE */ + #endif /* _KERN_PROCESSOR_H_ */ diff --git a/osfmk/kern/queue.c b/osfmk/kern/queue.c deleted file mode 100644 index 052770f7a..000000000 --- a/osfmk/kern/queue.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * Routines to implement queue package. - */ - -#include - -#if !defined(__GNUC__) - -/* - * Insert element at head of queue. - */ -void -enqueue_head( - register queue_t que, - register queue_entry_t elt) -{ - elt->next = que->next; - elt->prev = que; - elt->next->prev = elt; - que->next = elt; -} - -/* - * Insert element at tail of queue. 
- */ -void -enqueue_tail( - register queue_t que, - register queue_entry_t elt) -{ - elt->next = que; - elt->prev = que->prev; - elt->prev->next = elt; - que->prev = elt; -} - -/* - * Remove and return element at head of queue. - */ -queue_entry_t -dequeue_head( - register queue_t que) -{ - register queue_entry_t elt; - - if (que->next == que) - return((queue_entry_t)0); - - elt = que->next; - elt->next->prev = que; - que->next = elt->next; - return(elt); -} - -/* - * Remove and return element at tail of queue. - */ -queue_entry_t -dequeue_tail( - register queue_t que) -{ - register queue_entry_t elt; - - if (que->prev == que) - return((queue_entry_t)0); - - elt = que->prev; - elt->prev->next = que; - que->prev = elt->prev; - return(elt); -} - -/* - * Remove arbitrary element from queue. - * Does not check whether element is on a queue - the world - * will go haywire if it isn't. - */ - -/*ARGSUSED*/ -void -remqueue( - register queue_entry_t elt) -{ - elt->next->prev = elt->prev; - elt->prev->next = elt->next; -} - -/* - * Routines to directly imitate the VAX hardware queue - * package. - */ -void -insque( - register queue_entry_t entry, - register queue_entry_t pred) -{ - entry->next = pred->next; - entry->prev = pred; - (pred->next)->prev = entry; - pred->next = entry; -} - -void -remque( - register queue_entry_t elt) -{ - (elt->next)->prev = elt->prev; - (elt->prev)->next = elt->next; -} - -#endif diff --git a/osfmk/kern/queue.h b/osfmk/kern/queue.h index db7377840..338395b17 100644 --- a/osfmk/kern/queue.h +++ b/osfmk/kern/queue.h @@ -70,6 +70,10 @@ #include #include +#include + +__BEGIN_DECLS + /* * Queue of abstract objects. Queue is maintained * within that object. @@ -112,52 +116,34 @@ typedef struct queue_entry *queue_entry_t; #define enqueue(queue,elt) enqueue_tail(queue, elt) #define dequeue(queue) dequeue_head(queue) -#if !defined(__GNUC__) - -#include -__BEGIN_DECLS - -/* Enqueue element to head of queue */ -extern void enqueue_head( - queue_t que, - queue_entry_t elt); - -/* Enqueue element to tail of queue */ -extern void enqueue_tail( - queue_t que, - queue_entry_t elt); - -/* Dequeue element from head of queue */ -extern queue_entry_t dequeue_head( - queue_t que); - -/* Dequeue element from tail of queue */ -extern queue_entry_t dequeue_tail( - queue_t que); - -/* Dequeue element */ -extern void remqueue( - queue_entry_t elt); - -/* Enqueue element after a particular elem */ -extern void insque( - queue_entry_t entry, - queue_entry_t pred); - -/* Dequeue element */ -extern void remque( - queue_entry_t elt); - -__END_DECLS - -#else /* !__GNUC__ */ - #ifdef XNU_KERNEL_PRIVATE -#define __DEQUEUE_ELT_CLEANUP(elt) do { \ - (elt)->next = (queue_entry_t) 0; \ - (elt)->prev = (queue_entry_t) 0; \ - } while (0) +#include +#include +static inline void __QUEUE_ELT_VALIDATE(queue_entry_t elt) { + queue_entry_t elt_next, elt_prev; + + if (__improbable(elt == (queue_entry_t)0)) { + panic("Invalid queue element %p", elt); + } + + elt_next = elt->next; + elt_prev = elt->prev; + + if (__improbable(elt_next == (queue_entry_t)0 || elt_prev == (queue_entry_t)0)) { + panic("Invalid queue element pointers for %p: next %p prev %p", elt, elt_next, elt_prev); + } + if (__improbable(elt_next->prev != elt || elt_prev->next != elt)) { + panic("Invalid queue element linkage for %p: next %p next->prev %p prev %p prev->next %p", + elt, elt_next, elt_next->prev, elt_prev, elt_prev->next); + } +} + +static inline void __DEQUEUE_ELT_CLEANUP(queue_entry_t elt) { + (elt)->next = (queue_entry_t) 0; + 
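+	/* poison both links so a stale re-remove trips __QUEUE_ELT_VALIDATE() */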
(elt)->prev = (queue_entry_t) 0; +} #else +#define __QUEUE_ELT_VALIDATE(elt) do { } while (0) #define __DEQUEUE_ELT_CLEANUP(elt) do { } while(0) #endif /* !XNU_KERNEL_PRIVATE */ @@ -166,9 +152,13 @@ enqueue_head( queue_t que, queue_entry_t elt) { - elt->next = que->next; + queue_entry_t old_head; + + __QUEUE_ELT_VALIDATE((queue_entry_t)que); + old_head = que->next; + elt->next = old_head; elt->prev = que; - elt->next->prev = elt; + old_head->prev = elt; que->next = elt; } @@ -177,9 +167,13 @@ enqueue_tail( queue_t que, queue_entry_t elt) { + queue_entry_t old_tail; + + __QUEUE_ELT_VALIDATE((queue_entry_t)que); + old_tail = que->prev; elt->next = que; - elt->prev = que->prev; - elt->prev->next = elt; + elt->prev = old_tail; + old_tail->next = elt; que->prev = elt; } @@ -187,12 +181,15 @@ static __inline__ queue_entry_t dequeue_head( queue_t que) { - register queue_entry_t elt = (queue_entry_t) 0; + queue_entry_t elt = (queue_entry_t) 0; + queue_entry_t new_head; if (que->next != que) { elt = que->next; - elt->next->prev = que; - que->next = elt->next; + __QUEUE_ELT_VALIDATE(elt); + new_head = elt->next; /* new_head may point to que if elt was the only element */ + new_head->prev = que; + que->next = new_head; __DEQUEUE_ELT_CLEANUP(elt); } @@ -203,12 +200,15 @@ static __inline__ queue_entry_t dequeue_tail( queue_t que) { - register queue_entry_t elt = (queue_entry_t) 0; + queue_entry_t elt = (queue_entry_t) 0; + queue_entry_t new_tail; if (que->prev != que) { elt = que->prev; - elt->prev->next = que; - que->prev = elt->prev; + __QUEUE_ELT_VALIDATE(elt); + new_tail = elt->prev; /* new_tail may point to queue if elt was the only element */ + new_tail->next = que; + que->prev = new_tail; __DEQUEUE_ELT_CLEANUP(elt); } @@ -219,8 +219,13 @@ static __inline__ void remqueue( queue_entry_t elt) { - elt->next->prev = elt->prev; - elt->prev->next = elt->next; + queue_entry_t next_elt, prev_elt; + + __QUEUE_ELT_VALIDATE(elt); + next_elt = elt->next; + prev_elt = elt->prev; /* next_elt may equal prev_elt (and the queue head) if elt was the only element */ + next_elt->prev = prev_elt; + prev_elt->next = next_elt; __DEQUEUE_ELT_CLEANUP(elt); } @@ -229,23 +234,30 @@ insque( queue_entry_t entry, queue_entry_t pred) { - entry->next = pred->next; + queue_entry_t successor; + + __QUEUE_ELT_VALIDATE(pred); + successor = pred->next; + entry->next = successor; entry->prev = pred; - (pred->next)->prev = entry; + successor->prev = entry; pred->next = entry; } static __inline__ void remque( - register queue_entry_t elt) + queue_entry_t elt) { - (elt->next)->prev = elt->prev; - (elt->prev)->next = elt->next; + queue_entry_t next_elt, prev_elt; + + __QUEUE_ELT_VALIDATE(elt); + next_elt = elt->next; + prev_elt = elt->prev; /* next_elt may equal prev_elt (and the queue head) if elt was the only element */ + next_elt->prev = prev_elt; + prev_elt->next = next_elt; __DEQUEUE_ELT_CLEANUP(elt); } -#endif /* !__GNUC__ */ - /* * Macro: queue_init * Function: @@ -343,7 +355,7 @@ MACRO_END */ #define queue_enter(head, elt, type, field) \ MACRO_BEGIN \ - register queue_entry_t __prev; \ + queue_entry_t __prev; \ \ __prev = (head)->prev; \ if ((head) == __prev) { \ @@ -371,7 +383,7 @@ MACRO_END */ #define queue_enter_first(head, elt, type, field) \ MACRO_BEGIN \ - register queue_entry_t __next; \ + queue_entry_t __next; \ \ __next = (head)->next; \ if ((head) == __next) { \ @@ -400,7 +412,7 @@ MACRO_END */ #define queue_insert_before(head, elt, cur, type, field) \ MACRO_BEGIN \ - register queue_entry_t __prev; \ + queue_entry_t 
__prev; \ \ if ((head) == (queue_entry_t)(cur)) { \ (elt)->field.next = (head); \ @@ -442,7 +454,7 @@ MACRO_END */ #define queue_insert_after(head, elt, cur, type, field) \ MACRO_BEGIN \ - register queue_entry_t __next; \ + queue_entry_t __next; \ \ if ((head) == (queue_entry_t)(cur)) { \ (elt)->field.prev = (head); \ @@ -489,7 +501,7 @@ MACRO_END */ #define queue_remove(head, elt, type, field) \ MACRO_BEGIN \ - register queue_entry_t __next, __prev; \ + queue_entry_t __next, __prev; \ \ __next = (elt)->field.next; \ __prev = (elt)->field.prev; \ @@ -519,7 +531,7 @@ MACRO_END */ #define queue_remove_first(head, entry, type, field) \ MACRO_BEGIN \ - register queue_entry_t __next; \ + queue_entry_t __next; \ \ (entry) = (type)(void *) ((head)->next); \ __next = (entry)->field.next; \ @@ -545,7 +557,7 @@ MACRO_END */ #define queue_remove_last(head, entry, type, field) \ MACRO_BEGIN \ - register queue_entry_t __prev; \ + queue_entry_t __prev; \ \ (entry) = (type)(void *) ((head)->prev); \ __prev = (entry)->field.prev; \ @@ -614,7 +626,7 @@ MACRO_END #ifdef MACH_KERNEL_PRIVATE -#include +#include /*----------------------------------------------------------------*/ /* @@ -681,4 +693,6 @@ MACRO_END #endif /* MACH_KERNEL_PRIVATE */ +__END_DECLS + #endif /* _KERN_QUEUE_H_ */ diff --git a/osfmk/kern/sched.h b/osfmk/kern/sched.h index 319da1fe0..980891362 100644 --- a/osfmk/kern/sched.h +++ b/osfmk/kern/sched.h @@ -70,7 +70,6 @@ #include #include #include -#include #include #include #include @@ -155,12 +154,15 @@ #define BASEPRI_CONTROL (BASEPRI_DEFAULT + 17) /* 48 */ #define BASEPRI_FOREGROUND (BASEPRI_DEFAULT + 16) /* 47 */ #define BASEPRI_BACKGROUND (BASEPRI_DEFAULT + 15) /* 46 */ +#define BASEPRI_USER_INITIATED (BASEPRI_DEFAULT + 6) /* 37 */ #define BASEPRI_DEFAULT (MAXPRI_USER - (NRQS / 4)) /* 31 */ #define MAXPRI_SUPPRESSED (BASEPRI_DEFAULT - 3) /* 28 */ +#define BASEPRI_UTILITY (BASEPRI_DEFAULT - 11) /* 20 */ #define MAXPRI_THROTTLE (MINPRI + 4) /* 4 */ #define MINPRI_USER MINPRI /* 0 */ #define DEPRESSPRI MINPRI /* depress priority */ +#define MAXPRI_PROMOTE (MAXPRI_KERNEL) /* ceiling for mutex promotion */ /* Type used for thread->sched_mode and saved_mode */ typedef enum { @@ -181,7 +183,7 @@ struct runq_stats { uint64_t last_change_timestamp; }; -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) struct run_queue { int highq; /* highest runnable queue */ @@ -193,7 +195,7 @@ struct run_queue { struct runq_stats runq_stats; }; -#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY) */ +#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */ struct rt_queue { int count; /* # of threads total */ @@ -202,14 +204,14 @@ struct rt_queue { struct runq_stats runq_stats; }; -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_FAIRSHARE_CORE) struct fairshare_queue { int count; /* # of threads total */ queue_head_t queue; /* all runnable threads demoted to fairshare scheduling */ struct runq_stats runq_stats; }; -#endif +#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ #if defined(CONFIG_SCHED_GRRR_CORE) @@ -261,6 +263,14 @@ struct grrr_run_queue { extern struct rt_queue rt_runq; +#if defined(CONFIG_SCHED_MULTIQ) +sched_group_t sched_group_create(void); +void sched_group_destroy(sched_group_t sched_group); + +extern 
boolean_t sched_groups_enabled; + +#endif /* defined(CONFIG_SCHED_MULTIQ) */ + /* * Scheduler routines. */ @@ -271,12 +281,13 @@ extern void thread_quantum_expire( timer_call_param_t thread); /* Context switch check for current processor */ -extern ast_t csw_check(processor_t processor); +extern ast_t csw_check(processor_t processor, + ast_t check_reason); -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) extern uint32_t std_quantum, min_std_quantum; extern uint32_t std_quantum_us; -#endif +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ extern uint32_t thread_depress_time; extern uint32_t default_timeshare_computation; @@ -287,7 +298,7 @@ extern uint32_t max_rt_quantum, min_rt_quantum; extern int default_preemption_rate; extern int default_bg_preemption_rate; -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) /* * Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second @@ -300,7 +311,7 @@ extern int default_bg_preemption_rate; extern unsigned sched_tick; extern uint32_t sched_tick_interval; -#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ extern uint64_t sched_one_second_interval; @@ -329,7 +340,7 @@ extern void compute_pmap_gc_throttle( * Conversion factor from usage * to priority. */ -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) extern uint32_t sched_pri_shift; extern uint32_t sched_background_pri_shift; extern uint32_t sched_combined_fgbg_pri_shift; @@ -338,7 +349,7 @@ extern int8_t sched_load_shifts[NRQS]; extern uint32_t sched_decay_usage_age_factor; extern uint32_t sched_use_combined_fgbg_decay; void sched_traditional_consider_maintenance(uint64_t); -#endif +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ extern int32_t sched_poll_yield_shift; extern uint64_t sched_safe_duration; @@ -351,36 +362,50 @@ extern uint32_t avenrun[3], mach_factor[3]; extern uint64_t max_unsafe_computation; extern uint64_t max_poll_computation; -#define sched_run_incr() \ -MACRO_BEGIN \ - hw_atomic_add(&sched_run_count, 1); \ -MACRO_END +/* TH_RUN & !TH_IDLE controls whether a thread has a run count */ +#define sched_run_incr(th) \ + hw_atomic_add(&sched_run_count, 1) \ + +#define sched_run_decr(th) \ + hw_atomic_sub(&sched_run_count, 1) \ + +#if MACH_ASSERT +extern void sched_share_incr(thread_t thread); +extern void sched_share_decr(thread_t thread); +extern void sched_background_incr(thread_t thread); +extern void sched_background_decr(thread_t thread); -#define sched_run_decr() \ -MACRO_BEGIN \ - hw_atomic_sub(&sched_run_count, 1); \ +extern void assert_thread_sched_count(thread_t thread); + +#else /* MACH_ASSERT */ +/* sched_mode == TH_MODE_TIMESHARE controls whether a thread has a timeshare count when it has a run count */ +#define sched_share_incr(th) \ +MACRO_BEGIN \ + (void)hw_atomic_add(&sched_share_count, 1); \ MACRO_END -#define sched_share_incr() \ -MACRO_BEGIN \ - (void)hw_atomic_add(&sched_share_count, 1); \ +#define sched_share_decr(th) \ +MACRO_BEGIN \ + (void)hw_atomic_sub(&sched_share_count, 1); \ MACRO_END -#define sched_share_decr() \ -MACRO_BEGIN \ - (void)hw_atomic_sub(&sched_share_count, 1); \ +/* TH_SFLAG_THROTTLED controls whether a thread has a background count when it has a run count and a share count */ +#define sched_background_incr(th) \ +MACRO_BEGIN \ + hw_atomic_add(&sched_background_count, 1); \ MACRO_END -#define sched_background_incr() \ -MACRO_BEGIN \ - (void)hw_atomic_add(&sched_background_count, 1); \ +#define sched_background_decr(th) \ 
+MACRO_BEGIN \ + hw_atomic_sub(&sched_background_count, 1); \ MACRO_END -#define sched_background_decr() \ -MACRO_BEGIN \ - (void)hw_atomic_sub(&sched_background_count, 1); \ +#define assert_thread_sched_count(th) \ +MACRO_BEGIN \ MACRO_END +#endif /* !MACH_ASSERT */ + /* * thread_timer_delta macro takes care of both thread timers. */ diff --git a/osfmk/kern/sched_dualq.c b/osfmk/kern/sched_dualq.c new file mode 100644 index 000000000..628ee743e --- /dev/null +++ b/osfmk/kern/sched_dualq.c @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static void +sched_dualq_init(void); + +static thread_t +sched_dualq_steal_thread(processor_set_t pset); + +static void +sched_dualq_thread_update_scan(void); + +static boolean_t +sched_dualq_processor_enqueue(processor_t processor, thread_t thread, integer_t options); + +static boolean_t +sched_dualq_processor_queue_remove(processor_t processor, thread_t thread); + +static ast_t +sched_dualq_processor_csw_check(processor_t processor); + +static boolean_t +sched_dualq_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte); + +static int +sched_dualq_runq_count(processor_t processor); + +static boolean_t +sched_dualq_processor_queue_empty(processor_t processor); + +static uint64_t +sched_dualq_runq_stats_count_sum(processor_t processor); + +static int +sched_dualq_processor_bound_count(processor_t processor); + +static void +sched_dualq_pset_init(processor_set_t pset); + +static void +sched_dualq_processor_init(processor_t processor); + +static thread_t +sched_dualq_choose_thread(processor_t processor, int priority, ast_t reason); + +static void +sched_dualq_processor_queue_shutdown(processor_t processor); + +static sched_mode_t +sched_dualq_initial_thread_sched_mode(task_t parent_task); + +static boolean_t +sched_dualq_should_current_thread_rechoose_processor(processor_t processor); + +const struct sched_dispatch_table sched_dualq_dispatch = { + .init = sched_dualq_init, + .timebase_init = sched_traditional_timebase_init, + .processor_init = sched_dualq_processor_init, + .pset_init = sched_dualq_pset_init, + .maintenance_continuation = sched_traditional_maintenance_continue, + .choose_thread = sched_dualq_choose_thread, + .steal_thread = sched_dualq_steal_thread, + .compute_priority = compute_priority, + .choose_processor = choose_processor, + .processor_enqueue = sched_dualq_processor_enqueue, + .processor_queue_shutdown = sched_dualq_processor_queue_shutdown, + .processor_queue_remove = sched_dualq_processor_queue_remove, + .processor_queue_empty = sched_dualq_processor_queue_empty, + .priority_is_urgent = priority_is_urgent, + .processor_csw_check = sched_dualq_processor_csw_check, + .processor_queue_has_priority = sched_dualq_processor_queue_has_priority, + .initial_quantum_size = sched_traditional_initial_quantum_size, + .initial_thread_sched_mode = sched_dualq_initial_thread_sched_mode, + .can_update_priority = can_update_priority, + .update_priority = update_priority, + .lightweight_update_priority = lightweight_update_priority, + .quantum_expire = sched_traditional_quantum_expire, + .should_current_thread_rechoose_processor = sched_dualq_should_current_thread_rechoose_processor, + .processor_runq_count = sched_dualq_runq_count, + .processor_runq_stats_count_sum = sched_dualq_runq_stats_count_sum, + .fairshare_init = sched_traditional_fairshare_init, + .fairshare_runq_count = sched_traditional_fairshare_runq_count, + .fairshare_runq_stats_count_sum = sched_traditional_fairshare_runq_stats_count_sum, + .fairshare_enqueue = sched_traditional_fairshare_enqueue, + .fairshare_dequeue = sched_traditional_fairshare_dequeue, + .fairshare_queue_remove = sched_traditional_fairshare_queue_remove, + .processor_bound_count = sched_dualq_processor_bound_count, + .thread_update_scan = sched_dualq_thread_update_scan, + .direct_dispatch_to_idle_processors 
= FALSE, +}; + +__attribute__((always_inline)) +static inline run_queue_t dualq_main_runq(processor_t processor) +{ + return &processor->processor_set->pset_runq; +} + +__attribute__((always_inline)) +static inline run_queue_t dualq_bound_runq(processor_t processor) +{ + return &processor->runq; +} + +__attribute__((always_inline)) +static inline run_queue_t dualq_runq_for_thread(processor_t processor, thread_t thread) +{ + if (thread->bound_processor == PROCESSOR_NULL) { + return dualq_main_runq(processor); + } else { + assert(thread->bound_processor == processor); + return dualq_bound_runq(processor); + } +} + +static sched_mode_t +sched_dualq_initial_thread_sched_mode(task_t parent_task) +{ + if (parent_task == kernel_task) + return TH_MODE_FIXED; + else + return TH_MODE_TIMESHARE; +} + +static void +sched_dualq_processor_init(processor_t processor) +{ + run_queue_init(&processor->runq); +} + +static void +sched_dualq_pset_init(processor_set_t pset) +{ + run_queue_init(&pset->pset_runq); +} + +static void +sched_dualq_init(void) +{ + sched_traditional_init(); +} + +static thread_t +sched_dualq_choose_thread( + processor_t processor, + int priority, + __unused ast_t reason) +{ + run_queue_t main_runq = dualq_main_runq(processor); + run_queue_t bound_runq = dualq_bound_runq(processor); + run_queue_t chosen_runq; + + if (bound_runq->highq < priority && + main_runq->highq < priority) + return THREAD_NULL; + + if (bound_runq->count && main_runq->count) { + if (bound_runq->highq >= main_runq->highq) { + chosen_runq = bound_runq; + } else { + chosen_runq = main_runq; + } + } else if (bound_runq->count) { + chosen_runq = bound_runq; + } else if (main_runq->count) { + chosen_runq = main_runq; + } else { + return (THREAD_NULL); + } + + return run_queue_dequeue(chosen_runq, SCHED_HEADQ); +} + +static boolean_t +sched_dualq_processor_enqueue( + processor_t processor, + thread_t thread, + integer_t options) +{ + run_queue_t rq = dualq_runq_for_thread(processor, thread); + boolean_t result; + + result = run_queue_enqueue(rq, thread, options); + thread->runq = processor; + + return (result); +} + +static boolean_t +sched_dualq_processor_queue_empty(processor_t processor) +{ + return dualq_main_runq(processor)->count == 0 && + dualq_bound_runq(processor)->count == 0; +} + +static ast_t +sched_dualq_processor_csw_check(processor_t processor) +{ + boolean_t has_higher; + int pri; + + run_queue_t main_runq = dualq_main_runq(processor); + run_queue_t bound_runq = dualq_bound_runq(processor); + + assert(processor->active_thread != NULL); + + pri = MAX(main_runq->highq, bound_runq->highq); + + if (first_timeslice(processor)) { + has_higher = (pri > processor->current_pri); + } else { + has_higher = (pri >= processor->current_pri); + } + + if (has_higher) { + if (main_runq->urgency > 0) + return (AST_PREEMPT | AST_URGENT); + + if (bound_runq->urgency > 0) + return (AST_PREEMPT | AST_URGENT); + + if (processor->active_thread && thread_eager_preemption(processor->active_thread)) + return (AST_PREEMPT | AST_URGENT); + + return AST_PREEMPT; + } + + return AST_NONE; +} + +static boolean_t +sched_dualq_processor_queue_has_priority(processor_t processor, + int priority, + boolean_t gte) +{ + int qpri = MAX(dualq_main_runq(processor)->highq, dualq_bound_runq(processor)->highq); + + if (gte) + return qpri >= priority; + else + return qpri > priority; +} + +static boolean_t +sched_dualq_should_current_thread_rechoose_processor(processor_t processor) +{ + return (processor->current_pri < BASEPRI_RTQUEUES && 
processor->processor_primary != processor); +} + +static int +sched_dualq_runq_count(processor_t processor) +{ + return dualq_main_runq(processor)->count + dualq_bound_runq(processor)->count; +} + +static uint64_t +sched_dualq_runq_stats_count_sum(processor_t processor) +{ + uint64_t bound_sum = dualq_bound_runq(processor)->runq_stats.count_sum; + + if (processor->cpu_id == processor->processor_set->cpu_set_low) + return bound_sum + dualq_main_runq(processor)->runq_stats.count_sum; + else + return bound_sum; +} +static int +sched_dualq_processor_bound_count(processor_t processor) +{ + return dualq_bound_runq(processor)->count; +} + +static void +sched_dualq_processor_queue_shutdown(processor_t processor) +{ + processor_set_t pset = processor->processor_set; + run_queue_t rq = dualq_main_runq(processor); + thread_t thread; + queue_head_t tqueue; + + /* We only need to migrate threads if this is the last active processor in the pset */ + if (pset->online_processor_count > 0) { + pset_unlock(pset); + return; + } + + queue_init(&tqueue); + + while (rq->count > 0) { + thread = run_queue_dequeue(rq, SCHED_HEADQ); + enqueue_tail(&tqueue, (queue_entry_t)thread); + } + + pset_unlock(pset); + + while ((thread = (thread_t)(void*)dequeue_head(&tqueue)) != THREAD_NULL) { + thread_lock(thread); + + thread_setrun(thread, SCHED_TAILQ); + + thread_unlock(thread); + } +} + +static boolean_t +sched_dualq_processor_queue_remove( + processor_t processor, + thread_t thread) +{ + run_queue_t rq; + processor_set_t pset = processor->processor_set; + + pset_lock(pset); + + rq = dualq_runq_for_thread(processor, thread); + + if (processor == thread->runq) { + /* + * Thread is on a run queue and we have a lock on + * that run queue. + */ + run_queue_remove(rq, thread); + } + else { + /* + * The thread left the run queue before we could + * lock the run queue. + */ + assert(thread->runq == PROCESSOR_NULL); + processor = PROCESSOR_NULL; + } + + pset_unlock(pset); + + return (processor != PROCESSOR_NULL); +} + +static thread_t +sched_dualq_steal_thread(processor_set_t pset) +{ + processor_set_t nset, cset = pset; + thread_t thread; + + do { + if (cset->pset_runq.count > 0) { + thread = run_queue_dequeue(&cset->pset_runq, SCHED_HEADQ); + pset_unlock(cset); + return (thread); + } + + nset = next_pset(cset); + + if (nset != pset) { + pset_unlock(cset); + + cset = nset; + pset_lock(cset); + } + } while (nset != pset); + + pset_unlock(cset); + + return (THREAD_NULL); +} + +static void +sched_dualq_thread_update_scan(void) +{ + boolean_t restart_needed = FALSE; + processor_t processor = processor_list; + processor_set_t pset; + thread_t thread; + spl_t s; + + /* + * We update the threads associated with each processor (bound and idle threads) + * and then update the threads in each pset runqueue. + */ + + do { + do { + pset = processor->processor_set; + + s = splsched(); + pset_lock(pset); + + restart_needed = runq_scan(dualq_bound_runq(processor)); + + pset_unlock(pset); + splx(s); + + if (restart_needed) + break; + + thread = processor->idle_thread; + if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { + if (thread_update_add_thread(thread) == FALSE) { + restart_needed = TRUE; + break; + } + } + } while ((processor = processor->processor_list) != NULL); + + /* Ok, we now have a collection of candidates -- fix them. 
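+		 * (Candidates were batched by thread_update_add_thread() while
+		 * the pset lock was held; thread_update_process_threads() runs
+		 * with the lock dropped, and the scan restarts if the batch
+		 * filled before the processor list was exhausted.)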
*/ + thread_update_process_threads(); + + } while (restart_needed); + + pset = &pset0; + + do { + do { + s = splsched(); + pset_lock(pset); + + restart_needed = runq_scan(&pset->pset_runq); + + pset_unlock(pset); + splx(s); + + if (restart_needed) + break; + } while ((pset = pset->pset_list) != NULL); + + /* Ok, we now have a collection of candidates -- fix them. */ + thread_update_process_threads(); + + } while (restart_needed); +} + + diff --git a/osfmk/kern/sched_fixedpriority.c b/osfmk/kern/sched_fixedpriority.c deleted file mode 100644 index 315c46d73..000000000 --- a/osfmk/kern/sched_fixedpriority.c +++ /dev/null @@ -1,734 +0,0 @@ -/* - * Copyright (c) 2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include - -static void -sched_fixedpriority_init(void); - -static void -sched_fixedpriority_with_pset_runqueue_init(void); - -static void -sched_fixedpriority_timebase_init(void); - -static void -sched_fixedpriority_processor_init(processor_t processor); - -static void -sched_fixedpriority_pset_init(processor_set_t pset); - -static void -sched_fixedpriority_maintenance_continuation(void); - -static thread_t -sched_fixedpriority_choose_thread(processor_t processor, - int priority); - -static thread_t -sched_fixedpriority_steal_thread(processor_set_t pset); - -static void -sched_fixedpriority_compute_priority(thread_t thread, - boolean_t override_depress); - -static processor_t -sched_fixedpriority_choose_processor( processor_set_t pset, - processor_t processor, - thread_t thread); - - -static boolean_t -sched_fixedpriority_processor_enqueue( - processor_t processor, - thread_t thread, - integer_t options); - -static void -sched_fixedpriority_processor_queue_shutdown( - processor_t processor); - -static boolean_t -sched_fixedpriority_processor_queue_remove( - processor_t processor, - thread_t thread); - -static boolean_t -sched_fixedpriority_processor_queue_empty(processor_t processor); - -static boolean_t -sched_fixedpriority_processor_queue_has_priority(processor_t processor, - int priority, - boolean_t gte); - -static boolean_t -sched_fixedpriority_priority_is_urgent(int priority); - -static ast_t -sched_fixedpriority_processor_csw_check(processor_t processor); - -static uint32_t -sched_fixedpriority_initial_quantum_size(thread_t thread); - -static sched_mode_t -sched_fixedpriority_initial_thread_sched_mode(task_t parent_task); - -static boolean_t -sched_fixedpriority_supports_timeshare_mode(void); - -static boolean_t -sched_fixedpriority_can_update_priority(thread_t thread); - -static void -sched_fixedpriority_update_priority(thread_t thread); - -static void -sched_fixedpriority_lightweight_update_priority(thread_t thread); - -static void -sched_fixedpriority_quantum_expire(thread_t thread); - -static boolean_t -sched_fixedpriority_should_current_thread_rechoose_processor(processor_t processor); - -static int -sched_fixedpriority_processor_runq_count(processor_t processor); - -static uint64_t -sched_fixedpriority_processor_runq_stats_count_sum(processor_t processor); - -const struct sched_dispatch_table sched_fixedpriority_dispatch = { - sched_fixedpriority_init, - sched_fixedpriority_timebase_init, - sched_fixedpriority_processor_init, - sched_fixedpriority_pset_init, - sched_fixedpriority_maintenance_continuation, - sched_fixedpriority_choose_thread, - sched_fixedpriority_steal_thread, - sched_fixedpriority_compute_priority, - sched_fixedpriority_choose_processor, - sched_fixedpriority_processor_enqueue, - sched_fixedpriority_processor_queue_shutdown, - sched_fixedpriority_processor_queue_remove, - sched_fixedpriority_processor_queue_empty, - sched_fixedpriority_priority_is_urgent, - sched_fixedpriority_processor_csw_check, - sched_fixedpriority_processor_queue_has_priority, - sched_fixedpriority_initial_quantum_size, - sched_fixedpriority_initial_thread_sched_mode, - sched_fixedpriority_supports_timeshare_mode, - 
sched_fixedpriority_can_update_priority, - sched_fixedpriority_update_priority, - sched_fixedpriority_lightweight_update_priority, - sched_fixedpriority_quantum_expire, - sched_fixedpriority_should_current_thread_rechoose_processor, - sched_fixedpriority_processor_runq_count, - sched_fixedpriority_processor_runq_stats_count_sum, - sched_traditional_fairshare_init, - sched_traditional_fairshare_runq_count, - sched_traditional_fairshare_runq_stats_count_sum, - sched_traditional_fairshare_enqueue, - sched_traditional_fairshare_dequeue, - sched_traditional_fairshare_queue_remove, - TRUE /* direct_dispatch_to_idle_processors */ -}; - -const struct sched_dispatch_table sched_fixedpriority_with_pset_runqueue_dispatch = { - sched_fixedpriority_with_pset_runqueue_init, - sched_fixedpriority_timebase_init, - sched_fixedpriority_processor_init, - sched_fixedpriority_pset_init, - sched_fixedpriority_maintenance_continuation, - sched_fixedpriority_choose_thread, - sched_fixedpriority_steal_thread, - sched_fixedpriority_compute_priority, - sched_fixedpriority_choose_processor, - sched_fixedpriority_processor_enqueue, - sched_fixedpriority_processor_queue_shutdown, - sched_fixedpriority_processor_queue_remove, - sched_fixedpriority_processor_queue_empty, - sched_fixedpriority_priority_is_urgent, - sched_fixedpriority_processor_csw_check, - sched_fixedpriority_processor_queue_has_priority, - sched_fixedpriority_initial_quantum_size, - sched_fixedpriority_initial_thread_sched_mode, - sched_fixedpriority_supports_timeshare_mode, - sched_fixedpriority_can_update_priority, - sched_fixedpriority_update_priority, - sched_fixedpriority_lightweight_update_priority, - sched_fixedpriority_quantum_expire, - sched_fixedpriority_should_current_thread_rechoose_processor, - sched_fixedpriority_processor_runq_count, - sched_fixedpriority_processor_runq_stats_count_sum, - sched_traditional_fairshare_init, - sched_traditional_fairshare_runq_count, - sched_traditional_fairshare_runq_stats_count_sum, - sched_traditional_fairshare_enqueue, - sched_traditional_fairshare_dequeue, - sched_traditional_fairshare_queue_remove, - FALSE /* direct_dispatch_to_idle_processors */ -}; - -extern int max_unsafe_quanta; - -#define SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM 5 /* in ms */ -static uint32_t sched_fixedpriority_quantum_ms = SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM; -static uint32_t sched_fixedpriority_quantum; - -#define SCHED_FIXEDPRIORITY_DEFAULT_FAIRSHARE_MINIMUM_BLOCK_TIME 100 /* ms */ -static uint32_t fairshare_minimum_blocked_time_ms = SCHED_FIXEDPRIORITY_DEFAULT_FAIRSHARE_MINIMUM_BLOCK_TIME; -static uint32_t fairshare_minimum_blocked_time; - -static uint32_t sched_fixedpriority_tick; -static uint64_t sched_fixedpriority_tick_deadline; -extern uint32_t grrr_rescale_tick; - -static boolean_t sched_fixedpriority_use_pset_runqueue = FALSE; - -__attribute__((always_inline)) -static inline run_queue_t runq_for_processor(processor_t processor) -{ - if (sched_fixedpriority_use_pset_runqueue) - return &processor->processor_set->pset_runq; - else - return &processor->runq; -} - -__attribute__((always_inline)) -static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread) -{ - if (thread->bound_processor == PROCESSOR_NULL) - return; - - assert(thread->bound_processor == processor); - - if (sched_fixedpriority_use_pset_runqueue) - processor->processor_set->pset_runq_bound_count++; - - processor->runq_bound_count++; -} - -__attribute__((always_inline)) -static inline void runq_consider_decr_bound_count(processor_t 
processor, thread_t thread) -{ - if (thread->bound_processor == PROCESSOR_NULL) - return; - - assert(thread->bound_processor == processor); - - if (sched_fixedpriority_use_pset_runqueue) - processor->processor_set->pset_runq_bound_count--; - - processor->runq_bound_count--; -} - -static void -sched_fixedpriority_init(void) -{ - if (!PE_parse_boot_argn("fixedpriority_quantum", &sched_fixedpriority_quantum_ms, sizeof (sched_fixedpriority_quantum_ms))) { - sched_fixedpriority_quantum_ms = SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM; - } - - if (sched_fixedpriority_quantum_ms < 1) - sched_fixedpriority_quantum_ms = SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM; - - printf("standard fixed priority timeslicing quantum is %u ms\n", sched_fixedpriority_quantum_ms); -} - -static void -sched_fixedpriority_with_pset_runqueue_init(void) -{ - sched_fixedpriority_init(); - sched_fixedpriority_use_pset_runqueue = TRUE; -} - -static void -sched_fixedpriority_timebase_init(void) -{ - uint64_t abstime; - - /* standard timeslicing quantum */ - clock_interval_to_absolutetime_interval( - sched_fixedpriority_quantum_ms, NSEC_PER_MSEC, &abstime); - assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); - sched_fixedpriority_quantum = (uint32_t)abstime; - - thread_depress_time = 1 * sched_fixedpriority_quantum; - default_timeshare_computation = sched_fixedpriority_quantum / 2; - default_timeshare_constraint = sched_fixedpriority_quantum; - - max_unsafe_computation = max_unsafe_quanta * sched_fixedpriority_quantum; - sched_safe_duration = 2 * max_unsafe_quanta * sched_fixedpriority_quantum; - - if (!PE_parse_boot_argn("fairshare_minblockedtime", &fairshare_minimum_blocked_time_ms, sizeof (fairshare_minimum_blocked_time_ms))) { - fairshare_minimum_blocked_time_ms = SCHED_FIXEDPRIORITY_DEFAULT_FAIRSHARE_MINIMUM_BLOCK_TIME; - } - - clock_interval_to_absolutetime_interval( - fairshare_minimum_blocked_time_ms, NSEC_PER_MSEC, &abstime); - - assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); - fairshare_minimum_blocked_time = (uint32_t)abstime; -} - -static void -sched_fixedpriority_processor_init(processor_t processor) -{ - if (!sched_fixedpriority_use_pset_runqueue) { - run_queue_init(&processor->runq); - } - processor->runq_bound_count = 0; -} - -static void -sched_fixedpriority_pset_init(processor_set_t pset) -{ - if (sched_fixedpriority_use_pset_runqueue) { - run_queue_init(&pset->pset_runq); - } - pset->pset_runq_bound_count = 0; -} - - -static void -sched_fixedpriority_maintenance_continuation(void) -{ - uint64_t abstime = mach_absolute_time(); - - sched_fixedpriority_tick++; - grrr_rescale_tick++; - - /* - * Compute various averages. 
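sched_fixedpriority_timebase_init above turns a millisecond boot-arg quantum into absolute-time units and asserts the result fits in 32 bits. A userspace sketch of the same conversion, assuming a hypothetical mach_timebase-style numer/denom ratio in place of clock_interval_to_absolutetime_interval:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC 1000000ull

/* Hypothetical timebase ratio: abstime = ns * numer / denom. */
struct timebase { uint32_t numer, denom; };

static uint64_t
interval_to_abstime(uint32_t ms, struct timebase tb)
{
    uint64_t ns = (uint64_t)ms * NSEC_PER_MSEC;
    return ns * tb.numer / tb.denom;
}

int main(void)
{
    struct timebase tb = { .numer = 1, .denom = 1 };   /* 1:1, as on x86 */
    uint32_t quantum_ms = 5;                           /* the boot-arg default */

    uint64_t abstime = interval_to_abstime(quantum_ms, tb);

    /* Same sanity check as the kernel: the quantum must fit in 32 bits. */
    assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);

    uint32_t quantum = (uint32_t)abstime;
    printf("quantum = %u abstime units (%u ms)\n", quantum, quantum_ms);
    return 0;
}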
- */ - compute_averages(1); - - if (sched_fixedpriority_tick_deadline == 0) - sched_fixedpriority_tick_deadline = abstime; - - clock_deadline_for_periodic_event(10*sched_one_second_interval, abstime, - &sched_fixedpriority_tick_deadline); - - assert_wait_deadline((event_t)sched_fixedpriority_maintenance_continuation, THREAD_UNINT, sched_fixedpriority_tick_deadline); - thread_block((thread_continue_t)sched_fixedpriority_maintenance_continuation); - /*NOTREACHED*/ -} - - -static thread_t -sched_fixedpriority_choose_thread(processor_t processor, - int priority) -{ - thread_t thread; - - thread = choose_thread(processor, runq_for_processor(processor), priority); - if (thread != THREAD_NULL) { - runq_consider_decr_bound_count(processor, thread); - } - - return thread; -} - -static thread_t -sched_fixedpriority_steal_thread(processor_set_t pset) -{ - pset_unlock(pset); - - return (THREAD_NULL); - -} - -static void -sched_fixedpriority_compute_priority(thread_t thread, - boolean_t override_depress) -{ - /* Reset current priority to base priority */ - if ( !(thread->sched_flags & TH_SFLAG_PROMOTED) && - (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || - override_depress ) ) { - set_sched_pri(thread, thread->priority); - } -} - -static processor_t -sched_fixedpriority_choose_processor( processor_set_t pset, - processor_t processor, - thread_t thread) -{ - return choose_processor(pset, processor, thread); -} -static boolean_t -sched_fixedpriority_processor_enqueue( - processor_t processor, - thread_t thread, - integer_t options) -{ - run_queue_t rq = runq_for_processor(processor); - boolean_t result; - - result = run_queue_enqueue(rq, thread, options); - thread->runq = processor; - runq_consider_incr_bound_count(processor, thread); - - return (result); -} - -static void -sched_fixedpriority_processor_queue_shutdown( - processor_t processor) -{ - processor_set_t pset = processor->processor_set; - thread_t thread; - queue_head_t tqueue, bqueue; - - queue_init(&tqueue); - queue_init(&bqueue); - - while ((thread = sched_fixedpriority_choose_thread(processor, IDLEPRI)) != THREAD_NULL) { - if (thread->bound_processor == PROCESSOR_NULL) { - enqueue_tail(&tqueue, (queue_entry_t)thread); - } else { - enqueue_tail(&bqueue, (queue_entry_t)thread); - } - } - - while ((thread = (thread_t)(void *)dequeue_head(&bqueue)) != THREAD_NULL) { - sched_fixedpriority_processor_enqueue(processor, thread, SCHED_TAILQ); - } - - pset_unlock(pset); - - while ((thread = (thread_t)(void *)dequeue_head(&tqueue)) != THREAD_NULL) { - thread_lock(thread); - - thread_setrun(thread, SCHED_TAILQ); - - thread_unlock(thread); - } -} - -static boolean_t -sched_fixedpriority_processor_queue_remove( - processor_t processor, - thread_t thread) -{ - void * rqlock; - run_queue_t rq; - - rqlock = &processor->processor_set->sched_lock; - rq = runq_for_processor(processor); - - simple_lock(rqlock); - if (processor == thread->runq) { - /* - * Thread is on a run queue and we have a lock on - * that run queue. - */ - runq_consider_decr_bound_count(processor, thread); - run_queue_remove(rq, thread); - } - else { - /* - * The thread left the run queue before we could - * lock the run queue. 
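Both queue_remove implementations in this patch share one pattern: between deciding to pull a thread off a run queue and acquiring the queue's lock, another CPU may already have dequeued it, so thread->runq is re-checked under the lock and the return value reports who won the race. A minimal sketch of that pattern, with toy types and a pthread mutex standing in for the pset/run-queue lock:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* Toy stand-ins for thread_t / processor_t; illustration only. */
struct processor;
struct thread {
    struct processor *runq;   /* processor whose queue holds us, or NULL */
};

static pthread_mutex_t runq_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Same shape as the processor_queue_remove functions: re-check
 * thread->runq under the lock, and report whether we were the
 * ones to remove the thread.
 */
static bool
queue_remove(struct processor *processor, struct thread *thread)
{
    bool removed = false;

    pthread_mutex_lock(&runq_lock);
    if (thread->runq == processor) {
        /* Still on our queue: safe to unlink (linkage elided). */
        thread->runq = NULL;
        removed = true;
    }
    /* else: it left the run queue first; thread->runq is already NULL. */
    pthread_mutex_unlock(&runq_lock);

    return removed;
}

int main(void)
{
    struct processor *cpu0 = (struct processor *)0x1; /* opaque token */
    struct thread th = { .runq = cpu0 };

    bool first  = queue_remove(cpu0, &th);  /* true: we removed it */
    bool second = queue_remove(cpu0, &th);  /* false: already gone */
    return (first && !second) ? 0 : 1;
}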
- */ - assert(thread->runq == PROCESSOR_NULL); - processor = PROCESSOR_NULL; - } - - simple_unlock(rqlock); - - return (processor != PROCESSOR_NULL); -} - -static boolean_t -sched_fixedpriority_processor_queue_empty(processor_t processor) -{ - /* - * See sched_traditional_with_pset_runqueue_processor_queue_empty - * for algorithm - */ - int count = runq_for_processor(processor)->count; - - if (sched_fixedpriority_use_pset_runqueue) { - processor_set_t pset = processor->processor_set; - - count -= pset->pset_runq_bound_count; - count += processor->runq_bound_count; - } - - return count == 0; -} - -static boolean_t -sched_fixedpriority_processor_queue_has_priority(processor_t processor, - int priority, - boolean_t gte) -{ - if (gte) - return runq_for_processor(processor)->highq >= priority; - else - return runq_for_processor(processor)->highq > priority; -} - -/* Implement sched_preempt_pri in code */ -static boolean_t -sched_fixedpriority_priority_is_urgent(int priority) -{ - if (priority <= BASEPRI_FOREGROUND) - return FALSE; - - if (priority < MINPRI_KERNEL) - return TRUE; - - if (priority >= BASEPRI_PREEMPT) - return TRUE; - - return FALSE; -} - -static ast_t -sched_fixedpriority_processor_csw_check(processor_t processor) -{ - run_queue_t runq; - boolean_t has_higher; - - runq = runq_for_processor(processor); - if (first_timeslice(processor)) { - has_higher = (runq->highq > processor->current_pri); - } else { - has_higher = (runq->highq >= processor->current_pri); - } - if (has_higher) { - if (runq->urgency > 0) - return (AST_PREEMPT | AST_URGENT); - - if (processor->active_thread && thread_eager_preemption(processor->active_thread)) - return (AST_PREEMPT | AST_URGENT); - - return AST_PREEMPT; - } else if (processor->current_thmode == TH_MODE_FAIRSHARE) { - if (!sched_fixedpriority_processor_queue_empty(processor)) { - /* Allow queued threads to run if the current thread got demoted to fairshare */ - return (AST_PREEMPT | AST_URGENT); - } else if ((!first_timeslice(processor)) && SCHED(fairshare_runq_count)() > 0) { - /* Allow other fairshare threads to run */ - return AST_PREEMPT | AST_URGENT; - } - } - - return AST_NONE; -} - -static uint32_t -sched_fixedpriority_initial_quantum_size(thread_t thread __unused) -{ - return sched_fixedpriority_quantum; -} - -static sched_mode_t -sched_fixedpriority_initial_thread_sched_mode(task_t parent_task) -{ - if (parent_task == kernel_task) - return TH_MODE_FIXED; - else - return TH_MODE_TIMESHARE; -} - -static boolean_t -sched_fixedpriority_supports_timeshare_mode(void) -{ - return TRUE; -} - -static boolean_t -sched_fixedpriority_can_update_priority(thread_t thread __unused) -{ - return ((thread->sched_flags & TH_SFLAG_PRI_UPDATE) == 0); -} - -static void -sched_fixedpriority_update_priority(thread_t thread) -{ - uint64_t current_time = mach_absolute_time(); - - thread->sched_flags |= TH_SFLAG_PRI_UPDATE; - - if (thread->sched_flags & TH_SFLAG_FAIRSHARE_TRIPPED) { - - /* - * Make sure we've waited fairshare_minimum_blocked_time both from the time - * we were throttled into the fairshare band, and the last time - * we ran. 
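sched_fixedpriority_priority_is_urgent above encodes sched_preempt_pri as three band checks. The same predicate in isolation; the band constants are illustrative values taken from the Mach priority map and should not be read as authoritative:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative values from the Mach priority map (MAXPRI = 127). */
#define BASEPRI_FOREGROUND 47
#define MINPRI_KERNEL      80
#define BASEPRI_PREEMPT    92

/*
 * Same band logic: user priorities above the foreground band are urgent,
 * ordinary kernel priorities are not, and the preemption band is urgent
 * again.
 */
static bool priority_is_urgent(int priority)
{
    if (priority <= BASEPRI_FOREGROUND)
        return false;            /* default/background user work */
    if (priority < MINPRI_KERNEL)
        return true;             /* boosted user work above foreground */
    if (priority >= BASEPRI_PREEMPT)
        return true;             /* must-run kernel band */
    return false;                /* ordinary kernel threads */
}

int main(void)
{
    int samples[] = { 31, 47, 60, 80, 91, 92, 127 };
    for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
        printf("pri %3d urgent=%d\n", samples[i], priority_is_urgent(samples[i]));
    return 0;
}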
- */ - if (current_time >= thread->last_run_time + fairshare_minimum_blocked_time) { - - boolean_t removed = thread_run_queue_remove(thread); - - thread->sched_flags &= ~TH_SFLAG_FAIRSHARE_TRIPPED; - thread->sched_mode = thread->saved_mode; - thread->saved_mode = TH_MODE_NONE; - - if (removed) - thread_setrun(thread, SCHED_TAILQ); - - KERNEL_DEBUG_CONSTANT1( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_EXIT) | DBG_FUNC_NONE, (uint32_t)(thread->last_run_time & 0xFFFFFFFF), (uint32_t)(thread->last_run_time >> 32), (uint32_t)(current_time & 0xFFFFFFFF), (uint32_t)(current_time >> 32), thread_tid(thread)); - - } - } else if ((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) && (thread->bound_processor == PROCESSOR_NULL)) { - boolean_t removed = thread_run_queue_remove(thread); - - thread->sched_flags |= TH_SFLAG_FAIRSHARE_TRIPPED; - thread->saved_mode = thread->sched_mode; - thread->sched_mode = TH_MODE_FAIRSHARE; - - thread->last_quantum_refill_time = thread->last_run_time - 2 * sched_fixedpriority_quantum - 1; - - if (removed) - thread_setrun(thread, SCHED_TAILQ); - - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_ENTER) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), 0xFFFFFFFF, 0, 0, 0); - - } - - - /* - * Check for fail-safe release. - */ - if ( (thread->sched_flags & TH_SFLAG_FAILSAFE) && - current_time >= thread->safe_release ) { - - - thread->sched_flags &= ~TH_SFLAG_FAILSAFE; - - if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) { - /* Restore to previous */ - - thread->sched_mode = thread->saved_mode; - thread->saved_mode = TH_MODE_NONE; - - if (thread->sched_mode == TH_MODE_REALTIME) { - thread->priority = BASEPRI_RTQUEUES; - - } - - if (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)) - set_sched_pri(thread, thread->priority); - } - } - - thread->sched_flags &= ~TH_SFLAG_PRI_UPDATE; - return; -} - -static void -sched_fixedpriority_lightweight_update_priority(thread_t thread __unused) -{ - return; -} - -static void -sched_fixedpriority_quantum_expire( - thread_t thread) -{ - /* Put thread into fairshare class, core scheduler will manage runqueue */ - if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->task != kernel_task) && !(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) { - uint64_t elapsed = thread->last_run_time - thread->last_quantum_refill_time; - - /* If we managed to use our quantum in less than 2*quantum wall clock time, - * we are considered CPU bound and eligible for demotion. Since the quantum - * is reset when thread_unblock() is called, we are only really considering - * threads that elongate their execution time due to preemption. 
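The fairshare demotion heuristic described in the quantum_expire comment compares how long a quantum took to drain against twice the quantum's length. Extracted as a pure function (times are in abstract abstime units; the function name is mine):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * A thread that burned a full quantum in under 2x quantum of wall-clock
 * time was mostly on-CPU, so it is treated as CPU bound and eligible for
 * demotion to the fairshare band.
 */
static bool
should_demote_to_fairshare(uint64_t last_run_time,
                           uint64_t last_quantum_refill_time,
                           uint64_t quantum)
{
    uint64_t elapsed = last_run_time - last_quantum_refill_time;
    return elapsed < 2 * quantum;
}

int main(void)
{
    uint64_t quantum = 5000;

    /* Ran nearly back-to-back: CPU bound, demote. */
    printf("busy thread:     demote=%d\n",
           should_demote_to_fairshare(108000, 100000, quantum));
    /* Quantum took 4x wall time to drain (lots of blocking): keep as-is. */
    printf("blocking thread: demote=%d\n",
           should_demote_to_fairshare(120000, 100000, quantum));
    return 0;
}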
- */ - if ((elapsed < 2 * sched_fixedpriority_quantum) && (thread->bound_processor == PROCESSOR_NULL)) { - - thread->saved_mode = thread->sched_mode; - thread->sched_mode = TH_MODE_FAIRSHARE; - thread->sched_flags |= TH_SFLAG_FAIRSHARE_TRIPPED; - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_ENTER) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (uint32_t)(elapsed & 0xFFFFFFFF), (uint32_t)(elapsed >> 32), 0, 0); - } - } -} - - -static boolean_t -sched_fixedpriority_should_current_thread_rechoose_processor(processor_t processor __unused) -{ - return (TRUE); -} - - -static int -sched_fixedpriority_processor_runq_count(processor_t processor) -{ - return runq_for_processor(processor)->count; -} - -static uint64_t -sched_fixedpriority_processor_runq_stats_count_sum(processor_t processor) -{ - return runq_for_processor(processor)->runq_stats.count_sum; -} diff --git a/osfmk/kern/sched_grrr.c b/osfmk/kern/sched_grrr.c index 13067b857..5774cc7b1 100644 --- a/osfmk/kern/sched_grrr.c +++ b/osfmk/kern/sched_grrr.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -122,7 +121,8 @@ sched_grrr_maintenance_continuation(void); static thread_t sched_grrr_choose_thread(processor_t processor, - int priority); + int priority, + ast_t reason); static thread_t sched_grrr_steal_thread(processor_set_t pset); @@ -171,9 +171,6 @@ sched_grrr_initial_quantum_size(thread_t thread); static sched_mode_t sched_grrr_initial_thread_sched_mode(task_t parent_task); -static boolean_t -sched_grrr_supports_timeshare_mode(void); - static boolean_t sched_grrr_can_update_priority(thread_t thread); @@ -195,40 +192,47 @@ sched_grrr_processor_runq_count(processor_t processor); static uint64_t sched_grrr_processor_runq_stats_count_sum(processor_t processor); +static int +sched_grrr_processor_bound_count(processor_t processor); + +static void +sched_grrr_thread_update_scan(void); + const struct sched_dispatch_table sched_grrr_dispatch = { - sched_grrr_init, - sched_grrr_timebase_init, - sched_grrr_processor_init, - sched_grrr_pset_init, - sched_grrr_maintenance_continuation, - sched_grrr_choose_thread, - sched_grrr_steal_thread, - sched_grrr_compute_priority, - sched_grrr_choose_processor, - sched_grrr_processor_enqueue, - sched_grrr_processor_queue_shutdown, - sched_grrr_processor_queue_remove, - sched_grrr_processor_queue_empty, - sched_grrr_priority_is_urgent, - sched_grrr_processor_csw_check, - sched_grrr_processor_queue_has_priority, - sched_grrr_initial_quantum_size, - sched_grrr_initial_thread_sched_mode, - sched_grrr_supports_timeshare_mode, - sched_grrr_can_update_priority, - sched_grrr_update_priority, - sched_grrr_lightweight_update_priority, - sched_grrr_quantum_expire, - sched_grrr_should_current_thread_rechoose_processor, - sched_grrr_processor_runq_count, - sched_grrr_processor_runq_stats_count_sum, - sched_grrr_fairshare_init, - sched_grrr_fairshare_runq_count, - sched_grrr_fairshare_runq_stats_count_sum, - sched_grrr_fairshare_enqueue, - sched_grrr_fairshare_dequeue, - sched_grrr_fairshare_queue_remove, - TRUE /* direct_dispatch_to_idle_processors */ + .init = sched_grrr_init, + .timebase_init = sched_grrr_timebase_init, + .processor_init = sched_grrr_processor_init, + .pset_init = sched_grrr_pset_init, + .maintenance_continuation = sched_grrr_maintenance_continuation, + .choose_thread = sched_grrr_choose_thread, + .steal_thread = sched_grrr_steal_thread, + .compute_priority = sched_grrr_compute_priority, + .choose_processor = sched_grrr_choose_processor, + 
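The sched_grrr hunk here rewrites a positional initializer list as designated initializers. The payoff, shown on a toy dispatch table: members bind by name, so adding or reordering struct fields (as this patch does with processor_bound_count and thread_update_scan) can no longer silently shift every later entry, and omitted members default to zero:

#include <stdio.h>

/* A toy dispatch table in the style of sched_dispatch_table. */
struct dispatch_table {
    void (*init)(void);
    int  (*runq_count)(void);
    int   direct_dispatch_to_idle_processors;
};

static void my_init(void)       { puts("init"); }
static int  my_runq_count(void) { return 0; }

/*
 * Designated initializers bind by field name; any member left out is
 * zero-initialized rather than picking up the next value in sequence.
 */
static const struct dispatch_table table = {
    .init = my_init,
    .runq_count = my_runq_count,
    .direct_dispatch_to_idle_processors = 1,
};

int main(void)
{
    table.init();
    printf("count=%d direct=%d\n", table.runq_count(),
           table.direct_dispatch_to_idle_processors);
    return 0;
}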
.processor_enqueue = sched_grrr_processor_enqueue, + .processor_queue_shutdown = sched_grrr_processor_queue_shutdown, + .processor_queue_remove = sched_grrr_processor_queue_remove, + .processor_queue_empty = sched_grrr_processor_queue_empty, + .priority_is_urgent = sched_grrr_priority_is_urgent, + .processor_csw_check = sched_grrr_processor_csw_check, + .processor_queue_has_priority = sched_grrr_processor_queue_has_priority, + .initial_quantum_size = sched_grrr_initial_quantum_size, + .initial_thread_sched_mode = sched_grrr_initial_thread_sched_mode, + .can_update_priority = sched_grrr_can_update_priority, + .update_priority = sched_grrr_update_priority, + .lightweight_update_priority = sched_grrr_lightweight_update_priority, + .quantum_expire = sched_grrr_quantum_expire, + .should_current_thread_rechoose_processor = sched_grrr_should_current_thread_rechoose_processor, + .processor_runq_count = sched_grrr_processor_runq_count, + .processor_runq_stats_count_sum = sched_grrr_processor_runq_stats_count_sum, + .fairshare_init = sched_grrr_fairshare_init, + .fairshare_runq_count = sched_grrr_fairshare_runq_count, + .fairshare_runq_stats_count_sum = sched_grrr_fairshare_runq_stats_count_sum, + .fairshare_enqueue = sched_grrr_fairshare_enqueue, + .fairshare_dequeue = sched_grrr_fairshare_dequeue, + .fairshare_queue_remove = sched_grrr_fairshare_queue_remove, + .processor_bound_count = sched_grrr_processor_bound_count, + .thread_update_scan = sched_grrr_thread_update_scan, + .direct_dispatch_to_idle_processors = TRUE, }; extern int max_unsafe_quanta; @@ -307,7 +311,8 @@ sched_grrr_maintenance_continuation(void) static thread_t sched_grrr_choose_thread(processor_t processor, - int priority __unused) + int priority __unused, + ast_t reason __unused) { grrr_run_queue_t rq = &processor->grrr_runq; @@ -365,7 +370,7 @@ sched_grrr_processor_queue_shutdown( queue_init(&tqueue); queue_init(&bqueue); - while ((thread = sched_grrr_choose_thread(processor, IDLEPRI)) != THREAD_NULL) { + while ((thread = sched_grrr_choose_thread(processor, IDLEPRI, AST_NONE)) != THREAD_NULL) { if (thread->bound_processor == PROCESSOR_NULL) { enqueue_tail(&tqueue, (queue_entry_t)thread); } else { @@ -493,12 +498,6 @@ sched_grrr_initial_thread_sched_mode(task_t parent_task) return TH_MODE_TIMESHARE; } -static boolean_t -sched_grrr_supports_timeshare_mode(void) -{ - return TRUE; -} - static boolean_t sched_grrr_can_update_priority(thread_t thread __unused) { @@ -542,6 +541,18 @@ sched_grrr_processor_runq_stats_count_sum(processor_t processor) return processor->grrr_runq.runq_stats.count_sum; } +static int +sched_grrr_processor_bound_count(__unused processor_t processor) +{ + return 0; +} + +static void +sched_grrr_thread_update_scan(void) +{ + +} + #endif /* defined(CONFIG_SCHED_GRRR) */ #if defined(CONFIG_SCHED_GRRR_CORE) @@ -873,7 +884,7 @@ grrr_sorted_list_insert_group(grrr_run_queue_t rq, #endif /* defined(CONFIG_SCHED_GRRR_CORE) */ -#if defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_GRRR) static struct grrr_run_queue fs_grrr_runq; #define FS_GRRR_RUNQ ((processor_t)-2) @@ -952,4 +963,4 @@ boolean_t sched_grrr_fairshare_queue_remove(thread_t thread) } } -#endif /* defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */ +#endif /* defined(CONFIG_SCHED_GRRR) */ diff --git a/osfmk/kern/sched_multiq.c b/osfmk/kern/sched_multiq.c new file mode 100644 index 000000000..63519c677 --- /dev/null +++ b/osfmk/kern/sched_multiq.c @@ -0,0 +1,1424 @@ +/* + * Copyright (c) 2013 Apple 
Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * Theory Statement + * + * How does the task scheduler work? + * + * It schedules threads across a few levels. + * + * RT threads are dealt with above us + * Bound threads go into the per-processor runq + * Non-bound threads are linked on their task's sched_group's runq + * sched_groups' sched_entries are linked on the pset's runq + * + * TODO: make this explicit - bound threads should have a different enqueue fxn + * + * When we choose a new thread, we will decide whether to look at the bound runqueue, the global runqueue + * or the current group's runqueue, then dequeue the next thread in that runqueue. + * + * We then manipulate the sched_entries to reflect the invariant that: + * Each non-empty priority level in a group's runq is represented by one sched_entry enqueued in the global + * runqueue. + * + * A sched_entry represents a chance at running - for each priority in each task, there is one chance of getting + * to run. This reduces the excess contention bonus given to processes which have work spread among many threads + * as compared to processes which do the same amount of work under fewer threads. + * + * NOTE: Currently, the multiq scheduler only supports one pset. + * + * NOTE ABOUT thread->sched_pri: + * + * It can change after enqueue - it's changed without pset lock but with thread lock if thread->runq is 0. + * Therefore we can only depend on it not changing during the enqueue and remove path, not the dequeue. + * + * TODO: Future features: + * + * Decouple the task priority from the sched_entry priority, allowing for: + * fast task priority change without having to iterate and re-dispatch all threads in the task. + * i.e. 
task-wide priority, task-wide boosting
+ * fancier group decay features
+ *
+ * Group (or task) decay:
+ * Decay is used for a few different things:
+ * Prioritizing latency-needing threads over throughput-needing threads for time-to-running
+ * Balancing work between threads in a process
+ * Balancing work done at the same priority between different processes
+ * Recovering from priority inversions between two threads in the same process
+ * Recovering from priority inversions between two threads in different processes
+ * Simulating a proportional share scheduler by allowing lower priority threads
+ * to run for a certain percentage of the time
+ *
+ * Task decay lets us separately address the 'same process' and 'different process' needs,
+ * which will allow us to make smarter tradeoffs in different cases.
+ * For example, we could resolve priority inversion in the same process by reordering threads
+ * without dropping the process below low priority threads in other processes.
+ *
+ * One lock to rule them all (or at least all the runqueues) instead of the pset locks
+ *
+ * Shrink sched_entry size to the size of a queue_chain_t by inferring priority, group, and perhaps the runq field.
+ * The entries array is 5K currently, so it would be really great to shrink it.
+ * One way to get sched_group below 4K without a new runq structure would be to remove the extra queues above realtime.
+ *
+ * When preempting a processor, store a flag saying whether the preemption
+ * was from a thread in the same group or a different group,
+ * and tell choose_thread about it.
+ *
+ * When choosing a processor, bias towards those running in the same
+ * group as I am running (at the same priority, or within a certain band?).
+ *
+ * Decide if we need to support psets.
+ * Decide how to support psets - do we need duplicate entries for each pset,
+ * or can we get away with putting the entry in either one or the other pset?
+ *
+ * Consider the right way to handle runq count - I don't want to iterate groups.
+ * Perhaps keep a global counter. sched_run_count will not work.
+ * Alternate option - remove it from choose_processor. It doesn't add much value
+ * now that we have a global runq.
+ *
+ * Need a better way of finding the group to target instead of looking at current_task.
+ * Perhaps choose_thread could pass in the current thread?
+ *
+ * Consider unifying the runq copy-pastes.
+ *
+ * Thoughts on having a group-central quantum bucket:
+ *
+ * I see two algorithms for deciding quanta:
+ * A) Hand off only when switching thread to thread in the same group
+ * B) Allocate and return quanta to the group's pool
+ *
+ * Issues:
+ * If a task blocks completely, should it come back with the leftover quantum,
+ * or a brand new quantum?
+ *
+ * Should I put a flag saying 'zero out the quantum you grab when you're dispatched'?
+ *
+ * Resolution:
+ * Handing off quanta between threads will help with jumping around in the current task,
+ * but will not help when a thread from a different task is involved.
+ * We need an algorithm that works with round-robining between threads in different tasks.
+ *
+ * But wait - round-robining can only be triggered by quantum expire or blocking.
+ * We need something that works with preemption or yielding - that's the more interesting idea.
+ *
+ * Existing algorithm - preemption doesn't re-set the quantum, and puts the thread on the head of the runq.
+ * Blocking or quantum expiration does re-set the quantum, and puts the thread on the tail of the runq.
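A sketch of the hand-off rule these notes converge on (and that the Conclusions paragraph below adopts): inherit the outgoing thread's unused quantum within a sched_group, grant a fresh quantum across groups. The types and the switch_quantum helper are invented for illustration:

#include <stdint.h>
#include <stdio.h>

/* Toy types; the real scheduler tracks this on thread_t/sched_group. */
struct group  { int id; };
struct thread { struct group *grp; uint64_t quantum_remaining; };

/*
 * When switching between two threads of the same sched_group, the
 * incoming thread inherits the outgoing thread's unused quantum;
 * across groups it gets a full one.
 */
static void
switch_quantum(struct thread *from, struct thread *to, uint64_t full_quantum)
{
    if (from->grp == to->grp && from->quantum_remaining > 0)
        to->quantum_remaining = from->quantum_remaining;  /* hand off */
    else
        to->quantum_remaining = full_quantum;             /* fresh grant */
}

int main(void)
{
    struct group g1 = {1}, g2 = {2};
    struct thread a = { &g1, 3000 }, b = { &g1, 0 }, c = { &g2, 0 };

    switch_quantum(&a, &b, 10000);  /* same group: b inherits 3000 */
    switch_quantum(&a, &c, 10000);  /* other group: c gets 10000 */
    printf("b=%llu c=%llu\n",
           (unsigned long long)b.quantum_remaining,
           (unsigned long long)c.quantum_remaining);
    return 0;
}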
+ *
+ * New algorithm -
+ * Hand off quanta when hopping between threads with the same sched_group.
+ * Even if the thread was blocked, it uses the previous thread's remaining quantum when it starts.
+ *
+ * If we use the 'only cycle the entry at quantum expiration' algorithm, then the quantum pool starts getting
+ * interesting.
+ *
+ * A thought - perhaps the handoff approach doesn't work so well in the presence of
+ * non-handoff wakeups, i.e. wake another thread, then wait, then block - it doesn't mean that the
+ * woken thread will be what I switch to - another processor may have stolen it.
+ * What do we do there?
+ *
+ * Conclusions:
+ * We currently don't know of a scenario where quantum buckets on the task are beneficial.
+ * We will instead hand off the quantum between threads in the task, and keep the quantum
+ * on the preempted thread if it's preempted by something outside the task.
+ *
+ */
+
+#if DEBUG || DEVELOPMENT
+#define MULTIQ_SANITY_CHECK
+#endif
+
+typedef struct sched_entry {
+	queue_chain_t	links;
+	int16_t		sched_pri;	/* scheduled (current) priority */
+	int16_t		runq;
+	int32_t		pad;
+} *sched_entry_t;
+
+typedef run_queue_t entry_queue_t;	/* A run queue that holds sched_entries instead of threads */
+typedef run_queue_t group_runq_t;	/* A run queue that is part of a sched_group */
+
+#define SCHED_ENTRY_NULL ((sched_entry_t) 0)
+#define MULTIQ_ERUNQ (-4)		/* Indicates entry is on the main runq */
+
+/* Each level in the run queue corresponds to one entry in the entries array */
+struct sched_group {
+	struct sched_entry	entries[NRQS];
+	struct run_queue	runq;
+	queue_chain_t		sched_groups;
+};
+
+/* TODO: Turn this into an attribute in the sched dispatch struct */
+boolean_t sched_groups_enabled = FALSE;
+
+/*
+ * Keep the entry at the head of the runqueue while dequeueing threads.
+ * Only cycle it to the end of the runqueue when a thread in the task
+ * hits its quantum.
+ */
+static boolean_t deep_drain = FALSE;
+
+/*
+ * Don't favor the task when an urgent thread is present.
+ */ +static boolean_t drain_urgent_first = TRUE; + +/* Verify the consistency of the runq before touching it */ +static boolean_t multiq_sanity_check = FALSE; + +/* + * Draining threads from the current task is preferred + * when they're less than X steps below the current + * global highest priority + */ +#define DEFAULT_DRAIN_BAND_LIMIT MAXPRI +static integer_t drain_band_limit; + +/* + * Don't go below this priority level if there is something above it in another task + */ +#define DEFAULT_DRAIN_DEPTH_LIMIT MAXPRI_THROTTLE +static integer_t drain_depth_limit; + + +static struct zone *sched_group_zone; + +static uint64_t num_sched_groups = 0; +static queue_head_t sched_groups; + +static lck_attr_t sched_groups_lock_attr; +static lck_grp_t sched_groups_lock_grp; +static lck_grp_attr_t sched_groups_lock_grp_attr; + +static lck_mtx_t sched_groups_lock; + + +static void +sched_multiq_init(void); + +static thread_t +sched_multiq_steal_thread(processor_set_t pset); + +static void +sched_multiq_thread_update_scan(void); + +static boolean_t +sched_multiq_processor_enqueue(processor_t processor, thread_t thread, integer_t options); + +static boolean_t +sched_multiq_processor_queue_remove(processor_t processor, thread_t thread); + +void +sched_multiq_quantum_expire(thread_t thread); + +static ast_t +sched_multiq_processor_csw_check(processor_t processor); + +static boolean_t +sched_multiq_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte); + +static int +sched_multiq_runq_count(processor_t processor); + +static boolean_t +sched_multiq_processor_queue_empty(processor_t processor); + +static uint64_t +sched_multiq_runq_stats_count_sum(processor_t processor); + +static int +sched_multiq_processor_bound_count(processor_t processor); + +static void +sched_multiq_pset_init(processor_set_t pset); + +static void +sched_multiq_processor_init(processor_t processor); + +static thread_t +sched_multiq_choose_thread(processor_t processor, int priority, ast_t reason); + +static void +sched_multiq_processor_queue_shutdown(processor_t processor); + +static sched_mode_t +sched_multiq_initial_thread_sched_mode(task_t parent_task); + +static boolean_t +sched_multiq_should_current_thread_rechoose_processor(processor_t processor); + +const struct sched_dispatch_table sched_multiq_dispatch = { + .init = sched_multiq_init, + .timebase_init = sched_traditional_timebase_init, + .processor_init = sched_multiq_processor_init, + .pset_init = sched_multiq_pset_init, + .maintenance_continuation = sched_traditional_maintenance_continue, + .choose_thread = sched_multiq_choose_thread, + .steal_thread = sched_multiq_steal_thread, + .compute_priority = compute_priority, + .choose_processor = choose_processor, + .processor_enqueue = sched_multiq_processor_enqueue, + .processor_queue_shutdown = sched_multiq_processor_queue_shutdown, + .processor_queue_remove = sched_multiq_processor_queue_remove, + .processor_queue_empty = sched_multiq_processor_queue_empty, + .priority_is_urgent = priority_is_urgent, + .processor_csw_check = sched_multiq_processor_csw_check, + .processor_queue_has_priority = sched_multiq_processor_queue_has_priority, + .initial_quantum_size = sched_traditional_initial_quantum_size, + .initial_thread_sched_mode = sched_multiq_initial_thread_sched_mode, + .can_update_priority = can_update_priority, + .update_priority = update_priority, + .lightweight_update_priority = lightweight_update_priority, + .quantum_expire = sched_multiq_quantum_expire, + .should_current_thread_rechoose_processor 
= sched_multiq_should_current_thread_rechoose_processor, + .processor_runq_count = sched_multiq_runq_count, + .processor_runq_stats_count_sum = sched_multiq_runq_stats_count_sum, + .fairshare_init = sched_traditional_fairshare_init, + .fairshare_runq_count = sched_traditional_fairshare_runq_count, + .fairshare_runq_stats_count_sum = sched_traditional_fairshare_runq_stats_count_sum, + .fairshare_enqueue = sched_traditional_fairshare_enqueue, + .fairshare_dequeue = sched_traditional_fairshare_dequeue, + .fairshare_queue_remove = sched_traditional_fairshare_queue_remove, + .processor_bound_count = sched_multiq_processor_bound_count, + .thread_update_scan = sched_multiq_thread_update_scan, + .direct_dispatch_to_idle_processors = FALSE, +}; + + +static void +sched_multiq_init(void) +{ + sched_groups_enabled = TRUE; + +#if defined(MULTIQ_SANITY_CHECK) + PE_parse_boot_argn("-multiq-sanity-check", &multiq_sanity_check, sizeof(multiq_sanity_check)); +#endif + + PE_parse_boot_argn("-multiq-deep-drain", &deep_drain, sizeof(deep_drain)); + + PE_parse_boot_argn("multiq_drain_urgent_first", &drain_urgent_first, sizeof(drain_urgent_first)); + + if (!PE_parse_boot_argn("multiq_drain_depth_limit", &drain_depth_limit, sizeof(drain_depth_limit))) { + drain_depth_limit = DEFAULT_DRAIN_DEPTH_LIMIT; + } + + if (!PE_parse_boot_argn("multiq_drain_band_limit", &drain_band_limit, sizeof(drain_band_limit))) { + drain_band_limit = DEFAULT_DRAIN_BAND_LIMIT; + } + + printf("multiq scheduler config: deep-drain %d, urgent first %d, depth limit %d, band limit %d, sanity check %d\n", + deep_drain, drain_urgent_first, drain_depth_limit, drain_band_limit, multiq_sanity_check); + + sched_group_zone = zinit( + sizeof(struct sched_group), + task_max * sizeof(struct sched_group), + PAGE_SIZE, + "sched groups"); + + zone_change(sched_group_zone, Z_NOENCRYPT, TRUE); + zone_change(sched_group_zone, Z_NOCALLOUT, TRUE); + + queue_init(&sched_groups); + + lck_grp_attr_setdefault(&sched_groups_lock_grp_attr); + lck_grp_init(&sched_groups_lock_grp, "sched_groups", &sched_groups_lock_grp_attr); + lck_attr_setdefault(&sched_groups_lock_attr); + lck_mtx_init(&sched_groups_lock, &sched_groups_lock_grp, &sched_groups_lock_attr); + + sched_traditional_init(); +} + +static void +sched_multiq_processor_init(processor_t processor) +{ + run_queue_init(&processor->runq); +} + +static void +sched_multiq_pset_init(processor_set_t pset) +{ + run_queue_init(&pset->pset_runq); +} + +static sched_mode_t +sched_multiq_initial_thread_sched_mode(task_t parent_task) +{ + if (parent_task == kernel_task) + return TH_MODE_FIXED; + else + return TH_MODE_TIMESHARE; +} + +sched_group_t +sched_group_create(void) +{ + sched_group_t sched_group; + + if (!sched_groups_enabled) + return SCHED_GROUP_NULL; + + sched_group = (sched_group_t)zalloc(sched_group_zone); + + bzero(sched_group, sizeof(struct sched_group)); + + run_queue_init(&sched_group->runq); + + for (int i = 0; i < NRQS; i++) { + sched_group->entries[i].runq = 0; + sched_group->entries[i].sched_pri = i; + } + + lck_mtx_lock(&sched_groups_lock); + queue_enter(&sched_groups, sched_group, sched_group_t, sched_groups); + num_sched_groups++; + lck_mtx_unlock(&sched_groups_lock); + + return (sched_group); +} + +void +sched_group_destroy(sched_group_t sched_group) +{ + if (!sched_groups_enabled) { + assert(sched_group == SCHED_GROUP_NULL); + return; + } + + assert(sched_group != SCHED_GROUP_NULL); + assert(sched_group->runq.count == 0); + + for (int i = 0; i < NRQS; i++) { + assert(sched_group->entries[i].runq == 
0); + assert(sched_group->entries[i].sched_pri == i); + } + + lck_mtx_lock(&sched_groups_lock); + queue_remove(&sched_groups, sched_group, sched_group_t, sched_groups); + num_sched_groups--; + lck_mtx_unlock(&sched_groups_lock); + + zfree(sched_group_zone, sched_group); +} + +__attribute__((always_inline)) +static inline entry_queue_t +multiq_main_entryq(processor_t processor) +{ + return (entry_queue_t)&processor->processor_set->pset_runq; +} + +__attribute__((always_inline)) +static inline run_queue_t +multiq_bound_runq(processor_t processor) +{ + return &processor->runq; +} + +__attribute__((always_inline)) +static inline sched_entry_t +group_entry_for_pri(sched_group_t group, integer_t pri) +{ + return &group->entries[pri]; +} + +__attribute__((always_inline)) +static inline sched_group_t +group_for_entry(sched_entry_t entry) +{ + sched_group_t group = (sched_group_t)(entry - entry->sched_pri); + return group; +} + +/* Peek at the head of the runqueue */ +static sched_entry_t +entry_queue_first_entry(entry_queue_t rq) +{ + assert(rq->count != 0); + + queue_t queue = rq->queues + rq->highq; + + sched_entry_t entry = (sched_entry_t)queue_first(queue); + + assert(entry->sched_pri == rq->highq); + + return entry; +} + +#if defined(MULTIQ_SANITY_CHECK) + +__attribute__((always_inline)) +static inline boolean_t +queue_chain_linked(queue_chain_t* chain) +{ + if (chain->next != NULL) { + assert(chain->prev != NULL); + return TRUE; + } else { + assert(chain->prev == NULL); + return FALSE; + } +} + +static thread_t +group_first_thread(sched_group_t group) +{ + group_runq_t rq = &group->runq; + + assert(rq->count != 0); + + queue_t queue = rq->queues + rq->highq; + + thread_t thread = (thread_t)(void*)queue_first(queue); + + assert(thread != THREAD_NULL); + + assert(thread->sched_group == group); + + /* TODO: May not be safe */ + assert(thread->sched_pri == rq->highq); + + return thread; +} + +/* Asserts if entry is not in entry runq at pri */ +static void +entry_queue_check_entry(entry_queue_t runq, sched_entry_t entry, int expected_pri) +{ + queue_t q; + sched_entry_t elem; + + assert(queue_chain_linked(&entry->links)); + assert(entry->runq == MULTIQ_ERUNQ); + + q = &runq->queues[expected_pri]; + + queue_iterate(q, elem, sched_entry_t, links) { + if (elem == entry) + return; + } + + panic("runq %p doesn't contain entry %p at pri %d", runq, entry, expected_pri); +} + +/* Asserts if thread is not in group at its priority */ +static void +sched_group_check_thread(sched_group_t group, thread_t thread) +{ + queue_t q; + thread_t elem; + int pri = thread->sched_pri; + + assert(thread->runq != PROCESSOR_NULL); + + q = &group->runq.queues[pri]; + + queue_iterate(q, elem, thread_t, links) { + if (elem == thread) + return; + } + + panic("group %p doesn't contain thread %p at pri %d", group, thread, pri); +} + +static void +global_check_entry_queue(entry_queue_t main_entryq) +{ + if (main_entryq->count == 0) + return; + + sched_entry_t entry = entry_queue_first_entry(main_entryq); + + assert(entry->runq == MULTIQ_ERUNQ); + + sched_group_t group = group_for_entry(entry); + + thread_t thread = group_first_thread(group); + + __assert_only sched_entry_t thread_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri); + + assert(entry->sched_pri == group->runq.highq); + + assert(entry == thread_entry); + assert(thread->runq != PROCESSOR_NULL); +} + +static void +group_check_run_queue(entry_queue_t main_entryq, sched_group_t group) +{ + if (group->runq.count == 0) + return; + + thread_t thread = 
group_first_thread(group); + + assert(thread->runq != PROCESSOR_NULL); + + sched_entry_t sched_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri); + + entry_queue_check_entry(main_entryq, sched_entry, thread->sched_pri); + + assert(sched_entry->sched_pri == thread->sched_pri); + assert(sched_entry->runq == MULTIQ_ERUNQ); +} + +#endif /* defined(MULTIQ_SANITY_CHECK) */ + +/* + * The run queue must not be empty. + */ +static sched_entry_t +entry_queue_dequeue_entry(entry_queue_t rq) +{ + sched_entry_t sched_entry; + queue_t queue = rq->queues + rq->highq; + + assert(rq->count > 0); + assert(!queue_empty(queue)); + + sched_entry = (sched_entry_t)dequeue_head(queue); + + SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); + rq->count--; + if (SCHED(priority_is_urgent)(rq->highq)) { + rq->urgency--; assert(rq->urgency >= 0); + } + if (queue_empty(queue)) { + if (rq->highq != IDLEPRI) + clrbit(MAXPRI - rq->highq, rq->bitmap); + rq->highq = MAXPRI - ffsbit(rq->bitmap); + } + + sched_entry->runq = 0; + + return (sched_entry); +} + +/* + * The run queue must not be empty. + */ +static boolean_t +entry_queue_enqueue_entry( + entry_queue_t rq, + sched_entry_t entry, + integer_t options) +{ + int sched_pri = entry->sched_pri; + queue_t queue = rq->queues + sched_pri; + boolean_t result = FALSE; + + assert(entry->runq == 0); + + if (queue_empty(queue)) { + enqueue_tail(queue, (queue_entry_t)entry); + + setbit(MAXPRI - sched_pri, rq->bitmap); + if (sched_pri > rq->highq) { + rq->highq = sched_pri; + result = TRUE; + } + } else { + if (options & SCHED_TAILQ) + enqueue_tail(queue, (queue_entry_t)entry); + else + enqueue_head(queue, (queue_entry_t)entry); + } + if (SCHED(priority_is_urgent)(sched_pri)) + rq->urgency++; + SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); + rq->count++; + + entry->runq = MULTIQ_ERUNQ; + + return (result); +} + +/* + * The entry must be in this runqueue. + */ +static void +entry_queue_remove_entry( + entry_queue_t rq, + sched_entry_t entry) +{ + int sched_pri = entry->sched_pri; + +#if defined(MULTIQ_SANITY_CHECK) + if (multiq_sanity_check) { + entry_queue_check_entry(rq, entry, sched_pri); + } +#endif + + remqueue((queue_entry_t)entry); + + SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); + rq->count--; + if (SCHED(priority_is_urgent)(sched_pri)) { + rq->urgency--; assert(rq->urgency >= 0); + } + + if (queue_empty(rq->queues + sched_pri)) { + /* update run queue status */ + if (sched_pri != IDLEPRI) + clrbit(MAXPRI - sched_pri, rq->bitmap); + rq->highq = MAXPRI - ffsbit(rq->bitmap); + } + + entry->runq = 0; +} + +/* + * The run queue must not be empty. + * + * sets queue_empty to TRUE if queue is now empty at thread_pri + */ +static thread_t +group_run_queue_dequeue_thread( + group_runq_t rq, + integer_t *thread_pri, + boolean_t *queue_empty) +{ + thread_t thread; + queue_t queue = rq->queues + rq->highq; + + assert(rq->count > 0); + assert(!queue_empty(queue)); + + *thread_pri = rq->highq; + + thread = (thread_t)(void*)dequeue_head(queue); + + SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); + rq->count--; + if (SCHED(priority_is_urgent)(rq->highq)) { + rq->urgency--; assert(rq->urgency >= 0); + } + if (queue_empty(queue)) { + if (rq->highq != IDLEPRI) + clrbit(MAXPRI - rq->highq, rq->bitmap); + rq->highq = MAXPRI - ffsbit(rq->bitmap); + *queue_empty = TRUE; + } else { + *queue_empty = FALSE; + } + + return (thread); +} + +/* + * The run queue must not be empty. 
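The enqueue/dequeue/remove helpers in this file all maintain the same bookkeeping: one bit per non-empty priority level, with highq recomputed by a find-first-set scan when a level drains. A compact userspace version of that bookkeeping; the kernel stores the map reversed so ffsbit finds the highest priority, while this sketch scans from the top word instead:

#include <assert.h>
#include <stdint.h>

#define NRQS    128          /* priority levels 0..127 */
#define IDLEPRI 0

/* One bit per priority level; word 1 holds priorities 64..127. */
static uint64_t bitmap[2];
static int counts[NRQS];     /* threads queued per level */
static int highq = IDLEPRI;

static void setbit_pri(int pri) { bitmap[pri >> 6] |=  (1ull << (pri & 63)); }
static void clrbit_pri(int pri) { bitmap[pri >> 6] &= ~(1ull << (pri & 63)); }

/* Highest set bit = highest non-empty priority. */
static int find_highq(void)
{
    for (int w = 1; w >= 0; w--)
        if (bitmap[w])
            return w * 64 + 63 - __builtin_clzll(bitmap[w]);
    return IDLEPRI;
}

static void runq_enqueue(int pri)
{
    if (counts[pri]++ == 0)
        setbit_pri(pri);     /* level was empty: publish it in the map */
    if (pri > highq)
        highq = pri;
}

static void runq_dequeue(int pri)
{
    assert(counts[pri] > 0);
    if (--counts[pri] == 0) {
        if (pri != IDLEPRI)
            clrbit_pri(pri); /* level drained: retire its bit */
        highq = find_highq();
    }
}

int main(void)
{
    runq_enqueue(31);
    runq_enqueue(80);
    assert(highq == 80);
    runq_dequeue(80);        /* level 80 now empty: highq falls back */
    assert(highq == 31);
    return 0;
}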
+ * returns TRUE if queue was empty at thread_pri + */ +static boolean_t +group_run_queue_enqueue_thread( + group_runq_t rq, + thread_t thread, + integer_t thread_pri, + integer_t options) +{ + queue_t queue = rq->queues + thread_pri; + boolean_t result = FALSE; + + assert(thread->runq == PROCESSOR_NULL); + + if (queue_empty(queue)) { + enqueue_tail(queue, (queue_entry_t)thread); + + setbit(MAXPRI - thread_pri, rq->bitmap); + if (thread_pri > rq->highq) { + rq->highq = thread_pri; + } + result = TRUE; + } else { + if (options & SCHED_TAILQ) + enqueue_tail(queue, (queue_entry_t)thread); + else + enqueue_head(queue, (queue_entry_t)thread); + } + if (SCHED(priority_is_urgent)(thread_pri)) + rq->urgency++; + SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); + rq->count++; + + return (result); +} + +/* + * The thread must be in this runqueue. + * returns TRUE if queue is now empty at thread_pri + */ +static boolean_t +group_run_queue_remove_thread( + group_runq_t rq, + thread_t thread, + integer_t thread_pri) +{ + boolean_t result = FALSE; + + assert(thread->runq != PROCESSOR_NULL); + + remqueue((queue_entry_t)thread); + + SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); + rq->count--; + if (SCHED(priority_is_urgent)(thread_pri)) { + rq->urgency--; assert(rq->urgency >= 0); + } + + if (queue_empty(rq->queues + thread_pri)) { + /* update run queue status */ + if (thread_pri != IDLEPRI) + clrbit(MAXPRI - thread_pri, rq->bitmap); + rq->highq = MAXPRI - ffsbit(rq->bitmap); + result = TRUE; + } + + thread->runq = PROCESSOR_NULL; + + return result; +} + +/* + * A thread's sched pri may change out from under us because + * we're clearing thread->runq here without the thread locked. + * Do not rely on it to be the same as when we enqueued. + */ +static thread_t +sched_global_dequeue_thread(entry_queue_t main_entryq) +{ + boolean_t pri_level_empty = FALSE; + sched_entry_t entry; + group_runq_t group_runq; + thread_t thread; + integer_t thread_pri; + sched_group_t group; + + assert(main_entryq->count > 0); + + entry = entry_queue_dequeue_entry(main_entryq); + + group = group_for_entry(entry); + group_runq = &group->runq; + + thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty); + + thread->runq = PROCESSOR_NULL; + + if (!pri_level_empty) { + entry_queue_enqueue_entry(main_entryq, entry, SCHED_TAILQ); + } + + return thread; +} + +/* Dequeue a thread from the global runq without moving the entry */ +static thread_t +sched_global_deep_drain_dequeue_thread(entry_queue_t main_entryq) +{ + boolean_t pri_level_empty = FALSE; + sched_entry_t entry; + group_runq_t group_runq; + thread_t thread; + integer_t thread_pri; + sched_group_t group; + + assert(main_entryq->count > 0); + + entry = entry_queue_first_entry(main_entryq); + + group = group_for_entry(entry); + group_runq = &group->runq; + + thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty); + + thread->runq = PROCESSOR_NULL; + + if (pri_level_empty) { + entry_queue_remove_entry(main_entryq, entry); + } + + return thread; +} + + +static thread_t +sched_group_dequeue_thread( + entry_queue_t main_entryq, + sched_group_t group) +{ + group_runq_t group_runq = &group->runq; + boolean_t pri_level_empty = FALSE; + thread_t thread; + integer_t thread_pri; + + thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty); + + thread->runq = PROCESSOR_NULL; + + if (pri_level_empty) { + entry_queue_remove_entry(main_entryq, group_entry_for_pri(group, thread_pri)); + } + + return 
thread; +} + +static void +sched_group_remove_thread( + entry_queue_t main_entryq, + sched_group_t group, + thread_t thread) +{ + integer_t thread_pri = thread->sched_pri; + sched_entry_t sched_entry = group_entry_for_pri(group, thread_pri); + +#if defined(MULTIQ_SANITY_CHECK) + if (multiq_sanity_check) { + global_check_entry_queue(main_entryq); + group_check_run_queue(main_entryq, group); + + sched_group_check_thread(group, thread); + entry_queue_check_entry(main_entryq, sched_entry, thread_pri); + } +#endif + + boolean_t pri_level_empty = group_run_queue_remove_thread(&group->runq, thread, thread_pri); + + if (pri_level_empty) { + entry_queue_remove_entry(main_entryq, sched_entry); + } + +#if defined(MULTIQ_SANITY_CHECK) + if (multiq_sanity_check) { + global_check_entry_queue(main_entryq); + group_check_run_queue(main_entryq, group); + } +#endif +} + +static void +sched_group_enqueue_thread( + entry_queue_t main_entryq, + sched_group_t group, + thread_t thread, + integer_t options) +{ +#if defined(MULTIQ_SANITY_CHECK) + if (multiq_sanity_check) { + global_check_entry_queue(main_entryq); + group_check_run_queue(main_entryq, group); + } +#endif + + int sched_pri = thread->sched_pri; + + boolean_t pri_level_was_empty = group_run_queue_enqueue_thread(&group->runq, thread, sched_pri, options); + + if (pri_level_was_empty) { + /* + * TODO: Need to figure out if passing options here is a good idea or not + * What effects would it have? + */ + entry_queue_enqueue_entry(main_entryq, &group->entries[sched_pri], options); + } +} + +/* + * Locate a thread to execute from the run queue and return it. + * Only choose a thread with greater or equal priority. + * + * pset is locked, thread is not locked. + * + * Returns THREAD_NULL if it cannot find a valid thread. + * + * Note: we cannot rely on the value of thread->sched_pri in this path because + * we don't have the thread locked. + * + * TODO: Remove tracepoints + */ +static thread_t +sched_multiq_choose_thread( + processor_t processor, + int priority, + ast_t reason) +{ + entry_queue_t main_entryq = multiq_main_entryq(processor); + run_queue_t bound_runq = multiq_bound_runq(processor); + + boolean_t choose_bound_runq = FALSE; + + if (bound_runq->highq < priority && + main_entryq->highq < priority) + return THREAD_NULL; + + if (bound_runq->count && main_entryq->count) { + if (bound_runq->highq >= main_entryq->highq) { + choose_bound_runq = TRUE; + } else { + /* Use main runq */ + } + } else if (bound_runq->count) { + choose_bound_runq = TRUE; + } else if (main_entryq->count) { + /* Use main runq */ + } else { + return (THREAD_NULL); + } + + if (choose_bound_runq) { + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE, + MACH_MULTIQ_BOUND, main_entryq->highq, bound_runq->highq, 0, 0); + + return run_queue_dequeue(bound_runq, SCHED_HEADQ); + } + + sched_group_t group = current_thread()->sched_group; + +#if defined(MULTIQ_SANITY_CHECK) + if (multiq_sanity_check) { + global_check_entry_queue(main_entryq); + group_check_run_queue(main_entryq, group); + } +#endif + + /* + * Determine if we should look at the group or the global queue + * + * TODO: + * Perhaps pass reason as a 'should look inside' argument to choose_thread + * Should YIELD AST override drain limit? 
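The drain-limit checks that follow in choose_thread can be read as one pure predicate over queue summaries. A condensed sketch, using the DEFAULT_DRAIN_* values from earlier in the file as illustrative defaults:

#include <stdbool.h>
#include <stdio.h>

#define MAXPRI          127
#define MAXPRI_THROTTLE 4      /* illustrative depth-limit default */

static int  drain_depth_limit  = MAXPRI_THROTTLE;
static int  drain_band_limit   = MAXPRI;
static bool drain_urgent_first = true;

/*
 * Given the global entry queue's top priority/urgency and the current
 * group's top priority, decide whether draining the group would starve
 * higher-priority work elsewhere.
 */
static bool
drain_limit_hit(int global_highq, int global_urgency, int group_highq)
{
    if (global_highq <= group_highq)
        return false;   /* the group already holds the best work */

    /* Something above the depth limit elsewhere: don't dig below it. */
    if (global_highq > drain_depth_limit && group_highq <= drain_depth_limit)
        return true;

    /* Don't run more than a band's width below the global best. */
    if (global_highq - group_highq >= drain_band_limit)
        return true;

    /* Urgent work elsewhere always wins. */
    if (drain_urgent_first && global_urgency > 0)
        return true;

    return false;
}

int main(void)
{
    printf("%d\n", drain_limit_hit(80, 0, 60));  /* within band/depth -> 0 */
    printf("%d\n", drain_limit_hit(80, 1, 60));  /* urgent elsewhere  -> 1 */
    printf("%d\n", drain_limit_hit(80, 0, 4));   /* below depth limit -> 1 */
    return 0;
}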
+ */ + if (group->runq.count != 0 && (reason & AST_PREEMPTION) == 0) { + boolean_t drain_limit_hit = FALSE; + + if (main_entryq->highq > group->runq.highq) { + /* + * If there's something elsewhere above the depth limit, + * don't pick a thread below the limit. + */ + if (main_entryq->highq > drain_depth_limit && + group->runq.highq <= drain_depth_limit) + drain_limit_hit = TRUE; + + /* + * Don't go more than X steps below the global highest + */ + if ((main_entryq->highq - group->runq.highq) >= drain_band_limit) + drain_limit_hit = TRUE; + + /* Don't favor the task when an urgent thread is present. */ + if (drain_urgent_first && main_entryq->urgency > 0) + drain_limit_hit = TRUE; + } + + if (!drain_limit_hit) { + /* Pull from local runq */ + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE, + MACH_MULTIQ_GROUP, main_entryq->highq, group->runq.highq, 0, 0); + + return sched_group_dequeue_thread(main_entryq, group); + } + } + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE, + MACH_MULTIQ_GLOBAL, main_entryq->highq, group->runq.highq, 0, 0); + + /* Couldn't pull from local runq, pull from global runq instead */ + if (deep_drain) { + return sched_global_deep_drain_dequeue_thread(main_entryq); + } else { + return sched_global_dequeue_thread(main_entryq); + } +} + + +/* + * Thread must be locked, and not already be on a run queue. + * pset is locked. + */ +static boolean_t +sched_multiq_processor_enqueue( + processor_t processor, + thread_t thread, + integer_t options) +{ + boolean_t result; + + assert(processor == thread->chosen_processor); + + if (thread->bound_processor != PROCESSOR_NULL) { + assert(thread->bound_processor == processor); + + result = run_queue_enqueue(multiq_bound_runq(processor), thread, options); + thread->runq = processor; + + return result; + } + + sched_group_enqueue_thread(multiq_main_entryq(processor), + thread->sched_group, + thread, options); + + thread->runq = processor; + + return (FALSE); +} + +/* + * Called in the context of thread with thread and pset unlocked, + * after updating thread priority but before propagating that priority + * to the processor + */ +void +sched_multiq_quantum_expire(thread_t thread) +{ + if (deep_drain) { + /* + * Move the entry at this priority to the end of the queue, + * to allow the next task a shot at running. 
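+	 *
+	 * Illustration (hypothetical groups): with entries for groups A,
+	 * B and C queued at pri 31 in that order, A's thread expiring its
+	 * quantum moves A's entry to the tail, so the next selection at
+	 * pri 31 services B, then C, then A again: coarse round-robin
+	 * between tasks at a given priority, not individual threads.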
+ */ + + processor_t processor = thread->last_processor; + processor_set_t pset = processor->processor_set; + entry_queue_t entryq = multiq_main_entryq(processor); + + pset_lock(pset); + + sched_entry_t entry = group_entry_for_pri(thread->sched_group, processor->current_pri); + + if (entry->runq == MULTIQ_ERUNQ) { + entry_queue_remove_entry(entryq, entry); + entry_queue_enqueue_entry(entryq, entry, SCHED_TAILQ); + } + + pset_unlock(pset); + } +} + +static boolean_t +sched_multiq_processor_queue_empty(processor_t processor) +{ + return multiq_main_entryq(processor)->count == 0 && + multiq_bound_runq(processor)->count == 0; +} + +static ast_t +sched_multiq_processor_csw_check(processor_t processor) +{ + boolean_t has_higher; + int pri; + + entry_queue_t main_entryq = multiq_main_entryq(processor); + run_queue_t bound_runq = multiq_bound_runq(processor); + + assert(processor->active_thread != NULL); + + pri = MAX(main_entryq->highq, bound_runq->highq); + + if (first_timeslice(processor)) { + has_higher = (pri > processor->current_pri); + } else { + has_higher = (pri >= processor->current_pri); + } + + if (has_higher) { + if (main_entryq->urgency > 0) + return (AST_PREEMPT | AST_URGENT); + + if (bound_runq->urgency > 0) + return (AST_PREEMPT | AST_URGENT); + + if (processor->active_thread && thread_eager_preemption(processor->active_thread)) + return (AST_PREEMPT | AST_URGENT); + + return AST_PREEMPT; + } + + return AST_NONE; +} + +static boolean_t +sched_multiq_processor_queue_has_priority( + processor_t processor, + int priority, + boolean_t gte) +{ + int qpri = MAX(multiq_main_entryq(processor)->highq, multiq_bound_runq(processor)->highq); + + if (gte) + return qpri >= priority; + else + return qpri > priority; +} + +static boolean_t +sched_multiq_should_current_thread_rechoose_processor(processor_t processor) +{ + return (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_primary != processor); +} + +static int +sched_multiq_runq_count(processor_t processor) +{ + /* + * TODO: Decide whether to keep a count of runnable threads in the pset + * or just return something less than the true count. + * + * This needs to be fast, so no iterating the whole runq. + * + * Another possible decision is to remove this - with global runq + * it doesn't make much sense. 
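+	 *
+	 * Note that main_entryq->count counts sched entries, not threads:
+	 * a single group with ten runnable threads at one priority level
+	 * contributes just 1 to the sum returned below, so the result can
+	 * understate the true number of runnable threads.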
+ */ + return multiq_main_entryq(processor)->count + multiq_bound_runq(processor)->count; +} + +static uint64_t +sched_multiq_runq_stats_count_sum(processor_t processor) +{ + /* + * TODO: This one does need to go through all the runqueues, but it's only needed for + * the sched stats tool + */ + + uint64_t bound_sum = multiq_bound_runq(processor)->runq_stats.count_sum; + + if (processor->cpu_id == processor->processor_set->cpu_set_low) + return bound_sum + multiq_main_entryq(processor)->runq_stats.count_sum; + else + return bound_sum; +} + +static int +sched_multiq_processor_bound_count(processor_t processor) +{ + return multiq_bound_runq(processor)->count; +} + +static void +sched_multiq_processor_queue_shutdown(processor_t processor) +{ + processor_set_t pset = processor->processor_set; + entry_queue_t main_entryq = multiq_main_entryq(processor); + thread_t thread; + queue_head_t tqueue; + + /* We only need to migrate threads if this is the last active processor in the pset */ + if (pset->online_processor_count > 0) { + pset_unlock(pset); + return; + } + + queue_init(&tqueue); + + /* Note that we do not remove bound threads from the queues here */ + + while (main_entryq->count > 0) { + thread = sched_global_dequeue_thread(main_entryq); + enqueue_tail(&tqueue, (queue_entry_t)thread); + } + + pset_unlock(pset); + + while ((thread = (thread_t)(void*)dequeue_head(&tqueue)) != THREAD_NULL) { + thread_lock(thread); + + thread_setrun(thread, SCHED_TAILQ); + + thread_unlock(thread); + } +} + +/* + * Thread is locked + * + * This is why we can never read sched_pri unless we have the thread locked. + * Which we do in the enqueue and remove cases, but not the dequeue case. + */ +static boolean_t +sched_multiq_processor_queue_remove( + processor_t processor, + thread_t thread) +{ + boolean_t removed = FALSE; + + processor_set_t pset = processor->processor_set; + + pset_lock(pset); + + if (thread->runq != PROCESSOR_NULL) { + /* + * Thread is on a run queue and we have a lock on + * that run queue. + */ + + assert(thread->runq == processor); + + if (thread->bound_processor != PROCESSOR_NULL) { + assert(processor == thread->bound_processor); + run_queue_remove(multiq_bound_runq(processor), thread); + thread->runq = PROCESSOR_NULL; + } else { + sched_group_remove_thread(multiq_main_entryq(processor), + thread->sched_group, + thread); + } + + removed = TRUE; + } + + pset_unlock(pset); + + return removed; +} + +/* pset is locked, returned unlocked */ +static thread_t +sched_multiq_steal_thread(processor_set_t pset) +{ + pset_unlock(pset); + return (THREAD_NULL); +} + +/* + * Scan the global queue for candidate groups, and scan those groups for + * candidate threads. + * + * Returns TRUE if retry is needed. + */ +static boolean_t +group_scan(entry_queue_t runq) { + int count; + queue_t q; + sched_group_t group; + sched_entry_t entry; + + if ((count = runq->count) > 0) { + q = runq->queues + runq->highq; + while (count > 0) { + queue_iterate(q, entry, sched_entry_t, links) { + group = group_for_entry(entry); + if (group->runq.count > 0) { + if (runq_scan(&group->runq)) + return (TRUE); + } + count--; + } + q--; + } + } + + return (FALSE); +} + +static void +sched_multiq_thread_update_scan(void) +{ + boolean_t restart_needed = FALSE; + processor_t processor = processor_list; + processor_set_t pset; + thread_t thread; + spl_t s; + + /* + * We update the threads associated with each processor (bound and idle threads) + * and then update the threads in each pset runqueue. 
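+	 *
+	 * Control-flow sketch of the two passes implemented below:
+	 *
+	 *	do {
+	 *		for each processor: runq_scan(bound runq), check idle thread
+	 *		thread_update_process_threads();
+	 *	} while (restart_needed);
+	 *	do {
+	 *		for each pset: group_scan(&pset->pset_runq)
+	 *		thread_update_process_threads();
+	 *	} while (restart_needed);
+	 *
+	 * Either pass restarts when the candidate buffer fills, i.e. when
+	 * runq_scan() or thread_update_add_thread() reports TRUE.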
+ */ + + do { + do { + pset = processor->processor_set; + + s = splsched(); + pset_lock(pset); + + restart_needed = runq_scan(multiq_bound_runq(processor)); + + pset_unlock(pset); + splx(s); + + if (restart_needed) + break; + + thread = processor->idle_thread; + if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { + if (thread_update_add_thread(thread) == FALSE) { + restart_needed = TRUE; + break; + } + } + } while ((processor = processor->processor_list) != NULL); + + /* Ok, we now have a collection of candidates -- fix them. */ + thread_update_process_threads(); + + } while (restart_needed); + + pset = &pset0; + + do { + do { + s = splsched(); + pset_lock(pset); + + restart_needed = group_scan(&pset->pset_runq); + + pset_unlock(pset); + splx(s); + + if (restart_needed) + break; + } while ((pset = pset->pset_list) != NULL); + + /* Ok, we now have a collection of candidates -- fix them. */ + thread_update_process_threads(); + + } while (restart_needed); +} + + diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index 11bad560a..3a88e11fb 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -77,13 +77,16 @@ #include #include +#ifdef CONFIG_MACH_APPROXIMATE_TIME +#include +#endif + #include #include #include #include #include #include -#include #include #include #include @@ -91,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -108,15 +112,19 @@ #include +#if defined(CONFIG_TELEMETRY) && defined(CONFIG_SCHED_TIMESHARE_CORE) +#include +#endif + struct rt_queue rt_runq; #define RT_RUNQ ((processor_t)-1) decl_simple_lock_data(static,rt_lock); -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_FAIRSHARE_CORE) static struct fairshare_queue fs_runq; #define FS_RUNQ ((processor_t)-2) decl_simple_lock_data(static,fs_lock); -#endif +#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */ int default_preemption_rate = DEFAULT_PREEMPTION_RATE; @@ -138,7 +146,7 @@ uint64_t max_poll_computation; uint64_t max_unsafe_computation; uint64_t sched_safe_duration; -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) uint32_t std_quantum; uint32_t min_std_quantum; @@ -147,7 +155,7 @@ uint32_t bg_quantum; uint32_t std_quantum_us; uint32_t bg_quantum_us; -#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ uint32_t thread_depress_time; uint32_t default_timeshare_computation; @@ -156,10 +164,13 @@ uint32_t default_timeshare_constraint; uint32_t max_rt_quantum; uint32_t min_rt_quantum; -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) unsigned sched_tick; uint32_t sched_tick_interval; +#if defined(CONFIG_TELEMETRY) +uint32_t sched_telemetry_interval; +#endif /* CONFIG_TELEMETRY */ uint32_t sched_pri_shift = INT8_MAX; uint32_t sched_background_pri_shift = INT8_MAX; @@ -169,7 +180,9 @@ uint32_t sched_use_combined_fgbg_decay = 0; uint32_t sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */ -static boolean_t sched_traditional_use_pset_runqueue = FALSE; +/* Allow foreground to decay past default to resolve inversions */ +#define DEFAULT_DECAY_BAND_LIMIT ((BASEPRI_FOREGROUND - BASEPRI_DEFAULT) + 2) +int sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT; /* Defaults for timer deadline profiling */ #define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT 2000000 /* Timers with deadlines <= @@ -182,6 +195,12 @@ 
uint64_t timer_deadline_tracking_bin_2; thread_t sched_maintenance_thread; +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + +#if defined(CONFIG_SCHED_TRADITIONAL) + +static boolean_t sched_traditional_use_pset_runqueue = FALSE; + __attribute__((always_inline)) static inline run_queue_t runq_for_processor(processor_t processor) { @@ -228,16 +247,17 @@ uint32_t sched_load_average, sched_mach_factor; /* Forwards */ -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) static void load_shift_init(void); static void preempt_pri_init(void); -#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ static thread_t thread_select( thread_t thread, - processor_t processor); + processor_t processor, + ast_t reason); #if CONFIG_SCHED_IDLE_IN_PLACE static thread_t thread_select_idle( @@ -251,7 +271,8 @@ thread_t processor_idle( ast_t csw_check_locked( processor_t processor, - processor_set_t pset); + processor_set_t pset, + ast_t check_reason); #if defined(CONFIG_SCHED_TRADITIONAL) @@ -285,8 +306,6 @@ processor_queue_remove( static boolean_t processor_queue_empty(processor_t processor); -static boolean_t priority_is_urgent(int priority); - static ast_t processor_csw_check(processor_t processor); static boolean_t processor_queue_has_priority(processor_t processor, @@ -302,16 +321,13 @@ static boolean_t sched_traditional_with_pset_runqueue_processor_queue_empty(proc static uint64_t sched_traditional_processor_runq_stats_count_sum(processor_t processor); static uint64_t sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor); -#endif - -#if defined(CONFIG_SCHED_TRADITIONAL) +static int sched_traditional_processor_bound_count(processor_t processor); -static void -sched_traditional_init(void); +#endif -static void -sched_traditional_timebase_init(void); + +#if defined(CONFIG_SCHED_TRADITIONAL) static void sched_traditional_processor_init(processor_t processor); @@ -334,22 +350,15 @@ static void sched_timer_deadline_tracking_init(void); #if defined(CONFIG_SCHED_TRADITIONAL) -static void -sched_traditional_maintenance_continue(void); - -static uint32_t -sched_traditional_initial_quantum_size(thread_t thread); static sched_mode_t sched_traditional_initial_thread_sched_mode(task_t parent_task); -static boolean_t -sched_traditional_supports_timeshare_mode(void); - static thread_t sched_traditional_choose_thread( - processor_t processor, - int priority); + processor_t processor, + int priority, + __unused ast_t reason); #endif @@ -360,13 +369,10 @@ extern int debug_task; #define TLOG(a, fmt, args...) 
do {} while (0) #endif -#if DEBUG -static +__assert_only static boolean_t thread_runnable( thread_t thread); -#endif /*DEBUG*/ - /* * State machine * @@ -393,84 +399,86 @@ boolean_t thread_runnable( * */ -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) int8_t sched_load_shifts[NRQS]; int sched_preempt_pri[NRQBM]; -#endif +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ #if defined(CONFIG_SCHED_TRADITIONAL) const struct sched_dispatch_table sched_traditional_dispatch = { - sched_traditional_init, - sched_traditional_timebase_init, - sched_traditional_processor_init, - sched_traditional_pset_init, - sched_traditional_maintenance_continue, - sched_traditional_choose_thread, - steal_thread, - compute_priority, - choose_processor, - processor_enqueue, - processor_queue_shutdown, - processor_queue_remove, - processor_queue_empty, - priority_is_urgent, - processor_csw_check, - processor_queue_has_priority, - sched_traditional_initial_quantum_size, - sched_traditional_initial_thread_sched_mode, - sched_traditional_supports_timeshare_mode, - can_update_priority, - update_priority, - lightweight_update_priority, - sched_traditional_quantum_expire, - should_current_thread_rechoose_processor, - sched_traditional_processor_runq_count, - sched_traditional_processor_runq_stats_count_sum, - sched_traditional_fairshare_init, - sched_traditional_fairshare_runq_count, - sched_traditional_fairshare_runq_stats_count_sum, - sched_traditional_fairshare_enqueue, - sched_traditional_fairshare_dequeue, - sched_traditional_fairshare_queue_remove, - TRUE /* direct_dispatch_to_idle_processors */ + .init = sched_traditional_init, + .timebase_init = sched_traditional_timebase_init, + .processor_init = sched_traditional_processor_init, + .pset_init = sched_traditional_pset_init, + .maintenance_continuation = sched_traditional_maintenance_continue, + .choose_thread = sched_traditional_choose_thread, + .steal_thread = steal_thread, + .compute_priority = compute_priority, + .choose_processor = choose_processor, + .processor_enqueue = processor_enqueue, + .processor_queue_shutdown = processor_queue_shutdown, + .processor_queue_remove = processor_queue_remove, + .processor_queue_empty = processor_queue_empty, + .priority_is_urgent = priority_is_urgent, + .processor_csw_check = processor_csw_check, + .processor_queue_has_priority = processor_queue_has_priority, + .initial_quantum_size = sched_traditional_initial_quantum_size, + .initial_thread_sched_mode = sched_traditional_initial_thread_sched_mode, + .can_update_priority = can_update_priority, + .update_priority = update_priority, + .lightweight_update_priority = lightweight_update_priority, + .quantum_expire = sched_traditional_quantum_expire, + .should_current_thread_rechoose_processor = should_current_thread_rechoose_processor, + .processor_runq_count = sched_traditional_processor_runq_count, + .processor_runq_stats_count_sum = sched_traditional_processor_runq_stats_count_sum, + .fairshare_init = sched_traditional_fairshare_init, + .fairshare_runq_count = sched_traditional_fairshare_runq_count, + .fairshare_runq_stats_count_sum = sched_traditional_fairshare_runq_stats_count_sum, + .fairshare_enqueue = sched_traditional_fairshare_enqueue, + .fairshare_dequeue = sched_traditional_fairshare_dequeue, + .fairshare_queue_remove = sched_traditional_fairshare_queue_remove, + .processor_bound_count = sched_traditional_processor_bound_count, + .thread_update_scan = thread_update_scan, + .direct_dispatch_to_idle_processors = TRUE, }; const struct 
sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = { - sched_traditional_with_pset_runqueue_init, - sched_traditional_timebase_init, - sched_traditional_processor_init, - sched_traditional_pset_init, - sched_traditional_maintenance_continue, - sched_traditional_choose_thread, - steal_thread, - compute_priority, - choose_processor, - processor_enqueue, - processor_queue_shutdown, - processor_queue_remove, - sched_traditional_with_pset_runqueue_processor_queue_empty, - priority_is_urgent, - processor_csw_check, - processor_queue_has_priority, - sched_traditional_initial_quantum_size, - sched_traditional_initial_thread_sched_mode, - sched_traditional_supports_timeshare_mode, - can_update_priority, - update_priority, - lightweight_update_priority, - sched_traditional_quantum_expire, - should_current_thread_rechoose_processor, - sched_traditional_processor_runq_count, - sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum, - sched_traditional_fairshare_init, - sched_traditional_fairshare_runq_count, - sched_traditional_fairshare_runq_stats_count_sum, - sched_traditional_fairshare_enqueue, - sched_traditional_fairshare_dequeue, - sched_traditional_fairshare_queue_remove, - FALSE /* direct_dispatch_to_idle_processors */ + .init = sched_traditional_with_pset_runqueue_init, + .timebase_init = sched_traditional_timebase_init, + .processor_init = sched_traditional_processor_init, + .pset_init = sched_traditional_pset_init, + .maintenance_continuation = sched_traditional_maintenance_continue, + .choose_thread = sched_traditional_choose_thread, + .steal_thread = steal_thread, + .compute_priority = compute_priority, + .choose_processor = choose_processor, + .processor_enqueue = processor_enqueue, + .processor_queue_shutdown = processor_queue_shutdown, + .processor_queue_remove = processor_queue_remove, + .processor_queue_empty = sched_traditional_with_pset_runqueue_processor_queue_empty, + .priority_is_urgent = priority_is_urgent, + .processor_csw_check = processor_csw_check, + .processor_queue_has_priority = processor_queue_has_priority, + .initial_quantum_size = sched_traditional_initial_quantum_size, + .initial_thread_sched_mode = sched_traditional_initial_thread_sched_mode, + .can_update_priority = can_update_priority, + .update_priority = update_priority, + .lightweight_update_priority = lightweight_update_priority, + .quantum_expire = sched_traditional_quantum_expire, + .should_current_thread_rechoose_processor = should_current_thread_rechoose_processor, + .processor_runq_count = sched_traditional_processor_runq_count, + .processor_runq_stats_count_sum = sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum, + .fairshare_init = sched_traditional_fairshare_init, + .fairshare_runq_count = sched_traditional_fairshare_runq_count, + .fairshare_runq_stats_count_sum = sched_traditional_fairshare_runq_stats_count_sum, + .fairshare_enqueue = sched_traditional_fairshare_enqueue, + .fairshare_dequeue = sched_traditional_fairshare_dequeue, + .fairshare_queue_remove = sched_traditional_fairshare_queue_remove, + .processor_bound_count = sched_traditional_processor_bound_count, + .thread_update_scan = thread_update_scan, + .direct_dispatch_to_idle_processors = FALSE, }; #endif @@ -516,6 +524,19 @@ sched_init(void) } } + + if (!PE_parse_boot_argn("sched_pri_decay_limit", &sched_pri_decay_band_limit, sizeof(sched_pri_decay_band_limit))) { + /* No boot-args, check in device tree */ + if (!PE_get_default("kern.sched_pri_decay_limit", + &sched_pri_decay_band_limit, + 
sizeof(sched_pri_decay_band_limit))) { + /* Allow decay all the way to normal limits */ + sched_pri_decay_band_limit = DEFAULT_DECAY_BAND_LIMIT; + } + } + + kprintf("Setting scheduler priority decay band limit %d\n", sched_pri_decay_band_limit); + if (strlen(sched_arg) > 0) { if (0) { /* Allow pattern below */ @@ -524,66 +545,67 @@ sched_init(void) sched_current_dispatch = &sched_traditional_dispatch; _sched_enum = sched_enum_traditional; strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string)); - kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalString); } else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) { sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; _sched_enum = sched_enum_traditional_with_pset_runqueue; strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); - kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalWithPsetRunqueueString); #endif #if defined(CONFIG_SCHED_PROTO) } else if (0 == strcmp(sched_arg, kSchedProtoString)) { sched_current_dispatch = &sched_proto_dispatch; _sched_enum = sched_enum_proto; strlcpy(sched_string, kSchedProtoString, sizeof(sched_string)); - kprintf("Scheduler: Runtime selection of %s\n", kSchedProtoString); #endif #if defined(CONFIG_SCHED_GRRR) } else if (0 == strcmp(sched_arg, kSchedGRRRString)) { sched_current_dispatch = &sched_grrr_dispatch; _sched_enum = sched_enum_grrr; strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string)); - kprintf("Scheduler: Runtime selection of %s\n", kSchedGRRRString); #endif -#if defined(CONFIG_SCHED_FIXEDPRIORITY) - } else if (0 == strcmp(sched_arg, kSchedFixedPriorityString)) { - sched_current_dispatch = &sched_fixedpriority_dispatch; - _sched_enum = sched_enum_fixedpriority; - strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string)); - kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityString); - } else if (0 == strcmp(sched_arg, kSchedFixedPriorityWithPsetRunqueueString)) { - sched_current_dispatch = &sched_fixedpriority_with_pset_runqueue_dispatch; - _sched_enum = sched_enum_fixedpriority_with_pset_runqueue; - strlcpy(sched_string, kSchedFixedPriorityWithPsetRunqueueString, sizeof(sched_string)); - kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityWithPsetRunqueueString); +#if defined(CONFIG_SCHED_MULTIQ) + } else if (0 == strcmp(sched_arg, kSchedMultiQString)) { + sched_current_dispatch = &sched_multiq_dispatch; + _sched_enum = sched_enum_multiq; + strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string)); + } else if (0 == strcmp(sched_arg, kSchedDualQString)) { + sched_current_dispatch = &sched_dualq_dispatch; + _sched_enum = sched_enum_dualq; + strlcpy(sched_string, kSchedDualQString, sizeof(sched_string)); #endif } else { +#if defined(CONFIG_SCHED_TRADITIONAL) + printf("Unrecognized scheduler algorithm: %s\n", sched_arg); + printf("Scheduler: Using instead: %s\n", kSchedTraditionalWithPsetRunqueueString); + + sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; + _sched_enum = sched_enum_traditional_with_pset_runqueue; + strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); +#else panic("Unrecognized scheduler algorithm: %s", sched_arg); +#endif } + kprintf("Scheduler: Runtime selection of %s\n", sched_string); } else { -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_MULTIQ) + sched_current_dispatch = &sched_multiq_dispatch; + _sched_enum = sched_enum_multiq; + 
strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string)); +#elif defined(CONFIG_SCHED_TRADITIONAL) sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; _sched_enum = sched_enum_traditional_with_pset_runqueue; strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); - kprintf("Scheduler: Default of %s\n", kSchedTraditionalWithPsetRunqueueString); #elif defined(CONFIG_SCHED_PROTO) sched_current_dispatch = &sched_proto_dispatch; _sched_enum = sched_enum_proto; strlcpy(sched_string, kSchedProtoString, sizeof(sched_string)); - kprintf("Scheduler: Default of %s\n", kSchedProtoString); #elif defined(CONFIG_SCHED_GRRR) sched_current_dispatch = &sched_grrr_dispatch; _sched_enum = sched_enum_grrr; strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string)); - kprintf("Scheduler: Default of %s\n", kSchedGRRRString); -#elif defined(CONFIG_SCHED_FIXEDPRIORITY) - sched_current_dispatch = &sched_fixedpriority_dispatch; - _sched_enum = sched_enum_fixedpriority; - strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string)); - kprintf("Scheduler: Default of %s\n", kSchedFixedPriorityString); #else #error No default scheduler implementation #endif + kprintf("Scheduler: Default of %s\n", sched_string); } SCHED(init)(); @@ -608,9 +630,9 @@ sched_timebase_init(void) sched_realtime_timebase_init(); } -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) -static void +void sched_traditional_init(void) { /* @@ -634,7 +656,7 @@ sched_traditional_init(void) sched_tick = 0; } -static void +void sched_traditional_timebase_init(void) { uint64_t abstime; @@ -672,16 +694,27 @@ sched_traditional_timebase_init(void) abstime >>= 1; sched_fixed_shift = shift; - max_unsafe_computation = max_unsafe_quanta * std_quantum; - sched_safe_duration = 2 * max_unsafe_quanta * std_quantum; + max_unsafe_computation = ((uint64_t)max_unsafe_quanta) * std_quantum; + sched_safe_duration = 2 * ((uint64_t)max_unsafe_quanta) * std_quantum; - max_poll_computation = max_poll_quanta * std_quantum; + max_poll_computation = ((uint64_t)max_poll_quanta) * std_quantum; thread_depress_time = 1 * std_quantum; default_timeshare_computation = std_quantum / 2; default_timeshare_constraint = std_quantum; +#if defined(CONFIG_TELEMETRY) + /* interval for high frequency telemetry */ + clock_interval_to_absolutetime_interval(10, NSEC_PER_MSEC, &abstime); + assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); + sched_telemetry_interval = (uint32_t)abstime; +#endif } +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + + +#if defined(CONFIG_SCHED_TRADITIONAL) + static void sched_traditional_processor_init(processor_t processor) { @@ -709,7 +742,7 @@ sched_traditional_with_pset_runqueue_init(void) #endif /* CONFIG_SCHED_TRADITIONAL */ -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_FAIRSHARE_CORE) void sched_traditional_fairshare_init(void) { @@ -718,7 +751,7 @@ sched_traditional_fairshare_init(void) fs_runq.count = 0; queue_init(&fs_runq.queue); } -#endif +#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ static void sched_realtime_init(void) @@ -747,7 +780,7 @@ sched_realtime_timebase_init(void) } -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) /* * Set up values for timeshare @@ -814,7 +847,7 @@ preempt_pri_init(void) setbit(i, p); } -#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ /* * Thread wait timer 
expiration. @@ -855,6 +888,7 @@ thread_unblock( { boolean_t result = FALSE; thread_t cthread = current_thread(); + uint32_t new_run_count; /* * Set wait_result. @@ -884,12 +918,12 @@ thread_unblock( /* * Update run counts. */ - sched_run_incr(); + new_run_count = sched_run_incr(thread); if (thread->sched_mode == TH_MODE_TIMESHARE) { - sched_share_incr(); + sched_share_incr(thread); - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_incr(); + if (thread->sched_flags & TH_SFLAG_THROTTLED) + sched_background_incr(thread); } } else { @@ -907,6 +941,7 @@ thread_unblock( assert((thread->state & TH_IDLE) == 0); #endif + new_run_count = sched_run_count; /* updated in thread_select_idle() */ result = TRUE; } @@ -914,13 +949,16 @@ thread_unblock( * Calculate deadline for real-time threads. */ if (thread->sched_mode == TH_MODE_REALTIME) { - thread->realtime.deadline = thread->realtime.constraint + mach_absolute_time(); + uint64_t ctime; + + ctime = mach_absolute_time(); + thread->realtime.deadline = thread->realtime.constraint + ctime; } /* * Clear old quantum, fail-safe computation, etc. */ - thread->current_quantum = 0; + thread->quantum_remaining = 0; thread->computation_metered = 0; thread->reason = AST_NONE; @@ -970,12 +1008,9 @@ thread_unblock( thread->callout_woke_thread = FALSE; } - /* Event should only be triggered if thread is not already running */ - if (result == FALSE) { - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, - (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, 0, 0); - } + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), thread->sched_pri, thread->wait_result, new_run_count, 0); DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info); @@ -1123,7 +1158,8 @@ assert_wait( register wait_queue_t wq; register int index; - assert(event != NO_EVENT); + if(event == NO_EVENT) + panic("assert_wait() called with NO_EVENT"); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, @@ -1147,7 +1183,9 @@ assert_wait_timeout( uint64_t deadline; spl_t s; - assert(event != NO_EVENT); + if(event == NO_EVENT) + panic("assert_wait_timeout() called with NO_EVENT"); + wqueue = &wait_queues[wait_hash(event)]; s = splsched(); @@ -1197,7 +1235,9 @@ assert_wait_timeout_with_leeway( clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop); - assert(event != NO_EVENT); + if(event == NO_EVENT) + panic("assert_wait_timeout_with_leeway() called with NO_EVENT"); + wqueue = &wait_queues[wait_hash(event)]; s = splsched(); @@ -1267,7 +1307,9 @@ assert_wait_deadline_with_leeway( wait_queue_t wqueue; spl_t s; - assert(event != NO_EVENT); + if(event == NO_EVENT) + panic("assert_wait_deadline_with_leeway() called with NO_EVENT"); + wqueue = &wait_queues[wait_hash(event)]; s = splsched(); @@ -1290,85 +1332,6 @@ assert_wait_deadline_with_leeway( return (wresult); } -/* - * thread_sleep_fast_usimple_lock: - * - * Cause the current thread to wait until the specified event - * occurs. The specified simple_lock is unlocked before releasing - * the cpu and re-acquired as part of waking up. - * - * This is the simple lock sleep interface for components that use a - * faster version of simple_lock() than is provided by usimple_lock(). 
- */ -__private_extern__ wait_result_t -thread_sleep_fast_usimple_lock( - event_t event, - simple_lock_t lock, - wait_interrupt_t interruptible) -{ - wait_result_t res; - - res = assert_wait(event, interruptible); - if (res == THREAD_WAITING) { - simple_unlock(lock); - res = thread_block(THREAD_CONTINUE_NULL); - simple_lock(lock); - } - return res; -} - - -/* - * thread_sleep_usimple_lock: - * - * Cause the current thread to wait until the specified event - * occurs. The specified usimple_lock is unlocked before releasing - * the cpu and re-acquired as part of waking up. - * - * This is the simple lock sleep interface for components where - * simple_lock() is defined in terms of usimple_lock(). - */ -wait_result_t -thread_sleep_usimple_lock( - event_t event, - usimple_lock_t lock, - wait_interrupt_t interruptible) -{ - wait_result_t res; - - res = assert_wait(event, interruptible); - if (res == THREAD_WAITING) { - usimple_unlock(lock); - res = thread_block(THREAD_CONTINUE_NULL); - usimple_lock(lock); - } - return res; -} - -/* - * thread_sleep_lock_write: - * - * Cause the current thread to wait until the specified event - * occurs. The specified (write) lock is unlocked before releasing - * the cpu. The (write) lock will be re-acquired before returning. - */ -wait_result_t -thread_sleep_lock_write( - event_t event, - lock_t *lock, - wait_interrupt_t interruptible) -{ - wait_result_t res; - - res = assert_wait(event, interruptible); - if (res == THREAD_WAITING) { - lock_write_done(lock); - res = thread_block(THREAD_CONTINUE_NULL); - lock_write(lock); - } - return res; -} - /* * thread_isoncpu: * @@ -1378,7 +1341,7 @@ thread_sleep_lock_write( * thread state to be serialized in the thread PCB. * * Thread locked, returns the same way. While locked, fields - * like "state" and "runq" cannot change. + * like "state" cannot change. "runq" can change only from set to unset. */ static inline boolean_t thread_isoncpu(thread_t thread) @@ -1388,6 +1351,7 @@ thread_isoncpu(thread_t thread) return (FALSE); /* Waiting on a runqueue, not currently running */ + /* TODO: This is invalid - it can get dequeued without thread lock, but not context switched. */ if (thread->runq != PROCESSOR_NULL) return (FALSE); @@ -1701,6 +1665,9 @@ thread_wakeup_prim_internal( register wait_queue_t wq; register int index; + if(event == NO_EVENT) + panic("thread_wakeup_prim() called with NO_EVENT"); + index = wait_hash(event); wq = &wait_queues[index]; if (one_thread) @@ -1713,6 +1680,7 @@ thread_wakeup_prim_internal( * thread_bind: * * Force the current thread to execute on the specified processor. + * Takes effect after the next thread_block(). * * Returns the previous binding. PROCESSOR_NULL means * not bound. @@ -1730,6 +1698,9 @@ thread_bind( s = splsched(); thread_lock(self); + /* */ + assert(self->sched_pri < BASEPRI_RTQUEUES); + prev = self->bound_processor; self->bound_processor = processor; @@ -1739,6 +1710,66 @@ thread_bind( return (prev); } +/* Invoked prior to idle entry to determine if, on SMT capable processors, an SMT + * rebalancing opportunity exists when a core is (instantaneously) idle, but + * other SMT-capable cores may be over-committed. TODO: some possible negatives: + * IPI thrash if this core does not remain idle following the load balancing ASTs + * Idle "thrash", when IPI issue is followed by idle entry/core power down + * followed by a wakeup shortly thereafter. 
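+ *
+ * Example on a hypothetical two-core, four-thread SMT topology: if this
+ * core's primary and secondary (say CPU0/CPU1) are both idle while another
+ * core's primary and secondary (CPU2/CPU3) are both RUNNING below
+ * BASEPRI_RTQUEUES, the scan below selects the running secondary (CPU3,
+ * whose processor_primary is CPU2) and IPIs it via cause_ast_check(), so
+ * its thread can re-choose an idle primary instead of sharing a core.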
+ */ + +/* Invoked with pset locked, returns with pset unlocked */ +#if (DEVELOPMENT || DEBUG) +int sched_smt_balance = 1; +#endif + +static void +sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) { + processor_t ast_processor = NULL; + +#if (DEVELOPMENT || DEBUG) + if (__improbable(sched_smt_balance == 0)) + goto smt_balance_exit; +#endif + + assert(cprocessor == current_processor()); + if (cprocessor->is_SMT == FALSE) + goto smt_balance_exit; + + processor_t sib_processor = cprocessor->processor_secondary ? cprocessor->processor_secondary : cprocessor->processor_primary; + + /* Determine if both this processor and its sibling are idle, + * indicating an SMT rebalancing opportunity. + */ + if (sib_processor->state != PROCESSOR_IDLE) + goto smt_balance_exit; + + processor_t sprocessor; + + sprocessor = (processor_t)queue_first(&cpset->active_queue); + + while (!queue_end(&cpset->active_queue, (queue_entry_t)sprocessor)) { + if ((sprocessor->state == PROCESSOR_RUNNING) && + (sprocessor->processor_primary != sprocessor) && + (sprocessor->processor_primary->state == PROCESSOR_RUNNING) && + (sprocessor->current_pri < BASEPRI_RTQUEUES) && + ((cpset->pending_AST_cpu_mask & (1U << sprocessor->cpu_id)) == 0)) { + assert(sprocessor != cprocessor); + ast_processor = sprocessor; + break; + } + sprocessor = (processor_t)queue_next((queue_entry_t)sprocessor); + } + +smt_balance_exit: + pset_unlock(cpset); + + if (ast_processor) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_SMT_BALANCE), ast_processor->cpu_id, ast_processor->state, ast_processor->processor_primary->state, 0, 0); + cause_ast_check(ast_processor); + } +} + /* * thread_select: * @@ -1749,11 +1780,11 @@ thread_bind( static thread_t thread_select( thread_t thread, - processor_t processor) + processor_t processor, + ast_t reason) { processor_set_t pset = processor->processor_set; thread_t new_thread = THREAD_NULL; - boolean_t inactive_state; assert(processor == current_processor()); @@ -1766,13 +1797,13 @@ thread_select( processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; pset_lock(pset); - assert(pset->low_count); - assert(pset->low_pri); + assert(processor->state != PROCESSOR_OFF_LINE); - if (processor->processor_meta != PROCESSOR_META_NULL && processor->processor_meta->primary != processor) { + if (processor->processor_primary != processor) { /* * Should this secondary SMT processor attempt to find work? For pset runqueue systems, * we should look for work only under the same conditions that choose_processor() @@ -1781,13 +1812,11 @@ thread_select( * An exception is that bound threads are dispatched to a processor without going through * choose_processor(), so in those cases we should continue trying to dequeue work. */ - if (!processor->runq_bound_count && !queue_empty(&pset->idle_queue) && !rt_runq.count) { + if (!SCHED(processor_bound_count)(processor) && !queue_empty(&pset->idle_queue) && !rt_runq.count) { goto idle; } } - inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor); - simple_lock(&rt_lock); /* @@ -1796,28 +1825,21 @@ thread_select( * bound to a different processor, nor be in the wrong * processor set. 
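	 *
	 * For example, a thread that has just called thread_bind() to a
	 * different processor still satisfies (state & ~TH_SUSP) == TH_RUN
	 * here, but fails the bound_processor test below, so we fall
	 * through and select some other thread rather than continuing to
	 * run it on this processor.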
*/ - if ( ((thread->state & ~TH_SUSP) == TH_RUN) && - (thread->sched_pri >= BASEPRI_RTQUEUES || - processor->processor_meta == PROCESSOR_META_NULL || - processor->processor_meta->primary == processor) && - (thread->bound_processor == PROCESSOR_NULL || - thread->bound_processor == processor) && - (thread->affinity_set == AFFINITY_SET_NULL || - thread->affinity_set->aset_pset == pset)) { - if (thread->sched_pri >= BASEPRI_RTQUEUES && - first_timeslice(processor)) { + if (((thread->state & ~TH_SUSP) == TH_RUN) && + (thread->sched_pri >= BASEPRI_RTQUEUES || processor->processor_primary == processor) && + (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor) && + (thread->affinity_set == AFFINITY_SET_NULL || thread->affinity_set->aset_pset == pset)) { + if (thread->sched_pri >= BASEPRI_RTQUEUES && first_timeslice(processor)) { if (rt_runq.count > 0) { - register queue_t q; + thread_t next_rt; - q = &rt_runq.queue; - if (((thread_t)q->next)->realtime.deadline < - processor->deadline) { - if ((((thread_t)q->next)->bound_processor == PROCESSOR_NULL) || (((thread_t)q->next)->bound_processor == processor)) { - thread = (thread_t)dequeue_head(q); + next_rt = (thread_t)queue_first(&rt_runq.queue); + if (next_rt->realtime.deadline < processor->deadline && + (next_rt->bound_processor == PROCESSOR_NULL || next_rt->bound_processor == processor)) { + thread = (thread_t)dequeue_head(&rt_runq.queue); thread->runq = PROCESSOR_NULL; SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); rt_runq.count--; - } } } @@ -1830,16 +1852,11 @@ thread_select( return (thread); } - if (!inactive_state && (thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) && - (new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri)) == THREAD_NULL) { + if ((thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) && (new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri, reason)) == THREAD_NULL) { simple_unlock(&rt_lock); - /* I am the highest priority runnable (non-idle) thread */ - - pset_pri_hint(pset, processor, processor->current_pri); - - pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor)); + /* This thread is still the highest priority runnable (non-idle) thread */ processor->deadline = UINT64_MAX; @@ -1851,15 +1868,9 @@ thread_select( if (new_thread != THREAD_NULL || (SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? 
IDLEPRI : BASEPRI_RTQUEUES, TRUE) && - (new_thread = SCHED(choose_thread)(processor, MINPRI)) != THREAD_NULL)) { + (new_thread = SCHED(choose_thread)(processor, MINPRI, reason)) != THREAD_NULL)) { simple_unlock(&rt_lock); - if (!inactive_state) { - pset_pri_hint(pset, processor, new_thread->sched_pri); - - pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor)); - } - processor->deadline = UINT64_MAX; pset_unlock(pset); @@ -1867,9 +1878,11 @@ thread_select( } if (rt_runq.count > 0) { - thread = (thread_t)dequeue_head(&rt_runq.queue); + thread_t next_rt = (thread_t)queue_first(&rt_runq.queue); + + if (__probable((next_rt->bound_processor == NULL || (next_rt->bound_processor == processor)))) { + thread = (thread_t)dequeue_head(&rt_runq.queue); - if (__probable((thread->bound_processor == NULL || (thread->bound_processor == processor)))) { thread->runq = PROCESSOR_NULL; SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); rt_runq.count--; @@ -1880,8 +1893,6 @@ thread_select( pset_unlock(pset); return (thread); - } else { - enqueue_head(&rt_runq.queue, (queue_entry_t)thread); } } @@ -1900,24 +1911,6 @@ thread_select( processor->deadline = UINT64_MAX; - /* - * Set processor inactive based on - * indication from the platform code. - */ - if (inactive_state) { - if (processor->state == PROCESSOR_RUNNING) - remqueue((queue_entry_t)processor); - else - if (processor->state == PROCESSOR_IDLE) - remqueue((queue_entry_t)processor); - - processor->state = PROCESSOR_INACTIVE; - - pset_unlock(pset); - - return (processor->idle_thread); - } - /* * No runnable threads, attempt to steal * from other processors. @@ -1945,24 +1938,24 @@ thread_select( remqueue((queue_entry_t)processor); processor->state = PROCESSOR_IDLE; - if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) { + if (processor->processor_primary == processor) { enqueue_head(&pset->idle_queue, (queue_entry_t)processor); - pset_pri_init_hint(pset, processor); - pset_count_init_hint(pset, processor); } else { - enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor); - pset_unlock(pset); - return (processor->idle_thread); + enqueue_head(&pset->idle_secondary_queue, (queue_entry_t)processor); } } - pset_unlock(pset); + /* Invoked with pset locked, returns with pset unlocked */ + sched_SMT_balance(processor, pset); #if CONFIG_SCHED_IDLE_IN_PLACE /* * Choose idle thread if fast idle is not possible. */ + if (processor->processor_primary != processor) + return (processor->idle_thread); + if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES) return (processor->idle_thread); @@ -2007,16 +2000,17 @@ thread_select_idle( int urgency; if (thread->sched_mode == TH_MODE_TIMESHARE) { - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_decr(); + if (thread->sched_flags & TH_SFLAG_THROTTLED) + sched_background_decr(thread); - sched_share_decr(); + sched_share_decr(thread); } - sched_run_decr(); + sched_run_decr(thread); thread->state |= TH_IDLE; processor->current_pri = IDLEPRI; processor->current_thmode = TH_MODE_NONE; + processor->current_sfi_class = SFI_CLASS_KERNEL; /* Reload precise timing global policy to thread-local policy */ thread->precise_user_kernel_time = use_precise_user_kernel_time(thread); @@ -2027,6 +2021,11 @@ thread_select_idle( * Switch execution timing to processor idle thread. 
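	 *
	 * Sketch of the accounting swap that follows: system_timer charges
	 * stop accruing to the blocked thread and start accruing to
	 * processor->idle_thread, and PROCESSOR_DATA(processor,
	 * kernel_timer) is repointed likewise, until a runnable thread is
	 * chosen and the timers are switched back.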
*/ processor->last_dispatch = mach_absolute_time(); + +#ifdef CONFIG_MACH_APPROXIMATE_TIME + commpage_update_mach_approximate_time(processor->last_dispatch); +#endif + thread->last_run_time = processor->last_dispatch; thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer); PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer; @@ -2064,10 +2063,8 @@ thread_select_idle( PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; thread_quantum_init(thread); - thread->last_quantum_refill_time = processor->last_dispatch; - - processor->quantum_end = processor->last_dispatch + thread->current_quantum; - timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL); + processor->quantum_end = processor->last_dispatch + thread->quantum_remaining; + timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); processor->timeslice = 1; thread->computation_epoch = processor->last_dispatch; @@ -2086,12 +2083,12 @@ thread_select_idle( thread_tell_urgency(urgency, arg1, arg2, new_thread); - sched_run_incr(); + sched_run_incr(thread); if (thread->sched_mode == TH_MODE_TIMESHARE) { - sched_share_incr(); + sched_share_incr(thread); - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_incr(); + if (thread->sched_flags & TH_SFLAG_THROTTLED) + sched_background_incr(thread); } return (new_thread); @@ -2101,12 +2098,13 @@ thread_select_idle( #if defined(CONFIG_SCHED_TRADITIONAL) static thread_t sched_traditional_choose_thread( - processor_t processor, - int priority) + processor_t processor, + int priority, + __unused ast_t reason) { thread_t thread; - thread = choose_thread(processor, runq_for_processor(processor), priority); + thread = choose_thread_from_runq(processor, runq_for_processor(processor), priority); if (thread != THREAD_NULL) { runq_consider_decr_bound_count(processor, thread); } @@ -2116,10 +2114,10 @@ sched_traditional_choose_thread( #endif /* defined(CONFIG_SCHED_TRADITIONAL) */ -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_TRADITIONAL) /* - * choose_thread: + * choose_thread_from_runq: * * Locate a thread to execute from the processor run queue * and return it. Only choose a thread with greater or equal @@ -2129,7 +2127,7 @@ sched_traditional_choose_thread( * on failure. */ thread_t -choose_thread( +choose_thread_from_runq( processor_t processor, run_queue_t rq, int priority) @@ -2170,7 +2168,7 @@ choose_thread( return (THREAD_NULL); } -#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) */ +#endif /* defined(CONFIG_SCHED_TRADITIONAL) */ /* * Perform a context switch and start executing the new thread. @@ -2180,32 +2178,6 @@ choose_thread( * Called at splsched. 
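 *
 * Caller discipline, as a minimal sketch (thread_block_reason() is the
 * canonical caller):
 *
 *	s = splsched();
 *	processor = current_processor();
 *	do {
 *		thread_lock(self);
 *		new_thread = thread_select(self, processor, reason);
 *		thread_unlock(self);
 *	} while (!thread_invoke(self, new_thread, reason));
 *	splx(s);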
*/ -#define funnel_release_check(thread, debug) \ -MACRO_BEGIN \ - if ((thread)->funnel_state & TH_FN_OWNED) { \ - (thread)->funnel_state = TH_FN_REFUNNEL; \ - KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \ - (thread)->funnel_lock, (debug), 0, 0, 0); \ - funnel_unlock((thread)->funnel_lock); \ - } \ -MACRO_END - -#define funnel_refunnel_check(thread, debug) \ -MACRO_BEGIN \ - if ((thread)->funnel_state & TH_FN_REFUNNEL) { \ - kern_return_t result = (thread)->wait_result; \ - \ - (thread)->funnel_state = 0; \ - KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \ - (thread)->funnel_lock, (debug), 0, 0, 0); \ - funnel_lock((thread)->funnel_lock); \ - KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \ - (thread)->funnel_lock, (debug), 0, 0, 0); \ - (thread)->funnel_state = TH_FN_OWNED; \ - (thread)->wait_result = result; \ - } \ -MACRO_END - /* * thread_invoke * @@ -2224,6 +2196,10 @@ thread_invoke( processor_t processor; uint64_t ctime = mach_absolute_time(); +#ifdef CONFIG_MACH_APPROXIMATE_TIME + commpage_update_mach_approximate_time(ctime); +#endif + if (__improbable(get_preemption_level() != 0)) { int pl = get_preemption_level(); panic("thread_invoke: preemption_level %d, possible cause: %s", @@ -2232,10 +2208,11 @@ thread_invoke( } assert(self == current_thread()); + assert(self->runq == PROCESSOR_NULL); -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) sched_traditional_consider_maintenance(ctime); -#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ /* * Mark thread interruptible. @@ -2243,13 +2220,16 @@ thread_invoke( thread_lock(thread); thread->state &= ~TH_UNINT; -#if DEBUG assert(thread_runnable(thread)); -#endif + assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == current_processor()); + assert(thread->runq == PROCESSOR_NULL); /* Reload precise timing global policy to thread-local policy */ thread->precise_user_kernel_time = use_precise_user_kernel_time(thread); + /* Update SFI class based on other factors */ + thread->sfi_class = sfi_thread_classify(thread); + /* * Allow time constraint threads to hang onto * a stack. 
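The continuation machinery that thread_invoke() juggles in the hunk below is easiest to see from the blocking side. A minimal sketch of a continuation-based wait follows; the names example_wait, example_continuation and event_addr are hypothetical, while assert_wait() and thread_block_parameter() are the interfaces used throughout this file:

static void example_continuation(void *param, wait_result_t wresult);

static void
example_wait(void *event_addr)
{
	assert_wait((event_t)event_addr, THREAD_UNINT);
	/* Does not return: the kernel stack may be recycled while
	 * blocked, and example_continuation() runs on wakeup with the
	 * wait result instead of resuming here. */
	thread_block_parameter(example_continuation, event_addr);
	/*NOTREACHED*/
}

Blocking this way lets thread_invoke() hand the stack off to the incoming thread (the c_thread_invoke_hits path) rather than preserving it across the wait.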
@@ -2277,6 +2257,7 @@ thread_invoke( processor->active_thread = thread; processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; if (thread->last_processor != processor && thread->last_processor != NULL) { if (thread->last_processor->processor_set != processor->processor_set) thread->ps_switch++; @@ -2328,7 +2309,6 @@ thread_invoke( counter(c_thread_invoke_hits++); - funnel_refunnel_check(thread, 2); (void) spllo(); assert(continuation); @@ -2347,7 +2327,6 @@ thread_invoke( self->continuation = self->parameter = NULL; - funnel_refunnel_check(self, 3); (void) spllo(); call_continuation(continuation, parameter, self->wait_result); @@ -2387,6 +2366,7 @@ need_stack: processor->active_thread = thread; processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; if (thread->last_processor != processor && thread->last_processor != NULL) { if (thread->last_processor->processor_set != processor->processor_set) thread->ps_switch++; @@ -2417,7 +2397,6 @@ need_stack: PROCESSOR_DATA(processor, current_state)); } - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); @@ -2451,7 +2430,6 @@ need_stack: if (continuation) { self->continuation = self->parameter = NULL; - funnel_refunnel_check(self, 3); (void) spllo(); call_continuation(continuation, parameter, self->wait_result); @@ -2497,7 +2475,7 @@ thread_dispatch( remainder = processor->quantum_end - processor->last_dispatch; - consumed = thread->current_quantum - remainder; + consumed = thread->quantum_remaining - remainder; if ((thread->reason & AST_LEDGER) == 0) { /* @@ -2508,6 +2486,15 @@ thread_dispatch( task_ledgers.cpu_time, consumed); ledger_credit(thread->t_threadledger, thread_ledgers.cpu_time, consumed); +#ifdef CONFIG_BANK + if (thread->t_bankledger) { + ledger_credit(thread->t_bankledger, + bank_ledgers.cpu_time, + (consumed - thread->t_deduct_bank_ledger_time)); + + } + thread->t_deduct_bank_ledger_time =0; +#endif } wake_lock(thread); @@ -2518,18 +2505,17 @@ thread_dispatch( */ if (first_timeslice(processor) && processor->quantum_end > processor->last_dispatch) - thread->current_quantum = (uint32_t)remainder; + thread->quantum_remaining = (uint32_t)remainder; else - thread->current_quantum = 0; + thread->quantum_remaining = 0; if (thread->sched_mode == TH_MODE_REALTIME) { /* * Cancel the deadline if the thread has * consumed the entire quantum. */ - if (thread->current_quantum == 0) { + if (thread->quantum_remaining == 0) { thread->realtime.deadline = UINT64_MAX; - thread->reason |= AST_QUANTUM; } } else { #if defined(CONFIG_SCHED_TRADITIONAL) @@ -2538,9 +2524,9 @@ thread_dispatch( * remaining quantum as an expired quantum * but include what's left next time. */ - if (thread->current_quantum < min_std_quantum) { + if (thread->quantum_remaining < min_std_quantum) { thread->reason |= AST_QUANTUM; - thread->current_quantum += SCHED(initial_quantum_size)(thread); + thread->quantum_remaining += SCHED(initial_quantum_size)(thread); } #endif } @@ -2550,9 +2536,23 @@ thread_dispatch( * take the remainder of the quantum. 
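	 *
	 * Worked example with hypothetical numbers: if the handing-off
	 * thread had 4ms of a 10ms quantum left, self inherits a
	 * quantum_remaining equivalent to 4ms while the donor is marked
	 * AST_QUANTUM with 0 remaining, so the handoff conserves rather
	 * than duplicates the granted CPU time.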
*/ if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) { - self->current_quantum = thread->current_quantum; + self->quantum_remaining = thread->quantum_remaining; thread->reason |= AST_QUANTUM; - thread->current_quantum = 0; + thread->quantum_remaining = 0; + } else { +#if defined(CONFIG_SCHED_MULTIQ) + if (sched_groups_enabled && thread->sched_group == self->sched_group) { + /* TODO: Remove tracepoint */ + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED, MACH_QUANTUM_HANDOFF) | DBG_FUNC_NONE, + self->reason, (uintptr_t)thread_tid(thread), + self->quantum_remaining, thread->quantum_remaining, 0); + + self->quantum_remaining = thread->quantum_remaining; + thread->quantum_remaining = 0; + /* TODO: Should we set AST_QUANTUM here? */ + } +#endif /* defined(CONFIG_SCHED_MULTIQ) */ } thread->computation_metered += (processor->last_dispatch - thread->computation_epoch); @@ -2593,6 +2593,10 @@ thread_dispatch( thread->reason = AST_NONE; + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), thread->reason, thread->state, sched_run_count, 0); + if (thread->wake_active) { thread->wake_active = FALSE; thread_unlock(thread); @@ -2609,6 +2613,7 @@ thread_dispatch( * Waiting. */ boolean_t should_terminate = FALSE; + uint32_t new_run_count; /* Only the first call to thread_dispatch * after explicit termination should add @@ -2623,12 +2628,22 @@ thread_dispatch( thread->chosen_processor = PROCESSOR_NULL; if (thread->sched_mode == TH_MODE_TIMESHARE) { - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_decr(); + if (thread->sched_flags & TH_SFLAG_THROTTLED) + sched_background_decr(thread); + + sched_share_decr(thread); + } + new_run_count = sched_run_decr(thread); - sched_share_decr(); + if ((thread->state & (TH_WAIT | TH_TERMINATE)) == TH_WAIT) { + if (thread->reason & AST_SFI) { + thread->wait_sfi_begin_time = processor->last_dispatch; + } } - sched_run_decr(); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), thread->reason, thread->state, new_run_count, 0); (*thread->sched_call)(SCHED_CALL_BLOCK, thread); @@ -2652,6 +2667,15 @@ thread_dispatch( if (!(self->state & TH_IDLE)) { uint64_t arg1, arg2; int urgency; + ast_t new_ast; + + thread_lock(self); + new_ast = sfi_thread_needs_ast(self, NULL); + thread_unlock(self); + + if (new_ast != AST_NONE) { + ast_on(new_ast); + } urgency = thread_get_urgency(self, &arg1, &arg2); @@ -2660,16 +2684,15 @@ thread_dispatch( /* * Get a new quantum if none remaining. */ - if (self->current_quantum == 0) { + if (self->quantum_remaining == 0) { thread_quantum_init(self); - self->last_quantum_refill_time = processor->last_dispatch; } /* * Set up quantum timer and timeslice. 
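	 *
	 * quantum_end is expressed in mach absolute time units. As a
	 * sketch of the unit handling, a fixed 10ms deadline would be
	 * computed as:
	 *
	 *	uint64_t abstime;
	 *	clock_interval_to_absolutetime_interval(10, NSEC_PER_MSEC, &abstime);
	 *	processor->quantum_end = processor->last_dispatch + abstime;
	 *
	 * Here quantum_remaining already carries absolute-time units, so
	 * it is added directly below.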
*/ - processor->quantum_end = (processor->last_dispatch + self->current_quantum); - timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL); + processor->quantum_end = processor->last_dispatch + self->quantum_remaining; + timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); processor->timeslice = 1; @@ -2683,11 +2706,6 @@ thread_dispatch( } } -#include - -uint32_t kdebug_thread_block = 0; - - /* * thread_block_reason: * @@ -2716,9 +2734,6 @@ thread_block_reason( s = splsched(); - if (!(reason & AST_PREEMPT)) - funnel_release_check(self, 2); - processor = current_processor(); /* If we're explicitly yielding, force a subsequent quantum */ @@ -2731,7 +2746,7 @@ thread_block_reason( self->continuation = continuation; self->parameter = parameter; - if (__improbable(kdebug_thread_block && kdebug_enable && self->state != TH_RUN)) { + if (self->state & ~(TH_RUN | TH_IDLE)) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED,MACH_BLOCK), reason, VM_KERNEL_UNSLIDE(continuation), 0, 0, 0); @@ -2739,11 +2754,10 @@ thread_block_reason( do { thread_lock(self); - new_thread = thread_select(self, processor); + new_thread = thread_select(self, processor, reason); thread_unlock(self); } while (!thread_invoke(self, new_thread, reason)); - funnel_refunnel_check(self, 5); splx(s); return (self->wait_result); @@ -2788,8 +2802,6 @@ thread_run( { ast_t handoff = AST_HANDOFF; - funnel_release_check(self, 3); - self->continuation = continuation; self->parameter = parameter; @@ -2797,13 +2809,11 @@ thread_run( processor_t processor = current_processor(); thread_lock(self); - new_thread = thread_select(self, processor); + new_thread = thread_select(self, processor, AST_NONE); thread_unlock(self); handoff = AST_NONE; } - funnel_refunnel_check(self, 6); - return (self->wait_result); } @@ -2830,8 +2840,6 @@ thread_continue( self->continuation = self->parameter = NULL; - funnel_refunnel_check(self, 4); - if (thread != THREAD_NULL) (void)spllo(); @@ -2844,22 +2852,27 @@ void thread_quantum_init(thread_t thread) { if (thread->sched_mode == TH_MODE_REALTIME) { - thread->current_quantum = thread->realtime.computation; + thread->quantum_remaining = thread->realtime.computation; } else { - thread->current_quantum = SCHED(initial_quantum_size)(thread); + thread->quantum_remaining = SCHED(initial_quantum_size)(thread); } } -#if defined(CONFIG_SCHED_TRADITIONAL) -static uint32_t +#if defined(CONFIG_SCHED_TIMESHARE_CORE) + +uint32_t sched_traditional_initial_quantum_size(thread_t thread) { - if ((thread == THREAD_NULL) || thread->priority > MAXPRI_THROTTLE) + if ((thread == THREAD_NULL) || !(thread->sched_flags & TH_SFLAG_THROTTLED)) return std_quantum; else return bg_quantum; } +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + +#if defined(CONFIG_SCHED_TRADITIONAL) + static sched_mode_t sched_traditional_initial_thread_sched_mode(task_t parent_task) { @@ -2869,12 +2882,6 @@ sched_traditional_initial_thread_sched_mode(task_t parent_task) return TH_MODE_TIMESHARE; } -static boolean_t -sched_traditional_supports_timeshare_mode(void) -{ - return TRUE; -} - #endif /* CONFIG_SCHED_TRADITIONAL */ /* @@ -2897,7 +2904,7 @@ run_queue_init( queue_init(&rq->queues[i]); } -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_FAIRSHARE_CORE) int sched_traditional_fairshare_runq_count(void) { @@ -2976,7 +2983,7 @@ 
sched_traditional_fairshare_queue_remove(thread_t thread) } } -#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */ +#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ /* * run_queue_dequeue: @@ -3042,13 +3049,12 @@ run_queue_enqueue( rq->highq = thread->sched_pri; result = TRUE; } - } - else + } else { if (options & SCHED_TAILQ) enqueue_tail(queue, (queue_entry_t)thread); else enqueue_head(queue, (queue_entry_t)thread); - + } if (SCHED(priority_is_urgent)(thread->sched_pri)) rq->urgency++; SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count); @@ -3107,10 +3113,11 @@ fairshare_setrun( SCHED(fairshare_enqueue)(thread); + pset_unlock(pset); + if (processor != current_processor()) machine_signal_idle(processor); - pset_unlock(pset); } @@ -3177,8 +3184,13 @@ realtime_setrun( processor_set_t pset = processor->processor_set; ast_t preempt; + boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE; + thread->chosen_processor = processor; + /* */ + assert(thread->bound_processor == PROCESSOR_NULL); + /* * Dispatch directly onto idle processor. */ @@ -3190,6 +3202,7 @@ realtime_setrun( processor->next_thread = thread; processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; processor->deadline = thread->realtime.deadline; processor->state = PROCESSOR_DISPATCHING; @@ -3197,11 +3210,14 @@ realtime_setrun( if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { /* cleared on exit from main processor_idle() loop */ pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); - machine_signal_idle(processor); + do_signal_idle = TRUE; } } - pset_unlock(pset); + + if (do_signal_idle) { + machine_signal_idle(processor); + } return; } @@ -3221,6 +3237,7 @@ realtime_setrun( processor->next_thread = THREAD_NULL; processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; processor->deadline = thread->realtime.deadline; processor->state = PROCESSOR_DISPATCHING; if (processor == current_processor()) { @@ -3229,13 +3246,14 @@ realtime_setrun( if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { /* cleared on exit from main processor_idle() loop */ pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); - machine_signal_idle(processor); + do_signal_idle = TRUE; } - } + } } else if (processor->state == PROCESSOR_DISPATCHING) { if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) { processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; processor->deadline = thread->realtime.deadline; } } else { @@ -3245,7 +3263,7 @@ realtime_setrun( if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { /* cleared after IPI causes csw_check() to be called */ pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); - cause_ast_check(processor); + do_cause_ast = TRUE; } } } @@ -3254,16 +3272,26 @@ realtime_setrun( } pset_unlock(pset); + + if (do_signal_idle) { + machine_signal_idle(processor); + } else if (do_cause_ast) { + cause_ast_check(processor); + } } -#if defined(CONFIG_SCHED_TRADITIONAL) -static boolean_t +#if defined(CONFIG_SCHED_TIMESHARE_CORE) + +boolean_t priority_is_urgent(int priority) { return testbit(priority, sched_preempt_pri) ? 
TRUE : FALSE; } +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + +#if defined(CONFIG_SCHED_TRADITIONAL) /* * processor_enqueue: * @@ -3313,6 +3341,8 @@ processor_setrun( ast_t preempt; enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing; + boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE; + thread->chosen_processor = processor; /* @@ -3327,16 +3357,21 @@ processor_setrun( processor->next_thread = thread; processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; processor->deadline = UINT64_MAX; processor->state = PROCESSOR_DISPATCHING; if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { /* cleared on exit from main processor_idle() loop */ pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); - machine_signal_idle(processor); + do_signal_idle = TRUE; } pset_unlock(pset); + if (do_signal_idle) { + machine_signal_idle(processor); + } + return; } @@ -3365,6 +3400,7 @@ processor_setrun( processor->next_thread = THREAD_NULL; processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; processor->deadline = UINT64_MAX; processor->state = PROCESSOR_DISPATCHING; @@ -3373,6 +3409,7 @@ processor_setrun( if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) { processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; processor->deadline = UINT64_MAX; } } else if ( (processor->state == PROCESSOR_RUNNING || @@ -3396,6 +3433,7 @@ processor_setrun( processor->next_thread = THREAD_NULL; processor->current_pri = thread->sched_pri; processor->current_thmode = thread->sched_mode; + processor->current_sfi_class = thread->sfi_class; processor->deadline = UINT64_MAX; processor->state = PROCESSOR_DISPATCHING; @@ -3408,31 +3446,37 @@ processor_setrun( break; case eExitIdle: if (processor == current_processor()) { - if (csw_check_locked(processor, pset) != AST_NONE) + if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE) ast_on(preempt); } else { if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { /* cleared on exit from main processor_idle() loop */ pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); - machine_signal_idle(processor); + do_signal_idle = TRUE; } } break; case eInterruptRunning: if (processor == current_processor()) { - if (csw_check_locked(processor, pset) != AST_NONE) + if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE) ast_on(preempt); } else { if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { /* cleared after IPI causes csw_check() to be called */ pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); - cause_ast_check(processor); + do_cause_ast = TRUE; } } break; } pset_unlock(pset); + + if (do_signal_idle) { + machine_signal_idle(processor); + } else if (do_cause_ast) { + cause_ast_check(processor); + } } #if defined(CONFIG_SCHED_TRADITIONAL) @@ -3510,8 +3554,7 @@ static boolean_t should_current_thread_rechoose_processor(processor_t processor) { return (processor->current_pri < BASEPRI_RTQUEUES - && processor->processor_meta != PROCESSOR_META_NULL - && processor->processor_meta->primary != processor); + && processor->processor_primary != processor); } static int @@ -3520,7 +3563,6 @@ sched_traditional_processor_runq_count(processor_t processor) return runq_for_processor(processor)->count; } - static uint64_t 
sched_traditional_processor_runq_stats_count_sum(processor_t processor) { @@ -3536,9 +3578,13 @@ sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t return 0ULL; } -#endif /* CONFIG_SCHED_TRADITIONAL */ +static int +sched_traditional_processor_bound_count(processor_t processor) +{ + return processor->runq_bound_count; +} -#define next_pset(p) (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets) +#endif /* CONFIG_SCHED_TRADITIONAL */ /* * choose_next_pset: @@ -3581,147 +3627,228 @@ choose_processor( thread_t thread) { processor_set_t nset, cset = pset; - processor_meta_t pmeta = PROCESSOR_META_NULL; - processor_t mprocessor; /* - * Prefer the hinted processor, when appropriate. + * Prefer the hinted processor, when appropriate. */ + /* Fold last processor hint from secondary processor to its primary */ if (processor != PROCESSOR_NULL) { - if (processor->processor_meta != PROCESSOR_META_NULL) - processor = processor->processor_meta->primary; + processor = processor->processor_primary; } - mprocessor = machine_choose_processor(pset, processor); - if (mprocessor != PROCESSOR_NULL) - processor = mprocessor; + /* + * Only consult platform layer if pset is active, which + * it may not be in some cases when a multi-set system + * is going to sleep. + */ + if (pset->online_processor_count) { + if ((processor == PROCESSOR_NULL) || (processor->processor_set == pset && processor->state == PROCESSOR_IDLE)) { + processor_t mc_processor = machine_choose_processor(pset, processor); + if (mc_processor != PROCESSOR_NULL) + processor = mc_processor->processor_primary; + } + } + /* + * At this point, we may have a processor hint, and we may have + * an initial starting pset. If the hint is not in the pset, or + * if the hint is for a processor in an invalid state, discard + * the hint. + */ if (processor != PROCESSOR_NULL) { - if (processor->processor_set != pset || - processor->state == PROCESSOR_INACTIVE || - processor->state == PROCESSOR_SHUTDOWN || - processor->state == PROCESSOR_OFF_LINE) + if (processor->processor_set != pset) { processor = PROCESSOR_NULL; - else - if (processor->state == PROCESSOR_IDLE || - ((thread->sched_pri >= BASEPRI_RTQUEUES) && - (processor->current_pri < BASEPRI_RTQUEUES))) - return (processor); + } else { + switch (processor->state) { + case PROCESSOR_START: + case PROCESSOR_SHUTDOWN: + case PROCESSOR_OFF_LINE: + /* + * Hint is for a processor that cannot support running new threads. + */ + processor = PROCESSOR_NULL; + break; + case PROCESSOR_IDLE: + /* + * Hint is for an idle processor. Assume it is no worse than any other + * idle processor. The platform layer had an opportunity to provide + * the "least cost idle" processor above. + */ + return (processor); + break; + case PROCESSOR_RUNNING: + case PROCESSOR_DISPATCHING: + /* + * Hint is for an active CPU. This fast-path allows + * realtime threads to preempt non-realtime threads + * to regain their previous executing processor. + */ + if ((thread->sched_pri >= BASEPRI_RTQUEUES) && + (processor->current_pri < BASEPRI_RTQUEUES)) + return (processor); + + /* Otherwise, use hint as part of search below */ + break; + default: + processor = PROCESSOR_NULL; + break; + } + } } /* - * Iterate through the processor sets to locate - * an appropriate processor. + * Iterate through the processor sets to locate + * an appropriate processor. Seed results with + * a last-processor hint, if available, so that + * a search must find something strictly better + * to replace it. 
+ * + * A primary/secondary pair of SMT processors is + * "unpaired" if the primary is busy but its + * corresponding secondary is idle (so the physical + * core has full use of its resources). */ + + integer_t lowest_priority = MAXPRI + 1; + integer_t lowest_unpaired_primary_priority = MAXPRI + 1; + integer_t lowest_count = INT_MAX; + uint64_t furthest_deadline = 1; + processor_t lp_processor = PROCESSOR_NULL; + processor_t lp_unpaired_primary_processor = PROCESSOR_NULL; + processor_t lp_unpaired_secondary_processor = PROCESSOR_NULL; + processor_t lc_processor = PROCESSOR_NULL; + processor_t fd_processor = PROCESSOR_NULL; + + if (processor != PROCESSOR_NULL) { + /* All other states should be enumerated above. */ + assert(processor->state == PROCESSOR_RUNNING || processor->state == PROCESSOR_DISPATCHING); + + lowest_priority = processor->current_pri; + lp_processor = processor; + + if (processor->current_pri >= BASEPRI_RTQUEUES) { + furthest_deadline = processor->deadline; + fd_processor = processor; + } + + lowest_count = SCHED(processor_runq_count)(processor); + lc_processor = processor; + } + do { + /* - * Choose an idle processor. + * Choose an idle processor, in pset traversal order */ if (!queue_empty(&cset->idle_queue)) return ((processor_t)queue_first(&cset->idle_queue)); - if (thread->sched_pri >= BASEPRI_RTQUEUES) { - integer_t lowest_priority = MAXPRI + 1; - integer_t lowest_unpaired = MAXPRI + 1; - uint64_t furthest_deadline = 1; - processor_t lp_processor = PROCESSOR_NULL; - processor_t lp_unpaired = PROCESSOR_NULL; - processor_t fd_processor = PROCESSOR_NULL; - - lp_processor = cset->low_pri; - /* Consider hinted processor */ - if (lp_processor != PROCESSOR_NULL && - ((lp_processor->processor_meta == PROCESSOR_META_NULL) || - ((lp_processor == lp_processor->processor_meta->primary) && - !queue_empty(&lp_processor->processor_meta->idle_queue))) && - lp_processor->state != PROCESSOR_INACTIVE && - lp_processor->state != PROCESSOR_SHUTDOWN && - lp_processor->state != PROCESSOR_OFF_LINE && - (lp_processor->current_pri < thread->sched_pri)) - return lp_processor; + /* + * Otherwise, enumerate active and idle processors to find candidates + * running a lower-priority thread, with a shorter run queue, or with a + * further realtime deadline. 
+ */ - processor = (processor_t)queue_first(&cset->active_queue); - while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) { - /* Discover the processor executing the - * thread with the lowest priority within - * this pset, or the one with the furthest - * deadline - */ - integer_t cpri = processor->current_pri; - if (cpri < lowest_priority) { - lowest_priority = cpri; - lp_processor = processor; - } + processor = (processor_t)queue_first(&cset->active_queue); + while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) { - if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) { - furthest_deadline = processor->deadline; - fd_processor = processor; - } + integer_t cpri = processor->current_pri; + if (cpri < lowest_priority) { + lowest_priority = cpri; + lp_processor = processor; + } + if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) { + furthest_deadline = processor->deadline; + fd_processor = processor; + } - if (processor->processor_meta != PROCESSOR_META_NULL && - !queue_empty(&processor->processor_meta->idle_queue)) { - if (cpri < lowest_unpaired) { - lowest_unpaired = cpri; - lp_unpaired = processor; - pmeta = processor->processor_meta; - } - else - if (pmeta == PROCESSOR_META_NULL) - pmeta = processor->processor_meta; + integer_t ccount = SCHED(processor_runq_count)(processor); + if (ccount < lowest_count) { + lowest_count = ccount; + lc_processor = processor; + } + + processor = (processor_t)queue_next((queue_entry_t)processor); + } + + /* + * For SMT configs, these idle secondary processors must have an active primary. Otherwise + * the idle primary would have short-circuited the loop above + */ + processor = (processor_t)queue_first(&cset->idle_secondary_queue); + while (!queue_end(&cset->idle_secondary_queue, (queue_entry_t)processor)) { + processor_t cprimary = processor->processor_primary; + + /* If the primary processor is offline or starting up, it's not a candidate for this path */ + if (cprimary->state == PROCESSOR_RUNNING || cprimary->state == PROCESSOR_DISPATCHING) { + integer_t primary_pri = cprimary->current_pri; + + if (primary_pri < lowest_unpaired_primary_priority) { + lowest_unpaired_primary_priority = primary_pri; + lp_unpaired_primary_processor = cprimary; + lp_unpaired_secondary_processor = processor; } - processor = (processor_t)queue_next((queue_entry_t)processor); } - if (thread->sched_pri > lowest_unpaired) - return lp_unpaired; + processor = (processor_t)queue_next((queue_entry_t)processor); + } + 
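The pair of walks above replaces the old per-pset low_pri/low_count hints with one pass that tracks several best-so-far candidates at once. Below is a minimal sketch of that scan pattern in isolation; candidate_t and its fields are illustrative stand-ins for the kernel's processor structures, not the actual API:

#include <limits.h>
#include <stddef.h>
#include <stdint.h>

typedef struct candidate {
    int      pri;        /* priority of the thread this CPU is running */
    int      runq_len;   /* depth of this CPU's run queue */
    uint64_t deadline;   /* realtime deadline, 0 if none */
    struct candidate *next;
} candidate_t;

/* One walk keeps the lowest-priority, least-loaded, and
 * furthest-deadline candidates simultaneously, so the caller can
 * apply whichever criterion matches the incoming thread. */
static void
scan_candidates(candidate_t *head,
                candidate_t **lowest_pri,
                candidate_t **lowest_count,
                candidate_t **furthest_deadline)
{
    int best_pri = INT_MAX, best_count = INT_MAX;
    uint64_t best_deadline = 0;

    *lowest_pri = *lowest_count = *furthest_deadline = NULL;

    for (candidate_t *c = head; c != NULL; c = c->next) {
        if (c->pri < best_pri) {
            best_pri = c->pri;
            *lowest_pri = c;
        }
        if (c->runq_len < best_count) {
            best_count = c->runq_len;
            *lowest_count = c;
        }
        if (c->deadline > best_deadline) {
            best_deadline = c->deadline;
            *furthest_deadline = c;
        }
    }
}

Folding all three criteria into one traversal means the pset lock only has to be held for a single pass, however many of the candidates end up being consulted.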
- if (pmeta != PROCESSOR_META_NULL) - return ((processor_t)queue_first(&pmeta->idle_queue)); - if (thread->sched_pri > lowest_priority) + if (thread->sched_pri >= BASEPRI_RTQUEUES) { + + /* + * For realtime threads, the most important aspect is + * scheduling latency, so we attempt to assign threads + * to good preemption candidates (assuming an idle primary + * processor was not available above). + */ + + if (thread->sched_pri > lowest_unpaired_primary_priority) { + /* Move to end of active queue so that the next thread doesn't also pick it */ + remqueue((queue_entry_t)lp_unpaired_primary_processor); + enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor); + return lp_unpaired_primary_processor; + } + if (thread->sched_pri > lowest_priority) { + /* Move to end of active queue so that the next thread doesn't also pick it */ + remqueue((queue_entry_t)lp_processor); + enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor); + return lp_processor; + } if (thread->realtime.deadline < furthest_deadline) return fd_processor; - processor = PROCESSOR_NULL; - } - else { /* - * Check any hinted processors in the processor set if available. + * If all primary and secondary CPUs are busy with realtime + * threads with deadlines earlier than us, move on to next + * pset. */ - if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE && - cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE && - (processor == PROCESSOR_NULL || - (thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) { - processor = cset->low_pri; + } + else { + + if (thread->sched_pri > lowest_unpaired_primary_priority) { + /* Move to end of active queue so that the next thread doesn't also pick it */ + remqueue((queue_entry_t)lp_unpaired_primary_processor); + enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor); + return lp_unpaired_primary_processor; + } - else - if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE && - cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE && - (processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT && - SCHED(processor_runq_count)(cset->low_count) < SCHED(processor_runq_count)(processor)))) { - processor = cset->low_count; + if (thread->sched_pri > lowest_priority) { + /* Move to end of active queue so that the next thread doesn't also pick it */ + remqueue((queue_entry_t)lp_processor); + enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor); + return lp_processor; + } /* - * Otherwise, choose an available processor in the set. + * If all primary processors in this pset are running a higher + * priority thread, move on to next pset. Only when we have + * exhausted this search do we fall back to other heuristics. */ - if (processor == PROCESSOR_NULL) { - processor = (processor_t)dequeue_head(&cset->active_queue); - if (processor != PROCESSOR_NULL) - enqueue_tail(&cset->active_queue, (queue_entry_t)processor); - } - - if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) { - if (processor->processor_meta != PROCESSOR_META_NULL && - !queue_empty(&processor->processor_meta->idle_queue)) - pmeta = processor->processor_meta; - } } /* - * Move onto the next processor set. + * Move onto the next processor set. */ nset = next_pset(cset); @@ -3734,62 +3861,54 @@ choose_processor( } while (nset != pset); 
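Once the traversal is exhausted, the selection loop that follows drains the remembered candidates in preference order and re-validates each pick after re-acquiring the owning lock, because its state may have changed while no lock was held. A small userspace sketch of that choose, relock, revalidate shape, with pthread mutexes standing in for pset locks and all names assumed for illustration:

#include <pthread.h>
#include <stddef.h>

typedef enum { CPU_RUNNING, CPU_IDLE, CPU_OFFLINE } cpu_state_t;

typedef struct cpu {
    pthread_mutex_t *set_lock;   /* stands in for the owning pset's lock */
    cpu_state_t      state;
} cpu_t;

/* Walk a ranked list of remembered candidates; after re-taking the
 * owning lock, re-check the candidate's state, since it may have
 * gone offline while no lock was held. The boot CPU is always
 * accepted, so the loop terminates. */
static cpu_t *
choose_with_revalidation(cpu_t *secondary, cpu_t *least_loaded,
                         cpu_t *boot_cpu, pthread_mutex_t **held)
{
    cpu_t *pick;

    do {
        if (secondary != NULL) {
            pick = secondary;
            secondary = NULL;        /* consume this candidate */
        } else if (least_loaded != NULL) {
            pick = least_loaded;
            least_loaded = NULL;
        } else {
            pick = boot_cpu;         /* last resort, always usable */
        }

        if (*held != pick->set_lock) {
            pthread_mutex_unlock(*held);
            *held = pick->set_lock;
            pthread_mutex_lock(*held);
        }

        /* State may have changed while we held no lock; retry. */
        if (pick != boot_cpu && pick->state == CPU_OFFLINE)
            pick = NULL;
    } while (pick == NULL);

    return pick;
}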
/* - * Make sure that we pick a running processor, - * and that the correct processor set is locked. + * Make sure that we pick a running processor, + * and that the correct processor set is locked. + * Since we may have unlocked the candidate processor's + * pset, it may have changed state. + * + * All primary processors are running a higher priority + * thread, so the only options left are enqueuing on + * the secondary processor that would perturb the lowest-priority + * primary, or the least busy primary. */ do { - if (pmeta != PROCESSOR_META_NULL) { - if (cset != pmeta->primary->processor_set) { - pset_unlock(cset); - - cset = pmeta->primary->processor_set; - pset_lock(cset); - } - - if (!queue_empty(&pmeta->idle_queue)) - return ((processor_t)queue_first(&pmeta->idle_queue)); - - pmeta = PROCESSOR_META_NULL; - } - - /* - * If we haven't been able to choose a processor, - * pick the boot processor and return it. - */ - if (processor == PROCESSOR_NULL) { - processor = master_processor; + /* lowest_priority is evaluated in the main loops above */ + if (lp_unpaired_secondary_processor != PROCESSOR_NULL) { + processor = lp_unpaired_secondary_processor; + lp_unpaired_secondary_processor = PROCESSOR_NULL; + } else if (lc_processor != PROCESSOR_NULL) { + processor = lc_processor; + lc_processor = PROCESSOR_NULL; + } else { /* - * Check that the correct processor set is - * returned locked. + * All processors are executing higher + * priority threads, and the lowest_count + * candidate was not usable */ - if (cset != processor->processor_set) { - pset_unlock(cset); - - cset = processor->processor_set; - pset_lock(cset); - } - - return (processor); + processor = master_processor; } /* - * Check that the processor set for the chosen - * processor is locked. + * Check that the correct processor set is + * returned locked. */ if (cset != processor->processor_set) { pset_unlock(cset); - cset = processor->processor_set; pset_lock(cset); } /* - * We must verify that the chosen processor is still available. + * We must verify that the chosen processor is still available. + * master_processor is an exception, since we may need to preempt + * a running thread on it during processor shutdown (for sleep), + * and that thread needs to be enqueued on its runqueue to run + * when the processor is restarted. */ - if (processor->state == PROCESSOR_INACTIVE || - processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE) + if (processor != master_processor && (processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)) processor = PROCESSOR_NULL; + } while (processor == PROCESSOR_NULL); return (processor); @@ -3812,9 +3931,7 @@ thread_setrun( processor_t processor; processor_set_t pset; -#if DEBUG assert(thread_runnable(thread)); -#endif /* * Update priority if needed. @@ -3822,6 +3939,8 @@ if (SCHED(can_update_priority)(thread)) SCHED(update_priority)(thread); + thread->sfi_class = sfi_thread_classify(thread); + assert(thread->runq == PROCESSOR_NULL); if (thread->bound_processor == PROCESSOR_NULL) { @@ -3892,6 +4011,7 @@ /* * Dispatch the thread on the chosen processor. + * TODO: This should be based on sched_mode, not sched_pri */ if (thread->sched_pri >= BASEPRI_RTQUEUES) realtime_setrun(processor, thread); 
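The realtime_setrun and processor_setrun hunks above share one discipline: decide under the pset lock whether a wakeup or AST is needed, record it in do_signal_idle/do_cause_ast, and send the actual cross-CPU signal only after the lock is dropped, with pending_AST_cpu_mask suppressing duplicates. A compressed sketch of that deferred-signal pattern; pset_t, enqueue_and_signal, and send_wakeup_ipi are assumed names, and a pthread mutex stands in for the pset lock:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    pthread_mutex_t lock;
    unsigned        pending_ipi_mask;  /* one bit per CPU with an IPI in flight */
} pset_t;

static void send_wakeup_ipi(int cpu)
{
    printf("IPI -> cpu %d\n", cpu);    /* stands in for machine_signal_idle() */
}

/* Decide *under the lock* whether a wakeup is required, but send the
 * IPI only after unlocking, so the target CPU never wakes up just to
 * spin on the pset lock we are still holding. The pending mask keeps
 * several enqueuers from stacking duplicate IPIs on one CPU. */
static void
enqueue_and_signal(pset_t *pset, int cpu)
{
    bool do_signal = false;

    pthread_mutex_lock(&pset->lock);
    /* ... enqueue the thread, update per-CPU state ... */
    if (!(pset->pending_ipi_mask & (1U << cpu))) {
        pset->pending_ipi_mask |= (1U << cpu);  /* target clears it on wakeup */
        do_signal = true;
    }
    pthread_mutex_unlock(&pset->lock);

    if (do_signal)
        send_wakeup_ipi(cpu);
}

The design choice is latency on the receiving side: an interrupted CPU that immediately contends for the lock the sender still holds would turn every wakeup into a short spin storm.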
@@ -3985,11 +4105,12 @@ processor_queue_shutdown( * Check for a preemption point in * the current context. * - * Called at splsched. + * Called at splsched with thread locked. */ ast_t csw_check( - processor_t processor) + processor_t processor, + ast_t check_reason) { processor_set_t pset = processor->processor_set; ast_t result; @@ -3999,7 +4120,7 @@ csw_check( /* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */ pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id); - result = csw_check_locked(processor, pset); + result = csw_check_locked(processor, pset, check_reason); pset_unlock(pset); @@ -4008,41 +4129,47 @@ csw_check( /* * Check for preemption at splsched with - * pset locked + * pset and thread locked */ ast_t csw_check_locked( processor_t processor, - processor_set_t pset __unused) + processor_set_t pset __unused, + ast_t check_reason) { - ast_t result = AST_NONE; + ast_t result; thread_t thread = processor->active_thread; if (first_timeslice(processor)) { if (rt_runq.count > 0) - return (AST_PREEMPT | AST_URGENT); + return (check_reason | AST_PREEMPT | AST_URGENT); } else { if (rt_runq.count > 0) { if (BASEPRI_RTQUEUES > processor->current_pri) - return (AST_PREEMPT | AST_URGENT); + return (check_reason | AST_PREEMPT | AST_URGENT); else - return (AST_PREEMPT); + return (check_reason | AST_PREEMPT); } } result = SCHED(processor_csw_check)(processor); if (result != AST_NONE) - return (result); + return (check_reason | result); if (SCHED(should_current_thread_rechoose_processor)(processor)) - return (AST_PREEMPT); + return (check_reason | AST_PREEMPT); - if (machine_processor_is_inactive(processor)) - return (AST_PREEMPT); - if (thread->state & TH_SUSP) - return (AST_PREEMPT); + return (check_reason | AST_PREEMPT); + + /* + * Current thread may not need to be preempted, but may need + * an SFI wait. + */ + result = sfi_thread_needs_ast(thread, NULL); + if (result != AST_NONE) + return (check_reason | result); return (AST_NONE); } @@ -4059,11 +4186,31 @@ csw_check_locked( void set_sched_pri( thread_t thread, - int priority) + int priority) { boolean_t removed = thread_run_queue_remove(thread); + int curgency, nurgency; + uint64_t urgency_param1, urgency_param2; + thread_t cthread = current_thread(); + if (thread == cthread) { + curgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2); + } + thread->sched_pri = priority; + + if (thread == cthread) { + nurgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2); +/* set_sched_pri doesn't alter RT params. We expect direct base priority/QoS + * class alterations from user space to occur relatively infrequently, hence + * those are lazily handled. QoS classes have distinct priority bands, and QoS + * inheritance is expected to involve priority changes. */ + if (nurgency != curgency) { + thread_tell_urgency(nurgency, urgency_param1, urgency_param2, thread); + } + } + if (removed) thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); else @@ -4075,7 +4222,8 @@ set_sched_pri( processor->current_pri = priority; processor->current_thmode = thread->sched_mode; - if ((preempt = csw_check(processor)) != AST_NONE) + processor->current_sfi_class = thread->sfi_class = sfi_thread_classify(thread); + if ((preempt = csw_check(processor, AST_NONE)) != AST_NONE) ast_on(preempt); } else @@ -4117,8 +4265,11 @@ run_queue_check( #if defined(CONFIG_SCHED_TRADITIONAL) -/* locks the runqueue itself */ - +/* + * Locks the runqueue itself. + * + * Thread must be locked. 
+ */ static boolean_t processor_queue_remove( processor_t processor, @@ -4155,69 +4306,82 @@ processor_queue_remove( #endif /* CONFIG_SCHED_TRADITIONAL */ + /* * thread_run_queue_remove: * - * Remove a thread from a current run queue and + * Remove a thread from its current run queue and * return TRUE if successful. * * Thread must be locked. + * + * If thread->runq is PROCESSOR_NULL, the thread will not re-enter the + * run queues because the caller locked the thread. Otherwise + * the thread is on a run queue, but could be chosen for dispatch + * and removed by another processor under a different lock, which + * will set thread->runq to PROCESSOR_NULL. + * + * Hence the thread select path must not rely on anything that could + * be changed under the thread lock after calling this function, + * most importantly thread->sched_pri. */ boolean_t thread_run_queue_remove( - thread_t thread) + thread_t thread) { - processor_t processor = thread->runq; + boolean_t removed = FALSE; + processor_t processor = thread->runq; - /* - * If processor is PROCESSOR_NULL, the thread will stay out of the - * run queues because the caller locked the thread. Otherwise - * the thread is on a run queue, but could be chosen for dispatch - * and removed. - */ - if (processor != PROCESSOR_NULL) { - queue_t q; + if ((thread->state & (TH_RUN|TH_WAIT)) == TH_WAIT) { + /* Thread isn't runnable */ + assert(thread->runq == PROCESSOR_NULL); + return FALSE; + } + if (processor == PROCESSOR_NULL) { /* - * The processor run queues are locked by the - * processor set. Real-time priorities use a - * global queue with a dedicated lock. + * The thread is either not on the runq, + * or is in the midst of being removed from the runq. + * + * runq is set to NULL under the pset lock, not the thread + * lock, so the thread may still be in the process of being dequeued + * from the runq. It will wait in invoke for the thread lock to be + * dropped. */ - if (thread->sched_mode == TH_MODE_FAIRSHARE) { - return SCHED(fairshare_queue_remove)(thread); - } - - if (thread->sched_pri < BASEPRI_RTQUEUES) { - return SCHED(processor_queue_remove)(processor, thread); - } - simple_lock(&rt_lock); - q = &rt_runq.queue; + return FALSE; + } - if (processor == thread->runq) { - /* - * Thread is on a run queue and we have a lock on - * that run queue. - */ - remqueue((queue_entry_t)thread); - SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); - rt_runq.count--; + if (thread->sched_mode == TH_MODE_FAIRSHARE) { + return SCHED(fairshare_queue_remove)(thread); + } + + if (thread->sched_pri < BASEPRI_RTQUEUES) { + return SCHED(processor_queue_remove)(processor, thread); + } - thread->runq = PROCESSOR_NULL; - } - else { - /* - * The thread left the run queue before we could - * lock the run queue. - */ - assert(thread->runq == PROCESSOR_NULL); - processor = PROCESSOR_NULL; - } + simple_lock(&rt_lock); - simple_unlock(&rt_lock); + if (thread->runq != PROCESSOR_NULL) { + /* + * Thread is on a run queue and we have a lock on + * that run queue. 
+ */ + + assert(thread->runq == RT_RUNQ); + + remqueue((queue_entry_t)thread); + SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); + rt_runq.count--; + + thread->runq = PROCESSOR_NULL; + + removed = TRUE; } - return (processor != PROCESSOR_NULL); + simple_unlock(&rt_lock); + + return (removed); } #if defined(CONFIG_SCHED_TRADITIONAL) @@ -4360,14 +4524,18 @@ thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2) ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE))) { /* * Background urgency applied when thread priority is MAXPRI_THROTTLE or lower and thread is not promoted + * TODO: Use TH_SFLAG_THROTTLED instead? */ *arg1 = thread->sched_pri; *arg2 = thread->priority; return (THREAD_URGENCY_BACKGROUND); } else { - *arg1 = thread->sched_pri; - *arg2 = thread->priority; + /* For otherwise unclassified threads, report throughput QoS + * parameters + */ + *arg1 = thread->effective_policy.t_through_qos; + *arg2 = thread->task->effective_policy.t_through_qos; return (THREAD_URGENCY_NORMAL); } @@ -4409,13 +4577,10 @@ processor_idle( PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state); while (1) { - if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */ break; if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id)) break; - if (!SCHED(processor_queue_empty)(processor)) - break; if (rt_runq.count) break; #if CONFIG_SCHED_IDLE_IN_PLACE @@ -4440,8 +4605,13 @@ processor_idle( IDLE_KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0); - if (processor->state == PROCESSOR_INACTIVE && !machine_processor_is_inactive(processor)) - break; + if (!SCHED(processor_queue_empty)(processor)) { + /* Secondary SMT processors respond to directed wakeups + * exclusively. Some platforms induce 'spurious' SMT wakeups. + */ + if (processor->processor_primary == processor) + break; + } } timer_switch(&PROCESSOR_DATA(processor, idle_state), @@ -4464,8 +4634,10 @@ processor_idle( if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) || (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri)) ) { + /* Something higher priority has popped up on the runqueue - redispatch this thread elsewhere */ processor->current_pri = IDLEPRI; processor->current_thmode = TH_MODE_FIXED; + processor->current_sfi_class = SFI_CLASS_KERNEL; processor->deadline = UINT64_MAX; pset_unlock(pset); @@ -4497,15 +4669,11 @@ processor_idle( processor->state = PROCESSOR_RUNNING; processor->current_pri = IDLEPRI; processor->current_thmode = TH_MODE_FIXED; + processor->current_sfi_class = SFI_CLASS_KERNEL; processor->deadline = UINT64_MAX; enqueue_tail(&pset->active_queue, (queue_entry_t)processor); } else - if (state == PROCESSOR_INACTIVE) { - processor->state = PROCESSOR_RUNNING; - enqueue_tail(&pset->active_queue, (queue_entry_t)processor); - } - else if (state == PROCESSOR_SHUTDOWN) { /* * Going off-line. 
Force a @@ -4515,6 +4683,7 @@ processor_idle( processor->next_thread = THREAD_NULL; processor->current_pri = IDLEPRI; processor->current_thmode = TH_MODE_FIXED; + processor->current_sfi_class = SFI_CLASS_KERNEL; processor->deadline = UINT64_MAX; pset_unlock(pset); @@ -4619,9 +4788,12 @@ sched_startup(void) thread_block(THREAD_CONTINUE_NULL); } -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) static volatile uint64_t sched_maintenance_deadline; +#if defined(CONFIG_TELEMETRY) +static volatile uint64_t sched_telemetry_deadline = 0; +#endif static uint64_t sched_tick_last_abstime; static uint64_t sched_tick_delta; uint64_t sched_tick_max_delta; @@ -4631,17 +4803,20 @@ uint64_t sched_tick_max_delta; * Perform periodic bookkeeping functions about ten * times per second. */ -static void +void sched_traditional_maintenance_continue(void) { - uint64_t sched_tick_ctime; - sched_tick_ctime = mach_absolute_time(); + uint64_t sched_tick_ctime, late_time; + + sched_tick_ctime = mach_absolute_time(); if (__improbable(sched_tick_last_abstime == 0)) { sched_tick_last_abstime = sched_tick_ctime; + late_time = 0; sched_tick_delta = 1; } else { - sched_tick_delta = ((sched_tick_ctime) - sched_tick_last_abstime) / sched_tick_interval; + late_time = sched_tick_ctime - sched_tick_last_abstime; + sched_tick_delta = late_time / sched_tick_interval; /* Ensure a delta of 1, since the interval could be slightly * smaller than the sched_tick_interval due to dispatch * latencies. @@ -4660,6 +4835,13 @@ sched_traditional_maintenance_continue(void) sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta); } + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_START, + sched_tick_delta, + late_time, + 0, + 0, + 0); + /* Add a number of pseudo-ticks corresponding to the elapsed interval * This could be greater than 1 if substantial intervals where * all processors are idle occur, which rarely occurs in practice. @@ -4676,7 +4858,14 @@ sched_traditional_maintenance_continue(void) * Scan the run queues for threads which * may need to be updated. */ - thread_update_scan(); + SCHED(thread_update_scan)(); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_END, + sched_pri_shift, + sched_background_pri_shift, + 0, + 0, + 0); assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT); thread_block((thread_continue_t)sched_traditional_maintenance_continue); @@ -4711,9 +4900,29 @@ sched_traditional_consider_maintenance(uint64_t ctime) { sched_maintenance_wakeups++; } } + +#if defined(CONFIG_TELEMETRY) + /* + * Windowed telemetry is driven by the scheduler. It should be safe + * to call compute_telemetry_windowed() even when windowed telemetry + * is disabled, but we should try to avoid doing extra work for no + * reason. 
+ */ + if (telemetry_window_enabled) { + deadline = sched_telemetry_deadline; + + if (__improbable(ctime >= deadline)) { + ndeadline = ctime + sched_telemetry_interval; + + if (__probable(__sync_bool_compare_and_swap(&sched_telemetry_deadline, deadline, ndeadline))) { + compute_telemetry_windowed(); + } + } + } +#endif /* CONFIG_TELEMETRY */ }
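The telemetry hunk above lazily re-arms a shared deadline with a compare-and-swap, so that when several CPUs notice the expired deadline at the same time, exactly one wins and runs the periodic work. A standalone sketch of the same pattern built on the same GCC/clang __sync builtin; the function names and the 100ms period are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static volatile uint64_t work_deadline = 0;
#define WORK_INTERVAL_NS 100000000ULL   /* illustrative 100ms period */

static uint64_t now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

static void do_periodic_work(void)
{
    printf("periodic work ran\n");
}

/* Called opportunistically from hot paths on many CPUs: only the
 * caller that wins the compare-and-swap re-arms the deadline and
 * runs the work, so concurrent callers never run it twice for the
 * same period, and no dedicated timer has to stay armed. */
static void consider_periodic_work(void)
{
    uint64_t deadline = work_deadline;
    uint64_t ctime = now_ns();

    if (ctime >= deadline) {
        uint64_t ndeadline = ctime + WORK_INTERVAL_NS;

        if (__sync_bool_compare_and_swap(&work_deadline, deadline, ndeadline))
            do_periodic_work();
    }
}

int main(void)
{
    consider_periodic_work();   /* first call always fires */
    consider_periodic_work();   /* within the window: does nothing */
    return 0;
}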
-#endif /* CONFIG_SCHED_TRADITIONAL */ +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ void sched_init_thread(void (*continuation)(void)) @@ -4726,7 +4935,7 @@ sched_init_thread(void (*continuation)(void)) /*NOTREACHED*/ } -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) /* * thread_update_scan / runq_scan: @@ -4749,12 +4958,44 @@ sched_init_thread(void (*continuation)(void)) static thread_t thread_update_array[THREAD_UPDATE_SIZE]; static int thread_update_count = 0; +/* Returns TRUE if thread was added, FALSE if thread_update_array is full */ +boolean_t +thread_update_add_thread(thread_t thread) +{ + if (thread_update_count == THREAD_UPDATE_SIZE) + return (FALSE); + + thread_update_array[thread_update_count++] = thread; + thread_reference_internal(thread); + return (TRUE); +} + +void +thread_update_process_threads(void) +{ + while (thread_update_count > 0) { + spl_t s; + thread_t thread = thread_update_array[--thread_update_count]; + thread_update_array[thread_update_count] = THREAD_NULL; + + s = splsched(); + thread_lock(thread); + if (!(thread->state & (TH_WAIT)) && (SCHED(can_update_priority)(thread))) { + SCHED(update_priority)(thread); + } + thread_unlock(thread); + splx(s); + + thread_deallocate(thread); + } +} + /* * Scan a runq for candidate threads. * * Returns TRUE if retry is needed. */ -static boolean_t +boolean_t runq_scan( run_queue_t runq) { @@ -4768,11 +5009,8 @@ runq_scan( queue_iterate(q, thread, thread_t, links) { if ( thread->sched_stamp != sched_tick && (thread->sched_mode == TH_MODE_TIMESHARE) ) { - if (thread_update_count == THREAD_UPDATE_SIZE) + if (thread_update_add_thread(thread) == FALSE) return (TRUE); - - thread_update_array[thread_update_count++] = thread; - thread_reference_internal(thread); } count--; @@ -4785,6 +5023,10 @@ runq_scan( return (FALSE); } +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + +#if defined(CONFIG_SCHED_TRADITIONAL) + static void thread_update_scan(void) { @@ -4796,6 +5038,10 @@ thread_update_scan(void) do { do { + /* + * TODO: in sched_traditional_use_pset_runqueue case, + * avoid scanning the same runq multiple times + */ pset = processor->processor_set; s = splsched(); @@ -4811,34 +5057,15 @@ thread_update_scan(void) thread = processor->idle_thread; if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) { - if (thread_update_count == THREAD_UPDATE_SIZE) { + if (thread_update_add_thread(thread) == FALSE) { restart_needed = TRUE; break; } - - thread_update_array[thread_update_count++] = thread; - thread_reference_internal(thread); } } while ((processor = processor->processor_list) != NULL); - /* - * Ok, we now have a collection of candidates -- fix them. - */ - while (thread_update_count > 0) { - thread = thread_update_array[--thread_update_count]; - thread_update_array[thread_update_count] = THREAD_NULL; - - s = splsched(); - thread_lock(thread); - if ( !(thread->state & (TH_WAIT)) ) { - if (SCHED(can_update_priority)(thread)) - SCHED(update_priority)(thread); - } - thread_unlock(thread); - splx(s); - - thread_deallocate(thread); - } + /* Ok, we now have a collection of candidates -- fix them. */ + thread_update_process_threads(); } while (restart_needed); } @@ -4864,9 +5091,9 @@ thread_set_eager_preempt(thread_t thread) thread->sched_flags |= TH_SFLAG_EAGERPREEMPT; if (thread == current_thread()) { - thread_unlock(thread); - ast = csw_check(p); + ast = csw_check(p, AST_NONE); + thread_unlock(thread); if (ast != AST_NONE) { (void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast); } @@ -4958,14 +5185,12 @@ preemption_enabled(void) return (get_preemption_level() == 0 && ml_get_interrupts_enabled()); } -#if DEBUG -static boolean_t +__assert_only static boolean_t thread_runnable( thread_t thread) { return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN); } -#endif /* DEBUG */ static void sched_timer_deadline_tracking_init(void) { diff --git a/osfmk/kern/sched_prim.h b/osfmk/kern/sched_prim.h index 8c7bc56b0..d7326a91c 100644 --- a/osfmk/kern/sched_prim.h +++ b/osfmk/kern/sched_prim.h @@ -135,10 +135,24 @@ extern void set_sched_pri( int priority); /* Set base priority of the specified thread */ -extern void set_priority( +extern void sched_set_thread_base_priority( thread_t thread, int priority); +/* Set the thread to be categorized as 'background' */ +extern void sched_set_thread_throttled(thread_t thread, + boolean_t wants_throttle); + +/* Set the thread's true scheduling mode */ +extern void sched_set_thread_mode(thread_t thread, + sched_mode_t mode); +/* Demote the true scheduling mode */ +extern void sched_thread_mode_demote(thread_t thread, + uint32_t reason); +/* Un-demote the true scheduling mode */ +extern void sched_thread_mode_undemote(thread_t thread, + uint32_t reason); + /* Reset scheduled priority of thread */ extern void compute_priority( thread_t thread, @@ -202,7 +216,7 @@ extern processor_t choose_processor( thread_t thread); /* Choose a thread from a processor's priority-based runq */ -extern thread_t choose_thread( +extern thread_t choose_thread_from_runq( processor_t processor, run_queue_t runq, int priority); @@ -227,6 +241,25 @@ extern void run_queue_remove( run_queue_t runq, thread_t thread); + +#if defined(CONFIG_SCHED_TIMESHARE_CORE) + +extern boolean_t thread_update_add_thread( + thread_t thread); +extern void thread_update_process_threads(void); +extern boolean_t runq_scan( + run_queue_t runq); + +void sched_traditional_timebase_init(void); +void sched_traditional_maintenance_continue(void); +boolean_t priority_is_urgent( + int priority); +uint32_t sched_traditional_initial_quantum_size( + thread_t thread); +void sched_traditional_init(void); + +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + /* Remove thread from its run queue */ extern boolean_t thread_run_queue_remove( thread_t thread); @@ -239,7 +272,7 @@ extern boolean_t thread_eager_preemption( thread_t thread); /* Fair Share routines */ -#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_FAIRSHARE_CORE) void sched_traditional_fairshare_init(void); int sched_traditional_fairshare_runq_count(void); @@ -251,9 +284,9 @@ void sched_traditional_fairshare_enqueue(thread_t thread); thread_t sched_traditional_fairshare_dequeue(void); boolean_t sched_traditional_fairshare_queue_remove(thread_t thread); -#endif +#endif /* CONFIG_SCHED_FAIRSHARE_CORE */ -#if defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) +#if defined(CONFIG_SCHED_GRRR) void sched_grrr_fairshare_init(void); int sched_grrr_fairshare_runq_count(void); @@ -441,7 +474,7 @@ extern boolean_t preemption_enabled(void); * a function pointer table. 
*/ -#if !defined(CONFIG_SCHED_TRADITIONAL) && !defined(CONFIG_SCHED_PROTO) && !defined(CONFIG_SCHED_GRRR) && !defined(CONFIG_SCHED_FIXEDPRIORITY) +#if !defined(CONFIG_SCHED_TRADITIONAL) && !defined(CONFIG_SCHED_PROTO) && !defined(CONFIG_SCHED_GRRR) && !defined(CONFIG_SCHED_MULTIQ) #error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX #endif @@ -461,7 +494,8 @@ struct sched_dispatch_table { */ thread_t (*choose_thread)( processor_t processor, - int priority); + int priority, + ast_t reason); /* * Steal a thread from another processor in the pset so that it can run @@ -500,7 +534,7 @@ struct sched_dispatch_table { /* Remove the specific thread from the per-processor runqueue */ boolean_t (*processor_queue_remove)( - processor_t processor, + processor_t processor, thread_t thread); /* @@ -537,9 +571,6 @@ struct sched_dispatch_table { /* Scheduler mode for a new thread */ sched_mode_t (*initial_thread_sched_mode)(task_t parent_task); - /* Scheduler algorithm supports timeshare (decay) mode */ - boolean_t (*supports_timeshare_mode)(void); - /* * Is it safe to call update_priority, which may change a thread's * runqueue or other state. This can be used to throttle changes @@ -589,7 +620,11 @@ struct sched_dispatch_table { thread_t (*fairshare_dequeue)(void); boolean_t (*fairshare_queue_remove)(thread_t thread); - + + boolean_t (*processor_bound_count)(processor_t processor); + + void (*thread_update_scan)(void); + /* * Use processor->next_thread to pin a thread to an idle * processor. If FALSE, threads are enqueued and can @@ -605,6 +640,13 @@ extern const struct sched_dispatch_table sched_traditional_dispatch; extern const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch; #endif +#if defined(CONFIG_SCHED_MULTIQ) +extern const struct sched_dispatch_table sched_multiq_dispatch; +#define kSchedMultiQString "multiq" +extern const struct sched_dispatch_table sched_dualq_dispatch; +#define kSchedDualQString "dualq" +#endif + #if defined(CONFIG_SCHED_PROTO) #define kSchedProtoString "proto" extern const struct sched_dispatch_table sched_proto_dispatch; @@ -615,13 +657,6 @@ extern const struct sched_dispatch_table sched_proto_dispatch; extern const struct sched_dispatch_table sched_grrr_dispatch; #endif -#if defined(CONFIG_SCHED_FIXEDPRIORITY) -#define kSchedFixedPriorityString "fixedpriority" -#define kSchedFixedPriorityWithPsetRunqueueString "fixedpriority_with_pset_runqueue" -extern const struct sched_dispatch_table sched_fixedpriority_dispatch; -extern const struct sched_dispatch_table sched_fixedpriority_with_pset_runqueue_dispatch; -#endif - /* * It is an error to invoke any scheduler-related code * before this is set up @@ -638,11 +673,11 @@ enum sched_enum { #if defined(CONFIG_SCHED_GRRR) sched_enum_grrr = 4, #endif -#if defined(CONFIG_SCHED_FIXEDPRIORITY) - sched_enum_fixedpriority = 5, - sched_enum_fixedpriority_with_pset_runqueue = 6, +#if defined(CONFIG_SCHED_MULTIQ) + sched_enum_multiq = 5, + sched_enum_dualq = 6, #endif - sched_enum_max = 7 + sched_enum_max = 7, }; extern const struct sched_dispatch_table *sched_current_dispatch; diff --git a/osfmk/kern/sched_proto.c b/osfmk/kern/sched_proto.c index 4eb740797..6523bc66d 100644 --- a/osfmk/kern/sched_proto.c +++ b/osfmk/kern/sched_proto.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -80,7 +79,8 @@ sched_proto_maintenance_continuation(void); static thread_t sched_proto_choose_thread(processor_t processor, - int priority); + int priority, + ast_t reason); 
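The sched_proto hunks below convert the dispatch table from a positional initializer list to designated initializers. A toy table sketching why that conversion helps once members such as processor_bound_count and thread_update_scan are inserted mid-struct (the struct and functions here are simplified stand-ins, not the kernel's sched_dispatch_table):

#include <stdio.h>

struct dispatch {
    void (*init)(void);
    int  (*runq_count)(void);
    int  (*bound_count)(void);   /* a member added after the fact */
};

static void proto_init(void)        { printf("init\n"); }
static int  proto_runq_count(void)  { return 0; }
static int  proto_bound_count(void) { return 0; }

/* Designated initializers bind each function by member name, so
 * adding or reordering members cannot silently shift every later
 * pointer the way a positional initializer list can, and any member
 * left out is NULL rather than wrongly assigned. */
static const struct dispatch proto_dispatch = {
    .init        = proto_init,
    .runq_count  = proto_runq_count,
    .bound_count = proto_bound_count,
};

int main(void)
{
    proto_dispatch.init();
    return proto_dispatch.runq_count() + proto_dispatch.bound_count();
}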
static thread_t sched_proto_steal_thread(processor_set_t pset); @@ -130,9 +130,6 @@ sched_proto_initial_quantum_size(thread_t thread); static sched_mode_t sched_proto_initial_thread_sched_mode(task_t parent_task); -static boolean_t -sched_proto_supports_timeshare_mode(void); - static boolean_t sched_proto_can_update_priority(thread_t thread); @@ -154,40 +151,48 @@ sched_proto_processor_runq_count(processor_t processor); static uint64_t sched_proto_processor_runq_stats_count_sum(processor_t processor); +static int +sched_proto_processor_bound_count(processor_t processor); + +static void +sched_proto_thread_update_scan(void); + + const struct sched_dispatch_table sched_proto_dispatch = { - sched_proto_init, - sched_proto_timebase_init, - sched_proto_processor_init, - sched_proto_pset_init, - sched_proto_maintenance_continuation, - sched_proto_choose_thread, - sched_proto_steal_thread, - sched_proto_compute_priority, - sched_proto_choose_processor, - sched_proto_processor_enqueue, - sched_proto_processor_queue_shutdown, - sched_proto_processor_queue_remove, - sched_proto_processor_queue_empty, - sched_proto_priority_is_urgent, - sched_proto_processor_csw_check, - sched_proto_processor_queue_has_priority, - sched_proto_initial_quantum_size, - sched_proto_initial_thread_sched_mode, - sched_proto_supports_timeshare_mode, - sched_proto_can_update_priority, - sched_proto_update_priority, - sched_proto_lightweight_update_priority, - sched_proto_quantum_expire, - sched_proto_should_current_thread_rechoose_processor, - sched_proto_processor_runq_count, - sched_proto_processor_runq_stats_count_sum, - sched_traditional_fairshare_init, - sched_traditional_fairshare_runq_count, - sched_traditional_fairshare_runq_stats_count_sum, - sched_traditional_fairshare_enqueue, - sched_traditional_fairshare_dequeue, - sched_traditional_fairshare_queue_remove, - TRUE /* direct_dispatch_to_idle_processors */ + .init = sched_proto_init, + .timebase_init = sched_proto_timebase_init, + .processor_init = sched_proto_processor_init, + .pset_init = sched_proto_pset_init, + .maintenance_continuation = sched_proto_maintenance_continuation, + .choose_thread = sched_proto_choose_thread, + .steal_thread = sched_proto_steal_thread, + .compute_priority = sched_proto_compute_priority, + .choose_processor = sched_proto_choose_processor, + .processor_enqueue = sched_proto_processor_enqueue, + .processor_queue_shutdown = sched_proto_processor_queue_shutdown, + .processor_queue_remove = sched_proto_processor_queue_remove, + .processor_queue_empty = sched_proto_processor_queue_empty, + .priority_is_urgent = sched_proto_priority_is_urgent, + .processor_csw_check = sched_proto_processor_csw_check, + .processor_queue_has_priority = sched_proto_processor_queue_has_priority, + .initial_quantum_size = sched_proto_initial_quantum_size, + .initial_thread_sched_mode = sched_proto_initial_thread_sched_mode, + .can_update_priority = sched_proto_can_update_priority, + .update_priority = sched_proto_update_priority, + .lightweight_update_priority = sched_proto_lightweight_update_priority, + .quantum_expire = sched_proto_quantum_expire, + .should_current_thread_rechoose_processor = sched_proto_should_current_thread_rechoose_processor, + .processor_runq_count = sched_proto_processor_runq_count, + .processor_runq_stats_count_sum = sched_proto_processor_runq_stats_count_sum, + .fairshare_init = sched_traditional_fairshare_init, + .fairshare_runq_count = sched_traditional_fairshare_runq_count, + .fairshare_runq_stats_count_sum = 
sched_traditional_fairshare_runq_stats_count_sum, + .fairshare_enqueue = sched_traditional_fairshare_enqueue, + .fairshare_dequeue = sched_traditional_fairshare_dequeue, + .fairshare_queue_remove = sched_traditional_fairshare_queue_remove, + .processor_bound_count = sched_proto_processor_bound_count, + .thread_update_scan = sched_proto_thread_update_scan, + .direct_dispatch_to_idle_processors = TRUE, }; static struct run_queue *global_runq; @@ -291,7 +296,8 @@ sched_proto_maintenance_continuation(void) static thread_t sched_proto_choose_thread(processor_t processor, - int priority) + int priority, + ast_t reason __unused) { run_queue_t rq = global_runq; queue_t queue; @@ -539,12 +545,6 @@ sched_proto_initial_thread_sched_mode(task_t parent_task) return TH_MODE_TIMESHARE; } -static boolean_t -sched_proto_supports_timeshare_mode(void) -{ - return TRUE; -} - static boolean_t sched_proto_can_update_priority(thread_t thread __unused) { @@ -595,3 +595,17 @@ sched_proto_processor_runq_stats_count_sum(processor_t processor) } } +static int +sched_proto_processor_bound_count(__unused processor_t processor) +{ + return 0; +} + +static void +sched_proto_thread_update_scan(void) +{ + +} + + + diff --git a/osfmk/kern/security.c b/osfmk/kern/security.c deleted file mode 100644 index 948803887..000000000 --- a/osfmk/kern/security.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (c) 2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 2005-2007 SPARTA, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if CONFIG_MACF_MACH -kern_return_t -mach_get_task_label( - ipc_space_t space, - mach_port_name_t *outlabel) -{ - kern_return_t kr; - ipc_labelh_t lh; - - if (space == IS_NULL || space->is_task == NULL) - return KERN_INVALID_TASK; - - lh = space->is_task->label; - ip_lock(lh->lh_port); - lh->lh_port->ip_mscount++; - lh->lh_port->ip_srights++; - ip_reference(lh->lh_port); - ip_unlock(lh->lh_port); - kr = ipc_object_copyout(space, (ipc_object_t) lh->lh_port, - MACH_MSG_TYPE_PORT_SEND, 0, outlabel); - if (kr != KERN_SUCCESS) { - ip_release(lh->lh_port); - *outlabel = MACH_PORT_NULL; - } - - return (KERN_SUCCESS); -} -#else -kern_return_t -mach_get_task_label( - ipc_space_t space __unused, - mach_port_name_t *outlabel __unused) -{ - return KERN_FAILURE; -} -#endif - -#if CONFIG_MACF_MACH -kern_return_t -mach_get_task_label_text( - ipc_space_t space, - labelstr_t policies, - labelstr_t outl) -{ - - if (space == IS_NULL || space->is_task == NULL) - return KERN_INVALID_TASK; - - tasklabel_lock(space->is_task); - mac_task_label_externalize(&space->is_task->maclabel, policies, outl, - 512, 0); - tasklabel_unlock(space->is_task); - - return KERN_SUCCESS; -} -#else -kern_return_t -mach_get_task_label_text( - ipc_space_t space __unused, - labelstr_t policies __unused, - labelstr_t outl __unused) -{ - return KERN_FAILURE; -} -#endif - -#if CONFIG_MACF_MACH -int -mac_task_check_service( - task_t self, - task_t obj, - const char * perm) -{ - tasklabel_lock2(self, obj); - - int rc = mac_port_check_service( - &self->maclabel, &obj->maclabel, - "mach_task", perm); - - tasklabel_unlock2(self, obj); - - return rc; -} -#else -int -mac_task_check_service( - task_t self __unused, - task_t obj __unused, - const char * perm __unused) -{ - return KERN_SUCCESS; -} -#endif - -#if CONFIG_MACF_MACH -kern_return_t -mac_check_service( - __unused ipc_space_t space, - labelstr_t subj, - labelstr_t obj, - labelstr_t serv, - labelstr_t perm) -{ - struct label subjl, objl; - - mac_task_label_init(&subjl); - int rc = mac_port_label_internalize(&subjl, subj); - if (rc) { - mac_task_label_destroy(&subjl); - return KERN_INVALID_ARGUMENT; - } - mac_task_label_init(&objl); - rc = mac_port_label_internalize(&objl, obj); - if (rc) { - mac_task_label_destroy(&subjl); - mac_task_label_destroy(&objl); - return KERN_INVALID_ARGUMENT; - } - - rc = mac_port_check_service(&subjl, &objl, serv, perm); - mac_task_label_destroy(&subjl); - mac_task_label_destroy(&objl); - - return rc ? 
KERN_NO_ACCESS : KERN_SUCCESS; -} -#else -kern_return_t -mac_check_service( - __unused ipc_space_t space, - __unused labelstr_t subj, - __unused labelstr_t obj, - __unused labelstr_t serv, - __unused labelstr_t perm) -{ - return KERN_FAILURE; -} -#endif - -#if CONFIG_MACF_MACH -kern_return_t -mac_port_check_service_obj( - ipc_space_t space, - labelstr_t subj, - mach_port_name_t obj, - labelstr_t serv, - labelstr_t perm) -{ - struct label subjl; - ipc_entry_t entry; - ipc_object_t objp; - kern_return_t kr; - struct label *objl; - int dead; - - if (space == IS_NULL || space->is_task == NULL) - return KERN_INVALID_TASK; - - if (!MACH_PORT_VALID(obj)) - return KERN_INVALID_NAME; - - mac_task_label_init(&subjl); - int rc = mac_port_label_internalize(&subjl, subj); - if (rc) { - mac_task_label_destroy(&subjl); - return KERN_INVALID_ARGUMENT; - } - - kr = ipc_right_lookup_write(space, obj, &entry); - if (kr != KERN_SUCCESS) { - mac_task_label_destroy(&subjl); - return kr; - } - - objp = entry->ie_object; - port = (ipc_port_t)objp; - dead = ipc_right_check(space, port, obj, entry); - if (dead) { - is_write_unlock(space); - ip_release(port); - mac_task_label_destroy(&subjl); - return KERN_INVALID_RIGHT; - } - - io_lock (objp); - is_write_unlock (space); - - objl = io_getlabel(objp); - if (objl == NULL) { - io_unlock(objp); - return KERN_INVALID_ARGUMENT; - } - - rc = mac_port_check_service(&subjl, objl, serv, perm); - io_unlocklabel(objp); - io_unlock (objp); - - mac_task_label_destroy(&subjl); - return rc ? KERN_NO_ACCESS : KERN_SUCCESS; -} -#else -kern_return_t -mac_port_check_service_obj( - __unused ipc_space_t space, - __unused labelstr_t subj, - __unused mach_port_name_t obj, - __unused labelstr_t serv, - __unused labelstr_t perm) -{ - return KERN_FAILURE; -} -#endif - -#if CONFIG_MACF_MACH -kern_return_t -mac_port_check_access( - ipc_space_t space, - mach_port_name_t sub, - mach_port_name_t obj, - labelstr_t serv, - labelstr_t perm) -{ - ipc_entry_t subi, obji; - ipc_object_t subp, objp; - kern_return_t kr; - struct label *objl, *subl; - int rc; - - if (space == IS_NULL || space->is_task == NULL) - return KERN_INVALID_TASK; - - if (!MACH_PORT_VALID(obj) || !MACH_PORT_VALID(sub)) - return KERN_INVALID_NAME; - - kr = ipc_right_lookup_two_write(space, obj, &obji, sub, &subi); - if (kr != KERN_SUCCESS) - return kr; - - objp = obji->ie_object; - subp = subi->ie_object; - - ipc_port_multiple_lock(); /* serialize (not necessary for LH, but simpler) */ - io_lock(objp); - io_lock(subp); - is_write_unlock (space); - - objl = io_getlabel(objp); - if (objl == NULL) - goto errout; - subl = io_getlabel(subp); - if (subl == NULL) - goto errout; - - rc = mac_port_check_service(subl, objl, serv, perm); - io_unlocklabel(subp); - io_unlock(subp); - io_unlocklabel(objp); - io_unlock(objp); - ipc_port_multiple_unlock(); - - return rc ? 
KERN_NO_ACCESS : KERN_SUCCESS; - -errout: - io_unlocklabel(subp); - io_unlock(subp); - io_unlocklabel(objp); - io_unlock(objp); - ipc_port_multiple_unlock(); - return KERN_INVALID_ARGUMENT; -} -#else -kern_return_t -mac_port_check_access( - __unused ipc_space_t space, - __unused mach_port_name_t sub, - __unused mach_port_name_t obj, - __unused labelstr_t serv, - __unused labelstr_t perm) -{ - return KERN_FAILURE; -} -#endif - -#if CONFIG_MACF_MACH -kern_return_t -mac_request_label( - ipc_space_t space, - mach_port_name_t sub, - mach_port_name_t obj, - labelstr_t serv, - mach_port_name_t *outlabel) -{ - ipc_entry_t subi, obji; - ipc_object_t subp, objp; - kern_return_t kr; - struct label *objl, *subl, outl; - int rc; - - if (space == IS_NULL || space->is_task == NULL) - return KERN_INVALID_TASK; - - if (!MACH_PORT_VALID(obj) || !MACH_PORT_VALID(sub)) - return KERN_INVALID_NAME; - - kr = ipc_right_lookup_two_write(space, obj, &obji, sub, &subi); - if (kr != KERN_SUCCESS) - return kr; - - objp = obji->ie_object; - subp = subi->ie_object; - - ipc_port_multiple_lock(); /* serialize (not necessary for LH, but simpler) */ - io_lock(objp); - io_lock(subp); - is_write_unlock (space); - - objl = io_getlabel(objp); - if (objl == NULL) - goto errout; - subl = io_getlabel(subp); - if (subl == NULL) - goto errout; - - mac_port_label_init(&outl); - rc = mac_port_label_compute(subl, objl, serv, &outl); - io_unlocklabel(subp); - io_unlock(subp); - io_unlocklabel(objp); - io_unlock(objp); - ipc_port_multiple_unlock(); - - if (rc == 0) - kr = labelh_new_user(space, &outl, outlabel); - else - kr = KERN_NO_ACCESS; - - if (kr != KERN_SUCCESS) - mac_port_label_destroy(&outl); - - return kr; - -errout: - io_unlocklabel(subp); - io_unlock(subp); - io_unlocklabel(objp); - io_unlock(objp); - ipc_port_multiple_unlock(); - return KERN_INVALID_ARGUMENT; -} -#else /* !MAC_MACH */ - -kern_return_t -mac_request_label( - __unused ipc_space_t space, - __unused mach_port_name_t sub, - __unused mach_port_name_t obj, - __unused labelstr_t serv, - __unused mach_port_name_t *outlabel) -{ - return KERN_FAILURE; -} - -#endif /* MAC_MACH */ diff --git a/osfmk/kern/sfi.c b/osfmk/kern/sfi.c new file mode 100644 index 000000000..85055d027 --- /dev/null +++ b/osfmk/kern/sfi.c @@ -0,0 +1,1006 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define SFI_DEBUG 0 + +#if SFI_DEBUG +#define dprintf(...) kprintf(__VA_ARGS__) +#else +#define dprintf(...) do { } while(0) +#endif + +#ifdef MACH_BSD +extern sched_call_t workqueue_get_sched_callback(void); +#endif /* MACH_BSD */ + +/* + * SFI (Selective Forced Idle) operates by enabling a global + * timer on the SFI window interval. When it fires, all processors + * running a thread that should be SFI-ed are sent an AST. + * As threads become runnable while in their "off phase", they + * are placed on a deferred ready queue. When a per-class + * "on timer" fires, the ready threads for that class are + * re-enqueued for running. As an optimization to avoid spurious + * wakeups, the timer may be lazily programmed. + */ + +/* + * The "sfi_lock" simple lock guards access to static configuration + * parameters (as specified by userspace), dynamic state changes + * (as updated by the timer event routine), and timer data structures. + * Since it can be taken with interrupts disabled in some cases, all + * uses should be taken with interrupts disabled at splsched(). The + * "sfi_lock" also guards the "sfi_wait_class" field of thread_t, and + * must only be accessed with it held. + * + * When an "on timer" fires, we must deterministically be able to drain + * the wait queue, since if any threads are added to the queue afterwards, + * they may never get woken out of SFI wait. So sfi_lock must be + * taken before the wait queue's own spinlock. + * + * The wait queue will take the thread's scheduling lock. We may also take + * the thread_lock directly to update the "sfi_class" field and determine + * if the thread should block in the wait queue, but the lock will be + * released before doing so. + * + * The pset lock may also be taken, but not while any other locks are held. + * + * splsched ---> sfi_lock ---> wait_queue ---> thread_lock + * \ \ \__ thread_lock (*) + * \ \__ pset_lock + * \ + * \__ thread_lock + */ + +decl_simple_lock_data(static,sfi_lock); +static timer_call_data_t sfi_timer_call_entry; +volatile boolean_t sfi_is_enabled; + +boolean_t sfi_window_is_set; +uint64_t sfi_window_usecs; +uint64_t sfi_window_interval; +uint64_t sfi_next_off_deadline; + +typedef struct { + sfi_class_id_t class_id; + thread_continue_t class_continuation; + const char * class_name; + const char * class_ledger_name; +} sfi_class_registration_t; + +/* + * To add a new SFI class: + * + * 1) Raise MAX_SFI_CLASS_ID in mach/sfi_class.h + * 2) Add a #define for it to mach/sfi_class.h. It need not be inserted in order of restrictiveness. + * 3) Add a call to SFI_CLASS_REGISTER below + * 4) Augment sfi_thread_classify to categorize threads as early as possible for as restrictive as possible. + * 5) Modify thermald to use the SFI class + */ + +static inline void _sfi_wait_cleanup(sched_call_t callback); + +#define SFI_CLASS_REGISTER(class_id, ledger_name) \ +extern char compile_time_assert_ ## class_id[SFI_CLASS_ ## class_id < MAX_SFI_CLASS_ID ? 
1 : -1]; \ +void __attribute__((noinline,noreturn)) SFI_ ## class_id ## _THREAD_IS_WAITING(void *callback, wait_result_t wret __unused); \ +void SFI_ ## class_id ## _THREAD_IS_WAITING(void *callback, wait_result_t wret __unused) \ +{ \ + _sfi_wait_cleanup(callback); \ + thread_exception_return(); \ +} \ + \ +sfi_class_registration_t SFI_ ## class_id ## _registration __attribute__((section("__DATA,__sfi_class_reg"),used)) = { SFI_CLASS_ ## class_id, SFI_ ## class_id ## _THREAD_IS_WAITING, "SFI_CLASS_" # class_id, "SFI_CLASS_" # ledger_name }; + +/* SFI_CLASS_UNSPECIFIED not included here */ +SFI_CLASS_REGISTER(MAINTENANCE, MAINTENANCE) +SFI_CLASS_REGISTER(DARWIN_BG, DARWIN_BG) +SFI_CLASS_REGISTER(APP_NAP, APP_NAP) +SFI_CLASS_REGISTER(MANAGED_FOCAL, MANAGED) +SFI_CLASS_REGISTER(MANAGED_NONFOCAL, MANAGED) +SFI_CLASS_REGISTER(UTILITY, UTILITY) +SFI_CLASS_REGISTER(DEFAULT_FOCAL, DEFAULT) +SFI_CLASS_REGISTER(DEFAULT_NONFOCAL, DEFAULT) +SFI_CLASS_REGISTER(LEGACY_FOCAL, LEGACY) +SFI_CLASS_REGISTER(LEGACY_NONFOCAL, LEGACY) +SFI_CLASS_REGISTER(USER_INITIATED_FOCAL, USER_INITIATED) +SFI_CLASS_REGISTER(USER_INITIATED_NONFOCAL, USER_INITIATED) +SFI_CLASS_REGISTER(USER_INTERACTIVE_FOCAL, USER_INTERACTIVE) +SFI_CLASS_REGISTER(USER_INTERACTIVE_NONFOCAL, USER_INTERACTIVE) +SFI_CLASS_REGISTER(KERNEL, OPTED_OUT) +SFI_CLASS_REGISTER(OPTED_OUT, OPTED_OUT) + +struct sfi_class_state { + uint64_t off_time_usecs; + uint64_t off_time_interval; + + timer_call_data_t on_timer; + boolean_t on_timer_programmed; + + boolean_t class_sfi_is_enabled; + volatile boolean_t class_in_on_phase; + + struct wait_queue wait_queue; /* threads in ready state */ + thread_continue_t continuation; + + const char * class_name; + const char * class_ledger_name; +}; + +/* Static configuration performed in sfi_early_init() */ +struct sfi_class_state sfi_classes[MAX_SFI_CLASS_ID]; + +int sfi_enabled_class_count; + +static void sfi_timer_global_off( + timer_call_param_t param0, + timer_call_param_t param1); + +static void sfi_timer_per_class_on( + timer_call_param_t param0, + timer_call_param_t param1); + +static sfi_class_registration_t * +sfi_get_registration_data(unsigned long *count) +{ + unsigned long sectlen = 0; + void *sectdata; + + sectdata = getsectdatafromheader(&_mh_execute_header, "__DATA", "__sfi_class_reg", §len); + if (sectdata) { + + if (sectlen % sizeof(sfi_class_registration_t) != 0) { + /* corrupt data? 
*/ + panic("__sfi_class_reg section has invalid size %lu", sectlen); + __builtin_unreachable(); + } + + *count = sectlen / sizeof(sfi_class_registration_t); + return (sfi_class_registration_t *)sectdata; + } else { + panic("__sfi_class_reg section not found"); + __builtin_unreachable(); + } +} + +/* Called early in boot, when kernel is single-threaded */ +void sfi_early_init(void) +{ + unsigned long i, count; + sfi_class_registration_t *registrations; + + registrations = sfi_get_registration_data(&count); + for (i=0; i < count; i++) { + sfi_class_id_t class_id = registrations[i].class_id; + + assert(class_id < MAX_SFI_CLASS_ID); /* should be caught at compile-time */ + if (class_id < MAX_SFI_CLASS_ID) { + if (sfi_classes[class_id].continuation != NULL) { + panic("Duplicate SFI registration for class 0x%x", class_id); + } + sfi_classes[class_id].class_sfi_is_enabled = FALSE; + sfi_classes[class_id].class_in_on_phase = TRUE; + sfi_classes[class_id].continuation = registrations[i].class_continuation; + sfi_classes[class_id].class_name = registrations[i].class_name; + sfi_classes[class_id].class_ledger_name = registrations[i].class_ledger_name; + } + } +} + +void sfi_init(void) +{ + sfi_class_id_t i; + kern_return_t kret; + + simple_lock_init(&sfi_lock, 0); + timer_call_setup(&sfi_timer_call_entry, sfi_timer_global_off, NULL); + sfi_window_is_set = FALSE; + sfi_enabled_class_count = 0; + sfi_is_enabled = FALSE; + + for (i = 0; i < MAX_SFI_CLASS_ID; i++) { + /* If the class was set up in sfi_early_init(), initialize remaining fields */ + if (sfi_classes[i].continuation) { + timer_call_setup(&sfi_classes[i].on_timer, sfi_timer_per_class_on, (void *)(uintptr_t)i); + sfi_classes[i].on_timer_programmed = FALSE; + + kret = wait_queue_init(&sfi_classes[i].wait_queue, SYNC_POLICY_FIFO); + assert(kret == KERN_SUCCESS); + } else { + /* The only allowed gap is for SFI_CLASS_UNSPECIFIED */ + if(i != SFI_CLASS_UNSPECIFIED) { + panic("Gap in registered SFI classes"); + } + } + } +} + +/* Can be called before sfi_init() by task initialization, but after sfi_early_init() */ +sfi_class_id_t +sfi_get_ledger_alias_for_class(sfi_class_id_t class_id) +{ + sfi_class_id_t i; + const char *ledger_name = NULL; + + ledger_name = sfi_classes[class_id].class_ledger_name; + + /* Find the first class in the registration table with this ledger name */ + if (ledger_name) { + for (i = SFI_CLASS_UNSPECIFIED + 1; i < class_id; i++) { + if (0 == strcmp(sfi_classes[i].class_ledger_name, ledger_name)) { + dprintf("sfi_get_ledger_alias_for_class(0x%x) -> 0x%x\n", class_id, i); + return i; + } + } + + /* This class is the primary one for the ledger, so there is no alias */ + dprintf("sfi_get_ledger_alias_for_class(0x%x) -> 0x%x\n", class_id, SFI_CLASS_UNSPECIFIED); + return SFI_CLASS_UNSPECIFIED; + } + + /* We are permissive on SFI class lookup failures. 
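+ * (Illustrative example, inferred from the registrations above: SFI_CLASS_MANAGED_FOCAL
+ * and SFI_CLASS_MANAGED_NONFOCAL both register the "MANAGED" ledger name, so the
+ * nonfocal class resolves to the focal one as its alias and the two share a single
+ * ledger entry.)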
In sfi_init(), we assert more */ + return SFI_CLASS_UNSPECIFIED; +} + +int +sfi_ledger_entry_add(ledger_template_t template, sfi_class_id_t class_id) +{ + const char *ledger_name = NULL; + + ledger_name = sfi_classes[class_id].class_ledger_name; + + dprintf("sfi_ledger_entry_add(%p, 0x%x) -> %s\n", template, class_id, ledger_name); + return ledger_entry_add(template, ledger_name, "sfi", "MATUs"); +} + +static void sfi_timer_global_off( + timer_call_param_t param0 __unused, + timer_call_param_t param1 __unused) +{ + uint64_t now = mach_absolute_time(); + sfi_class_id_t i; + processor_set_t pset, nset; + processor_t processor; + uint32_t needs_cause_ast_mask = 0x0; + spl_t s; + + s = splsched(); + + simple_lock(&sfi_lock); + if (!sfi_is_enabled) { + /* If SFI has been disabled, let all "on" timers drain naturally */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_OFF_TIMER) | DBG_FUNC_NONE, 1, 0, 0, 0, 0); + + simple_unlock(&sfi_lock); + splx(s); + return; + } + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_OFF_TIMER) | DBG_FUNC_START, 0, 0, 0, 0, 0); + + /* First set all configured classes into the off state, and program their "on" timer */ + for (i = 0; i < MAX_SFI_CLASS_ID; i++) { + if (sfi_classes[i].class_sfi_is_enabled) { + uint64_t on_timer_deadline; + + sfi_classes[i].class_in_on_phase = FALSE; + sfi_classes[i].on_timer_programmed = TRUE; + + /* Push out on-timer */ + on_timer_deadline = now + sfi_classes[i].off_time_interval; + timer_call_enter1(&sfi_classes[i].on_timer, NULL, on_timer_deadline, TIMER_CALL_SYS_CRITICAL); + } else { + /* If this class no longer needs SFI, make sure the timer is cancelled */ + sfi_classes[i].class_in_on_phase = TRUE; + if (sfi_classes[i].on_timer_programmed) { + sfi_classes[i].on_timer_programmed = FALSE; + timer_call_cancel(&sfi_classes[i].on_timer); + } + } + } + simple_unlock(&sfi_lock); + + /* Iterate over processors, call cause_ast_check() on ones running a thread that should be in an off phase */ + processor = processor_list; + pset = processor->processor_set; + + pset_lock(pset); + + do { + nset = processor->processor_set; + if (nset != pset) { + pset_unlock(pset); + pset = nset; + pset_lock(pset); + } + + /* "processor" and its pset are locked */ + if (processor->state == PROCESSOR_RUNNING) { + if (AST_NONE != sfi_processor_needs_ast(processor)) { + needs_cause_ast_mask |= (1U << processor->cpu_id); + } + } + } while ((processor = processor->processor_list) != NULL); + + pset_unlock(pset); + + processor = processor_list; + do { + if (needs_cause_ast_mask & (1U << processor->cpu_id)) { + if (processor == current_processor()) + ast_on(AST_SFI); + else + cause_ast_check(processor); + } + } while ((processor = processor->processor_list) != NULL); + + /* Re-arm timer if still enabled */ + simple_lock(&sfi_lock); + if (sfi_is_enabled) { + clock_deadline_for_periodic_event(sfi_window_interval, + now, + &sfi_next_off_deadline); + timer_call_enter1(&sfi_timer_call_entry, + NULL, + sfi_next_off_deadline, + TIMER_CALL_SYS_CRITICAL); + } + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_OFF_TIMER) | DBG_FUNC_END, 0, 0, 0, 0, 0); + + simple_unlock(&sfi_lock); + + splx(s); +} + +static void sfi_timer_per_class_on( + timer_call_param_t param0, + timer_call_param_t param1 __unused) +{ + sfi_class_id_t sfi_class_id = (sfi_class_id_t)(uintptr_t)param0; + struct sfi_class_state *sfi_class = &sfi_classes[sfi_class_id]; + kern_return_t kret; + spl_t s; + + s = splsched(); + + simple_lock(&sfi_lock); + + 
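+ /* With sfi_lock held, flip this class to its "on" phase and wake any threads parked on its wait queue. */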
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_ON_TIMER) | DBG_FUNC_START, sfi_class_id, 0, 0, 0, 0); + + /* + * Any threads that may have accumulated in the ready queue for this class should get re-enqueued. + * Since we have the sfi_lock held and have changed "class_in_on_phase", we expect + * no new threads to be put on this wait queue until the global "off timer" has fired. + */ + sfi_class->class_in_on_phase = TRUE; + kret = wait_queue_wakeup64_all(&sfi_class->wait_queue, + CAST_EVENT64_T(sfi_class_id), + THREAD_AWAKENED); + assert(kret == KERN_SUCCESS || kret == KERN_NOT_WAITING); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_ON_TIMER) | DBG_FUNC_END, 0, 0, 0, 0, 0); + + simple_unlock(&sfi_lock); + + splx(s); +} + + +kern_return_t sfi_set_window(uint64_t window_usecs) +{ + uint64_t interval, deadline; + uint64_t now = mach_absolute_time(); + sfi_class_id_t i; + spl_t s; + uint64_t largest_class_off_interval = 0; + + if (window_usecs < MIN_SFI_WINDOW_USEC) + window_usecs = MIN_SFI_WINDOW_USEC; + + if (window_usecs > UINT32_MAX) + return (KERN_INVALID_ARGUMENT); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_SET_WINDOW), window_usecs, 0, 0, 0, 0); + + clock_interval_to_absolutetime_interval((uint32_t)window_usecs, NSEC_PER_USEC, &interval); + deadline = now + interval; + + s = splsched(); + + simple_lock(&sfi_lock); + + /* Check that we are not bringing in the SFI window smaller than any class */ + for (i = 0; i < MAX_SFI_CLASS_ID; i++) { + if (sfi_classes[i].class_sfi_is_enabled) { + largest_class_off_interval = MAX(largest_class_off_interval, sfi_classes[i].off_time_interval); + } + } + + /* + * Off window must be strictly greater than all enabled classes, + * otherwise threads would build up on ready queue and never be able to run. + */ + if (interval <= largest_class_off_interval) { + simple_unlock(&sfi_lock); + splx(s); + return (KERN_INVALID_ARGUMENT); + } + + /* + * If the new "off" deadline is further out than the current programmed timer, + * just let the current one expire (and the new cadence will be established thereafter). + * If the new "off" deadline is nearer than the current one, bring it in, so we + * can start the new behavior sooner. Note that this may cause the "off" timer to + * fire before some of the class "on" timers have fired. 
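+ * (Worked example, for illustration: if the window is reduced from 1000ms to 100ms
+ * while the current "off" deadline is still 900ms out, we pull the deadline in so
+ * the new cadence begins within 100ms rather than 900ms.)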
+ */ + sfi_window_usecs = window_usecs; + sfi_window_interval = interval; + sfi_window_is_set = TRUE; + + if (sfi_enabled_class_count == 0) { + /* Can't program timer yet */ + } else if (!sfi_is_enabled) { + sfi_is_enabled = TRUE; + sfi_next_off_deadline = deadline; + timer_call_enter1(&sfi_timer_call_entry, + NULL, + sfi_next_off_deadline, + TIMER_CALL_SYS_CRITICAL); + } else if (deadline >= sfi_next_off_deadline) { + sfi_next_off_deadline = deadline; + } else { + sfi_next_off_deadline = deadline; + timer_call_enter1(&sfi_timer_call_entry, + NULL, + sfi_next_off_deadline, + TIMER_CALL_SYS_CRITICAL); + } + + simple_unlock(&sfi_lock); + splx(s); + + return (KERN_SUCCESS); +} + +kern_return_t sfi_window_cancel(void) +{ + spl_t s; + + s = splsched(); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_CANCEL_WINDOW), 0, 0, 0, 0, 0); + + /* Disable globals so that global "off-timer" is not re-armed */ + simple_lock(&sfi_lock); + sfi_window_is_set = FALSE; + sfi_window_usecs = 0; + sfi_window_interval = 0; + sfi_next_off_deadline = 0; + sfi_is_enabled = FALSE; + simple_unlock(&sfi_lock); + + splx(s); + + return (KERN_SUCCESS); +} + + +kern_return_t sfi_get_window(uint64_t *window_usecs) +{ + spl_t s; + uint64_t off_window_us; + + s = splsched(); + simple_lock(&sfi_lock); + + off_window_us = sfi_window_usecs; + + simple_unlock(&sfi_lock); + splx(s); + + *window_usecs = off_window_us; + + return (KERN_SUCCESS); +} + + +kern_return_t sfi_set_class_offtime(sfi_class_id_t class_id, uint64_t offtime_usecs) +{ + uint64_t interval; + spl_t s; + uint64_t off_window_interval; + + if (offtime_usecs < MIN_SFI_WINDOW_USEC) + offtime_usecs = MIN_SFI_WINDOW_USEC; + + if (class_id == SFI_CLASS_UNSPECIFIED || class_id >= MAX_SFI_CLASS_ID) + return (KERN_INVALID_ARGUMENT); + + if (offtime_usecs > UINT32_MAX) + return (KERN_INVALID_ARGUMENT); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_SET_CLASS_OFFTIME), offtime_usecs, class_id, 0, 0, 0); + + clock_interval_to_absolutetime_interval((uint32_t)offtime_usecs, NSEC_PER_USEC, &interval); + + s = splsched(); + + simple_lock(&sfi_lock); + off_window_interval = sfi_window_interval; + + /* Check that we are not bringing in class off-time larger than the SFI window */ + if (off_window_interval && (interval >= off_window_interval)) { + simple_unlock(&sfi_lock); + splx(s); + return (KERN_INVALID_ARGUMENT); + } + + /* We never re-program the per-class on-timer, but rather just let it expire naturally */ + if (!sfi_classes[class_id].class_sfi_is_enabled) { + sfi_enabled_class_count++; + } + sfi_classes[class_id].off_time_usecs = offtime_usecs; + sfi_classes[class_id].off_time_interval = interval; + sfi_classes[class_id].class_sfi_is_enabled = TRUE; + + if (sfi_window_is_set && !sfi_is_enabled) { + /* start global off timer */ + sfi_is_enabled = TRUE; + sfi_next_off_deadline = mach_absolute_time() + sfi_window_interval; + timer_call_enter1(&sfi_timer_call_entry, + NULL, + sfi_next_off_deadline, + TIMER_CALL_SYS_CRITICAL); + } + + simple_unlock(&sfi_lock); + + splx(s); + + return (KERN_SUCCESS); +} + +kern_return_t sfi_class_offtime_cancel(sfi_class_id_t class_id) +{ + spl_t s; + + if (class_id == SFI_CLASS_UNSPECIFIED || class_id >= MAX_SFI_CLASS_ID) + return (KERN_INVALID_ARGUMENT); + + s = splsched(); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_CANCEL_CLASS_OFFTIME), class_id, 0, 0, 0, 0); + + simple_lock(&sfi_lock); + + /* We never re-program the per-class on-timer, but rather just let it expire naturally */ + if 
(sfi_classes[class_id].class_sfi_is_enabled) { + sfi_enabled_class_count--; + } + sfi_classes[class_id].off_time_usecs = 0; + sfi_classes[class_id].off_time_interval = 0; + sfi_classes[class_id].class_sfi_is_enabled = FALSE; + + if (sfi_enabled_class_count == 0) { + sfi_is_enabled = FALSE; + } + + simple_unlock(&sfi_lock); + + splx(s); + + return (KERN_SUCCESS); +} + +kern_return_t sfi_get_class_offtime(sfi_class_id_t class_id, uint64_t *offtime_usecs) +{ + uint64_t off_time_us; + spl_t s; + + if (class_id == SFI_CLASS_UNSPECIFIED || class_id >= MAX_SFI_CLASS_ID) + return (KERN_INVALID_ARGUMENT); + + s = splsched(); + + simple_lock(&sfi_lock); + off_time_us = sfi_classes[class_id].off_time_usecs; + simple_unlock(&sfi_lock); + + splx(s); + + *offtime_usecs = off_time_us; + + return (KERN_SUCCESS); +} + +/* + * sfi_thread_classify and sfi_processor_active_thread_classify perform the critical + * role of quickly categorizing a thread into its SFI class so that an AST_SFI can be + * set. As the thread is unwinding to userspace, sfi_ast() performs full locking + * and determines whether the thread should enter an SFI wait state. Because of + * the inherent races between the time the AST is set and when it is evaluated, + * thread classification can be inaccurate (but should always be safe). This is + * especially the case for sfi_processor_active_thread_classify, which must + * classify the active thread on a remote processor without taking the thread lock. + * When in doubt, classification should err on the side of *not* classifying a + * thread at all, and wait for the thread itself to either hit a quantum expiration + * or block inside the kernel. + */ + +/* + * Thread must be locked. Ultimately, the real decision to enter + * SFI wait happens at the AST boundary. + */ +sfi_class_id_t sfi_thread_classify(thread_t thread) +{ + task_t task = thread->task; + boolean_t is_kernel_thread = (task == kernel_task); + sched_mode_t thmode = thread->sched_mode; + int latency_qos = proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS); + int task_role = proc_get_effective_task_policy(task, TASK_POLICY_ROLE); + int thread_bg = proc_get_effective_thread_policy(thread, TASK_POLICY_DARWIN_BG); + int managed_task = proc_get_effective_task_policy(task, TASK_POLICY_SFI_MANAGED); + int thread_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS); + + /* kernel threads never reach the user AST boundary, and are in a separate world for SFI */ + if (is_kernel_thread) { + return SFI_CLASS_KERNEL; + } + + if (thread_qos == THREAD_QOS_MAINTENANCE) + return SFI_CLASS_MAINTENANCE; + + if (thread_bg || thread_qos == THREAD_QOS_BACKGROUND) { + return SFI_CLASS_DARWIN_BG; + } + + if (latency_qos != 0) { + int latency_qos_wtf = latency_qos - 1; + + if ((latency_qos_wtf >= 4) && (latency_qos_wtf <= 5)) { + return SFI_CLASS_APP_NAP; + } + } + + /* + * Realtime and fixed priority threads express their duty cycle constraints + * via other mechanisms, and are opted out of (most) forms of SFI + */ + if (thmode == TH_MODE_REALTIME || thmode == TH_MODE_FIXED || task_role == TASK_GRAPHICS_SERVER) { + return SFI_CLASS_OPTED_OUT; + } + + /* + * Threads with unspecified or legacy QOS class can be individually managed + */ + if (managed_task && + (thread_qos == THREAD_QOS_UNSPECIFIED || thread_qos == THREAD_QOS_LEGACY)) { + if (task_role == TASK_FOREGROUND_APPLICATION || task_role == TASK_CONTROL_APPLICATION) + return SFI_CLASS_MANAGED_FOCAL; + else + return SFI_CLASS_MANAGED_NONFOCAL; + } + + if (thread_qos == THREAD_QOS_UTILITY) + 
return SFI_CLASS_UTILITY; + + if (task_role == TASK_FOREGROUND_APPLICATION || task_role == TASK_CONTROL_APPLICATION) { + switch (thread_qos) { + case THREAD_QOS_USER_INTERACTIVE: + return SFI_CLASS_USER_INTERACTIVE_FOCAL; + case THREAD_QOS_USER_INITIATED: + return SFI_CLASS_USER_INITIATED_FOCAL; + case THREAD_QOS_LEGACY: + return SFI_CLASS_LEGACY_FOCAL; + default: + return SFI_CLASS_DEFAULT_FOCAL; + } + } else { + switch (thread_qos) { + case THREAD_QOS_USER_INTERACTIVE: + return SFI_CLASS_USER_INTERACTIVE_NONFOCAL; + case THREAD_QOS_USER_INITIATED: + return SFI_CLASS_USER_INITIATED_NONFOCAL; + case THREAD_QOS_LEGACY: + return SFI_CLASS_LEGACY_NONFOCAL; + default: + return SFI_CLASS_DEFAULT_NONFOCAL; + } + } +} + +/* + * pset must be locked. + */ +sfi_class_id_t sfi_processor_active_thread_classify(processor_t processor) +{ + return processor->current_sfi_class; +} + +/* + * thread must be locked. This is inherently racy, with the intent that + * at the AST boundary, it will be fully evaluated whether we need to + * perform an AST wait + */ +ast_t sfi_thread_needs_ast(thread_t thread, sfi_class_id_t *out_class) +{ + sfi_class_id_t class_id; + + class_id = sfi_thread_classify(thread); + + if (out_class) + *out_class = class_id; + + /* No lock taken, so a stale value may be used. */ + if (!sfi_classes[class_id].class_in_on_phase) + return AST_SFI; + else + return AST_NONE; +} + +/* + * pset must be locked. We take the SFI class for + * the currently running thread which is cached on + * the processor_t, and assume it is accurate. In the + * worst case, the processor will get an IPI and be asked + * to evaluate if the current running thread at that + * later point in time should be in an SFI wait. + */ +ast_t sfi_processor_needs_ast(processor_t processor) +{ + sfi_class_id_t class_id; + + class_id = sfi_processor_active_thread_classify(processor); + + /* No lock taken, so a stale value may be used. */ + if (!sfi_classes[class_id].class_in_on_phase) + return AST_SFI; + else + return AST_NONE; + +} + +static inline void _sfi_wait_cleanup(sched_call_t callback) { + thread_t self = current_thread(); + sfi_class_id_t current_sfi_wait_class = SFI_CLASS_UNSPECIFIED; + int64_t sfi_wait_time, sfi_wait_begin = 0; + + spl_t s = splsched(); + thread_lock(self); + if (callback) { + thread_sched_call(self, callback); + } + sfi_wait_begin = self->wait_sfi_begin_time; + thread_unlock(self); + + simple_lock(&sfi_lock); + sfi_wait_time = mach_absolute_time() - sfi_wait_begin; + current_sfi_wait_class = self->sfi_wait_class; + self->sfi_wait_class = SFI_CLASS_UNSPECIFIED; + simple_unlock(&sfi_lock); + splx(s); + assert((SFI_CLASS_UNSPECIFIED < current_sfi_wait_class) && (current_sfi_wait_class < MAX_SFI_CLASS_ID)); + ledger_credit(self->task->ledger, task_ledgers.sfi_wait_times[current_sfi_wait_class], sfi_wait_time); +} + +/* + * Called at AST context to fully evaluate if the current thread + * (which is obviously running) should instead block in an SFI wait. + * We must take the sfi_lock to check whether we are in the "off" period + * for the class, and if so, block. + */ +void sfi_ast(thread_t thread) +{ + sfi_class_id_t class_id; + spl_t s; + struct sfi_class_state *sfi_class; + wait_result_t waitret; + boolean_t did_wait = FALSE; + uint64_t tid; + thread_continue_t continuation; + sched_call_t workq_callback = workqueue_get_sched_callback(); + boolean_t did_clear_wq = FALSE; + + s = splsched(); + + simple_lock(&sfi_lock); + + if (!sfi_is_enabled) { + /* + * SFI is not enabled, or has recently been disabled. 
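+ * (For example, sfi_window_cancel() may have run after AST_SFI was set on this
+ * thread but before it reached this handler.)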
+ * There is no point putting this thread on a deferred ready + * queue, even if it were classified as needing it, since + * SFI will truly be off at the next global off timer + */ + simple_unlock(&sfi_lock); + splx(s); + + return; + } + + thread_lock(thread); + thread->sfi_class = class_id = sfi_thread_classify(thread); + tid = thread_tid(thread); + + /* + * Once the sfi_lock is taken and the thread's ->sfi_class field is updated, we + * are committed to transitioning to whatever state is indicated by "->class_in_on_phase". + * If another thread tries to call sfi_reevaluate() after this point, it will take the + * sfi_lock and see the thread in this wait state. If another thread calls + * sfi_reevaluate() before this point, it would see a runnable thread and at most + * attempt to send an AST to this processor, but we would have the most accurate + * classification. + */ + + /* Optimistically clear workq callback while thread is already locked */ + if (workq_callback && (thread->sched_call == workq_callback)) { + thread_sched_call(thread, NULL); + did_clear_wq = TRUE; + } + thread_unlock(thread); + + sfi_class = &sfi_classes[class_id]; + if (!sfi_class->class_in_on_phase) { + /* Need to block thread in wait queue */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_THREAD_DEFER), tid, class_id, 0, 0, 0); + + waitret = wait_queue_assert_wait64(&sfi_class->wait_queue, + CAST_EVENT64_T(class_id), + THREAD_INTERRUPTIBLE, + 0); + if (waitret == THREAD_WAITING) { + thread->sfi_wait_class = class_id; + did_wait = TRUE; + continuation = sfi_class->continuation; + } else { + /* thread may be exiting already, all other errors are unexpected */ + assert(waitret == THREAD_INTERRUPTED); + } + } + simple_unlock(&sfi_lock); + + splx(s); + + if (did_wait) { + thread_block_reason(continuation, did_clear_wq ? workq_callback : NULL, AST_SFI); + } else { + if (did_clear_wq) { + s = splsched(); + thread_lock(thread); + thread_sched_call(thread, workq_callback); + thread_unlock(thread); + splx(s); + } + } +} + +/* Thread must be unlocked */ +void sfi_reevaluate(thread_t thread) +{ + kern_return_t kret; + spl_t s; + sfi_class_id_t class_id, current_class_id; + ast_t sfi_ast; + + s = splsched(); + + simple_lock(&sfi_lock); + + thread_lock(thread); + sfi_ast = sfi_thread_needs_ast(thread, &class_id); + thread->sfi_class = class_id; + + /* + * This routine chiefly exists to boost threads out of an SFI wait + * if their classification changes before the "on" timer fires. + * + * If we calculate that a thread is in a different ->sfi_wait_class + * than we think it should be (including no-SFI-wait), we need to + * correct that: + * + * If the thread is in SFI wait and should not be (or should be waiting + * on a different class' "on" timer), we wake it up. If needed, the + * thread may immediately block again in the different SFI wait state. + * + * If the thread is not in an SFI wait state and it should be, we need + * to get that thread's attention, possibly by sending an AST to another + * processor. 
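+ *
+ * In summary (illustrative, not exhaustive):
+ *   in SFI wait, should not be waiting  -> wake the thread
+ *   in SFI wait on the wrong class      -> wake the thread (it may re-block)
+ *   running, should be waiting          -> set AST_SFI here or IPI its processor
+ *   runnable, not on a processor        -> nothing; evaluated at next context switch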
+ */ + + if ((current_class_id = thread->sfi_wait_class) != SFI_CLASS_UNSPECIFIED) { + + thread_unlock(thread); /* not needed anymore */ + + assert(current_class_id < MAX_SFI_CLASS_ID); + + if ((sfi_ast == AST_NONE) || (class_id != current_class_id)) { + struct sfi_class_state *sfi_class = &sfi_classes[current_class_id]; + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_WAIT_CANCELED), thread_tid(thread), current_class_id, class_id, 0, 0); + + kret = wait_queue_wakeup64_thread(&sfi_class->wait_queue, + CAST_EVENT64_T(current_class_id), + thread, + THREAD_AWAKENED); + assert(kret == KERN_SUCCESS || kret == KERN_NOT_WAITING); + } + } else { + /* + * Thread's current SFI wait class is not set, and because we + * have the sfi_lock, it won't get set. + */ + + if ((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN) { + if (sfi_ast != AST_NONE) { + if (thread == current_thread()) + ast_on(sfi_ast); + else { + processor_t processor = thread->last_processor; + + if (processor != PROCESSOR_NULL && + processor->state == PROCESSOR_RUNNING && + processor->active_thread == thread) { + cause_ast_check(processor); + } else { + /* + * Runnable thread that's not on a CPU currently. When a processor + * does context switch to it, the AST will get set based on whether + * the thread is in its "off time". + */ + } + } + } + } + + thread_unlock(thread); + } + + simple_unlock(&sfi_lock); + splx(s); +} diff --git a/osfmk/kern/sfi.h b/osfmk/kern/sfi.h new file mode 100644 index 000000000..385b57cf0 --- /dev/null +++ b/osfmk/kern/sfi.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _KERN_SFI_H_ +#define _KERN_SFI_H_ + +#include +#include +#include +#include +#include +#include +#include + +extern void sfi_init(void); +extern void sfi_early_init(void); +extern sfi_class_id_t sfi_get_ledger_alias_for_class(sfi_class_id_t class_id); +extern int sfi_ledger_entry_add(ledger_template_t template, sfi_class_id_t class_id); + +kern_return_t sfi_set_window(uint64_t window_usecs); +kern_return_t sfi_window_cancel(void); +kern_return_t sfi_get_window(uint64_t *window_usecs); + +kern_return_t sfi_set_class_offtime(sfi_class_id_t class_id, uint64_t offtime_usecs); +kern_return_t sfi_class_offtime_cancel(sfi_class_id_t class_id); +kern_return_t sfi_get_class_offtime(sfi_class_id_t class_id, uint64_t *offtime_usecs); + +#ifdef MACH_KERNEL_PRIVATE +/* + * Classifying a thread requires no special locks to be held (although attribute + * changes that cause an inconsistent snapshot may cause a spurious AST). Final + * evaluation will happen at the AST boundary with the thread locked. If possible, + * + */ +sfi_class_id_t sfi_thread_classify(thread_t thread); +sfi_class_id_t sfi_processor_active_thread_classify(processor_t processor); +ast_t sfi_thread_needs_ast(thread_t thread, sfi_class_id_t *out_class /* optional */); +ast_t sfi_processor_needs_ast(processor_t processor); + +void sfi_ast(thread_t thread); +void sfi_reevaluate(thread_t thread); +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* _KERN_SFI_H_ */ diff --git a/osfmk/kern/simple_lock.h b/osfmk/kern/simple_lock.h index 576212175..c1a191adb 100644 --- a/osfmk/kern/simple_lock.h +++ b/osfmk/kern/simple_lock.h @@ -182,8 +182,6 @@ __END_DECLS #define simple_unlock(l) usimple_unlock(l) #define simple_lock_try(l) usimple_lock_try(l) #define simple_lock_addr(l) (&(l)) -#define thread_sleep_simple_lock(l, e, i) \ - thread_sleep_usimple_lock((l), (e), (i)) #endif /* !defined(simple_lock_init) */ #endif /*!_KERN_SIMPLE_LOCK_H_*/ diff --git a/osfmk/kern/spl.c b/osfmk/kern/spl.c index 9036a5d83..190bde320 100644 --- a/osfmk/kern/spl.c +++ b/osfmk/kern/spl.c @@ -26,7 +26,6 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include -#include #include #include diff --git a/osfmk/kern/stack.c b/osfmk/kern/stack.c index 805830d54..ef9f6b8da 100644 --- a/osfmk/kern/stack.c +++ b/osfmk/kern/stack.c @@ -75,10 +75,10 @@ static unsigned int stack_new_count; /* total new stack allocations */ static vm_offset_t stack_addr_mask; -unsigned int kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE; -vm_offset_t kernel_stack_size = KERNEL_STACK_SIZE; -vm_offset_t kernel_stack_mask = -KERNEL_STACK_SIZE; -vm_offset_t kernel_stack_depth_max = 0; +unsigned int kernel_stack_pages; +vm_offset_t kernel_stack_size; +vm_offset_t kernel_stack_mask; +vm_offset_t kernel_stack_depth_max; static inline void STACK_ZINFO_PALLOC(thread_t thread) @@ -158,6 +158,11 @@ stack_init(void) { simple_lock_init(&stack_lock_data, 0); + kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE; + kernel_stack_size = KERNEL_STACK_SIZE; + kernel_stack_mask = -KERNEL_STACK_SIZE; + kernel_stack_depth_max = 0; + if (PE_parse_boot_argn("kernel_stack_pages", &kernel_stack_pages, sizeof (kernel_stack_pages))) { diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index 57aa2bd4d..e67c01a8b 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -80,11 +80,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -96,6 +98,7 @@ #include #include #include +#include 
#include #include #include @@ -109,6 +112,23 @@ #include #include #include +#include + +#if CONFIG_ATM +#include +#endif + +#if CONFIG_CSR +#include +#endif + +#if CONFIG_BANK +#include +#endif + +#if ALTERNATE_DEBUGGER +#include +#endif #if MACH_KDP #include @@ -130,6 +150,10 @@ #include #endif +#if HYPERVISOR +#include +#endif + #include static void kernel_bootstrap_thread(void); @@ -141,6 +165,14 @@ extern void cpu_userwindow_init(int); extern void cpu_physwindow_init(int); #endif +#if CONFIG_ECC_LOGGING +#include +#endif + +#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX +#include +#endif + // libkern/OSKextLib.cpp extern void OSKextRemoveKextBootstrap(void); @@ -158,11 +190,26 @@ extern int serverperfmode; /* size of kernel trace buffer, disabled by default */ unsigned int new_nkdbufs = 0; unsigned int wake_nkdbufs = 0; +unsigned int write_trace_on_panic = 0; +unsigned int trace_typefilter = 0; /* mach leak logging */ int log_leaks = 0; int turn_on_log_leaks = 0; +static inline void +kernel_bootstrap_log(const char *message) +{ +// kprintf("kernel_bootstrap: %s\n", message); + kernel_debug_string(message); +} + +static inline void +kernel_bootstrap_thread_log(const char *message) +{ +// kprintf("kernel_bootstrap_thread: %s\n", message); + kernel_debug_string(message); +} void kernel_early_bootstrap(void) @@ -178,6 +225,11 @@ kernel_early_bootstrap(void) * Initialize the timer callout world */ timer_call_init(); + + /* + * Configure SFI classes + */ + sfi_early_init(); } @@ -190,24 +242,23 @@ kernel_bootstrap(void) printf("%s\n", version); /* log kernel version */ -#define kernel_bootstrap_kprintf(x...) /* kprintf("kernel_bootstrap: " x) */ - if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */ turn_on_log_leaks = 1; PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs)); - PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof (wake_nkdbufs)); + PE_parse_boot_argn("trace_panic", &write_trace_on_panic, sizeof(write_trace_on_panic)); + PE_parse_boot_argn("trace_typefilter", &trace_typefilter, sizeof(trace_typefilter)); scale_setup(); - kernel_bootstrap_kprintf("calling vm_mem_bootstrap\n"); + kernel_bootstrap_log("vm_mem_bootstrap"); vm_mem_bootstrap(); - kernel_bootstrap_kprintf("calling cs_init\n"); + kernel_bootstrap_log("cs_init"); cs_init(); - kernel_bootstrap_kprintf("calling vm_mem_init\n"); + kernel_bootstrap_log("vm_mem_init"); vm_mem_init(); machine_info.memory_size = (uint32_t)mem_size; @@ -216,42 +267,42 @@ kernel_bootstrap(void) machine_info.minor_version = version_minor; #if CONFIG_TELEMETRY - kernel_bootstrap_kprintf("calling telemetry_init\n"); + kernel_bootstrap_log("telemetry_init"); telemetry_init(); #endif - kernel_bootstrap_kprintf("calling stackshot_lock_init\n"); + kernel_bootstrap_log("stackshot_lock_init"); stackshot_lock_init(); - kernel_bootstrap_kprintf("calling sched_init\n"); + kernel_bootstrap_log("sched_init"); sched_init(); - kernel_bootstrap_kprintf("calling wait_queue_bootstrap\n"); + kernel_bootstrap_log("wait_queue_bootstrap"); wait_queue_bootstrap(); - kernel_bootstrap_kprintf("calling ipc_bootstrap\n"); + kernel_bootstrap_log("ipc_bootstrap"); ipc_bootstrap(); #if CONFIG_MACF mac_policy_init(); #endif - kernel_bootstrap_kprintf("calling ipc_init\n"); + kernel_bootstrap_log("ipc_init"); ipc_init(); /* * As soon as the virtual memory system is up, we record * that this CPU is using the kernel pmap. 
*/ - kernel_bootstrap_kprintf("calling PMAP_ACTIVATE_KERNEL\n"); + kernel_bootstrap_log("PMAP_ACTIVATE_KERNEL"); PMAP_ACTIVATE_KERNEL(master_cpu); - kernel_bootstrap_kprintf("calling mapping_free_prime\n"); + kernel_bootstrap_log("mapping_free_prime"); mapping_free_prime(); /* Load up with temporary mapping blocks */ - kernel_bootstrap_kprintf("calling machine_init\n"); + kernel_bootstrap_log("machine_init"); machine_init(); - kernel_bootstrap_kprintf("calling clock_init\n"); + kernel_bootstrap_log("clock_init"); clock_init(); ledger_init(); @@ -259,16 +310,38 @@ kernel_bootstrap(void) /* * Initialize the IPC, task, and thread subsystems. */ - kernel_bootstrap_kprintf("calling task_init\n"); +#if CONFIG_COALITIONS + kernel_bootstrap_log("coalition_init"); + coalition_init(); +#endif + + kernel_bootstrap_log("task_init"); task_init(); - kernel_bootstrap_kprintf("calling thread_init\n"); + kernel_bootstrap_log("thread_init"); thread_init(); + +#if CONFIG_ATM + /* Initialize the Activity Trace Resource Manager. */ + kernel_bootstrap_log("atm_init"); + atm_init(); +#endif + +#if CONFIG_CSR + kernel_bootstrap_log("csr_init"); + csr_init(); +#endif + +#if CONFIG_BANK + /* Initialize the BANK Manager. */ + kernel_bootstrap_log("bank_init"); + bank_init(); +#endif /* * Create a kernel thread to execute the kernel bootstrap. */ - kernel_bootstrap_kprintf("calling kernel_thread_create\n"); + kernel_bootstrap_log("kernel_thread_create"); result = kernel_thread_create((thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, &thread); if (result != KERN_SUCCESS) panic("kernel_bootstrap: result = %08X\n", result); @@ -276,7 +349,7 @@ kernel_bootstrap(void) thread->state = TH_RUN; thread_deallocate(thread); - kernel_bootstrap_kprintf("calling load_context - done\n"); + kernel_bootstrap_log("load_context - done"); load_context(thread); /*NOTREACHED*/ } @@ -296,7 +369,7 @@ kernel_bootstrap_thread(void) processor_t processor = current_processor(); #define kernel_bootstrap_thread_kprintf(x...) /* kprintf("kernel_bootstrap_thread: " x) */ - kernel_bootstrap_thread_kprintf("calling idle_thread_create\n"); + kernel_bootstrap_thread_log("idle_thread_create"); /* * Create the idle processor thread. */ @@ -308,13 +381,13 @@ kernel_bootstrap_thread(void) * * Start up the scheduler services. */ - kernel_bootstrap_thread_kprintf("calling sched_startup\n"); + kernel_bootstrap_thread_log("sched_startup"); sched_startup(); /* * Thread lifecycle maintenance (teardown, stack allocation) */ - kernel_bootstrap_thread_kprintf("calling thread_daemon_init\n"); + kernel_bootstrap_thread_log("thread_daemon_init"); thread_daemon_init(); /* Create kernel map entry reserve */ @@ -323,26 +396,32 @@ kernel_bootstrap_thread(void) /* * Thread callout service. */ - kernel_bootstrap_thread_kprintf("calling thread_call_initialize\n"); + kernel_bootstrap_thread_log("thread_call_initialize"); thread_call_initialize(); - + /* * Remain on current processor as * additional processors come online. */ - kernel_bootstrap_thread_kprintf("calling thread_bind\n"); + kernel_bootstrap_thread_log("thread_bind"); thread_bind(processor); + /* + * Initialize ipc thread call support. + */ + kernel_bootstrap_thread_log("ipc_thread_call_init"); + ipc_thread_call_init(); + /* * Kick off memory mapping adjustments. */ - kernel_bootstrap_thread_kprintf("calling mapping_adjust\n"); + kernel_bootstrap_thread_log("mapping_adjust"); mapping_adjust(); /* * Create the clock service. 
*/ - kernel_bootstrap_thread_kprintf("calling clock_service_create\n"); + kernel_bootstrap_thread_log("clock_service_create"); clock_service_create(); /* @@ -362,8 +441,8 @@ kernel_bootstrap_thread(void) -#if MACH_KDP - kernel_bootstrap_kprintf("calling kdp_init\n"); +#if MACH_KDP + kernel_bootstrap_log("kdp_init"); kdp_init(); #endif @@ -379,28 +458,49 @@ kernel_bootstrap_thread(void) kpc_init(); #endif +#if CONFIG_ECC_LOGGING + ecc_log_init(); +#endif + #if KPERF kperf_bootstrap(); #endif +#if HYPERVISOR + hv_support_init(); +#endif + #if CONFIG_TELEMETRY - kernel_bootstrap_kprintf("calling bootprofile_init\n"); + kernel_bootstrap_log("bootprofile_init"); bootprofile_init(); #endif +#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX + vmx_init(); +#endif + #if (defined(__i386__) || defined(__x86_64__)) if (turn_on_log_leaks && !new_nkdbufs) new_nkdbufs = 200000; - start_kern_tracing(new_nkdbufs, FALSE); + if (trace_typefilter) + start_kern_tracing_with_typefilter(new_nkdbufs, + FALSE, + trace_typefilter); + else + start_kern_tracing(new_nkdbufs, FALSE); if (turn_on_log_leaks) log_leaks = 1; #endif + kernel_bootstrap_log("prng_init"); + prng_cpu_init(master_cpu); + #ifdef IOKIT PE_init_iokit(); #endif - + + assert(ml_get_interrupts_enabled() == FALSE); (void) spllo(); /* Allow interruptions */ #if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 @@ -417,7 +517,10 @@ kernel_bootstrap_thread(void) #if (!defined(__i386__) && !defined(__x86_64__)) if (turn_on_log_leaks && !new_nkdbufs) new_nkdbufs = 200000; - start_kern_tracing(new_nkdbufs, FALSE); + if (trace_typefilter) + start_kern_tracing_with_typefilter(new_nkdbufs, FALSE, trace_typefilter); + else + start_kern_tracing(new_nkdbufs, FALSE); if (turn_on_log_leaks) log_leaks = 1; #endif @@ -431,18 +534,26 @@ kernel_bootstrap_thread(void) #if CONFIG_MACF + kernel_bootstrap_log("mac_policy_initmach"); mac_policy_initmach(); #endif + kernel_bootstrap_log("sfi_init"); + sfi_init(); + /* * Initialize the global used for permuting kernel * addresses that may be exported to userland as tokens * using VM_KERNEL_ADDRPERM(). Force the random number * to be odd to avoid mapping a non-zero * word-aligned address to zero via addition. + * Note: at this stage we can use the cryptographically secure PRNG + * rather than early_random(). */ - vm_kernel_addrperm = (vm_offset_t)early_random() | 1; - buf_kernel_addrperm = (vm_offset_t)early_random() | 1; + read_random(&vm_kernel_addrperm, sizeof(vm_kernel_addrperm)); + vm_kernel_addrperm |= 1; + read_random(&buf_kernel_addrperm, sizeof(buf_kernel_addrperm)); + buf_kernel_addrperm |= 1; /* * Start the user bootstrap. @@ -539,10 +650,10 @@ load_context( #define load_context_kprintf(x...) /* kprintf("load_context: " x) */ - load_context_kprintf("calling machine_set_current_thread\n"); + load_context_kprintf("machine_set_current_thread\n"); machine_set_current_thread(thread); - load_context_kprintf("calling processor_up\n"); + load_context_kprintf("processor_up\n"); processor_up(processor); PMAP_ACTIVATE_KERNEL(processor->cpu_id); @@ -555,7 +666,7 @@ load_context( load_context_kprintf("thread %p, stack %lx, stackptr %lx\n", thread, thread->kernel_stack, thread->machine.kstackptr); if (!thread->kernel_stack) { - load_context_kprintf("calling stack_alloc_try\n"); + load_context_kprintf("stack_alloc_try\n"); if (!stack_alloc_try(thread)) panic("load_context"); } @@ -565,7 +676,7 @@ load_context( * running for load calculations. 
*/ if (!(thread->state & TH_IDLE)) - sched_run_incr(); + sched_run_incr(thread); processor->active_thread = thread; processor->current_pri = thread->sched_pri; @@ -582,7 +693,7 @@ load_context( PMAP_ACTIVATE_USER(thread, processor->cpu_id); - load_context_kprintf("calling machine_load_context\n"); + load_context_kprintf("machine_load_context\n"); machine_load_context(thread); /*NOTREACHED*/ } diff --git a/osfmk/kern/sync_sema.h b/osfmk/kern/sync_sema.h index bcd013b2f..57db2e2a2 100644 --- a/osfmk/kern/sync_sema.h +++ b/osfmk/kern/sync_sema.h @@ -45,7 +45,6 @@ #ifdef MACH_KERNEL_PRIVATE #include -#include #include typedef struct semaphore { diff --git a/osfmk/kern/syscall_subr.c b/osfmk/kern/syscall_subr.c index da2fe9e74..6f614474c 100644 --- a/osfmk/kern/syscall_subr.c +++ b/osfmk/kern/syscall_subr.c @@ -73,7 +73,7 @@ #include #include #include - +#include #ifdef MACH_BSD extern void workqueue_thread_yielded(void); @@ -361,6 +361,9 @@ thread_switch( s = splsched(); thread_lock(thread); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE, + thread_tid(thread), thread->state, 0, 0, 0); + /* * Check that the thread is not bound * to a different processor, and that realtime @@ -544,10 +547,11 @@ thread_poll_yield( abstime += (total_computation >> sched_poll_yield_shift); if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL)) self->depress_timer_active++; - thread_unlock(self); - if ((preempt = csw_check(myprocessor)) != AST_NONE) + if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE) ast_on(preempt); + + thread_unlock(self); } } splx(s); diff --git a/osfmk/kern/syscall_sw.c b/osfmk/kern/syscall_sw.c index 012f4b2d1..e49182884 100644 --- a/osfmk/kern/syscall_sw.c +++ b/osfmk/kern/syscall_sw.c @@ -60,7 +60,9 @@ #include #include +#if CONFIG_REQUIRES_U32_MUNGING #include +#endif /* Forwards */ @@ -143,7 +145,7 @@ const mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { /* 40 */ MACH_TRAP(kern_invalid, 0, 0, NULL), /* 41 */ MACH_TRAP(_kernelrpc_mach_port_guard_trap, 4, 5, munge_wwlw), /* 42 */ MACH_TRAP(_kernelrpc_mach_port_unguard_trap, 3, 4, munge_wwl), -/* 43 */ MACH_TRAP(map_fd, 5, 5, munge_wwwww), +/* 43 */ MACH_TRAP(kern_invalid, 0, 0, NULL), /* 44 */ MACH_TRAP(task_name_for_pid, 3, 3, munge_www), /* 45 */ MACH_TRAP(task_for_pid, 3, 3, munge_www), /* 46 */ MACH_TRAP(pid_for_task, 2, 2, munge_ww), @@ -278,7 +280,7 @@ const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = { /* 40 */ "kern_invalid", /* 41 */ "_kernelrpc_mach_port_guard_trap", /* 42 */ "_kernelrpc_mach_port_unguard_trap", -/* 43 */ "map_fd", +/* 43 */ "kern_invalid", /* 44 */ "task_name_for_pid", /* 45 */ "task_for_pid", /* 46 */ "pid_for_task", @@ -338,8 +340,7 @@ const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = { /* 98 */ "kern_invalid", /* 99 */ "kern_invalid", /* traps 100-107 reserved for iokit (esb) */ -/* 100 */ "kern_invalid", -/* 100 */ //"iokit_user_client_trap", +/* 100 */ "iokit_user_client_trap", /* 101 */ "kern_invalid", /* 102 */ "kern_invalid", /* 103 */ "kern_invalid", diff --git a/osfmk/kern/syscall_sw.h b/osfmk/kern/syscall_sw.h index 70d69bfde..6cf3ae953 100644 --- a/osfmk/kern/syscall_sw.h +++ b/osfmk/kern/syscall_sw.h @@ -66,12 +66,14 @@ * its kernel stack. Some architectures may need * to save more state in the pcb for these traps. 
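 * (Illustrative sketch: under CONFIG_REQUIRES_U32_MUNGING, a munger such as
 * munge_www widens a 32-bit caller's three 32-bit argument words in place into
 * three 64-bit words before the trap function consumes them.)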
*/ -typedef void mach_munge_t(const void *, void *); +#if CONFIG_REQUIRES_U32_MUNGING +typedef void mach_munge_t(void *); +#endif typedef struct { int mach_trap_arg_count; /* Number of trap arguments (Arch independant) */ kern_return_t (*mach_trap_function)(void *); -#if defined(__x86_64__) +#if CONFIG_REQUIRES_U32_MUNGING mach_munge_t *mach_trap_arg_munge32; /* system call argument munger routine for 32-bit */ #endif int mach_trap_u32_words; /* number of 32-bit words to copyin for U32 */ @@ -86,7 +88,7 @@ typedef struct { extern const mach_trap_t mach_trap_table[]; extern int mach_trap_count; -#if defined(__x86_64__) +#if CONFIG_REQUIRES_U32_MUNGING #if !MACH_ASSERT #define MACH_TRAP(name, arg_count, u32_arg_words, munge32) \ @@ -97,9 +99,7 @@ extern int mach_trap_count; #endif /* !MACH_ASSERT */ - - -#elif defined(__i386__) || defined(__arm__) +#else /* !CONFIG_REQUIRES_U32_MUNGING */ #if !MACH_ASSERT #define MACH_TRAP(name, arg_count, u32_arg_words, munge32) \ @@ -109,12 +109,6 @@ extern int mach_trap_count; { (arg_count), (kern_return_t (*)(void *)) (name), (u32_arg_words), #name } #endif /* !MACH_ASSERT */ - - - - -#else /* !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) */ -#error Unsupported architecture -#endif /* !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) */ +#endif /* !CONFIG_REQUIRES_U32_MUNGING */ #endif /* _KERN_SYSCALL_SW_H_ */ diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index aa5d2b360..049701dff 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -86,9 +86,6 @@ * Copyright (c) 2005 SPARTA, Inc. */ -#include -#include - #include #include #include @@ -98,6 +95,7 @@ #include #include +#include #include #include #include @@ -108,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -140,14 +139,9 @@ #include #include #include -#include #include -#if CONFIG_MACF_MACH -#include -#endif - #if CONFIG_COUNTERS #include #endif /* CONFIG_COUNTERS */ @@ -155,6 +149,16 @@ #include #include +#if CONFIG_ATM +#include +#endif + +#include + +#if KPERF +extern int kpc_force_all_ctrs(task_t, int); +#endif + task_t kernel_task; zone_t task_zone; lck_attr_t task_lck_attr; @@ -171,8 +175,16 @@ zinfo_usage_store_t tasks_tkm_shared; expired_task_statistics_t dead_task_statistics; lck_spin_t dead_task_statistics_lock; -static ledger_template_t task_ledger_template = NULL; -struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +ledger_template_t task_ledger_template = NULL; + +struct _task_ledger_indices task_ledgers __attribute__((used)) = + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + { 0 /* initialized at runtime */}, +#ifdef CONFIG_BANK + -1, -1, +#endif + }; + void init_task_ledgers(void); void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1); void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1); @@ -207,6 +219,10 @@ int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. 
*/ int max_task_footprint = 0; /* Per-task limit on physical memory consumption */ +#if MACH_ASSERT +int pmap_ledgers_panic = 1; +#endif /* MACH_ASSERT */ + int task_max = CONFIG_TASK_MAX; /* Max number of tasks */ int hwm_user_cores = 0; /* high watermark violations generate user core files */ @@ -220,6 +236,9 @@ extern char *proc_name_address(struct proc *p); extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb); #endif #endif +#if MACH_ASSERT +extern int pmap_ledgers_panic; +#endif /* MACH_ASSERT */ /* Forwards */ @@ -254,9 +273,9 @@ task_set_64bit( task_t task, boolean_t is64bit) { -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) thread_t thread; -#endif /* defined(__i386__) || defined(__x86_64__) */ +#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */ task_lock(task); @@ -276,13 +295,13 @@ task_set_64bit( * state with respect to its task's 64-bitness. */ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__) queue_iterate(&task->threads, thread, thread_t, task_threads) { thread_mtx_lock(thread); machine_thread_switch_addrmode(thread); thread_mtx_unlock(thread); } -#endif /* defined(__i386__) || defined(__x86_64__) */ +#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */ out: task_unlock(task); @@ -298,6 +317,18 @@ task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size) task_unlock(task); } +void +task_atm_reset(__unused task_t task) { + +#if CONFIG_ATM + if (task->atm_context != NULL) { + atm_task_descriptor_destroy(task->atm_context); + task->atm_context = NULL; + } +#endif + +} + #if TASK_REFERENCE_LEAK_DEBUG #include @@ -365,20 +396,21 @@ task_init(void) zone_change(task_zone, Z_NOENCRYPT, TRUE); /* - * Configure per-task memory limit. The boot arg takes precedence over the - * device tree. + * Configure per-task memory limit. + * The boot-arg is interpreted as Megabytes, + * and takes precedence over the device tree. + * Setting the boot-arg to 0 disables task limits. */ if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint, sizeof (max_task_footprint))) { - max_task_footprint = 0; - } - - if (max_task_footprint == 0) { /* * No limit was found in boot-args, so go look in the device tree. */ if (!PE_get_default("kern.max_task_pmem", &max_task_footprint, sizeof(max_task_footprint))) { + /* + * No limit was found in device tree. + */ max_task_footprint = 0; } } @@ -398,6 +430,11 @@ task_init(void) #endif } +#if MACH_ASSERT + PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic, + sizeof (pmap_ledgers_panic)); +#endif /* MACH_ASSERT */ + if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores, sizeof (hwm_user_cores))) { hwm_user_cores = 0; @@ -423,7 +460,16 @@ task_init(void) disable_exc_resource = 0; } +/* + * If we have coalitions, coalition_init() will call init_task_ledgers() as it + * sets up the ledgers for the default coalition. If we don't have coalitions, + * then we have to call it now. + */ +#if CONFIG_COALITIONS + assert(task_ledger_template); +#else /* CONFIG_COALITIONS */ init_task_ledgers(); +#endif /* CONFIG_COALITIONS */ #if TASK_REFERENCE_LEAK_DEBUG simple_lock_init(&task_ref_lock, 0); @@ -439,15 +485,16 @@ task_init(void) * Create the kernel task as the first task. 
*/ #ifdef __LP64__ - if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS) #else - if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS) #endif panic("task_init\n"); vm_map_deallocate(kernel_task->map); kernel_task->map = kernel_map; lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr); + } /* @@ -512,18 +559,28 @@ host_security_create_task_token( * * phys_footprint * Physical footprint: This is the sum of: - * + phys_mem [task's resident memory] - * + phys_compressed - * + iokit_mem + * + internal + * + internal_compressed + * + iokit_mapped + * - alternate_accounting * - * iokit_mem - * IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state]. - * - * phys_compressed - * Physical compressed: Amount of this task's resident memory which is held by the compressor. + * internal + * The task's anonymous memory, which on iOS is always resident. + * + * internal_compressed + * Amount of this task's internal memory which is held by the compressor. * Such memory is no longer actually resident for the task [i.e., resident in its pmap], * and could be either decompressed back into memory, or paged out to storage, depending * on our implementation. + * + * iokit_mapped + * IOKit mappings: The total size of all IOKit mappings in this task, regardless of + * clean/dirty or internal/external state. + * + * alternate_accounting + * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages + * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid + * double counting.
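The accounting identity spelled out above is easy to sanity-check with concrete numbers. A self-contained example (the figures are invented for illustration):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	const int64_t mb = 1 << 20;

	int64_t internal             = 100 * mb; /* anonymous, resident */
	int64_t internal_compressed  =  30 * mb; /* held by the compressor */
	int64_t iokit_mapped         =  20 * mb; /* all IOKit mappings */
	int64_t alternate_accounting =   8 * mb; /* dirty internal pages that
	                                            are also IOKit-mapped */

	int64_t phys_footprint = internal + internal_compressed
	    + iokit_mapped - alternate_accounting;

	/* each page ends up counted exactly once */
	assert(phys_footprint == 142 * mb);
	return 0;
}
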
*/ void init_task_ledgers(void) @@ -545,26 +602,94 @@ init_task_ledgers(void) "bytes"); task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem", "bytes"); - task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings", + task_ledgers.internal = ledger_entry_add(t, "internal", "physmem", + "bytes"); + task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings", + "bytes"); + task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem", "bytes"); task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem", "bytes"); - task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem", + task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem", "bytes"); + task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes"); + task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes"); + task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes"); + task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes"); task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power", "count"); task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power", "count"); + + sfi_class_id_t class_id, ledger_alias; + for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) { + task_ledgers.sfi_wait_times[class_id] = -1; + } + + /* don't account for UNSPECIFIED */ + for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) { + ledger_alias = sfi_get_ledger_alias_for_class(class_id); + if (ledger_alias != SFI_CLASS_UNSPECIFIED) { + /* Check to see if alias has been registered yet */ + if (task_ledgers.sfi_wait_times[ledger_alias] != -1) { + task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias]; + } else { + /* Otherwise, initialize it first */ + task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias); + } + } else { + task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id); + } + + if (task_ledgers.sfi_wait_times[class_id] < 0) { + panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id); + } + } - if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) || - (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) || - (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) || - (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) || - (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) { +#ifdef CONFIG_BANK + task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns"); + task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns"); +#endif + + assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1); + + if ((task_ledgers.cpu_time < 0) || + (task_ledgers.tkm_private < 0) || + (task_ledgers.tkm_shared < 0) || + (task_ledgers.phys_mem < 0) || + (task_ledgers.wired_mem < 0) || + (task_ledgers.internal < 0) || + (task_ledgers.iokit_mapped < 0) || + (task_ledgers.alternate_accounting < 0) || + (task_ledgers.phys_footprint < 0) || + (task_ledgers.internal_compressed < 0) || + (task_ledgers.purgeable_volatile < 0) || + 
(task_ledgers.purgeable_nonvolatile < 0) || + (task_ledgers.purgeable_volatile_compressed < 0) || + (task_ledgers.purgeable_nonvolatile_compressed < 0) || + (task_ledgers.platform_idle_wakeups < 0) || + (task_ledgers.interrupt_wakeups < 0) +#ifdef CONFIG_BANK + || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) +#endif + ) { panic("couldn't create entries for task ledger template"); } ledger_track_maximum(t, task_ledgers.phys_footprint, 60); +#if MACH_ASSERT + if (pmap_ledgers_panic) { + ledger_panic_on_negative(t, task_ledgers.phys_footprint); + ledger_panic_on_negative(t, task_ledgers.internal); + ledger_panic_on_negative(t, task_ledgers.internal_compressed); + ledger_panic_on_negative(t, task_ledgers.iokit_mapped); + ledger_panic_on_negative(t, task_ledgers.alternate_accounting); + ledger_panic_on_negative(t, task_ledgers.purgeable_volatile); + ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile); + ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed); + ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed); + } +#endif /* MACH_ASSERT */ #if CONFIG_JETSAM ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL); @@ -579,6 +704,7 @@ init_task_ledgers(void) kern_return_t task_create_internal( task_t parent_task, + coalition_t parent_coalition __unused, boolean_t inherit_memory, boolean_t is_64bit, task_t *child_task) /* OUT */ @@ -605,6 +731,10 @@ task_create_internal( new_task->ledger = ledger; +#if defined(CONFIG_SCHED_MULTIQ) + new_task->sched_group = sched_group_create(); +#endif + /* if inherit_memory is true, parent_task MUST not be NULL */ if (inherit_memory) new_task->map = vm_map_fork(ledger, parent_task->map); @@ -639,6 +769,13 @@ task_create_internal( new_task->t_flags = 0; new_task->importance = 0; +#if CONFIG_ATM + new_task->atm_context = NULL; +#endif +#if CONFIG_BANK + new_task->bank_context = NULL; +#endif + zinfo_task_init(new_task); #ifdef MACH_BSD @@ -666,11 +803,6 @@ task_create_internal( queue_init(&new_task->semaphore_list); new_task->semaphores_owned = 0; -#if CONFIG_MACF_MACH - new_task->label = labelh_new(1); - mac_task_label_init (&new_task->maclabel); -#endif - ipc_task_init(new_task, parent_task); new_task->total_user_time = 0; @@ -698,6 +830,10 @@ task_create_internal( new_task->suspends_outstanding = 0; #endif +#if HYPERVISOR + new_task->hv_task_target = NULL; +#endif /* HYPERVISOR */ + new_task->low_mem_notified_warn = 0; new_task->low_mem_notified_critical = 0; @@ -705,11 +841,7 @@ task_create_internal( new_task->purged_memory_critical = 0; new_task->mem_notify_reserved = 0; #if IMPORTANCE_INHERITANCE - new_task->imp_receiver = 0; - new_task->imp_donor = 0; - new_task->imp_reserved = 0; - new_task->task_imp_assertcnt = 0; - new_task->task_imp_externcnt = 0; + new_task->task_imp_base = NULL; #endif /* IMPORTANCE_INHERITANCE */ #if defined(__x86_64__) @@ -743,11 +875,36 @@ task_create_internal( new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task); #if IMPORTANCE_INHERITANCE - new_task->imp_donor = parent_task->imp_donor; + ipc_importance_task_t new_task_imp = IIT_NULL; + + if (task_is_marked_importance_donor(parent_task)) { + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != new_task_imp); + ipc_importance_task_mark_donor(new_task_imp, TRUE); + } /* Embedded doesn't want this to inherit */ - new_task->imp_receiver = parent_task->imp_receiver; + if 
(task_is_marked_importance_receiver(parent_task)) { + if (IIT_NULL == new_task_imp) + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != new_task_imp); + ipc_importance_task_mark_receiver(new_task_imp, TRUE); + } + if (task_is_marked_importance_denap_receiver(parent_task)) { + if (IIT_NULL == new_task_imp) + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != new_task_imp); + ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE); + } + + if (IIT_NULL != new_task_imp) { + assert(new_task->task_imp_base == new_task_imp); + ipc_importance_task_release(new_task_imp); + } #endif /* IMPORTANCE_INHERITANCE */ + new_task->priority = BASEPRI_DEFAULT; + new_task->max_priority = MAXPRI_USER; + new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype; new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg; @@ -758,6 +915,7 @@ task_create_internal( new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive; new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier; new_task->requested_policy.terminated = parent_task->requested_policy.terminated; + new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp; task_policy_create(new_task, parent_task->requested_policy.t_boosted); } else { @@ -771,21 +929,42 @@ task_create_internal( new_task->all_image_info_size = (mach_vm_size_t)0; new_task->pset_hint = PROCESSOR_SET_NULL; + + if (kernel_task == TASK_NULL) { + new_task->priority = BASEPRI_KERNEL; + new_task->max_priority = MAXPRI_KERNEL; + } else { + new_task->priority = BASEPRI_DEFAULT; + new_task->max_priority = MAXPRI_USER; + } } - if (kernel_task == TASK_NULL) { - new_task->priority = BASEPRI_KERNEL; - new_task->max_priority = MAXPRI_KERNEL; - } else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) { - new_task->priority = MAXPRI_THROTTLE; - new_task->max_priority = MAXPRI_THROTTLE; + new_task->coalition = COALITION_NULL; + +#if CONFIG_COALITIONS + if (parent_coalition) { + coalition_adopt_task(parent_coalition, new_task); + } else if (parent_task && parent_task->coalition) { + coalition_adopt_task(parent_task->coalition, new_task); } else { - new_task->priority = BASEPRI_DEFAULT; - new_task->max_priority = MAXPRI_USER; + coalition_default_adopt_task(new_task); } + if (new_task->coalition == COALITION_NULL) { + panic("created task is not a member of any coalition"); + } +#endif /* CONFIG_COALITIONS */ + + /* Allocate I/O Statistics */ + new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info)); + assert(new_task->task_io_stats != NULL); + bzero(new_task->task_io_stats, sizeof(struct io_stat_info)); + + bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats)); + bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics)); new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0; + new_task->task_gpu_ns = 0; lck_mtx_lock(&tasks_threads_lock); queue_enter(&tasks, new_task, task_t, tasks); tasks_count++; @@ -795,6 +974,9 @@ task_create_internal( new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV); new_task->task_volatile_objects = 0; + new_task->task_nonvolatile_objects = 0; + new_task->task_purgeable_disowning = FALSE; + new_task->task_purgeable_disowned = FALSE; ipc_task_enable(new_task); @@ -802,6 +984,8 @@ task_create_internal( return(KERN_SUCCESS); } +int task_dropped_imp_count = 0; + /* * task_deallocate: * @@ 
-812,18 +996,57 @@ task_deallocate( task_t task) { ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups; + uint32_t refs; if (task == TASK_NULL) return; - if (task_deallocate_internal(task) > 0) + refs = task_deallocate_internal(task); + +#if IMPORTANCE_INHERITANCE + if (refs > 1) return; + + if (refs == 1) { + /* + * If last ref potentially comes from the task's importance, + * disconnect it. But more task refs may be added before + * that completes, so wait for the reference to go to zero + * naturally (it may happen on a recursive task_deallocate() + * from the ipc_importance_disconnect_task() call). + */ + if (IIT_NULL != task->task_imp_base) + ipc_importance_disconnect_task(task); + return; + } +#else + if (refs > 0) + return; +#endif /* IMPORTANCE_INHERITANCE */ lck_mtx_lock(&tasks_threads_lock); queue_remove(&terminated_tasks, task, task_t, tasks); terminated_tasks_count--; lck_mtx_unlock(&tasks_threads_lock); + /* + * remove the reference on atm descriptor + */ + task_atm_reset(task); + +#if CONFIG_BANK + /* + * remove the reference on bank context + */ + if (task->bank_context != NULL) { + bank_task_destroy(task->bank_context); + task->bank_context = NULL; + } +#endif + + if (task->task_io_stats) + kfree(task->task_io_stats, sizeof(struct io_stat_info)); + /* * Give the machine dependent code a chance * to perform cleanup before ripping apart @@ -836,6 +1059,26 @@ task_deallocate( if (task->affinity_space) task_affinity_deallocate(task); +#if MACH_ASSERT + if (task->ledger != NULL && + task->map != NULL && + task->map->pmap != NULL && + task->map->pmap->ledger != NULL) { + assert(task->ledger == task->map->pmap->ledger); + } +#endif /* MACH_ASSERT */ + + vm_purgeable_disown(task); + assert(task->task_purgeable_disowned); + if (task->task_volatile_objects != 0 || + task->task_nonvolatile_objects != 0) { + panic("task_deallocate(%p): " + "volatile_objects=%d nonvolatile_objects=%d\n", + task, + task->task_volatile_objects, + task->task_nonvolatile_objects); + } + vm_map_deallocate(task->map); is_release(task->itk_space); @@ -844,6 +1087,10 @@ task_deallocate( ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups, &platform_idle_wakeups, &debit); +#if defined(CONFIG_SCHED_MULTIQ) + sched_group_destroy(task->sched_group); +#endif + /* Accumulate statistics for dead tasks */ lck_spin_lock(&dead_task_statistics_lock); dead_task_statistics.total_user_time += task->total_user_time; @@ -858,10 +1105,6 @@ task_deallocate( lck_spin_unlock(&dead_task_statistics_lock); lck_mtx_destroy(&task->lock, &task_lck_grp); -#if CONFIG_MACF_MACH - labelh_release(task->label); -#endif - if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit, &debit)) { OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc); @@ -879,15 +1122,14 @@ task_deallocate( btlog_remove_entries_for_element(task_ref_btlog, task); #endif - if (task->task_volatile_objects) { - /* - * This task still "owns" some volatile VM objects. - * Disown them now to avoid leaving them pointing back at - * an invalid task.
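The reference-count dance in the new task_deallocate() above is subtle: the next-to-last release only disconnects the importance linkage, and the actual teardown happens on the recursive release that the disconnect itself performs. A toy model of that control flow (ordinary user-space C, not kernel code; locking and atomicity omitted):

#include <stdio.h>

struct obj { int refs; int has_importance; };

static void destroy(struct obj *o);

static void
release(struct obj *o)
{
	int refs = --o->refs;     /* stands in for task_deallocate_internal() */

	if (refs > 1)
		return;
	if (refs == 1) {
		if (o->has_importance) {
			o->has_importance = 0;
			release(o);   /* importance held the last ref; recurse */
		}
		return;
	}
	destroy(o);               /* refs == 0: actually tear down */
}

static void
destroy(struct obj *o)
{
	printf("destroyed %p\n", (void *)o);
}

int
main(void)
{
	struct obj o = { 2, 1 };
	release(&o);   /* drops to 1, disconnects importance, recurses to 0 */
	return 0;
}
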
- */ - vm_purgeable_disown(task); - assert(task->task_volatile_objects == 0); +#if CONFIG_COALITIONS + if (!task->coalition) { + panic("deallocating task was not a member of any coalition"); } + coalition_release(task->coalition); +#endif /* CONFIG_COALITIONS */ + + task->coalition = COALITION_NULL; zfree(task_zone, task); } @@ -936,6 +1178,30 @@ task_terminate( return (task_terminate_internal(task)); } +#if MACH_ASSERT +extern int proc_pid(struct proc *); +extern void proc_name_kdp(task_t t, char *buf, int size); +#endif /* MACH_ASSERT */ + +#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */ +static void +__unused task_partial_reap(task_t task, __unused int pid) +{ + unsigned int reclaimed_resident = 0; + unsigned int reclaimed_compressed = 0; + uint64_t task_page_count; + + task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64); + + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START), + pid, task_page_count, 0, 0, 0); + + vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed); + + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END), + pid, reclaimed_resident, reclaimed_compressed, 0, 0); +} + kern_return_t task_terminate_internal( task_t task) @@ -943,6 +1209,7 @@ task_terminate_internal( thread_t thread, self; task_t self_task; boolean_t interrupt_save; + int pid = 0; assert(task != kernel_task); @@ -979,14 +1246,6 @@ task_terminate_internal( return (KERN_FAILURE); } -#if MACH_ASSERT - if (task->suspends_outstanding != 0) { - printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n", - proc_name_address(task->bsd_info), proc_pid(task->bsd_info), - task->suspends_outstanding); - } -#endif - if (self_task != task) task_unlock(self_task); @@ -1022,8 +1281,19 @@ task_terminate_internal( thread_terminate_internal(thread); } +#ifdef MACH_BSD + if (task->bsd_info != NULL) { + pid = proc_pid(task->bsd_info); + } +#endif /* MACH_BSD */ + task_unlock(task); + /* Early object reap phase */ + +// PR-17045188: Revisit implementation +// task_partial_reap(task, pid); + /* * Destroy all synchronizers owned by the task. @@ -1035,8 +1305,19 @@ task_terminate_internal( */ ipc_space_terminate(task->itk_space); - if (vm_map_has_4GB_pagezero(task->map)) - vm_map_clear_4GB_pagezero(task->map); +#if 00 + /* if some ledgers go negative on tear-down again... */ + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.phys_footprint); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.internal); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.internal_compressed); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.iokit_mapped); + ledger_disable_panic_on_negative(task->map->pmap->ledger, + task_ledgers.alternate_accounting); +#endif /* * If the current thread is a member of the task @@ -1054,6 +1335,22 @@ task_terminate_internal( /* release our shared region */ vm_shared_region_set(task, NULL); +#if MACH_ASSERT + /* + * Identify the pmap's process, in case the pmap ledgers drift + * and we have to report it. 
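Taken together, the MACH_ASSERT pieces of this change form a small debugging workflow: the pmap_ledgers_panic boot-arg arms ledger_panic_on_negative() on the template entries earlier in the change, and pmap_set_process() (just below) tags each pmap with a pid and name so an underflow can be attributed. Very roughly, in a much-simplified model whose types and names are invented for the sketch:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy ledger entry; the real ledger_t is far richer. */
struct toy_ledger_entry {
	int64_t balance;
	int     panic_on_negative;   /* armed by ledger_panic_on_negative() */
};

/*
 * Debit with an underflow trap. In a MACH_ASSERT kernel the equivalent
 * check fires a panic whose message can name the owning process thanks
 * to the pid/name recorded via pmap_set_process().
 */
void
toy_ledger_debit(struct toy_ledger_entry *e, int64_t amount,
    int pid, const char *procname)
{
	e->balance -= amount;
	if (e->panic_on_negative && e->balance < 0) {
		fprintf(stderr, "panic: ledger underflow in %s [%d]\n",
		    procname, pid);
		abort();
	}
}
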
+ */ + char procname[17]; + if (task->bsd_info) { + pid = proc_pid(task->bsd_info); + proc_name_kdp(task, procname, sizeof (procname)); + } else { + pid = 0; + strlcpy(procname, "", sizeof (procname)); + } + pmap_set_process(task->map->pmap, pid, procname); +#endif /* MACH_ASSERT */ + lck_mtx_lock(&tasks_threads_lock); queue_remove(&tasks, task, task_t, tasks); queue_enter(&terminated_tasks, task, task_t, tasks); @@ -1067,6 +1364,19 @@ task_terminate_internal( */ thread_interrupt_level(interrupt_save); +#if KPERF + /* force the task to release all ctrs */ + if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS) + kpc_force_all_ctrs(task, 0); +#endif + +#if CONFIG_COALITIONS + /* + * Leave our coalition. (drop activation but not reference) + */ + coalition_remove_task(task); +#endif + /* * Get rid of the task active reference on itself. */ @@ -2114,6 +2424,24 @@ host_security_set_task_token( return(kr); } +kern_return_t +task_send_trace_memory( + task_t target_task, + __unused uint32_t pid, + __unused uint64_t uniqueid) +{ + kern_return_t kr = KERN_INVALID_ARGUMENT; + if (target_task == TASK_NULL) + return (KERN_INVALID_ARGUMENT); + +#if CONFIG_ATM + kr = atm_send_proc_inspect_notification(target_task, + pid, + uniqueid); + +#endif + return (kr); +} /* * This routine was added, pretty much exclusively, for registering the * RPC glue vector for in-kernel short circuited tasks. Rather than @@ -2133,6 +2461,25 @@ task_set_info( return(KERN_INVALID_ARGUMENT); switch (flavor) { + +#if CONFIG_ATM + case TASK_TRACE_MEMORY_INFO: + { + if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) + return (KERN_INVALID_ARGUMENT); + + assert(task_info_in != NULL); + task_trace_memory_info_t mem_info; + mem_info = (task_trace_memory_info_t) task_info_in; + kern_return_t kr = atm_register_trace_memory(task, + mem_info->user_memory_address, + mem_info->buffer_size, + mem_info->mailbox_array_size); + return kr; + break; + } + +#endif default: return (KERN_INVALID_ARGUMENT); } @@ -2650,7 +2997,18 @@ task_info( break; } - task_power_info_locked(task, (task_power_info_t)task_info_out); + task_power_info_locked(task, (task_power_info_t)task_info_out, NULL); + break; + } + + case TASK_POWER_INFO_V2: + { + if (*task_info_count < TASK_POWER_INFO_V2_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } + task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out; + task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy); break; } @@ -2748,6 +3106,49 @@ task_info( break; } + case TASK_WAIT_STATE_INFO: + { + /* + * Deprecated flavor. Currently allowing some results until all users + * stop calling it. The results may not be accurate. + */ + task_wait_state_info_t wait_state_info; + uint64_t total_sfi_ledger_val = 0; + + if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } + + wait_state_info = (task_wait_state_info_t) task_info_out; + + wait_state_info->total_wait_state_time = 0; + bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved)); + + int i, prev_lentry = -1; + int64_t val_credit, val_debit; + + for (i = 0; i < MAX_SFI_CLASS_ID; i++){ + val_credit =0; + /* + * checking with prev_lentry != entry ensures adjacent classes + * which share the same ledger do not add wait times twice. + * Note: Use ledger() call to get data for each individual sfi class. 
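The prev_lentry guard that follows relies on aliased SFI classes being adjacent in the ledger index array, so each shared ledger entry contributes to the total exactly once. A standalone model of that loop (the class-to-entry mapping and balances are made up):

#include <assert.h>
#include <stdint.h>

#define NCLASSES 6

int
main(void)
{
	/* classes 2 and 3 share entry 1; classes 4 and 5 share entry 2 */
	int     entry_for_class[NCLASSES] = { -1, 0, 1, 1, 2, 2 };
	int64_t entry_balance[3]          = { 10, 20, 40 };

	int64_t total = 0;
	int     prev_lentry = -1;

	for (int i = 0; i < NCLASSES; i++) {
		int e = entry_for_class[i];

		/* skip unregistered entries and repeats of the previous one */
		if (e != prev_lentry && e >= 0)
			total += entry_balance[e];
		prev_lentry = e;
	}
	assert(total == 70);   /* 10 + 20 + 40: shared entries counted once */
	return 0;
}
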
+ */ + if (prev_lentry != task_ledgers.sfi_wait_times[i] && + KERN_SUCCESS == ledger_get_entries(task->ledger, + task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) { + total_sfi_ledger_val += val_credit; + } + prev_lentry = task_ledgers.sfi_wait_times[i]; + } + + wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val; + *task_info_count = TASK_WAIT_STATE_INFO_COUNT; + + break; + } + default: error = KERN_INVALID_ARGUMENT; } @@ -2765,7 +3166,8 @@ task_info( void task_power_info_locked( task_t task, - task_power_info_t info) + task_power_info_t info, + gpu_energy_data_t ginfo) { thread_t thread; ledger_amount_t tmp; @@ -2783,6 +3185,10 @@ task_power_info_locked( info->total_user = task->total_user_time; info->total_system = task->total_system_time; + if (ginfo) { + ginfo->task_gpu_utilisation = task->task_gpu_ns; + } + queue_iterate(&task->threads, thread, thread_t, task_threads) { uint64_t tval; spl_t x; @@ -2807,9 +3213,41 @@ task_power_info_locked( info->total_user += tval; } + if (ginfo) { + ginfo->task_gpu_utilisation += ml_gpu_stat(thread); + } + thread_unlock(thread); + splx(x); + } +} + +/* + * task_gpu_utilisation + * + * Returns the total gpu time used by all the threads of the task + * (both dead and alive) + */ +uint64_t +task_gpu_utilisation( + task_t task) +{ + uint64_t gpu_time = 0; + thread_t thread; + + task_lock(task); + gpu_time += task->task_gpu_ns; + + queue_iterate(&task->threads, thread, thread_t, task_threads) { + spl_t x; + x = splsched(); + thread_lock(thread); + gpu_time += ml_gpu_stat(thread); thread_unlock(thread); splx(x); } + + task_unlock(task); + return gpu_time; } kern_return_t @@ -3035,26 +3473,6 @@ task_set_policy( return(KERN_FAILURE); } -#if FAST_TAS -kern_return_t -task_set_ras_pc( - task_t task, - vm_offset_t pc, - vm_offset_t endpc) -{ - extern int fast_tas_debug; - - if (fast_tas_debug) { - printf("task 0x%x: setting fast_tas to [0x%x, 0x%x]\n", - task, pc, endpc); - } - task_lock(task); - task->fast_tas_base = pc; - task->fast_tas_end = endpc; - task_unlock(task); - return KERN_SUCCESS; -} -#else /* FAST_TAS */ kern_return_t task_set_ras_pc( __unused task_t task, @@ -3063,7 +3481,6 @@ task_set_ras_pc( { return KERN_FAILURE; } -#endif /* FAST_TAS */ void task_synchronizer_destroy_all(task_t task) @@ -3156,6 +3573,15 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb) #ifdef MACH_BSD pid = proc_selfpid(); + + if (pid == 1) { + /* + * Cannot have ReportCrash analyzing + * a suspended initproc. + */ + return; + } + if (task->bsd_info != NULL) procname = proc_name_address(current_task()->bsd_info); #endif @@ -3199,7 +3625,14 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb) EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY); EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK); EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb); + + /* + * Use the _internal_ variant so that no user-space + * process can resume our task from under us.
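A plausible user-space consumer of the new TASK_POWER_INFO_V2 flavor handled above might look like the following (sketch only; it assumes the TASK_POWER_INFO_V2 definitions from <mach/task_info.h> introduced by this change are exported to user space):

#include <mach/mach.h>
#include <mach/mach_error.h>
#include <stdio.h>

int
main(void)
{
	task_power_info_v2_data_t info;
	mach_msg_type_number_t count = TASK_POWER_INFO_V2_COUNT;
	kern_return_t kr;

	kr = task_info(mach_task_self(), TASK_POWER_INFO_V2,
	    (task_info_t)&info, &count);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "task_info: %s\n", mach_error_string(kr));
		return 1;
	}
	/* cpu_energy carries the original task_power_info fields... */
	printf("interrupt wakeups: %llu\n",
	    (unsigned long long)info.cpu_energy.task_interrupt_wakeups);
	/* ...and gpu_energy carries the new task_gpu_utilisation total (ns) */
	printf("gpu time: %llu ns\n",
	    (unsigned long long)info.gpu_energy.task_gpu_utilisation);
	return 0;
}
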
+ */ + task_suspend_internal(task); exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); + task_resume_internal(task); } /* @@ -3208,7 +3641,9 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb) void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1) { - ledger_amount_t max_footprint_mb; + ledger_amount_t max_footprint, max_footprint_mb; + ledger_amount_t footprint_after_purge; + task_t task; if (warning == LEDGER_WARNING_DIPPED_BELOW) { /* @@ -3217,14 +3652,36 @@ task_footprint_exceeded(int warning, __unused const void *param0, __unused const return; } - ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb); - max_footprint_mb >>= 20; + task = current_task(); + + ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint); + max_footprint_mb = max_footprint >> 20; + + /* + * Try and purge all "volatile" memory in that task first. + */ + (void) task_purge_volatile_memory(task); + /* are we still over the limit ? */ + ledger_get_balance(task->ledger, + task_ledgers.phys_footprint, + &footprint_after_purge); + if ((!warning && + footprint_after_purge <= max_footprint) || + (warning && + footprint_after_purge <= ((max_footprint * + PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) { + /* all better now */ + ledger_reset_callback_state(task->ledger, + task_ledgers.phys_footprint); + return; + } + /* still over the limit after purging... */ /* * If this an actual violation (not a warning), * generate a non-fatal high watermark EXC_RESOURCE. */ - if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) { + if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) { THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb); } @@ -3382,68 +3839,6 @@ task_findtid(task_t task, uint64_t tid) return(THREAD_NULL); } - -#if CONFIG_MACF_MACH -/* - * Protect 2 task labels against modification by adding a reference on - * both label handles. The locks do not actually have to be held while - * using the labels as only labels with one reference can be modified - * in place. - */ - -void -tasklabel_lock2( - task_t a, - task_t b) -{ - labelh_reference(a->label); - labelh_reference(b->label); -} - -void -tasklabel_unlock2( - task_t a, - task_t b) -{ - labelh_release(a->label); - labelh_release(b->label); -} - -void -mac_task_label_update_internal( - struct label *pl, - struct task *task) -{ - - tasklabel_lock(task); - task->label = labelh_modify(task->label); - mac_task_label_update(pl, &task->maclabel); - tasklabel_unlock(task); - ip_lock(task->itk_self); - mac_port_label_update_cred(pl, &task->itk_self->ip_label); - ip_unlock(task->itk_self); -} - -void -mac_task_label_modify( - struct task *task, - void *arg, - void (*f) (struct label *l, void *arg)) -{ - - tasklabel_lock(task); - task->label = labelh_modify(task->label); - (*f)(&task->maclabel, arg); - tasklabel_unlock(task); -} - -struct label * -mac_task_get_label(struct task *task) -{ - return (&task->maclabel); -} -#endif - /* * Control the CPU usage monitor for a task. 
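The recheck after the purge in task_footprint_exceeded() compares against two different thresholds: the hard limit for a real violation, and a percentage of it for the warning case. Condensed into a predicate (PHYS_FOOTPRINT_WARNING_LEVEL's real value is defined elsewhere in the kernel; 80 below is only a placeholder for the sketch):

#include <stdbool.h>
#include <stdint.h>

#define PHYS_FOOTPRINT_WARNING_LEVEL 80   /* assumed value for the sketch */

bool
still_over_limit(int64_t footprint_after_purge, int64_t max_footprint,
    bool warning)
{
	int64_t threshold = warning
	    ? (max_footprint * PHYS_FOOTPRINT_WARNING_LEVEL) / 100
	    : max_footprint;

	return footprint_after_purge > threshold;
}

Only when this predicate still holds does the handler go on to raise EXC_RESOURCE, and as noted above the exception is raised between task_suspend_internal() and task_resume_internal() so user space cannot resume the task mid-triage.
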
*/ @@ -3624,3 +4019,91 @@ THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void) task_terminate_internal(task); } } + +kern_return_t +task_purge_volatile_memory( + task_t task) +{ + vm_map_t map; + int num_object_purged; + + if (task == TASK_NULL) + return KERN_INVALID_TASK; + + task_lock(task); + + if (!task->active) { + task_unlock(task); + return KERN_INVALID_TASK; + } + map = task->map; + if (map == VM_MAP_NULL) { + task_unlock(task); + return KERN_INVALID_TASK; + } + vm_map_reference(task->map); + + task_unlock(task); + + num_object_purged = vm_map_purge(map); + vm_map_deallocate(map); + + return KERN_SUCCESS; +} + +/* Placeholders for the task set/get voucher interfaces */ +kern_return_t +task_get_mach_voucher( + task_t task, + mach_voucher_selector_t __unused which, + ipc_voucher_t *voucher) +{ + if (TASK_NULL == task) + return KERN_INVALID_TASK; + + *voucher = NULL; + return KERN_SUCCESS; +} + +kern_return_t +task_set_mach_voucher( + task_t task, + ipc_voucher_t __unused voucher) +{ + if (TASK_NULL == task) + return KERN_INVALID_TASK; + + return KERN_SUCCESS; +} + +kern_return_t +task_swap_mach_voucher( + task_t task, + ipc_voucher_t new_voucher, + ipc_voucher_t *in_out_old_voucher) +{ + if (TASK_NULL == task) + return KERN_INVALID_TASK; + + *in_out_old_voucher = new_voucher; + return KERN_SUCCESS; +} + +void task_set_gpu_denied(task_t task, boolean_t denied) +{ + task_lock(task); + + if (denied) { + task->t_flags |= TF_GPU_DENIED; + } else { + task->t_flags &= ~TF_GPU_DENIED; + } + + task_unlock(task); +} + +boolean_t task_is_gpu_denied(task_t task) +{ + /* We don't need the lock to read this flag */ + return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE; +} diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index 1b2991861..c2a8d86a9 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -110,13 +110,15 @@ #include #include #include -#include +#include #include -#include +#include #endif /* MACH_KERNEL_PRIVATE */ #ifdef XNU_KERNEL_PRIVATE +#include + /* defns for task->rsu_controldata */ #define TASK_POLICY_CPU_RESOURCE_USAGE 0 #define TASK_POLICY_WIREDMEM_RESOURCE_USAGE 1 @@ -146,6 +148,24 @@ #include +#ifdef CONFIG_ATM +#include +#endif + +struct _cpu_time_qos_stats { + uint64_t cpu_time_qos_default; + uint64_t cpu_time_qos_maintenance; + uint64_t cpu_time_qos_background; + uint64_t cpu_time_qos_utility; + uint64_t cpu_time_qos_legacy; + uint64_t cpu_time_qos_user_initiated; + uint64_t cpu_time_qos_user_interactive; +}; + +#ifdef CONFIG_BANK +#include +#endif + struct task { /* Synchronization/destruction information */ decl_lck_mtx_data(,lock) /* Task's lock */ @@ -158,6 +178,10 @@ struct task { queue_chain_t tasks; /* global list of tasks */ void *user_data; /* Arbitrary data settable via IPC */ +#if defined(CONFIG_SCHED_MULTIQ) + sched_group_t sched_group; +#endif /* defined(CONFIG_SCHED_MULTIQ) */ + /* Threads in this task */ queue_head_t threads; @@ -184,7 +208,7 @@ struct task { /* Statistics */ uint64_t total_user_time; /* terminated threads only */ uint64_t total_system_time; - + /* Virtual timers */ uint32_t vtimers; @@ -199,6 +223,7 @@ struct task { struct ipc_port *itk_bootstrap; /* a send right */ struct ipc_port *itk_seatbelt; /* a send right */ struct ipc_port *itk_gssd; /* yet another send right */ + struct ipc_port *itk_debug_control; /* send right for debugmode communications */ struct ipc_port *itk_task_access; /* and another send right */ struct ipc_port *itk_resume; /* a receive right to resume this task */ struct ipc_port 
*itk_registered[TASK_PORT_REGISTER_MAX]; @@ -234,12 +259,14 @@ struct task { void *bsd_info; #endif struct vm_shared_region *shared_region; - uint32_t t_flags; /* general-purpose task flags protected by task_lock (TL) */ -#define TF_64B_ADDR 0x1 /* task has 64-bit addressing */ -#define TF_64B_DATA 0x2 /* task has 64-bit data registers */ -#define TF_CPUMON_WARNING 0x4 /* task has at least one thread in CPU usage warning zone */ -#define TF_WAKEMON_WARNING 0x8 /* task is in wakeups monitor warning zone */ -#define TF_TELEMETRY (TF_CPUMON_WARNING | TF_WAKEMON_WARNING) /* task is a telemetry participant */ + volatile uint32_t t_flags; /* general-purpose task flags protected by task_lock (TL) */ +#define TF_64B_ADDR 0x00000001 /* task has 64-bit addressing */ +#define TF_64B_DATA 0x00000002 /* task has 64-bit data registers */ +#define TF_CPUMON_WARNING 0x00000004 /* task has at least one thread in CPU usage warning zone */ +#define TF_WAKEMON_WARNING 0x00000008 /* task is in wakeups monitor warning zone */ +#define TF_TELEMETRY (TF_CPUMON_WARNING | TF_WAKEMON_WARNING) /* task is a telemetry participant */ +#define TF_GPU_DENIED 0x00000010 /* task is not allowed to access the GPU */ + #define task_has_64BitAddr(task) \ (((task)->t_flags & TF_64B_ADDR) != 0) #define task_set_64BitAddr(task) \ @@ -251,12 +278,11 @@ struct task { mach_vm_address_t all_image_info_addr; /* dyld __all_image_info */ mach_vm_size_t all_image_info_size; /* section location and size */ -#if CONFIG_MACF_MACH - ipc_labelh_t label; -#endif -#if CONFIG_COUNTERS -#define TASK_PMC_FLAG 0x1 /* Bit in "t_chud" signifying PMC interest */ +#if CONFIG_COUNTERS || KPERF +#define TASK_PMC_FLAG 0x1 /* Bit in "t_chud" signifying PMC interest */ +#define TASK_KPC_FORCED_ALL_CTRS 0x2 /* Bit in "t_chud" signifying KPC forced all counters */ + uint32_t t_chud; /* CHUD flags, used for Shark */ #endif @@ -275,13 +301,15 @@ struct task { uint64_t rusage_cpu_deadline; thread_call_t rusage_cpu_callt; -#if IMPORTANCE_INHERITANCE - uint32_t imp_receiver :1, /* the task can receive importance boost */ - imp_donor :1, /* the task always sends boosts regardless of boost status */ - imp_reserved :30; /* reserved for future use */ +#if CONFIG_ATM + struct atm_task_descriptor *atm_context; /* pointer to per task atm descriptor */ +#endif +#if CONFIG_BANK + struct bank_task *bank_context; /* pointer to per task bank structure */ +#endif - uint32_t task_imp_assertcnt; /* total number of boost assertions (kernel managed plus userspace managed) */ - uint32_t task_imp_externcnt; /* number of boost assertions externalized (userspace managed) */ +#if IMPORTANCE_INHERITANCE + struct ipc_importance_task *task_imp_base; /* Base of IPC importance chain */ #endif /* IMPORTANCE_INHERITANCE */ vm_extmod_statistics_data_t extmod_statistics; @@ -303,12 +331,33 @@ struct task { purged_memory_critical :1, /* purgeable memory of the task is purged for critical level pressure */ mem_notify_reserved :28; /* reserved for future use */ + io_stat_info_t task_io_stats; + + /* + * The cpu_time_qos_stats fields are protected by the task lock + */ + struct _cpu_time_qos_stats cpu_time_qos_stats; + /* Statistics accumulated for terminated threads from this task */ uint32_t task_timer_wakeups_bin_1; uint32_t task_timer_wakeups_bin_2; - - int task_volatile_objects; /* # of purgeable VM objects made - * "volatile" by this task */ + uint64_t task_gpu_ns; + + /* # of purgeable volatile VM objects owned by this task: */ + int task_volatile_objects; + /* # of purgeable but not volatile 
VM objects owned by this task: */ + int task_nonvolatile_objects; + boolean_t task_purgeable_disowning; + boolean_t task_purgeable_disowned; + + /* Coalition is set in task_create_internal and unset in task_deallocate_internal, so it can be referenced without the task lock. */ + coalition_t coalition; /* coalition this task belongs to */ + /* These fields are protected by coalition->lock, not the task lock. */ + queue_chain_t coalition_tasks; /* list of tasks in the coalition */ + +#if HYPERVISOR + void *hv_task_target; /* hypervisor virtual machine object associated with this task */ +#endif /* HYPERVISOR */ }; #define task_lock(task) lck_mtx_lock(&(task)->lock) @@ -316,16 +365,6 @@ struct task { #define task_lock_try(task) lck_mtx_try_lock(&(task)->lock) #define task_unlock(task) lck_mtx_unlock(&(task)->lock) -#if CONFIG_MACF_MACH -#define maclabel label->lh_label - -#define tasklabel_lock(task) lh_lock((task)->label) -#define tasklabel_unlock(task) lh_unlock((task)->label) - -extern void tasklabel_lock2(task_t a, task_t b); -extern void tasklabel_unlock2(task_t a, task_t b); -#endif /* MAC_MACH */ - #define itk_lock_init(task) lck_mtx_init(&(task)->itk_lock_data, &ipc_lck_grp, &ipc_lck_attr) #define itk_lock_destroy(task) lck_mtx_destroy(&(task)->itk_lock_data, &ipc_lck_grp) #define itk_lock(task) lck_mtx_lock(&(task)->itk_lock_data) @@ -359,6 +398,9 @@ extern kern_return_t kernel_task_create( /* Initialize task module */ extern void task_init(void); +/* coalition_init() calls this to initialize ledgers before task_init() */ +extern void init_task_ledgers(void); + #define current_task_fast() (current_thread()->task) #define current_task() current_task_fast() @@ -404,6 +446,11 @@ extern kern_return_t task_pidsuspend_locked( extern kern_return_t task_pidresume( task_t task); +extern kern_return_t task_send_trace_memory( + task_t task, + uint32_t pid, + uint64_t uniqueid); + #if CONFIG_FREEZE /* Freeze a task's resident pages */ @@ -436,6 +483,7 @@ extern kern_return_t task_terminate_internal( extern kern_return_t task_create_internal( task_t parent_task, + coalition_t parent_coalition, boolean_t inherit_memory, boolean_t is_64bit, task_t *child_task); /* OUT */ @@ -446,7 +494,11 @@ extern kern_return_t task_importance( extern void task_power_info_locked( task_t task, - task_power_info_t info); + task_power_info_t info, + gpu_energy_data_t gpu_energy); + +extern uint64_t task_gpu_utilisation( + task_t task); extern void task_vtimer_set( task_t task, @@ -489,8 +541,12 @@ extern vm_map_t get_task_map_reference(task_t); extern vm_map_t swap_task_map(task_t, thread_t, vm_map_t, boolean_t); extern pmap_t get_task_pmap(task_t); extern uint64_t get_task_resident_size(task_t); +extern uint64_t get_task_compressed(task_t); +extern uint64_t get_task_resident_max(task_t); extern uint64_t get_task_phys_footprint(task_t); extern uint64_t get_task_phys_footprint_max(task_t); +extern uint64_t get_task_purgeable_size(task_t); +extern uint64_t get_task_cpu_time(task_t); extern kern_return_t task_set_phys_footprint_limit_internal(task_t, int, int *, boolean_t); extern kern_return_t task_get_phys_footprint_limit(task_t task, int *limit_mb); @@ -519,11 +575,22 @@ struct _task_ledger_indices { int tkm_shared; int phys_mem; int wired_mem; - int iokit_mem; + int internal; + int iokit_mapped; + int alternate_accounting; int phys_footprint; - int phys_compressed; + int internal_compressed; + int purgeable_volatile; + int purgeable_nonvolatile; + int purgeable_volatile_compressed; + int 
purgeable_nonvolatile_compressed; int platform_idle_wakeups; int interrupt_wakeups; + int sfi_wait_times[MAX_SFI_CLASS_ID]; +#ifdef CONFIG_BANK + int cpu_time_billed_to_me; + int cpu_time_billed_to_others; +#endif }; extern struct _task_ledger_indices task_ledgers; @@ -550,10 +617,6 @@ extern struct _task_ledger_indices task_ledgers; #define TASK_POLICY_IO 0x23 #define TASK_POLICY_PASSIVE_IO 0x24 -/* internal or external, task only */ -#define TASK_POLICY_DARWIN_BG_AND_GPU 0x25 -#define TASK_POLICY_GPU_DENY 0x26 - /* internal, task only */ #define TASK_POLICY_DARWIN_BG_IOPOL 0x27 @@ -569,9 +632,18 @@ extern struct _task_ledger_indices task_ledgers; #define TASK_POLICY_THROUGH_QOS 0x30 #define TASK_POLICY_WATCHERS_BG 0x31 +#define TASK_POLICY_SFI_MANAGED 0x34 +#define TASK_POLICY_ALL_SOCKETS_BG 0x37 + +#define TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS 0x39 /* latency as value1, throughput as value2 */ +#define TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS 0x3A /* latency as value1, throughput as value2 */ + /* thread-only attributes */ #define TASK_POLICY_PIDBIND_BG 0x32 #define TASK_POLICY_WORKQ_BG 0x33 +#define TASK_POLICY_QOS 0x35 +#define TASK_POLICY_QOS_OVERRIDE 0x36 +#define TASK_POLICY_QOS_AND_RELPRIO 0x38 /* QoS as value1, relative priority as value2 */ #define TASK_POLICY_MAX 0x3F @@ -579,10 +651,17 @@ extern struct _task_ledger_indices task_ledgers; extern void proc_set_task_policy(task_t task, thread_t thread, int category, int flavor, int value); extern int proc_get_task_policy(task_t task, thread_t thread, int category, int flavor); +/* For attributes that have two scalars as input/output */ +extern void proc_set_task_policy2(task_t task, thread_t thread, int category, int flavor, int value1, int value2); +extern void proc_get_task_policy2(task_t task, thread_t thread, int category, int flavor, int *value1, int *value2); + /* For use by kernel threads and others who don't hold a reference on the target thread */ extern void proc_set_task_policy_thread(task_t task, uint64_t tid, int category, int flavor, int value); -extern void proc_set_task_apptype(task_t task, int type); +extern void proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, + ipc_port_t * portwatch_ports, int portwatch_count); + +extern void task_set_main_thread_qos(task_t task, thread_t main_thread); /* IO Throttle tiers */ #define THROTTLE_LEVEL_NONE -1 @@ -603,6 +682,10 @@ extern void proc_set_task_apptype(task_t task, int type); #define THROTTLE_LEVEL_PAGEOUT_THROTTLED THROTTLE_LEVEL_TIER2 #define THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED THROTTLE_LEVEL_TIER1 +#if CONFIG_IOSCHED +#define IOSCHED_METADATA_TIER THROTTLE_LEVEL_TIER1 +#endif /* CONFIG_IOSCHED */ + extern int proc_apply_workq_bgthreadpolicy(thread_t thread); extern int proc_restore_workq_bgthreadpolicy(thread_t thread); @@ -610,6 +693,21 @@ extern int proc_get_darwinbgstate(task_t task, uint32_t *flagsp); extern boolean_t proc_task_is_tal(task_t task); extern integer_t task_grab_latency_qos(task_t task); extern void task_policy_create(task_t task, int parent_boosted); +extern void thread_policy_create(thread_t thread); + +/* + * for IPC importance hooks into task policy + */ +typedef struct task_pend_token { + uint32_t tpt_update_sockets :1, + tpt_update_timers :1, + tpt_update_watchers :1, + tpt_update_live_donor :1; +} *task_pend_token_t; + +extern void task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_token_t pend_token); +extern void task_update_boost_locked(task_t task, boolean_t boost_active, 
task_pend_token_t pend_token); +extern void task_set_boost_locked(task_t task, boolean_t boost_active); /* * Get effective policy @@ -628,6 +726,9 @@ int proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled); thread_t task_findtid(task_t, uint64_t); void set_thread_iotier_override(thread_t, int policy); +boolean_t proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource); +boolean_t proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid); + #define TASK_RUSECPU_FLAGS_PROC_LIMIT 0x01 #define TASK_RUSECPU_FLAGS_PERTHR_LIMIT 0x02 #define TASK_RUSECPU_FLAGS_DEADLINE 0x04 @@ -646,23 +747,38 @@ extern kern_return_t task_wakeups_monitor_ctl(task_t task, uint32_t *rate_hz, in extern kern_return_t task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags); -void task_importance_mark_donor(task_t task, boolean_t donating); +extern void task_importance_mark_donor(task_t task, boolean_t donating); +extern void task_importance_mark_live_donor(task_t task, boolean_t donating); +extern void task_importance_mark_receiver(task_t task, boolean_t receiving); +extern void task_importance_mark_denap_receiver(task_t task, boolean_t denap); +extern void task_importance_reset(task_t task); +extern void task_atm_reset(task_t task); #if IMPORTANCE_INHERITANCE + extern boolean_t task_is_importance_donor(task_t task); +extern boolean_t task_is_marked_importance_donor(task_t task); +extern boolean_t task_is_marked_live_importance_donor(task_t task); + extern boolean_t task_is_importance_receiver(task_t task); +extern boolean_t task_is_marked_importance_receiver(task_t task); + +extern boolean_t task_is_importance_denap_receiver(task_t task); +extern boolean_t task_is_marked_importance_denap_receiver(task_t task); +extern boolean_t task_is_importance_receiver_type(task_t task); + +extern int task_importance_hold_watchport_assertion(task_t target_task, uint32_t count); extern int task_importance_hold_internal_assertion(task_t target_task, uint32_t count); extern int task_importance_drop_internal_assertion(task_t target_task, uint32_t count); -extern int task_importance_hold_external_assertion(task_t target_task, uint32_t count); -extern int task_importance_drop_external_assertion(task_t target_task, uint32_t count); +extern int task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count); +extern int task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count); -extern int task_importance_externalize_assertion(task_t target_task, uint32_t count, int sender_pid); -#endif /* IMPORTANCE_INHERITANCE */ +extern int task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count); +extern int task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count); -extern void task_hold_multiple_assertion(task_t task, uint32_t count); -extern void task_add_importance_watchport(task_t task, int pid, mach_port_t port, int *boostp); +#endif /* IMPORTANCE_INHERITANCE */ extern boolean_t task_has_been_notified(task_t task, int pressurelevel); extern boolean_t task_used_for_purging(task_t task, int pressurelevel); @@ -680,6 +796,11 @@ extern int task_importance_estimate(task_t task); /* End task_policy */ +extern kern_return_t task_purge_volatile_memory(task_t task); + +extern void task_set_gpu_denied(task_t task, boolean_t denied); +extern boolean_t task_is_gpu_denied(task_t task); + #endif /* XNU_KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE diff --git a/osfmk/kern/task_policy.c 
b/osfmk/kern/task_policy.c index 88aeafa46..f826ea7c1 100644 --- a/osfmk/kern/task_policy.c +++ b/osfmk/kern/task_policy.c @@ -37,13 +37,17 @@ #include #include #include +#include #if CONFIG_TELEMETRY #include #endif +#if IMPORTANCE_INHERITANCE +#include #if IMPORTANCE_DEBUG #include #endif /* IMPORTANCE_DEBUG */ +#endif /* IMPORTANCE_INHERITANCE */ #include @@ -82,7 +86,9 @@ * * To add a new requested policy, add the field in the requested struct, the flavor in task.h, * the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield, - * then set up the effects of that behavior in task_policy_update*. + * then set up the effects of that behavior in task_policy_update*. If the policy manifests + * itself as a distinct effective policy, add it to the effective struct and add it to the + * proc_get_effective_policy accessor. * * Most policies are set via proc_set_task_policy, but policies that don't fit that interface * roll their own lock/set/update/unlock/complete code inside this file. @@ -107,32 +113,36 @@ * */ +extern const qos_policy_params_t thread_qos_policy_params; /* for task holds without dropping the lock */ extern void task_hold_locked(task_t task); extern void task_release_locked(task_t task); extern void task_wait_locked(task_t task, boolean_t until_not_runnable); +extern void thread_recompute_qos(thread_t thread); + /* Task policy related helper functions */ static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value); +static void proc_set_task_policy2_locked(task_t task, thread_t thread, int category, int flavor, int value1, int value2); -static void task_policy_update_locked(task_t task, thread_t thread); -static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create); -static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle); -static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle); - -static void task_policy_update_complete_unlocked(task_t task, thread_t thread); +static void task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token); +static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token); +static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle, boolean_t update_sfi); +static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle, boolean_t update_sfi, boolean_t update_qos); static int proc_get_effective_policy(task_t task, thread_t thread, int policy); static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive); static int proc_tier_to_iopol(int tier, int passive); -static uintptr_t trequested(task_t task, thread_t thread); -static uintptr_t teffective(task_t task, thread_t thread); -static uintptr_t tpending(task_t task, thread_t thread); +static uintptr_t trequested_0(task_t task, thread_t thread); +static uintptr_t trequested_1(task_t task, thread_t thread); +static uintptr_t teffective_0(task_t task, thread_t thread); +static uintptr_t teffective_1(task_t task, thread_t thread); +static uint32_t tpending(task_pend_token_t pend_token); static uint64_t task_requested_bitfield(task_t task, thread_t thread); static uint64_t task_effective_bitfield(task_t task, thread_t thread); -static uint64_t task_pending_bitfield(task_t task, thread_t
thread); void proc_get_thread_policy(thread_t thread, thread_policy_state_t info); @@ -150,19 +160,17 @@ int proc_pid(void *proc); extern int proc_selfpid(void); extern char * proc_name_address(void *p); extern void rethrottle_thread(void * uthread); -extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread, int bg); +extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread); #endif /* MACH_BSD */ /* Importance Inheritance related helper functions */ -void task_importance_mark_receiver(task_t task, boolean_t receiving); - #if IMPORTANCE_INHERITANCE -static void task_update_boost_locked(task_t task, boolean_t boost_active); -static int task_importance_hold_assertion_locked(task_t target_task, int external, uint32_t count); -static int task_importance_drop_assertion_locked(task_t target_task, int external, uint32_t count); +static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp); +static void task_importance_update_live_donor(task_t target_task); + #endif /* IMPORTANCE_INHERITANCE */ #if IMPORTANCE_DEBUG @@ -201,7 +209,7 @@ int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF); const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2; - +/* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */ const struct task_requested_policy default_task_requested_policy = { .bg_iotier = proc_default_bg_iotier }; @@ -219,6 +227,24 @@ const struct task_pended_policy default_task_pended_policy = {}; uint8_t proc_max_cpumon_percentage; uint64_t proc_max_cpumon_interval; +kern_return_t +qos_latency_policy_validate(task_latency_qos_t ltier) { + if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) && + ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) + return KERN_INVALID_ARGUMENT; + + return KERN_SUCCESS; +} + +kern_return_t +qos_throughput_policy_validate(task_throughput_qos_t ttier) { + if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) && + ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) + return KERN_INVALID_ARGUMENT; + + return KERN_SUCCESS; +} + static kern_return_t task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) { if (count < TASK_QOS_POLICY_COUNT) @@ -227,32 +253,34 @@ task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count task_latency_qos_t ltier = qosinfo->task_latency_qos_tier; task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier; - if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) && - ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) - return KERN_INVALID_ARGUMENT; + kern_return_t kr = qos_latency_policy_validate(ltier); - if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) && - ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) - return KERN_INVALID_ARGUMENT; + if (kr != KERN_SUCCESS) + return kr; - return KERN_SUCCESS; + kr = qos_throughput_policy_validate(ttier); + + return kr; } -static uint32_t -task_qos_extract(uint32_t qv) { +uint32_t +qos_extract(uint32_t qv) { return (qv & 0xFF); } -static uint32_t -task_qos_latency_package(uint32_t qv) { +uint32_t +qos_latency_policy_package(uint32_t qv) { return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv); } -static uint32_t -task_qos_throughput_package(uint32_t qv) { +uint32_t +qos_throughput_policy_package(uint32_t qv) { return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? 
THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv); } +/* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */ +static boolean_t task_policy_suppression_disable = FALSE; + kern_return_t task_policy_set( task_t task, @@ -311,6 +339,7 @@ task_policy_set( /* Desired energy-efficiency/performance "quality-of-service" */ case TASK_BASE_QOS_POLICY: + case TASK_OVERRIDE_QOS_POLICY: { task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; kern_return_t kr = task_qos_policy_validate(qosinfo, count); @@ -318,29 +347,17 @@ task_policy_set( if (kr != KERN_SUCCESS) return kr; - task_lock(task); - - /* This uses the latency QoS tracepoint, even though we might be changing both */ - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_START, - proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); - - task->requested_policy.t_base_latency_qos = task_qos_extract(qosinfo->task_latency_qos_tier); - task->requested_policy.t_base_through_qos = task_qos_extract(qosinfo->task_throughput_qos_tier); - - task_policy_update_locked(task, THREAD_NULL); - - task_unlock(task); - task_policy_update_complete_unlocked(task, THREAD_NULL); + uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier); + uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier); - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_END, - proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); + proc_set_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, + flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, + lqos, tqos); } - break; + break; - case TASK_OVERRIDE_QOS_POLICY: + case TASK_BASE_LATENCY_QOS_POLICY: { task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; kern_return_t kr = task_qos_policy_validate(qosinfo, count); @@ -348,27 +365,25 @@ task_policy_set( if (kr != KERN_SUCCESS) return kr; - task_lock(task); - - /* This uses the latency QoS tracepoint, even though we might be changing both */ - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_START, - proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); + uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier); - task->requested_policy.t_over_latency_qos = task_qos_extract(qosinfo->task_latency_qos_tier); - task->requested_policy.t_over_through_qos = task_qos_extract(qosinfo->task_throughput_qos_tier); + proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos); + } + break; - task_policy_update_locked(task, THREAD_NULL); + case TASK_BASE_THROUGHPUT_QOS_POLICY: + { + task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; + kern_return_t kr = task_qos_policy_validate(qosinfo, count); - task_unlock(task); + if (kr != KERN_SUCCESS) + return kr; - task_policy_update_complete_unlocked(task, THREAD_NULL); + uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier); - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_END, - proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); + proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, 
tqos); } - break; + break; case TASK_SUPPRESSION_POLICY: { @@ -388,32 +403,39 @@ task_policy_set( if (kr != KERN_SUCCESS) return kr; + /* TEMPORARY disablement of task suppression */ + if (task_policy_suppression_disable && info->active) + return KERN_SUCCESS; + + struct task_pend_token pend_token = {}; + task_lock(task); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START, - proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), - 0, 0); + proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), + trequested_1(task, THREAD_NULL), 0); task->requested_policy.t_sup_active = (info->active) ? 1 : 0; task->requested_policy.t_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0; - task->requested_policy.t_sup_timer = task_qos_extract(info->timer_throttle); + task->requested_policy.t_sup_timer = qos_extract(info->timer_throttle); task->requested_policy.t_sup_disk = (info->disk_throttle) ? 1 : 0; task->requested_policy.t_sup_cpu_limit = (info->cpu_limit) ? 1 : 0; task->requested_policy.t_sup_suspend = (info->suspend) ? 1 : 0; - task->requested_policy.t_sup_throughput = task_qos_extract(info->throughput_qos); + task->requested_policy.t_sup_throughput = qos_extract(info->throughput_qos); task->requested_policy.t_sup_cpu = (info->suppressed_cpu) ? 1 : 0; + task->requested_policy.t_sup_bg_sockets = (info->background_sockets) ? 1 : 0; - task_policy_update_locked(task, THREAD_NULL); + task_policy_update_locked(task, THREAD_NULL, &pend_token); task_unlock(task); - task_policy_update_complete_unlocked(task, THREAD_NULL); + task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END, - proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), - 0, 0); + proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), + trequested_1(task, THREAD_NULL), 0); break; @@ -455,7 +477,7 @@ task_importance( /* TODO: tracepoint? 
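The qos_extract()/qos_*_policy_package() helpers introduced a little earlier in this file implement a simple tagged encoding: user-visible tier constants carry a magic byte in bits 16-23 (0xFF for latency, 0xFE for throughput) while the kernel stores only the low byte. A round-trip check (the tier value is chosen arbitrarily to have the right shape):

#include <assert.h>
#include <stdint.h>

#define LATENCY_QOS_TIER_UNSPECIFIED 0

static uint32_t
qos_extract(uint32_t qv)
{
	return qv & 0xFF;   /* keep only the tier number */
}

static uint32_t
qos_latency_policy_package(uint32_t qv)
{
	/* re-attach the 0xFF magic byte, except for "unspecified" */
	return (qv == LATENCY_QOS_TIER_UNSPECIFIED)
	    ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
}

int
main(void)
{
	uint32_t tier = (0xFF << 16) | 3;   /* shaped like a LATENCY_QOS_TIER_* constant */

	assert(qos_extract(tier) == 3);
	assert(qos_latency_policy_package(qos_extract(tier)) == tier);
	assert(qos_latency_policy_package(0) == 0);
	return 0;
}
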
*/ /* Redrive only the task priority calculation */ - task_policy_update_task_locked(task, FALSE, FALSE); + task_policy_update_task_locked(task, FALSE, FALSE, FALSE); task_unlock(task); @@ -501,19 +523,20 @@ task_policy_get( info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED; info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED; } else if (flavor == TASK_BASE_QOS_POLICY) { - task_lock(task); + int value1, value2; - info->task_latency_qos_tier = task_qos_latency_package(task->requested_policy.t_base_latency_qos); - info->task_throughput_qos_tier = task_qos_throughput_package(task->requested_policy.t_base_through_qos); + proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2); + + info->task_latency_qos_tier = qos_latency_policy_package(value1); + info->task_throughput_qos_tier = qos_throughput_policy_package(value2); - task_unlock(task); } else if (flavor == TASK_OVERRIDE_QOS_POLICY) { - task_lock(task); + int value1, value2; - info->task_latency_qos_tier = task_qos_latency_package(task->requested_policy.t_over_latency_qos); - info->task_throughput_qos_tier = task_qos_throughput_package(task->requested_policy.t_over_through_qos); + proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2); - task_unlock(task); + info->task_latency_qos_tier = qos_latency_policy_package(value1); + info->task_throughput_qos_tier = qos_throughput_policy_package(value2); } break; @@ -530,8 +553,6 @@ task_policy_get( if (current_task()->sec_token.val[0] != 0) return KERN_PROTECTION_FAILURE; - task_lock(task); - if (*get_default) { info->requested = 0; info->effective = 0; @@ -539,19 +560,33 @@ task_policy_get( info->imp_assertcnt = 0; info->imp_externcnt = 0; info->flags = 0; + info->imp_transitions = 0; } else { + task_lock(task); + info->requested = task_requested_bitfield(task, THREAD_NULL); info->effective = task_effective_bitfield(task, THREAD_NULL); - info->pending = task_pending_bitfield(task, THREAD_NULL); - info->imp_assertcnt = task->task_imp_assertcnt; - info->imp_externcnt = task->task_imp_externcnt; + info->pending = 0; info->flags = 0; - info->flags |= (task->imp_receiver ? TASK_IMP_RECEIVER : 0); - info->flags |= (task->imp_donor ? TASK_IMP_DONOR : 0); + if (task->task_imp_base != NULL) { + info->imp_assertcnt = task->task_imp_base->iit_assertcnt; + info->imp_externcnt = IIT_EXTERN(task->task_imp_base); + info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0); + info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0); + info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0); + info->flags |= (task_is_marked_live_importance_donor(task) ? 
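/*
 * [Editor's sketch -- not part of the patch] Reading the base QoS tiers back
 * through the proc_get_task_policy2()-backed getter above. Passing
 * get_default = TRUE returns the *_TIER_UNSPECIFIED constants instead of the
 * stored values.
 */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/task_policy.h>

void show_base_qos(task_t task)
{
	struct task_qos_policy qosinfo;
	mach_msg_type_number_t count = TASK_QOS_POLICY_COUNT;
	boolean_t get_default = FALSE;

	if (task_policy_get(task, TASK_BASE_QOS_POLICY,
	    (task_policy_t)&qosinfo, &count, &get_default) == KERN_SUCCESS) {
		printf("latency tier 0x%x, throughput tier 0x%x\n",
		    (unsigned)qosinfo.task_latency_qos_tier,
		    (unsigned)qosinfo.task_throughput_qos_tier);
	}
}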
TASK_IMP_LIVE_DONOR : 0); + info->imp_transitions = task->task_imp_base->iit_transitions; + } else { + info->imp_assertcnt = 0; + info->imp_externcnt = 0; + info->imp_transitions = 0; + } + task_unlock(task); } - task_unlock(task); + info->reserved[0] = 0; + info->reserved[1] = 0; break; } @@ -577,12 +612,13 @@ task_policy_get( } else { info->active = task->requested_policy.t_sup_active; info->lowpri_cpu = task->requested_policy.t_sup_lowpri_cpu; - info->timer_throttle = task_qos_latency_package(task->requested_policy.t_sup_timer); + info->timer_throttle = qos_latency_policy_package(task->requested_policy.t_sup_timer); info->disk_throttle = task->requested_policy.t_sup_disk; info->cpu_limit = task->requested_policy.t_sup_cpu_limit; info->suspend = task->requested_policy.t_sup_suspend; - info->throughput_qos = task_qos_throughput_package(task->requested_policy.t_sup_throughput); + info->throughput_qos = qos_throughput_policy_package(task->requested_policy.t_sup_throughput); info->suppressed_cpu = task->requested_policy.t_sup_cpu; + info->background_sockets = task->requested_policy.t_sup_bg_sockets; } task_unlock(task); @@ -615,32 +651,53 @@ task_policy_create(task_t task, int parent_boosted) } KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START, - proc_selfpid(), audit_token_pid_from_task(task), - teffective(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); + (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START, + audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL), + teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); - task_policy_update_internal_locked(task, THREAD_NULL, TRUE); + task_policy_update_internal_locked(task, THREAD_NULL, TRUE, NULL); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END, - proc_selfpid(), audit_token_pid_from_task(task), - teffective(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); + (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END, + audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL), + teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); + + task_importance_update_live_donor(task); + task_policy_update_task_locked(task, FALSE, FALSE, FALSE); +} + +void +thread_policy_create(thread_t thread) +{ + task_t task = thread->task; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START, + targetid(task, thread), teffective_0(task, thread), + teffective_1(task, thread), tpriority(task, thread), 0); + + task_policy_update_internal_locked(task, thread, TRUE, NULL); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END, + targetid(task, thread), teffective_0(task, thread), + teffective_1(task, thread), tpriority(task, thread), 0); } static void -task_policy_update_locked(task_t task, thread_t thread) +task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START), - proc_selfpid(), targetid(task, thread), - teffective(task, thread), tpriority(task, thread), 0); + targetid(task, thread), teffective_0(task, thread), + teffective_1(task, thread), tpriority(task, thread), 0); - 
task_policy_update_internal_locked(task, thread, FALSE); + task_policy_update_internal_locked(task, thread, FALSE, pend_token); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END, - proc_selfpid(), targetid(task, thread), - teffective(task, thread), tpriority(task, thread), 0); + (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END, + targetid(task, thread), teffective_0(task, thread), + teffective_1(task, thread), tpriority(task, thread), 0); } /* @@ -654,8 +711,9 @@ task_policy_update_locked(task_t task, thread_t thread) * * Called with task locked, not thread */ + static void -task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create) +task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token) { boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; @@ -667,6 +725,7 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr struct task_requested_policy requested = (on_task) ? task->requested_policy : thread->requested_policy; + /* * Step 2: * Calculate new effective policies from requested policy and task state @@ -678,15 +737,113 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr */ struct task_effective_policy next = {}; + struct task_effective_policy task_effective; + + /* Calculate QoS policies */ + + if (on_task) { + /* Update task role */ + next.t_role = requested.t_role; + + /* Set task qos clamp and ceiling */ + next.t_qos_clamp = requested.t_qos_clamp; + + if (requested.t_apptype == TASK_APPTYPE_APP_DEFAULT || + requested.t_apptype == TASK_APPTYPE_APP_TAL) { + + switch (next.t_role) { + case TASK_FOREGROUND_APPLICATION: + /* Foreground apps get urgent scheduler priority */ + next.qos_ui_is_urgent = 1; + next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED; + break; + + case TASK_BACKGROUND_APPLICATION: + /* This is really 'non-focal but on-screen' */ + next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED; + break; + + case TASK_NONUI_APPLICATION: + /* i.e. 'off-screen' */ + next.t_qos_ceiling = THREAD_QOS_LEGACY; + break; + + case TASK_CONTROL_APPLICATION: + case TASK_GRAPHICS_SERVER: + next.qos_ui_is_urgent = 1; + next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED; + break; + + case TASK_UNSPECIFIED: + default: + /* Apps that don't have an application role get + * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */ + next.t_qos_ceiling = THREAD_QOS_LEGACY; + break; + } + } else { + /* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */ + next.t_qos_ceiling = THREAD_QOS_USER_INITIATED; + } + } else { + /* + * Set thread qos tier + * Note that an override only overrides the QoS field, not other policy settings. 
+ * A thread must already be participating in QoS for override to take effect + */ + + /* Snapshot the task's effective policy */ + task_effective = task->effective_policy; + + next.qos_ui_is_urgent = task_effective.qos_ui_is_urgent; + + if ((requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED) && (requested.thrp_qos != THREAD_QOS_UNSPECIFIED)) + next.thep_qos = MAX(requested.thrp_qos_override, requested.thrp_qos); + else + next.thep_qos = requested.thrp_qos; + + /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */ + if (task_effective.t_qos_clamp != THREAD_QOS_UNSPECIFIED) { + if (next.thep_qos != THREAD_QOS_UNSPECIFIED) + next.thep_qos = MIN(task_effective.t_qos_clamp, next.thep_qos); + else + next.thep_qos = task_effective.t_qos_clamp; + } + + /* The ceiling only applies to threads that are in the QoS world */ + if (task_effective.t_qos_ceiling != THREAD_QOS_UNSPECIFIED && + next.thep_qos != THREAD_QOS_UNSPECIFIED) { + next.thep_qos = MIN(task_effective.t_qos_ceiling, next.thep_qos); + } + + /* + * The QoS relative priority is only applicable when the original programmer's + * intended (requested) QoS is in effect. When the QoS is clamped (e.g. + * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored, + * since otherwise it would be lower than unclamped threads. Similarly, in the + * presence of boosting, the programmer doesn't know what other actors + * are boosting the thread. + */ + if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) && + (requested.thrp_qos == next.thep_qos) && + (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) { + next.thep_qos_relprio = requested.thrp_qos_relprio; + } else { + next.thep_qos_relprio = 0; + } + } /* Calculate DARWIN_BG */ boolean_t wants_darwinbg = FALSE; boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */ boolean_t wants_watchersbg = FALSE; /* Do I want my pidbound threads to be bg */ boolean_t wants_tal = FALSE; /* Do I want the effects of TAL mode */ + /* * If DARWIN_BG has been requested at either level, it's engaged. * Only true DARWIN_BG changes cause watchers to transition. + * + * Backgrounding due to apptype does. */ if (requested.int_darwinbg || requested.ext_darwinbg) wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE; @@ -708,12 +865,18 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. 
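/*
 * [Editor's sketch -- not part of the patch] Standalone model of the
 * thread-side computation above, with the THREAD_QOS_* tiers as small ints
 * (0 == UNSPECIFIED, 6 == USER_INTERACTIVE, larger == more important).
 */
#include <assert.h>

#define QOS_UNSPEC 0
#define MAXQ(a, b) ((a) > (b) ? (a) : (b))
#define MINQ(a, b) ((a) < (b) ? (a) : (b))

static int effective_thread_qos(int req, int override, int clamp, int ceiling)
{
	/* an override only wins if the thread already opted into QoS */
	int next = (override != QOS_UNSPEC && req != QOS_UNSPEC) ?
	    MAXQ(override, req) : req;
	/* a task clamp produces an effective QoS even from UNSPECIFIED */
	if (clamp != QOS_UNSPEC)
		next = (next != QOS_UNSPEC) ? MINQ(clamp, next) : clamp;
	/* the ceiling only applies to threads already in the QoS world */
	if (ceiling != QOS_UNSPEC && next != QOS_UNSPEC)
		next = MINQ(ceiling, next);
	return next;
}

int main(void)
{
	assert(effective_thread_qos(5, 0, 0, 4) == 4); /* USER_INITIATED squashed to LEGACY off-screen */
	assert(effective_thread_qos(0, 5, 0, 0) == 0); /* an override alone can't opt a thread in */
	assert(effective_thread_qos(5, 0, 3, 0) == 3); /* UTILITY clamp wins over USER_INITIATED */
	return 0;
}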
*/ if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) wants_darwinbg = TRUE; + + if (next.t_qos_clamp == THREAD_QOS_BACKGROUND || next.t_qos_clamp == THREAD_QOS_MAINTENANCE) + wants_darwinbg = TRUE; } else { if (requested.th_pidbind_bg) wants_all_sockets_bg = wants_darwinbg = TRUE; if (requested.th_workq_bg) wants_darwinbg = TRUE; + + if (next.thep_qos == THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_MAINTENANCE) + wants_darwinbg = TRUE; } /* Calculate side effects of DARWIN_BG */ @@ -731,6 +894,14 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr if (on_task && wants_watchersbg) next.t_watchers_bg = 1; + /* darwinbg on either task or thread implies background QOS (or lower) */ + if (!on_task && + (wants_darwinbg || task_effective.darwinbg) && + (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)){ + next.thep_qos = THREAD_QOS_BACKGROUND; + next.thep_qos_relprio = 0; + } + /* Calculate low CPU priority */ boolean_t wants_lowpri_cpu = FALSE; @@ -763,6 +934,13 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr if (wants_tal) iopol = MAX(iopol, proc_tal_disk_tier); + + if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED) + iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.t_qos_clamp]); + + } else { + /* Look up the associated IO tier value for the QoS class */ + iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]); } iopol = MAX(iopol, requested.int_iotier); @@ -778,9 +956,6 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr /* Calculate miscellaneous policy */ if (on_task) { - /* Update role */ - next.t_role = requested.t_role; - /* Calculate suppression-active flag */ if (requested.t_sup_active && requested.t_boosted == 0) next.t_sup_active = 1; @@ -789,17 +964,15 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr if (requested.t_sup_suspend && requested.t_boosted == 0) next.t_suspended = 1; - /* Calculate GPU Access policy */ - if (requested.t_int_gpu_deny || requested.t_ext_gpu_deny) - next.t_gpu_deny = 1; - - /* Calculate timer QOS */ int latency_qos = requested.t_base_latency_qos; if (requested.t_sup_timer && requested.t_boosted == 0) latency_qos = requested.t_sup_timer; + if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED) + latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.t_qos_clamp]); + if (requested.t_over_latency_qos != 0) latency_qos = requested.t_over_latency_qos; @@ -815,6 +988,9 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr if (requested.t_sup_throughput && requested.t_boosted == 0) through_qos = requested.t_sup_throughput; + if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED) + through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.t_qos_clamp]); + if (requested.t_over_through_qos != 0) through_qos = requested.t_over_through_qos; @@ -823,6 +999,37 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr /* Calculate suppressed CPU priority */ if (requested.t_sup_cpu && requested.t_boosted == 0) next.t_suppressed_cpu = 1; + + /* + * Calculate background sockets + * Don't take into account boosting to limit transition frequency. 
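/*
 * [Editor's sketch -- not part of the patch] The effective I/O tier is the
 * weakest (numerically largest THROTTLE_LEVEL_*) tier any requester asks
 * for; the new wrinkle in this hunk is the per-QoS lookup through
 * thread_qos_policy_params.qos_iotier[]. Standalone model:
 */
#define MAXT(a, b) ((a) > (b) ? (a) : (b))

static int effective_iotier(int ext_iopolicy, int int_iopolicy,
    int bg_tier_if_darwinbg, int qos_iotier)
{
	int iopol = 0;				/* THROTTLE_LEVEL_TIER0 */
	iopol = MAXT(iopol, ext_iopolicy);	/* externally applied policy */
	iopol = MAXT(iopol, int_iopolicy);	/* kernel-internal policy */
	iopol = MAXT(iopol, bg_tier_if_darwinbg); /* bg_iotier when DARWIN_BG is engaged */
	iopol = MAXT(iopol, qos_iotier);	/* qos_iotier[thep_qos or t_qos_clamp] */
	return iopol;
}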
+ */ + if (requested.t_sup_bg_sockets){ + next.all_sockets_bg = 1; + next.new_sockets_bg = 1; + } + + /* Apply SFI Managed class bit */ + next.t_sfi_managed = requested.t_sfi_managed; + + /* Calculate 'live donor' status for live importance */ + switch (requested.t_apptype) { + case TASK_APPTYPE_APP_TAL: + case TASK_APPTYPE_APP_DEFAULT: + if (requested.ext_darwinbg == 0) + next.t_live_donor = 1; + else + next.t_live_donor = 0; + break; + + case TASK_APPTYPE_DAEMON_INTERACTIVE: + case TASK_APPTYPE_DAEMON_STANDARD: + case TASK_APPTYPE_DAEMON_ADAPTIVE: + case TASK_APPTYPE_DAEMON_BACKGROUND: + default: + next.t_live_donor = 0; + break; + } } if (requested.terminated) { @@ -832,7 +1039,6 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr * passive (don't want to cause others to throttle) * all_sockets_bg (don't need to iterate FDs on every exit) * new_sockets_bg (doesn't matter for exiting process) - * gpu deny (doesn't matter for exiting process) * pidsuspend (jetsam-ed BG process shouldn't run again) * watchers_bg (watcher threads don't need to be unthrottled) * t_latency_qos (affects userspace timers only) @@ -849,6 +1055,8 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr /* TODO: This should only be shot down on SIGTERM, not exit */ next.t_suspended = 0; + } else { + next.thep_qos = 0; } } @@ -857,6 +1065,16 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr * Swap out old policy for new policy */ + if (!on_task) { + /* Acquire thread mutex to synchronize against + * thread_policy_set(). Consider reworking to separate qos + * fields, or locking the task in thread_policy_set. + * A more efficient model would be to make the thread bits + * authoritative. + */ + thread_mtx_lock(thread); + } + struct task_effective_policy prev = (on_task) ? task->effective_policy : thread->effective_policy; @@ -870,8 +1088,31 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr /* This is the point where the new values become visible to other threads */ if (on_task) task->effective_policy = next; - else + else { + /* Preserve thread specific latency/throughput QoS modified via + * thread_policy_set(). Inelegant in the extreme, to be reworked. + * + * If thread QoS class is set, we don't need to preserve the previously set values. + * We should ensure to not accidentally preserve previous thread QoS values if you set a thread + * back to default QoS. 
+ */ + uint32_t lqos = thread->effective_policy.t_latency_qos, tqos = thread->effective_policy.t_through_qos; + + if (prev.thep_qos == THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) { + next.t_latency_qos = lqos; + next.t_through_qos = tqos; + } else if (prev.thep_qos != THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) { + next.t_latency_qos = 0; + next.t_through_qos = 0; + } else { + next.t_latency_qos = thread_qos_policy_params.qos_latency_qos[next.thep_qos]; + next.t_through_qos = thread_qos_policy_params.qos_through_qos[next.thep_qos]; + } + + thread_update_qos_cpu_time(thread, TRUE); thread->effective_policy = next; + thread_mtx_unlock(thread); + } /* Don't do anything further to a half-formed task or thread */ if (in_create) @@ -880,26 +1121,20 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr /* * Step 4: * Pend updates that can't be done while holding the task lock - * Preserve pending updates that may still be waiting to be applied */ - struct task_pended_policy pended = - (on_task) ? task->pended_policy : thread->pended_policy; - if (prev.all_sockets_bg != next.all_sockets_bg) - pended.update_sockets = 1; + pend_token->tpt_update_sockets = 1; if (on_task) { /* Only re-scan the timer list if the qos level is getting less strong */ if (prev.t_latency_qos > next.t_latency_qos) - pended.t_update_timers = 1; + pend_token->tpt_update_timers = 1; - } - if (on_task) - task->pended_policy = pended; - else - thread->pended_policy = pended; + if (prev.t_live_donor != next.t_live_donor) + pend_token->tpt_update_live_donor = 1; + } /* * Step 5: @@ -918,29 +1153,57 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr } boolean_t update_threads = FALSE; - - if (prev.bg_iotier != next.bg_iotier) + boolean_t update_sfi = FALSE; + + if (prev.bg_iotier != next.bg_iotier || + prev.terminated != next.terminated || + prev.t_qos_clamp != next.t_qos_clamp || + prev.t_qos_ceiling != next.t_qos_ceiling || + prev.qos_ui_is_urgent != next.qos_ui_is_urgent || + prev.darwinbg != next.darwinbg) update_threads = TRUE; - if (prev.terminated != next.terminated) - update_threads = TRUE; + /* + * A bit of a layering violation. We know what task policy attributes + * sfi_thread_classify() consults, so if they change, trigger SFI + * re-evaluation. + */ + if ((prev.t_latency_qos != next.t_latency_qos) || + (prev.t_role != next.t_role) || + (prev.darwinbg != next.darwinbg) || + (prev.t_sfi_managed != next.t_sfi_managed)) + update_sfi = TRUE; - task_policy_update_task_locked(task, update_throttle, update_threads); + task_policy_update_task_locked(task, update_throttle, update_threads, update_sfi); } else { int update_cpu = 0; + boolean_t update_sfi = FALSE; + boolean_t update_qos = FALSE; if (prev.lowpri_cpu != next.lowpri_cpu) update_cpu = (next.lowpri_cpu ? 
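/*
 * [Editor's note -- not part of the patch] The task_pend_token replaces the
 * old pended_policy/t_updating_policy wait-wakeup handshake: deferred side
 * effects are recorded in a caller-owned token under the task lock, then
 * replayed by task_policy_update_complete_unlocked() after the lock drops.
 * Assumed shape (only the bits this hunk sets are shown):
 */
#include <stdint.h>

struct task_pend_token {
	uint32_t	tpt_update_sockets	:1,	/* proc_apply_task_networkbg() */
			tpt_update_timers	:1,	/* ml_timer_evaluate() */
			tpt_update_live_donor	:1;	/* task_importance_update_live_donor() */
};

/*
 * Canonical sequence, as used by proc_set_task_policy() below:
 *
 *	struct task_pend_token pend_token = {};
 *	task_lock(task);
 *	...mutate requested_policy...
 *	task_policy_update_locked(task, thread, &pend_token);	// fills the bits
 *	task_unlock(task);
 *	task_policy_update_complete_unlocked(task, thread, &pend_token);
 */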
DO_LOWPRI_CPU : UNDO_LOWPRI_CPU); - task_policy_update_thread_locked(thread, update_cpu, update_throttle); + if (prev.darwinbg != next.darwinbg || + prev.thep_qos != next.thep_qos) + update_sfi = TRUE; + + if (prev.thep_qos != next.thep_qos || + prev.thep_qos_relprio != next.thep_qos_relprio || + prev.qos_ui_is_urgent != next.qos_ui_is_urgent) { + update_qos = TRUE; + } + + task_policy_update_thread_locked(thread, update_cpu, update_throttle, update_sfi, update_qos); } } /* Despite the name, the thread's task is locked, the thread is not */ -static void +void task_policy_update_thread_locked(thread_t thread, int update_cpu, - boolean_t update_throttle) + boolean_t update_throttle, + boolean_t update_sfi, + boolean_t update_qos) { thread_precedence_policy_data_t policy; @@ -948,6 +1211,10 @@ task_policy_update_thread_locked(thread_t thread, rethrottle_thread(thread->uthread); } + if (update_sfi) { + sfi_reevaluate(thread); + } + /* * TODO: pidbind needs to stuff remembered importance into saved_importance * properly deal with bg'ed threads being pidbound and unbging while pidbound @@ -967,16 +1234,20 @@ task_policy_update_thread_locked(thread_t thread, thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&policy, THREAD_PRECEDENCE_POLICY_COUNT); + + if (update_qos) + thread_recompute_qos(thread); } /* * Calculate priority on a task, loop through its threads, and tell them about * priority changes and throttle changes. */ -static void +void task_policy_update_task_locked(task_t task, boolean_t update_throttle, - boolean_t update_threads) + boolean_t update_threads, + boolean_t update_sfi) { boolean_t update_priority = FALSE; @@ -994,12 +1265,6 @@ task_policy_update_task_locked(task_t task, max_priority = MAXPRI_SUPPRESSED; } else { switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) { - case TASK_FOREGROUND_APPLICATION: - priority = BASEPRI_FOREGROUND; - break; - case TASK_BACKGROUND_APPLICATION: - priority = BASEPRI_BACKGROUND; - break; case TASK_CONTROL_APPLICATION: priority = BASEPRI_CONTROL; break; @@ -1013,6 +1278,13 @@ task_policy_update_task_locked(task_t task, /* factor in 'nice' value */ priority += task->importance; + + if (task->effective_policy.t_qos_clamp != THREAD_QOS_UNSPECIFIED) { + int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.t_qos_clamp]; + + priority = MIN(priority, qos_clamp_priority); + max_priority = MIN(max_priority, qos_clamp_priority); + } } /* avoid extra work if priority isn't changing */ @@ -1031,15 +1303,14 @@ task_policy_update_task_locked(task_t task, } /* Loop over the threads in the task only once, and only if necessary */ - if (update_threads || update_throttle || update_priority ) { + if (update_threads || update_throttle || update_priority || update_sfi ) { thread_t thread; queue_iterate(&task->threads, thread, thread_t, task_threads) { if (update_priority) { thread_mtx_lock(thread); - if (thread->active) - thread_task_priority(thread, priority, max_priority); + thread_task_priority(thread, priority, max_priority); thread_mtx_unlock(thread); } @@ -1048,11 +1319,15 @@ task_policy_update_task_locked(task_t task, rethrottle_thread(thread->uthread); } + if (update_sfi) { + sfi_reevaluate(thread); + } + if (update_threads) { thread->requested_policy.bg_iotier = task->effective_policy.bg_iotier; thread->requested_policy.terminated = task->effective_policy.terminated; - task_policy_update_internal_locked(task, thread, FALSE); + task_policy_update_internal_locked(task, thread, FALSE, NULL); 
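/*
 * [Editor's sketch -- not part of the patch] Numeric walk-through of the
 * task priority calculation above. BASEPRI_DEFAULT == 31 is the usual xnu
 * value; a clamp priority of 20 for a UTILITY t_qos_clamp is an illustrative
 * assumption about thread_qos_policy_params.qos_pri[].
 */
static int task_priority_model(int base, int nice_importance, int qos_clamp_pri)
{
	int priority = base + nice_importance;	/* "factor in 'nice' value" */
	if (qos_clamp_pri != 0 && priority > qos_clamp_pri)
		priority = qos_clamp_pri;	/* MIN(priority, qos_clamp_priority) */
	return priority;
}
/* task_priority_model(31, +4,  0) == 35: niced-up task, no clamp	*/
/* task_priority_model(31, +4, 20) == 20: the QoS clamp caps the boost	*/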
/* The thread policy must not emit any completion actions due to this change. */ } } @@ -1061,61 +1336,26 @@ task_policy_update_task_locked(task_t task, /* * Called with task unlocked to do things that can't be done while holding the task lock - * To keep things consistent, only one thread can make progress through here at a time for any one task. - * - * TODO: tracepoints */ -static void -task_policy_update_complete_unlocked(task_t task, thread_t thread) +void +task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_token_t pend_token) { boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; - task_lock(task); - - while (task->pended_policy.t_updating_policy != 0) { - assert_wait((event_t)&task->pended_policy, THREAD_UNINT); - task_unlock(task); - thread_block(THREAD_CONTINUE_NULL); - task_lock(task); - } - - /* Take a snapshot of the current state */ - - struct task_pended_policy pended = - (on_task) ? task->pended_policy : thread->pended_policy; - - struct task_effective_policy effective = - (on_task) ? task->effective_policy : thread->effective_policy; - - /* Mark the pended operations as being handled */ - if (on_task) - task->pended_policy = default_task_pended_policy; - else - thread->pended_policy = default_task_pended_policy; - - task->pended_policy.t_updating_policy = 1; - - task_unlock(task); - - /* Update the other subsystems with the new state */ - #ifdef MACH_BSD - if (pended.update_sockets) - proc_apply_task_networkbg(task->bsd_info, thread, effective.all_sockets_bg); + if (pend_token->tpt_update_sockets) + proc_apply_task_networkbg(task->bsd_info, thread); #endif /* MACH_BSD */ if (on_task) { - /* The timer throttle has been removed, we need to look for expired timers and fire them */ - if (pended.t_update_timers) + /* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */ + if (pend_token->tpt_update_timers) ml_timer_evaluate(); - } - /* Wake up anyone waiting to make another update */ - task_lock(task); - task->pended_policy.t_updating_policy = 0; - thread_wakeup(&task->pended_policy); - task_unlock(task); + if (pend_token->tpt_update_live_donor) + task_importance_update_live_donor(task); + } } /* @@ -1135,23 +1375,25 @@ proc_set_task_policy(task_t task, int flavor, int value) { + struct task_pend_token pend_token = {}; + task_lock(task); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START, - proc_selfpid(), targetid(task, thread), trequested(task, thread), value, 0); + (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START, + targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0); proc_set_task_policy_locked(task, thread, category, flavor, value); - task_policy_update_locked(task, thread); + task_policy_update_locked(task, thread, &pend_token); task_unlock(task); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END, - proc_selfpid(), targetid(task, thread), trequested(task, thread), tpending(task, thread), 0); + (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END, + targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0); - task_policy_update_complete_unlocked(task, thread); + task_policy_update_complete_unlocked(task, thread, &pend_token); } /* @@ -1167,6 +1409,7 @@ proc_set_task_policy_thread(task_t task, { thread_t thread; thread_t self = 
current_thread(); + struct task_pend_token pend_token = {}; task_lock(task); @@ -1181,22 +1424,49 @@ } KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START, - proc_selfpid(), targetid(task, thread), trequested(task, thread), value, 0); + (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START, + targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0); proc_set_task_policy_locked(task, thread, category, flavor, value); - task_policy_update_locked(task, thread); + task_policy_update_locked(task, thread, &pend_token); task_unlock(task); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END, - proc_selfpid(), targetid(task, thread), trequested(task, thread), tpending(task, thread), 0); + (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END, + targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0); - task_policy_update_complete_unlocked(task, thread); + task_policy_update_complete_unlocked(task, thread, &pend_token); } +/* + * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure. + * Same locking rules apply. + */ +void +proc_set_task_policy2(task_t task, thread_t thread, int category, int flavor, int value1, int value2) +{ + struct task_pend_token pend_token = {}; + + task_lock(task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START, + targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value1, 0); + + proc_set_task_policy2_locked(task, thread, category, flavor, value1, value2); + + task_policy_update_locked(task, thread, &pend_token); + + task_unlock(task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END, + targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0); + + task_policy_update_complete_unlocked(task, thread, &pend_token); +} /* * Set the requested state for a specific flavor to a specific value.
@@ -1254,28 +1524,7 @@ proc_set_task_policy_locked(task_t task, requested.int_iopassive = value; break; - /* Category: EXTERNAL and INTERNAL, task only */ - - case TASK_POLICY_GPU_DENY: - assert(on_task); - if (category == TASK_POLICY_EXTERNAL) - requested.t_ext_gpu_deny = value; - else - requested.t_int_gpu_deny = value; - break; - - case TASK_POLICY_DARWIN_BG_AND_GPU: - assert(on_task); - if (category == TASK_POLICY_EXTERNAL) { - requested.ext_darwinbg = value; - requested.t_ext_gpu_deny = value; - } else { - requested.int_darwinbg = value; - requested.t_int_gpu_deny = value; - } - break; - - /* Category: INTERNAL, task only */ + /* Category: INTERNAL, task only */ case TASK_POLICY_DARWIN_BG_IOPOL: assert(on_task && category == TASK_POLICY_INTERNAL); @@ -1304,6 +1553,18 @@ proc_set_task_policy_locked(task_t task, assert(on_task && category == TASK_POLICY_ATTRIBUTE); requested.terminated = value; break; + case TASK_BASE_LATENCY_QOS_POLICY: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_base_latency_qos = value; + break; + case TASK_BASE_THROUGHPUT_QOS_POLICY: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_base_through_qos = value; + break; + case TASK_POLICY_SFI_MANAGED: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_sfi_managed = value; + break; /* Category: ATTRIBUTE, thread only */ @@ -1317,6 +1578,16 @@ proc_set_task_policy_locked(task_t task, requested.th_workq_bg = value; break; + case TASK_POLICY_QOS: + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + requested.thrp_qos = value; + break; + + case TASK_POLICY_QOS_OVERRIDE: + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + requested.thrp_qos_override = value; + break; + default: panic("unknown task policy: %d %d %d", category, flavor, value); break; @@ -1328,19 +1599,72 @@ proc_set_task_policy_locked(task_t task, thread->requested_policy = requested; } - /* - * Gets what you set. Effective values may be different. + * Variant of proc_set_task_policy_locked() that sets two scalars in the requested policy structure. */ -int -proc_get_task_policy(task_t task, - thread_t thread, - int category, - int flavor) +static void +proc_set_task_policy2_locked(task_t task, + thread_t thread, + int category, + int flavor, + int value1, + int value2) { boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; - int value = 0; + struct task_requested_policy requested = + (on_task) ? task->requested_policy : thread->requested_policy; + + switch (flavor) { + + /* Category: ATTRIBUTE, task only */ + + case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_base_latency_qos = value1; + requested.t_base_through_qos = value2; + break; + + case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_over_latency_qos = value1; + requested.t_over_through_qos = value2; + break; + + /* Category: ATTRIBUTE, thread only */ + + case TASK_POLICY_QOS_AND_RELPRIO: + + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + requested.thrp_qos = value1; + requested.thrp_qos_relprio = value2; + DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio); + break; + + default: + panic("unknown task policy: %d %d %d %d", category, flavor, value1, value2); + break; + } + + if (on_task) + task->requested_policy = requested; + else + thread->requested_policy = requested; +} + + +/* + * Gets what you set. 
Effective values may be different. + */ +int +proc_get_task_policy(task_t task, + thread_t thread, + int category, + int flavor) +{ + boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; + + int value = 0; task_lock(task); @@ -1374,13 +1698,6 @@ proc_get_task_policy(task_t task, else value = requested.int_iopassive; break; - case TASK_POLICY_GPU_DENY: - assert(on_task); - if (category == TASK_POLICY_EXTERNAL) - value = requested.t_ext_gpu_deny; - else - value = requested.t_int_gpu_deny; - break; case TASK_POLICY_DARWIN_BG_IOPOL: assert(on_task && category == TASK_POLICY_ATTRIBUTE); value = proc_tier_to_iopol(requested.bg_iotier, 0); @@ -1389,6 +1706,18 @@ proc_get_task_policy(task_t task, assert(on_task && category == TASK_POLICY_ATTRIBUTE); value = requested.t_role; break; + case TASK_POLICY_SFI_MANAGED: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + value = requested.t_sfi_managed; + break; + case TASK_POLICY_QOS: + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + value = requested.thrp_qos; + break; + case TASK_POLICY_QOS_OVERRIDE: + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + value = requested.thrp_qos_override; + break; default: panic("unknown policy_flavor %d", flavor); break; @@ -1399,6 +1728,48 @@ proc_get_task_policy(task_t task, return value; } +/* + * Variant of proc_get_task_policy() that returns two scalar outputs. + */ +void +proc_get_task_policy2(task_t task, thread_t thread, int category __unused, int flavor, int *value1, int *value2) +{ + boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; + + task_lock(task); + + struct task_requested_policy requested = + (on_task) ? task->requested_policy : thread->requested_policy; + + switch (flavor) { + /* TASK attributes */ + case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + *value1 = requested.t_base_latency_qos; + *value2 = requested.t_base_through_qos; + break; + + case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + *value1 = requested.t_over_latency_qos; + *value2 = requested.t_over_through_qos; + break; + + /* THREAD attributes */ + case TASK_POLICY_QOS_AND_RELPRIO: + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + *value1 = requested.thrp_qos; + *value2 = requested.thrp_qos_relprio; + break; + + default: + panic("unknown policy_flavor %d", flavor); + break; + } + + task_unlock(task); +} + /* * Functions for querying effective state for relevant subsystems @@ -1424,7 +1795,7 @@ proc_get_effective_thread_policy(thread_t thread, int flavor) * NOTE: This accessor does not take the task lock. * Notifications of state updates need to be externally synchronized with state queries. * This routine *MUST* remain interrupt safe, as it is potentially invoked - * within the context of a timer interrupt. + * within the context of a timer interrupt. It is also called in KDP context for stackshot. */ static int proc_get_effective_policy(task_t task, @@ -1453,7 +1824,9 @@ proc_get_effective_policy(task_t task, case TASK_POLICY_IO: /* * The I/O system calls here to find out what throttling tier to apply to an operation. - * Returns THROTTLE_LEVEL_* values + * Returns THROTTLE_LEVEL_* values. Some userspace spinlock operations can apply + * a temporary iotier override to make the I/O more aggressive to get the lock + * owner to release the spinlock. 
*/ if (on_task) value = task->effective_policy.io_tier; @@ -1468,13 +1841,34 @@ proc_get_effective_policy(task_t task, /* * The I/O system calls here to find out whether an operation should be passive. * (i.e. not cause operations with lower throttle tiers to be throttled) - * Returns 1 for passive mode, 0 for normal mode + * Returns 1 for passive mode, 0 for normal mode. + * If a userspace spinlock has applied an override, that I/O should always + * be passive to avoid self-throttling when the override is removed and lower + * iotier I/Os are issued. */ if (on_task) value = task->effective_policy.io_passive; - else + else { + int io_tier = MAX(task->effective_policy.io_tier, thread->effective_policy.io_tier); + boolean_t override_in_effect = (thread->iotier_override != THROTTLE_LEVEL_NONE) && (thread->iotier_override < io_tier); + value = (task->effective_policy.io_passive || - thread->effective_policy.io_passive) ? 1 : 0; + thread->effective_policy.io_passive || override_in_effect) ? 1 : 0; + } + break; + case TASK_POLICY_ALL_SOCKETS_BG: + /* + * do_background_socket() calls this to determine what it should do to the proc's sockets + * Returns 1 for background mode, 0 for normal mode + * + * This consults both thread and task so un-DBGing a thread while the task is BG + * doesn't get you out of the network throttle. + */ + if (on_task) + value = task->effective_policy.all_sockets_bg; + else + value = (task->effective_policy.all_sockets_bg || + thread->effective_policy.all_sockets_bg) ? 1 : 0; break; case TASK_POLICY_NEW_SOCKETS_BG: /* @@ -1509,8 +1903,11 @@ proc_get_effective_policy(task_t task, * timer arming calls into here to find out the timer coalescing level * Returns a QoS tier (0-6) */ - assert(on_task); - value = task->effective_policy.t_latency_qos; + if (on_task) { + value = task->effective_policy.t_latency_qos; + } else { + value = MAX(task->effective_policy.t_latency_qos, thread->effective_policy.t_latency_qos); + } break; case TASK_POLICY_THROUGH_QOS: /* @@ -1519,15 +1916,6 @@ proc_get_effective_policy(task_t task, assert(on_task); value = task->effective_policy.t_through_qos; break; - case TASK_POLICY_GPU_DENY: - /* - * This is where IOKit calls into task_policy to find out whether - * it should allow access to the GPU. - * Returns 1 for NOT allowed, returns 0 for allowed - */ - assert(on_task); - value = task->effective_policy.t_gpu_deny; - break; case TASK_POLICY_ROLE: assert(on_task); value = task->effective_policy.t_role; @@ -1536,6 +1924,14 @@ proc_get_effective_policy(task_t task, assert(on_task); value = task->effective_policy.t_watchers_bg; break; + case TASK_POLICY_SFI_MANAGED: + assert(on_task); + value = task->effective_policy.t_sfi_managed; + break; + case TASK_POLICY_QOS: + assert(!on_task); + value = thread->effective_policy.thep_qos; + break; default: panic("unknown policy_flavor %d", flavor); break; @@ -1595,10 +1991,8 @@ proc_tier_to_iopol(int tier, int passive) } else { switch (tier) { case THROTTLE_LEVEL_NONE: - return IOPOL_DEFAULT; - break; case THROTTLE_LEVEL_TIER0: - return IOPOL_IMPORTANT; + return IOPOL_DEFAULT; break; case THROTTLE_LEVEL_TIER1: return IOPOL_STANDARD; @@ -1694,69 +2088,324 @@ void set_thread_iotier_override(thread_t thread, int policy) } /* - * Called at process exec to initialize the apptype of a process + * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks, + * semaphores, dispatch_sync) may result in priority inversions where a higher priority + * (i.e. 
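/*
 * [Editor's sketch -- not part of the patch] Two behavior changes meet in
 * this hunk: proc_tier_to_iopol() now reports THROTTLE_LEVEL_TIER0 as
 * IOPOL_DEFAULT (retiring IOPOL_IMPORTANT as a distinct answer), and the
 * passive check honors pending spinlock-style iotier overrides. Model of
 * the passive decision, assuming THROTTLE_LEVEL_NONE sorts below all tiers:
 */
#define TL_NONE	(-1)			/* stands in for THROTTLE_LEVEL_NONE */
#define MAXT(a, b) ((a) > (b) ? (a) : (b))

static int io_is_passive(int task_passive, int thread_passive,
    int task_tier, int thread_tier, int iotier_override)
{
	int io_tier = MAXT(task_tier, thread_tier);
	int override_in_effect = (iotier_override != TL_NONE) &&
	    (iotier_override < io_tier);	/* override is *more* aggressive */
	return (task_passive || thread_passive || override_in_effect) ? 1 : 0;
}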
scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower + * priority thread. In these cases, we attempt to propagate the priority token, as long + * as the subsystem informs us of the relationships between the threads. The userspace + * synchronization subsystem should maintain the information of owner->resource and + * resource->waiters itself. + * + * The add/remove routines can return failure if the target of the override cannot be + * found, perhaps because the resource subsystem doesn't have an accurate view of the + * resource owner in the face of race conditions. */ -void -proc_set_task_apptype(task_t task, int apptype) + +boolean_t proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource) +{ + thread_t self = current_thread(); + int resource_count; + struct task_pend_token pend_token = {}; + + /* XXX move to thread mutex when thread policy does */ + task_lock(task); + + /* + * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference + * to the thread + */ + + if (thread != THREAD_NULL) { + assert(task == thread->task); + } else { + if (tid == self->thread_id) { + thread = self; + } else { + thread = task_findtid(task, tid); + + if (thread == THREAD_NULL) { + KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE, + tid, 0, 0xdead, 0, 0); + task_unlock(task); + return FALSE; + } + } + } + + KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START, + thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0); + + DTRACE_BOOST5(qos_add_override_pre, uint64_t, tid, uint64_t, thread->requested_policy.thrp_qos, + uint64_t, thread->effective_policy.thep_qos, int, override_qos, boolean_t, first_override_for_resource); + + if (first_override_for_resource) { + resource_count = ++thread->usynch_override_contended_resource_count; + } else { + resource_count = thread->usynch_override_contended_resource_count; + } + + struct task_requested_policy requested = thread->requested_policy; + + if (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED) + requested.thrp_qos_override = override_qos; + else + requested.thrp_qos_override = MAX(requested.thrp_qos_override, override_qos); + + thread->requested_policy = requested; + + task_policy_update_locked(task, thread, &pend_token); + + thread_reference(thread); + + task_unlock(task); + + task_policy_update_complete_unlocked(task, thread, &pend_token); + + DTRACE_BOOST3(qos_add_override_post, uint64_t, requested.thrp_qos_override, + uint64_t, thread->effective_policy.thep_qos, int, resource_count); + + thread_deallocate(thread); + + KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END, + requested.thrp_qos_override, resource_count, 0, 0, 0); + + return TRUE; +} + +boolean_t proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid) { + thread_t self = current_thread(); + int resource_count; + struct task_pend_token pend_token = {}; + + /* XXX move to thread mutex when thread policy does */ task_lock(task); + /* + * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference + * to the thread + */ + if (thread != THREAD_NULL) { + assert(task == thread->task); + } else { + if (tid == self->thread_id) { + thread = self; + } else { + thread = 
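/*
 * [Editor's sketch -- not part of the patch] Expected pairing from a
 * userspace-synchronization subsystem (the pthread shims are the intended
 * caller): one remove per add, with first_override_for_resource set only on
 * the first waiter per contended resource so the per-thread count balances.
 */
void lock_contended(task_t owner_task, uint64_t owner_tid,
    int waiter_qos, boolean_t first_waiter_for_resource)
{
	/* May return FALSE if the tid raced away; callers must tolerate that. */
	(void)proc_thread_qos_add_override(owner_task, THREAD_NULL, owner_tid,
	    waiter_qos, first_waiter_for_resource);
}

void lock_released(task_t owner_task, uint64_t owner_tid)
{
	/* Drops thrp_qos_override back to UNSPECIFIED once the count hits 0. */
	(void)proc_thread_qos_remove_override(owner_task, THREAD_NULL, owner_tid);
}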
task_findtid(task, tid); + + if (thread == THREAD_NULL) { + KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE, + tid, 0, 0xdead, 0, 0); + task_unlock(task); + return FALSE; + } + } + } + + resource_count = --thread->usynch_override_contended_resource_count; + + KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START, + thread_tid(thread), resource_count, 0, 0, 0); + + if (0 == resource_count) { + thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED; + + task_policy_update_locked(task, thread, &pend_token); + + thread_reference(thread); + + task_unlock(task); + + task_policy_update_complete_unlocked(task, thread, &pend_token); + + thread_deallocate(thread); + } else if (0 > resource_count) { + // panic("usynch_override_contended_resource_count underflow for thread %p", thread); + task_unlock(task); + } else { + task_unlock(task); + } + + KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END, + 0, 0, 0, 0, 0); + + return TRUE; +} + +/* TODO: remove this variable when interactive daemon audit period is over */ +extern boolean_t ipc_importance_interactive_receiver; + +/* + * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process + * + * TODO: Make this function more table-driven instead of ad-hoc + */ +void +proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, + ipc_port_t * portwatch_ports, int portwatch_count) +{ + struct task_pend_token pend_token = {}; + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START, - proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), - apptype, 0); + (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START, + audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), + apptype, 0); switch (apptype) { case TASK_APPTYPE_APP_TAL: - /* TAL starts off enabled by default */ - task->requested_policy.t_tal_enabled = 1; - /* fall through */ - case TASK_APPTYPE_APP_DEFAULT: - case TASK_APPTYPE_DAEMON_INTERACTIVE: - task->requested_policy.t_apptype = apptype; + /* Apps become donors via the 'live-donor' flag instead of the static donor flag */ + task_importance_mark_donor(task, FALSE); + task_importance_mark_live_donor(task, TRUE); + task_importance_mark_receiver(task, FALSE); + /* Apps are de-nap receivers on desktop for suppression behaviors */ + task_importance_mark_denap_receiver(task, TRUE); + break; + case TASK_APPTYPE_DAEMON_INTERACTIVE: task_importance_mark_donor(task, TRUE); - /* Apps (and interactive daemons) are boost recievers on desktop for suppression behaviors */ - task_importance_mark_receiver(task, TRUE); + task_importance_mark_live_donor(task, FALSE); + + /* + * A boot arg controls whether interactive daemons are importance receivers. + * Normally, they are not. But for testing their behavior as an adaptive + * daemon, the boot-arg can be set. + * + * TODO: remove this when the interactive daemon audit period is over.
+ */ + task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver); + task_importance_mark_denap_receiver(task, FALSE); break; case TASK_APPTYPE_DAEMON_STANDARD: - task->requested_policy.t_apptype = apptype; - task_importance_mark_donor(task, TRUE); + task_importance_mark_live_donor(task, FALSE); task_importance_mark_receiver(task, FALSE); + task_importance_mark_denap_receiver(task, FALSE); break; case TASK_APPTYPE_DAEMON_ADAPTIVE: - task->requested_policy.t_apptype = apptype; - task_importance_mark_donor(task, FALSE); + task_importance_mark_live_donor(task, FALSE); task_importance_mark_receiver(task, TRUE); + task_importance_mark_denap_receiver(task, FALSE); break; case TASK_APPTYPE_DAEMON_BACKGROUND: - task->requested_policy.t_apptype = apptype; - task_importance_mark_donor(task, FALSE); + task_importance_mark_live_donor(task, FALSE); task_importance_mark_receiver(task, FALSE); + task_importance_mark_denap_receiver(task, FALSE); break; - default: - panic("invalid apptype %d", apptype); + case TASK_APPTYPE_NONE: break; } - task_policy_update_locked(task, THREAD_NULL); + if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) { + int portwatch_boosts = 0; + + for (int i = 0; i < portwatch_count; i++) { + ipc_port_t port = NULL; + + if ((port = portwatch_ports[i]) != NULL) { + int boost = 0; + task_add_importance_watchport(task, port, &boost); + portwatch_boosts += boost; + } + } + + if (portwatch_boosts > 0) { + task_importance_hold_internal_assertion(task, portwatch_boosts); + } + } + + task_lock(task); + + if (apptype == TASK_APPTYPE_APP_TAL) { + /* TAL starts off enabled by default */ + task->requested_policy.t_tal_enabled = 1; + } + + if (apptype != TASK_APPTYPE_NONE) { + task->requested_policy.t_apptype = apptype; + + } + + if (qos_clamp != THREAD_QOS_UNSPECIFIED) { + task->requested_policy.t_qos_clamp = qos_clamp; + } + + task_policy_update_locked(task, THREAD_NULL, &pend_token); + + task_unlock(task); + + /* Ensure the donor bit is updated to be in sync with the new live donor status */ + pend_token.tpt_update_live_donor = 1; + + task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END, + audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), + task_is_importance_receiver(task), 0); +} + +/* Set up the primordial thread's QoS */ +void +task_set_main_thread_qos(task_t task, thread_t main_thread) { + struct task_pend_token pend_token = {}; + + assert(main_thread->task == task); + + task_lock(task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START, + audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), + main_thread->requested_policy.thrp_qos, 0); + + int primordial_qos = THREAD_QOS_UNSPECIFIED; + + int qos_clamp = task->requested_policy.t_qos_clamp; + + switch (task->requested_policy.t_apptype) { + case TASK_APPTYPE_APP_TAL: + case TASK_APPTYPE_APP_DEFAULT: + primordial_qos = THREAD_QOS_USER_INTERACTIVE; + break; + + case TASK_APPTYPE_DAEMON_INTERACTIVE: + case TASK_APPTYPE_DAEMON_STANDARD: + case TASK_APPTYPE_DAEMON_ADAPTIVE: + primordial_qos = THREAD_QOS_LEGACY; + break; + + case TASK_APPTYPE_DAEMON_BACKGROUND: + primordial_qos = THREAD_QOS_BACKGROUND; + break; + } + + if (qos_clamp != THREAD_QOS_UNSPECIFIED) { + if (primordial_qos != THREAD_QOS_UNSPECIFIED) { + primordial_qos = 
MIN(qos_clamp, primordial_qos); + } else { + primordial_qos = qos_clamp; + } + } + + main_thread->requested_policy.thrp_qos = primordial_qos; + + task_policy_update_locked(task, main_thread, &pend_token); task_unlock(task); - task_policy_update_complete_unlocked(task, THREAD_NULL); + task_policy_update_complete_unlocked(task, main_thread, &pend_token); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END, - proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), - task->imp_receiver, 0); + (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END, + audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), + primordial_qos, 0); } /* for process_policy to check before attempting to set */ @@ -1770,7 +2419,7 @@ proc_task_is_tal(task_t task) integer_t task_grab_latency_qos(task_t task) { - return task_qos_latency_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS)); + return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS)); } /* update the darwin background action state in the flags field for libproc */ @@ -1784,13 +2433,17 @@ proc_get_darwinbgstate(task_t task, uint32_t * flagsp) *flagsp |= PROC_FLAG_DARWINBG; + if (task->requested_policy.t_apptype == TASK_APPTYPE_APP_DEFAULT || + task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) + *flagsp |= PROC_FLAG_APPLICATION; + if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) *flagsp |= PROC_FLAG_ADAPTIVE; if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1) *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT; - if (task->imp_donor) + if (task_is_importance_donor(task)) *flagsp |= PROC_FLAG_IMPORTANCE_DONOR; if (task->effective_policy.t_sup_active) @@ -1807,32 +2460,107 @@ proc_get_thread_policy(thread_t thread, thread_policy_state_t info) task_lock(task); info->requested = (integer_t)task_requested_bitfield(task, thread); info->effective = (integer_t)task_effective_bitfield(task, thread); - info->pending = (integer_t)task_pending_bitfield(task, thread); + info->pending = 0; task_unlock(task); } +/* + * Tracepoint data... Reading the tracepoint data can be somewhat complicated. + * The current scheme packs as much data into a single tracepoint as it can. + * + * Each task/thread requested/effective structure is 64 bits in size. Any + * given tracepoint will emit either requested or effective data, but not both. + * + * A tracepoint may emit any of task, thread, or task & thread data. + * + * The type of data emitted varies with pointer size. Where possible, both + * task and thread data are emitted. In LP32 systems, the first and second + * halves of either the task or thread data is emitted. + * + * The code uses uintptr_t array indexes instead of high/low to avoid + * confusion WRT big vs little endian. + * + * The truth table for the tracepoint data functions is below, and has the + * following invariants: + * + * 1) task and thread are uintptr_t* + * 2) task may never be NULL + * + * + * LP32 LP64 + * trequested_0(task, NULL) task[0] task[0] + * trequested_1(task, NULL) task[1] NULL + * trequested_0(task, thread) thread[0] task[0] + * trequested_1(task, thread) thread[1] thread[0] + * + * Basically, you get a full task or thread on LP32, and both on LP64. + * + * The uintptr_t munging here is squicky enough to deserve a comment. 
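/*
 * [Editor's note -- not part of the patch] Worked examples of the
 * primordial QoS chosen by task_set_main_thread_qos() above:
 *
 *	default/TAL app, no clamp	-> USER_INTERACTIVE
 *	standard daemon, no clamp	-> LEGACY
 *	background daemon, no clamp	-> BACKGROUND
 *	default app, UTILITY clamp	-> MIN(UTILITY, USER_INTERACTIVE) == UTILITY
 *	TASK_APPTYPE_NONE, UTILITY clamp -> UTILITY (the clamp alone seeds a QoS)
 */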
+ * + * The variables we are accessing are laid out in memory like this: + * + * [ LP64 uintptr_t 0 ] + * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ] + * + * 1 2 3 4 5 6 7 8 + * + */ -/* dump requested for tracepoint */ static uintptr_t -trequested(task_t task, thread_t thread) +trequested_0(task_t task, thread_t thread) { - return (uintptr_t) task_requested_bitfield(task, thread); + assert(task); + _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated"); + _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated"); + + uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy); + return raw[0]; } -/* dump effective for tracepoint */ static uintptr_t -teffective(task_t task, thread_t thread) +trequested_1(task_t task, thread_t thread) { - return (uintptr_t) task_effective_bitfield(task, thread); + assert(task); + _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated"); + _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated"); + +#if defined __LP64__ + return (thread == NULL) ? 0 : *(uintptr_t*)&thread->requested_policy; +#else + uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy); + return raw[1]; +#endif +} + +static uintptr_t +teffective_0(task_t task, thread_t thread) +{ + assert(task); + _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated"); + _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated"); + + uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy); + return raw[0]; } -/* dump pending for tracepoint */ static uintptr_t -tpending(task_t task, thread_t thread) +teffective_1(task_t task, thread_t thread) { - return (uintptr_t) task_pending_bitfield(task, thread); + assert(task); + _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated"); + _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated"); + +#if defined __LP64__ + return (thread == NULL) ? 0 : *(uintptr_t*)&thread->effective_policy; +#else + uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy); + return raw[1]; +#endif } +/* dump pending for tracepoint */ +static uint32_t tpending(task_pend_token_t pend_token) { return *(uint32_t*)(void*)(pend_token); } + uint64_t task_requested_bitfield(task_t task, thread_t thread) { @@ -1852,10 +2580,13 @@ task_requested_bitfield(task_t task, thread_t thread) bits |= (requested.th_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0); bits |= (requested.th_workq_bg ? POLICY_REQ_WORKQ_BG : 0); + if (thread != THREAD_NULL) { + bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0); + bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0); + } + bits |= (requested.t_boosted ? POLICY_REQ_BOOSTED : 0); bits |= (requested.t_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0); - bits |= (requested.t_int_gpu_deny ? POLICY_REQ_INT_GPU_DENY : 0); - bits |= (requested.t_ext_gpu_deny ? POLICY_REQ_EXT_GPU_DENY : 0); bits |= (requested.t_apptype ? 
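/*
 * [Editor's sketch -- not part of the patch] Demonstrates the word-splitting
 * used by trequested_0/_1 above: a 64-bit policy struct is one LP64 word or
 * two LP32 words, so a tracepoint can carry a full task *and* thread
 * snapshot on LP64, or one full structure on LP32. memcpy() stands in for
 * the kernel's uintptr_t* aliasing cast.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct fake_policy { uint64_t bits; };	/* stands in for task_requested_policy */

int main(void)
{
	struct fake_policy p = { 0x1122334455667788ULL };
	uintptr_t raw[2] = { 0, 0 };
	memcpy(raw, &p, sizeof(p));

#if UINTPTR_MAX == 0xFFFFFFFFu
	/* LP32: the struct spans raw[0] and raw[1] (trequested_0 and _1) */
	printf("halves: %#lx %#lx\n", (unsigned long)raw[0], (unsigned long)raw[1]);
#else
	/* LP64: the whole struct fits in raw[0]; the second slot is free
	 * for the thread's structure */
	printf("whole: %#llx\n", (unsigned long long)raw[0]);
#endif
	return 0;
}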
(((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0); bits |= (requested.t_role ? (((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0); @@ -1867,10 +2598,13 @@ task_requested_bitfield(task_t task, thread_t thread) bits |= (requested.t_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0); bits |= (requested.t_sup_cpu_limit ? POLICY_REQ_SUP_CPU_LIMIT : 0); bits |= (requested.t_sup_suspend ? POLICY_REQ_SUP_SUSPEND : 0); + bits |= (requested.t_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : 0); bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0); bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0); bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0); bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0); + bits |= (requested.t_sfi_managed ? POLICY_REQ_SFI_MANAGED : 0); + bits |= (requested.t_qos_clamp ? (((uint64_t)requested.t_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : 0); return bits; } @@ -1890,8 +2624,11 @@ task_effective_bitfield(task_t task, thread_t thread) bits |= (effective.all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0); bits |= (effective.new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0); bits |= (effective.bg_iotier ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0); + bits |= (effective.qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0); + + if (thread != THREAD_NULL) + bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0); - bits |= (effective.t_gpu_deny ? POLICY_EFF_GPU_DENY : 0); bits |= (effective.t_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0); bits |= (effective.t_suspended ? POLICY_EFF_SUSPENDED : 0); bits |= (effective.t_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0); @@ -1900,22 +2637,8 @@ task_effective_bitfield(task_t task, thread_t thread) bits |= (effective.t_role ? (((uint64_t)effective.t_role) << POLICY_EFF_ROLE_SHIFT) : 0); bits |= (effective.t_latency_qos ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0); bits |= (effective.t_through_qos ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0); - - return bits; -} - -uint64_t -task_pending_bitfield(task_t task, thread_t thread) -{ - uint64_t bits = 0; - struct task_pended_policy pended = - (thread == THREAD_NULL) ? task->pended_policy : thread->pended_policy; - - bits |= (pended.t_updating_policy ? POLICY_PEND_UPDATING : 0); - bits |= (pended.update_sockets ? POLICY_PEND_SOCKETS : 0); - - bits |= (pended.t_update_timers ? POLICY_PEND_TIMERS : 0); - bits |= (pended.t_update_watchers ? POLICY_PEND_WATCHERS : 0); + bits |= (effective.t_sfi_managed ? POLICY_EFF_SFI_MANAGED : 0); + bits |= (effective.t_qos_ceiling ? (((uint64_t)effective.t_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0); return bits; } @@ -1977,6 +2700,11 @@ proc_init_cpumon_params(void) } proc_max_cpumon_interval *= NSEC_PER_SEC; + + /* TEMPORARY boot arg to control App suppression */ + PE_parse_boot_argn("task_policy_suppression_disable", + &task_policy_suppression_disable, + sizeof(task_policy_suppression_disable)); } /* @@ -2183,7 +2911,7 @@ task_disable_cpumon(task_t task) { * Disable task-wide telemetry if it was ever enabled by the CPU usage * monitor's warning zone. 
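+ * (This must act on the task being disabled, which is not necessarily
+ * the calling task.)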
*/ - telemetry_task_ctl_locked(current_task(), TF_CPUMON_WARNING, 0); + telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0); #endif /* @@ -2412,11 +3140,46 @@ task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t pa * Routines for importance donation/inheritance/boosting */ +static void +task_importance_update_live_donor(task_t target_task) +{ +#if IMPORTANCE_INHERITANCE + + ipc_importance_task_t task_imp; + + task_imp = ipc_importance_for_task(target_task, FALSE); + if (IIT_NULL != task_imp) { + ipc_importance_task_update_live_donor(task_imp); + ipc_importance_task_release(task_imp); + } +#endif /* IMPORTANCE_INHERITANCE */ +} + void task_importance_mark_donor(task_t task, boolean_t donating) { #if IMPORTANCE_INHERITANCE - task->imp_donor = (donating ? 1 : 0); + ipc_importance_task_t task_imp; + + task_imp = ipc_importance_for_task(task, FALSE); + if (IIT_NULL != task_imp) { + ipc_importance_task_mark_donor(task_imp, donating); + ipc_importance_task_release(task_imp); + } +#endif /* IMPORTANCE_INHERITANCE */ +} + +void +task_importance_mark_live_donor(task_t task, boolean_t live_donating) +{ +#if IMPORTANCE_INHERITANCE + ipc_importance_task_t task_imp; + + task_imp = ipc_importance_for_task(task, FALSE); + if (IIT_NULL != task_imp) { + ipc_importance_task_mark_live_donor(task_imp, live_donating); + ipc_importance_task_release(task_imp); + } #endif /* IMPORTANCE_INHERITANCE */ } @@ -2424,38 +3187,59 @@ void task_importance_mark_receiver(task_t task, boolean_t receiving) { #if IMPORTANCE_INHERITANCE - if (receiving) { - assert(task->task_imp_assertcnt == 0); - task->imp_receiver = 1; /* task can receive importance boost */ - task->task_imp_assertcnt = 0; - task->task_imp_externcnt = 0; - } else { - if (task->task_imp_assertcnt != 0 || task->task_imp_externcnt != 0) - panic("disabling imp_receiver on task with pending boosts!"); + ipc_importance_task_t task_imp; - task->imp_receiver = 0; - task->task_imp_assertcnt = 0; - task->task_imp_externcnt = 0; + task_imp = ipc_importance_for_task(task, FALSE); + if (IIT_NULL != task_imp) { + ipc_importance_task_mark_receiver(task_imp, receiving); + ipc_importance_task_release(task_imp); } #endif /* IMPORTANCE_INHERITANCE */ } +void +task_importance_mark_denap_receiver(task_t task, boolean_t denap) +{ +#if IMPORTANCE_INHERITANCE + ipc_importance_task_t task_imp; + task_imp = ipc_importance_for_task(task, FALSE); + if (IIT_NULL != task_imp) { + ipc_importance_task_mark_denap_receiver(task_imp, denap); + ipc_importance_task_release(task_imp); + } +#endif /* IMPORTANCE_INHERITANCE */ +} + +void +task_importance_reset(__imp_only task_t task) +{ #if IMPORTANCE_INHERITANCE + ipc_importance_task_t task_imp; -static void -task_update_boost_locked(task_t task, boolean_t boost_active) + /* TODO: Lower importance downstream before disconnect */ + task_imp = task->task_imp_base; + ipc_importance_reset(task_imp, FALSE); + task_importance_update_live_donor(task); +#endif /* IMPORTANCE_INHERITANCE */ +} + +#if IMPORTANCE_INHERITANCE + +/* + * Sets the task boost bit to the provided value. Does NOT run the update function. + * + * Task lock must be held. + */ +void +task_set_boost_locked(task_t task, boolean_t boost_active) { #if IMPORTANCE_DEBUG KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? 
IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START), - proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), 0, 0); + proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0); #endif - /* assert(boost_active ? task->requested_policy.t_boosted == 0 : task->requested_policy.t_boosted == 1); */ - - proc_set_task_policy_locked(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BOOST, boost_active); - - task_policy_update_locked(task, THREAD_NULL); + task->requested_policy.t_boosted = boost_active; #if IMPORTANCE_DEBUG if (boost_active == TRUE){ @@ -2465,10 +3249,23 @@ task_update_boost_locked(task_t task, boolean_t boost_active) } KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END), proc_selfpid(), audit_token_pid_from_task(task), - trequested(task, THREAD_NULL), tpending(task, THREAD_NULL), 0); + trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0); #endif } +/* + * Sets the task boost bit to the provided value and applies the update. + * + * Task lock must be held. Must call update complete after unlocking the task. + */ +void +task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token) +{ + task_set_boost_locked(task, boost_active); + + task_policy_update_locked(task, THREAD_NULL, pend_token); +} + /* * Check if this task should donate importance. * @@ -2478,258 +3275,220 @@ task_update_boost_locked(task_t task, boolean_t boost_active) boolean_t task_is_importance_donor(task_t task) { - return (task->imp_donor == 1 || task->task_imp_assertcnt > 0) ? TRUE : FALSE; + if (task->task_imp_base == IIT_NULL) + return FALSE; + return ipc_importance_task_is_donor(task->task_imp_base); } /* - * This routine may be called without holding task lock - * since the value of imp_receiver can never be unset. + * Query the status of the task's donor mark. */ boolean_t -task_is_importance_receiver(task_t task) +task_is_marked_importance_donor(task_t task) { - return (task->imp_receiver) ? TRUE : FALSE; + if (task->task_imp_base == IIT_NULL) + return FALSE; + return ipc_importance_task_is_marked_donor(task->task_imp_base); } /* - * External importance assertions are managed by the process in userspace - * Internal importance assertions are the responsibility of the kernel - * Assertions are changed from internal to external via task_importance_externalize_assertion + * Query the status of the task's live donor and donor mark. */ - -int -task_importance_hold_internal_assertion(task_t target_task, uint32_t count) +boolean_t +task_is_marked_live_importance_donor(task_t task) { - int rval = 0; - - task_lock(target_task); - rval = task_importance_hold_assertion_locked(target_task, TASK_POLICY_INTERNAL, count); - task_unlock(target_task); + if (task->task_imp_base == IIT_NULL) + return FALSE; + return ipc_importance_task_is_marked_live_donor(task->task_imp_base); +} - task_policy_update_complete_unlocked(target_task, THREAD_NULL); - return(rval); +/* + * This routine may be called without holding task lock + * since the value of imp_receiver can never be unset. + */ +boolean_t +task_is_importance_receiver(task_t task) +{ + if (task->task_imp_base == IIT_NULL) + return FALSE; + return ipc_importance_task_is_marked_receiver(task->task_imp_base); } -int -task_importance_hold_external_assertion(task_t target_task, uint32_t count) +/* + * Query the task's receiver mark. 
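+ * (i.e. whether the task was explicitly marked as a receiver, as distinct
+ * from its current boost state).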
+ */ +boolean_t +task_is_marked_importance_receiver(task_t task) { - int rval = 0; - - task_lock(target_task); - rval = task_importance_hold_assertion_locked(target_task, TASK_POLICY_EXTERNAL, count); - task_unlock(target_task); - - task_policy_update_complete_unlocked(target_task, THREAD_NULL); - - return(rval); + if (task->task_imp_base == IIT_NULL) + return FALSE; + return ipc_importance_task_is_marked_receiver(task->task_imp_base); } -int -task_importance_drop_internal_assertion(task_t target_task, uint32_t count) +/* + * This routine may be called without holding task lock + * since the value of de-nap receiver can never be unset. + */ +boolean_t +task_is_importance_denap_receiver(task_t task) { - int rval = 0; - - task_lock(target_task); - rval = task_importance_drop_assertion_locked(target_task, TASK_POLICY_INTERNAL, count); - task_unlock(target_task); - - task_policy_update_complete_unlocked(target_task, THREAD_NULL); - - return(rval); + if (task->task_imp_base == IIT_NULL) + return FALSE; + return ipc_importance_task_is_denap_receiver(task->task_imp_base); } -int -task_importance_drop_external_assertion(task_t target_task, uint32_t count) +/* + * Query the task's de-nap receiver mark. + */ +boolean_t +task_is_marked_importance_denap_receiver(task_t task) { - int rval = 0; - - task_lock(target_task); - rval = task_importance_drop_assertion_locked(target_task, TASK_POLICY_EXTERNAL, count); - task_unlock(target_task); - - task_policy_update_complete_unlocked(target_task, THREAD_NULL); - - return(rval); + if (task->task_imp_base == IIT_NULL) + return FALSE; + return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base); } /* - * Returns EOVERFLOW if an external assertion is taken when not holding an external boost. + * This routine may be called without holding task lock + * since the value of imp_receiver can never be unset. */ -static int -task_importance_hold_assertion_locked(task_t target_task, int external, uint32_t count) +boolean_t +task_is_importance_receiver_type(task_t task) { - boolean_t apply_boost = FALSE; - int ret = 0; - - assert(target_task->imp_receiver != 0); - -#if IMPORTANCE_DEBUG - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | external))) | DBG_FUNC_START, - proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); -#endif - - /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */ - - if (external == TASK_POLICY_EXTERNAL) { - if (target_task->task_imp_externcnt == 0) { - /* Only allowed to take a new boost assertion when holding an external boost */ - printf("BUG in process %s[%d]: it attempted to acquire a new boost assertion without holding an existing external assertion. 
" - "(%d total, %d external)\n", - proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task), - target_task->task_imp_assertcnt, target_task->task_imp_externcnt); - ret = EOVERFLOW; - count = 0; - } else { - target_task->task_imp_assertcnt += count; - target_task->task_imp_externcnt += count; - } - } else { - if (target_task->task_imp_assertcnt == 0) - apply_boost = TRUE; - target_task->task_imp_assertcnt += count; - } - - if (apply_boost == TRUE) - task_update_boost_locked(target_task, TRUE); - -#if IMPORTANCE_DEBUG - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | external))) | DBG_FUNC_END, - proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); - DTRACE_BOOST6(receive_internal_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), task_t, current_task(), int, proc_selfpid(), int, count, int, target_task->task_imp_assertcnt); - if (external == TASK_POLICY_EXTERNAL){ - DTRACE_BOOST5(receive_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, proc_selfpid(), int, count, int, target_task->task_imp_externcnt); - } -#endif - return(ret); + if (task->task_imp_base == IIT_NULL) + return FALSE; + return (task_is_importance_receiver(task) || + task_is_importance_denap_receiver(task)); } - /* - * Returns EOVERFLOW if an external assertion is over-released. - * Panics if an internal assertion is over-released. + * External importance assertions are managed by the process in userspace + * Internal importance assertions are the responsibility of the kernel + * Assertions are changed from internal to external via task_importance_externalize_assertion */ -static int -task_importance_drop_assertion_locked(task_t target_task, int external, uint32_t count) + +int +task_importance_hold_watchport_assertion(task_t target_task, uint32_t count) { - int ret = 0; + ipc_importance_task_t task_imp; + kern_return_t ret; - assert(target_task->imp_receiver != 0); + /* must already have set up an importance */ + task_imp = target_task->task_imp_base; + assert(IIT_NULL != task_imp); -#if IMPORTANCE_DEBUG - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | external))) | DBG_FUNC_START, - proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); -#endif + ret = ipc_importance_task_hold_internal_assertion(task_imp, count); + return (KERN_SUCCESS != ret) ? ENOTSUP : 0; +} - /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */ +int +task_importance_hold_internal_assertion(task_t target_task, uint32_t count) +{ + ipc_importance_task_t task_imp; + kern_return_t ret; - if (external == TASK_POLICY_EXTERNAL) { - assert(count == 1); - if (count <= target_task->task_imp_externcnt) { - target_task->task_imp_externcnt -= count; - if (count <= target_task->task_imp_assertcnt) - target_task->task_imp_assertcnt -= count; - } else { - /* Process over-released its boost count */ - printf("BUG in process %s[%d]: over-released external boost assertions (%d total, %d external)\n", - proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task), - target_task->task_imp_assertcnt, target_task->task_imp_externcnt); - - /* TODO: If count > 1, we should clear out as many external assertions as there are left. 
*/ - ret = EOVERFLOW; - count = 0; - } - } else { - if (count <= target_task->task_imp_assertcnt) { - target_task->task_imp_assertcnt -= count; - } else { - /* TODO: Turn this back into a panic */ - printf("Over-release of kernel-internal importance assertions for task %p (%s), dropping %d assertion(s) but task only has %d remaining (%d external).\n", - target_task, - (target_task->bsd_info == NULL) ? "" : proc_name_address(target_task->bsd_info), - count, - target_task->task_imp_assertcnt, - target_task->task_imp_externcnt); - count = 0; - } + /* may be first time, so allow for possible importance setup */ + task_imp = ipc_importance_for_task(target_task, FALSE); + if (IIT_NULL == task_imp) { + return EOVERFLOW; } + ret = ipc_importance_task_hold_internal_assertion(task_imp, count); + ipc_importance_task_release(task_imp); - /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */ + return (KERN_SUCCESS != ret) ? ENOTSUP : 0; +} - if (target_task->task_imp_assertcnt == 0 && ret == 0) - task_update_boost_locked(target_task, FALSE); +int +task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count) +{ + ipc_importance_task_t task_imp; + kern_return_t ret; -#if IMPORTANCE_DEBUG - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | external))) | DBG_FUNC_END, - proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); - if (external == TASK_POLICY_EXTERNAL) { - DTRACE_BOOST4(drop_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, count, int, target_task->task_imp_externcnt); + /* may be first time, so allow for possible importance setup */ + task_imp = ipc_importance_for_task(target_task, FALSE); + if (IIT_NULL == task_imp) { + return EOVERFLOW; } - DTRACE_BOOST4(drop_internal_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, count, int, target_task->task_imp_assertcnt); -#endif + ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count); + ipc_importance_task_release(task_imp); - return(ret); + return (KERN_SUCCESS != ret) ? 
ENOTSUP : 0; } -/* Transfer an assertion to userspace responsibility */ int -task_importance_externalize_assertion(task_t target_task, uint32_t count, __unused int sender_pid) +task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count) { - assert(target_task != TASK_NULL); - assert(target_task->imp_receiver != 0); - - task_lock(target_task); - -#if IMPORTANCE_DEBUG - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_START, - proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); -#endif - - /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt + count); */ - - target_task->task_imp_externcnt += count; - -#if IMPORTANCE_DEBUG - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_END, - proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); - DTRACE_BOOST5(receive_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), - int, sender_pid, int, count, int, target_task->task_imp_externcnt); -#endif /* IMPORTANCE_DEBUG */ - - task_unlock(target_task); - - return(0); + ipc_importance_task_t task_imp; + kern_return_t ret; + + /* must already have set up an importance */ + task_imp = target_task->task_imp_base; + if (IIT_NULL == task_imp) { + return EOVERFLOW; + } + ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count); + return (KERN_SUCCESS != ret) ? ENOTSUP : 0; } +int +task_importance_drop_internal_assertion(task_t target_task, uint32_t count) +{ + ipc_importance_task_t task_imp; + kern_return_t ret; + + /* must already have set up an importance */ + task_imp = target_task->task_imp_base; + if (IIT_NULL == task_imp) { + return EOVERFLOW; + } + ret = ipc_importance_task_drop_internal_assertion(target_task->task_imp_base, count); + return (KERN_SUCCESS != ret) ? ENOTSUP : 0; +} -#endif /* IMPORTANCE_INHERITANCE */ - -void -task_hold_multiple_assertion(__imp_only task_t task, __imp_only uint32_t count) +int +task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count) { -#if IMPORTANCE_INHERITANCE - assert(task->imp_receiver != 0); + ipc_importance_task_t task_imp; + kern_return_t ret; + + /* must already have set up an importance */ + task_imp = target_task->task_imp_base; + if (IIT_NULL == task_imp) { + return EOVERFLOW; + } + ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count); + return (KERN_SUCCESS != ret) ? EOVERFLOW : 0; +} - task_importance_hold_internal_assertion(task, count); -#endif /* IMPORTANCE_INHERITANCE */ +int +task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count) +{ + ipc_importance_task_t task_imp; + kern_return_t ret; + + /* must already have set up an importance */ + task_imp = target_task->task_imp_base; + if (IIT_NULL == task_imp) { + return EOVERFLOW; + } + ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count); + return (KERN_SUCCESS != ret) ? 
EOVERFLOW : 0; } -void -task_add_importance_watchport(__imp_only task_t task, __imp_only __impdebug_only int pid, __imp_only mach_port_t port, int *boostp) +static void +task_add_importance_watchport(task_t task, mach_port_t port, int *boostp) { int boost = 0; __impdebug_only int released_pid = 0; + __impdebug_only int pid = audit_token_pid_from_task(task); -#if IMPORTANCE_INHERITANCE - task_t release_imp_task = TASK_NULL; - - if (task->imp_receiver == 0) { - *boostp = boost; - return; - } + ipc_importance_task_t release_imp_task = IIT_NULL; if (IP_VALID(port) != 0) { + ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE); + ip_lock(port); /* @@ -2743,39 +3502,46 @@ task_add_importance_watchport(__imp_only task_t task, __imp_only __impdebug_only assert(port->ip_impdonation != 0); boost = port->ip_impcount; - if (port->ip_taskptr != 0) { + if (IIT_NULL != port->ip_imp_task) { /* * if this port is already bound to a task, * release the task reference and drop any * watchport-forwarded boosts */ release_imp_task = port->ip_imp_task; + port->ip_imp_task = IIT_NULL; } - /* mark the port is watching another task */ - port->ip_taskptr = 1; - port->ip_imp_task = task; - task_reference(task); + /* mark that the port is watching another task (reference held in port->ip_imp_task) */ + if (ipc_importance_task_is_marked_receiver(new_imp_task)) { + port->ip_imp_task = new_imp_task; + new_imp_task = IIT_NULL; + } } ip_unlock(port); - if (release_imp_task != TASK_NULL) { + if (IIT_NULL != new_imp_task) { + ipc_importance_task_release(new_imp_task); + } + + if (IIT_NULL != release_imp_task) { if (boost > 0) - task_importance_drop_internal_assertion(release_imp_task, boost); - released_pid = audit_token_pid_from_task(release_imp_task); - task_deallocate(release_imp_task); + ipc_importance_task_drop_internal_assertion(release_imp_task, boost); + + // released_pid = audit_token_pid_from_task(release_imp_task); /* TODO: Need ref-safe way to get pid */ + ipc_importance_task_release(release_imp_task); } #if IMPORTANCE_DEBUG KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE, proc_selfpid(), pid, boost, released_pid, 0); #endif /* IMPORTANCE_DEBUG */ } -#endif /* IMPORTANCE_INHERITANCE */ *boostp = boost; return; } +#endif /* IMPORTANCE_INHERITANCE */ /* * Routines for VM to query task importance diff --git a/osfmk/kern/task_swap.c b/osfmk/kern/task_swap.c index 6f9f462cb..02afda05d 100644 --- a/osfmk/kern/task_swap.c +++ b/osfmk/kern/task_swap.c @@ -37,7 +37,6 @@ #include #include -#include #include #include #include diff --git a/osfmk/kern/telemetry.c b/osfmk/kern/telemetry.c index 15b025418..b392ac9ba 100644 --- a/osfmk/kern/telemetry.c +++ b/osfmk/kern/telemetry.c @@ -66,9 +66,18 @@ extern uint64_t proc_did_throttle(void *p); extern uint64_t get_dispatchqueue_serialno_offset_from_proc(void *p); extern int proc_selfpid(void); -void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags); +struct micro_snapshot_buffer { + vm_offset_t buffer; + uint32_t size; + uint32_t current_position; + uint32_t end_point; +}; + +void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct micro_snapshot_buffer * current_buffer); +int telemetry_buffer_gather(user_addr_t buffer, uint32_t *length, boolean_t mark, struct micro_snapshot_buffer * current_buffer); #define TELEMETRY_DEFAULT_SAMPLE_RATE (1) /* 1 sample every 1 second */ +#define TELEMETRY_DEFAULT_WINDOW_BUFFER_SIZE (512*1024) /* Should hopefully provide 10 seconds worth of samples
*/ #define TELEMETRY_DEFAULT_BUFFER_SIZE (16*1024) #define TELEMETRY_MAX_BUFFER_SIZE (64*1024) @@ -77,8 +86,17 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags); uint32_t telemetry_sample_rate = 0; volatile boolean_t telemetry_needs_record = FALSE; +volatile boolean_t telemetry_windowed_record = FALSE; volatile boolean_t telemetry_needs_timer_arming_record = FALSE; +/* + * Tells the scheduler that we want it to invoke + * compute_telemetry_windowed(); it is still our responsibility + * to ensure that we do not panic if someone disables the window + * buffer immediately after the scheduler does so. + */ +volatile boolean_t telemetry_window_enabled = FALSE; + /* * If TRUE, record micro-stackshot samples for all tasks. * If FALSE, only sample tasks which are marked for telemetry. @@ -88,10 +106,19 @@ uint32_t telemetry_active_tasks = 0; // Number of tasks opted into telemetry uint32_t telemetry_timestamp = 0; -vm_offset_t telemetry_buffer = 0; -uint32_t telemetry_buffer_size = 0; -uint32_t telemetry_buffer_current_position = 0; -uint32_t telemetry_buffer_end_point = 0; // If we've wrapped, where does the last record end? +/* + * We have two buffers. The telemetry_buffer is responsible + * for timer samples and interrupt samples that are driven by + * compute_averages(). It will notify its client (if one + * exists) when it has enough data to be worth flushing. + * + * The window_buffer contains only interrupt_samples that are + * driven by the scheduler. Its intent is to provide a + * window of recent activity on the cpu(s). + */ +struct micro_snapshot_buffer telemetry_buffer = {0, 0, 0, 0}; +struct micro_snapshot_buffer window_buffer = {0, 0, 0, 0}; + int telemetry_bytes_since_last_mark = -1; // How much data since buf was last marked? 
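+// Once telemetry_bytes_since_last_mark exceeds this threshold, notify
+// the user-space client that the buffer is worth gathering.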
int telemetry_buffer_notify_at = 0; @@ -110,18 +137,19 @@ void telemetry_init(void) lck_grp_init(&telemetry_lck_grp, "telemetry group", LCK_GRP_ATTR_NULL); lck_mtx_init(&telemetry_mtx, &telemetry_lck_grp, LCK_ATTR_NULL); - if (!PE_parse_boot_argn("telemetry_buffer_size", &telemetry_buffer_size, sizeof(telemetry_buffer_size))) { - telemetry_buffer_size = TELEMETRY_DEFAULT_BUFFER_SIZE; + if (!PE_parse_boot_argn("telemetry_buffer_size", &telemetry_buffer.size, sizeof(telemetry_buffer.size))) { + telemetry_buffer.size = TELEMETRY_DEFAULT_BUFFER_SIZE; } - if (telemetry_buffer_size > TELEMETRY_MAX_BUFFER_SIZE) - telemetry_buffer_size = TELEMETRY_MAX_BUFFER_SIZE; + if (telemetry_buffer.size > TELEMETRY_MAX_BUFFER_SIZE) + telemetry_buffer.size = TELEMETRY_MAX_BUFFER_SIZE; - ret = kmem_alloc(kernel_map, &telemetry_buffer, telemetry_buffer_size); + ret = kmem_alloc(kernel_map, &telemetry_buffer.buffer, telemetry_buffer.size); if (ret != KERN_SUCCESS) { kprintf("Telemetry: Allocation failed: %d\n", ret); return; } + bzero((void *) telemetry_buffer.buffer, telemetry_buffer.size); if (!PE_parse_boot_argn("telemetry_notification_leeway", &telemetry_notification_leeway, sizeof(telemetry_notification_leeway))) { /* @@ -129,12 +157,12 @@ void telemetry_init(void) */ telemetry_notification_leeway = TELEMETRY_DEFAULT_NOTIFY_LEEWAY; } - if (telemetry_notification_leeway >= telemetry_buffer_size) { + if (telemetry_notification_leeway >= telemetry_buffer.size) { printf("telemetry: nonsensical telemetry_notification_leeway boot-arg %d changed to %d\n", telemetry_notification_leeway, TELEMETRY_DEFAULT_NOTIFY_LEEWAY); telemetry_notification_leeway = TELEMETRY_DEFAULT_NOTIFY_LEEWAY; } - telemetry_buffer_notify_at = telemetry_buffer_size - telemetry_notification_leeway; + telemetry_buffer_notify_at = telemetry_buffer.size - telemetry_notification_leeway; if (!PE_parse_boot_argn("telemetry_sample_rate", &telemetry_sample_rate, sizeof(telemetry_sample_rate))) { telemetry_sample_rate = TELEMETRY_DEFAULT_SAMPLE_RATE; @@ -222,6 +250,83 @@ telemetry_task_ctl_locked(task_t task, uint32_t reasons, int enable_disable) } } +/* + * Enable the window_buffer, and do any associated setup. + */ +kern_return_t +telemetry_enable_window(void) +{ + kern_return_t ret = KERN_SUCCESS; + vm_offset_t kern_buffer = 0; + vm_size_t kern_buffer_size = TELEMETRY_DEFAULT_WINDOW_BUFFER_SIZE; + + /* + * We have no guarantee we won't allocate the buffer, take + * the lock, and then discover someone beat us to the punch, + * but we would prefer to avoid blocking while holding the + * lock. + */ + ret = kmem_alloc(kernel_map, &kern_buffer, kern_buffer_size); + + TELEMETRY_LOCK(); + + if (!window_buffer.buffer) { + if (ret == KERN_SUCCESS) { + /* No existing buffer was found, so... */ + window_buffer.end_point = 0; + window_buffer.current_position = 0; + + /* Hand off the buffer, and... */ + window_buffer.size = (uint32_t) kern_buffer_size; + window_buffer.buffer = kern_buffer; + kern_buffer = 0; + kern_buffer_size = 0; + bzero((void *) window_buffer.buffer, window_buffer.size); + + /* Let the scheduler know it should drive windowed samples */ + telemetry_window_enabled = TRUE; + } + } else { + /* We already have a buffer, so we have "succeeded" */ + ret = KERN_SUCCESS; + } + + TELEMETRY_UNLOCK(); + + if (kern_buffer) + kmem_free(kernel_map, kern_buffer, kern_buffer_size); + + return ret; +} + +/* + * Disable the window_buffer, and do any associated teardown. 
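+ * (The buffer pointer is cleared under the telemetry lock and the backing
+ * memory is freed only after the lock is dropped; samplers that find a
+ * NULL buffer simply cancel their sample.)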
+ */ +void +telemetry_disable_window(void) +{ + vm_offset_t kern_buffer = 0; + vm_size_t kern_buffer_size = 0; + + TELEMETRY_LOCK(); + + if (window_buffer.buffer) { + /* We have a window buffer, so tear it down */ + telemetry_window_enabled = FALSE; + kern_buffer = window_buffer.buffer; + kern_buffer_size = window_buffer.size; + window_buffer.buffer = 0; + window_buffer.size = 0; + window_buffer.current_position = 0; + window_buffer.end_point = 0; + } + + TELEMETRY_UNLOCK(); + + if (kern_buffer) + kmem_free(kernel_map, kern_buffer, kern_buffer_size); +} + /* * Determine if the current thread is eligible for telemetry: * @@ -252,7 +357,7 @@ int telemetry_timer_event(__unused uint64_t deadline, __unused uint64_t interval { if (telemetry_needs_timer_arming_record == TRUE) { telemetry_needs_timer_arming_record = FALSE; - telemetry_take_sample(current_thread(), kTimerArmingRecord | kUserMode); + telemetry_take_sample(current_thread(), kTimerArmingRecord | kUserMode, &telemetry_buffer); } return (0); @@ -264,6 +369,7 @@ int telemetry_timer_event(__unused uint64_t deadline, __unused uint64_t interval */ void telemetry_mark_curthread(boolean_t interrupted_userspace) { + uint32_t ast_bits = 0; thread_t thread = current_thread(); /* @@ -274,8 +380,15 @@ void telemetry_mark_curthread(boolean_t interrupted_userspace) return; } + ast_bits |= (interrupted_userspace ? AST_TELEMETRY_USER : AST_TELEMETRY_KERNEL); + + if (telemetry_windowed_record) { + ast_bits |= AST_TELEMETRY_WINDOWED; + } + + telemetry_windowed_record = FALSE; telemetry_needs_record = FALSE; - thread_ast_set(thread, interrupted_userspace ? AST_TELEMETRY_USER : AST_TELEMETRY_KERNEL); + thread_ast_set(thread, ast_bits); ast_propagate(thread->ast); } @@ -283,16 +396,34 @@ void compute_telemetry(void *arg __unused) { if (telemetry_sample_all_tasks || (telemetry_active_tasks > 0)) { if ((++telemetry_timestamp) % telemetry_sample_rate == 0) { - /* - * To avoid overloading the system with telemetry ASTs, make - * sure we don't add more requests while existing ones - * are in-flight. - */ - if (TELEMETRY_TRY_SPIN_LOCK()) { - telemetry_needs_record = TRUE; - telemetry_needs_timer_arming_record = TRUE; - TELEMETRY_UNLOCK(); - } + telemetry_needs_record = TRUE; + telemetry_needs_timer_arming_record = TRUE; + } + } +} + +void compute_telemetry_windowed(void) +{ + if (telemetry_sample_all_tasks || (telemetry_active_tasks > 0)) { + /* + * Due to the relationship between the two fields here, + * a request for a windowed record will "squash" a + * request for a regular interrupt record. We hedge + * against this by doing a quick check for an existing + * request. compute_telemetry doesn't hedge because + * a regular request cannot squash a windowed request + * (due to the implementation). + * + * If we really want to do this properly, we could make + * telemetry_needs_record a bitfield, and process one + * request per telemetry_mark_curthread... but that + * would be more expensive (atomics). This should be + * robust enough for now (although it biases in favor + * of the regular records). 
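+ * For example, a windowed request that arrives while telemetry_needs_record
+ * is already TRUE is silently dropped here.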
+ */ + if (!telemetry_needs_record) { + telemetry_needs_record = TRUE; + telemetry_windowed_record = TRUE; } } } @@ -315,17 +446,21 @@ telemetry_notify_user(void) telemetry_notification(user_port, flags); } -void telemetry_ast(thread_t thread, boolean_t interrupted_userspace) +void telemetry_ast(thread_t thread, boolean_t interrupted_userspace, boolean_t is_windowed) { uint8_t microsnapshot_flags = kInterruptRecord; if (interrupted_userspace) microsnapshot_flags |= kUserMode; - telemetry_take_sample(thread, microsnapshot_flags); + if (is_windowed) { + telemetry_take_sample(thread, microsnapshot_flags, &window_buffer); + } else { + telemetry_take_sample(thread, microsnapshot_flags, &telemetry_buffer); + } } -void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags) +void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct micro_snapshot_buffer * current_buffer) { task_t task; void *p; @@ -349,8 +484,25 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags) if ((task == TASK_NULL) || (task == kernel_task)) return; + /* + * To avoid overloading the system with telemetry requests, make + * sure we don't add more requests while existing ones are + * in-flight. Attempt this by checking if we can grab the lock. + * + * This concerns me a little; this working as intended is + * contingent on the workload being done in the context of the + * telemetry lock being the expensive part of telemetry. This + * includes populating the buffer and the client gathering it, + * but excludes the copyin overhead. + */ + if (!TELEMETRY_TRY_SPIN_LOCK()) + return; + + TELEMETRY_UNLOCK(); + /* telemetry_XXX accessed outside of lock for instrumentation only */ - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_RECORD) | DBG_FUNC_START, microsnapshot_flags, telemetry_bytes_since_last_mark, 0, 0, 0); + /* TODO */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_RECORD) | DBG_FUNC_START, microsnapshot_flags, telemetry_bytes_since_last_mark, 0, 0, (&telemetry_buffer != current_buffer)); p = get_bsdtask_info(task); @@ -473,6 +625,14 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags) TELEMETRY_LOCK(); + /* + * For the benefit of the window buffer; if our buffer is not backed by anything, + * then we cannot take the sample. Meant to allow us to deallocate the window + * buffer if it is disabled. + */ + if (!current_buffer->buffer) + goto cancel_sample; + /* * We do the bulk of the operation under the telemetry lock, on assumption that * any page faults during execution will not cause another AST_TELEMETRY_ALL @@ -483,15 +643,15 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags) copytobuffer: - current_record_start = telemetry_buffer_current_position; + current_record_start = current_buffer->current_position; - if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(struct micro_snapshot)) { + if ((current_buffer->size - current_buffer->current_position) < sizeof(struct micro_snapshot)) { /* * We can't fit a record in the space available, so wrap around to the beginning. * Save the current position as the known end point of valid data. 
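* (end_point records where valid data stops once the cursor has wrapped;
* the gather path relies on it to walk records oldest-first.)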
*/ - telemetry_buffer_end_point = current_record_start; - telemetry_buffer_current_position = 0; + current_buffer->end_point = current_record_start; + current_buffer->current_position = 0; if (current_record_start == 0) { /* This sample is too large to fit in the buffer even when we started at 0, so skip it */ goto cancel_sample; @@ -499,7 +659,7 @@ copytobuffer: goto copytobuffer; } - msnap = (struct micro_snapshot *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position); + msnap = (struct micro_snapshot *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position); msnap->snapshot_magic = STACKSHOT_MICRO_SNAPSHOT_MAGIC; msnap->ms_flags = microsnapshot_flags; msnap->ms_opaque_flags = 0; /* namespace managed by userspace */ @@ -507,11 +667,11 @@ copytobuffer: msnap->ms_time = secs; msnap->ms_time_microsecs = usecs; - telemetry_buffer_current_position += sizeof(struct micro_snapshot); + current_buffer->current_position += sizeof(struct micro_snapshot); - if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(struct task_snapshot)) { - telemetry_buffer_end_point = current_record_start; - telemetry_buffer_current_position = 0; + if ((current_buffer->size - current_buffer->current_position) < sizeof(struct task_snapshot)) { + current_buffer->end_point = current_record_start; + current_buffer->current_position = 0; if (current_record_start == 0) { /* This sample is too large to fit in the buffer even when we started at 0, so skip it */ goto cancel_sample; @@ -519,7 +679,7 @@ copytobuffer: goto copytobuffer; } - tsnap = (struct task_snapshot *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position); + tsnap = (struct task_snapshot *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position); bzero(tsnap, sizeof(*tsnap)); tsnap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC; tsnap->pid = proc_pid(p); @@ -575,15 +735,15 @@ copytobuffer: bcopy(shared_cache_header.uuid, tsnap->shared_cache_identifier, sizeof (shared_cache_header.uuid)); } - telemetry_buffer_current_position += sizeof(struct task_snapshot); + current_buffer->current_position += sizeof(struct task_snapshot); /* * Directly after the task snapshot, place the array of UUID's corresponding to the binaries * used by this task. */ - if ((telemetry_buffer_size - telemetry_buffer_current_position) < uuid_info_array_size) { - telemetry_buffer_end_point = current_record_start; - telemetry_buffer_current_position = 0; + if ((current_buffer->size - current_buffer->current_position) < uuid_info_array_size) { + current_buffer->end_point = current_record_start; + current_buffer->current_position = 0; if (current_record_start == 0) { /* This sample is too large to fit in the buffer even when we started at 0, so skip it */ goto cancel_sample; @@ -595,20 +755,20 @@ copytobuffer: * Copy the UUID info array into our sample. */ if (uuid_info_array_size > 0) { - bcopy(uuid_info_array, (char *)(telemetry_buffer + telemetry_buffer_current_position), uuid_info_array_size); + bcopy(uuid_info_array, (char *)(current_buffer->buffer + current_buffer->current_position), uuid_info_array_size); tsnap->nloadinfos = uuid_info_count; } - telemetry_buffer_current_position += uuid_info_array_size; + current_buffer->current_position += uuid_info_array_size; /* * After the task snapshot & list of binary UUIDs, we place a thread snapshot. 
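* (Overall record layout: micro_snapshot header, task_snapshot, UUID array,
* thread_snapshot, optional dispatch queue serial number, then the user
* stack frames.)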
*/ - if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(struct thread_snapshot)) { + if ((current_buffer->size - current_buffer->current_position) < sizeof(struct thread_snapshot)) { /* wrap and overwrite */ - telemetry_buffer_end_point = current_record_start; - telemetry_buffer_current_position = 0; + current_buffer->end_point = current_record_start; + current_buffer->current_position = 0; if (current_record_start == 0) { /* This sample is too large to fit in the buffer even when we started at 0, so skip it */ goto cancel_sample; @@ -616,7 +776,7 @@ copytobuffer: goto copytobuffer; } - thsnap = (struct thread_snapshot *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position); + thsnap = (struct thread_snapshot *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position); bzero(thsnap, sizeof(*thsnap)); thsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC; @@ -626,6 +786,7 @@ copytobuffer: thsnap->sched_pri = thread->sched_pri; thsnap->sched_flags = thread->sched_flags; thsnap->ss_flags |= kStacksPCOnly; + thsnap->ts_qos = thread->effective_policy.thep_qos; if (thread->effective_policy.darwinbg) { thsnap->ss_flags |= kThreadDarwinBG; @@ -642,16 +803,16 @@ copytobuffer: thsnap->system_time = 0; } - telemetry_buffer_current_position += sizeof(struct thread_snapshot); + current_buffer->current_position += sizeof(struct thread_snapshot); /* * If this thread has a dispatch queue serial number, include it here. */ if (dqserialnum_valid) { - if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(dqserialnum)) { + if ((current_buffer->size - current_buffer->current_position) < sizeof(dqserialnum)) { /* wrap and overwrite */ - telemetry_buffer_end_point = current_record_start; - telemetry_buffer_current_position = 0; + current_buffer->end_point = current_record_start; + current_buffer->current_position = 0; if (current_record_start == 0) { /* This sample is too large to fit in the buffer even when we started at 0, so skip it */ goto cancel_sample; @@ -660,8 +821,8 @@ copytobuffer: } thsnap->ss_flags |= kHasDispatchSerial; - bcopy(&dqserialnum, (char *)telemetry_buffer + telemetry_buffer_current_position, sizeof (dqserialnum)); - telemetry_buffer_current_position += sizeof (dqserialnum); + bcopy(&dqserialnum, (char *)current_buffer->buffer + current_buffer->current_position, sizeof (dqserialnum)); + current_buffer->current_position += sizeof (dqserialnum); } if (task_has_64BitAddr(task)) { @@ -677,9 +838,9 @@ copytobuffer: * If we can't fit this entire stacktrace then cancel this record, wrap to the beginning, * and start again there so that we always store a full record. 
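* (A sample is abandoned outright only when it cannot fit even in an empty
* buffer, i.e. when current_record_start is already 0.)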
*/ - if ((telemetry_buffer_size - telemetry_buffer_current_position)/framesize < btcount) { - telemetry_buffer_end_point = current_record_start; - telemetry_buffer_current_position = 0; + if ((current_buffer->size - current_buffer->current_position)/framesize < btcount) { + current_buffer->end_point = current_record_start; + current_buffer->current_position = 0; if (current_record_start == 0) { /* This sample is too large to fit in the buffer even when we started at 0, so skip it */ goto cancel_sample; @@ -687,36 +848,42 @@ copytobuffer: goto copytobuffer; } - for (bti=0; bti < btcount; bti++, telemetry_buffer_current_position += framesize) { + for (bti=0; bti < btcount; bti++, current_buffer->current_position += framesize) { if (framesize == 8) { - *(uint64_t *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position) = cs.frames[bti]; + *(uint64_t *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position) = cs.frames[bti]; } else { - *(uint32_t *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position) = (uint32_t)cs.frames[bti]; + *(uint32_t *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position) = (uint32_t)cs.frames[bti]; } } - if (telemetry_buffer_end_point < telemetry_buffer_current_position) { + if (current_buffer->end_point < current_buffer->current_position) { /* * Each time the cursor wraps around to the beginning, we leave a * differing amount of unused space at the end of the buffer. Make * sure the cursor pushes the end point in case we're making use of * more of the buffer than we did the last time we wrapped. */ - telemetry_buffer_end_point = telemetry_buffer_current_position; + current_buffer->end_point = current_buffer->current_position; } thsnap->nuser_frames = btcount; - telemetry_bytes_since_last_mark += (telemetry_buffer_current_position - current_record_start); - if (telemetry_bytes_since_last_mark > telemetry_buffer_notify_at) { - notify = TRUE; + /* + * Now THIS is a hack. 
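+ * Only the primary telemetry_buffer participates in the mark/notify
+ * accounting; the window buffer is gathered on demand and never notifies
+ * user space.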
+ */ + if (current_buffer == &telemetry_buffer) { + telemetry_bytes_since_last_mark += (current_buffer->current_position - current_record_start); + if (telemetry_bytes_since_last_mark > telemetry_buffer_notify_at) { + notify = TRUE; + } } cancel_sample: TELEMETRY_UNLOCK(); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_RECORD) | DBG_FUNC_END, notify, telemetry_bytes_since_last_mark, telemetry_buffer_current_position, telemetry_buffer_end_point, 0); + /* TODO */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_RECORD) | DBG_FUNC_END, notify, telemetry_bytes_since_last_mark, current_buffer->current_position, current_buffer->end_point, (&telemetry_buffer != current_buffer)); if (notify) { telemetry_notify_user(); @@ -751,20 +918,31 @@ log_telemetry_output(vm_offset_t buf, uint32_t pos, uint32_t sz) #endif int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark) +{ + return telemetry_buffer_gather(buffer, length, mark, &telemetry_buffer); +} + +int telemetry_gather_windowed(user_addr_t buffer, uint32_t *length) +{ + return telemetry_buffer_gather(buffer, length, 0, &window_buffer); +} + +int telemetry_buffer_gather(user_addr_t buffer, uint32_t *length, boolean_t mark, struct micro_snapshot_buffer * current_buffer) { int result = 0; uint32_t oldest_record_offset; - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_GATHER) | DBG_FUNC_START, mark, telemetry_bytes_since_last_mark, 0, 0, 0); + /* TODO */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_GATHER) | DBG_FUNC_START, mark, telemetry_bytes_since_last_mark, 0, 0, (&telemetry_buffer != current_buffer)); TELEMETRY_LOCK(); - if (telemetry_buffer == 0) { + if (current_buffer->buffer == 0) { *length = 0; goto out; } - if (*length < telemetry_buffer_size) { + if (*length < current_buffer->size) { result = KERN_NO_SPACE; goto out; } @@ -773,12 +951,12 @@ int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark) * Copy the ring buffer out to userland in order sorted by time: least recent to most recent. * First, we need to search forward from the cursor to find the oldest record in our buffer. */ - oldest_record_offset = telemetry_buffer_current_position; + oldest_record_offset = current_buffer->current_position; do { - if ((oldest_record_offset == telemetry_buffer_size) || - (oldest_record_offset == telemetry_buffer_end_point)) { + if (((oldest_record_offset + sizeof(uint32_t)) > current_buffer->size) || + ((oldest_record_offset + sizeof(uint32_t)) > current_buffer->end_point)) { - if (*(uint32_t *)(uintptr_t)(telemetry_buffer) == 0) { + if (*(uint32_t *)(uintptr_t)(current_buffer->buffer) == 0) { /* * There is no magic number at the start of the buffer, which means * it's empty; nothing to see here yet. @@ -793,11 +971,11 @@ int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark) */ oldest_record_offset = 0; - assert(*(uint32_t *)(uintptr_t)(telemetry_buffer) == STACKSHOT_MICRO_SNAPSHOT_MAGIC); + assert(*(uint32_t *)(uintptr_t)(current_buffer->buffer) == STACKSHOT_MICRO_SNAPSHOT_MAGIC); break; } - if (*(uint32_t *)(uintptr_t)(telemetry_buffer + oldest_record_offset) == STACKSHOT_MICRO_SNAPSHOT_MAGIC) + if (*(uint32_t *)(uintptr_t)(current_buffer->buffer + oldest_record_offset) == STACKSHOT_MICRO_SNAPSHOT_MAGIC) break; /* @@ -805,7 +983,7 @@ int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark) * byte offset. 
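* (The byte-by-byte scan keys off STACKSHOT_MICRO_SNAPSHOT_MAGIC to locate
* the start of the oldest record.)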
*/ oldest_record_offset++; - } while (oldest_record_offset != telemetry_buffer_current_position); + } while (oldest_record_offset != current_buffer->current_position); /* * If needed, copyout in two chunks: from the oldest record to the end of the buffer, and then @@ -813,28 +991,28 @@ int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark) */ if (oldest_record_offset != 0) { #if TELEMETRY_DEBUG - log_telemetry_output(telemetry_buffer, oldest_record_offset, - telemetry_buffer_end_point - oldest_record_offset); + log_telemetry_output(current_buffer->buffer, oldest_record_offset, + current_buffer->end_point - oldest_record_offset); #endif - if ((result = copyout((void *)(telemetry_buffer + oldest_record_offset), buffer, - telemetry_buffer_end_point - oldest_record_offset)) != 0) { + if ((result = copyout((void *)(current_buffer->buffer + oldest_record_offset), buffer, + current_buffer->end_point - oldest_record_offset)) != 0) { *length = 0; goto out; } - *length = telemetry_buffer_end_point - oldest_record_offset; + *length = current_buffer->end_point - oldest_record_offset; } else { *length = 0; } #if TELEMETRY_DEBUG - log_telemetry_output(telemetry_buffer, 0, telemetry_buffer_current_position); + log_telemetry_output(current_buffer->buffer, 0, current_buffer->current_position); #endif - if ((result = copyout((void *)telemetry_buffer, buffer + *length, - telemetry_buffer_current_position)) != 0) { + if ((result = copyout((void *)current_buffer->buffer, buffer + *length, + current_buffer->current_position)) != 0) { *length = 0; goto out; } - *length += (uint32_t)telemetry_buffer_current_position; + *length += (uint32_t)current_buffer->current_position; out: @@ -844,7 +1022,7 @@ out: TELEMETRY_UNLOCK(); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_GATHER) | DBG_FUNC_END, telemetry_buffer_current_position, *length, telemetry_buffer_end_point, 0, 0); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_GATHER) | DBG_FUNC_END, current_buffer->current_position, *length, current_buffer->end_point, 0, (&telemetry_buffer != current_buffer)); return (result); } @@ -870,6 +1048,10 @@ out: * currently running process and takes a stackshot only if the requested process * is on-core (which makes it unsuitable for MP systems). * + * Trigger Events + * + * The boot-arg "bootprofile_type=boot" starts the timer during early boot. Using + * "wake" starts the timer at AP wake from suspend-to-RAM. 
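+ * Example boot-args line (illustrative values only):
+ * bootprofile_type=wake bootprofile_interval_ms=100 bootprofile_buffer_size=4194304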
*/ #define BOOTPROFILE_MAX_BUFFER_SIZE (64*1024*1024) /* see also COPYSIZELIMIT_PANIC */ @@ -886,6 +1068,13 @@ char bootprofile_proc_name[17]; lck_grp_t bootprofile_lck_grp; lck_mtx_t bootprofile_mtx; +enum { + kBootProfileDisabled = 0, + kBootProfileStartTimerAtBoot, + kBootProfileStartTimerAtWake +} bootprofile_type = kBootProfileDisabled; + + static timer_call_data_t bootprofile_timer_call_entry; #define BOOTPROFILE_LOCK() do { lck_mtx_lock(&bootprofile_mtx); } while(0) @@ -902,6 +1091,7 @@ stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint32_t flags, un void bootprofile_init(void) { kern_return_t ret; + char type[32]; lck_grp_init(&bootprofile_lck_grp, "bootprofile group", LCK_GRP_ATTR_NULL); lck_mtx_init(&bootprofile_mtx, &bootprofile_lck_grp, LCK_ATTR_NULL); @@ -922,10 +1112,22 @@ void bootprofile_init(void) bootprofile_proc_name[0] = '\0'; } + if (PE_parse_boot_argn("bootprofile_type", type, sizeof(type))) { + if (0 == strcmp(type, "boot")) { + bootprofile_type = kBootProfileStartTimerAtBoot; + } else if (0 == strcmp(type, "wake")) { + bootprofile_type = kBootProfileStartTimerAtWake; + } else { + bootprofile_type = kBootProfileDisabled; + } + } else { + bootprofile_type = kBootProfileDisabled; + } + clock_interval_to_absolutetime_interval(bootprofile_interval_ms, NSEC_PER_MSEC, &bootprofile_interval_abs); /* Both boot args must be set to enable */ - if ((bootprofile_buffer_size == 0) || (bootprofile_interval_abs == 0)) { + if ((bootprofile_type == kBootProfileDisabled) || (bootprofile_buffer_size == 0) || (bootprofile_interval_abs == 0)) { return; } @@ -934,22 +1136,41 @@ void bootprofile_init(void) kprintf("Boot profile: Allocation failed: %d\n", ret); return; } + bzero((void *) bootprofile_buffer, bootprofile_buffer_size); - kprintf("Boot profile: Sampling %s once per %u ms\n", bootprofile_all_procs ? "all procs" : bootprofile_proc_name, bootprofile_interval_ms); + kprintf("Boot profile: Sampling %s once per %u ms at %s\n", bootprofile_all_procs ? "all procs" : bootprofile_proc_name, bootprofile_interval_ms, + bootprofile_type == kBootProfileStartTimerAtBoot ? "boot" : (bootprofile_type == kBootProfileStartTimerAtWake ? 
"wake" : "unknown")); timer_call_setup(&bootprofile_timer_call_entry, bootprofile_timer_call, NULL); - bootprofile_next_deadline = mach_absolute_time() + bootprofile_interval_abs; - timer_call_enter_with_leeway(&bootprofile_timer_call_entry, - NULL, - bootprofile_next_deadline, - 0, - TIMER_CALL_SYS_NORMAL, - FALSE); + if (bootprofile_type == kBootProfileStartTimerAtBoot) { + bootprofile_next_deadline = mach_absolute_time() + bootprofile_interval_abs; + timer_call_enter_with_leeway(&bootprofile_timer_call_entry, + NULL, + bootprofile_next_deadline, + 0, + TIMER_CALL_SYS_NORMAL, + FALSE); + } } +void +bootprofile_wake_from_sleep(void) +{ + if (bootprofile_type == kBootProfileStartTimerAtWake) { + bootprofile_next_deadline = mach_absolute_time() + bootprofile_interval_abs; + timer_call_enter_with_leeway(&bootprofile_timer_call_entry, + NULL, + bootprofile_next_deadline, + 0, + TIMER_CALL_SYS_NORMAL, + FALSE); + } +} + + static void bootprofile_timer_call( timer_call_param_t param0 __unused, timer_call_param_t param1 __unused) diff --git a/osfmk/kern/telemetry.h b/osfmk/kern/telemetry.h index fe5cc6ca0..beca74be2 100644 --- a/osfmk/kern/telemetry.h +++ b/osfmk/kern/telemetry.h @@ -30,18 +30,24 @@ #define _KERNEL_TELEMETRY_H_ #include +#include #include #include +__BEGIN_DECLS + extern volatile boolean_t telemetry_needs_record; +extern volatile boolean_t telemetry_window_enabled; extern void telemetry_init(void); extern void compute_telemetry(void *); +extern void compute_telemetry_windowed(void); -extern void telemetry_ast(thread_t, boolean_t interrupted_userspace); +extern void telemetry_ast(thread_t, boolean_t interrupted_userspace, boolean_t is_windowed); extern int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark); +extern int telemetry_gather_windowed(user_addr_t buffer, uint32_t *length); extern void telemetry_mark_curthread(boolean_t interrupted_userspace); @@ -49,11 +55,20 @@ extern void telemetry_task_ctl(task_t task, uint32_t reason, int enable_disable) extern void telemetry_task_ctl_locked(task_t task, uint32_t reason, int enable_disable); extern void telemetry_global_ctl(int enable_disable); +extern kern_return_t telemetry_enable_window(void); +extern void telemetry_disable_window(void); + extern int telemetry_timer_event(uint64_t deadline, uint64_t interval, uint64_t leeway); #define TELEMETRY_CMD_TIMER_EVENT 1 +#define TELEMETRY_CMD_VOUCHER_NAME 2 +#define TELEMETRY_CMD_VOUCHER_STAIN TELEMETRY_CMD_VOUCHER_NAME + extern void bootprofile_init(void); +extern void bootprofile_wake_from_sleep(void); extern int bootprofile_gather(user_addr_t buffer, uint32_t *length); +__END_DECLS + #endif /* _KERNEL_TELEMETRY_H_ */ diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 6f6fd6ff9..f8556bbab 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -123,6 +123,7 @@ #include #include +#include #include #include @@ -138,6 +139,7 @@ #include #include #include +#include static struct zone *thread_zone; static lck_grp_attr_t thread_lck_grp_attr; @@ -169,13 +171,17 @@ extern int debug_task; int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */ int task_threadmax = CONFIG_THREAD_MAX; -static uint64_t thread_unique_id = 0; +static uint64_t thread_unique_id = 100; struct _thread_ledger_indices thread_ledgers = { -1 }; static ledger_template_t thread_ledger_template = NULL; void init_thread_ledgers(void); int task_disable_cpumon(task_t task); +#if CONFIG_JETSAM +void jetsam_on_ledger_cpulimit_exceeded(void); +#endif + /* * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry. * @@ -220,6 +226,14 @@ thread_bootstrap(void) thread_template.saved_mode = TH_MODE_NONE; thread_template.safe_release = 0; + thread_template.sfi_class = SFI_CLASS_UNSPECIFIED; + thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED; + + thread_template.active = 0; + thread_template.started = 0; + thread_template.static_param = 0; + thread_template.policy_reset = 0; + thread_template.priority = 0; thread_template.sched_pri = 0; thread_template.max_priority = 0; @@ -230,16 +244,20 @@ thread_bootstrap(void) thread_template.pending_promoter[1] = NULL; thread_template.rwlock_count = 0; +#if MACH_ASSERT + thread_template.SHARE_COUNT = 0; + thread_template.BG_COUNT = 0; +#endif /* MACH_ASSERT */ + thread_template.realtime.deadline = UINT64_MAX; - thread_template.current_quantum = 0; + thread_template.quantum_remaining = 0; thread_template.last_run_time = 0; - thread_template.last_quantum_refill_time = 0; thread_template.computation_metered = 0; thread_template.computation_epoch = 0; -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) thread_template.sched_stamp = 0; thread_template.pri_shift = INT8_MAX; thread_template.sched_usage = 0; @@ -260,6 +278,8 @@ thread_bootstrap(void) thread_template.vtimer_prof_save = 0; thread_template.vtimer_rlim_save = 0; + thread_template.wait_sfi_begin_time = 0; + thread_template.wait_timer_is_set = FALSE; thread_template.wait_timer_active = 0; @@ -268,8 +288,6 @@ thread_bootstrap(void) thread_template.special_handler.handler = special_handler; thread_template.special_handler.next = NULL; - thread_template.funnel_lock = THR_FUNNEL_NULL; - thread_template.funnel_state = 0; thread_template.recover = (vm_offset_t)NULL; thread_template.map = VM_MAP_NULL; @@ -284,6 +302,10 @@ thread_bootstrap(void) thread_template.kpc_buf = NULL; #endif +#if HYPERVISOR + thread_template.hv_thread_target = NULL; +#endif /* HYPERVISOR */ + thread_template.t_chud = 0; thread_template.t_page_creation_count = 0; thread_template.t_page_creation_time = 0; @@ -295,13 +317,19 @@ thread_bootstrap(void) thread_template.t_ledger = LEDGER_NULL; thread_template.t_threadledger = LEDGER_NULL; +#ifdef CONFIG_BANK + thread_template.t_bankledger = LEDGER_NULL; + thread_template.t_deduct_bank_ledger_time = 0; +#endif thread_template.requested_policy = default_task_requested_policy; thread_template.effective_policy = default_task_effective_policy; thread_template.pended_policy = default_task_pended_policy; - thread_template.iotier_override = THROTTLE_LEVEL_NONE; + thread_template.usynch_override_contended_resource_count = 0; + thread_template.iotier_override = THROTTLE_LEVEL_NONE; + thread_template.thread_io_stats = NULL; 
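+ /* (thread_io_stats itself is kalloc'd later, in thread_create_internal()) */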
thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0; thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0; @@ -309,10 +337,15 @@ thread_bootstrap(void) thread_template.thread_tag = 0; + thread_template.ith_voucher_name = MACH_PORT_NULL; + thread_template.ith_voucher = IPC_VOUCHER_NULL; + init_thread = thread_template; machine_set_current_thread(&init_thread); } +extern boolean_t allow_qos_policy_set; + void thread_init(void) { @@ -339,6 +372,8 @@ thread_init(void) cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT; } + PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set)); + init_thread_ledgers(); } @@ -374,6 +409,8 @@ thread_terminate_self(void) s = splsched(); thread_lock(thread); + assert_thread_sched_count(thread); + /* * Cancel priority depression, wait for concurrent expirations * on other processors. @@ -381,6 +418,9 @@ thread_terminate_self(void) if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK; + /* If our priority was low because of a depressed yield, restore it in case we block below */ + set_sched_pri(thread, thread->priority); + if (timer_call_cancel(&thread->depress_timer)) thread->depress_timer_active--; } @@ -400,8 +440,12 @@ thread_terminate_self(void) thread_unlock(thread); splx(s); + + thread_mtx_lock(thread); + thread_policy_reset(thread); + thread_mtx_unlock(thread); task = thread->task; uthread_cleanup(task, thread->uthread, task->bsd_info); @@ -481,7 +525,6 @@ thread_deallocate( kpc_thread_destroy(thread); #endif - ipc_thread_terminate(thread); task = thread->task; @@ -500,6 +543,12 @@ thread_deallocate( if (thread->t_threadledger) ledger_dereference(thread->t_threadledger); + if (IPC_VOUCHER_NULL != thread->ith_voucher) + ipc_voucher_release(thread->ith_voucher); + + if (thread->thread_io_stats) + kfree(thread->thread_io_stats, sizeof(struct io_stat_info)); + if (thread->kernel_stack != 0) stack_free(thread); @@ -532,6 +581,9 @@ thread_terminate_daemon(void) simple_unlock(&thread_terminate_lock); (void)spllo(); + assert(thread->SHARE_COUNT == 0); + assert(thread->BG_COUNT == 0); + task = thread->task; task_lock(task); @@ -551,6 +603,9 @@ thread_terminate_daemon(void) task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1; task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2; + task->task_gpu_ns += ml_gpu_stat(thread); + + thread_update_qos_cpu_time(thread, FALSE); queue_remove(&task->threads, thread, thread_t, task_threads); task->thread_count--; @@ -594,6 +649,8 @@ void thread_terminate_enqueue( thread_t thread) { + KERNEL_DEBUG_CONSTANT(TRACEDBG_CODE(DBG_TRACE_DATA, TRACE_DATA_THREAD_TERMINATE) | DBG_FUNC_NONE, thread->thread_id, 0, 0, 0, 0); + simple_lock(&thread_terminate_lock); enqueue_tail(&thread_terminate_queue, (queue_entry_t)thread); simple_unlock(&thread_terminate_lock); @@ -685,6 +742,8 @@ thread_daemon_init(void) /* * Create a new thread. * Doesn't start the thread running. + * + * Task and tasks_threads_lock are returned locked on success. 
*/ static kern_return_t thread_create_internal( @@ -747,6 +806,16 @@ thread_create_internal( new_thread->continuation = continuation; + /* Allocate I/O Statistics structure */ + new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info)); + assert(new_thread->thread_io_stats != NULL); + bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info)); + +#if CONFIG_IOSCHED + /* Clear out the I/O Scheduling info for AppleFSCompression */ + new_thread->decmp_upl = NULL; +#endif /* CONFIG_IOSCHED */ + lck_mtx_lock(&tasks_threads_lock); task_lock(parent_task); @@ -771,6 +840,7 @@ thread_create_internal( #endif /* MACH_BSD */ ipc_thread_disable(new_thread); ipc_thread_terminate(new_thread); + kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info)); lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp); machine_thread_destroy(new_thread); zfree(thread_zone, new_thread); @@ -789,27 +859,32 @@ thread_create_internal( */ set_astledger(new_thread); } - new_thread->t_threadledger = LEDGER_NULL; /* per thread ledger is not inherited */ + + /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */ + if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template, + LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) { + + ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time); + } + + new_thread->cpu_time_last_qos = 0; +#ifdef CONFIG_BANK + new_thread->t_bankledger = LEDGER_NULL; + new_thread->t_deduct_bank_ledger_time = 0; +#endif + new_thread->t_ledger = new_thread->task->ledger; if (new_thread->t_ledger) ledger_reference(new_thread->t_ledger); +#if defined(CONFIG_SCHED_MULTIQ) + /* Cache the task's sched_group */ + new_thread->sched_group = parent_task->sched_group; +#endif /* defined(CONFIG_SCHED_MULTIQ) */ + /* Cache the task's map */ new_thread->map = parent_task->map; - /* Chain the thread onto the task's list */ - queue_enter(&parent_task->threads, new_thread, thread_t, task_threads); - parent_task->thread_count++; - - /* So terminating threads don't need to take the task lock to decrement */ - hw_atomic_add(&parent_task->active_thread_count, 1); - - /* Protected by the tasks_threads_lock */ - new_thread->thread_id = ++thread_unique_id; - - queue_enter(&threads, new_thread, thread_t, threads); - threads_count++; - timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread); timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread); @@ -840,12 +915,32 @@ thread_create_internal( new_thread->importance = new_thread->priority - new_thread->task_priority; new_thread->saved_importance = new_thread->importance; -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) new_thread->sched_stamp = sched_tick; new_thread->pri_shift = sched_pri_shift; -#endif +#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */ + + if (parent_task->max_priority <= MAXPRI_THROTTLE) { + sched_set_thread_throttled(new_thread, TRUE); + } + SCHED(compute_priority)(new_thread, FALSE); + thread_policy_create(new_thread); + + /* Chain the thread onto the task's list */ + queue_enter(&parent_task->threads, new_thread, thread_t, task_threads); + parent_task->thread_count++; + + /* So terminating threads don't need to take the task lock to decrement */ + hw_atomic_add(&parent_task->active_thread_count, 1); + + /* Protected by the tasks_threads_lock */ + new_thread->thread_id = ++thread_unique_id; + + queue_enter(&threads, new_thread, thread_t, threads); + threads_count++; + 
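Note the ordering change in thread_create_internal() just above: chaining the new thread onto parent_task->threads and the global threads list, and assigning its thread_id, now happens only after throttling, priority, and policy are set up, so nothing walking those lists under tasks_threads_lock can observe a half-initialized thread. A generic sketch of the same init-then-publish discipline, assuming xnu's queue and lck_mtx APIs (the obj type and obj_publish helper are hypothetical):

struct obj {
    int           state;   /* every field initialized before publication */
    queue_chain_t link;
};

static void
obj_publish(queue_t all_objs, struct obj *o, lck_mtx_t *list_lock)
{
    o->state = 1;           /* initialize while o is still private */

    lck_mtx_lock(list_lock);
    queue_enter(all_objs, o, struct obj *, link);   /* only now reachable */
    lck_mtx_unlock(list_lock);
}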
new_thread->active = TRUE; *out_thread = new_thread; @@ -1148,7 +1243,7 @@ thread_info_internal( * (1/(5/8) - 1). */ basic_info->cpu_usage = 0; -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) if (sched_tick_interval) { basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage * TH_USAGE_SCALE) / sched_tick_interval); @@ -1350,6 +1445,25 @@ thread_read_times( } } +uint64_t thread_get_runtime_self(void) +{ + boolean_t interrupt_state; + uint64_t runtime; + thread_t thread = NULL; + processor_t processor = NULL; + + thread = current_thread(); + + /* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */ + interrupt_state = ml_set_interrupts_enabled(FALSE); + processor = current_processor(); + timer_switch(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time(), PROCESSOR_DATA(processor, thread_timer)); + runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer)); + ml_set_interrupts_enabled(interrupt_state); + + return runtime; +} + kern_return_t thread_assign( __unused thread_t thread, @@ -1440,6 +1554,25 @@ thread_wire( } +boolean_t +set_vm_privilege(boolean_t privileged) +{ + boolean_t was_vmpriv; + + if (current_thread()->options & TH_OPT_VMPRIV) + was_vmpriv = TRUE; + else + was_vmpriv = FALSE; + + if (privileged != FALSE) + current_thread()->options |= TH_OPT_VMPRIV; + else + current_thread()->options &= ~TH_OPT_VMPRIV; + + return (was_vmpriv); +} + + /* * XXX assuming current thread only, for now... */ @@ -1594,7 +1727,7 @@ THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void) "It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. " "thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) " "ledger info: balance: %lld credit: %lld debit: %lld limit: %llu (%d%%) " - "period: %llu time since last refill (ns): %llu \n", + "period: %llu time since last refill (ns): %llu %s\n", procname, pid, tid, percentage, usage_percent, interval_sec, thread_total_time.seconds, thread_total_time.microseconds, @@ -1603,20 +1736,75 @@ THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void) lei.lei_balance, lei.lei_credit, lei.lei_debit, lei.lei_limit, limit_percent, - lei.lei_refill_period, lei.lei_last_refill); + lei.lei_refill_period, lei.lei_last_refill, + (fatal ? 
"[fatal violation]" : "")); code[0] = code[1] = 0; EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU); - EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR); + if (fatal) { + EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL); + }else { + EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR); + } EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec); EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], limit_percent); EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent); exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); if (fatal) { +#if CONFIG_JETSAM + jetsam_on_ledger_cpulimit_exceeded(); +#else task_terminate_internal(task); +#endif + } +} + +#define UPDATE_IO_STATS(info, size) \ +{ \ + info.count++; \ + info.size += size; \ +} + +#define UPDATE_IO_STATS_ATOMIC(info, size) \ +{ \ + OSIncrementAtomic64((SInt64 *)&(info.count)); \ + OSAddAtomic64(size, (SInt64 *)&(info.size)); \ +} + +void thread_update_io_stats(thread_t thread, int size, int io_flags) +{ + int io_tier; + + if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL) + return; + + if (io_flags & DKIO_READ) { + UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size); + UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size); + } + + if (io_flags & DKIO_META) { + UPDATE_IO_STATS(thread->thread_io_stats->metadata, size); + UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size); } + + if (io_flags & DKIO_PAGING) { + UPDATE_IO_STATS(thread->thread_io_stats->paging, size); + UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size); + } + + io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT); + assert (io_tier < IO_NUM_PRIORITIES); + + UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size); + UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size); + + /* Update Total I/O Counts */ + UPDATE_IO_STATS(thread->thread_io_stats->total_io, size); + UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size); + } void @@ -1638,6 +1826,7 @@ init_thread_ledgers(void) { } thread_ledgers.cpu_time = idx; + thread_ledger_template = t; } @@ -1717,12 +1906,8 @@ thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) */ if (thread->t_threadledger != LEDGER_NULL) { l = thread->t_threadledger; - /* - * The only way to get a per-thread ledger is via CPU limits. 
- */ - assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT)); - thread->t_threadledger = NULL; - ledger_dereference(l); + ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0); + ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE); thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT); } @@ -1792,123 +1977,6 @@ thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) return (0); } -int split_funnel_off = 0; -lck_grp_t *funnel_lck_grp = LCK_GRP_NULL; -lck_grp_attr_t *funnel_lck_grp_attr; -lck_attr_t *funnel_lck_attr; - -funnel_t * -funnel_alloc( - int type) -{ - lck_mtx_t *m; - funnel_t *fnl; - - if (funnel_lck_grp == LCK_GRP_NULL) { - funnel_lck_grp_attr = lck_grp_attr_alloc_init(); - - funnel_lck_grp = lck_grp_alloc_init("Funnel", funnel_lck_grp_attr); - - funnel_lck_attr = lck_attr_alloc_init(); - } - if ((fnl = (funnel_t *)kalloc(sizeof(funnel_t))) != 0){ - bzero((void *)fnl, sizeof(funnel_t)); - if ((m = lck_mtx_alloc_init(funnel_lck_grp, funnel_lck_attr)) == (lck_mtx_t *)NULL) { - kfree(fnl, sizeof(funnel_t)); - return(THR_FUNNEL_NULL); - } - fnl->fnl_mutex = m; - fnl->fnl_type = type; - } - return(fnl); -} - -void -funnel_free( - funnel_t * fnl) -{ - lck_mtx_free(fnl->fnl_mutex, funnel_lck_grp); - if (fnl->fnl_oldmutex) - lck_mtx_free(fnl->fnl_oldmutex, funnel_lck_grp); - kfree(fnl, sizeof(funnel_t)); -} - -void -funnel_lock( - funnel_t * fnl) -{ - lck_mtx_lock(fnl->fnl_mutex); - fnl->fnl_mtxholder = current_thread(); -} - -void -funnel_unlock( - funnel_t * fnl) -{ - lck_mtx_unlock(fnl->fnl_mutex); - fnl->fnl_mtxholder = NULL; - fnl->fnl_mtxrelease = current_thread(); -} - -funnel_t * -thread_funnel_get( - void) -{ - thread_t th = current_thread(); - - if (th->funnel_state & TH_FN_OWNED) { - return(th->funnel_lock); - } - return(THR_FUNNEL_NULL); -} - -boolean_t -thread_funnel_set( - funnel_t * fnl, - boolean_t funneled) -{ - thread_t cur_thread; - boolean_t funnel_state_prev; - boolean_t intr; - - cur_thread = current_thread(); - funnel_state_prev = ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED); - - if (funnel_state_prev != funneled) { - intr = ml_set_interrupts_enabled(FALSE); - - if (funneled == TRUE) { - if (cur_thread->funnel_lock) - panic("Funnel lock called when holding one %p", cur_thread->funnel_lock); - KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, - fnl, 1, 0, 0, 0); - funnel_lock(fnl); - KERNEL_DEBUG(0x6032434 | DBG_FUNC_NONE, - fnl, 1, 0, 0, 0); - cur_thread->funnel_state |= TH_FN_OWNED; - cur_thread->funnel_lock = fnl; - } else { - if(cur_thread->funnel_lock->fnl_mutex != fnl->fnl_mutex) - panic("Funnel unlock when not holding funnel"); - cur_thread->funnel_state &= ~TH_FN_OWNED; - KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, - fnl, 1, 0, 0, 0); - - cur_thread->funnel_lock = THR_FUNNEL_NULL; - funnel_unlock(fnl); - } - (void)ml_set_interrupts_enabled(intr); - } else { - /* if we are trying to acquire funnel recursively - * check for funnel to be held already - */ - if (funneled && (fnl->fnl_mutex != cur_thread->funnel_lock->fnl_mutex)) { - panic("thread_funnel_set: already holding a different funnel"); - } - } - return(funnel_state_prev); -} - static void sched_call_null( __unused int type, @@ -1990,6 +2058,295 @@ thread_should_halt( return (thread_should_halt_fast(th)); } +/* + * thread_set_voucher_name - reset the voucher port name bound to this thread + * + * Conditions: nothing locked + * + * If we already converted the previous name to a cached voucher + * reference, then we discard 
that reference here. The next lookup + * will cache it again. + */ + +kern_return_t +thread_set_voucher_name(mach_port_name_t voucher_name) +{ + thread_t thread = current_thread(); + ipc_voucher_t new_voucher = IPC_VOUCHER_NULL; + ipc_voucher_t voucher; +#ifdef CONFIG_BANK + ledger_t bankledger = NULL; +#endif + + if (MACH_PORT_DEAD == voucher_name) + return KERN_INVALID_RIGHT; + + /* + * aggressively convert to voucher reference + */ + if (MACH_PORT_VALID(voucher_name)) { + new_voucher = convert_port_name_to_voucher(voucher_name); + if (IPC_VOUCHER_NULL == new_voucher) + return KERN_INVALID_ARGUMENT; + } +#ifdef CONFIG_BANK + bankledger = bank_get_voucher_ledger(new_voucher); +#endif + + thread_mtx_lock(thread); + voucher = thread->ith_voucher; + thread->ith_voucher_name = voucher_name; + thread->ith_voucher = new_voucher; +#ifdef CONFIG_BANK + bank_swap_thread_bank_ledger(thread, bankledger); +#endif + thread_mtx_unlock(thread); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), + (uintptr_t)voucher_name, + VM_KERNEL_ADDRPERM((uintptr_t)new_voucher), + 1, 0); + + if (IPC_VOUCHER_NULL != voucher) + ipc_voucher_release(voucher); + + return KERN_SUCCESS; +} + +/* + * thread_get_mach_voucher - return a voucher reference for the specified thread voucher + * + * Conditions: nothing locked + * + * A reference to the voucher may be lazily pending, if someone set the voucher name + * but nobody has done a lookup yet. In that case, we'll have to do the equivalent + * lookup here. + * + * NOTE: At the moment, there is no distinction between the current and effective + * vouchers because we only set them at the thread level currently. + */ +kern_return_t +thread_get_mach_voucher( + thread_act_t thread, + mach_voucher_selector_t __unused which, + ipc_voucher_t *voucherp) +{ + ipc_voucher_t voucher; + mach_port_name_t voucher_name; + + if (THREAD_NULL == thread) + return KERN_INVALID_ARGUMENT; + + thread_mtx_lock(thread); + voucher = thread->ith_voucher; + + /* if already cached, just return a ref */ + if (IPC_VOUCHER_NULL != voucher) { + ipc_voucher_reference(voucher); + thread_mtx_unlock(thread); + *voucherp = voucher; + return KERN_SUCCESS; + } + + voucher_name = thread->ith_voucher_name; + + /* convert the name to a port, then voucher reference */ + if (MACH_PORT_VALID(voucher_name)) { + ipc_port_t port; + + if (KERN_SUCCESS != + ipc_object_copyin(thread->task->itk_space, voucher_name, + MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) { + thread->ith_voucher_name = MACH_PORT_NULL; + thread_mtx_unlock(thread); + *voucherp = IPC_VOUCHER_NULL; + return KERN_SUCCESS; + } + + /* convert to a voucher ref to return, and cache a ref on thread */ + voucher = convert_port_to_voucher(port); + ipc_voucher_reference(voucher); + thread->ith_voucher = voucher; + thread_mtx_unlock(thread); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), + (uintptr_t)port, + VM_KERNEL_ADDRPERM((uintptr_t)voucher), + 2, 0); + + + ipc_port_release_send(port); + } else + thread_mtx_unlock(thread); + + *voucherp = voucher; + return KERN_SUCCESS; +} + +/* + * thread_set_mach_voucher - set a voucher reference for the specified thread voucher + * + * Conditions: caller holds a reference on the voucher. + * nothing locked. + * + * We grab another reference to the voucher and bind it to the thread. Any lazy + * binding is erased.
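thread_set_voucher_name() above stores both the raw port name and, when possible, an eagerly converted voucher reference; thread_get_mach_voucher() and thread_swap_mach_voucher() then resolve a still-pending name on first use and cache the result on the thread. A condensed sketch of that lazy name-to-reference binding (cached_voucher_ref is a hypothetical helper; the real get path goes through ipc_object_copyin and convert_port_to_voucher as shown above):

ipc_voucher_t
cached_voucher_ref(thread_t thread)
{
    ipc_voucher_t v;

    thread_mtx_lock(thread);
    v = thread->ith_voucher;
    if (IPC_VOUCHER_NULL == v && MACH_PORT_VALID(thread->ith_voucher_name)) {
        /* first lookup: convert the pending name and cache the reference */
        v = convert_port_name_to_voucher(thread->ith_voucher_name);
        thread->ith_voucher = v;
    }
    if (IPC_VOUCHER_NULL != v)
        ipc_voucher_reference(v);   /* caller gets its own reference */
    thread_mtx_unlock(thread);

    return v;
}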
The old voucher reference associated with the thread is + * discarded. + */ +kern_return_t +thread_set_mach_voucher( + thread_t thread, + ipc_voucher_t voucher) +{ + ipc_voucher_t old_voucher; +#ifdef CONFIG_BANK + ledger_t bankledger = NULL; +#endif + + if (THREAD_NULL == thread) + return KERN_INVALID_ARGUMENT; + + if (thread != current_thread() || thread->started) + return KERN_INVALID_ARGUMENT; + + + ipc_voucher_reference(voucher); +#ifdef CONFIG_BANK + bankledger = bank_get_voucher_ledger(voucher); +#endif + thread_mtx_lock(thread); + old_voucher = thread->ith_voucher; + thread->ith_voucher = voucher; + thread->ith_voucher_name = MACH_PORT_NULL; +#ifdef CONFIG_BANK + bank_swap_thread_bank_ledger(thread, bankledger); +#endif + thread_mtx_unlock(thread); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), + (uintptr_t)MACH_PORT_NULL, + VM_KERNEL_ADDRPERM((uintptr_t)voucher), + 3, 0); + + ipc_voucher_release(old_voucher); + + return KERN_SUCCESS; +} + +/* + * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher + * + * Conditions: caller holds a reference on the new and presumed old voucher(s). + * nothing locked. + * + * If the old voucher is still the same as passed in, replace it with new voucher + * and discard the old (and the reference passed in). Otherwise, discard the new + * and return an updated old voucher. + */ +kern_return_t +thread_swap_mach_voucher( + thread_t thread, + ipc_voucher_t new_voucher, + ipc_voucher_t *in_out_old_voucher) +{ + mach_port_name_t old_voucher_name; + ipc_voucher_t old_voucher; +#ifdef CONFIG_BANK + ledger_t bankledger = NULL; +#endif + + if (THREAD_NULL == thread) + return KERN_INVALID_TASK; + + if (thread != current_thread() || thread->started) + return KERN_INVALID_ARGUMENT; + +#ifdef CONFIG_BANK + bankledger = bank_get_voucher_ledger(new_voucher); +#endif + + thread_mtx_lock(thread); + + old_voucher = thread->ith_voucher; + + if (IPC_VOUCHER_NULL == old_voucher) { + old_voucher_name = thread->ith_voucher_name; + + /* perform lazy binding if needed */ + if (MACH_PORT_VALID(old_voucher_name)) { + old_voucher = convert_port_name_to_voucher(old_voucher_name); + thread->ith_voucher_name = MACH_PORT_NULL; + thread->ith_voucher = old_voucher; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), + (uintptr_t)old_voucher_name, + VM_KERNEL_ADDRPERM((uintptr_t)old_voucher), + 4, 0); + + } + } + + /* swap in new voucher, if old voucher matches the one supplied */ + if (old_voucher == *in_out_old_voucher) { + ipc_voucher_reference(new_voucher); + thread->ith_voucher = new_voucher; + thread->ith_voucher_name = MACH_PORT_NULL; +#ifdef CONFIG_BANK + bank_swap_thread_bank_ledger(thread, bankledger); +#endif + thread_mtx_unlock(thread); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), + (uintptr_t)MACH_PORT_NULL, + VM_KERNEL_ADDRPERM((uintptr_t)new_voucher), + 5, 0); + + ipc_voucher_release(old_voucher); + + *in_out_old_voucher = IPC_VOUCHER_NULL; + return KERN_SUCCESS; + } + + /* Otherwise, just return old voucher reference */ + ipc_voucher_reference(old_voucher); + thread_mtx_unlock(thread); + *in_out_old_voucher = old_voucher; + return KERN_SUCCESS; +} + +/* + * thread_get_current_voucher_origin_pid - get the pid of the originator of the current
voucher. + */ +kern_return_t +thread_get_current_voucher_origin_pid( + int32_t *pid) +{ + uint32_t buf_size; + kern_return_t kr; + thread_t thread = current_thread(); + + buf_size = sizeof(*pid); + kr = mach_voucher_attr_command(thread->ith_voucher, + MACH_VOUCHER_ATTR_KEY_BANK, + BANK_ORIGINATOR_PID, + NULL, + 0, + (mach_voucher_attr_content_t)pid, + &buf_size); + + return kr; +} + #if CONFIG_DTRACE uint32_t dtrace_get_thread_predcache(thread_t thread) { diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index 47ffc95ff..a4074c301 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,8 +99,6 @@ #ifdef MACH_KERNEL_PRIVATE -#include - #include #include @@ -110,10 +108,11 @@ #include #include #include -#include +#include #include #include #include +#include #include #include #include @@ -130,7 +129,10 @@ struct thread { * NOTE: The runq field in the thread structure has an unusual * locking protocol. If its value is PROCESSOR_NULL, then it is * locked by the thread_lock, but if its value is something else - * then it is locked by the associated run queue lock. + * then it is locked by the associated run queue lock. It is + * set to PROCESSOR_NULL without holding the thread lock, but the + * transition from PROCESSOR_NULL to non-null must be done + * under the thread lock and the run queue lock. * * When the thread is on a wait queue, these first three fields * are treated as an unofficial union with a wait_queue_element. @@ -164,11 +166,6 @@ struct thread { * WITHOUT locking */ /* Data updated/used in thread_invoke */ - int funnel_state; - struct funnel_lock *funnel_lock; /* Non-reentrancy funnel */ -#define TH_FN_OWNED 0x1 /* we own the funnel */ -#define TH_FN_REFUNNEL 0x2 /* re-acquire funnel on dispatch */ - vm_offset_t kernel_stack; /* current kernel stack */ vm_offset_t reserved_stack; /* reserved kernel stack */ @@ -189,12 +186,15 @@ struct thread { /* Scheduling information */ sched_mode_t sched_mode; /* scheduling mode */ sched_mode_t saved_mode; /* saved mode during forced mode demotion */ + + sfi_class_id_t sfi_class; /* SFI class (XXX Updated on CSW/QE/AST) */ + sfi_class_id_t sfi_wait_class; /* Currently in SFI wait for this class, protected by sfi_lock */ - unsigned int sched_flags; /* current flag bits */ + uint32_t sched_flags; /* current flag bits */ #define TH_SFLAG_FAIRSHARE_TRIPPED 0x0001 /* fairshare scheduling activated */ #define TH_SFLAG_FAILSAFE 0x0002 /* fail-safe has tripped */ -#define TH_SFLAG_THROTTLED 0x0004 /* owner task in throttled state */ -#define TH_SFLAG_DEMOTED_MASK (TH_SFLAG_THROTTLED | TH_SFLAG_FAILSAFE | TH_SFLAG_FAIRSHARE_TRIPPED) +#define TH_SFLAG_THROTTLED 0x0004 /* thread treated as background for scheduler decay purposes */ +#define TH_SFLAG_DEMOTED_MASK (TH_SFLAG_THROTTLE_DEMOTED | TH_SFLAG_FAILSAFE | TH_SFLAG_FAIRSHARE_TRIPPED) /* saved_mode contains previous sched_mode */ #define TH_SFLAG_PROMOTED 0x0008 /* sched pri has been promoted */ #define TH_SFLAG_ABORT 0x0010 /* abort interruptible waits */ @@ -207,20 +207,13 @@ struct thread { #define TH_SFLAG_EAGERPREEMPT 0x0200 /* Any preemption of this thread should be treated as if AST_URGENT applied */ #define TH_SFLAG_RW_PROMOTED 0x0400 /* sched pri has been promoted due to blocking with RW lock held */ #define TH_SFLAG_PROMOTED_MASK (TH_SFLAG_PROMOTED | TH_SFLAG_RW_PROMOTED) +#define 
TH_SFLAG_THROTTLE_DEMOTED 0x0800 /* throttled thread forced to timeshare mode (may be applied in addition to failsafe) */ #define TH_SFLAG_RW_PROMOTED_BIT (10) /* 0x400 */ -/* - * A thread can either be completely unthrottled, about to be throttled, - * throttled (TH_SFLAG_THROTTLED), or about to be unthrottled - */ -#define TH_SFLAG_PENDING_THROTTLE_DEMOTION 0x1000 /* Pending sched_mode demotion */ -#define TH_SFLAG_PENDING_THROTTLE_PROMOTION 0x2000 /* Pending sched_mode promition */ -#define TH_SFLAG_PENDING_THROTTLE_MASK (TH_SFLAG_PENDING_THROTTLE_DEMOTION | TH_SFLAG_PENDING_THROTTLE_PROMOTION) - int16_t sched_pri; /* scheduled (current) priority */ int16_t priority; /* base priority */ - int16_t max_priority; /* max base priority */ + int16_t max_priority; /* copy of max base priority */ int16_t task_priority; /* copy of task base priority */ #if defined(CONFIG_SCHED_GRRR) #if 0 @@ -235,7 +228,12 @@ struct thread { uint32_t rwlock_count; /* Number of lck_rw_t locks held by thread */ +#if MACH_ASSERT + uint32_t SHARE_COUNT, BG_COUNT; /* This thread's contribution to global sched counters (temporary debugging) */ +#endif /* MACH_ASSERT */ + integer_t importance; /* task-relative importance */ + /* Priority depression expiration */ integer_t depress_timer_active; timer_call_data_t depress_timer; @@ -249,9 +247,12 @@ struct thread { } realtime; uint32_t was_promoted_on_wakeup; - uint32_t current_quantum; /* duration of current quantum */ uint64_t last_run_time; /* time when thread was switched away from */ - uint64_t last_quantum_refill_time; /* time when current_quantum was refilled after expiration */ + uint32_t quantum_remaining; /* duration of current quantum remaining */ + +#if defined(CONFIG_SCHED_MULTIQ) + sched_group_t sched_group; +#endif /* defined(CONFIG_SCHED_MULTIQ) */ /* Data used during setrun/dispatch */ timer_data_t system_timer; /* system mode timer */ @@ -273,13 +274,14 @@ struct thread { #endif /* Statistics and timesharing calculations */ -#if defined(CONFIG_SCHED_TRADITIONAL) +#if defined(CONFIG_SCHED_TIMESHARE_CORE) natural_t sched_stamp; /* last scheduler tick */ natural_t sched_usage; /* timesharing cpu usage [sched] */ natural_t pri_shift; /* usage -> priority from pset */ natural_t cpu_usage; /* instrumented cpu usage [%cpu] */ natural_t cpu_delta; /* accumulated cpu_usage delta */ -#endif +#endif /* CONFIG_SCHED_TIMESHARE_CORE */ + uint32_t c_switch; /* total context switches */ uint32_t p_switch; /* total processor switches */ uint32_t ps_switch; /* total pset switches */ @@ -293,6 +295,9 @@ struct thread { uint64_t vtimer_prof_save; uint64_t vtimer_rlim_save; + /* Timing for wait state */ + uint64_t wait_sfi_begin_time; /* start time for thread waiting in SFI */ + /* Timed wait expiration */ timer_call_data_t wait_timer; integer_t wait_timer_active; @@ -375,6 +380,7 @@ struct thread { active:1, /* Thread is active and has not been terminated */ started:1, /* Thread has been started after creation */ static_param:1, /* Disallow policy parameter changes */ + policy_reset:1, /* Disallow policy parameter changes on terminating threads */ :0; /* Return Handers */ @@ -433,6 +439,11 @@ struct thread { uint64_t kperf_pet_cnt; #endif +#if HYPERVISOR + /* hypervisor virtual CPU object associated with this thread */ + void *hv_thread_target; +#endif /* HYPERVISOR */ + uint64_t thread_id; /*system wide unique thread-id*/ /* Statistics accumulated per-thread and aggregated per-task */ @@ -440,13 +451,22 @@ struct thread { uint32_t syscalls_mach; ledger_t t_ledger; ledger_t 
t_threadledger; /* per thread ledger */ + uint64_t cpu_time_last_qos; +#ifdef CONFIG_BANK + ledger_t t_bankledger; /* ledger to charge someone */ + uint64_t t_deduct_bank_ledger_time; /* cpu time to be deducted from bank ledger */ +#endif /* policy is protected by the task lock */ struct task_requested_policy requested_policy; struct task_effective_policy effective_policy; struct task_pended_policy pended_policy; + /* usynch override is protected by the task lock, eventually will be thread mutex */ + int usynch_override_contended_resource_count; + int iotier_override; /* atomic operations to set, cleared on ret to user */ + io_stat_info_t thread_io_stats; /* per-thread I/O statistics */ integer_t saved_importance; /* saved task-relative importance */ @@ -464,6 +484,12 @@ struct thread { int16_t suspend_count; /* User level suspensions */ int16_t user_stop_count; + + mach_port_name_t ith_voucher_name; + ipc_voucher_t ith_voucher; +#if CONFIG_IOSCHED + void *decmp_upl; +#endif /* CONFIG_IOSCHED */ }; #define ith_state saved.receive.state @@ -567,6 +593,11 @@ extern void thread_task_priority( integer_t priority, integer_t max_priority); +extern kern_return_t thread_set_mode_and_absolute_pri( + thread_t thread, + integer_t policy, + integer_t priority); + extern void thread_policy_reset( thread_t thread); @@ -644,17 +675,9 @@ extern kern_return_t machine_thread_inherit_taskwide( thread_t thread, task_t parent_task); -/* - * XXX Funnel locks XXX - */ - -struct funnel_lock { - int fnl_type; /* funnel type */ - lck_mtx_t *fnl_mutex; /* underlying mutex for the funnel */ - void * fnl_mtxholder; /* thread (last)holdng mutex */ - void * fnl_mtxrelease; /* thread (last)releasing mutex */ - lck_mtx_t *fnl_oldmutex; /* Mutex before collapsing split funnel */ -}; +extern kern_return_t machine_thread_set_tsd_base( + thread_t thread, + mach_vm_offset_t tsd_base); typedef struct ReturnHandler ReturnHandler; @@ -671,18 +694,16 @@ extern void special_handler( ReturnHandler *rh, thread_t thread); +extern void +thread_update_qos_cpu_time( + thread_t thread, + boolean_t lock_needed); + void act_machine_sv_free(thread_t, int); vm_offset_t min_valid_stack_address(void); vm_offset_t max_valid_stack_address(void); -extern void funnel_lock( - struct funnel_lock *lock); - -extern void funnel_unlock( - struct funnel_lock *lock); - - static inline uint16_t thread_set_tag_internal(thread_t thread, uint16_t tag) { return __sync_fetch_and_or(&thread->thread_tag, tag); } @@ -691,6 +712,13 @@ static inline uint16_t thread_get_tag_internal(thread_t thread) { return thread->thread_tag; } +typedef struct { + int qos_pri[THREAD_QOS_LAST]; + int qos_iotier[THREAD_QOS_LAST]; + uint32_t qos_through_qos[THREAD_QOS_LAST]; + uint32_t qos_latency_qos[THREAD_QOS_LAST]; +} qos_policy_params_t; + #else /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS @@ -711,10 +739,6 @@ __END_DECLS __BEGIN_DECLS - -extern uint64_t thread_tid( - thread_t thread); - extern uint64_t thread_dispatchqaddr( thread_t thread); @@ -722,6 +746,15 @@ __END_DECLS #endif /* KERNEL_PRIVATE */ +#ifdef KERNEL +__BEGIN_DECLS + +extern uint64_t thread_tid(thread_t thread); + +__END_DECLS + +#endif /* KERNEL */ + __BEGIN_DECLS #ifdef XNU_KERNEL_PRIVATE @@ -780,27 +813,13 @@ extern struct _thread_ledger_indices thread_ledgers; extern int thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns); extern int thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns); -typedef struct funnel_lock funnel_t; - -#define THR_FUNNEL_NULL (funnel_t *)0 - 
-extern funnel_t *funnel_alloc( - int type); - -extern void funnel_free( - funnel_t *lock); - -extern funnel_t *thread_funnel_get(void); - -extern boolean_t thread_funnel_set( - funnel_t *lock, - boolean_t funneled); - extern void thread_read_times( thread_t thread, time_value_t *user_time, time_value_t *system_time); +extern uint64_t thread_get_runtime_self(void); + extern void thread_setuserstack( thread_t thread, mach_vm_offset_t user_stack); @@ -813,6 +832,10 @@ extern void thread_setentrypoint( thread_t thread, mach_vm_offset_t entry); +extern kern_return_t thread_set_tsd_base( + thread_t thread, + mach_vm_offset_t tsd_base); + extern kern_return_t thread_setsinglestep( thread_t thread, int on); @@ -842,6 +865,7 @@ extern kern_return_t thread_wire_internal( boolean_t wired, boolean_t *prev_state); + extern kern_return_t thread_dup(thread_t); typedef void (*sched_call_t)( @@ -859,12 +883,18 @@ extern void thread_static_param( thread_t thread, boolean_t state); +extern boolean_t thread_is_static_param( + thread_t thread); + extern kern_return_t thread_policy_set_internal( thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t count); +extern boolean_t thread_has_qos_policy(thread_t thread); + +extern kern_return_t thread_remove_qos_policy(thread_t thread); extern task_t get_threadtask(thread_t); #define thread_is_64bit(thd) \ @@ -921,9 +951,14 @@ void guard_ast(thread_t thread); extern void fd_guard_ast(thread_t thread); extern void mach_port_guard_ast(thread_t thread); extern void thread_guard_violation(thread_t thread, unsigned type); +extern void thread_update_io_stats(thread_t thread, int size, int io_flags); + +extern kern_return_t thread_set_voucher_name(mach_port_name_t name); +extern kern_return_t thread_get_current_voucher_origin_pid(int32_t *pid); #endif /* XNU_KERNEL_PRIVATE */ + /*! @function kernel_thread_start @abstract Create a kernel thread. @discussion This function takes three input parameters, namely reference to the function that the thread should execute, caller specified data and a reference which is used to return the newly created kernel thread. The function returns KERN_SUCCESS on success or an appropriate kernel code type indicating the error. It may be noted that the caller is responsible for explicitly releasing the reference to the created thread when no longer needed. This should be done by calling thread_deallocate(new_thread). @@ -941,6 +976,7 @@ extern kern_return_t kernel_thread_start( void thread_set_eager_preempt(thread_t thread); void thread_clear_eager_preempt(thread_t thread); extern ipc_port_t convert_thread_to_port(thread_t); +extern boolean_t set_vm_privilege(boolean_t); #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/osfmk/kern/thread_act.c b/osfmk/kern/thread_act.c index 0f2a4dc3a..86041cd80 100644 --- a/osfmk/kern/thread_act.c +++ b/osfmk/kern/thread_act.c @@ -484,7 +484,7 @@ thread_set_state_internal( thread_mtx_unlock(thread); - if (thread_stop(thread, FALSE)) { + if (thread_stop(thread, TRUE)) { thread_mtx_lock(thread); result = machine_thread_set_state( thread, flavor, state, state_count); @@ -660,6 +660,51 @@ thread_getstatus( return (thread_get_state(thread, flavor, tstate, count)); } +/* + * Change thread's machine-dependent userspace TSD base. + * Called with nothing locked. Returns same way. 
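Both the thread_stop(thread, TRUE) change in thread_set_state_internal() above and the new thread_set_tsd_base() that follows rely on the same recipe for mutating another thread's machine state: hold it, drop the mutex, stop it (TRUE means wait for it to come to a stop even in kernel mode), mutate, unstop, release. A stripped-down sketch of that recipe (modify_remote_thread and machine_mutate are hypothetical names; the hold/stop/unstop/release calls are the real xnu primitives used below):

kern_return_t
modify_remote_thread(thread_t thread)
{
    kern_return_t result;

    /* caller holds thread_mtx; thread != current_thread() */
    thread_hold(thread);
    thread_mtx_unlock(thread);

    if (thread_stop(thread, TRUE)) {        /* wait until truly stopped */
        thread_mtx_lock(thread);
        result = machine_mutate(thread);    /* safe: target cannot run */
        thread_unstop(thread);
    } else {
        thread_mtx_lock(thread);
        result = KERN_ABORTED;              /* the stop was interrupted */
    }

    thread_release(thread);
    return result;
}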
+ */ +kern_return_t +thread_set_tsd_base( + thread_t thread, + mach_vm_offset_t tsd_base) +{ + kern_return_t result = KERN_SUCCESS; + + if (thread == THREAD_NULL) + return (KERN_INVALID_ARGUMENT); + + thread_mtx_lock(thread); + + if (thread->active) { + if (thread != current_thread()) { + thread_hold(thread); + + thread_mtx_unlock(thread); + + if (thread_stop(thread, TRUE)) { + thread_mtx_lock(thread); + result = machine_thread_set_tsd_base(thread, tsd_base); + thread_unstop(thread); + } + else { + thread_mtx_lock(thread); + result = KERN_ABORTED; + } + + thread_release(thread); + } + else + result = machine_thread_set_tsd_base(thread, tsd_base); + } + else + result = KERN_TERMINATED; + + thread_mtx_unlock(thread); + + return (result); +} + /* * install_special_handler: * diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c index 610b94991..99ffcc9b5 100644 --- a/osfmk/kern/thread_call.c +++ b/osfmk/kern/thread_call.c @@ -154,10 +154,10 @@ disable_ints_and_lock(void) } static inline void -enable_ints_and_unlock(void) +enable_ints_and_unlock(spl_t s) { thread_call_unlock(); - (void)spllo(); + splx(s); } @@ -307,6 +307,7 @@ thread_call_initialize(void) kern_return_t result; thread_t thread; int i; + spl_t s; i = sizeof (thread_call_data_t); thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call"); @@ -332,7 +333,7 @@ thread_call_initialize(void) thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], THREAD_CALL_PRIORITY_KERNEL, 1, TRUE); thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_HIGH], THREAD_CALL_PRIORITY_HIGH, THREAD_CALL_THREAD_MIN, FALSE); - disable_ints_and_lock(); + s = disable_ints_and_lock(); queue_init(&thread_call_internal_queue); for ( @@ -346,7 +347,7 @@ thread_call_initialize(void) thread_call_daemon_awake = TRUE; - enable_ints_and_unlock(); + enable_ints_and_unlock(s); result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, NULL, BASEPRI_PREEMPT + 1, &thread); if (result != KERN_SUCCESS) @@ -522,7 +523,7 @@ _set_delayed_call_timer( timer_call_enter_with_leeway(&group->delayed_timer, NULL, call->tc_soft_deadline, leeway, TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LEEWAY, - ((call->tc_soft_deadline & 0x1) == 0x1)); + ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED)); } /* @@ -925,17 +926,13 @@ thread_call_enter_delayed_internal( else deadline += slop; - /* Bit 0 of the "soft" deadline indicates that - * this particular callout requires rate-limiting - * behaviour. Maintain the invariant deadline >= soft_deadline - */ - deadline |= 1; if (ratelimited) { - call->tc_soft_deadline |= 0x1ULL; + call->tc_flags |= TIMER_CALL_RATELIMITED; } else { - call->tc_soft_deadline &= ~0x1ULL; + call->tc_flags &= ~TIMER_CALL_RATELIMITED; } + call->tc_call.param1 = param1; call->ttd = (sdeadline > abstime) ? (sdeadline - abstime) : 0; @@ -1112,7 +1109,7 @@ sched_call_thread( * if the client has so requested. 
*/ static void -thread_call_finish(thread_call_t call) +thread_call_finish(thread_call_t call, spl_t *s) { boolean_t dowake = FALSE; @@ -1138,11 +1135,11 @@ thread_call_finish(thread_call_t call) panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func); } - enable_ints_and_unlock(); + enable_ints_and_unlock(*s); zfree(thread_call_zone, call); - (void)disable_ints_and_lock(); + *s = disable_ints_and_lock(); } } @@ -1157,6 +1154,7 @@ thread_call_thread( { thread_t self = current_thread(); boolean_t canwait; + spl_t s; if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT); @@ -1172,7 +1170,7 @@ thread_call_thread( panic("thread_terminate() returned?"); } - (void)disable_ints_and_lock(); + s = disable_ints_and_lock(); thread_sched_call(self, group->sched_call); @@ -1202,7 +1200,7 @@ thread_call_thread( } else canwait = FALSE; - enable_ints_and_unlock(); + enable_ints_and_unlock(s); KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE, @@ -1224,13 +1222,11 @@ thread_call_thread( pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1); } - (void)thread_funnel_set(self->funnel_lock, FALSE); /* XXX */ - - (void) disable_ints_and_lock(); + s = disable_ints_and_lock(); if (canwait) { /* Frees if so desired */ - thread_call_finish(call); + thread_call_finish(call, &s); } } @@ -1273,7 +1269,7 @@ thread_call_thread( panic("kcall worker unable to assert wait?"); } - enable_ints_and_unlock(); + enable_ints_and_unlock(s); thread_block_parameter((thread_continue_t)thread_call_thread, group); } else { @@ -1282,14 +1278,14 @@ thread_call_thread( wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */ - enable_ints_and_unlock(); + enable_ints_and_unlock(s); thread_block_parameter((thread_continue_t)thread_call_thread, group); /* NOTREACHED */ } } - enable_ints_and_unlock(); + enable_ints_and_unlock(s); thread_terminate(self); /* NOTREACHED */ @@ -1306,8 +1302,9 @@ thread_call_daemon_continue(__unused void *arg) int i; kern_return_t kr; thread_call_group_t group; + spl_t s; - (void)disable_ints_and_lock(); + s = disable_ints_and_lock(); /* Starting at zero happens to be high-priority first. */ for (i = 0; i < THREAD_CALL_GROUP_COUNT; i++) { @@ -1315,7 +1312,7 @@ thread_call_daemon_continue(__unused void *arg) while (thread_call_group_should_add_thread(group)) { group->active_count++; - enable_ints_and_unlock(); + enable_ints_and_unlock(s); kr = thread_call_thread_create(group); if (kr != KERN_SUCCESS) { @@ -1324,11 +1321,11 @@ thread_call_daemon_continue(__unused void *arg) * We can try again later. */ delay(10000); /* 10 ms */ - (void)disable_ints_and_lock(); + s = disable_ints_and_lock(); goto out; } - (void)disable_ints_and_lock(); + s = disable_ints_and_lock(); } } @@ -1336,7 +1333,7 @@ out: thread_call_daemon_awake = FALSE; wait_queue_assert_wait(&daemon_wqueue, NO_EVENT, THREAD_UNINT, 0); - enable_ints_and_unlock(); + enable_ints_and_unlock(s); thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL); /* NOTREACHED */ @@ -1396,13 +1393,7 @@ thread_call_delayed_timer( while (!queue_end(&group->delayed_queue, qe(call))) { if (call->tc_soft_deadline <= timestamp) { - /* Bit 0 of the "soft" deadline indicates that - * this particular callout is rate-limited - * and hence shouldn't be processed before its - * hard deadline. Rate limited timers aren't - * skipped when a forcible reevaluation is in progress. 
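The enable_ints_and_unlock() change running through this file replaces the unconditional spllo() with splx(s): callers now capture the interrupt level that disable_ints_and_lock() returns and restore exactly that level, rather than assuming they entered with interrupts enabled. The resulting usage pattern, in outline (the critical-section body is illustrative):

spl_t s;

s = disable_ints_and_lock();        /* returns the previous spl level */
/* ... critical section; helpers such as thread_call_finish(call, &s)
 * that transiently unlock must update s when they re-lock ... */
enable_ints_and_unlock(s);          /* splx(s), not an unconditional spllo() */

This is why thread_call_finish() now takes a spl_t *: it unlocks to zfree() the call, re-locks, and hands the refreshed level back to its caller.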
- */ - if ((call->tc_soft_deadline & 0x1) && + if ((call->tc_flags & THREAD_CALL_RATELIMITED) && (CE(call)->deadline > timestamp) && (ml_timer_forced_evaluation() == FALSE)) { break; @@ -1567,10 +1558,11 @@ boolean_t thread_call_isactive(thread_call_t call) { boolean_t active; + spl_t s; - disable_ints_and_lock(); + s = disable_ints_and_lock(); active = (call->tc_submit_count > call->tc_finish_count); - enable_ints_and_unlock(); + enable_ints_and_unlock(s); return active; } diff --git a/osfmk/kern/thread_call.h b/osfmk/kern/thread_call.h index 36cd6170a..c44561f63 100644 --- a/osfmk/kern/thread_call.h +++ b/osfmk/kern/thread_call.h @@ -292,6 +292,7 @@ struct thread_call { #define THREAD_CALL_ALLOC 0x01 #define THREAD_CALL_WAIT 0x02 #define THREAD_CALL_DELAYED 0x04 +#define THREAD_CALL_RATELIMITED TIMEOUT_URGENCY_RATELIMITED typedef struct thread_call thread_call_data_t; diff --git a/osfmk/kern/thread_policy.c b/osfmk/kern/thread_policy.c index 648edba7a..ece343e19 100644 --- a/osfmk/kern/thread_policy.c +++ b/osfmk/kern/thread_policy.c @@ -33,14 +33,166 @@ #include #include #include +#include +#include + +#include + +#define QOS_EXTRACT(q) ((q) & 0xff) + +/* + * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit + * to threads that don't have a QoS class set. + */ +const qos_policy_params_t thread_qos_policy_params = { + /* + * This table defines the starting base priority of the thread, + * which will be modified by the thread importance and the task max priority + * before being applied. + */ + .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */ + .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */ + .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED, + .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT, + .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY, + .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE, + .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE, + + /* + * This table defines the highest IO priority that a thread marked with this + * QoS class can have. + */ + .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0, + .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0, + .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0, + .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0, + .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1, + .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */ + .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3, + + /* + * This table defines the highest throughput and latency QoS tiers that + * a thread marked with this QoS class can have.
+ */ + + .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED), + .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0), + .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1), + .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1), + .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2), + .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5), + .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5), + + .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED), + .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0), + .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1), + .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1), + .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3), + .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3), + .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3), +}; + +void +thread_recompute_qos(thread_t thread); static void thread_recompute_priority( thread_t thread); +static void +thread_set_user_sched_mode(thread_t thread, sched_mode_t mode); + +static int +thread_qos_scaled_relative_priority(int qos, int qos_relprio); + extern void proc_get_thread_policy(thread_t thread, thread_policy_state_t info); +boolean_t +thread_has_qos_policy(thread_t thread) { + return (proc_get_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE; +} + +kern_return_t +thread_remove_qos_policy(thread_t thread) +{ + thread_qos_policy_data_t unspec_qos; + unspec_qos.qos_tier = THREAD_QOS_UNSPECIFIED; + unspec_qos.tier_importance = 0; + + __unused int prev_qos = thread->requested_policy.thrp_qos; + + DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos); + + return thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&unspec_qos, THREAD_QOS_POLICY_COUNT); +} + +boolean_t +thread_is_static_param(thread_t thread) +{ + if (thread->static_param) { + DTRACE_PROC1(qos__legacy__denied, thread_t, thread); + return TRUE; + } + return FALSE; +} + +/* + * Relative priorities can range between 0REL and -15REL. These + * map to QoS-specific ranges, to create non-overlapping priority + * ranges. + */ +static int +thread_qos_scaled_relative_priority(int qos, int qos_relprio) +{ + int next_lower_qos; + + /* Fast path, since no validation or scaling is needed */ + if (qos_relprio == 0) return 0; + + switch (qos) { + case THREAD_QOS_USER_INTERACTIVE: + next_lower_qos = THREAD_QOS_USER_INITIATED; + break; + case THREAD_QOS_USER_INITIATED: + next_lower_qos = THREAD_QOS_LEGACY; + break; + case THREAD_QOS_LEGACY: + next_lower_qos = THREAD_QOS_UTILITY; + break; + case THREAD_QOS_UTILITY: + next_lower_qos = THREAD_QOS_BACKGROUND; + break; + case THREAD_QOS_MAINTENANCE: + case THREAD_QOS_BACKGROUND: + next_lower_qos = 0; + break; + default: + panic("Unrecognized QoS %d", qos); + return 0; + } + + int prio_range_max = thread_qos_policy_params.qos_pri[qos]; + int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0; + + /* + * We now have the valid range that the scaled relative priority can map to. Note + * that the lower bound is exclusive, but the upper bound is inclusive. If the + * range is (21,31], 0REL should map to 31 and -15REL should map to 22. 
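The (21,31] example in the comment above checks out directly: with prio_range_max = 31 and prio_range_min = 21, a relative priority of -15 scales to -(((31 - 21) * 15) >> 4) = -(150 >> 4) = -9, so the thread lands at 31 - 9 = 22, while 0REL takes the fast path and stays at 31. A self-contained restatement of the arithmetic (not kernel code):

#include <assert.h>

static int
scaled_relprio(int range_max, int range_min, int relprio)
{
    /* mirrors the scaling step in thread_qos_scaled_relative_priority */
    return -(((range_max - range_min) * (-relprio)) >> 4);
}

int
main(void)
{
    assert(scaled_relprio(31, 21, 0) == 0);      /*   0REL -> 31 + 0 = 31 */
    assert(scaled_relprio(31, 21, -15) == -9);   /* -15REL -> 31 - 9 = 22 */
    return 0;
}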
We use the + * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard + * the remainder. + */ + int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4); + + return scaled_relprio; +} + +/* + * flag set by -qos-policy-allow boot-arg to allow + * testing thread qos policy from userspace + */ +boolean_t allow_qos_policy_set = FALSE; + kern_return_t thread_policy_set( thread_t thread, @@ -48,14 +200,45 @@ thread_policy_set( thread_policy_t policy_info, mach_msg_type_number_t count) { + thread_qos_policy_data_t req_qos; + kern_return_t kr; + + req_qos.qos_tier = THREAD_QOS_UNSPECIFIED; if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - if (thread->static_param) - return (KERN_SUCCESS); + if (allow_qos_policy_set == FALSE) { + if (thread_is_static_param(thread)) + return (KERN_POLICY_STATIC); + + if (flavor == THREAD_QOS_POLICY || flavor == THREAD_QOS_POLICY_OVERRIDE) + return (KERN_INVALID_ARGUMENT); + } + + /* Threads without static_param set reset their QoS when other policies are applied. */ + if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) { + /* Store the existing tier; if this call fails, it is used to reset back. */ + req_qos.qos_tier = thread->requested_policy.thrp_qos; + req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio; - return (thread_policy_set_internal(thread, flavor, policy_info, count)); + kr = thread_remove_qos_policy(thread); + if (kr != KERN_SUCCESS) { + return kr; + } + } + + kr = thread_policy_set_internal(thread, flavor, policy_info, count); + + /* If the set failed, reapply the QoS tier we removed above. */ + if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) { + if (kr != KERN_SUCCESS) { + /* Reset back to our original tier as the set failed. */ + (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT); + } + } + + return kr; } kern_return_t @@ -74,6 +257,7 @@ thread_policy_set_internal( return (KERN_TERMINATED); } + switch (flavor) { case THREAD_EXTENDED_POLICY: @@ -87,53 +271,24 @@ thread_policy_set_internal( timeshare = info->timeshare; } - if (!SCHED(supports_timeshare_mode)()) - timeshare = FALSE; - + sched_mode_t mode = (timeshare == TRUE) ?
TH_MODE_TIMESHARE : TH_MODE_FIXED; + s = splsched(); thread_lock(thread); - if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) { - integer_t oldmode = (thread->sched_mode == TH_MODE_TIMESHARE); - - if (timeshare) { - thread->sched_mode = TH_MODE_TIMESHARE; - - if (!oldmode) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { - sched_share_incr(); - - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_incr(); - } - } - } - else { - thread->sched_mode = TH_MODE_FIXED; - - if (oldmode) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_decr(); - - sched_share_decr(); - } - } - } + boolean_t removed = thread_run_queue_remove(thread); - thread_recompute_priority(thread); - } - else { + thread_set_user_sched_mode(thread, mode); + thread_recompute_priority(thread); - if (timeshare) - thread->saved_mode = TH_MODE_TIMESHARE; - else - thread->saved_mode = TH_MODE_FIXED; - } + if (removed) + thread_setrun(thread, SCHED_TAILQ); thread_unlock(thread); splx(s); + sfi_reevaluate(thread); + break; } @@ -157,30 +312,24 @@ thread_policy_set_internal( s = splsched(); thread_lock(thread); + boolean_t removed = thread_run_queue_remove(thread); + thread->realtime.period = info->period; thread->realtime.computation = info->computation; thread->realtime.constraint = info->constraint; thread->realtime.preemptible = info->preemptible; - if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) { - thread->saved_mode = TH_MODE_REALTIME; - } - else { - if (thread->sched_mode == TH_MODE_TIMESHARE) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_decr(); + thread_set_user_sched_mode(thread, TH_MODE_REALTIME); + thread_recompute_priority(thread); - sched_share_decr(); - } - } - thread->sched_mode = TH_MODE_REALTIME; - thread_recompute_priority(thread); - } + if (removed) + thread_setrun(thread, SCHED_TAILQ); thread_unlock(thread); splx(s); + sfi_reevaluate(thread); + break; } @@ -231,6 +380,113 @@ thread_policy_set_internal( return thread_affinity_set(thread, info->affinity_tag); } + case THREAD_THROUGHPUT_QOS_POLICY: + { + thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info; + int tqos; + + if (count < THREAD_LATENCY_QOS_POLICY_COUNT) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != + KERN_SUCCESS) { + break; + } + + tqos = qos_extract(info->thread_throughput_qos_tier); + thread->effective_policy.t_through_qos = tqos; + } + break; + + case THREAD_LATENCY_QOS_POLICY: + { + thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info; + int lqos; + + if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != + KERN_SUCCESS) { + break; + } + + lqos = qos_extract(info->thread_latency_qos_tier); +/* The expected use cases (opt-in) of per-thread latency QoS would seem to + * preclude any requirement at present to re-evaluate timers on a thread level + * latency QoS change. 
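The two reworked cases above (timeshare/fixed and realtime) now share one safe sequence for changing a thread's scheduling mode: pull it off the run queue if queued, apply the mode through thread_set_user_sched_mode() so any active demotion is respected, recompute priority, and requeue. In outline (assumes the thread is locked at splsched, as in the surrounding code):

boolean_t removed = thread_run_queue_remove(thread);  /* dequeue if queued */

thread_set_user_sched_mode(thread, mode);   /* defers to saved_mode if demoted */
thread_recompute_priority(thread);

if (removed)
    thread_setrun(thread, SCHED_TAILQ);     /* requeue at the new priority */

Dequeuing first means the thread is never sitting on a run queue whose ordering no longer matches its mode or priority.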
+ */ + thread->effective_policy.t_latency_qos = lqos; + + } + break; + + case THREAD_QOS_POLICY: + case THREAD_QOS_POLICY_OVERRIDE: + { + thread_qos_policy_t info = (thread_qos_policy_t)policy_info; + + if (count < THREAD_QOS_POLICY_COUNT) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) { + result = KERN_INVALID_ARGUMENT; + break; + } + + /* + * Going into task policy requires the task mutex, + * because of the way synchronization against the IO policy + * subsystem works. + * + * We need to move thread policy to the thread mutex instead. + * separate thread policy from task policy + */ + + if (flavor == THREAD_QOS_POLICY_OVERRIDE) { + int strongest_override = info->qos_tier; + + if (info->qos_tier != THREAD_QOS_UNSPECIFIED && + thread->requested_policy.thrp_qos_override != THREAD_QOS_UNSPECIFIED) + strongest_override = MAX(thread->requested_policy.thrp_qos_override, info->qos_tier); + + thread_mtx_unlock(thread); + + /* There is a race here. To be closed in separate thread policy from task policy */ + + proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, strongest_override); + + return (result); + } + + thread_mtx_unlock(thread); + + proc_set_task_policy2(thread->task, thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, info->qos_tier, -info->tier_importance); + + thread_mtx_lock(thread); + if (!thread->active) { + thread_mtx_unlock(thread); + return (KERN_TERMINATED); + } + + break; + } default: result = KERN_INVALID_ARGUMENT; @@ -241,101 +497,422 @@ thread_policy_set_internal( return (result); } +/* + * thread_set_mode_and_absolute_pri: + * + * Set scheduling policy & absolute priority for thread, for deprecated + * thread_set_policy and thread_policy interfaces. + * + * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO. + * Both result in FIXED mode scheduling. + * + * Called with thread mutex locked. + */ +kern_return_t +thread_set_mode_and_absolute_pri( + thread_t thread, + integer_t policy, + integer_t priority) +{ + spl_t s; + sched_mode_t mode; + kern_return_t kr = KERN_SUCCESS; + + if (thread_is_static_param(thread)) + return (KERN_POLICY_STATIC); + + if (thread->policy_reset) + return (KERN_SUCCESS); + + /* Setting legacy policies on threads kills the current QoS */ + if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) { + thread_mtx_unlock(thread); + + kr = thread_remove_qos_policy(thread); + + thread_mtx_lock(thread); + if (!thread->active) { + return (KERN_TERMINATED); + } + } + + switch (policy) { + case POLICY_TIMESHARE: + mode = TH_MODE_TIMESHARE; + break; + case POLICY_RR: + case POLICY_FIFO: + mode = TH_MODE_FIXED; + break; + default: + panic("unexpected sched policy: %d", policy); + break; + } + + s = splsched(); + thread_lock(thread); + + /* This path isn't allowed to change a thread out of realtime. */ + if ((thread->sched_mode != TH_MODE_REALTIME) && + (thread->saved_mode != TH_MODE_REALTIME)) { + + /* + * Reverse engineer and apply the correct importance value + * from the requested absolute priority value. 
+ */ + + if (priority >= thread->max_priority) + priority = thread->max_priority - thread->task_priority; + else if (priority >= MINPRI_KERNEL) + priority -= MINPRI_KERNEL; + else if (priority >= MINPRI_RESERVED) + priority -= MINPRI_RESERVED; + else + priority -= BASEPRI_DEFAULT; + + priority += thread->task_priority; + + if (priority > thread->max_priority) + priority = thread->max_priority; + else if (priority < MINPRI) + priority = MINPRI; + + thread->importance = priority - thread->task_priority; + + boolean_t removed = thread_run_queue_remove(thread); + + thread_set_user_sched_mode(thread, mode); + + thread_recompute_priority(thread); + + if (removed) + thread_setrun(thread, SCHED_TAILQ); + } + + thread_unlock(thread); + splx(s); + + sfi_reevaluate(thread); + + return (kr); +} + +/* + * Set the thread's requested mode + * Called with thread mutex and thread locked + */ +static void +thread_set_user_sched_mode(thread_t thread, sched_mode_t mode) +{ + if (thread->policy_reset) + return; + + /* + * TODO: Instead of having saved mode, have 'user mode' and 'true mode'. + * That way there's zero confusion over which the user wants + * and which the kernel wants. + */ + if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) + thread->saved_mode = mode; + else + sched_set_thread_mode(thread, mode); +} + +/* called with task lock locked */ +void +thread_recompute_qos(thread_t thread) { + spl_t s; + + thread_mtx_lock(thread); + + if (!thread->active) { + thread_mtx_unlock(thread); + return; + } + + s = splsched(); + thread_lock(thread); + + thread_recompute_priority(thread); + + thread_unlock(thread); + splx(s); + + thread_mtx_unlock(thread); +} + +/* called with task lock locked and thread_mtx_lock locked */ +void +thread_update_qos_cpu_time(thread_t thread, boolean_t lock_needed) +{ + uint64_t last_qos_change_balance; + ledger_amount_t thread_balance_credit; + ledger_amount_t thread_balance_debit; + ledger_amount_t effective_qos_time; + uint64_t ctime; + uint64_t remainder = 0, consumed = 0; + processor_t processor; + spl_t s; + kern_return_t kr; + + if (lock_needed) { + s = splsched(); + thread_lock(thread); + } + + /* + * Calculation of time elapsed by the thread in the current qos. + * Following is the timeline which shows all the variables used in the calculation below. + * + * thread ledger thread ledger + * cpu_time_last_qos cpu_time + * | |<- consumed ->|<- remainder ->| + * timeline -----------------------------------------------------------> + * | | | + * thread_dispatch ctime quantum end + * + * |<----- effective qos time ----->| + */ + + /* + * Calculate time elapsed since last qos change on this thread. + * For cpu time on thread ledger, do not use ledger_get_balance, + * only use credit field of ledger, since + * debit is used by per thread cpu limits and is not zero. + */ + kr = ledger_get_entries(thread->t_threadledger, thread_ledgers.cpu_time, &thread_balance_credit, &thread_balance_debit); + if (kr != KERN_SUCCESS) + goto out; + last_qos_change_balance = thread->cpu_time_last_qos; + + /* + * If thread running on CPU, calculate time elapsed since this thread was last dispatched on cpu. + * The thread ledger is only updated at context switch, the time since last context swicth is not + * updated in the thread ledger cpu time. 
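+ *
+ * Illustrative (hypothetical) figures: with thread_balance_credit =
+ * 900us of ledgered CPU time, cpu_time_last_qos = 700us, and 50us
+ * consumed on-CPU since the last dispatch, the time charged to the
+ * current QoS works out to 50 + (900 - 700) = 250us.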
+ */ + processor = thread->last_processor; + if ((processor != PROCESSOR_NULL) && (processor->state == PROCESSOR_RUNNING) && + (processor->active_thread == thread)) { + ctime = mach_absolute_time(); + + if (processor->quantum_end > ctime) + remainder = processor->quantum_end - ctime; + + consumed = thread->quantum_remaining - remainder; + } + /* + * There can be multiple qos change in a quantum and in that case the cpu_time_last_qos will + * lie between cpu_time marker and ctime marker shown below. The output of + * thread_balance - last_qos_change_balance will be negative in such case, but overall outcome + * when consumed is added to it would be positive. + * + * thread ledger + * cpu_time + * |<------------ consumed --------->|<- remainder ->| + * timeline -----------------------------------------------------------> + * | | | | + * thread_dispatch thread ledger ctime quantum end + * cpu_time_last_qos + * + * |<-effective qos time->| + */ + effective_qos_time = (ledger_amount_t) consumed; + effective_qos_time += thread_balance_credit - last_qos_change_balance; + + if (lock_needed) { + thread_unlock(thread); + splx(s); + } + + if (effective_qos_time < 0) + return; + + thread->cpu_time_last_qos += (uint64_t)effective_qos_time; + + /* + * Update the task-level qos stats. Its safe to perform operations on these fields, since we + * hold the task lock. + */ + switch (thread->effective_policy.thep_qos) { + + case THREAD_QOS_DEFAULT: + thread->task->cpu_time_qos_stats.cpu_time_qos_default += effective_qos_time; + break; + + case THREAD_QOS_MAINTENANCE: + thread->task->cpu_time_qos_stats.cpu_time_qos_maintenance += effective_qos_time; + break; + + case THREAD_QOS_BACKGROUND: + thread->task->cpu_time_qos_stats.cpu_time_qos_background += effective_qos_time; + break; + + case THREAD_QOS_UTILITY: + thread->task->cpu_time_qos_stats.cpu_time_qos_utility += effective_qos_time; + break; + + case THREAD_QOS_LEGACY: + thread->task->cpu_time_qos_stats.cpu_time_qos_legacy += effective_qos_time; + break; + + case THREAD_QOS_USER_INITIATED: + thread->task->cpu_time_qos_stats.cpu_time_qos_user_initiated += effective_qos_time; + break; + + case THREAD_QOS_USER_INTERACTIVE: + thread->task->cpu_time_qos_stats.cpu_time_qos_user_interactive += effective_qos_time; + break; + } + + return; + +out: + if (lock_needed) { + thread_unlock(thread); + splx(s); + } +} + +/* + * Calculate base priority from thread attributes, and set it on the thread + * + * Called with thread_lock and thread mutex held. 
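+ *
+ * The QoS inputs consumed here are typically established via a
+ * THREAD_QOS_POLICY request, e.g. (a sketch; tier_importance must lie
+ * in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]):
+ *
+ *	thread_qos_policy_data_t qp = {
+ *		.qos_tier = THREAD_QOS_UTILITY,
+ *		.tier_importance = -2
+ *	};
+ *	thread_policy_set(thread, THREAD_QOS_POLICY,
+ *	    (thread_policy_t)&qp, THREAD_QOS_POLICY_COUNT);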
+ */ static void thread_recompute_priority( thread_t thread) { integer_t priority; - if (thread->sched_mode == TH_MODE_REALTIME) - priority = BASEPRI_RTQUEUES; - else { + if (thread->policy_reset) + return; + + if (thread->sched_mode == TH_MODE_REALTIME) { + sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES); + return; + } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) { + int qos = thread->effective_policy.thep_qos; + int qos_ui_is_urgent = thread->effective_policy.qos_ui_is_urgent; + int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */ + int qos_scaled_relprio; + + assert(qos >= 0 && qos < THREAD_QOS_LAST); + assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE); + + priority = thread_qos_policy_params.qos_pri[qos]; + qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio); + + if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) { + /* Bump priority 46 to 47 when in a frontmost app */ + qos_scaled_relprio += 1; + } + + priority += qos_scaled_relprio; + } else { if (thread->importance > MAXPRI) priority = MAXPRI; - else - if (thread->importance < -MAXPRI) + else if (thread->importance < -MAXPRI) priority = -MAXPRI; else priority = thread->importance; priority += thread->task_priority; - - if (priority > thread->max_priority) - priority = thread->max_priority; - else - if (priority < MINPRI) - priority = MINPRI; } - set_priority(thread, priority); -} + if (priority > thread->max_priority) + priority = thread->max_priority; + else if (priority < MINPRI) + priority = MINPRI; + sched_set_thread_base_priority(thread, priority); +} + +/* Called with the thread mutex held */ void thread_task_priority( thread_t thread, integer_t priority, integer_t max_priority) { - spl_t s; + spl_t s; assert(thread != THREAD_NULL); + if (!thread->active || thread->policy_reset) + return; + s = splsched(); thread_lock(thread); - - - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { - if ((thread->max_priority <= MAXPRI_THROTTLE) && (max_priority > MAXPRI_THROTTLE)) { - sched_background_decr(); - } else if ((thread->max_priority > MAXPRI_THROTTLE) && (max_priority <= MAXPRI_THROTTLE)) { - sched_background_incr(); - } - } + integer_t old_max_priority = thread->max_priority; thread->task_priority = priority; thread->max_priority = max_priority; + /* A thread is 'throttled' when its max priority is below MAXPRI_THROTTLE */ + if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) { + sched_set_thread_throttled(thread, FALSE); + } else if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) { + sched_set_thread_throttled(thread, TRUE); + } + thread_recompute_priority(thread); thread_unlock(thread); splx(s); } +/* + * Reset thread to default state in preparation for termination + * Called with thread mutex locked + * + * Always called on current thread, so we don't need a run queue remove + */ void thread_policy_reset( thread_t thread) { spl_t s; + assert(thread == current_thread()); + s = splsched(); thread_lock(thread); - if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) { - sched_mode_t oldmode = thread->sched_mode; - - thread->sched_mode = SCHED(initial_thread_sched_mode)(thread->task); + assert_thread_sched_count(thread); - if ((oldmode != TH_MODE_TIMESHARE) && (thread->sched_mode == TH_MODE_TIMESHARE)) { + if (thread->sched_flags & TH_SFLAG_FAILSAFE) + sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE); - if ((thread->state & 
(TH_RUN|TH_IDLE)) == TH_RUN) { - sched_share_incr(); + assert_thread_sched_count(thread); - if (thread->max_priority <= MAXPRI_THROTTLE) - sched_background_incr(); - } - } - } - else { - thread->sched_mode = thread->saved_mode; - thread->saved_mode = TH_MODE_NONE; - thread->sched_flags &= ~TH_SFLAG_DEMOTED_MASK; - } + if (thread->sched_flags & TH_SFLAG_THROTTLED) + sched_set_thread_throttled(thread, FALSE); + + assert_thread_sched_count(thread); + + assert(thread->BG_COUNT == 0); + + /* At this point, the various demotions should be inactive */ + assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)); + assert(!(thread->sched_flags & TH_SFLAG_THROTTLED)); + assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)); + + /* Reset thread back to task-default basepri and mode */ + sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task); + + sched_set_thread_mode(thread, newmode); thread->importance = 0; - thread_recompute_priority(thread); + sched_set_thread_base_priority(thread, thread->task_priority); + + /* Prevent further changes to thread base priority or mode */ + thread->policy_reset = 1; + + assert(thread->BG_COUNT == 0); + assert_thread_sched_count(thread); thread_unlock(thread); splx(s); @@ -502,6 +1079,10 @@ thread_policy_get( info = (thread_policy_state_t)policy_info; if (!(*get_default)) { + info->flags = 0; + + info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0); + /* * Unlock the thread mutex and directly return. * This is necessary because proc_get_thread_policy() @@ -519,6 +1100,72 @@ thread_policy_get( break; } + case THREAD_LATENCY_QOS_POLICY: + { + thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info; + uint32_t plqos; + + if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if (*get_default) { + plqos = 0; + } else { + plqos = thread->effective_policy.t_latency_qos; + } + + info->thread_latency_qos_tier = qos_latency_policy_package(plqos); + } + break; + + case THREAD_THROUGHPUT_QOS_POLICY: + { + thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info; + uint32_t ptqos; + + if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if (*get_default) { + ptqos = 0; + } else { + ptqos = thread->effective_policy.t_through_qos; + } + + info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos); + } + break; + + case THREAD_QOS_POLICY: + case THREAD_QOS_POLICY_OVERRIDE: + { + thread_qos_policy_t info = (thread_qos_policy_t)policy_info; + + if (*count < THREAD_QOS_POLICY_COUNT) { + result = KERN_INVALID_ARGUMENT; + break; + } + + if (!(*get_default)) { + if (flavor == THREAD_QOS_POLICY_OVERRIDE) { + info->qos_tier = thread->requested_policy.thrp_qos_override; + /* TODO: handle importance overrides */ + info->tier_importance = 0; + } else { + info->qos_tier = thread->requested_policy.thrp_qos; + info->tier_importance = thread->importance; + } + } else { + info->qos_tier = THREAD_QOS_UNSPECIFIED; + info->tier_importance = 0; + } + + break; + } default: result = KERN_INVALID_ARGUMENT; diff --git a/osfmk/kern/timer.c b/osfmk/kern/timer.c index 901dbd9ee..2ddcbd9b6 100644 --- a/osfmk/kern/timer.c +++ b/osfmk/kern/timer.c @@ -56,8 +56,6 @@ /* */ -#include - #include #include #include @@ -148,14 +146,6 @@ timer_switch( new_timer->tstamp = tstamp; } -#if MACHINE_TIMER_ROUTINES - -/* - * Machine-dependent code implements the timer event routine. 
- */ - -#else /* MACHINE_TIMER_ROUTINES */ - /* * Update the current thread timer and * start the new timer. Requires a current @@ -183,5 +173,3 @@ thread_timer_event( PROCESSOR_DATA(processor, thread_timer) = new_timer; new_timer->tstamp = tstamp; } - -#endif /* MACHINE_TIMER_ROUTINES */ diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c index 0c75d7707..56497e013 100644 --- a/osfmk/kern/timer_call.c +++ b/osfmk/kern/timer_call.c @@ -84,7 +84,7 @@ lck_grp_attr_t timer_longterm_lck_grp_attr; #define QUEUE(x) ((queue_t)(x)) #define MPQUEUE(x) ((mpqueue_head_t *)(x)) #define TIMER_CALL(x) ((timer_call_t)(x)) - +#define TCE(x) (&(x->call_entry)) /* * The longterm timer object is a global structure holding all timers * beyond the short-term, local timer queue threshold. The boot processor @@ -142,7 +142,11 @@ static mpqueue_head_t * timer_longterm_enqueue_unlocked( timer_call_t call, uint64_t now, uint64_t deadline, - mpqueue_head_t ** old_queue); + mpqueue_head_t ** old_queue, + uint64_t soft_deadline, + uint64_t ttd, + timer_call_param_t param1, + uint32_t callout_flags); static void timer_longterm_dequeued_locked( timer_call_t call); @@ -160,11 +164,62 @@ boolean_t mach_timer_coalescing_enabled = TRUE; mpqueue_head_t *timer_call_enqueue_deadline_unlocked( timer_call_t call, mpqueue_head_t *queue, - uint64_t deadline); + uint64_t deadline, + uint64_t soft_deadline, + uint64_t ttd, + timer_call_param_t param1, + uint32_t flags); mpqueue_head_t *timer_call_dequeue_unlocked( timer_call_t call); +timer_coalescing_priority_params_t tcoal_prio_params; + +#if TCOAL_PRIO_STATS +int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl; +#define TCOAL_PRIO_STAT(x) (x++) +#else +#define TCOAL_PRIO_STAT(x) +#endif + +static void +timer_call_init_abstime(void) +{ + int i; + uint64_t result; + timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params(); + nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment); + nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result); + tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result; + nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result); + tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result; + nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result); + tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result; + tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift; + tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift; + tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift; + tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift; + tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift; + + nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max, + &tcoal_prio_params.timer_coalesce_rt_abstime_max); + nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max, + &tcoal_prio_params.timer_coalesce_bg_abstime_max); + nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max, + &tcoal_prio_params.timer_coalesce_kt_abstime_max); + 
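+	/* Each per-class maximum below follows the same pattern: the
+	 * platform's nanosecond tunable is converted to mach absolute
+	 * time once at boot, so timer_call_slop() never has to convert
+	 * units on a hot path. */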
nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max, + &tcoal_prio_params.timer_coalesce_fp_abstime_max); + nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max, + &tcoal_prio_params.timer_coalesce_ts_abstime_max); + + for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) { + tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i]; + nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i], + &tcoal_prio_params.latency_qos_abstime_max[i]); + tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i]; + } +} + void timer_call_init(void) @@ -172,9 +227,9 @@ timer_call_init(void) lck_attr_setdefault(&timer_call_lck_attr); lck_grp_attr_setdefault(&timer_call_lck_grp_attr); lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr); - nanotime_to_absolutetime(0, PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment); timer_longterm_init(); + timer_call_init_abstime(); } @@ -193,56 +248,16 @@ timer_call_setup( timer_call_param_t param0) { DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0); - call_entry_setup(CE(call), func, param0); + call_entry_setup(TCE(call), func, param0); simple_lock_init(&(call)->lock, 0); call->async_dequeue = FALSE; } - -/* - * Timer call entry locking model - * ============================== - * - * Timer call entries are linked on per-cpu timer queues which are protected - * by the queue lock and the call entry lock. The locking protocol is: - * - * 0) The canonical locking order is timer call entry followed by queue. - * - * 1) With only the entry lock held, entry.queue is valid: - * 1a) NULL: the entry is not queued, or - * 1b) non-NULL: this queue must be locked before the entry is modified. - * After locking the queue, the call.async_dequeue flag must be checked: - * 1c) TRUE: the entry was removed from the queue by another thread - * and we must NULL the entry.queue and reset this flag, or - * 1d) FALSE: (ie. queued), the entry can be manipulated. - * - * 2) If a queue lock is obtained first, the queue is stable: - * 2a) If a try-lock of a queued entry succeeds, the call can be operated on - * and dequeued. - * 2b) If a try-lock fails, it indicates that another thread is attempting - * to change the entry and move it to a different position in this queue - * or to different queue. The entry can be dequeued but it should not be - * operated upon since it is being changed. Furthermore, we don't null - * the entry.queue pointer (protected by the entry lock we don't own). - * Instead, we set the async_dequeue flag -- see (1c). - * 2c) Same as 2b but occurring when a longterm timer is matured. - */ - -/* - * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline() - * cast between pointer types (mpqueue_head_t *) and (queue_t) so that - * we can use the call_entry_dequeue() and call_entry_enqueue_deadline() - * methods to operate on timer_call structs as if they are call_entry structs. - * These structures are identical except for their queue head pointer fields. - * - * In the debug case, we assert that the timer call locking protocol - * is being obeyed. 
- */ #if TIMER_ASSERT static __inline__ mpqueue_head_t * timer_call_entry_dequeue( timer_call_t entry) { - mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); if (!hw_lock_held((hw_lock_t)&entry->lock)) panic("_call_entry_dequeue() " @@ -256,7 +271,7 @@ timer_call_entry_dequeue( panic("_call_entry_dequeue() " "queue %p is not locked\n", old_queue); - call_entry_dequeue(CE(entry)); + call_entry_dequeue(TCE(entry)); old_queue->count--; return (old_queue); @@ -268,7 +283,7 @@ timer_call_entry_enqueue_deadline( mpqueue_head_t *queue, uint64_t deadline) { - mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); if (!hw_lock_held((hw_lock_t)&entry->lock)) panic("_call_entry_enqueue_deadline() " @@ -281,12 +296,14 @@ timer_call_entry_enqueue_deadline( panic("_call_entry_enqueue_deadline() " "old_queue %p != queue", old_queue); - call_entry_enqueue_deadline(CE(entry), QUEUE(queue), deadline); + call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline); /* For efficiency, track the earliest soft deadline on the queue, so that * fuzzy decisions can be made without lock acquisitions. */ - queue->earliest_soft_deadline = ((timer_call_t)queue_first(&queue->head))->soft_deadline; + timer_call_t thead = (timer_call_t)queue_first(&queue->head); + + queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; if (old_queue) old_queue->count--; @@ -301,9 +318,9 @@ static __inline__ mpqueue_head_t * timer_call_entry_dequeue( timer_call_t entry) { - mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); - call_entry_dequeue(CE(entry)); + call_entry_dequeue(TCE(entry)); old_queue->count--; return old_queue; @@ -315,14 +332,16 @@ timer_call_entry_enqueue_deadline( mpqueue_head_t *queue, uint64_t deadline) { - mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); - call_entry_enqueue_deadline(CE(entry), QUEUE(queue), deadline); + call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline); /* For efficiency, track the earliest soft deadline on the queue, * so that fuzzy decisions can be made without lock acquisitions. */ - queue->earliest_soft_deadline = ((timer_call_t)queue_first(&queue->head))->soft_deadline; + + timer_call_t thead = (timer_call_t)queue_first(&queue->head); + queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? 
TCE(thead)->deadline : thead->soft_deadline; if (old_queue) old_queue->count--; @@ -338,7 +357,7 @@ timer_call_entry_enqueue_tail( timer_call_t entry, mpqueue_head_t *queue) { - call_entry_enqueue_tail(CE(entry), QUEUE(queue)); + call_entry_enqueue_tail(TCE(entry), QUEUE(queue)); queue->count++; return; } @@ -351,7 +370,7 @@ static __inline__ void timer_call_entry_dequeue_async( timer_call_t entry) { - mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); if (old_queue) { old_queue->count--; (void) remque(qe(entry)); @@ -371,15 +390,21 @@ __inline__ mpqueue_head_t * timer_call_enqueue_deadline_unlocked( timer_call_t call, mpqueue_head_t *queue, - uint64_t deadline) + uint64_t deadline, + uint64_t soft_deadline, + uint64_t ttd, + timer_call_param_t param1, + uint32_t callout_flags) { - call_entry_t entry = CE(call); + call_entry_t entry = TCE(call); mpqueue_head_t *old_queue; DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue); simple_lock(&call->lock); + old_queue = MPQUEUE(entry->queue); + if (old_queue != NULL) { timer_queue_lock_spin(old_queue); if (call->async_dequeue) { @@ -389,7 +414,7 @@ timer_call_enqueue_deadline_unlocked( DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, call, call->async_dequeue, - CE(call)->queue, + TCE(call)->queue, 0x1c, 0); timer_call_enqueue_deadline_unlocked_async1++; #endif @@ -411,6 +436,11 @@ timer_call_enqueue_deadline_unlocked( timer_queue_lock_spin(queue); } + call->soft_deadline = soft_deadline; + call->flags = callout_flags; + TCE(call)->param1 = param1; + call->ttd = ttd; + timer_call_entry_enqueue_deadline(call, queue, deadline); timer_queue_unlock(queue); simple_unlock(&call->lock); @@ -426,7 +456,7 @@ mpqueue_head_t * timer_call_dequeue_unlocked( timer_call_t call) { - call_entry_t entry = CE(call); + call_entry_t entry = TCE(call); mpqueue_head_t *old_queue; DBG("timer_call_dequeue_unlocked(%p)\n", call); @@ -438,7 +468,7 @@ timer_call_dequeue_unlocked( DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, call, call->async_dequeue, - CE(call)->queue, + TCE(call)->queue, 0, 0); #endif if (old_queue != NULL) { @@ -450,7 +480,7 @@ timer_call_dequeue_unlocked( DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, call, call->async_dequeue, - CE(call)->queue, + TCE(call)->queue, 0x1c, 0); timer_call_dequeue_unlocked_async1++; #endif @@ -467,6 +497,51 @@ timer_call_dequeue_unlocked( return (old_queue); } + +/* + * Timer call entry locking model + * ============================== + * + * Timer call entries are linked on per-cpu timer queues which are protected + * by the queue lock and the call entry lock. The locking protocol is: + * + * 0) The canonical locking order is timer call entry followed by queue. + * + * 1) With only the entry lock held, entry.queue is valid: + * 1a) NULL: the entry is not queued, or + * 1b) non-NULL: this queue must be locked before the entry is modified. + * After locking the queue, the call.async_dequeue flag must be checked: + * 1c) TRUE: the entry was removed from the queue by another thread + * and we must NULL the entry.queue and reset this flag, or + * 1d) FALSE: (ie. queued), the entry can be manipulated. + * + * 2) If a queue lock is obtained first, the queue is stable: + * 2a) If a try-lock of a queued entry succeeds, the call can be operated on + * and dequeued. + * 2b) If a try-lock fails, it indicates that another thread is attempting + * to change the entry and move it to a different position in this queue + * or to different queue. 
The entry can be dequeued but it should not be + * operated upon since it is being changed. Furthermore, we don't null + * the entry.queue pointer (protected by the entry lock we don't own). + * Instead, we set the async_dequeue flag -- see (1c). + * 2c) Same as 2b but occurring when a longterm timer is matured. + * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.) + * should be manipulated with the appropriate timer queue lock held, + * to prevent queue traversal observations from observing inconsistent + * updates to an in-flight callout. + */ + +/* + * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline() + * cast between pointer types (mpqueue_head_t *) and (queue_t) so that + * we can use the call_entry_dequeue() and call_entry_enqueue_deadline() + * methods to operate on timer_call structs as if they are call_entry structs. + * These structures are identical except for their queue head pointer fields. + * + * In the debug case, we assert that the timer call locking protocol + * is being obeyed. + */ + static boolean_t timer_call_enter_internal( timer_call_t call, @@ -481,12 +556,11 @@ timer_call_enter_internal( spl_t s; uint64_t slop; uint32_t urgency; + uint64_t sdeadline, ttd; s = splclock(); - call->soft_deadline = deadline; - call->flags = flags; - + sdeadline = deadline; uint64_t ctime = mach_absolute_time(); TIMER_KDEBUG_TRACE(KDEBUG_TRACE, @@ -519,49 +593,44 @@ timer_call_enter_internal( past_deadline_shortest = delta; deadline = ctime + past_deadline_timer_adjustment; - call->soft_deadline = deadline; + sdeadline = deadline; } - /* Bit 0 of the "soft" deadline indicates that - * this particular timer call requires rate-limiting - * behaviour. Maintain the invariant deadline >= soft_deadline by - * setting bit 0 of "deadline". - */ - - deadline |= 1; if (ratelimited || slop_ratelimited) { - call->soft_deadline |= 1ULL; + flags |= TIMER_CALL_RATELIMITED; } else { - call->soft_deadline &= ~0x1ULL; + flags &= ~TIMER_CALL_RATELIMITED; } - call->ttd = call->soft_deadline - ctime; - + ttd = sdeadline - ctime; #if CONFIG_DTRACE - DTRACE_TMR7(callout__create, timer_call_func_t, CE(call)->func, - timer_call_param_t, CE(call)->param0, uint32_t, call->flags, - (deadline - call->soft_deadline), - (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), call); + DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func, + timer_call_param_t, TCE(call)->param0, uint32_t, flags, + (deadline - sdeadline), + (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call); #endif + /* Program timer callout parameters under the appropriate per-CPU or + * longterm queue lock. The callout may have been previously enqueued + * and in-flight on this or another timer queue. 
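+ *
+ * Typical arming path that feeds this routine (a sketch; the call,
+ * interval and leeway values are caller-supplied):
+ *
+ *	uint64_t now = mach_absolute_time();
+ *	timer_call_enter_with_leeway(call, NULL, now + interval, leeway,
+ *	    TIMER_CALL_SYS_NORMAL | TIMER_CALL_LEEWAY, FALSE);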
+ */ if (!ratelimited && !slop_ratelimited) { - queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue); + queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags); } if (queue == NULL) { queue = timer_queue_assign(deadline); - old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline); + old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags); } - CE(call)->param1 = param1; #if TIMER_TRACE - CE(call)->entry_time = ctime; + TCE(call)->entry_time = ctime; #endif TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END, call, - (old_queue != NULL), call->soft_deadline, queue->count, 0); + (old_queue != NULL), deadline, queue->count, 0); splx(s); @@ -615,18 +684,19 @@ timer_call_cancel( TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CANCEL | DBG_FUNC_START, call, - CE(call)->deadline, call->soft_deadline, call->flags, 0); + TCE(call)->deadline, call->soft_deadline, call->flags, 0); old_queue = timer_call_dequeue_unlocked(call); if (old_queue != NULL) { timer_queue_lock_spin(old_queue); if (!queue_empty(&old_queue->head)) { - timer_queue_cancel(old_queue, CE(call)->deadline, CE(queue_first(&old_queue->head))->deadline); - old_queue->earliest_soft_deadline = ((timer_call_t)queue_first(&old_queue->head))->soft_deadline; + timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline); + timer_call_t thead = (timer_call_t)queue_first(&old_queue->head); + old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; } else { - timer_queue_cancel(old_queue, CE(call)->deadline, UINT64_MAX); + timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX); old_queue->earliest_soft_deadline = UINT64_MAX; } timer_queue_unlock(old_queue); @@ -635,20 +705,22 @@ timer_call_cancel( DECR_TIMER_CANCEL | DBG_FUNC_END, call, old_queue, - CE(call)->deadline - mach_absolute_time(), - CE(call)->deadline - CE(call)->entry_time, 0); + TCE(call)->deadline - mach_absolute_time(), + TCE(call)->deadline - TCE(call)->entry_time, 0); splx(s); #if CONFIG_DTRACE - DTRACE_TMR6(callout__cancel, timer_call_func_t, CE(call)->func, - timer_call_param_t, CE(call)->param0, uint32_t, call->flags, 0, + DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func, + timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); #endif return (old_queue != NULL); } -uint32_t timer_queue_shutdown_lock_skips; +static uint32_t timer_queue_shutdown_lock_skips; +static uint32_t timer_queue_shutdown_discarded; + void timer_queue_shutdown( mpqueue_head_t *queue) @@ -657,6 +729,7 @@ timer_queue_shutdown( mpqueue_head_t *new_queue; spl_t s; + DBG("timer_queue_shutdown(%p)\n", queue); s = splclock(); @@ -664,6 +737,7 @@ timer_queue_shutdown( /* Note comma operator in while expression re-locking each iteration */ while (timer_queue_lock_spin(queue), !queue_empty(&queue->head)) { call = TIMER_CALL(queue_first(&queue->head)); + if (!simple_lock_try(&call->lock)) { /* * case (2b) lock order inversion, dequeue and skip @@ -677,23 +751,35 @@ timer_queue_shutdown( DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, call, call->async_dequeue, - CE(call)->queue, + TCE(call)->queue, 0x2b, 0); #endif timer_queue_unlock(queue); continue; } + boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0); + /* remove entry from old queue */ timer_call_entry_dequeue(call); 
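+		/* The entry is now off its old queue but still locked;
+		 * below it is either migrated to a new queue or, for
+		 * LOCAL timers, discarded (and counted in
+		 * timer_queue_shutdown_discarded). */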
timer_queue_unlock(queue); - /* and queue it on new */ - new_queue = timer_queue_assign(CE(call)->deadline); - timer_queue_lock_spin(new_queue); - timer_call_entry_enqueue_deadline( - call, new_queue, CE(call)->deadline); - timer_queue_unlock(new_queue); + if (call_local == FALSE) { + /* and queue it on new, discarding LOCAL timers */ + new_queue = timer_queue_assign(TCE(call)->deadline); + timer_queue_lock_spin(new_queue); + timer_call_entry_enqueue_deadline( + call, new_queue, TCE(call)->deadline); + timer_queue_unlock(new_queue); + } else { + timer_queue_shutdown_discarded++; + } + + /* The only lingering LOCAL timer should be this thread's + * quantum expiration timer. + */ + assert((call_local == FALSE) || + (TCE(call)->func == thread_quantum_expire)); simple_unlock(&call->lock); } @@ -702,7 +788,7 @@ timer_queue_shutdown( splx(s); } -uint32_t timer_queue_expire_lock_skips; +static uint32_t timer_queue_expire_lock_skips; uint64_t timer_queue_expire_with_options( mpqueue_head_t *queue, @@ -735,16 +821,11 @@ timer_queue_expire_with_options( DECR_TIMER_EXPIRE | DBG_FUNC_NONE, call, call->soft_deadline, - CE(call)->deadline, - CE(call)->entry_time, 0); + TCE(call)->deadline, + TCE(call)->entry_time, 0); - /* Bit 0 of the "soft" deadline indicates that - * this particular timer call is rate-limited - * and hence shouldn't be processed before its - * hard deadline. - */ - if ((call->soft_deadline & 0x1) && - (CE(call)->deadline > cur_deadline)) { + if ((call->flags & TIMER_CALL_RATELIMITED) && + (TCE(call)->deadline > cur_deadline)) { if (rescan == FALSE) break; } @@ -759,9 +840,9 @@ timer_queue_expire_with_options( timer_call_entry_dequeue(call); - func = CE(call)->func; - param0 = CE(call)->param0; - param1 = CE(call)->param1; + func = TCE(call)->func; + param0 = TCE(call)->param0; + param1 = TCE(call)->param1; simple_unlock(&call->lock); timer_queue_unlock(queue); @@ -797,8 +878,8 @@ timer_queue_expire_with_options( if (__probable(rescan == FALSE)) { break; } else { - int64_t skew = CE(call)->deadline - call->soft_deadline; - assert(CE(call)->deadline >= call->soft_deadline); + int64_t skew = TCE(call)->deadline - call->soft_deadline; + assert(TCE(call)->deadline >= call->soft_deadline); /* DRK: On a latency quality-of-service level change, * re-sort potentially rate-limited timers. The platform @@ -829,8 +910,8 @@ timer_queue_expire_with_options( if (!queue_empty(&queue->head)) { call = TIMER_CALL(queue_first(&queue->head)); - cur_deadline = CE(call)->deadline; - queue->earliest_soft_deadline = call->soft_deadline; + cur_deadline = TCE(call)->deadline; + queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? 
TCE(call)->deadline: call->soft_deadline; } else { queue->earliest_soft_deadline = cur_deadline = UINT64_MAX; } @@ -849,7 +930,7 @@ timer_queue_expire( } extern int serverperfmode; -uint32_t timer_queue_migrate_lock_skips; +static uint32_t timer_queue_migrate_lock_skips; /* * timer_queue_migrate() is called by timer_queue_migrate_cpu() * to move timer requests from the local processor (queue_from) @@ -905,7 +986,7 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) } call = TIMER_CALL(queue_first(&queue_from->head)); - if (CE(call)->deadline < CE(head_to)->deadline) { + if (TCE(call)->deadline < TCE(head_to)->deadline) { timers_migrated = 0; goto abort2; } @@ -928,7 +1009,7 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, call, - CE(call)->queue, + TCE(call)->queue, call->lock.interlock.lock_data, 0x2b, 0); #endif @@ -938,7 +1019,7 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) } timer_call_entry_dequeue(call); timer_call_entry_enqueue_deadline( - call, queue_to, CE(call)->deadline); + call, queue_to, TCE(call)->deadline); timers_migrated++; simple_unlock(&call->lock); } @@ -983,9 +1064,9 @@ timer_queue_trace( TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_QUEUE | DBG_FUNC_NONE, call->soft_deadline, - CE(call)->deadline, - CE(call)->entry_time, - CE(call)->func, + TCE(call)->deadline, + TCE(call)->entry_time, + TCE(call)->func, 0); call = TIMER_CALL(queue_next(qe(call))); } while (!queue_end(&queue->head, qe(call))); @@ -1017,7 +1098,11 @@ mpqueue_head_t * timer_longterm_enqueue_unlocked(timer_call_t call, uint64_t now, uint64_t deadline, - mpqueue_head_t **old_queue) + mpqueue_head_t **old_queue, + uint64_t soft_deadline, + uint64_t ttd, + timer_call_param_t param1, + uint32_t callout_flags) { timer_longterm_t *tlp = &timer_longterm; boolean_t update_required = FALSE; @@ -1031,9 +1116,9 @@ timer_longterm_enqueue_unlocked(timer_call_t call, * - the longterm mechanism is disabled, or * - this deadline is too short. */ - if (__probable((call->flags & TIMER_CALL_LOCAL) != 0 || + if ((callout_flags & TIMER_CALL_LOCAL) != 0 || (tlp->threshold.interval == TIMER_LONGTERM_NONE) || - (deadline <= longterm_threshold))) + (deadline <= longterm_threshold)) return NULL; /* @@ -1048,8 +1133,12 @@ timer_longterm_enqueue_unlocked(timer_call_t call, assert(!ml_get_interrupts_enabled()); simple_lock(&call->lock); timer_queue_lock_spin(timer_longterm_queue); + TCE(call)->deadline = deadline; + TCE(call)->param1 = param1; + call->ttd = ttd; + call->soft_deadline = soft_deadline; + call->flags = callout_flags; timer_call_entry_enqueue_tail(call, timer_longterm_queue); - CE(call)->deadline = deadline; tlp->enqueues++; @@ -1067,6 +1156,10 @@ timer_longterm_enqueue_unlocked(timer_call_t call, simple_unlock(&call->lock); if (update_required) { + /* + * Note: this call expects that calling the master cpu + * alone does not involve locking the topo lock. 
+ */ timer_call_nosync_cpu( master_cpu, (void (*)(void *)) timer_longterm_update, @@ -1126,7 +1219,7 @@ timer_longterm_scan(timer_longterm_t *tlp, TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, call, - CE(call)->queue, + TCE(call)->queue, call->lock.interlock.lock_data, 0x2c, 0); #endif @@ -1151,14 +1244,14 @@ timer_longterm_scan(timer_longterm_t *tlp, TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ESCALATE | DBG_FUNC_NONE, call, - CE(call)->deadline, - CE(call)->entry_time, - CE(call)->func, + TCE(call)->deadline, + TCE(call)->entry_time, + TCE(call)->func, 0); tlp->escalates++; timer_call_entry_dequeue(call); timer_call_entry_enqueue_deadline( - call, timer_master_queue, CE(call)->deadline); + call, timer_master_queue, TCE(call)->deadline); /* * A side-effect of the following call is to update * the actual hardware deadline if required. @@ -1376,7 +1469,7 @@ timer_master_scan(timer_longterm_t *tlp, qe = queue_first(&timer_master_queue->head); while (!queue_end(&timer_master_queue->head, qe)) { call = TIMER_CALL(qe); - deadline = CE(call)->deadline; + deadline = TCE(call)->deadline; qe = queue_next(qe); if ((call->flags & TIMER_CALL_LOCAL) != 0) continue; @@ -1481,3 +1574,133 @@ timer_sysctl_set(int oid, uint64_t value) return KERN_INVALID_ARGUMENT; } } + + +/* Select timer coalescing window based on per-task quality-of-service hints */ +static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { + uint32_t latency_qos; + boolean_t adjusted = FALSE; + task_t ctask = t->task; + + if (ctask) { + latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS); + + assert(latency_qos <= NUM_LATENCY_QOS_TIERS); + + if (latency_qos) { + *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1]; + *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1]; + *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1]; + adjusted = TRUE; + } + } + return adjusted; +} + + +/* Adjust timer deadlines based on priority of the thread and the + * urgency value provided at timeout establishment. With this mechanism, + * timers are no longer necessarily sorted in order of soft deadline + * on a given timer queue, i.e. they may be differentially skewed. + * In the current scheme, this could lead to fewer pending timers + * processed than is technically possible when the HW deadline arrives. 
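+ *
+ * Worked example with hypothetical tunables: given tcs_shift = 10 and
+ * a deadline 100ms out, timer_call_slop() below computes
+ * MIN(100ms >> 10, tcs_max_abstime), roughly 97.6us of leeway; a user
+ * idle level of 64 then adds (adjval * 64) >> 7, i.e. +50%, for about
+ * 146us in total.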
+ */ +static void +timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { + int16_t tpri = cthread->sched_pri; + if ((urgency & TIMER_CALL_USER_MASK) != 0) { + if (tpri >= BASEPRI_RTQUEUES || + urgency == TIMER_CALL_USER_CRITICAL) { + *tshift = tcoal_prio_params.timer_coalesce_rt_shift; + *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max; + TCOAL_PRIO_STAT(rt_tcl); + } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) || + (urgency == TIMER_CALL_USER_BACKGROUND)) { + /* Determine if timer should be subjected to a lower QoS */ + if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { + if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) { + return; + } else { + *pratelimited = FALSE; + } + } + *tshift = tcoal_prio_params.timer_coalesce_bg_shift; + *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; + TCOAL_PRIO_STAT(bg_tcl); + } else if (tpri >= MINPRI_KERNEL) { + *tshift = tcoal_prio_params.timer_coalesce_kt_shift; + *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; + TCOAL_PRIO_STAT(kt_tcl); + } else if (cthread->sched_mode == TH_MODE_FIXED) { + *tshift = tcoal_prio_params.timer_coalesce_fp_shift; + *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max; + TCOAL_PRIO_STAT(fp_tcl); + } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { + TCOAL_PRIO_STAT(qos_tcl); + } else if (cthread->sched_mode == TH_MODE_TIMESHARE) { + *tshift = tcoal_prio_params.timer_coalesce_ts_shift; + *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max; + TCOAL_PRIO_STAT(ts_tcl); + } else { + TCOAL_PRIO_STAT(nc_tcl); + } + } else if (urgency == TIMER_CALL_SYS_BACKGROUND) { + *tshift = tcoal_prio_params.timer_coalesce_bg_shift; + *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; + TCOAL_PRIO_STAT(bg_tcl); + } else { + *tshift = tcoal_prio_params.timer_coalesce_kt_shift; + *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; + TCOAL_PRIO_STAT(kt_tcl); + } +} + + +int timer_user_idle_level; + +uint64_t +timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited) +{ + int32_t tcs_shift = 0; + uint64_t tcs_max_abstime = 0; + uint64_t adjval; + uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK); + + if (mach_timer_coalescing_enabled && + (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) { + timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited); + + if (tcs_shift >= 0) + adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime); + else + adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime); + /* Apply adjustments derived from "user idle level" heuristic */ + adjval += (adjval * timer_user_idle_level) >> 7; + return adjval; + } else { + return 0; + } +} + +int +timer_get_user_idle_level(void) { + return timer_user_idle_level; +} + +kern_return_t timer_set_user_idle_level(int ilevel) { + boolean_t do_reeval = FALSE; + + if ((ilevel < 0) || (ilevel > 128)) + return KERN_INVALID_ARGUMENT; + + if (ilevel < timer_user_idle_level) { + do_reeval = TRUE; + } + + timer_user_idle_level = ilevel; + + if (do_reeval) + ml_timer_evaluate(); + + return KERN_SUCCESS; +} diff --git a/osfmk/kern/timer_call.h b/osfmk/kern/timer_call.h index 10152e07c..03c062ed6 100644 --- a/osfmk/kern/timer_call.h +++ b/osfmk/kern/timer_call.h @@ -38,6 +38,7 @@ #ifdef XNU_KERNEL_PRIVATE #include +#include #ifdef 
MACH_KERNEL_PRIVATE #include @@ -103,7 +104,7 @@ typedef void (*timer_call_func_t)( * Non-migratable timer_call */ #define TIMER_CALL_LOCAL TIMEOUT_URGENCY_FIRST_AVAIL - +#define TIMER_CALL_RATELIMITED TIMEOUT_URGENCY_RATELIMITED extern boolean_t timer_call_enter( timer_call_t call, uint64_t deadline, @@ -133,6 +134,35 @@ extern void timer_call_setup( timer_call_func_t func, timer_call_param_t param0); +extern int timer_get_user_idle_level(void); +extern kern_return_t timer_set_user_idle_level(int ilevel); + +#define NUM_LATENCY_QOS_TIERS (6) +typedef struct { + uint32_t powergate_latency_abstime; + + uint32_t idle_entry_timer_processing_hdeadline_threshold_abstime; + uint32_t interrupt_timer_coalescing_ilat_threshold_abstime; + uint32_t timer_resort_threshold_abstime; + + int32_t timer_coalesce_rt_shift; + int32_t timer_coalesce_bg_shift; + int32_t timer_coalesce_kt_shift; + int32_t timer_coalesce_fp_shift; + int32_t timer_coalesce_ts_shift; + + uint64_t timer_coalesce_rt_abstime_max; + uint64_t timer_coalesce_bg_abstime_max; + uint64_t timer_coalesce_kt_abstime_max; + uint64_t timer_coalesce_fp_abstime_max; + uint64_t timer_coalesce_ts_abstime_max; + + uint32_t latency_qos_scale[NUM_LATENCY_QOS_TIERS]; + uint64_t latency_qos_abstime_max[NUM_LATENCY_QOS_TIERS]; + boolean_t latency_tier_rate_limited[NUM_LATENCY_QOS_TIERS]; +} timer_coalescing_priority_params_t; +extern timer_coalescing_priority_params_t tcoal_prio_params; + #endif /* XNU_KERNEL_PRIVATE */ #endif /* _KERN_TIMER_CALL_H_ */ diff --git a/osfmk/kern/timer_queue.h b/osfmk/kern/timer_queue.h index 060e72183..879a311fd 100644 --- a/osfmk/kern/timer_queue.h +++ b/osfmk/kern/timer_queue.h @@ -47,6 +47,7 @@ #if defined(i386) || defined(x86_64) #define DECR_RDHPET MACHDBG_CODE(DBG_MACH_EXCP_DECI, 5) #define DECR_SET_TSC_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 6) +#define DECR_SET_APIC_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 16) #endif #define DECR_TIMER_ENTER MACHDBG_CODE(DBG_MACH_EXCP_DECI, 7) #define DECR_TIMER_CANCEL MACHDBG_CODE(DBG_MACH_EXCP_DECI, 8) @@ -100,6 +101,44 @@ extern void timer_call_nosync_cpu( * Invoked by platform, implemented by kernel. */ +/* + * Invoked by kernel, implemented by platform. 
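+ *
+ * A platform might supply its tunables roughly as follows (the field
+ * values are placeholders, not shipping defaults):
+ *
+ *	static timer_coalescing_priority_params_ns_t tcoal_ns_defaults = {
+ *		.idle_entry_timer_processing_hdeadline_threshold_ns = 5000000,
+ *		.interrupt_timer_coalescing_ilat_threshold_ns = 30000,
+ *		.timer_resort_threshold_ns = 50000000,
+ *		.timer_coalesce_bg_shift = -5,
+ *		.timer_coalesce_bg_ns_max = 100000000,
+ *		.latency_qos_scale = { 3, 2, 1, -2, -15, -15 },
+ *		.latency_qos_ns_max = { 1000000, 5000000, 20000000,
+ *		    75000000, 10000000000ULL, 10000000000ULL },
+ *		.latency_tier_rate_limited = { FALSE, FALSE, FALSE,
+ *		    FALSE, TRUE, TRUE },
+ *	};
+ *
+ *	timer_coalescing_priority_params_ns_t *
+ *	timer_call_get_priority_params(void)
+ *	{
+ *		return &tcoal_ns_defaults;
+ *	}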
+ */ + +#define NUM_LATENCY_QOS_TIERS (6) + +typedef struct { + uint32_t idle_entry_timer_processing_hdeadline_threshold_ns; + uint32_t interrupt_timer_coalescing_ilat_threshold_ns; + uint32_t timer_resort_threshold_ns; + + int32_t timer_coalesce_rt_shift; + int32_t timer_coalesce_bg_shift; + int32_t timer_coalesce_kt_shift; + int32_t timer_coalesce_fp_shift; + int32_t timer_coalesce_ts_shift; + + uint64_t timer_coalesce_rt_ns_max; + uint64_t timer_coalesce_bg_ns_max; + uint64_t timer_coalesce_kt_ns_max; + uint64_t timer_coalesce_fp_ns_max; + uint64_t timer_coalesce_ts_ns_max; + + uint32_t latency_qos_scale[NUM_LATENCY_QOS_TIERS]; + uint64_t latency_qos_ns_max[NUM_LATENCY_QOS_TIERS]; + boolean_t latency_tier_rate_limited[NUM_LATENCY_QOS_TIERS]; +} timer_coalescing_priority_params_ns_t; + +extern timer_coalescing_priority_params_ns_t * timer_call_get_priority_params(void); + + +extern uint64_t timer_call_slop( + uint64_t deadline, + uint64_t armtime, + uint32_t urgency, + thread_t arming_thread, + boolean_t *rlimited); + /* Process deadline expiration for queue, returns new deadline */ extern uint64_t timer_queue_expire( mpqueue_head_t *queue, diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c index b51ff419f..1c998771b 100644 --- a/osfmk/kern/wait_queue.c +++ b/osfmk/kern/wait_queue.c @@ -1196,7 +1196,7 @@ wait_queue_assert_wait64_locked( wait_queue_set_t wqs = (wait_queue_set_t)wq; if (event == NO_EVENT64 && wqs_is_preposted(wqs)) - return(THREAD_AWAKENED); + return (thread->wait_result = THREAD_AWAKENED); } /* @@ -1808,6 +1808,7 @@ _wait_queue_select64_thread( queue_t q = &wq->wq_queue; thread_lock(thread); + if ((thread->wait_queue == wq) && (thread->wait_event == event)) { remqueue((queue_entry_t) thread); thread->at_safe_point = FALSE; @@ -1816,6 +1817,7 @@ _wait_queue_select64_thread( /* thread still locked */ return KERN_SUCCESS; } + thread_unlock(thread); /* @@ -2133,7 +2135,6 @@ wait_queue_wakeup_thread( * interrupt and waited on something else (like another * semaphore). * Conditions: - * nothing of interest locked * we need to assume spl needs to be raised * Returns: * KERN_SUCCESS - the thread was found waiting and awakened @@ -2161,6 +2162,7 @@ wait_queue_wakeup64_thread( if (res == KERN_SUCCESS) { res = thread_go(thread, result); assert(res == KERN_SUCCESS); + thread_unlock(thread); splx(s); return res; diff --git a/osfmk/kern/wait_queue.h b/osfmk/kern/wait_queue.h index 01111d16c..4a38ceeaa 100644 --- a/osfmk/kern/wait_queue.h +++ b/osfmk/kern/wait_queue.h @@ -37,12 +37,13 @@ #include /* for wait_queue_t */ #include +#include #include #ifdef MACH_KERNEL_PRIVATE -#include +#include #include #include diff --git a/osfmk/kern/xpr.c b/osfmk/kern/xpr.c index a10ec384d..8fbaca22d 100644 --- a/osfmk/kern/xpr.c +++ b/osfmk/kern/xpr.c @@ -59,7 +59,6 @@ #include #include -#include #include #include #include diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index d03746a4e..7b4efb645 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -197,6 +197,27 @@ * */ +/* Returns TRUE if we rolled over the counter at factor */ +static inline boolean_t +sample_counter(volatile uint32_t * count_p, uint32_t factor) +{ + uint32_t old_count, new_count; + boolean_t rolled_over; + + do { + new_count = old_count = *count_p; + + if (++new_count >= factor) { + rolled_over = TRUE; + new_count = 0; + } else { + rolled_over = FALSE; + } + + } while (!OSCompareAndSwap(old_count, new_count, count_p)); + + return rolled_over; +} #if defined(__LP64__) #define ZP_POISON 0xdeadbeefdeadbeef @@ -205,6 +226,7 @@ #endif #define ZP_DEFAULT_SAMPLING_FACTOR 16 +#define ZP_DEFAULT_SCALE_FACTOR 4 /* * A zp_factor of 0 indicates zone poisoning is disabled, @@ -216,6 +238,9 @@ /* set by zp-factor=N boot arg, zero indicates non-tiny poisoning disabled */ uint32_t zp_factor = 0; +/* set by zp-scale=N boot arg, scales zp_factor by zone size */ +uint32_t zp_scale = 0; + /* set in zp_init, zero indicates -no-zp boot-arg */ vm_size_t zp_tiny_zone_limit = 0; @@ -251,6 +276,7 @@ zp_init(void) zp_tiny_zone_limit = (vm_size_t) cpu_info.cache_line_size; zp_factor = ZP_DEFAULT_SAMPLING_FACTOR; + zp_scale = ZP_DEFAULT_SCALE_FACTOR; //TODO: Bigger permutation? /* @@ -284,6 +310,11 @@ zp_init(void) printf("Zone poisoning factor override: %u\n", zp_factor); } + /* zp-scale=XXXX: override how much zone size scales zp-factor by */ + if (PE_parse_boot_argn("zp-scale", &zp_scale, sizeof(zp_scale))) { + printf("Zone poisoning scale factor override: %u\n", zp_scale); + } + /* Initialize backup pointer random cookie for unpoisoned elements */ zp_nopoison_cookie = (uintptr_t) early_random(); @@ -448,17 +479,21 @@ is_sane_zone_element(zone_t zone, /* Someone wrote to freed memory. */ static inline void /* noreturn */ zone_element_was_modified_panic(zone_t zone, + vm_offset_t element, vm_offset_t found, vm_offset_t expected, vm_offset_t offset) { - panic("a freed zone element has been modified: expected %p but found %p, bits changed %p, at offset %d of %d in zone: %s", + panic("a freed zone element has been modified in zone %s: expected %p but found %p, bits changed %p, at offset %d of %d in element %p, cookies %p %p", + zone->zone_name, (void *) expected, (void *) found, (void *) (expected ^ found), (uint32_t) offset, (uint32_t) zone->elem_size, - zone->zone_name); + (void *) element, + (void *) zp_nopoison_cookie, + (void *) zp_poisoned_cookie); } /* @@ -469,6 +504,7 @@ zone_element_was_modified_panic(zone_t zone, */ static void /* noreturn */ backup_ptr_mismatch_panic(zone_t zone, + vm_offset_t element, vm_offset_t primary, vm_offset_t backup) { @@ -478,6 +514,14 @@ backup_ptr_mismatch_panic(zone_t zone, boolean_t sane_primary = is_sane_zone_element(zone, primary); boolean_t element_was_poisoned = (backup & 0x1) ? 
TRUE : FALSE; +#if defined(__LP64__) + /* We can inspect the tag in the upper bits for additional confirmation */ + if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000) + element_was_poisoned = TRUE; + else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000) + element_was_poisoned = FALSE; +#endif + if (element_was_poisoned) { likely_backup = backup ^ zp_poisoned_cookie; sane_backup = is_sane_zone_element(zone, likely_backup); @@ -488,11 +532,12 @@ backup_ptr_mismatch_panic(zone_t zone, /* The primary is definitely the corrupted one */ if (!sane_primary && sane_backup) - zone_element_was_modified_panic(zone, primary, likely_backup, 0); + zone_element_was_modified_panic(zone, element, primary, likely_backup, 0); /* The backup is definitely the corrupted one */ if (sane_primary && !sane_backup) - zone_element_was_modified_panic(zone, backup, primary, + zone_element_was_modified_panic(zone, element, backup, + (primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)), zone->elem_size - sizeof(vm_offset_t)); /* @@ -502,10 +547,10 @@ backup_ptr_mismatch_panic(zone_t zone, * primary pointer has been overwritten with a sane but incorrect address. */ if (sane_primary && sane_backup) - zone_element_was_modified_panic(zone, primary, likely_backup, 0); + zone_element_was_modified_panic(zone, element, primary, likely_backup, 0); /* Neither are sane, so just guess. */ - zone_element_was_modified_panic(zone, primary, likely_backup, 0); + zone_element_was_modified_panic(zone, element, primary, likely_backup, 0); } @@ -532,6 +577,7 @@ append_zone_element(zone_t zone, *backup = new_next ^ zp_poisoned_cookie; else backup_ptr_mismatch_panic(zone, + (vm_offset_t) tail, old_next, old_backup); @@ -563,11 +609,11 @@ add_list_to_zone(zone_t zone, /* * Adds the element to the head of the zone's free list * Keeps a backup next-pointer at the end of the element - * Poisons the element with ZP_POISON every zp_factor frees */ static inline void free_to_zone(zone_t zone, - vm_offset_t element) + vm_offset_t element, + boolean_t poison) { vm_offset_t old_head; struct zone_page_metadata *page_meta; @@ -593,25 +639,6 @@ free_to_zone(zone_t zone, panic("zfree: freeing invalid pointer %p to zone %s\n", (void *) element, zone->zone_name); - boolean_t poison = FALSE; - - /* Always poison tiny zones' elements (limit is 0 if -no-zp is set) */ - if (zone->elem_size <= zp_tiny_zone_limit) - poison = TRUE; - else if (zp_factor != 0 && ++zone->zp_count >= zp_factor) { - /* Poison zone elements periodically */ - zone->zp_count = 0; - poison = TRUE; - } - - if (poison) { - /* memset_pattern{4|8} could help make this faster: */ - vm_offset_t *element_cursor = primary + 1; - - for ( ; element_cursor < backup; element_cursor++) - *element_cursor = ZP_POISON; - } - /* * Always write a redundant next pointer * So that it is more difficult to forge, xor it with a random cookie @@ -657,11 +684,14 @@ free_to_zone(zone_t zone, * and verifies that a poisoned element hasn't been modified. 
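 *
 * The low bit of the stored backup distinguishes the two XOR cookies:
 * zp_init() leaves bit 0 set in zp_poisoned_cookie and clear in
 * zp_nopoison_cookie, so (backup & 0x1) recovers whether the element
 * was poisoned when freed. For example (hypothetical values), a next
 * pointer 0xffffff8012345678 freed with poisoning is stored at the
 * element's tail as 0xffffff8012345678 ^ zp_poisoned_cookie.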
*/ static inline vm_offset_t -try_alloc_from_zone(zone_t zone) +try_alloc_from_zone(zone_t zone, + boolean_t* check_poison) { vm_offset_t element; struct zone_page_metadata *page_meta; + *check_poison = FALSE; + /* if zone is empty, bail */ if (zone->use_page_list) { if (zone->allows_foreign && !queue_empty(&zone->pages.any_free_foreign)) @@ -704,7 +734,7 @@ try_alloc_from_zone(zone_t zone) * should have been, and print it appropriately */ if (__improbable(!is_sane_zone_element(zone, next_element))) - backup_ptr_mismatch_panic(zone, next_element, next_element_backup); + backup_ptr_mismatch_panic(zone, element, next_element, next_element_backup); /* Check the backup pointer for the regular cookie */ if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) { @@ -712,20 +742,12 @@ try_alloc_from_zone(zone_t zone) /* Check for the poisoned cookie instead */ if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie))) /* Neither cookie is valid, corruption has occurred */ - backup_ptr_mismatch_panic(zone, next_element, next_element_backup); + backup_ptr_mismatch_panic(zone, element, next_element, next_element_backup); /* - * Element was marked as poisoned, so check its integrity, - * skipping the primary and backup pointers at the beginning and end. + * Element was marked as poisoned, so check its integrity before using it. */ - vm_offset_t *element_cursor = primary + 1; - - for ( ; element_cursor < backup ; element_cursor++) - if (__improbable(*element_cursor != ZP_POISON)) - zone_element_was_modified_panic(zone, - *element_cursor, - ZP_POISON, - ((vm_offset_t)element_cursor) - element); + *check_poison = TRUE; } if (zone->use_page_list) { @@ -743,13 +765,6 @@ try_alloc_from_zone(zone_t zone) } } - /* - * Clear out the old next pointer and backup to avoid leaking the cookie - * and so that only values on the freelist have a valid cookie - */ - *primary = ZP_POISON; - *backup = ZP_POISON; - /* Remove this element from the free list */ if (zone->use_page_list) { @@ -1968,7 +1983,7 @@ zcram( * the "zone_zone" variable already. */ } else { - free_to_zone(zone, newmem + pos_in_page); + free_to_zone(zone, newmem + pos_in_page, FALSE); } zone->cur_size += elem_size; } @@ -1979,7 +1994,7 @@ zcram( if (newmem == (vm_offset_t)zone) { /* Don't free zone_zone zone */ } else { - free_to_zone(zone, newmem); + free_to_zone(zone, newmem, FALSE); } if (from_zm) zone_page_alloc(newmem, elem_size); @@ -2179,6 +2194,19 @@ zone_init( zone_map_min_address = zone_min; zone_map_max_address = zone_max; +#if defined(__LP64__) + /* + * ensure that any vm_page_t that gets created from + * the vm_page zone can be packed properly (see vm_page.h + * for the packing requirements + */ + if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_map_min_address)) != (vm_page_t)zone_map_min_address) + panic("VM_PAGE_PACK_PTR failed on zone_map_min_address - %p", (void *)zone_map_min_address); + + if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_map_max_address)) != (vm_page_t)zone_map_max_address) + panic("VM_PAGE_PACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address); +#endif + zone_pages = (unsigned int)atop_kernel(zone_max - zone_min); zone_page_table_used_size = sizeof(zone_page_table); @@ -2280,10 +2308,11 @@ extern volatile SInt32 kfree_nop_count; /* * zalloc returns an element from the specified zone. 
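 *
 * Typical client usage (a sketch; the zone name, element type and
 * sizing are illustrative):
 *
 *	zone_t widget_zone = zinit(sizeof(struct widget),
 *	    1024 * sizeof(struct widget), PAGE_SIZE, "widgets");
 *	struct widget *w = zalloc(widget_zone);
 *	/* ... use w ... */
 *	zfree(widget_zone, w);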
*/ -void * -zalloc_canblock( +static void * +zalloc_internal( zone_t zone, - boolean_t canblock) + boolean_t canblock, + boolean_t nopagewait) { vm_offset_t addr = 0; kern_return_t retval; @@ -2294,6 +2323,7 @@ zalloc_canblock( boolean_t did_gzalloc = FALSE; #endif thread_t thr = current_thread(); + boolean_t check_poison = FALSE; #if CONFIG_ZLEAKS uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ @@ -2312,25 +2342,22 @@ zalloc_canblock( if (__improbable(DO_LOGGING(zone))) numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH); - lock_zone(zone); - - #if CONFIG_ZLEAKS - /* + /* * Zone leak detection: capture a backtrace every zleak_sample_factor - * allocations in this zone. + * allocations in this zone. */ - if (zone->zleak_on && (++zone->zleak_capture >= zleak_sample_factor)) { - zone->zleak_capture = 0; - + if (__improbable(zone->zleak_on && sample_counter(&zone->zleak_capture, zleak_sample_factor) == TRUE)) { /* Avoid backtracing twice if zone logging is on */ - if (numsaved == 0 ) + if (numsaved == 0) zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH); else zleak_tracedepth = numsaved; } #endif /* CONFIG_ZLEAKS */ + lock_zone(zone); + if (zone->async_prio_refill && zone->zone_replenish_thread) { do { vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size)); @@ -2363,7 +2390,7 @@ zalloc_canblock( } if (__probable(addr == 0)) - addr = try_alloc_from_zone(zone); + addr = try_alloc_from_zone(zone, &check_poison); while ((addr == 0) && canblock) { @@ -2423,7 +2450,7 @@ zalloc_canblock( for (;;) { int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; - + if (vm_pool_low() || retry >= 1) alloc_size = round_page(zone->elem_size); @@ -2496,17 +2523,19 @@ zalloc_canblock( zone->waiting = FALSE; zone_wakeup(zone); } - addr = try_alloc_from_zone(zone); + addr = try_alloc_from_zone(zone, &check_poison); if (addr == 0 && - retval == KERN_RESOURCE_SHORTAGE) { + retval == KERN_RESOURCE_SHORTAGE) { + if (nopagewait == TRUE) + break; /* out of the main while loop */ unlock_zone(zone); - + VM_PAGE_WAIT(); lock_zone(zone); } } if (addr == 0) - addr = try_alloc_from_zone(zone); + addr = try_alloc_from_zone(zone, &check_poison); } #if CONFIG_ZLEAKS @@ -2523,12 +2552,12 @@ zalloc_canblock( #endif /* CONFIG_ZLEAKS */ - if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { + if ((addr == 0) && (!canblock || nopagewait) && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { zone->async_pending = TRUE; unlock_zone(zone); thread_call_enter(&call_async_alloc); lock_zone(zone); - addr = try_alloc_from_zone(zone); + addr = try_alloc_from_zone(zone, &check_poison); } /* @@ -2541,15 +2570,44 @@ zalloc_canblock( btlog_add_entry(zlog_btlog, (void *)addr, ZOP_ALLOC, (void **)zbt, numsaved); } + vm_offset_t inner_size = zone->elem_size; + #if ZONE_DEBUG if (!did_gzalloc && addr && zone_debug_enabled(zone)) { enqueue_tail(&zone->active_zones, (queue_entry_t)addr); addr += ZONE_DEBUG_OFFSET; + inner_size -= ZONE_DEBUG_OFFSET; } #endif unlock_zone(zone); + if (__improbable(check_poison && addr)) { + vm_offset_t *element_cursor = ((vm_offset_t *) addr) + 1; + vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *) addr); + + for ( ; element_cursor < backup ; element_cursor++) + if (__improbable(*element_cursor != ZP_POISON)) + zone_element_was_modified_panic(zone, + addr, + *element_cursor, + ZP_POISON, + ((vm_offset_t)element_cursor) - 
addr); + } + + if (addr) { + /* + * Clear out the old next pointer and backup to avoid leaking the cookie + * and so that only values on the freelist have a valid cookie + */ + + vm_offset_t *primary = (vm_offset_t *) addr; + vm_offset_t *backup = get_backup_ptr(inner_size, primary); + + *primary = ZP_POISON; + *backup = ZP_POISON; + } + TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr); if (addr) { @@ -2570,19 +2628,30 @@ zalloc_canblock( void * -zalloc( - register zone_t zone) +zalloc(zone_t zone) +{ + return (zalloc_internal(zone, TRUE, FALSE)); +} + +void * +zalloc_noblock(zone_t zone) { - return( zalloc_canblock(zone, TRUE) ); + return (zalloc_internal(zone, FALSE, FALSE)); } void * -zalloc_noblock( - register zone_t zone) +zalloc_nopagewait(zone_t zone) { - return( zalloc_canblock(zone, FALSE) ); + return (zalloc_internal(zone, TRUE, TRUE)); } +void * +zalloc_canblock(zone_t zone, boolean_t canblock) +{ + return (zalloc_internal(zone, canblock, FALSE)); +} + + void zalloc_async( __unused thread_call_param_t p0, @@ -2636,6 +2705,7 @@ zget( register zone_t zone) { vm_offset_t addr; + boolean_t check_poison = FALSE; #if CONFIG_ZLEAKS uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used for zone leak detection */ @@ -2644,24 +2714,27 @@ zget( assert( zone != ZONE_NULL ); - if (!lock_try_zone(zone)) - return NULL; - #if CONFIG_ZLEAKS /* * Zone leak detection: capture a backtrace */ - if (zone->zleak_on && (++zone->zleak_capture >= zleak_sample_factor)) { - zone->zleak_capture = 0; + if (__improbable(zone->zleak_on && sample_counter(&zone->zleak_capture, zleak_sample_factor) == TRUE)) { zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH); } #endif /* CONFIG_ZLEAKS */ - addr = try_alloc_from_zone(zone); + if (!lock_try_zone(zone)) + return NULL; + + addr = try_alloc_from_zone(zone, &check_poison); + + vm_offset_t inner_size = zone->elem_size; + #if ZONE_DEBUG if (addr && zone_debug_enabled(zone)) { enqueue_tail(&zone->active_zones, (queue_entry_t)addr); addr += ZONE_DEBUG_OFFSET; + inner_size -= ZONE_DEBUG_OFFSET; } #endif /* ZONE_DEBUG */ @@ -2680,6 +2753,31 @@ zget( unlock_zone(zone); + if (__improbable(check_poison && addr)) { + vm_offset_t *element_cursor = ((vm_offset_t *) addr) + 1; + vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *) addr); + + for ( ; element_cursor < backup ; element_cursor++) + if (__improbable(*element_cursor != ZP_POISON)) + zone_element_was_modified_panic(zone, + addr, + *element_cursor, + ZP_POISON, + ((vm_offset_t)element_cursor) - addr); + } + + if (addr) { + /* + * Clear out the old next pointer and backup to avoid leaking the cookie + * and so that only values on the freelist have a valid cookie + */ + vm_offset_t *primary = (vm_offset_t *) addr; + vm_offset_t *backup = get_backup_ptr(inner_size, primary); + + *primary = ZP_POISON; + *backup = ZP_POISON; + } + return((void *) addr); } @@ -2747,6 +2845,7 @@ zfree( uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */ int numsaved = 0; boolean_t gzfreed = FALSE; + boolean_t poison = FALSE; assert(zone != ZONE_NULL); @@ -2818,6 +2917,41 @@ zfree( return; } + if ((zp_factor != 0 || zp_tiny_zone_limit != 0) && !gzfreed) { + /* + * Poison the memory before it ends up on the freelist to catch + * use-after-free and use of uninitialized memory + * + * Always poison tiny zones' elements (limit is 0 if -no-zp is set) + * Also poison larger elements periodically + */ + + vm_offset_t inner_size = zone->elem_size; + +#if ZONE_DEBUG + if (!gzfreed && 
zone_debug_enabled(zone)) { + inner_size -= ZONE_DEBUG_OFFSET; + } +#endif + uint32_t sample_factor = zp_factor + (((uint32_t)inner_size) >> zp_scale); + + if (inner_size <= zp_tiny_zone_limit) + poison = TRUE; + else if (zp_factor != 0 && sample_counter(&zone->zp_count, sample_factor) == TRUE) + poison = TRUE; + + if (__improbable(poison)) { + + /* memset_pattern{4|8} could help make this faster: */ + /* Poison everything but primary and backup */ + vm_offset_t *element_cursor = ((vm_offset_t *) elem) + 1; + vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *)elem); + + for ( ; element_cursor < backup; element_cursor++) + *element_cursor = ZP_POISON; + } + } + lock_zone(zone); /* @@ -2867,7 +3001,7 @@ zfree( } if (__probable(!gzfreed)) - free_to_zone(zone, elem); + free_to_zone(zone, elem, poison); #if MACH_ASSERT if (zone->count < 0) diff --git a/osfmk/kern/zalloc.h b/osfmk/kern/zalloc.h index 19110a165..6210bd62f 100644 --- a/osfmk/kern/zalloc.h +++ b/osfmk/kern/zalloc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,7 +74,6 @@ #ifdef MACH_KERNEL_PRIVATE #include -#include #include #include #include @@ -214,6 +213,10 @@ __BEGIN_DECLS #ifdef XNU_KERNEL_PRIVATE +extern vm_offset_t zone_map_min_address; +extern vm_offset_t zone_map_max_address; + + /* Allocate from zone */ extern void * zalloc( zone_t zone); @@ -231,11 +234,15 @@ extern zone_t zinit( const char *name); /* a name for the zone */ +/* Non-waiting for memory version of zalloc */ +extern void * zalloc_nopagewait( + zone_t zone); + /* Non-blocking version of zalloc */ extern void * zalloc_noblock( zone_t zone); -/* direct (non-wrappered) interface */ +/* selective version of zalloc */ extern void * zalloc_canblock( zone_t zone, boolean_t canblock); diff --git a/osfmk/kperf/kperfbsd.c b/osfmk/kperf/kperfbsd.c index 149b07093..d712fd0d0 100644 --- a/osfmk/kperf/kperfbsd.c +++ b/osfmk/kperf/kperfbsd.c @@ -73,6 +73,8 @@ static lck_grp_t *kperf_cfg_lckgrp = NULL; static lck_mtx_t kperf_cfg_lock; static boolean_t kperf_cfg_initted = FALSE; +void kdbg_swap_global_state_pid(pid_t old_pid, pid_t new_pid); /* bsd/kern/kdebug.c */ + /*************************** * * lock init @@ -621,6 +623,9 @@ kperf_bless_pid(pid_t newpid) proc_rele(p); } + /* take trace facility as well */ + kdbg_swap_global_state_pid(blessed_pid, newpid); + blessed_pid = newpid; blessed_preempt = FALSE; diff --git a/osfmk/kperf/pet.c b/osfmk/kperf/pet.c index 68a3cabdf..68c7b0bd2 100644 --- a/osfmk/kperf/pet.c +++ b/osfmk/kperf/pet.c @@ -41,6 +41,9 @@ #include #include +extern kern_return_t task_resume_internal(task_t); +extern kern_return_t task_suspend_internal(task_t); + /* timer id to call back on */ static unsigned pet_timerid = 0; @@ -157,7 +160,7 @@ pet_sample_task_list( int taskc, task_array_t taskv ) /* try and stop any task other than the kernel task */ if( task != kernel_task ) { - kr = task_suspend( task ); + kr = task_suspend_internal( task ); /* try the next task */ if( kr != KERN_SUCCESS ) @@ -169,7 +172,7 @@ pet_sample_task_list( int taskc, task_array_t taskv ) /* if it wasn't the kernel, resume it */ if( task != kernel_task ) - task_resume(task); + (void) task_resume_internal(task); } } diff --git a/osfmk/kperf/threadinfo.c b/osfmk/kperf/threadinfo.c index 70d2c1fe2..d78af2068 100644 --- a/osfmk/kperf/threadinfo.c +++ b/osfmk/kperf/threadinfo.c @@ -56,13 +56,11 @@ make_runmode(thread_t thread) */ 
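Stepping back to the zalloc.c poisoning rework above: zfree() now decides whether to poison and fills the element before taking the zone lock, free_to_zone()/try_alloc_from_zone() merely carry a flag, and zalloc()/zget() verify the poison after dropping the lock, shrinking the locked section. A self-contained sketch of the scheme, assuming illustrative zp_factor/zp_scale values (the kernel takes them from boot-args) and a single-threaded sample counter:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define ZP_POISON 0xDEADBEEFDEADBEEFULL

    static uint32_t zp_factor = 8;  /* base sampling period           */
    static uint32_t zp_scale  = 4;  /* scales the period by elem size */

    /* Counterpart of the kernel's sample_counter(): true once every
     * 'factor' calls (the kernel version tolerates unlocked races). */
    static bool sample_counter(uint32_t *count, uint32_t factor)
    {
        if (++*count >= factor) { *count = 0; return true; }
        return false;
    }

    /* zfree() side: poison everything between the primary free-list
     * pointer (word 0) and the backup pointer (last word). */
    static void poison_element(uint64_t *elem, size_t words)
    {
        for (size_t i = 1; i + 1 < words; i++)
            elem[i] = ZP_POISON;
    }

    /* zalloc() side: verify after the zone lock is dropped; the kernel
     * panics on a mismatch where this sketch merely asserts. */
    static void verify_element(const uint64_t *elem, size_t words)
    {
        for (size_t i = 1; i + 1 < words; i++)
            assert(elem[i] == ZP_POISON);
    }

    int main(void)
    {
        uint64_t elem[8] = {0};
        size_t   words    = sizeof(elem) / sizeof(elem[0]);
        uint32_t zp_count = 0;

        /* Bigger elements are poisoned less often: the period grows
         * with element size, exactly as in the zfree() hunk. */
        uint32_t factor = zp_factor + (uint32_t)(sizeof(elem) >> zp_scale);

        while (!sample_counter(&zp_count, factor))
            ;               /* these frees would skip poisoning */
        poison_element(elem, words);
        verify_element(elem, words);
        return 0;
    }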
const int mode = chudxnu_thread_get_scheduler_state(thread); -#if !TARGET_OS_EMBEDDED if( 0 == mode) { return (chudxnu_thread_get_idle(thread) ? TH_IDLE : TH_IDLE_N); } else -#endif { // Today we happen to know there's a one-to-one mapping. return ((mode & 0xffff) | ((~mode & 0xffff) << 16)); @@ -218,11 +216,9 @@ typedef enum { // Target Thread State - can be OR'd extern "C" AppleProfileTriggerClientThreadRunMode AppleProfileGetRunModeOfThread(thread_t thread) { const int mode = chudxnu_thread_get_scheduler_state(thread); -#if !TARGET_OS_EMBEDDED if (0 == mode) { return (chudxnu_thread_get_idle(thread) ? kAppleProfileTriggerClientThreadModeIdle : kAppleProfileTriggerClientThreadModeNotIdle); } else -#endif return (AppleProfileTriggerClientThreadRunMode)((mode & 0xffff) | ((~mode & 0xffff) << 16)); // Today we happen to know there's a one-to-one mapping. } diff --git a/osfmk/mach/Makefile b/osfmk/mach/Makefile index 99a57a358..7338ab86b 100644 --- a/osfmk/mach/Makefile +++ b/osfmk/mach/Makefile @@ -9,9 +9,11 @@ include $(MakeInc_def) INSTINC_SUBDIRS = machine INSTINC_SUBDIRS_ARM = arm INSTINC_SUBDIRS_X86_64 = i386 +INSTINC_SUBDIRS_X86_64H = i386 EXPINC_SUBDIRS = machine EXPINC_SUBDIRS_X86_64 = i386 +EXPINC_SUBDIRS_X86_64H = i386 EXPINC_SUBDIRS_ARM = arm MIG_TYPES = \ @@ -33,10 +35,11 @@ MIG_DEFS = \ mach_host.defs \ mach_port.defs \ mach_vm.defs \ + mach_voucher.defs \ + mach_voucher_attr_control.defs \ notify.defs \ processor.defs \ processor_set.defs \ - security.defs \ task.defs \ task_access.defs \ telemetry_notification.defs \ @@ -45,11 +48,11 @@ MIG_DEFS = \ MACH_PRIVATE_DEFS = \ + coalition_notification.defs \ mach_notify.defs \ memory_object.defs \ memory_object_control.defs \ memory_object_default.defs \ - memory_object_name.defs \ upl.defs \ vm32_map.defs @@ -60,6 +63,7 @@ MACH_PRIVATE_DEFS = \ MIG_USHDRS = \ audit_triggers_server.h \ clock_reply_server.h \ + coalition_notification_server.h \ exc_server.h \ mach_exc_server.h \ memory_object_server.h \ @@ -77,11 +81,11 @@ MIG_UUHDRS = \ mach_host.h \ mach_port.h \ mach_vm.h \ + mach_voucher.h \ + mach_voucher_attr_control.h \ memory_object_control.h \ - memory_object_name.h \ processor.h \ processor_set.h \ - security.h \ task.h \ task_access.h \ thread_act.h \ @@ -106,12 +110,14 @@ DATAFILES = \ mach_time.h \ mach_traps.h \ mach_types.h \ + mach_voucher_types.h \ machine.h \ mach_syscalls.h \ memory_object_types.h \ message.h \ mig.h \ mig_errors.h \ + mig_voucher_support.h \ ndr.h \ notify.h \ policy.h \ @@ -151,6 +157,12 @@ INSTALL_MI_LIST = \ bootstrap.h \ ${DATAFILES} +INSTALL_MI_LCL_LIST = \ + bootstrap.h \ + sfi_class.h \ + coalition_notification.defs \ + ${DATAFILES} + INSTALL_KF_MI_LIST = \ mach_interface.h \ $(filter-out mach_traps.h mach_syscalls.h thread_switch.h, ${DATAFILES}) @@ -166,6 +178,7 @@ INSTALL_MI_DIR = mach EXPORT_MI_LIST = \ branch_predicates.h \ mach_interface.h \ + sfi_class.h \ ${DATAFILES} EXPORT_MI_GEN_LIST = \ @@ -223,6 +236,7 @@ MIG_KUHDRS = \ MIG_KUSRC = \ audit_triggers_user.c \ clock_reply_user.c \ + coalition_notification_user.c \ exc_user.c \ host_notify_reply_user.c \ mach_exc_user.c \ @@ -247,13 +261,13 @@ MIG_KSHDRS = \ mach_notify_server.h \ mach_port_server.h \ mach_vm_server.h \ + mach_voucher_server.h \ + mach_voucher_attr_control_server.h \ memory_object_server.h \ memory_object_control_server.h \ memory_object_default_server.h \ - memory_object_name_server.h \ processor_server.h \ processor_set_server.h \ - security_server.h \ task_server.h \ thread_act_server.h \ upl_server.h \ @@ 
-272,13 +286,13 @@ MIG_KSSRC = \ mach_notify_server.c \ mach_port_server.c \ mach_vm_server.c \ + mach_voucher_server.c \ + mach_voucher_attr_control_server.c \ memory_object_server.c \ memory_object_control_server.c \ memory_object_default_server.c \ - memory_object_name_server.c \ processor_server.c \ processor_set_server.c \ - security_server.c \ task_server.c \ thread_act_server.c \ upl_server.c \ diff --git a/osfmk/mach/coalition_notification.defs b/osfmk/mach/coalition_notification.defs new file mode 100644 index 000000000..b3392fc69 --- /dev/null +++ b/osfmk/mach/coalition_notification.defs @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2013, Apple Inc. All rights reserved. + */ + + /* + * Interface definition for the coalition facility. + */ + +subsystem +#if KERNEL_USER + KernelUser +#endif /* KERNEL_USER */ + coalition_notification 5300; + +#include +#include + +/* This is likely to change with 15385886. */ +simpleroutine coalition_notification( + RequestPort coalition_port : mach_port_t; + in id : uint64_t; + in flags : uint32_t); diff --git a/osfmk/mach/host_special_ports.h b/osfmk/mach/host_special_ports.h index d51582ef2..1fa0dff88 100644 --- a/osfmk/mach/host_special_ports.h +++ b/osfmk/mach/host_special_ports.h @@ -92,7 +92,9 @@ #define HOST_AMFID_PORT (11 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_GSSD_PORT (12 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_TELEMETRY_PORT (13 + HOST_MAX_SPECIAL_KERNEL_PORT) -#define HOST_MAX_SPECIAL_PORT (14 + HOST_MAX_SPECIAL_KERNEL_PORT) +#define HOST_ATM_NOTIFICATION_PORT (14 + HOST_MAX_SPECIAL_KERNEL_PORT) +#define HOST_COALITION_PORT (15 + HOST_MAX_SPECIAL_KERNEL_PORT) +#define HOST_MAX_SPECIAL_PORT (16 + HOST_MAX_SPECIAL_KERNEL_PORT) /* room to grow here as well */ /* @@ -190,4 +192,17 @@ HOST_LOCAL_NODE, HOST_TELEMETRY_PORT, (port))) #define host_set_telemetry_port(host, port) \ (host_set_special_port((host), HOST_TELEMETRY_PORT, (port))) + +#define host_get_atm_notification_port(host, port) \ + (host_get_special_port((host), \ + HOST_LOCAL_NODE, HOST_ATM_NOTIFICATION_PORT, (port))) +#define host_set_atm_notification_port(host, port) \ + (host_set_special_port((host), HOST_ATM_NOTIFICATION_PORT, (port))) + +#define host_get_coalition_port(host, port) \ + (host_get_special_port((host), \ + HOST_LOCAL_NODE, HOST_COALITION_PORT, (port))) +#define host_set_coalition_port(host, port) \ + (host_set_special_port((host), HOST_COALITION_PORT, (port))) + #endif /* _MACH_HOST_SPECIAL_PORTS_H_ */ diff --git a/osfmk/mach/i386/Makefile b/osfmk/mach/i386/Makefile index 16abdd76a..d7edf6ca3 100644 --- a/osfmk/mach/i386/Makefile +++ b/osfmk/mach/i386/Makefile @@ -13,7 +13,7 @@ DATAFILES = \ processor_info.h kern_return.h ndr_def.h syscall_sw.h \ thread_status.h thread_state.h vm_param.h \ vm_types.h rpc.h \ - machine_types.defs _structs.h sdt_isa.h + _structs.h sdt_isa.h INSTALL_MD_LIST = ${DATAFILES} diff --git a/osfmk/mach/i386/machine_types.defs b/osfmk/mach/i386/machine_types.defs deleted file mode 100644 index 9830993b2..000000000 --- a/osfmk/mach/i386/machine_types.defs +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ - -/* - * Header file for basic, machine-dependent data types. i386 version. - */ - -#ifndef _MACHINE_VM_TYPES_DEFS_ -#define _MACHINE_VM_TYPES_DEFS_ 1 - -type short = int16_t; -type int = int32_t; -type unsigned = uint32_t; - -type float = MACH_MSG_TYPE_REAL_32; -type double = MACH_MSG_TYPE_REAL_64; - - -/* from ISO/IEC 988:1999 spec */ -/* 7.18.1.4 Integer types capable of hgolding object pointers */ -/* - * The [u]intptr_t types for the native - * integer type, e.g. 32 or 64 or.. whatever - * register size the machine has. They are - * used for entities that might be either - * [unsigned] integers or pointers, and for - * type-casting between the two. - * - * For instance, the IPC system represents - * a port in user space as an integer and - * in kernel space as a pointer. - */ -#if defined(__LP64__) -type uintptr_t = uint64_t; -type intptr_t = int64_t; -#else -type uintptr_t = uint32_t; -type intptr_t = int32_t; -#endif - -/* - * These are the legacy Mach types that are - * the [rough] equivalents of the standards above. - * They were defined in terms of int, not - * long int, so they remain separate. - */ -#if defined(__LP64__) -type register_t = int64_t; -#else -type register_t = int32_t; -#endif -type integer_t = int32_t; -type natural_t = uint32_t; - -/* - * These are the VM types that scale with the address - * space size of a given process. - */ - -#if defined(__LP64__) -type vm_address_t = uint64_t; -type vm_offset_t = uint64_t; -type vm_size_t = uint64_t; -#else -type vm_address_t = natural_t; -type vm_offset_t = natural_t; -type vm_size_t = natural_t; -#endif - -type mach_port_context_t = uint64_t; - -/* - * The mach_vm_xxx_t types are sized to hold the - * maximum pointer, offset, etc... supported on the - * platform. - */ -type mach_vm_address_t = uint64_t; -type mach_vm_offset_t = uint64_t; -type mach_vm_size_t = uint64_t; - -#if MACH_IPC_COMPAT -/* - * For the old IPC interface - */ -#define MSG_TYPE_PORT_NAME natural_t - -#endif /* MACH_IPC_COMPAT */ - -/* - * These are types used internal to Mach to implement the - * legacy 32-bit VM APIs published by the kernel. 
- */ -#define VM32_SUPPORT 1 - -type vm32_address_t = uint32_t; -type vm32_offset_t = uint32_t; -type vm32_size_t = uint32_t; - -#endif /* _MACHINE_VM_TYPES_DEFS_ */ - -/* vim: set ft=c : */ diff --git a/osfmk/mach/i386/sdt_isa.h b/osfmk/mach/i386/sdt_isa.h index 14549e810..961d9f638 100644 --- a/osfmk/mach/i386/sdt_isa.h +++ b/osfmk/mach/i386/sdt_isa.h @@ -97,8 +97,8 @@ #define ARGS6_EXTENT 6 #define ARGS7_EXTENT 7 #define ARGS8_EXTENT 8 -#define ARGS9_EXTENT 10 -#define ARGS10_EXTENT 10 +#define ARGS9_EXTENT 9 +#define ARGS10_EXTENT 10 #define DTRACE_CALL0ARGS(provider, name) \ asm volatile ( \ diff --git a/osfmk/mach/i386/thread_status.h b/osfmk/mach/i386/thread_status.h index 9c5ae47b0..9c682ee72 100644 --- a/osfmk/mach/i386/thread_status.h +++ b/osfmk/mach/i386/thread_status.h @@ -365,28 +365,12 @@ typedef struct x86_64_intr_stack_frame x86_64_intr_stack_frame_t; * is no need for an x86_saved_state64_from_kernel variant */ struct x86_saved_state64 { - /* - * saved state organized to reflect the - * system call ABI register convention - * so that we can just pass a pointer - * to the saved state when calling through - * to the actual system call functions - * the ABI limits us to 6 args passed in - * registers... I've add v_arg6 - v_arg8 - * to accomodate our most 'greedy' system - * calls (both BSD and MACH)... the individual - * system call handlers will fill these in - * via copyin if needed... - */ uint64_t rdi; /* arg0 for system call */ uint64_t rsi; uint64_t rdx; uint64_t r10; /* R10 := RCX prior to syscall trap */ uint64_t r8; uint64_t r9; /* arg5 for system call */ - uint64_t v_arg6; - uint64_t v_arg7; - uint64_t v_arg8; uint64_t cr2; uint64_t r15; @@ -402,6 +386,8 @@ struct x86_saved_state64 { uint32_t gs; uint32_t fs; + uint64_t _pad; + struct x86_64_intr_stack_frame isf; }; typedef struct x86_saved_state64 x86_saved_state64_t; diff --git a/osfmk/mach/i386/vm_param.h b/osfmk/mach/i386/vm_param.h index 02bef271f..b4edac3cf 100644 --- a/osfmk/mach/i386/vm_param.h +++ b/osfmk/mach/i386/vm_param.h @@ -95,16 +95,18 @@ #define I386_PGBYTES 4096 /* bytes per 80386 page */ #define I386_PGSHIFT 12 /* bitshift for pages */ -#ifdef PRIVATE -#define KERNEL_PAGE_SIZE I386_PGBYTES -#define KERNEL_PAGE_SHIFT I386_PGSHIFT -#define KERNEL_PAGE_MASK (KERNEL_PAGE_SIZE-1) -#endif - #define PAGE_SIZE I386_PGBYTES #define PAGE_SHIFT I386_PGSHIFT #define PAGE_MASK (PAGE_SIZE - 1) +#define PAGE_MAX_SHIFT PAGE_SHIFT +#define PAGE_MAX_SIZE PAGE_SIZE +#define PAGE_MAX_MASK PAGE_MASK + +#define PAGE_MIN_SHIFT PAGE_SHIFT +#define PAGE_MIN_SIZE PAGE_SIZE +#define PAGE_MIN_MASK PAGE_MASK + #define I386_LPGBYTES 2*1024*1024 /* bytes per large page */ #define I386_LPGSHIFT 21 /* bitshift for large pages */ #define I386_LPGMASK (I386_LPGBYTES-1) @@ -162,6 +164,9 @@ #ifdef KERNEL_PRIVATE +#define TEST_PAGE_SIZE_16K FALSE +#define TEST_PAGE_SIZE_4K TRUE + /* Kernel-wide values */ #define KB (1024ULL) @@ -172,7 +177,7 @@ * Maximum physical memory supported. */ #define K32_MAXMEM (32*GB) -#define K64_MAXMEM (128*GB) +#define K64_MAXMEM (252*GB) #define KERNEL_MAXMEM K64_MAXMEM /* diff --git a/osfmk/mach/kern_return.h b/osfmk/mach/kern_return.h index da73236e3..bfedcc3ec 100644 --- a/osfmk/mach/kern_return.h +++ b/osfmk/mach/kern_return.h @@ -315,6 +315,10 @@ * as a result of a signature check. */ +#define KERN_POLICY_STATIC 51 + /* The requested property cannot be changed at this time. 
+ */ + #define KERN_RETURN_MAX 0x100 /* Maximum return value allowable */ diff --git a/osfmk/mach/mach_host.defs b/osfmk/mach/mach_host.defs index 184d1349b..63d8402cd 100644 --- a/osfmk/mach/mach_host.defs +++ b/osfmk/mach/mach_host.defs @@ -64,10 +64,6 @@ * control. */ -#ifdef MACH_KERNEL -#include -#endif /* MACH_KERNEL */ - subsystem #if KERNEL_SERVER KernelServer @@ -275,4 +271,33 @@ routine mach_zone_force_gc( skip; #endif +/* + * Create a new voucher by running a series of commands against + * pairs of resource attributes. + */ +routine host_create_mach_voucher( + host : host_t; + recipes : mach_voucher_attr_raw_recipe_array_t; + out voucher : ipc_voucher_t); + +/* + * Register a resource manager with the kernel. A new key is selected. + */ +routine host_register_mach_voucher_attr_manager( + host : host_t; + attr_manager : mach_voucher_attr_manager_t; + default_value : mach_voucher_attr_value_handle_t; + out new_key : mach_voucher_attr_key_t; + out new_attr_control: ipc_voucher_attr_control_t); + +/* + * Register a resource manager (with a well-known key value) with the kernel. + */ +routine host_register_well_known_mach_voucher_attr_manager( + host : host_t; + attr_manager : mach_voucher_attr_manager_t; + default_value : mach_voucher_attr_value_handle_t; + key : mach_voucher_attr_key_t; + out new_attr_control: ipc_voucher_attr_control_t); + /* vim: set ft=c : */ diff --git a/osfmk/mach/mach_interface.h b/osfmk/mach/mach_interface.h index 12218bd01..6f8cf8e03 100644 --- a/osfmk/mach/mach_interface.h +++ b/osfmk/mach/mach_interface.h @@ -54,7 +54,6 @@ */ #include #include -#include #include #endif diff --git a/osfmk/mach/mach_port.defs b/osfmk/mach/mach_port.defs index 8591f77d4..202b08bce 100644 --- a/osfmk/mach/mach_port.defs +++ b/osfmk/mach/mach_port.defs @@ -602,4 +602,13 @@ routine mach_port_unguard( #endif ); +/* + * Returns basic information about an IPC space. + * This call is only valid on MACH_IPC_DEBUG kernels. + * Otherwise, KERN_FAILURE is returned. + */ +routine mach_port_space_basic_info( + task : ipc_space_t; + out basic_info : ipc_info_space_basic_t); + /* vim: set ft=c : */ diff --git a/osfmk/mach/mach_time.h b/osfmk/mach/mach_time.h index de93d518d..16805896a 100644 --- a/osfmk/mach/mach_time.h +++ b/osfmk/mach/mach_time.h @@ -53,6 +53,7 @@ kern_return_t mach_wait_until( #endif /* KERNEL */ uint64_t mach_absolute_time(void); +uint64_t mach_approximate_time(void); __END_DECLS #endif /* _MACH_MACH_TIME_H_ */ diff --git a/osfmk/mach/mach_traps.h b/osfmk/mach/mach_traps.h index cfff70c09..c33a308f4 100644 --- a/osfmk/mach/mach_traps.h +++ b/osfmk/mach/mach_traps.h @@ -304,18 +304,6 @@ extern kern_return_t pid_for_task( mach_port_name_t t, int *x); -#if !defined(__LP64__) && !defined(__arm__) -/* these should go away altogether - so no 64 legacy please */ - -extern kern_return_t map_fd( - int fd, - vm_offset_t offset, - vm_offset_t *va, - boolean_t findspace, - vm_size_t size); - -#endif /* !defined(__LP64__) && !defined(__arm__) */ - #else /* KERNEL */ #ifdef XNU_KERNEL_PRIVATE @@ -325,9 +313,15 @@ extern kern_return_t map_fd( * The kernel may support multiple userspace ABIs, and must use * argument structures with elements large enough for any of them. */ +#if CONFIG_REQUIRES_U32_MUNGING #define PAD_(t) (sizeof(uint64_t) <= sizeof(t) \ ? 0 : sizeof(uint64_t) - sizeof(t)) #define PAD_ARG_8 +#else +#define PAD_(t) (sizeof(uint32_t) <= sizeof(t) \ + ? 
0 : sizeof(uint32_t) - sizeof(t)) +#define PAD_ARG_8 char arg8_pad_[sizeof(uint32_t)]; +#endif #if BYTE_ORDER == LITTLE_ENDIAN #define PADL_(t) 0 @@ -347,33 +341,6 @@ extern kern_return_t map_fd( * traps, without calling out to the BSD system call mungers. */ -#if 0 /* no active architectures use this */ -void munge_w(const void *, void *); -void munge_ww(const void *, void *); -void munge_www(const void *, void *); -void munge_wwww(const void *, void *); -void munge_wwwww(const void *, void *); -void munge_wwwwww(const void *, void *); -void munge_wwwwwww(const void *, void *); -void munge_wwwwwwww(const void *, void *); -void munge_d(const void *, void *); -void munge_dd(const void *, void *); -void munge_ddd(const void *, void *); -void munge_dddd(const void *, void *); -void munge_ddddd(const void *, void *); -void munge_dddddd(const void *, void *); -void munge_ddddddd(const void *, void *); -void munge_dddddddd(const void *, void *); -void munge_l(const void *, void *); -void munge_lw(const void *, void *); -void munge_lwww(const void *, void *); -void munge_wl(const void *, void *); -void munge_wlw(const void *, void *); -void munge_wwwl(const void *, void *); -void munge_wwwwl(const void *, void *); -void munge_wwwwwl(const void *, void *); -#endif /* 0 */ - struct kern_invalid_args { int32_t dummy; }; @@ -469,16 +436,6 @@ struct semaphore_timedwait_signal_trap_args { extern kern_return_t semaphore_timedwait_signal_trap( struct semaphore_timedwait_signal_trap_args *args); -struct map_fd_args { - PAD_ARG_(int, fd); - PAD_ARG_(vm_offset_t, offset); - PAD_ARG_(vm_offset_t *, va); - PAD_ARG_(boolean_t, findspace); - PAD_ARG_(vm_size_t, size); -}; -extern kern_return_t map_fd( - struct map_fd_args *args); - struct task_for_pid_args { PAD_ARG_(mach_port_name_t, target_tport); PAD_ARG_(int, pid); diff --git a/osfmk/mach/mach_types.defs b/osfmk/mach/mach_types.defs index dddcbdeae..d3077945a 100644 --- a/osfmk/mach/mach_types.defs +++ b/osfmk/mach/mach_types.defs @@ -514,6 +514,53 @@ type task_suspension_token_t = mach_port_move_send_once_t #endif /* KERNEL_SERVER */ ; + +/* public voucher types */ + +/* Mach voucher object */ +type mach_voucher_t = mach_port_t; +type mach_voucher_name_t = mach_port_name_t; + +type mach_voucher_attr_manager_t = mach_port_t; +type mach_voucher_attr_control_t = mach_port_t; + +/* IPC voucher internal object */ +type ipc_voucher_t = mach_port_t +#if KERNEL_SERVER + intran: ipc_voucher_t convert_port_to_voucher(mach_port_t) + outtran: mach_port_t convert_voucher_to_port(ipc_voucher_t) + destructor: ipc_voucher_release(ipc_voucher_t) +#endif /* KERNEL_SERVER */ + ; + +/* IPC voucher attribute control internal object */ +type ipc_voucher_attr_control_t = mach_port_t +#if KERNEL_SERVER + intran: ipc_voucher_attr_control_t convert_port_to_voucher_attr_control(mach_port_t) + outtran: mach_port_t convert_voucher_attr_control_to_port(ipc_voucher_attr_control_t) + destructor: ipc_voucher_attr_control_release(ipc_voucher_attr_control_t) +#endif /* KERNEL_SERVER */ + ; + +type mach_voucher_attr_key_t = uint32_t; + +type mach_voucher_attr_command_t = uint32_t; +type mach_voucher_attr_recipe_command_t = uint32_t; + +type mach_voucher_attr_content_size_t = uint32_t; +type mach_voucher_attr_content_t = array[*:4096] of uint8_t; +type mach_voucher_attr_content_array_t = array[*:5120] of uint8_t; + +type mach_voucher_attr_raw_recipe_size_t = uint32_t; +type mach_voucher_attr_raw_recipe_t = array[*:4096] of uint8_t; +type mach_voucher_attr_raw_recipe_array_t = array[*:5120] of 
uint8_t; + +type mach_voucher_selector_t = uint32_t; + +type mach_voucher_attr_value_handle_t = uint64_t; +type mach_voucher_attr_value_handle_array_t = array[*:4] of mach_voucher_attr_value_handle_t; +type mach_voucher_attr_value_reference_t = uint32_t; + /* kernel module loader */ type kmod_t = int; type kmod_control_flavor_t = int; @@ -526,6 +573,7 @@ type UNDServerRef = mach_port_t; #if KERNEL_SERVER #ifdef MACH_KERNEL_PRIVATE +simport ; /* for voucher conversions */ simport ; /* for null conversion */ simport ; /* for task/thread conversion */ simport ; /* for host/processor/pset conversions */ diff --git a/osfmk/mach/mach_types.h b/osfmk/mach/mach_types.h index 2da7476a9..8bfb9c4d1 100644 --- a/osfmk/mach/mach_types.h +++ b/osfmk/mach/mach_types.h @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,7 @@ typedef struct task *task_t, *task_name_t, *task_suspension_token_t; typedef struct thread *thread_t, *thread_act_t; typedef struct ipc_space *ipc_space_t; +typedef struct coalition *coalition_t; typedef struct host *host_t; typedef struct host *host_priv_t; typedef struct host *host_security_t; @@ -130,6 +132,7 @@ typedef struct alarm *alarm_t; typedef struct clock *clock_serv_t; typedef struct clock *clock_ctrl_t; + /* * OBSOLETE: lock_set interfaces are obsolete. */ @@ -166,6 +169,7 @@ typedef mach_port_t task_suspension_token_t; typedef mach_port_t thread_t; typedef mach_port_t thread_act_t; typedef mach_port_t ipc_space_t; +typedef mach_port_t coalition_t; typedef mach_port_t host_t; typedef mach_port_t host_priv_t; typedef mach_port_t host_security_t; @@ -253,6 +257,7 @@ typedef exception_handler_array_t exception_port_arrary_t; #define TID_NULL ((uint64_t) 0) #define THR_ACT_NULL ((thread_act_t) 0) #define IPC_SPACE_NULL ((ipc_space_t) 0) +#define COALITION_NULL ((coalition_t) 0) #define HOST_NULL ((host_t) 0) #define HOST_PRIV_NULL ((host_priv_t)0) #define HOST_SECURITY_NULL ((host_security_t)0) diff --git a/osfmk/mach/mach_voucher.defs b/osfmk/mach/mach_voucher.defs new file mode 100644 index 000000000..6d370a5b2 --- /dev/null +++ b/osfmk/mach/mach_voucher.defs @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+subsystem
+#if KERNEL_SERVER
+ KernelServer
+#endif /* KERNEL_SERVER */
+ mach_voucher 5400;
+
+#include
+#include
+
+/* extract just the content data for a pair */
+routine mach_voucher_extract_attr_content(
+ voucher : ipc_voucher_t;
+ key : mach_voucher_attr_key_t;
+ out content : mach_voucher_attr_content_t, CountInOut);
+
+/* extract a recipe to reconstitute a pair item in a future voucher */
+routine mach_voucher_extract_attr_recipe(
+ voucher : ipc_voucher_t;
+ key : mach_voucher_attr_key_t;
+ out recipe : mach_voucher_attr_raw_recipe_t, CountInOut);
+
+/* extract a recipe array to reconstitute all the key values in a future voucher */
+routine mach_voucher_extract_all_attr_recipes(
+ voucher : ipc_voucher_t;
+ out recipes : mach_voucher_attr_raw_recipe_array_t, CountInOut);
+
+/* execute a command against a given voucher attribute */
+routine mach_voucher_attr_command(
+ voucher : ipc_voucher_t;
+ key : mach_voucher_attr_key_t;
+ command : mach_voucher_attr_command_t;
+ in_content : mach_voucher_attr_content_t;
+ out out_content : mach_voucher_attr_content_t, CountInOut);
+
+/* return the raw recipe array for a voucher named in another task's IPC space (debug interface) */
+routine mach_voucher_debug_info(
+ task : ipc_space_t;
+ voucher_name: mach_port_name_t;
+ out recipes : mach_voucher_attr_raw_recipe_array_t, CountInOut);
+
+/* vim: set ft=c : */
diff --git a/osfmk/mach/mach_voucher_attr_control.defs b/osfmk/mach/mach_voucher_attr_control.defs
new file mode 100644
index 000000000..89fb66cc8
--- /dev/null
+++ b/osfmk/mach/mach_voucher_attr_control.defs
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2013 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+subsystem
+#if KERNEL_SERVER
+ KernelServer
+#endif /* KERNEL_SERVER */
+ mach_voucher_attr_control 5600;
+
+#include
+#include
+
+/* Extract the given voucher-control's value-handle from the supplied voucher */
+routine mach_voucher_attr_control_get_values(
+ control : ipc_voucher_attr_control_t;
+ voucher : ipc_voucher_t;
+ out value_handles : mach_voucher_attr_value_handle_array_t, CountInOut);
+
+/* Create a new voucher with the control's privilege (to directly assign value-handles) */
+routine mach_voucher_attr_control_create_mach_voucher(
+ control : ipc_voucher_attr_control_t;
+ recipes : mach_voucher_attr_raw_recipe_array_t;
+ out voucher : ipc_voucher_t);
diff --git a/osfmk/mach/mach_voucher_types.h b/osfmk/mach/mach_voucher_types.h
new file mode 100644
index 000000000..0f3d674bc
--- /dev/null
+++ b/osfmk/mach/mach_voucher_types.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2013 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _MACH_VOUCHER_TYPES_H_
+#define _MACH_VOUCHER_TYPES_H_
+
+#include
+#include
+
+/*
+ * Mach Voucher - an immutable collection of attribute value handles.
+ *
+ * The mach voucher is such that it can be passed between processes
+ * as a Mach port send right (by convention in the mach_msg_header_t’s
+ * msgh_voucher field).
+ *
+ * You may construct a new mach voucher by passing a construction
+ * recipe to host_create_mach_voucher(). The construction recipe supports
+ * generic commands for copying, removing, and redeeming attribute value
+ * handles from previous vouchers, or running attribute-manager-specific
+ * commands within the recipe.
+ *
+ * Once the set of attribute value handles is constructed and returned,
+ * that set will not change for the life of the voucher (the attribute
+ * value handles themselves do not change, but the values the handles refer
+ * to are free to change at will).
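A short user-space sketch of the recipe mechanism described above: a single MACH_VOUCHER_ATTR_USER_DATA_STORE command addressed to the user-data attribute manager, handed to host_create_mach_voucher(). It assumes an OS X 10.10-era SDK that exports these types and routines to user space, and does no cleanup of the returned port:

    #include <mach/mach.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        static const char payload[] = "example-tag";

        /* One variable-length recipe: a header immediately followed by
         * content_size bytes of attribute content. */
        uint8_t buf[sizeof(mach_voucher_attr_recipe_data_t) + sizeof(payload)];
        mach_voucher_attr_recipe_t recipe = (mach_voucher_attr_recipe_t)buf;

        recipe->key              = MACH_VOUCHER_ATTR_KEY_USER_DATA;
        recipe->command          = MACH_VOUCHER_ATTR_USER_DATA_STORE;
        recipe->previous_voucher = MACH_VOUCHER_NAME_NULL;
        recipe->content_size     = sizeof(payload);
        memcpy(recipe->content, payload, sizeof(payload));

        ipc_voucher_t voucher = IPC_VOUCHER_NULL;
        kern_return_t kr = host_create_mach_voucher(
            mach_host_self(),
            (mach_voucher_attr_raw_recipe_array_t)buf,
            (mach_msg_type_number_t)sizeof(buf),
            &voucher);

        printf("host_create_mach_voucher: %s\n", mach_error_string(kr));
        return kr == KERN_SUCCESS ? 0 : 1;
    }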
+ */ +typedef mach_port_t mach_voucher_t; +#define MACH_VOUCHER_NULL ((mach_voucher_t) 0) + +typedef mach_port_name_t mach_voucher_name_t; +#define MACH_VOUCHER_NAME_NULL ((mach_voucher_name_t) 0) + +typedef mach_voucher_name_t *mach_voucher_name_array_t; +#define MACH_VOUCHER_NAME_ARRAY_NULL ((mach_voucher_name_array_t) 0) + +/* + * This type changes appearance between user-space and kernel. It is + * a port at user-space and a reference to an ipc_voucher structure in-kernel. + */ +#if !defined(KERNEL) +typedef mach_voucher_t ipc_voucher_t; +#else +#if !defined(MACH_KERNEL_PRIVATE) +struct ipc_voucher ; +#endif +typedef struct ipc_voucher *ipc_voucher_t; +#endif +#define IPC_VOUCHER_NULL ((ipc_voucher_t) 0) + +/* + * mach_voucher_selector_t - A means of specifying which thread/task value to extract - + * the current voucher set at this level, or a voucher representing + * the full [layered] effective value for the task/thread. + */ +typedef uint32_t mach_voucher_selector_t; +#define MACH_VOUCHER_SELECTOR_CURRENT ((mach_voucher_selector_t)0) +#define MACH_VOUCHER_SELECTOR_EFFECTIVE ((mach_voucher_selector_t)1) + + +/* + * mach_voucher_attr_key_t - The key used to identify a particular managed resource or + * to select the specific resource manager’s data associated + * with a given voucher. + */ +typedef uint32_t mach_voucher_attr_key_t; +typedef mach_voucher_attr_key_t *mach_voucher_attr_key_array_t; + +#define MACH_VOUCHER_ATTR_KEY_ALL ((mach_voucher_attr_key_t)~0) +#define MACH_VOUCHER_ATTR_KEY_NONE ((mach_voucher_attr_key_t)0) + +/* other well-known-keys will be added here */ +#define MACH_VOUCHER_ATTR_KEY_ATM ((mach_voucher_attr_key_t)1) +#define MACH_VOUCHER_ATTR_KEY_IMPORTANCE ((mach_voucher_attr_key_t)2) +#define MACH_VOUCHER_ATTR_KEY_BANK ((mach_voucher_attr_key_t)3) + +#define MACH_VOUCHER_ATTR_KEY_USER_DATA ((mach_voucher_attr_key_t)7) +#define MACH_VOUCHER_ATTR_KEY_BITS MACH_VOUCHER_ATTR_KEY_USER_DATA /* deprecated */ +#define MACH_VOUCHER_ATTR_KEY_TEST ((mach_voucher_attr_key_t)8) + +#define MACH_VOUCHER_ATTR_KEY_NUM_WELL_KNOWN MACH_VOUCHER_ATTR_KEY_TEST + +/* + * mach_voucher_attr_content_t + * + * Data passed to a resource manager for modifying an attribute + * value or returned from the resource manager in response to a + * request to externalize the current value for that attribute. + */ +typedef uint8_t *mach_voucher_attr_content_t; +typedef uint32_t mach_voucher_attr_content_size_t; + +/* + * mach_voucher_attr_command_t - The private verbs implemented by each voucher + * attribute manager via mach_voucher_attr_command(). + */ +typedef uint32_t mach_voucher_attr_command_t; + +/* + * mach_voucher_attr_recipe_command_t + * + * The verbs used to create/morph a voucher attribute value. + * We define some system-wide commands here - related to creation, and transport of + * vouchers and attributes. Additional commands can be defined by, and supported by, + * individual attribute resource managers. + */ +typedef uint32_t mach_voucher_attr_recipe_command_t; +typedef mach_voucher_attr_recipe_command_t *mach_voucher_attr_recipe_command_array_t; + +#define MACH_VOUCHER_ATTR_NOOP ((mach_voucher_attr_recipe_command_t)0) +#define MACH_VOUCHER_ATTR_COPY ((mach_voucher_attr_recipe_command_t)1) +#define MACH_VOUCHER_ATTR_REMOVE ((mach_voucher_attr_recipe_command_t)2) +#define MACH_VOUCHER_ATTR_SET_VALUE_HANDLE ((mach_voucher_attr_recipe_command_t)3) + +/* redeem is on its way out? 
*/ +#define MACH_VOUCHER_ATTR_REDEEM ((mach_voucher_attr_recipe_command_t)10) + +/* recipe command(s) for importance attribute manager */ +#define MACH_VOUCHER_ATTR_IMPORTANCE_SELF ((mach_voucher_attr_recipe_command_t)200) + +/* recipe command(s) for bit-store attribute manager */ +#define MACH_VOUCHER_ATTR_USER_DATA_STORE ((mach_voucher_attr_recipe_command_t)211) +#define MACH_VOUCHER_ATTR_BITS_STORE MACH_VOUCHER_ATTR_USER_DATA_STORE /* deprecated */ + +/* recipe command(s) for test attribute manager */ +#define MACH_VOUCHER_ATTR_TEST_STORE MACH_VOUCHER_ATTR_USER_DATA_STORE + +/* + * mach_voucher_attr_recipe_t + * + * An element in a recipe list to create a voucher. + */ +#pragma pack(1) + +typedef struct mach_voucher_attr_recipe_data { + mach_voucher_attr_key_t key; + mach_voucher_attr_recipe_command_t command; + mach_voucher_name_t previous_voucher; + mach_voucher_attr_content_size_t content_size; + uint8_t content[]; +} mach_voucher_attr_recipe_data_t; +typedef mach_voucher_attr_recipe_data_t *mach_voucher_attr_recipe_t; +typedef mach_msg_type_number_t mach_voucher_attr_recipe_size_t; + +/* Make the above palatable to MIG */ +typedef uint8_t *mach_voucher_attr_raw_recipe_t; +typedef mach_voucher_attr_raw_recipe_t mach_voucher_attr_raw_recipe_array_t; +typedef mach_msg_type_number_t mach_voucher_attr_raw_recipe_size_t; +typedef mach_msg_type_number_t mach_voucher_attr_raw_recipe_array_size_t; + +#pragma pack() + +/* + * VOUCHER ATTRIBUTE MANAGER Writer types + */ + +/* + * mach_voucher_attr_manager_t + * + * A handle through which the mach voucher mechanism communicates with the voucher + * attribute manager for a given attribute key. + */ +typedef mach_port_t mach_voucher_attr_manager_t; +#define MACH_VOUCHER_ATTR_MANAGER_NULL ((mach_voucher_attr_manager_t) 0) + +/* + * mach_voucher_attr_control_t + * + * A handle provided to the voucher attribute manager for a given attribute key + * through which it makes inquiries or control operations of the mach voucher mechanism. + */ +typedef mach_port_t mach_voucher_attr_control_t; +#define MACH_VOUCHER_ATTR_CONTROL_NULL ((mach_voucher_attr_control_t) 0) + +/* + * These types are different in-kernel vs user-space. They are ports in user-space, + * pointers to opaque structs in most of the kernel, and pointers to known struct + * types in the Mach portion of the kernel. + */ +#if !defined(KERNEL) +typedef mach_port_t ipc_voucher_attr_manager_t; +typedef mach_port_t ipc_voucher_attr_control_t; +#else +#if !defined(MACH_KERNEL_PRIVATE) +struct ipc_voucher_attr_manager ; +struct ipc_voucher_attr_control ; +#endif +typedef struct ipc_voucher_attr_manager *ipc_voucher_attr_manager_t; +typedef struct ipc_voucher_attr_control *ipc_voucher_attr_control_t; +#endif +#define IPC_VOUCHER_ATTR_MANAGER_NULL ((ipc_voucher_attr_manager_t) 0) +#define IPC_VOUCHER_ATTR_CONTROL_NULL ((ipc_voucher_attr_control_t) 0) + +/* + * mach_voucher_attr_value_handle_t + * + * The private handle that the voucher attribute manager provides to + * the mach voucher mechanism to represent a given attr content/value. 
+ */ +typedef uint64_t mach_voucher_attr_value_handle_t; +typedef mach_voucher_attr_value_handle_t *mach_voucher_attr_value_handle_array_t; + +typedef mach_msg_type_number_t mach_voucher_attr_value_handle_array_size_t; +#define MACH_VOUCHER_ATTR_VALUE_MAX_NESTED ((mach_voucher_attr_value_handle_array_size_t)4) + +typedef uint32_t mach_voucher_attr_value_reference_t; + +/* USE - TBD */ +typedef uint32_t mach_voucher_attr_control_flags_t; +#define MACH_VOUCHER_ATTR_CONTROL_FLAGS_NONE ((mach_voucher_attr_control_flags_t)0) + +/* + * Commands and types for the IPC Importance Attribute Manager + * + * These are the valid mach_voucher_attr_command() options with the + * MACH_VOUCHER_ATTR_KEY_IMPORTANCE key. + */ +#define MACH_VOUCHER_IMPORTANCE_ATTR_ADD_EXTERNAL 1 /* Add some number of external refs (not supported) */ +#define MACH_VOUCHER_IMPORTANCE_ATTR_DROP_EXTERNAL 2 /* Drop some number of external refs */ +typedef uint32_t mach_voucher_attr_importance_refs; + +#endif /* _MACH_VOUCHER_TYPES_H_ */ diff --git a/osfmk/mach/machine.h b/osfmk/mach/machine.h index 4b9f35824..03e4f646a 100644 --- a/osfmk/mach/machine.h +++ b/osfmk/mach/machine.h @@ -150,6 +150,7 @@ __END_DECLS #define CPU_TYPE_MC98000 ((cpu_type_t) 10) #define CPU_TYPE_HPPA ((cpu_type_t) 11) #define CPU_TYPE_ARM ((cpu_type_t) 12) +#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) #define CPU_TYPE_MC88000 ((cpu_type_t) 13) #define CPU_TYPE_SPARC ((cpu_type_t) 14) #define CPU_TYPE_I860 ((cpu_type_t) 15) @@ -274,6 +275,7 @@ __END_DECLS #define CPU_SUBTYPE_X86_ALL ((cpu_subtype_t)3) #define CPU_SUBTYPE_X86_64_ALL ((cpu_subtype_t)3) #define CPU_SUBTYPE_X86_ARCH1 ((cpu_subtype_t)4) +#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell feature subset */ #define CPU_THREADTYPE_INTEL_HTT ((cpu_threadtype_t) 1) @@ -352,11 +354,19 @@ __END_DECLS #define CPU_SUBTYPE_ARM_V7 ((cpu_subtype_t) 9) #define CPU_SUBTYPE_ARM_V7F ((cpu_subtype_t) 10) /* Cortex A9 */ #define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t) 11) /* Swift */ -#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t) 12) /* Kirkwood40 */ +#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t) 12) #define CPU_SUBTYPE_ARM_V6M ((cpu_subtype_t) 14) /* Not meant to be run under xnu */ #define CPU_SUBTYPE_ARM_V7M ((cpu_subtype_t) 15) /* Not meant to be run under xnu */ #define CPU_SUBTYPE_ARM_V7EM ((cpu_subtype_t) 16) /* Not meant to be run under xnu */ +#define CPU_SUBTYPE_ARM_V8 ((cpu_subtype_t) 13) + +/* + * ARM64 subtypes + */ +#define CPU_SUBTYPE_ARM64_ALL ((cpu_subtype_t) 0) +#define CPU_SUBTYPE_ARM64_V8 ((cpu_subtype_t) 1) + #endif /* !__ASSEMBLER__ */ /* @@ -389,7 +399,9 @@ __END_DECLS #define CPUFAMILY_ARM_12 0xbd1b0ae9 #define CPUFAMILY_ARM_13 0x0cc90e64 #define CPUFAMILY_ARM_14 0x96077ef1 +#define CPUFAMILY_ARM_15 0xa8511bca #define CPUFAMILY_ARM_SWIFT 0x1e2d6381 +#define CPUFAMILY_ARM_CYCLONE 0x37a09642 /* The following synonyms are deprecated: */ #define CPUFAMILY_INTEL_6_14 CPUFAMILY_INTEL_YONAH diff --git a/osfmk/mach/machine/machine_types.defs b/osfmk/mach/machine/machine_types.defs index 2ed0d52fd..f48139487 100644 --- a/osfmk/mach/machine/machine_types.defs +++ b/osfmk/mach/machine/machine_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,18 +25,106 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +/* + * @OSF_COPYRIGHT@ + */ + +/* + * Header file for basic, machine-dependent data types. arm+i386 version. 
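One detail worth calling out in the machine.h hunk above: CPU_TYPE_ARM64 is defined by composition rather than as a literal. A quick check of the arithmetic, assuming the standard CPU_ARCH_ABI64 value of 0x01000000 from the same header:

    #include <assert.h>

    #define CPU_ARCH_ABI64 0x01000000                      /* 64-bit ABI flag */
    #define CPU_TYPE_ARM   ((int)12)
    #define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)

    int main(void)
    {
        /* The composed value, 0x0100000C, is what appears in the
         * cputype field of arm64 Mach-O headers. */
        assert(CPU_TYPE_ARM64 == 0x0100000C);
        return 0;
    }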
+ */
+
+#ifndef _MACH_MACHINE_MACHNINE_TYPES_DEFS
+#define _MACH_MACHINE_MACHNINE_TYPES_DEFS

-#ifndef _MACH_MACHINE_MACHINE_TYPES_DEFS
-#define _MACH_MACHINE_MACHINE_TYPES_DEFS
+type short = int16_t;
+type int = int32_t;
+type unsigned = uint32_t;

-#if defined (__i386__) || defined(__x86_64__)
-#include "mach/i386/machine_types.defs"
-#elif defined (__arm__)
-#include "mach/arm/machine_types.defs"
+type float = MACH_MSG_TYPE_REAL_32;
+type double = MACH_MSG_TYPE_REAL_64;
+
+
+/* from ISO/IEC 9899:1999 spec */
+/* 7.18.1.4 Integer types capable of holding object pointers */
+/*
+ * The [u]intptr_t types for the native
+ * integer type, e.g. 32 or 64 or.. whatever
+ * register size the machine has. They are
+ * used for entities that might be either
+ * [unsigned] integers or pointers, and for
+ * type-casting between the two.
+ *
+ * For instance, the IPC system represents
+ * a port in user space as an integer and
+ * in kernel space as a pointer.
+ */
+#if defined(__LP64__)
+type uintptr_t = uint64_t;
+type intptr_t = int64_t;
+#else
+type uintptr_t = uint32_t;
+type intptr_t = int32_t;
+#endif
+
+/*
+ * These are the legacy Mach types that are
+ * the [rough] equivalents of the standards above.
+ * They were defined in terms of int, not
+ * long int, so they remain separate.
+ */
+#if defined(__LP64__)
+type register_t = int64_t;
 #else
-#error architecture not supported
+type register_t = int32_t;
 #endif
+type integer_t = int32_t;
+type natural_t = uint32_t;
+
+/*
+ * These are the VM types that scale with the address
+ * space size of a given process.
+ */
+
+#if defined(__LP64__)
+type vm_address_t = uint64_t;
+type vm_offset_t = uint64_t;
+type vm_size_t = uint64_t;
+#else
+type vm_address_t = natural_t;
+type vm_offset_t = natural_t;
+type vm_size_t = natural_t;
+#endif
+
+/* This is a bit of a hack for arm. We implement the backend with a wide type, but present a native-sized type to callers */
+type mach_port_context_t = uint64_t;
+
+/*
+ * The mach_vm_xxx_t types are sized to hold the
+ * maximum pointer, offset, etc... supported on the
+ * platform.
+ */
+type mach_vm_address_t = uint64_t;
+type mach_vm_offset_t = uint64_t;
+type mach_vm_size_t = uint64_t;
+
+#if MACH_IPC_COMPAT
+/*
+ * For the old IPC interface
+ */
+#define MSG_TYPE_PORT_NAME natural_t
+
+#endif /* MACH_IPC_COMPAT */
+
+/*
+ * These are types used internal to Mach to implement the
+ * legacy 32-bit VM APIs published by the kernel.
+ */ +#define VM32_SUPPORT 1 + +type vm32_address_t = uint32_t; +type vm32_offset_t = uint32_t; +type vm32_size_t = uint32_t; -#endif /* _MACH_MACHINE_THREAD_STATUS_H_ */ +#endif /* _MACH_MACHINE_MACHNINE_TYPES_DEFS */ /* vim: set ft=c : */ diff --git a/osfmk/mach/memory_object.defs b/osfmk/mach/memory_object.defs index 01afb30a0..e6f93082b 100644 --- a/osfmk/mach/memory_object.defs +++ b/osfmk/mach/memory_object.defs @@ -71,10 +71,6 @@ subsystem #endif /* KERNEL_SERVER */ memory_object 2200; -#ifdef MACH_KERNEL -#include -#endif /* MACH_KERNEL */ - #include #include diff --git a/osfmk/mach/memory_object_control.defs b/osfmk/mach/memory_object_control.defs index 0b2a76ad8..e13d85b0e 100644 --- a/osfmk/mach/memory_object_control.defs +++ b/osfmk/mach/memory_object_control.defs @@ -71,10 +71,6 @@ subsystem #endif /* KERNEL_SERVER */ memory_object_control 2000; -#ifdef MACH_KERNEL -#include -#endif /* MACH_KERNEL */ - #include #include diff --git a/osfmk/mach/memory_object_name.defs b/osfmk/mach/memory_object_name.defs deleted file mode 100644 index c4d4b77df..000000000 --- a/osfmk/mach/memory_object_name.defs +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_FREE_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ -/* - * File: mach/mach_port.defs - * Author: Rich Draves - * - * Exported kernel calls. - */ - -subsystem -#if KERNEL_SERVER - KernelServer -#endif /* KERNEL_SERVER */ - memory_object_name 2600; - -#include -#include - -/* - * References to memory_object_name objects are returned by: - * vm_region(vm_map_t,...) - * - * These are used simply to compare one mapping againsts another - * and have no methods. - */ - -/* vim: set ft=c : */ diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index 96abd8026..ab6361793 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -399,12 +399,16 @@ typedef struct memory_object_attr_info memory_object_attr_info_data_t; * each of those pages. */ #ifdef PRIVATE -#define MAX_UPL_TRANSFER 256 -#define MAX_UPL_SIZE 8192 +#define MAX_UPL_TRANSFER_BYTES (1024 * 1024) +#define MAX_UPL_SIZE_BYTES (1024 * 1024 * 64) + +#define MAX_UPL_SIZE (MAX_UPL_SIZE_BYTES / PAGE_SIZE) +#define MAX_UPL_TRANSFER (MAX_UPL_TRANSFER_BYTES / PAGE_SIZE) + struct upl_page_info { ppnum_t phys_addr; /* physical page index number */ - unsigned int + unsigned int #ifdef XNU_KERNEL_PRIVATE pageout:1, /* page is to be removed on commit */ absent:1, /* No valid data in this page */ @@ -628,6 +632,9 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #ifdef PRIVATE +#define UPL_REPRIO_INFO_MASK (0xFFFFFFFF) +#define UPL_REPRIO_INFO_SHIFT 32 + /* access macros for upl_t */ #define UPL_DEVICE_PAGE(upl) \ @@ -662,6 +669,12 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ (((upl)[(index)].phys_addr != 0) ? \ ((upl)[(index)].pageout = FALSE) : FALSE) +#define UPL_REPRIO_INFO_BLKNO(upl, index) \ + (((upl)->upl_reprio_info[(index)]) & UPL_REPRIO_INFO_MASK) + +#define UPL_REPRIO_INFO_LEN(upl, index) \ + ((((upl)->upl_reprio_info[(index)]) >> UPL_REPRIO_INFO_SHIFT) & UPL_REPRIO_INFO_MASK) + /* modifier macros for upl_t */ #define UPL_SET_CS_VALIDATED(upl, index, value) \ @@ -670,6 +683,10 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_SET_CS_TAINTED(upl, index, value) \ ((upl)[(index)].cs_tainted = ((value) ? 
TRUE : FALSE))
+#define UPL_SET_REPRIO_INFO(upl, index, blkno, len) \
+	((upl)->upl_reprio_info[(index)]) = (((uint64_t)(blkno) & UPL_REPRIO_INFO_MASK) | \
+	(((uint64_t)(len) & UPL_REPRIO_INFO_MASK) << UPL_REPRIO_INFO_SHIFT))
+
 /* The call prototyped below is used strictly by UPL_GET_INTERNAL_PAGE_LIST */
 
 extern vm_size_t upl_offset_to_pagelist;
@@ -704,6 +721,10 @@ extern boolean_t upl_speculative_page(upl_page_info_t *upl, int index);
 extern void upl_clear_dirty(upl_t upl, boolean_t value);
 extern void upl_set_referenced(upl_t upl, boolean_t value);
 extern void upl_range_needed(upl_t upl, int index, int count);
+#if CONFIG_IOSCHED
+extern int64_t upl_blkno(upl_page_info_t *upl, int index);
+extern void upl_set_blkno(upl_t upl, vm_offset_t upl_offset, int size, int64_t blkno);
+#endif
 
 __END_DECLS
 
@@ -715,6 +736,8 @@ extern boolean_t upl_page_present(upl_page_info_t *upl, int index);
 extern boolean_t upl_dirty_page(upl_page_info_t *upl, int index);
 extern boolean_t upl_valid_page(upl_page_info_t *upl, int index);
 extern void upl_deallocate(upl_t upl);
+extern void upl_mark_decmp(upl_t upl);
+extern void upl_unmark_decmp(upl_t upl);
 
 __END_DECLS
 
diff --git a/osfmk/mach/message.h b/osfmk/mach/message.h
index 7c1f16164..4bf640d23 100644
--- a/osfmk/mach/message.h
+++ b/osfmk/mach/message.h
@@ -107,10 +107,17 @@ typedef natural_t mach_msg_timeout_t;
  *  (Ie, an error may or may not be produced.)
  *
  *  The value of MACH_MSGH_BITS_REMOTE determines the interpretation
- *  of the msgh_remote_port field.  It is handled like a msgt_name.
+ *  of the msgh_remote_port field.  It is handled like a msgt_name,
+ *  but must result in a send or send-once type right.
  *
  *  The value of MACH_MSGH_BITS_LOCAL determines the interpretation
- *  of the msgh_local_port field.  It is handled like a msgt_name.
+ *  of the msgh_local_port field.  It is handled like a msgt_name,
+ *  and also must result in a send or send-once type right.
+ *
+ *  The value of MACH_MSGH_BITS_VOUCHER determines the interpretation
+ *  of the msgh_voucher_port field.  It is handled like a msgt_name,
+ *  but must result in a send right (and the msgh_voucher_port field
+ *  must be the name of a send right to a Mach voucher kernel object).
 *
 *  MACH_MSGH_BITS() combines two MACH_MSG_TYPE_* values, for the remote
 *  and local fields, into a single value suitable for msgh_bits.
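/*
 * Illustrative sketch: the UPL_REPRIO_INFO macros above pack a 32-bit block
 * number (low bits) and a 32-bit length (high bits) into one 64-bit slot of
 * upl_reprio_info[]. The standalone program below mirrors that exact bit
 * layout with a plain uint64_t; the REPRIO_INFO_* names are local stand-ins,
 * not part of the patch.
 */
#include <stdint.h>
#include <assert.h>

#define REPRIO_INFO_MASK	(0xFFFFFFFFULL)	/* low 32 bits: block number */
#define REPRIO_INFO_SHIFT	32		/* high 32 bits: length */

int main(void)
{
	uint64_t slot;
	uint64_t blkno = 0x12345678, len = 0x9ABC;

	/* pack, as UPL_SET_REPRIO_INFO does */
	slot = ((blkno & REPRIO_INFO_MASK) |
	    ((len & REPRIO_INFO_MASK) << REPRIO_INFO_SHIFT));

	/* unpack, as UPL_REPRIO_INFO_BLKNO and UPL_REPRIO_INFO_LEN do */
	assert((slot & REPRIO_INFO_MASK) == blkno);
	assert(((slot >> REPRIO_INFO_SHIFT) & REPRIO_INFO_MASK) == len);
	return 0;
}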
@@ -122,30 +129,69 @@ typedef natural_t mach_msg_timeout_t; */ #define MACH_MSGH_BITS_ZERO 0x00000000 -#define MACH_MSGH_BITS_REMOTE_MASK 0x000000ff -#define MACH_MSGH_BITS_LOCAL_MASK 0x0000ff00 -#define MACH_MSGH_BITS_COMPLEX 0x80000000U -#define MACH_MSGH_BITS_USER 0x8000ffffU -#define MACH_MSGH_BITS_CIRCULAR 0x40000000 /* internal use only */ -#define MACH_MSGH_BITS_RAISEIMP 0x20000000 /* importance raise, internal use only */ -#define MACH_MSGH_BITS_IMPHOLDASRT 0x10000000 /* holds assertion alredy, in userland */ -#define MACH_MSGH_BITS_USED 0xf000ffffU +#define MACH_MSGH_BITS_REMOTE_MASK 0x0000001f +#define MACH_MSGH_BITS_LOCAL_MASK 0x00001f00 +#define MACH_MSGH_BITS_VOUCHER_MASK 0x001f0000 + +#define MACH_MSGH_BITS_PORTS_MASK \ + (MACH_MSGH_BITS_REMOTE_MASK | \ + MACH_MSGH_BITS_LOCAL_MASK | \ + MACH_MSGH_BITS_VOUCHER_MASK) + +#define MACH_MSGH_BITS_COMPLEX 0x80000000U /* message is complex */ + +#define MACH_MSGH_BITS_USER 0x801f1f1fU /* allowed bits user->kernel */ + +#define MACH_MSGH_BITS_RAISEIMP 0x20000000U /* importance raised due to msg */ +#define MACH_MSGH_BITS_DENAP MACH_MSGH_BITS_RAISEIMP -#define MACH_MSGH_BITS_PORTS_MASK \ - (MACH_MSGH_BITS_REMOTE_MASK|MACH_MSGH_BITS_LOCAL_MASK) +#define MACH_MSGH_BITS_IMPHOLDASRT 0x10000000U /* assertion help, userland private */ +#define MACH_MSGH_BITS_DENAPHOLDASRT MACH_MSGH_BITS_IMPHOLDASRT -#define MACH_MSGH_BITS(remote, local) \ +#define MACH_MSGH_BITS_CIRCULAR 0x10000000U /* message circular, kernel private */ + +#define MACH_MSGH_BITS_USED 0xb01f1f1fU + +/* setter macros for the bits */ +#define MACH_MSGH_BITS(remote, local) /* legacy */ \ ((remote) | ((local) << 8)) +#define MACH_MSGH_BITS_SET_PORTS(remote, local, voucher) \ + (((remote) & MACH_MSGH_BITS_REMOTE_MASK) | \ + (((local) << 8) & MACH_MSGH_BITS_LOCAL_MASK) | \ + (((voucher) << 16) & MACH_MSGH_BITS_VOUCHER_MASK)) +#define MACH_MSGH_BITS_SET(remote, local, voucher, other) \ + (MACH_MSGH_BITS_SET_PORTS((remote), (local), (voucher)) \ + | ((other) &~ MACH_MSGH_BITS_PORTS_MASK)) + +/* getter macros for pulling values out of the bits field */ #define MACH_MSGH_BITS_REMOTE(bits) \ ((bits) & MACH_MSGH_BITS_REMOTE_MASK) #define MACH_MSGH_BITS_LOCAL(bits) \ (((bits) & MACH_MSGH_BITS_LOCAL_MASK) >> 8) +#define MACH_MSGH_BITS_VOUCHER(bits) \ + (((bits) & MACH_MSGH_BITS_VOUCHER_MASK) >> 16) #define MACH_MSGH_BITS_PORTS(bits) \ - ((bits) & MACH_MSGH_BITS_PORTS_MASK) + ((bits) & MACH_MSGH_BITS_PORTS_MASK) #define MACH_MSGH_BITS_OTHER(bits) \ ((bits) &~ MACH_MSGH_BITS_PORTS_MASK) +/* checking macros */ +#define MACH_MSGH_BITS_HAS_REMOTE(bits) \ + (MACH_MSGH_BITS_REMOTE(bits) != MACH_MSGH_BITS_ZERO) +#define MACH_MSGH_BITS_HAS_LOCAL(bits) \ + (MACH_MSGH_BITS_LOCAL(bits) != MACH_MSGH_BITS_ZERO) +#define MACH_MSGH_BITS_HAS_VOUCHER(bits) \ + (MACH_MSGH_BITS_VOUCHER(bits) != MACH_MSGH_BITS_ZERO) +#define MACH_MSGH_BITS_IS_COMPLEX(bits) \ + (((bits) & MACH_MSGH_BITS_COMPLEX) != MACH_MSGH_BITS_ZERO) + +/* importance checking macros */ +#define MACH_MSGH_BITS_RAISED_IMPORTANCE(bits) \ + (((bits) & MACH_MSGH_BITS_RAISEIMP) != MACH_MSGH_BITS_ZERO) +#define MACH_MSGH_BITS_HOLDS_IMPORTANCE_ASSERTION(bits) \ + (((bits) & MACH_MSGH_BITS_IMPHOLDASRT) != MACH_MSGH_BITS_ZERO) + /* * Every message starts with a message header. * Following the message header, if the message is complex, are a count @@ -162,10 +208,9 @@ typedef natural_t mach_msg_timeout_t; * to reply to the message. It may carry the values MACH_PORT_NULL, * MACH_PORT_DEAD, a send-once right, or a send right. 
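/*
 * Illustrative sketch: how the widened msgh_bits layout above is meant to be
 * used. Each disposition now occupies five bits, with the voucher disposition
 * in bits 16..20. The helper below is hypothetical; only the MACH_MSGH_BITS_*
 * macros and header fields come from the header as patched.
 */
#include <mach/message.h>

static void
fill_request_header(mach_msg_header_t *hdr, mach_msg_size_t size,
    mach_port_t dest, mach_port_t reply, mach_port_name_t voucher)
{
	hdr->msgh_bits = MACH_MSGH_BITS_SET(
	    MACH_MSG_TYPE_COPY_SEND,	/* remote: destination port */
	    MACH_MSG_TYPE_MAKE_SEND_ONCE,	/* local: reply port */
	    MACH_MSG_TYPE_COPY_SEND,	/* voucher: must be a send right */
	    0);				/* no other bits: not complex */
	hdr->msgh_size = size;
	hdr->msgh_remote_port = dest;
	hdr->msgh_local_port = reply;
	hdr->msgh_voucher_port = voucher;
	hdr->msgh_id = 0;		/* caller-defined message id */
}
/*
 * On the receive side, MACH_MSGH_BITS_VOUCHER(hdr->msgh_bits) recovers the
 * voucher disposition, and MACH_MSGH_BITS_HAS_VOUCHER() tests whether one
 * accompanied the message.
 */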
* - * The msgh_seqno field carries a sequence number associated with the - * received-from port. A port's sequence number is incremented every - * time a message is received from it. In sent messages, the field's - * value is ignored. + * The msgh_voucher_port field specifies a Mach voucher port. Only + * send rights to kernel-implemented Mach Voucher kernel objects in + * addition to MACH_PORT_NULL or MACH_PORT_DEAD may be passed. * * The msgh_id field is uninterpreted by the message primitives. * It normally carries information specifying the format @@ -181,13 +226,16 @@ typedef integer_t mach_msg_id_t; typedef unsigned int mach_msg_type_name_t; -#define MACH_MSG_TYPE_MOVE_RECEIVE 16 /* Must hold receive rights */ -#define MACH_MSG_TYPE_MOVE_SEND 17 /* Must hold send rights */ -#define MACH_MSG_TYPE_MOVE_SEND_ONCE 18 /* Must hold sendonce rights */ -#define MACH_MSG_TYPE_COPY_SEND 19 /* Must hold send rights */ -#define MACH_MSG_TYPE_MAKE_SEND 20 /* Must hold receive rights */ -#define MACH_MSG_TYPE_MAKE_SEND_ONCE 21 /* Must hold receive rights */ -#define MACH_MSG_TYPE_COPY_RECEIVE 22 /* Must hold receive rights */ +#define MACH_MSG_TYPE_MOVE_RECEIVE 16 /* Must hold receive right */ +#define MACH_MSG_TYPE_MOVE_SEND 17 /* Must hold send right(s) */ +#define MACH_MSG_TYPE_MOVE_SEND_ONCE 18 /* Must hold sendonce right */ +#define MACH_MSG_TYPE_COPY_SEND 19 /* Must hold send right(s) */ +#define MACH_MSG_TYPE_MAKE_SEND 20 /* Must hold receive right */ +#define MACH_MSG_TYPE_MAKE_SEND_ONCE 21 /* Must hold receive right */ +#define MACH_MSG_TYPE_COPY_RECEIVE 22 /* NOT VALID */ +#define MACH_MSG_TYPE_DISPOSE_RECEIVE 24 /* must hold receive right */ +#define MACH_MSG_TYPE_DISPOSE_SEND 25 /* must hold send right(s) */ +#define MACH_MSG_TYPE_DISPOSE_SEND_ONCE 26 /* must hold sendonce right */ typedef unsigned int mach_msg_copy_options_t; @@ -356,11 +404,12 @@ typedef struct mach_msg_size_t msgh_size; mach_port_t msgh_remote_port; mach_port_t msgh_local_port; - mach_msg_size_t msgh_reserved; + mach_port_name_t msgh_voucher_port; mach_msg_id_t msgh_id; } mach_msg_header_t; -#define MACH_MSG_NULL (mach_msg_header_t *) 0 +#define msgh_reserved msgh_voucher_port +#define MACH_MSG_NULL (mach_msg_header_t *) 0 typedef struct { @@ -381,6 +430,15 @@ typedef struct mach_msg_trailer_size_t msgh_trailer_size; } mach_msg_trailer_t; +/* + * The msgh_seqno field carries a sequence number + * associated with the received-from port. A port's + * sequence number is incremented every time a message + * is received from it and included in the received + * trailer to help put messages back in sequence if + * multiple threads receive and/or process received + * messages. 
+ */
 typedef struct
 {
   mach_msg_trailer_type_t msgh_trailer_type;
@@ -434,6 +492,8 @@ typedef struct
 	mach_port_context_t msgh_context;
 } mach_msg_context_trailer_t;
 
+
+
 typedef struct
 {
   mach_port_name_t sender;
@@ -456,6 +516,7 @@ typedef struct
   msg_labels_t msgh_labels;
 } mach_msg_mac_trailer_t;
 
+
 #define MACH_MSG_TRAILER_MINIMUM_SIZE  sizeof(mach_msg_trailer_t)
 
 /*
@@ -601,12 +662,14 @@ typedef integer_t mach_msg_option_t;
 #define MACH_SEND_ALWAYS	0x00010000	/* ignore qlimits - kernel only */
 #define MACH_SEND_TRAILER	0x00020000	/* sender-provided trailer */
 #define MACH_SEND_NOIMPORTANCE	0x00040000	/* msg won't carry importance */
+#define MACH_SEND_NODENAP	MACH_SEND_NOIMPORTANCE
 #define MACH_SEND_IMPORTANCE	0x00080000	/* msg carries importance - kernel only */
 
 #define MACH_RCV_TIMEOUT	0x00000100	/* timeout value applies to receive */
 #define MACH_RCV_NOTIFY	0x00000200	/* reserved - legacy */
 #define MACH_RCV_INTERRUPT	0x00000400	/* don't restart interrupted receive */
+#define MACH_RCV_VOUCHER	0x00000800	/* willing to receive voucher port */
 #define MACH_RCV_OVERWRITE	0x00001000	/* scatter receive */
 
 /*
@@ -641,7 +704,7 @@ typedef integer_t mach_msg_option_t;
 #define MACH_RCV_USER (MACH_RCV_MSG | \
 	MACH_RCV_TIMEOUT | MACH_RCV_OVERWRITE | \
 	MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY | \
-	MACH_RCV_TRAILER_MASK)
+	MACH_RCV_VOUCHER | MACH_RCV_TRAILER_MASK)
 
 #define MACH_MSG_OPTION_USER	 (MACH_SEND_USER | MACH_RCV_USER)
 
@@ -732,6 +795,8 @@ typedef kern_return_t mach_msg_return_t;
 		/* Bogus destination port. */
 #define MACH_SEND_TIMED_OUT		0x10000004
 		/* Message not sent before timeout expired. */
+#define MACH_SEND_INVALID_VOUCHER	0x10000005
+		/* Bogus voucher port. */
 #define MACH_SEND_INTERRUPTED		0x10000007
 		/* Software interrupt. */
 #define MACH_SEND_MSG_TOO_SMALL		0x10000008
@@ -840,6 +905,15 @@ extern mach_msg_return_t	mach_msg(
 					mach_msg_timeout_t timeout,
 					mach_port_name_t notify);
 
+/*
+ *	Routine:	mach_voucher_deallocate
+ *	Purpose:
+ *		Deallocate a mach voucher created or received in a message.  Drops
+ *		one (send right) reference to the voucher.
+ */
+extern kern_return_t		mach_voucher_deallocate(
+					mach_port_name_t voucher);
+
 #elif defined(MACH_KERNEL_PRIVATE)
 
 extern mach_msg_return_t	mach_msg_receive_results(void);
 
diff --git a/osfmk/mach/mig_voucher_support.h b/osfmk/mach/mig_voucher_support.h
new file mode 100644
index 000000000..064755ab6
--- /dev/null
+++ b/osfmk/mach/mig_voucher_support.h
@@ -0,0 +1,8 @@
+//This dummy header file exists so that mig can detect when to include voucher code.
+//Mig checks whether this file is available to include; if it is, Libsyscall has the new voucher symbols to link against.
+//Do not delete this file; without it, mig will stop including voucher code.
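/*
 * Illustrative sketch: receiving with the MACH_RCV_VOUCHER option added above
 * and releasing the received voucher right with mach_voucher_deallocate(),
 * declared just above. Error handling and the real message layout are
 * elided; 'rxport' is a hypothetical receive right, and the buffer is sized
 * only for an empty message.
 */
#include <mach/mach.h>

static void
drain_one(mach_port_t rxport)
{
	struct {
		mach_msg_header_t header;
		mach_msg_trailer_t trailer;
	} msg;

	kern_return_t kr = mach_msg(&msg.header,
	    MACH_RCV_MSG | MACH_RCV_VOUCHER,
	    0, sizeof(msg), rxport, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
	if (kr != KERN_SUCCESS)
		return;

	/* If a voucher accompanied the message, drop our send-right reference. */
	if (MACH_MSGH_BITS_HAS_VOUCHER(msg.header.msgh_bits))
		(void) mach_voucher_deallocate(msg.header.msgh_voucher_port);
}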
+ +#ifndef __MACH_MIG_VOUCHER_SUPPORT__ +#define __MACH_MIG_VOUCHER_SUPPORT__ + +#endif // __MACH_MIG_VOUCHER_SUPPORT__ diff --git a/osfmk/mach/port.h b/osfmk/mach/port.h index f9594bfb8..55735bc8a 100644 --- a/osfmk/mach/port.h +++ b/osfmk/mach/port.h @@ -348,6 +348,7 @@ typedef int mach_port_flavor_t; #define MACH_PORT_DNREQUESTS_SIZE 3 /* info is int */ #define MACH_PORT_TEMPOWNER 4 /* indicates receive right will be reassigned to another task */ #define MACH_PORT_IMPORTANCE_RECEIVER 5 /* indicates recieve right accepts priority donation */ +#define MACH_PORT_DENAP_RECEIVER 6 /* indicates receive right accepts de-nap donation */ #define MACH_PORT_INFO_EXT 7 /* uses mach_port_info_ext_t */ #define MACH_PORT_LIMITS_INFO_COUNT ((natural_t) \ @@ -382,7 +383,7 @@ typedef struct mach_port_qos { #define MPO_IMPORTANCE_RECEIVER 0x08 /* Mark the port as importance receiver */ #define MPO_INSERT_SEND_RIGHT 0x10 /* Insert a send right for the port */ #define MPO_STRICT 0x20 /* Apply strict guarding for port */ - +#define MPO_DENAP_RECEIVER 0x40 /* Mark the port as App de-nap receiver */ /* * Structure to define optional attributes for a newly * constructed port. diff --git a/osfmk/mach/security.defs b/osfmk/mach/security.defs deleted file mode 100644 index 8d27ae1a8..000000000 --- a/osfmk/mach/security.defs +++ /dev/null @@ -1,208 +0,0 @@ - -/* -*- C++ -*- */ - -subsystem -#if KERNEL_SERVER - KernelServer -#endif /* KERNEL_SERVER */ - security 5200; - -#include -#include - -type labelstr_t = c_string[*:512]; - -/** - @brief Retrieve a task label as a label handle - @param task Target's task port - @param label Returned label handle - - This call retrieves the label handle of the specified task. - - @return Standard MiG return values (0 for success) -*/ - -routine mach_get_task_label(task : ipc_space_t; - out label : mach_port_name_t); - -/** - @brief Retrieve a task label in textual form - @param task Target's task port - @param policies Comma-delimited list of policies to query - @param label Returned label text - - This call retrieves an externalized task label for the - specified task, with respect to the specified policies. - - @return Standard MiG return values (0 for success) -*/ - -routine mach_get_task_label_text(task : ipc_space_t; - policies : labelstr_t; - out label : labelstr_t); - -/** - @brief Retrieve a port label as a label handle - @param task Issuer's task port - @param port Port to query label from - @param label Returned label handle - - This call retrieves a new label handle for the specified port. - If the port represents a label handle, KERN_INVALID_ARGUMENT is - returned. - - @return Standard MiG return values (0 for success) -*/ - -routine mach_get_label(task : ipc_space_t; - port : mach_port_name_t; - out label : mach_port_name_t); - -/** - @brief Retrieve a port label in textual form - @param task Issuer's task port - @param name Port to query label from - @param policies Comma-delimited list of policies to query - @param label Returned label text - - This call retrieves an externalized port label for the specified port, - with respect to the specified policies. If the port represents a label - handle, the returned label text refers to the stored label and not the - access control label. 
- - @return Standard MiG return values (0 for success) -*/ - -routine mach_get_label_text(task : ipc_space_t; - name : mach_port_name_t; - policies : labelstr_t; - out label : labelstr_t); - -/** - @brief Relabel a port - @param task Task containing specified ports - @param name Port to relabel - @param label String representation of new label - - This call attempts to relabel the specified port to the - label specified. For label handles, it changes the access control - label and not the stored label. - - @return Standard MiG return values (0 for success) -*/ - -routine mach_set_port_label(task : ipc_space_t; - name : mach_port_name_t; - label : labelstr_t); - -/** - @brief Generic access control check - @param task Any task port - @param subj subject label in textual form - @param obj object label in textual form - @param serv Service or object class name - @param perm Permission, or method, within the specified service - - This function provides a general way for a user process to query - an arbitrary access control decision from the system's security policies. - Currently, there are no standards for the format of the service and - permission names. - - @return Standard MiG return values (0 for success) -*/ - -routine mac_check_service(task : ipc_space_t; - subject : labelstr_t; - object : labelstr_t; - service : labelstr_t; - perm : labelstr_t); - -/** - @brief Generic access control check - @param task Task containing specified ports (usually caller's) - @param subj subject label in textual form - @param obj port containing object label - @param serv Service or object class name - @param perm Permission, or method, within the specified service - - This function provides a general way for a user process to query - an arbitrary access control decision from the system's security policies. - Currently, there are no standards for the format of the service and - permission names. If the port is a label handle, the stored label is - used. Otherwise, its access control label is used. - - @return Standard MiG return values (0 for success) -*/ - -routine mac_port_check_service_obj(task : ipc_space_t; - subject : labelstr_t; - object : mach_port_name_t; - service : labelstr_t; - perm : labelstr_t); - -/** - @brief Generic access control check - @param task Task containing specified ports (usually caller's) - @param subj port containing subject label - @param obj port containing object label - @param serv Service or object class name - @param perm Permission, or method, within the specified service - - This function provides a general way for a user process to query - an arbitrary access control decision from the system's security policies. - Currently, there are no standards for the format of the service and - permission names. If any ports are label handles, the stored label is - used. Otherwise, the access control labels are used. - - @return Standard MiG return values (0 for success) -*/ - -routine mac_port_check_access(task : ipc_space_t; - subject : mach_port_name_t; - object : mach_port_name_t; - service : labelstr_t; - perm : labelstr_t); - -/** - @brief Create a new label handle - @param task Task to receive new ports (usually caller's) - @param name Returned label handle port - @param label String representation of new label - - Creates a new label handle, with the stored label defined by the - given text. Any task may create a label handle with any valid label, - not necessarily one that it has permission to access. 
A port right
-  for the new label handle is inserted into the specified task.
-  Posession of label handles should not imply any security properties.
-
-  @return Standard MiG return values (0 for success)
-*/
-
-routine mac_label_new(task : ipc_space_t;
-	out name : mach_port_name_t;
-	label : labelstr_t);
-
-/**
-  @brief Request a computed label
-  @param task subj,obj,newlabel relative to this task (usually caller's)
-  @param subj port containing subject label
-  @param obj port containing object label
-  @param serv Service or object class name
-  @param newlabel Returned label handle port
-
-  Ask the loaded policies to compute a label based on the two input labels
-  and the service name. There is currently no standard for the service name,
-  or even what the input labels represent (Subject and parent object are only
-  a suggestion). If any ports are label handles, the stored label is
-  used. Otherwise, the access control labels are used. A new label handle
-  is created to contain the computed label, and is stored into the
-  specified task.
-
-  @return Standard MiG return values (0 for success)
-*/
-
-routine mac_request_label(task : ipc_space_t;
-	subject : mach_port_name_t;
-	object : mach_port_name_t;
-	service : labelstr_t;
-	out newlabel : mach_port_name_t);
diff --git a/osfmk/mach/sfi_class.h b/osfmk/mach/sfi_class.h
new file mode 100644
index 000000000..8f856435b
--- /dev/null
+++ b/osfmk/mach/sfi_class.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _MACH_SFI_CLASS_H_
+#define _MACH_SFI_CLASS_H_
+
+#include
+#include
+
+#define SFI_CLASS_DEFINITIONS 20140526
+
+/*
+ * SFI classes are used to categorize threads and map
+ * them to "off-time" windows.
+ *
+ * Threads are classified approximately in this order, so
+ * there's an implicit requirement that the lowest duty cycle
+ * (largest off-time) classes have the lowest class IDs, and
+ * that increasing SFI class IDs imply increasing duty cycles
+ * (if SFI is enabled at all).
+ */
+typedef uint32_t sfi_class_id_t;
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/*
+ * Total number of classes supported including SFI_CLASS_UNSPECIFIED.
+ * If a new class is defined, increase this number.
+ */ +#define MAX_SFI_CLASS_ID 0x00000011 + +/* + * Threads may initially start out unspecified + */ +#define SFI_CLASS_UNSPECIFIED 0x00000000 + +#endif /* XNU_KERNEL_PRIVATE */ + +/* + * Threads are placed in this class as a result of placing threads or + * processes in a background state using APIs such as setpriority(2), + * specifying PRIO_DARWIN_THREAD or PRIO_DARWIN_PROCESS. + */ +#define SFI_CLASS_DARWIN_BG 0x00000001 + +/* + * Threads are placed in this class as a result of an application + * entering "Nap mode". + */ +#define SFI_CLASS_APP_NAP 0x00000002 + +/* + * Threads are placed in this class by making per coalition (by + * calling coalition_set_sfi_class(cid, SFI_CLASS_MANAGED)) or per + * process selection (by calling process_set_sfi_class(pid, + * SFI_CLASS_MANAGED)). FOCAL/NONFOCAL is a function of a task's + * role. + */ +#define SFI_CLASS_MANAGED_FOCAL 0x00000003 + +#define SFI_CLASS_MANAGED_NONFOCAL 0x00000004 + +#define SFI_CLASS_MANAGED SFI_CLASS_MANAGED_FOCAL + +/* + * Coalitions/processes that have not been explicitly tagged + * and are not opted into one of the special classes below + * fall into the default categories. FOCAL/NONFOCAL is a function + * of a task's role. + */ +#define SFI_CLASS_DEFAULT_FOCAL 0x00000005 + +#define SFI_CLASS_DEFAULT_NONFOCAL 0x00000006 + +#define SFI_CLASS_DEFAULT SFI_CLASS_DEFAULT_FOCAL + +/* + * Threads that are part of the kernel task should be duty-cycled + * only as an extreme last resort, since they must be preempted + * while locks may be held in kernel mode. + */ +#define SFI_CLASS_KERNEL 0x00000007 + +/* + * Threads that must not be part of "Selective Forced Idle" are + * placed in this class. Real time threads, threads of + * processes such as WindowServer that are critical to good user + * experience, should be placed in this class. + */ +#define SFI_CLASS_OPTED_OUT 0x00000008 + +/* + * Threads running in various QOS classes + */ +#define SFI_CLASS_UTILITY 0x00000009 +#define SFI_CLASS_LEGACY_FOCAL 0x0000000A +#define SFI_CLASS_LEGACY_NONFOCAL 0x0000000B +#define SFI_CLASS_USER_INITIATED_FOCAL 0x0000000C +#define SFI_CLASS_USER_INITIATED_NONFOCAL 0x0000000D +#define SFI_CLASS_USER_INTERACTIVE_FOCAL 0x0000000E +#define SFI_CLASS_USER_INTERACTIVE_NONFOCAL 0x0000000F +#define SFI_CLASS_MAINTENANCE 0x00000010 + +/* + * Windows that are specified smaller than MIN_SFI_WINDOW_USEC + * will be automatically rounded up. + */ +#define MIN_SFI_WINDOW_USEC 500 + +#endif /* _MACH_SFI_CLASS_H_ */ diff --git a/osfmk/mach/shared_region.h b/osfmk/mach/shared_region.h index 845cccc1d..db4f46e89 100644 --- a/osfmk/mach/shared_region.h +++ b/osfmk/mach/shared_region.h @@ -63,13 +63,23 @@ #define SHARED_REGION_NESTING_MIN_PPC64 0x0000000010000000ULL #define SHARED_REGION_NESTING_MAX_PPC64 0x0000000010000000ULL -#define SHARED_REGION_BASE_ARM 0x2C000000ULL -#define SHARED_REGION_SIZE_ARM 0x14000000ULL -#define SHARED_REGION_NESTING_BASE_ARM 0x2C000000ULL -#define SHARED_REGION_NESTING_SIZE_ARM 0x14000000ULL +#define SHARED_REGION_BASE_ARM 0x20000000ULL +#define SHARED_REGION_SIZE_ARM 0x20000000ULL +#define SHARED_REGION_NESTING_BASE_ARM 0x20000000ULL +#define SHARED_REGION_NESTING_SIZE_ARM 0x20000000ULL #define SHARED_REGION_NESTING_MIN_ARM ? #define SHARED_REGION_NESTING_MAX_ARM ? 
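/*
 * Illustrative sketch: the SFI class IDs defined in sfi_class.h above come in
 * FOCAL/NONFOCAL pairs selected by a task's role. The helpers below are
 * hypothetical ('is_focal' is an assumed input, not a kernel API); only the
 * SFI_CLASS_* constants and sfi_class_id_t come from the new header, assumed
 * installed as <mach/sfi_class.h>.
 */
#include <stdbool.h>
#include <mach/sfi_class.h>	/* assumed install path for the header above */

static sfi_class_id_t
managed_sfi_class(bool is_focal)
{
	/* Tasks tagged SFI_CLASS_MANAGED land in the focal or nonfocal bucket. */
	return is_focal ? SFI_CLASS_MANAGED_FOCAL : SFI_CLASS_MANAGED_NONFOCAL;
}

static sfi_class_id_t
default_sfi_class(bool is_focal)
{
	/* Untagged tasks fall into the default FOCAL/NONFOCAL categories. */
	return is_focal ? SFI_CLASS_DEFAULT_FOCAL : SFI_CLASS_DEFAULT_NONFOCAL;
}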
+#ifdef XNU_KERNEL_PRIVATE +/* ARM64_TODO: move to higher memory */ +#endif +#define SHARED_REGION_BASE_ARM64 0x180000000ULL +#define SHARED_REGION_SIZE_ARM64 0x20000000ULL +#define SHARED_REGION_NESTING_BASE_ARM64 0x180000000ULL +#define SHARED_REGION_NESTING_SIZE_ARM64 0x20000000ULL +#define SHARED_REGION_NESTING_MIN_ARM64 ? +#define SHARED_REGION_NESTING_MAX_ARM64 ? + #if defined(__i386__) #define SHARED_REGION_BASE SHARED_REGION_BASE_I386 #define SHARED_REGION_SIZE SHARED_REGION_SIZE_I386 diff --git a/osfmk/mach/syscall_sw.h b/osfmk/mach/syscall_sw.h index 42ccd6440..b326683d6 100644 --- a/osfmk/mach/syscall_sw.h +++ b/osfmk/mach/syscall_sw.h @@ -115,9 +115,6 @@ kernel_trap(semaphore_timedwait_signal_trap,-39,4) kernel_trap(_kernelrpc_mach_port_guard_trap,-41,5) kernel_trap(_kernelrpc_mach_port_unguard_trap,-42,4) -#if !defined(__LP64__) && !defined(__arm__) -kernel_trap(map_fd,-43,5) -#endif /*!defined(__LP64__) && !defined(__arm__) */ kernel_trap(task_name_for_pid,-44,3) kernel_trap(task_for_pid,-45,3) diff --git a/osfmk/mach/task.defs b/osfmk/mach/task.defs index ca6660b0f..4d4db4dfe 100644 --- a/osfmk/mach/task.defs +++ b/osfmk/mach/task.defs @@ -425,8 +425,6 @@ routine task_set_phys_footprint_limit( new_limit : int; out old_limit : int); -/* vim: set ft=c : */ - routine task_suspend2( target_task : task_t; out suspend_token : task_suspension_token_t); @@ -437,3 +435,20 @@ routine task_resume2( routine task_purgable_info( task : task_t; out stats : task_purgable_info_t); + +routine task_get_mach_voucher( + task : task_t; + which : mach_voucher_selector_t; + out voucher : ipc_voucher_t); + +routine task_set_mach_voucher( + task : task_t; + voucher : ipc_voucher_t); + +routine task_swap_mach_voucher( + task : task_t; + new_voucher : ipc_voucher_t; + inout old_voucher : ipc_voucher_t); + +/* vim: set ft=c : */ + diff --git a/osfmk/mach/task_info.h b/osfmk/mach/task_info.h index 8893c0a1e..599e00e24 100644 --- a/osfmk/mach/task_info.h +++ b/osfmk/mach/task_info.h @@ -330,6 +330,49 @@ typedef struct task_vm_info *task_vm_info_t; typedef struct vm_purgeable_info task_purgable_info_t; + +#define TASK_TRACE_MEMORY_INFO 24 +struct task_trace_memory_info { + uint64_t user_memory_address; /* address of start of trace memory buffer */ + uint64_t buffer_size; /* size of buffer in bytes */ + uint64_t mailbox_array_size; /* size of mailbox area in bytes */ +}; +typedef struct task_trace_memory_info task_trace_memory_info_data_t; +typedef struct task_trace_memory_info * task_trace_memory_info_t; +#define TASK_TRACE_MEMORY_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(task_trace_memory_info_data_t) / sizeof(natural_t))) + +#define TASK_WAIT_STATE_INFO 25 /* deprecated. 
*/
+struct task_wait_state_info {
+	uint64_t	total_wait_state_time;		/* Time that all threads past and present have been in a wait state */
+	uint64_t	total_wait_sfi_state_time;	/* Time that threads have been in SFI wait (should be a subset of total wait state time) */
+	uint32_t	_reserved[4];
+};
+typedef struct task_wait_state_info task_wait_state_info_data_t;
+typedef struct task_wait_state_info * task_wait_state_info_t;
+#define TASK_WAIT_STATE_INFO_COUNT	((mach_msg_type_number_t) \
+		(sizeof(task_wait_state_info_data_t) / sizeof(natural_t)))
+
+#define TASK_POWER_INFO_V2	26
+
+typedef struct {
+	uint64_t	task_gpu_utilisation;
+	uint64_t	task_gpu_stat_reserved0;
+	uint64_t	task_gpu_stat_reserved1;
+	uint64_t	task_gpu_stat_reserved2;
+} gpu_energy_data;
+
+typedef gpu_energy_data *gpu_energy_data_t;
+struct task_power_info_v2 {
+	task_power_info_data_t	cpu_energy;
+	gpu_energy_data	gpu_energy;
+};
+
+typedef struct task_power_info_v2	task_power_info_v2_data_t;
+typedef struct task_power_info_v2	*task_power_info_v2_t;
+#define TASK_POWER_INFO_V2_COUNT	((mach_msg_type_number_t) \
+		(sizeof (task_power_info_v2_data_t) / sizeof (natural_t)))
+
 /*
  * Obsolete interfaces.
  */
diff --git a/osfmk/mach/task_policy.h b/osfmk/mach/task_policy.h
index c05f6f1fd..c6852fe52 100644
--- a/osfmk/mach/task_policy.h
+++ b/osfmk/mach/task_policy.h
@@ -107,6 +107,9 @@ kern_return_t task_policy_get(
 #define TASK_POLICY_STATE	4
 #define TASK_BASE_QOS_POLICY	8
 #define TASK_OVERRIDE_QOS_POLICY	9
+#define TASK_BASE_LATENCY_QOS_POLICY	10
+#define TASK_BASE_THROUGHPUT_QOS_POLICY	11
+
 
 enum task_role {
 	TASK_RENICED = -1,
@@ -141,6 +144,7 @@ enum task_latency_qos {
 	LATENCY_QOS_TIER_3 = ((0xFF<<16) | 4),
 	LATENCY_QOS_TIER_4 = ((0xFF<<16) | 5),
 	LATENCY_QOS_TIER_5 = ((0xFF<<16) | 6)
+
 };
 typedef integer_t	task_latency_qos_t;
 enum task_throughput_qos {
@@ -167,6 +171,7 @@ typedef struct task_qos_policy *task_qos_policy_t;
 #define TASK_QOS_POLICY_COUNT	((mach_msg_type_number_t) \
 	(sizeof (struct task_qos_policy) / sizeof (integer_t)))
 
+/* These should be removed - they belong in proc_info.h */
 #define PROC_FLAG_DARWINBG	0x8000	/* process in darwin background */
 #define PROC_FLAG_EXT_DARWINBG	0x10000	/* process in darwin background - external enforcement */
 #define PROC_FLAG_IOS_APPLEDAEMON	0x20000	/* process is apple ios daemon */
@@ -175,9 +180,10 @@ typedef struct task_qos_policy *task_qos_policy_t;
 #define PROC_FLAG_ADAPTIVE_IMPORTANT	0x200000	/* Process is adaptive, and is currently important */
 #define PROC_FLAG_IMPORTANCE_DONOR	0x400000	/* Process is marked as an importance donor */
 #define PROC_FLAG_SUPPRESSED	0x800000	/* Process is suppressed */
-#define PROC_FLAG_IOS_APPLICATION	0x1000000	/* Process is an application */
+#define PROC_FLAG_APPLICATION	0x1000000	/* Process is an application */
+#define PROC_FLAG_IOS_APPLICATION	PROC_FLAG_APPLICATION	/* Process is an application */
 
-#ifdef MACH_KERNEL_PRIVATE
+#ifdef PRIVATE
 
 struct task_requested_policy {
 	/* Task and thread policy (inherited) */
@@ -193,6 +199,9 @@ struct task_requested_policy {
 	/* Thread only policy */
 	th_pidbind_bg   :1,	/* thread only: task i'm bound to is marked 'watchbg' */
 	th_workq_bg     :1,	/* thread only: currently running a background priority workqueue */
+	thrp_qos        :3,	/* thread only: thread qos class */
+	thrp_qos_relprio :4,	/* thread only: thread qos relative priority (store as inverse, -10 -> 0xA) */
+	thrp_qos_override :3,	/* thread only: thread qos class override */
 
 	/* Task only policy */
 	t_apptype       :3,	/* What apptype did launchd tell us this
was (inherited) */ @@ -205,6 +214,8 @@ struct task_requested_policy { t_over_latency_qos :3, /* Timer latency QoS override */ t_base_through_qos :3, /* Computation throughput QoS */ t_over_through_qos :3, /* Computation throughput QoS override */ + t_sfi_managed :1, /* SFI Managed task */ + t_qos_clamp :3, /* task qos clamp */ /* Task only: suppression policies (non-embedded only) */ t_sup_active :1, /* Suppression is on */ @@ -215,8 +226,9 @@ struct task_requested_policy { t_sup_suspend :1, /* Wants to be suspended */ t_sup_throughput :3, /* Wants throughput QoS tier */ t_sup_cpu :1, /* Wants suppressed CPU priority (MAXPRI_SUPPRESSED) */ + t_sup_bg_sockets :1, /* Wants background sockets */ - reserved :17; + reserved :2; }; struct task_effective_policy { @@ -229,6 +241,11 @@ struct task_effective_policy { new_sockets_bg :1, /* Newly created sockets should be marked as bg */ bg_iotier :2, /* What throttle tier should I be in when darwinbg is set? */ terminated :1, /* all throttles have been removed for quick exit or SIGTERM handling */ + qos_ui_is_urgent :1, /* bump UI-Interactive QoS up to the urgent preemption band */ + + /* Thread only policy */ + thep_qos :3, /* thread only: thread qos class */ + thep_qos_relprio :4, /* thread only: thread qos relative priority (store as inverse, -10 -> 0xA) */ /* Task only policy */ t_gpu_deny :1, /* not allowed to access GPU */ @@ -240,8 +257,12 @@ struct task_effective_policy { t_sup_active :1, /* suppression behaviors are in effect */ t_role :3, /* task's system role */ t_suppressed_cpu :1, /* cpu priority == MAXPRI_SUPPRESSED (trumped by lowpri_cpu) */ + t_sfi_managed :1, /* SFI Managed task */ + t_live_donor :1, /* task is a live importance boost donor */ + t_qos_clamp :3, /* task qos clamp (applies to qos-disabled threads too) */ + t_qos_ceiling :3, /* task qos ceiling (applies to only qos-participating threads) */ - reserved :39; + reserved :23; }; struct task_pended_policy { @@ -257,10 +278,27 @@ struct task_pended_policy { reserved :60; }; +#endif + +#ifdef MACH_KERNEL_PRIVATE + extern const struct task_requested_policy default_task_requested_policy; extern const struct task_effective_policy default_task_effective_policy; extern const struct task_pended_policy default_task_pended_policy; +extern kern_return_t +qos_latency_policy_validate(task_latency_qos_t); + +extern kern_return_t +qos_throughput_policy_validate(task_throughput_qos_t); + +extern uint32_t +qos_extract(uint32_t); + +extern uint32_t +qos_latency_policy_package(uint32_t); +extern uint32_t +qos_throughput_policy_package(uint32_t); #endif /* MACH_KERNEL_PRIVATE */ @@ -278,7 +316,8 @@ struct task_suppression_policy { integer_t suspend; integer_t throughput_qos; integer_t suppressed_cpu; /* priority MAXPRI_SUPPRESSED cpu */ - integer_t reserved[8]; + integer_t background_sockets; + integer_t reserved[7]; }; typedef struct task_suppression_policy *task_suppression_policy_t; @@ -293,7 +332,8 @@ struct task_policy_state { uint32_t imp_assertcnt; uint32_t imp_externcnt; uint64_t flags; - uint64_t reserved[3]; + uint64_t imp_transitions; + uint64_t reserved[2]; }; typedef struct task_policy_state *task_policy_state_t; @@ -325,6 +365,8 @@ typedef struct task_policy_state *task_policy_state_t; /* task policy state flags */ #define TASK_IMP_RECEIVER 0x00000001 #define TASK_IMP_DONOR 0x00000002 +#define TASK_IMP_LIVE_DONOR 0x00000004 +#define TASK_DENAP_RECEIVER 0x00000008 /* requested_policy */ #define POLICY_REQ_INT_DARWIN_BG 0x00000001 @@ -337,8 +379,14 @@ typedef struct 
task_policy_state *task_policy_state_t; #define POLICY_REQ_EXT_PASSIVE_IO 0x00000080 #define POLICY_REQ_BG_IOTIER_MASK 0x00000300 /* 2 bits */ #define POLICY_REQ_BG_IOTIER_SHIFT 8 + +/* thread requested policy */ #define POLICY_REQ_PIDBIND_BG 0x00000400 #define POLICY_REQ_WORKQ_BG 0x00000800 +#define POLICY_REQ_TH_QOS_MASK 0x07000000 /* 3 bits (overlaps with ROLE) */ +#define POLICY_REQ_TH_QOS_SHIFT 24 +#define POLICY_REQ_TH_QOS_OVER_MASK 0x70000000 /* 3 bits (overlaps with TAL and SFI) */ +#define POLICY_REQ_TH_QOS_OVER_SHIFT 28 /* task_requested_policy */ #define POLICY_REQ_TERMINATED 0x00001000 @@ -352,6 +400,7 @@ typedef struct task_policy_state *task_policy_state_t; #define POLICY_REQ_ROLE_MASK 0x07000000 /* 3 bits */ #define POLICY_REQ_ROLE_SHIFT 24 #define POLICY_REQ_TAL_ENABLED 0x40000000 +#define POLICY_REQ_SFI_MANAGED 0x80000000 /* requested suppression behaviors (note: clipped off in 32-bit tracepoints) */ #define POLICY_REQ_SUP_ACTIVE 0x0000000100000000 @@ -370,6 +419,9 @@ typedef struct task_policy_state *task_policy_state_t; #define POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT 52 #define POLICY_REQ_SUP_THROUGHPUT_MASK 0x0700000000000000 /* 3 bits */ #define POLICY_REQ_SUP_THROUGHPUT_SHIFT 56 +#define POLICY_REQ_SUP_BG_SOCKETS 0x0800008000000000 +#define POLICY_REQ_QOS_CLAMP_MASK 0x7000000000000000 /* 3 bits */ +#define POLICY_REQ_QOS_CLAMP_SHIFT 60 /* effective policy */ #define POLICY_EFF_IO_TIER_MASK 0x00000003 /* 2 bits */ @@ -382,20 +434,28 @@ typedef struct task_policy_state *task_policy_state_t; #define POLICY_EFF_BG_IOTIER_MASK 0x00000300 /* 2 bits */ #define POLICY_EFF_BG_IOTIER_SHIFT 8 #define POLICY_EFF_TERMINATED 0x00000400 +#define POLICY_EFF_QOS_UI_IS_URGENT 0x80000000 + +/* thread effective policy */ +#define POLICY_EFF_TH_QOS_MASK 0x00700000 /* 3 bits (overlaps with ROLE) */ +#define POLICY_EFF_TH_QOS_SHIFT 20 +#define POLICY_EFF_LATENCY_QOS_MASK 0x00070000 /* 3 bits */ +#define POLICY_EFF_LATENCY_QOS_SHIFT 16 +#define POLICY_EFF_THROUGH_QOS_MASK 0x07000000 /* 3 bits */ +#define POLICY_EFF_THROUGH_QOS_SHIFT 24 /* task effective policy */ #define POLICY_EFF_GPU_DENY 0x00001000 #define POLICY_EFF_TAL_ENGAGED 0x00002000 #define POLICY_EFF_SUSPENDED 0x00004000 #define POLICY_EFF_WATCHERS_BG 0x00008000 -#define POLICY_EFF_LATENCY_QOS_MASK 0x00070000 /* 3 bits */ -#define POLICY_EFF_LATENCY_QOS_SHIFT 16 #define POLICY_EFF_SUP_ACTIVE 0x00080000 #define POLICY_EFF_ROLE_MASK 0x00700000 /* 3 bits */ #define POLICY_EFF_ROLE_SHIFT 20 #define POLICY_EFF_SUP_CPU 0x00800000 -#define POLICY_EFF_THROUGH_QOS_MASK 0x07000000 /* 3 bits */ -#define POLICY_EFF_THROUGH_QOS_SHIFT 24 +#define POLICY_EFF_SFI_MANAGED 0x08000000 +#define POLICY_EFF_QOS_CEILING_MASK 0x70000000 +#define POLICY_EFF_QOS_CEILING_SHIFT 28 /* pending policy */ #define POLICY_PEND_UPDATING 0x00000001 diff --git a/osfmk/mach/task_special_ports.h b/osfmk/mach/task_special_ports.h index fb08e1b7d..635c9134a 100644 --- a/osfmk/mach/task_special_ports.h +++ b/osfmk/mach/task_special_ports.h @@ -88,6 +88,7 @@ typedef int task_special_port_t; #define TASK_ACCESS_PORT 9 /* Permission check for task_for_pid. 
*/ +#define TASK_DEBUG_CONTROL_PORT 10 /* debug control port */ /* * Definitions for ease of use @@ -108,6 +109,9 @@ typedef int task_special_port_t; #define task_get_bootstrap_port(task, port) \ (task_get_special_port((task), TASK_BOOTSTRAP_PORT, (port))) +#define task_get_debug_control_port(task, port) \ + (task_get_special_port((task), TASK_DEBUG_CONTROL_PORT, (port))) + #define task_set_bootstrap_port(task, port) \ (task_set_special_port((task), TASK_BOOTSTRAP_PORT, (port))) @@ -117,4 +121,7 @@ typedef int task_special_port_t; #define task_set_task_access_port(task, port) \ (task_set_special_port((task), TASK_ACCESS_PORT, (port))) +#define task_set_task_debug_control_port(task, port) \ + (task_set_special_port((task), TASK_DEBUG_CONTROL_PORT, (port))) + #endif /* _MACH_TASK_SPECIAL_PORTS_H_ */ diff --git a/osfmk/mach/thread_act.defs b/osfmk/mach/thread_act.defs index 9754acb63..872f1b1aa 100644 --- a/osfmk/mach/thread_act.defs +++ b/osfmk/mach/thread_act.defs @@ -71,6 +71,12 @@ subsystem #include #include +#if !KERNEL && !LIBSYSCALL_INTERFACE +#define PREFIX(NAME) _kernelrpc_ ## NAME +#else +#define PREFIX(NAME) NAME +#endif + /* * Destroy the target thread. * @@ -255,7 +261,7 @@ routine thread_swap_exception_ports( /* * OBSOLETE interface. */ -routine thread_policy( +routine PREFIX(thread_policy)( thr_act : thread_act_t; policy : policy_t; base : policy_base_t; @@ -266,7 +272,7 @@ routine thread_policy( * (Approved Mac OS X microkernel interface) */ -routine thread_policy_set( +routine PREFIX(thread_policy_set)( thread : thread_act_t; flavor : thread_policy_flavor_t; policy_info : thread_policy_t); @@ -323,11 +329,25 @@ routine thread_get_assignment( /* * OBSOLETE interface. */ -routine thread_set_policy( - thr_act : thread_act_t; - pset : processor_set_t; - policy : policy_t; - base : policy_base_t; - limit : policy_limit_t); +routine PREFIX(thread_set_policy)( + thr_act : thread_act_t; + pset : processor_set_t; + policy : policy_t; + base : policy_base_t; + limit : policy_limit_t); + +routine thread_get_mach_voucher( + thr_act : thread_act_t; + which : mach_voucher_selector_t; + out voucher : ipc_voucher_t); + +routine thread_set_mach_voucher( + thr_act : thread_act_t; + voucher : ipc_voucher_t); + +routine thread_swap_mach_voucher( + thr_act : thread_act_t; + new_voucher : ipc_voucher_t; + inout old_voucher : ipc_voucher_t); /* vim: set ft=c : */ diff --git a/osfmk/mach/thread_info.h b/osfmk/mach/thread_info.h index 5f51aeade..9c271d590 100644 --- a/osfmk/mach/thread_info.h +++ b/osfmk/mach/thread_info.h @@ -151,4 +151,21 @@ typedef struct thread_identifier_info *thread_identifier_info_t; #define THREAD_SCHED_RR_INFO 11 #define THREAD_SCHED_FIFO_INFO 12 +#define IO_NUM_PRIORITIES 4 + +struct io_stat_entry { + uint64_t count; + uint64_t size; +}; + +struct io_stat_info { + struct io_stat_entry disk_reads; + struct io_stat_entry io_priority[IO_NUM_PRIORITIES]; + struct io_stat_entry paging; + struct io_stat_entry metadata; + struct io_stat_entry total_io; +}; + +typedef struct io_stat_info *io_stat_info_t; + #endif /* _MACH_THREAD_INFO_H_ */ diff --git a/osfmk/mach/thread_policy.h b/osfmk/mach/thread_policy.h index a0662cae6..8ab95290c 100644 --- a/osfmk/mach/thread_policy.h +++ b/osfmk/mach/thread_policy.h @@ -232,6 +232,31 @@ typedef struct thread_background_policy *thread_background_policy_t; (sizeof (thread_background_policy_data_t) / sizeof (integer_t))) +#define THREAD_LATENCY_QOS_POLICY 7 +typedef integer_t thread_latency_qos_t; + +struct thread_latency_qos_policy { + 
thread_latency_qos_t thread_latency_qos_tier; +}; + +typedef struct thread_latency_qos_policy thread_latency_qos_policy_data_t; +typedef struct thread_latency_qos_policy *thread_latency_qos_policy_t; + +#define THREAD_LATENCY_QOS_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (thread_latency_qos_policy_data_t) / sizeof (integer_t))) + +#define THREAD_THROUGHPUT_QOS_POLICY 8 +typedef integer_t thread_throughput_qos_t; + +struct thread_throughput_qos_policy { + thread_throughput_qos_t thread_throughput_qos_tier; +}; + +typedef struct thread_throughput_qos_policy thread_throughput_qos_policy_data_t; +typedef struct thread_throughput_qos_policy *thread_throughput_qos_policy_t; + +#define THREAD_THROUGHPUT_QOS_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (thread_throughput_qos_policy_data_t) / sizeof (integer_t))) #ifdef PRIVATE @@ -240,11 +265,14 @@ typedef struct thread_background_policy *thread_background_policy_t; */ #define THREAD_POLICY_STATE 6 +#define THREAD_POLICY_STATE_FLAG_STATIC_PARAM 0x1 + struct thread_policy_state { integer_t requested; integer_t effective; integer_t pending; - integer_t reserved[13]; + integer_t flags; + integer_t reserved[12]; }; typedef struct thread_policy_state thread_policy_state_data_t; @@ -253,6 +281,36 @@ typedef struct thread_policy_state *thread_policy_state_t; #define THREAD_POLICY_STATE_COUNT ((mach_msg_type_number_t) \ (sizeof (thread_policy_state_data_t) / sizeof (integer_t))) +/* + * THREAD_QOS_POLICY: + */ +#define THREAD_QOS_POLICY 9 +#define THREAD_QOS_POLICY_OVERRIDE 10 + +#define THREAD_QOS_UNSPECIFIED 0 +#define THREAD_QOS_DEFAULT THREAD_QOS_UNSPECIFIED /* Temporary rename */ +#define THREAD_QOS_MAINTENANCE 1 +#define THREAD_QOS_BACKGROUND 2 +#define THREAD_QOS_UTILITY 3 +#define THREAD_QOS_LEGACY 4 /* i.e. default workq threads */ +#define THREAD_QOS_USER_INITIATED 5 +#define THREAD_QOS_USER_INTERACTIVE 6 + +#define THREAD_QOS_LAST 7 + +#define THREAD_QOS_MIN_TIER_IMPORTANCE (-15) + +struct thread_qos_policy { + integer_t qos_tier; + integer_t tier_importance; +}; + +typedef struct thread_qos_policy thread_qos_policy_data_t; +typedef struct thread_qos_policy *thread_qos_policy_t; + +#define THREAD_QOS_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (thread_qos_policy_data_t) / sizeof (integer_t))) + #endif /* PRIVATE */ #endif /* _MACH_THREAD_POLICY_H_ */ diff --git a/osfmk/mach/upl.defs b/osfmk/mach/upl.defs index e4f89e153..5aceb99a4 100644 --- a/osfmk/mach/upl.defs +++ b/osfmk/mach/upl.defs @@ -71,10 +71,6 @@ subsystem #endif /* KERNEL_SERVER */ upl 2050; -#ifdef MACH_KERNEL -#include -#endif /* MACH_KERNEL */ - #include #include diff --git a/osfmk/mach/vm_param.h b/osfmk/mach/vm_param.h index afb040eee..04726c55c 100644 --- a/osfmk/mach/vm_param.h +++ b/osfmk/mach/vm_param.h @@ -245,6 +245,7 @@ extern vm_offset_t vm_kernel_etext; extern vm_offset_t vm_kernel_base; extern vm_offset_t vm_kernel_top; extern vm_offset_t vm_kernel_slide; +extern vm_offset_t vm_hib_base; extern vm_offset_t vm_kernel_addrperm; extern vm_offset_t vm_kext_base; @@ -256,19 +257,61 @@ extern vm_offset_t vm_kext_top; #define VM_KERNEL_IS_KEXT(_o) \ (((vm_offset_t)(_o) >= vm_kext_base) && \ ((vm_offset_t)(_o) < vm_kext_top)) + +#define VM_KERNEL_SLIDE(_u) \ + ((vm_offset_t)(_u) + vm_kernel_slide) + +/* + * The following macros are to be used when exposing kernel addresses to + * userspace via any of the various debug or info facilities that might exist + * (e.g. stackshot, proc_info syscall, etc.). 
It is important to understand
+ * the goal of each macro and choose the right one depending on what you are
+ * trying to do. Misuse of these macros can result in critical data leaks
+ * which in turn lead to all sorts of system vulnerabilities.
+ *
+ * Note that in general the ideal goal is to protect addresses from userspace
+ * in a way that is reversible assuming you know the permutation and/or slide.
+ *
+ * The macros are as follows:
+ *
+ * VM_KERNEL_UNSLIDE:
+ *     Use this macro when you are exposing an address to userspace which is
+ *     a "static" kernel or kext address (i.e. coming from text or data
+ *     sections). These are the addresses which get "slid" via ASLR on kernel
+ *     or kext load, and it's precisely the slide value we are trying to
+ *     protect from userspace.
+ *
+ * VM_KERNEL_ADDRPERM:
+ *     Use this macro when you are exposing an address to userspace which is
+ *     coming from the kernel's "heap". Since these addresses are not "loaded"
+ *     from anywhere, there is no slide applied and we instead apply the
+ *     permutation value to obscure the address.
+ *
+ * VM_KERNEL_UNSLIDE_OR_PERM:
+ *     Use this macro when you are exposing an address to userspace that could
+ *     come from either kernel text/data *or* the heap. This is a rare case,
+ *     but one that does come up and must be handled correctly.
+ *
+ * Nesting of these macros should be considered invalid.
+ */
 #define VM_KERNEL_UNSLIDE(_v)						\
 		((VM_KERNEL_IS_SLID(_v) ||				\
 		  VM_KERNEL_IS_KEXT(_v)) ?				\
			(vm_offset_t)(_v) - vm_kernel_slide :		\
			(vm_offset_t)(_v))
 
-#define	VM_KERNEL_SLIDE(_u)						\
-		((vm_offset_t)(_u) + vm_kernel_slide)
 
-#define	VM_KERNEL_ADDRPERM(_v)						\
-		(((vm_offset_t)(_v) == 0) ?				\
-			(vm_offset_t)(0) :				\
+#define	VM_KERNEL_ADDRPERM(_v)						\
+		(((vm_offset_t)(_v) == 0) ?				\
+			(vm_offset_t)(0) :				\
 			(vm_offset_t)(_v) + vm_kernel_addrperm)
 
+#define VM_KERNEL_UNSLIDE_OR_PERM(_v)					\
+		((VM_KERNEL_IS_SLID(_v) ||				\
+		  VM_KERNEL_IS_KEXT(_v)) ?				\
+			(vm_offset_t)(_v) - vm_kernel_slide :		\
+			VM_KERNEL_ADDRPERM(_v))
+
+
 #endif	/* XNU_KERNEL_PRIVATE */
 
 extern vm_size_t	page_size;
diff --git a/osfmk/mach/vm_statistics.h b/osfmk/mach/vm_statistics.h
index cf89a6471..b496747d0 100644
--- a/osfmk/mach/vm_statistics.h
+++ b/osfmk/mach/vm_statistics.h
@@ -245,9 +245,9 @@ struct pmap_statistics {
 	integer_t	external_peak;
 	integer_t	reusable;
 	integer_t	reusable_peak;
-	uint64_t	compressed;
-	uint64_t	compressed_peak;
-	uint64_t	compressed_lifetime;
+	uint64_t	compressed __attribute__((aligned(8)));
+	uint64_t	compressed_peak __attribute__((aligned(8)));
+	uint64_t	compressed_lifetime __attribute__((aligned(8)));
 };
 
 typedef struct pmap_statistics	*pmap_statistics_t;
@@ -304,26 +304,21 @@ typedef struct pmap_statistics	*pmap_statistics_t;
 #define VM_FLAGS_OVERWRITE	0x4000	/* delete any existing mappings first */
 #ifdef KERNEL_PRIVATE
 #define VM_FLAGS_NO_PMAP_CHECK	0x8000	/* do not check that pmap is empty */
-#define VM_FLAGS_MAP_JIT	0x80000	/* Used to mark an entry as describing a JIT region */
 #endif /* KERNEL_PRIVATE */
-#define VM_FLAGS_RETURN_DATA_ADDR	0x100000 /* Return address of target data, rather than base of page */
 
 /*
 * VM_FLAGS_SUPERPAGE_MASK
 *	3 bits that specify whether large pages should be used instead of
 *	base pages (!=0), as well as the requested page size.
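/*
 * Illustrative sketch: choosing among the address-exposure macros documented
 * above, assuming an XNU_KERNEL_PRIVATE (kernel-internal) build.
 * 'report_to_userspace' and 'some_static_symbol' are hypothetical names used
 * only for this example; the VM_KERNEL_* macros come from the header.
 */
#include <mach/vm_param.h>

extern void report_to_userspace(vm_offset_t obscured);	/* assumed sink */
extern int some_static_symbol;				/* a text/data address */

static void
expose_addresses(void *heap_obj)
{
	/* Static kernel/kext address: remove the ASLR slide. */
	report_to_userspace(VM_KERNEL_UNSLIDE((vm_offset_t)&some_static_symbol));

	/* Heap address: no slide applies, so permute it instead. */
	report_to_userspace(VM_KERNEL_ADDRPERM((vm_offset_t)heap_obj));

	/* Could be either kind: let the combined macro decide. */
	report_to_userspace(VM_KERNEL_UNSLIDE_OR_PERM((vm_offset_t)heap_obj));
}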
*/ #define VM_FLAGS_SUPERPAGE_MASK 0x70000 /* bits 0x10000, 0x20000, 0x40000 */ -#define VM_FLAGS_SUPERPAGE_SHIFT 16 - -#define SUPERPAGE_NONE 0 /* no superpages, if all bits are 0 */ -#define SUPERPAGE_SIZE_ANY 1 -#define VM_FLAGS_SUPERPAGE_NONE (SUPERPAGE_NONE << VM_FLAGS_SUPERPAGE_SHIFT) -#define VM_FLAGS_SUPERPAGE_SIZE_ANY (SUPERPAGE_SIZE_ANY << VM_FLAGS_SUPERPAGE_SHIFT) -#if defined(__x86_64__) || !defined(KERNEL) -#define SUPERPAGE_SIZE_2MB 2 -#define VM_FLAGS_SUPERPAGE_SIZE_2MB (SUPERPAGE_SIZE_2MB<> 24 @@ -339,12 +334,23 @@ typedef struct pmap_statistics *pmap_statistics_t; VM_FLAGS_OVERWRITE | \ VM_FLAGS_SUPERPAGE_MASK | \ VM_FLAGS_ALIAS_MASK) -#define VM_FLAGS_USER_MAP (VM_FLAGS_USER_ALLOCATE | VM_FLAGS_RETURN_DATA_ADDR) +#define VM_FLAGS_USER_MAP (VM_FLAGS_USER_ALLOCATE | \ + VM_FLAGS_RETURN_DATA_ADDR) #define VM_FLAGS_USER_REMAP (VM_FLAGS_FIXED | \ VM_FLAGS_ANYWHERE | \ VM_FLAGS_OVERWRITE| \ VM_FLAGS_RETURN_DATA_ADDR) +#define VM_FLAGS_SUPERPAGE_SHIFT 16 +#define SUPERPAGE_NONE 0 /* no superpages, if all bits are 0 */ +#define SUPERPAGE_SIZE_ANY 1 +#define VM_FLAGS_SUPERPAGE_NONE (SUPERPAGE_NONE << VM_FLAGS_SUPERPAGE_SHIFT) +#define VM_FLAGS_SUPERPAGE_SIZE_ANY (SUPERPAGE_SIZE_ANY << VM_FLAGS_SUPERPAGE_SHIFT) +#if defined(__x86_64__) || !defined(KERNEL) +#define SUPERPAGE_SIZE_2MB 2 +#define VM_FLAGS_SUPERPAGE_SIZE_2MB (SUPERPAGE_SIZE_2MB< +#include typedef u_int8_t UCHAR; typedef int8_t CHAR; diff --git a/bsd/dev/random/YarrowCoreLib/include/yarrow.h b/osfmk/prng/YarrowCoreLib/include/yarrow.h similarity index 98% rename from bsd/dev/random/YarrowCoreLib/include/yarrow.h rename to osfmk/prng/YarrowCoreLib/include/yarrow.h index e1ce58dd0..282da7631 100644 --- a/bsd/dev/random/YarrowCoreLib/include/yarrow.h +++ b/osfmk/prng/YarrowCoreLib/include/yarrow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/dev/random/YarrowCoreLib/include/yarrowUtils.h b/osfmk/prng/YarrowCoreLib/include/yarrowUtils.h similarity index 93% rename from bsd/dev/random/YarrowCoreLib/include/yarrowUtils.h rename to osfmk/prng/YarrowCoreLib/include/yarrowUtils.h index cdeb4bbf4..95a43f5d0 100644 --- a/bsd/dev/random/YarrowCoreLib/include/yarrowUtils.h +++ b/osfmk/prng/YarrowCoreLib/include/yarrowUtils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -44,7 +44,7 @@ #ifndef _YARROW_UTILS_H_ #define _YARROW_UTILS_H_ -#include +#include #ifdef __cplusplus extern "C" { diff --git a/bsd/dev/random/YarrowCoreLib/port/smf.c b/osfmk/prng/YarrowCoreLib/port/smf.c similarity index 74% rename from bsd/dev/random/YarrowCoreLib/port/smf.c rename to osfmk/prng/YarrowCoreLib/port/smf.c index 6bdd9ca07..83025e57a 100644 --- a/bsd/dev/random/YarrowCoreLib/port/smf.c +++ b/osfmk/prng/YarrowCoreLib/port/smf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -33,10 +33,16 @@ */ -#include -#include -#include +#include +#include +#include +/* Shim emulating _MALLOC */ + +struct _mhead { + size_t mlen; + char dat[0]; +}; SMFAPI void mmInit( void ) { @@ -45,24 +51,25 @@ SMFAPI void mmInit( void ) SMFAPI MMPTR mmMalloc(DWORD request) { - // since kfree requires that we pass in the alloc size, add enough bytes to store a dword - void* mem; - - mem = _MALLOC (request, M_DEVBUF, M_WAITOK); + struct _mhead *hdr = NULL; + size_t memsize = sizeof (*hdr) + request; - if (mem == 0) // oops, it didn't appear to work - { - printf ("Couldn't allocate kernel memory!\n"); - return NULL; - } + hdr = (void *) kalloc(memsize); + if (hdr == NULL) + return NULL; + hdr->mlen = memsize; - return (MMPTR) mem; + return (MMPTR) hdr->dat; } SMFAPI void mmFree(MMPTR ptrnum) { - // get the size of the pointer back - _FREE (ptrnum, M_DEVBUF); + // get the size of the pointer back + struct _mhead *hdr; + + hdr = ptrnum; + hdr--; + kfree(hdr, hdr->mlen); } SMFAPI LPVOID mmGetPtr(MMPTR ptrnum) diff --git a/bsd/dev/random/YarrowCoreLib/src/assertverify.h b/osfmk/prng/YarrowCoreLib/src/assertverify.h similarity index 97% rename from bsd/dev/random/YarrowCoreLib/src/assertverify.h rename to osfmk/prng/YarrowCoreLib/src/assertverify.h index 3e8613462..7f2c35a90 100644 --- a/bsd/dev/random/YarrowCoreLib/src/assertverify.h +++ b/osfmk/prng/YarrowCoreLib/src/assertverify.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/dev/random/YarrowCoreLib/src/comp.c b/osfmk/prng/YarrowCoreLib/src/comp.c similarity index 97% rename from bsd/dev/random/YarrowCoreLib/src/comp.c rename to osfmk/prng/YarrowCoreLib/src/comp.c index c22784095..28f3c07c2 100644 --- a/bsd/dev/random/YarrowCoreLib/src/comp.c +++ b/osfmk/prng/YarrowCoreLib/src/comp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,7 +32,7 @@ Contains: NULL compression. Kernel version of Yarrow assumes incoming seed data is truly random. */ -#include "dev/random/YarrowCoreLib/include/WindowsTypesForMac.h" +#include "prng/YarrowCoreLib/include/WindowsTypesForMac.h" #include "comp.h" #ifdef YARROW_KERNEL diff --git a/bsd/dev/random/YarrowCoreLib/src/comp.h b/osfmk/prng/YarrowCoreLib/src/comp.h similarity index 97% rename from bsd/dev/random/YarrowCoreLib/src/comp.h rename to osfmk/prng/YarrowCoreLib/src/comp.h index 6c7989a91..0cbeca32f 100644 --- a/bsd/dev/random/YarrowCoreLib/src/comp.h +++ b/osfmk/prng/YarrowCoreLib/src/comp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/dev/random/YarrowCoreLib/src/entropysources.h b/osfmk/prng/YarrowCoreLib/src/entropysources.h similarity index 96% rename from bsd/dev/random/YarrowCoreLib/src/entropysources.h rename to osfmk/prng/YarrowCoreLib/src/entropysources.h index 52f387571..1821acc2d 100644 --- a/bsd/dev/random/YarrowCoreLib/src/entropysources.h +++ b/osfmk/prng/YarrowCoreLib/src/entropysources.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/dev/random/YarrowCoreLib/src/macOnly.h b/osfmk/prng/YarrowCoreLib/src/macOnly.h similarity index 93% rename from bsd/dev/random/YarrowCoreLib/src/macOnly.h rename to osfmk/prng/YarrowCoreLib/src/macOnly.h index c42c7ae89..4586b0245 100644 --- a/bsd/dev/random/YarrowCoreLib/src/macOnly.h +++ b/osfmk/prng/YarrowCoreLib/src/macOnly.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,7 +48,7 @@ #ifndef _MAC_ONLY_H_ #define _MAC_ONLY_H_ -#include "dev/random/YarrowCoreLib/include/WindowsTypesForMac.h" +#include "prng/YarrowCoreLib/include/WindowsTypesForMac.h" #if defined(__cplusplus) extern "C" { diff --git a/bsd/dev/random/YarrowCoreLib/src/prng.c b/osfmk/prng/YarrowCoreLib/src/prng.c similarity index 96% rename from bsd/dev/random/YarrowCoreLib/src/prng.c rename to osfmk/prng/YarrowCoreLib/src/prng.c index f14c41163..754935bc6 100644 --- a/bsd/dev/random/YarrowCoreLib/src/prng.c +++ b/osfmk/prng/YarrowCoreLib/src/prng.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -47,7 +47,7 @@ */ #include "userdefines.h" #include "assertverify.h" -#include "dev/random/YarrowCoreLib/include/yarrowUtils.h" +#include "prng/YarrowCoreLib/include/yarrowUtils.h" #if defined(macintosh) || defined(__APPLE__) /* FIXME - this file needs to be in a platform-independent place */ @@ -58,7 +58,7 @@ #include "sha1mod.h" #include "entropysources.h" #include "comp.h" -#include "dev/random/YarrowCoreLib/include/yarrow.h" +#include "prng/YarrowCoreLib/include/yarrow.h" #include "prng.h" #include "prngpriv.h" @@ -80,7 +80,10 @@ #include /* Microseconds */ #include #elif KERNEL_BUILD - #include + #include +#elif MACH_KERNEL_PRIVATE + #include + #include #else #error Unknown TARGET_API #endif /* TARGET_API */ @@ -352,6 +355,8 @@ prngForceReseed(PRNG *p, LONGLONG ticks) #if (defined(TARGET_API_MAC_OSX) || defined(KERNEL_BUILD)) struct timeval tv; int64_t endTime, curTime; + #elif defined(MACH_KERNEL_PRIVATE) + int64_t endTime, curTime; #else /* TARGET_API_MAC_CARBON */ UnsignedWide uwide; /* struct needed for Microseconds() */ LONGLONG start; @@ -373,6 +378,8 @@ prngForceReseed(PRNG *p, LONGLONG ticks) gettimeofday(&tv, NULL); #endif endTime = (int64_t)tv.tv_sec*1000000LL + (int64_t)tv.tv_usec + ticks; + #elif defined(MACH_KERNEL_PRIVATE) + endTime = mach_absolute_time() + (ticks*NSEC_PER_USEC); #else /* TARGET_API_MAC_OSX */ Microseconds(&uwide); start = UnsignedWideToUInt64(uwide); @@ -401,6 +408,9 @@ prngForceReseed(PRNG *p, LONGLONG ticks) curTime = (int64_t)tv.tv_sec*1000000LL + (int64_t)tv.tv_usec; #endif } while(curTime < endTime); + #elif defined(MACH_KERNEL_PRIVATE) + curTime = mach_absolute_time(); + } while(curTime < endTime); #else Microseconds(&uwide); now = UnsignedWideToUInt64(uwide); diff --git a/bsd/dev/random/YarrowCoreLib/src/prng.h b/osfmk/prng/YarrowCoreLib/src/prng.h similarity index 95% rename from bsd/dev/random/YarrowCoreLib/src/prng.h rename to osfmk/prng/YarrowCoreLib/src/prng.h index df8a53886..7d80758c1 100644 --- a/bsd/dev/random/YarrowCoreLib/src/prng.h +++ b/osfmk/prng/YarrowCoreLib/src/prng.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. 
+ * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -51,7 +51,7 @@ #define __YARROW_PRNG_H__ #if defined(macintosh) || defined(__APPLE__) -#include "dev/random/YarrowCoreLib/include/yarrow.h" +#include "prng/YarrowCoreLib/include/yarrow.h" /* Private function forward declarations */ // this is in yarrow.h...YARROWAPI prng_error_status prngInitialize(void); // ditto.... YARROWAPI prng_error_status prngDestroy(void); diff --git a/bsd/dev/random/YarrowCoreLib/src/prngpriv.h b/osfmk/prng/YarrowCoreLib/src/prngpriv.h similarity index 96% rename from bsd/dev/random/YarrowCoreLib/src/prngpriv.h rename to osfmk/prng/YarrowCoreLib/src/prngpriv.h index 5030deb85..3014b4f6e 100644 --- a/bsd/dev/random/YarrowCoreLib/src/prngpriv.h +++ b/osfmk/prng/YarrowCoreLib/src/prngpriv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,7 +50,7 @@ #define __YARROW_PRNG_PRIV_H__ #include "userdefines.h" -#include "dev/random/YarrowCoreLib/include/yarrow.h" +#include "prng/YarrowCoreLib/include/yarrow.h" #include "entropysources.h" #include "comp.h" #include "sha1mod.h" diff --git a/bsd/dev/random/YarrowCoreLib/src/readme-prnguser.txt b/osfmk/prng/YarrowCoreLib/src/readme-prnguser.txt similarity index 100% rename from bsd/dev/random/YarrowCoreLib/src/readme-prnguser.txt rename to osfmk/prng/YarrowCoreLib/src/readme-prnguser.txt diff --git a/bsd/dev/random/YarrowCoreLib/src/sha1mod.c b/osfmk/prng/YarrowCoreLib/src/sha1mod.c similarity index 99% rename from bsd/dev/random/YarrowCoreLib/src/sha1mod.c rename to osfmk/prng/YarrowCoreLib/src/sha1mod.c index c1e245aa3..3f308d952 100644 --- a/bsd/dev/random/YarrowCoreLib/src/sha1mod.c +++ b/osfmk/prng/YarrowCoreLib/src/sha1mod.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/dev/random/YarrowCoreLib/src/sha1mod.h b/osfmk/prng/YarrowCoreLib/src/sha1mod.h similarity index 96% rename from bsd/dev/random/YarrowCoreLib/src/sha1mod.h rename to osfmk/prng/YarrowCoreLib/src/sha1mod.h index 9d64139ba..d969f2c27 100644 --- a/bsd/dev/random/YarrowCoreLib/src/sha1mod.h +++ b/osfmk/prng/YarrowCoreLib/src/sha1mod.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,7 +37,7 @@ By Steve Reid #define __SHA1_H__ -#include +#include /* Test Vectors (from FIPS PUB 180-1) diff --git a/bsd/dev/random/YarrowCoreLib/src/smf.h b/osfmk/prng/YarrowCoreLib/src/smf.h similarity index 97% rename from bsd/dev/random/YarrowCoreLib/src/smf.h rename to osfmk/prng/YarrowCoreLib/src/smf.h index a332a64f8..b152732b2 100644 --- a/bsd/dev/random/YarrowCoreLib/src/smf.h +++ b/osfmk/prng/YarrowCoreLib/src/smf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/dev/random/YarrowCoreLib/src/userdefines.h b/osfmk/prng/YarrowCoreLib/src/userdefines.h similarity index 96% rename from bsd/dev/random/YarrowCoreLib/src/userdefines.h rename to osfmk/prng/YarrowCoreLib/src/userdefines.h index 3021f5885..3d76b4b7a 100644 --- a/bsd/dev/random/YarrowCoreLib/src/userdefines.h +++ b/osfmk/prng/YarrowCoreLib/src/userdefines.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c b/osfmk/prng/YarrowCoreLib/src/yarrowUtils.c similarity index 93% rename from bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c rename to osfmk/prng/YarrowCoreLib/src/yarrowUtils.c index c57f50dce..68b6ce0e3 100644 --- a/bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c +++ b/osfmk/prng/YarrowCoreLib/src/yarrowUtils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -41,7 +41,7 @@ */ -#include "dev/random/YarrowCoreLib/include/yarrowUtils.h" +#include "prng/YarrowCoreLib/include/yarrowUtils.h" #include void diff --git a/bsd/dev/random/fips_sha1.c b/osfmk/prng/fips_sha1.c similarity index 99% rename from bsd/dev/random/fips_sha1.c rename to osfmk/prng/fips_sha1.c index 136bd266c..ccf0d72bb 100644 --- a/bsd/dev/random/fips_sha1.c +++ b/osfmk/prng/fips_sha1.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,7 +64,7 @@ WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! */ -#include +#include #include #include "fips_sha1.h" diff --git a/bsd/dev/random/fips_sha1.h b/osfmk/prng/fips_sha1.h similarity index 96% rename from bsd/dev/random/fips_sha1.h rename to osfmk/prng/fips_sha1.h index b12684a98..092c48b36 100644 --- a/bsd/dev/random/fips_sha1.h +++ b/osfmk/prng/fips_sha1.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -44,7 +44,7 @@ extern "C" { #endif -#include +#include #define SHA_DIGEST_LENGTH 20 #define SHA1_RESULTLEN SHA_DIGEST_LENGTH diff --git a/osfmk/prng/prng_yarrow.c b/osfmk/prng/prng_yarrow.c new file mode 100644 index 000000000..74c161ed2 --- /dev/null +++ b/osfmk/prng/prng_yarrow.c @@ -0,0 +1,411 @@ +/* + * Copyright (c) 1999-2013 Apple, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include + +#include + +#include "fips_sha1.h" + + +/* + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! + + THIS FILE IS NEEDED TO PASS FIPS ACCEPTANCE FOR THE RANDOM NUMBER GENERATOR. + IF YOU ALTER IT IN ANY WAY, WE WILL NEED TO GO THROUGH FIPS ACCEPTANCE AGAIN, + AN OPERATION THAT IS VERY EXPENSIVE AND TIME CONSUMING. IN OTHER WORDS, + DON'T MESS WITH THIS FILE. + + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! +*/ +/* + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! + + ANY CODE PROTECTED UNDER "#ifdef __arm__" IS SERIOUSLY SUPPOSED TO BE THERE! + IF YOU REMOVE ARM CODE, RANDOM WILL NOT MEAN ANYTHING FOR iPHONES ALL OVER. + PLEASE DON'T TOUCH __arm__ CODE IN THIS FILE! + + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! +*/ + + +#define RESEED_TICKS 50 /* how long a reseed operation can take */ + + +typedef u_int8_t BlockWord; +enum {kBSize = 20}; +typedef BlockWord Block[kBSize]; +enum {kBlockSize = sizeof(Block)}; + +struct YarrowContext { + PrngRef PrngRef; + Block xkey; + Block random_data; + int bytes_used; + unsigned char SelfTestInitialized; + u_int32_t LastBlockChecksum; + uint64_t bytes_since_reseed; +}; +typedef struct YarrowContext *YarrowContextp; + +/* define prototypes to keep the compiler happy...
*/ + +void add_blocks(Block a, Block b, BlockWord carry); +void fips_initialize(YarrowContextp yp); +void random_block(YarrowContextp yp, Block b, int addOptional); +u_int32_t CalculateCRC(u_int8_t* buffer, size_t length); + +/* + * Get 120 bits from yarrow + */ + +/* + * add block b to block a + */ +void +add_blocks(Block a, Block b, BlockWord carry) +{ + int i = kBlockSize - 1; + while (i >= 0) + { + u_int32_t c = (u_int32_t)carry + + (u_int32_t)a[i] + + (u_int32_t)b[i]; + a[i] = c & 0xff; + carry = c >> 8; + i -= 1; + } +} + + + +static char zeros[(512 - kBSize * 8) / 8]; + +static const u_int32_t g_crc_table[] = +{ + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 
0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, +}; + +/* + * Setup for fips compliance + */ + +/* + * calculate a crc-32 checksum + */ +u_int32_t CalculateCRC(u_int8_t* buffer, size_t length) +{ + u_int32_t crc = 0; + + size_t i; + for (i = 0; i < length; ++i) + { + u_int32_t temp = (crc ^ ((u_int32_t) buffer[i])) & 0xFF; + crc = (crc >> 8) ^ g_crc_table[temp]; + } + + return crc; +} + +/* + * get a random block of data per fips 186-2 + */ +void +random_block(YarrowContextp pp, Block b, int addOptional) +{ + SHA1_CTX sha1_ctx; + + int repeatCount = 0; + do + { + // do one iteration + + if (addOptional) + { + // create an xSeed to add. + Block xSeed; + prngOutput (pp->PrngRef, (BYTE*) &xSeed, sizeof (xSeed)); + + // add the seed to the previous value of xkey + add_blocks (pp->xkey, xSeed, 0); + } + + // initialize the value of H + FIPS_SHA1Init(&sha1_ctx); + + // to stay compatible with the FIPS specification, we need to flip the bytes in + // xkey to little endian byte order. In our case, this makes exactly no difference + // (random is random), but we need to do it anyway to keep FIPS happy + + // compute "G" + FIPS_SHA1Update(&sha1_ctx, pp->xkey, kBlockSize); + + // add zeros to fill the internal SHA-1 buffer + FIPS_SHA1Update (&sha1_ctx, (const u_int8_t *)zeros, sizeof (zeros)); + + // we have to do a byte order correction here because the sha1 math is being done internally + // as u_int32_t, not a stream of bytes. Since we maintain our data as a byte stream, we need + // to convert + + u_int32_t* finger = (u_int32_t*) b; + + unsigned j; + for (j = 0; j < kBlockSize / sizeof (u_int32_t); ++j) + { + *finger++ = OSSwapHostToBigInt32(sha1_ctx.h.b32[j]); + } + + // calculate the CRC-32 of the block + u_int32_t new_crc = CalculateCRC(sha1_ctx.h.b8, sizeof (Block)); + + // make sure we don't repeat + int cmp = new_crc == pp->LastBlockChecksum; + pp->LastBlockChecksum = new_crc; + if (!pp->SelfTestInitialized) + { + pp->SelfTestInitialized = 1; + return; + } + else if (!cmp) + { + return; + } + + repeatCount += 1; + + // fix up the next value of xkey + add_blocks (pp->xkey, b, 1); + } while (repeatCount < 2); + + /* + * If we got here, three successive checksums of the random number + * generator have been the same. Since the odds of this happening are + * 1 in 18,446,744,073,709,551,616 (1 in 18 quintillion), one of the following has + * most likely happened: + * + * 1: There is a significant bug in this code. + * 2: There has been a massive system failure. + * 3: The universe has ceased to exist. + * + * There is no good way to recover from any of these cases. We + * therefore panic.
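+ * + * (Note: the compare-against-last-checksum logic above is in the spirit of + * the FIPS 140-2 continuous RNG self-test, which rejects output whenever + * two consecutive blocks compare equal.)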
+ */ + + panic("FIPS random self-test failed."); +} + +const Block kKnownAnswer = {0x92, 0xb4, 0x04, 0xe5, 0x56, 0x58, 0x8c, 0xed, 0x6c, 0x1a, 0xcd, 0x4e, 0xbf, 0x05, 0x3f, 0x68, 0x09, 0xf7, 0x3a, 0x93}; + +void +fips_initialize(YarrowContextp yp) +{ + /* So that we can do the self test, set the seed to zero */ + memset(&yp->xkey, 0, sizeof(yp->xkey)); + + /* other initializations */ + memset (zeros, 0, sizeof (zeros)); + yp->bytes_used = 0; + random_block(yp, yp->random_data, FALSE); + + // check here to see if we got the initial data we were expecting + if (memcmp(kKnownAnswer, yp->random_data, kBlockSize) != 0) + { + panic("FIPS random self test failed"); + } + + // now do the random block again to make sure that userland doesn't get predicatable data + random_block(yp, yp->random_data, TRUE); +} + + +static int +yarrow_init( + const struct ccdrbg_info *info, + struct ccdrbg_state *drbg, + unsigned long entropyLength, const void* entropy, + unsigned long nonceLength, const void* nonce, + unsigned long psLength, const void* ps) +{ +#pragma unused(info) +#pragma unused(nonceLength) +#pragma unused(nonce) +#pragma unused(psLength) +#pragma unused(ps) + YarrowContextp yp = (YarrowContextp) drbg; + prng_error_status perr; + char buffer[16]; + + yp->SelfTestInitialized = 0; + + /* create a Yarrow object */ + perr = prngInitialize(&yp->PrngRef); + if (perr != 0) { + panic("Couldn't initialize Yarrow, /dev/random will not work."); + } + + perr = prngInput(yp->PrngRef, (BYTE*) entropy, (UINT) entropyLength, + SYSTEM_SOURCE, (UINT) entropyLength * 8); + if (perr != 0) { + /* an error, complain */ + panic("Couldn't seed Yarrow.\n"); + } + + /* turn the data around */ + perr = prngOutput(yp->PrngRef, (BYTE*) buffer, (UINT) sizeof(buffer)); + + /* and scramble it some more */ + perr = prngForceReseed(yp->PrngRef, RESEED_TICKS); + + fips_initialize(yp); + + yp->bytes_since_reseed = 0; + + return perr; +} + +static int +yarrow_generate( + struct ccdrbg_state *prng, + unsigned long outlen, void *out, + unsigned long inlen, const void *in) +{ +#pragma unused(inlen) +#pragma unused(in) + YarrowContextp yp = (YarrowContextp) prng; + int bytes_read = 0; + int bytes_remaining = (int) outlen; + + yp->bytes_since_reseed += outlen; + if (yp->bytes_since_reseed > RESEED_BYTES) + return CCDRBG_STATUS_NEED_RESEED; + + while (bytes_remaining > 0) { + int bytes_to_read = MIN(bytes_remaining, + kBlockSize - yp->bytes_used); + if (bytes_to_read == 0) { + random_block(yp, yp->random_data, TRUE); + yp->bytes_used = 0; + bytes_to_read = MIN(bytes_remaining, kBlockSize); + } + + memmove((u_int8_t*) out + bytes_read, + ((u_int8_t*)yp->random_data) + yp->bytes_used, + bytes_to_read); + yp->bytes_used += bytes_to_read; + bytes_read += bytes_to_read; + bytes_remaining -= bytes_to_read; + } + + return CCDRBG_STATUS_OK; +} + +static int +yarrow_reseed( + struct ccdrbg_state *prng, + unsigned long entropylen, const void *entropy, + unsigned long inlen, const void *in) +{ +#pragma unused(inlen) +#pragma unused(in) + YarrowContextp yp = (YarrowContextp) prng; + + (void) prngInput(yp->PrngRef, (BYTE*) entropy, (UINT) entropylen, + SYSTEM_SOURCE, (UINT) entropylen * 8); + (void) prngForceReseed(yp->PrngRef, RESEED_TICKS); + + yp->bytes_since_reseed = 0; + + return CCDRBG_STATUS_OK; +} + +static void +yarrow_destroy( + struct ccdrbg_state *prng) +{ +#pragma unused(prng) +} + + +void +ccdrbg_factory_yarrow( + struct ccdrbg_info *info, + const void *custom) +{ + info->size = sizeof(struct YarrowContext); + info->init = yarrow_init; + 
info->generate = yarrow_generate; + info->reseed = yarrow_reseed; + info->done = yarrow_destroy; + info->custom = custom; +} diff --git a/osfmk/prng/random.c b/osfmk/prng/random.c new file mode 100644 index 000000000..44045cc37 --- /dev/null +++ b/osfmk/prng/random.c @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +static lck_grp_t *gPRNGGrp; +static lck_attr_t *gPRNGAttr; +static lck_grp_attr_t *gPRNGGrpAttr; +static lck_mtx_t *gPRNGMutex = NULL; + +typedef struct prngContext { + struct ccdrbg_info *infop; + struct ccdrbg_state *statep; + uint64_t bytes_generated; + uint64_t bytes_reseeded; +} *prngContextp; + +ccdrbg_factory_t prng_ccdrbg_factory = NULL; + +entropy_data_t EntropyData = { .index_ptr = EntropyData.buffer }; + +boolean_t erandom_seed_set = FALSE; +char erandom_seed[EARLY_RANDOM_SEED_SIZE]; +typedef struct ccdrbg_state ccdrbg_state_t; +uint8_t master_erandom_state[EARLY_RANDOM_STATE_STATIC_SIZE]; +ccdrbg_state_t *erandom_state[MAX_CPUS]; +struct ccdrbg_info erandom_info; +decl_simple_lock_data(,entropy_lock); + +struct ccdrbg_nisthmac_custom erandom_custom = { + .di = &ccsha1_eay_di, + .strictFIPS = 0, +}; + +static void read_erandom(void *buffer, u_int numBytes); /* Forward */ + +void +entropy_buffer_read(char *buffer, + unsigned int *count) +{ + boolean_t current_state; + unsigned int i, j; + + if (!erandom_seed_set) { + panic("early_random was never invoked"); + } + + if ((*count) > (ENTROPY_BUFFER_SIZE * sizeof(unsigned int))) + *count = ENTROPY_BUFFER_SIZE * sizeof(unsigned int); + + current_state = ml_set_interrupts_enabled(FALSE); +#if defined (__x86_64__) + simple_lock(&entropy_lock); +#endif + + memcpy((char *) buffer, (char *) EntropyData.buffer, *count); + + for (i = 0, j = (ENTROPY_BUFFER_SIZE - 1); i < ENTROPY_BUFFER_SIZE; j = i, i++) + EntropyData.buffer[i] = EntropyData.buffer[i] ^ EntropyData.buffer[j]; + +#if defined (__x86_64__) + simple_unlock(&entropy_lock); +#endif + (void) ml_set_interrupts_enabled(current_state); + 
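+ /* + * Note: the XOR pass above mixes each word of EntropyData.buffer into its + * neighbor (circularly), so the next reader does not see the same raw + * words that were just copied out to this caller. + */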
+#if DEVELOPMENT || DEBUG + uint32_t *word = (uint32_t *) (void *) buffer; + /* Good for both 32-bit and 64-bit kernels. */ + for (i = 0; i < ENTROPY_BUFFER_SIZE; i += 4) + /* + * We use "EARLY" here so that we can grab early entropy on + * ARM, where tracing is not started until after PRNG is + * initialized. + */ + KERNEL_DEBUG_EARLY(ENTROPY_READ(i/4), + word[i+0], word[i+1], word[i+2], word[i+3]); +#endif +} + +/* + * Return a uniformly distributed 64-bit random number. + * + * This interface should have minimal dependencies on kernel + * services, and thus be available very early in the life + * of the kernel. + * This provides cryptographically secure randomness. + * Each processor has its own generator instance. + * It is seeded (lazily) with entropy provided by the Booter. + * + * The algorithm switched from an LCG to the + * NIST HMAC DRBG as follows: + * - When first called (on OSX this is very early while page tables are being + * built) early_random() calls ccdrbg_factory_nisthmac() to set up a ccdrbg info + * structure. + * - The boot processor's ccdrbg state structure is a statically allocated area + * which is then initialized by calling the ccdrbg_init method. + * The initial entropy is 16 bytes of boot entropy. + * The nonce is the first 8 bytes of entropy xor'ed with a timestamp + * from ml_get_timebase(). + * The personalization data provided is null. + * - The first 64-bit random value is returned on the boot processor from + * an invocation of the ccdrbg_generate method. + * - Non-boot processors' DRBG state structures are allocated dynamically + * from prng_cpu_init(). Each is initialized with the same 16 bytes of entropy + * but with a different timestamped nonce and cpu number as personalization. + * - Subsequent calls to early_random() call through to read_erandom() to generate + * an 8-byte random value. read_erandom() ensures that pre-emption is + * disabled and selects the DRBG state of the current processor. + * The ccdrbg_generate method is called for the required random output. + * If this method returns CCDRBG_STATUS_NEED_RESEED, the erandom_seed buffer + * is re-filled with kernel-harvested entropy and the ccdrbg_reseed method is + * called with this new entropy. The kernel panics if a reseed fails. + */ +uint64_t +early_random(void) +{ + uint32_t cnt = 0; + uint64_t result; + uint64_t nonce; + int rc; + ccdrbg_state_t *state; + + if (!erandom_seed_set) { + simple_lock_init(&entropy_lock,0); + erandom_seed_set = TRUE; + cnt = PE_get_random_seed((unsigned char *) EntropyData.buffer, + sizeof(EntropyData.buffer)); + + if (cnt < sizeof(EntropyData.buffer)) { + /* + * Insufficient entropy is fatal. We must fill the + * entire entropy buffer during initialization. + */ + panic("EntropyData needed %lu bytes, but got %u.\n", + sizeof(EntropyData.buffer), cnt); + } + + /* + * Use some of the supplied entropy as a basis for early_random; + * reuse is ugly, but simplifies things. Ideally, we would guard + * early random values well enough that it isn't safe to attack + * them, but this cannot be guaranteed; thus, initial entropy + * can be considered 8 bytes weaker for a given boot if any + * early random values are conclusively determined. + * + * erandom_seed could be larger than EntropyData.buffer... + * but it won't be.
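+ * (For scale: erandom_seed is EARLY_RANDOM_SEED_SIZE (16) bytes while + * EntropyData.buffer is ENTROPY_BUFFER_BYTE_SIZE (64) bytes, so the + * bcopy below stays well within bounds.)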
+ */ + bcopy(EntropyData.buffer, &erandom_seed, sizeof(erandom_seed)); + + /* Init DRBG for NIST HMAC */ + ccdrbg_factory_nisthmac(&erandom_info, &erandom_custom); + assert(erandom_info.size <= sizeof(master_erandom_state)); + state = (ccdrbg_state_t *) master_erandom_state; + erandom_state[0] = state; + + /* + * Init our DRBG from the boot entropy and a nonce composed of + * a timestamp swizzled with the first 8 bytes of this entropy. + */ + assert(sizeof(erandom_seed) > sizeof(nonce)); + bcopy(erandom_seed, &nonce, sizeof(nonce)); + nonce ^= ml_get_timebase(); + rc = ccdrbg_init(&erandom_info, state, + sizeof(erandom_seed), erandom_seed, + sizeof(nonce), &nonce, + 0, NULL); + assert(rc == CCDRBG_STATUS_OK); + + /* Generate output */ + rc = ccdrbg_generate(&erandom_info, state, + sizeof(result), &result, + 0, NULL); + assert(rc == CCDRBG_STATUS_OK); + + return result; + }; + + read_erandom(&result, sizeof(result)); + + return result; +} + +void +read_erandom(void *buffer, u_int numBytes) +{ + int cpu; + int rc; + uint32_t cnt; + ccdrbg_state_t *state; + + mp_disable_preemption(); + cpu = cpu_number(); + state = erandom_state[cpu]; + assert(state); + while (TRUE) { + /* Generate output */ + rc = ccdrbg_generate(&erandom_info, state, + numBytes, buffer, + 0, NULL); + if (rc == CCDRBG_STATUS_OK) + break; + if (rc == CCDRBG_STATUS_NEED_RESEED) { + /* It's time to reseed. Get more entropy */ + cnt = sizeof(erandom_seed); + entropy_buffer_read(erandom_seed, &cnt); + assert(cnt == sizeof(erandom_seed)); + rc = ccdrbg_reseed(&erandom_info, state, + sizeof(erandom_seed), erandom_seed, + 0, NULL); + if (rc == CCDRBG_STATUS_OK) + continue; + panic("read_erandom reseed error %d\n", rc); + } + panic("read_erandom ccdrbg error %d\n", rc); + } + mp_enable_preemption(); +} + +void +read_frandom(void *buffer, u_int numBytes) +{ + char *cp = (char *) buffer; + int nbytes; + + /* + * Split up into requests for blocks smaller than + * the DRBG request limit. This limit is private, but + * for NISTHMAC it's known to be greater than 4096. + */ + while (numBytes) { + nbytes = MIN(numBytes, PAGE_SIZE); + read_erandom(cp, nbytes); + cp += nbytes; + numBytes -= nbytes; + } +} + +/* + * Register a DRBG factory routine to be used in constructing the kernel PRNG. + * XXX to be called from the corecrypto kext. + */ +void +prng_factory_register(ccdrbg_factory_t factory) +{ + prng_ccdrbg_factory = factory; + thread_wakeup((event_t) &prng_ccdrbg_factory); +} + +void +prng_cpu_init(int cpu) +{ + uint64_t nonce; + int rc; + ccdrbg_state_t *state; + prngContextp pp; + + /* + * Allocate state and initialize DRBG state for early_random() + * for this processor, if necessary. + */ + if (erandom_state[cpu] == NULL) { + + state = kalloc(erandom_info.size); + if (state == NULL) { + panic("prng_init kalloc failed\n"); + } + erandom_state[cpu] = state; + + /* + * Init our DRBG from boot entropy, nonce as timestamp xor'ed + * with the first 8 bytes of entropy, and use the cpu number + * as the personalization parameter.
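+ * Because the nonce folds in ml_get_timebase() and the cpu number serves + * as personalization, each processor derives a distinct DRBG stream even + * though every instance is seeded with the same 16 bytes of entropy.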
+ */ + bcopy(erandom_seed, &nonce, sizeof(nonce)); + nonce ^= ml_get_timebase(); + rc = ccdrbg_init(&erandom_info, state, + sizeof(erandom_seed), erandom_seed, + sizeof(nonce), &nonce, + sizeof(cpu), &cpu); + assert(rc == CCDRBG_STATUS_OK); + } + + /* Non-boot cpus use the master cpu's global context */ + if (cpu != master_cpu) { + cpu_datap(cpu)->cpu_prng = master_prng_context(); + return; + } + + assert(gPRNGMutex == NULL); /* Once only, please */ + + /* make a mutex to control access */ + gPRNGGrpAttr = lck_grp_attr_alloc_init(); + gPRNGGrp = lck_grp_alloc_init("random", gPRNGGrpAttr); + gPRNGAttr = lck_attr_alloc_init(); + gPRNGMutex = lck_mtx_alloc_init(gPRNGGrp, gPRNGAttr); + + pp = kalloc(sizeof(*pp)); + if (pp == NULL) + panic("Unable to allocate prng context"); + pp->bytes_generated = 0; + pp->bytes_reseeded = 0; + pp->infop = NULL; + + /* XXX Temporary registration */ + prng_factory_register(ccdrbg_factory_yarrow); + + master_prng_context() = pp; +} + +static ccdrbg_info_t * +prng_infop(prngContextp pp) +{ + lck_mtx_assert(gPRNGMutex, LCK_MTX_ASSERT_OWNED); + + /* Usual case: the info is all set */ + if (pp->infop) + return pp->infop; + + /* + * Possibly wait for the CCDRBG factory routine to be registered + * by corecrypto. But panic after waiting for more than 10 seconds. + */ + while (prng_ccdrbg_factory == NULL ) { + wait_result_t wait_result; + assert_wait_timeout((event_t) &prng_ccdrbg_factory, TRUE, + 10, NSEC_PER_USEC); + lck_mtx_unlock(gPRNGMutex); + wait_result = thread_block(THREAD_CONTINUE_NULL); + if (wait_result == THREAD_TIMED_OUT) + panic("prng_ccdrbg_factory registration timeout"); + lck_mtx_lock(gPRNGMutex); + } + /* Check we didn't lose the set-up race */ + if (pp->infop) + return pp->infop; + + pp->infop = (ccdrbg_info_t *) kalloc(sizeof(ccdrbg_info_t)); + if (pp->infop == NULL) + panic("Unable to allocate prng info"); + + prng_ccdrbg_factory(pp->infop, NULL); + + pp->statep = kalloc(pp->infop->size); + if (pp->statep == NULL) + panic("Unable to allocate prng state"); + + char rdBuffer[ENTROPY_BUFFER_BYTE_SIZE]; + unsigned int bytesToInput = sizeof(rdBuffer); + + entropy_buffer_read(rdBuffer, &bytesToInput); + + (void) ccdrbg_init(pp->infop, pp->statep, + bytesToInput, rdBuffer, + 0, NULL, + 0, NULL); + return pp->infop; +} + +static void +Reseed(prngContextp pp) +{ + char rdBuffer[ENTROPY_BUFFER_BYTE_SIZE]; + unsigned int bytesToInput = sizeof(rdBuffer); + + entropy_buffer_read(rdBuffer, &bytesToInput); + + PRNG_CCDRBG((void) ccdrbg_reseed(pp->infop, pp->statep, + bytesToInput, rdBuffer, + 0, NULL)); + + pp->bytes_reseeded = pp->bytes_generated; +} + + +/* export good random numbers to the rest of the kernel */ +void +read_random(void* buffer, u_int numbytes) +{ + prngContextp pp; + ccdrbg_info_t *infop; + int ccdrbg_err; + + lck_mtx_lock(gPRNGMutex); + + pp = current_prng_context(); + infop = prng_infop(pp); + + /* + * Call DRBG, reseeding and retrying if requested.
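+ * (The Yarrow shim reports CCDRBG_STATUS_NEED_RESEED once RESEED_BYTES + * (17597) bytes have been generated since the last reseed; Reseed() then + * feeds it fresh entropy and the generate call is retried.)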
+ */ + while (TRUE) { + PRNG_CCDRBG( + ccdrbg_err = ccdrbg_generate(infop, pp->statep, + numbytes, buffer, + 0, NULL)); + if (ccdrbg_err == CCDRBG_STATUS_OK) + break; + if (ccdrbg_err == CCDRBG_STATUS_NEED_RESEED) { + Reseed(pp); + continue; + } + panic("read_random ccdrbg error %d\n", ccdrbg_err); + } + + pp->bytes_generated += numbytes; + lck_mtx_unlock(gPRNGMutex); +} + +int +write_random(void* buffer, u_int numbytes) +{ +#if 0 + int retval = 0; + prngContextp pp; + + lck_mtx_lock(gPRNGMutex); + + pp = current_prng_context(); + + if (ccdrbg_reseed(prng_infop(pp), pp->statep, + bytesToInput, rdBuffer, 0, NULL) != 0) + retval = EIO; + + lck_mtx_unlock(gPRNGMutex); + return retval; +#else +#pragma unused(buffer, numbytes) + return 0; +#endif +} diff --git a/osfmk/prng/random.h b/osfmk/prng/random.h new file mode 100644 index 000000000..60a46b70a --- /dev/null +++ b/osfmk/prng/random.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _PRNG_RANDOM_H_ +#define _PRNG_RANDOM_H_ + +__BEGIN_DECLS + +#ifdef XNU_KERNEL_PRIVATE + +#define ENTROPY_BUFFER_BYTE_SIZE 64 + +#define ENTROPY_BUFFER_SIZE ENTROPY_BUFFER_BYTE_SIZE/sizeof(uint32_t) + +typedef struct entropy_data { + uint32_t *index_ptr; + uint32_t buffer[ENTROPY_BUFFER_SIZE]; +} entropy_data_t; + +extern entropy_data_t EntropyData; + +/* Trace codes for DBG_SEC_KERNEL: */ +#define ENTROPY_READ(n) SECURITYDBG_CODE(DBG_SEC_KERNEL, n) /* n: 0 .. 3 */ + +/* + * Early_random implementation params: */ +#define EARLY_RANDOM_SEED_SIZE (16) +#define EARLY_RANDOM_STATE_STATIC_SIZE (256) + +#if defined (__x86_64__) +#define current_prng_context() (current_cpu_datap()->cpu_prng) +#define master_prng_context() (cpu_datap(master_cpu)->cpu_prng) +#else +#error architecture unknown +#endif + +#include +#include + +typedef struct ccdrbg_info ccdrbg_info_t; +typedef void (*ccdrbg_factory_t)(ccdrbg_info_t *info, const void *custom); + +extern void ccdrbg_factory_yarrow(ccdrbg_info_t *info, const void *custom); + +void prng_factory_register(ccdrbg_factory_t factory); +void prng_cpu_init(int cpu); + +void entropy_buffer_read(char *buffer, unsigned int *count); +void entropy_boot_trace(void); + +/* + * Wrapper for requesting a CCDRBG operation. 
+ * This macro makes the DRBG call with pre-emption disabled to ensure that + * any attempt to block will cause a panic. And the operation is timed and + * cannot exceed 10msec (for development kernels). + * But skip this while we retain Yarrow. + */ +#define YARROW 1 +#if YARROW +#define PRNG_CCDRBG(op) \ +MACRO_BEGIN \ + op; \ +MACRO_END +#else +#define PRNG_CCDRBG(op) \ +MACRO_BEGIN \ + uint64_t start; \ + uint64_t stop; \ + disable_preemption(); \ + start = mach_absolute_time(); \ + op; \ + stop = mach_absolute_time(); \ + enable_preemption(); \ + assert(stop - start < 10*NSEC_PER_MSEC || \ + machine_timeout_suspended()); \ + (void) start; \ + (void) stop; \ +MACRO_END +#endif + +#endif /* XNU_KERNEL_PRIVATE */ + +/* /dev/random's PRNG is reseeded after generating this many bytes: */ +#define RESEED_BYTES (17597) + +__END_DECLS + +#endif /* _PRNG_RANDOM_H_ */ diff --git a/osfmk/profiling/Makefile b/osfmk/profiling/Makefile index c273a8c41..f256a9c47 100644 --- a/osfmk/profiling/Makefile +++ b/osfmk/profiling/Makefile @@ -13,6 +13,9 @@ INSTINC_SUBDIRS = \ INSTINC_SUBDIRS_X86_64 = \ x86_64 +INSTINC_SUBDIRS_X86_64H = \ + x86_64 + EXPINC_SUBDIRS = \ machine @@ -21,6 +24,9 @@ EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_X86_64 = \ x86_64 +EXPINC_SUBDIRS_X86_64H = \ + x86_64 + DATAFILES = \ profile-internal.h profile-mk.h profile-kgmon.c diff --git a/osfmk/profiling/machine/profile-md.h b/osfmk/profiling/machine/profile-md.h index 028bde46d..2861ee13f 100644 --- a/osfmk/profiling/machine/profile-md.h +++ b/osfmk/profiling/machine/profile-md.h @@ -25,7 +25,7 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _MACH_MACHINE_PROFILE_MD_H +#ifndef _MACH_MACHINE_PROFILE_MD_H_ #define _MACH_MACHINE_PROFILE_MD_H_ #if defined (__i386__) || defined (__x86_64__) diff --git a/osfmk/sys/syslog.h b/osfmk/sys/syslog.h deleted file mode 100644 index 472111935..000000000 --- a/osfmk/sys/syslog.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 03:13:08 ezf - * change marker to not FREE - * [1994/09/22 21:58:52 ezf] - * - * Revision 1.2.2.3 1993/08/03 18:30:38 gm - * CR9596: Change KERNEL to MACH_KERNEL. - * [1993/08/02 19:02:56 gm] - * - * Revision 1.2.2.2 1993/06/09 02:55:27 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:30:57 jeffc] - * - * Revision 1.2 1993/04/19 17:16:58 devrcs - * Fixes for ANSI C - * [1993/02/26 14:02:39 sp] - * - * Revision 1.1 1992/09/30 02:36:56 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 17:40:18 mrt - * Correcting copyright - * - * Revision 2.3 91/05/13 06:07:15 af - * Removed CMU conditionals. - * [91/05/12 16:31:12 af] - * - * Revision 2.2 91/02/05 17:56:53 mrt - * Changed to new Mach copyright - * [91/02/01 17:49:22 mrt] - * - * Revision 2.1 89/08/03 16:10:10 rwd - * Created. - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon rights - * to redistribute these changes. - */ -/* - */ - -/* - * Copyright (c) 1982, 1986, 1988 Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
- * - * @(#)syslog.h 7.10 (Berkeley) 6/27/88 - */ - -/* - * Facility codes - */ - -#ifndef _SYS_SYSLOG_H_ -#define _SYS_SYSLOG_H_ - -#define LOG_KERN (0<<3) /* kernel messages */ -#define LOG_USER (1<<3) /* random user-level messages */ -#define LOG_MAIL (2<<3) /* mail system */ -#define LOG_DAEMON (3<<3) /* system daemons */ -#define LOG_AUTH (4<<3) /* security/authorization messages */ -#define LOG_SYSLOG (5<<3) /* messages generated internally by syslogd */ -#define LOG_LPR (6<<3) /* line printer subsystem */ -#define LOG_NEWS (7<<3) /* network news subsystem */ -#define LOG_UUCP (8<<3) /* UUCP subsystem */ - /* other codes through 15 reserved for system use */ -#define LOG_LOCAL0 (16<<3) /* reserved for local use */ -#define LOG_LOCAL1 (17<<3) /* reserved for local use */ -#define LOG_LOCAL2 (18<<3) /* reserved for local use */ -#define LOG_LOCAL3 (19<<3) /* reserved for local use */ -#define LOG_LOCAL4 (20<<3) /* reserved for local use */ -#define LOG_LOCAL5 (21<<3) /* reserved for local use */ -#define LOG_LOCAL6 (22<<3) /* reserved for local use */ -#define LOG_LOCAL7 (23<<3) /* reserved for local use */ - -#define LOG_NFACILITIES 24 /* maximum number of facilities */ -#define LOG_FACMASK 0x03f8 /* mask to extract facility part */ - -#define LOG_FAC(p) (((p) & LOG_FACMASK) >> 3) /* facility of pri */ - -/* - * Priorities (these are ordered) - */ - -#define LOG_EMERG 0 /* system is unusable */ -#define LOG_ALERT 1 /* action must be taken immediately */ -#define LOG_CRIT 2 /* critical conditions */ -#define LOG_ERR 3 /* error conditions */ -#define LOG_WARNING 4 /* warning conditions */ -#define LOG_NOTICE 5 /* normal but signification condition */ -#define LOG_INFO 6 /* informational */ -#define LOG_DEBUG 7 /* debug-level messages */ - -#define LOG_PRIMASK 0x0007 /* mask to extract priority part (internal) */ -#define LOG_PRI(p) ((p) & LOG_PRIMASK) /* extract priority */ - -#define LOG_MAKEPRI(fac, pri) (((fac) << 3) | (pri)) - -#ifdef MACH_KERNEL -#define LOG_PRINTF -1 /* pseudo-priority to indicate use of printf */ -#endif - -/* - * arguments to setlogmask. - */ -#define LOG_MASK(pri) (1 << (pri)) /* mask for one priority */ -#define LOG_UPTO(pri) ((1 << ((pri)+1)) - 1) /* all priorities through pri */ - -/* - * Option flags for openlog. - * - * LOG_ODELAY no longer does anything; LOG_NDELAY is the - * inverse of what it used to be. - */ -#define LOG_PID 0x01 /* log the pid with each message */ -#define LOG_CONS 0x02 /* log on the console if errors in sending */ -#define LOG_ODELAY 0x04 /* delay open until first syslog() (default) */ -#define LOG_NDELAY 0x08 /* don't delay open */ -#define LOG_NOWAIT 0x10 /* if forking to log on console, don't wait() */ - -#if defined(__STDC__) -extern void openlog(const char *, int); -extern void syslog(int, const char *, ...); -extern void closelog(void); -extern void setlogmask(int); -#endif /* defined(__STDC__) */ -#endif /* _SYS_SYSLOG_H_ */ diff --git a/osfmk/sys/types.h b/osfmk/sys/types.h deleted file mode 100644 index 5395efe2b..000000000 --- a/osfmk/sys/types.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.10.3 1996/01/09 19:23:12 devrcs - * Change time_t typedef from "unsigned int" to "int" to - * match the server and what it has historically been. - * Added more shorthand definitions for unsigned typedefs. - * Made conditional on ASSEMBLER not being defined. - * [1995/12/01 20:39:08 jfraser] - * - * Merged '64-bit safe' changes from DEC alpha port. - * [1995/11/21 18:10:35 jfraser] - * - * Revision 1.1.10.2 1995/01/06 19:57:26 devrcs - * mk6 CR668 - 1.3b26 merge - * add shorthand defs for unsigned typedefs - * OSF alpha pal merge - * paranoid bit masking, 64bit cleanup, add NBBY - * [1994/10/14 03:43:58 dwm] - * - * Revision 1.1.10.1 1994/09/23 03:13:36 ezf - * change marker to not FREE - * [1994/09/22 21:59:04 ezf] - * - * Revision 1.1.3.2 1993/06/03 00:18:19 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:31:08 jeffc] - * - * Revision 1.1 1992/09/30 02:37:03 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.6 91/05/14 17:40:39 mrt - * Correcting copyright - * - * Revision 2.5 91/02/05 17:57:07 mrt - * Changed to new Mach copyright - * [91/02/01 17:49:41 mrt] - * - * Revision 2.4 90/08/27 22:13:03 dbg - * Created. - * [90/07/16 dbg] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon rights - * to redistribute these changes. 
- */ -/* - */ -#ifndef _SYS_TYPES_H_ -#define _SYS_TYPES_H_ - -#ifndef ASSEMBLER - -/* - * Common type definitions that lots of old files seem to want. - */ - -typedef unsigned char u_char; /* unsigned char */ -typedef unsigned short u_short; /* unsigned short */ -typedef unsigned int u_int; /* unsigned int */ -typedef unsigned long u_long; /* unsigned long */ - -typedef struct _quad_ { - unsigned int val[2]; /* 2 32-bit values make... */ -} quad; /* an 8-byte item */ - -typedef char * caddr_t; /* address of a (signed) char */ - -typedef int time_t; /* a signed 32 */ -typedef unsigned int daddr_t; /* an unsigned 32 */ -#if 0 /* off_t should be 64-bit ! */ -typedef unsigned int off_t; /* another unsigned 32 */ -#endif -typedef unsigned short dev_t; /* another unsigned short */ -#define NODEV ((dev_t)-1) /* and a null value for it */ - -#define major(i) (((i) >> 8) & 0xFF) -#define minor(i) ((i) & 0xFF) -#define makedev(i,j) ((((i) & 0xFF) << 8) | ((j) & 0xFF)) - -#define NBBY 8 - -#ifndef NULL -#define NULL ((void *) 0) /* the null pointer */ -#endif - -/* - * Shorthand type definitions for unsigned storage classes - */ -typedef unsigned char uchar_t; -typedef unsigned short ushort_t; -typedef unsigned int uint_t; -typedef unsigned long ulong_t; -typedef volatile unsigned char vuchar_t; -typedef volatile unsigned short vushort_t; -typedef volatile unsigned int vuint_t; -typedef volatile unsigned long vulong_t; - -/* - * Shorthand type definitions for unsigned storage classes - */ -typedef uchar_t uchar; -typedef ushort_t ushort; -typedef uint_t uint; -typedef ulong_t ulong; - -#endif /* !ASSEMBLER */ - -#endif /* _SYS_TYPES_H_ */ diff --git a/osfmk/vm/WKdm_new.h b/osfmk/vm/WKdm_new.h index c29a2cbb3..8072e6d0d 100644 --- a/osfmk/vm/WKdm_new.h +++ b/osfmk/vm/WKdm_new.h @@ -62,200 +62,28 @@ * (from partial matches), packed three per word. */ - #ifdef __cplusplus extern "C" { #endif -/* ============================================================ */ -/* Included files */ - -#ifdef WK_DEBUG -#include -#include -#include -#include -#endif - -typedef unsigned int WK_word; - -/* at the moment we have dependencies on the page size. 
That should - * be changed to work for any power-of-two size that's at least 16 - * words, or something like that - */ - -#define PAGE_SIZE_IN_WORDS 1024 -#define PAGE_SIZE_IN_BYTES 4096 - -#define DICTIONARY_SIZE 16 - -/* - * macros defining the basic layout of stuff in a page - */ -#define HEADER_SIZE_IN_WORDS 3 -#define TAGS_AREA_OFFSET 3 -#define TAGS_AREA_SIZE 64 - -/* the next few are used during compression to write the header */ -#define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr) \ - (compr_dest_buf[0] = qpos_start_addr - compr_dest_buf) -#define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \ - (compr_dest_buf[1] = lb_start_addr - compr_dest_buf) -#define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \ - (compr_dest_buf[2] = lb_end_addr - compr_dest_buf) - -/* the next few are only use during decompression to read the header */ -#define TAGS_AREA_START(decomp_src_buf) \ - (decomp_src_buf + TAGS_AREA_OFFSET) -#define TAGS_AREA_END(decomp_src_buf) \ - (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE) -#define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf) -#define QPOS_AREA_START(decomp_src_buf) \ - (decomp_src_buf + decomp_src_buf[0]) -#define LOW_BITS_AREA_START(decomp_src_buf) \ - (decomp_src_buf + (decomp_src_buf[1])) -#define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf) -#define LOW_BITS_AREA_END(decomp_src_buf) \ - (decomp_src_buf + (decomp_src_buf[2])) - -/* ============================================================ */ -/* Types and structures */ +#include -/* A structure to store each element of the dictionary. */ -typedef WK_word DictionaryElement; - -/* ============================================================ */ -/* Misc constants */ - -#define BITS_PER_WORD 32 -#define BYTES_PER_WORD 4 -#define NUM_LOW_BITS 10 -#define LOW_BITS_MASK 0x3FF -#define ALL_ONES_MASK 0xFFFFFFFF - -#define TWO_BITS_PACKING_MASK 0x03030303 -#define FOUR_BITS_PACKING_MASK 0x0F0F0F0F -#define TEN_LOW_BITS_MASK 0x000003FF -#define TWENTY_TWO_HIGH_BITS_MASK 0xFFFFFC00 - -/* Tag values. NOTE THAT CODE MAY DEPEND ON THE NUMBERS USED. - * Check for conditionals doing arithmetic on these things - * before changing them - */ -#define ZERO_TAG 0x0 -#define PARTIAL_TAG 0x1 -#define MISS_TAG 0x2 -#define EXACT_TAG 0x3 -#define BITS_PER_BYTE 8 +#define WKdm_SCRATCH_BUF_SIZE PAGE_SIZE -/* ============================================================ */ -/* Global macros */ - -/* Shift out the low bits of a pattern to give the high bits pattern. - The stripped patterns are used for initial tests of partial - matches. */ -#define HIGH_BITS(word_pattern) (word_pattern >> NUM_LOW_BITS) - -/* String the high bits of a pattern so the low order bits can - be included in an encoding of a partial match. */ -#define LOW_BITS(word_pattern) (word_pattern & LOW_BITS_MASK) - -#if defined DEBUG_WK -#define DEBUG_PRINT_1(string) printf (string) -#define DEBUG_PRINT_2(string,value) printf(string, value) -#else -#define DEBUG_PRINT_1(string) -#define DEBUG_PRINT_2(string, value) -#endif - -/* Set up the dictionary before performing compression or - decompression. Each element is loaded with some value, the - high-bits version of that value, and a next pointer. 
*/ -#define PRELOAD_DICTIONARY { \ - dictionary[0] = 1; \ - dictionary[1] = 1; \ - dictionary[2] = 1; \ - dictionary[3] = 1; \ - dictionary[4] = 1; \ - dictionary[5] = 1; \ - dictionary[6] = 1; \ - dictionary[7] = 1; \ - dictionary[8] = 1; \ - dictionary[9] = 1; \ - dictionary[10] = 1; \ - dictionary[11] = 1; \ - dictionary[12] = 1; \ - dictionary[13] = 1; \ - dictionary[14] = 1; \ - dictionary[15] = 1; \ -} - -/* these are the constants for the hash function lookup table. - * Only zero maps to zero. The rest of the tabale is the result - * of appending 17 randomizations of the multiples of 4 from - * 4 to 56. Generated by a Scheme script in hash.scm. - */ -#define HASH_LOOKUP_TABLE_CONTENTS { \ - 0, 52, 8, 56, 16, 12, 28, 20, 4, 36, 48, 24, 44, 40, 32, 60, \ - 8, 12, 28, 20, 4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12, \ - 8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44, 4, 8, 40, \ - 60, 32, 20, 44, 4, 36, 52, 24, 16, 56, 48, 12, 28, 16, 8, 40, \ - 36, 28, 32, 12, 4, 44, 52, 20, 24, 48, 60, 56, 40, 48, 8, 32, \ - 28, 36, 4, 44, 20, 56, 60, 24, 52, 16, 12, 12, 4, 48, 20, 8, \ - 52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44, \ - 52, 16, 32, 4, 48, 8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20, \ - 48, 8, 4, 60, 24, 56, 44, 12, 8, 36, 24, 28, 16, 60, 20, 56, \ - 32, 40, 48, 12, 4, 44, 52, 44, 40, 12, 56, 8, 36, 24, 60, 28, \ - 48, 4, 32, 20, 16, 52, 60, 12, 24, 36, 8, 4, 16, 56, 48, 44, \ - 40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44, 4, \ - 20, 60, 8, 48, 48, 52, 12, 20, 32, 44, 36, 28, 4, 40, 24, 8, \ - 56, 60, 16, 36, 32, 8, 40, 4, 52, 24, 44, 20, 12, 28, 48, 56, \ - 16, 60, 4, 52, 60, 48, 20, 16, 56, 44, 24, 8, 40, 12, 32, 28, \ - 36, 24, 32, 12, 4, 20, 16, 60, 36, 28, 8, 52, 40, 48, 44, 56 \ -} - -#define HASH_TO_DICT_BYTE_OFFSET(pattern) \ - (hashLookupTable[((pattern) >> 10) & 0xFF]) - -extern const char hashLookupTable[]; - -/* EMIT... macros emit bytes or words into the intermediate arrays - */ - -#define EMIT_BYTE(fill_ptr, byte_value) {*fill_ptr++ = byte_value; } -#define EMIT_WORD(fill_ptr,word_value) {*fill_ptr++ = word_value; } - -/* RECORD... 
macros record the results of modeling in the intermediate - * arrays - */ - -#define RECORD_ZERO { EMIT_BYTE(next_tag,ZERO_TAG); } - -#define RECORD_EXACT(queue_posn) EMIT_BYTE(next_tag,EXACT_TAG); \ - EMIT_BYTE(next_qp,(queue_posn)); - -#define RECORD_PARTIAL(queue_posn,low_bits_pattern) { \ - EMIT_BYTE(next_tag,PARTIAL_TAG); \ - EMIT_BYTE(next_qp,(queue_posn)); \ - EMIT_WORD(next_low_bits,(low_bits_pattern)) } - -#define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \ - EMIT_WORD(next_full_patt,(word_pattern)); - +typedef unsigned int WK_word; -#define WKdm_SCRATCH_BUF_SIZE 4096 void WKdm_decompress_new (WK_word* src_buf, - WK_word* dest_buf, - WK_word* scratch, - unsigned int bytes); + WK_word* dest_buf, + WK_word* scratch, + unsigned int bytes); int WKdm_compress_new (WK_word* src_buf, - WK_word* dest_buf, - WK_word* scratch, - unsigned int limit); + WK_word* dest_buf, + WK_word* scratch, + unsigned int limit); #ifdef __cplusplus } /* extern "C" */ diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index 5d05fa984..a7f17574a 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -472,7 +472,7 @@ memory_object_control_uiomove( if ((xsize = PAGE_SIZE - start_offset) > io_requested) xsize = io_requested; - if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) ) + if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << PAGE_SHIFT) + start_offset), xsize, uio)) ) break; io_requested -= xsize; @@ -509,7 +509,8 @@ memory_object_control_uiomove( * update clustered and speculative state * */ - VM_PAGE_CONSUME_CLUSTERED(dst_page); + if (dst_page->clustered) + VM_PAGE_CONSUME_CLUSTERED(dst_page); PAGE_WAKEUP_DONE(dst_page); } @@ -813,7 +814,7 @@ vnode_pager_data_request( vnode_object = vnode_pager_lookup(mem_obj); - size = MAX_UPL_TRANSFER * PAGE_SIZE; + size = MAX_UPL_TRANSFER_BYTES; base_offset = offset; if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS) @@ -964,7 +965,7 @@ vnode_pager_cluster_write( upl_flags |= UPL_KEEPCACHED; while (cnt) { - size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */ + size = (cnt < MAX_UPL_TRANSFER_BYTES) ? 
cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */ assert((upl_size_t) size == size); vnode_pageout(vnode_object->vnode_handle, @@ -996,7 +997,7 @@ vnode_pager_cluster_write( * and then clip the size to insure we * don't request past the end of the underlying file */ - size = PAGE_SIZE * MAX_UPL_TRANSFER; + size = MAX_UPL_TRANSFER_BYTES; base_offset = offset & ~((signed)(size - 1)); if ((base_offset + size) > vnode_size) @@ -1014,7 +1015,8 @@ vnode_pager_cluster_write( } assert((upl_size_t) size == size); vnode_pageout(vnode_object->vnode_handle, - NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size, UPL_VNODE_PAGER, NULL); + NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size, + (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL); } } @@ -1253,6 +1255,86 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal * return(1); } +int +fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid) +{ + + vm_map_t map; + vm_map_offset_t address = (vm_map_offset_t )arg; + vm_map_entry_t tmp_entry; + vm_map_entry_t entry; + + task_lock(task); + map = task->map; + if (map == VM_MAP_NULL) + { + task_unlock(task); + return(0); + } + vm_map_reference(map); + task_unlock(task); + + vm_map_lock_read(map); + + if (!vm_map_lookup_entry(map, address, &tmp_entry)) { + if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { + vm_map_unlock_read(map); + vm_map_deallocate(map); + return(0); + } + } else { + entry = tmp_entry; + } + + while ((entry != vm_map_to_entry(map))) { + *vnodeaddr = 0; + *vid = 0; + + if (entry->is_sub_map == 0) { + if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) { + + pinfo->pri_offset = entry->offset; + pinfo->pri_protection = entry->protection; + pinfo->pri_max_protection = entry->max_protection; + pinfo->pri_inheritance = entry->inheritance; + pinfo->pri_behavior = entry->behavior; + pinfo->pri_user_wired_count = entry->user_wired_count; + pinfo->pri_user_tag = entry->alias; + + if (entry->is_shared) + pinfo->pri_flags |= PROC_REGION_SHARED; + + pinfo->pri_pages_resident = 0; + pinfo->pri_pages_shared_now_private = 0; + pinfo->pri_pages_swapped_out = 0; + pinfo->pri_pages_dirtied = 0; + pinfo->pri_ref_count = 0; + pinfo->pri_shadow_depth = 0; + pinfo->pri_share_mode = 0; + + pinfo->pri_private_pages_resident = 0; + pinfo->pri_shared_pages_resident = 0; + pinfo->pri_obj_id = 0; + + pinfo->pri_address = (uint64_t)entry->vme_start; + pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start); + pinfo->pri_depth = 0; + + vm_map_unlock_read(map); + vm_map_deallocate(map); + return(1); + } + } + + /* Keep searching for a vnode-backed mapping */ + entry = entry->vme_next; + } + + vm_map_unlock_read(map); + vm_map_deallocate(map); + return(0); +} + static int fill_vnodeinfoforaddr( vm_map_entry_t entry, @@ -1337,6 +1419,23 @@ vnode_pager_get_object_vnode ( return(KERN_FAILURE); } +#if CONFIG_IOSCHED +kern_return_t +vnode_pager_get_object_devvp( + memory_object_t mem_obj, + uintptr_t *devvp) +{ + struct vnode *vp; + uint32_t vid; + + if(vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS) + return (KERN_FAILURE); + *devvp = (uintptr_t)vnode_mountdevvp(vp); + if (*devvp) + return (KERN_SUCCESS); + return (KERN_FAILURE); +} +#endif /* * Find the underlying vnode object for the given vm_map_entry. 
If found, return with the diff --git a/osfmk/vm/default_freezer.c b/osfmk/vm/default_freezer.c index b7127b39e..d6601a227 100644 --- a/osfmk/vm/default_freezer.c +++ b/osfmk/vm/default_freezer.c @@ -845,9 +845,23 @@ default_freezer_handle_deallocate_locked( assert(df_handle); df_handle->dfh_ref_count--; if (df_handle->dfh_ref_count == 0) { + + if (df_handle->dfh_compact_object) { + vm_object_deallocate(df_handle->dfh_compact_object); + df_handle->dfh_compact_object = NULL; + df_handle->dfh_compact_offset = 0; + } + + if (df_handle->dfh_table) { + default_freezer_mapping_free(&df_handle->dfh_table, TRUE); + df_handle->dfh_table = NULL; + } + + lck_rw_done(&df_handle->dfh_lck); lck_rw_destroy(&df_handle->dfh_lck, &default_freezer_handle_lck_grp); - kfree(df_handle, sizeof(struct default_freezer_handle)); should_unlock = FALSE; + + kfree(df_handle, sizeof(struct default_freezer_handle)); } return should_unlock; } diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c index 7f2ebbd3b..19a1d566f 100644 --- a/osfmk/vm/memory_object.c +++ b/osfmk/vm/memory_object.c @@ -62,8 +62,6 @@ * External memory management interface control functions. */ -#include - /* * Interface dependencies: */ @@ -551,10 +549,10 @@ vm_object_update_extent( /* * Limit the number of pages to be cleaned at once to a contiguous - * run, or at most MAX_UPL_TRANSFER size + * run, or at most MAX_UPL_TRANSFER_BYTES */ if (data_cnt) { - if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) { + if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) { if (dw_count) { vm_page_do_delayed_work(object, &dw_array[0], dw_count); @@ -804,6 +802,7 @@ vm_object_update( fault_info.interruptible = THREAD_UNINT; fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; fault_info.user_tag = 0; + fault_info.pmap_options = 0; fault_info.lo_offset = copy_offset; fault_info.hi_offset = copy_size; fault_info.no_cache = FALSE; @@ -1128,11 +1127,6 @@ vm_object_set_attributes_common( return(KERN_INVALID_ARGUMENT); } -#if !ADVISORY_PAGEOUT - if (silent_overwrite || advisory_pageout) - return(KERN_INVALID_ARGUMENT); - -#endif /* !ADVISORY_PAGEOUT */ if (may_cache) may_cache = TRUE; if (temporary) @@ -1566,8 +1560,6 @@ memory_object_iopl_request( return (KERN_INVALID_ARGUMENT); if (!object->private) { - if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) - *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); if (object->phys_contiguous) { *flags = UPL_PHYS_CONTIG; } else { @@ -1926,6 +1918,22 @@ memory_object_mark_unused( vm_object_cache_add(object); } +void +memory_object_mark_io_tracking( + memory_object_control_t control) +{ + vm_object_t object; + + if (control == NULL) + return; + object = memory_object_control_to_vm_object(control); + + if (object != VM_OBJECT_NULL) { + vm_object_lock(object); + object->io_tracking = TRUE; + vm_object_unlock(object); + } +} kern_return_t memory_object_pages_resident( diff --git a/osfmk/vm/memory_object.h b/osfmk/vm/memory_object.h index f9d1b4326..a3cf7c1c6 100644 --- a/osfmk/vm/memory_object.h +++ b/osfmk/vm/memory_object.h @@ -154,5 +154,7 @@ extern void memory_object_mark_unused( memory_object_control_t control, boolean_t rage); +extern void memory_object_mark_io_tracking( + memory_object_control_t control); #endif /* _VM_MEMORY_OBJECT_H_ */ diff --git a/osfmk/vm/pmap.h b/osfmk/vm/pmap.h index 2e228d6d2..7de264ec5 100644 --- a/osfmk/vm/pmap.h +++ b/osfmk/vm/pmap.h @@ -112,6 +112,8 @@ extern kern_return_t copypv( #ifdef MACH_KERNEL_PRIVATE +#include + #include /* @@ -195,6 +197,11 @@ extern void 
pmap_reference(pmap_t pmap); /* Gain a reference. */ extern void pmap_destroy(pmap_t pmap); /* Release a reference. */ extern void pmap_switch(pmap_t); +#if MACH_ASSERT +extern void pmap_set_process(pmap_t pmap, + int pid, + char *procname); +#endif /* MACH_ASSERT */ extern void pmap_enter( /* Enter a mapping */ pmap_t pmap, @@ -220,6 +227,12 @@ extern void pmap_remove_some_phys( pmap_t pmap, ppnum_t pn); +extern void pmap_lock_phys_page( + ppnum_t pn); + +extern void pmap_unlock_phys_page( + ppnum_t pn); + /* * Routines that operate on physical addresses. @@ -303,6 +316,7 @@ extern boolean_t pmap_verify_free(ppnum_t pn); /* * Statistics routines */ +extern int (pmap_compressed)(pmap_t pmap); extern int (pmap_resident_count)(pmap_t pmap); extern int (pmap_resident_max)(pmap_t pmap); @@ -585,7 +599,10 @@ extern pmap_t kernel_pmap; /* The kernel's map */ #define PMAP_OPTIONS_REUSABLE 0x10 /* page is "reusable" */ #define PMAP_OPTIONS_NOFLUSH 0x20 /* delay flushing of pmap */ #define PMAP_OPTIONS_NOREFMOD 0x40 /* don't need ref/mod on disconnect */ +#define PMAP_OPTIONS_ALT_ACCT 0x80 /* use alternate accounting scheme for page */ #define PMAP_OPTIONS_REMOVE 0x100 /* removing a mapping */ +#define PMAP_OPTIONS_SET_REUSABLE 0x200 /* page is now "reusable" */ +#define PMAP_OPTIONS_CLEAR_REUSABLE 0x400 /* page no longer "reusable" */ #if !defined(__LP64__) extern vm_offset_t pmap_extract(pmap_t pmap, @@ -608,12 +625,6 @@ extern void pmap_remove_options( /* Remove mappings. */ vm_map_offset_t e, int options); -extern void pmap_reusable( - pmap_t map, - vm_map_offset_t s, - vm_map_offset_t e, - boolean_t reusable); - extern void fillPage(ppnum_t pa, unsigned int fill); extern void pmap_map_sharedpage(task_t task, pmap_t pmap); diff --git a/osfmk/vm/vm_apple_protect.c b/osfmk/vm/vm_apple_protect.c index 11ccb6716..7cb6b93cf 100644 --- a/osfmk/vm/vm_apple_protect.c +++ b/osfmk/vm/vm_apple_protect.c @@ -338,6 +338,7 @@ apple_protect_pager_data_request( vm_map_offset_t kernel_mapping; vm_offset_t src_vaddr, dst_vaddr; vm_offset_t cur_offset; + vm_offset_t offset_in_page; vm_map_entry_t map_entry; kern_return_t error_code; vm_prot_t prot; @@ -539,10 +540,23 @@ apple_protect_pager_data_request( * Decrypt the encrypted contents of the source page * into the destination page. */ - ret = pager->crypt.page_decrypt((const void *) src_vaddr, - (void *) dst_vaddr, - offset+cur_offset, - pager->crypt.crypt_ops); + for (offset_in_page = 0; + offset_in_page < PAGE_SIZE; + offset_in_page += 4096) { + ret = pager->crypt.page_decrypt((const void *) + (src_vaddr + + offset_in_page), + (void *) + (dst_vaddr + + offset_in_page), + (offset + + cur_offset + + offset_in_page), + pager->crypt.crypt_ops); + if (ret) { + break; + } + } if (ret) { /* * Decryption failed. Abort the fault. diff --git a/osfmk/vm/vm_compressor.c b/osfmk/vm/vm_compressor.c index bc565e3f1..402323060 100644 --- a/osfmk/vm/vm_compressor.c +++ b/osfmk/vm/vm_compressor.c @@ -27,6 +27,11 @@ */ #include + +#if CONFIG_PHANTOM_CACHE +#include +#endif + #include #include #include @@ -64,6 +69,7 @@ extern void vm_pageout_io_throttle(void); extern unsigned int hash_string(char *cp, int len); #endif + struct c_slot { uint64_t c_offset:C_SEG_OFFSET_BITS, c_size:12, @@ -77,7 +83,7 @@ struct c_slot { }; -#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? 4096 : cs->c_size) +#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size) #define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? 
PAGE_SIZE - 1 : size)) @@ -137,8 +143,10 @@ caddr_t c_segments_next_page; boolean_t c_segments_busy; uint32_t c_segments_available; uint32_t c_segments_limit; +uint32_t c_segments_nearing_limit; uint32_t c_segment_pages_compressed; uint32_t c_segment_pages_compressed_limit; +uint32_t c_segment_pages_compressed_nearing_limit; uint32_t c_free_segno_head = (uint32_t)-1; uint32_t vm_compressor_minorcompact_threshold_divisor = 10; @@ -161,7 +169,7 @@ lck_spin_t *c_list_lock; #endif /* __i386__ || __x86_64__ */ lck_rw_t c_master_lock; -lck_rw_t c_decompressor_lock; +boolean_t decompressions_blocked = FALSE; zone_t compressor_segment_zone; int c_compressor_swap_trigger = 0; @@ -195,16 +203,17 @@ uint64_t first_c_segment_to_warm_generation_id = 0; uint64_t last_c_segment_to_warm_generation_id = 0; boolean_t hibernate_flushing = FALSE; -int64_t c_segment_input_bytes = 0; -int64_t c_segment_compressed_bytes = 0; -int64_t compressor_bytes_used = 0; +int64_t c_segment_input_bytes __attribute__((aligned(8))) = 0; +int64_t c_segment_compressed_bytes __attribute__((aligned(8))) = 0; +int64_t compressor_bytes_used __attribute__((aligned(8))) = 0; +uint64_t compressor_kvspace_used __attribute__((aligned(8))) = 0; +uint64_t compressor_kvwaste_limit = 0; static boolean_t compressor_needs_to_swap(void); static void vm_compressor_swap_trigger_thread(void); static void vm_compressor_do_delayed_compactions(boolean_t); static void vm_compressor_compact_and_swap(boolean_t); static void vm_compressor_age_swapped_in_segments(boolean_t); -static uint64_t compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t); boolean_t vm_compressor_low_on_space(void); @@ -227,6 +236,7 @@ void c_seg_free_locked(c_segment_t); uint64_t vm_available_memory(void); +uint64_t vm_compressor_pages_compressed(void); extern unsigned int dp_pages_free, dp_pages_reserve; @@ -237,6 +247,13 @@ vm_available_memory(void) } +uint64_t +vm_compressor_pages_compressed(void) +{ + return (c_segment_pages_compressed * PAGE_SIZE_64); +} + + boolean_t vm_compression_available(void) { @@ -253,8 +270,8 @@ vm_compression_available(void) boolean_t vm_compressor_low_on_space(void) { - if ((c_segment_pages_compressed > (c_segment_pages_compressed_limit - 20000)) || - (c_segment_count > (c_segments_limit - 250))) + if ((c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) || + (c_segment_count > c_segments_nearing_limit)) return (TRUE); return (FALSE); @@ -262,13 +279,18 @@ vm_compressor_low_on_space(void) int -vm_low_on_space(void) +vm_wants_task_throttled(task_t task) { + if (task == kernel_task) + return (0); + if (vm_compressor_mode == COMPRESSED_PAGER_IS_ACTIVE || vm_compressor_mode == DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { - if (vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) + if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) && + (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4)) return (1); } else { - if (((dp_pages_free + dp_pages_reserve < 2000) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))) + if (((dp_pages_free + dp_pages_reserve < 2000) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) && + get_task_resident_size(task) > (((AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE) / 5)) return (1); } return (0); @@ -283,21 +305,29 @@ vm_compressor_init_locks(void) lck_attr_setdefault(&vm_compressor_lck_attr); lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); - lck_rw_init(&c_decompressor_lock, 
&vm_compressor_lck_grp, &vm_compressor_lck_attr); } void vm_decompressor_lock(void) { - lck_rw_lock_exclusive(&c_decompressor_lock); + PAGE_REPLACEMENT_ALLOWED(TRUE); + + decompressions_blocked = TRUE; + + PAGE_REPLACEMENT_ALLOWED(FALSE); } void vm_decompressor_unlock(void) { - lck_rw_done(&c_decompressor_lock); + PAGE_REPLACEMENT_ALLOWED(TRUE); + + decompressions_blocked = FALSE; + + PAGE_REPLACEMENT_ALLOWED(FALSE); + thread_wakeup((event_t)&decompressions_blocked); } @@ -306,6 +336,23 @@ void vm_compressor_init(void) { thread_t thread; + struct c_slot cs_dummy; + c_slot_t cs = &cs_dummy; + + /* + * ensure that any pointer that gets created from + * the vm_page zone can be packed properly + */ + cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_min_address); + + if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_min_address) + panic("C_SLOT_UNPACK_PTR failed on zone_map_min_address - %p", (void *)zone_map_min_address); + + cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_max_address); + + if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_max_address) + panic("C_SLOT_UNPACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address); + assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE); @@ -374,6 +421,11 @@ vm_compressor_init(void) if ((c_segments_limit = c_segment_pages_compressed_limit / (C_SEG_BUFSIZE / PAGE_SIZE)) > C_SEG_MAX_LIMIT) c_segments_limit = C_SEG_MAX_LIMIT; + c_segment_pages_compressed_nearing_limit = (c_segment_pages_compressed_limit * 98) / 100; + c_segments_nearing_limit = (c_segments_limit * 98) / 100; + + compressor_kvwaste_limit = (vm_map_max(kernel_map) - vm_map_min(kernel_map)) / 16; + c_segments_busy = FALSE; if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY) != KERN_SUCCESS) @@ -407,6 +459,11 @@ vm_compressor_init(void) panic("vm_compressor_init: Failed to start the internal pageout thread.\n"); } + if ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) || + (vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) { + vm_compressor_swap_init(); + } + #if CONFIG_FREEZE memorystatus_freeze_enabled = TRUE; #endif /* CONFIG_FREEZE */ @@ -481,7 +538,7 @@ c_seg_need_delayed_compaction(c_segment_t c_seg) boolean_t clear_busy = FALSE; if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); lck_mtx_unlock_always(&c_seg->c_lock); lck_mtx_lock_spin_always(c_list_lock); @@ -509,7 +566,7 @@ c_seg_move_to_sparse_list(c_segment_t c_seg) boolean_t clear_busy = FALSE; if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); lck_mtx_unlock_always(&c_seg->c_lock); lck_mtx_lock_spin_always(c_list_lock); @@ -590,7 +647,7 @@ c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg) } else { try_minor_compaction_succeeded++; - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE); } } @@ -680,7 +737,7 @@ c_seg_try_free(c_segment_t c_seg) */ } if (!c_seg->c_busy_swapping) - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); c_seg_free_locked(c_seg); @@ -693,8 +750,7 @@ c_seg_try_free(c_segment_t c_seg) void c_seg_free(c_segment_t c_seg) { - if (!c_seg->c_busy_swapping) - c_seg->c_busy = 1; + assert(c_seg->c_busy); lck_mtx_unlock_always(&c_seg->c_lock); lck_mtx_lock_spin_always(c_list_lock); @@ -779,6 +835,8 @@ c_seg_free_locked(c_segment_t c_seg) kernel_memory_depopulate(kernel_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR); 
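/*
 * Illustrative sketch, not part of the patch: with vm_decompressor_lock()
 * and vm_decompressor_unlock() rewritten above as a flag flipped under the
 * exclusive c_master_lock, a hibernation-style caller is expected to
 * bracket its flush with the gate. The flush body here is a hypothetical
 * placeholder.
 */
static void
example_hibernate_flush(void)
{
	vm_decompressor_lock();		/* sets decompressions_blocked under
					 * the exclusive c_master_lock */

	/* ... push all dirty anonymous memory through the compressor ... */

	vm_decompressor_unlock();	/* clears the flag and wakes any
					 * threads blocked in c_decompress_page */
}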
kmem_free(kernel_map, (vm_offset_t) c_buffer, C_SEG_ALLOCSIZE); + OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used); + } else if (c_swap_handle) vm_swap_free(c_swap_handle); @@ -1122,8 +1180,8 @@ c_seg_major_compact( } -static uint64_t -compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec) +uint64_t +vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec) { uint64_t end_msecs; uint64_t start_msecs; @@ -1142,6 +1200,9 @@ uint32_t compressor_sample_max_in_msecs = 10000; uint32_t compressor_thrashing_threshold_per_10msecs = 50; uint32_t compressor_thrashing_min_per_10msecs = 20; +/* When true, reset sample data next chance we get. */ +static boolean_t compressor_need_sample_reset = FALSE; + extern uint32_t vm_page_filecache_min; @@ -1153,19 +1214,19 @@ compute_swapout_target_age(void) uint32_t min_operations_needed_in_this_sample; uint64_t elapsed_msecs_in_eval; uint64_t elapsed_msecs_in_sample; - boolean_t need_sample_reset = FALSE; boolean_t need_eval_reset = FALSE; clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); - elapsed_msecs_in_sample = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec); + elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec); - if (elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) { - need_sample_reset = TRUE; + if (compressor_need_sample_reset || + elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) { + compressor_need_sample_reset = TRUE; need_eval_reset = TRUE; goto done; } - elapsed_msecs_in_eval = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec); + elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec); if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs) goto done; @@ -1183,7 +1244,7 @@ compute_swapout_target_age(void) swapout_target_age = 0; - need_sample_reset = TRUE; + compressor_need_sample_reset = TRUE; need_eval_reset = TRUE; goto done; } @@ -1241,12 +1302,12 @@ compute_swapout_target_age(void) } else KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0); - need_sample_reset = TRUE; + compressor_need_sample_reset = TRUE; need_eval_reset = TRUE; } else KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0); done: - if (need_sample_reset == TRUE) { + if (compressor_need_sample_reset == TRUE) { bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period)); overage_decompressions_during_sample_period = 0; @@ -1256,6 +1317,7 @@ done: sample_period_compression_count = 0; last_eval_decompression_count = 0; last_eval_compression_count = 0; + compressor_need_sample_reset = FALSE; } if (need_eval_reset == TRUE) { start_of_eval_period_sec = cur_ts_sec; @@ -1264,16 +1326,18 @@ done: } - -int calls_since_last_considered = 0; +int compaction_swapper_inited = 0; +int compaction_swapper_init_now = 0; int compaction_swapper_running = 0; int compaction_swapper_abort = 0; #if CONFIG_JETSAM boolean_t memorystatus_kill_on_VM_thrashing(boolean_t); +boolean_t memorystatus_kill_on_FC_thrashing(boolean_t); int compressor_thrashing_induced_jetsam = 0; -boolean_t 
vm_compressor_thrashing_detected = FALSE; +int filecache_thrashing_induced_jetsam = 0; +static boolean_t vm_compressor_thrashing_detected = FALSE; #endif /* CONFIG_JETSAM */ static boolean_t @@ -1302,21 +1366,32 @@ compressor_needs_to_swap(void) c_seg = (c_segment_t) queue_first(&c_age_list_head); - if (c_seg->c_creation_ts <= swapout_target_age) - should_swap = TRUE; - else + if (c_seg->c_creation_ts > swapout_target_age) swapout_target_age = 0; } lck_mtx_unlock_always(c_list_lock); } +#if CONFIG_PHANTOM_CACHE + if (vm_phantom_cache_check_pressure()) + should_swap = TRUE; +#endif + if (swapout_target_age) + should_swap = TRUE; if (vm_swap_up == FALSE) { -#if CONFIG_JETSAM + if (should_swap) { +#if CONFIG_JETSAM if (vm_compressor_thrashing_detected == FALSE) { vm_compressor_thrashing_detected = TRUE; - memorystatus_kill_on_VM_thrashing(TRUE /* async */); - compressor_thrashing_induced_jetsam++; + + if (swapout_target_age) { + memorystatus_kill_on_VM_thrashing(TRUE /* async */); + compressor_thrashing_induced_jetsam++; + } else { + memorystatus_kill_on_FC_thrashing(TRUE /* async */); + filecache_thrashing_induced_jetsam++; + } /* * let the jetsam take precedence over * any major compactions we might have @@ -1327,11 +1402,11 @@ compressor_needs_to_swap(void) */ should_swap = FALSE; } - } else #endif /* CONFIG_JETSAM */ - if (COMPRESSOR_NEEDS_TO_MAJOR_COMPACT()) - should_swap = TRUE; + } else + should_swap = COMPRESSOR_NEEDS_TO_MAJOR_COMPACT(); } + /* * returning TRUE when swap_supported == FALSE * will cause the major compaction engine to @@ -1343,37 +1418,44 @@ compressor_needs_to_swap(void) return (should_swap); } -uint64_t -vm_compressor_total_compressions(void) +#if CONFIG_JETSAM +/* + * This function is called from the jetsam thread after killing something to + * mitigate thrashing. + * + * We need to restart our thrashing detection heuristics since memory pressure + * has potentially changed significantly, and we don't want to detect on old + * data from before the jetsam. + */ +void +vm_thrashing_jetsam_done(void) { - processor_t processor = processor_list; - vm_statistics64_t stat = &PROCESSOR_DATA(processor, vm_stat); - - uint64_t compressions = stat->compressions; + vm_compressor_thrashing_detected = FALSE; - if (processor_count > 1) { - simple_lock(&processor_list_lock); - - while ((processor = processor->processor_list) != NULL) { - stat = &PROCESSOR_DATA(processor, vm_stat); - compressions += stat->compressions; - } - - simple_unlock(&processor_list_lock); + /* Were we compressor-thrashing or filecache-thrashing? 
*/ + if (swapout_target_age) { + swapout_target_age = 0; + compressor_need_sample_reset = TRUE; } - - return compressions; +#if CONFIG_PHANTOM_CACHE + else { + vm_phantom_cache_restart_sample(); + } +#endif } +#endif /* CONFIG_JETSAM */ uint32_t vm_wake_compactor_swapper_calls = 0; void vm_wake_compactor_swapper(void) { + boolean_t need_major_compaction = FALSE; + if (compaction_swapper_running) return; - if (c_minor_count == 0) + if (c_minor_count == 0 && need_major_compaction == FALSE) return; lck_mtx_lock_spin_always(c_list_lock); @@ -1390,14 +1472,19 @@ vm_wake_compactor_swapper(void) lck_mtx_unlock_always(c_list_lock); } + void vm_consider_waking_compactor_swapper(void) { boolean_t need_wakeup = FALSE; - if (calls_since_last_considered++ < 1000 || compaction_swapper_running) + if (compaction_swapper_running) return; - calls_since_last_considered = 0; + + if (!compaction_swapper_inited && !compaction_swapper_init_now) { + compaction_swapper_init_now = 1; + need_wakeup = TRUE; + } if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) { @@ -1452,15 +1539,15 @@ vm_compressor_do_delayed_compactions(boolean_t flush_all) lck_mtx_lock_spin_always(&c_seg->c_lock); - if (c_seg->c_busy) { + if (c_seg->c_busy) { - lck_mtx_unlock_always(c_list_lock); - c_seg_wait_on_busy(c_seg); - lck_mtx_lock_spin_always(c_list_lock); + lck_mtx_unlock_always(c_list_lock); + c_seg_wait_on_busy(c_seg); + lck_mtx_lock_spin_always(c_list_lock); - continue; - } - c_seg->c_busy = 1; + continue; + } + C_SEG_BUSY(c_seg); c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE); @@ -1581,13 +1668,29 @@ vm_compressor_flush(void) } - +extern void vm_swap_file_set_tuneables(void); int compaction_swap_trigger_thread_awakened = 0; + static void vm_compressor_swap_trigger_thread(void) { + /* + * compaction_swapper_init_now is set when the first call to + * vm_consider_waking_compactor_swapper is made from + * vm_pageout_scan... 
since this function is called upon + * thread creation, we want to make sure to delay adjusting + * the tuneables until we are awakened via vm_pageout_scan + * so that we are at a point where the vm_swapfile_open will + * be operating on the correct directory (in case the default + * of /var/vm/ is overridden by the dynamic_pager) + */ + if (compaction_swapper_init_now && !compaction_swapper_inited) { + if (vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) + vm_swap_file_set_tuneables(); + compaction_swapper_inited = 1; + } lck_mtx_lock_spin_always(c_list_lock); compaction_swap_trigger_thread_awakened++; @@ -1877,7 +1980,7 @@ vm_compressor_compact_and_swap(boolean_t flush_all) continue; } - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) { /* @@ -1916,7 +2019,7 @@ vm_compressor_compact_and_swap(boolean_t flush_all) continue; } /* grab that segment */ - c_seg_next->c_busy = 1; + C_SEG_BUSY(c_seg_next); if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) { /* @@ -1992,6 +2095,10 @@ vm_compressor_compact_and_swap(boolean_t flush_all) } +static uint32_t no_paging_space_action_in_progress = 0; +extern void memorystatus_send_low_swap_note(void); + + static c_segment_t c_seg_allocate(c_segment_t *current_chead) { @@ -2003,6 +2110,20 @@ c_seg_allocate(c_segment_t *current_chead) if ( (c_seg = *current_chead) == NULL ) { uint32_t c_segno; + if (vm_compressor_low_on_space() || vm_swap_low_on_space()) { + + if (no_paging_space_action_in_progress == 0) { + + if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) { + + if (no_paging_space_action()) { + memorystatus_send_low_swap_note(); + } + + no_paging_space_action_in_progress = 0; + } + } + } KERNEL_DEBUG(0xe0400004 | DBG_FUNC_START, 0, 0, 0, 0, 0); lck_mtx_lock_spin_always(c_list_lock); @@ -2064,6 +2185,7 @@ c_seg_allocate(c_segment_t *current_chead) return (NULL); } + OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used); #if __i386__ || __x86_64__ lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); @@ -2219,8 +2341,8 @@ c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) #endif io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); f_offset = c_seg->c_store.c_swap_handle; - - c_seg->c_busy = 1; + + C_SEG_BUSY(c_seg); lck_mtx_unlock_always(&c_seg->c_lock); if (c_seg->c_ondisk) { @@ -2239,11 +2361,12 @@ c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) kmem_free(kernel_map, addr, C_SEG_ALLOCSIZE); c_seg->c_store.c_buffer = (int32_t*) NULL; + c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0); } else { c_seg->c_store.c_buffer = (int32_t*) addr; -#if CRYPTO +#if ENCRYPTED_SWAP vm_swap_decrypt(c_seg); -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ #if CHECKSUM_THE_SWAP if (c_seg->cseg_swap_size != io_size) @@ -2262,6 +2385,7 @@ c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) c_seg_minor_compaction_and_unlock(c_seg, FALSE); } OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used); + OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used); } } c_seg_swapin_requeue(c_seg); @@ -2290,6 +2414,8 @@ retry: cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot); cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr); + assert(slot_ptr == (c_slot_mapping_t)C_SLOT_UNPACK_PTR(cs)); + cs->c_offset = c_seg->c_nextoffset; max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset);
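/*
 * Illustrative sketch, not part of the patch: the
 * no_paging_space_action_in_progress guard above is a single-flight gate.
 * OSCompareAndSwap() admits exactly one thread per low-space episode to
 * run the action while all other threads fall through without blocking.
 * The generic shape, with hypothetical names:
 */
static uint32_t example_action_busy = 0;

static void
example_single_flight(void (*action)(void))
{
	/* only the thread that flips 0 -> 1 runs the action */
	if (OSCompareAndSwap(0, 1, (UInt32 *)&example_action_busy)) {
		action();
		example_action_busy = 0;	/* re-arm for the next episode */
	}
}
@@ -2317,9 +2443,9 @@ retry: #if CHECKSUM_THE_DATA cs->c_hash_data =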
hash_string(src, PAGE_SIZE); #endif - c_size = WKdm_compress_new((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], - (WK_word *)(uintptr_t)scratch_buf, max_csize - 4); + c_size = WKdm_compress_new((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], + (WK_word *)(uintptr_t)scratch_buf, max_csize - 4); assert(c_size <= (max_csize - 4) && c_size >= -1); if (c_size == -1) { @@ -2364,22 +2490,6 @@ retry: KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0); - if (vm_compressor_low_on_space()) { - ipc_port_t trigger = IP_NULL; - - PSL_LOCK(); - if (IP_VALID(min_pages_trigger_port)) { - trigger = min_pages_trigger_port; - min_pages_trigger_port = IP_NULL; - } - PSL_UNLOCK(); - - if (IP_VALID(trigger)) { - no_paging_space_action(); - default_pager_space_alert(trigger, HI_WAT_ALERT); - ipc_port_release_send(trigger); - } - } return (0); } @@ -2399,26 +2509,48 @@ c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int boolean_t consider_defragmenting = FALSE; ReTry: + PAGE_REPLACEMENT_DISALLOWED(TRUE); + #if HIBERNATION - if (dst) { - if (lck_rw_try_lock_shared(&c_decompressor_lock) == 0) { - if (flags & C_DONT_BLOCK) { - *zeroslot = 0; - return (-2); - } - lck_rw_lock_shared(&c_decompressor_lock); + /* + * if hibernation is enabled, it indicates (via a call + * to 'vm_decompressor_lock') that no further + * decompressions are allowed once it reaches + * the point of flushing all of the currently dirty + * anonymous memory through the compressor and out + * to disk... in this state we allow freeing of compressed + * pages and must honor the C_DONT_BLOCK case + */ + if (dst && decompressions_blocked == TRUE) { + if (flags & C_DONT_BLOCK) { + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + *zeroslot = 0; + return (-2); } + /* + * it's safe to atomically assert and block behind the + * lock held in shared mode because "decompressions_blocked" is + * only set and cleared, and the thread_wakeup done, when the lock + * is held exclusively + */ + assert_wait((event_t)&decompressions_blocked, THREAD_UNINT); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + thread_block(THREAD_CONTINUE_NULL); + + goto ReTry; } #endif - PAGE_REPLACEMENT_DISALLOWED(TRUE); - /* s_cseg is actually "segno+1" */ c_seg = c_segments[slot_ptr->s_cseg - 1].c_seg; lck_mtx_lock_spin_always(&c_seg->c_lock); if (flags & C_DONT_BLOCK) { - if (c_seg->c_busy || c_seg->c_ondisk) { + if (c_seg->c_busy || (c_seg->c_ondisk && dst)) { retval = -2; *zeroslot = 0; @@ -2429,10 +2561,7 @@ ReTry: if (c_seg->c_busy) { PAGE_REPLACEMENT_DISALLOWED(FALSE); -#if HIBERNATION - if (dst) - lck_rw_done(&c_decompressor_lock); -#endif + c_seg_wait_on_busy(c_seg); goto ReTry; @@ -2482,7 +2611,6 @@ ReTry: assert(my_cpu_no < compressor_cpus); scratch_buf = &compressor_scratch_bufs[my_cpu_no * WKdm_SCRATCH_BUF_SIZE]; - WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size); } @@ -2549,7 +2677,7 @@ c_seg_invalid_data: if (pages_populated) { assert(c_seg->c_store.c_buffer != NULL); - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); lck_mtx_unlock_always(&c_seg->c_lock); kernel_memory_depopulate(kernel_map, (vm_offset_t) c_seg->c_store.c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);
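/*
 * Illustrative sketch, not part of the patch: the decompressions_blocked
 * handshake above is the standard Mach assert_wait()/thread_block()/
 * thread_wakeup() idiom. Reduced to its skeleton, with hypothetical flag
 * and function names:
 */
static boolean_t example_gate_closed = FALSE;

static void
example_wait_for_gate(void)
{
	while (example_gate_closed == TRUE) {
		/* register for the wakeup before dropping any locks */
		assert_wait((event_t)&example_gate_closed, THREAD_UNINT);
		/* ... release locks the waker needs, then block ... */
		thread_block(THREAD_CONTINUE_NULL);
	}
}

static void
example_open_gate(void)
{
	example_gate_closed = FALSE;
	thread_wakeup((event_t)&example_gate_closed);	/* wake all waiters */
}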
lck_rw_done(&c_decompressor_lock); -#endif + + return (retval); } @@ -2615,17 +2741,6 @@ vm_compressor_get(ppnum_t pn, int *slot, int flags) * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set */ if (zeroslot) { - /* - * We've just decompressed a page, and are about to hand that back to VM for - * re-entry into some pmap. This is a decompression operation which must have no - * impact on the pmap's physical footprint. However, when VM goes to re-enter - * this page into the pmap, it doesn't know that it came from the compressor, - * which means the pmap's physical footprint will be incremented. To compensate - * for that, we decrement the physical footprint here, so that the total net effect - * on the physical footprint statistic is zero. - */ - pmap_ledger_debit(current_task()->map->pmap, task_ledgers.phys_footprint, PAGE_SIZE); - *slot = 0; } /* @@ -2638,14 +2753,24 @@ vm_compressor_get(ppnum_t pn, int *slot, int flags) } -void -vm_compressor_free(int *slot) +int +vm_compressor_free(int *slot, int flags) { int zeroslot = 1; + int retval; - (void)c_decompress_page(NULL, (c_slot_mapping_t)slot, 0, &zeroslot); + assert(flags == 0 || flags == C_DONT_BLOCK); - *slot = 0; + retval = c_decompress_page(NULL, (c_slot_mapping_t)slot, flags, &zeroslot); + /* + * returns 0 if we successfully freed the specified compressed page + * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set + */ + + if (retval == 0) + *slot = 0; + + return (retval); } @@ -2655,12 +2780,6 @@ vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_bu char *src; int retval; - if ((vm_offset_t)slot < VM_MIN_KERNEL_AND_KEXT_ADDRESS || (vm_offset_t)slot >= VM_MAX_KERNEL_ADDRESS) - panic("vm_compressor_put: slot 0x%llx address out of range [0x%llx:0x%llx]", - (uint64_t)(vm_offset_t) slot, - (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS, - (uint64_t) VM_MAX_KERNEL_ADDRESS); - #if __x86_64__ src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT); #else @@ -2670,3 +2789,40 @@ vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_bu return (retval); } + +void +vm_compressor_transfer( + int *dst_slot_p, + int *src_slot_p) +{ + c_slot_mapping_t dst_slot, src_slot; + c_segment_t c_seg; + int c_indx; + c_slot_t cs; + + dst_slot = (c_slot_mapping_t) dst_slot_p; + src_slot = (c_slot_mapping_t) src_slot_p; + +Retry: + PAGE_REPLACEMENT_DISALLOWED(TRUE); + /* get segment for src_slot */ + c_seg = c_segments[src_slot->s_cseg -1].c_seg; + /* lock segment */ + lck_mtx_lock_spin_always(&c_seg->c_lock); + /* wait if it's busy */ + if (c_seg->c_busy) { + PAGE_REPLACEMENT_DISALLOWED(FALSE); + c_seg_wait_on_busy(c_seg); + goto Retry; + } + /* find the c_slot */ + c_indx = src_slot->s_cindx; + cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); + /* point the c_slot back to dst_slot instead of src_slot */ + cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot); + /* transfer */ + *dst_slot_p = *src_slot_p; + *src_slot_p = 0; + lck_mtx_unlock_always(&c_seg->c_lock); + PAGE_REPLACEMENT_DISALLOWED(FALSE); +} diff --git a/osfmk/vm/vm_compressor.h b/osfmk/vm/vm_compressor.h index 4721f3e32..9ac8d64b1 100644 --- a/osfmk/vm/vm_compressor.h +++ b/osfmk/vm/vm_compressor.h @@ -114,6 +114,10 @@ struct c_segment { unsigned int cseg_swap_size; #endif /* CHECKSUM_THE_SWAP */ +#if MACH_ASSERT + thread_t c_busy_for_thread; +#endif /* MACH_ASSERT */ + struct c_slot *c_slots[C_SEG_SLOT_ARRAYS]; }; @@ -133,19 +137,32 @@ struct c_segment { #define C_SEG_WAKEUP_DONE(cseg) \ MACRO_BEGIN \ + assert((cseg)->c_busy); \ 
(cseg)->c_busy = 0; \ + assert((cseg)->c_busy_for_thread != NULL); \ + assert((((cseg)->c_busy_for_thread = NULL), TRUE)); \ if ((cseg)->c_wanted) { \ (cseg)->c_wanted = 0; \ thread_wakeup((event_t) (cseg)); \ } \ MACRO_END +#define C_SEG_BUSY(cseg) \ + MACRO_BEGIN \ + assert((cseg)->c_busy == 0); \ + (cseg)->c_busy = 1; \ + assert((cseg)->c_busy_for_thread == NULL); \ + assert((((cseg)->c_busy_for_thread = current_thread()), TRUE)); \ + MACRO_END + + typedef struct c_segment *c_segment_t; typedef struct c_slot *c_slot_t; uint64_t vm_compressor_total_compressions(void); void vm_wake_compactor_swapper(void); +void vm_thrashing_jetsam_done(void); void vm_consider_waking_compactor_swapper(void); void vm_compressor_flush(void); void c_seg_free(c_segment_t); @@ -160,16 +177,18 @@ void vm_compressor_do_warmup(void); void vm_compressor_record_warmup_start(void); void vm_compressor_record_warmup_end(void); -int vm_low_on_space(void); +int vm_wants_task_throttled(task_t); boolean_t vm_compression_available(void); +extern void vm_compressor_swap_init(void); extern void vm_compressor_init_locks(void); extern lck_rw_t c_master_lock; -#if CRYPTO +#if ENCRYPTED_SWAP extern void vm_swap_decrypt(c_segment_t); -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ +extern int vm_swap_low_on_space(void); extern kern_return_t vm_swap_get(vm_offset_t, uint64_t, uint64_t); extern void vm_swap_free(uint64_t); extern void vm_swap_consider_defragmenting(void); @@ -195,6 +214,7 @@ extern uint32_t c_swappedout_count; extern uint32_t c_swappedout_sparse_count; extern int64_t compressor_bytes_used; +extern uint64_t compressor_kvspace_used; extern uint64_t first_c_segment_to_warm_generation_id; extern uint64_t last_c_segment_to_warm_generation_id; extern boolean_t hibernate_flushing; @@ -207,6 +227,7 @@ extern uint32_t vm_compressor_minorcompact_threshold_divisor; extern uint32_t vm_compressor_majorcompact_threshold_divisor; extern uint32_t vm_compressor_unthrottle_threshold_divisor; extern uint32_t vm_compressor_catchup_threshold_divisor; +extern uint64_t vm_compressor_compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t); #define PAGE_REPLACEMENT_DISALLOWED(enable) (enable == TRUE ? lck_rw_lock_shared(&c_master_lock) : lck_rw_done(&c_master_lock)) #define PAGE_REPLACEMENT_ALLOWED(enable) (enable == TRUE ? lck_rw_lock_exclusive(&c_master_lock) : lck_rw_done(&c_master_lock)) @@ -229,23 +250,12 @@ extern uint32_t vm_compressor_catchup_threshold_divisor; #define HARD_THROTTLE_LIMIT_REACHED() ((AVAILABLE_NON_COMPRESSED_MEMORY < (VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 2) ? 1 : 0) #define SWAPPER_NEEDS_TO_UNTHROTTLE() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) ? 1 : 0) #define COMPRESSOR_NEEDS_TO_MINOR_COMPACT() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0) -#define COMPRESSOR_NEEDS_TO_MAJOR_COMPACT() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) ? 1 : 0) -#define COMPRESSOR_FREE_RESERVED_LIMIT 28 +#define COMPRESSOR_NEEDS_TO_MAJOR_COMPACT() (((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) || \ + (compressor_kvspace_used - (compressor_object->resident_page_count * PAGE_SIZE_64)) > compressor_kvwaste_limit) \ + ? 1 : 0) -/* - * Upward trajectory. - */ -extern boolean_t vm_compressor_low_on_space(void); - -#define VM_PRESSURE_NORMAL_TO_WARNING() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 
1 : 0) -#define VM_PRESSURE_WARNING_TO_CRITICAL() (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0) - -/* - * Downward trajectory. - */ -#define VM_PRESSURE_WARNING_TO_NORMAL() ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0) -#define VM_PRESSURE_CRITICAL_TO_WARNING() ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0) +#define COMPRESSOR_FREE_RESERVED_LIMIT 28 #define COMPRESSOR_SCRATCH_BUF_SIZE WKdm_SCRATCH_BUF_SIZE diff --git a/osfmk/vm/vm_compressor_backing_store.c b/osfmk/vm/vm_compressor_backing_store.c index bf57f06e5..0cb37f7dc 100644 --- a/osfmk/vm/vm_compressor_backing_store.c +++ b/osfmk/vm/vm_compressor_backing_store.c @@ -34,7 +34,8 @@ boolean_t compressor_store_stop_compaction = FALSE; boolean_t vm_swap_up = FALSE; -boolean_t vm_swapfile_mgmt_needed = FALSE; +boolean_t vm_swapfile_create_needed = FALSE; +boolean_t vm_swapfile_gc_needed = FALSE; int swapper_throttle = -1; boolean_t swapper_throttle_inited = FALSE; @@ -45,8 +46,10 @@ uint64_t vm_swap_get_failures = 0; int vm_num_swap_files = 0; int vm_swapout_thread_processed_segments = 0; int vm_swapout_thread_awakened = 0; -int vm_swapfile_mgmt_thread_awakened = 0; -int vm_swapfile_mgmt_thread_running = 0; +int vm_swapfile_create_thread_awakened = 0; +int vm_swapfile_create_thread_running = 0; +int vm_swapfile_gc_thread_awakened = 0; +int vm_swapfile_gc_thread_running = 0; unsigned int vm_swapfile_total_segs_alloced = 0; unsigned int vm_swapfile_total_segs_used = 0; @@ -74,33 +77,41 @@ struct swapfile{ struct trim_list *swp_delayed_trim_list_head; unsigned int swp_delayed_trim_count; - boolean_t swp_trim_supported; }; queue_head_t swf_global_queue; +boolean_t swp_trim_supported = FALSE; #define VM_SWAPFILE_DELAYED_TRIM_MAX 128 extern clock_sec_t dont_trim_until_ts; clock_sec_t vm_swapfile_last_failed_to_create_ts = 0; +clock_sec_t vm_swapfile_last_successful_create_ts = 0; +int vm_swapfile_can_be_created = FALSE; +boolean_t delayed_trim_handling_in_progress = FALSE; static void vm_swapout_thread_throttle_adjust(void); static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset); static void vm_swapout_thread(void); -static void vm_swapfile_mgmt_thread(void); +static void vm_swapfile_create_thread(void); +static void vm_swapfile_gc_thread(void); static void vm_swap_defragment(); static void vm_swap_handle_delayed_trims(boolean_t); static void vm_swap_do_delayed_trim(); +static void vm_swap_wait_on_trim_handling_in_progress(void); + -#define VM_SWAPFILE_DELAYED_CREATE 30 #define VM_SWAP_SHOULD_DEFRAGMENT() (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0) #define VM_SWAP_SHOULD_RECLAIM() (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0) +#define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0) #define VM_SWAP_SHOULD_CREATE(cur_ts) (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \ ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0) #define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 
1 : 0) +#define VM_SWAPFILE_DELAYED_CREATE 15 + #define VM_SWAP_BUSY() ((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0) @@ -108,16 +119,18 @@ static void vm_swap_do_delayed_trim(); extern unsigned int hash_string(char *cp, int len); #endif -#if CRYPTO +#if ENCRYPTED_SWAP extern boolean_t swap_crypt_ctx_initialized; extern void swap_crypt_ctx_initialize(void); extern const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE]; extern aes_ctx swap_crypt_ctx; extern unsigned long vm_page_encrypt_counter; extern unsigned long vm_page_decrypt_counter; -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ extern void vm_pageout_io_throttle(void); +extern void vm_pageout_reinit_tuneables(void); +extern void vm_swap_file_set_tuneables(void); struct swapfile *vm_swapfile_for_handle(uint64_t); @@ -155,17 +168,10 @@ vm_swapfile_for_handle(uint64_t f_offset) } void -vm_swap_init() +vm_compressor_swap_init() { - static boolean_t vm_swap_try_init = FALSE; thread_t thread = NULL; - if (vm_swap_try_init == TRUE) { - return; - } - - vm_swap_try_init = TRUE; - lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr); lck_grp_init(&vm_swap_data_lock_grp, "vm_swap_data", @@ -178,178 +184,88 @@ vm_swap_init() queue_init(&swf_global_queue); - if (vm_swap_create_file()) { - if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL, - BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { - panic("vm_swapout_thread: create failed"); - } - thread->options |= TH_OPT_VMPRIV; - vm_swapout_thread_id = thread->thread_id; - - thread_deallocate(thread); - - if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_mgmt_thread, NULL, + if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL, BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { - panic("vm_swapfile_mgmt_thread: create failed"); - } - thread->options |= TH_OPT_VMPRIV; - - thread_deallocate(thread); - -#if CRYPTO - if (swap_crypt_ctx_initialized == FALSE) { - swap_crypt_ctx_initialize(); - } -#endif /* CRYPTO */ - - vm_swap_up = TRUE; - -#if SANITY_CHECK_SWAP_ROUTINES -extern lck_attr_t *vm_compressor_lck_attr; -extern lck_grp_t *vm_compressor_lck_grp; - - /* - * Changes COMPRESSED_SWAP_CHUNK_SIZE to make it (4*KB). - * Changes MIN_SWAP_FILE_SIZE to (4*KB). - * Changes MAX_SWAP_FILE_SIZE to (4*KB). - * That will then cause the below allocations to create - * 4 new swap files and put/get/free from them. 
- */ - { - c_segment_t c_seg = NULL, c_seg1 = NULL, c_seg2 = NULL, c_seg3 = NULL; - vm_offset_t addr = 0; - vm_offset_t dup_addr = 0; - kern_return_t kr = KERN_SUCCESS; - uint64_t f_offset = 0; - uint64_t f_offset1 = 0; - uint64_t f_offset2 = 0; - uint64_t f_offset3 = 0; - - if ((kr = kernel_memory_allocate(kernel_map, - &addr, - 4 * COMPRESSED_SWAP_CHUNK_SIZE, - 0, - KMA_KOBJECT))) { - printf("kernel_memory_allocate failed with %d\n", kr); - goto done; - } - - if ((kr = kernel_memory_allocate(kernel_map, - &dup_addr, - 4 * COMPRESSED_SWAP_CHUNK_SIZE, - 0, - KMA_KOBJECT))) { - printf("kernel_memory_allocate failed with %d\n", kr); - goto done; - } - - c_seg = (c_segment_t) kalloc(sizeof(*c_seg)); - memset(c_seg, 0, sizeof(*c_seg)); -#if __i386__ || __x86_64__ - lck_mtx_init(&c_seg->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#else /* __i386__ || __x86_64__ */ - lck_spin_init(&c_seg->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#endif /* __i386__ || __x86_64__ */ - - - c_seg1 = (c_segment_t) kalloc(sizeof(*c_seg)); - memset(c_seg1, 0, sizeof(*c_seg)); -#if __i386__ || __x86_64__ - lck_mtx_init(&c_seg1->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#else /* __i386__ || __x86_64__ */ - lck_spin_init(&c_seg1->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#endif /* __i386__ || __x86_64__ */ - - - c_seg2 = (c_segment_t) kalloc(sizeof(*c_seg)); - memset(c_seg2, 0, sizeof(*c_seg)); -#if __i386__ || __x86_64__ - lck_mtx_init(&c_seg2->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#else /* __i386__ || __x86_64__ */ - lck_spin_init(&c_seg2->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#endif /* __i386__ || __x86_64__ */ - + panic("vm_swapout_thread: create failed"); + } + thread->options |= TH_OPT_VMPRIV; + vm_swapout_thread_id = thread->thread_id; - c_seg3 = (c_segment_t) kalloc(sizeof(*c_seg)); - memset(c_seg3, 0, sizeof(*c_seg)); -#if __i386__ || __x86_64__ - lck_mtx_init(&c_seg3->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#else /* __i386__ || __x86_64__ */ - lck_spin_init(&c_seg3->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); -#endif /* __i386__ || __x86_64__ */ - + thread_deallocate(thread); - memset((void*)addr, (int) 'a', PAGE_SIZE_64); - memset((void*)(addr + PAGE_SIZE_64), (int) 'b', PAGE_SIZE_64); - memset((void*)(addr + (2 * PAGE_SIZE_64)), (int) 'c', PAGE_SIZE_64); - memset((void*)(addr + (3 * PAGE_SIZE_64)), (int) 'd', PAGE_SIZE_64); + if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL, + BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { + panic("vm_swapfile_create_thread: create failed"); + } + thread->options |= TH_OPT_VMPRIV; - vm_swap_put(addr, &f_offset, PAGE_SIZE_64, c_seg); - c_seg->c_store.c_swap_handle = f_offset; + thread_deallocate(thread); - vm_swap_put(addr + PAGE_SIZE_64, &f_offset1, PAGE_SIZE_64, c_seg1); - c_seg1->c_store.c_swap_handle = f_offset1; - vm_swap_put(addr + (2 * PAGE_SIZE_64), &f_offset2, PAGE_SIZE_64, c_seg2); - c_seg2->c_store.c_swap_handle = f_offset2; + if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL, + BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { + panic("vm_swapfile_gc_thread: create failed"); + } + thread_deallocate(thread); - vm_swap_put(addr + (3 * PAGE_SIZE_64), &f_offset3, PAGE_SIZE_64, c_seg3); - c_seg3->c_store.c_swap_handle = f_offset3; + proc_set_task_policy_thread(kernel_task, thread->thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); + 
proc_set_task_policy_thread(kernel_task, thread->thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE); - //vm_swap_free(f_offset); - vm_swap_get(dup_addr, f_offset, PAGE_SIZE_64); +#if ENCRYPTED_SWAP + if (swap_crypt_ctx_initialized == FALSE) { + swap_crypt_ctx_initialize(); + } +#endif /* ENCRYPTED_SWAP */ + + memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1); - //vm_swap_free(f_offset1); - vm_swap_reclaim(); - vm_swap_get(dup_addr + PAGE_SIZE_64, c_seg1->c_store.c_swap_handle, PAGE_SIZE_64); + vm_swap_up = TRUE; - //vm_swap_free(f_offset2); - vm_swap_reclaim(); - vm_swap_get(dup_addr + (2 * PAGE_SIZE_64), c_seg2->c_store.c_swap_handle, PAGE_SIZE_64); + printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF"); +} - //vm_swap_free(f_offset3); - vm_swap_reclaim(); - vm_swap_get(dup_addr + (3 * PAGE_SIZE_64), c_seg3->c_store.c_swap_handle, PAGE_SIZE_64); - if (memcmp((void*)addr, (void*)dup_addr, PAGE_SIZE_64)) { - panic("First page data mismatch\n"); - kr = KERN_FAILURE; - goto done; - } +void +vm_swap_file_set_tuneables() +{ + struct vnode *vp; + char *pathname; + int namelen; - if (memcmp((void*)(addr + PAGE_SIZE_64), (void*)(dup_addr + PAGE_SIZE_64), PAGE_SIZE_64)) { - panic("Second page data mismatch 0x%lx, 0x%lxn", addr, dup_addr); - kr = KERN_FAILURE; - goto done; - } + if (strlen(swapfilename) == 0) { + /* + * If no swapfile name has been set, we'll + * use the default name. + * + * Also, this function is only called from the vm_pageout_scan thread + * via vm_consider_waking_compactor_swapper, + * so we don't need to worry about a race in checking/setting the name here. + */ + strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN); + } + namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1; + pathname = (char*)kalloc(namelen); + memset(pathname, 0, namelen); + snprintf(pathname, namelen, "%s%d", swapfilename, 0); - if (memcmp((void*)(addr + (2 * PAGE_SIZE_64)), (void*)(dup_addr + (2 * PAGE_SIZE_64)), PAGE_SIZE_64)) { - panic("Third page data mismatch\n"); - kr = KERN_FAILURE; - goto done; - } + vm_swapfile_open(pathname, &vp); - if (memcmp((void*)(addr + (3 * PAGE_SIZE_64)), (void*)(dup_addr + (3 * PAGE_SIZE_64)), PAGE_SIZE_64)) { - panic("Fourth page data mismatch 0x%lx, 0x%lxn", addr, dup_addr); - kr = KERN_FAILURE; - goto done; - } + if (vp == NULL) + goto done; + if (vnode_pager_isSSD(vp) == FALSE) + vm_pageout_reinit_tuneables(); + vnode_setswapmount(vp); + vm_swapfile_close((uint64_t)pathname, vp); done: - printf("Sanity check %s\n", ((kr != KERN_SUCCESS) ? "FAILED" : "SUCCEEDED")); - kfree((void*)addr, 4 * COMPRESSED_SWAP_CHUNK_SIZE); - addr = 0; - kfree((void*)dup_addr, 4 * COMPRESSED_SWAP_CHUNK_SIZE); - dup_addr = 0; - } -#endif /* SANITY_CHECK_SWAP_ROUTINES */ - } - - printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? 
"ON" : "OFF"); + kfree(pathname, namelen); } -#if CRYPTO + +#if ENCRYPTED_SWAP void vm_swap_encrypt(c_segment_t c_seg) { @@ -434,19 +350,20 @@ vm_swap_decrypt(c_segment_t c_seg) vm_page_decrypt_counter += (size/PAGE_SIZE_64); } -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ void vm_swap_consider_defragmenting() { - if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() && (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) { + if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() && + (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) { - if (!vm_swapfile_mgmt_thread_running) { + if (!vm_swapfile_gc_thread_running) { lck_mtx_lock(&vm_swap_data_lock); - if (!vm_swapfile_mgmt_thread_running) - thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + if (!vm_swapfile_gc_thread_running) + thread_wakeup((event_t) &vm_swapfile_gc_needed); lck_mtx_unlock(&vm_swap_data_lock); } @@ -539,24 +456,15 @@ vm_swap_defragment() static void -vm_swapfile_mgmt_thread(void) +vm_swapfile_create_thread(void) { - - boolean_t did_work = FALSE; clock_sec_t sec; clock_nsec_t nsec; - vm_swapfile_mgmt_thread_awakened++; - vm_swapfile_mgmt_thread_running = 1; - -try_again: - - do { - if (vm_swap_up == FALSE) - break; - did_work = FALSE; - clock_get_system_nanotime(&sec, &nsec); + vm_swapfile_create_thread_awakened++; + vm_swapfile_create_thread_running = 1; + while (TRUE) { /* * walk through the list of swap files * and do the delayed frees/trims for @@ -565,59 +473,77 @@ try_again: */ vm_swap_handle_delayed_trims(FALSE); - if (VM_SWAP_SHOULD_CREATE(sec)) { - if (vm_swap_create_file() == TRUE) - did_work = TRUE; - else { - vm_swapfile_last_failed_to_create_ts = sec; - HIBLOG("vm_swap_create_file failed @ %lu secs\n", sec); - } - } - if (VM_SWAP_SHOULD_DEFRAGMENT()) { - proc_set_task_policy_thread(kernel_task, current_thread()->thread_id, - TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); + lck_mtx_lock(&vm_swap_data_lock); - vm_swap_defragment(); + clock_get_system_nanotime(&sec, &nsec); - if (!VM_SWAP_BUSY()) - did_work = TRUE; + if (VM_SWAP_SHOULD_CREATE(sec) == 0) + break; - proc_set_task_policy_thread(kernel_task, current_thread()->thread_id, - TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER1); - } - if (VM_SWAP_SHOULD_RECLAIM()) { - proc_set_task_policy_thread(kernel_task, current_thread()->thread_id, - TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); + lck_mtx_unlock(&vm_swap_data_lock); - vm_swap_defragment(); - vm_swap_reclaim(); + if (vm_swap_create_file() == FALSE) { + vm_swapfile_last_failed_to_create_ts = sec; + HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec); - if (!VM_SWAP_BUSY()) - did_work = TRUE; + } else + vm_swapfile_last_successful_create_ts = sec; + } + vm_swapfile_create_thread_running = 0; - proc_set_task_policy_thread(kernel_task, current_thread()->thread_id, - TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER1); - } + assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT); - } while (did_work == TRUE); + lck_mtx_unlock(&vm_swap_data_lock); - lck_mtx_lock(&vm_swap_data_lock); + thread_block((thread_continue_t)vm_swapfile_create_thread); + + /* NOTREACHED */ +} - clock_get_system_nanotime(&sec, &nsec); - if (vm_swap_up == TRUE && (VM_SWAP_SHOULD_CREATE(sec) || ((!VM_SWAP_BUSY() && compressor_store_stop_compaction == FALSE) && - (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())))) { +static void +vm_swapfile_gc_thread(void) +{ + boolean_t 
need_defragment; + boolean_t need_reclaim; + + vm_swapfile_gc_thread_awakened++; + vm_swapfile_gc_thread_running = 1; + + while (TRUE) { + + lck_mtx_lock(&vm_swap_data_lock); + + if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) + break; + + need_defragment = FALSE; + need_reclaim = FALSE; + + if (VM_SWAP_SHOULD_DEFRAGMENT()) + need_defragment = TRUE; + + if (VM_SWAP_SHOULD_RECLAIM()) { + need_defragment = TRUE; + need_reclaim = TRUE; + } + if (need_defragment == FALSE && need_reclaim == FALSE) + break; + lck_mtx_unlock(&vm_swap_data_lock); - goto try_again; - } - vm_swapfile_mgmt_thread_running = 0; + if (need_defragment == TRUE) + vm_swap_defragment(); + if (need_reclaim == TRUE) + vm_swap_reclaim(); + } + vm_swapfile_gc_thread_running = 0; - assert_wait((event_t)&vm_swapfile_mgmt_needed, THREAD_UNINT); + assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT); lck_mtx_unlock(&vm_swap_data_lock); - thread_block((thread_continue_t)vm_swapfile_mgmt_thread); + thread_block((thread_continue_t)vm_swapfile_gc_thread); /* NOTREACHED */ } @@ -742,7 +668,7 @@ vm_swapout_thread(void) c_seg_free_locked(c_seg); goto c_seg_was_freed; } - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); c_seg->c_busy_swapping = 1; lck_mtx_unlock_always(c_list_lock); @@ -756,9 +682,9 @@ vm_swapout_thread(void) c_seg->cseg_swap_size = size; #endif /* CHECKSUM_THE_SWAP */ -#if CRYPTO +#if ENCRYPTED_SWAP vm_swap_encrypt(c_seg); -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ vm_swapout_thread_throttle_adjust(); @@ -794,9 +720,9 @@ vm_swapout_thread(void) if (c_seg->c_bytes_used) OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used); } else { -#if CRYPTO +#if ENCRYPTED_SWAP vm_swap_decrypt(c_seg); -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ c_seg_insert_into_q(&c_age_list_head, c_seg); c_seg->c_on_age_q = 1; c_age_count++; @@ -805,23 +731,23 @@ vm_swapout_thread(void) } lck_mtx_unlock_always(c_list_lock); - c_seg->c_busy_swapping = 0; - - C_SEG_WAKEUP_DONE(c_seg); - if (c_seg->c_must_free) c_seg_free(c_seg); - else + else { + c_seg->c_busy_swapping = 0; + C_SEG_WAKEUP_DONE(c_seg); lck_mtx_unlock_always(&c_seg->c_lock); + } if (kr == KERN_SUCCESS) kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR); PAGE_REPLACEMENT_DISALLOWED(FALSE); - if (kr == KERN_SUCCESS) + if (kr == KERN_SUCCESS) { kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_ALLOCSIZE); - + OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used); + } vm_pageout_io_throttle(); c_seg_was_freed: if (c_swapout_count == 0) @@ -848,10 +774,6 @@ vm_swap_create_file() boolean_t swap_file_reuse = FALSE; struct swapfile *swf = NULL; - - if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { - } - /* * Any swapfile structure ready for re-use? */ @@ -872,7 +794,19 @@ vm_swap_create_file() if (swap_file_reuse == FALSE) { - namelen = SWAPFILENAME_LEN + SWAPFILENAME_INDEX_LEN + 1; + if (strlen(swapfilename) == 0) { + /* + * If no swapfile name has been set, we'll + * use the default name. + * + * Also, this function is only called from the swapfile management thread. + * So we don't need to worry about a race in checking/setting the name here. 
+ */ + + strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN); + } + + namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1; swf = (struct swapfile*) kalloc(sizeof *swf); memset(swf, 0, sizeof(*swf)); @@ -883,7 +817,7 @@ vm_swap_create_file() memset(swf->swp_path, 0, namelen); - snprintf(swf->swp_path, namelen, "%s%d", SWAP_FILE_NAME, vm_num_swap_files + 1); + snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files); } vm_swapfile_open(swf->swp_path, &swf->swp_vp); @@ -895,6 +829,8 @@ vm_swap_create_file() } return FALSE; } + vm_swapfile_can_be_created = TRUE; + size = MAX_SWAP_FILE_SIZE; while (size >= MIN_SWAP_FILE_SIZE) { @@ -926,10 +862,8 @@ vm_swap_create_file() * will return ENOTSUP if trim isn't supported * and 0 if it is */ - if (vnode_trim_list(swf->swp_vp, NULL)) - swf->swp_trim_supported = FALSE; - else - swf->swp_trim_supported = TRUE; + if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) + swp_trim_supported = TRUE; lck_mtx_lock(&vm_swap_data_lock); @@ -973,7 +907,7 @@ vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size) { struct swapfile *swf = NULL; uint64_t file_offset = 0; - int retval; + int retval = 0; if (addr == 0) { return KERN_FAILURE; @@ -983,27 +917,22 @@ vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size) swf = vm_swapfile_for_handle(f_offset); - if (swf) { - if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) { - - swf->swp_io_count++; - file_offset = (f_offset & SWAP_SLOT_MASK); - - lck_mtx_unlock(&vm_swap_data_lock); - - } else { - - lck_mtx_unlock(&vm_swap_data_lock); - return KERN_FAILURE; - } - } else { - - lck_mtx_unlock(&vm_swap_data_lock); - return KERN_FAILURE; + if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) { + retval = 1; + goto done; } + swf->swp_io_count++; + + lck_mtx_unlock(&vm_swap_data_lock); + file_offset = (f_offset & SWAP_SLOT_MASK); retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ); + if (retval == 0) + VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT); + else + vm_swap_get_failures++; + /* * Free this slot in the swap structure. 
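
The naming change above derives the swapfile path from the (possibly boot-arg-overridden) swapfilename buffer instead of the compile-time constant, and sizes the buffer from its runtime length. A standalone sketch of that construction; buffer sizes here are chosen for illustration only:

#include <stdio.h>
#include <string.h>

#define SWAP_FILE_NAME         "/var/vm/swapfile"
#define SWAPFILENAME_INDEX_LEN 2      /* index digits, excluding the NUL */

int main(void)
{
    char swapfilename[128] = "";      /* a boot-arg override would land here */
    int  vm_num_swap_files = 3;

    if (swapfilename[0] == '\0')
        snprintf(swapfilename, sizeof(swapfilename), "%s", SWAP_FILE_NAME);

    int  namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
    char path[256];

    /* note: the index is no longer "+ 1" biased in the hunk above */
    snprintf(path, (size_t)namelen, "%s%d", swapfilename, vm_num_swap_files);
    printf("%s\n", path);             /* -> /var/vm/swapfile3 */
    return 0;
}
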
*/ @@ -1017,16 +946,13 @@ vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size) swf->swp_flags &= ~SWAP_WANTED; thread_wakeup((event_t) &swf->swp_flags); } - if (retval == 0) - VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT); +done: lck_mtx_unlock(&vm_swap_data_lock); if (retval == 0) return KERN_SUCCESS; - else { - vm_swap_get_failures++; + else return KERN_FAILURE; - } } kern_return_t @@ -1040,6 +966,7 @@ vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_s unsigned int offset_within_byte = 0; boolean_t swf_eligible = FALSE; boolean_t waiting = FALSE; + boolean_t retried = FALSE; int error = 0; clock_sec_t sec; clock_nsec_t nsec; @@ -1047,7 +974,7 @@ vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_s if (addr == 0 || f_offset == NULL) { return KERN_FAILURE; } - +retry: lck_mtx_lock(&vm_swap_data_lock); swf = (struct swapfile*) queue_first(&swf_global_queue); @@ -1081,8 +1008,8 @@ vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_s clock_get_system_nanotime(&sec, &nsec); - if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_mgmt_thread_running) - thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) + thread_wakeup((event_t) &vm_swapfile_create_needed); lck_mtx_unlock(&vm_swap_data_lock); @@ -1110,8 +1037,8 @@ vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_s */ clock_get_system_nanotime(&sec, &nsec); - if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_mgmt_thread_running) - thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) + thread_wakeup((event_t) &vm_swapfile_create_needed); if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) { waiting = TRUE; @@ -1121,9 +1048,15 @@ vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_s lck_mtx_unlock(&vm_swap_data_lock); - if (waiting == TRUE) + if (waiting == TRUE) { thread_block(THREAD_CONTINUE_NULL); + if (retried == FALSE && hibernate_flushing == TRUE) { + retried = TRUE; + goto retry; + } + } + return KERN_FAILURE; done: @@ -1145,10 +1078,6 @@ done: lck_mtx_unlock(&vm_swap_data_lock); -#if SANITY_CHECK_SWAP_ROUTINES - printf("Returned 0x%llx as offset\n", *f_offset); -#endif /* SANITY_CHECK_SWAP_ROUTINES */ - if (error) { vm_swap_free(*f_offset); @@ -1190,10 +1119,9 @@ vm_swap_free_now(struct swapfile *swf, uint64_t f_offset) swf->swp_free_hint = segidx; } } - if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_mgmt_thread_running) - thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) + thread_wakeup((event_t) &vm_swapfile_gc_needed); } - lck_mtx_unlock(&vm_swap_data_lock); } @@ -1205,17 +1133,20 @@ void vm_swap_free(uint64_t f_offset) { struct swapfile *swf = NULL; - struct trim_list *tl; + struct trim_list *tl = NULL; clock_sec_t sec; clock_nsec_t nsec; + if (swp_trim_supported == TRUE) + tl = kalloc(sizeof(struct trim_list)); + lck_mtx_lock(&vm_swap_data_lock); swf = vm_swapfile_for_handle(f_offset); if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) { - if (swf->swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) { + if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) { /* * don't delay the free if the underlying disk doesn't support * trim, or we're in the midst of reclaiming this swap file since @@ -1225,54 +1156,94 @@ vm_swap_free(uint64_t f_offset) 
vm_swap_free_now(swf, f_offset); vm_swap_free_now_count++; - return; + goto done; } - tl = kalloc(sizeof(struct trim_list)); - tl->tl_offset = f_offset & SWAP_SLOT_MASK; tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE; tl->tl_next = swf->swp_delayed_trim_list_head; swf->swp_delayed_trim_list_head = tl; swf->swp_delayed_trim_count++; + tl = NULL; - if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_mgmt_thread_running) { + if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) { clock_get_system_nanotime(&sec, &nsec); if (sec > dont_trim_until_ts) - thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + thread_wakeup((event_t) &vm_swapfile_create_needed); } vm_swap_free_delayed_count++; } +done: lck_mtx_unlock(&vm_swap_data_lock); + + if (tl != NULL) + kfree(tl, sizeof(struct trim_list)); } +static void +vm_swap_wait_on_trim_handling_in_progress() +{ + while (delayed_trim_handling_in_progress == TRUE) { + + assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT); + lck_mtx_unlock(&vm_swap_data_lock); + + thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock(&vm_swap_data_lock); + } +} + + static void vm_swap_handle_delayed_trims(boolean_t force_now) { struct swapfile *swf = NULL; /* - * because swap files are created or reclaimed on the - * same thread that calls this function, it's safe - * to iterate "swf_global_queue" w/o holding - * the lock since those are the only 2 cases that can - * change the items on the "swf_global_queue" + * serialize the race between us and vm_swap_reclaim... + * if vm_swap_reclaim wins it will turn off SWAP_READY + * on the victim it has chosen... we can just skip over + * that file since vm_swap_reclaim will first process + * all of the delayed trims associated with it + */ + lck_mtx_lock(&vm_swap_data_lock); + + delayed_trim_handling_in_progress = TRUE; + + lck_mtx_unlock(&vm_swap_data_lock); + + /* + * no need to hold the lock to walk the swf list since + * vm_swap_create (the only place where we add to this list) + * is run on the same thread as this function + * and vm_swap_reclaim doesn't remove items from this list + * instead marking them with SWAP_REUSE for future re-use */ swf = (struct swapfile*) queue_first(&swf_global_queue); while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) { - assert(!(swf->swp_flags & SWAP_RECLAIM)); + if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) { - if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) + assert(!(swf->swp_flags & SWAP_RECLAIM)); vm_swap_do_delayed_trim(swf); - + } swf = (struct swapfile*) queue_next(&swf->swp_queue); } -} + lck_mtx_lock(&vm_swap_data_lock); + + delayed_trim_handling_in_progress = FALSE; + thread_wakeup((event_t) &delayed_trim_handling_in_progress); + if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) + thread_wakeup((event_t) &vm_swapfile_gc_needed); + + lck_mtx_unlock(&vm_swap_data_lock); + +} static void vm_swap_do_delayed_trim(struct swapfile *swf) @@ -1287,7 +1258,7 @@ vm_swap_do_delayed_trim(struct swapfile *swf) lck_mtx_unlock(&vm_swap_data_lock); - vnode_trim_list(swf->swp_vp, tl_head); + vnode_trim_list(swf->swp_vp, tl_head, TRUE); while ((tl = tl_head) != NULL) { unsigned int segidx = 0; @@ -1352,6 +1323,20 @@ vm_swap_reclaim(void) lck_mtx_lock(&vm_swap_data_lock); + /* + * if we're running the swapfile list looking for + * candidates with delayed trims, we need to + * wait before making our decision concerning + * the swapfile we want to reclaim + */ + 
vm_swap_wait_on_trim_handling_in_progress(); + + /* + * from here until we knock down the SWAP_READY bit, + * we need to remain behind the vm_swap_data_lock... + * once that bit has been turned off, "vm_swap_handle_delayed_trims" + * will not consider this swapfile for processing + */ swf = (struct swapfile*) queue_first(&swf_global_queue); min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE; smallest_swf = NULL; @@ -1388,10 +1373,6 @@ vm_swap_reclaim(void) while (segidx < swf->swp_nsegs) { ReTry_for_cseg: - if (compressor_store_stop_compaction == TRUE || (swf->swp_trim_supported == FALSE && VM_SWAP_BUSY())) { - vm_swap_reclaim_yielded++; - break; - } /* * Wait for outgoing I/Os. */ @@ -1406,6 +1387,10 @@ ReTry_for_cseg: lck_mtx_lock(&vm_swap_data_lock); } + if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) { + vm_swap_reclaim_yielded++; + break; + } byte_for_segidx = segidx >> 3; offset_within_byte = segidx % 8; @@ -1449,28 +1434,19 @@ ReTry_for_cseg: lck_mtx_unlock(&vm_swap_data_lock); if (c_seg->c_must_free) { - + C_SEG_BUSY(c_seg); c_seg_free(c_seg); } else { - c_seg->c_busy = 1; + C_SEG_BUSY(c_seg); c_seg->c_busy_swapping = 1; #if !CHECKSUM_THE_SWAP c_seg_trim_tail(c_seg); #endif - -#if SANITY_CHECK_SWAP_ROUTINES - - c_size = COMPRESSED_SWAP_CHUNK_SIZE; - -#else /* SANITY_CHECK_SWAP_ROUTINES */ - c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); assert(c_size <= C_SEG_BUFSIZE); -#endif /* SANITY_CHECK_SWAP_ROUTINES */ - lck_mtx_unlock_always(&c_seg->c_lock); if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) { @@ -1495,14 +1471,16 @@ ReTry_for_cseg: */ if (kernel_memory_allocate(kernel_map, &c_buffer, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) panic("vm_swap_reclaim: kernel_memory_allocate failed\n"); + OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used); + kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR); memcpy((char *)c_buffer, (char *)addr, c_size); c_seg->c_store.c_buffer = (int32_t *)c_buffer; -#if CRYPTO +#if ENCRYPTED_SWAP vm_swap_decrypt(c_seg); -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ c_seg_swapin_requeue(c_seg); OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used); @@ -1542,7 +1520,8 @@ swap_io_failed: /* * We don't remove this inactive swf from the queue. * That way, we can re-use it when needed again and - * preserve the namespace. + * preserve the namespace. The delayed_trim processing + * is also dependent on us not removing swfs from the queue. 
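
vm_swap_free() above now preallocates the trim_list node before taking vm_swap_data_lock and releases it afterwards if it went unused, since allocating while holding that lock is not safe. The shape of that pattern in portable C, with pthread and malloc standing in for the kernel primitives:

#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>

struct trim_list {
    uint64_t          tl_offset;
    uint64_t          tl_length;
    struct trim_list *tl_next;
};

static pthread_mutex_t  swap_data_lock = PTHREAD_MUTEX_INITIALIZER;
static struct trim_list *delayed_trim_head;
static unsigned          delayed_trim_count;

void swap_free_delayed(uint64_t offset, uint64_t length, int trim_supported)
{
    struct trim_list *tl = NULL;

    if (trim_supported)
        tl = malloc(sizeof(*tl));     /* allocate before taking the lock */

    pthread_mutex_lock(&swap_data_lock);
    if (tl != NULL) {                 /* file READY and trim supported */
        tl->tl_offset = offset;
        tl->tl_length = length;
        tl->tl_next   = delayed_trim_head;
        delayed_trim_head = tl;
        delayed_trim_count++;
        tl = NULL;                    /* ownership moved to the list */
    }
    pthread_mutex_unlock(&swap_data_lock);

    if (tl != NULL)
        free(tl);                     /* unused: the slot was freed immediately instead */
}

The delayed_trim_handling_in_progress flag above enforces the same discipline in the other direction: vm_swap_reclaim waits out any in-flight trim pass before knocking down SWAP_READY on its victim.
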
*/ //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue); @@ -1565,8 +1544,8 @@ swap_io_failed: swf->swp_nsegs = 0; swf->swp_flags = SWAP_REUSE; - thread_wakeup((event_t) &swf->swp_flags); done: + thread_wakeup((event_t) &swf->swp_flags); lck_mtx_unlock(&vm_swap_data_lock); kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE); @@ -1598,3 +1577,22 @@ vm_swap_get_free_space(void) { return (vm_swap_get_total_space() - vm_swap_get_used_space()); } + + +int +vm_swap_low_on_space(void) +{ + + if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) + return (0); + + if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) { + + if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) + return (0); + + if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) + return (1); + } + return (0); +} diff --git a/osfmk/vm/vm_compressor_backing_store.h b/osfmk/vm/vm_compressor_backing_store.h index 6cced5b2f..fd2ba4bd0 100644 --- a/osfmk/vm/vm_compressor_backing_store.h +++ b/osfmk/vm/vm_compressor_backing_store.h @@ -39,23 +39,6 @@ #include -#define SANITY_CHECK_SWAP_ROUTINES 0 - -#if SANITY_CHECK_SWAP_ROUTINES - -#define MIN_SWAP_FILE_SIZE (4 * 1024) - -#define MAX_SWAP_FILE_SIZE (4 * 1024) - -#define COMPRESSED_SWAP_CHUNK_SIZE (4 * 1024) - -#define VM_SWAPFILE_HIWATER_SEGS (MIN_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE) - -#define SWAPFILE_RECLAIM_THRESHOLD_SEGS (MIN_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE) - -#else /* SANITY_CHECK_SWAP_ROUTINES */ - - #define MIN_SWAP_FILE_SIZE (256 * 1024 * 1024) #define MAX_SWAP_FILE_SIZE (1 * 1024 * 1024 * 1024) @@ -65,13 +48,15 @@ #define VM_SWAPFILE_HIWATER_SEGS (MIN_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE) -#define SWAPFILE_RECLAIM_THRESHOLD_SEGS ((15 * (MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE)) / 10) +#define SWAPFILE_RECLAIM_THRESHOLD_SEGS ((17 * (MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE)) / 10) +#define SWAPFILE_RECLAIM_MINIMUM_SEGS ((13 * (MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE)) / 10) -#endif /* SANITY_CHECK_SWAP_ROUTINES */ #define SWAP_FILE_NAME "/var/vm/swapfile" #define SWAPFILENAME_LEN (int)(strlen(SWAP_FILE_NAME)) -#define SWAPFILENAME_INDEX_LEN 2 /* Doesn't include the terminating NULL character */ + +char swapfilename[MAX_SWAPFILENAME_LEN + 1]; + #define SWAP_SLOT_MASK 0x1FFFFFFFF #define SWAP_DEVICE_SHIFT 33 diff --git a/osfmk/vm/vm_compressor_pager.c b/osfmk/vm/vm_compressor_pager.c index 5acb12320..a638590de 100644 --- a/osfmk/vm/vm_compressor_pager.c +++ b/osfmk/vm/vm_compressor_pager.c @@ -139,8 +139,11 @@ const struct memory_object_pager_ops compressor_pager_ops = { struct { uint64_t data_returns; uint64_t data_requests; + uint64_t put; + uint64_t get; uint64_t state_clr; uint64_t state_get; + uint64_t transfer; } compressor_pager_stats; typedef int compressor_slot_t; @@ -153,6 +156,8 @@ typedef struct compressor_pager { unsigned int cpgr_references; unsigned int cpgr_num_slots; + unsigned int cpgr_num_slots_occupied_pager; + unsigned int cpgr_num_slots_occupied; union { compressor_slot_t *cpgr_dslots; compressor_slot_t **cpgr_islots; @@ -188,7 +193,10 @@ lck_attr_t compressor_pager_lck_attr; #define COMPRESSOR_SLOTS_PER_CHUNK (COMPRESSOR_SLOTS_CHUNK_SIZE / sizeof (compressor_slot_t)) /* forward declarations */ -void compressor_pager_slots_chunk_free(compressor_slot_t *chunk, int num_slots); +unsigned int compressor_pager_slots_chunk_free(compressor_slot_t *chunk, + int num_slots, + int flags, 
+ int *failures); void compressor_pager_slot_lookup( compressor_pager_t pager, boolean_t do_alloc, @@ -315,6 +323,7 @@ compressor_memory_object_deallocate( memory_object_t mem_obj) { compressor_pager_t pager; + unsigned int num_slots_freed; /* * Because we don't give out multiple first references @@ -357,9 +366,17 @@ compressor_memory_object_deallocate( for (i = 0; i < num_chunks; i++) { chunk = pager->cpgr_slots.cpgr_islots[i]; if (chunk != NULL) { - compressor_pager_slots_chunk_free( - chunk, - COMPRESSOR_SLOTS_PER_CHUNK); + num_slots_freed = + compressor_pager_slots_chunk_free( + chunk, + COMPRESSOR_SLOTS_PER_CHUNK, + 0, + NULL); + assert(pager->cpgr_num_slots_occupied_pager >= + num_slots_freed); + OSAddAtomic(-num_slots_freed, + &pager->cpgr_num_slots_occupied_pager); + assert(pager->cpgr_num_slots_occupied_pager >= 0); pager->cpgr_slots.cpgr_islots[i] = NULL; kfree(chunk, COMPRESSOR_SLOTS_CHUNK_SIZE); } @@ -369,14 +386,21 @@ compressor_memory_object_deallocate( pager->cpgr_slots.cpgr_islots = NULL; } else { chunk = pager->cpgr_slots.cpgr_dslots; - compressor_pager_slots_chunk_free( - chunk, - pager->cpgr_num_slots); + num_slots_freed = + compressor_pager_slots_chunk_free( + chunk, + pager->cpgr_num_slots, + 0, + NULL); + assert(pager->cpgr_num_slots_occupied_pager >= num_slots_freed); + OSAddAtomic(-num_slots_freed, &pager->cpgr_num_slots_occupied_pager); + assert(pager->cpgr_num_slots_occupied_pager >= 0); pager->cpgr_slots.cpgr_dslots = NULL; kfree(chunk, (pager->cpgr_num_slots * sizeof (pager->cpgr_slots.cpgr_dslots[0]))); } + assert(pager->cpgr_num_slots_occupied_pager == 0); compressor_pager_lock_destroy(pager); zfree(compressor_pager_zone, pager); @@ -529,6 +553,8 @@ compressor_memory_object_create( pager->cpgr_control = MEMORY_OBJECT_CONTROL_NULL; pager->cpgr_references = 1; pager->cpgr_num_slots = (uint32_t)(new_size/PAGE_SIZE); + pager->cpgr_num_slots_occupied_pager = 0; + pager->cpgr_num_slots_occupied = 0; num_chunks = (pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK - 1) / COMPRESSOR_SLOTS_PER_CHUNK; if (num_chunks > 1) { @@ -552,21 +578,32 @@ compressor_memory_object_create( } -void +unsigned int compressor_pager_slots_chunk_free( compressor_slot_t *chunk, - int num_slots) + int num_slots, + int flags, + int *failures) { -#if 00 - vm_compressor_free(chunk, num_slots); -#else int i; + unsigned int num_slots_freed; + + if (failures) + *failures = 0; + num_slots_freed = 0; for (i = 0; i < num_slots; i++) { if (chunk[i] != 0) { - vm_compressor_free(&chunk[i]); + if (vm_compressor_free(&chunk[i], flags) == 0) + num_slots_freed++; + else { + assert(flags & C_DONT_BLOCK); + + if (failures) + *failures += 1; + } } } -#endif + return num_slots_freed; } void @@ -651,12 +688,15 @@ vm_compressor_pager_put( memory_object_offset_t offset, ppnum_t ppnum, void **current_chead, - char *scratch_buf) + char *scratch_buf, + int *compressed_count_delta_p) { compressor_pager_t pager; compressor_slot_t *slot_p; - compressor_pager_stats.data_returns++; + compressor_pager_stats.put++; + + *compressed_count_delta_p = 0; /* This routine is called by the pageout thread. The pageout thread */ /* cannot be blocked by read activities unless the read activities */ @@ -679,7 +719,7 @@ vm_compressor_pager_put( if (slot_p == NULL) { /* out of range ? 
*/ - panic("compressor_pager_put: out of range"); + panic("vm_compressor_pager_put: out of range"); } if (*slot_p != 0) { /* @@ -689,10 +729,18 @@ vm_compressor_pager_put( * the "backing_object" had some pages paged out and the * "object" had an equivalent page resident. */ - vm_compressor_free(slot_p); + vm_compressor_free(slot_p, 0); + assert(pager->cpgr_num_slots_occupied_pager >= 1); + OSAddAtomic(-1, &pager->cpgr_num_slots_occupied_pager); + assert(pager->cpgr_num_slots_occupied_pager >= 0); + *compressed_count_delta_p -= 1; } if (vm_compressor_put(ppnum, slot_p, current_chead, scratch_buf)) return (KERN_RESOURCE_SHORTAGE); + assert(pager->cpgr_num_slots_occupied_pager >= 0); + OSAddAtomic(+1, &pager->cpgr_num_slots_occupied_pager); + assert(pager->cpgr_num_slots_occupied_pager > 0); + *compressed_count_delta_p += 1; return (KERN_SUCCESS); } @@ -704,13 +752,16 @@ vm_compressor_pager_get( memory_object_offset_t offset, ppnum_t ppnum, int *my_fault_type, - int flags) + int flags, + int *compressed_count_delta_p) { compressor_pager_t pager; kern_return_t kr; compressor_slot_t *slot_p; - compressor_pager_stats.data_requests++; + compressor_pager_stats.get++; + + *compressed_count_delta_p = 0; if ((uint32_t)(offset/PAGE_SIZE) != (offset/PAGE_SIZE)) { panic("%s: offset 0x%llx overflow\n", @@ -739,7 +790,8 @@ vm_compressor_pager_get( int retval; /* get the page from the compressor */ - if ((retval = vm_compressor_get(ppnum, slot_p, flags)) == -1) + retval = vm_compressor_get(ppnum, slot_p, flags); + if (retval == -1) kr = KERN_MEMORY_FAILURE; else if (retval == 1) *my_fault_type = DBG_COMPRESSOR_SWAPIN_FAULT; @@ -748,16 +800,34 @@ vm_compressor_pager_get( kr = KERN_FAILURE; } } + + if (kr == KERN_SUCCESS) { + assert(slot_p != NULL); + if (*slot_p != 0) { + /* + * We got the page for a copy-on-write fault + * and we kept the original in place. Slot + * is still occupied. 
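
compressor_pager_slots_chunk_free() now reports how many slots it actually freed, so callers can keep cpgr_num_slots_occupied_pager honest, and surfaces failures when C_DONT_BLOCK is set. A self-contained rendering of that contract; the slot-freeing routine is a stand-in, not vm_compressor_free():

#include <assert.h>

#define C_DONT_BLOCK 0x01

/* stand-in: 0 on success, nonzero when freeing would have had to block */
static int fake_slot_free(int *slot, int flags)
{
    if ((flags & C_DONT_BLOCK) && (*slot & 1))
        return 1;                     /* pretend odd tokens would block */
    *slot = 0;
    return 0;
}

unsigned int chunk_free(int *chunk, int num_slots, int flags, int *failures)
{
    unsigned int num_slots_freed = 0;

    if (failures)
        *failures = 0;

    for (int i = 0; i < num_slots; i++) {
        if (chunk[i] == 0)
            continue;
        if (fake_slot_free(&chunk[i], flags) == 0)
            num_slots_freed++;
        else {
            assert(flags & C_DONT_BLOCK);  /* only this mode may fail */
            if (failures)
                *failures += 1;
        }
    }
    return num_slots_freed;           /* caller subtracts from its occupancy */
}

vm_compressor_pager_reap_pages() later in this patch uses the failure count to decide whether a chunk can be torn down: only a chunk with zero failures is freed and its islots pointer cleared.
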
+ */ + } else { + assert(pager->cpgr_num_slots_occupied_pager >= 1); + OSAddAtomic(-1, &pager->cpgr_num_slots_occupied_pager); + assert(pager->cpgr_num_slots_occupied_pager >= 0); + *compressed_count_delta_p -= 1; + } + } + return kr; } -void +unsigned int vm_compressor_pager_state_clr( memory_object_t mem_obj, memory_object_offset_t offset) { compressor_pager_t pager; compressor_slot_t *slot_p; + unsigned int num_slots_freed; compressor_pager_stats.state_clr++; @@ -765,7 +835,7 @@ vm_compressor_pager_state_clr( /* overflow */ panic("%s: offset 0x%llx overflow\n", __FUNCTION__, (uint64_t) offset); - return; + return 0; } compressor_pager_lookup(mem_obj, pager); @@ -773,9 +843,17 @@ vm_compressor_pager_state_clr( /* find the compressor slot for that page */ compressor_pager_slot_lookup(pager, FALSE, offset, &slot_p); + num_slots_freed = 0; if (slot_p && *slot_p != 0) { - vm_compressor_free(slot_p); + vm_compressor_free(slot_p, 0); + num_slots_freed++; + assert(*slot_p == 0); + assert(pager->cpgr_num_slots_occupied_pager >= 1); + OSAddAtomic(-1, &pager->cpgr_num_slots_occupied_pager); + assert(pager->cpgr_num_slots_occupied_pager >= 0); } + + return num_slots_freed; } vm_external_state_t @@ -811,3 +889,242 @@ vm_compressor_pager_state_get( return VM_EXTERNAL_STATE_EXISTS; } } + +unsigned int +vm_compressor_pager_reap_pages( + memory_object_t mem_obj, + int flags) +{ + compressor_pager_t pager; + int num_chunks; + int failures; + int i; + compressor_slot_t *chunk; + unsigned int num_slots_freed; + + compressor_pager_lookup(mem_obj, pager); + if (pager == NULL) + return 0; + + compressor_pager_lock(pager); + + /* reap the compressor slots */ + num_slots_freed = 0; + + num_chunks = (pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK -1) / COMPRESSOR_SLOTS_PER_CHUNK; + if (num_chunks > 1) { + /* we have an array of chunks */ + for (i = 0; i < num_chunks; i++) { + chunk = pager->cpgr_slots.cpgr_islots[i]; + if (chunk != NULL) { + num_slots_freed += + compressor_pager_slots_chunk_free( + chunk, + COMPRESSOR_SLOTS_PER_CHUNK, + flags, + &failures); + if (failures == 0) { + pager->cpgr_slots.cpgr_islots[i] = NULL; + kfree(chunk, COMPRESSOR_SLOTS_CHUNK_SIZE); + } + } + } + } else { + chunk = pager->cpgr_slots.cpgr_dslots; + num_slots_freed += + compressor_pager_slots_chunk_free( + chunk, + pager->cpgr_num_slots, + flags, + NULL); + } + OSAddAtomic(-num_slots_freed, &pager->cpgr_num_slots_occupied_pager); + + compressor_pager_unlock(pager); + + return num_slots_freed; +} + +unsigned int +vm_compressor_pager_get_slots_occupied( + memory_object_t mem_obj) +{ + compressor_pager_t pager; + + compressor_pager_lookup(mem_obj, pager); + if (pager == NULL) + return 0; + + assert(pager->cpgr_num_slots_occupied_pager >= 0); + + return pager->cpgr_num_slots_occupied_pager; +} + +void +vm_compressor_pager_transfer( + memory_object_t dst_mem_obj, + memory_object_offset_t dst_offset, + memory_object_t src_mem_obj, + memory_object_offset_t src_offset) +{ + compressor_pager_t src_pager, dst_pager; + compressor_slot_t *src_slot_p, *dst_slot_p; + + compressor_pager_stats.transfer++; + + /* find the compressor slot for the destination */ + assert((uint32_t) dst_offset == dst_offset); + compressor_pager_lookup(dst_mem_obj, dst_pager); + assert(dst_offset / PAGE_SIZE <= dst_pager->cpgr_num_slots); + compressor_pager_slot_lookup(dst_pager, TRUE, (uint32_t) dst_offset, + &dst_slot_p); + assert(dst_slot_p != NULL); + assert(*dst_slot_p == 0); + + /* find the compressor slot for the source */ + assert((uint32_t) src_offset == 
src_offset); + compressor_pager_lookup(src_mem_obj, src_pager); + assert(src_offset / PAGE_SIZE <= src_pager->cpgr_num_slots); + compressor_pager_slot_lookup(src_pager, FALSE, (uint32_t) src_offset, + &src_slot_p); + assert(src_slot_p != NULL); + assert(*src_slot_p != 0); + + /* transfer the slot from source to destination */ + vm_compressor_transfer(dst_slot_p, src_slot_p); + OSAddAtomic(-1, &src_pager->cpgr_num_slots_occupied_pager); + OSAddAtomic(+1, &dst_pager->cpgr_num_slots_occupied_pager); + OSAddAtomic(-1, &src_pager->cpgr_num_slots_occupied); + OSAddAtomic(+1, &dst_pager->cpgr_num_slots_occupied); +} + +memory_object_offset_t +vm_compressor_pager_next_compressed( + memory_object_t mem_obj, + memory_object_offset_t offset) +{ + compressor_pager_t pager; + uint32_t num_chunks; + uint32_t page_num; + uint32_t chunk_idx; + uint32_t slot_idx; + compressor_slot_t *chunk; + + compressor_pager_lookup(mem_obj, pager); + + page_num = (uint32_t)(offset / PAGE_SIZE); + if (page_num != (offset/PAGE_SIZE)) { + /* overflow */ + return (memory_object_offset_t) -1; + } + if (page_num > pager->cpgr_num_slots) { + /* out of range */ + return (memory_object_offset_t) -1; + } + num_chunks = ((pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK - 1) / + COMPRESSOR_SLOTS_PER_CHUNK); + + if (num_chunks == 1) { + chunk = pager->cpgr_slots.cpgr_dslots; + for (slot_idx = page_num; + slot_idx < pager->cpgr_num_slots; + slot_idx++) { + if (chunk[slot_idx] != 0) { + /* found a non-NULL slot in this chunk */ + return (memory_object_offset_t) (slot_idx * + PAGE_SIZE); + } + } + return (memory_object_offset_t) -1; + } + + /* we have an array of chunks; find the next non-NULL chunk */ + chunk = NULL; + for (chunk_idx = page_num / COMPRESSOR_SLOTS_PER_CHUNK, + slot_idx = page_num % COMPRESSOR_SLOTS_PER_CHUNK; + chunk_idx < num_chunks; + chunk_idx++, + slot_idx = 0) { + chunk = pager->cpgr_slots.cpgr_islots[chunk_idx]; + if (chunk == NULL) { + /* no chunk here: try the next one */ + continue; + } + /* search for an occupied slot in this chunk */ + for (; + slot_idx < COMPRESSOR_SLOTS_PER_CHUNK; + slot_idx++) { + if (chunk[slot_idx] != 0) { + /* found an occupied slot in this chunk */ + uint32_t next_slot; + + next_slot = ((chunk_idx * + COMPRESSOR_SLOTS_PER_CHUNK) + + slot_idx); + if (next_slot > pager->cpgr_num_slots) { + /* went beyond end of object */ + return (memory_object_offset_t) -1; + } + return (memory_object_offset_t) (next_slot * + PAGE_SIZE); + } + } + } + return (memory_object_offset_t) -1; +} + +unsigned int +vm_compressor_pager_get_count( + memory_object_t mem_obj) +{ + compressor_pager_t pager; + + compressor_pager_lookup(mem_obj, pager); + if (pager == NULL) + return 0; + + /* + * The caller should have the VM object locked and one + * needs that lock to do a page-in or page-out, so no + * need to lock the pager here. + */ + assert(pager->cpgr_num_slots_occupied >= 0); + + return pager->cpgr_num_slots_occupied; +} + +void +vm_compressor_pager_count( + memory_object_t mem_obj, + int compressed_count_delta, + boolean_t shared_lock, + vm_object_t object __unused) +{ + compressor_pager_t pager; + + if (compressed_count_delta == 0) { + return; + } + + compressor_pager_lookup(mem_obj, pager); + if (pager == NULL) + return; + + if (compressed_count_delta < 0) { + assert(pager->cpgr_num_slots_occupied >= + (unsigned int) -compressed_count_delta); + } + + /* + * The caller should have the VM object locked, + * shared or exclusive. 
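
vm_compressor_pager_next_compressed() above walks a sparse, chunked slot array. The index arithmetic is easy to get wrong, so here it is in isolation: a page number splits into (chunk, slot-within-chunk), the first chunk is scanned from the starting slot, and every later chunk from slot 0. Sizes and the sentinel value are illustrative:

#include <stdint.h>

#define SLOTS_PER_CHUNK 512u

/* returns the first page number >= page_num whose slot is occupied,
 * or UINT32_MAX when there is none */
uint32_t next_compressed(int **islots, uint32_t num_slots, uint32_t page_num)
{
    uint32_t num_chunks = (num_slots + SLOTS_PER_CHUNK - 1) / SLOTS_PER_CHUNK;
    uint32_t slot_idx   = page_num % SLOTS_PER_CHUNK;

    for (uint32_t chunk_idx = page_num / SLOTS_PER_CHUNK;
         chunk_idx < num_chunks;
         chunk_idx++, slot_idx = 0) {          /* later chunks start at 0 */
        int *chunk = islots[chunk_idx];
        if (chunk == NULL)
            continue;                          /* sparse hole: skip it */
        for (; slot_idx < SLOTS_PER_CHUNK; slot_idx++) {
            uint32_t pn = chunk_idx * SLOTS_PER_CHUNK + slot_idx;
            if (pn >= num_slots)
                return UINT32_MAX;             /* ran past the object's end */
            if (chunk[slot_idx] != 0)
                return pn;                     /* occupied slot found */
        }
    }
    return UINT32_MAX;
}
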
+ */ + if (shared_lock) { + vm_object_lock_assert_shared(object); + OSAddAtomic(compressed_count_delta, + &pager->cpgr_num_slots_occupied); + } else { + vm_object_lock_assert_exclusive(object); + pager->cpgr_num_slots_occupied += compressed_count_delta; + } +} diff --git a/osfmk/vm/vm_compressor_pager.h b/osfmk/vm/vm_compressor_pager.h index be9035986..6016888e5 100644 --- a/osfmk/vm/vm_compressor_pager.h +++ b/osfmk/vm/vm_compressor_pager.h @@ -40,18 +40,20 @@ extern kern_return_t vm_compressor_pager_put( memory_object_offset_t offset, ppnum_t ppnum, void **current_chead, - char *scratch_buf); + char *scratch_buf, + int *compressed_count_delta_p); extern kern_return_t vm_compressor_pager_get( memory_object_t mem_obj, memory_object_offset_t offset, ppnum_t ppnum, int *my_fault_type, - int flags); + int flags, + int *compressed_count_delta_p); #define C_DONT_BLOCK 0x01 #define C_KEEP 0x02 -extern void vm_compressor_pager_state_clr( +extern unsigned int vm_compressor_pager_state_clr( memory_object_t mem_obj, memory_object_offset_t offset); extern vm_external_state_t vm_compressor_pager_state_get( @@ -59,7 +61,8 @@ extern vm_external_state_t vm_compressor_pager_state_get( memory_object_offset_t offset); #define VM_COMPRESSOR_PAGER_STATE_GET(object, offset) \ - (((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && \ + (((COMPRESSED_PAGER_IS_ACTIVE || \ + DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && \ (object)->internal && \ (object)->pager != NULL && \ !(object)->terminating && \ @@ -68,23 +71,58 @@ extern vm_external_state_t vm_compressor_pager_state_get( (offset) + (object)->paging_offset) \ : VM_EXTERNAL_STATE_UNKNOWN) -#define VM_COMPRESSOR_PAGER_STATE_CLR(object, offset) \ - MACRO_BEGIN \ - if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && \ - (object)->internal && \ - (object)->pager != NULL && \ - !(object)->terminating && \ - (object)->alive) { \ - vm_compressor_pager_state_clr( \ - (object)->pager, \ - (offset) + (object)->paging_offset); \ - } \ +#define VM_COMPRESSOR_PAGER_STATE_CLR(object, offset) \ + MACRO_BEGIN \ + if ((COMPRESSED_PAGER_IS_ACTIVE || \ + DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && \ + (object)->internal && \ + (object)->pager != NULL && \ + !(object)->terminating && \ + (object)->alive) { \ + int _num_pages_cleared; \ + _num_pages_cleared = \ + vm_compressor_pager_state_clr( \ + (object)->pager, \ + (offset) + (object)->paging_offset); \ + if (_num_pages_cleared) { \ + vm_compressor_pager_count((object)->pager, \ + -_num_pages_cleared, \ + FALSE, /* shared */ \ + (object)); \ + } \ + if (_num_pages_cleared && \ + (object)->purgable != VM_PURGABLE_DENY && \ + (object)->vo_purgeable_owner != NULL) { \ + /* less compressed purgeable pages */ \ + assert(_num_pages_cleared == 1); \ + vm_purgeable_compressed_update( \ + (object), \ + -_num_pages_cleared); \ + } \ + } \ MACRO_END +extern void vm_compressor_pager_transfer( + memory_object_t dst_mem_obj, + memory_object_offset_t dst_offset, + memory_object_t src_mem_obj, + memory_object_offset_t src_offset); +extern memory_object_offset_t vm_compressor_pager_next_compressed( + memory_object_t mem_obj, + memory_object_offset_t offset); + extern void vm_compressor_init(void); extern int vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf); extern int vm_compressor_get(ppnum_t pn, int *slot, int flags); -extern void vm_compressor_free(int *slot); +extern int vm_compressor_free(int *slot, int flags); +extern unsigned int 
vm_compressor_pager_reap_pages(memory_object_t mem_obj, int flags); +extern unsigned int vm_compressor_pager_get_slots_occupied(memory_object_t mem_obj); +extern unsigned int vm_compressor_pager_get_count(memory_object_t mem_obj); +extern void vm_compressor_pager_count(memory_object_t mem_obj, + int compressed_count_delta, + boolean_t shared_lock, + vm_object_t object); +extern void vm_compressor_transfer(int *dst_slot_p, int *src_slot_p); #endif /* _VM_VM_COMPRESSOR_PAGER_H_ */ diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index 207a987ec..e39ebf9b1 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -111,7 +111,7 @@ #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */ -int vm_object_pagein_throttle = 16; +unsigned int vm_object_pagein_throttle = 16; /* * We apply a hard throttle to the demand zero rate of tasks that we believe are running out of control which @@ -133,11 +133,9 @@ uint64_t vm_hard_throttle_threshold; -#define NEED_TO_HARD_THROTTLE_THIS_TASK() ((current_task() != kernel_task && \ - get_task_resident_size(current_task()) > (((AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE) / 5)) && \ - (vm_low_on_space() || (vm_page_free_count < vm_page_throttle_limit && \ - proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED ))) - +#define NEED_TO_HARD_THROTTLE_THIS_TASK() (vm_wants_task_throttled(current_task()) || \ + (vm_page_free_count < vm_page_throttle_limit && \ + proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED)) #define HARD_THROTTLE_DELAY 20000 /* 20000 us == 20 ms */ @@ -151,7 +149,8 @@ extern kern_return_t vm_fault_wire_fast( vm_map_offset_t va, vm_map_entry_t entry, pmap_t pmap, - vm_map_offset_t pmap_addr); + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p); extern void vm_fault_continue(void); @@ -179,6 +178,8 @@ unsigned long vm_cs_query_modified = 0; unsigned long vm_cs_validated_dirtied = 0; unsigned long vm_cs_bitmap_validated = 0; +void vm_pre_fault(vm_map_offset_t); + /* * Routine: vm_fault_init * Purpose: @@ -705,6 +706,9 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill) if (no_zero_fill == TRUE) { my_fault = DBG_NZF_PAGE_FAULT; + + if (m->absent && m->busy) + return (my_fault); } else { vm_page_zero_fill(m); @@ -896,7 +900,7 @@ vm_fault_page( if (!m->active && !m->inactive && !m->throttled) { \ vm_page_lockspin_queues(); \ if (!m->active && !m->inactive && !m->throttled) { \ - if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) \ + if (COMPRESSED_PAGER_IS_ACTIVE) \ vm_page_deactivate(m); \ else \ vm_page_activate(m); \ @@ -1189,15 +1193,14 @@ vm_fault_page( m->absent = FALSE; m->busy = TRUE; } + if (fault_info->mark_zf_absent && no_zero_fill == TRUE) + m->absent = TRUE; /* * zero-fill the page and put it on * the correct paging queue */ my_fault = vm_fault_zero_page(m, no_zero_fill); - if (fault_info->mark_zf_absent && no_zero_fill == TRUE) - m->absent = TRUE; - break; } else { if (must_be_resident) @@ -1513,7 +1516,10 @@ vm_fault_page( return (VM_FAULT_RETRY); } } - if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && object->internal) { + if (object->internal && + (COMPRESSED_PAGER_IS_ACTIVE + || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)) { + int compressed_count_delta; if (m == VM_PAGE_NULL) { /* @@ -1543,11 +1549,25 @@ vm_fault_page( m->absent = TRUE; pager = object->pager; + assert(object->paging_in_progress > 0); vm_object_unlock(object); - rc = vm_compressor_pager_get(pager, offset + 
object->paging_offset, m->phys_page, &my_fault_type, 0); + rc = vm_compressor_pager_get( + pager, + offset + object->paging_offset, + m->phys_page, + &my_fault_type, + 0, + &compressed_count_delta); vm_object_lock(object); + assert(object->paging_in_progress > 0); + + vm_compressor_pager_count( + pager, + compressed_count_delta, + FALSE, /* shared_lock */ + object); switch (rc) { case KERN_SUCCESS: @@ -1564,8 +1584,31 @@ vm_fault_page( */ pmap_sync_page_attributes_phys( m->phys_page); - } else + } else { m->written_by_kernel = TRUE; + } + + /* + * If the object is purgeable, its + * owner's purgeable ledgers have been + * updated in vm_page_insert() but the + * page was also accounted for in a + * "compressed purgeable" ledger, so + * update that now. + */ + if ((object->purgable != + VM_PURGABLE_DENY) && + (object->vo_purgeable_owner != + NULL)) { + /* + * One less compressed + * purgeable page. + */ + vm_purgeable_compressed_update( + object, + -1); + } + break; case KERN_MEMORY_FAILURE: m->unusual = TRUE; @@ -1576,7 +1619,10 @@ vm_fault_page( assert(m->absent); break; default: - panic("?"); + panic("vm_fault_page(): unexpected " + "error %d from " + "vm_compressor_pager_get()\n", + rc); } PAGE_WAKEUP_DONE(m); @@ -1691,6 +1737,7 @@ vm_fault_page( } else data_already_requested = TRUE; + DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL); #if TRACEFAULTPAGE dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */ #endif @@ -1825,10 +1872,11 @@ dont_look_for_page: } vm_page_insert(m, object, offset); } - my_fault = vm_fault_zero_page(m, no_zero_fill); - if (fault_info->mark_zf_absent && no_zero_fill == TRUE) m->absent = TRUE; + + my_fault = vm_fault_zero_page(m, no_zero_fill); + break; } else { @@ -1982,6 +2030,10 @@ dont_look_for_page: if (m->pmapped) pmap_disconnect(m->phys_page); + if (m->clustered) { + VM_PAGE_COUNT_AS_PAGEIN(m); + VM_PAGE_CONSUME_CLUSTERED(m); + } assert(!m->cleaning); /* @@ -2210,12 +2262,16 @@ dont_look_for_page: if (m->pmapped) pmap_disconnect(m->phys_page); + if (m->clustered) { + VM_PAGE_COUNT_AS_PAGEIN(m); + VM_PAGE_CONSUME_CLUSTERED(m); + } /* * If there's a pager, then immediately * page out this page, using the "initialize" * option. Else, we use the copy. */ - if ((!copy_object->pager_created) + if ((!copy_object->pager_ready) #if MACH_PAGEMAP || vm_external_state_get(copy_object->existence_map, copy_offset) == VM_EXTERNAL_STATE_ABSENT #endif @@ -2381,21 +2437,15 @@ done: if (m != VM_PAGE_NULL) { retval = VM_FAULT_SUCCESS; + if (my_fault == DBG_PAGEIN_FAULT) { - if (!m->object->internal || (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE)) - VM_STAT_INCR(pageins); - DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL); - DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL); - current_task()->pageins++; + VM_PAGE_COUNT_AS_PAGEIN(m); - if (m->object->internal) { - DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); + if (m->object->internal) my_fault = DBG_PAGEIND_FAULT; - } else { - DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); + else my_fault = DBG_PAGEINV_FAULT; - } /* * evaluate access pattern and update state @@ -2474,6 +2524,8 @@ vm_fault_enter(vm_page_t m, boolean_t change_wiring, boolean_t no_cache, boolean_t cs_bypass, + __unused int user_tag, + int pmap_options, boolean_t *need_retry, int *type_of_fault) { @@ -2510,31 +2562,26 @@ vm_fault_enter(vm_page_t m, * access later... 
*/ prot &= ~VM_PROT_WRITE; - } + } if (m->pmapped == FALSE) { - if ((*type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) { - /* - * found it in the cache, but this - * is the first fault-in of the page (m->pmapped == FALSE) - * so it must have come in as part of - * a cluster... account 1 pagein against it - */ - VM_STAT_INCR(pageins); - DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL); - - if (m->object->internal) { - DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); - *type_of_fault = DBG_PAGEIND_FAULT; - } else { - DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); - *type_of_fault = DBG_PAGEINV_FAULT; + if (m->clustered) { + if (*type_of_fault == DBG_CACHE_HIT_FAULT) { + /* + * found it in the cache, but this + * is the first fault-in of the page (m->pmapped == FALSE) + * so it must have come in as part of + * a cluster... account 1 pagein against it + */ + if (m->object->internal) + *type_of_fault = DBG_PAGEIND_FAULT; + else + *type_of_fault = DBG_PAGEINV_FAULT; + + VM_PAGE_COUNT_AS_PAGEIN(m); } - - current_task()->pageins++; + VM_PAGE_CONSUME_CLUSTERED(m); } - VM_PAGE_CONSUME_CLUSTERED(m); - } if (*type_of_fault != DBG_COW_FAULT) { @@ -2639,7 +2686,7 @@ vm_fault_enter(vm_page_t m, } if (reject_page) { - /* reject the tainted page: abort the page fault */ + /* reject the invalid page: abort the page fault */ int pid; const char *procname; task_t task; @@ -2692,6 +2739,7 @@ vm_fault_enter(vm_page_t m, } else { pathname = (char *)kalloc(__PATH_MAX * 2); if (pathname) { + pathname[0] = '\0'; pathname_len = __PATH_MAX; filename = pathname + pathname_len; filename_len = __PATH_MAX; @@ -2714,7 +2762,7 @@ vm_fault_enter(vm_page_t m, "wpmapped:%d slid:%d)\n", pid, procname, (addr64_t) vaddr, file_offset, - pathname, + (pathname ? pathname : ""), (truncated_path ? "/.../" : ""), (truncated_path ? filename : ""), cs_mtime.tv_sec, cs_mtime.tv_nsec, @@ -2737,20 +2785,47 @@ vm_fault_enter(vm_page_t m, filename = NULL; } } else { - /* proceed with the tainted page */ + /* proceed with the invalid page */ kr = KERN_SUCCESS; - /* Page might have been tainted before or not; now it - * definitively is. If the page wasn't tainted, we must - * disconnect it from all pmaps later. */ - must_disconnect = !m->cs_tainted; - m->cs_tainted = TRUE; + if (!m->cs_validated) { + /* + * This page has not been validated, so it + * must not belong to a code-signed object + * and should not be forcefully considered + * as tainted. + * We're just concerned about it here because + * we've been asked to "execute" it but that + * does not mean that it should cause other + * accesses to fail. + * This happens when a debugger sets a + * breakpoint and we then execute code in + * that page. Marking the page as "tainted" + * would cause any inspection tool ("leaks", + * "vmmap", "CrashReporter", ...) to get killed + * due to code-signing violation on that page, + * even though they're just reading it and not + * executing from it. + */ + assert(!m->object->code_signed); + } else { + /* + * Page might have been tainted before or not; + * now it definitively is. If the page wasn't + * tainted, we must disconnect it from all + * pmaps later, to force existing mappings + * through that code path for re-consideration + * of the validity of that page. 
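
The branch above distinguishes a page that was never validated (which, per the accompanying assertion, cannot belong to a code-signed object) from one that was validated and is now provably modified. Condensed into a predicate with a simplified page type — the real code also bumps cs_enter_tainted_accepted:

#include <stdbool.h>

struct cs_page {
    bool cs_validated;
    bool cs_tainted;
};

/* returns true when existing mappings must be torn down so every user
 * of the page re-faults through the validation path */
bool accept_invalid_page(struct cs_page *p)
{
    if (!p->cs_validated) {
        /* never validated, e.g. a page a debugger wrote a breakpoint
         * into: leave it untainted so read-only inspectors (leaks,
         * vmmap, CrashReporter) aren't killed for touching it */
        return false;
    }
    /* was validated, now definitively tainted */
    bool must_disconnect = !p->cs_tainted;
    p->cs_tainted = true;
    return must_disconnect;
}
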
+ */ + must_disconnect = !m->cs_tainted; + m->cs_tainted = TRUE; + } cs_enter_tainted_accepted++; } if (kr != KERN_SUCCESS) { if (cs_debug) { printf("CODESIGNING: vm_fault_enter(0x%llx): " - "page %p obj %p off 0x%llx *** INVALID PAGE ***\n", - (long long)vaddr, m, m->object, m->offset); + "*** INVALID PAGE ***\n", + (long long)vaddr); } #if !SECURE_KERNEL if (cs_enforcement_panic) { @@ -2928,42 +3003,6 @@ MACRO_END } } } - - if ((prot & VM_PROT_EXECUTE) && - ! m->xpmapped) { - - __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); - - /* - * xpmapped is protected by the page queues lock - * so it matters not that we might only hold the - * object lock in the shared state - */ - - if (! m->xpmapped) { - - m->xpmapped = TRUE; - __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); - - if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && - m->object->internal && - m->object->pager != NULL) { - /* - * This page could have been - * uncompressed by the - * compressor pager and its - * contents might be only in - * the data cache. - * Since it's being mapped for - * "execute" for the fist time, - * make sure the icache is in - * sync. - */ - pmap_sync_page_data_phys(m->phys_page); - } - - } - } /* we're done with the page queues lock, if we ever took it */ __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); @@ -2975,17 +3014,59 @@ MACRO_END * now so those processes can take note. */ if (kr == KERN_SUCCESS) { + /* * NOTE: we may only hold the vm_object lock SHARED - * at this point, but the update of pmapped is ok - * since this is the ONLY bit updated behind the SHARED - * lock... however, we need to figure out how to do an atomic - * update on a bit field to make this less fragile... right - * now I don't know how to coerce 'C' to give me the offset info - * that's needed for an AtomicCompareAndSwap + * at this point, so we need the phys_page lock to + * properly serialize updating the pmapped and + * xpmapped bits */ - m->pmapped = TRUE; - if(vm_page_is_slideable(m)) { + if ((prot & VM_PROT_EXECUTE) && !m->xpmapped) { + + pmap_lock_phys_page(m->phys_page); + /* + * go ahead and take the opportunity + * to set 'pmapped' here so that we don't + * need to grab this lock a 2nd time + * just below + */ + m->pmapped = TRUE; + + if (!m->xpmapped) { + + m->xpmapped = TRUE; + + pmap_unlock_phys_page(m->phys_page); + + if (!m->object->internal) + OSAddAtomic(1, &vm_page_xpmapped_external_count); + + if ((COMPRESSED_PAGER_IS_ACTIVE) && + m->object->internal && + m->object->pager != NULL) { + /* + * This page could have been + * uncompressed by the + * compressor pager and its + * contents might be only in + * the data cache. + * Since it's being mapped for + * "execute" for the fist time, + * make sure the icache is in + * sync. 
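
The hunk above retires the old fragile "single bit updated behind the shared lock" scheme: pmapped and xpmapped are now flipped under a per-physical-page lock, and the icache sync runs only on a page's first executable mapping. The control flow, reduced to a user-space skeleton with a mutex standing in for pmap_lock_phys_page():

#include <stdbool.h>
#include <pthread.h>

struct page_state {
    pthread_mutex_t phys_lock;        /* pmap_lock_phys_page() stand-in */
    bool pmapped;
    bool xpmapped;
};

void note_mapping(struct page_state *m, bool executable)
{
    if (executable && !m->xpmapped) {
        pthread_mutex_lock(&m->phys_lock);
        /* take the opportunity to set pmapped too, saving a 2nd lock trip */
        m->pmapped = true;
        if (!m->xpmapped) {
            m->xpmapped = true;
            pthread_mutex_unlock(&m->phys_lock);
            /* first execute mapping: sync the icache here */
        } else {
            pthread_mutex_unlock(&m->phys_lock);   /* lost the race: done */
        }
    } else if (!m->pmapped) {
        pthread_mutex_lock(&m->phys_lock);
        m->pmapped = true;
        pthread_mutex_unlock(&m->phys_lock);
    }
}
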
+ */ + pmap_sync_page_data_phys(m->phys_page); + } + } else + pmap_unlock_phys_page(m->phys_page); + } else { + if (m->pmapped == FALSE) { + pmap_lock_phys_page(m->phys_page); + m->pmapped = TRUE; + pmap_unlock_phys_page(m->phys_page); + } + } + if (vm_page_is_slideable(m)) { boolean_t was_busy = m->busy; vm_object_lock_assert_exclusive(m->object); @@ -3040,7 +3121,9 @@ MACRO_END * holding the object lock if we need to wait for a page in * pmap_enter() - */ PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type, 0, - wired, PMAP_OPTIONS_NOWAIT, pe_result); + wired, + pmap_options | PMAP_OPTIONS_NOWAIT, + pe_result); if(pe_result == KERN_RESOURCE_SHORTAGE) { @@ -3072,7 +3155,9 @@ MACRO_END m->busy = TRUE; vm_object_unlock(m->object); - PMAP_ENTER(pmap, vaddr, m, prot, fault_type, 0, wired); + PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type, + 0, wired, + pmap_options, pe_result); /* Take the object lock again. */ vm_object_lock(m->object); @@ -3091,6 +3176,21 @@ after_the_pmap_enter: return kr; } +void +vm_pre_fault(vm_map_offset_t vaddr) +{ + if (pmap_find_phys(current_map()->pmap, vaddr) == 0) { + + vm_fault(current_map(), /* map */ + vaddr, /* vaddr */ + VM_PROT_READ, /* fault_type */ + FALSE, /* change_wiring */ + THREAD_UNINT, /* interruptible */ + NULL, /* caller_pmap */ + 0 /* caller_pmap_addr */); + } +} + /* * Routine: vm_fault @@ -3121,6 +3221,22 @@ vm_fault( int interruptible, pmap_t caller_pmap, vm_map_offset_t caller_pmap_addr) +{ + return vm_fault_internal(map, vaddr, fault_type, change_wiring, + interruptible, caller_pmap, caller_pmap_addr, + NULL); +} + +kern_return_t +vm_fault_internal( + vm_map_t map, + vm_map_offset_t vaddr, + vm_prot_t fault_type, + boolean_t change_wiring, + int interruptible, + pmap_t caller_pmap, + vm_map_offset_t caller_pmap_addr, + ppnum_t *physpage_p) { vm_map_version_t version; /* Map version for verificiation */ boolean_t wired; /* Should mapping be wired down? 
*/ @@ -3152,6 +3268,7 @@ vm_fault( int cur_object_lock_type; vm_object_t top_object = VM_OBJECT_NULL; int throttle_delay; + int compressed_count_delta; KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, @@ -3567,7 +3684,8 @@ reclaimed_from_pageout: } } - if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m)) { + if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m) || + (physpage_p != NULL && (prot & VM_PROT_WRITE))) { upgrade_for_validation: /* * We might need to validate this page @@ -3673,6 +3791,8 @@ FastPmapEnter: change_wiring, fault_info.no_cache, fault_info.cs_bypass, + fault_info.user_tag, + fault_info.pmap_options, need_retry_ptr, &type_of_fault); } else { @@ -3685,10 +3805,23 @@ FastPmapEnter: change_wiring, fault_info.no_cache, fault_info.cs_bypass, + fault_info.user_tag, + fault_info.pmap_options, need_retry_ptr, &type_of_fault); } + if (kr == KERN_SUCCESS && + physpage_p != NULL) { + /* for vm_map_wire_and_extract() */ + *physpage_p = m->phys_page; + if (prot & VM_PROT_WRITE) { + vm_object_lock_assert_exclusive( + m->object); + m->dirty = TRUE; + } + } + if (top_object != VM_OBJECT_NULL) { /* * It's safe to drop the top object @@ -3738,7 +3871,9 @@ FastPmapEnter: * re-drive the fault which should result in vm_fault_enter * being able to successfully enter the mapping this time around */ - (void)pmap_enter_options(pmap, vaddr, 0, 0, 0, 0, 0, PMAP_OPTIONS_NOENTER, NULL); + (void)pmap_enter_options( + pmap, vaddr, 0, 0, 0, 0, 0, + PMAP_OPTIONS_NOENTER, NULL); need_retry = FALSE; goto RetryFault; @@ -3838,7 +3973,11 @@ FastPmapEnter: */ if (object->ref_count > 1 && cur_m->pmapped) pmap_disconnect(cur_m->phys_page); - + + if (cur_m->clustered) { + VM_PAGE_COUNT_AS_PAGEIN(cur_m); + VM_PAGE_CONSUME_CLUSTERED(cur_m); + } need_collapse = TRUE; if (!cur_object->internal && @@ -3987,17 +4126,94 @@ FastPmapEnter: */ break; } - if (vm_compressor_pager_get(cur_object->pager, cur_offset + cur_object->paging_offset, - m->phys_page, &my_fault_type, c_flags) != KERN_SUCCESS) { + + /* + * The object is and remains locked + * so no need to take a + * "paging_in_progress" reference. + */ + boolean_t shared_lock; + if ((object == cur_object && + object_lock_type == OBJECT_LOCK_EXCLUSIVE) || + (object != cur_object && + cur_object_lock_type == OBJECT_LOCK_EXCLUSIVE)) { + shared_lock = FALSE; + } else { + shared_lock = TRUE; + } + + kr = vm_compressor_pager_get( + cur_object->pager, + (cur_offset + + cur_object->paging_offset), + m->phys_page, + &my_fault_type, + c_flags, + &compressed_count_delta); + + vm_compressor_pager_count( + cur_object->pager, + compressed_count_delta, + shared_lock, + cur_object); + + if (kr != KERN_SUCCESS) { vm_page_release(m); break; } m->dirty = TRUE; - if (insert_cur_object) + /* + * If the object is purgeable, its + * owner's purgeable ledgers will be + * updated in vm_page_insert() but the + * page was also accounted for in a + * "compressed purgeable" ledger, so + * update that now. + */ + if (object != cur_object && + !insert_cur_object) { + /* + * We're not going to insert + * the decompressed page into + * the object it came from. + * + * We're dealing with a + * copy-on-write fault on + * "object". + * We're going to decompress + * the page directly into the + * target "object" while + * keepin the compressed + * page for "cur_object", so + * no ledger update in that + * case. + */ + } else if ((cur_object->purgable == + VM_PURGABLE_DENY) || + (cur_object->vo_purgeable_owner == + NULL)) { + /* + * "cur_object" is not purgeable + * or is not owned, so no + * purgeable ledgers to update. 
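
In the fast-path hunk above, the object lock may be held either shared or exclusive when the compressed-page count is adjusted, so vm_compressor_pager_count() must pick its update accordingly. A sketch of that rule, using compiler builtins in place of OSAddAtomic:

#include <stdbool.h>

void pager_count_adjust(int *counter, int delta, bool shared_lock)
{
    if (delta == 0)
        return;

    if (shared_lock) {
        /* shared holders can race each other: update atomically */
        __atomic_add_fetch(counter, delta, __ATOMIC_RELAXED);
    } else {
        /* the exclusive holder is serialized: a plain add is enough */
        *counter += delta;
    }
}
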
+ */ + } else { + /* + * One less compressed + * purgeable page for + * cur_object's owner. + */ + vm_purgeable_compressed_update( + cur_object, + -1); + } + + if (insert_cur_object) { vm_page_insert(m, cur_object, cur_offset); - else + } else { vm_page_insert(m, object, offset); + } if ((m->object->wimg_bits & VM_WIMG_MASK) != VM_WIMG_USE_DEFAULT) { /* @@ -4008,6 +4224,7 @@ FastPmapEnter: */ pmap_sync_page_attributes_phys(m->phys_page); } + type_of_fault = my_fault_type; VM_STAT_INCR(decompressions); @@ -4441,6 +4658,8 @@ handle_copy_delay: change_wiring, fault_info.no_cache, fault_info.cs_bypass, + fault_info.user_tag, + fault_info.pmap_options, NULL, &type_of_fault); } else { @@ -4453,6 +4672,8 @@ handle_copy_delay: change_wiring, fault_info.no_cache, fault_info.cs_bypass, + fault_info.user_tag, + fault_info.pmap_options, NULL, &type_of_fault); } @@ -4466,6 +4687,14 @@ handle_copy_delay: vm_object_deallocate(object); goto done; } + if (physpage_p != NULL) { + /* for vm_map_wire_and_extract() */ + *physpage_p = m->phys_page; + if (prot & VM_PROT_WRITE) { + vm_object_lock_assert_exclusive(m->object); + m->dirty = TRUE; + } + } } else { vm_map_entry_t entry; @@ -4538,31 +4767,40 @@ handle_copy_delay: } if (vm_map_lookup_entry(map, laddr, &entry) && - (entry->object.vm_object != NULL) && - (entry->object.vm_object == object)) { + (entry->object.vm_object != NULL) && + (entry->object.vm_object == object)) { int superpage = (!object->pager_created && object->phys_contiguous)? VM_MEM_SUPERPAGE : 0; + + if (superpage && physpage_p) { + /* for vm_map_wire_and_extract() */ + *physpage_p = (ppnum_t) ((((vm_map_offset_t) entry->object.vm_object->vo_shadow_offset) + + entry->offset + + (laddr - entry->vme_start)) + >> PAGE_SHIFT); + } + if (caller_pmap) { /* * Set up a block mapped area */ - assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12)); + assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT)); pmap_map_block(caller_pmap, (addr64_t)(caller_pmap_addr - ldelta), (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->vo_shadow_offset)) + - entry->offset + (laddr - entry->vme_start) - ldelta) >> 12), - (uint32_t)((ldelta + hdelta) >> 12), prot, + entry->offset + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT), + (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot, (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); } else { /* * Set up a block mapped area */ - assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12)); + assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT)); pmap_map_block(real_map->pmap, (addr64_t)(vaddr - ldelta), (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->vo_shadow_offset)) + - entry->offset + (laddr - entry->vme_start) - ldelta) >> 12), - (uint32_t)((ldelta + hdelta) >> 12), prot, + entry->offset + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT), + (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot, (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); } } @@ -4618,7 +4856,8 @@ vm_fault_wire( vm_map_t map, vm_map_entry_t entry, pmap_t pmap, - vm_map_offset_t pmap_addr) + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p) { register vm_map_offset_t va; @@ -4628,8 +4867,8 @@ vm_fault_wire( assert(entry->in_transition); if ((entry->object.vm_object != NULL) && - !entry->is_sub_map && - entry->object.vm_object->phys_contiguous) { + !entry->is_sub_map && + entry->object.vm_object->phys_contiguous) { return KERN_SUCCESS; } @@ -4648,14 +4887,18 @@ 
vm_fault_wire( */ for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { - if ((rc = vm_fault_wire_fast( - map, va, entry, pmap, - pmap_addr + (va - entry->vme_start) - )) != KERN_SUCCESS) { - rc = vm_fault(map, va, VM_PROT_NONE, TRUE, - (pmap == kernel_pmap) ? - THREAD_UNINT : THREAD_ABORTSAFE, - pmap, pmap_addr + (va - entry->vme_start)); + rc = vm_fault_wire_fast(map, va, entry, pmap, + pmap_addr + (va - entry->vme_start), + physpage_p); + if (rc != KERN_SUCCESS) { + rc = vm_fault_internal(map, va, VM_PROT_NONE, TRUE, + ((pmap == kernel_pmap) + ? THREAD_UNINT + : THREAD_ABORTSAFE), + pmap, + (pmap_addr + + (va - entry->vme_start)), + physpage_p); DTRACE_VM2(softlock, int, 1, (uint64_t *), NULL); } @@ -4706,6 +4949,11 @@ vm_fault_unwire( fault_info.interruptible = THREAD_UNINT; fault_info.behavior = entry->behavior; fault_info.user_tag = entry->alias; + fault_info.pmap_options = 0; + if (entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap)) { + fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT; + } fault_info.lo_offset = entry->offset; fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; fault_info.no_cache = entry->no_cache; @@ -4857,8 +5105,9 @@ vm_fault_wire_fast( __unused vm_map_t map, vm_map_offset_t va, vm_map_entry_t entry, - pmap_t pmap, - vm_map_offset_t pmap_addr) + pmap_t pmap, + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p) { vm_object_t object; vm_object_offset_t offset; @@ -4910,8 +5159,10 @@ vm_fault_wire_fast( /* * If this entry is not directly to a vm_object, bail out. */ - if (entry->is_sub_map) + if (entry->is_sub_map) { + assert(physpage_p == NULL); return(KERN_FAILURE); + } /* * Find the backing store object and offset into it. @@ -5005,6 +5256,11 @@ vm_fault_wire_fast( FALSE, FALSE, FALSE, + entry->alias, + ((entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap)) + ? 
PMAP_OPTIONS_ALT_ACCT + : 0), NULL, &type_of_fault); @@ -5013,6 +5269,19 @@ done: * Unlock everything, and return */ + if (physpage_p) { + /* for vm_map_wire_and_extract() */ + if (kr == KERN_SUCCESS) { + *physpage_p = m->phys_page; + if (prot & VM_PROT_WRITE) { + vm_object_lock_assert_exclusive(m->object); + m->dirty = TRUE; + } + } else { + *physpage_p = 0; + } + } + PAGE_WAKEUP_DONE(m); UNLOCK_AND_DEALLOCATE; @@ -5134,6 +5403,7 @@ vm_fault_copy( fault_info_src.interruptible = interruptible; fault_info_src.behavior = VM_BEHAVIOR_SEQUENTIAL; fault_info_src.user_tag = 0; + fault_info_src.pmap_options = 0; fault_info_src.lo_offset = vm_object_trunc_page(src_offset); fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left; fault_info_src.no_cache = FALSE; @@ -5146,6 +5416,7 @@ vm_fault_copy( fault_info_dst.interruptible = interruptible; fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL; fault_info_dst.user_tag = 0; + fault_info_dst.pmap_options = 0; fault_info_dst.lo_offset = vm_object_trunc_page(dst_offset); fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left; fault_info_dst.no_cache = FALSE; @@ -5598,7 +5869,6 @@ vm_page_validate_cs_mapped( } } -extern int panic_on_cs_killed; void vm_page_validate_cs( vm_page_t page) @@ -5643,8 +5913,7 @@ vm_page_validate_cs( return; } - if (panic_on_cs_killed && - page->slid) { + if (page->slid) { panic("vm_page_validate_cs(%p): page is slid\n", page); } assert(!page->slid); diff --git a/osfmk/vm/vm_fault.h b/osfmk/vm/vm_fault.h index 1c4e0696d..65e8fd658 100644 --- a/osfmk/vm/vm_fault.h +++ b/osfmk/vm/vm_fault.h @@ -95,6 +95,8 @@ extern kern_return_t vm_fault( pmap_t pmap, vm_map_offset_t pmap_addr); +extern void vm_pre_fault(vm_map_offset_t); + #ifdef MACH_KERNEL_PRIVATE #include @@ -103,6 +105,16 @@ extern kern_return_t vm_fault( extern void vm_fault_init(void); +extern kern_return_t vm_fault_internal( + vm_map_t map, + vm_map_offset_t vaddr, + vm_prot_t fault_type, + boolean_t change_wiring, + int interruptible, + pmap_t pmap, + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p); + /* * Page fault handling based on vm_object only. */ @@ -136,7 +148,8 @@ extern kern_return_t vm_fault_wire( vm_map_t map, vm_map_entry_t entry, pmap_t pmap, - vm_map_offset_t pmap_addr); + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p); extern void vm_fault_unwire( vm_map_t map, @@ -165,6 +178,8 @@ extern kern_return_t vm_fault_enter( boolean_t change_wiring, boolean_t no_cache, boolean_t cs_bypass, + int user_tag, + int pmap_options, boolean_t *need_retry, int *type_of_fault); diff --git a/osfmk/vm/vm_init.c b/osfmk/vm/vm_init.c index 027e6c416..82787a83c 100644 --- a/osfmk/vm/vm_init.c +++ b/osfmk/vm/vm_init.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,13 @@ boolean_t zlog_ready = FALSE; vm_offset_t kmapoff_kaddr; unsigned int kmapoff_pgcnt; +static inline void +vm_mem_bootstrap_log(const char *message) +{ +// kprintf("vm_mem_bootstrap: %s\n", message); + kernel_debug_string(message); +} + /* * vm_mem_bootstrap initializes the virtual memory system. * This is done only by the first cpu up. @@ -113,27 +121,25 @@ vm_mem_bootstrap(void) * From here on, all physical memory is accounted for, * and we use only virtual addresses. 
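
vm_fault_wire_fast() above gains a physpage_p out-parameter for vm_map_wire_and_extract(): filled with the physical page on success (and the page pre-dirtied if it was wired writable), zeroed on failure. The convention in isolation, with simplified stand-in types:

#include <stdint.h>
#include <stdbool.h>

typedef uint32_t ppnum_t;
struct page { ppnum_t phys_page; bool dirty; };

void report_wired_page(struct page *m, bool write_wired, bool success,
                       ppnum_t *physpage_p)
{
    if (physpage_p == NULL)           /* plain vm_map_wire(): nothing to do */
        return;

    if (success) {
        *physpage_p = m->phys_page;
        if (write_wired)
            m->dirty = true;          /* it may be scribbled on while wired */
    } else {
        *physpage_p = 0;              /* caller must not trust the value */
    }
}
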
*/ -#define vm_mem_bootstrap_kprintf(x) /* kprintf(x) */ - - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_page_bootstrap\n")); + vm_mem_bootstrap_log("vm_page_bootstrap"); vm_page_bootstrap(&start, &end); /* * Initialize other VM packages */ - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling zone_bootstrap\n")); + vm_mem_bootstrap_log("zone_bootstrap"); zone_bootstrap(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_object_bootstrap\n")); + vm_mem_bootstrap_log("vm_object_bootstrap"); vm_object_bootstrap(); vm_kernel_ready = TRUE; - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_map_init\n")); + vm_mem_bootstrap_log("vm_map_init"); vm_map_init(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling kmem_init\n")); + vm_mem_bootstrap_log("kmem_init"); kmem_init(start, end); kmem_ready = TRUE; /* @@ -150,7 +156,7 @@ vm_mem_bootstrap(void) kmapoff_pgcnt * PAGE_SIZE_64, VM_FLAGS_ANYWHERE) != KERN_SUCCESS) panic("cannot vm_allocate %u kernel_map pages", kmapoff_pgcnt); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling pmap_init\n")); + vm_mem_bootstrap_log("pmap_init"); pmap_init(); kmem_alloc_ready = TRUE; @@ -173,10 +179,10 @@ vm_mem_bootstrap(void) zsize = ZONE_MAP_MAX; /* Clamp to 1.5GB max for K32 */ #endif /* !__LP64__ */ - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling kext_alloc_init\n")); + vm_mem_bootstrap_log("kext_alloc_init"); kext_alloc_init(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling zone_init\n")); + vm_mem_bootstrap_log("zone_init"); assert((vm_size_t) zsize == zsize); zone_init((vm_size_t) zsize); /* Allocate address space for zones */ @@ -186,27 +192,27 @@ vm_mem_bootstrap(void) * page allocations (which are used for guard pages by the guard * mode zone allocator). */ - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_page_module_init\n")); + vm_mem_bootstrap_log("vm_page_module_init"); vm_page_module_init(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling kalloc_init\n")); + vm_mem_bootstrap_log("kalloc_init"); kalloc_init(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling vm_fault_init\n")); + vm_mem_bootstrap_log("vm_fault_init"); vm_fault_init(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling memory_manager_default_init\n")); + vm_mem_bootstrap_log("memory_manager_default_init"); memory_manager_default_init(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling memory_object_control_bootstrap\n")); + vm_mem_bootstrap_log("memory_object_control_bootstrap"); memory_object_control_bootstrap(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling device_pager_bootstrap\n")); + vm_mem_bootstrap_log("device_pager_bootstrap"); device_pager_bootstrap(); vm_paging_map_init(); - vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: done\n")); + vm_mem_bootstrap_log("vm_mem_bootstrap done"); } void diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c index 65e48ae7d..5dcffd740 100644 --- a/osfmk/vm/vm_kern.c +++ b/osfmk/vm/vm_kern.c @@ -66,7 +66,6 @@ #include #include #include -#include #include #include #include @@ -947,7 +946,7 @@ kmem_alloc_pageable( kern_return_t kr; #ifndef normal - map_addr = (vm_map_min(map)) + 0x1000; + map_addr = (vm_map_min(map)) + PAGE_SIZE; #else map_addr = vm_map_min(map); #endif diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index a30ff18e1..a22b763c1 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -65,6 +65,9 @@ #include #include + +#include + #include #include @@ -216,7 +219,8 @@ static kern_return_t vm_map_wire_nested( vm_prot_t access_type, 
boolean_t user_wire, pmap_t map_pmap, - vm_map_offset_t pmap_addr); + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p); static kern_return_t vm_map_unwire_nested( vm_map_t map, @@ -315,7 +319,8 @@ boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \ (NEW)->user_wired_count = 0; \ (NEW)->permanent = FALSE; \ (NEW)->used_for_jit = FALSE; \ - (NEW)->from_reserved_zone = _vmec_reserved; \ + (NEW)->from_reserved_zone = _vmec_reserved; \ + (NEW)->iokit_acct = FALSE; \ MACRO_END #define vm_map_entry_copy_full(NEW,OLD) \ @@ -524,7 +529,8 @@ vm_map_apple_protected( start, &map_entry) || map_entry->vme_end < end || - map_entry->is_sub_map) { + map_entry->is_sub_map || + !(map_entry->protection & VM_PROT_EXECUTE)) { /* that memory is not properly mapped */ kr = KERN_INVALID_ARGUMENT; goto done; @@ -593,6 +599,7 @@ done: lck_grp_t vm_map_lck_grp; lck_grp_attr_t vm_map_lck_grp_attr; lck_attr_t vm_map_lck_attr; +lck_attr_t vm_map_lck_rw_attr; /* @@ -668,6 +675,9 @@ vm_map_init( lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr); lck_attr_setdefault(&vm_map_lck_attr); + lck_attr_setdefault(&vm_map_lck_rw_attr); + lck_attr_cleardebug(&vm_map_lck_rw_attr); + #if CONFIG_FREEZE default_freezer_init(); #endif /* CONFIG_FREEZE */ @@ -1326,7 +1336,7 @@ vm_map_find_space( new_entry->is_shared = FALSE; new_entry->is_sub_map = FALSE; - new_entry->use_pmap = FALSE; + new_entry->use_pmap = TRUE; new_entry->object.vm_object = VM_OBJECT_NULL; new_entry->offset = (vm_object_offset_t) 0; @@ -1354,6 +1364,7 @@ vm_map_find_space( new_entry->alias = 0; new_entry->zero_wired_pages = FALSE; + new_entry->iokit_acct = FALSE; VM_GET_FLAGS_ALIAS(flags, new_entry->alias); @@ -1391,7 +1402,7 @@ int vm_map_pmap_enter_enable = FALSE; * In/out conditions: * The source map should not be locked on entry. */ -static void +__unused static void vm_map_pmap_enter( vm_map_t map, register vm_map_offset_t addr, @@ -1409,6 +1420,17 @@ vm_map_pmap_enter( while (addr < end_addr) { register vm_page_t m; + + /* + * TODO: + * From vm_map_enter(), we come into this function without the map + * lock held or the object lock held. + * We haven't taken a reference on the object either. + * We should do a proper lookup on the map to make sure + * that things are sane before we go locking objects that + * could have been deallocated from under us. + */ + vm_object_lock(object); m = vm_page_lookup(object, offset); @@ -1431,7 +1453,10 @@ vm_map_pmap_enter( } type_of_fault = DBG_CACHE_HIT_FAULT; kr = vm_fault_enter(m, map->pmap, addr, protection, protection, - VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, + VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, + 0, /* XXX need user tag / alias? */ + 0, /* alternate accounting? 
*/ + NULL, &type_of_fault); vm_object_unlock(object); @@ -1567,6 +1592,7 @@ vm_map_enter( boolean_t map_locked = FALSE; boolean_t pmap_empty = TRUE; boolean_t new_mapping_established = FALSE; + boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0); boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0); boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0); boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0); @@ -1574,6 +1600,7 @@ vm_map_enter( boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0); boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0); boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0); + boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0); unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT); char alias; vm_map_offset_t effective_min_offset, effective_max_offset; @@ -1669,6 +1696,18 @@ vm_map_enter( */ clear_map_aligned = TRUE; } + if (!anywhere && + !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) { + /* + * We've been asked to map at a fixed address and that + * address is not aligned to the map's specific alignment. + * The caller should know what it's doing (i.e. most likely + * mapping some fragmented copy map, transferring memory from + * a VM map with a different alignment), so clear map_aligned + * for this new VM map entry and proceed. + */ + clear_map_aligned = TRUE; + } /* * Only zero-fill objects are allowed to be purgable. @@ -1806,6 +1845,7 @@ StartAgain: ; if ((end > effective_max_offset) || (end < start)) { if (map->wait_for_space) { + assert(!keep_map_locked); if (size <= (effective_max_offset - effective_min_offset)) { assert_wait((event_t)map, @@ -1878,7 +1918,8 @@ StartAgain: ; * address range, saving them in our "zap_old_map". */ (void) vm_map_delete(map, start, end, - VM_MAP_REMOVE_SAVE_ENTRIES, + (VM_MAP_REMOVE_SAVE_ENTRIES | + VM_MAP_REMOVE_NO_MAP_ALIGN), zap_old_map); } @@ -1914,6 +1955,7 @@ StartAgain: ; entry->protection != cur_protection || entry->max_protection != max_protection || entry->inheritance != inheritance || + entry->iokit_acct != iokit_acct || entry->alias != alias) { /* not the same mapping ! */ RETURN(KERN_NO_SPACE); @@ -1992,8 +2034,30 @@ StartAgain: ; if (object == VM_OBJECT_NULL) { object = vm_object_allocate(size); object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + object->true_share = TRUE; if (purgable) { + task_t owner; object->purgable = VM_PURGABLE_NONVOLATILE; + if (map->pmap == kernel_pmap) { + /* + * Purgeable mappings made in a kernel + * map are "owned" by the kernel itself + * rather than the current user task + * because they're likely to be used by + * more than this user task (see + * execargs_purgeable_allocate(), for + * example). 
+ */ + owner = kernel_task; + } else { + owner = current_task(); + } + assert(object->vo_purgeable_owner == NULL); + assert(object->resident_page_count == 0); + assert(object->wired_page_count == 0); + vm_object_lock(object); + vm_purgeable_nonvolatile_enqueue(object, owner); + vm_object_unlock(object); } offset = (vm_object_offset_t)0; } @@ -2003,22 +2067,30 @@ StartAgain: ; (entry->vme_end == start) && (!entry->is_shared) && (!entry->is_sub_map) && - ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) && - (entry->inheritance == inheritance) && + (!entry->in_transition) && + (!entry->needs_wakeup) && + (entry->behavior == VM_BEHAVIOR_DEFAULT) && (entry->protection == cur_protection) && (entry->max_protection == max_protection) && - (entry->behavior == VM_BEHAVIOR_DEFAULT) && - (entry->in_transition == 0) && + (entry->inheritance == inheritance) && + ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) && (entry->no_cache == no_cache) && + (entry->permanent == permanent) && + (!entry->superpage_size && !superpage_size) && /* * No coalescing if not map-aligned, to avoid propagating * that condition any further than needed: */ (!entry->map_aligned || !clear_map_aligned) && + (!entry->zero_wired_pages) && + (!entry->used_for_jit && !entry_for_jit) && + (entry->iokit_acct == iokit_acct) && + ((entry->vme_end - entry->vme_start) + size <= (alias == VM_MEMORY_REALLOC ? ANON_CHUNK_SIZE : NO_COALESCE_LIMIT)) && + (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ if (vm_object_coalesce(entry->object.vm_object, VM_OBJECT_NULL, @@ -2038,6 +2110,7 @@ StartAgain: ; VM_MAP_PAGE_MASK(map))); entry->vme_end = end; vm_map_store_update_first_free(map, map->first_free); + new_mapping_established = TRUE; RETURN(KERN_SUCCESS); } } @@ -2080,7 +2153,8 @@ StartAgain: ; 0, no_cache, permanent, superpage_size, - clear_map_aligned); + clear_map_aligned, + is_submap); new_entry->alias = alias; if (entry_for_jit){ if (!(map->jit_entry_exists)){ @@ -2089,16 +2163,46 @@ StartAgain: ; } } + assert(!new_entry->iokit_acct); + if (!is_submap && + object != VM_OBJECT_NULL && + object->purgable != VM_PURGABLE_DENY) { + assert(new_entry->use_pmap); + assert(!new_entry->iokit_acct); + /* + * Turn off pmap accounting since + * purgeable objects have their + * own ledgers. + */ + new_entry->use_pmap = FALSE; + } else if (!is_submap && + iokit_acct) { + /* alternate accounting */ + assert(!new_entry->iokit_acct); + assert(new_entry->use_pmap); + new_entry->iokit_acct = TRUE; + new_entry->use_pmap = FALSE; + vm_map_iokit_mapped_region( + map, + (new_entry->vme_end - + new_entry->vme_start)); + } else if (!is_submap) { + assert(!new_entry->iokit_acct); + assert(new_entry->use_pmap); + } + if (is_submap) { vm_map_t submap; boolean_t submap_is_64bit; boolean_t use_pmap; - new_entry->is_sub_map = TRUE; + assert(new_entry->is_sub_map); + assert(!new_entry->use_pmap); + assert(!new_entry->iokit_acct); submap = (vm_map_t) object; submap_is_64bit = vm_map_is_64bit(submap); use_pmap = (alias == VM_MEMORY_SHARED_PMAP); - #ifndef NO_NESTED_PMAP +#ifndef NO_NESTED_PMAP if (use_pmap && submap->pmap == NULL) { ledger_t ledger = map->pmap->ledger; /* we need a sub pmap to nest... 
*/ @@ -2127,7 +2231,7 @@ StartAgain: ; pmap_empty = FALSE; } } - #endif /* NO_NESTED_PMAP */ +#endif /* NO_NESTED_PMAP */ } entry = new_entry; @@ -2149,6 +2253,7 @@ StartAgain: ; sp_object->phys_contiguous = TRUE; sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE; entry->object.vm_object = sp_object; + assert(entry->use_pmap); /* enter the base pages into the object */ vm_object_lock(sp_object); @@ -2167,46 +2272,23 @@ StartAgain: ; tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end)); } - vm_map_unlock(map); - map_locked = FALSE; - new_mapping_established = TRUE; - /* Wire down the new entry if the user - * requested all new map entries be wired. - */ - if ((map->wiring_required)||(superpage_size)) { - pmap_empty = FALSE; /* pmap won't be empty */ - kr = vm_map_wire(map, start, end, - new_entry->protection, TRUE); - RETURN(kr); - } - - if ((object != VM_OBJECT_NULL) && - (vm_map_pmap_enter_enable) && - (!anywhere) && - (!needs_copy) && - (size < (128*1024))) { - pmap_empty = FALSE; /* pmap won't be empty */ - - if (override_nx(map, alias) && cur_protection) - cur_protection |= VM_PROT_EXECUTE; +BailOut: + assert(map_locked == TRUE); - vm_map_pmap_enter(map, start, end, - object, offset, cur_protection); - } - -BailOut: ; if (result == KERN_SUCCESS) { vm_prot_t pager_prot; memory_object_t pager; +#if DEBUG if (pmap_empty && !(flags & VM_FLAGS_NO_PMAP_CHECK)) { assert(vm_map_pmap_is_empty(map, *address, *address+size)); } +#endif /* DEBUG */ /* * For "named" VM objects, let the pager know that the @@ -2246,7 +2328,35 @@ BailOut: ; } vm_object_unlock(object); } - } else { + } + + assert(map_locked == TRUE); + + if (!keep_map_locked) { + vm_map_unlock(map); + map_locked = FALSE; + } + + /* + * We can't hold the map lock if we enter this block. + */ + + if (result == KERN_SUCCESS) { + + /* Wire down the new entry if the user + * requested all new map entries be wired. + */ + if ((map->wiring_required)||(superpage_size)) { + assert(!keep_map_locked); + pmap_empty = FALSE; /* pmap won't be empty */ + kr = vm_map_wire(map, start, end, + new_entry->protection, TRUE); + result = kr; + } + + } + + if (result != KERN_SUCCESS) { if (new_mapping_established) { /* * We have to get rid of the new mappings since we @@ -2265,7 +2375,8 @@ BailOut: ; map_locked = TRUE; } (void) vm_map_delete(map, *address, *address+size, - VM_MAP_REMOVE_SAVE_ENTRIES, + (VM_MAP_REMOVE_SAVE_ENTRIES | + VM_MAP_REMOVE_NO_MAP_ALIGN), zap_new_map); } if (zap_old_map != VM_MAP_NULL && @@ -2324,7 +2435,11 @@ BailOut: ; } } - if (map_locked) { + /* + * The caller is responsible for releasing the lock if it requested to + * keep the map locked. + */ + if (map_locked && !keep_map_locked) { vm_map_unlock(map); } @@ -2346,8 +2461,14 @@ BailOut: ; #undef RETURN } -kern_return_t -vm_map_enter_mem_object( +/* + * Counters for the prefault optimization. 
+ */ +int64_t vm_prefault_nb_pages = 0; +int64_t vm_prefault_nb_bailout = 0; + +static kern_return_t +vm_map_enter_mem_object_helper( vm_map_t target_map, vm_map_offset_t *address, vm_map_size_t initial_size, @@ -2358,7 +2479,9 @@ vm_map_enter_mem_object( boolean_t copy, vm_prot_t cur_protection, vm_prot_t max_protection, - vm_inherit_t inheritance) + vm_inherit_t inheritance, + upl_page_list_ptr_t page_list, + unsigned int page_list_count) { vm_map_address_t map_addr; vm_map_size_t map_size; @@ -2366,6 +2489,7 @@ vm_map_enter_mem_object( vm_object_size_t size; kern_return_t result; boolean_t mask_cur_protection, mask_max_protection; + boolean_t try_prefault = (page_list_count != 0); vm_map_offset_t offset_in_mapping; mask_cur_protection = cur_protection & VM_PROT_IS_MASK; @@ -2380,6 +2504,7 @@ vm_map_enter_mem_object( (cur_protection & ~VM_PROT_ALL) || (max_protection & ~VM_PROT_ALL) || (inheritance > VM_INHERIT_LAST_VALID) || + (try_prefault && (copy || !page_list)) || initial_size == 0) return KERN_INVALID_ARGUMENT; @@ -2442,8 +2567,15 @@ vm_map_enter_mem_object( VM_MAP_PAGE_MASK(target_map)); } - if (offset != 0 || - size != named_entry->size) { + if (!(flags & VM_FLAGS_ANYWHERE) && + (offset != 0 || + size != named_entry->size)) { + /* + * XXX for a mapping at a "fixed" address, + * we can't trim after mapping the whole + * memory entry, so reject a request for a + * partial mapping. + */ return KERN_INVALID_ARGUMENT; } } @@ -2575,6 +2707,22 @@ vm_map_enter_mem_object( if (object->wimg_bits != wimg_mode) vm_object_change_wimg_mode(object, wimg_mode); +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) @@ -2616,7 +2764,8 @@ vm_map_enter_mem_object( /* reserve a contiguous range */ kr = vm_map_enter(target_map, &map_addr, - map_size, + /* map whole mem entry, trim later: */ + named_entry->size, mask, flags & (VM_FLAGS_ANYWHERE | VM_FLAGS_OVERWRITE | @@ -2648,8 +2797,9 @@ vm_map_enter_mem_object( copy_entry->vme_start); /* sanity check */ - if (copy_addr + copy_size > - map_addr + map_size) { + if ((copy_addr + copy_size) > + (map_addr + + named_entry->size /* XXX full size */ )) { /* over-mapping too much !? */ kr = KERN_INVALID_ARGUMENT; /* abort */ @@ -2707,6 +2857,30 @@ vm_map_enter_mem_object( } else { *address = map_addr; } + + if (offset) { + /* + * Trim in front, from 0 to "offset". + */ + vm_map_remove(target_map, + map_addr, + map_addr + offset, + 0); + *address += offset; + } + if (offset + map_size < named_entry->size) { + /* + * Trim in back, from + * "offset + map_size" to + * "named_entry->size". + */ + vm_map_remove(target_map, + (map_addr + + offset + map_size), + (map_addr + + named_entry->size), + 0); + } } named_entry_unlock(named_entry); @@ -2874,6 +3048,12 @@ vm_map_enter_mem_object( offset = new_offset; } + /* + * If users want to try to prefault pages, the mapping and prefault + * needs to be atomic. 
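+ */

The helper keeps the target map locked (VM_FLAGS_KEEP_MAP_LOCKED) so that the pmap_enter_options() prefault loop below runs before anything else can touch the new range. The loop's error policy can be modeled in isolation; in this hedged sketch, the hypothetical enter_one_page() stands in for pmap_enter_options(..., PMAP_OPTIONS_NOWAIT, ...):

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical stand-in for one non-blocking pmap entry attempt. */
extern bool enter_one_page(size_t page_index);

/*
 * Stop prefaulting at the first failure (later attempts would most
 * likely fail too), but never fail the mapping itself: prefaulting
 * is an optimization, not a requirement.
 */
static void
prefault_pages(const bool *valid, size_t page_count)
{
	for (size_t i = 0; i < page_count; i++) {
		if (valid[i] && !enter_one_page(i)) {
			break;  /* counted as a bailout, not reported as an error */
		}
	}
}

/*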
+ */ + if (try_prefault) + flags |= VM_FLAGS_KEEP_MAP_LOCKED; result = vm_map_enter(target_map, &map_addr, map_size, (vm_map_offset_t)mask, @@ -2884,6 +3064,43 @@ vm_map_enter_mem_object( if (result != KERN_SUCCESS) vm_object_deallocate(object); + /* + * Try to prefault, and do not forget to release the vm map lock. + */ + if (result == KERN_SUCCESS && try_prefault) { + mach_vm_address_t va = map_addr; + kern_return_t kr = KERN_SUCCESS; + unsigned int i = 0; + + for (i = 0; i < page_list_count; ++i) { + if (UPL_VALID_PAGE(page_list, i)) { + /* + * If this function call failed, we should stop + * trying to optimize, other calls are likely + * going to fail too. + * + * We are not gonna report an error for such + * failure though. That's an optimization, not + * something critical. + */ + kr = pmap_enter_options(target_map->pmap, + va, UPL_PHYS_PAGE(page_list, i), + cur_protection, VM_PROT_NONE, + 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL); + if (kr != KERN_SUCCESS) { + OSIncrementAtomic64(&vm_prefault_nb_bailout); + goto BailOut; + } + OSIncrementAtomic64(&vm_prefault_nb_pages); + } + + /* Next virtual address */ + va += PAGE_SIZE; + } +BailOut: + vm_map_unlock(target_map); + } + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { *address = map_addr + offset_in_mapping; } else { @@ -2892,7 +3109,43 @@ vm_map_enter_mem_object( return result; } +kern_return_t +vm_map_enter_mem_object( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags, + port, offset, copy, cur_protection, max_protection, + inheritance, NULL, 0); +} +kern_return_t +vm_map_enter_mem_object_prefault( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + vm_prot_t cur_protection, + vm_prot_t max_protection, + upl_page_list_ptr_t page_list, + unsigned int page_list_count) +{ + return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags, + port, offset, FALSE, cur_protection, max_protection, + VM_INHERIT_DEFAULT, page_list, page_list_count); +} kern_return_t @@ -3214,7 +3467,7 @@ vm_map_enter_cpm( type_of_fault = DBG_ZERO_FILL_FAULT; vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE, - VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, + VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL, &type_of_fault); vm_object_unlock(cpm_obj); @@ -3299,6 +3552,7 @@ vm_map_clip_unnest( assert(entry->is_sub_map); assert(entry->object.sub_map != NULL); + assert(entry->use_pmap); /* * Query the platform for the optimal unnest range. @@ -3365,7 +3619,8 @@ vm_map_clip_start( vm_map_offset_t startaddr) { #ifndef NO_NESTED_PMAP - if (entry->use_pmap && + if (entry->is_sub_map && + entry->use_pmap && startaddr >= entry->vme_start) { vm_map_offset_t start_unnest, end_unnest; @@ -3421,17 +3676,18 @@ _vm_map_clip_start( * address. 
*/ + if (entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_HDR_PAGE_MASK(map_header))); + } + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); - assert(VM_MAP_PAGE_ALIGNED(start, - VM_MAP_HDR_PAGE_MASK(map_header))); new_entry->vme_end = start; assert(new_entry->vme_start < new_entry->vme_end); entry->offset += (start - entry->vme_start); assert(start < entry->vme_end); - assert(VM_MAP_PAGE_ALIGNED(start, - VM_MAP_HDR_PAGE_MASK(map_header))); entry->vme_start = start; _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry); @@ -3464,7 +3720,7 @@ vm_map_clip_end( endaddr = entry->vme_end; } #ifndef NO_NESTED_PMAP - if (entry->use_pmap) { + if (entry->is_sub_map && entry->use_pmap) { vm_map_offset_t start_unnest, end_unnest; /* @@ -3518,12 +3774,15 @@ _vm_map_clip_end( * AFTER the specified entry */ + if (entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(end, + VM_MAP_HDR_PAGE_MASK(map_header))); + } + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); assert(entry->vme_start < end); - assert(VM_MAP_PAGE_ALIGNED(end, - VM_MAP_HDR_PAGE_MASK(map_header))); new_entry->vme_start = entry->vme_end = end; new_entry->offset += (end - entry->vme_start); assert(new_entry->vme_start < new_entry->vme_end); @@ -3634,15 +3893,15 @@ vm_map_range_check( */ kern_return_t vm_map_submap( - vm_map_t map, + vm_map_t map, vm_map_offset_t start, vm_map_offset_t end, - vm_map_t submap, + vm_map_t submap, vm_map_offset_t offset, #ifdef NO_NESTED_PMAP __unused #endif /* NO_NESTED_PMAP */ - boolean_t use_pmap) + boolean_t use_pmap) { vm_map_entry_t entry; register kern_return_t result = KERN_INVALID_ARGUMENT; @@ -3660,7 +3919,6 @@ vm_map_submap( return KERN_INVALID_ARGUMENT; } - assert(!entry->use_pmap); /* we don't want to unnest anything here */ vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); @@ -3675,6 +3933,7 @@ vm_map_submap( entry->object.vm_object = VM_OBJECT_NULL; vm_object_deallocate(object); entry->is_sub_map = TRUE; + entry->use_pmap = FALSE; entry->object.sub_map = submap; vm_map_reference(submap); if (submap->mapped_in_other_pmaps == FALSE && @@ -3837,7 +4096,10 @@ vm_map_protect( vm_map_clip_end(map, current, end); - assert(!current->use_pmap); /* clipping did unnest if needed */ + if (current->is_sub_map) { + /* clipping did unnest if needed */ + assert(!current->use_pmap); + } old_prot = current->protection; @@ -3851,6 +4113,7 @@ vm_map_protect( if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){ current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start)); current->offset = 0; + assert(current->use_pmap); } current->needs_copy = TRUE; current->max_protection |= VM_PROT_WRITE; @@ -3962,7 +4225,10 @@ vm_map_inherit( while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { vm_map_clip_end(map, entry, end); - assert(!entry->use_pmap); /* clip did unnest if needed */ + if (entry->is_sub_map) { + /* clip did unnest if needed */ + assert(!entry->use_pmap); + } entry->inheritance = new_inheritance; @@ -4106,7 +4372,8 @@ vm_map_wire_nested( register vm_prot_t access_type, boolean_t user_wire, pmap_t map_pmap, - vm_map_offset_t pmap_addr) + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p) { register vm_map_entry_t entry; struct vm_map_entry *first_entry, tmp_entry; @@ -4119,6 +4386,21 @@ vm_map_wire_nested( thread_t cur_thread; unsigned int 
last_timestamp; vm_map_size_t size; + boolean_t wire_and_extract; + + wire_and_extract = FALSE; + if (physpage_p != NULL) { + /* + * The caller wants the physical page number of the + * wired page. We return only one physical page number + * so this works for only one page at a time. + */ + if ((end - start) != PAGE_SIZE) { + return KERN_INVALID_ARGUMENT; + } + wire_and_extract = TRUE; + *physpage_p = 0; + } vm_map_lock(map); if(map_pmap == NULL) @@ -4240,6 +4522,17 @@ vm_map_wire_nested( vm_map_offset_t local_end; pmap_t pmap; + if (wire_and_extract) { + /* + * Wiring would result in copy-on-write + * which would not be compatible with + * the sharing we have with the original + * provider of this memory. + */ + rc = KERN_INVALID_ARGUMENT; + goto done; + } + vm_map_clip_start(map, entry, s); vm_map_clip_end(map, entry, end); @@ -4352,7 +4645,8 @@ vm_map_wire_nested( rc = vm_map_wire_nested(entry->object.sub_map, sub_start, sub_end, access_type, - user_wire, pmap, pmap_addr); + user_wire, pmap, pmap_addr, + NULL); vm_map_lock(map); /* @@ -4398,6 +4692,24 @@ vm_map_wire_nested( * the appropriate wire reference count. */ if (entry->wired_count) { + + if ((entry->protection & access_type) != access_type) { + /* found a protection problem */ + + /* + * XXX FBDP + * We should always return an error + * in this case but since we didn't + * enforce it before, let's do + * it only for the new "wire_and_extract" + * code path for now... + */ + if (wire_and_extract) { + rc = KERN_PROTECTION_FAILURE; + goto done; + } + } + /* * entry is already wired down, get our reference * after clipping to our range. */ @@ -4408,6 +4720,56 @@ vm_map_wire_nested( if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) goto done; + if (wire_and_extract) { + vm_object_t object; + vm_object_offset_t offset; + vm_page_t m; + + /* + * We don't have to "wire" the page again + * but we still have to "extract" its + * physical page number, after some sanity + * checks. + */ + assert((entry->vme_end - entry->vme_start) + == PAGE_SIZE); + assert(!entry->needs_copy); + assert(!entry->is_sub_map); + assert(entry->object.vm_object); + if (((entry->vme_end - entry->vme_start) + != PAGE_SIZE) || + entry->needs_copy || + entry->is_sub_map || + entry->object.vm_object == VM_OBJECT_NULL) { + rc = KERN_INVALID_ARGUMENT; + goto done; + } + + object = entry->object.vm_object; + offset = entry->offset; + /* need exclusive lock to update m->dirty */ + if (entry->protection & VM_PROT_WRITE) { + vm_object_lock(object); + } else { + vm_object_lock_shared(object); + } + m = vm_page_lookup(object, offset); + assert(m != VM_PAGE_NULL); + assert(m->wire_count); + if (m != VM_PAGE_NULL && m->wire_count) { + *physpage_p = m->phys_page; + if (entry->protection & VM_PROT_WRITE) { + vm_object_lock_assert_exclusive( + m->object); + m->dirty = TRUE; + } + } else { + /* not already wired !? */ + *physpage_p = 0; + } + vm_object_unlock(object); + } + /* map was not unlocked: no need to relookup */ entry = entry->vme_next; s = entry->vme_start; continue; } @@ -4432,12 +4794,30 @@ vm_map_wire_nested( * This is aggressive, but once it's wired we can't move it. 
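*/

The extraction path above is what gives vm_map_wire_and_extract() (defined below) its contract: exactly one page per call, one physical page number out, and 0 on any failure. A stand-alone model of that contract, with a hypothetical wire_one_page() in place of the real wiring machinery:

#include <stdbool.h>
#include <stdint.h>

#define MODEL_PAGE_SIZE 4096u   /* stand-in for PAGE_SIZE */

/* Hypothetical stand-in for wiring the single page at "va". */
extern bool wire_one_page(uint64_t va, uint32_t *phys_out);

/*
 * Only one physical page number can be returned, so only a
 * single-page range is accepted; *physpage stays 0 unless the
 * wire succeeds.
 */
static bool
wire_and_extract_model(uint64_t start, uint64_t end, uint32_t *physpage)
{
	*physpage = 0;
	if (end - start != MODEL_PAGE_SIZE) {
		return false;   /* KERN_INVALID_ARGUMENT in the real code */
	}
	return wire_one_page(start, physpage);
}

/*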
*/ if (entry->needs_copy) { + if (wire_and_extract) { + /* + * We're supposed to share with the original + * provider so should not be "needs_copy" + */ + rc = KERN_INVALID_ARGUMENT; + goto done; + } + vm_object_shadow(&entry->object.vm_object, &entry->offset, size); entry->needs_copy = FALSE; } else if (entry->object.vm_object == VM_OBJECT_NULL) { + if (wire_and_extract) { + /* + * We're supposed to share with the original + * provider so should already have an object. + */ + rc = KERN_INVALID_ARGUMENT; + goto done; + } entry->object.vm_object = vm_object_allocate(size); entry->offset = (vm_object_offset_t)0; + assert(entry->use_pmap); } vm_map_clip_start(map, entry, s); @@ -4500,11 +4880,13 @@ vm_map_wire_nested( if(map_pmap) rc = vm_fault_wire(map, - &tmp_entry, map_pmap, pmap_addr); + &tmp_entry, map_pmap, pmap_addr, + physpage_p); else rc = vm_fault_wire(map, &tmp_entry, map->pmap, - tmp_entry.vme_start); + tmp_entry.vme_start, + physpage_p); if (!user_wire && cur_thread != THREAD_NULL) thread_interrupt_level(interruptible_state); @@ -4563,6 +4945,9 @@ done: if (rc != KERN_SUCCESS) { /* undo what has been wired so far */ vm_map_unwire(map, start, s, user_wire); + if (physpage_p) { + *physpage_p = 0; + } } return rc; @@ -4581,7 +4966,33 @@ vm_map_wire( kern_return_t kret; kret = vm_map_wire_nested(map, start, end, access_type, - user_wire, (pmap_t)NULL, 0); + user_wire, (pmap_t)NULL, 0, NULL); + return kret; +} + +kern_return_t +vm_map_wire_and_extract( + vm_map_t map, + vm_map_offset_t start, + vm_prot_t access_type, + boolean_t user_wire, + ppnum_t *physpage_p) +{ + + kern_return_t kret; + + kret = vm_map_wire_nested(map, + start, + start+VM_MAP_PAGE_SIZE(map), + access_type, + user_wire, + (pmap_t)NULL, + 0, + physpage_p); + if (kret != KERN_SUCCESS && + physpage_p != NULL) { + *physpage_p = 0; + } return kret; } @@ -5136,6 +5547,18 @@ vm_map_delete( */ if (vm_map_lookup_entry(map, start, &first_entry)) { entry = first_entry; + if (map == kalloc_map && + (entry->vme_start != start || + entry->vme_end != end)) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "mismatched entry %p [0x%llx:0x%llx]\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)entry->vme_start, + (uint64_t)entry->vme_end); + } if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start); start = SUPERPAGE_ROUND_DOWN(start); continue; @@ -5146,6 +5569,27 @@ vm_map_delete( * any unnecessary unnesting in this case... */ } else { + if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && + entry->map_aligned && + !VM_MAP_PAGE_ALIGNED( + start, + VM_MAP_PAGE_MASK(map))) { + /* + * The entry will no longer be + * map-aligned after clipping + * and the caller said it's OK. + */ + entry->map_aligned = FALSE; + } + if (map == kalloc_map) { + panic("vm_map_delete(%p,0x%llx,0x%llx):" + " clipping %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)start); + } vm_map_clip_start(map, entry, start); } @@ -5155,6 +5599,15 @@ vm_map_delete( */ SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } else { + if (map->pmap == kernel_pmap && + map->ref_count != 0) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "no map entry at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + (uint64_t)start); + } entry = first_entry->vme_next; } break; @@ -5191,6 +5644,25 @@ vm_map_delete( * vm_map_simplify_entry(). We need to * re-clip its start. 
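*/

The kalloc_map panics added above encode an invariant worth stating on its own: a range freed back to kalloc_map must coincide exactly with one existing entry, so clipping (i.e. a partial free) is never legal there. A minimal model of that well-formedness check, with a hypothetical entry type:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for the vme_start/vme_end pair of an entry. */
struct entry_model {
	uint64_t start;
	uint64_t end;
};

/*
 * A kalloc_map deletion is well-formed only if it covers the
 * looked-up entry exactly; anything else indicates a corrupted
 * or mismatched free, which the patch turns into a panic.
 */
static bool
kalloc_delete_is_exact(const struct entry_model *e,
    uint64_t start, uint64_t end)
{
	return e->start == start && e->end == end;
}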
*/ + if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && + entry->map_aligned && + !VM_MAP_PAGE_ALIGNED(s, + VM_MAP_PAGE_MASK(map))) { + /* + * The entry will no longer be map-aligned + * after clipping and the caller said it's OK. + */ + entry->map_aligned = FALSE; + } + if (map == kalloc_map) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "clipping %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)s); + } vm_map_clip_start(map, entry, s); } if (entry->vme_end <= end) { @@ -5199,6 +5671,25 @@ vm_map_delete( * to clip and possibly cause an unnecessary unnesting. */ } else { + if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && + entry->map_aligned && + !VM_MAP_PAGE_ALIGNED(end, + VM_MAP_PAGE_MASK(map))) { + /* + * The entry will no longer be map-aligned + * after clipping and the caller said it's OK. + */ + entry->map_aligned = FALSE; + } + if (map == kalloc_map) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "clipping %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)end); + } vm_map_clip_end(map, entry, end); } @@ -5236,7 +5727,6 @@ vm_map_delete( * We do not clear the needs_wakeup flag, * since we cannot tell if we were the only one. */ - vm_map_unlock(map); return KERN_ABORTED; } @@ -5305,7 +5795,6 @@ vm_map_delete( * cannot tell if we were the * only one. */ - vm_map_unlock(map); return KERN_ABORTED; } @@ -5496,15 +5985,40 @@ vm_map_delete( } } + if (entry->iokit_acct) { + /* alternate accounting */ + vm_map_iokit_unmapped_region(map, + (entry->vme_end - + entry->vme_start)); + entry->iokit_acct = FALSE; + } + /* * All pmap mappings for this map entry must have been * cleared by now. */ +#if DEBUG assert(vm_map_pmap_is_empty(map, entry->vme_start, entry->vme_end)); +#endif /* DEBUG */ next = entry->vme_next; + + if (map->pmap == kernel_pmap && + map->ref_count != 0 && + entry->vme_end < end && + (next == vm_map_to_entry(map) || + next->vme_start != entry->vme_end)) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "hole after %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)entry->vme_end); + } + s = next->vme_start; last_timestamp = map->timestamp; @@ -5746,7 +6260,10 @@ start_pass_1: tmp_entry, vm_map_trunc_page(dst_addr, VM_MAP_PAGE_MASK(dst_map))); - assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */ + if (tmp_entry->is_sub_map) { + /* clipping did unnest if needed */ + assert(!tmp_entry->use_pmap); + } for (entry = tmp_entry;;) { vm_map_entry_t next; @@ -5969,8 +6486,7 @@ vm_map_copy_overwrite_nested( !VM_MAP_PAGE_ALIGNED(copy->offset, VM_MAP_PAGE_MASK(dst_map)) || !VM_MAP_PAGE_ALIGNED(dst_addr, - VM_MAP_PAGE_MASK(dst_map)) || - dst_map->hdr.page_shift != copy->cpy_hdr.page_shift) + VM_MAP_PAGE_MASK(dst_map))) { aligned = FALSE; dst_end = vm_map_round_page(dst_addr + copy->size, @@ -6888,6 +7404,7 @@ vm_map_copy_overwrite_unaligned( entry->vme_end - entry->vme_start); entry->object.vm_object = dst_object; entry->offset = 0; + assert(entry->use_pmap); vm_map_lock_write_to_read(dst_map); } /* @@ -7036,7 +7553,10 @@ vm_map_copy_overwrite_aligned( copy_size = (copy_entry->vme_end - copy_entry->vme_start); entry = tmp_entry; - assert(!entry->use_pmap); /* unnested when clipped earlier */ + if (entry->is_sub_map) { + /* unnested when clipped earlier */ + assert(!entry->use_pmap); + } if (entry == vm_map_to_entry(dst_map)) { vm_map_unlock(dst_map); return KERN_INVALID_ADDRESS; @@ -7070,6 +7590,12 @@ vm_map_copy_overwrite_aligned( */ if (copy_size < size) { + if (entry->map_aligned && + 
!VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size, + VM_MAP_PAGE_MASK(dst_map))) { + /* no longer map-aligned */ + entry->map_aligned = FALSE; + } vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size); size = copy_size; } @@ -7319,6 +7845,7 @@ vm_map_copy_overwrite_aligned( dst_offset = 0; entry->object.vm_object = dst_object; entry->offset = dst_offset; + assert(entry->use_pmap); } @@ -7382,6 +7909,13 @@ vm_map_copy_overwrite_aligned( copy_size != 0) { /* We can safely use saved tmp_entry value */ + if (tmp_entry->map_aligned && + !VM_MAP_PAGE_ALIGNED( + start, + VM_MAP_PAGE_MASK(dst_map))) { + /* no longer map-aligned */ + tmp_entry->map_aligned = FALSE; + } vm_map_clip_end(dst_map, tmp_entry, start); tmp_entry = tmp_entry->vme_next; } else { @@ -7391,6 +7925,13 @@ vm_map_copy_overwrite_aligned( vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } + if (tmp_entry->map_aligned && + !VM_MAP_PAGE_ALIGNED( + start, + VM_MAP_PAGE_MASK(dst_map))) { + /* no longer map-aligned */ + tmp_entry->map_aligned = FALSE; + } vm_map_clip_start(dst_map, tmp_entry, start); } } @@ -7612,6 +8153,7 @@ vm_map_copy_remap( new_entry->behavior = VM_BEHAVIOR_DEFAULT; /* take an extra reference on the entry's "object" */ if (new_entry->is_sub_map) { + assert(!new_entry->use_pmap); /* not nested */ vm_map_lock(new_entry->object.sub_map); vm_map_reference(new_entry->object.sub_map); vm_map_unlock(new_entry->object.sub_map); @@ -7818,7 +8360,11 @@ StartAgain: ; while (entry != vm_map_copy_to_entry(copy)) { new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); vm_map_entry_copy_full(new, entry); - new->use_pmap = FALSE; /* clr address space specifics */ + assert(!new->iokit_acct); + if (new->is_sub_map) { + /* clr address space specifics */ + new->use_pmap = FALSE; + } vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new); @@ -7927,8 +8473,14 @@ StartAgain: ; type_of_fault = DBG_CACHE_HIT_FAULT; vm_fault_enter(m, dst_map->pmap, va, prot, prot, - VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, - &type_of_fault); + VM_PAGE_WIRED(m), FALSE, FALSE, + FALSE, entry->alias, + ((entry->iokit_acct || + (!entry->is_sub_map && + !entry->use_pmap)) + ? PMAP_OPTIONS_ALT_ACCT + : 0), + NULL, &type_of_fault); vm_object_unlock(object); @@ -8058,7 +8610,8 @@ vm_map_copyin_common( register vm_map_copy_t copy; /* Resulting copy */ - vm_map_address_t copy_addr; + vm_map_address_t copy_addr; + vm_map_size_t copy_size; /* * Check for copies of zero bytes. @@ -8156,11 +8709,28 @@ vm_map_copyin_common( vm_map_lock(src_map); - if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) + /* + * Lookup the original "src_addr" rather than the truncated + * "src_start", in case "src_start" falls in a non-map-aligned + * map entry *before* the map entry that contains "src_addr"... + */ + if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); if(!tmp_entry->is_sub_map) { + /* + * ... but clip to the map-rounded "src_start" rather than + * "src_addr" to preserve map-alignment. We'll adjust the + * first copy entry at the end, if needed. + */ vm_map_clip_start(src_map, tmp_entry, src_start); } + if (src_start < tmp_entry->vme_start) { + /* + * Move "src_start" up to the start of the + * first map entry to copy. 
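+ */

Why look up "src_addr" but clip at the rounded "src_start"? With a 16K source map on a 4K-page system, map-rounding can move the start of the copy into an earlier, non-map-aligned entry. A self-contained worked example of the two truncations (addresses are hypothetical):

#include <stdio.h>
#include <stdint.h>

#define TRUNC(a, m)  ((uint64_t)(a) & ~(uint64_t)(m))

int
main(void)
{
	uint64_t src_addr = 0x1007000;  /* hypothetical, 4K-aligned */
	uint64_t mask_4k  = 0x0fff;     /* PAGE_MASK on a 4K-page system */
	uint64_t mask_16k = 0x3fff;     /* VM_MAP_PAGE_MASK of a 16K map */

	/* 4K truncation is a no-op here: prints 0x1007000 */
	printf("4K  trunc: 0x%llx\n",
	    (unsigned long long)TRUNC(src_addr, mask_4k));

	/* 16K truncation yields 0x1004000, which may fall inside a
	 * different map entry -- hence the lookup on "src_addr" and
	 * the clip on the rounded "src_start". */
	printf("16K trunc: 0x%llx\n",
	    (unsigned long long)TRUNC(src_addr, mask_16k));
	return 0;
}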
+ src_start = tmp_entry->vme_start; + } /* set for later submap fix-up */ copy_addr = src_start; @@ -8283,7 +8853,10 @@ vm_map_copyin_common( was_wired = (src_entry->wired_count != 0); vm_map_entry_copy(new_entry, src_entry); - new_entry->use_pmap = FALSE; /* clr address space specifics */ + if (new_entry->is_sub_map) { + /* clr address space specifics */ + new_entry->use_pmap = FALSE; + } /* * Attempt non-blocking copy-on-write optimizations. @@ -8409,6 +8982,9 @@ vm_map_copyin_common( new_entry->object.vm_object = new_object; new_entry->needs_copy = TRUE; + assert(!new_entry->iokit_acct); + assert(new_object->purgable == VM_PURGABLE_DENY); + new_entry->use_pmap = TRUE; result = KERN_SUCCESS; } else { @@ -8457,6 +9033,12 @@ vm_map_copyin_common( */ if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) { + if (result != KERN_MEMORY_RESTART_COPY) { + vm_object_deallocate(new_entry->object.vm_object); + new_entry->object.vm_object = VM_OBJECT_NULL; + assert(!new_entry->iokit_acct); + new_entry->use_pmap = TRUE; + } RETURN(KERN_INVALID_ADDRESS); } @@ -8521,36 +9103,60 @@ vm_map_copyin_common( src_start = new_entry->vme_end; new_entry = VM_MAP_ENTRY_NULL; while ((src_start >= src_end) && (src_end != 0)) { - if (src_map != base_map) { - submap_map_t *ptr; - - ptr = parent_maps; - assert(ptr != NULL); - parent_maps = parent_maps->next; - - /* fix up the damage we did in that submap */ - vm_map_simplify_range(src_map, - src_base, - src_end); - - vm_map_unlock(src_map); - vm_map_deallocate(src_map); - vm_map_lock(ptr->parent_map); - src_map = ptr->parent_map; - src_base = ptr->base_start; - src_start = ptr->base_start + ptr->base_len; - src_end = ptr->base_end; - if ((src_end > src_start) && - !vm_map_lookup_entry( - src_map, src_start, &tmp_entry)) - RETURN(KERN_INVALID_ADDRESS); - kfree(ptr, sizeof(submap_map_t)); - if(parent_maps == NULL) - map_share = FALSE; - src_entry = tmp_entry->vme_prev; - } else + submap_map_t *ptr; + + if (src_map == base_map) { + /* back to the top */ break; + } + + ptr = parent_maps; + assert(ptr != NULL); + parent_maps = parent_maps->next; + + /* fix up the damage we did in that submap */ + vm_map_simplify_range(src_map, + src_base, + src_end); + + vm_map_unlock(src_map); + vm_map_deallocate(src_map); + vm_map_lock(ptr->parent_map); + src_map = ptr->parent_map; + src_base = ptr->base_start; + src_start = ptr->base_start + ptr->base_len; + src_end = ptr->base_end; + if (!vm_map_lookup_entry(src_map, + src_start, + &tmp_entry) && + (src_end > src_start)) { + RETURN(KERN_INVALID_ADDRESS); + } + kfree(ptr, sizeof(submap_map_t)); + if (parent_maps == NULL) + map_share = FALSE; + src_entry = tmp_entry->vme_prev; + } + + if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) && + (src_start >= src_addr + len) && + (src_addr + len != 0)) { + /* + * Stop copying now, even though we haven't reached + * "src_end". We'll adjust the end of the last copy + * entry at the end, if needed. + * + * If src_map's alignment is different from the + * system's page-alignment, there could be + * extra non-map-aligned map entries between + * the original (non-rounded) "src_addr + len" + * and the rounded "src_end". + * We do not want to copy those map entries since + * they're not part of the copied range. 
+ */ + break; } + if ((src_start >= src_end) && (src_end != 0)) break; @@ -8559,43 +9165,8 @@ vm_map_copyin_common( */ tmp_entry = src_entry->vme_next; - if ((tmp_entry->vme_start != src_start) || + if ((tmp_entry->vme_start != src_start) || (tmp_entry == vm_map_to_entry(src_map))) { - - if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT && - (vm_map_round_page(src_entry->vme_end, - VM_MAP_PAGE_MASK(src_map)) == - src_end)) { - vm_map_entry_t last_copy_entry; - vm_map_offset_t adjustment; - - /* - * This is the last entry in the range we - * want and it happens to miss a few pages - * because it is not map-aligned (must have - * been imported from a differently-aligned - * map). - * Let's say we're done, but first we have - * to compensate for the alignment adjustment - * we're about to do before returning. - */ - - last_copy_entry = vm_map_copy_last_entry(copy); - assert(last_copy_entry != - vm_map_copy_to_entry(copy)); - adjustment = - (vm_map_round_page((copy->offset + - copy->size), - VM_MAP_PAGE_MASK(src_map)) - - vm_map_round_page((copy->offset + - copy->size), - PAGE_MASK)); - last_copy_entry->vme_end += adjustment; - last_copy_entry->map_aligned = FALSE; - /* ... and we're done */ - break; - } - RETURN(KERN_INVALID_ADDRESS); } } @@ -8627,12 +9198,28 @@ vm_map_copyin_common( vm_map_unlock(src_map); if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) { + vm_map_offset_t original_start, original_offset, original_end; + assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK); /* adjust alignment of first copy_entry's "vme_start" */ tmp_entry = vm_map_copy_first_entry(copy); if (tmp_entry != vm_map_copy_to_entry(copy)) { vm_map_offset_t adjustment; + + original_start = tmp_entry->vme_start; + original_offset = tmp_entry->offset; + + /* map-align the start of the first copy entry... */ + adjustment = (tmp_entry->vme_start - + vm_map_trunc_page( + tmp_entry->vme_start, + VM_MAP_PAGE_MASK(src_map))); + tmp_entry->vme_start -= adjustment; + tmp_entry->offset -= adjustment; + copy_addr -= adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + /* ... adjust for mis-aligned start of copy range */ adjustment = (vm_map_trunc_page(copy->offset, PAGE_MASK) - @@ -8646,12 +9233,35 @@ vm_map_copyin_common( copy_addr += adjustment; assert(tmp_entry->vme_start < tmp_entry->vme_end); } + + /* + * Assert that the adjustments haven't exposed + * more than was originally copied... + */ + assert(tmp_entry->vme_start >= original_start); + assert(tmp_entry->offset >= original_offset); + /* + * ... and that it did not adjust outside of a + * a single 16K page. + */ + assert(vm_map_trunc_page(tmp_entry->vme_start, + VM_MAP_PAGE_MASK(src_map)) == + vm_map_trunc_page(original_start, + VM_MAP_PAGE_MASK(src_map))); } /* adjust alignment of last copy_entry's "vme_end" */ tmp_entry = vm_map_copy_last_entry(copy); if (tmp_entry != vm_map_copy_to_entry(copy)) { vm_map_offset_t adjustment; + + original_end = tmp_entry->vme_end; + + /* map-align the end of the last copy entry... */ + tmp_entry->vme_end = + vm_map_round_page(tmp_entry->vme_end, + VM_MAP_PAGE_MASK(src_map)); + /* ... adjust for mis-aligned end of copy range */ adjustment = (vm_map_round_page((copy->offset + copy->size), @@ -8665,6 +9275,20 @@ vm_map_copyin_common( tmp_entry->vme_end -= adjustment; assert(tmp_entry->vme_start < tmp_entry->vme_end); } + + /* + * Assert that the adjustments haven't exposed + * more than was originally copied... + */ + assert(tmp_entry->vme_end <= original_end); + /* + * ... 
and that it did not adjust outside of a + * a single 16K page. + */ + assert(vm_map_round_page(tmp_entry->vme_end, + VM_MAP_PAGE_MASK(src_map)) == + vm_map_round_page(original_end, + VM_MAP_PAGE_MASK(src_map))); } } @@ -8673,6 +9297,7 @@ vm_map_copyin_common( /* up from different sub-maps */ tmp_entry = vm_map_copy_first_entry(copy); + copy_size = 0; /* compute actual size */ while (tmp_entry != vm_map_copy_to_entry(copy)) { assert(VM_MAP_PAGE_ALIGNED( copy_addr + (tmp_entry->vme_end - @@ -8693,9 +9318,31 @@ vm_map_copyin_common( tmp_entry->vme_start = copy_addr; assert(tmp_entry->vme_start < tmp_entry->vme_end); copy_addr += tmp_entry->vme_end - tmp_entry->vme_start; + copy_size += tmp_entry->vme_end - tmp_entry->vme_start; tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next; } + if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT && + copy_size < copy->size) { + /* + * The actual size of the VM map copy is smaller than what + * was requested by the caller. This must be because some + * PAGE_SIZE-sized pages are missing at the end of the last + * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range. + * The caller might not have been aware of those missing + * pages and might not want to be aware of it, which is + * fine as long as they don't try to access (and crash on) + * those missing pages. + * Let's adjust the size of the "copy", to avoid failing + * in vm_map_copyout() or vm_map_copy_overwrite(). + */ + assert(vm_map_round_page(copy_size, + VM_MAP_PAGE_MASK(src_map)) == + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(src_map))); + copy->size = copy_size; + } + *copy_result = copy; return(KERN_SUCCESS); @@ -8845,6 +9492,7 @@ vm_map_fork_share( old_entry->vme_start)); old_entry->offset = 0; old_entry->object.vm_object = object; + old_entry->use_pmap = TRUE; assert(!old_entry->needs_copy); } else if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { @@ -9185,8 +9833,10 @@ vm_map_fork( new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */ vm_map_entry_copy(new_entry, old_entry); - /* clear address space specifics */ - new_entry->use_pmap = FALSE; + if (new_entry->is_sub_map) { + /* clear address space specifics */ + new_entry->use_pmap = FALSE; + } if (! vm_object_copy_quickly( &new_entry->object.vm_object, @@ -9246,6 +9896,7 @@ vm_map_fork( old_entry = old_entry->vme_next; } + new_map->size = new_size; vm_map_unlock(old_map); vm_map_deallocate(old_map); @@ -9269,12 +9920,20 @@ vm_map_exec( { SHARED_REGION_TRACE_DEBUG( ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n", - current_task(), new_map, task, fsroot, cpu)); + (void *)VM_KERNEL_ADDRPERM(current_task()), + (void *)VM_KERNEL_ADDRPERM(new_map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu)); (void) vm_commpage_enter(new_map, task); (void) vm_shared_region_enter(new_map, task, fsroot, cpu); SHARED_REGION_TRACE_DEBUG( ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n", - current_task(), new_map, task, fsroot, cpu)); + (void *)VM_KERNEL_ADDRPERM(current_task()), + (void *)VM_KERNEL_ADDRPERM(new_map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu)); return KERN_SUCCESS; } @@ -9325,6 +9984,7 @@ vm_map_lookup_locked( vm_map_offset_t old_end = 0; register vm_prot_t prot; boolean_t mask_protections; + boolean_t force_copy; vm_prot_t original_fault_type; /* @@ -9333,7 +9993,8 @@ vm_map_lookup_locked( * absolute value. */ mask_protections = (fault_type & VM_PROT_IS_MASK) ? 
TRUE : FALSE; - fault_type &= ~VM_PROT_IS_MASK; + force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE; + fault_type &= VM_PROT_ALL; original_fault_type = fault_type; *real_map = map; @@ -9509,8 +10170,10 @@ submap_recurse: (old_end - cow_parent_vaddr); vm_map_clip_start(map, submap_entry, local_start); vm_map_clip_end(map, submap_entry, local_end); - /* unnesting was done in vm_map_clip_start/end() */ - assert(!submap_entry->use_pmap); + if (submap_entry->is_sub_map) { + /* unnesting was done when clipping */ + assert(!submap_entry->use_pmap); + } /* This is the COW case, lets connect */ /* an entry in our space to the underlying */ @@ -9613,13 +10276,17 @@ submap_recurse: vm_map_clip_start(map, entry, local_start); vm_map_clip_end(map, entry, local_end); - /* unnesting was done in vm_map_clip_start/end() */ - assert(!entry->use_pmap); + if (entry->is_sub_map) { + /* unnesting was done when clipping */ + assert(!entry->use_pmap); + } /* substitute copy object for */ /* shared map entry */ vm_map_deallocate(entry->object.sub_map); + assert(!entry->iokit_acct); entry->is_sub_map = FALSE; + entry->use_pmap = TRUE; entry->object.vm_object = copy_object; /* propagate the submap entry's protections */ @@ -9709,7 +10376,7 @@ submap_recurse: * demote the permissions allowed. */ - if ((fault_type & VM_PROT_WRITE) || *wired) { + if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) { /* * Make a new object, and place it in the * object chain. Note that no new references @@ -9771,6 +10438,11 @@ submap_recurse: /* ... the caller will change "interruptible" if needed */ fault_info->cluster_size = 0; fault_info->user_tag = entry->alias; + fault_info->pmap_options = 0; + if (entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap)) { + fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT; + } fault_info->behavior = entry->behavior; fault_info->lo_offset = entry->offset; fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; @@ -10869,34 +11541,37 @@ vm_map_simplify_entry( (prev_entry->vme_end == this_entry->vme_start) && (prev_entry->is_sub_map == this_entry->is_sub_map) && - (prev_entry->object.vm_object == this_entry->object.vm_object) && ((prev_entry->offset + (prev_entry->vme_end - prev_entry->vme_start)) == this_entry->offset) && - (prev_entry->map_aligned == this_entry->map_aligned) && - (prev_entry->inheritance == this_entry->inheritance) && + (prev_entry->behavior == this_entry->behavior) && + (prev_entry->needs_copy == this_entry->needs_copy) && (prev_entry->protection == this_entry->protection) && (prev_entry->max_protection == this_entry->max_protection) && - (prev_entry->behavior == this_entry->behavior) && + (prev_entry->inheritance == this_entry->inheritance) && + (prev_entry->use_pmap == this_entry->use_pmap) && (prev_entry->alias == this_entry->alias) && - (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) && (prev_entry->no_cache == this_entry->no_cache) && + (prev_entry->permanent == this_entry->permanent) && + (prev_entry->map_aligned == this_entry->map_aligned) && + (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) && + (prev_entry->used_for_jit == this_entry->used_for_jit) && + /* from_reserved_zone: OK if that field doesn't match */ + (prev_entry->iokit_acct == this_entry->iokit_acct) && + (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->user_wired_count == this_entry->user_wired_count) && - (prev_entry->needs_copy == this_entry->needs_copy) && - (prev_entry->permanent == this_entry->permanent) && - - 
(prev_entry->use_pmap == FALSE) && - (this_entry->use_pmap == FALSE) && (prev_entry->in_transition == FALSE) && (this_entry->in_transition == FALSE) && (prev_entry->needs_wakeup == FALSE) && (this_entry->needs_wakeup == FALSE) && (prev_entry->is_shared == FALSE) && - (this_entry->is_shared == FALSE) + (this_entry->is_shared == FALSE) && + (prev_entry->superpage_size == FALSE) && + (this_entry->superpage_size == FALSE) ) { vm_map_store_entry_unlink(map, prev_entry); assert(prev_entry->vme_start < this_entry->vme_end); @@ -11167,7 +11842,9 @@ vm_map_behavior_set( while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { vm_map_clip_end(map, entry, end); - assert(!entry->use_pmap); + if (entry->is_sub_map) { + assert(!entry->use_pmap); + } if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) { entry->zero_wired_pages = TRUE; @@ -11302,6 +11979,11 @@ vm_map_willneed( fault_info.lo_offset = offset; fault_info.hi_offset = offset + len; fault_info.user_tag = entry->alias; + fault_info.pmap_options = 0; + if (entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap)) { + fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT; + } /* * If there's no read permission to this mapping, then just @@ -11339,7 +12021,7 @@ vm_map_willneed( * * Note that memory_object_data_request() places limits on the * amount of I/O it will do. Regardless of the len we - * specified, it won't do more than MAX_UPL_TRANSFER and it + * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it * silently truncates the len to that size. This isn't * necessarily bad since madvise shouldn't really be used to * page in unlimited amounts of data. Other Unix variants @@ -11527,11 +12209,6 @@ vm_map_reuse_pages( object = entry->object.vm_object; if (object != VM_OBJECT_NULL) { - /* tell pmap to not count this range as "reusable" */ - pmap_reusable(map->pmap, - MAX(start, entry->vme_start), - MIN(end, entry->vme_end), - FALSE); vm_object_lock(object); vm_object_reuse_pages(object, start_offset, end_offset, TRUE); @@ -11622,16 +12299,21 @@ vm_map_reusable_pages( vm_object_lock(object); - if (object->ref_count == 1 && !object->shadow) + if (object->ref_count == 1 && + !object->shadow && + /* + * "iokit_acct" entries are billed for their virtual size + * (rather than for their resident pages only), so they + * wouldn't benefit from making pages reusable, and it + * would be hard to keep track of pages that are both + * "iokit_acct" and "reusable" in the pmap stats and ledgers. + */ + !(entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap))) kill_pages = 1; else kill_pages = -1; if (kill_pages != -1) { - /* tell pmap to count this range as "reusable" */ - pmap_reusable(map->pmap, - MAX(start, entry->vme_start), - MIN(end, entry->vme_end), - TRUE); vm_object_deactivate_pages(object, start_offset, end_offset - start_offset, @@ -11734,7 +12416,8 @@ vm_map_entry_insert( boolean_t no_cache, boolean_t permanent, unsigned int superpage_size, - boolean_t clear_map_aligned) + boolean_t clear_map_aligned, + boolean_t is_submap) { vm_map_entry_t new_entry; @@ -11748,7 +12431,8 @@ vm_map_entry_insert( new_entry->map_aligned = FALSE; } if (clear_map_aligned && - ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) { + (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) || + ! 
VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) { new_entry->map_aligned = FALSE; } @@ -11756,9 +12440,9 @@ vm_map_entry_insert( new_entry->vme_end = end; assert(page_aligned(new_entry->vme_start)); assert(page_aligned(new_entry->vme_end)); - assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, - VM_MAP_PAGE_MASK(map))); if (new_entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, + VM_MAP_PAGE_MASK(map))); assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, VM_MAP_PAGE_MASK(map))); } @@ -11767,7 +12451,7 @@ vm_map_entry_insert( new_entry->object.vm_object = object; new_entry->offset = offset; new_entry->is_shared = is_shared; - new_entry->is_sub_map = FALSE; + new_entry->is_sub_map = is_submap; new_entry->needs_copy = needs_copy; new_entry->in_transition = in_transition; new_entry->needs_wakeup = FALSE; @@ -11777,7 +12461,19 @@ vm_map_entry_insert( new_entry->behavior = behavior; new_entry->wired_count = wired_count; new_entry->user_wired_count = 0; - new_entry->use_pmap = FALSE; + if (is_submap) { + /* + * submap: "use_pmap" means "nested". + * default: false. + */ + new_entry->use_pmap = FALSE; + } else { + /* + * object: "use_pmap" means "use pmap accounting" for footprint. + * default: true. + */ + new_entry->use_pmap = TRUE; + } new_entry->alias = 0; new_entry->zero_wired_pages = FALSE; new_entry->no_cache = no_cache; @@ -11787,6 +12483,7 @@ vm_map_entry_insert( else new_entry->superpage_size = FALSE; new_entry->used_for_jit = FALSE; + new_entry->iokit_acct = FALSE; /* * Insert the new entry into the list. @@ -11901,6 +12598,24 @@ vm_map_remap_extract( object = VM_OBJECT_NULL; } else { object = src_entry->object.vm_object; + if (src_entry->iokit_acct) { + /* + * This entry uses "IOKit accounting". + */ + } else if (object != VM_OBJECT_NULL && + object->purgable != VM_PURGABLE_DENY) { + /* + * Purgeable objects have their own accounting: + * no pmap accounting for them. + */ + assert(!src_entry->use_pmap); + } else { + /* + * Not IOKit or purgeable: + * must be accounted by pmap stats. + */ + assert(src_entry->use_pmap); + } if (object == VM_OBJECT_NULL) { object = vm_object_allocate(entry_size); @@ -11967,7 +12682,10 @@ vm_map_remap_extract( new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy(new_entry, src_entry); - new_entry->use_pmap = FALSE; /* clr address space specifics */ + if (new_entry->is_sub_map) { + /* clr address space specifics */ + new_entry->use_pmap = FALSE; + } new_entry->map_aligned = FALSE; @@ -12464,7 +13182,8 @@ StartAgain: ; vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map)); kr = vm_map_delete(map, start, end, - VM_MAP_REMOVE_SAVE_ENTRIES, + (VM_MAP_REMOVE_SAVE_ENTRIES | + VM_MAP_REMOVE_NO_MAP_ALIGN), zap_map); if (kr == KERN_SUCCESS) { vm_map_destroy(zap_map, @@ -12679,6 +13398,7 @@ vm_map_purgable_control( vm_map_entry_t entry; vm_object_t object; kern_return_t kr; + boolean_t was_nonvolatile; /* * Vet all the input parameters and current type and state of the @@ -12722,9 +13442,10 @@ vm_map_purgable_control( } object = entry->object.vm_object; - if (object == VM_OBJECT_NULL) { + if (object == VM_OBJECT_NULL || + object->purgable == VM_PURGABLE_DENY) { /* - * Object must already be present or it can't be purgable. + * Object must already be present and be purgeable. 
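*/

From user space, the stricter check (the object must already be purgeable) changes nothing as long as the memory was allocated purgeable in the first place. A hedged sketch of the usual round trip -- these are real Mach calls, but error handling is elided and this assumes VM_PURGABLE_SET_STATE returns the previous state through "state":

#include <mach/mach.h>
#include <mach/vm_purgable.h>
#include <stdio.h>

int
main(void)
{
	vm_address_t addr = 0;
	int state;

	/* Only VM_FLAGS_PURGABLE allocations get a purgeable object. */
	vm_allocate(mach_task_self(), &addr, 4096,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);

	state = VM_PURGABLE_VOLATILE;           /* eligible for reclaim */
	vm_purgable_control(mach_task_self(), addr,
	    VM_PURGABLE_SET_STATE, &state);

	state = VM_PURGABLE_NONVOLATILE;        /* take it back */
	vm_purgable_control(mach_task_self(), addr,
	    VM_PURGABLE_SET_STATE, &state);
	if (state == VM_PURGABLE_EMPTY)         /* previous state */
		printf("contents were purged while volatile\n");

	vm_deallocate(mach_task_self(), addr, 4096);
	return 0;
}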
*/ vm_map_unlock_read(map); return KERN_INVALID_ARGUMENT; @@ -12744,11 +13465,24 @@ vm_map_purgable_control( return KERN_INVALID_ARGUMENT; } #endif - + + assert(!entry->is_sub_map); + assert(!entry->use_pmap); /* purgeable has its own accounting */ + vm_map_unlock_read(map); + was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE); + kr = vm_object_purgable_control(object, control, state); + if (was_nonvolatile && + object->purgable != VM_PURGABLE_NONVOLATILE && + map->pmap == kernel_pmap) { +#if DEBUG + object->vo_purgeable_volatilizer = kernel_task; +#endif /* DEBUG */ + } + vm_object_unlock(object); return kr; @@ -13599,19 +14333,6 @@ vm_map_has_hard_pagezero( return (map->min_offset >= pagezero_size); } -void -vm_map_set_4GB_pagezero(vm_map_t map) -{ -#pragma unused(map) - -} - -void -vm_map_clear_4GB_pagezero(vm_map_t map) -{ -#pragma unused(map) -} - /* * Raise a VM map's maximun offset. */ @@ -13721,7 +14442,7 @@ vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes) { pmap_t pmap = vm_map_pmap(map); - ledger_credit(pmap->ledger, task_ledgers.iokit_mem, bytes); + ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes); ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes); } @@ -13730,7 +14451,7 @@ vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes) { pmap_t pmap = vm_map_pmap(map); - ledger_debit(pmap->ledger, task_ledgers.iokit_mem, bytes); + ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes); ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes); } @@ -13826,6 +14547,64 @@ kern_return_t vm_map_sign(vm_map_t map, } #endif +kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed) +{ + vm_map_entry_t entry = VM_MAP_ENTRY_NULL; + vm_map_entry_t next_entry; + kern_return_t kr = KERN_SUCCESS; + vm_map_t zap_map; + + vm_map_lock(map); + + /* + * We use a "zap_map" to avoid having to unlock + * the "map" in vm_map_delete(). + */ + zap_map = vm_map_create(PMAP_NULL, + map->min_offset, + map->max_offset, + map->hdr.entries_pageable); + + if (zap_map == VM_MAP_NULL) { + return KERN_RESOURCE_SHORTAGE; + } + + vm_map_set_page_shift(zap_map, + VM_MAP_PAGE_SHIFT(map)); + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = next_entry) { + next_entry = entry->vme_next; + + if (entry->object.vm_object && !entry->is_sub_map && (entry->object.vm_object->internal == TRUE) + && (entry->object.vm_object->ref_count == 1)) { + + *reclaimed_resident += entry->object.vm_object->resident_page_count; + *reclaimed_compressed += vm_compressor_pager_get_count(entry->object.vm_object->pager); + + (void)vm_map_delete(map, + entry->vme_start, + entry->vme_end, + VM_MAP_REMOVE_SAVE_ENTRIES, + zap_map); + } + } + + vm_map_unlock(map); + + /* + * Get rid of the "zap_maps" and all the map entries that + * they may still contain. + */ + if (zap_map != VM_MAP_NULL) { + vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); + zap_map = VM_MAP_NULL; + } + + return kr; +} + #if CONFIG_FREEZE kern_return_t vm_map_freeze_walk( @@ -14041,13 +14820,15 @@ vm_map_entry_should_cow_for_true_share( return FALSE; } - if (entry->alias != VM_MEMORY_MALLOC) { - /* not tagged as an ObjectiveC's Garbage Collector entry */ + if (entry->alias != VM_MEMORY_MALLOC && + entry->alias != VM_MEMORY_MALLOC_SMALL) { + /* not a malloc heap or Obj-C Garbage Collector heap */ return FALSE; } if (entry->wired_count) { /* wired: can't change the map entry... 
*/ + vm_counters.should_cow_but_wired++; return FALSE; } @@ -14073,8 +14854,15 @@ vm_map_entry_should_cow_for_true_share( return FALSE; } - if (object->vo_size != ANON_CHUNK_SIZE) { - /* not an object created for the ObjC Garbage Collector */ + if (entry->alias == VM_MEMORY_MALLOC && + object->vo_size != ANON_CHUNK_SIZE) { + /* ... not an object created for the ObjC Garbage Collector */ + return FALSE; + } + + if (entry->alias == VM_MEMORY_MALLOC_SMALL && + object->vo_size != 2048 * 4096) { + /* ... not a "MALLOC_SMALL" heap */ return FALSE; } @@ -14139,6 +14927,67 @@ vm_map_set_page_shift( return KERN_SUCCESS; } +int +vm_map_purge( + vm_map_t map) +{ + int num_object_purged; + vm_map_entry_t entry; + vm_map_offset_t next_address; + vm_object_t object; + int state; + kern_return_t kr; + + num_object_purged = 0; + + vm_map_lock_read(map); + entry = vm_map_first_entry(map); + while (entry != vm_map_to_entry(map)) { + if (entry->is_sub_map) { + goto next; + } + if (! (entry->protection & VM_PROT_WRITE)) { + goto next; + } + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + goto next; + } + if (object->purgable != VM_PURGABLE_VOLATILE) { + goto next; + } + + vm_object_lock(object); +#if 00 + if (entry->offset != 0 || + (entry->vme_end - entry->vme_start) != object->vo_size) { + vm_object_unlock(object); + goto next; + } +#endif + next_address = entry->vme_end; + vm_map_unlock_read(map); + state = VM_PURGABLE_EMPTY; + kr = vm_object_purgable_control(object, + VM_PURGABLE_SET_STATE, + &state); + if (kr == KERN_SUCCESS) { + num_object_purged++; + } + vm_object_unlock(object); + + vm_map_lock_read(map); + if (vm_map_lookup_entry(map, next_address, &entry)) { + continue; + } + next: + entry = entry->vme_next; + } + vm_map_unlock_read(map); + + return num_object_purged; +} + kern_return_t vm_map_query_volatile( vm_map_t map, @@ -14207,3 +15056,49 @@ vm_map_query_volatile( return KERN_SUCCESS; } + +#if VM_SCAN_FOR_SHADOW_CHAIN +int vm_map_shadow_max(vm_map_t map); +int vm_map_shadow_max( + vm_map_t map) +{ + int shadows, shadows_max; + vm_map_entry_t entry; + vm_object_t object, next_object; + + if (map == NULL) + return 0; + + shadows_max = 0; + + vm_map_lock_read(map); + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = entry->vme_next) { + if (entry->is_sub_map) { + continue; + } + object = entry->object.vm_object; + if (object == NULL) { + continue; + } + vm_object_lock_shared(object); + for (shadows = 0; + object->shadow != NULL; + shadows++, object = next_object) { + next_object = object->shadow; + vm_object_lock_shared(next_object); + vm_object_unlock(object); + } + vm_object_unlock(object); + if (shadows > shadows_max) { + shadows_max = shadows; + } + } + + vm_map_unlock_read(map); + + return shadows_max; +} +#endif /* VM_SCAN_FOR_SHADOW_CHAIN */ diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h index 2a40d949d..6d05060c1 100644 --- a/osfmk/vm/vm_map.h +++ b/osfmk/vm/vm_map.h @@ -105,7 +105,7 @@ __END_DECLS #include #include -#include +#include #include #include @@ -228,7 +228,14 @@ struct vm_map_entry { /* vm_prot_t */ protection:3, /* protection code */ /* vm_prot_t */ max_protection:3,/* maximum protection */ /* vm_inherit_t */ inheritance:2, /* inheritance */ - /* boolean_t */ use_pmap:1, /* nested pmaps */ + /* boolean_t */ use_pmap:1, /* + * use_pmap is overloaded: + * if "is_sub_map": + * use a nested pmap? + * else (i.e. if object): + * use pmap accounting + * for footprint? 
+ */ /* * IMPORTANT: * The "alias" field can be updated while holding the VM map lock @@ -244,7 +251,10 @@ struct vm_map_entry { /* boolean_t */ used_for_jit:1, /* boolean_t */ from_reserved_zone:1, /* Allocated from * kernel reserved zone */ - __unused_bits:1; + + /* iokit accounting: use the virtual size rather than resident size: */ + /* boolean_t */ iokit_acct:1; + unsigned short wired_count; /* can be paged if = 0 */ unsigned short user_wired_count; /* for vm_wire */ #if DEBUG @@ -317,7 +327,7 @@ struct vm_map_header { * quickly find free space. */ struct _vm_map { - lock_t lock; /* uni- and smp-lock */ + lck_rw_t lock; /* map lock */ struct vm_map_header hdr; /* Map entry header */ #define min_offset hdr.links.start /* start of range */ #define max_offset hdr.links.end /* end of range */ @@ -463,19 +473,19 @@ struct vm_map_copy { #define vm_map_lock_init(map) \ ((map)->timestamp = 0 , \ - lock_init(&(map)->lock, TRUE, 0, 0)) + lck_rw_init(&(map)->lock, &vm_map_lck_grp, &vm_map_lck_rw_attr)) -#define vm_map_lock(map) lock_write(&(map)->lock) +#define vm_map_lock(map) lck_rw_lock_exclusive(&(map)->lock) #define vm_map_unlock(map) \ - ((map)->timestamp++ , lock_write_done(&(map)->lock)) -#define vm_map_lock_read(map) lock_read(&(map)->lock) -#define vm_map_unlock_read(map) lock_read_done(&(map)->lock) + ((map)->timestamp++ , lck_rw_done(&(map)->lock)) +#define vm_map_lock_read(map) lck_rw_lock_shared(&(map)->lock) +#define vm_map_unlock_read(map) lck_rw_done(&(map)->lock) #define vm_map_lock_write_to_read(map) \ - ((map)->timestamp++ , lock_write_to_read(&(map)->lock)) + ((map)->timestamp++ , lck_rw_lock_exclusive_to_shared(&(map)->lock)) /* lock_read_to_write() returns FALSE on failure. Macro evaluates to * zero on success and non-zero value on failure. */ -#define vm_map_lock_read_to_write(map) (lock_read_to_write(&(map)->lock) != TRUE) +#define vm_map_lock_read_to_write(map) (lck_rw_lock_shared_to_exclusive(&(map)->lock) != TRUE) /* * Exported procedures that operate on vm_map_t. 
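Since lck_rw_lock_shared_to_exclusive() drops the shared lock entirely when the upgrade fails, anything learned under the read lock must be treated as stale after a failed vm_map_lock_read_to_write(). A minimal sketch of the resulting retry pattern under the macro semantics defined above (zero on success, non-zero on failure); the function itself is hypothetical:

	static kern_return_t
	modify_entry_at(vm_map_t map, vm_map_offset_t addr)
	{
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		while (vm_map_lookup_entry(map, addr, &entry)) {
			if (vm_map_lock_read_to_write(map) == 0) {
				/* upgrade succeeded: lock now held exclusive */
				/* ... safe to modify "entry" here ... */
				vm_map_unlock(map);	/* also bumps map->timestamp */
				return KERN_SUCCESS;
			}
			/*
			 * Upgrade failed and the shared lock was dropped:
			 * "entry" may have been clipped, merged or freed,
			 * so re-take the lock and redo the lookup.
			 */
			vm_map_lock_read(map);
		}
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}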
@@ -560,7 +570,8 @@ extern vm_map_entry_t vm_map_entry_insert( boolean_t no_cache, boolean_t permanent, unsigned int superpage_size, - boolean_t clear_map_aligned); + boolean_t clear_map_aligned, + boolean_t is_submap); /* @@ -685,8 +696,8 @@ extern vm_object_t vm_submap_object; */ #define vm_map_entry_wait(map, interruptible) \ ((map)->timestamp++ , \ - thread_sleep_lock_write((event_t)&(map)->hdr, \ - &(map)->lock, interruptible)) + lck_rw_sleep(&(map)->lock, LCK_SLEEP_EXCLUSIVE|LCK_SLEEP_PROMOTED_PRI, \ + (event_t)&(map)->hdr, interruptible)) #define vm_map_entry_wakeup(map) \ @@ -893,6 +904,8 @@ extern kern_return_t vm_map_set_cache_attr( extern int override_nx(vm_map_t map, uint32_t user_tag); +extern int vm_map_purge(vm_map_t map); + #endif /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS @@ -939,6 +952,13 @@ extern kern_return_t vm_map_wire( vm_prot_t access_type, boolean_t user_wire); +extern kern_return_t vm_map_wire_and_extract( + vm_map_t map, + vm_map_offset_t start, + vm_prot_t access_type, + boolean_t user_wire, + ppnum_t *physpage_p); + /* unwire a region */ extern kern_return_t vm_map_unwire( vm_map_t map, @@ -960,6 +980,20 @@ extern kern_return_t vm_map_enter_mem_object( vm_prot_t max_protection, vm_inherit_t inheritance); +/* Enter a mapping of a memory object */ +extern kern_return_t vm_map_enter_mem_object_prefault( + vm_map_t map, + vm_map_offset_t *address, + vm_map_size_t size, + vm_map_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + vm_prot_t cur_protection, + vm_prot_t max_protection, + upl_page_list_ptr_t page_list, + unsigned int page_list_count); + /* Enter a mapping of a memory object */ extern kern_return_t vm_map_enter_mem_object_control( vm_map_t map, @@ -1050,15 +1084,8 @@ extern boolean_t vm_map_has_hard_pagezero( extern boolean_t vm_map_is_64bit( vm_map_t map); -#define vm_map_has_4GB_pagezero(map) vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x100000000ULL) -extern void vm_map_set_4GB_pagezero( - vm_map_t map); - -extern void vm_map_clear_4GB_pagezero( - vm_map_t map); - extern kern_return_t vm_map_raise_max_offset( vm_map_t map, vm_map_offset_t new_max_offset); @@ -1172,6 +1199,7 @@ extern kern_return_t vm_map_set_page_shift(vm_map_t map, int pageshift); #define VM_MAP_REMOVE_WAIT_FOR_KWIRE 0x4 #define VM_MAP_REMOVE_SAVE_ENTRIES 0x8 #define VM_MAP_REMOVE_NO_PMAP_CLEANUP 0x10 +#define VM_MAP_REMOVE_NO_MAP_ALIGN 0x20 /* Support for UPLs from vm_maps */ @@ -1191,6 +1219,11 @@ extern kern_return_t vm_map_sign(vm_map_t map, vm_map_offset_t end); #endif +extern kern_return_t vm_map_partial_reap( + vm_map_t map, + unsigned int *reclaimed_resident, + unsigned int *reclaimed_compressed); + #if CONFIG_FREEZE void vm_map_freeze_thaw_init(void); void vm_map_freeze_thaw(void); diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index a16857ec0..3658a00a2 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -79,7 +79,6 @@ #include #include -#include #include #include #include @@ -101,6 +100,51 @@ #include +#if CONFIG_PHANTOM_CACHE +#include +#endif + +boolean_t vm_object_collapse_compressor_allowed = TRUE; + +struct vm_counters vm_counters; + +#if VM_OBJECT_TRACKING +boolean_t vm_object_tracking_inited = FALSE; +decl_simple_lock_data(static,vm_object_tracking_lock_data); +btlog_t *vm_object_tracking_btlog; +static void +vm_object_tracking_lock(void *context) +{ + simple_lock((simple_lock_t)context); +} +static void +vm_object_tracking_unlock(void *context) +{ + simple_unlock((simple_lock_t)context); +} +void 
+vm_object_tracking_init(void) +{ + int vm_object_tracking; + + vm_object_tracking = 1; + PE_parse_boot_argn("vm_object_tracking", &vm_object_tracking, + sizeof (vm_object_tracking)); + + if (vm_object_tracking) { + simple_lock_init(&vm_object_tracking_lock_data, 0); + vm_object_tracking_btlog = btlog_create( + 50000, + VM_OBJECT_TRACKING_BTDEPTH, + vm_object_tracking_lock, + vm_object_tracking_unlock, + &vm_object_tracking_lock_data); + assert(vm_object_tracking_btlog); + vm_object_tracking_inited = TRUE; + } +} +#endif /* VM_OBJECT_TRACKING */ + /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given @@ -349,6 +393,35 @@ unsigned int vm_object_reap_count_async = 0; #define vm_object_reaper_unlock() \ lck_mtx_unlock(&vm_object_reaper_lock_data) +#if CONFIG_IOSCHED +/* I/O Re-prioritization request list */ +queue_head_t io_reprioritize_list; +lck_spin_t io_reprioritize_list_lock; + +#define IO_REPRIORITIZE_LIST_LOCK() \ + lck_spin_lock(&io_reprioritize_list_lock) +#define IO_REPRIORITIZE_LIST_UNLOCK() \ + lck_spin_unlock(&io_reprioritize_list_lock) + +#define MAX_IO_REPRIORITIZE_REQS 8192 +zone_t io_reprioritize_req_zone; + +/* I/O Re-prioritization thread */ +int io_reprioritize_wakeup = 0; +static void io_reprioritize_thread(void *param __unused, wait_result_t wr __unused); + +#define IO_REPRIO_THREAD_WAKEUP() thread_wakeup((event_t)&io_reprioritize_wakeup) +#define IO_REPRIO_THREAD_CONTINUATION() \ +{ \ + assert_wait(&io_reprioritize_wakeup, THREAD_UNINT); \ + thread_block(io_reprioritize_thread); \ +} + +void vm_page_request_reprioritize(vm_object_t, uint64_t, uint32_t, int); +void vm_page_handle_prio_inversion(vm_object_t, vm_page_t); +void vm_decmp_upl_reprioritize(upl_t, int); +#endif + #if 0 #undef KERNEL_DEBUG #define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT @@ -415,6 +488,8 @@ vm_object_hash_insert( { queue_t bucket; + vm_object_lock_assert_exclusive(object); + bucket = &vm_object_hashtable[vm_object_hash(entry->pager)]; queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link); @@ -462,11 +537,25 @@ _vm_object_allocate( *object = vm_object_template; queue_init(&object->memq); queue_init(&object->msr_q); -#if UPL_DEBUG +#if UPL_DEBUG || CONFIG_IOSCHED queue_init(&object->uplq); -#endif /* UPL_DEBUG */ +#endif vm_object_lock_init(object); object->vo_size = size; + +#if VM_OBJECT_TRACKING_OP_CREATED + if (vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int numsaved = 0; + + numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_CREATED, + bt, + numsaved); + } +#endif /* VM_OBJECT_TRACKING_OP_CREATED */ } __private_extern__ vm_object_t @@ -578,6 +667,9 @@ vm_object_bootstrap(void) vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL; vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC; vm_object_template.paging_in_progress = 0; +#if __LP64__ + vm_object_template.__object1_unused_bits = 0; +#endif /* __LP64__ */ vm_object_template.activity_in_progress = 0; /* Begin bitfields */ @@ -614,7 +706,9 @@ vm_object_bootstrap(void) vm_object_template.pages_created = 0; vm_object_template.pages_used = 0; vm_object_template.scan_collisions = 0; - +#if CONFIG_PHANTOM_CACHE + vm_object_template.phantom_object_id = 0; +#endif #if MACH_PAGEMAP vm_object_template.existence_map = VM_EXTERNAL_NULL; #endif /* MACH_PAGEMAP */ @@ -631,12 +725,13 @@ vm_object_bootstrap(void) vm_object_template.hashed = FALSE; vm_object_template.transposed 
= FALSE; vm_object_template.mapping_in_progress = FALSE; + vm_object_template.phantom_isssd = FALSE; vm_object_template.volatile_empty = FALSE; vm_object_template.volatile_fault = FALSE; vm_object_template.all_reusable = FALSE; vm_object_template.blocked_access = FALSE; vm_object_template.__object2_unused_bits = 0; -#if UPL_DEBUG +#if CONFIG_IOSCHED || UPL_DEBUG vm_object_template.uplq.prev = NULL; vm_object_template.uplq.next = NULL; #endif /* UPL_DEBUG */ @@ -645,14 +740,22 @@ vm_object_bootstrap(void) sizeof (vm_object_template.pip_holders)); #endif /* VM_PIP_DEBUG */ - vm_object_template.objq.next=NULL; - vm_object_template.objq.prev=NULL; + vm_object_template.objq.next = NULL; + vm_object_template.objq.prev = NULL; vm_object_template.purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; vm_object_template.purgeable_queue_group = 0; vm_object_template.vo_cache_ts = 0; +#if DEBUG + bzero(&vm_object_template.purgeable_owner_bt[0], + sizeof (vm_object_template.purgeable_owner_bt)); + vm_object_template.vo_purgeable_volatilizer = NULL; + bzero(&vm_object_template.purgeable_volatilizer_bt[0], + sizeof (vm_object_template.purgeable_volatilizer_bt)); +#endif /* DEBUG */ + /* * Initialize the "kernel object" */ @@ -704,6 +807,30 @@ vm_object_bootstrap(void) #endif /* MACH_PAGEMAP */ } +#if CONFIG_IOSCHED +void +vm_io_reprioritize_init(void) +{ + kern_return_t result; + thread_t thread = THREAD_NULL; + + /* Initialze the I/O reprioritization subsystem */ + lck_spin_init(&io_reprioritize_list_lock, &vm_object_lck_grp, &vm_object_lck_attr); + queue_init(&io_reprioritize_list); + + io_reprioritize_req_zone = zinit(sizeof(struct io_reprioritize_req), + MAX_IO_REPRIORITIZE_REQS * sizeof(struct io_reprioritize_req), + 4096, "io_reprioritize_req"); + + result = kernel_thread_start_priority(io_reprioritize_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread); + if (result == KERN_SUCCESS) { + thread_deallocate(thread); + } else { + panic("Could not create io_reprioritize_thread"); + } +} +#endif + void vm_object_reaper_init(void) { @@ -798,8 +925,23 @@ vm_object_deallocate( return; } - if (object->ref_count > 2 || - (!object->named && object->ref_count > 1)) { + if (object->ref_count == 2 && + object->named) { + /* + * This "named" object's reference count is about to + * drop from 2 to 1: + * we'll need to call memory_object_last_unmap(). + */ + } else if (object->ref_count == 2 && + object->internal && + object->shadow != VM_OBJECT_NULL) { + /* + * This internal object's reference count is about to + * drop from 2 to 1 and it has a shadow object: + * we'll want to try and collapse this object with its + * shadow. + */ + } else if (object->ref_count >= 2) { UInt32 original_ref_count; volatile UInt32 *ref_count_p; Boolean atomic_swap; @@ -820,19 +962,30 @@ vm_object_deallocate( * Test again as "ref_count" could have changed. * "named" shouldn't change. 
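The restructured checks above, and the retest just below, let vm_object_deallocate() drop a reference with only the shared object lock whenever no follow-up work can be needed: a "named" object going 2->1 requires memory_object_last_unmap(), an internal object with a shadow going 2->1 is a collapse candidate, and 1->0 means termination. A condensed (and deliberately more conservative) sketch of the fast path; the helper name is hypothetical:

	static Boolean
	object_ref_dec_fast(vm_object_t object)
	{
		UInt32 old;

		/* caller holds the object lock shared */
		old = object->ref_count;
		if (old <= 2) {
			/* 2 -> 1 and 1 -> 0 may need unmap, collapse or
			 * terminate work: take the slow, exclusive path */
			return FALSE;
		}
		/* at most one thread can win the swap for this "old" value */
		return OSCompareAndSwap(old, old - 1,
		    (UInt32 *)&object->ref_count);
	}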
*/ - if (original_ref_count > 2 || - (!object->named && original_ref_count > 1)) { + if (original_ref_count == 2 && + object->named) { + /* need to take slow path for m_o_last_unmap() */ + atomic_swap = FALSE; + } else if (original_ref_count == 2 && + object->internal && + object->shadow != VM_OBJECT_NULL) { + /* need to take slow path for vm_object_collapse() */ + atomic_swap = FALSE; + } else if (original_ref_count < 2) { + /* need to take slow path for vm_object_terminate() */ + atomic_swap = FALSE; + } else { + /* try an atomic update with the shared lock */ atomic_swap = OSCompareAndSwap( original_ref_count, original_ref_count - 1, (UInt32 *) &object->ref_count); if (atomic_swap == FALSE) { vm_object_deallocate_shared_swap_failures++; + /* fall back to the slow path... */ } - - } else { - atomic_swap = FALSE; } + vm_object_unlock(object); if (atomic_swap) { @@ -1403,6 +1556,9 @@ vm_object_cache_evict( ep_moved++; } else { +#if CONFIG_PHANTOM_CACHE + vm_phantom_cache_add_ghost(p); +#endif vm_page_free_prepare_queues(p); assert(p->pageq.next == NULL && p->pageq.prev == NULL); @@ -1722,6 +1878,18 @@ vm_object_reap( vm_object_reap_count++; + /* + * Disown this purgeable object to cleanup its owner's purgeable + * ledgers. We need to do this before disconnecting the object + * from its pager, to properly account for compressed pages. + */ + if (object->internal && + object->purgable != VM_PURGABLE_DENY) { + vm_purgeable_accounting(object, + object->purgable, + TRUE); /* disown */ + } + pager = object->pager; object->pager = MEMORY_OBJECT_NULL; @@ -1738,21 +1906,68 @@ vm_object_reap( /* * remove from purgeable queue if it's on */ - if (object->internal && (object->objq.next || object->objq.prev)) { - purgeable_q_t queue = vm_purgeable_object_remove(object); - assert(queue); + if (object->internal) { + task_t owner; + + owner = object->vo_purgeable_owner; + + if (object->purgable == VM_PURGABLE_DENY) { + /* not purgeable: nothing to do */ + } else if (object->purgable == VM_PURGABLE_VOLATILE) { + purgeable_q_t queue; + + assert(object->vo_purgeable_owner == NULL); + + queue = vm_purgeable_object_remove(object); + assert(queue); + + if (object->purgeable_when_ripe) { + /* + * Must take page lock for this - + * using it to protect token queue + */ + vm_page_lock_queues(); + vm_purgeable_token_delete_first(queue); + + assert(queue->debug_count_objects>=0); + vm_page_unlock_queues(); + } - if (object->purgeable_when_ripe) { /* - * Must take page lock for this - - * using it to protect token queue + * Update "vm_page_purgeable_count" in bulk and mark + * object as VM_PURGABLE_EMPTY to avoid updating + * "vm_page_purgeable_count" again in vm_page_remove() + * when reaping the pages. 
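Concretely, for a volatile object with (assumed) 10 resident pages of which 2 are wired, the bulk update described in the comment above, and spelled out just below, reduces to:

	delta = 10 - 2;		/* unwired resident pages */
	OSAddAtomic(-8, (SInt32 *)&vm_page_purgeable_count);
	OSAddAtomic(-2, (SInt32 *)&vm_page_purgeable_wired_count);
	object->purgable = VM_PURGABLE_EMPTY;
	/* vm_page_remove() now skips its per-page purgeable update
	 * while the 10 pages are reaped */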
*/ - vm_page_lock_queues(); - vm_purgeable_token_delete_first(queue); - - assert(queue->debug_count_objects>=0); - vm_page_unlock_queues(); + unsigned int delta; + assert(object->resident_page_count >= + object->wired_page_count); + delta = (object->resident_page_count - + object->wired_page_count); + if (delta != 0) { + assert(vm_page_purgeable_count >= delta); + OSAddAtomic(-delta, + (SInt32 *)&vm_page_purgeable_count); + } + if (object->wired_page_count != 0) { + assert(vm_page_purgeable_wired_count >= + object->wired_page_count); + OSAddAtomic(-object->wired_page_count, + (SInt32 *)&vm_page_purgeable_wired_count); + } + object->purgable = VM_PURGABLE_EMPTY; + } + else if (object->purgable == VM_PURGABLE_NONVOLATILE || + object->purgable == VM_PURGABLE_EMPTY) { + /* remove from nonvolatile queue */ + assert(object->vo_purgeable_owner == TASK_NULL); + vm_purgeable_nonvolatile_dequeue(object); + } else { + panic("object %p in unexpected purgeable state 0x%x\n", + object, object->purgable); } + assert(object->objq.next == NULL); + assert(object->objq.prev == NULL); } /* @@ -1797,6 +2012,13 @@ vm_object_reap( object->shadow = VM_OBJECT_NULL; +#if VM_OBJECT_TRACKING + if (vm_object_tracking_inited) { + btlog_remove_entries_for_element(vm_object_tracking_btlog, + object); + } +#endif /* VM_OBJECT_TRACKING */ + vm_object_lock_destroy(object); /* * Free the space for the object. @@ -1950,7 +2172,7 @@ restart_after_sleep: vm_pageout_steal_laundry(p, TRUE); } - if (p->cleaning || p->laundry) { + if (p->cleaning || p->laundry || p->absent) { /* * page is being acted upon, * so don't mess with it @@ -2561,9 +2783,11 @@ deactivate_pages_in_object( if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) { int clear_refmod; + int pmap_options; dwp->dw_mask = 0; + pmap_options = 0; clear_refmod = VM_MEM_REFERENCED; dwp->dw_mask |= DW_clear_reference; @@ -2603,9 +2827,19 @@ deactivate_pages_in_object( object->reusable_page_count++; assert(object->resident_page_count >= object->reusable_page_count); reusable++; + /* + * Tell pmap this page is now + * "reusable" (to update pmap + * stats for all mappings). + */ + pmap_options |= PMAP_OPTIONS_SET_REUSABLE; } } - pmap_clear_refmod_options(m->phys_page, clear_refmod, PMAP_OPTIONS_NOFLUSH, (void *)pfc); + pmap_options |= PMAP_OPTIONS_NOFLUSH; + pmap_clear_refmod_options(m->phys_page, + clear_refmod, + pmap_options, + (void *)pfc); if (!m->throttled && !(reusable_page || all_reusable)) dwp->dw_mask |= DW_move_page; @@ -2777,6 +3011,13 @@ vm_object_deactivate_pages( all_reusable = FALSE; +#if 11 + /* + * For the sake of accurate "reusable" pmap stats, we need + * to tell pmap about each page that is no longer "reusable", + * so we can't do the "all_reusable" optimization. + */ +#else if (reusable_page && object->internal && object->vo_size != 0 && @@ -2785,6 +3026,7 @@ vm_object_deactivate_pages( all_reusable = TRUE; reusable_page = FALSE; } +#endif if ((reusable_page || all_reusable) && object->all_reusable) { /* This means MADV_FREE_REUSABLE has been called twice, which @@ -2841,6 +3083,17 @@ vm_object_reuse_pages( (object)->reusable_page_count--; \ (m)->reusable = FALSE; \ (reused)++; \ + /* \ + * Tell pmap that this page is no longer \ + * "reusable", to update the "reusable" stats \ + * for all the pmaps that have mapped this \ + * page. 
\ + */ \ + pmap_clear_refmod_options((m)->phys_page, \ + 0, /* refmod */ \ + (PMAP_OPTIONS_CLEAR_REUSABLE \ + | PMAP_OPTIONS_NOFLUSH), \ + NULL); \ } \ MACRO_END @@ -2850,6 +3103,8 @@ vm_object_reuse_pages( vm_object_lock_assert_exclusive(object); if (object->all_reusable) { + panic("object %p all_reusable: can't update pmap stats\n", + object); assert(object->reusable_page_count == 0); object->all_reusable = FALSE; if (end_offset - start_offset == object->vo_size || @@ -3191,7 +3446,8 @@ vm_object_copy_slowly( fault_info.interruptible = interruptible; fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; - fault_info.user_tag = 0; + fault_info.user_tag = 0; + fault_info.pmap_options = 0; fault_info.lo_offset = src_offset; fault_info.hi_offset = src_offset + size; fault_info.no_cache = FALSE; @@ -4227,7 +4483,9 @@ Retry: * Lookup failed twice, and we have something * to insert; set the object. */ + vm_object_lock(new_object); vm_object_hash_insert(new_entry, new_object); + vm_object_unlock(new_object); entry = new_entry; new_entry = VM_OBJECT_HASH_ENTRY_NULL; new_object = VM_OBJECT_NULL; @@ -4500,9 +4758,11 @@ vm_object_pager_create( entry = vm_object_hash_entry_alloc(pager); + vm_object_lock(object); lck = vm_object_hash_lock_spin(pager); vm_object_hash_insert(entry, object); vm_object_hash_unlock(lck); + vm_object_unlock(object); /* * A reference was returned by @@ -4533,6 +4793,7 @@ vm_object_compressor_pager_create( memory_object_t pager; vm_object_hash_entry_t entry; lck_mtx_t *lck; + vm_object_t pager_object = VM_OBJECT_NULL; assert(object != kernel_object); @@ -4599,9 +4860,11 @@ vm_object_compressor_pager_create( entry = vm_object_hash_entry_alloc(pager); + vm_object_lock(object); lck = vm_object_hash_lock_spin(pager); vm_object_hash_insert(entry, object); vm_object_hash_unlock(lck); + vm_object_unlock(object); /* * A reference was returned by @@ -4609,8 +4872,11 @@ vm_object_compressor_pager_create( * copied by vm_object_enter(). */ - if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) - panic("vm_object_compressor_pager_create: mismatch"); + pager_object = vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE); + + if (pager_object != object) { + panic("vm_object_compressor_pager_create: mismatch (pager: %p, pager_object: %p, orig_object: %p, orig_object size: 0x%llx)\n", pager, pager_object, object, (uint64_t) object->vo_size); + } /* * Drop the reference we were passed. @@ -4668,6 +4934,83 @@ static int vm_external_collapsed; unsigned long vm_object_collapse_encrypted = 0; +void vm_object_do_collapse_compressor(vm_object_t object, + vm_object_t backing_object); +void +vm_object_do_collapse_compressor( + vm_object_t object, + vm_object_t backing_object) +{ + vm_object_offset_t new_offset, backing_offset; + vm_object_size_t size; + + vm_counters.do_collapse_compressor++; + + vm_object_lock_assert_exclusive(object); + vm_object_lock_assert_exclusive(backing_object); + + size = object->vo_size; + + /* + * Move all compressed pages from backing_object + * to the parent. 
+ */ + + for (backing_offset = object->vo_shadow_offset; + backing_offset < object->vo_shadow_offset + object->vo_size; + backing_offset += PAGE_SIZE) { + memory_object_offset_t backing_pager_offset; + + /* find the next compressed page at or after this offset */ + backing_pager_offset = (backing_offset + + backing_object->paging_offset); + backing_pager_offset = vm_compressor_pager_next_compressed( + backing_object->pager, + backing_pager_offset); + if (backing_pager_offset == (memory_object_offset_t) -1) { + /* no more compressed pages */ + break; + } + backing_offset = (backing_pager_offset - + backing_object->paging_offset); + + new_offset = backing_offset - object->vo_shadow_offset; + + if (new_offset >= object->vo_size) { + /* we're out of the scope of "object": done */ + break; + } + + if ((vm_page_lookup(object, new_offset) != VM_PAGE_NULL) || + (vm_compressor_pager_state_get(object->pager, + (new_offset + + object->paging_offset)) == + VM_EXTERNAL_STATE_EXISTS)) { + /* + * This page already exists in object, resident or + * compressed. + * We don't need this compressed page in backing_object + * and it will be reclaimed when we release + * backing_object. + */ + continue; + } + + /* + * backing_object has this page in the VM compressor and + * we need to transfer it to object. + */ + vm_counters.do_collapse_compressor_pages++; + vm_compressor_pager_transfer( + /* destination: */ + object->pager, + (new_offset + object->paging_offset), + /* source: */ + backing_object->pager, + (backing_offset + backing_object->paging_offset)); + } +} + /* * Routine: vm_object_do_collapse * Purpose: @@ -4691,6 +5034,9 @@ vm_object_do_collapse( vm_object_lock_assert_exclusive(object); vm_object_lock_assert_exclusive(backing_object); + assert(object->purgable == VM_PURGABLE_DENY); + assert(backing_object->purgable == VM_PURGABLE_DENY); + backing_offset = object->vo_shadow_offset; size = object->vo_size; @@ -4735,12 +5081,26 @@ vm_object_do_collapse( pp = vm_page_lookup(object, new_offset); if (pp == VM_PAGE_NULL) { - /* - * Parent now has no page. - * Move the backing object's page up. - */ + if (VM_COMPRESSOR_PAGER_STATE_GET(object, + new_offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* + * Parent object has this page + * in the VM compressor. + * Throw away the backing + * object's page. + */ + VM_PAGE_FREE(p); + } else { + /* + * Parent now has no page. + * Move the backing object's page + * up. 
+ */ + vm_page_rename(p, object, new_offset, + TRUE); + } - vm_page_rename(p, object, new_offset, TRUE); #if MACH_PAGEMAP } else if (pp->absent) { @@ -4771,25 +5131,26 @@ vm_object_do_collapse( } } } - -#if !MACH_PAGEMAP - assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL)) - || (!backing_object->pager_created - && (backing_object->pager == MEMORY_OBJECT_NULL))); -#else - assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL); -#endif /* !MACH_PAGEMAP */ - if (backing_object->pager != MEMORY_OBJECT_NULL) { + if (vm_object_collapse_compressor_allowed && + object->pager != MEMORY_OBJECT_NULL && + backing_object->pager != MEMORY_OBJECT_NULL) { + + /* move compressed pages from backing_object to object */ + vm_object_do_collapse_compressor(object, backing_object); + + } else if (backing_object->pager != MEMORY_OBJECT_NULL) { vm_object_hash_entry_t entry; -#if 00 - if (COMPRESSED_PAGER_IS_ACTIVE) { - panic("vm_object_do_collapse(%p,%p): " - "backing_object has a compressor pager", - object, backing_object); - } -#endif +#if !MACH_PAGEMAP + assert((!object->pager_created && + (object->pager == MEMORY_OBJECT_NULL)) || + (!backing_object->pager_created && + (backing_object->pager == MEMORY_OBJECT_NULL))); +#else + assert(!object->pager_created && + object->pager == MEMORY_OBJECT_NULL); +#endif /* !MACH_PAGEMAP */ /* * Move the pager from backing_object to object. @@ -4801,6 +5162,8 @@ vm_object_do_collapse( assert(!object->paging_in_progress); assert(!object->activity_in_progress); + assert(!object->pager_created); + assert(object->pager == NULL); object->pager = backing_object->pager; if (backing_object->hashed) { @@ -4824,6 +5187,12 @@ vm_object_do_collapse( memory_object_control_collapse(object->pager_control, object); } + /* the backing_object has lost its pager: reset all fields */ + backing_object->pager_created = FALSE; + backing_object->pager_control = NULL; + backing_object->pager_ready = FALSE; + backing_object->paging_offset = 0; + backing_object->pager = NULL; } #if MACH_PAGEMAP @@ -4863,6 +5232,9 @@ vm_object_do_collapse( object->shadow = backing_object->shadow; if (object->shadow) { object->vo_shadow_offset += backing_object->vo_shadow_offset; + /* "backing_object" gave its shadow to "object" */ + backing_object->shadow = VM_OBJECT_NULL; + backing_object->vo_shadow_offset = 0; } else { /* no shadow, therefore no shadow offset... */ object->vo_shadow_offset = 0; @@ -4877,11 +5249,25 @@ vm_object_do_collapse( * pager left, and no object references within it, * all that is necessary is to dispose of it. */ + object_collapses++; - assert((backing_object->ref_count == 1) && - (backing_object->resident_page_count == 0) && - (backing_object->paging_in_progress == 0) && - (backing_object->activity_in_progress == 0)); + assert(backing_object->ref_count == 1); + assert(backing_object->resident_page_count == 0); + assert(backing_object->paging_in_progress == 0); + assert(backing_object->activity_in_progress == 0); + assert(backing_object->shadow == VM_OBJECT_NULL); + assert(backing_object->vo_shadow_offset == 0); + + if (backing_object->pager != MEMORY_OBJECT_NULL) { + /* ... 
unless it has a pager; need to terminate pager too */ + vm_counters.do_collapse_terminate++; + if (vm_object_terminate(backing_object) != KERN_SUCCESS) { + vm_counters.do_collapse_terminate_failure++; + } + return; + } + + assert(backing_object->pager == NULL); backing_object->alive = FALSE; vm_object_unlock(backing_object); @@ -4889,11 +5275,17 @@ vm_object_do_collapse( XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", backing_object, 0,0,0,0); +#if VM_OBJECT_TRACKING + if (vm_object_tracking_inited) { + btlog_remove_entries_for_element(vm_object_tracking_btlog, + backing_object); + } +#endif /* VM_OBJECT_TRACKING */ + vm_object_lock_destroy(backing_object); zfree(vm_object_zone, backing_object); - object_collapses++; } static void @@ -5140,6 +5532,29 @@ retry: object_lock_type = backing_object_lock_type; continue; } + + /* + * Purgeable objects are not supposed to engage in + * copy-on-write activities, so should not have + * any shadow objects or be a shadow object to another + * object. + * Collapsing a purgeable object would require some + * updates to the purgeable compressed ledgers. + */ + if (object->purgable != VM_PURGABLE_DENY || + backing_object->purgable != VM_PURGABLE_DENY) { + panic("vm_object_collapse() attempting to collapse " + "purgeable object: %p(%d) %p(%d)\n", + object, object->purgable, + backing_object, backing_object->purgable); + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + object_lock_type = backing_object_lock_type; + continue; + } /* * The backing object can't be a copy-object: @@ -5183,7 +5598,8 @@ retry: * to the pager, we can collapse them. */ if (backing_object->ref_count == 1 && - (!object->pager_created + (vm_object_collapse_compressor_allowed || + !object->pager_created #if !MACH_PAGEMAP || (!backing_object->pager_created) #endif /*!MACH_PAGEMAP */ @@ -5479,9 +5895,12 @@ retry: continue; } + /* NOT REACHED */ + /* if (object != original_object) { vm_object_unlock(object); } + */ } /* @@ -6176,7 +6595,7 @@ vm_object_lock_request( * purgeable with no delayed copies pending. */ void -vm_object_purge(vm_object_t object) +vm_object_purge(vm_object_t object, int flags) { vm_object_lock_assert_exclusive(object); @@ -6186,7 +6605,22 @@ vm_object_purge(vm_object_t object) assert(object->copy == VM_OBJECT_NULL); assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); - if(object->purgable == VM_PURGABLE_VOLATILE) { + /* + * We need to set the object's state to VM_PURGABLE_EMPTY *before* + * reaping its pages. We update vm_page_purgeable_count in bulk + * and we don't want vm_page_remove() to update it again for each + * page we reap later. + * + * For the purgeable ledgers, pages from VOLATILE and EMPTY objects + * are all accounted for in the "volatile" ledgers, so this does not + * make any difference. + * If we transitioned directly from NONVOLATILE to EMPTY, + * vm_page_purgeable_count must have been updated when the object + * was dequeued from its volatile queue and the purgeable ledgers + * must have also been updated accordingly at that time (in + * vm_object_purgable_control()). 
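For context, these are the object states that user space drives through the long-standing purgeable-memory interface; a minimal usage sketch (not part of this patch, and the buffer size is arbitrary):

	#include <mach/mach.h>
	#include <mach/vm_purgable.h>

	vm_address_t addr = 0;
	vm_size_t size = 16 * 4096;
	int state;

	/* allocate a purgeable region */
	vm_allocate(mach_task_self(), &addr, size,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);

	/* volatile: the kernel may purge it under memory pressure */
	state = VM_PURGABLE_VOLATILE;
	vm_purgable_control(mach_task_self(), addr,
	    VM_PURGABLE_SET_STATE, &state);

	/* reclaim before reuse; "state" returns the previous state */
	state = VM_PURGABLE_NONVOLATILE;
	vm_purgable_control(mach_task_self(), addr,
	    VM_PURGABLE_SET_STATE, &state);
	if (state == VM_PURGABLE_EMPTY) {
		/* contents were purged while volatile: regenerate them */
	}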
+ */ + if (object->purgable == VM_PURGABLE_VOLATILE) { unsigned int delta; assert(object->resident_page_count >= object->wired_page_count); @@ -6204,10 +6638,63 @@ vm_object_purge(vm_object_t object) OSAddAtomic(-object->wired_page_count, (SInt32 *)&vm_page_purgeable_wired_count); } + object->purgable = VM_PURGABLE_EMPTY; } - object->purgable = VM_PURGABLE_EMPTY; + assert(object->purgable == VM_PURGABLE_EMPTY); vm_object_reap_pages(object, REAP_PURGEABLE); + + if (object->pager != NULL && + COMPRESSED_PAGER_IS_ACTIVE) { + unsigned int pgcount; + + if (object->activity_in_progress == 0 && + object->paging_in_progress == 0) { + /* + * Also reap any memory coming from this object + * in the VM compressor. + * + * There are no operations in progress on the VM object + * and no operation can start while we're holding the + * VM object lock, so it's safe to reap the compressed + * pages and update the page counts. + */ + pgcount = vm_compressor_pager_get_count(object->pager); + if (pgcount) { + pgcount = vm_compressor_pager_reap_pages(object->pager, flags); + vm_compressor_pager_count(object->pager, + -pgcount, + FALSE, /* shared */ + object); + vm_purgeable_compressed_update(object, + -pgcount); + } + if ( !(flags & C_DONT_BLOCK)) { + assert(vm_compressor_pager_get_count(object->pager) + == 0); + } + } else { + /* + * There's some kind of paging activity in progress + * for this object, which could result in a page + * being compressed or decompressed, possibly while + * the VM object is not locked, so it could race + * with us. + * + * We can't really synchronize this without possibly + * causing a deadlock when the compressor needs to + * allocate or free memory while compressing or + * decompressing a page from a purgeable object + * mapped in the kernel_map... + * + * So let's not attempt to purge the compressor + * pager if there's any kind of operation in + * progress on the VM object. + */ + } + } + + vm_object_lock_assert_exclusive(object); } @@ -6302,6 +6789,8 @@ vm_object_purgable_control( return KERN_INVALID_ARGUMENT; } + vm_object_lock_assert_exclusive(object); + /* * Get current state of the purgeable object. */ @@ -6362,8 +6851,16 @@ vm_object_purgable_control( vm_page_lock_queues(); - assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t queue = vm_purgeable_object_remove(object); + /* object should be on a queue */ + assert(object->objq.next != NULL && + object->objq.prev != NULL); + purgeable_q_t queue; + + /* + * Move object from its volatile queue to the + * non-volatile queue... + */ + queue = vm_purgeable_object_remove(object); assert(queue); if (object->purgeable_when_ripe) { @@ -6373,6 +6870,16 @@ vm_object_purgable_control( vm_page_unlock_queues(); } + if (old_state == VM_PURGABLE_VOLATILE || + old_state == VM_PURGABLE_EMPTY) { + /* + * Transfer the object's pages from the volatile to + * non-volatile ledgers. 
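Summarizing the accounting invariant these hunks maintain (distilled from the patch; this table does not appear in the source):

	/*
	 * state        object queue        pages accounted in
	 * NONVOLATILE  nonvolatile queue   task "nonvolatile" ledgers
	 * VOLATILE     volatile queue      task "volatile" ledgers +
	 *                                  vm_page_purgeable_count
	 * EMPTY        nonvolatile queue   task "volatile" ledgers, until
	 *                                  the pages are reaped/disowned
	 */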
+ */ + vm_purgeable_accounting(object, VM_PURGABLE_VOLATILE, + FALSE); + } + break; case VM_PURGABLE_VOLATILE: @@ -6395,7 +6902,8 @@ vm_object_purgable_control( } if (old_state == VM_PURGABLE_EMPTY && - object->resident_page_count == 0) + object->resident_page_count == 0 && + object->pager == NULL) break; purgeable_q_t queue; @@ -6451,8 +6959,9 @@ vm_object_purgable_control( object->purgable = new_state; - /* object should not be on a queue */ - assert(object->objq.next == NULL && object->objq.prev == NULL); + /* object should be on "non-volatile" queue */ + assert(object->objq.next != NULL); + assert(object->objq.prev != NULL); } else if (old_state == VM_PURGABLE_VOLATILE) { purgeable_q_t old_queue; @@ -6498,6 +7007,10 @@ vm_object_purgable_control( } }; vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT ); + if (old_state == VM_PURGABLE_NONVOLATILE) { + vm_purgeable_accounting(object, VM_PURGABLE_NONVOLATILE, + FALSE); + } assert(queue->debug_count_objects>=0); @@ -6523,30 +7036,56 @@ vm_object_purgable_control( } } - if (old_state != new_state) { - assert(old_state == VM_PURGABLE_NONVOLATILE || - old_state == VM_PURGABLE_VOLATILE); - if (old_state == VM_PURGABLE_VOLATILE) { - purgeable_q_t old_queue; + if (old_state == new_state) { + /* nothing changes */ + break; + } - /* object should be on a queue */ - assert(object->objq.next != NULL && - object->objq.prev != NULL); - old_queue = vm_purgeable_object_remove(object); - assert(old_queue); - if (object->purgeable_when_ripe) { - vm_page_lock_queues(); - vm_purgeable_token_delete_first(old_queue); - vm_page_unlock_queues(); - } + assert(old_state == VM_PURGABLE_NONVOLATILE || + old_state == VM_PURGABLE_VOLATILE); + if (old_state == VM_PURGABLE_VOLATILE) { + purgeable_q_t old_queue; + + /* object should be on a queue */ + assert(object->objq.next != NULL && + object->objq.prev != NULL); + + old_queue = vm_purgeable_object_remove(object); + assert(old_queue); + if (object->purgeable_when_ripe) { + vm_page_lock_queues(); + vm_purgeable_token_delete_first(old_queue); + vm_page_unlock_queues(); } - (void) vm_object_purge(object); } - break; + if (old_state == VM_PURGABLE_NONVOLATILE) { + /* + * This object's pages were previously accounted as + * "non-volatile" and now need to be accounted as + * "volatile". + */ + vm_purgeable_accounting(object, VM_PURGABLE_NONVOLATILE, + FALSE); + /* + * Set to VM_PURGABLE_EMPTY because the pages are no + * longer accounted in the "non-volatile" ledger + * and are also not accounted for in + * "vm_page_purgeable_count". 
+ */ + object->purgable = VM_PURGABLE_EMPTY; + } + + (void) vm_object_purge(object, 0); + assert(object->purgable == VM_PURGABLE_EMPTY); + + break; } + *state = old_state; + vm_object_lock_assert_exclusive(object); + return KERN_SUCCESS; } @@ -7121,11 +7660,8 @@ done: extern int speculative_reads_disabled; extern int ignore_is_ssd; -unsigned int preheat_pages_max = MAX_UPL_TRANSFER; -unsigned int preheat_pages_min = 8; - -uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1]; -uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1]; +unsigned int preheat_max_bytes = MAX_UPL_TRANSFER_BYTES; +unsigned int preheat_min_bytes = (1024 * 32); __private_extern__ void @@ -7148,9 +7684,8 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, uint32_t throttle_limit; int sequential_run; int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; - unsigned int max_ph_size; - unsigned int min_ph_size; - unsigned int min_ph_size_in_bytes; + vm_size_t max_ph_size; + vm_size_t min_ph_size; assert( !(*length & PAGE_MASK)); assert( !(*start & PAGE_MASK_64)); @@ -7186,29 +7721,27 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if (!ignore_is_ssd) vnode_pager_get_isSSD(object->pager, &isSSD); - min_ph_size = preheat_pages_min; - max_ph_size = preheat_pages_max; + min_ph_size = round_page(preheat_min_bytes); + max_ph_size = round_page(preheat_max_bytes); if (isSSD) { min_ph_size /= 2; max_ph_size /= 8; } - if (min_ph_size < 1) - min_ph_size = 1; + if (min_ph_size < PAGE_SIZE) + min_ph_size = PAGE_SIZE; - if (max_ph_size < 1) - max_ph_size = 1; - else if (max_ph_size > MAX_UPL_TRANSFER) - max_ph_size = MAX_UPL_TRANSFER; + if (max_ph_size < PAGE_SIZE) + max_ph_size = PAGE_SIZE; + else if (max_ph_size > MAX_UPL_TRANSFER_BYTES) + max_ph_size = MAX_UPL_TRANSFER_BYTES; - if (max_length > (max_ph_size * PAGE_SIZE)) - max_length = max_ph_size * PAGE_SIZE; + if (max_length > max_ph_size) + max_length = max_ph_size; if (max_length <= PAGE_SIZE) goto out; - min_ph_size_in_bytes = min_ph_size * PAGE_SIZE; - if (object->internal) object_size = object->vo_size; else @@ -7260,11 +7793,11 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, *io_streaming = 1; } else { - if (object->pages_created < (20 * min_ph_size)) { + if (object->pages_created < (20 * (min_ph_size >> PAGE_SHIFT))) { /* * prime the pump */ - pre_heat_size = min_ph_size_in_bytes; + pre_heat_size = min_ph_size; } else { /* * Linear growth in PH size: The maximum size is max_length... 
@@ -7273,9 +7806,9 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, * it up to the nearest PAGE_SIZE boundary */ pre_heat_size = (max_length * object->pages_used) / object->pages_created; - - if (pre_heat_size < min_ph_size_in_bytes) - pre_heat_size = min_ph_size_in_bytes; + + if (pre_heat_size < min_ph_size) + pre_heat_size = min_ph_size; else pre_heat_size = round_page(pre_heat_size); } @@ -7314,7 +7847,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if (pre_heat_size > max_length) pre_heat_size = max_length; - if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) { + if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size)) { unsigned int consider_free = vm_page_free_count + vm_page_cleaned_count; @@ -7324,8 +7857,8 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, pre_heat_size = trunc_page(pre_heat_size / 4); } - if (pre_heat_size < min_ph_size_in_bytes) - pre_heat_size = min_ph_size_in_bytes; + if (pre_heat_size < min_ph_size) + pre_heat_size = min_ph_size; } if (look_ahead == TRUE) { if (look_behind == TRUE) { @@ -7375,8 +7908,6 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, assert( !(target_start & PAGE_MASK_64)); assert( !(pre_heat_size & PAGE_MASK)); - pre_heat_scaling[pre_heat_size / PAGE_SIZE]++; - if (pre_heat_size <= PAGE_SIZE) goto out; @@ -7444,8 +7975,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, break; } #endif /* MACH_PAGEMAP */ - if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) - == VM_EXTERNAL_STATE_ABSENT) { + if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) == VM_EXTERNAL_STATE_ABSENT) { break; } if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { @@ -7461,8 +7991,6 @@ out: if (*length > max_length) *length = max_length; - pre_heat_cluster[*length / PAGE_SIZE]++; - vm_object_unlock(object); DTRACE_VM1(clustersize, vm_size_t, *length); @@ -7881,6 +8409,7 @@ kern_return_t vm_object_pack( /* object should be on a queue */ assert(src_object->objq.next != NULL && src_object->objq.prev != NULL); + queue = vm_purgeable_object_remove(src_object); assert(queue); if (src_object->purgeable_when_ripe) { @@ -7888,7 +8417,15 @@ kern_return_t vm_object_pack( vm_purgeable_token_delete_first(queue); vm_page_unlock_queues(); } - vm_object_purge(src_object); + + vm_object_purge(src_object, 0); + assert(src_object->purgable == VM_PURGABLE_EMPTY); + + /* + * This object was "volatile" so its pages must have + * already been accounted as "volatile": no change + * in accounting now that it's "empty". 
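A worked example of the byte-based cluster sizing in the vm_object_cluster_size() hunks above (all values assumed; the actual MAX_UPL_TRANSFER_BYTES value is defined elsewhere):

	/*
	 * non-SSD: preheat_min_bytes = 32KB, preheat_max_bytes = 1MB
	 *   object: pages_created = 1000, pages_used = 250, max_length = 1MB
	 *   pre_heat_size = (1MB * 250) / 1000 = 256KB  (>= 32KB, page aligned)
	 *
	 * SSD: min_ph_size = 32KB/2 = 16KB, max_ph_size = 1MB/8 = 128KB
	 *   max_length is first clamped to 128KB, so
	 *   pre_heat_size = (128KB * 250) / 1000 = 32KB  (>= 16KB)
	 */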
+ */ } goto done; } @@ -7980,6 +8517,7 @@ vm_object_pageout( { vm_page_t p, next; struct vm_pageout_queue *iq; + boolean_t set_pageout_bit = FALSE; iq = &vm_pageout_queue_internal; @@ -7995,6 +8533,13 @@ vm_object_pageout( */ vm_object_pager_create(object); } + + set_pageout_bit = TRUE; + } + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + + set_pageout_bit = FALSE; } ReScan: @@ -8061,7 +8606,7 @@ ReScan: } VM_PAGE_QUEUES_REMOVE(p); - vm_pageout_cluster(p, TRUE); + vm_pageout_cluster(p, set_pageout_bit); } vm_page_unlock_queues(); } @@ -8103,3 +8648,229 @@ vm_object_pagein( return kr; } #endif /* CONFIG_FREEZE */ + + +#if CONFIG_IOSCHED +void +vm_page_request_reprioritize(vm_object_t o, uint64_t blkno, uint32_t len, int prio) +{ + io_reprioritize_req_t req; + struct vnode *devvp = NULL; + + if(vnode_pager_get_object_devvp(o->pager, (uintptr_t *)&devvp) != KERN_SUCCESS) + return; + + /* Create the request for I/O reprioritization */ + req = (io_reprioritize_req_t)zalloc(io_reprioritize_req_zone); + assert(req != NULL); + req->blkno = blkno; + req->len = len; + req->priority = prio; + req->devvp = devvp; + + /* Insert request into the reprioritization list */ + IO_REPRIORITIZE_LIST_LOCK(); + queue_enter(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list); + IO_REPRIORITIZE_LIST_UNLOCK(); + + /* Wakeup reprioritize thread */ + IO_REPRIO_THREAD_WAKEUP(); + + return; +} + +void +vm_decmp_upl_reprioritize(upl_t upl, int prio) +{ + int offset; + vm_object_t object; + io_reprioritize_req_t req; + struct vnode *devvp = NULL; + uint64_t blkno; + uint32_t len; + upl_t io_upl; + uint64_t *io_upl_reprio_info; + int io_upl_size; + + if ((upl->flags & UPL_TRACKED_BY_OBJECT) == 0 || (upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) + return; + + /* + * We dont want to perform any allocations with the upl lock held since that might + * result in a deadlock. If the system is low on memory, the pageout thread would + * try to pageout stuff and might wait on this lock. If we are waiting for the memory to + * be freed up by the pageout thread, it would be a deadlock. + */ + + + /* First step is just to get the size of the upl to find out how big the reprio info is */ + upl_lock(upl); + if (upl->decmp_io_upl == NULL) { + /* The real I/O upl was destroyed by the time we came in here. Nothing to do. */ + upl_unlock(upl); + return; + } + + io_upl = upl->decmp_io_upl; + assert((io_upl->flags & UPL_DECMP_REAL_IO) != 0); + io_upl_size = io_upl->size; + upl_unlock(upl); + + /* Now perform the allocation */ + io_upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * (io_upl_size / PAGE_SIZE)); + if (io_upl_reprio_info == NULL) + return; + + /* Now again take the lock, recheck the state and grab out the required info */ + upl_lock(upl); + if (upl->decmp_io_upl == NULL || upl->decmp_io_upl != io_upl) { + /* The real I/O upl was destroyed by the time we came in here. Nothing to do. 
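Each upl_reprio_info[] slot packs a block number and a length into one 64-bit word; the consumer side is unpacked per page in vm_decmp_upl_reprioritize() below. A sketch of the corresponding pack step (the producer is not shown in this hunk, so the layout is inferred from the unpack code: blkno in the low bits, len above UPL_REPRIO_INFO_SHIFT):

	static inline uint64_t
	upl_reprio_pack(uint64_t blkno, uint64_t len)
	{
		return ((blkno & UPL_REPRIO_INFO_MASK) |
		    ((len & UPL_REPRIO_INFO_MASK) << UPL_REPRIO_INFO_SHIFT));
	}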
*/ + upl_unlock(upl); + goto out; + } + memcpy(io_upl_reprio_info, io_upl->upl_reprio_info, sizeof(uint64_t) * (io_upl_size / PAGE_SIZE)); + + /* Get the VM object for this UPL */ + if (io_upl->flags & UPL_SHADOWED) { + object = io_upl->map_object->shadow; + } else { + object = io_upl->map_object; + } + + /* Get the dev vnode ptr for this object */ + if(!object || !object->pager || + vnode_pager_get_object_devvp(object->pager, (uintptr_t *)&devvp) != KERN_SUCCESS) { + upl_unlock(upl); + goto out; + } + + upl_unlock(upl); + + /* Now we have all the information needed to do the expedite */ + + offset = 0; + while (offset < io_upl_size) { + blkno = io_upl_reprio_info[(offset / PAGE_SIZE)] & UPL_REPRIO_INFO_MASK; + len = (io_upl_reprio_info[(offset / PAGE_SIZE)] >> UPL_REPRIO_INFO_SHIFT) & UPL_REPRIO_INFO_MASK; + + /* + * This implementation may cause some spurious expedites due to the + * fact that we dont cleanup the blkno & len from the upl_reprio_info + * even after the I/O is complete. + */ + + if (blkno != 0 && len != 0) { + /* Create the request for I/O reprioritization */ + req = (io_reprioritize_req_t)zalloc(io_reprioritize_req_zone); + assert(req != NULL); + req->blkno = blkno; + req->len = len; + req->priority = prio; + req->devvp = devvp; + + /* Insert request into the reprioritization list */ + IO_REPRIORITIZE_LIST_LOCK(); + queue_enter(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list); + IO_REPRIORITIZE_LIST_UNLOCK(); + + offset += len; + } else { + offset += PAGE_SIZE; + } + } + + /* Wakeup reprioritize thread */ + IO_REPRIO_THREAD_WAKEUP(); + +out: + kfree(io_upl_reprio_info, sizeof(uint64_t) * (io_upl_size / PAGE_SIZE)); + return; +} + +void +vm_page_handle_prio_inversion(vm_object_t o, vm_page_t m) +{ + upl_t upl; + upl_page_info_t *pl; + unsigned int i, num_pages; + int cur_tier; + + cur_tier = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO); + + /* + Scan through all UPLs associated with the object to find the + UPL containing the contended page. + */ + queue_iterate(&o->uplq, upl, upl_t, uplq) { + if (((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) || upl->upl_priority <= cur_tier) + continue; + pl = UPL_GET_INTERNAL_PAGE_LIST(upl); + num_pages = (upl->size / PAGE_SIZE); + + /* + For each page in the UPL page list, see if it matches the contended + page and was issued as a low prio I/O. + */ + for(i=0; i < num_pages; i++) { + if(UPL_PAGE_PRESENT(pl,i) && m->phys_page == pl[i].phys_addr) { + if ((upl->flags & UPL_DECMP_REQ) && upl->decmp_io_upl) { + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_EXPEDITE)) | DBG_FUNC_NONE, upl->upl_creator, m, upl, upl->upl_priority, 0); + vm_decmp_upl_reprioritize(upl, cur_tier); + break; + } + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_EXPEDITE)) | DBG_FUNC_NONE, upl->upl_creator, m, upl->upl_reprio_info[i], upl->upl_priority, 0); + if (UPL_REPRIO_INFO_BLKNO(upl, i) != 0 && UPL_REPRIO_INFO_LEN(upl, i) != 0) + vm_page_request_reprioritize(o, UPL_REPRIO_INFO_BLKNO(upl, i), UPL_REPRIO_INFO_LEN(upl, i), cur_tier); + break; + } + } + /* Check if we found any hits */ + if (i != num_pages) + break; + } + + return; +} + +wait_result_t +vm_page_sleep(vm_object_t o, vm_page_t m, int interruptible) +{ + wait_result_t ret; + + KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_START, o, m, 0, 0, 0); + + if (o->io_tracking && ((m->busy == TRUE) || (m->cleaning == TRUE) || VM_PAGE_WIRED(m))) { + /* + Indicates page is busy due to an I/O. 
Issue a reprioritize request if necessary. + */ + vm_page_handle_prio_inversion(o,m); + } + m->wanted = TRUE; + ret = thread_sleep_vm_object(o, m, interruptible); + KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_END, o, m, 0, 0, 0); + return ret; +} + +static void +io_reprioritize_thread(void *param __unused, wait_result_t wr __unused) +{ + io_reprioritize_req_t req = NULL; + + while(1) { + + IO_REPRIORITIZE_LIST_LOCK(); + if (queue_empty(&io_reprioritize_list)) { + IO_REPRIORITIZE_LIST_UNLOCK(); + break; + } + + queue_remove_first(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list); + IO_REPRIORITIZE_LIST_UNLOCK(); + + vnode_pager_issue_reprioritize_io(req->devvp, req->blkno, req->len, req->priority); + zfree(io_reprioritize_req_zone, req); + } + + IO_REPRIO_THREAD_CONTINUATION(); +} +#endif diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h index 9c4fe0e32..9462329d7 100644 --- a/osfmk/vm/vm_object.h +++ b/osfmk/vm/vm_object.h @@ -79,7 +79,6 @@ #include #include #include -#include #include #include #include @@ -91,6 +90,18 @@ #include +#if VM_OBJECT_TRACKING +#include +#include +extern void vm_object_tracking_init(void); +extern boolean_t vm_object_tracking_inited; +extern btlog_t *vm_object_tracking_btlog; +#define VM_OBJECT_TRACKING_BTDEPTH 7 +#define VM_OBJECT_TRACKING_OP_CREATED 1 +#define VM_OBJECT_TRACKING_OP_MODIFIED 2 +#define VM_OBJECT_TRACKING_OP_TRUESHARE 3 +#endif /* VM_OBJECT_TRACKING */ + struct vm_page; struct vm_shared_region_slide_info; @@ -116,6 +127,7 @@ struct vm_object_fault_info { /* boolean_t */ mark_zf_absent:1, /* boolean_t */ batch_pmap_op:1, __vm_object_fault_info_unused_bits:26; + int pmap_options; }; @@ -175,7 +187,27 @@ struct vm_object { memory_object_copy_strategy_t copy_strategy; /* How to handle data copy */ - short paging_in_progress; +#if __LP64__ + /* + * Some user processes (mostly VirtualMachine software) take a large + * number of UPLs (via IOMemoryDescriptors) to wire pages in large + * VM objects and overflow the 16-bit "activity_in_progress" counter. + * Since we never enforced any limit there, let's give them 32 bits + * for backwards compatibility's sake. + */ + unsigned int paging_in_progress:16, + __object1_unused_bits:16; + unsigned int activity_in_progress; +#else /* __LP64__ */ + /* + * On 32-bit platforms, enlarging "activity_in_progress" would increase + * the size of "struct vm_object". Since we don't know of any actual + * overflow of these counters on these platforms, let's keep the + * counters as 16-bit integers. 
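The counter-overflow panics added to the vm_object_activity/paging begin/end macros below rely on unsigned wraparound: after "++", a value of zero means the counter overflowed, and before "--", a value of zero means it would underflow. The check in its minimal form (names hypothetical):

	counter++;
	if (counter == 0)
		panic("counter overflow");
	/* ... */
	if (counter == 0)
		panic("counter underflow");
	counter--;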
+ */ + unsigned short paging_in_progress; + unsigned short activity_in_progress; +#endif /* __LP64__ */ /* The memory object ports are * being used (e.g., for pagein * or pageout) -- don't change @@ -183,7 +215,6 @@ struct vm_object { * don't collapse, destroy or * terminate) */ - short activity_in_progress; unsigned int /* boolean_t array */ all_wanted:11, /* Bit array of "want to be @@ -325,6 +356,7 @@ struct vm_object { hashed:1, /* object/pager entered in hash */ transposed:1, /* object was transposed with another */ mapping_in_progress:1, /* pager being mapped/unmapped */ + phantom_isssd:1, volatile_empty:1, volatile_fault:1, all_reusable:1, @@ -333,13 +365,16 @@ struct vm_object { object_slid:1, purgeable_queue_type:2, purgeable_queue_group:3, - __object2_unused_bits:9; /* for expansion */ + io_tracking:1, + __object2_unused_bits:7; /* for expansion */ uint32_t scan_collisions; - -#if UPL_DEBUG +#if CONFIG_PHANTOM_CACHE + uint32_t phantom_object_id; +#endif +#if CONFIG_IOSCHED || UPL_DEBUG queue_head_t uplq; /* List of outstanding upls */ -#endif /* UPL_DEBUG */ +#endif #ifdef VM_PIP_DEBUG /* @@ -354,6 +389,12 @@ struct vm_object { #endif /* VM_PIP_DEBUG */ queue_chain_t objq; /* object queue - currently used for purgable queues */ + +#if DEBUG + void *purgeable_owner_bt[16]; + task_t vo_purgeable_volatilizer; /* who made it volatile? */ + void *purgeable_volatilizer_bt[16]; +#endif /* DEBUG */ }; #define VM_OBJECT_PURGEABLE_FAULT_ERROR(object) \ @@ -558,7 +599,8 @@ __private_extern__ void vm_object_reuse_pages( boolean_t allow_partial_reuse); __private_extern__ void vm_object_purge( - vm_object_t object); + vm_object_t object, + int flags); __private_extern__ kern_return_t vm_object_purgable_control( vm_object_t object, @@ -764,6 +806,19 @@ vm_object_pagein( vm_object_t object); #endif /* CONFIG_FREEZE */ +#if CONFIG_IOSCHED +struct io_reprioritize_req { + uint64_t blkno; + uint32_t len; + int priority; + struct vnode *devvp; + queue_chain_t io_reprioritize_list; +}; +typedef struct io_reprioritize_req *io_reprioritize_req_t; + +extern void vm_io_reprioritize_init(void); +#endif + /* * Event waiting handling */ @@ -789,7 +844,7 @@ vm_object_pagein( thread_block(THREAD_CONTINUE_NULL)) \ #define thread_sleep_vm_object(object, event, interruptible) \ - lck_rw_sleep(&(object)->Lock, LCK_SLEEP_DEFAULT, (event_t)(event), (interruptible)) + lck_rw_sleep(&(object)->Lock, LCK_SLEEP_PROMOTED_PRI, (event_t)(event), (interruptible)) #define vm_object_sleep(object, event, interruptible) \ (((object)->all_wanted |= 1 << (event)), \ @@ -832,15 +887,19 @@ vm_object_pagein( #define vm_object_activity_begin(object) \ MACRO_BEGIN \ vm_object_lock_assert_exclusive((object)); \ - assert((object)->paging_in_progress >= 0); \ VM_PIP_DEBUG_BEGIN((object)); \ (object)->activity_in_progress++; \ + if ((object)->activity_in_progress == 0) { \ + panic("vm_object_activity_begin(%p): overflow\n", (object));\ + } \ MACRO_END #define vm_object_activity_end(object) \ MACRO_BEGIN \ vm_object_lock_assert_exclusive((object)); \ - assert((object)->activity_in_progress > 0); \ + if ((object)->activity_in_progress == 0) { \ + panic("vm_object_activity_end(%p): underflow\n", (object));\ + } \ (object)->activity_in_progress--; \ if ((object)->paging_in_progress == 0 && \ (object)->activity_in_progress == 0) \ @@ -851,15 +910,19 @@ vm_object_pagein( #define vm_object_paging_begin(object) \ MACRO_BEGIN \ vm_object_lock_assert_exclusive((object)); \ - assert((object)->paging_in_progress >= 0); \ VM_PIP_DEBUG_BEGIN((object)); 
\ (object)->paging_in_progress++; \ + if ((object)->paging_in_progress == 0) { \ + panic("vm_object_paging_begin(%p): overflow\n", (object));\ + } \ MACRO_END #define vm_object_paging_end(object) \ MACRO_BEGIN \ vm_object_lock_assert_exclusive((object)); \ - assert((object)->paging_in_progress > 0); \ + if ((object)->paging_in_progress == 0) { \ + panic("vm_object_paging_end(%p): underflow\n", (object));\ + } \ (object)->paging_in_progress--; \ if ((object)->paging_in_progress == 0) { \ vm_object_wakeup((object), \ diff --git a/osfmk/vm/vm_options.h b/osfmk/vm/vm_options.h index abb7dddea..92781d9da 100644 --- a/osfmk/vm/vm_options.h +++ b/osfmk/vm/vm_options.h @@ -37,4 +37,7 @@ #define VM_PAGE_FAKE_BUCKETS 1 #endif /* VM_PAGE_BUCKETS_CHECK */ +#define VM_OBJECT_TRACKING 0 +#define VM_SCAN_FOR_SHADOW_CHAIN (DEVELOPMENT || DEBUG) + #endif /* __VM_VM_OPTIONS_H__ */ diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h index fbb9d2971..c0330f0dd 100644 --- a/osfmk/vm/vm_page.h +++ b/osfmk/vm/vm_page.h @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include @@ -161,16 +161,45 @@ extern unsigned int vm_page_speculative_q_age_ms; * change that field; holding either lock is sufficient to read.] */ + +#if defined(__LP64__) + +/* + * in order to make the size of a vm_page_t 64 bytes (cache line size for both arm64 and x86_64) + * we'll keep the next_m pointer packed... as long as the kernel virtual space where we allocate + * vm_page_t's from doesn't span more than 256 Gbytes, we're safe. There are live tests in the + * vm_page_t array allocation and the zone init code to determine if we can safely pack and unpack + * pointers from the 2 ends of these spaces + */ +typedef uint32_t vm_page_packed_t; + +#define VM_PAGE_PACK_PTR(m) (!(m) ? (vm_page_packed_t)0 : ((vm_page_packed_t)((uintptr_t)(((uintptr_t)(m) - (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS)) >> 6))) +#define VM_PAGE_UNPACK_PTR(p) (!(p) ? VM_PAGE_NULL : ((vm_page_t)((((uintptr_t)(p)) << 6) + (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS))) + +#else + +/* + * we can't do the packing trick on 32 bit architectures, so + * just turn the macros into noops. + */ +typedef struct vm_page *vm_page_packed_t; + +#define VM_PAGE_PACK_PTR(m) ((vm_page_packed_t)(m)) +#define VM_PAGE_UNPACK_PTR(p) ((vm_page_t)(p)) + +#endif + + struct vm_page { queue_chain_t pageq; /* queue info for FIFO */ /* queue or free list (P) */ queue_chain_t listq; /* all pages in same object (O) */ - struct vm_page *next; /* VP bucket link (O) */ - vm_object_t object; /* which object am I in (O&P) */ vm_object_offset_t offset; /* offset into that object (O,P) */ + vm_object_t object; /* which object am I in (O&P) */ + vm_page_packed_t next_m; /* VP bucket link (O) */ /* * The following word of flags is protected * by the "page queues" lock. @@ -197,8 +226,8 @@ struct vm_page { * the free list (P) */ no_cache:1, /* page is not to be cached and should * be reused ahead of other pages (P) */ - xpmapped:1, - __unused_pageq_bits:2; /* 2 bits available here */ + + __unused_pageq_bits:3; /* 3 bits available here */ ppnum_t phys_page; /* Physical address of page, passed * to pmap_enter (read-only) */ @@ -215,13 +244,17 @@ struct vm_page { (O) + the bucket lock */ fictitious:1, /* Physical page doesn't exist (O) */ /* - * IMPORTANT: the "pmapped" bit can be turned on while holding the - * VM object "shared" lock. See vm_fault_enter().
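
A minimal, compilable user-space sketch of the pointer-packing scheme in the vm_page.h hunk above (not part of the patch itself): KERNEL_BASE and the test address are made-up stand-ins for VM_MIN_KERNEL_AND_KEXT_ADDRESS and a real vm_page_t address, and the 6-bit shift assumes the 64-byte vm_page size called out in the comment.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define KERNEL_BASE 0xffffff7f80000000ULL /* stand-in for VM_MIN_KERNEL_AND_KEXT_ADDRESS */

/* pack: store the 64-byte-granular offset from the base in 32 bits */
static uint32_t pack_ptr(uint64_t addr)
{
	return addr ? (uint32_t)((addr - KERNEL_BASE) >> 6) : 0;
}

/* unpack: scale the offset back up and rebase it */
static uint64_t unpack_ptr(uint32_t packed)
{
	return packed ? (((uint64_t)packed << 6) + KERNEL_BASE) : 0;
}

int main(void)
{
	/* 2^32 64-byte units = 256 GB of coverage above the base address */
	uint64_t addr = KERNEL_BASE + (0x12345ULL << 6);

	assert(unpack_ptr(pack_ptr(addr)) == addr);
	assert(pack_ptr(0) == 0 && unpack_ptr(0) == 0);
	printf("0x%llx -> 0x%x -> 0x%llx\n",
	       (unsigned long long)addr, pack_ptr(addr),
	       (unsigned long long)unpack_ptr(addr));
	return 0;
}
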
- * This is OK as long as it's the only bit in this bit field that - * can be updated without holding the VM object "exclusive" lock. + * IMPORTANT: the "pmapped", "xpmapped" and "clustered" bits can be modified while holding the + * VM object "shared" lock + the page lock provided through the pmap_lock_phys_page function. + * This is done in vm_fault_enter and the CONSUME_CLUSTERED macro. + * It's also ok to modify them behind just the VM object "exclusive" lock. */ + clustered:1, /* page is not the faulted page (O) or (O-shared AND pmap_page) */ pmapped:1, /* page has been entered at some - * point into a pmap (O **shared**) */ + * point into a pmap (O) or (O-shared AND pmap_page) */ + xpmapped:1, /* page has been entered with execute permission (O) + or (O-shared AND pmap_page) */ + wpmapped:1, /* page has been entered at some * point into a pmap for write (O) */ pageout:1, /* page wired & busy for pageout (O) */ @@ -233,7 +266,6 @@ struct vm_page { cleaning:1, /* Page clean has begun (O) */ precious:1, /* Page is precious; data must be * returned even if clean (O) */ - clustered:1, /* page is not the faulted page (O) */ overwriting:1, /* Request to unlock has been made * without having data. (O) * [See vm_fault_page_overwrite] */ @@ -249,15 +281,9 @@ struct vm_page { reusable:1, lopage:1, slid:1, - was_dirty:1, /* was this page previously dirty? */ compressor:1, /* page owned by compressor pool */ written_by_kernel:1, /* page was written by kernel (i.e. decompressed) */ - __unused_object_bits:5; /* 5 bits available here */ - -#if __LP64__ - unsigned int __unused_padding; /* Pad structure explicitly - * to 8-byte multiple for LP64 */ -#endif + __unused_object_bits:5; /* 5 bits available here */ }; #define DEBUG_ENCRYPTED_SWAP 1 @@ -436,6 +462,8 @@ unsigned int vm_page_speculative_count; /* How many speculative pages are unclai extern unsigned int vm_page_pageable_internal_count; extern unsigned int vm_page_pageable_external_count; extern +unsigned int vm_page_xpmapped_external_count; /* How many pages are mapped executable? */ +extern unsigned int vm_page_external_count; /* How many pages are file-backed? */ extern unsigned int vm_page_internal_count; /* How many pages are anonymous? */ @@ -669,12 +697,21 @@ extern void vm_page_free_prepare_object( vm_page_t page, boolean_t remove_from_hash); +#if CONFIG_IOSCHED +extern wait_result_t vm_page_sleep( + vm_object_t object, + vm_page_t m, + int interruptible); +#endif + +extern void vm_pressure_response(void); + #if CONFIG_JETSAM extern void memorystatus_pages_update(unsigned int pages_avail); #define VM_CHECK_MEMORYSTATUS do { \ memorystatus_pages_update( \ - vm_page_external_count + \ + vm_page_pageable_external_count + \ vm_page_free_count + \ (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 
0 : vm_page_purgeable_count) \ ); \ @@ -683,8 +720,6 @@ extern void memorystatus_pages_update(unsigned int pages_avail); #else /* CONFIG_JETSAM */ -extern void vm_pressure_response(void); - #define VM_CHECK_MEMORYSTATUS vm_pressure_response() @@ -705,9 +740,14 @@ extern void vm_pressure_response(void); (((m)->wanted = TRUE), \ assert_wait((event_t) (m), (interruptible))) +#if CONFIG_IOSCHED #define PAGE_SLEEP(o, m, interruptible) \ - (((m)->wanted = TRUE), \ - thread_sleep_vm_object((o), (m), (interruptible))) + vm_page_sleep(o, m, interruptible) +#else +#define PAGE_SLEEP(o, m, interruptible) \ + (((m)->wanted = TRUE), \ + thread_sleep_vm_object((o), (m), (interruptible))) +#endif #define PAGE_WAKEUP_DONE(m) \ MACRO_BEGIN \ @@ -788,7 +828,7 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); boolean_t was_pageable; \ \ VM_PAGE_QUEUES_ASSERT(mem, 1); \ - assert(!mem->laundry); \ + assert(!mem->pageout_queue); \ /* \ * if (mem->pageout_queue) \ * NOTE: VM_PAGE_QUEUES_REMOVE does not deal with removing pages from the pageout queue... \ @@ -942,15 +982,28 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); #define VM_PAGE_CONSUME_CLUSTERED(mem) \ MACRO_BEGIN \ + pmap_lock_phys_page(mem->phys_page); \ if (mem->clustered) { \ assert(mem->object); \ mem->object->pages_used++; \ mem->clustered = FALSE; \ VM_PAGE_SPECULATIVE_USED_ADD(); \ } \ + pmap_unlock_phys_page(mem->phys_page); \ MACRO_END +#define VM_PAGE_COUNT_AS_PAGEIN(mem) \ + MACRO_BEGIN \ + DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL); \ + current_task()->pageins++; \ + if (mem->object->internal) { \ + DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); \ + } else { \ + DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); \ + } \ + MACRO_END + #define DW_vm_page_unwire 0x01 #define DW_vm_page_wire 0x02 @@ -967,6 +1020,7 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); #define DW_move_page 0x1000 #define DW_VM_PAGE_QUEUES_REMOVE 0x2000 #define DW_enqueue_cleaned 0x4000 +#define DW_vm_phantom_cache_update 0x8000 struct vm_page_delayed_work { vm_page_t dw_m; diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index ad6388a93..7d78f66b2 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,7 +68,6 @@ #include #include #include -#include #include #include @@ -107,6 +106,9 @@ #include #include +#if CONFIG_PHANTOM_CACHE +#include +#endif /* * ENCRYPTED SWAP: */ @@ -119,16 +121,27 @@ extern int cs_debug; #include #endif +extern void m_drain(void); + +#if VM_PRESSURE_EVENTS +extern unsigned int memorystatus_available_pages; +extern unsigned int memorystatus_available_pages_pressure; +extern unsigned int memorystatus_available_pages_critical; +extern unsigned int memorystatus_frozen_count; +extern unsigned int memorystatus_suspended_count; + extern vm_pressure_level_t memorystatus_vm_pressure_level; int memorystatus_purge_on_warning = 2; int memorystatus_purge_on_urgent = 5; int memorystatus_purge_on_critical = 8; -#if VM_PRESSURE_EVENTS void vm_pressure_response(void); boolean_t vm_pressure_thread_running = FALSE; extern void consider_vm_pressure_events(void); -#endif + +#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4 +#endif /* VM_PRESSURE_EVENTS */ + boolean_t vm_pressure_changed = FALSE; #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */ @@ -152,7 +165,7 @@ boolean_t vm_pressure_changed = FALSE; #endif /* VM_PAGEOUT_LAUNDRY_MAX */ #ifndef VM_PAGEOUT_BURST_WAIT -#define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds */ +#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */ #endif /* VM_PAGEOUT_BURST_WAIT */ #ifndef VM_PAGEOUT_EMPTY_WAIT @@ -227,10 +240,9 @@ unsigned int vm_page_speculative_percentage = 5; #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100) #endif /* VM_PAGE_FREE_MIN */ -#define VM_PAGE_FREE_RESERVED_LIMIT 100 -#define VM_PAGE_FREE_MIN_LIMIT 1500 -#define VM_PAGE_FREE_TARGET_LIMIT 2000 - +#define VM_PAGE_FREE_RESERVED_LIMIT 1700 +#define VM_PAGE_FREE_MIN_LIMIT 3500 +#define VM_PAGE_FREE_TARGET_LIMIT 4000 /* * When vm_page_free_count falls below vm_page_free_reserved, @@ -283,6 +295,12 @@ struct cq { #if VM_PRESSURE_EVENTS void vm_pressure_thread(void); + +boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void); +boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void); + +boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void); +boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void); #endif static void vm_pageout_garbage_collect(int); static void vm_pageout_iothread_continue(struct vm_pageout_queue *); @@ -1137,14 +1155,6 @@ mach_vm_pressure_monitor( */ extern void vm_pageout_io_throttle(void); - -#if LATENCY_JETSAM -boolean_t jlp_init = FALSE; -uint64_t jlp_time = 0, jlp_current = 0; -struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS]; -unsigned int latency_jetsam_wakeup = 0; -#endif /* LATENCY_JETSAM */ - /* * Page States: Used below to maintain the page state * before it's removed from its Q.
This saved state @@ -1234,33 +1244,16 @@ vm_pageout_scan(void) int page_prev_state = 0; int cache_evict_throttle = 0; uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0; + int force_purge = 0; + +#if VM_PRESSURE_EVENTS vm_pressure_level_t pressure_level; +#endif /* VM_PRESSURE_EVENTS */ VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START, vm_pageout_speculative_clean, vm_pageout_inactive_clean, vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); -#if LATENCY_JETSAM - if (jlp_init == FALSE) { - int i=0; - vm_page_t jlp; - for(; i < NUM_OF_JETSAM_LATENCY_TOKENS; i++) { - jlp = &jetsam_latency_page[i]; - jlp->fictitious = TRUE; - jlp->offset = 0; - - } - jlp = &jetsam_latency_page[0]; - queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq); - jlp->active = TRUE; - - jlp->offset = mach_absolute_time(); - jlp_time = jlp->offset; - jlp_current++; - jlp_init = TRUE; - } -#endif /* LATENCY_JETSAM */ - flow_control.state = FCS_IDLE; iq = &vm_pageout_queue_internal; eq = &vm_pageout_queue_external; @@ -1395,107 +1388,54 @@ Restart: DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); -#if LATENCY_JETSAM - if (m->fictitious) { - const uint32_t FREE_TARGET_MULTIPLIER = 2; - - uint64_t now = mach_absolute_time(); - uint64_t delta = now - m->offset; - clock_sec_t jl_secs = 0; - clock_usec_t jl_usecs = 0; - boolean_t issue_jetsam = FALSE; - - absolutetime_to_microtime(delta, &jl_secs, &jl_usecs); - jl_usecs += jl_secs * USEC_PER_SEC; + /* + * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... + * + * a TLB flush isn't really needed here since at worst we'll miss the reference bit being + * updated in the PTE if a remote processor still has this mapping cached in its TLB when the + * new reference happens. If no further references happen on the page after that remote TLB flushes + * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue + * by pageout_scan, which is just fine since the last reference would have happened quite far + * in the past (TLB caches don't hang around for very long), and of course could just as easily + * have happened before we moved the page + */ + pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); - /* Jetsam only if the token hasn't aged sufficiently and the free count is close to the target (avoiding spurious triggers) */ - if ((jl_usecs <= JETSAM_AGE_NOTIFY_CRITICAL) && (vm_page_free_count < (FREE_TARGET_MULTIPLIER * vm_page_free_target))) { - issue_jetsam = TRUE; - } - - VM_DEBUG_EVENT(vm_pageout_page_token, VM_PAGEOUT_PAGE_TOKEN, DBG_FUNC_NONE, - vm_page_active_count, vm_page_inactive_count, vm_page_free_count, jl_usecs); - - m->offset = 0; - queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); - queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); + /* + * The page might be absent or busy, + * but vm_page_deactivate can handle that.
+ * FALSE indicates that we don't want a H/W clear reference + */ + vm_page_deactivate_internal(m, FALSE); - m->offset = now; - jlp_time = now; - - if (issue_jetsam) { - vm_page_unlock_queues(); + if (delayed_unlock++ > delayed_unlock_limit) { - if (local_freeq) { - vm_page_free_list(local_freeq, TRUE); - local_freeq = NULL; - local_freed = 0; - } - - VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, - vm_page_active_count, vm_page_inactive_count, vm_page_free_count, 0); - - assert_wait_timeout(&latency_jetsam_wakeup, THREAD_INTERRUPTIBLE, 10 /* msecs */, 1000*NSEC_PER_USEC); - /* Kill the top process asynchronously */ - memorystatus_kill_on_VM_page_shortage(TRUE); - thread_block(THREAD_CONTINUE_NULL); + if (local_freeq) { + vm_page_unlock_queues(); - VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); + VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, + vm_page_free_count, local_freed, delayed_unlock_limit, 1); + + vm_page_free_list(local_freeq, TRUE); + + VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, + vm_page_free_count, 0, 0, 1); + local_freeq = NULL; + local_freed = 0; vm_page_lock_queues(); + } else { + lck_mtx_yield(&vm_page_queue_lock); } - } else { -#endif /* LATENCY_JETSAM */ - /* - * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... - * - * a TLB flush isn't really needed here since at worst we'll miss the reference bit being - * updated in the PTE if a remote processor still has this mapping cached in its TLB when the - * new reference happens. If no futher references happen on the page after that remote TLB flushes - * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue - * by pageout_scan, which is just fine since the last reference would have happened quite far - * in the past (TLB caches don't hang around for very long), and of course could just as easily - * have happened before we moved the page - */ - pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); + + delayed_unlock = 1; /* - * The page might be absent or busy, - * but vm_page_deactivate can handle that. - * FALSE indicates that we don't want a H/W clear reference + * continue the while loop processing + * the active queue... need to hold + * the page queues lock */ - vm_page_deactivate_internal(m, FALSE); - - if (delayed_unlock++ > delayed_unlock_limit) { - - if (local_freeq) { - vm_page_unlock_queues(); - - VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, - vm_page_free_count, local_freed, delayed_unlock_limit, 1); - - vm_page_free_list(local_freeq, TRUE); - - VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, - vm_page_free_count, 0, 0, 1); - - local_freeq = NULL; - local_freed = 0; - vm_page_lock_queues(); - } else - lck_mtx_yield(&vm_page_queue_lock); - - delayed_unlock = 1; - - /* - * continue the while loop processing - * the active queue... need to hold - * the page queues lock - */ - } -#if LATENCY_JETSAM } -#endif /* LATENCY_JETSAM */ } VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END, @@ -1585,33 +1525,36 @@ return_from_scan: * If the purge succeeds, go back to the top and reevaluate * the new memory situation.
*/ - pressure_level = memorystatus_vm_pressure_level; + assert (available_for_purge>=0); + force_purge = 0; /* no force-purging */ - if (available_for_purge - || pressure_level > kVMPressureNormal - ) { - int force_purge; - - if (object != NULL) { - vm_object_unlock(object); - object = NULL; - } +#if VM_PRESSURE_EVENTS + pressure_level = memorystatus_vm_pressure_level; - VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0); - memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START); + if (pressure_level > kVMPressureNormal) { - force_purge = 0; /* no force-purging */ if (pressure_level >= kVMPressureCritical) { force_purge = memorystatus_purge_on_critical; } else if (pressure_level >= kVMPressureUrgent) { force_purge = memorystatus_purge_on_urgent; } else if (pressure_level >= kVMPressureWarning) { force_purge = memorystatus_purge_on_warning; - } else { - force_purge = 0; } - if (vm_purgeable_object_purge_one(force_purge)) { + } +#endif /* VM_PRESSURE_EVENTS */ + + if (available_for_purge || force_purge) { + + if (object != NULL) { + vm_object_unlock(object); + object = NULL; + } + + memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START); + + VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0); + if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) { VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0); memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); @@ -1620,6 +1563,7 @@ return_from_scan: VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1); memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); } + if (queue_empty(&sq->age_q) && vm_page_speculative_count) { /* * try to pull pages from the aging bins... @@ -1830,8 +1774,9 @@ vm_pageout_scan_delay: } vm_pageout_scan_wants_object = VM_OBJECT_NULL; + vm_page_unlock_queues(); + if (local_freeq) { - vm_page_unlock_queues(); VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, vm_page_free_count, local_freed, delayed_unlock_limit, 3); @@ -1843,13 +1788,16 @@ vm_pageout_scan_delay: local_freeq = NULL; local_freed = 0; - vm_page_lock_queues(); + } + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) + vm_consider_waking_compactor_swapper(); - if (flow_control.state == FCS_DELAYED && - !VM_PAGE_Q_THROTTLED(iq)) { - flow_control.state = FCS_IDLE; - goto consider_inactive; - } + vm_page_lock_queues(); + + if (flow_control.state == FCS_DELAYED && + !VM_PAGE_Q_THROTTLED(iq)) { + flow_control.state = FCS_IDLE; + goto consider_inactive; } if (vm_page_free_count >= vm_page_free_target) { @@ -1980,7 +1928,7 @@ consider_inactive: * The most eligible pages are ones we paged in speculatively, * but which have not yet been touched. 
*/ - if (!queue_empty(&sq->age_q) ) { + if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) { m = (vm_page_t) queue_first(&sq->age_q); page_prev_state = PAGE_STATE_SPECULATIVE; @@ -2005,9 +1953,6 @@ consider_inactive: anons_grabbed = 0; } - if (grab_anonymous == TRUE && vm_compression_available() == FALSE) - grab_anonymous = FALSE; - if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) { if ( !queue_empty(&vm_page_queue_inactive) ) { @@ -2057,9 +2002,14 @@ consider_inactive: vm_page_lock_queues(); delayed_unlock = 1; + force_anonymous = FALSE; + if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) goto Restart; + if (!queue_empty(&sq->age_q)) + goto Restart; + panic("vm_pageout: no victim"); /* NOTREACHED */ @@ -2283,6 +2233,7 @@ reclaim_page: if (page_prev_state != PAGE_STATE_SPECULATIVE) vm_pageout_stats[vm_pageout_stat_now].reclaimed++; + inactive_burst_count = 0; goto done_with_inactivepage; } /* @@ -2307,7 +2258,7 @@ reclaim_page: goto reclaim_page; } - if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (COMPRESSED_PAGER_IS_ACTIVE) { /* * With the VM compressor, the cost of * reclaiming a page is much lower (no I/O), @@ -2423,7 +2374,9 @@ consider_inactive_page: VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); } - if (m->reference && !m->no_cache) { + if (!m->no_cache && + (m->reference || + (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) { /* * The page we pulled off the inactive list has * been referenced. It is possible for other @@ -2459,6 +2412,7 @@ reactivate_page: */ vm_page_activate(m); VM_STAT_INCR(reactivations); + inactive_burst_count = 0; } if (page_prev_state == PAGE_STATE_CLEAN) @@ -2528,6 +2482,7 @@ throttle_inactive: vm_pageout_scan_reclaimed_throttled++; + inactive_burst_count = 0; goto done_with_inactivepage; } if (inactive_throttled == TRUE) { @@ -2563,11 +2518,7 @@ throttle_inactive: queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); m->active = TRUE; vm_page_active_count++; - if (m->object->internal) { - vm_page_pageable_internal_count++; - } else { - vm_page_pageable_external_count++; - } + vm_page_pageable_external_count++; vm_pageout_adjust_io_throttles(iq, eq, FALSE); @@ -2601,6 +2552,7 @@ throttle_inactive: #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ force_anonymous = TRUE; #endif + inactive_burst_count = 0; goto done_with_inactivepage; } else { if (page_prev_state == PAGE_STATE_SPECULATIVE) @@ -2630,7 +2582,16 @@ throttle_inactive: */ if (m->pmapped == TRUE) { - if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || object->internal == FALSE) { + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE || object->internal == FALSE) { + /* + * Don't count this page as going into the compressor if any of these are true: + * 1) We have the dynamic pager i.e. no compressed pager + * 2) Freezer enabled device with a freezer file to hold the app data i.e. no compressed pager + * 3) Freezer enabled device with compressed pager backend (exclusive use) i.e. 
most of the VM system + (including vm_pageout_scan) has no knowledge of the compressor + * 4) This page belongs to a file and hence will not be sent into the compressor + */ + refmod_state = pmap_disconnect_options(m->phys_page, 0, NULL); } else { refmod_state = pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); @@ -2659,17 +2620,16 @@ throttle_inactive: else if (page_prev_state == PAGE_STATE_CLEAN) vm_pageout_cleaned_reclaimed++; - if (m->was_dirty) { - /* page on clean queue used to be dirty; we should increment the vm_stat pageout count here */ - VM_STAT_INCR(pageouts); - DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); - } vm_pageout_inactive_clean++; } /* * OK, at this point we have found a page we are going to free. */ +#if CONFIG_PHANTOM_CACHE + if (!object->internal) + vm_phantom_cache_add_ghost(m); +#endif goto reclaim_page; } @@ -2689,8 +2649,21 @@ throttle_inactive: if (inactive_throttled == TRUE) goto throttle_inactive; -#if VM_PRESSURE_EVENTS +#if VM_PRESSURE_EVENTS +#if CONFIG_JETSAM + + /* + * If Jetsam is enabled, then the sending + * of memory pressure notifications is handled + * from the same thread that takes care of high-water + * and other jetsams i.e. the memorystatus_thread. + */ + +#else /* CONFIG_JETSAM */ + vm_pressure_response(); + +#endif /* CONFIG_JETSAM */ #endif /* VM_PRESSURE_EVENTS */ /* @@ -2710,17 +2683,18 @@ throttle_inactive: done_with_inactivepage: - inactive_burst_count = 0; if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) { + boolean_t need_delay = TRUE; if (object != NULL) { vm_pageout_scan_wants_object = VM_OBJECT_NULL; vm_object_unlock(object); object = NULL; } + vm_page_unlock_queues(); + if (local_freeq) { - vm_page_unlock_queues(); VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, vm_page_free_count, local_freed, delayed_unlock_limit, 4); @@ -2732,17 +2706,21 @@ done_with_inactivepage: local_freeq = NULL; local_freed = 0; - vm_page_lock_queues(); - } else + need_delay = FALSE; + } + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + vm_consider_waking_compactor_swapper(); + need_delay = FALSE; + } + vm_page_lock_queues(); + + if (need_delay == TRUE) lck_mtx_yield(&vm_page_queue_lock); delayed_unlock = 1; } vm_pageout_considered_page++; - if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) - vm_consider_waking_compactor_swapper(); - /* * back to top of pageout scan loop */ @@ -3195,6 +3173,7 @@ vm_pageout_iothread_internal_continue(struct cq *cq) int local_freed = 0; int local_batch_size; kern_return_t retval; + int compressed_count_delta; KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0); @@ -3360,13 +3339,44 @@ vm_pageout_iothread_internal_continue(struct cq *cq) } else lck_mtx_unlock(&vm_page_queue_free_lock); } - retval = vm_compressor_pager_put(pager, m->offset + object->paging_offset, m->phys_page, &cq->current_chead, cq->scratch_buf); + + assert(object->activity_in_progress > 0); + + retval = vm_compressor_pager_put( + pager, + m->offset + object->paging_offset, + m->phys_page, + &cq->current_chead, + cq->scratch_buf, + &compressed_count_delta); vm_object_lock(object); + assert(object->activity_in_progress > 0); + + assert(m->object == object); + + vm_compressor_pager_count(pager, + compressed_count_delta, + FALSE, /* shared_lock */ + object); + m->laundry = FALSE; m->pageout = FALSE; if (retval == KERN_SUCCESS) { + /* + * If the object is purgeable, its owner's + * purgeable ledgers will be updated in + * 
vm_page_remove() but the page still + * contributes to the owner's memory footprint, + * so account for it as such. + */ + if (object->purgable != VM_PURGABLE_DENY && + object->vo_purgeable_owner != NULL) { + /* one more compressed purgeable page */ + vm_purgeable_compressed_update(object, + +1); + } vm_page_compressions_failing = FALSE; @@ -3542,23 +3552,29 @@ vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) } } - extern boolean_t memorystatus_manual_testing_on; extern unsigned int memorystatus_level; - #if VM_PRESSURE_EVENTS +boolean_t vm_pressure_events_enabled = FALSE; + void vm_pressure_response(void) { - vm_pressure_level_t old_level = kVMPressureNormal; int new_level = -1; - uint64_t available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100); + uint64_t available_memory = 0; + + if (vm_pressure_events_enabled == FALSE) + return; + + + available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100); + memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem)); @@ -3608,11 +3624,14 @@ vm_pressure_response(void) if (new_level != -1) { memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level; - if (old_level != new_level) { + if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) { if (vm_pressure_thread_running == FALSE) { thread_wakeup(&vm_pressure_thread); } - thread_wakeup(&vm_pressure_changed); + + if (old_level != new_level) { + thread_wakeup(&vm_pressure_changed); + } } } @@ -3623,7 +3642,7 @@ kern_return_t mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) { #if !VM_PRESSURE_EVENTS - + return KERN_FAILURE; #else /* VM_PRESSURE_EVENTS */ @@ -3670,15 +3689,15 @@ mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused un #if VM_PRESSURE_EVENTS void vm_pressure_thread(void) { - static boolean_t set_up_thread = FALSE; + static boolean_t thread_initialized = FALSE; - if (set_up_thread) { + if (thread_initialized == TRUE) { vm_pressure_thread_running = TRUE; consider_vm_pressure_events(); vm_pressure_thread_running = FALSE; } - set_up_thread = TRUE; + thread_initialized = TRUE; assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT); thread_block((thread_continue_t)vm_pressure_thread); } @@ -3713,6 +3732,7 @@ vm_pageout_garbage_collect(int collect) stack_collect(); consider_machine_collect(); + m_drain(); do { if (consider_buffer_cache_collect != NULL) { @@ -3738,12 +3758,42 @@ vm_pageout_garbage_collect(int collect) } +void vm_pageout_reinit_tuneables(void); + +void +vm_pageout_reinit_tuneables(void) +{ + vm_page_filecache_min = (uint32_t) (max_mem / PAGE_SIZE) / 15; + + if (vm_page_filecache_min < VM_PAGE_FILECACHE_MIN) + vm_page_filecache_min = VM_PAGE_FILECACHE_MIN; + + vm_compressor_minorcompact_threshold_divisor = 18; + vm_compressor_majorcompact_threshold_divisor = 22; + vm_compressor_unthrottle_threshold_divisor = 32; +} + + #if VM_PAGE_BUCKETS_CHECK #if VM_PAGE_FAKE_BUCKETS extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; #endif /* VM_PAGE_FAKE_BUCKETS */ #endif /* VM_PAGE_BUCKETS_CHECK */ +#define FBDP_TEST_COLLAPSE_COMPRESSOR 0 +#if FBDP_TEST_COLLAPSE_COMPRESSOR +extern boolean_t vm_object_collapse_compressor_allowed; +#include +#endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ + +#define FBDP_TEST_WIRE_AND_EXTRACT 0 +#if FBDP_TEST_WIRE_AND_EXTRACT +extern ledger_template_t task_ledger_template; +#include +extern ppnum_t vm_map_get_phys_page(vm_map_t map, + vm_offset_t offset); +#endif /* 
FBDP_TEST_WIRE_AND_EXTRACT */ + void vm_pageout(void) { @@ -3884,10 +3934,18 @@ vm_pageout(void) if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) vm_compressor_pager_init(); +#if VM_PRESSURE_EVENTS + vm_pressure_events_enabled = TRUE; +#endif /* VM_PRESSURE_EVENTS */ + +#if CONFIG_PHANTOM_CACHE + vm_phantom_cache_init(); +#endif #if VM_PAGE_BUCKETS_CHECK #if VM_PAGE_FAKE_BUCKETS printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n", - vm_page_fake_buckets_start, vm_page_fake_buckets_end); + (uint64_t) vm_page_fake_buckets_start, + (uint64_t) vm_page_fake_buckets_end); pmap_protect(kernel_pmap, vm_page_fake_buckets_start, vm_page_fake_buckets_end, @@ -3896,6 +3954,262 @@ vm_pageout(void) #endif /* VM_PAGE_FAKE_BUCKETS */ #endif /* VM_PAGE_BUCKETS_CHECK */ +#if VM_OBJECT_TRACKING + vm_object_tracking_init(); +#endif /* VM_OBJECT_TRACKING */ + + +#if FBDP_TEST_COLLAPSE_COMPRESSOR + vm_object_size_t backing_size, top_size; + vm_object_t backing_object, top_object; + vm_map_offset_t backing_offset, top_offset; + unsigned char *backing_address, *top_address; + kern_return_t kr; + + printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n"); + + /* create backing object */ + backing_size = 15 * PAGE_SIZE; + backing_object = vm_object_allocate(backing_size); + assert(backing_object != VM_OBJECT_NULL); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n", + backing_object); + /* map backing object */ + backing_offset = 0; + kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0, + VM_FLAGS_ANYWHERE, backing_object, 0, FALSE, + VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); + assert(kr == KERN_SUCCESS); + backing_address = (unsigned char *) backing_offset; + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "mapped backing object %p at 0x%llx\n", + backing_object, (uint64_t) backing_offset); + /* populate with pages to be compressed in backing object */ + backing_address[0x1*PAGE_SIZE] = 0xB1; + backing_address[0x4*PAGE_SIZE] = 0xB4; + backing_address[0x7*PAGE_SIZE] = 0xB7; + backing_address[0xa*PAGE_SIZE] = 0xBA; + backing_address[0xd*PAGE_SIZE] = 0xBD; + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "populated pages to be compressed in " + "backing_object %p\n", backing_object); + /* compress backing object */ + vm_object_pageout(backing_object); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n", + backing_object); + /* wait for all the pages to be gone */ + while (*(volatile int *)&backing_object->resident_page_count != 0) + IODelay(10); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n", + backing_object); + /* populate with pages to be resident in backing object */ + backing_address[0x0*PAGE_SIZE] = 0xB0; + backing_address[0x3*PAGE_SIZE] = 0xB3; + backing_address[0x6*PAGE_SIZE] = 0xB6; + backing_address[0x9*PAGE_SIZE] = 0xB9; + backing_address[0xc*PAGE_SIZE] = 0xBC; + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "populated pages to be resident in " + "backing_object %p\n", backing_object); + /* leave the other pages absent */ + /* mess with the paging_offset of the backing_object */ + assert(backing_object->paging_offset == 0); + backing_object->paging_offset = 0x3000; + + /* create top object */ + top_size = 9 * PAGE_SIZE; + top_object = vm_object_allocate(top_size); + assert(top_object != VM_OBJECT_NULL); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n", + top_object); + /* map top object */ + top_offset = 0; + kr = vm_map_enter(kernel_map, &top_offset, top_size, 0, + VM_FLAGS_ANYWHERE, 
top_object, 0, FALSE, + VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); + assert(kr == KERN_SUCCESS); + top_address = (unsigned char *) top_offset; + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "mapped top object %p at 0x%llx\n", + top_object, (uint64_t) top_offset); + /* populate with pages to be compressed in top object */ + top_address[0x3*PAGE_SIZE] = 0xA3; + top_address[0x4*PAGE_SIZE] = 0xA4; + top_address[0x5*PAGE_SIZE] = 0xA5; + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "populated pages to be compressed in " + "top_object %p\n", top_object); + /* compress top object */ + vm_object_pageout(top_object); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n", + top_object); + /* wait for all the pages to be gone */ + while (top_object->resident_page_count != 0); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n", + top_object); + /* populate with pages to be resident in top object */ + top_address[0x0*PAGE_SIZE] = 0xA0; + top_address[0x1*PAGE_SIZE] = 0xA1; + top_address[0x2*PAGE_SIZE] = 0xA2; + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "populated pages to be resident in " + "top_object %p\n", top_object); + /* leave the other pages absent */ + + /* link the 2 objects */ + vm_object_reference(backing_object); + top_object->shadow = backing_object; + top_object->vo_shadow_offset = 0x3000; + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n", + top_object, backing_object); + + /* unmap backing object */ + vm_map_remove(kernel_map, + backing_offset, + backing_offset + backing_size, + 0); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "unmapped backing_object %p [0x%llx:0x%llx]\n", + backing_object, + (uint64_t) backing_offset, + (uint64_t) (backing_offset + backing_size)); + + /* collapse */ + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object); + vm_object_lock(top_object); + vm_object_collapse(top_object, 0, FALSE); + vm_object_unlock(top_object); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object); + + /* did it work? 
*/ + if (top_object->shadow != VM_OBJECT_NULL) { + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n"); + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); + if (vm_object_collapse_compressor_allowed) { + panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); + } + } else { + /* check the contents of the mapping */ + unsigned char expect[9] = + { 0xA0, 0xA1, 0xA2, /* resident in top */ + 0xA3, 0xA4, 0xA5, /* compressed in top */ + 0xB9, /* resident in backing + shadow_offset */ + 0xBD, /* compressed in backing + shadow_offset + paging_offset */ + 0x00 }; /* absent in both */ + unsigned char actual[9]; + unsigned int i, errors; + + errors = 0; + for (i = 0; i < sizeof (actual); i++) { + actual[i] = (unsigned char) top_address[i*PAGE_SIZE]; + if (actual[i] != expect[i]) { + errors++; + } + } + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " + "actual [%x %x %x %x %x %x %x %x %x] " + "expect [%x %x %x %x %x %x %x %x %x] " + "%d errors\n", + actual[0], actual[1], actual[2], actual[3], + actual[4], actual[5], actual[6], actual[7], + actual[8], + expect[0], expect[1], expect[2], expect[3], + expect[4], expect[5], expect[6], expect[7], + expect[8], + errors); + if (errors) { + panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); + } else { + printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n"); + } + } +#endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ + +#if FBDP_TEST_WIRE_AND_EXTRACT + ledger_t ledger; + vm_map_t user_map, wire_map; + mach_vm_address_t user_addr, wire_addr; + mach_vm_size_t user_size, wire_size; + mach_vm_offset_t cur_offset; + vm_prot_t cur_prot, max_prot; + ppnum_t user_ppnum, wire_ppnum; + kern_return_t kr; + + ledger = ledger_instantiate(task_ledger_template, + LEDGER_CREATE_ACTIVE_ENTRIES); + user_map = vm_map_create(pmap_create(ledger, 0, TRUE), + 0x100000000ULL, + 0x200000000ULL, + TRUE); + wire_map = vm_map_create(NULL, + 0x100000000ULL, + 0x200000000ULL, + TRUE); + user_addr = 0; + user_size = 0x10000; + kr = mach_vm_allocate(user_map, + &user_addr, + user_size, + VM_FLAGS_ANYWHERE); + assert(kr == KERN_SUCCESS); + wire_addr = 0; + wire_size = user_size; + kr = mach_vm_remap(wire_map, + &wire_addr, + wire_size, + 0, + VM_FLAGS_ANYWHERE, + user_map, + user_addr, + FALSE, + &cur_prot, + &max_prot, + VM_INHERIT_NONE); + assert(kr == KERN_SUCCESS); + for (cur_offset = 0; + cur_offset < wire_size; + cur_offset += PAGE_SIZE) { + kr = vm_map_wire_and_extract(wire_map, + wire_addr + cur_offset, + VM_PROT_DEFAULT, + TRUE, + &wire_ppnum); + assert(kr == KERN_SUCCESS); + user_ppnum = vm_map_get_phys_page(user_map, + user_addr + cur_offset); + printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x " + "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", + kr, + user_map, user_addr + cur_offset, user_ppnum, + wire_map, wire_addr + cur_offset, wire_ppnum); + if (kr != KERN_SUCCESS || + wire_ppnum == 0 || + wire_ppnum != user_ppnum) { + panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); + } + } + cur_offset -= PAGE_SIZE; + kr = vm_map_wire_and_extract(wire_map, + wire_addr + cur_offset, + VM_PROT_DEFAULT, + TRUE, + &wire_ppnum); + assert(kr == KERN_SUCCESS); + printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x " + "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", + kr, + user_map, user_addr + cur_offset, user_ppnum, + wire_map, wire_addr + cur_offset, wire_ppnum); + if (kr != KERN_SUCCESS || + wire_ppnum == 0 || + wire_ppnum != user_ppnum) { + panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); + } + + printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n"); +#endif /* FBDP_TEST_WIRE_AND_EXTRACT */ + + vm_pageout_continue(); /* @@ -3968,6 +4282,51 @@ 
vm_pageout_internal_start(void) return result; } +#if CONFIG_IOSCHED +/* + * To support I/O Expedite for compressed files we mark the upls with special flags. + * The way decmpfs works is that we create a big upl which marks all the pages needed to + * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs + * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages + * being held in the big original UPL. We mark each of these smaller UPLs with the flag + * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the + * decmp_io_upl field (in the upl structure). This link is protected in the forward direction + * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link + * unless the real I/O upl is being destroyed). + */ + + +static void +upl_set_decmp_info(upl_t upl, upl_t src_upl) +{ + assert((src_upl->flags & UPL_DECMP_REQ) != 0); + + upl_lock(src_upl); + if (src_upl->decmp_io_upl) { + /* + * If there is already a live real I/O UPL, ignore this new UPL. + * This case should rarely happen and even if it does, it just means + * that we might issue a spurious expedite which the driver is expected + * to handle. + */ + upl_unlock(src_upl); + return; + } + src_upl->decmp_io_upl = (void *)upl; + src_upl->ref_count++; + upl_unlock(src_upl); + + upl->flags |= UPL_DECMP_REAL_IO; + upl->decmp_io_upl = (void *)src_upl; + +} +#endif /* CONFIG_IOSCHED */ + +#if UPL_DEBUG +int upl_debug_enabled = 1; +#else +int upl_debug_enabled = 0; +#endif static upl_t upl_create(int type, int flags, upl_size_t size) @@ -4005,18 +4364,40 @@ upl_create(int type, int flags, upl_size_t size) upl->highest_page = 0; upl_lock_init(upl); upl->vector_upl = NULL; +#if CONFIG_IOSCHED + if (type & UPL_CREATE_IO_TRACKING) { + upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO); + } + + upl->upl_reprio_info = 0; + upl->decmp_io_upl = 0; + if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) { + /* Only support expedite on internal UPLs */ + thread_t curthread = current_thread(); + upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size)); + bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size))); + upl->flags |= UPL_EXPEDITE_SUPPORTED; + if (curthread->decmp_upl != NULL) + upl_set_decmp_info(upl, curthread->decmp_upl); + } +#endif +#if CONFIG_IOSCHED || UPL_DEBUG + if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) { + upl->upl_creator = current_thread(); + upl->uplq.next = 0; + upl->uplq.prev = 0; + upl->flags |= UPL_TRACKED_BY_OBJECT; + } +#endif + #if UPL_DEBUG upl->ubc_alias1 = 0; upl->ubc_alias2 = 0; - upl->upl_creator = current_thread(); upl->upl_state = 0; upl->upl_commit_index = 0; bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records)); - upl->uplq.next = 0; - upl->uplq.prev = 0; - (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES); #endif /* UPL_DEBUG */ @@ -4033,8 +4414,20 @@ upl_destroy(upl_t upl) panic("upl(%p) ext_ref_count", upl); } -#if UPL_DEBUG - if ( !(upl->flags & UPL_VECTOR)) { +#if CONFIG_IOSCHED + if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) { + upl_t src_upl; + src_upl = upl->decmp_io_upl; + assert((src_upl->flags & UPL_DECMP_REQ) != 0); + upl_lock(src_upl); + src_upl->decmp_io_upl = NULL; + upl_unlock(src_upl); + upl_deallocate(src_upl); + } +#endif /* CONFIG_IOSCHED */ + +#if CONFIG_IOSCHED || UPL_DEBUG + if ((upl->flags &
UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) { vm_object_t object; if (upl->flags & UPL_SHADOWED) { @@ -4042,13 +4435,14 @@ upl_destroy(upl_t upl) } else { object = upl->map_object; } + vm_object_lock(object); queue_remove(&object->uplq, upl, upl_t, uplq); vm_object_activity_end(object); vm_object_collapse(object, 0, TRUE); vm_object_unlock(object); } -#endif /* UPL_DEBUG */ +#endif /* * drop a reference on the map_object whether or * not a pageout object is inserted @@ -4069,6 +4463,11 @@ upl_destroy(upl_t upl) upl_lock_destroy(upl); upl->vector_upl = (vector_upl_t) 0xfeedbeef; +#if CONFIG_IOSCHED + if (upl->flags & UPL_EXPEDITE_SUPPORTED) + kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE)); +#endif + if (upl->flags & UPL_INTERNAL) { kfree(upl, sizeof(struct upl) + @@ -4082,13 +4481,53 @@ upl_destroy(upl_t upl) void upl_deallocate(upl_t upl) { + upl_lock(upl); if (--upl->ref_count == 0) { if(vector_upl_is_valid(upl)) vector_upl_deallocate(upl); + upl_unlock(upl); upl_destroy(upl); } + else + upl_unlock(upl); +} + +#if CONFIG_IOSCHED +void +upl_mark_decmp(upl_t upl) +{ + if (upl->flags & UPL_TRACKED_BY_OBJECT) { + upl->flags |= UPL_DECMP_REQ; + upl->upl_creator->decmp_upl = (void *)upl; + } +} + +void +upl_unmark_decmp(upl_t upl) +{ + if(upl && (upl->flags & UPL_DECMP_REQ)) { + upl->upl_creator->decmp_upl = NULL; + } +} + +#endif /* CONFIG_IOSCHED */ + +#define VM_PAGE_Q_BACKING_UP(q) \ + ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10)) + +boolean_t must_throttle_writes(void); + +boolean_t +must_throttle_writes() +{ + if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) && + vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10) + return (TRUE); + + return (FALSE); } + #if DEVELOPMENT || DEBUG /* * Statistics about UPL enforcement of copy-on-write obligations.
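
The must_throttle_writes() predicate added above is self-contained enough to restate as a compilable sketch (not part of the patch): queue_external, pageable_external_count, and available_non_compressed below are made-up stand-ins for the kernel's vm_pageout_queue_external, vm_page_pageable_external_count, and AVAILABLE_NON_COMPRESSED_MEMORY.

#include <stdbool.h>
#include <stdio.h>

struct pageout_queue {
	unsigned int pgo_laundry;    /* pages currently queued for cleaning */
	unsigned int pgo_maxlaundry; /* limit on the cleaning backlog */
};

/* stand-ins for the kernel's external pageout queue and VM counters */
static struct pageout_queue queue_external;
static unsigned int pageable_external_count;
static unsigned int available_non_compressed;

/* a queue is "backing up" once its laundry reaches 80% of its limit */
static bool queue_backing_up(const struct pageout_queue *q)
{
	return q->pgo_laundry >= (q->pgo_maxlaundry * 8) / 10;
}

/*
 * throttle writers only when the external queue is backing up AND
 * file-backed pageable pages exceed 60% of the non-compressed pool
 */
static bool should_throttle_writes(void)
{
	return queue_backing_up(&queue_external) &&
	    pageable_external_count > (available_non_compressed * 6) / 10;
}

int main(void)
{
	queue_external.pgo_laundry = 90;
	queue_external.pgo_maxlaundry = 100;
	pageable_external_count = 700;
	available_non_compressed = 1000;

	/* 90 >= 80 and 700 > 600, so this prints "throttle" */
	printf("%s\n", should_throttle_writes() ? "throttle" : "ok");
	return 0;
}

The vm_object_upl_request() hunk that follows consumes this predicate, stalling writers with a per-page delay (shorter when the backing store is an SSD) rather than blocking them outright.
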
@@ -4175,6 +4614,7 @@ vm_object_upl_request( struct vm_page_delayed_work *dwp; int dw_count; int dw_limit; + int io_tracking_flag = 0; if (cntrl_flags & ~UPL_VALID_FLAGS) { /* @@ -4189,16 +4629,25 @@ vm_object_upl_request( panic("vm_object_upl_request: contiguous object specified\n"); - if ((size / PAGE_SIZE) > MAX_UPL_SIZE) - size = MAX_UPL_SIZE * PAGE_SIZE; + if (size > MAX_UPL_SIZE_BYTES) + size = MAX_UPL_SIZE_BYTES; if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) - *page_list_count = MAX_UPL_SIZE; + *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT; + +#if CONFIG_IOSCHED || UPL_DEBUG + if (object->io_tracking || upl_debug_enabled) + io_tracking_flag |= UPL_CREATE_IO_TRACKING; +#endif +#if CONFIG_IOSCHED + if (object->io_tracking) + io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; +#endif if (cntrl_flags & UPL_SET_INTERNAL) { if (cntrl_flags & UPL_SET_LITE) { - upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size); + upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); lite_list = (wpl_array_t) @@ -4209,7 +4658,7 @@ vm_object_upl_request( lite_list = NULL; } } else { - upl = upl_create(UPL_CREATE_INTERNAL, 0, size); + upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size); user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); if (size == 0) { @@ -4219,14 +4668,14 @@ vm_object_upl_request( } else { if (cntrl_flags & UPL_SET_LITE) { - upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size); + upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); if (size == 0) { lite_list = NULL; } } else { - upl = upl_create(UPL_CREATE_EXTERNAL, 0, size); + upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size); } } *upl_ptr = upl; @@ -4275,11 +4724,12 @@ vm_object_upl_request( upl->size = size; upl->offset = offset + object->paging_offset; -#if UPL_DEBUG - vm_object_activity_begin(object); - queue_enter(&object->uplq, upl, upl_t, uplq); -#endif /* UPL_DEBUG */ - +#if CONFIG_IOSCHED || UPL_DEBUG + if (object->io_tracking || upl_debug_enabled) { + vm_object_activity_begin(object); + queue_enter(&object->uplq, upl, upl_t, uplq); + } +#endif if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) { /* * Honor copy-on-write obligations @@ -4318,9 +4768,26 @@ vm_object_upl_request( dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); if (vm_page_free_count > (vm_page_free_target + size_in_pages) || - object->resident_page_count < (MAX_UPL_SIZE * 2)) + object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) object->scan_collisions = 0; + if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) { + boolean_t isSSD = FALSE; + + vnode_pager_get_isSSD(object->pager, &isSSD); + vm_object_unlock(object); + + OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); + + if (isSSD == TRUE) + delay(1000 * size_in_pages); + else + delay(5000 * size_in_pages); + OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); + + vm_object_lock(object); + } + while (xfer_size) { dwp->dw_mask = 0; @@ -4727,6 +5194,9 @@ check_busy: * speculative list */ dst_page->clustered = TRUE; + + if ( !(cntrl_flags & UPL_FILE_IO)) + VM_STAT_INCR(pageins); } } /* @@ -4877,7 +5347,8 @@ check_busy: * update clustered and speculative state * */ - VM_PAGE_CONSUME_CLUSTERED(dst_page); + if (dst_page->clustered) + 
VM_PAGE_CONSUME_CLUSTERED(dst_page); } try_next_page: if (dwp->dw_mask) { @@ -5114,8 +5585,8 @@ REDISCOVER_ENTRY: } if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { - if ((*upl_size/PAGE_SIZE) > MAX_UPL_SIZE) - *upl_size = MAX_UPL_SIZE * PAGE_SIZE; + if (*upl_size > MAX_UPL_SIZE_BYTES) + *upl_size = MAX_UPL_SIZE_BYTES; } /* * Create an object if necessary. @@ -5135,80 +5606,99 @@ REDISCOVER_ENTRY: vm_map_unlock_read(map); return KERN_PROTECTION_FAILURE; } + } - local_object = entry->object.vm_object; - if (vm_map_entry_should_cow_for_true_share(entry) && - local_object->vo_size > *upl_size && - *upl_size != 0) { - vm_prot_t prot; + local_object = entry->object.vm_object; + if (vm_map_entry_should_cow_for_true_share(entry) && + local_object->vo_size > *upl_size && + *upl_size != 0) { + vm_prot_t prot; - /* - * Set up the targeted range for copy-on-write to avoid - * applying true_share/copy_delay to the entire object. - */ + /* + * Set up the targeted range for copy-on-write to avoid + * applying true_share/copy_delay to the entire object. + */ - if (vm_map_lock_read_to_write(map)) { - goto REDISCOVER_ENTRY; - } + if (vm_map_lock_read_to_write(map)) { + goto REDISCOVER_ENTRY; + } - vm_map_clip_start(map, - entry, - vm_map_trunc_page(offset, - VM_MAP_PAGE_MASK(map))); - vm_map_clip_end(map, - entry, - vm_map_round_page(offset + *upl_size, - VM_MAP_PAGE_MASK(map))); - prot = entry->protection & ~VM_PROT_WRITE; - if (override_nx(map, entry->alias) && prot) - prot |= VM_PROT_EXECUTE; - vm_object_pmap_protect(local_object, - entry->offset, - entry->vme_end - entry->vme_start, - ((entry->is_shared || map->mapped_in_other_pmaps) - ? PMAP_NULL - : map->pmap), - entry->vme_start, - prot); - entry->needs_copy = TRUE; - - vm_map_lock_write_to_read(map); - } - - if (entry->needs_copy) { - /* - * Honor copy-on-write for COPY_SYMMETRIC - * strategy. - */ - vm_map_t local_map; - vm_object_t object; - vm_object_offset_t new_offset; - vm_prot_t prot; - boolean_t wired; - vm_map_version_t version; - vm_map_t real_map; - - local_map = map; - - if (vm_map_lookup_locked(&local_map, - offset, VM_PROT_WRITE, - OBJECT_LOCK_EXCLUSIVE, - &version, &object, - &new_offset, &prot, &wired, - NULL, - &real_map) != KERN_SUCCESS) { - vm_map_unlock_read(local_map); - return KERN_FAILURE; + vm_map_clip_start(map, + entry, + vm_map_trunc_page(offset, + VM_MAP_PAGE_MASK(map))); + vm_map_clip_end(map, + entry, + vm_map_round_page(offset + *upl_size, + VM_MAP_PAGE_MASK(map))); + if ((entry->vme_end - offset) < *upl_size) { + *upl_size = (upl_size_t) (entry->vme_end - offset); + assert(*upl_size == entry->vme_end - offset); + } + + prot = entry->protection & ~VM_PROT_WRITE; + if (override_nx(map, entry->alias) && prot) + prot |= VM_PROT_EXECUTE; + vm_object_pmap_protect(local_object, + entry->offset, + entry->vme_end - entry->vme_start, + ((entry->is_shared || map->mapped_in_other_pmaps) + ? PMAP_NULL + : map->pmap), + entry->vme_start, + prot); + entry->needs_copy = TRUE; + + vm_map_lock_write_to_read(map); + } + + if (entry->needs_copy) { + /* + * Honor copy-on-write for COPY_SYMMETRIC + * strategy. 
+ */ + vm_map_t local_map; + vm_object_t object; + vm_object_offset_t new_offset; + vm_prot_t prot; + boolean_t wired; + vm_map_version_t version; + vm_map_t real_map; + vm_prot_t fault_type; + + local_map = map; + + if (caller_flags & UPL_COPYOUT_FROM) { + fault_type = VM_PROT_READ | VM_PROT_COPY; + vm_counters.create_upl_extra_cow++; + vm_counters.create_upl_extra_cow_pages += (entry->vme_end - entry->vme_start) / PAGE_SIZE; + } else { + fault_type = VM_PROT_WRITE; + } + if (vm_map_lookup_locked(&local_map, + offset, fault_type, + OBJECT_LOCK_EXCLUSIVE, + &version, &object, + &new_offset, &prot, &wired, + NULL, + &real_map) != KERN_SUCCESS) { + if (fault_type == VM_PROT_WRITE) { + vm_counters.create_upl_lookup_failure_write++; + } else { + vm_counters.create_upl_lookup_failure_copy++; } - if (real_map != map) - vm_map_unlock(real_map); vm_map_unlock_read(local_map); + return KERN_FAILURE; + } + if (real_map != map) + vm_map_unlock(real_map); + vm_map_unlock_read(local_map); - vm_object_unlock(object); + vm_object_unlock(object); - goto REDISCOVER_ENTRY; - } + goto REDISCOVER_ENTRY; } + if (sync_cow_data) { if (entry->object.vm_object->shadow || entry->object.vm_object->copy) { local_object = entry->object.vm_object; @@ -5510,9 +6000,9 @@ process_upl_to_enter: * but only in kernel space. If this was on a user map, * we'd have to set the wpmapped bit. */ /* m->wpmapped = TRUE; */ - assert(map==kernel_map); + assert(map->pmap == kernel_pmap); - PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, VM_PROT_NONE, 0, TRUE); + PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE); } offset += PAGE_SIZE_64; } @@ -5638,7 +6128,6 @@ process_upl_to_remove: return KERN_FAILURE; } -extern int panic_on_cs_killed; kern_return_t upl_commit_range( upl_t upl, @@ -5667,6 +6156,14 @@ upl_commit_range( upl_t vector_upl = NULL; boolean_t should_be_throttled = FALSE; + vm_page_t nxt_page = VM_PAGE_NULL; + int fast_path_possible = 0; + int fast_path_full_commit = 0; + int throttle_page = 0; + int unwired_count = 0; + int local_queue_count = 0; + queue_head_t local_queue; + *empty = FALSE; if (upl == UPL_NULL) @@ -5779,6 +6276,29 @@ process_upl_to_commit: dw_count = 0; dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); + if ((upl->flags & UPL_IO_WIRE) && + !(flags & UPL_COMMIT_FREE_ABSENT) && + !isVectorUPL && + shadow_object->purgable != VM_PURGABLE_VOLATILE && + shadow_object->purgable != VM_PURGABLE_EMPTY) { + + if (!queue_empty(&shadow_object->memq)) { + queue_init(&local_queue); + if (size == shadow_object->vo_size) { + nxt_page = (vm_page_t)queue_first(&shadow_object->memq); + fast_path_full_commit = 1; + } + fast_path_possible = 1; + + if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal && + (shadow_object->purgable == VM_PURGABLE_DENY || + shadow_object->purgable == VM_PURGABLE_NONVOLATILE || + shadow_object->purgable == VM_PURGABLE_VOLATILE)) { + throttle_page = 1; + } + } + } + while (xfer_size) { vm_page_t t, m; @@ -5790,15 +6310,21 @@ process_upl_to_commit: if (upl->flags & UPL_LITE) { unsigned int pg_num; + if (nxt_page != VM_PAGE_NULL) { + m = nxt_page; + nxt_page = (vm_page_t)queue_next(&nxt_page->listq); + target_offset = m->offset; + } pg_num = (unsigned int) (target_offset/PAGE_SIZE); assert(pg_num == target_offset/PAGE_SIZE); if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); - if (!(upl->flags & UPL_KERNEL_OBJECT)) + if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) m = 
vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); - } + } else + m = NULL; } if (upl->flags & UPL_SHADOWED) { if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { @@ -5807,11 +6333,11 @@ process_upl_to_commit: VM_PAGE_FREE(t); - if (m == VM_PAGE_NULL) + if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); } } - if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL) + if (m == VM_PAGE_NULL) goto commit_next_page; if (m->compressor) { @@ -5831,7 +6357,7 @@ process_upl_to_commit: m->cs_tainted = page_list[entry].cs_tainted; } if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) - m->written_by_kernel = TRUE; + m->written_by_kernel = TRUE; if (upl->flags & UPL_IO_WIRE) { @@ -5852,8 +6378,7 @@ process_upl_to_commit: * so it will need to be * re-validated. */ - if (panic_on_cs_killed && - m->slid) { + if (m->slid) { panic("upl_commit_range(%p): page %p was slid\n", upl, m); } @@ -5866,10 +6391,6 @@ process_upl_to_commit: } clear_refmod |= VM_MEM_MODIFIED; } - if (flags & UPL_COMMIT_INACTIVATE) { - dwp->dw_mask |= DW_vm_page_deactivate_internal; - clear_refmod |= VM_MEM_REFERENCED; - } if (upl->flags & UPL_ACCESS_BLOCKED) { /* * We blocked access to the pages in this UPL. @@ -5878,19 +6399,61 @@ process_upl_to_commit: */ dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); } - if (m->absent) { - if (flags & UPL_COMMIT_FREE_ABSENT) - dwp->dw_mask |= DW_vm_page_free; - else { + if (fast_path_possible) { + assert(m->object->purgable != VM_PURGABLE_EMPTY); + assert(m->object->purgable != VM_PURGABLE_VOLATILE); + if (m->absent) { + assert(m->wire_count == 0); + assert(m->busy); + m->absent = FALSE; dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + } else { + if (m->wire_count == 0) + panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object); + + /* + * XXX FBDP need to update some other + * counters here (purgeable_wired_count) + * (ledgers), ... + */ + assert(m->wire_count); + m->wire_count--; - if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) - dwp->dw_mask |= DW_vm_page_activate; + if (m->wire_count == 0) + unwired_count++; } - } else - dwp->dw_mask |= DW_vm_page_unwire; + if (m->wire_count == 0) { + queue_enter(&local_queue, m, vm_page_t, pageq); + local_queue_count++; + if (throttle_page) { + m->throttled = TRUE; + } else { + if (flags & UPL_COMMIT_INACTIVATE) + m->inactive = TRUE; + else + m->active = TRUE; + } + } + } else { + if (flags & UPL_COMMIT_INACTIVATE) { + dwp->dw_mask |= DW_vm_page_deactivate_internal; + clear_refmod |= VM_MEM_REFERENCED; + } + if (m->absent) { + if (flags & UPL_COMMIT_FREE_ABSENT) + dwp->dw_mask |= DW_vm_page_free; + else { + m->absent = FALSE; + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + + if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) + dwp->dw_mask |= DW_vm_page_activate; + } + } else + dwp->dw_mask |= DW_vm_page_unwire; + } goto commit_next_page; } assert(!m->compressor); @@ -5925,8 +6488,7 @@ process_upl_to_commit: * so it will need to be * re-validated. 
 */
-			if (panic_on_cs_killed &&
-			    m->slid) {
+			if (m->slid) {
 				panic("upl_commit_range(%p): page %p was slid\n",
 				      upl, m);
 			}
@@ -5942,6 +6504,10 @@ process_upl_to_commit:
 			 * the (COPY_OUT_FROM == FALSE) request_page_list case
 			 */
 			if (m->busy) {
+#if CONFIG_PHANTOM_CACHE
+				if (m->absent && !m->object->internal)
+					dwp->dw_mask |= DW_vm_phantom_cache_update;
+#endif
 				m->absent = FALSE;
 
 				dwp->dw_mask |= DW_clear_busy;
@@ -6038,8 +6604,8 @@ process_upl_to_commit:
 			if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
 				pgpgout_count++;
 
-				/* this page used to be dirty; now it's on the clean queue. */
-				m->was_dirty = TRUE;
+				VM_STAT_INCR(pageouts);
+				DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
 
 				dwp->dw_mask |= DW_enqueue_cleaned;
 				vm_pageout_enqueued_cleaned_from_inactive_dirty++;
@@ -6077,7 +6643,6 @@ process_upl_to_commit:
 			 */
 			dwp->dw_mask |= DW_clear_busy;
 		}
-
 		/*
 		 * Wakeup any thread waiting for the page to be un-cleaning.
 		 */
@@ -6113,6 +6678,80 @@ commit_next_page:
 	if (dw_count)
 		vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
 
+	if (fast_path_possible) {
+
+		assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
+		assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
+
+		if (local_queue_count || unwired_count) {
+
+			if (local_queue_count) {
+				vm_page_t	first_local, last_local;
+				vm_page_t	first_target;
+				queue_head_t	*target_queue;
+
+				if (throttle_page)
+					target_queue = &vm_page_queue_throttled;
+				else {
+					if (flags & UPL_COMMIT_INACTIVATE) {
+						if (shadow_object->internal)
+							target_queue = &vm_page_queue_anonymous;
+						else
+							target_queue = &vm_page_queue_inactive;
+					} else
+						target_queue = &vm_page_queue_active;
+				}
+				/*
+				 * Transfer the entire local queue to the appropriate regular LRU page queue.
+				 */
+				first_local = (vm_page_t) queue_first(&local_queue);
+				last_local = (vm_page_t) queue_last(&local_queue);
+
+				vm_page_lockspin_queues();
+
+				first_target = (vm_page_t) queue_first(target_queue);
+
+				if (queue_empty(target_queue))
+					queue_last(target_queue) = (queue_entry_t) last_local;
+				else
+					queue_prev(&first_target->pageq) = (queue_entry_t) last_local;
+
+				queue_first(target_queue) = (queue_entry_t) first_local;
+				queue_prev(&first_local->pageq) = (queue_entry_t) target_queue;
+				queue_next(&last_local->pageq) = (queue_entry_t) first_target;
+
+				/*
+				 * Adjust the global page counts.
+ */ + if (throttle_page) { + vm_page_throttled_count += local_queue_count; + } else { + if (flags & UPL_COMMIT_INACTIVATE) { + if (shadow_object->internal) + vm_page_anonymous_count += local_queue_count; + vm_page_inactive_count += local_queue_count; + + token_new_pagecount += local_queue_count; + } else + vm_page_active_count += local_queue_count; + + if (shadow_object->internal) + vm_page_pageable_internal_count += local_queue_count; + else + vm_page_pageable_external_count += local_queue_count; + } + } else { + vm_page_lockspin_queues(); + } + if (unwired_count) { + vm_page_wire_count -= unwired_count; + VM_CHECK_MEMORYSTATUS; + } + vm_page_unlock_queues(); + + shadow_object->wired_page_count -= unwired_count; + } + } occupied = 1; if (upl->flags & UPL_DEVICE_MEMORY) { @@ -6121,14 +6760,17 @@ commit_next_page: int pg_num; int i; - pg_num = upl->size/PAGE_SIZE; - pg_num = (pg_num + 31) >> 5; occupied = 0; - for (i = 0; i < pg_num; i++) { - if (lite_list[i] != 0) { - occupied = 1; - break; + if (!fast_path_full_commit) { + pg_num = upl->size/PAGE_SIZE; + pg_num = (pg_num + 31) >> 5; + + for (i = 0; i < pg_num; i++) { + if (lite_list[i] != 0) { + occupied = 1; + break; + } } } } else { @@ -6621,6 +7263,82 @@ upl_commit( return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty); } + +void +iopl_valid_data( + upl_t upl) +{ + vm_object_t object; + vm_offset_t offset; + vm_page_t m, nxt_page = VM_PAGE_NULL; + upl_size_t size; + int wired_count = 0; + + if (upl == NULL) + panic("iopl_valid_data: NULL upl"); + if (vector_upl_is_valid(upl)) + panic("iopl_valid_data: vector upl"); + if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE) + panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags); + + object = upl->map_object; + + if (object == kernel_object || object == compressor_object) + panic("iopl_valid_data: object == kernel or compressor"); + + if (object->purgable == VM_PURGABLE_VOLATILE) + panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE"); + + size = upl->size; + + vm_object_lock(object); + + if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) + nxt_page = (vm_page_t)queue_first(&object->memq); + else + offset = 0 + upl->offset - object->paging_offset; + + while (size) { + + if (nxt_page != VM_PAGE_NULL) { + m = nxt_page; + nxt_page = (vm_page_t)queue_next(&nxt_page->listq); + } else { + m = vm_page_lookup(object, offset); + offset += PAGE_SIZE; + + if (m == VM_PAGE_NULL) + panic("iopl_valid_data: missing expected page at offset %lx", (long)offset); + } + if (m->busy) { + if (!m->absent) + panic("iopl_valid_data: busy page w/o absent"); + + if (m->pageq.next || m->pageq.prev) + panic("iopl_valid_data: busy+absent page on page queue"); + + m->absent = FALSE; + m->dirty = TRUE; + m->wire_count++; + wired_count++; + + PAGE_WAKEUP_DONE(m); + } + size -= PAGE_SIZE; + } + if (wired_count) { + object->wired_page_count += wired_count; + + vm_page_lockspin_queues(); + vm_page_wire_count += wired_count; + vm_page_unlock_queues(); + } + vm_object_unlock(object); +} + + + + void vm_object_set_pmap_cache_attr( vm_object_t object, @@ -6667,6 +7385,13 @@ vm_object_iopl_request( int dw_limit; int dw_index; boolean_t caller_lookup; + int io_tracking_flag = 0; + int interruptible; + + boolean_t set_cache_attr_needed = FALSE; + boolean_t free_wired_pages = FALSE; + int fast_path_possible = 0; + if (cntrl_flags & ~UPL_VALID_FLAGS) { /* @@ -6708,20 +7433,21 @@ vm_object_iopl_request( else prot 
= VM_PROT_READ | VM_PROT_WRITE; - if (((size/PAGE_SIZE) > MAX_UPL_SIZE) && !object->phys_contiguous) - size = MAX_UPL_SIZE * PAGE_SIZE; - - if (cntrl_flags & UPL_SET_INTERNAL) { - if (page_list_count != NULL) - *page_list_count = MAX_UPL_SIZE; - } - if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) && - ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size))) - return KERN_INVALID_ARGUMENT; - if ((!object->internal) && (object->paging_offset != 0)) panic("vm_object_iopl_request: external object with non-zero paging offset\n"); +#if CONFIG_IOSCHED || UPL_DEBUG + if ((object->io_tracking && object != kernel_object) || upl_debug_enabled) + io_tracking_flag |= UPL_CREATE_IO_TRACKING; +#endif + +#if CONFIG_IOSCHED + if (object->io_tracking) { + /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */ + if (object != kernel_object) + io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; + } +#endif if (object->phys_contiguous) psize = PAGE_SIZE; @@ -6729,7 +7455,7 @@ vm_object_iopl_request( psize = size; if (cntrl_flags & UPL_SET_INTERNAL) { - upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize); + upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + @@ -6739,7 +7465,7 @@ vm_object_iopl_request( lite_list = NULL; } } else { - upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize); + upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); if (size == 0) { @@ -6780,11 +7506,28 @@ vm_object_iopl_request( upl->flags |= UPL_ACCESS_BLOCKED; } - if (object->phys_contiguous) { -#if UPL_DEBUG + if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) && + object->purgable != VM_PURGABLE_VOLATILE && + object->purgable != VM_PURGABLE_EMPTY && + object->copy == NULL && + size == object->vo_size && + offset == 0 && + object->resident_page_count == 0 && + object->shadow == NULL && + object->pager == NULL) + { + fast_path_possible = 1; + set_cache_attr_needed = TRUE; + } + +#if CONFIG_IOSCHED || UPL_DEBUG + if (upl->flags & UPL_TRACKED_BY_OBJECT) { vm_object_activity_begin(object); queue_enter(&object->uplq, upl, upl_t, uplq); -#endif /* UPL_DEBUG */ + } +#endif + + if (object->phys_contiguous) { if (upl->flags & UPL_ACCESS_BLOCKED) { assert(!object->blocked_access); @@ -6817,16 +7560,27 @@ vm_object_iopl_request( /* * Protect user space from future COW operations */ +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; } - -#if UPL_DEBUG - vm_object_activity_begin(object); - queue_enter(&object->uplq, upl, upl_t, uplq); -#endif /* UPL_DEBUG */ if (!(cntrl_flags & UPL_COPYOUT_FROM) && object->copy != VM_OBJECT_NULL) { @@ -6860,12 +7614,99 @@ vm_object_iopl_request( iopl_cow_pages += size >> PAGE_SHIFT; #endif } - + if (cntrl_flags & UPL_SET_INTERRUPTIBLE) + interruptible = THREAD_ABORTSAFE; + else 
+ interruptible = THREAD_UNINT; entry = 0; xfer_size = size; dst_offset = offset; + dw_count = 0; + + if (fast_path_possible) { + int wired_count = 0; + + while (xfer_size) { + + while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) { + OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); + + VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); + + if (vm_page_wait(interruptible) == FALSE) { + /* + * interrupted case + */ + OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); + + VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); + + if (wired_count) { + vm_page_lockspin_queues(); + vm_page_wire_count += wired_count; + vm_page_unlock_queues(); + + free_wired_pages = TRUE; + } + ret = MACH_SEND_INTERRUPTED; + + goto return_err; + } + OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); + + VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); + } + if (no_zero_fill == FALSE) + vm_page_zero_fill(dst_page); + else + dst_page->absent = TRUE; + + dst_page->reference = TRUE; + + if (!(cntrl_flags & UPL_COPYOUT_FROM)) { + SET_PAGE_DIRTY(dst_page, FALSE); + } + if (dst_page->absent == FALSE) { + assert(object->purgable != VM_PURGABLE_VOLATILE); + assert(object->purgable != VM_PURGABLE_EMPTY); + dst_page->wire_count++; + wired_count++; + + PAGE_WAKEUP_DONE(dst_page); + } + vm_page_insert_internal(dst_page, object, dst_offset, FALSE, TRUE, TRUE); + + lite_list[entry>>5] |= 1 << (entry & 31); + + if (dst_page->phys_page > upl->highest_page) + upl->highest_page = dst_page->phys_page; + + if (user_page_list) { + user_page_list[entry].phys_addr = dst_page->phys_page; + user_page_list[entry].absent = dst_page->absent; + user_page_list[entry].dirty = dst_page->dirty; + user_page_list[entry].precious = FALSE; + user_page_list[entry].pageout = FALSE; + user_page_list[entry].device = FALSE; + user_page_list[entry].needed = FALSE; + user_page_list[entry].speculative = FALSE; + user_page_list[entry].cs_validated = FALSE; + user_page_list[entry].cs_tainted = FALSE; + } + entry++; + dst_offset += PAGE_SIZE_64; + xfer_size -= PAGE_SIZE; + size_in_pages--; + } + if (wired_count) { + vm_page_lockspin_queues(); + vm_page_wire_count += wired_count; + vm_page_unlock_queues(); + } + goto finish; + } fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; fault_info.user_tag = 0; @@ -6875,10 +7716,11 @@ vm_object_iopl_request( fault_info.stealth = FALSE; fault_info.io_sync = FALSE; fault_info.cs_bypass = FALSE; - fault_info.mark_zf_absent = (0 == (cntrl_flags & UPL_NOZEROFILLIO)); + fault_info.mark_zf_absent = TRUE; + fault_info.interruptible = interruptible; + fault_info.batch_pmap_op = TRUE; dwp = &dw_array[0]; - dw_count = 0; dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); while (xfer_size) { @@ -6911,6 +7753,7 @@ vm_object_iopl_request( ret = KERN_MEMORY_ERROR; goto return_err; } + set_cache_attr_needed = TRUE; /* * We just looked up the page and the result remains valid @@ -6923,16 +7766,8 @@ vm_object_iopl_request( do { vm_page_t top_page; kern_return_t error_code; - int interruptible; - - if (cntrl_flags & UPL_SET_INTERRUPTIBLE) - interruptible = THREAD_ABORTSAFE; - else - interruptible = THREAD_UNINT; - fault_info.interruptible = interruptible; fault_info.cluster_size = xfer_size; - fault_info.batch_pmap_op = TRUE; vm_object_paging_begin(object); @@ -7180,11 +8015,13 @@ record_phys_addr: * update clustered and speculative state * */ - VM_PAGE_CONSUME_CLUSTERED(dst_page); 
+ if (dst_page->clustered) + VM_PAGE_CONSUME_CLUSTERED(dst_page); } entry++; dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; + size_in_pages--; if (dwp->dw_mask) { VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); @@ -7200,7 +8037,9 @@ record_phys_addr: if (dw_count) vm_page_do_delayed_work(object, &dw_array[0], dw_count); - vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE); +finish: + if (user_page_list && set_cache_attr_needed == TRUE) + vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE); if (page_list_count != NULL) { if (upl->flags & UPL_INTERNAL) @@ -7262,7 +8101,7 @@ return_err: } vm_page_lock_queues(); - if (dst_page->absent) { + if (dst_page->absent || free_wired_pages == TRUE) { vm_page_free(dst_page); need_unwire = FALSE; @@ -7344,15 +8183,28 @@ upl_transpose( * Make each UPL point to the correct VM object, i.e. the * object holding the pages that the UPL refers to... */ -#if UPL_DEBUG - queue_remove(&object1->uplq, upl1, upl_t, uplq); - queue_remove(&object2->uplq, upl2, upl_t, uplq); +#if CONFIG_IOSCHED || UPL_DEBUG + if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { + vm_object_lock(object1); + vm_object_lock(object2); + } + if (upl1->flags & UPL_TRACKED_BY_OBJECT) + queue_remove(&object1->uplq, upl1, upl_t, uplq); + if (upl2->flags & UPL_TRACKED_BY_OBJECT) + queue_remove(&object2->uplq, upl2, upl_t, uplq); #endif upl1->map_object = object2; upl2->map_object = object1; -#if UPL_DEBUG - queue_enter(&object1->uplq, upl2, upl_t, uplq); - queue_enter(&object2->uplq, upl1, upl_t, uplq); + +#if CONFIG_IOSCHED || UPL_DEBUG + if (upl1->flags & UPL_TRACKED_BY_OBJECT) + queue_enter(&object2->uplq, upl1, upl_t, uplq); + if (upl2->flags & UPL_TRACKED_BY_OBJECT) + queue_enter(&object1->uplq, upl2, upl_t, uplq); + if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { + vm_object_unlock(object2); + vm_object_unlock(object1); + } #endif } @@ -7565,9 +8417,12 @@ vm_paging_map_object( */ vm_paging_page_waiter_total++; vm_paging_page_waiter++; - thread_sleep_fast_usimple_lock(&vm_paging_page_waiter, - &vm_paging_lock, - THREAD_UNINT); + kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT); + if (kr == THREAD_WAITING) { + simple_unlock(&vm_paging_lock); + kr = thread_block(THREAD_CONTINUE_NULL); + simple_lock(&vm_paging_lock); + } vm_paging_page_waiter--; /* ... and try again */ } @@ -7762,7 +8617,7 @@ vm_paging_unmap_object( } } -#if CRYPTO +#if ENCRYPTED_SWAP /* * Encryption data. * "iv" is the "initial vector". 
Ideally, we want to @@ -8289,7 +9144,7 @@ process_upl_to_encrypt: goto process_upl_to_encrypt; } -#else /* CRYPTO */ +#else /* ENCRYPTED_SWAP */ void upl_encrypt( __unused upl_t upl, @@ -8312,7 +9167,7 @@ vm_page_decrypt( { } -#endif /* CRYPTO */ +#endif /* ENCRYPTED_SWAP */ /* * page->object must be locked @@ -8687,6 +9542,25 @@ upl_set_referenced( upl_unlock(upl); } +#if CONFIG_IOSCHED +void +upl_set_blkno( + upl_t upl, + vm_offset_t upl_offset, + int io_size, + int64_t blkno) +{ + int i,j; + if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) + return; + + assert(upl->upl_reprio_info != 0); + for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) { + UPL_SET_REPRIO_INFO(upl, i, blkno, io_size); + } +} +#endif + boolean_t vm_page_is_slideable(vm_page_t m) { @@ -8776,8 +9650,20 @@ vm_page_slide( assert(!page->slid); assert(page->object->object_slid); - pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE); - kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex); + /* on some platforms this is an extern int, on others it's a cpp macro */ + __unreachable_ok_push + /* TODO: Consider this */ + if (!TEST_PAGE_SIZE_4K) { + for (int i = 0; i < 4; i++) { + pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/0x1000); + kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr + (0x1000*i), pageIndex + i); + } + } else { + pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE); + kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex); + } + __unreachable_ok_pop + vm_page_slide_counter++; /* @@ -8854,7 +9740,6 @@ ppnum_t upl_phys_page(upl_page_info_t *upl, int index) return(UPL_PHYS_PAGE(upl, index)); } - void vm_countdirtypages(void) { @@ -8970,3 +9855,80 @@ int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2) return KERN_SUCCESS; } #endif /* UPL_DEBUG */ + +#if VM_PRESSURE_EVENTS +/* + * Upward trajectory. + */ +extern boolean_t vm_compressor_low_on_space(void); + +boolean_t +VM_PRESSURE_NORMAL_TO_WARNING(void) { + + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { + + /* Available pages below our threshold */ + if (memorystatus_available_pages < memorystatus_available_pages_pressure) { + /* No frozen processes to kill */ + if (memorystatus_frozen_count == 0) { + /* Not enough suspended processes available. */ + if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) { + return TRUE; + } + } + } + return FALSE; + + } else { + return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0); + } +} + +boolean_t +VM_PRESSURE_WARNING_TO_CRITICAL(void) { + + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { + /* Available pages below our threshold */ + if (memorystatus_available_pages < memorystatus_available_pages_critical) { + return TRUE; + } + return FALSE; + } else { + return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0); + } +} + +/* + * Downward trajectory. 
+ */ +boolean_t +VM_PRESSURE_WARNING_TO_NORMAL(void) { + + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { + /* Available pages above our threshold */ + unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100); + if (memorystatus_available_pages > target_threshold) { + return TRUE; + } + return FALSE; + } else { + return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0); + } +} + +boolean_t +VM_PRESSURE_CRITICAL_TO_WARNING(void) { + + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { + /* Available pages above our threshold */ + unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100); + if (memorystatus_available_pages > target_threshold) { + return TRUE; + } + return FALSE; + } else { + return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0); + } +} +#endif /* VM_PRESSURE_EVENTS */ + diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h index 7396460b2..6d358ed3c 100644 --- a/osfmk/vm/vm_pageout.h +++ b/osfmk/vm/vm_pageout.h @@ -74,7 +74,7 @@ #include #include -#include +#include #include @@ -92,27 +92,16 @@ /* externally manipulated counters */ extern unsigned int vm_pageout_cleaned_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated; -#if CONFIG_JETSAM -#define LATENCY_JETSAM FALSE -#if LATENCY_JETSAM -#define JETSAM_LATENCY_TOKEN_AGE 3000 /* 3ms */ -#define NUM_OF_JETSAM_LATENCY_TOKENS 1000 - -#define JETSAM_AGE_NOTIFY_CRITICAL 1500000 /* 1.5 secs */ - -extern boolean_t jlp_init; -extern uint64_t jlp_time, jlp_current; -extern unsigned int latency_jetsam_wakeup; -#endif /* LATENCY_JETSAM */ -#endif /* CONFIG_JETSAM */ - #if CONFIG_FREEZE extern boolean_t memorystatus_freeze_enabled; -#define VM_DYNAMIC_PAGING_ENABLED(port) ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) || (memorystatus_freeze_enabled == FALSE && IP_VALID(port))) +#define VM_DYNAMIC_PAGING_ENABLED(port) (COMPRESSED_PAGER_IS_ACTIVE || (memorystatus_freeze_enabled == FALSE && IP_VALID(port))) #else #define VM_DYNAMIC_PAGING_ENABLED(port) (COMPRESSED_PAGER_IS_ACTIVE || IP_VALID(port)) #endif +#if VM_PRESSURE_EVENTS +extern boolean_t vm_pressure_events_enabled; +#endif /* VM_PRESSURE_EVENTS */ extern int vm_debug_events; @@ -128,12 +117,16 @@ extern int vm_debug_events; #define VM_PAGEOUT_CACHE_EVICT 0x108 #define VM_PAGEOUT_THREAD_BLOCK 0x109 #define VM_PAGEOUT_JETSAM 0x10A -#define VM_PAGEOUT_PAGE_TOKEN 0x10B #define VM_UPL_PAGE_WAIT 0x120 #define VM_IOPL_PAGE_WAIT 0x121 #define VM_PAGE_WAIT_BLOCK 0x122 +#if CONFIG_IOSCHED +#define VM_PAGE_SLEEP 0x123 +#define VM_PAGE_EXPEDITE 0x124 +#endif + #define VM_PRESSURE_EVENT 0x130 #define VM_EXECVE 0x131 #define VM_WAKEUP_COMPACTOR_SWAPPER 0x132 @@ -145,7 +138,7 @@ extern int vm_debug_events; } \ MACRO_END -extern void inline memoryshot(unsigned int event, unsigned int control); +extern void memoryshot(unsigned int event, unsigned int control); extern kern_return_t vm_map_create_upl( vm_map_t map, @@ -162,6 +155,8 @@ extern ppnum_t upl_get_highest_page( extern upl_size_t upl_get_size( upl_t upl); +extern void iopl_valid_data( + upl_t upl_ptr); #ifndef MACH_KERNEL_PRIVATE typedef struct vm_page *vm_page_t; @@ -298,12 +293,19 @@ struct upl { vm_object_t 
map_object; ppnum_t highest_page; void* vector_upl; +#if CONFIG_IOSCHED + int upl_priority; + uint64_t *upl_reprio_info; + void *decmp_io_upl; +#endif +#if CONFIG_IOSCHED || UPL_DEBUG + thread_t upl_creator; + queue_chain_t uplq; /* List of outstanding upls on an obj */ +#endif #if UPL_DEBUG uintptr_t ubc_alias1; uintptr_t ubc_alias2; - queue_chain_t uplq; /* List of outstanding upls on an obj */ - - thread_t upl_creator; + uint32_t upl_state; uint32_t upl_commit_index; void *upl_create_retaddr[UPL_DEBUG_STACK_FRAMES]; @@ -330,11 +332,17 @@ struct upl { #define UPL_VECTOR 0x4000 #define UPL_SET_DIRTY 0x8000 #define UPL_HAS_BUSY 0x10000 +#define UPL_TRACKED_BY_OBJECT 0x20000 +#define UPL_EXPEDITE_SUPPORTED 0x40000 +#define UPL_DECMP_REQ 0x80000 +#define UPL_DECMP_REAL_IO 0x100000 /* flags for upl_create flags parameter */ #define UPL_CREATE_EXTERNAL 0 #define UPL_CREATE_INTERNAL 0x1 #define UPL_CREATE_LITE 0x2 +#define UPL_CREATE_IO_TRACKING 0x4 +#define UPL_CREATE_EXPEDITE_SUP 0x8 extern upl_t vector_upl_create(vm_offset_t); extern void vector_upl_deallocate(upl_t); @@ -488,6 +496,7 @@ struct vm_page_stats_reusable { extern struct vm_page_stats_reusable vm_page_stats_reusable; extern int hibernate_flush_memory(void); +extern void hibernate_reset_stats(void); extern void hibernate_create_paddr_map(void); extern int vm_compressor_mode; @@ -509,6 +518,8 @@ extern int vm_compressor_thread_count; #define DEFAULT_FREEZER_IS_ACTIVE ((vm_compressor_mode & VM_PAGER_FREEZER_DEFAULT) == VM_PAGER_FREEZER_DEFAULT) #define DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE (vm_compressor_mode & (VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP | VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) +#define DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ((vm_compressor_mode & VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP) == VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP) +#define DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED ((vm_compressor_mode & VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP) == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP) #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_phantom_cache.c b/osfmk/vm/vm_phantom_cache.c new file mode 100644 index 000000000..cd86dfc8c --- /dev/null +++ b/osfmk/vm/vm_phantom_cache.c @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+uint32_t phantom_cache_eval_period_in_msecs = 250;
+uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
+uint32_t phantom_cache_thrashing_threshold = 100;
+
+/*
+ * Number of consecutive thrashing periods required before
+ * vm_phantom_cache_check_pressure() returns true.
+ */
+unsigned phantom_cache_contiguous_periods = 2;
+
+clock_sec_t	pc_start_of_eval_period_sec = 0;
+clock_nsec_t	pc_start_of_eval_period_nsec = 0;
+boolean_t	pc_need_eval_reset = FALSE;
+
+/* One bit per recent sampling period. Bit 0 = current period. */
+uint32_t	pc_history = 0;
+
+uint32_t	sample_period_ghost_added_count = 0;
+uint32_t	sample_period_ghost_added_count_ssd = 0;
+uint32_t	sample_period_ghost_found_count = 0;
+uint32_t	sample_period_ghost_found_count_ssd = 0;
+
+uint32_t	vm_phantom_object_id = 1;
+#define		VM_PHANTOM_OBJECT_ID_AFTER_WRAP	1000000
+
+vm_ghost_t	vm_phantom_cache;
+uint32_t	vm_phantom_cache_nindx = 1;
+uint32_t	vm_phantom_cache_num_entries = 0;
+uint32_t	vm_phantom_cache_size;
+
+typedef uint32_t	vm_phantom_hash_entry_t;
+vm_phantom_hash_entry_t	*vm_phantom_cache_hash;
+uint32_t	vm_phantom_cache_hash_size;
+uint32_t	vm_ghost_hash_mask;		/* Mask for hash function */
+uint32_t	vm_ghost_bucket_hash;		/* Basic bucket hash */
+
+
+int pg_masks[4] = {
+	0x1, 0x2, 0x4, 0x8
+};
+
+
+#define vm_phantom_hash(obj_id, offset) (\
+	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
+
+
+struct phantom_cache_stats {
+	uint32_t	pcs_wrapped;
+	uint32_t	pcs_added_page_to_entry;
+	uint32_t	pcs_added_new_entry;
+	uint32_t	pcs_replaced_entry;
+
+	uint32_t	pcs_lookup_found_page_in_cache;
+	uint32_t	pcs_lookup_entry_not_in_cache;
+	uint32_t	pcs_lookup_page_not_in_entry;
+
+	uint32_t	pcs_updated_phantom_state;
+} phantom_cache_stats;
+
+
+void
+vm_phantom_cache_init()
+{
+	unsigned int	num_entries;
+	unsigned int	log1;
+	unsigned int	size;
+
+	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
+	vm_phantom_cache_num_entries = 1;
+
+	while (vm_phantom_cache_num_entries < num_entries)
+		vm_phantom_cache_num_entries <<= 1;
+
+	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
+	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;
+
+	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT) != KERN_SUCCESS)
+		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
+	bzero(vm_phantom_cache, vm_phantom_cache_size);
+
+	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT) != KERN_SUCCESS)
+		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
+	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);
+
+
+	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;
+
+	/*
+	 * Calculate object_id shift value for hashing algorithm:
+	 *   O = log2(sizeof(struct vm_object))
+	 *   B = log2(vm_page_bucket_count)
+	 * hash shifts the object_id left by B/2 - O
+	 */
+	size = vm_phantom_cache_num_entries;
+	for (log1 = 0; size > 1; log1++)
+		size /= 2;
+
+	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);		/* Get (ceiling of sqrt of table size) */
+	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2);		/* Get (ceiling of quadroot of table size) */
+	vm_ghost_bucket_hash |= 1;				/* Set bit and add 1 - always must be 1 to ensure unique series */
+
+	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries)
+		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
+}
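(Editorial illustration, not part of the patch.) vm_phantom_cache_init() above rounds the entry count up to a power of two so the hash can use a simple mask, then builds an odd multiplier from bits near the square root and fourth root of the table size. A minimal user-space sketch of that sizing and hash arithmetic, with the kernel's max_mem, PAGE_SIZE and types replaced by plain integers (all names here are hypothetical, chosen for the sketch):

#include <stdio.h>
#include <stdint.h>

#define GHOST_PAGES_PER_ENTRY 4

static uint32_t ghost_hash_mask, ghost_bucket_hash;

static void phantom_sizing(uint64_t max_mem, uint32_t page_size)
{
    uint32_t num_entries, table_size, log1, size;

    /* one entry per 4 resident pages, covering 1/4 of physical memory */
    num_entries = (uint32_t)(((max_mem / page_size) / 4) / GHOST_PAGES_PER_ENTRY);

    table_size = 1;
    while (table_size < num_entries)    /* round up to a power of two */
        table_size <<= 1;

    ghost_hash_mask = table_size - 1;   /* power-of-two size -> mask, no modulo */

    size = table_size;
    for (log1 = 0; size > 1; log1++)    /* log1 = log2(table_size) */
        size /= 2;

    ghost_bucket_hash  = 1 << ((log1 + 1) >> 1);   /* ~sqrt of table size */
    ghost_bucket_hash |= 1 << ((log1 + 1) >> 2);   /* ~fourth root of table size */
    ghost_bucket_hash |= 1;                        /* keep the multiplier odd */
}

static uint32_t phantom_hash(uint32_t obj_id, uint64_t offset)
{
    return ((obj_id * ghost_bucket_hash) + (uint32_t)(offset ^ ghost_bucket_hash))
           & ghost_hash_mask;
}

int main(void)
{
    phantom_sizing(8ULL << 30, 4096);   /* e.g. 8GB of 4KB pages */
    printf("mask 0x%x multiplier 0x%x sample bucket %u\n",
           ghost_hash_mask, ghost_bucket_hash, phantom_hash(42, 7));
    return 0;
}

Under these assumptions an 8GB/4KB configuration yields a 131072-entry table (mask 0x1ffff) and multiplier 0x211.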
+
+
+void
+vm_phantom_cache_add_ghost(vm_page_t m)
+{
+	vm_ghost_t	vpce;
+	int		ghost_index;
+	int		pg_mask;
+	boolean_t	isSSD = FALSE;
+	vm_phantom_hash_entry_t	ghost_hash_index;
+
+#if MACH_ASSERT || DEBUG
+	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+	vm_object_lock_assert_exclusive(m->object);
+#endif
+
+	if (vm_phantom_cache_num_entries == 0)
+		return;
+
+	pg_mask = pg_masks[(m->offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
+
+	if (m->object->phantom_object_id == 0) {
+
+		vnode_pager_get_isSSD(m->object->pager, &isSSD);
+
+		if (isSSD == TRUE)
+			m->object->phantom_isssd = TRUE;
+
+		m->object->phantom_object_id = vm_phantom_object_id++;
+
+		if (vm_phantom_object_id == 0)
+			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
+	} else {
+		if ( (vpce = vm_phantom_cache_lookup_ghost(m, 0)) ) {
+			vpce->g_pages_held |= pg_mask;
+
+			phantom_cache_stats.pcs_added_page_to_entry++;
+			goto done;
+		}
+	}
+	/*
+	 * If we're here, the vm_ghost_t for this vm_page_t is not
+	 * present in the phantom cache... take the next available
+	 * entry in the LRU, first evicting the existing entry if
+	 * we've wrapped the ring.
+	 */
+	ghost_index = vm_phantom_cache_nindx++;
+
+	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
+		vm_phantom_cache_nindx = 1;
+
+		phantom_cache_stats.pcs_wrapped++;
+	}
+	vpce = &vm_phantom_cache[ghost_index];
+
+	if (vpce->g_obj_id) {
+		/*
+		 * we're going to replace an existing entry
+		 * so first remove it from the hash
+		 */
+		vm_ghost_t nvpce;
+
+		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
+
+		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];
+
+		if (nvpce == vpce) {
+			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
+		} else {
+			for (;;) {
+				if (nvpce->g_next_index == 0)
+					panic("didn't find ghost in hash\n");
+
+				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
+					nvpce->g_next_index = vpce->g_next_index;
+					break;
+				}
+				nvpce = &vm_phantom_cache[nvpce->g_next_index];
+			}
+		}
+		phantom_cache_stats.pcs_replaced_entry++;
+	} else
+		phantom_cache_stats.pcs_added_new_entry++;
+
+	vpce->g_pages_held = pg_mask;
+	vpce->g_obj_offset = (m->offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
+	vpce->g_obj_id = m->object->phantom_object_id;
+
+	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
+	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
+	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;
+
+done:
+	if (m->object->phantom_isssd)
+		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
+	else
+		OSAddAtomic(1, &sample_period_ghost_added_count);
+}
+
+
+vm_ghost_t
+vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
+{
+	uint64_t	g_obj_offset;
+	uint32_t	g_obj_id;
+	uint32_t	ghost_index;
+
+	if ((g_obj_id = m->object->phantom_object_id) == 0) {
+		/*
+		 * no entries in phantom cache for this object
+		 */
+		return (NULL);
+	}
+	g_obj_offset = (m->offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
+
+	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];
+
+	while (ghost_index) {
+		vm_ghost_t	vpce;
+
+		vpce = &vm_phantom_cache[ghost_index];
+
+		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
+
+			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
+				phantom_cache_stats.pcs_lookup_found_page_in_cache++;
+
+				return (vpce);
+			}
+			phantom_cache_stats.pcs_lookup_page_not_in_entry++;
+
+			return (NULL);
+		}
+		ghost_index = vpce->g_next_index;
+	}
+	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;
+
+	return (NULL);
+}
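(Editorial illustration, not part of the patch.) One vm_ghost entry covers four consecutive pages of an object: the low two page-number bits select a bit in g_pages_held, and the remaining offset bits, masked to 39 bits, identify the entry itself. A standalone sketch of that offset split, assuming 4KB pages (names and constants mirror the patch but the program is hypothetical):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT           12
#define GHOST_PAGE_MASK      0x3
#define GHOST_PAGE_SHIFT     2
#define GHOST_OFFSET_MASK    0x7FFFFFFFFFULL   /* 39 bits */

/* one ghost entry tracks 4 consecutive pages, one g_pages_held bit each */
static void map_offset(uint64_t mem_offset)
{
    unsigned bit   = (mem_offset >> PAGE_SHIFT) & GHOST_PAGE_MASK;
    uint64_t g_off = (mem_offset >> (PAGE_SHIFT + GHOST_PAGE_SHIFT)) & GHOST_OFFSET_MASK;

    printf("offset 0x%llx -> entry offset 0x%llx, pages_held bit %u (mask 0x%x)\n",
           (unsigned long long)mem_offset, (unsigned long long)g_off,
           bit, 1u << bit);
}

int main(void)
{
    /* pages 0..5 of an object: pages 0-3 share one entry, 4-5 the next */
    for (uint64_t off = 0; off < 6 * 4096; off += 4096)
        map_offset(off);
    return 0;
}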
+
+
+void
+vm_phantom_cache_update(vm_page_t m)
+{
+	int		pg_mask;
+	vm_ghost_t	vpce;
+
+#if MACH_ASSERT || DEBUG
+	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+	vm_object_lock_assert_exclusive(m->object);
+#endif
+
+	if (vm_phantom_cache_num_entries == 0)
+		return;
+
+	pg_mask = pg_masks[(m->offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
+
+	if ( (vpce = vm_phantom_cache_lookup_ghost(m, pg_mask)) ) {
+
+		vpce->g_pages_held &= ~pg_mask;
+
+		phantom_cache_stats.pcs_updated_phantom_state++;
+
+		if (m->object->phantom_isssd)
+			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
+		else
+			OSAddAtomic(1, &sample_period_ghost_found_count);
+	}
+}
+
+
+#define PHANTOM_CACHE_DEBUG	1
+
+#if PHANTOM_CACHE_DEBUG
+
+int	sample_period_ghost_counts_indx = 0;
+
+struct {
+	uint32_t	added;
+	uint32_t	found;
+	uint32_t	added_ssd;
+	uint32_t	found_ssd;
+	uint32_t	elapsed_ms;
+	boolean_t	pressure_detected;
+} sample_period_ghost_counts[256];
+
+#endif
+
+/*
+ * Determine if the file cache is thrashing from sampling interval statistics.
+ *
+ * Pages added to the phantom cache = pages evicted from the file cache.
+ * Pages found in the phantom cache = reads of pages that were recently evicted.
+ * Threshold is the latency-dependent number of reads we consider thrashing.
+ */
+static boolean_t
+is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
+{
+	/* Ignore normal activity below the threshold. */
+	if (added < threshold || found < threshold)
+		return FALSE;
+
+	/*
+	 * When thrashing in a way that we can mitigate, most of the pages read
+	 * into the file cache were recently evicted, and 'found' will be close
+	 * to 'added'.
+	 *
+	 * When replacing the current working set because a new app is
+	 * launched, we see very high read traffic with sporadic phantom cache
+	 * hits.
+	 *
+	 * This is not thrashing, or freeing up memory wouldn't help much
+	 * anyway.
+	 */
+	if (found < added / 2)
+		return FALSE;
+
+	return TRUE;
+}
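(Editorial illustration, not part of the patch.) is_thrashing() above feeds the pc_history bitmap maintained by vm_phantom_cache_check_pressure() below: each sampling period shifts the history left and sets bit 0 if either the SSD or spinning-disk counters look like thrashing, and pressure is declared only once phantom_cache_contiguous_periods consecutive low bits are set. A self-contained sketch of that history logic, with made-up sample data (the shift-then-set ordering here is a simplification of the kernel's reset path):

#include <stdio.h>
#include <stdint.h>

static unsigned contiguous_periods = 2;   /* as in phantom_cache_contiguous_periods */

static int is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
    if (added < threshold || found < threshold)
        return 0;                 /* normal activity below the threshold */
    if (found < added / 2)
        return 0;                 /* mostly fresh reads, not re-reads of evictions */
    return 1;
}

int main(void)
{
    uint32_t history = 0;
    /* simulated (added, found) counters for four sampling periods */
    uint32_t samples[4][2] = { {50, 10}, {500, 400}, {600, 550}, {700, 650} };

    for (unsigned i = 0; i < 4; i++) {
        history <<= 1;            /* new period: shift, bit 0 = current */
        if (is_thrashing(samples[i][0], samples[i][1], 100))
            history |= 1;

        uint32_t mask = (1u << contiguous_periods) - 1;
        printf("period %u: history 0x%x -> pressure %s\n",
               i, history, ((history & mask) == mask) ? "yes" : "no");
    }
    return 0;
}

With these numbers, pressure first triggers in period 2, the second consecutive thrashing period.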
+
+/*
+ * The following function is never called from multiple threads
+ * simultaneously, due to a condition variable used to serialize
+ * at the compressor level... thus there is no need to provide
+ * locking for the sample processing.
+ */
+boolean_t
+vm_phantom_cache_check_pressure()
+{
+	clock_sec_t	cur_ts_sec;
+	clock_nsec_t	cur_ts_nsec;
+	uint64_t	elapsed_msecs_in_eval;
+	boolean_t	pressure_detected = FALSE;
+
+	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
+
+	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);
+
+	/*
+	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
+	 * whenever vm_phantom_cache_restart_sample has been called.
+	 */
+	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
+		pc_need_eval_reset = TRUE;
+	}
+
+	if (pc_need_eval_reset == TRUE) {
+
+#if PHANTOM_CACHE_DEBUG
+		/*
+		 * maintain some info about the last 256 sample periods
+		 */
+		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
+		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
+		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
+		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
+		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;
+
+		sample_period_ghost_counts_indx++;
+
+		if (sample_period_ghost_counts_indx >= 256)
+			sample_period_ghost_counts_indx = 0;
+#endif
+		sample_period_ghost_added_count = 0;
+		sample_period_ghost_found_count = 0;
+		sample_period_ghost_added_count_ssd = 0;
+		sample_period_ghost_found_count_ssd = 0;
+
+		pc_start_of_eval_period_sec = cur_ts_sec;
+		pc_start_of_eval_period_nsec = cur_ts_nsec;
+		pc_history <<= 1;
+		pc_need_eval_reset = FALSE;
+	} else {
+		/*
+		 * Since the thrashing rate is really a function of the read latency of the disk
+		 * we have to consider both the SSD and spinning disk case since the file cache
+		 * could be backed by either or even both flavors.  When the object is first
+		 * assigned a phantom_object_id, we query the pager to determine if the
+		 * backing media is an SSD and remember that answer in the vm_object.  We use
+		 * that info to maintain counts for both the SSD and spinning disk cases.
+		 */
+		if (is_thrashing(sample_period_ghost_added_count,
+				 sample_period_ghost_found_count,
+				 phantom_cache_thrashing_threshold) ||
+		    is_thrashing(sample_period_ghost_added_count_ssd,
+				 sample_period_ghost_found_count_ssd,
+				 phantom_cache_thrashing_threshold_ssd)) {
+			/* Thrashing in the current period: Set bit 0. */
+			pc_history |= 1;
+		}
+	}
+
+	/*
+	 * Declare pressure_detected after phantom_cache_contiguous_periods.
+	 *
+	 * Create a bitmask with the N low bits set. These bits must all be set
+	 * in pc_history. The high bits of pc_history are ignored.
+	 */
+	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
+	if ((pc_history & bitmask) == bitmask)
+		pressure_detected = TRUE;
+
+	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100)
+		pressure_detected = FALSE;
+
+#if PHANTOM_CACHE_DEBUG
+	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
+#endif
+	return (pressure_detected);
+}
+
+/*
+ * Restart the current sampling because conditions have changed significantly,
+ * and we don't want to react to old data.
+ *
+ * This function can be called from any thread.
+ */
+void
+vm_phantom_cache_restart_sample(void)
+{
+	pc_need_eval_reset = TRUE;
+}
diff --git a/bsd/i386/ucontext.h b/osfmk/vm/vm_phantom_cache.h
similarity index 58%
rename from bsd/i386/ucontext.h
rename to osfmk/vm/vm_phantom_cache.h
index acae1f055..dcf0e5a54 100644
--- a/bsd/i386/ucontext.h
+++ b/osfmk/vm/vm_phantom_cache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,38 +26,30 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _I386_UCONTEXT_H_ -#define _I386_UCONTEXT_H_ +#include +#define VM_GHOST_OFFSET_BITS 39 +#define VM_GHOST_OFFSET_MASK 0x7FFFFFFFFF +#define VM_GHOST_PAGES_PER_ENTRY 4 +#define VM_GHOST_PAGE_MASK 0x3 +#define VM_GHOST_PAGE_SHIFT 2 +#define VM_GHOST_INDEX_BITS (64 - VM_GHOST_OFFSET_BITS - VM_GHOST_PAGES_PER_ENTRY) -#include -#include - -#if !__DARWIN_UNIX03 -struct mcontext -#else /* __DARWIN_UNIX03 */ -struct __darwin_mcontext -#endif /* __DARWIN_UNIX03 */ +struct vm_ghost { - i386_exception_state_t es; - i386_thread_state_t ss; - i386_float_state_t fs; -}; - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define I386_MCONTEXT_SIZE (i386_THREAD_STATE_COUNT + i386_FLOAT_STATE_COUNT + I386_EXCEPTION_STATE_COUNT) * sizeof(int) -#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ + uint64_t g_next_index:VM_GHOST_INDEX_BITS, + g_pages_held:VM_GHOST_PAGES_PER_ENTRY, + g_obj_offset:VM_GHOST_OFFSET_BITS; + uint32_t g_obj_id; -#ifndef _MCONTEXT_T -#define _MCONTEXT_T -#if defined(__LP64__) -typedef __darwin_mcontext64_t mcontext_t; -#else -typedef __darwin_mcontext32_t mcontext_t; -#endif -#endif +} __attribute__((packed)); -//#endif +typedef struct vm_ghost *vm_ghost_t; -#endif /* _I386_UCONTEXT_H_ */ +extern void vm_phantom_cache_init(void); +extern void vm_phantom_cache_add_ghost(vm_page_t); +extern vm_ghost_t vm_phantom_cache_lookup_ghost(vm_page_t, uint32_t); +extern void vm_phantom_cache_update(vm_page_t); +extern boolean_t vm_phantom_cache_check_pressure(void); +extern void vm_phantom_cache_restart_sample(void); diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h index 0f814c3e9..d241abafb 100644 --- a/osfmk/vm/vm_protos.h +++ b/osfmk/vm/vm_protos.h @@ -154,6 +154,8 @@ extern void vnode_pager_shutdown(void); extern void *upl_get_internal_page_list( upl_t upl); +extern void vnode_setswapmount(struct vnode *); + typedef int pager_return_t; extern pager_return_t vnode_pagein( struct vnode *, upl_t, @@ -193,6 +195,14 @@ extern kern_return_t vnode_pager_get_cs_blobs( struct vnode *vp, void **blobs); +#if CONFIG_IOSCHED +void vnode_pager_issue_reprioritize_io( + struct vnode *devvp, + uint64_t blkno, + uint32_t len, + int priority); +#endif + #if CHECK_CS_VALIDATION_BITMAP /* used by the vnode_pager_cs_validation_bitmap routine*/ #define CS_BITMAP_SET 1 @@ -215,6 +225,13 @@ extern kern_return_t vnode_pager_init( extern kern_return_t vnode_pager_get_object_size( memory_object_t, memory_object_offset_t *); + +#if CONFIG_IOSCHED +extern kern_return_t vnode_pager_get_object_devvp( + memory_object_t, + uintptr_t *); +#endif + extern kern_return_t vnode_pager_get_isinuse( memory_object_t, uint32_t *); @@ -488,7 +505,7 @@ extern void vm_paging_map_init(void); extern int macx_backing_store_compaction(int flags); extern unsigned int mach_vm_ctl_page_free_wanted(void); -extern void no_paging_space_action(void); +extern int no_paging_space_action(void); #define VM_TOGGLE_CLEAR 0 #define VM_TOGGLE_SET 1 @@ -504,9 +521,14 @@ extern kern_return_t compressor_memory_object_create( memory_object_size_t, memory_object_t *); +#if CONFIG_JETSAM +extern int proc_get_memstat_priority(struct proc*, boolean_t); +#endif /* CONFIG_JETSAM */ + /* the object purger. purges the next eligible object from memory. */ /* returns TRUE if an object was purged, otherwise FALSE. 
*/ boolean_t vm_purgeable_object_purge_one_unlocked(int force_purge_below_group); +void vm_purgeable_disown(task_t task); struct trim_list { uint64_t tl_offset; @@ -514,7 +536,25 @@ struct trim_list { struct trim_list *tl_next; }; -u_int32_t vnode_trim_list(struct vnode *vp, struct trim_list *tl); +u_int32_t vnode_trim_list(struct vnode *vp, struct trim_list *tl, boolean_t route_only); + +#define MAX_SWAPFILENAME_LEN 1024 +#define SWAPFILENAME_INDEX_LEN 2 /* Doesn't include the terminating NULL character */ + +extern char swapfilename[MAX_SWAPFILENAME_LEN + 1]; + +struct vm_counters { + unsigned int do_collapse_compressor; + unsigned int do_collapse_compressor_pages; + unsigned int do_collapse_terminate; + unsigned int do_collapse_terminate_failure; + unsigned int should_cow_but_wired; + unsigned int create_upl_extra_cow; + unsigned int create_upl_extra_cow_pages; + unsigned int create_upl_lookup_failure_write; + unsigned int create_upl_lookup_failure_copy; +}; +extern struct vm_counters vm_counters; #endif /* _VM_VM_PROTOS_H_ */ diff --git a/osfmk/vm/vm_purgeable.c b/osfmk/vm/vm_purgeable.c index 5930bc7d4..d6229881b 100644 --- a/osfmk/vm/vm_purgeable.c +++ b/osfmk/vm/vm_purgeable.c @@ -21,14 +21,23 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include +#include + +#include + #include -#include + +#include + +#include #include /* kmem_alloc */ +#include +#include #include #include + #include -#include -#include extern vm_pressure_level_t memorystatus_vm_pressure_level; @@ -57,6 +66,8 @@ static int token_q_allocating = 0; /* flag for singlethreading * allocator */ struct purgeable_q purgeable_queues[PURGEABLE_Q_TYPE_MAX]; +queue_head_t purgeable_nonvolatile_queue; +int purgeable_nonvolatile_count; decl_lck_mtx_data(,vm_purgeable_queue_lock) @@ -72,6 +83,12 @@ static token_idx_t vm_purgeable_token_remove_first(purgeable_q_t queue); static void vm_purgeable_stats_helper(vm_purgeable_stat_t *stat, purgeable_q_t queue, int group, task_t target_task); +void vm_purgeable_nonvolatile_owner_update(task_t owner, + int delta); +void vm_purgeable_volatile_owner_update(task_t owner, + int delta); + + #if MACH_ASSERT static void vm_purgeable_token_check_queue(purgeable_q_t queue) @@ -81,6 +98,20 @@ vm_purgeable_token_check_queue(purgeable_q_t queue) token_idx_t unripe = 0; int our_inactive_count; +#if DEVELOPMENT + static unsigned lightweight_check = 0; + + /* + * Due to performance impact, only perform this check + * every 100 times on DEVELOPMENT kernels. + */ + if (lightweight_check++ < 100) { + return; + } + + lightweight_check = 0; +#endif + while (token) { if (tokens[token].count != 0) { assert(queue->token_q_unripe); @@ -682,10 +713,12 @@ vm_purgeable_object_find_and_lock( } object_task_importance = 0; + owner = object->vo_purgeable_owner; if (owner) { object_task_importance = task_importance_estimate(owner); } + if (object_task_importance < best_object_task_importance) { if (vm_object_lock_try(object)) { if (best_object != VM_OBJECT_NULL) { @@ -702,32 +735,43 @@ vm_purgeable_object_find_and_lock( } } } + object = best_object; + + if (object == VM_OBJECT_NULL) { + return VM_OBJECT_NULL; + } - if (best_object) { - /* Locked. Great. We'll take it. Remove and return. */ -// printf("FOUND PURGEABLE object %p skipped %d\n", object, num_objects_skipped); + /* Locked. Great. We'll take it. Remove and return. 
*/ +// printf("FOUND PURGEABLE object %p skipped %d\n", object, num_objects_skipped); - /* clear ownership when dequeueing purgeable object */ - owner = best_object->vo_purgeable_owner; - if (owner) { - assert(owner->task_volatile_objects > 0); - OSAddAtomic(-1, &owner->task_volatile_objects); - best_object->vo_purgeable_owner = NULL; - } + vm_object_lock_assert_exclusive(object); + + queue_remove(&queue->objq[group], object, + vm_object_t, objq); + object->objq.next = NULL; + object->objq.prev = NULL; + object->purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; + object->purgeable_queue_group = 0; + /* one less volatile object for this object's owner */ + vm_purgeable_volatile_owner_update(object->vo_purgeable_owner, -1); + +#if DEBUG + object->vo_purgeable_volatilizer = NULL; +#endif /* DEBUG */ + + /* keep queue of non-volatile objects */ + queue_enter(&purgeable_nonvolatile_queue, object, + vm_object_t, objq); + assert(purgeable_nonvolatile_count >= 0); + purgeable_nonvolatile_count++; + assert(purgeable_nonvolatile_count > 0); + /* one more nonvolatile object for this object's owner */ + vm_purgeable_nonvolatile_owner_update(object->vo_purgeable_owner, +1); - queue_remove(&queue->objq[group], best_object, - vm_object_t, objq); - best_object->purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; - best_object->purgeable_queue_group = 0; - best_object->objq.next = NULL; - best_object->objq.prev = NULL; #if MACH_ASSERT - queue->debug_count_objects--; + queue->debug_count_objects--; #endif - return best_object; - } - - return 0; + return object; } /* Can be called without holding locks */ @@ -776,8 +820,10 @@ restart: vm_page_unlock_queues(); } - assert(object->purgable == VM_PURGABLE_VOLATILE); - (void) vm_object_purge(object); + (void) vm_object_purge(object, 0); + assert(object->purgable == VM_PURGABLE_EMPTY); + /* no change in purgeable accounting */ + vm_object_unlock(object); purged_count++; goto restart; @@ -802,7 +848,7 @@ vm_purgeable_object_purge_one_unlocked( boolean_t retval; vm_page_lock_queues(); - retval = vm_purgeable_object_purge_one(force_purge_below_group); + retval = vm_purgeable_object_purge_one(force_purge_below_group, 0); vm_page_unlock_queues(); return retval; @@ -810,7 +856,8 @@ vm_purgeable_object_purge_one_unlocked( boolean_t vm_purgeable_object_purge_one( - int force_purge_below_group) + int force_purge_below_group, + int flags) { enum purgeable_q_type i; int group; @@ -917,10 +964,11 @@ vm_purgeable_object_purge_one( purge_now: assert(object); - assert(object->purgable == VM_PURGABLE_VOLATILE); vm_page_unlock_queues(); /* Unlock for call to vm_object_purge() */ // printf("%sPURGING object %p task %p importance %d queue %d group %d force_purge_below_group %d memorystatus_vm_pressure_level %d\n", forced_purge ? 
"FORCED " : "", object, object->vo_purgeable_owner, task_importance_estimate(object->vo_purgeable_owner), i, group, force_purge_below_group, memorystatus_vm_pressure_level); - (void) vm_object_purge(object); + (void) vm_object_purge(object, flags); + assert(object->purgable == VM_PURGABLE_EMPTY); + /* no change in purgeable accounting */ vm_object_unlock(object); vm_page_lock_queues(); @@ -938,11 +986,21 @@ purge_now: void vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) { - task_t owner; - vm_object_lock_assert_exclusive(object); lck_mtx_lock(&vm_purgeable_queue_lock); + assert(object->objq.next != NULL); + assert(object->objq.prev != NULL); + queue_remove(&purgeable_nonvolatile_queue, object, + vm_object_t, objq); + object->objq.next = NULL; + object->objq.prev = NULL; + assert(purgeable_nonvolatile_count > 0); + purgeable_nonvolatile_count--; + assert(purgeable_nonvolatile_count >= 0); + /* one less nonvolatile object for this object's owner */ + vm_purgeable_nonvolatile_owner_update(object->vo_purgeable_owner, -1); + if (queue->type == PURGEABLE_Q_TYPE_OBSOLETE) group = 0; @@ -951,18 +1009,17 @@ vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) queue_enter(&queue->objq[group], object, vm_object_t, objq); /* last to die */ else queue_enter_first(&queue->objq[group], object, vm_object_t, objq); /* first to die */ + /* one more volatile object for this object's owner */ + vm_purgeable_volatile_owner_update(object->vo_purgeable_owner, +1); object->purgeable_queue_type = queue->type; object->purgeable_queue_group = group; - /* set ownership when enqueueing purgeable object */ - assert(object->vo_purgeable_owner == NULL); - owner = current_task(); - if (current_task() != kernel_task) { - OSAddAtomic(+1, &owner->task_volatile_objects); - assert(owner->task_volatile_objects > 0); - object->vo_purgeable_owner = owner; - } +#if DEBUG + assert(object->vo_purgeable_volatilizer == NULL); + object->vo_purgeable_volatilizer = current_task(); + OSBacktrace(&object->purgeable_volatilizer_bt[0], 16); +#endif /* DEBUG */ #if MACH_ASSERT queue->debug_count_objects++; @@ -983,7 +1040,6 @@ purgeable_q_t vm_purgeable_object_remove(vm_object_t object) { int group; - task_t owner; enum purgeable_q_type type; purgeable_q_t queue; @@ -1004,15 +1060,26 @@ vm_purgeable_object_remove(vm_object_t object) queue = &purgeable_queues[type]; - /* clear ownership when dequeueing purgeable object */ - owner = object->vo_purgeable_owner; - if (owner) { - assert(owner->task_volatile_objects > 0); - OSAddAtomic(-1, &owner->task_volatile_objects); - object->vo_purgeable_owner = NULL; - } - queue_remove(&queue->objq[group], object, vm_object_t, objq); + object->objq.next = NULL; + object->objq.prev = NULL; + /* one less volatile object for this object's owner */ + vm_purgeable_volatile_owner_update(object->vo_purgeable_owner, -1); +#if DEBUG + object->vo_purgeable_volatilizer = NULL; +#endif /* DEBUG */ + /* keep queue of non-volatile objects */ + if (object->alive && !object->terminating) { + task_t owner; + queue_enter(&purgeable_nonvolatile_queue, object, + vm_object_t, objq); + assert(purgeable_nonvolatile_count >= 0); + purgeable_nonvolatile_count++; + assert(purgeable_nonvolatile_count > 0); + /* one more nonvolatile object for this object's owner */ + owner = object->vo_purgeable_owner; + vm_purgeable_nonvolatile_owner_update(owner, +1); + } #if MACH_ASSERT queue->debug_count_objects--; @@ -1029,8 +1096,7 @@ vm_purgeable_object_remove(vm_object_t object) 
object->purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; object->purgeable_queue_group = 0; - object->objq.next = NULL; - object->objq.prev = NULL; + vm_object_lock_assert_exclusive(object); return &purgeable_queues[type]; } @@ -1081,53 +1147,622 @@ vm_purgeable_stats(vm_purgeable_info_t info, task_t target_task) static void -vm_purgeable_queue_disown( +vm_purgeable_volatile_queue_disown( purgeable_q_t queue, int group, task_t task) { vm_object_t object; - int num_objects; + int collisions; + + collisions = 0; +again: lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED); - num_objects = 0; for (object = (vm_object_t) queue_first(&queue->objq[group]); !queue_end(&queue->objq[group], (queue_entry_t) object); object = (vm_object_t) queue_next(&object->objq)) { +#if MACH_ASSERT + /* + * Sanity check: let's scan the entire queues to + * make sure we don't leave any purgeable objects + * pointing back at a dead task. If the counters + * are off, we would fail to assert that they go + * back to 0 after disowning is done. + */ +#else /* MACH_ASSERT */ + if (task->task_volatile_objects == 0) { + /* no more volatile objects owned by "task" */ + break; + } +#endif /* MACH_ASSERT */ if (object->vo_purgeable_owner == task) { - object->vo_purgeable_owner = NULL; - num_objects++; + if (! vm_object_lock_try(object)) { + lck_mtx_unlock(&vm_purgeable_queue_lock); + mutex_pause(collisions++); + lck_mtx_lock(&vm_purgeable_queue_lock); + goto again; + } + assert(object->purgable == VM_PURGABLE_VOLATILE); + if (object->vo_purgeable_owner == task) { + vm_purgeable_accounting(object, + object->purgable, + TRUE); /* disown */ + assert(object->vo_purgeable_owner == NULL); + } + vm_object_unlock(object); } } - assert(task->task_volatile_objects >= num_objects); - OSAddAtomic(-num_objects, &task->task_volatile_objects); - return; } void vm_purgeable_disown( task_t task) { - purgeable_q_t queue; + purgeable_q_t volatile_q; int group; + queue_head_t *nonvolatile_q; + vm_object_t object; + int collisions; if (task == NULL) { return; } + task->task_purgeable_disowning = TRUE; + + /* + * Scan the purgeable objects queues for objects owned by "task". + * This has to be done "atomically" under the "vm_purgeable_queue" + * lock, to ensure that no new purgeable object get associated + * with this task or moved between queues while we're scanning. + */ + + /* + * Scan non-volatile queue for objects owned by "task". + */ + + collisions = 0; + +again: + if (task->task_purgeable_disowned) { + /* task has already disowned its purgeable memory */ + assert(task->task_volatile_objects == 0); + assert(task->task_nonvolatile_objects == 0); + return; + } lck_mtx_lock(&vm_purgeable_queue_lock); + + nonvolatile_q = &purgeable_nonvolatile_queue; + for (object = (vm_object_t) queue_first(nonvolatile_q); + !queue_end(nonvolatile_q, (queue_entry_t) object); + object = (vm_object_t) queue_next(&object->objq)) { +#if MACH_ASSERT + /* + * Sanity check: let's scan the entire queues to + * make sure we don't leave any purgeable objects + * pointing back at a dead task. If the counters + * are off, we would fail to assert that they go + * back to 0 after disowning is done. 
+ */ +#else /* MACH_ASSERT */ + if (task->task_nonvolatile_objects == 0) { + /* no more non-volatile objects owned by "task" */ + break; + } +#endif /* MACH_ASSERT */ +#if DEBUG + assert(object->vo_purgeable_volatilizer == NULL); +#endif /* DEBUG */ + if (object->vo_purgeable_owner == task) { + if (!vm_object_lock_try(object)) { + lck_mtx_unlock(&vm_purgeable_queue_lock); + mutex_pause(collisions++); + goto again; + } + if (object->vo_purgeable_owner == task) { + vm_purgeable_accounting(object, + object->purgable, + TRUE); /* disown */ + assert(object->vo_purgeable_owner == NULL); + } + vm_object_unlock(object); + } + } + + lck_mtx_yield(&vm_purgeable_queue_lock); + + /* + * Scan volatile queues for objects owned by "task". + */ + + volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; + vm_purgeable_volatile_queue_disown(volatile_q, 0, task); + lck_mtx_yield(&vm_purgeable_queue_lock); + + volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; + for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { + vm_purgeable_volatile_queue_disown(volatile_q, group, task); + lck_mtx_yield(&vm_purgeable_queue_lock); + } + volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; + for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { + vm_purgeable_volatile_queue_disown(volatile_q, group, task); + lck_mtx_yield(&vm_purgeable_queue_lock); + } + + if (task->task_volatile_objects != 0 || + task->task_nonvolatile_objects != 0) { + /* some purgeable objects sneaked into a queue: find them */ + lck_mtx_unlock(&vm_purgeable_queue_lock); + mutex_pause(collisions++); + goto again; + } + + /* there shouldn't be any purgeable objects owned by task now */ + assert(task->task_volatile_objects == 0); + assert(task->task_nonvolatile_objects == 0); + assert(task->task_purgeable_disowning); + + /* and we don't need to try and disown again */ + task->task_purgeable_disowned = TRUE; + + lck_mtx_unlock(&vm_purgeable_queue_lock); +} + + +#if notyet +static int +vm_purgeable_queue_purge_task_owned( + purgeable_q_t queue, + int group, + task_t task) +{ + vm_object_t object; + int num_objects; + int collisions; + int num_objects_purged; + + num_objects_purged = 0; + collisions = 0; + +look_again: + lck_mtx_lock(&vm_purgeable_queue_lock); + + num_objects = 0; + for (object = (vm_object_t) queue_first(&queue->objq[group]); + !queue_end(&queue->objq[group], (queue_entry_t) object); + object = (vm_object_t) queue_next(&object->objq)) { + + if (object->vo_purgeable_owner != task && + object->vo_purgeable_owner != NULL) { + continue; + } + + /* found an object: try and grab it */ + if (!vm_object_lock_try(object)) { + lck_mtx_unlock(&vm_purgeable_queue_lock); + mutex_pause(collisions++); + goto look_again; + } + /* got it ! 
*/ + + collisions = 0; + + /* remove object from purgeable queue */ + queue_remove(&queue->objq[group], object, + vm_object_t, objq); + object->objq.next = NULL; + object->objq.prev = NULL; + /* one less volatile object for this object's owner */ + assert(object->vo_purgeable_owner == task); + vm_purgeable_volatile_owner_update(task, -1); + +#if DEBUG + object->vo_purgeable_volatilizer = NULL; +#endif /* DEBUG */ + queue_enter(&purgeable_nonvolatile_queue, object, + vm_object_t, objq); + assert(purgeable_nonvolatile_count >= 0); + purgeable_nonvolatile_count++; + assert(purgeable_nonvolatile_count > 0); + /* one more nonvolatile object for this object's owner */ + assert(object->vo_purgeable_owner == task); + vm_purgeable_nonvolatile_owner_update(task, +1); + + /* unlock purgeable queues */ + lck_mtx_unlock(&vm_purgeable_queue_lock); + + if (object->purgeable_when_ripe) { + /* remove a token */ + vm_page_lock_queues(); + vm_purgeable_token_remove_first(queue); + vm_page_unlock_queues(); + } + + /* purge the object */ + (void) vm_object_purge(object, 0); + assert(object->purgable == VM_PURGABLE_EMPTY); + /* no change for purgeable accounting */ + vm_object_unlock(object); + num_objects_purged++; + + /* we unlocked the purgeable queues, so start over */ + goto look_again; + } + + lck_mtx_unlock(&vm_purgeable_queue_lock); + + return num_objects_purged; +} + +int +vm_purgeable_purge_task_owned( + task_t task) +{ + purgeable_q_t queue; + int group; + int num_objects_purged; + + num_objects_purged = 0; + queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; - vm_purgeable_queue_disown(queue, 0, task); + num_objects_purged += vm_purgeable_queue_purge_task_owned(queue, + 0, + task); queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; for (group = 0; group < NUM_VOLATILE_GROUPS; group++) - vm_purgeable_queue_disown(queue, group, task); + num_objects_purged += vm_purgeable_queue_purge_task_owned(queue, + group, + task); queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; for (group = 0; group < NUM_VOLATILE_GROUPS; group++) - vm_purgeable_queue_disown(queue, group, task); + num_objects_purged += vm_purgeable_queue_purge_task_owned(queue, + group, + task); + + return num_objects_purged; +} +#endif + +void +vm_purgeable_nonvolatile_enqueue( + vm_object_t object, + task_t owner) +{ + int page_count; + + vm_object_lock_assert_exclusive(object); + + assert(object->purgable == VM_PURGABLE_NONVOLATILE); + assert(object->vo_purgeable_owner == NULL); + assert(owner != NULL); + + lck_mtx_lock(&vm_purgeable_queue_lock); + + if (owner->task_purgeable_disowning) { + /* task is exiting and no longer tracking purgeable objects */ + owner = NULL; + } + + object->vo_purgeable_owner = owner; +#if DEBUG + object->vo_purgeable_volatilizer = NULL; +#endif /* DEBUG */ + +#if DEBUG + OSBacktrace(&object->purgeable_owner_bt[0], 16); +#endif /* DEBUG */ + + page_count = object->resident_page_count; + assert(page_count == 0); /* should be a freshly-created object */ + if (owner != NULL && page_count != 0) { + ledger_credit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + ptoa(page_count)); + ledger_credit(owner->ledger, + task_ledgers.phys_footprint, + ptoa(page_count)); + } + + assert(object->objq.next == NULL); + assert(object->objq.prev == NULL); + + queue_enter(&purgeable_nonvolatile_queue, object, + vm_object_t, objq); + assert(purgeable_nonvolatile_count >= 0); + purgeable_nonvolatile_count++; + assert(purgeable_nonvolatile_count > 0); + /* one more nonvolatile object for this object's owner */ + 
assert(object->vo_purgeable_owner == owner); + vm_purgeable_nonvolatile_owner_update(owner, +1); + lck_mtx_unlock(&vm_purgeable_queue_lock); + + vm_object_lock_assert_exclusive(object); +} + +void +vm_purgeable_nonvolatile_dequeue( + vm_object_t object) +{ + task_t owner; + + vm_object_lock_assert_exclusive(object); + + owner = object->vo_purgeable_owner; +#if DEBUG + assert(object->vo_purgeable_volatilizer == NULL); +#endif /* DEBUG */ + if (owner != NULL) { + /* + * Update the owner's ledger to stop accounting + * for this object. + */ + vm_purgeable_accounting(object, + object->purgable, + TRUE); /* disown */ + } + lck_mtx_lock(&vm_purgeable_queue_lock); + assert(object->objq.next != NULL); + assert(object->objq.prev != NULL); + queue_remove(&purgeable_nonvolatile_queue, object, + vm_object_t, objq); + object->objq.next = NULL; + object->objq.prev = NULL; + assert(purgeable_nonvolatile_count > 0); + purgeable_nonvolatile_count--; + assert(purgeable_nonvolatile_count >= 0); lck_mtx_unlock(&vm_purgeable_queue_lock); + + vm_object_lock_assert_exclusive(object); +} + +void +vm_purgeable_accounting( + vm_object_t object, + vm_purgable_t old_state, + boolean_t disown) +{ + task_t owner; + int resident_page_count; + int wired_page_count; + int compressed_page_count; + boolean_t disown_on_the_fly; + + vm_object_lock_assert_exclusive(object); + + owner = object->vo_purgeable_owner; + if (owner == NULL) + return; + + if (!disown && owner->task_purgeable_disowning) { + /* task is disowning its purgeable objects: help it */ + disown_on_the_fly = TRUE; + } else { + disown_on_the_fly = FALSE; + } + + resident_page_count = object->resident_page_count; + wired_page_count = object->wired_page_count; + if ((COMPRESSED_PAGER_IS_ACTIVE || + DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && + object->pager != NULL) { + compressed_page_count = + vm_compressor_pager_get_count(object->pager); + } else { + compressed_page_count = 0; + } + + if (old_state == VM_PURGABLE_VOLATILE || + old_state == VM_PURGABLE_EMPTY) { + /* less volatile bytes in ledger */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_volatile, + ptoa(resident_page_count - wired_page_count)); + /* less compressed volatile bytes in ledger */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_volatile_compressed, + ptoa(compressed_page_count)); + + if (disown || !object->alive || object->terminating) { + /* wired pages were accounted as "non-volatile"... */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + ptoa(wired_page_count)); + /* ... and in phys_footprint */ + ledger_debit(owner->ledger, + task_ledgers.phys_footprint, + ptoa(wired_page_count)); + + if (!disown_on_the_fly && + (object->purgeable_queue_type == + PURGEABLE_Q_TYPE_MAX)) { + /* + * Not on a volatile queue: must be empty + * or emptying. 
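+				 * In that case the object was still being counted as + 				 * one of the owner's non-volatile objects, so that is + 				 * the counter we drop.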
+ */ + vm_purgeable_nonvolatile_owner_update(owner,-1); + } else { + /* on a volatile queue */ + vm_purgeable_volatile_owner_update(owner, -1); + } + /* no more accounting for this dead object */ + object->vo_purgeable_owner = NULL; +#if DEBUG + object->vo_purgeable_volatilizer = NULL; +#endif /* DEBUG */ + return; + } + + /* more non-volatile bytes in ledger */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + ptoa(resident_page_count - wired_page_count)); + /* more compressed non-volatile bytes in ledger */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_nonvolatile_compressed, + ptoa(compressed_page_count)); + /* more footprint */ + ledger_credit(owner->ledger, + task_ledgers.phys_footprint, + ptoa(resident_page_count + + compressed_page_count + - wired_page_count)); + + } else if (old_state == VM_PURGABLE_NONVOLATILE) { + + /* less non-volatile bytes in ledger */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + ptoa(resident_page_count - wired_page_count)); + /* less compressed non-volatile bytes in ledger */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile_compressed, + ptoa(compressed_page_count)); + /* less footprint */ + ledger_debit(owner->ledger, + task_ledgers.phys_footprint, + ptoa(resident_page_count + + compressed_page_count + - wired_page_count)); + + if (disown || !object->alive || object->terminating) { + /* wired pages still accounted as "non-volatile" */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + ptoa(wired_page_count)); + ledger_debit(owner->ledger, + task_ledgers.phys_footprint, + ptoa(wired_page_count)); + + /* one less "non-volatile" object for the owner */ + if (!disown_on_the_fly) { + assert(object->purgeable_queue_type == + PURGEABLE_Q_TYPE_MAX); + } + vm_purgeable_nonvolatile_owner_update(owner, -1); + /* no more accounting for this dead object */ + object->vo_purgeable_owner = NULL; +#if DEBUG + object->vo_purgeable_volatilizer = NULL; +#endif /* DEBUG */ + return; + } + /* more volatile bytes in ledger */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_volatile, + ptoa(resident_page_count - wired_page_count)); + /* more compressed volatile bytes in ledger */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_volatile_compressed, + ptoa(compressed_page_count)); + } else { + panic("vm_purgeable_accounting(%p): " + "unexpected old_state=%d\n", + object, old_state); + } + + vm_object_lock_assert_exclusive(object); +} + +void +vm_purgeable_nonvolatile_owner_update( + task_t owner, + int delta) +{ + if (owner == NULL || delta == 0) { + return; + } + + if (delta > 0) { + assert(owner->task_nonvolatile_objects >= 0); + OSAddAtomic(delta, &owner->task_nonvolatile_objects); + assert(owner->task_nonvolatile_objects > 0); + } else { + assert(owner->task_nonvolatile_objects > delta); + OSAddAtomic(delta, &owner->task_nonvolatile_objects); + assert(owner->task_nonvolatile_objects >= 0); + } +} + +void +vm_purgeable_volatile_owner_update( + task_t owner, + int delta) +{ + if (owner == NULL || delta == 0) { + return; + } + + if (delta > 0) { + assert(owner->task_volatile_objects >= 0); + OSAddAtomic(delta, &owner->task_volatile_objects); + assert(owner->task_volatile_objects > 0); + } else { + assert(owner->task_volatile_objects > delta); + OSAddAtomic(delta, &owner->task_volatile_objects); + assert(owner->task_volatile_objects >= 0); + } +} + +void +vm_purgeable_compressed_update( + vm_object_t object, + int delta) +{ + task_t owner; + + 
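+	/* + 	 * Shift "delta" compressed pages in the owner's ledgers: for a + 	 * non-volatile object the pages also count toward phys_footprint, + 	 * while volatile and empty objects only touch the + 	 * volatile-compressed ledger. + 	 */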
vm_object_lock_assert_exclusive(object); + + if (delta == 0 || + !object->internal || + object->purgable == VM_PURGABLE_DENY || + object->vo_purgeable_owner == NULL) { + /* not an owned purgeable VM object: nothing to update */ + return; + } + + owner = object->vo_purgeable_owner; + switch (object->purgable) { + case VM_PURGABLE_DENY: + break; + case VM_PURGABLE_NONVOLATILE: + if (delta > 0) { + ledger_credit(owner->ledger, + task_ledgers.purgeable_nonvolatile_compressed, + ptoa(delta)); + ledger_credit(owner->ledger, + task_ledgers.phys_footprint, + ptoa(delta)); + } else { + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile_compressed, + ptoa(-delta)); + ledger_debit(owner->ledger, + task_ledgers.phys_footprint, + ptoa(-delta)); + } + break; + case VM_PURGABLE_VOLATILE: + case VM_PURGABLE_EMPTY: + if (delta > 0) { + ledger_credit(owner->ledger, + task_ledgers.purgeable_volatile_compressed, + ptoa(delta)); + } else { + ledger_debit(owner->ledger, + task_ledgers.purgeable_volatile_compressed, + ptoa(-delta)); + } + break; + default: + panic("vm_purgeable_compressed_update(): " + "unexpected purgable %d for object %p\n", + object->purgable, object); + } } diff --git a/osfmk/vm/vm_purgeable_internal.h b/osfmk/vm/vm_purgeable_internal.h index efd66bbb3..498566b04 100644 --- a/osfmk/vm/vm_purgeable_internal.h +++ b/osfmk/vm/vm_purgeable_internal.h @@ -67,6 +67,8 @@ struct purgeable_q { typedef struct purgeable_q * purgeable_q_t; extern struct purgeable_q purgeable_queues[PURGEABLE_Q_TYPE_MAX]; +extern queue_head_t purgeable_nonvolatile_queue; +extern int purgeable_nonvolatile_count; extern int32_t token_new_pagecount; #define TOKEN_NEW_PAGECOUNT_MAX INT32_MAX extern int available_for_purge; @@ -98,7 +100,7 @@ void vm_purgeable_q_advance_all(void); /* the object purger. purges the next eligible object from memory. */ /* returns TRUE if an object was purged, otherwise FALSE. */ -boolean_t vm_purgeable_object_purge_one(int force_purge_below_group); +boolean_t vm_purgeable_object_purge_one(int force_purge_below_group, int flags); /* purge all volatile objects now */ void vm_purgeable_object_purge_all(void); @@ -112,6 +114,13 @@ purgeable_q_t vm_purgeable_object_remove(vm_object_t object); /* statistics for purgable objects in all queues */ void vm_purgeable_stats(vm_purgeable_info_t info, task_t target_task); -void vm_purgeable_disown(task_t task); +int vm_purgeable_purge_task_owned(task_t task); +void vm_purgeable_nonvolatile_enqueue(vm_object_t object, task_t task); +void vm_purgeable_nonvolatile_dequeue(vm_object_t object); +void vm_purgeable_accounting(vm_object_t object, + vm_purgable_t old_state, + boolean_t disown); +void vm_purgeable_compressed_update(vm_object_t object, + int delta); #endif /* __VM_PURGEABLE_INTERNAL__ */ diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index 32271953a..513877c18 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,10 @@ #include #include +#if CONFIG_PHANTOM_CACHE +#include +#endif + #include #include @@ -146,7 +151,7 @@ uint32_t vm_page_pages; * or VP, table.] 
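 *	(The bucket's list head is now stored in packed form; *	VM_PAGE_PACK_PTR and VM_PAGE_UNPACK_PTR convert between *	vm_page_t and vm_page_packed_t.)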
*/ typedef struct { - vm_page_t pages; + vm_page_packed_t page_list; #if MACH_PAGE_HASH_STATS int cur_count; /* current count */ int hi_count; /* high water mark */ @@ -245,14 +250,13 @@ ppnum_t vm_page_lowest = 0; unsigned int vm_colors; unsigned int vm_color_mask; /* mask is == (vm_colors-1) */ unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */ +unsigned int vm_free_magazine_refill_limit = 0; queue_head_t vm_page_queue_free[MAX_COLORS]; unsigned int vm_page_free_wanted; unsigned int vm_page_free_wanted_privileged; unsigned int vm_page_free_count; unsigned int vm_page_fictitious_count; -unsigned int vm_page_free_count_minimum; /* debugging */ - /* * Occasionally, the virtual memory system uses * resident page structures that do not refer to @@ -318,13 +322,15 @@ unsigned int vm_page_speculative_count; unsigned int vm_page_wire_count; unsigned int vm_page_wire_count_initial; unsigned int vm_page_gobble_count = 0; -unsigned int vm_page_wire_count_warning = 0; -unsigned int vm_page_gobble_count_warning = 0; + +#define VM_PAGE_WIRE_COUNT_WARNING 0 +#define VM_PAGE_GOBBLE_COUNT_WARNING 0 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */ unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */ uint64_t vm_page_purged_count = 0; /* total count of purged pages */ +unsigned int vm_page_xpmapped_external_count = 0; unsigned int vm_page_external_count = 0; unsigned int vm_page_internal_count = 0; unsigned int vm_page_pageable_external_count = 0; @@ -386,7 +392,9 @@ struct vm_page_stats_reusable vm_page_stats_reusable; void vm_set_page_size(void) { - page_mask = page_size - 1; + page_size = PAGE_SIZE; + page_mask = PAGE_MASK; + page_shift = PAGE_SHIFT; if ((page_mask & page_size) != 0) panic("vm_set_page_size: page size not a power of two"); @@ -396,6 +404,8 @@ vm_set_page_size(void) break; } +#define COLOR_GROUPS_TO_STEAL 4 + /* Called once during statup, once the cache geometry is known. */ @@ -421,6 +431,8 @@ vm_page_set_colors( void ) vm_colors = n; vm_color_mask = n - 1; + + vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL; } @@ -518,7 +530,7 @@ vm_page_bootstrap( m->pageq.prev = NULL; m->listq.next = NULL; m->listq.prev = NULL; - m->next = VM_PAGE_NULL; + m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL); m->object = VM_OBJECT_NULL; /* reset later */ m->offset = (vm_object_offset_t) -1; /* reset later */ @@ -563,7 +575,6 @@ vm_page_bootstrap( m->no_cache = FALSE; m->reusable = FALSE; m->slid = FALSE; - m->was_dirty = FALSE; m->xpmapped = FALSE; m->compressor = FALSE; m->written_by_kernel = FALSE; @@ -593,6 +604,8 @@ vm_page_bootstrap( purgeable_queues[i].debug_count_objects = 0; #endif }; + purgeable_nonvolatile_count = 0; + queue_init(&purgeable_nonvolatile_queue); for (i = 0; i < MAX_COLORS; i++ ) queue_init(&vm_page_queue_free[i]); @@ -619,7 +632,9 @@ vm_page_bootstrap( /* * Steal memory for the map and zone subsystems. 
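	 * (The kernel_debug_string() calls added throughout bootstrap 	 * leave trace breadcrumbs that mark how far startup progressed.)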
*/ + kernel_debug_string("zone_steal_memory"); zone_steal_memory(); + kernel_debug_string("vm_map_steal_memory"); vm_map_steal_memory(); /* @@ -689,10 +704,12 @@ vm_page_bootstrap( #endif /* VM_PAGE_FAKE_BUCKETS */ #endif /* VM_PAGE_BUCKETS_CHECK */ + kernel_debug_string("vm_page_buckets"); vm_page_buckets = (vm_page_bucket_t *) pmap_steal_memory(vm_page_bucket_count * sizeof(vm_page_bucket_t)); + kernel_debug_string("vm_page_bucket_locks"); vm_page_bucket_locks = (lck_spin_t *) pmap_steal_memory(vm_page_bucket_lock_count * sizeof(lck_spin_t)); @@ -700,7 +717,7 @@ vm_page_bootstrap( for (i = 0; i < vm_page_bucket_count; i++) { register vm_page_bucket_t *bucket = &vm_page_buckets[i]; - bucket->pages = VM_PAGE_NULL; + bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL); #if MACH_PAGE_HASH_STATS bucket->cur_count = 0; bucket->hi_count = 0; @@ -722,6 +739,7 @@ vm_page_bootstrap( * to get the alignment right. */ + kernel_debug_string("pmap_startup"); pmap_startup(&virtual_space_start, &virtual_space_end); virtual_space_start = round_page(virtual_space_start); virtual_space_end = trunc_page(virtual_space_end); @@ -739,11 +757,11 @@ vm_page_bootstrap( assert((unsigned int) atop_64(max_mem) == atop_64(max_mem)); vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */ vm_page_wire_count_initial = vm_page_wire_count; - vm_page_free_count_minimum = vm_page_free_count; printf("vm_page_bootstrap: %d free pages and %d wired pages\n", vm_page_free_count, vm_page_wire_count); + kernel_debug_string("vm_page_bootstrap complete"); simple_lock_init(&vm_paging_lock, 0); } @@ -824,6 +842,7 @@ pmap_steal_memory( return (void *) addr; } +void vm_page_release_startup(vm_page_t mem); void pmap_startup( vm_offset_t *startp, @@ -833,6 +852,22 @@ pmap_startup( ppnum_t phys_page; addr64_t tmpaddr; + +#if defined(__LP64__) + /* + * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use + */ + assert(sizeof(struct vm_page) == 64); + + /* + * make sure we are aligned on a 64 byte boundary + * for VM_PAGE_PACK_PTR (it clips off the low-order + * 6 bits of the pointer) + */ + if (virtual_space_start != virtual_space_end) + virtual_space_start = round_page(virtual_space_start); +#endif + /* * We calculate how many page frames we will have * and then allocate the page structures in one chunk. @@ -847,6 +882,7 @@ pmap_startup( /* * Initialize the page frames. 
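	 * Each page structure takes the next physical page number from 	 * pmap_next_page() until no more physical pages are available.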
*/ + kernel_debug_string("Initialize the page frames"); for (i = 0, pages_initialized = 0; i < npages; i++) { if (!pmap_next_page(&phys_page)) break; @@ -859,6 +895,15 @@ pmap_startup( } vm_pages_count = pages_initialized; +#if defined(__LP64__) + + if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0]) + panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]); + + if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1]) + panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]); +#endif + kernel_debug_string("page fill/release"); /* * Check if we want to initialize pages to a known value */ @@ -881,7 +926,7 @@ pmap_startup( // free low -> high so high is preferred for (i = 1; i <= pages_initialized; i++) { if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */ - vm_page_release(&vm_pages[i - 1]); + vm_page_release_startup(&vm_pages[i - 1]); } } else @@ -895,9 +940,11 @@ pmap_startup( */ for (i = pages_initialized; i > 0; i--) { if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */ - vm_page_release(&vm_pages[i - 1]); + vm_page_release_startup(&vm_pages[i - 1]); } + VM_CHECK_MEMORYSTATUS; + #if 0 { vm_page_t xx, xxo, xxl; @@ -1052,9 +1099,10 @@ vm_page_insert_internal( boolean_t insert_in_hash, boolean_t batch_pmap_op) { - vm_page_bucket_t *bucket; - lck_spin_t *bucket_lock; - int hash_id; + vm_page_bucket_t *bucket; + lck_spin_t *bucket_lock; + int hash_id; + task_t owner; XPR(XPR_VM_PAGE, "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n", @@ -1069,10 +1117,8 @@ vm_page_insert_internal( assert(page_aligned(offset)); - if (object == vm_submap_object) { - /* the vm_submap_object is only a placeholder for submaps */ - panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset); - } + /* the vm_submap_object is only a placeholder for submaps */ + assert(object != vm_submap_object); vm_object_lock_assert_exclusive(object); #if DEBUG @@ -1112,8 +1158,10 @@ vm_page_insert_internal( lck_spin_lock(bucket_lock); - mem->next = bucket->pages; - bucket->pages = mem; + mem->next_m = bucket->page_list; + bucket->page_list = VM_PAGE_PACK_PTR(mem); + assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list)); + #if MACH_PAGE_HASH_STATS if (++bucket->cur_count > bucket->hi_count) bucket->hi_count = bucket->cur_count; @@ -1170,11 +1218,38 @@ vm_page_insert_internal( OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count); } + if (object->purgable == VM_PURGABLE_DENY) { + owner = TASK_NULL; + } else { + owner = object->vo_purgeable_owner; + } + if (owner && + (object->purgable == VM_PURGABLE_NONVOLATILE || + VM_PAGE_WIRED(mem))) { + /* more non-volatile bytes */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + PAGE_SIZE); + /* more footprint */ + ledger_credit(owner->ledger, + task_ledgers.phys_footprint, + PAGE_SIZE); + + } else if (owner && + (object->purgable == VM_PURGABLE_VOLATILE || + object->purgable == VM_PURGABLE_EMPTY)) { + assert(! 
VM_PAGE_WIRED(mem)); + /* more volatile bytes */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_volatile, + PAGE_SIZE); + } + if (object->purgable == VM_PURGABLE_VOLATILE) { if (VM_PAGE_WIRED(mem)) { - OSAddAtomic(1, &vm_page_purgeable_wired_count); + OSAddAtomic(+1, &vm_page_purgeable_wired_count); } else { - OSAddAtomic(1, &vm_page_purgeable_count); + OSAddAtomic(+1, &vm_page_purgeable_count); } } else if (object->purgable == VM_PURGABLE_EMPTY && mem->throttled) { @@ -1192,6 +1267,25 @@ vm_page_insert_internal( if (queues_lock_held == FALSE) vm_page_unlock_queues(); } + +#if VM_OBJECT_TRACKING_OP_MODIFIED + if (vm_object_tracking_inited && + object->internal && + object->resident_page_count == 0 && + object->pager == NULL && + object->shadow != NULL && + object->shadow->copy == object) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int numsaved = 0; + + numsaved =OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_MODIFIED, + bt, + numsaved); + } +#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */ } /* @@ -1246,32 +1340,32 @@ vm_page_replace( lck_spin_lock(bucket_lock); - if (bucket->pages) { - vm_page_t *mp = &bucket->pages; - vm_page_t m = *mp; + if (bucket->page_list) { + vm_page_packed_t *mp = &bucket->page_list; + vm_page_t m = VM_PAGE_UNPACK_PTR(*mp); do { if (m->object == object && m->offset == offset) { /* * Remove old page from hash list */ - *mp = m->next; + *mp = m->next_m; m->hashed = FALSE; found_m = m; break; } - mp = &m->next; - } while ((m = *mp)); + mp = &m->next_m; + } while ((m = VM_PAGE_UNPACK_PTR(*mp))); - mem->next = bucket->pages; + mem->next_m = bucket->page_list; } else { - mem->next = VM_PAGE_NULL; + mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL); } /* * insert new page at head of hash list */ - bucket->pages = mem; + bucket->page_list = VM_PAGE_PACK_PTR(mem); mem->hashed = TRUE; lck_spin_unlock(bucket_lock); @@ -1305,6 +1399,7 @@ vm_page_remove( vm_page_t this; lck_spin_t *bucket_lock; int hash_id; + task_t owner; XPR(XPR_VM_PAGE, "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n", @@ -1332,18 +1427,18 @@ vm_page_remove( lck_spin_lock(bucket_lock); - if ((this = bucket->pages) == mem) { + if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) { /* optimize for common case */ - bucket->pages = mem->next; + bucket->page_list = mem->next_m; } else { - vm_page_t *prev; + vm_page_packed_t *prev; - for (prev = &this->next; - (this = *prev) != mem; - prev = &this->next) + for (prev = &this->next_m; + (this = VM_PAGE_UNPACK_PTR(*prev)) != mem; + prev = &this->next_m) continue; - *prev = this->next; + *prev = this->next_m; } #if MACH_PAGE_HASH_STATS bucket->cur_count--; @@ -1366,11 +1461,19 @@ vm_page_remove( mem->object->resident_page_count--; if (mem->object->internal) { +#if DEBUG assert(vm_page_internal_count); +#endif /* DEBUG */ + OSAddAtomic(-1, &vm_page_internal_count); } else { assert(vm_page_external_count); OSAddAtomic(-1, &vm_page_external_count); + + if (mem->xpmapped) { + assert(vm_page_xpmapped_external_count); + OSAddAtomic(-1, &vm_page_xpmapped_external_count); + } } if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) { if (mem->object->resident_page_count == 0) @@ -1396,6 +1499,31 @@ vm_page_remove( vm_page_stats_reusable.reused_remove++; } + if (mem->object->purgable == VM_PURGABLE_DENY) { + owner = TASK_NULL; + } else { + owner = mem->object->vo_purgeable_owner; + } + if (owner && + (mem->object->purgable == VM_PURGABLE_NONVOLATILE || + 
VM_PAGE_WIRED(mem))) { + /* less non-volatile bytes */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + PAGE_SIZE); + /* less footprint */ + ledger_debit(owner->ledger, + task_ledgers.phys_footprint, + PAGE_SIZE); + } else if (owner && + (mem->object->purgable == VM_PURGABLE_VOLATILE || + mem->object->purgable == VM_PURGABLE_EMPTY)) { + assert(! VM_PAGE_WIRED(mem)); + /* less volatile bytes */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_volatile, + PAGE_SIZE); + } if (mem->object->purgable == VM_PURGABLE_VOLATILE) { if (VM_PAGE_WIRED(mem)) { assert(vm_page_purgeable_wired_count > 0); @@ -1495,7 +1623,7 @@ vm_page_lookup( * at outside the scope of the hash bucket lock... this is a * really cheap optimiztion to avoid taking the lock */ - if (bucket->pages == VM_PAGE_NULL) { + if (!bucket->page_list) { vm_page_lookup_bucket_NULL++; return (VM_PAGE_NULL); @@ -1504,7 +1632,7 @@ vm_page_lookup( lck_spin_lock(bucket_lock); - for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) { + for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) { #if 0 /* * we don't hold the page queue lock @@ -1820,7 +1948,7 @@ vm_pool_low(void) * this is an interface to support bring-up of drivers * on platforms with physical memory > 4G... */ -int vm_himemory_mode = 0; +int vm_himemory_mode = 2; /* @@ -1913,8 +2041,6 @@ vm_page_grablo(void) * request from the per-cpu queue. */ -#define COLOR_GROUPS_TO_STEAL 4 - vm_page_t vm_page_grab( void ) @@ -1928,9 +2054,9 @@ vm_page_grab( void ) return_page_from_cpu_list: PROCESSOR_DATA(current_processor(), page_grab_count) += 1; PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next; - mem->pageq.next = NULL; enable_preemption(); + mem->pageq.next = NULL; assert(mem->listq.next == NULL && mem->listq.prev == NULL); assert(mem->tabled == FALSE); @@ -1957,19 +2083,18 @@ return_page_from_cpu_list: * Optionally produce warnings if the wire or gobble * counts exceed some threshold. 
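	 * (The thresholds are now the compile-time constants 	 * VM_PAGE_WIRE_COUNT_WARNING and VM_PAGE_GOBBLE_COUNT_WARNING; 	 * both default to 0, which compiles these checks out.)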
*/ - if (vm_page_wire_count_warning > 0 - && vm_page_wire_count >= vm_page_wire_count_warning) { +#if VM_PAGE_WIRE_COUNT_WARNING + if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) { printf("mk: vm_page_grab(): high wired page count of %d\n", vm_page_wire_count); - assert(vm_page_wire_count < vm_page_wire_count_warning); } - if (vm_page_gobble_count_warning > 0 - && vm_page_gobble_count >= vm_page_gobble_count_warning) { +#endif +#if VM_PAGE_GOBBLE_COUNT_WARNING + if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) { printf("mk: vm_page_grab(): high gobbled page count of %d\n", vm_page_gobble_count); - assert(vm_page_gobble_count < vm_page_gobble_count_warning); } - +#endif lck_mtx_lock_spin(&vm_page_queue_free_lock); /* @@ -2014,17 +2139,17 @@ return_page_from_cpu_list: if (vm_page_free_count <= vm_page_free_reserved) pages_to_steal = 1; else { - pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors; - - if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved)) + if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved)) + pages_to_steal = vm_free_magazine_refill_limit; + else pages_to_steal = (vm_page_free_count - vm_page_free_reserved); } color = PROCESSOR_DATA(current_processor(), start_color); head = tail = NULL; + vm_page_free_count -= pages_to_steal; + while (pages_to_steal--) { - if (--vm_page_free_count < vm_page_free_count_minimum) - vm_page_free_count_minimum = vm_page_free_count; while (queue_empty(&vm_page_queue_free[color])) color = (color + 1) & vm_color_mask; @@ -2049,7 +2174,6 @@ return_page_from_cpu_list: tail->pageq.next = (queue_t)mem; tail = mem; - mem->pageq.prev = NULL; assert(mem->listq.next == NULL && mem->listq.prev == NULL); assert(mem->tabled == FALSE); assert(mem->object == VM_OBJECT_NULL); @@ -2065,6 +2189,8 @@ return_page_from_cpu_list: assert(!mem->wpmapped); assert(!pmap_is_noencrypt(mem->phys_page)); } + lck_mtx_unlock(&vm_page_queue_free_lock); + PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next; PROCESSOR_DATA(current_processor(), start_color) = color; @@ -2075,8 +2201,6 @@ return_page_from_cpu_list: mem = head; mem->pageq.next = NULL; - lck_mtx_unlock(&vm_page_queue_free_lock); - enable_preemption(); } /* @@ -2206,6 +2330,32 @@ vm_page_release( VM_CHECK_MEMORYSTATUS; } +/* + * This version of vm_page_release() is used only at startup + * when we are single-threaded and pages are being released + * for the first time. Hence, no locking or unnecessary checks are made. + * Note: VM_CHECK_MEMORYSTATUS invoked by the caller. 
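+ *	Pages eligible for the low-memory pool go on vm_lopage_queue_free; + *	everything else goes on the free queue for its color.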
+ */ +void +vm_page_release_startup( + register vm_page_t mem) +{ + queue_t queue_free; + + if (vm_lopage_free_count < vm_lopage_free_limit && + mem->phys_page < max_valid_low_ppnum) { + mem->lopage = TRUE; + vm_lopage_free_count++; + queue_free = &vm_lopage_queue_free; + } else { + mem->lopage = FALSE; + mem->free = TRUE; + vm_page_free_count++; + queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask]; + } + queue_enter_first(queue_free, mem, vm_page_t, pageq); +} + /* * vm_page_wait: * @@ -2365,11 +2515,12 @@ vm_page_free_prepare_queues( VM_PAGE_CHECK(mem); assert(!mem->free); assert(!mem->cleaning); -#if DEBUG + +#if MACH_ASSERT || DEBUG lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); if (mem->free) panic("vm_page_free: freeing page on free list\n"); -#endif +#endif /* MACH_ASSERT || DEBUG */ if (mem->object) { vm_object_lock_assert_exclusive(mem->object); } @@ -2399,6 +2550,30 @@ vm_page_free_prepare_queues( assert(vm_page_purgeable_wired_count > 0); OSAddAtomic(-1, &vm_page_purgeable_wired_count); } + if ((mem->object->purgable == VM_PURGABLE_VOLATILE || + mem->object->purgable == VM_PURGABLE_EMPTY) && + mem->object->vo_purgeable_owner != TASK_NULL) { + task_t owner; + + owner = mem->object->vo_purgeable_owner; + /* + * While wired, this page was accounted + * as "non-volatile" but it should now + * be accounted as "volatile". + */ + /* one less "non-volatile"... */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + PAGE_SIZE); + /* ... and "phys_footprint" */ + ledger_debit(owner->ledger, + task_ledgers.phys_footprint, + PAGE_SIZE); + /* one more "volatile" */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_volatile, + PAGE_SIZE); + } } if (!mem->private && !mem->fictitious) vm_page_wire_count--; @@ -2684,6 +2859,25 @@ vm_page_wire( OSAddAtomic(-1, &vm_page_purgeable_count); OSAddAtomic(1, &vm_page_purgeable_wired_count); } + if ((mem->object->purgable == VM_PURGABLE_VOLATILE || + mem->object->purgable == VM_PURGABLE_EMPTY) && + mem->object->vo_purgeable_owner != TASK_NULL) { + task_t owner; + + owner = mem->object->vo_purgeable_owner; + /* less volatile bytes */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_volatile, + PAGE_SIZE); + /* more not-quite-volatile bytes */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + PAGE_SIZE); + /* more footprint */ + ledger_credit(owner->ledger, + task_ledgers.phys_footprint, + PAGE_SIZE); + } if (mem->object->all_reusable) { /* * Wired pages are not counted as "re-usable" @@ -2788,7 +2982,25 @@ vm_page_unwire( assert(vm_page_purgeable_wired_count > 0); OSAddAtomic(-1, &vm_page_purgeable_wired_count); } - assert(!mem->laundry); + if ((mem->object->purgable == VM_PURGABLE_VOLATILE || + mem->object->purgable == VM_PURGABLE_EMPTY) && + mem->object->vo_purgeable_owner != TASK_NULL) { + task_t owner; + + owner = mem->object->vo_purgeable_owner; + /* more volatile bytes */ + ledger_credit(owner->ledger, + task_ledgers.purgeable_volatile, + PAGE_SIZE); + /* less not-quite-volatile bytes */ + ledger_debit(owner->ledger, + task_ledgers.purgeable_nonvolatile, + PAGE_SIZE); + /* less footprint */ + ledger_debit(owner->ledger, + task_ledgers.phys_footprint, + PAGE_SIZE); + } assert(mem->object != kernel_object); assert(mem->pageq.next == NULL && mem->pageq.prev == NULL); @@ -2862,7 +3074,7 @@ vm_page_deactivate_internal( * reference which is held on the object while the page is in the pageout queue... 
* just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m))) + if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m))) return; if (!m->absent && clear_hw_reference == TRUE) @@ -2928,7 +3140,7 @@ void vm_page_enqueue_cleaned(vm_page_t m) * reference which is held on the object while the page is in the pageout queue... * just let the normal laundry processing proceed */ - if (m->clean_queue || m->pageout_queue || m->private || m->fictitious) + if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious) return; VM_PAGE_QUEUES_REMOVE(m); @@ -2956,12 +3168,6 @@ void vm_page_enqueue_cleaned(vm_page_t m) * The page queues must be locked. */ -#if CONFIG_JETSAM -#if LATENCY_JETSAM -extern struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS]; -#endif /* LATENCY_JETSAM */ -#endif /* CONFIG_JETSAM */ - void vm_page_activate( register vm_page_t m) @@ -2991,7 +3197,7 @@ vm_page_activate( * reference which is held on the object while the page is in the pageout queue... * just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || m->fictitious || m->compressor) + if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor) return; #if DEBUG @@ -3025,37 +3231,6 @@ vm_page_activate( } else { vm_page_pageable_external_count++; } -#if LATENCY_JETSAM - if (jlp_init) { - uint64_t now = mach_absolute_time(); - uint64_t delta = now - jlp_time; - clock_sec_t jl_secs = 0; - clock_usec_t jl_usecs = 0; - vm_page_t jlp; - - absolutetime_to_microtime(delta, &jl_secs, &jl_usecs); - - jl_usecs += jl_secs * USEC_PER_SEC; - if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) { - - jlp = &jetsam_latency_page[jlp_current]; - if (jlp->active) { - queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq); - } - queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq); - - jlp->active = TRUE; - - jlp->offset = now; - jlp_time = jlp->offset; - - if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) { - jlp_current = 0; - } - - } - } -#endif /* LATENCY_JETSAM */ } m->reference = TRUE; m->no_cache = FALSE; @@ -3094,7 +3269,7 @@ vm_page_speculate( * reference which is held on the object while the page is in the pageout queue... * just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || m->fictitious || m->compressor) + if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor) return; VM_PAGE_QUEUES_REMOVE(m); @@ -3228,7 +3403,7 @@ vm_page_lru( * reference which is held on the object while the page is in the pageout queue... * just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m))) + if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m))) return; m->no_cache = FALSE; @@ -3593,7 +3768,7 @@ _vm_page_print( printf("vm_page %p: \n", p); printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev); printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev); - printf(" next=%p\n", p->next); + printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m)); printf(" object=%p offset=0x%llx\n", p->object, p->offset); printf(" wire_count=%u\n", p->wire_count); @@ -3676,6 +3851,7 @@ vm_page_verify_contiguous( /* * Check the free lists for proper length etc. 
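 *	(Verification is skipped unless vm_page_verify_this_free_list_enabled *	or vm_page_verify_all_free_lists_enabled has been turned on; both *	default to FALSE.)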
*/ +static boolean_t vm_page_verify_this_free_list_enabled = FALSE; static unsigned int vm_page_verify_free_list( queue_head_t *vm_page_queue, @@ -3688,6 +3864,9 @@ vm_page_verify_free_list( vm_page_t prev_m; boolean_t found_page; + if (! vm_page_verify_this_free_list_enabled) + return 0; + found_page = FALSE; npages = 0; prev_m = (vm_page_t) vm_page_queue; @@ -3745,18 +3924,32 @@ vm_page_verify_free_list( return npages; } -static boolean_t vm_page_verify_free_lists_enabled = FALSE; +static boolean_t vm_page_verify_all_free_lists_enabled = FALSE; static void vm_page_verify_free_lists( void ) { unsigned int color, npages, nlopages; + boolean_t toggle = TRUE; - if (! vm_page_verify_free_lists_enabled) + if (! vm_page_verify_all_free_lists_enabled) return; npages = 0; lck_mtx_lock(&vm_page_queue_free_lock); + + if (vm_page_verify_this_free_list_enabled == TRUE) { + /* + * This variable has been set globally for extra checking of + * each free list Q. Since we didn't set it, we don't own it + * and we shouldn't toggle it. + */ + toggle = FALSE; + } + + if (toggle == TRUE) { + vm_page_verify_this_free_list_enabled = TRUE; + } for( color = 0; color < vm_colors; color++ ) { npages += vm_page_verify_free_list(&vm_page_queue_free[color], @@ -3770,6 +3963,10 @@ vm_page_verify_free_lists( void ) "npages %u free_count %d nlopages %u lo_free_count %u", npages, vm_page_free_count, nlopages, vm_lopage_free_count); + if (toggle == TRUE) { + vm_page_verify_this_free_list_enabled = FALSE; + } + lck_mtx_unlock(&vm_page_queue_free_lock); } @@ -4155,12 +4352,6 @@ did_consider: vm_page_free_count--; } } - /* - * adjust global freelist counts - */ - if (vm_page_free_count < vm_page_free_count_minimum) - vm_page_free_count_minimum = vm_page_free_count; - if( flags & KMA_LOMEM) vm_page_lomem_find_contiguous_last_idx = page_idx; else @@ -4284,7 +4475,7 @@ did_consider: assert(!m1->gobbled); assert(!m1->private); m2->no_cache = m1->no_cache; - m2->xpmapped = m1->xpmapped; + m2->xpmapped = 0; assert(!m1->busy); assert(!m1->wanted); assert(!m1->fictitious); @@ -4317,7 +4508,6 @@ did_consider: assert(!m1->lopage); m2->slid = m1->slid; - m2->was_dirty = m1->was_dirty; m2->compressor = m1->compressor; /* @@ -4613,13 +4803,16 @@ vm_page_do_delayed_work( if (dwp->dw_mask & DW_vm_pageout_throttle_up) vm_pageout_throttle_up(m); - +#if CONFIG_PHANTOM_CACHE + if (dwp->dw_mask & DW_vm_phantom_cache_update) + vm_phantom_cache_update(m); +#endif if (dwp->dw_mask & DW_vm_page_wire) vm_page_wire(m); else if (dwp->dw_mask & DW_vm_page_unwire) { boolean_t queueit; - queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE; + queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? 
FALSE : TRUE; vm_page_unwire(m, queueit); } @@ -5054,8 +5247,8 @@ hibernate_flush_queue(queue_head_t *q, int qcount) VM_PAGE_QUEUES_REMOVE(m); - if (COMPRESSED_PAGER_IS_ACTIVE) - pmap_disconnect(m->phys_page); + if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE) + pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); vm_pageout_cluster(m, FALSE); @@ -5094,8 +5287,6 @@ hibernate_flush_dirty_pages(int pass) struct vm_speculative_age_q *aq; uint32_t i; - bzero(&hibernate_stats, sizeof(struct hibernate_statistics)); - if (vm_page_local_q) { for (i = 0; i < vm_page_local_q_count; i++) vm_page_reactivate_local(i, TRUE, FALSE); @@ -5159,6 +5350,13 @@ hibernate_flush_dirty_pages(int pass) } +void +hibernate_reset_stats() +{ + bzero(&hibernate_stats, sizeof(struct hibernate_statistics)); +} + + int hibernate_flush_memory() { @@ -5173,16 +5371,13 @@ hibernate_flush_memory() if (COMPRESSED_PAGER_IS_ACTIVE) { - if ((retval = hibernate_flush_dirty_pages(2)) == 0) { - KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0); vm_compressor_flush(); KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0); - } } - if (retval == 0 && consider_buffer_cache_collect != NULL) { + if (consider_buffer_cache_collect != NULL) { unsigned int orig_wire_count; KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0); @@ -5397,14 +5592,14 @@ hibernate_discard_page(vm_page_t m) */ return; -#if DEBUG +#if MACH_ASSERT || DEBUG vm_object_t object = m->object; if (!vm_object_lock_try(m->object)) panic("hibernate_discard_page(%p) !vm_object_lock_try", m); #else /* No need to lock page queue for token delete, hibernate_vm_unlock() makes sure these locks are uncontended before sleep */ -#endif /* !DEBUG */ +#endif /* MACH_ASSERT || DEBUG */ if (m->pmapped == TRUE) { @@ -5428,13 +5623,26 @@ hibernate_discard_page(vm_page_t m) vm_purgeable_token_delete_first(old_queue); } m->object->purgable = VM_PURGABLE_EMPTY; + + /* + * Purgeable ledgers: pages of VOLATILE and EMPTY objects are + * accounted in the "volatile" ledger, so no change here. + * We have to update vm_page_purgeable_count, though, since we're + * effectively purging this object. 
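+		 * The drop is resident_page_count - wired_page_count: wired + 		 * pages are tracked in vm_page_purgeable_wired_count, not in + 		 * vm_page_purgeable_count.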
+ */ + unsigned int delta; + assert(m->object->resident_page_count >= m->object->wired_page_count); + delta = (m->object->resident_page_count - m->object->wired_page_count); + assert(vm_page_purgeable_count >= delta); + assert(delta > 0); + OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count); } vm_page_free(m); -#if DEBUG +#if MACH_ASSERT || DEBUG vm_object_unlock(object); -#endif /* DEBUG */ +#endif /* MACH_ASSERT || DEBUG */ } /* @@ -5518,7 +5726,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, discard_all = will_discard; } -#if DEBUG +#if MACH_ASSERT || DEBUG if (!preflight) { vm_page_lock_queues(); @@ -5530,7 +5738,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, } } } -#endif /* DEBUG */ +#endif /* MACH_ASSERT || DEBUG */ KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0); @@ -5815,7 +6023,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active; -#if DEBUG +#if MACH_ASSERT || DEBUG if (!preflight) { if (vm_page_local_q) { @@ -5827,7 +6035,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, } vm_page_unlock_queues(); } -#endif /* DEBUG */ +#endif /* MACH_ASSERT || DEBUG */ if (preflight) { lck_mtx_unlock(&vm_page_queue_free_lock); @@ -5852,7 +6060,7 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list) uint32_t count_discard_speculative = 0; -#if DEBUG +#if MACH_ASSERT || DEBUG vm_page_lock_queues(); if (vm_page_local_q) { for (i = 0; i < vm_page_local_q_count; i++) { @@ -5861,7 +6069,7 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list) VPL_LOCK(&lq->vpl_lock); } } -#endif /* DEBUG */ +#endif /* MACH_ASSERT || DEBUG */ clock_get_uptime(&start); @@ -5940,7 +6148,7 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list) m = next; } -#if DEBUG +#if MACH_ASSERT || DEBUG if (vm_page_local_q) { for (i = 0; i < vm_page_local_q_count; i++) { struct vpl *lq; @@ -5949,7 +6157,7 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list) } } vm_page_unlock_queues(); -#endif /* DEBUG */ +#endif /* MACH_ASSERT || DEBUG */ clock_get_uptime(&end); absolutetime_to_nanoseconds(end - start, &nsec); @@ -6080,8 +6288,8 @@ hibernate_hash_insert_page(vm_page_t mem) hash_id = vm_page_hash(mem->object, mem->offset); bucket = &vm_page_buckets[hash_id]; - mem->next = bucket->pages; - bucket->pages = mem; + mem->next_m = bucket->page_list; + bucket->page_list = VM_PAGE_PACK_PTR(mem); } @@ -6141,8 +6349,8 @@ hibernate_rebuild_vm_structs(void) * hibernate_teardown_vm_structs leaves the location where * this vm_page_t must be located in "next". */ - tmem = mem->next; - mem->next = NULL; + tmem = VM_PAGE_UNPACK_PTR(mem->next_m); + mem->next_m = VM_PAGE_PACK_PTR(NULL); sindx = (int)(tmem - &vm_pages[0]); @@ -6176,9 +6384,9 @@ hibernate_rebuild_vm_structs(void) * vm_page_t's that were created on the fly (i.e. 
fictitious) */ for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) { - mem_next = mem->next; + mem_next = VM_PAGE_UNPACK_PTR(mem->next_m); - mem->next = NULL; + mem->next_m = VM_PAGE_PACK_PTR(NULL); hibernate_hash_insert_page(mem); } hibernate_rebuild_hash_list = NULL; @@ -6223,13 +6431,13 @@ hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_l bucket = &vm_page_buckets[i]; - for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) { + for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) { assert(mem->hashed); - mem_next = mem->next; + mem_next = VM_PAGE_UNPACK_PTR(mem->next_m); if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) { - mem->next = hibernate_rebuild_hash_list; + mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list); hibernate_rebuild_hash_list = mem; } } @@ -6273,7 +6481,7 @@ hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_l * as an indicator to the rebuild function that * we don't have to move it */ - mem->next = mem; + mem->next_m = VM_PAGE_PACK_PTR(mem); if (vm_pages[compact_target_indx].free) { /* @@ -6347,7 +6555,7 @@ vm_page_info( bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK]; lck_spin_lock(bucket_lock); - for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next) + for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m)) bucket_count++; lck_spin_unlock(bucket_lock); @@ -6393,21 +6601,21 @@ vm_page_buckets_check(void) panic("BUCKET_CHECK: corruption at %p in fake buckets " "[0x%llx:0x%llx]\n", cp, - vm_page_fake_buckets_start, - vm_page_fake_buckets_end); + (uint64_t) vm_page_fake_buckets_start, + (uint64_t) vm_page_fake_buckets_end); } } #endif /* VM_PAGE_FAKE_BUCKETS */ for (i = 0; i < vm_page_bucket_count; i++) { bucket = &vm_page_buckets[i]; - if (bucket->pages == VM_PAGE_NULL) { + if (!bucket->page_list) { continue; } bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK]; lck_spin_lock(bucket_lock); - p = bucket->pages; + p = VM_PAGE_UNPACK_PTR(bucket->page_list); while (p != VM_PAGE_NULL) { if (!p->hashed) { panic("BUCKET_CHECK: page %p (%p,0x%llx) " @@ -6424,7 +6632,7 @@ vm_page_buckets_check(void) i, bucket, p, p->object, p->offset, p_hash); } - p = p->next; + p = VM_PAGE_UNPACK_PTR(p->next_m); } lck_spin_unlock(bucket_lock); } diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c index ec74a2810..00039b366 100644 --- a/osfmk/vm/vm_shared_region.c +++ b/osfmk/vm/vm_shared_region.c @@ -198,7 +198,7 @@ vm_shared_region_get( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> get(%p)\n", - task)); + (void *)VM_KERNEL_ADDRPERM(task))); task_lock(task); vm_shared_region_lock(); @@ -212,7 +212,8 @@ vm_shared_region_get( SHARED_REGION_TRACE_DEBUG( ("shared_region: get(%p) <- %p\n", - task, shared_region)); + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(shared_region))); return shared_region; } @@ -231,11 +232,12 @@ vm_shared_region_base_address( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> base_address(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: base_address(%p) <- 0x%llx\n", - shared_region, (long long)shared_region->sr_base_address)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (long long)shared_region->sr_base_address)); return shared_region->sr_base_address; } @@ -253,11 +255,12 @@ vm_shared_region_size( { SHARED_REGION_TRACE_DEBUG( 
("shared_region: -> size(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: size(%p) <- 0x%llx\n", - shared_region, (long long)shared_region->sr_size)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (long long)shared_region->sr_size)); return shared_region->sr_size; } @@ -275,11 +278,12 @@ vm_shared_region_mem_entry( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> mem_entry(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: mem_entry(%p) <- %p\n", - shared_region, shared_region->sr_mem_entry)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(shared_region->sr_mem_entry))); return shared_region->sr_mem_entry; } @@ -289,11 +293,12 @@ vm_shared_region_get_slide( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> vm_shared_region_get_slide(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: vm_shared_region_get_slide(%p) <- %u\n", - shared_region, shared_region->sr_slide_info.slide)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + shared_region->sr_slide_info.slide)); /* 0 if we haven't slid */ assert(shared_region->sr_slide_info.slide_object != NULL || @@ -308,11 +313,12 @@ vm_shared_region_get_slide_info( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> vm_shared_region_get_slide_info(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: vm_shared_region_get_slide_info(%p) <- %p\n", - shared_region, &shared_region->sr_slide_info)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(&shared_region->sr_slide_info))); return &shared_region->sr_slide_info; } @@ -332,7 +338,8 @@ vm_shared_region_set( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> set(%p, %p)\n", - task, new_shared_region)); + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(new_shared_region))); task_lock(task); vm_shared_region_lock(); @@ -354,7 +361,9 @@ vm_shared_region_set( SHARED_REGION_TRACE_DEBUG( ("shared_region: set(%p) <- old=%p new=%p\n", - task, old_shared_region, new_shared_region)); + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(old_shared_region), + (void *)VM_KERNEL_ADDRPERM(new_shared_region))); } /* @@ -375,7 +384,8 @@ vm_shared_region_lookup( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> lookup(root=%p,cpu=%d,64bit=%d)\n", - root_dir, cputype, is_64bit)); + + (void *)VM_KERNEL_ADDRPERM(root_dir), cputype, is_64bit)); shared_region = NULL; new_shared_region = NULL; @@ -431,7 +441,9 @@ done: SHARED_REGION_TRACE_DEBUG( ("shared_region: lookup(root=%p,cpu=%d,64bit=%d) <- %p\n", - root_dir, cputype, is_64bit, shared_region)); + (void *)VM_KERNEL_ADDRPERM(root_dir), + cputype, is_64bit, + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 0); return shared_region; @@ -451,7 +463,7 @@ vm_shared_region_reference_locked( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> reference_locked(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 0); shared_region->sr_ref_count++; @@ -472,7 +484,8 @@ vm_shared_region_reference_locked( SHARED_REGION_TRACE_DEBUG( ("shared_region: reference_locked(%p) <- %d\n", - shared_region, 
shared_region->sr_ref_count)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + shared_region->sr_ref_count)); } /* @@ -485,7 +498,7 @@ vm_shared_region_deallocate( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> deallocate(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); vm_shared_region_lock(); @@ -519,7 +532,8 @@ vm_shared_region_deallocate( shared_region->sr_ref_count--; SHARED_REGION_TRACE_DEBUG( ("shared_region: deallocate(%p): ref now %d\n", - shared_region, shared_region->sr_ref_count)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + shared_region->sr_ref_count)); if (shared_region->sr_ref_count == 0) { uint64_t deadline; @@ -545,7 +559,7 @@ vm_shared_region_deallocate( SHARED_REGION_TRACE_DEBUG( ("shared_region: deallocate(%p): armed timer\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); vm_shared_region_unlock(); } else { @@ -576,7 +590,7 @@ vm_shared_region_deallocate( SHARED_REGION_TRACE_DEBUG( ("shared_region: deallocate(%p) <-\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); } void @@ -611,7 +625,7 @@ vm_shared_region_create( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> create(root=%p,cpu=%d,64bit=%d)\n", - root_dir, cputype, is_64bit)); + (void *)VM_KERNEL_ADDRPERM(root_dir), cputype, is_64bit)); base_address = 0; size = 0; @@ -744,14 +758,20 @@ done: ("shared_region: create(root=%p,cpu=%d,64bit=%d," "base=0x%llx,size=0x%llx) <- " "%p mem=(%p,%p) map=%p pmap=%p\n", - root_dir, cputype, is_64bit, (long long)base_address, - (long long)size, shared_region, - mem_entry_port, mem_entry, sub_map, sub_map->pmap)); + (void *)VM_KERNEL_ADDRPERM(root_dir), + cputype, is_64bit, (long long)base_address, + (long long)size, + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(mem_entry_port), + (void *)VM_KERNEL_ADDRPERM(mem_entry), + (void *)VM_KERNEL_ADDRPERM(sub_map), + (void *)VM_KERNEL_ADDRPERM(sub_map->pmap))); } else { SHARED_REGION_TRACE_INFO( ("shared_region: create(root=%p,cpu=%d,64bit=%d," "base=0x%llx,size=0x%llx) <- NULL", - root_dir, cputype, is_64bit, (long long)base_address, + (void *)VM_KERNEL_ADDRPERM(root_dir), + cputype, is_64bit, (long long)base_address, (long long)size)); } return shared_region; @@ -770,8 +790,8 @@ vm_shared_region_destroy( SHARED_REGION_TRACE_INFO( ("shared_region: -> destroy(%p) (root=%p,cpu=%d,64bit=%d)\n", - shared_region, - shared_region->sr_root_dir, + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir), shared_region->sr_cpu_type, shared_region->sr_64bit)); @@ -835,7 +855,7 @@ vm_shared_region_destroy( SHARED_REGION_TRACE_DEBUG( ("shared_region: destroy(%p) <-\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); shared_region = NULL; } @@ -854,7 +874,7 @@ vm_shared_region_start_address( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> start_address(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); vm_shared_region_lock(); @@ -888,17 +908,18 @@ vm_shared_region_start_address( SHARED_REGION_TRACE_DEBUG( ("shared_region: start_address(%p) <- 0x%llx\n", - shared_region, (long long)shared_region->sr_base_address)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (long long)shared_region->sr_base_address)); return kr; } void vm_shared_region_undo_mappings( - vm_map_t sr_map, - mach_vm_offset_t sr_base_address, - struct shared_file_mapping_np *mappings, - unsigned int mappings_count) + vm_map_t sr_map, + mach_vm_offset_t sr_base_address, 
+ struct shared_file_mapping_np *mappings, + unsigned int mappings_count) { unsigned int j = 0; vm_shared_region_t shared_region = NULL; @@ -1065,8 +1086,9 @@ vm_shared_region_map_file( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> map(%p,%d,%p,%p,0x%llx)\n", - shared_region, mappings_count, mappings, - file_control, file_size)); + (void *)VM_KERNEL_ADDRPERM(shared_region), mappings_count, + (void *)VM_KERNEL_ADDRPERM(mappings), + (void *)VM_KERNEL_ADDRPERM(file_control), file_size)); /* get the VM object associated with the file to be mapped */ file_object = memory_object_control_to_vm_object(file_control); @@ -1247,8 +1269,8 @@ vm_shared_region_map_file( (long long)slide_start, (long long)slide_size, kr)); - vm_shared_region_undo_mappings(NULL, - 0, + vm_shared_region_undo_mappings(sr_map, + sr_base_address, mappings, mappings_count); } @@ -1270,8 +1292,9 @@ vm_shared_region_map_file( done: SHARED_REGION_TRACE_DEBUG( ("shared_region: map(%p,%d,%p,%p,0x%llx) <- 0x%x \n", - shared_region, mappings_count, mappings, - file_control, file_size, kr)); + (void *)VM_KERNEL_ADDRPERM(shared_region), mappings_count, + (void *)VM_KERNEL_ADDRPERM(mappings), + (void *)VM_KERNEL_ADDRPERM(file_control), file_size, kr)); return kr; } @@ -1302,7 +1325,9 @@ vm_shared_region_enter( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> " "enter(map=%p,task=%p,root=%p,cpu=%d,64bit=%d)\n", - map, task, fsroot, cpu, is_64bit)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit)); /* lookup (create if needed) the shared region for this environment */ shared_region = vm_shared_region_lookup(fsroot, cpu, is_64bit); @@ -1312,7 +1337,9 @@ vm_shared_region_enter( ("shared_region: -> " "enter(map=%p,task=%p,root=%p,cpu=%d,64bit=%d): " "lookup failed !\n", - map, task, fsroot, cpu, is_64bit)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit)); //panic("shared_region_enter: lookup failed\n"); return KERN_FAILURE; } @@ -1353,17 +1380,23 @@ vm_shared_region_enter( SHARED_REGION_TRACE_ERROR( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu, is_64bit, (long long)target_address, - (long long)mapping_size, sr_handle, kr)); + (long long)mapping_size, + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); goto done; } SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, (long long)target_address, (long long)mapping_size, - sr_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); sr_offset += mapping_size; sr_size -= mapping_size; } @@ -1398,17 +1431,23 @@ vm_shared_region_enter( SHARED_REGION_TRACE_ERROR( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu, is_64bit, (long long)target_address, - (long long)mapping_size, sr_handle, kr)); + (long long)mapping_size, + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); goto done; } SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d): " "nested 
vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, (long long)target_address, (long long)mapping_size, - sr_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); } if (sr_size > 0) { /* and there's some left to be mapped without pmap-nesting */ @@ -1430,17 +1469,23 @@ vm_shared_region_enter( SHARED_REGION_TRACE_ERROR( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu, is_64bit, (long long)target_address, - (long long)mapping_size, sr_handle, kr)); + (long long)mapping_size, + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); goto done; } SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, (long long)target_address, (long long)mapping_size, - sr_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); sr_offset += mapping_size; sr_size -= mapping_size; } @@ -1449,11 +1494,13 @@ vm_shared_region_enter( done: SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d) <- 0x%x\n", - map, task, fsroot, cpu, is_64bit, kr)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, kr)); return kr; } -#define SANE_SLIDE_INFO_SIZE (1024*1024) /*Can be changed if needed*/ +#define SANE_SLIDE_INFO_SIZE (2048*1024) /*Can be changed if needed*/ struct vm_shared_region_slide_info slide_info; kern_return_t @@ -1786,7 +1833,7 @@ _vm_commpage_init( SHARED_REGION_TRACE_DEBUG( ("commpage: _init(0x%llx) <- %p\n", - (long long)size, *handlep)); + (long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep))); } #endif @@ -1878,7 +1925,8 @@ vm_commpage_enter( SHARED_REGION_TRACE_DEBUG( ("commpage: -> enter(%p,%p)\n", - map, task)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task))); commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH; /* the comm page is likely to be beyond the actual end of the VM map */ @@ -1928,8 +1976,10 @@ vm_commpage_enter( SHARED_REGION_TRACE_ERROR( ("commpage: enter(%p,0x%llx,0x%llx) " "commpage %p mapping failed 0x%x\n", - map, (long long)commpage_address, - (long long)commpage_size, commpage_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(map), + (long long)commpage_address, + (long long)commpage_size, + (void *)VM_KERNEL_ADDRPERM(commpage_handle), kr)); } /* map the comm text page in the task's address space */ @@ -1950,8 +2000,10 @@ vm_commpage_enter( SHARED_REGION_TRACE_ERROR( ("commpage text: enter(%p,0x%llx,0x%llx) " "commpage text %p mapping failed 0x%x\n", - map, (long long)commpage_text_address, - (long long)commpage_text_size, commpage_text_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(map), + (long long)commpage_text_address, + (long long)commpage_text_size, + (void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr)); } /* @@ -1975,14 +2027,16 @@ vm_commpage_enter( SHARED_REGION_TRACE_ERROR( ("commpage: enter(%p,0x%llx,0x%llx) " "objc mapping failed 0x%x\n", - map, (long long)objc_address, + (void *)VM_KERNEL_ADDRPERM(map), + (long long)objc_address, (long long)objc_size, kr)); } } SHARED_REGION_TRACE_DEBUG( ("commpage: enter(%p,%p) <- 
0x%x\n", - map, task, kr)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), kr)); return kr; } diff --git a/osfmk/vm/vm_shared_region.h b/osfmk/vm/vm_shared_region.h index 67a9257d0..9cc2b394c 100644 --- a/osfmk/vm/vm_shared_region.h +++ b/osfmk/vm/vm_shared_region.h @@ -102,7 +102,7 @@ struct vm_shared_region_slide_info_entry { }; #define NBBY 8 -#define NUM_SLIDING_BITMAPS_PER_PAGE (PAGE_SIZE/sizeof(int)/NBBY) /*128*/ +#define NUM_SLIDING_BITMAPS_PER_PAGE (0x1000/sizeof(int)/NBBY) /*128*/ typedef struct slide_info_entry_toc *slide_info_entry_toc_t; struct slide_info_entry_toc { uint8_t entry[NUM_SLIDING_BITMAPS_PER_PAGE]; diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index 5c66335d7..024140fb5 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -114,6 +114,7 @@ #include #include #include +#include vm_size_t upl_offset_to_pagelist = 0; @@ -2025,6 +2026,13 @@ mach_make_memory_entry_64( goto make_mem_done; } object->purgable = VM_PURGABLE_NONVOLATILE; + assert(object->vo_purgeable_owner == NULL); + assert(object->resident_page_count == 0); + assert(object->wired_page_count == 0); + vm_object_lock(object); + vm_purgeable_nonvolatile_enqueue(object, + current_task()); + vm_object_unlock(object); } /* @@ -2059,6 +2067,7 @@ mach_make_memory_entry_64( * shadow objects either... */ object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + object->true_share = TRUE; user_entry->backing.object = object; user_entry->internal = TRUE; @@ -2416,13 +2425,14 @@ redo_lookup: VM_MAP_PAGE_MASK(target_map))); vm_map_clip_end(target_map, map_entry, - (vm_map_round_page(offset, - VM_MAP_PAGE_MASK(target_map)) - + map_size)); + (vm_map_round_page(offset + map_size, + VM_MAP_PAGE_MASK(target_map)))); force_shadow = TRUE; - map_size = map_entry->vme_end - map_entry->vme_start; - total_size = map_size; + if ((map_entry->vme_end - offset) < map_size) { + map_size = map_entry->vme_end - offset; + } + total_size = map_entry->vme_end - map_entry->vme_start; vm_map_lock_write_to_read(target_map); vm_object_lock(object); @@ -2468,7 +2478,9 @@ redo_lookup: target_map = original_map; goto redo_lookup; } +#if 00 vm_object_lock(object); +#endif /* * JMM - We need to avoid coming here when the object @@ -2481,7 +2493,9 @@ redo_lookup: vm_object_shadow(&map_entry->object.vm_object, &map_entry->offset, total_size); shadow_object = map_entry->object.vm_object; +#if 00 vm_object_unlock(object); +#endif prot = map_entry->protection & ~VM_PROT_WRITE; @@ -2567,6 +2581,22 @@ redo_lookup: } } +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; @@ -2619,6 +2649,8 @@ redo_lookup: /* parent_entry->ref_count++; XXX ? 
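
Editor's note: the hunk above (and its twin a little further on) makes mach_make_memory_entry_64() record who forced an object's true_share flag: when VM object tracking is compiled in and initialized, OSBacktrace() captures the current call stack and btlog_add_entry() files it under the object with the TRUESHARE operation tag, so a debugger can later attribute the state change. A compact sketch of that capture-and-record pattern; the wrapper and depth constant are illustrative, while OSBacktrace()/btlog_add_entry() are used with the same argument shapes as in the hunks:

```c
/*
 * Illustrative wrapper around the pattern used above. Assumes the xnu
 * declarations of btlog_t, OSBacktrace() and btlog_add_entry() are in
 * scope; TRACK_BTDEPTH stands in for VM_OBJECT_TRACKING_BTDEPTH.
 */
#define TRACK_BTDEPTH 8    /* assumed depth for this sketch */

static void
track_true_share(btlog_t *log, void *object, int operation)
{
    void *bt[TRACK_BTDEPTH];
    int num;

    /* fill bt[] with up to TRACK_BTDEPTH return addresses */
    num = OSBacktrace(bt, TRACK_BTDEPTH);

    /* file the backtrace under (object, operation) for later lookup */
    btlog_add_entry(log, object, operation, bt, num);
}
```
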
*/ /* Get an extra send-right on handle */ ipc_port_copy_send(parent_handle); + + *size = CAST_DOWN(vm_size_t, map_size); *object_handle = parent_handle; return KERN_SUCCESS; } else { @@ -2739,6 +2771,22 @@ redo_lookup: /* we now point to this object, hold on */ vm_object_reference(object); vm_object_lock(object); +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; @@ -3609,8 +3657,8 @@ kernel_object_iopl_request( } if (!object->private) { - if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) - *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); + if (*upl_size > MAX_UPL_TRANSFER_BYTES) + *upl_size = MAX_UPL_TRANSFER_BYTES; if (object->phys_contiguous) { *flags = UPL_PHYS_CONTIG; } else { diff --git a/osfmk/x86_64/copyio.c b/osfmk/x86_64/copyio.c index 1adb732e5..b9d7910db 100644 --- a/osfmk/x86_64/copyio.c +++ b/osfmk/x86_64/copyio.c @@ -210,8 +210,9 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr, break; } - if (!recursive_CopyIOActive) + if (!recursive_CopyIOActive) { thread->machine.specFlags &= ~CopyIOActive; + } if (no_shared_cr3) { istate = ml_set_interrupts_enabled(FALSE); if (get_cr3_raw() != kernel_pmap->pm_cr3) diff --git a/osfmk/x86_64/cswitch.s b/osfmk/x86_64/cswitch.s index c0f3715c4..59b642007 100644 --- a/osfmk/x86_64/cswitch.s +++ b/osfmk/x86_64/cswitch.s @@ -56,7 +56,6 @@ /* */ -#include #include #include diff --git a/osfmk/x86_64/idt64.s b/osfmk/x86_64/idt64.s index 3eaf156d8..c6a38aef8 100644 --- a/osfmk/x86_64/idt64.s +++ b/osfmk/x86_64/idt64.s @@ -278,8 +278,11 @@ L_dispatch_U32_after_fault: mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */ L_common_dispatch: - cld /* Ensure the direction flag is clear in the kernel */ - + cld /* Ensure the direction flag is clear in the kernel */ + cmpl $0, EXT(pmap_smap_enabled)(%rip) + je 1f + clac /* Clear EFLAGS.AC if SMAP is present/enabled */ +1: /* * On entering the kernel, we don't need to switch cr3 * because the kernel shares the user's address space. @@ -293,36 +296,37 @@ L_common_dispatch: mov %gs:CPU_KERNEL_CR3, %rcx mov %rcx, %gs:CPU_ACTIVE_CR3 test $3, %esi /* user/kernel? */ - jz 1f /* skip cr3 reload from kernel */ + jz 2f /* skip cr3 reload from kernel */ xor %rbp, %rbp cmpl $0, EXT(no_shared_cr3)(%rip) - je 1f + je 2f mov %rcx, %cr3 /* load kernel cr3 */ - jmp 2f /* and skip tlb flush test */ -1: + jmp 4f /* and skip tlb flush test */ +2: mov %gs:CPU_ACTIVE_CR3+4, %rcx shr $32, %rcx testl %ecx, %ecx - jz 2f + jz 4f movl $0, %gs:CPU_TLB_INVALID testl $(1<<16), %ecx /* Global? */ - jz 11f + jz 3f mov %cr4, %rcx /* RMWW CR4, for lack of an alternative*/ and $(~CR4_PGE), %rcx mov %rcx, %cr4 or $(CR4_PGE), %rcx mov %rcx, %cr4 - jmp 2f - -11: mov %cr3, %rcx + jmp 4f +3: + mov %cr3, %rcx mov %rcx, %cr3 -2: +4: mov %gs:CPU_ACTIVE_THREAD, %rcx /* Get the active thread */ + movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap */ cmpq $0, TH_PCB_IDS(%rcx) /* Is there a debug register state? 
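
Editor's note: the L_common_dispatch change above is the SMAP counterpart to the existing SMEP support. When pmap_smap_enabled is set, every kernel entry executes clac so EFLAGS.AC is clear, and any unintended supervisor-mode dereference of user-mapped memory faults immediately; only the dedicated user-copy routines are then expected to open a window with stac and close it with clac. A hedged sketch of that bracketing, assuming x86-64 with SMAP and hypothetical helper names (xnu's real user-copy path is osfmk/x86_64/copyio.c):

```c
/*
 * Hypothetical helpers sketching the stac/clac bracketing that SMAP
 * requires around legitimate user-memory access. Names are invented;
 * only the two instructions are architectural.
 */
static inline void
user_access_begin(void)
{
    __asm__ volatile("stac" ::: "memory");  /* set EFLAGS.AC: permit user access */
}

static inline void
user_access_end(void)
{
    __asm__ volatile("clac" ::: "memory");  /* clear EFLAGS.AC: forbid it again */
}
```
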
*/ - je 3f + je 5f xor %ecx, %ecx /* If so, reset DR7 (the control) */ mov %rcx, %dr7 -3: +5: incl %gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count /* Dispatch the designated handler */ jmp *%rdx @@ -393,7 +397,7 @@ L_32bit_return: je 1f cli POSTCODE2(0x6432) - CCALL1(panic_idt64, %rsp) + CCALL1(panic_idt64, %r15) 1: #endif /* DEBUG_IDT64 */ @@ -559,6 +563,7 @@ Entry(idt64_mdep_scall) pushq $(MACHDEP_INT) jmp L_32bit_entry_check +/* Programmed into MSR_IA32_LSTAR by mp_desc.c */ Entry(hi64_syscall) Entry(idt64_syscall) L_syscall_continue: @@ -1014,6 +1019,7 @@ Entry(hndl_alltraps) /* Check for active vtimers in the current task */ mov %gs:CPU_ACTIVE_THREAD, %rcx + movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling trap/exception */ mov TH_TASK(%rcx), %rbx TASK_VTIMER_CHECK(%rbx, %rcx) @@ -1027,7 +1033,7 @@ Entry(hndl_alltraps) Entry(return_from_trap) movq %gs:CPU_ACTIVE_THREAD,%r15 /* Get current thread */ - movl $-1, TH_IOTIER_OVERRIDE(%r15) /* Clear IO tier override before returning to userspace */ + movl $-1, TH_IOTIER_OVERRIDE(%r15) /* Reset IO tier override to -1 before returning to userspace */ cmpl $0, TH_RWLOCK_COUNT(%r15) /* Check if current thread has pending RW locks held */ jz 1f xorq %rbp, %rbp /* clear framepointer */ @@ -1352,6 +1358,7 @@ Entry(hndl_syscall) TIME_TRAP_UENTRY movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ + movl $-1, TH_IOTIER_OVERRIDE(%rcx) /* Reset IO tier override to -1 before handling syscall */ movq TH_TASK(%rcx),%rbx /* point to current task */ /* Check for active vtimers in the current task */ diff --git a/osfmk/x86_64/kpc_x86.c b/osfmk/x86_64/kpc_x86.c index 3a6ee7c04..a0b7e01ab 100644 --- a/osfmk/x86_64/kpc_x86.c +++ b/osfmk/x86_64/kpc_x86.c @@ -164,6 +164,19 @@ kpc_configurable_config_count(void) return kpc_configurable_count(); } +uint32_t +kpc_rawpmu_config_count(void) +{ + // RAW PMU access not implemented. + return 0; +} + +int +kpc_get_rawpmu_config(__unused kpc_config_t *configv) +{ + return 0; +} + static uint8_t kpc_fixed_width(void) { @@ -337,7 +350,7 @@ kpc_get_fixed_counters(uint64_t *counterv) for( i = 0; i < n; i++ ) { if ((1ull << (i + 32)) & status) counterv[i] = FIXED_SHADOW(ctr) + - (kpc_fixed_max() - FIXED_RELOAD(ctr)) + IA32_FIXED_CTRx(i); + (kpc_fixed_max() - FIXED_RELOAD(ctr) + 1 /* Wrap */) + IA32_FIXED_CTRx(i); } #else for( i = 0; i < n; i++ ) @@ -367,7 +380,29 @@ kpc_set_configurable_config(kpc_config_t *configv) for( i = 0; i < n; i++ ) { /* need to save and restore counter since it resets when reconfigured */ save = IA32_PMCx(i); - wrIA32_PERFEVTSELx(i, configv[i]); + /* + * Some bits are not safe to set from user space. 
+ * Allow these bits to be set: + * + * 0-7 Event select + * 8-15 UMASK + * 16 USR + * 17 OS + * 18 E + * 22 EN + * 23 INV + * 24-31 CMASK + * + * Excluding: + * + * 19 PC + * 20 INT + * 21 AnyThread + * 32 IN_TX + * 33 IN_TXCP + * 34-63 Reserved + */ + wrIA32_PERFEVTSELx(i, configv[i] & 0xffc7ffffull); wrIA32_PMCx(i, save); } @@ -474,6 +509,12 @@ kpc_set_period_arch( struct kpc_config_remote *mp_config ) /* interface functions */ +void +kpc_arch_init(void) +{ + /* No-op */ +} + uint32_t kpc_get_classes(void) { @@ -518,7 +559,7 @@ void kpc_pmi_handler(__unused x86_saved_state_t *state) extra = kpc_reload_fixed(ctr); FIXED_SHADOW(ctr) - += kpc_fixed_max() - FIXED_RELOAD(ctr) + extra; + += (kpc_fixed_max() - FIXED_RELOAD(ctr) + 1 /* Wrap */) + extra; BUF_INFO(PERF_KPC_FCOUNTER, ctr, FIXED_SHADOW(ctr), extra, FIXED_ACTIONID(ctr)); @@ -549,6 +590,15 @@ void kpc_pmi_handler(__unused x86_saved_state_t *state) ml_set_interrupts_enabled(enabled); } +int +kpc_force_all_ctrs_arch( task_t task __unused, int val __unused ) +{ + /* TODO: reclaim counters ownership from XCPM */ + return 0; +} - - +int +kpc_set_sw_inc( uint32_t mask __unused ) +{ + return ENOTSUP; +} diff --git a/osfmk/x86_64/locore.s b/osfmk/x86_64/locore.s index f6af50273..ca044a570 100644 --- a/osfmk/x86_64/locore.s +++ b/osfmk/x86_64/locore.s @@ -55,7 +55,6 @@ */ #include -#include #include #include diff --git a/osfmk/x86_64/loose_ends.c b/osfmk/x86_64/loose_ends.c index cd61326f3..486d5c726 100644 --- a/osfmk/x86_64/loose_ends.c +++ b/osfmk/x86_64/loose_ends.c @@ -251,13 +251,13 @@ static inline unsigned int ml_phys_read_data(pmap_paddr_t paddr, int size) { unsigned int result = 0; + unsigned char s1; + unsigned short s2; if (!physmap_enclosed(paddr)) panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr); switch (size) { - unsigned char s1; - unsigned short s2; case 1: s1 = *(volatile unsigned char *)PHYSMAP_PTOV(paddr); result = s1; @@ -491,6 +491,13 @@ memcmp(const void *s1, const void *s2, size_t n) return (0); } +void * +memmove(void *dst, const void *src, size_t ulen) +{ + bcopy(src, dst, ulen); + return dst; +} + /* * Abstract: * strlen returns the number of characters in "string" preceeding @@ -646,20 +653,6 @@ kdp_register_callout(kdp_callout_fn_t fn, void *arg) } #endif -/* - * Return a uniformly distributed 64-bit random number. - * - * This interface should have minimal dependencies on kernel - * services, and thus be available very early in the life - * of the kernel. But as a result, it may not be very random - * on all platforms. - */ -uint64_t -early_random(void) -{ - return (ml_early_random()); -} - #if !CONFIG_VMX int host_vmxon(boolean_t exclusive __unused) { diff --git a/osfmk/x86_64/lowmem_vectors.c b/osfmk/x86_64/lowmem_vectors.c index d1d1e7f24..190edafc1 100644 --- a/osfmk/x86_64/lowmem_vectors.c +++ b/osfmk/x86_64/lowmem_vectors.c @@ -55,7 +55,6 @@ * the rights to redistribute these changes. */ -#include #include #include #include diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s index 0d304f26a..253070a09 100644 --- a/osfmk/x86_64/machine_routines_asm.s +++ b/osfmk/x86_64/machine_routines_asm.s @@ -39,9 +39,7 @@ /* ** ml_get_timebase() ** -** Entry - %rdi contains pointer to 64 bit structure. -** -** Exit - 64 bit structure filled in. 
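
Editor's note: the new 0xffc7ffffull mask in kpc_set_configurable_config() is exactly the union of the fields the comment allows, with PC (19), INT (20), AnyThread (21), IN_TX (32), IN_TXCP (33) and the reserved high bits stripped. The companion `+ 1 /* Wrap */` fixes in kpc_get_fixed_counters() and the PMI handler account for the counter stepping from its maximum value through zero, making the accumulated delta (max - reload + 1) plus the current count. The mask arithmetic can be checked at compile time; field names follow the comment above, bit positions per the Intel SDM:

```c
#include <stdint.h>

/* Rebuild the IA32_PERFEVTSELx allowlist from its named fields and
 * verify it matches the literal used in kpc_set_configurable_config(). */
#define EVTSEL_EVENT   0x00000000ffULL   /* bits 0-7:   event select */
#define EVTSEL_UMASK   0x000000ff00ULL   /* bits 8-15:  unit mask */
#define EVTSEL_USR     (1ULL << 16)
#define EVTSEL_OS      (1ULL << 17)
#define EVTSEL_E       (1ULL << 18)      /* edge detect */
#define EVTSEL_EN      (1ULL << 22)
#define EVTSEL_INV     (1ULL << 23)
#define EVTSEL_CMASK   0x00ff000000ULL   /* bits 24-31: counter mask */

_Static_assert((EVTSEL_EVENT | EVTSEL_UMASK | EVTSEL_USR | EVTSEL_OS |
                EVTSEL_E | EVTSEL_EN | EVTSEL_INV | EVTSEL_CMASK)
               == 0xffc7ffffULL,
               "allowlist equals the mask applied to configv[i]");
```
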
+** Returns TSC in RAX ** */ ENTRY(ml_get_timebase) @@ -51,7 +49,6 @@ ENTRY(ml_get_timebase) lfence shlq $32,%rdx orq %rdx,%rax - movq %rax, (%rdi) ret @@ -190,45 +187,6 @@ Entry(x86_init_wrapper) movq %rsi, %rsp callq *%rdi - /* - * Generate a 64-bit quantity with possibly random characteristics, intended for use - * before the kernel entropy pool is available. The processor's RNG is used if - * available, and a value derived from the Time Stamp Counter is returned if not. - * Multiple invocations may result in well-correlated values if sourced from the TSC. - */ -Entry(ml_early_random) - mov %rbx, %rsi - mov $1, %eax - cpuid - mov %rsi, %rbx - test $(1 << 30), %ecx - jz Lnon_rdrand - RDRAND_RAX /* RAX := 64 bits of DRBG entropy */ - jnc Lnon_rdrand - ret -Lnon_rdrand: - rdtsc /* EDX:EAX := TSC */ - /* Distribute low order bits */ - mov %eax, %ecx - xor %al, %ah - shl $16, %rcx - xor %rcx, %rax - xor %eax, %edx - - /* Incorporate ASLR entropy, if any */ - lea (%rip), %rcx - shr $21, %rcx - movzbl %cl, %ecx - shl $16, %ecx - xor %ecx, %edx - - mov %ah, %cl - ror %cl, %edx /* Right rotate EDX (TSC&0xFF ^ (TSC>>8 & 0xFF))&1F */ - shl $32, %rdx - xor %rdx, %rax - mov %cl, %al - ret - #if CONFIG_VMX /* diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c index e9d0157ef..8e1b55902 100644 --- a/osfmk/x86_64/pmap.c +++ b/osfmk/x86_64/pmap.c @@ -102,7 +102,6 @@ #include #include -#include #include #include @@ -174,7 +173,7 @@ uint64_t max_preemption_latency_tsc = 0; pv_hashed_entry_t *pv_hash_table; /* hash lists */ -uint32_t npvhash = 0; +uint32_t npvhashmask = 0, npvhashbuckets = 0; pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL; pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL; @@ -182,6 +181,8 @@ decl_simple_lock_data(,pv_hashed_free_list_lock) decl_simple_lock_data(,pv_hashed_kern_free_list_lock) decl_simple_lock_data(,pv_hash_table_lock) +decl_simple_lock_data(,phys_backup_lock) + zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */ /* @@ -291,6 +292,7 @@ extern vm_offset_t sconstdata, econstdata; extern void *KPTphys; boolean_t pmap_smep_enabled = FALSE; +boolean_t pmap_smap_enabled = FALSE; void pmap_cpu_init(void) @@ -325,7 +327,18 @@ pmap_cpu_init(void) } } +static uint32_t pmap_scale_shift(void) { + uint32_t scale = 0; + if (sane_size <= 8*GB) { + scale = (uint32_t)(sane_size / (2 * GB)); + } else if (sane_size <= 32*GB) { + scale = 4 + (uint32_t)((sane_size - (8 * GB))/ (4 * GB)); + } else { + scale = 10 + (uint32_t)MIN(4, ((sane_size - (32 * GB))/ (8 * GB))); + } + return scale; +} /* * Bootstrap the system enough to run with virtual memory. 
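
Editor's note: pmap_scale_shift(), added just above, sizes the PV hash table with available memory instead of using a fixed NPVHASH. The bucket count becomes NPVHASHBUCKETS << scale and npvhashmask is that minus one, which keeps the mask of the required 2^N - 1 form (assuming NPVHASHBUCKETS is a power of two, which the sanity check in pmap_bootstrap enforces). A standalone copy of the arithmetic, so the shift can be tabulated outside the kernel; GB and the driver are the only additions:

```c
#include <stdint.h>
#include <stdio.h>

#define GB (1ULL << 30)

/* Same arithmetic as pmap_scale_shift() in the hunk above. */
static uint32_t
scale_shift(uint64_t sane_size)
{
    if (sane_size <= 8 * GB)
        return (uint32_t)(sane_size / (2 * GB));
    if (sane_size <= 32 * GB)
        return 4 + (uint32_t)((sane_size - 8 * GB) / (4 * GB));
    uint64_t over = (sane_size - 32 * GB) / (8 * GB);
    return 10 + (uint32_t)(over < 4 ? over : 4);
}

int main(void)
{
    /* prints: 2 GB -> 1, 4 GB -> 2, 8 GB -> 4, 16 GB -> 6,
     *         32 GB -> 10, 64 GB -> 14 */
    for (uint64_t gb = 2; gb <= 64; gb *= 2)
        printf("%llu GB -> shift %u\n",
               (unsigned long long)gb, scale_shift(gb * GB));
    return 0;
}
```
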
@@ -410,21 +423,23 @@ pmap_bootstrap( virtual_avail = va; #endif + if (!PE_parse_boot_argn("npvhash", &npvhashmask, sizeof (npvhashmask))) { + npvhashmask = ((NPVHASHBUCKETS) << pmap_scale_shift()) - 1; - if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) { - if (0 != ((npvhash + 1) & npvhash)) { - kprintf("invalid hash %d, must be ((2^N)-1), " - "using default %d\n", npvhash, NPVHASH); - npvhash = NPVHASH; - } - } else { - npvhash = NPVHASH; + } + + npvhashbuckets = npvhashmask + 1; + + if (0 != ((npvhashbuckets) & npvhashmask)) { + panic("invalid hash %d, must be ((2^N)-1), " + "using default %d\n", npvhashmask, NPVHASHMASK); } simple_lock_init(&kernel_pmap->lock, 0); simple_lock_init(&pv_hashed_free_list_lock, 0); simple_lock_init(&pv_hashed_kern_free_list_lock, 0); simple_lock_init(&pv_hash_table_lock,0); + simple_lock_init(&phys_backup_lock, 0); pmap_cpu_init(); @@ -436,7 +451,7 @@ pmap_bootstrap( #if DEBUG printf("Stack canary: 0x%lx\n", __stack_chk_guard[0]); - printf("ml_early_random(): 0x%qx\n", ml_early_random()); + printf("early_random(): 0x%qx\n", early_random()); #endif boolean_t ptmp; /* Check if the user has requested disabling stack or heap no-execute @@ -657,9 +672,9 @@ pmap_init(void) pmap_npages = (uint32_t)npages; #endif s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages - + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1)) + + (sizeof (struct pv_hashed_entry_t *) * (npvhashbuckets)) + pv_lock_table_size(npages) - + pv_hash_lock_table_size((npvhash+1)) + + pv_hash_lock_table_size((npvhashbuckets)) + npages); s = round_page(s); @@ -674,7 +689,7 @@ pmap_init(void) vsize = s; #if PV_DEBUG - if (0 == npvhash) panic("npvhash not initialized"); + if (0 == npvhashmask) panic("npvhashmask not initialized"); #endif /* @@ -684,13 +699,13 @@ pmap_init(void) addr = (vm_offset_t) (pv_head_table + npages); pv_hash_table = (pv_hashed_entry_t *)addr; - addr = (vm_offset_t) (pv_hash_table + (npvhash + 1)); + addr = (vm_offset_t) (pv_hash_table + (npvhashbuckets)); pv_lock_table = (char *) addr; addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages)); pv_hash_lock_table = (char *) addr; - addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1))); + addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhashbuckets))); pmap_phys_attributes = (char *) addr; @@ -2125,7 +2140,7 @@ pmap_switch(pmap_t tpmap) spl_t s; s = splhigh(); /* Make sure interruptions are disabled */ - set_dirbase(tpmap, current_thread()); + set_dirbase(tpmap, current_thread(), cpu_number()); splx(s); } @@ -2173,20 +2188,6 @@ pt_fake_zone_info( *caller_acct = 1; } -static inline void -pmap_cpuset_NMIPI(cpu_set cpu_mask) { - unsigned int cpu, cpu_bit; - uint64_t deadline; - - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { - if (cpu_mask & cpu_bit) - cpu_NMI_interrupt(cpu); - } - deadline = mach_absolute_time() + (LockTimeOut); - while (mach_absolute_time() < deadline) - cpu_pause(); -} - void pmap_flush_context_init(pmap_flush_context *pfc) @@ -2195,6 +2196,7 @@ pmap_flush_context_init(pmap_flush_context *pfc) pfc->pfc_invalid_global = 0; } +extern unsigned TLBTimeOut; void pmap_flush( pmap_flush_context *pfc) @@ -2202,9 +2204,9 @@ pmap_flush( unsigned int my_cpu; unsigned int cpu; unsigned int cpu_bit; - cpu_set cpus_to_respond = 0; - cpu_set cpus_to_signal = 0; - cpu_set cpus_signaled = 0; + cpumask_t cpus_to_respond = 0; + cpumask_t cpus_to_signal = 0; + cpumask_t cpus_signaled = 0; boolean_t flush_self = FALSE; uint64_t 
deadline; @@ -2252,7 +2254,10 @@ pmap_flush( if (cpus_to_respond) { - deadline = mach_absolute_time() + LockTimeOut; + deadline = mach_absolute_time() + + (TLBTimeOut ? TLBTimeOut : LockTimeOut); + boolean_t is_timeout_traced = FALSE; + /* * Wait for those other cpus to acknowledge */ @@ -2277,9 +2282,17 @@ pmap_flush( if (cpus_to_respond && (mach_absolute_time() > deadline)) { if (machine_timeout_suspended()) continue; + if (TLBTimeOut == 0) { + if (is_timeout_traced) + continue; + PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS_TO), + NULL, cpus_to_signal, cpus_to_respond, 0, 0); + is_timeout_traced = TRUE; + continue; + } pmap_tlb_flush_timeout = TRUE; orig_acks = NMIPI_acks; - pmap_cpuset_NMIPI(cpus_to_respond); + mp_cpus_NMIPI(cpus_to_respond); panic("TLB invalidation IPI timeout: " "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx", @@ -2309,17 +2322,23 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o { unsigned int cpu; unsigned int cpu_bit; - cpu_set cpus_to_signal; + cpumask_t cpus_to_signal; unsigned int my_cpu = cpu_number(); pmap_paddr_t pmap_cr3 = pmap->pm_cr3; boolean_t flush_self = FALSE; uint64_t deadline; boolean_t pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap)); boolean_t need_global_flush = FALSE; + uint32_t event_code; assert((processor_avail_count < 2) || (ml_get_interrupts_enabled() && get_preemption_level() != 0)); + event_code = (pmap == kernel_pmap) ? PMAP_CODE(PMAP__FLUSH_KERN_TLBS) + : PMAP_CODE(PMAP__FLUSH_TLBS); + PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_START, + pmap, options, startv, endv, 0); + /* * Scan other cpus for matching active or task CR3. * For idle cpus (with no active map) we mark them invalid but @@ -2384,15 +2403,8 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o } } if ((options & PMAP_DELAY_TLB_FLUSH)) - return; + goto out; - if (pmap == kernel_pmap) { - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_KERN_TLBS) | DBG_FUNC_START, - pmap, cpus_to_signal, flush_self, startv, endv); - } else { - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START, - pmap, cpus_to_signal, flush_self, startv, endv); - } /* * Flush local tlb if required. * Do this now to overlap with other processors responding. @@ -2410,9 +2422,12 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o } if (cpus_to_signal) { - cpu_set cpus_to_respond = cpus_to_signal; + cpumask_t cpus_to_respond = cpus_to_signal; + + deadline = mach_absolute_time() + + (TLBTimeOut ? 
TLBTimeOut : LockTimeOut); + boolean_t is_timeout_traced = FALSE; - deadline = mach_absolute_time() + LockTimeOut; /* * Wait for those other cpus to acknowledge */ @@ -2437,9 +2452,19 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o if (cpus_to_respond && (mach_absolute_time() > deadline)) { if (machine_timeout_suspended()) continue; + if (TLBTimeOut == 0) { + /* cut tracepoint but don't panic */ + if (is_timeout_traced) + continue; + PMAP_TRACE_CONSTANT( + PMAP_CODE(PMAP__FLUSH_TLBS_TO), + pmap, cpus_to_signal, cpus_to_respond, 0, 0); + is_timeout_traced = TRUE; + continue; + } pmap_tlb_flush_timeout = TRUE; orig_acks = NMIPI_acks; - pmap_cpuset_NMIPI(cpus_to_respond); + mp_cpus_NMIPI(cpus_to_respond); panic("TLB invalidation IPI timeout: " "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx", @@ -2452,13 +2477,9 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, pmap_cr3: 0x%llx, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, pmap_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map); } - if (pmap == kernel_pmap) { - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_KERN_TLBS) | DBG_FUNC_END, - pmap, cpus_to_signal, startv, endv, 0); - } else { - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END, - pmap, cpus_to_signal, startv, endv, 0); - } +out: + PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_END, + pmap, cpus_to_signal, startv, endv, 0); } diff --git a/osfmk/x86_64/start.s b/osfmk/x86_64/start.s index 8d2230bf7..9baf0ab3b 100644 --- a/osfmk/x86_64/start.s +++ b/osfmk/x86_64/start.s @@ -56,7 +56,6 @@ /* */ -#include #include #include @@ -212,7 +211,7 @@ L_pstart_common: cpuid test $(1 << 30), %ecx jz Lnon_rdrand - RDRAND_RAX /* RAX := 64 bits of DRBG entropy */ + rdrand %rax /* RAX := 64 bits of DRBG entropy */ jnc Lnon_rdrand /* TODO: complain if DRBG fails at this stage */ Lstore_random_guard: diff --git a/pexpert/Makefile b/pexpert/Makefile index 71a283d78..d8ac9dfba 100644 --- a/pexpert/Makefile +++ b/pexpert/Makefile @@ -9,11 +9,13 @@ include $(MakeInc_def) INSTINC_SUBDIRS = pexpert INSTINC_SUBDIRS_X86_64 = pexpert +INSTINC_SUBDIRS_X86_64H = pexpert INSTINC_SUBDIRS_ARM = pexpert EXPINC_SUBDIRS = pexpert EXPINC_SUBDIRS_X86_64 = pexpert +EXPINC_SUBDIRS_X86_64H = pexpert EXPINC_SUBDIRS_ARM = pexpert COMP_SUBDIRS = \ diff --git a/pexpert/conf/MASTER b/pexpert/conf/MASTER deleted file mode 100644 index 0f102b7c0..000000000 --- a/pexpert/conf/MASTER +++ /dev/null @@ -1,94 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -####################################################################### -# -# Master machine independent configuration file. -# -# Specific configuration files are created based on this and -# the machine specific master file using the doconf script. -# -# Any changes to the master configuration files will affect all -# other configuration files based upon it. -# -####################################################################### -# -# To build a configuration, execute "doconf ." -# Configurations are specified in the "Configurations:" section -# of the MASTER and MASTER.* files as follows: -# -# = [ ... 
] -# -# Lines in the MASTER and MASTER.* files are selected based on -# the attribute selector list, found in a comment at the end of -# the line. This is a list of attributes separated by commas. -# The "!" operator selects the line if none of the attributes are -# specified. -# -# For example: -# -# selects a line if "foo" or "bar" are specified. -# selects a line if neither "foo" nor "bar" is -# specified. -# -# Lines with no attributes specified are selected for all -# configurations. -# -####################################################################### -# -# STANDARD CONFIGURATION OPTIONS (select any combination) -# -# debug = extra kernel level debugging support -# mach = Mach support -# -# EXPERIMENTAL CONFIGURATION OPTIONS (select any combination, carefully) -# -# nbc = no buffer cache support -# simple = non-rollover clock support -# timing = precision timing support -# host = host resource control support -# fixpri = fixed priority threads -# -# MULTI-PROCESSOR CONFIGURATION (select at most one) -# -# multi16 = enable 16 multi-processors -# multi32 = enable 32 multi-processors -# multi48 = enable 48 multi-processors -# -# SYSTEM SIZE CONFIGURATION (select exactly one) -# -# xlarge = extra large scale system configuration -# large = large scale system configuration -# medium = medium scale system configuration -# small = small scale system configuration -# xsmall = extra small scale system configuration -# bsmall = special extra small scale system configuration for -# (e.g. for boot floppies) -# -####################################################################### -# -# Standard Mach Research Configurations: -# -------- ---- -------- --------------- -# -# These are the default configurations that can be used by most sites. -# They are used internally by the Mach project. 
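
Editor's note: the per-component MASTER files being deleted here (pexpert's above, security's further below) all restated the same selection rule: each option line carries a selector listing attributes, the line is kept when any listed attribute is part of the chosen configuration, and a leading "!" inverts the test so the line is kept only when none are. A toy evaluator of that rule, assuming comma-separated attribute names; this is illustrative only, the real implementation being the SETUP/config tool plus the doconf script:

```c
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/*
 * selector: attribute list such as "foo,bar" or "!foo,bar".
 * attrs:    attributes of the configuration being built.
 * Returns true when the line carrying this selector should be kept.
 */
static bool
selector_keeps_line(const char *selector,
                    const char *const attrs[], size_t nattrs)
{
    bool negate = (selector[0] == '!');
    if (negate)
        selector++;

    bool any = false;
    char buf[128];
    strncpy(buf, selector, sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = '\0';

    for (char *tok = strtok(buf, ","); tok != NULL; tok = strtok(NULL, ",")) {
        for (size_t i = 0; i < nattrs; i++) {
            if (strcmp(tok, attrs[i]) == 0)
                any = true;
        }
    }
    return negate ? !any : any;
}
```
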
-# -# MACH = [mach multi16 medium debug] -# -####################################################################### -# -ident PEXPERT - -options MACH_PE # Objective-C support # -options MACH_KERNEL -options DEBUG # general debugging code # -options DEVELOPMENT # dev kernel # -options MACH_ASSERT # # -options CONFIG_DTRACE # dtrace support # - -options CONFIG_NO_PANIC_STRINGS # -options CONFIG_NO_PRINTF_STRINGS # -options CONFIG_NO_KPRINTF_STRINGS # diff --git a/pexpert/conf/MASTER.x86_64 b/pexpert/conf/MASTER.x86_64 deleted file mode 100644 index a73bb602e..000000000 --- a/pexpert/conf/MASTER.x86_64 +++ /dev/null @@ -1,18 +0,0 @@ -###################################################################### -# -# Standard Apple Mac OS Configurations: -# -------- ----- ------ --------------- -# -# RELEASE = [ intel mach mach_pe config_dtrace ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug mach_assert ] -# -# EMBEDDED = [ intel mach mach_pe ] -# DEVELOPMENT = [ EMBEDDED ] -# -###################################################################### - -machine "x86_64" # -cpu "x86_64" # - -options NO_NESTED_PMAP # diff --git a/pexpert/conf/Makefile b/pexpert/conf/Makefile index 25a42ef5e..76db9a7d8 100644 --- a/pexpert/conf/Makefile +++ b/pexpert/conf/Makefile @@ -6,20 +6,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) - -$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) - $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ - cd $(addsuffix /conf, $(TARGET)); \ - rm -f $(notdir $?); \ - cp $? 
.; \ - if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); +# Special handling for x86_64h which shares a MASTER config file with x86_64: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +DOCONF_ARCH_CONFIG_LC = x86_64 +else +DOCONF_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + +MASTERCONFDIR = $(SRCROOT)/config +DOCONFDEPS = $(addprefix $(MASTERCONFDIR)/, MASTER MASTER.$(DOCONF_ARCH_CONFIG_LC)) \ + $(addprefix $(SOURCE)/, Makefile.template Makefile.$(DOCONF_ARCH_CONFIG_LC) files files.$(DOCONF_ARCH_CONFIG_LC)) + +ifneq (,$(wildcard $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC))) +DOCONFDEPS += $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +endif + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile: $(SRCROOT)/SETUP/config/doconf $(OBJROOT)/SETUP/config $(DOCONFDEPS) + $(_v)$(MKDIR) $(TARGET)/$(CURRENT_KERNEL_CONFIG) + $(_v)$(SRCROOT)/SETUP/config/doconf -c -cpu $(DOCONF_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) -s $(SOURCE) -m $(MASTERCONFDIR) $(CURRENT_KERNEL_CONFIG); do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile $(_v)${MAKE} \ diff --git a/pexpert/conf/Makefile.template b/pexpert/conf/Makefile.template index 7a8e0038d..60c3c87b4 100644 --- a/pexpert/conf/Makefile.template +++ b/pexpert/conf/Makefile.template @@ -44,22 +44,29 @@ COMP_SUBDIRS = %CFILES +%CXXFILES + %SFILES %MACHDEP -# -# OBJSDEPS is the set of files (defined in the machine dependent -# template if necessary) which all objects depend on (such as an -# in-line assembler expansion filter) -# -${OBJS}: ${OBJSDEPS} - -LDOBJS = $(OBJS) - -$(COMPONENT).filelist: $(LDOBJS) +# Rebuild if per-file overrides change +${OBJS}: $(firstword $(MAKEFILE_LIST)) + +# Rebuild if global compile flags change +$(COBJS): .CFLAGS +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) +$(CXXOBJS): .CXXFLAGS +.CXXFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KC++) $(CXXFLAGS) $(INCFLAGS) +$(SOBJS): .SFLAGS +.SFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) + +$(COMPONENT).filelist: $(OBJS) @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${LDOBJS}; do \ + $(_v)( for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist diff --git a/pexpert/gen/pe_gen.c b/pexpert/gen/pe_gen.c index 43ab61a96..15eaf656d 100644 --- a/pexpert/gen/pe_gen.c +++ b/pexpert/gen/pe_gen.c @@ -31,6 +31,7 @@ #include #include +#include #include static int DEBUGFlag; @@ -64,6 +65,41 @@ void PE_init_printf(boolean_t vm_initialized) } } +uint32_t +PE_get_random_seed(unsigned char *dst_random_seed, uint32_t request_size) +{ + DTEntry entryP; + uint32_t size = 0; + void *dt_random_seed; + + if ((DTLookupEntry(NULL, "/chosen", &entryP) == kSuccess) + && (DTGetProperty(entryP, "random-seed", + (void **)&dt_random_seed, &size) == kSuccess)) { + unsigned char *src_random_seed; + unsigned int i; + unsigned int null_count = 0; + + src_random_seed = (unsigned char *)dt_random_seed; + + if (size > request_size) size = request_size; + + /* + * Copy from the device tree into the destination buffer, + * count the number of null bytes and null out the device tree. 
+ */ + for (i=0 ; i< size; i++, src_random_seed++, dst_random_seed++) { + *dst_random_seed = *src_random_seed; + null_count += *src_random_seed == (unsigned char)0; + *src_random_seed = (unsigned char)0; + } + if (null_count == size) + /* All nulls is no seed - return 0 */ + size = 0; + } + + return(size); +} + unsigned char appleClut8[ 256 * 3 ] = { // 00 0xFF,0xFF,0xFF, 0xFF,0xFF,0xCC, 0xFF,0xFF,0x99, 0xFF,0xFF,0x66, @@ -146,3 +182,4 @@ unsigned char appleClut8[ 256 * 3 ] = { 0xAA,0xAA,0xAA, 0x88,0x88,0x88, 0x77,0x77,0x77, 0x55,0x55,0x55, 0x44,0x44,0x44, 0x22,0x22,0x22, 0x11,0x11,0x11, 0x00,0x00,0x00 }; + diff --git a/pexpert/i386/pe_identify_machine.c b/pexpert/i386/pe_identify_machine.c index 2d59f1b66..a632a7ae7 100644 --- a/pexpert/i386/pe_identify_machine.c +++ b/pexpert/i386/pe_identify_machine.c @@ -25,9 +25,9 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include #include #include -#include /* Local declarations */ void pe_identify_machine(boot_args *args); diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c index 29efd7c67..e154c7a78 100644 --- a/pexpert/i386/pe_init.c +++ b/pexpert/i386/pe_init.c @@ -29,6 +29,7 @@ * file: pe_init.c * i386 platform expert initialization. */ + #include #include #include @@ -171,8 +172,11 @@ void PE_init_iokit(void) /* * Initialize the spinning wheel (progress indicator). */ - vc_progress_initialize( &default_progress, default_progress_data1x, default_progress_data2x, - (unsigned char *) appleClut8 ); + vc_progress_initialize(&default_progress, + default_progress_data1x, + default_progress_data2x, + default_progress_data3x, + (unsigned char *) appleClut8); (void) StartIOKit( PE_state.deviceTreeHead, PE_state.bootArgs, gPEEFIRuntimeServices, NULL); } @@ -193,7 +197,6 @@ void PE_init_platform(boolean_t vm_initialized, void * _args) PE_state.video.v_height = args->Video.v_height; PE_state.video.v_depth = args->Video.v_depth; PE_state.video.v_display = args->Video.v_display; - PE_state.video.v_scale = (kBootArgsFlagHiDPI & args->flags) ? 
2 : 1; strlcpy(PE_state.video.v_pixelFormat, "PPPPPPPP", sizeof(PE_state.video.v_pixelFormat)); diff --git a/pexpert/pexpert/GearImage.h b/pexpert/pexpert/GearImage.h index 8e4050055..f074d0092 100644 --- a/pexpert/pexpert/GearImage.h +++ b/pexpert/pexpert/GearImage.h @@ -4,6 +4,8 @@ #define kGearFrames (6) #define kGearFPS (24) + + const unsigned char gGearPict2x[4*kGearFrames*kGearWidth*kGearHeight] = { 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, diff --git a/pexpert/pexpert/Makefile b/pexpert/pexpert/Makefile index e31e88903..50a0293d9 100644 --- a/pexpert/pexpert/Makefile +++ b/pexpert/pexpert/Makefile @@ -11,11 +11,14 @@ INSTINC_SUBDIRS = \ machine INSTINC_SUBDIRS_X86_64 = \ i386 +INSTINC_SUBDIRS_X86_64H = \ + i386 INSTINC_SUBDIRS_ARM = \ arm EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} +EXPINC_SUBDIRS_X86_64H = ${INSTINC_SUBDIRS_X86_64H} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} DATAFILES = \ diff --git a/pexpert/pexpert/i386/boot.h b/pexpert/pexpert/i386/boot.h index b73e6ef24..d79e4994a 100644 --- a/pexpert/pexpert/i386/boot.h +++ b/pexpert/pexpert/i386/boot.h @@ -125,6 +125,11 @@ typedef struct boot_icon_element boot_icon_element; #define kBootArgsFlagRebootOnPanic (1 << 0) #define kBootArgsFlagHiDPI (1 << 1) #define kBootArgsFlagBlack (1 << 2) +#define kBootArgsFlagCSRActiveConfig (1 << 3) +#define kBootArgsFlagCSRPendingConfig (1 << 4) +#define kBootArgsFlagCSRBoot (1 << 5) +#define kBootArgsFlagBlackBg (1 << 6) +#define kBootArgsFlagLoginUI (1 << 7) typedef struct boot_args { uint16_t Revision; /* Revision of boot_args structure */ @@ -168,7 +173,9 @@ typedef struct boot_args { uint64_t pciConfigSpaceBaseAddress; uint32_t pciConfigSpaceStartBusNumber; uint32_t pciConfigSpaceEndBusNumber; - uint32_t __reserved4[730]; + uint32_t csrActiveConfig; + uint32_t csrPendingConfig; + uint32_t __reserved4[728]; } boot_args; diff --git a/pexpert/pexpert/pe_images.h b/pexpert/pexpert/pe_images.h index 4a4cb6d3c..c7a0597b1 100644 --- a/pexpert/pexpert/pe_images.h +++ b/pexpert/pexpert/pe_images.h @@ -42,6 +42,7 @@ static const unsigned char * default_noroot_data; static const unsigned char * default_progress_data1x = gGearPict; static const unsigned char * default_progress_data2x = gGearPict2x; +static const unsigned char * default_progress_data3x = NULL; static vc_progress_element default_progress = { 0, 4|1, 1000 / kGearFPS, kGearFrames, {0, 0, 0}, diff --git a/pexpert/pexpert/pexpert.h b/pexpert/pexpert/pexpert.h index 506361341..34498150b 100644 --- a/pexpert/pexpert/pexpert.h +++ b/pexpert/pexpert/pexpert.h @@ -59,6 +59,15 @@ void PE_init_platform( boolean_t vm_initialized, void *args); +/* + * Copies the requested number of bytes from the "random-seed" property in + * the device tree, and zeros the corresponding bytes in the device tree. + * Returns the number of bytes actually copied. 
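
Editor's note: PE_get_random_seed(), declared above and implemented earlier in pe_gen.c, truncates to the caller's buffer, destroys the seed in the device tree after copying it, and reports 0 when the "random-seed" property is missing or entirely zero. A hypothetical early-boot caller sketched under those rules; apart from the PE_get_random_seed() call itself, everything here is invented for illustration:

```c
#include <stdint.h>
#include <string.h>

#define SEED_BYTES 64   /* assumed buffer size for this sketch */

/*
 * Hypothetical consumer of PE_get_random_seed(): pull the firmware
 * seed once, fall back when none was provided, and scrub the local
 * copy. The real consumer in xnu is the early entropy setup.
 */
static void
seed_early_entropy(void)
{
    unsigned char seed[SEED_BYTES];
    uint32_t got = PE_get_random_seed(seed, sizeof(seed));

    if (got == 0) {
        /* absent or all-zero property: fall back to TSC/RDRAND etc. */
        return;
    }
    /* ... mix `got` bytes into the entropy pool here ... */
    memset(seed, 0, sizeof(seed));      /* never keep a second copy */
}
```
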
+ */ +uint32_t PE_get_random_seed( + unsigned char * dst_random_seed, + uint32_t request_size); + void PE_init_kprintf( boolean_t vm_initialized); @@ -225,6 +234,7 @@ extern int PE_initialize_console( #define kPEEnableScreen 6 #define kPEDisableScreen 7 #define kPEBaseAddressChange 8 +#define kPERefreshBootGraphics 9 extern void PE_display_icon( unsigned int flags, const char * name ); diff --git a/security/conf/MASTER b/security/conf/MASTER deleted file mode 100644 index 8a93c2486..000000000 --- a/security/conf/MASTER +++ /dev/null @@ -1,78 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -####################################################################### -# -# Master machine independent configuration file. -# -# Specific configuration files are created based on this and -# the machine specific master file using the doconf script. -# -# Any changes to the master configuration files will affect all -# other configuration files based upon it. -# -####################################################################### -# -# To build a configuration, execute "doconf ." -# Configurations are specified in the "Configurations:" section -# of the MASTER and MASTER.* files as follows: -# -# = [ ... ] -# -# Lines in the MASTER and MASTER.* files are selected based on -# the attribute selector list, found in a comment at the end of -# the line. This is a list of attributes separated by commas. -# The "!" operator selects the line if none of the attributes are -# specified. -# -# For example: -# -# selects a line if "foo" or "bar" are specified. -# selects a line if neither "foo" nor "bar" is -# specified. -# -# Lines with no attributes specified are selected for all -# configurations. -# -####################################################################### -# -# Standard Mach Research Configurations: -# -------- ---- -------- --------------- -# -# These are the default configurations that can be used by most sites. -# They are used internally by the Mach project. -# -# LIBSA = [debug] -# -####################################################################### -# -ident SECURITY - -# -# Note: MAC options must be set in both bsd/conf and security/conf MASTER files -# -options KDEBUG # kernel tracing # -options AUDIT # Security event auditing # -options CONFIG_LCTX # Login Context - -options CONFIG_DTRACE # dtrace support # - -options VM_PRESSURE_EVENTS # - -options CONFIG_NO_PANIC_STRINGS # -options CONFIG_NO_PRINTF_STRINGS # -options CONFIG_NO_KPRINTF_STRINGS # -options CONFIG_FSE # file system events # -options CONFIG_TRIGGERS # trigger vnodes # -options CONFIG_EXT_RESOLVER # e.g. 
memberd # - -options SECURE_KERNEL # -options DEBUG # # -options MACH_ASSERT # # - -options CONFIG_MEMORYSTATUS # -options CONFIG_JETSAM # -options CONFIG_FREEZE # diff --git a/security/conf/MASTER.x86_64 b/security/conf/MASTER.x86_64 deleted file mode 100644 index 485e7bc7c..000000000 --- a/security/conf/MASTER.x86_64 +++ /dev/null @@ -1,31 +0,0 @@ -###################################################################### -# -# RELEASE = [ intel mach libkerncpp config_dtrace audit vm_pressure_events memorystatus ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug mach_assert ] -# -# EMBEDDED = [ intel mach libkerncpp audit ] -# DEVELOPMENT = [ EMBEDDED ] -# -###################################################################### - -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. -# -options CONFIG_MACF # Mandatory Access Control Framework -options CONFIG_MACF_SOCKET_SUBSET # MACF subset of socket support -options CONFIG_FSE -options CONFIG_TRIGGERS -options CONFIG_EXT_RESOLVER -#options CONFIG_MACF_SOCKET -#options CONFIG_MACF_NET -#options CONFIG_MACF_ALWAYS_LABEL_MBUF -#options CONFIG_MACF_DEBUG -#options CONFIG_MACF_MACH -options CONFIG_AUDIT # Kernel auditing - -machine "x86_64" # -cpu "x86_64" # - -options NO_NESTED_PMAP # diff --git a/security/conf/Makefile b/security/conf/Makefile index 25a42ef5e..76db9a7d8 100644 --- a/security/conf/Makefile +++ b/security/conf/Makefile @@ -6,20 +6,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) - -$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) - $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ - cd $(addsuffix /conf, $(TARGET)); \ - rm -f $(notdir $?); \ - cp $? 
.; \ - if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); +# Special handling for x86_64h which shares a MASTER config file with x86_64: +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64h) +DOCONF_ARCH_CONFIG_LC = x86_64 +else +DOCONF_ARCH_CONFIG_LC = $(CURRENT_ARCH_CONFIG_LC) +endif + +MASTERCONFDIR = $(SRCROOT)/config +DOCONFDEPS = $(addprefix $(MASTERCONFDIR)/, MASTER MASTER.$(DOCONF_ARCH_CONFIG_LC)) \ + $(addprefix $(SOURCE)/, Makefile.template Makefile.$(DOCONF_ARCH_CONFIG_LC) files files.$(DOCONF_ARCH_CONFIG_LC)) + +ifneq (,$(wildcard $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC))) +DOCONFDEPS += $(MASTERCONFDIR)/MASTER.$(DOCONF_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +endif + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile: $(SRCROOT)/SETUP/config/doconf $(OBJROOT)/SETUP/config $(DOCONFDEPS) + $(_v)$(MKDIR) $(TARGET)/$(CURRENT_KERNEL_CONFIG) + $(_v)$(SRCROOT)/SETUP/config/doconf -c -cpu $(DOCONF_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) -s $(SOURCE) -m $(MASTERCONFDIR) $(CURRENT_KERNEL_CONFIG); do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile $(_v)${MAKE} \ diff --git a/security/conf/Makefile.template b/security/conf/Makefile.template index 0270cfb70..975f64d84 100644 --- a/security/conf/Makefile.template +++ b/security/conf/Makefile.template @@ -41,14 +41,6 @@ COMP_SUBDIRS = # .PRECIOUS: Makefile -VERSION_FILES= \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.major \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.minor \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.variant - -COPYRIGHT_FILES = \ - $(SOURCE_DIR)/$(COMPONENT)/conf/copyright.nai - # # Theses macros are filled in by the config program depending on the # current configuration. 
The MACHDEP macro is replaced by the @@ -61,22 +53,29 @@ COPYRIGHT_FILES = \ %CFILES +%CXXFILES + %SFILES %MACHDEP -# -# OBJSDEPS is the set of files (defined in the machine dependent -# template if necessary) which all objects depend on (such as an -# in-line assembler expansion filter) -# -${OBJS}: ${OBJSDEPS} - -LDOBJS = $(OBJS) - -$(COMPONENT).filelist: $(LDOBJS) vers.o +# Rebuild if per-file overrides change +${OBJS}: $(firstword $(MAKEFILE_LIST)) + +# Rebuild if global compile flags change +$(COBJS): .CFLAGS +.CFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KCC) $(CFLAGS) $(INCFLAGS) +$(CXXOBJS): .CXXFLAGS +.CXXFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(KC++) $(CXXFLAGS) $(INCFLAGS) +$(SOBJS): .SFLAGS +.SFLAGS: ALWAYS + $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS) + +$(COMPONENT).filelist: $(OBJS) @echo LDFILELIST $(COMPONENT) - $(_v)( for obj in ${LDOBJS} vers.o; do \ + $(_v)( for obj in ${OBJS}; do \ echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist @@ -84,14 +83,6 @@ do_all: $(COMPONENT).filelist do_build_all:: do_all -vers.o: vers.c - @echo CC $@ - $(_v)${KCC} $(CFLAGS) -o ${@} -c ${INCFLAGS} $< - -vers.c: - $(_v)$(SRCROOT)/SETUP/newvers \ - `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} - %RULES include $(MakeInc_rule) diff --git a/security/conf/files b/security/conf/files index 75ba967af..2d6761fd3 100644 --- a/security/conf/files +++ b/security/conf/files @@ -15,8 +15,6 @@ security/mac_audit.c optional config_macf security/mac_base.c standard security/mac_data.c optional config_macf security/mac_label.c optional config_macf -security/mac_task.c optional config_macf_mach -security/mac_port.c optional config_macf_mach security/mac_process.c optional config_macf security/mac_vfs.c optional config_macf security/mac_vfs_subr.c optional config_macf diff --git a/security/conf/kernelversion.major b/security/conf/kernelversion.major deleted file mode 100644 index 1e8b31496..000000000 --- a/security/conf/kernelversion.major +++ /dev/null @@ -1 +0,0 @@ -6 diff --git a/security/conf/kernelversion.minor b/security/conf/kernelversion.minor deleted file mode 100644 index 1e8b31496..000000000 --- a/security/conf/kernelversion.minor +++ /dev/null @@ -1 +0,0 @@ -6 diff --git a/security/conf/kernelversion.variant b/security/conf/kernelversion.variant deleted file mode 100644 index 8b1378917..000000000 --- a/security/conf/kernelversion.variant +++ /dev/null @@ -1 +0,0 @@ - diff --git a/security/conf/version.major b/security/conf/version.major deleted file mode 100644 index 1e8b31496..000000000 --- a/security/conf/version.major +++ /dev/null @@ -1 +0,0 @@ -6 diff --git a/security/conf/version.minor b/security/conf/version.minor deleted file mode 100644 index 1e8b31496..000000000 --- a/security/conf/version.minor +++ /dev/null @@ -1 +0,0 @@ -6 diff --git a/security/conf/version.variant b/security/conf/version.variant deleted file mode 100644 index 8b1378917..000000000 --- a/security/conf/version.variant +++ /dev/null @@ -1 +0,0 @@ - diff --git a/security/mac_base.c b/security/mac_base.c index f7698f2e8..c88c7e14a 100644 --- a/security/mac_base.c +++ b/security/mac_base.c @@ -294,120 +294,6 @@ mac_policy_list_t mac_policy_list; struct mac_label_element_list_t mac_label_element_list; struct mac_label_element_list_t mac_static_label_element_list; -/* - * Journal of label operations that occur before policies are loaded. 
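
Editor's note: the .CFLAGS/.CXXFLAGS/.SFLAGS rules added to this template (and to the pexpert one above) implement rebuild-on-flag-change: an ALWAYS-run rule asks $(REPLACECONTENTS) to write the current compiler command line into a stamp file, every object depends on that stamp, and because the tool rewrites the file only when its contents actually differ, the stamp's mtime advances exactly when the flags change. A sketch of that only-write-if-different core; the real tool is the SETUP/replacecontents/replacecontents.c this patch adds, and this standalone version is illustrative:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Rewrite `path` with `data` only when the contents differ, so make
 * dependencies on the file fire exactly when the recorded command
 * line changes. Returns 1 if rewritten, 0 if unchanged, -1 on error.
 */
static int
replace_contents(const char *path, const char *data, size_t len)
{
    int same = 0;
    FILE *f = fopen(path, "rb");
    if (f != NULL) {
        char *old = malloc(len + 1);
        if (old != NULL) {
            /* read len+1 bytes: a longer file can't compare equal */
            size_t n = fread(old, 1, len + 1, f);
            same = (n == len && memcmp(old, data, len) == 0);
            free(old);
        }
        fclose(f);
    }
    if (same)
        return 0;               /* unchanged: leave the mtime alone */

    f = fopen(path, "wb");
    if (f == NULL)
        return -1;
    size_t w = fwrite(data, 1, len, f);
    fclose(f);
    return (w == len) ? 1 : -1;
}
```
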
- */ -struct mac_label_journal_list_t mac_label_journal_list; - -int -mac_label_journal_add (struct label *l, int type) -{ - struct mac_label_journal *mlj; - - if (mac_label_journal_find(l)) - return (0); - - MALLOC(mlj, struct mac_label_journal *, - sizeof(struct mac_label_journal), M_MACTEMP, M_WAITOK); - mlj->l = l; - mlj->type = type; - TAILQ_INSERT_TAIL(&mac_label_journal_list, mlj, link); - - return (0); -} - -int -mac_label_journal_remove (struct label *l) -{ - struct mac_label_journal *mlj; - - mlj = mac_label_journal_find(l); - if (mlj == NULL) - return (-1); - - TAILQ_REMOVE(&mac_label_journal_list, mlj, link); - FREE(mlj, M_MACTEMP); - return (0); -} - -struct mac_label_journal * -mac_label_journal_find (struct label *l) -{ - struct mac_label_journal *mlj; - - TAILQ_FOREACH(mlj, &mac_label_journal_list, link) { - if (l == mlj->l) - return (mlj); - } - - return (NULL); -} - -int -mac_label_journal (struct label *l, int op, ...) -{ - struct mac_label_journal *mlj; - va_list ap; - - mlj = mac_label_journal_find(l); - if (mlj == NULL) { - printf("%s(): Label not in list!\n", __func__); - return (-1); - } - - if (op == MLJ_PORT_OP_UPDATE) { - va_start(ap, op); - mlj->kotype = va_arg(ap, int); - va_end(ap); - } - - mlj->ops |= op; - return (0); -} - -/* - * The assumption during replay is that the system is totally - * serialized and no additional tasks/ports will be created. - */ -void -mac_label_journal_replay (void) -{ - struct mac_label_journal *mlj; - - TAILQ_FOREACH(mlj, &mac_label_journal_list, link) { - switch (mlj->type) { - case MLJ_TYPE_PORT: - if (mlj->ops & MLJ_PORT_OP_INIT) - MAC_PERFORM(port_label_init, mlj->l); - if (mlj->ops & MLJ_PORT_OP_CREATE_K) - MAC_PERFORM(port_label_associate_kernel, mlj->l, 0); - if (mlj->ops & MLJ_PORT_OP_UPDATE) - MAC_PERFORM(port_label_update_kobject, mlj->l, - mlj->kotype); - break; - case MLJ_TYPE_TASK: - if (mlj->ops & MLJ_TASK_OP_INIT) - MAC_PERFORM(task_label_init, mlj->l); -#if 0 - /* Not enough context to replay. 
*/ - if (mlj->ops & MLJ_TASK_OP_CREATE_K) - ; -#endif - break; - default: - break; - } - } - - /* Free list */ - while (!TAILQ_EMPTY(&mac_label_journal_list)) { - mlj = TAILQ_FIRST(&mac_label_journal_list); - TAILQ_REMOVE(&mac_label_journal_list, mlj, link); - FREE(mlj, M_MACTEMP); - } - return; -} - static __inline void mac_policy_grab_exclusive(void) { @@ -419,14 +305,6 @@ mac_policy_grab_exclusive(void) } } -static __inline void -mac_policy_assert_exclusive(void) -{ - lck_mtx_assert(mac_policy_mtx, LCK_MTX_ASSERT_OWNED); - KASSERT(mac_policy_busy == 0, - ("mac_policy_assert_exclusive(): not exclusive")); -} - static __inline void mac_policy_release_exclusive(void) { @@ -496,7 +374,6 @@ mac_policy_init(void) LIST_INIT(&mac_label_element_list); LIST_INIT(&mac_static_label_element_list); - TAILQ_INIT(&mac_label_journal_list); mac_lck_grp_attr = lck_grp_attr_alloc_init(); lck_grp_attr_setstat(mac_lck_grp_attr); @@ -536,9 +413,6 @@ mac_policy_initmach(void) load_security_extensions_function(); } mac_late = 1; -#if CONFIG_MACF_MACH - mac_label_journal_replay(); -#endif } /* @@ -1050,26 +924,6 @@ mac_label_destroy(struct label *label) /* implicit: label->l_flags &= ~MAC_FLAG_INITIALIZED; */ } -int -mac_port_check_service (struct label *subj, struct label *obj, - const char *s, const char *p) -{ - int error; - - MAC_CHECK(port_check_service, subj, obj, s, p); - return (error); -} - -int -mac_port_label_compute(struct label *subj, struct label *obj, - const char *s, struct label *out) -{ - int error; - - MAC_CHECK(port_label_compute, subj, obj, s, out); - return error; -} - int mac_check_structmac_consistent(struct user_mac *mac) { @@ -1405,7 +1259,6 @@ __mac_get_proc(proc_t p, struct __mac_get_proc_args *uap, int *ret __unused) int __mac_set_proc(proc_t p, struct __mac_set_proc_args *uap, int *ret __unused) { - kauth_cred_t newcred; struct label *intlabel; struct user_mac mac; char *buffer; @@ -1453,18 +1306,6 @@ __mac_set_proc(proc_t p, struct __mac_set_proc_args *uap, int *ret __unused) if (error) goto out; - newcred = kauth_cred_proc_ref(p); - mac_task_label_update_cred(newcred, p->task); - -#if 0 - if (mac_vm_enforce) { - mutex_lock(Giant); /* XXX FUNNEL? */ - mac_cred_mmapped_drop_perms(p, newcred); - mutex_unlock(Giant); /* XXX FUNNEL? 
*/ - } -#endif - - kauth_cred_unref(&newcred); out: mac_cred_label_free(intlabel); return (error); @@ -2235,6 +2076,7 @@ __mac_get_mount(proc_t p __unused, struct __mac_get_mount_args *uap, return (error); } mp = nd.ni_vp->v_mount; + vnode_put(nd.ni_vp); nameidone(&nd); return mac_mount_label_get(mp, uap->mac_p); diff --git a/security/mac_framework.h b/security/mac_framework.h index 777efd542..c81d4ec41 100644 --- a/security/mac_framework.h +++ b/security/mac_framework.h @@ -142,13 +142,6 @@ typedef struct OSObject *io_object_t; /*@ macros */ #define VNODE_LABEL_CREATE 1 -#if CONFIG_MACF_MACH -#define mac_task_label_update_cred(cred, task) \ - mac_task_label_update_internal(((cred)->cr_label), task) -#else -#define mac_task_label_update_cred(cred, task) -#endif - /*@ === */ int mac_audit_check_postselect(kauth_cred_t cred, unsigned short syscode, void *args, int error, int retval, int mac_forced); @@ -161,8 +154,9 @@ void mac_bpfdesc_label_associate(kauth_cred_t cred, struct bpf_d *bpf_d); int mac_cred_check_label_update(kauth_cred_t cred, struct label *newlabel); int mac_cred_check_label_update_execve(vfs_context_t ctx, - struct vnode *vp, struct vnode *scriptvp, struct label *scriptvnodelabel, - struct label *execlabel, proc_t proc, void *macextensions); + struct vnode *vp, off_t offset, struct vnode *scriptvp, + struct label *scriptvnodelabel, struct label *execlabel, + proc_t proc, void *macextensions); int mac_cred_check_visible(kauth_cred_t u1, kauth_cred_t u2); struct label *mac_cred_label_alloc(void); void mac_cred_label_associate(kauth_cred_t cred_parent, @@ -176,9 +170,10 @@ void mac_cred_label_free(struct label *label); void mac_cred_label_init(kauth_cred_t cred); int mac_cred_label_compare(struct label *a, struct label *b); void mac_cred_label_update(kauth_cred_t cred, struct label *newlabel); -int mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t newcred, - struct vnode *vp, struct vnode *scriptvp, struct label *scriptvnodelabel, - struct label *execlabel, void *macextensions); +void mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t newcred, + struct vnode *vp, off_t offset, struct vnode *scriptvp, + struct label *scriptvnodelabel, struct label *execlabel, u_int *csflags, + void *macextensions, int *disjoint, int *labelupdateerror); void mac_devfs_label_associate_device(dev_t dev, struct devnode *de, const char *fullpath); void mac_devfs_label_associate_directory(const char *dirname, int dirnamelen, @@ -232,6 +227,8 @@ void mac_inpcb_label_update(struct socket *so); int mac_iokit_check_device(char *devtype, struct mac_module_data *mdata); int mac_iokit_check_open(kauth_cred_t cred, io_object_t user_client, unsigned int user_client_type); int mac_iokit_check_set_properties(kauth_cred_t cred, io_object_t registry_entry, io_object_t properties); +int mac_iokit_check_filter_properties(kauth_cred_t cred, io_object_t registry_entry); +int mac_iokit_check_get_property(kauth_cred_t cred, io_object_t registry_entry, const char *name); int mac_iokit_check_hid_control(kauth_cred_t cred); void mac_ipq_label_associate(struct mbuf *fragment, struct ipq *ipq); int mac_ipq_label_compare(struct mbuf *fragment, struct ipq *ipq); @@ -340,6 +337,7 @@ int mac_proc_check_fork(proc_t proc); int mac_proc_check_suspend_resume(proc_t proc, int sr); int mac_proc_check_get_task_name(kauth_cred_t cred, struct proc *p); int mac_proc_check_get_task(kauth_cred_t cred, struct proc *p); +int mac_proc_check_inherit_ipc_ports(struct proc *p, struct vnode *cur_vp, off_t cur_offset, struct 
vnode *img_vp, off_t img_offset, struct vnode *scriptvp); int mac_proc_check_getaudit(proc_t proc); int mac_proc_check_getauid(proc_t proc); int mac_proc_check_getlcid(proc_t proc1, proc_t proc2, @@ -410,9 +408,9 @@ int mac_system_check_reboot(kauth_cred_t cred, int howto); int mac_system_check_settime(kauth_cred_t cred); int mac_system_check_swapoff(kauth_cred_t cred, struct vnode *vp); int mac_system_check_swapon(kauth_cred_t cred, struct vnode *vp); -int mac_system_check_sysctl(kauth_cred_t cred, int *name, - u_int namelen, user_addr_t oldctl, user_addr_t oldlenp, int inkernel, - user_addr_t newctl, size_t newlen); +int mac_system_check_sysctlbyname(kauth_cred_t cred, const char *namestring, int *name, + u_int namelen, user_addr_t oldctl, size_t oldlen, + user_addr_t newctl, size_t newlen); int mac_system_check_kas_info(kauth_cred_t cred, int selector); void mac_sysvmsg_label_associate(kauth_cred_t cred, struct msqid_kernel *msqptr, struct msg *msgptr); @@ -477,7 +475,8 @@ int mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp, struct image_params *imgp); int mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp); int mac_vnode_check_signature(struct vnode *vp, off_t macho_offset, - unsigned char *sha1, void * signature, size_t size); + unsigned char *sha1, const void * signature, size_t size, + int *is_platform_binary); int mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp, struct attrlist *alist); int mac_vnode_check_getextattr(vfs_context_t ctx, struct vnode *vp, @@ -499,10 +498,9 @@ int mac_vnode_check_read(vfs_context_t ctx, kauth_cred_t file_cred, struct vnode *vp); int mac_vnode_check_readdir(vfs_context_t ctx, struct vnode *vp); int mac_vnode_check_readlink(vfs_context_t ctx, struct vnode *vp); -int mac_vnode_check_rename_from(vfs_context_t ctx, struct vnode *dvp, - struct vnode *vp, struct componentname *cnp); -int mac_vnode_check_rename_to(vfs_context_t ctx, struct vnode *dvp, - struct vnode *vp, int samedir, struct componentname *cnp); +int mac_vnode_check_rename(vfs_context_t ctx, struct vnode *dvp, + struct vnode *vp, struct componentname *cnp, struct vnode *tdvp, + struct vnode *tvp, struct componentname *tcnp); int mac_vnode_check_revoke(vfs_context_t ctx, struct vnode *vp); int mac_vnode_check_searchfs(vfs_context_t ctx, struct vnode *vp, struct attrlist *alist); diff --git a/security/mac_internal.h b/security/mac_internal.h index 013f6563d..153e7d727 100644 --- a/security/mac_internal.h +++ b/security/mac_internal.h @@ -139,47 +139,12 @@ struct mac_label_element { LIST_HEAD(mac_label_element_list_t, mac_label_element); -/* - * Journal operations - */ - -#define MLJ_TYPE_PORT 1 -#define MLJ_TYPE_TASK 2 - -#define MLJ_PORT_OP_INIT 0x0001 -#define MLJ_PORT_OP_CREATE_K 0x0002 -#define MLJ_PORT_OP_CREATE 0x0004 -#define MLJ_PORT_OP_UPDATE 0x0008 - -#define MLJ_TASK_OP_INIT 0x0001 -#define MLJ_TASK_OP_CREATE_K 0x0002 - -struct mac_label_journal { - struct label *l; - int type; - int ops; - - int kotype; /* Kernel Port */ - - TAILQ_ENTRY(mac_label_journal) link; -}; -TAILQ_HEAD(mac_label_journal_list_t, mac_label_journal); - -int mac_label_journal_add (struct label *, int); -int mac_label_journal_remove(struct label *); -struct mac_label_journal * - mac_label_journal_find (struct label *); -int mac_label_journal (struct label *, int, ...); -void mac_label_journal_replay(void); - - /* * MAC Framework global variables. 
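The label-journal machinery removed above was straightforward TAILQ bookkeeping: one record per label created before the policy framework came up (mac_late == 0), replayed and then drained once mac_policy_initmach() ran. As a hedged illustration of the pattern for readers unfamiliar with the <sys/queue.h> idiom, here is a minimal userspace sketch; the struct fields mirror the deleted declaration, while malloc/free and the errno return are stand-ins for the kernel's MALLOC/FREE:

/* Simplified userspace model of the retired MAC label journal. */
#include <errno.h>
#include <stdlib.h>
#include <sys/queue.h>

struct label;                                /* opaque in this sketch */

struct mac_label_journal {
	struct label *l;                     /* journaled label         */
	int type;                            /* MLJ_TYPE_PORT / _TASK   */
	int ops;                             /* bitmask of MLJ_*_OP_*   */
	int kotype;                          /* kernel port object type */
	TAILQ_ENTRY(mac_label_journal) link;
};
TAILQ_HEAD(mac_label_journal_list_t, mac_label_journal);

static struct mac_label_journal_list_t journal =
    TAILQ_HEAD_INITIALIZER(journal);

/* Record a label that was created before the framework was ready. */
static int
journal_add(struct label *l, int type)
{
	struct mac_label_journal *mlj = malloc(sizeof(*mlj));

	if (mlj == NULL)
		return (ENOMEM);
	mlj->l = l;
	mlj->type = type;
	mlj->ops = 0;
	mlj->kotype = 0;
	TAILQ_INSERT_TAIL(&journal, mlj, link);
	return (0);
}

/* Drain and free the list, as the deleted replay code did. */
static void
journal_drain(void)
{
	struct mac_label_journal *mlj;

	while (!TAILQ_EMPTY(&journal)) {
		mlj = TAILQ_FIRST(&journal);
		TAILQ_REMOVE(&journal, mlj, link);
		free(mlj);
	}
}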
*/ extern struct mac_label_element_list_t mac_label_element_list; extern struct mac_label_element_list_t mac_static_label_element_list; -extern struct mac_label_journal_list_t mac_label_journal_list; extern struct mac_policy_list mac_policy_list; diff --git a/security/mac_iokit.c b/security/mac_iokit.c index 0207dbf20..0a68a6d41 100644 --- a/security/mac_iokit.c +++ b/security/mac_iokit.c @@ -92,6 +92,24 @@ mac_iokit_check_set_properties(kauth_cred_t cred, io_object_t registry_entry, io return (error); } +int +mac_iokit_check_filter_properties(kauth_cred_t cred, io_object_t registry_entry) +{ + int error; + + MAC_CHECK(iokit_check_filter_properties, cred, registry_entry); + return (error); +} + +int +mac_iokit_check_get_property(kauth_cred_t cred, io_object_t registry_entry, const char *name) +{ + int error; + + MAC_CHECK(iokit_check_get_property, cred, registry_entry, name); + return (error); +} + int mac_iokit_check_hid_control(kauth_cred_t cred) { diff --git a/security/mac_mach_internal.h b/security/mac_mach_internal.h index 2a98a1196..cf48cf3be 100644 --- a/security/mac_mach_internal.h +++ b/security/mac_mach_internal.h @@ -58,12 +58,6 @@ #warning "MAC policy is not KPI, see Technical Q&A QA1574, this header will be removed in next version" #endif -int mac_task_check_service(task_t self, task_t obj, const char *perm); -void mac_task_label_update_internal(struct label *pl, struct task *t); -int mac_port_label_compute(struct label *subj, struct label *obj, - const char *serv, struct label *out); -int mac_port_check_method(task_t task, struct label *sub, struct label *obj, int msgid); - /* mac_do_machexc() flags */ #define MAC_DOEXCF_TRACED 0x01 /* Only do mach exeception if being ptrace()'ed */ @@ -77,49 +71,6 @@ struct label *mac_thread_get_uthreadlabel(struct uthread *uthread); void mac_policy_init(void); void mac_policy_initmach(void); -/* tasks */ -void mac_task_label_init(struct label *); -void mac_task_label_copy(struct label *src, struct label *dest); -void mac_task_label_destroy(struct label *); -void mac_task_label_associate(struct task *, struct task *, struct label *, - struct label *, struct label *); -void mac_task_label_associate_kernel(struct task *, struct label *, struct label *); -void mac_task_label_modify( struct task *pt, void *arg, - void (*f)(struct label *l, void *arg)); -struct label *mac_task_get_label(struct task *task); - -/* ports */ -void mac_port_label_init(struct label *l); -void mac_port_label_destroy(struct label *l); -void mac_port_label_associate(struct label *it, struct label *st, struct label *plabel); -void mac_port_label_associate_kernel(struct label *plabel, int isreply); -void mac_port_label_update_kobject(struct label *plabel, int kotype); -void mac_port_label_copy(struct label *src, struct label *dest); -void mac_port_label_update_cred(struct label *src, struct label *dest); -int mac_port_check_label_update(struct label *task, struct label *oldl, struct label *newl); - -int mac_port_check_send(struct label *task, struct label *port); -int mac_port_check_receive(struct label *task, struct label *sender); -int mac_port_check_make_send(struct label *task, struct label *port); -int mac_port_check_make_send_once(struct label *task, struct label *port); -int mac_port_check_move_receive(struct label *task, struct label *port); -int mac_port_check_copy_send(struct label *task, struct label *port); -int mac_port_check_move_send(struct label *task, struct label *port); -int mac_port_check_move_send_once(struct label *task, struct label *port); - -int 
mac_port_check_hold_send(struct label *task, struct label *port); -int mac_port_check_hold_send_once(struct label *task, struct label *port); -int mac_port_check_hold_receive(struct label *task, struct label *port); - -int mac_task_label_externalize(struct label *, char *e, char *out, size_t olen, int flags); -int mac_task_label_internalize(struct label *label, char *string); -int mac_port_label_externalize(struct label *, char *e, char *out, size_t olen, int flags); -int mac_port_label_internalize(struct label *label, char *string); - -void mac_task_label_update(struct label *cred, struct label *task); -int mac_port_check_service(struct label *subj, struct label *obj, - const char *serv, const char *perm); - /* threads */ void act_set_astmacf(struct thread *); void mac_thread_userret(struct thread *); diff --git a/security/mac_policy.h b/security/mac_policy.h index cf90fbaa6..31507419b 100644 --- a/security/mac_policy.h +++ b/security/mac_policy.h @@ -298,6 +298,8 @@ typedef int mpo_bpfdesc_check_receive_t( @brief Indicate desire to change the process label at exec time @param old Existing subject credential @param vp File being executed + @param offset Offset of binary within file being executed + @param scriptvp Script being executed by interpreter, if any. @param vnodelabel Label corresponding to vp @param scriptvnodelabel Script vnode label @param execlabel Userspace provided execution label @@ -337,6 +339,7 @@ typedef int mpo_bpfdesc_check_receive_t( typedef int mpo_cred_check_label_update_execve_t( kauth_cred_t old, struct vnode *vp, + off_t offset, struct vnode *scriptvp, struct label *vnodelabel, struct label *scriptvnodelabel, @@ -532,9 +535,12 @@ typedef int mpo_cred_label_internalize_t( @param new_cred New subject credential to be labeled @param p Object process. @param vp File being executed + @param offset Offset of binary within file being executed + @param scriptvp Script being executed by interpreter, if any. @param vnodelabel Label corresponding to vp @param scriptvnodelabel Script vnode label @param execlabel Userspace provided execution label + @param csflags Code signing flags to be set after exec @param macpolicyattr MAC policy-specific spawn attribute data. @param macpolicyattrlen Length of policy-specific spawn attribute data. @see mac_execve @@ -562,16 +568,20 @@ typedef int mpo_cred_label_internalize_t( The vnode lock is held during this operation. No changes should be made to the old credential structure. + @return 0 on success. Otherwise, return non-zero if the update results in + termination of the child.
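Because mpo_cred_label_update_execve_t now returns int and additionally receives the Mach-O offset and a csflags pointer, existing policies must adopt the wider signature shown in the typedef that follows. Below is a hedged sketch of a conforming hook for a hypothetical policy ("mypolicy"); only the parameter list is taken from this header, and the body is illustrative:

#include <security/mac_policy.h>

/* Hypothetical policy hook matching the reworked 13-parameter
 * signature; builds only in a kernel-extension context. */
static int
mypolicy_cred_label_update_execve(kauth_cred_t old_cred,
    kauth_cred_t new_cred, struct proc *p, struct vnode *vp,
    off_t offset, struct vnode *scriptvp, struct label *vnodelabel,
    struct label *scriptvnodelabel, struct label *execlabel,
    u_int *csflags, void *macpolicyattr, size_t macpolicyattrlen,
    int *disjointp)
{
	(void)old_cred; (void)p; (void)vp; (void)offset; (void)scriptvp;
	(void)vnodelabel; (void)scriptvnodelabel; (void)execlabel;
	(void)csflags; (void)macpolicyattr; (void)macpolicyattrlen;

	/* A real policy would compute and install new_cred's label
	 * here, and could also adjust *csflags. */
	*disjointp = 0;	/* new label not disjoint from the old one */
	return (0);	/* non-zero now terminates the child */
}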
*/ -typedef void mpo_cred_label_update_execve_t( +typedef int mpo_cred_label_update_execve_t( kauth_cred_t old_cred, kauth_cred_t new_cred, struct proc *p, struct vnode *vp, + off_t offset, struct vnode *scriptvp, struct label *vnodelabel, struct label *scriptvnodelabel, struct label *execlabel, + u_int *csflags, void *macpolicyattr, size_t macpolicyattrlen, int *disjointp @@ -1253,7 +1263,7 @@ typedef int mpo_iokit_check_open_t( /** @brief Access control check for setting I/O Kit device properties @param cred Subject credential - @param registry_entry Target device + @param entry Target device @param properties Property list Determine whether the subject identified by the credential can set @@ -1267,6 +1277,50 @@ typedef int mpo_iokit_check_set_properties_t( io_object_t entry, io_object_t properties ); +/** + @brief Indicate desire to filter I/O Kit device properties + @param cred Subject credential + @param entry Target device + @see mpo_iokit_check_get_property_t + + Indicate whether this policy may restrict the subject credential + from reading properties of the target device. + If a policy returns success from this entry point, the + mpo_iokit_check_get_property entry point will later be called + for each property that the subject credential tries to read from + the target device. + + This entry point is primarily to optimize bulk property reads + by skipping calls to the mpo_iokit_check_get_property entry point + for credentials / devices that no MAC policy is interested in. + + @warning Even if a policy returns 0, it should behave correctly in + the presence of an invocation of mpo_iokit_check_get_property, as that + call may happen as a result of another policy requesting a transition. + + @return Non-zero if a transition is required, 0 otherwise. + */ +typedef int mpo_iokit_check_filter_properties_t( + kauth_cred_t cred, + io_object_t entry +); +/** + @brief Access control check for getting I/O Kit device properties + @param cred Subject credential + @param entry Target device + @param name Property name + + Determine whether the subject identified by the credential can get + properties on an I/O Kit device. + + @return Return 0 if access is granted, or an appropriate value for + errno. +*/ +typedef int mpo_iokit_check_get_property_t( + kauth_cred_t cred, + io_object_t entry, + const char *name +); /** @brief Access control check for software HID control @param cred Subject credential @@ -2337,407 +2391,6 @@ typedef int mpo_policy_syscall_t( int call, user_addr_t arg ); -/** - @brief Access control check for copying a send right to another task - @param task Label of the sender task - @param port Label of the affected port - - Access control check for copying send rights to the port from the - specified task. A complementary entry point, mpo_port_check_hold_send, - handles the receiving task. port_check_copy_send is called as part of - a group of policy invocations when messages with port rights are sent. - All access control checks made for a particular message must be successful - for the message to be sent. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_copy_send_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for obtaining a receive right - @param task Label of the receiving task - @param port Label of the affected port - - Access control check for a task obtaining receive rights to a - port.
Usually, these are port rights that were obtained with a call - to mach_port_allocate. This entry point is called as part of a - group of policy invocations when messages with port rights are - received. All of these access control checks must succeed in order - to receive the message. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_hold_receive_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for obtaining a send once right - @param task Label of the receiving task - @param port Label of the affected port - - Access control check for a task obtaining send once rights to a port. Usually, - these are port rights that were part of a message sent by another userspace - task. port_check_hold_send_once is called as part of a group of policy - invocations when messages with port rights are received. All of these access - control checks must succeed in order to receive the message. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_hold_send_once_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for obtaining a send right - @param task Label of the receiving task - @param port Label of the affected port - - Access control check for a task obtaining send rights to a port. Usually, - these are port rights that were part of a message sent by another userspace - task. port_check_hold_send is called as part of a group of policy - invocations when messages with port rights are received. All of these access - control checks must succeed in order to receive the message. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_hold_send_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for relabelling ports - @param task Subject's task label - @param oldlabel Original label of port - @param newlabel New label for port - - Access control check for relabelling ports. The policy should - indicate whether the subject is permitted to change the label - of a port from oldlabel to newlabel. The port is locked, but - the subject's task label is not locked. - - @warning XXX In future releases, the task label lock will likely - also be held. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_label_update_t( - struct label *task, - struct label *oldlabel, - struct label *newlabel -); -/** - @brief Access control check for producing a send once right from a receive right - @param task Label of the sender task - @param port Label of the affected port - - Access control check for obtaining send once rights from receive rights. - The new send once right may be destined for the calling task, or a different - task. In either case the mpo_port_check_hold_send_once entry point handles - the receiving task. port_check_make_send_once may be called as part of a - group of policy invocations when messages with port rights are sent. - All access control checks made for a particular message must be successful - for the message to be sent. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. 
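Stepping back to the two I/O Kit property hooks introduced earlier in this header (mpo_iokit_check_filter_properties_t and mpo_iokit_check_get_property_t): the intent is a cheap opt-in test followed by per-property checks. A speculative sketch of a policy implementing the pair; the uid test, the property name, and the reading of the filter hook's return convention (non-zero requests per-property filtering) are assumptions of this sketch:

#include <sys/errno.h>
#include <sys/kauth.h>
#include <string.h>
#include <security/mac_policy.h>

/* Opt in to per-property filtering for non-root credentials only;
 * returning 0 here lets bulk property reads skip the per-property
 * hook for this credential/device pair. */
static int
mypolicy_iokit_check_filter_properties(kauth_cred_t cred,
    io_object_t entry)
{
	(void)entry;
	return (kauth_cred_getuid(cred) != 0);
}

/* Per-property check, consulted once filtering is in effect.
 * "ExamplePrivateProperty" is a made-up property name. */
static int
mypolicy_iokit_check_get_property(kauth_cred_t cred, io_object_t entry,
    const char *name)
{
	(void)cred; (void)entry;
	if (strcmp(name, "ExamplePrivateProperty") == 0)
		return (EPERM);
	return (0);
}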
-*/ -typedef int mpo_port_check_make_send_once_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for producing a send right from a receive right - @param task Label of the sender task - @param port Label of the affected port - - Access control check for obtaining send rights from receive rights. The new - send right may be destined for the calling task, or a different task. - In either case the mpo_port_check_hold_send entry point - handles the receiving task. port_check_make_send may be called as part of - a group of policy invocations when messages with port rights are sent. - All access control checks made for a particular message must be successful - for the message to be sent. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_make_send_t( - struct label *task, - struct label *port -); -/** - @brief Compute access control check for a Mach message-based service - @param proc Sender's process structure (may be NULL) - @param task Sender's task label - @param port Destination port label - @param msgid Message id - - Access control computation for message-based services. This entry point - computes permission to the service requested by the specified port and message - id, for example a single MiG server routine, and is unrelated to the access - check for sending messages to ports (but that check must succeed for the - message to be sent to the destination). The result of this access computation - is stored in the message trailer field msgh_ad (only if requested by the - recipient); it does not actually inhibit the message from being sent or - received. - - @return 0 for access granted, nonzero for access denied. -*/ - -typedef int mpo_port_check_method_t( - struct proc *proc, - struct label *task, - struct label *port, - int msgid -); -/** - @brief Access control check for transferring a receive right - @param task Label of the sender task - @param port Label of the affected port - - Access control check for transferring the receive right to a port out - of the specified task. A complementary entry point, - mpo_port_check_hold_receive, handles the receiving task. - port_check_move_receive is called as part of - a group of policy invocations when messages with port rights are sent. - All access control checks made for a particular message must be successful - for the message to be sent. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_move_receive_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for transferring a send once right - @param task Label of the sender task - @param port Label of the affected port - - Access control check for transferring a send once right from one task to - the task listening to the specified port. A complementary entry point, - mpo_port_check_hold_send_once, handles the receiving task. - port_check_move_send_once is called as part of a group of policy invocations - when messages with port rights are sent. All access control checks made - for a particular message must be successful for the message to be sent. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. 
-*/ -typedef int mpo_port_check_move_send_once_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for transferring a send right - @param task Label of the sender task - @param port Label of the affected port - - Access control check for transferring a send right from one task to the - task listening to the specified port. A complementary entry point, - mpo_port_check_hold_send, handles the receiving task. - port_check_move_send is called as part of a group of policy invocations - when messages with port rights are sent. All access control checks made - for a particular message must be successful for the message to be sent. - - The task label and the port are locked. Sleeping is permitted. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_move_send_t( - struct label *task, - struct label *port -); -/** - @brief Access control check for receiving Mach messsages - @param task Label of the receiving task - @param sender Label of the sending task - - Access control check for receiving messages. The two labels are locked. - - @warning This entry point can be invoked from many places inside the - kernel, with arbitrary other locks held. The implementation of this - entry point must not cause page faults, as those are handled by mach - messages. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_receive_t( - struct label *task, - struct label *sender -); -/** - @brief Access control check for sending Mach messsages - @param task Label of the sender task - @param port Label of the destination port - - Access control check for sending messages. The task label and the - port are locked. - - @warning This entry point can be invoked from many places inside the - kernel, with arbitrary other locks held. The implementation of this - entry point must not cause page faults, as those are handled by mach - messages. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_send_t( - struct label *task, - struct label *port -); -/** - @brief Generic access control check - @param subj Caller-provided subject label - @param obj Caller-provided object label - @param serv Service or object class name - @param perm Permission, or method, within the specified service - - This function provides a general way for a user process to query - an arbitrary access control decision from the system's security policies. - Currently, there are no standards for the format of the service and - permission names. Labels may be either cred or port labels; the policy - must accept either. The userspace interfaces to this entry point allow - label strings or label handles (ports) to be provided. - - @return Return 0 if access is granted, non-zero otherwise. -*/ -typedef int mpo_port_check_service_t( - struct label *subj, - struct label *obj, - const char *serv, - const char *perm -); -/** - @brief Assign a label to a new Mach port created by the kernel - @param portlabel Label for the new port - @param isreply True if the port is for a reply message from the kernel - - Assign a label to a new port created by the kernel. If the port is being - used to reply to a message, isreply is 1 (0 otherwise). The port is locked. 
-*/ -typedef void mpo_port_label_associate_kernel_t( - struct label *portlabel, - int isreply -); -/** - @brief Assign a label to a new Mach port - @param it Task label of issuer - @param st Task label of target - @param portlabel Label for the new port - - Assign a label to a new port. The policy can base this label on - the label of the calling task, as well as the label of the target task. - The target task is the one which recieves the first right for this port. - Both task labels and the port are locked. -*/ -typedef void mpo_port_label_associate_t( - struct label *it, - struct label *st, - struct label *portlabel -); -/** - @brief Request label for new (userspace) object - @param subj Subject label - @param obj Parent or existing object label - @param serv Name of service - @param out Computed label - - Ask the loaded policies to compute a label based on the two input labels - and the service name. There is currently no standard for the service name, - or even what the input labels represent (Subject and parent object are only - a suggestion). If successful, the computed label is stored in out. All labels - must be port (or task) labels. The userspace interfaces to this entry point - allow label handles (ports) to be provided. - - @return 0 on success, or an errno value for failure. -*/ -typedef int mpo_port_label_compute_t( - struct label *subj, - struct label *obj, - const char *serv, - struct label *out -); -/** - @brief Copy a Mach port label - @param src Source port label - @param dest Destination port label - - Copy the Mach port label information from src to dest. This is used - to copy user-suplied labels into an existing port. -*/ -typedef void mpo_port_label_copy_t( - struct label *src, - struct label *dest -); -/** - @brief Destroy Mach port label - @param label The label to be destroyed - - Destroy a Mach port label. Since the object is going out of - scope, policy modules should free any internal storage associated - with the label so that it may be destroyed. -*/ -typedef void mpo_port_label_destroy_t( - struct label *label -); -/** - @brief Initialize Mach port label - @param label New label to initialize - - Initialize the label for a newly instantiated Mach port. Sleeping - is permitted. -*/ -typedef void mpo_port_label_init_t( - struct label *label -); -/** - @brief Update a Mach task port label - @param cred User credential label to be used as the source - @param task Mach port label to be used as the destination - @see mpo_cred_label_update_t - @see mpo_cred_label_update_execve_t - - Update the label on a Mach task port, using the supplied user - credential label. When a mac_cred_label_update_execve or a mac_cred_label_update - operation causes the label on a user credential to change, the Mach - task port label also needs to be updated to reflect the change. - Both labels are already valid (initialized and created). -*/ -typedef void mpo_port_label_update_cred_t( - struct label *cred, - struct label *task -); -/** - @brief Assign a label to a Mach port connected to a kernel object - @param portlabel Label for the port - @param kotype Type of kernel object - - Label a kernel port based on the type of object behind it. The - kotype parameter is one of the IKOT constants in - . The port already has a valid label from either - mpo_port_label_associate_kernel, or because it is a task port and has a label - derived from the process and task labels. The port is locked. 
-*/ -typedef void mpo_port_label_update_kobject_t( - struct label *portlabel, - int kotype -); /** @brief Access control check for POSIX semaphore create @param cred Subject credential @@ -4147,11 +3800,11 @@ typedef int mpo_system_check_swapon_t( /** @brief Access control check for sysctl @param cred Subject credential + @param namestring String representation of sysctl name. @param name Integer name; see sysctl(3) @param namelen Length of name array of integers; see sysctl(3) @param old 0 or address where to store old value; see sysctl(3) - @param oldlenp Pointer to length of old buffer; see sysctl(3) - @param inkernel Boolean; 1 if called from kernel + @param oldlen Length of old buffer; see sysctl(3) @param newvalue 0 or address of new value; see sysctl(3) @param newlen Length of new buffer; see sysctl(3) @@ -4165,13 +3818,13 @@ typedef int mpo_system_check_swapon_t( @return Return 0 if access is granted, otherwise an appropriate value for errno should be returned. */ -typedef int mpo_system_check_sysctl_t( +typedef int mpo_system_check_sysctlbyname_t( kauth_cred_t cred, + const char *namestring, int *name, u_int namelen, user_addr_t old, /* NULLOK */ - user_addr_t oldlenp, /* NULLOK */ - int inkernel, + size_t oldlen, user_addr_t newvalue, /* NULLOK */ size_t newlen ); @@ -4681,6 +4334,28 @@ typedef int mpo_proc_check_get_task_t( kauth_cred_t cred, struct proc *p ); + +/** + @brief Check whether a task's IPC ports may be inherited across process exec + @param p Current process instance + @param cur_vp vnode pointer of the currently executing image + @param cur_offset offset of the binary within the currently executing image + @param img_vp vnode pointer of the image to be exec'ed + @param img_offset offset into the file which is selected for execution + @param scriptvp vnode pointer of the script file, if any. + @return Return 0 if access is granted, + EPERM if the parent does not have any entitlements, or + EACCES if there is a mismatch in entitlements. +*/ +typedef int mpo_proc_check_inherit_ipc_ports_t( + struct proc *p, + struct vnode *cur_vp, + off_t cur_offset, + struct vnode *img_vp, + off_t img_offset, + struct vnode *scriptvp +); + /** @brief Privilege check for a process to run invalid @param proc Object process @@ -4695,143 +4370,6 @@ typedef int mac_proc_check_run_cs_invalid_t( struct proc *p ); - -/** - @brief Assign a label to a new kernelspace Mach task - @param kproc New task - @param tasklabel Label for new task - @param portlabel Label for new task port - @see mpo_cred_label_associate_kernel_t - - Assign labels to a new kernel task and its task port. Both the task and - task port labels should be specified. Both new labels are initialized. - If there is an associated BSD process structure, it will be labelled - with calls to mpo_cred_label_associate_kernel. -*/ -typedef void mpo_task_label_associate_kernel_t( - struct task *kproc, - struct label *tasklabel, - struct label *portlabel -); -/** - @brief Assign a label to a new (userspace) Mach task - @param parent Parent task - @param child New (child) task - @param parentlabel Label of parent task - @param childlabel Label for new task - @param childportlabel Label for new task's task port - - Assign labels to a new task and its task port. Both the task and task port - labels should be specified. Both new labels are initialized. If the task - will have an associated BSD process, that information will be made available - by the task_label_update and port_label_update_cred entry points.
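For the new mpo_proc_check_inherit_ipc_ports_t hook declared above, a minimal hypothetical implementation might gate port inheritance on whether the exec targets the same executable vnode; the heuristic below is purely illustrative, not what any shipping policy does:

#include <sys/errno.h>
#include <security/mac_policy.h>

static int
mypolicy_proc_check_inherit_ipc_ports(struct proc *p,
    struct vnode *cur_vp, off_t cur_offset,
    struct vnode *img_vp, off_t img_offset, struct vnode *scriptvp)
{
	(void)p; (void)cur_offset; (void)img_offset; (void)scriptvp;

	/* Allow inheritance across a re-exec of the same binary;
	 * otherwise deny so the task's ports are reset. */
	return (cur_vp == img_vp ? 0 : EPERM);
}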
-*/ -typedef void mpo_task_label_associate_t( - struct task *parent, - struct task *child, - struct label *parentlabel, - struct label *childlabel, - struct label *childportlabel -); -/** - @brief Copy a Mach task label - @param src Source task label - @param dest Destination task label - - Copy the Mach task label information from src to dest. This is used - when duplicating label handles to implement copy-on-write semantics. -*/ -typedef void mpo_task_label_copy_t( - struct label *src, - struct label *dest -); -/** - @brief Destroy Mach task label - @param label The label to be destroyed - - Destroy a Mach task label. Since the object is going out of - scope, policy modules should free any internal storage associated - with the label so that it may be destroyed. -*/ -typedef void mpo_task_label_destroy_t( - struct label *label -); -/** - @brief Externalize a task label - @param label Label to be externalized - @param element_name Name of the label namespace for which labels should be - externalized - @param sb String buffer to be filled with a text representation of the label - - Produce an external representation of the label on a task. An - externalized label consists of a text representation of the label - contents that can be used with user applications. Policy-agnostic - user space tools will display this externalized version. - - @return 0 on success, return non-zero if an error occurs while - externalizing the label data. - -*/ -typedef int mpo_task_label_externalize_t( - struct label *label, - char *element_name, - struct sbuf *sb -); -/** - @brief Initialize Mach task label - @param label New label to initialize - - Initialize the label for a newly instantiated Mach task. Sleeping - is permitted. -*/ -typedef void mpo_task_label_init_t( - struct label *label -); -/** - @brief Internalize a task label - @param label Label to be internalized - @param element_name Name of the label namespace for which the label should - be internalized - @param element_data Text data to be internalized - - Produce a task label from an external representation. An - externalized label consists of a text representation of the label - contents that can be used with user applications. Policy-agnostic - user space tools will forward text version to the kernel for - processing by individual policy modules. - - The policy's internalize entry points will be called only if the - policy has registered interest in the label namespace. - - @return 0 on success, Otherwise, return non-zero if an error occurs - while internalizing the label data. - -*/ -typedef int mpo_task_label_internalize_t( - struct label *label, - char *element_name, - char *element_data -); -/** - @brief Update a Mach task label - @param cred User credential label to be used as the source - @param task Mach task label to be used as the destination - @see mpo_cred_label_update_t - @see mpo_cred_label_update_execve_t - - Update the label on a Mach task, using the supplied user credential - label. When a mac_cred_label_update_execve or a mac_cred_label_update operation - causes the label on a user credential to change, the Mach task label - also needs to be updated to reflect the change. Both labels are - already valid (initialized and created). - - @warning XXX We may change the name of this entry point in a future - version of the MAC framework. 
-*/ -typedef void mpo_task_label_update_t( - struct label *cred, - struct label *task -); /** @brief Perform MAC-related events when a thread returns to user space @param thread Mach (not BSD) thread that is returning @@ -4995,7 +4533,9 @@ typedef int mpo_vnode_check_exchangedata_t( @brief Access control check for executing the vnode @param cred Subject credential @param vp Object vnode to execute - @param label Policy label for vp + @param scriptvp Script being executed by interpreter, if any. + @param vnodelabel Label corresponding to vp + @param scriptvnodelabel Script vnode label @param execlabel Userspace provided execution label @param cnp Component name for file being executed @param macpolicyattr MAC policy-specific spawn attribute data. @@ -5017,7 +4557,9 @@ typedef int mpo_vnode_check_exchangedata_t( typedef int mpo_vnode_check_exec_t( kauth_cred_t cred, struct vnode *vp, - struct label *label, + struct vnode *scriptvp, + struct label *vnodelabel, + struct label *scriptlabel, struct label *execlabel, /* NULLOK */ struct componentname *cnp, u_int *csflags, @@ -5045,8 +4587,9 @@ typedef int mpo_vnode_check_fsgetpath_t( @brief Access control check after determining the code directory hash */ typedef int mpo_vnode_check_signature_t(struct vnode *vp, struct label *label, - off_t macho_offset, unsigned char *sha1, void *signature, - int size); + off_t macho_offset, unsigned char *sha1, + const void *signature, int size, + int *is_platform_binary); /** @brief Access control check for retrieving file attributes @@ -5303,6 +4846,39 @@ typedef int mpo_vnode_check_readlink_t( struct vnode *vp, struct label *label ); +/** + @brief Access control check for rename + @param cred Subject credential + @param dvp Directory vnode + @param dlabel Policy label associated with dvp + @param vp vnode to be renamed + @param label Policy label associated with vp + @param cnp Component name for vp + @param tdvp Destination directory vnode + @param tdlabel Policy label associated with tdvp + @param tvp Overwritten vnode + @param tlabel Policy label associated with tvp + @param tcnp Destination component name + + Determine whether the subject identified by the credential should be allowed + to rename the vnode vp to something else. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. +*/ +typedef int mpo_vnode_check_rename_t( + kauth_cred_t cred, + struct vnode *dvp, + struct label *dlabel, + struct vnode *vp, + struct label *label, + struct componentname *cnp, + struct vnode *tdvp, + struct label *tdlabel, + struct vnode *tvp, + struct label *tlabel, + struct componentname *tcnp +); /** @brief Access control check for rename from @param cred Subject credential @@ -5311,6 +4887,7 @@ typedef int mpo_vnode_check_readlink_t( @param vp vnode to be renamed @param label Policy label associated with vp @param cnp Component name for vp + @see mpo_vnode_check_rename_t @see mpo_vnode_check_rename_to_t Determine whether the subject identified by the credential should be @@ -5320,6 +4897,8 @@ typedef int mpo_vnode_check_readlink_t( held during this entry point), the vnode relabel checks had to be split into two parts: relabel_from and relabel to. + This hook is deprecated; mpo_vnode_check_rename_t should be used instead. + @return Return 0 if access is granted, otherwise an appropriate value for errno should be returned.
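The consolidated mpo_vnode_check_rename_t hook sees the source and destination ends of a rename in a single call, so a policy no longer has to correlate separate rename_from/rename_to invocations. A hedged sketch follows; the helper mypolicy_label_is_protected and the cross-directory rule are hypothetical:

#include <sys/errno.h>
#include <security/mac_policy.h>

/* Hypothetical predicate; a real policy would inspect its own
 * per-label state hanging off l. */
static int
mypolicy_label_is_protected(struct label *l)
{
	return (l != NULL);
}

static int
mypolicy_vnode_check_rename(kauth_cred_t cred,
    struct vnode *dvp, struct label *dlabel,
    struct vnode *vp, struct label *label, struct componentname *cnp,
    struct vnode *tdvp, struct label *tdlabel,
    struct vnode *tvp, struct label *tlabel, struct componentname *tcnp)
{
	(void)cred; (void)dlabel; (void)vp; (void)cnp;
	(void)tdlabel; (void)tvp; (void)tlabel; (void)tcnp;

	/* tvp and tlabel are NULL when no existing file is replaced.
	 * Deny cross-directory moves of vnodes this policy protects. */
	if (dvp != tdvp && mypolicy_label_is_protected(label))
		return (EACCES);
	return (0);
}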
*/ @@ -5340,6 +4919,7 @@ typedef int mpo_vnode_check_rename_from_t( @param label Policy label associated with vp @param samedir Boolean; 1 if the source and destination directories are the same @param cnp Destination component name + @see mpo_vnode_check_rename_t @see mpo_vnode_check_rename_from_t Determine whether the subject identified by the credential should be @@ -5351,6 +4931,8 @@ typedef int mpo_vnode_check_rename_from_t( held during this entry point), the vnode relabel checks had to be split into two parts: relabel_from and relabel to. + This hook is deprecated; mpo_vnode_check_rename_t should be used instead. + @return Return 0 if access is granted, otherwise an appropriate value for errno should be returned. */ @@ -6230,7 +5812,7 @@ typedef void mpo_reserved_hook_t(void); * Please note that this should be kept in sync with the check assumptions * policy in bsd/kern/policy_check.c (policy_ops struct). */ -#define MAC_POLICY_OPS_VERSION 24 /* inc when new reserved slots are taken */ +#define MAC_POLICY_OPS_VERSION 31 /* inc when new reserved slots are taken */ struct mac_policy_ops { mpo_audit_check_postselect_t *mpo_audit_check_postselect; mpo_audit_check_preselect_t *mpo_audit_check_preselect; @@ -6365,28 +5947,28 @@ struct mac_policy_ops { mpo_policy_initbsd_t *mpo_policy_initbsd; mpo_policy_syscall_t *mpo_policy_syscall; - mpo_port_check_copy_send_t *mpo_port_check_copy_send; - mpo_port_check_hold_receive_t *mpo_port_check_hold_receive; - mpo_port_check_hold_send_once_t *mpo_port_check_hold_send_once; - mpo_port_check_hold_send_t *mpo_port_check_hold_send; - mpo_port_check_label_update_t *mpo_port_check_label_update; - mpo_port_check_make_send_once_t *mpo_port_check_make_send_once; - mpo_port_check_make_send_t *mpo_port_check_make_send; - mpo_port_check_method_t *mpo_port_check_method; - mpo_port_check_move_receive_t *mpo_port_check_move_receive; - mpo_port_check_move_send_once_t *mpo_port_check_move_send_once; - mpo_port_check_move_send_t *mpo_port_check_move_send; - mpo_port_check_receive_t *mpo_port_check_receive; - mpo_port_check_send_t *mpo_port_check_send; - mpo_port_check_service_t *mpo_port_check_service; - mpo_port_label_associate_kernel_t *mpo_port_label_associate_kernel; - mpo_port_label_associate_t *mpo_port_label_associate; - mpo_port_label_compute_t *mpo_port_label_compute; - mpo_port_label_copy_t *mpo_port_label_copy; - mpo_port_label_destroy_t *mpo_port_label_destroy; - mpo_port_label_init_t *mpo_port_label_init; - mpo_port_label_update_cred_t *mpo_port_label_update_cred; - mpo_port_label_update_kobject_t *mpo_port_label_update_kobject; + mpo_system_check_sysctlbyname_t *mpo_system_check_sysctlbyname; + mpo_proc_check_inherit_ipc_ports_t *mpo_proc_check_inherit_ipc_ports; + mpo_vnode_check_rename_t *mpo_vnode_check_rename; + mpo_reserved_hook_t *mpo_reserved4; + mpo_reserved_hook_t *mpo_reserved5; + mpo_reserved_hook_t *mpo_reserved6; + mpo_reserved_hook_t *mpo_reserved7; + mpo_reserved_hook_t *mpo_reserved8; + mpo_reserved_hook_t *mpo_reserved9; + mpo_reserved_hook_t *mpo_reserved10; + mpo_reserved_hook_t *mpo_reserved11; + mpo_reserved_hook_t *mpo_reserved12; + mpo_reserved_hook_t *mpo_reserved13; + mpo_reserved_hook_t *mpo_reserved14; + mpo_reserved_hook_t *mpo_reserved15; + mpo_reserved_hook_t *mpo_reserved16; + mpo_reserved_hook_t *mpo_reserved17; + mpo_reserved_hook_t *mpo_reserved18; + mpo_reserved_hook_t *mpo_reserved19; + mpo_reserved_hook_t *mpo_reserved20; + mpo_reserved_hook_t *mpo_reserved21; + mpo_reserved_hook_t *mpo_reserved22;
mpo_posixsem_check_create_t *mpo_posixsem_check_create; mpo_posixsem_check_open_t *mpo_posixsem_check_open; @@ -6464,7 +6046,7 @@ struct mac_policy_ops { mpo_system_check_settime_t *mpo_system_check_settime; mpo_system_check_swapoff_t *mpo_system_check_swapoff; mpo_system_check_swapon_t *mpo_system_check_swapon; - mpo_system_check_sysctl_t *mpo_system_check_sysctl; + mpo_reserved_hook_t *mpo_reserved31; mpo_sysvmsg_label_associate_t *mpo_sysvmsg_label_associate; mpo_sysvmsg_label_destroy_t *mpo_sysvmsg_label_destroy; @@ -6497,14 +6079,14 @@ struct mac_policy_ops { mpo_sysvshm_label_init_t *mpo_sysvshm_label_init; mpo_sysvshm_label_recycle_t *mpo_sysvshm_label_recycle; - mpo_task_label_associate_kernel_t *mpo_task_label_associate_kernel; - mpo_task_label_associate_t *mpo_task_label_associate; - mpo_task_label_copy_t *mpo_task_label_copy; - mpo_task_label_destroy_t *mpo_task_label_destroy; - mpo_task_label_externalize_t *mpo_task_label_externalize; - mpo_task_label_init_t *mpo_task_label_init; - mpo_task_label_internalize_t *mpo_task_label_internalize; - mpo_task_label_update_t *mpo_task_label_update; + mpo_reserved_hook_t *mpo_reserved23; + mpo_reserved_hook_t *mpo_reserved24; + mpo_reserved_hook_t *mpo_reserved25; + mpo_reserved_hook_t *mpo_reserved26; + mpo_reserved_hook_t *mpo_reserved27; + mpo_reserved_hook_t *mpo_reserved28; + mpo_reserved_hook_t *mpo_reserved29; + mpo_reserved_hook_t *mpo_reserved30; mpo_iokit_check_hid_control_t *mpo_iokit_check_hid_control; @@ -6609,8 +6191,8 @@ struct mac_policy_ops { mpo_proc_check_proc_info_t *mpo_proc_check_proc_info; mpo_vnode_notify_link_t *mpo_vnode_notify_link; - mpo_reserved_hook_t *mpo_reserved28; - mpo_reserved_hook_t *mpo_reserved29; + mpo_iokit_check_filter_properties_t *mpo_iokit_check_filter_properties; + mpo_iokit_check_get_property_t *mpo_iokit_check_get_property; }; /** diff --git a/security/mac_port.c b/security/mac_port.c deleted file mode 100644 index 6dbeeb1e6..000000000 --- a/security/mac_port.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/*- - * Copyright (c) 2003, 2004 Networks Associates Technology, Inc. - * Copyright (c) 2005-2006 SPARTA, Inc. - * All rights reserved. 
- * - * This software was developed for the FreeBSD Project in part by Network - * Associates Laboratories, the Security Research Division of Network - * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), - * as part of the DARPA CHATS research program. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -#include -#include -#include -#include - -void -mac_port_label_init(struct label *l) -{ - - mac_label_init(l); - if (mac_late == 0) { - mac_label_journal_add(l, MLJ_TYPE_PORT); - mac_label_journal(l, MLJ_PORT_OP_INIT); - } - MAC_PERFORM (port_label_init, l); -} - -void -mac_port_label_destroy(struct label *l) -{ - - MAC_PERFORM (port_label_destroy, l); - if (mac_late == 0) - mac_label_journal_remove(l); - mac_label_destroy(l); -} - -void -mac_port_label_copy(struct label *src, struct label *dest) -{ - - MAC_PERFORM(port_label_copy, src, dest); -} - -void -mac_port_label_update_cred(struct label *src, struct label *dest) -{ - - MAC_PERFORM(port_label_update_cred, src, dest); -} - -void -mac_port_label_associate(struct label *it, struct label *st, struct label *port) -{ - - if (mac_late == 0) - mac_label_journal(port, MLJ_PORT_OP_CREATE); - MAC_PERFORM(port_label_associate, it, st, port); -} - -void -mac_port_label_associate_kernel(struct label *port, int isreply) -{ - - if (mac_late == 0) - mac_label_journal(port, MLJ_PORT_OP_CREATE_K); - MAC_PERFORM(port_label_associate_kernel, port, isreply); -} - -void -mac_port_label_update_kobject(struct label *port, int kotype) -{ - - if (mac_late == 0) - mac_label_journal(port, MLJ_PORT_OP_UPDATE, kotype); - MAC_PERFORM(port_label_update_kobject, port, kotype); -} - -int -mac_port_label_internalize(struct label *label, char *string) -{ - int error; - - /* XXX - should have mpo_port_label_internalize */ - error = MAC_INTERNALIZE(cred, label, string); - - return (error); -} - -int -mac_port_label_externalize(struct label *label, char *elements, - char *outbuf, size_t outbuflen, int flags __unused) -{ - int error; - - /* XXX - should have mpo_port_label_externalize */ - error = MAC_EXTERNALIZE(cred, label, elements, outbuf, outbuflen); - - return (error); -} - -int -mac_port_check_label_update(struct label *task, struct label *old, - struct label *newlabel) -{ - int error; - - 
MAC_CHECK(port_check_label_update, task, old, newlabel); - - return (error); -} - -int -mac_port_check_send(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_send, task, port); - - return (error); -} - -int -mac_port_check_receive(struct label *task, struct label *sender) -{ - int error; - - MAC_CHECK(port_check_receive, task, sender); - - return (error); -} - -int -mac_port_check_make_send(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_make_send, task, port); - - return (error); -} - -int -mac_port_check_make_send_once(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_make_send_once, task, port); - - return (error); -} - -int -mac_port_check_copy_send(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_copy_send, task, port); - - return (error); -} - -int -mac_port_check_move_send(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_move_send, task, port); - - return (error); -} - -int -mac_port_check_move_send_once(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_move_send_once, task, port); - - return (error); -} - -int -mac_port_check_move_receive(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_move_receive, task, port); - - return (error); -} - -int -mac_port_check_hold_send(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_hold_send, task, port); - - return (error); -} - -int -mac_port_check_hold_send_once(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_hold_send_once, task, port); - - return (error); -} - -int -mac_port_check_hold_receive(struct label *task, struct label *port) -{ - int error; - - MAC_CHECK(port_check_hold_receive, task, port); - - return (error); -} - -int -mac_port_check_method(task_t task, struct label *sub, struct label *obj, int msgid) -{ - int error; - - MAC_CHECK(port_check_method, get_bsdtask_info(task), sub, obj, msgid); - - return (error); -} diff --git a/security/mac_process.c b/security/mac_process.c index a53f25c50..929d8107c 100644 --- a/security/mac_process.c +++ b/security/mac_process.c @@ -363,6 +363,16 @@ mac_proc_check_get_task(struct ucred *cred, struct proc *p) return (error); } +int +mac_proc_check_inherit_ipc_ports(struct proc *p, struct vnode *cur_vp, off_t cur_offset, struct vnode *img_vp, off_t img_offset, struct vnode *scriptvp) +{ + int error; + + MAC_CHECK(proc_check_inherit_ipc_ports, p, cur_vp, cur_offset, img_vp, img_offset, scriptvp); + + return (error); +} + /* * The type of maxprot in proc_check_map_anon must be equivalent to vm_prot_t * (defined in ). mac_policy.h does not include any header diff --git a/security/mac_system.c b/security/mac_system.c index 621d24244..0ccb63401 100644 --- a/security/mac_system.c +++ b/security/mac_system.c @@ -188,21 +188,18 @@ mac_system_check_swapoff(kauth_cred_t cred, struct vnode *vp) } int -mac_system_check_sysctl(kauth_cred_t cred, int *name, u_int namelen, - user_addr_t old, user_addr_t oldlenp, int inkernel, user_addr_t new, size_t newlen) +mac_system_check_sysctlbyname(kauth_cred_t cred, const char *namestring, int *name, + u_int namelen, user_addr_t oldctl, size_t oldlen, + user_addr_t newctl, size_t newlen) { int error; - - /* - * XXXMAC: We're very much like to assert the SYSCTL_LOCK here, - * but since it's not exported from kern_sysctl.c, we can't. 
- */ + if (!mac_system_enforce) return (0); - MAC_CHECK(system_check_sysctl, cred, name, namelen, old, oldlenp, - inkernel, new, newlen); - + MAC_CHECK(system_check_sysctlbyname, cred, namestring, + name, namelen, oldctl, oldlen, newctl, newlen); + return (error); } diff --git a/security/mac_task.c b/security/mac_task.c deleted file mode 100644 index 917cdef48..000000000 --- a/security/mac_task.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/*- - * Copyright (c) 2003, 2004 Networks Associates Technology, Inc. - * Copyright (c) 2005 SPARTA, Inc. - * - * This software was developed for the FreeBSD Project in part by Network - * Associates Laboratories, the Security Research Division of Network - * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), - * as part of the DARPA CHATS research program. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - */ - -#include -#include - -void -mac_task_label_init(struct label *label) -{ - - mac_label_init(label); - if (mac_late == 0) { - mac_label_journal_add(label, MLJ_TYPE_TASK); - mac_label_journal(label, MLJ_TASK_OP_INIT); - } - MAC_PERFORM(task_label_init, label); -} - -void -mac_task_label_update(struct label *cred, struct label *task) -{ - - MAC_PERFORM(task_label_update, cred, task); -} - -void -mac_task_label_copy(struct label *src, struct label *dest) -{ - - MAC_PERFORM(task_label_copy, src, dest); -} - -void -mac_task_label_destroy(struct label *label) -{ - - MAC_PERFORM(task_label_destroy, label); - if (mac_late == 0) - mac_label_journal_remove(label); - mac_label_destroy(label); -} - -void -mac_task_label_associate(struct task *parent, struct task *child, struct label *pl, - struct label *chl, struct label *chportl) -{ - - MAC_PERFORM(task_label_associate, parent, child, pl, chl, chportl); -} - -void -mac_task_label_associate_kernel(struct task *t, struct label *tl, struct label *tportl) -{ - - if (mac_late == 0) - mac_label_journal(tl, MLJ_TASK_OP_CREATE_K); - MAC_PERFORM(task_label_associate_kernel, t, tl, tportl); -} - -int -mac_task_label_externalize(struct label *label, char *elements, - char *outbuf, size_t outbuflen, int flags __unused) -{ - int error = 0; - - error = MAC_EXTERNALIZE(task, label, elements, outbuf, outbuflen); - - return (error); -} - -int -mac_task_label_internalize(struct label *label, char *string) -{ - int error; - - error = MAC_INTERNALIZE(task, label, string); - - return (error); -} diff --git a/security/mac_vfs.c b/security/mac_vfs.c index cf54eafa9..b318e3b75 100644 --- a/security/mac_vfs.c +++ b/security/mac_vfs.c @@ -461,17 +461,18 @@ mac_vnode_label_store(vfs_context_t ctx, struct vnode *vp, return (error); } -int -mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode *vp, - struct vnode *scriptvp, struct label *scriptvnodelabel, struct label *execl, - void *macextensions) +void +mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode *vp, off_t offset, + struct vnode *scriptvp, struct label *scriptvnodelabel, struct label *execl, u_int *csflags, + void *macextensions, int *disjoint, int *labelupdateerror) { kauth_cred_t cred; - int disjoint = 0; + *disjoint = 0; + int error; posix_cred_t pcred = posix_cred_get(new); if (!mac_proc_enforce && !mac_vnode_enforce) - return disjoint; + return; /* mark the new cred to indicate "matching" includes the label */ pcred->cr_flags |= CRF_MAC_ENFORCE; @@ -479,7 +480,7 @@ mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode * cred = vfs_context_ucred(ctx); /* - * NB: Cannot use MAC_PERFORM macro because we need a sequence point after + * NB: Cannot use MAC_CHECK macro because we need a sequence point after * calling exec_spawnattr_getmacpolicyinfo() and before passing the * spawnattrlen as an argument to the hook. 
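The hand-rolled loops in this hunk fold each policy's result into a single error with mac_error_select() instead of letting a macro expand the hook call, because the spawn-attribute blob must be fetched per policy before the hook runs. As a userspace approximation of the folding idea only (the precedence rule here is an assumption for illustration, not the kernel's exact table):

#include <errno.h>

/* Combine a hook's return value with the running error: success
 * never masks a denial, and the earliest denial seen is kept. */
static int
error_select(int newerr, int lasterr)
{
	if (lasterr == 0)
		return (newerr);
	if (newerr == 0)
		return (lasterr);
	return (lasterr);	/* keep the earlier denial */
}

/* Typical accumulation, mirroring the loop shape above:
 *     error = error_select(hook_result, error);              */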
*/ @@ -487,6 +488,7 @@ mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode * struct mac_policy_conf *mpc; u_int i; + error = 0; for (i = 0; i< mac_policy_list.staticmax; i++) { mpc = mac_policy_list.entries[i].mpc; if (mpc == NULL) @@ -499,8 +501,9 @@ mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode * size_t spawnattrlen = 0; void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); - hook(cred, new, vfs_context_proc(ctx), vp, scriptvp, vp->v_label, - scriptvnodelabel, execl, spawnattr, spawnattrlen, &disjoint); + error = mac_error_select(hook(cred, new, vfs_context_proc(ctx), vp, offset, scriptvp, + vp->v_label, scriptvnodelabel, execl, csflags, spawnattr, spawnattrlen, disjoint), + error); } if (mac_policy_list_conditional_busy() != 0) { for (; i <= mac_policy_list.maxindex; i++) { @@ -515,18 +518,18 @@ mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode * size_t spawnattrlen = 0; void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); - hook(cred, new, vfs_context_proc(ctx), vp, scriptvp, vp->v_label, - scriptvnodelabel, execl, spawnattr, spawnattrlen, &disjoint); + error = mac_error_select(hook(cred, new, vfs_context_proc(ctx), vp, offset, scriptvp, + vp->v_label, scriptvnodelabel, execl, csflags, spawnattr, spawnattrlen, disjoint), + error); } mac_policy_list_unbusy(); } } - - return (disjoint); + *labelupdateerror = error; } int -mac_cred_check_label_update_execve(vfs_context_t ctx, struct vnode *vp, +mac_cred_check_label_update_execve(vfs_context_t ctx, struct vnode *vp, off_t offset, struct vnode *scriptvp, struct label *scriptvnodelabel, struct label *execlabel, struct proc *p, void *macextensions) { @@ -559,7 +562,7 @@ mac_cred_check_label_update_execve(vfs_context_t ctx, struct vnode *vp, size_t spawnattrlen = 0; void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); - result = result || hook(cred, vp, scriptvp, vp->v_label, scriptvnodelabel, execlabel, p, spawnattr, spawnattrlen); + result = result || hook(cred, vp, offset, scriptvp, vp->v_label, scriptvnodelabel, execlabel, p, spawnattr, spawnattrlen); } if (mac_policy_list_conditional_busy() != 0) { for (; i <= mac_policy_list.maxindex; i++) { @@ -574,7 +577,7 @@ mac_cred_check_label_update_execve(vfs_context_t ctx, struct vnode *vp, size_t spawnattrlen = 0; void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); - result = result || hook(cred, vp, scriptvp, vp->v_label, scriptvnodelabel, execlabel, p, spawnattr, spawnattrlen); + result = result || hook(cred, vp, offset, scriptvp, vp->v_label, scriptvnodelabel, execlabel, p, spawnattr, spawnattrlen); } mac_policy_list_unbusy(); } @@ -782,13 +785,12 @@ mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp, continue; size_t spawnattrlen = 0; - void *spawnattr = (imgp != NULL) ? exec_spawnattr_getmacpolicyinfo(imgp->ip_px_smpx, mpc->mpc_name, &spawnattrlen) : NULL; + void *spawnattr = exec_spawnattr_getmacpolicyinfo(imgp->ip_px_smpx, mpc->mpc_name, &spawnattrlen); error = mac_error_select( - hook(cred, vp, vp->v_label, - (imgp != NULL) ? imgp->ip_execlabelp : NULL, - (imgp != NULL) ? &imgp->ip_ndp->ni_cnd : NULL, - (imgp != NULL) ? 
&imgp->ip_csflags : NULL, + hook(cred, + vp, imgp->ip_scriptvp, vp->v_label, imgp->ip_scriptlabelp, + imgp->ip_execlabelp, &imgp->ip_ndp->ni_cnd, &imgp->ip_csflags, spawnattr, spawnattrlen), error); } if (mac_policy_list_conditional_busy() != 0) { @@ -802,13 +804,12 @@ mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp, continue; size_t spawnattrlen = 0; - void *spawnattr = (imgp != NULL) ? exec_spawnattr_getmacpolicyinfo(imgp->ip_px_smpx, mpc->mpc_name, &spawnattrlen) : NULL; + void *spawnattr = exec_spawnattr_getmacpolicyinfo(imgp->ip_px_smpx, mpc->mpc_name, &spawnattrlen); error = mac_error_select( - hook(cred, vp, vp->v_label, - (imgp != NULL) ? imgp->ip_execlabelp : NULL, - (imgp != NULL) ? &imgp->ip_ndp->ni_cnd : NULL, - (imgp != NULL) ? &imgp->ip_csflags : NULL, + hook(cred, + vp, imgp->ip_scriptvp, vp->v_label, imgp->ip_scriptlabelp, + imgp->ip_execlabelp, &imgp->ip_ndp->ni_cnd, &imgp->ip_csflags, spawnattr, spawnattrlen), error); } mac_policy_list_unbusy(); @@ -836,14 +837,16 @@ mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp) int mac_vnode_check_signature(struct vnode *vp, off_t macho_offset, unsigned char *sha1, - void * signature, size_t size) + const void *signature, size_t size, + int *is_platform_binary) { int error; if (!mac_vnode_enforce || !mac_proc_enforce) return (0); - MAC_CHECK(vnode_check_signature, vp, vp->v_label, macho_offset, sha1, signature, size); + MAC_CHECK(vnode_check_signature, vp, vp->v_label, macho_offset, sha1, + signature, size, is_platform_binary); return (error); } @@ -1043,36 +1046,32 @@ mac_vnode_check_label_update(vfs_context_t ctx, struct vnode *vp, } int -mac_vnode_check_rename_from(vfs_context_t ctx, struct vnode *dvp, - struct vnode *vp, struct componentname *cnp) +mac_vnode_check_rename(vfs_context_t ctx, struct vnode *dvp, + struct vnode *vp, struct componentname *cnp, struct vnode *tdvp, + struct vnode *tvp, struct componentname *tcnp) { kauth_cred_t cred; int error; - if (!mac_vnode_enforce || + if (!mac_vnode_enforce || !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) return (0); cred = vfs_context_ucred(ctx); + MAC_CHECK(vnode_check_rename_from, cred, dvp, dvp->v_label, vp, vp->v_label, cnp); - return (error); -} - -int -mac_vnode_check_rename_to(vfs_context_t ctx, struct vnode *dvp, - struct vnode *vp, int samedir, struct componentname *cnp) -{ - kauth_cred_t cred; - int error; + if (error) + return (error); - if (!mac_vnode_enforce || - !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) - return (0); + MAC_CHECK(vnode_check_rename_to, cred, tdvp, tdvp->v_label, tvp, + tvp != NULL ? tvp->v_label : NULL, dvp == tdvp, tcnp); + if (error) + return (error); - cred = vfs_context_ucred(ctx); - MAC_CHECK(vnode_check_rename_to, cred, dvp, dvp->v_label, vp, - vp != NULL ? vp->v_label : NULL, samedir, cnp); + MAC_CHECK(vnode_check_rename, cred, dvp, dvp->v_label, vp, + vp->v_label, cnp, tdvp, tdvp->v_label, tvp, + tvp != NULL ? 
tvp->v_label : NULL, tcnp); return (error); } diff --git a/tools/lldbmacros/Makefile b/tools/lldbmacros/Makefile index 2e7b3aeed..db5e9dd55 100644 --- a/tools/lldbmacros/Makefile +++ b/tools/lldbmacros/Makefile @@ -12,7 +12,8 @@ include $(MakeInc_def) do_config_all:: lldbmacros_install LLDBMACROS_SOURCE:=$(SRCROOT)/tools/lldbmacros/ -LLDBMACROS_DEST:=$(OBJPATH)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros/ +LLDBMACROS_BOOTSTRAP_DEST:=$(OBJPATH)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR) +LLDBMACROS_DEST:=$(LLDBMACROS_BOOTSTRAP_DEST)/lldbmacros/ LLDBMACROS_PYTHON_FILES = \ core/standard.py \ @@ -26,13 +27,17 @@ LLDBMACROS_PYTHON_FILES = \ core/xnu_lldb_init.py \ plugins/__init__.py \ plugins/zprint_perf_log.py \ + atm.py \ + bank.py \ xnu.py \ xnudefines.py \ mbufdefines.py \ netdefines.py \ routedefines.py \ ipc.py \ + ipcimportancedetail.py \ scheduler.py \ + structanalyze.py \ pmap.py \ memory.py \ mbufs.py \ @@ -46,6 +51,12 @@ LLDBMACROS_PYTHON_FILES = \ misc.py \ apic.py +ifneq ($(PLATFORM),MacOSX) + LLDBMACROS_PYTHON_FILES+= \ + plugins/iosspeedtracer.py \ + plugins/iosspeedtracer.sh +endif + INSTALL_LLDBMACROS_PYTHON_FILES=$(addprefix $(LLDBMACROS_DEST), $(LLDBMACROS_PYTHON_FILES)) $(INSTALL_LLDBMACROS_PYTHON_FILES): $(LLDBMACROS_DEST)% : $(LLDBMACROS_SOURCE)% @@ -54,12 +65,12 @@ $(INSTALL_LLDBMACROS_PYTHON_FILES): $(LLDBMACROS_DEST)% : $(LLDBMACROS_SOURCE)% $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ $(_v)$(TOUCH) $(LLDBMACROS_DEST) -$(LLDBMACROS_DEST)/../mach_kernel.py: $(LLDBMACROS_SOURCE)/core/xnu_lldb_init.py +$(LLDBMACROS_BOOTSTRAP_DEST)/$(KERNEL_LLDBBOOTSTRAP_NAME): $(LLDBMACROS_SOURCE)/core/xnu_lldb_init.py $(_v)$(MKDIR) $(dir $@) $(_v)$(PYTHON) $(LLDBMACROS_SOURCE)/core/syntax_checker.py $< $(_vstdout) $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ -lldbmacros_install: $(INSTALL_LLDBMACROS_PYTHON_FILES) $(LLDBMACROS_DEST)/../mach_kernel.py +lldbmacros_install: $(INSTALL_LLDBMACROS_PYTHON_FILES) $(LLDBMACROS_BOOTSTRAP_DEST)/$(KERNEL_LLDBBOOTSTRAP_NAME) include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/tools/lldbmacros/README b/tools/lldbmacros/README index 9cb143107..051511206 100644 --- a/tools/lldbmacros/README +++ b/tools/lldbmacros/README @@ -22,7 +22,11 @@ F. Development and Debugging on lldb kernel debugging platform. ======================================== A. How to use lldb for kernel debugging ======================================== -lldb can be used for kernel debugging the same way as gdb. The simplest way is to start lldb with kernel symbol file. The lldb environment will ready to connect over kdp-remote '' or 'gdb-remote '. In case using a core file please do 'file --core /path/to/corefile' +lldb can be used for kernel debugging the same way as gdb. The simplest way is to start lldb with the kernel symbol file. The lldb environment by default does not allow automatic loading of python modules. Please add the following setting to +File: ~/.lldbinit +settings set target.load-script-from-symbol-file true + +Now lldb will be ready to connect over kdp-remote '' or 'gdb-remote '. In case you are using a core file, please do 'file --core /path/to/corefile' Following are detailed steps on how to debug a panic'ed / NMI'ed machine (For the curious souls). lldb debugging in detail:- @@ -245,8 +249,14 @@ i. Frequently Asked Questions This way the expression within ` ` is evaluated by lldb and the value is passed to the command. Note that if your argument pointer is bad or the memory is corrupted lldb macros will fail with a long backtrace that may not make sense. gdb used to fail silently but lldb does not. Please see Section F(i) for more information on reading backtraces.
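 For instance, a macro that expects a task pointer can be handed the result of an expression (a hypothetical session; the showtask invocation and the kernproc global are assumed to exist in your kernel build):
 (lldb) showtask `(task_t)kernproc->task`
 lldb first evaluates the expression between the backticks and then passes the resulting pointer to the macro, so a mistake inside the backticks surfaces as an expression evaluation error rather than a macro backtrace.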
+ + Q. I connected to a coredump file with lldb --core corefile and I got RuntimeError: Unable to find lldb thread for tid=XYZ. What should I do? + A. This most likely means that lldb ignored the operating system plugin in the dSYM, and hence the threads are not populated. Please put the line 'settings set target.load-script-from-symbol-file true' in your ~/.lldbinit file. If you do not have access, you can alternatively do + bash# lldb + (lldb) settings set target.load-script-from-symbol-file true + (lldb) file --core corefile + - ii. Formatted output printing - zen and peace for life ------------------------------------------------------ diff --git a/tools/lldbmacros/atm.py b/tools/lldbmacros/atm.py new file mode 100644 index 000000000..7f16e901e --- /dev/null +++ b/tools/lldbmacros/atm.py @@ -0,0 +1,96 @@ +from xnu import * +from utils import * + + +@lldb_type_summary(['atm_value', 'atm_value_t']) +@header("{0: <20s} {1: <16s} {2: <20s} {3: <16s}".format("atm_value", "aid", "voucher_value", "sync")) +def GetATMValueSummary(atm_value): + """ Summarizes the atm_value + params: atm_value = value object of type atm_value_t + returns: string with the summary of the type. + """ + format_str = "{0: <#020x} {1: <16d} {2: <#020x} {3: <16d}" + out_string = format_str.format(atm_value, unsigned(atm_value.aid), atm_value, atm_value.sync) + return out_string + + +@lldb_type_summary(['atm_task_descriptor', 'atm_task_descriptor_t']) +@header("{0: <20s} {1: <20s} {2: <16s} {3: <16s} {4: <20s} {5: <20s} {6: <10s}".format("task_descriptor", "trace_buffer", "buffer_size", "refcount", "mailbox_addr", "mailbox_size", "flags")) +def GetATMTaskDescriptorSummary(descriptor): + """ Summarizes atm_task_descriptor object + params: descriptor - value object of type atm_task_descriptor_t + returns: string - containing the description. + """ + format_str = "{0: <#020x} {1: <#020x} {2: <#016x} {3: <16d} {4: <#020x} {5: <#020x} {6: <10s}" + flags_str = "" + if unsigned(descriptor.flags) & 0x1: + flags_str = "DEAD" + out_string = format_str.format(descriptor, descriptor.trace_buffer, descriptor.trace_buffer_size, descriptor.reference_count, descriptor.mailbox_kernel_addr, descriptor.mailbox_array_size, flags_str) + + #if DEVELOPMENT + if hasattr(descriptor, 'task'): + out_string += " " + GetTaskSummary(descriptor.task) + " " + GetProcNameForTask(descriptor.task) + #endif + + return out_string + +# Macro: showatmvaluelisteners +@lldb_command('showatmvaluelisteners') +def ShowATMValueListeners(cmd_args=None, cmd_options={}): + """ show a list of listeners for an atm_value object.
+ Usage: (lldb)showatmvaluelisteners + """ + if not cmd_args: + raise ArgumentError("Please provide arguments") + + atm_val = kern.GetValueFromAddress(cmd_args[0], 'atm_value_t') + print GetATMValueSummary.header + print GetATMValueSummary(atm_val) + header_str = "{0: <20s} ".format("#mailbox") + GetATMTaskDescriptorSummary.header + #if DEVELOPMENT + header_str += " " + GetTaskSummary.header + " procname" + #endif + print header_str + for listener in IterateQueue(atm_val.listeners, 'atm_link_object_t', 'listeners_element'): + listener_summary = "{0: <#020x}".format(listener.mailbox) + listener_summary += " " + GetATMTaskDescriptorSummary(listener.descriptor) + print listener_summary + return +# EndMacro: showatmvaluelisteners + + +#if DEVELOPMENT + +# Macro: showallatmallocatedvalueslist +@lldb_command('showallatmallocatedvalueslist') +def ShowAllATMAllocatedValuesList(cmd_args=None, cmd_options={}): + """ A DEVELOPMENT macro that walks the list of all allocated atm_value objects + and prints them. + usage: (lldb) showallatmallocatedvalueslist + """ + if not hasattr(kern.globals, 'atm_values_list'): + print "It seems you are running a build of kernel that does not have the list of all atm_values_list." + return False + print GetATMValueSummary.header + for v in IterateQueue(kern.globals.atm_values_list, 'atm_value_t', 'value_elt'): + print GetATMValueSummary(v) + return True +# EndMacro: showallatmallocatedvalueslist + +# Macro: showallatmdescriptors +@lldb_command('showallatmdescriptors') +def ShowAllATMDescriptors(cmd_args=None, cmd_options={}): + """ A DEVELOPMENT macro that walks the list of all atm_descriptors_list + and prints the summary for each. + usage: (lldb) showallatmdescriptors + """ + if not hasattr(kern.globals, 'atm_descriptors_list'): + print "It seems you are running a build of kernel that does not have the list of all atm_descriptors_list." + return False + + print GetATMTaskDescriptorSummary.header + for d in IterateQueue(kern.globals.atm_descriptors_list, 'atm_task_descriptor_t', 'descriptor_elt'): + print GetATMTaskDescriptorSummary(d) + return True +# EndMacro +#endif diff --git a/tools/lldbmacros/bank.py b/tools/lldbmacros/bank.py new file mode 100644 index 000000000..6874636ae --- /dev/null +++ b/tools/lldbmacros/bank.py @@ -0,0 +1,146 @@ +from xnu import * +from utils import * + + +@lldb_type_summary(['bank_element', 'bank_element_t']) +@header("{0: <20s} {1: <16s} {2: <16s} {3: <16s} {4: <16s} {5: <20s} {6: <20s}".format("bank_element", "type", "ref_count", "sync", "pid", "task", "process_name")) +def GetBankElementSummary(bank_element): + """ Summarizes the bank element + params: bank_element = value of the object of type bank_element_t + returns: String with summary of the type. 
+ """ + format_str = "{0: <#020x} {1: <16s} {2: <16d} {3: <16d} {4: <16d}" + + if bank_element.be_type == 0: + out_string = format_str.format(bank_element, "BANK_TASK", unsigned(bank_element.be_refs), unsigned(bank_element.be_made), bank_element.be_pid) + else: + out_string = format_str.format(bank_element, "BANK_ACCOUNT", unsigned(bank_element.be_refs), unsigned(bank_element.be_made), bank_element.be_pid) + + #if DEVELOPMENT + format_str = "{0: <#020x} {1: <20s}" + if hasattr(bank_element, 'be_task'): + out_string += " " + format_str.format(bank_element.be_task, GetProcNameForTask(bank_element.be_task)) + #endif + + return out_string + + +@lldb_type_summary(['bank_task', 'bank_task_t']) +@header("{0: <20s} {1: <16s} {2: <20s} {3: <16s} {4: <16s} {5: <20s} {6: <20s}".format("bank_task", "pid", "ledger", "ref_count", "sync", "task", "process_name")) +def GetBankTaskSummary(bank_task): + """ Summarizes the bank task + params: bank_task = value of the object of type bank_task_t + returns: String with summary of the type. + """ + + format_str = "{0: <#020x} {1: <16d} {2: <#020x} {3: <16d} {4: <16d}" + out_string = format_str.format(bank_task, bank_task.bt_elem.be_pid, bank_task.bt_creditcard, unsigned(bank_task.bt_elem.be_refs), unsigned(bank_task.bt_elem.be_made)) + + #if DEVELOPMENT + format_str = "{0: <#020x} {1: <20s}" + if hasattr(bank_task.bt_elem, 'be_task'): + out_string += " " + format_str.format(bank_task.bt_elem.be_task, GetProcNameForTask(bank_task.bt_elem.be_task)) + #endif + return out_string + + +@lldb_type_summary(['bank_account', 'bank_account_t']) +@header("{0: <20s} {1: <16s} {2: <16s} {3: <20s} {4: <16s} {5: <16s} {6: <20s} {7: <20s} {8: <20s} {9: <20s}".format("bank_account", "holder_pid", "merchant_pid", "chit_ledger", "ref_count", "sync", "holder_task", "holder_process", "merchant_task", "merchant_process")) +def GetBankAccountSummary(bank_account): + """ Summarizes the bank account + params: bank_task = value of the object of type bank_account_t + returns: String with summary of the type. + """ + + format_str = "{0: <#020x} {1: <16d} {2: <16d} {3: <#020x} {4: <16d} {5: <16d}" + out_string = format_str.format(bank_account, bank_account.ba_holder.bt_elem.be_pid, bank_account.ba_merchant.bt_elem.be_pid, bank_account.ba_bill, unsigned(bank_account.ba_elem.be_refs), unsigned(bank_account.ba_elem.be_made)) + + #if DEVELOPMENT + format_str = "{0: <#020x} {1: <20s}" + if hasattr(bank_account.ba_holder.bt_elem, 'be_task'): + out_string += " " + format_str.format(bank_account.ba_holder.bt_elem.be_task, GetProcNameForTask(bank_account.ba_holder.bt_elem.be_task)) + if hasattr(bank_account.ba_merchant.bt_elem, 'be_task'): + out_string += " " + format_str.format(bank_account.ba_merchant.bt_elem.be_task, GetProcNameForTask(bank_account.ba_merchant.bt_elem.be_task)) + #endif + return out_string + + +# Macro: showbankaccountstopay +@lldb_command('showbankaccountstopay') +def ShowBankAccountsToPay(cmd_args=None, cmd_options={}): + """ show a list of merchant bank tasks for a bank_task object. + Usage: (lldb)showbankaccountstopay + """ + if not cmd_args: + raise ArgumentError("Please provide arguments") + + bank_task = kern.GetValueFromAddress(cmd_args[0], 'bank_task_t') + print GetBankTaskSummary.header + print GetBankTaskSummary(bank_task) + print "List of Accounts to Pay." 
+ header_str = GetBankAccountSummary.header + print header_str + + for bank_account in IterateQueue(bank_task.bt_accounts_to_pay, 'bank_account_t', 'ba_next_acc_to_pay'): + print GetBankAccountSummary(bank_account) + return +# EndMacro: showbankaccountstopay + + +# Macro: showbankaccountstocharge +@lldb_command('showbankaccountstocharge') +def ShowBankAccountsToCharge(cmd_args=None, cmd_options={}): + """ show a list of holder bank tasks for a bank_task object. + Usage: (lldb)showbankaccountstocharge + """ + if not cmd_args: + raise ArgumentError("Please provide arguments") + + bank_task = kern.GetValueFromAddress(cmd_args[0], 'bank_task_t') + print GetBankTaskSummary.header + print GetBankTaskSummary(bank_task) + print "List of Accounts to Charge." + header_str = GetBankAccountSummary.header + print header_str + + for bank_account in IterateQueue(bank_task.bt_accounts_to_charge, 'bank_account_t', 'ba_next_acc_to_charge'): + print GetBankAccountSummary(bank_account) + return +# EndMacro: showbankaccountstocharge + + +#if DEVELOPMENT + +# Macro: showallbanktasklist +@lldb_command('showallbanktasklist') +def ShowAllBankTaskList(cmd_args=None, cmd_options={}): + """ A DEVELOPMENT macro that walks the list of all allocated bank_task objects + and prints them. + usage: (lldb) showallbanktasklist + """ + if not hasattr(kern.globals, 'bank_tasks_list'): + print "It seems you are running a build of kernel that does not have the list of all bank_tasks_list." + return False + print GetBankTaskSummary.header + for bank_task in IterateQueue(kern.globals.bank_tasks_list, 'bank_task_t', 'bt_global_elt'): + print GetBankTaskSummary(bank_task) + return True +# EndMacro showallbanktasklist + + +# Macro: showallbankaccountlist +@lldb_command('showallbankaccountlist') +def ShowAllBankAccountList(cmd_args=None, cmd_options={}): + """ A DEVELOPMENT macro that walks the list of all allocated bank_account objects + and prints them. + usage: (lldb) showallbankaccountlist + """ + if not hasattr(kern.globals, 'bank_accounts_list'): + print "It seems you are running a build of kernel that does not have the list of all bank_accounts_list." 
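+ # bank_accounts_list exists only on DEVELOPMENT kernels (note the
+ # enclosing #if DEVELOPMENT), so bail out gracefully when it is absent.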
+ return False + print GetBankAccountSummary.header + for bank_account in IterateQueue(kern.globals.bank_accounts_list, 'bank_account_t', 'ba_global_elt'): + print GetBankAccountSummary(bank_account) + return True +# EndMacro showallbankaccountlist +#endif diff --git a/tools/lldbmacros/core/cvalue.py b/tools/lldbmacros/core/cvalue.py index 07bc25aec..f3d9eb5cd 100644 --- a/tools/lldbmacros/core/cvalue.py +++ b/tools/lldbmacros/core/cvalue.py @@ -280,6 +280,20 @@ class value(object): def GetSBValue(self): return self._sbval19k84obscure747 + def __getstate__(self): + err = lldb.SBError() + if self._sbval19k84obscure747_is_ptr: + addr = self._sbval19k84obscure747.GetValueAsUnsigned() + size = self._sbval19k84obscure747_type.GetPointeeType().GetByteSize() + else: + addr = self._sbval19k84obscure747.AddressOf().GetValueAsUnsigned() + size = self._sbval19k84obscure747_type.GetByteSize() + + content = LazyTarget.GetProcess().ReadMemory(addr, size, err) + if err.fail: + content = '' + return content + def _GetValueAsSigned(self): serr = lldb.SBError() retval = self._sbval19k84obscure747.GetValueAsSigned(serr) @@ -414,9 +428,6 @@ def gettype(target_type): NameError - Incase the type is not identified """ global _value_types_cache - # LLDB Somehow does not support finding types for 'struct pmap' while 'pmap' works fine - # - target_type = target_type.replace('struct', '') target_type = str(target_type).strip() if target_type not in _value_types_cache: tmp_type = None @@ -461,3 +472,13 @@ def islong(x): except ValueError: return False return True + +def readmemory(val): + """ Returns a string of hex data that is referenced by the value. + params: val - a value object. + return: str - string of hex bytes. + raises: TypeError if val is not a valid type + """ + if not type(val) is value: + raise TypeError('%s is not of type value' % str(type(val))) + return val.__getstate__() diff --git a/tools/lldbmacros/core/kernelcore.py b/tools/lldbmacros/core/kernelcore.py index ce029d2bd..26a4dcf7a 100644 --- a/tools/lldbmacros/core/kernelcore.py +++ b/tools/lldbmacros/core/kernelcore.py @@ -78,6 +78,9 @@ def IterateQueue(queue_head, element_ptr_type, element_field_name): returns: A generator does not return. It is used for iterating. value : an object thats of type (element_type) queue_head->next. 
Always a pointer object + example usage: + for page_meta in IterateQueue(kern.globals.first_zone.pages.all_free, 'struct zone_page_metadata *', 'pages'): + print page_meta """ if type(element_ptr_type) == str : element_ptr_type = gettype(element_ptr_type) @@ -111,6 +114,7 @@ class KernelTarget(object): self._debugger = debugger # This holds an lldb.SBDebugger object for debugger state self._threads_list = [] self._tasks_list = [] + self._coalitions_list = [] self._allproc = [] self._terminated_tasks_list = [] self._zones_list = [] @@ -268,12 +272,13 @@ class KernelTarget(object): def StraddlesPage(self, addr, size): if size > unsigned(self.GetGlobalVariable("page_size")): return True - return (((addr + size) & (unsigned(self.GetGlobalVariable("page_size"))-1)) < size) + val = ((addr + size) & (unsigned(self.GetGlobalVariable("page_size"))-1)) + return (val < size and val > 0) def PhysToKernelVirt(self, addr): if self.arch == 'x86_64': return (addr + unsigned(self.GetGlobalVariable('physmap_base'))) - elif self.arch == 'arm': + elif self.arch == 'arm' or self.arch == 'arm64': return (addr - unsigned(self.GetGlobalVariable("gPhysBase")) + unsigned(self.GetGlobalVariable("gVirtBase"))) else: raise ValueError("PhysToVirt does not support {0}".format(arch)) @@ -310,6 +315,17 @@ class KernelTarget(object): caching.SaveDynamicCacheData("kern._tasks_list", self._tasks_list) return self._tasks_list + if name == 'coalitions' : + self._coalitions_list = caching.GetDynamicCacheData("kern._coalitions_list", []) + if len(self._coalitions_list) > 0 : return self._coalitions_list + coalition_queue_head = self.GetGlobalVariable('coalitions') + coalition_type = LazyTarget.GetTarget().FindFirstType('coalition') + coalition_ptr_type = coalition_type.GetPointerType() + for tsk in IterateQueue(coalition_queue_head, coalition_ptr_type, 'coalitions'): + self._coalitions_list.append(tsk) + caching.SaveDynamicCacheData("kern._coalitions_list", self._coalitions_list) + return self._coalitions_list + if name == 'terminated_tasks' : self._terminated_tasks_list = caching.GetDynamicCacheData("kern._terminated_tasks_list", []) if len(self._terminated_tasks_list) > 0 : return self._terminated_tasks_list @@ -332,6 +348,17 @@ class KernelTarget(object): caching.SaveDynamicCacheData("kern._allproc", self._allproc) return self._allproc + if name == 'interrupt_stats' : + self._interrupt_stats_list = caching.GetDynamicCacheData("kern._interrupt_stats_list", []) + if len(self._interrupt_stats_list) > 0 : return self._interrupt_stats_list + interrupt_stats_head = self.GetGlobalVariable('gInterruptAccountingDataList') + interrupt_stats_type = LazyTarget.GetTarget().FindFirstType('IOInterruptAccountingData') + interrupt_stats_ptr_type = interrupt_stats_type.GetPointerType() + for interrupt_stats_obj in IterateQueue(interrupt_stats_head, interrupt_stats_ptr_type, 'chain'): + self._interrupt_stats_list.append(interrupt_stats_obj) + caching.SaveDynamicCacheData("kern._interrupt_stats", self._interrupt_stats_list) + return self._interrupt_stats_list + if name == 'zombprocs' : self._zombproc_list = caching.GetDynamicCacheData("kern._zombproc_list", []) if len(self._zombproc_list) > 0 : return self._zombproc_list @@ -354,7 +381,7 @@ class KernelTarget(object): self._arch = caching.GetStaticCacheData("kern.arch", None) if self._arch != None : return self._arch arch = LazyTarget.GetTarget().triple.split('-')[0] - if arch in ('armv7', 'armv7s'): + if arch in ('armv7', 'armv7s', 'armv7k'): self._arch = 'arm' else: self._arch = arch @@ -365,7 
+392,7 @@ class KernelTarget(object): self._ptrsize = caching.GetStaticCacheData("kern.ptrsize", None) if self._ptrsize != None : return self._ptrsize arch = LazyTarget.GetTarget().triple.split('-')[0] - if arch in ('x86_64'): + if arch in ('x86_64', 'arm64'): self._ptrsize = 8 else: self._ptrsize = 4 diff --git a/tools/lldbmacros/core/operating_system.py b/tools/lldbmacros/core/operating_system.py index 37d1aeba9..e19945561 100644 --- a/tools/lldbmacros/core/operating_system.py +++ b/tools/lldbmacros/core/operating_system.py @@ -7,381 +7,634 @@ import struct osplugin_target_obj = None class PluginValue(lldb.SBValue): - def GetChildMemberWithName(val, name): - val_type = val.GetType() - if val_type.IsPointerType() == True: - val_type = val_type.GetPointeeType() - for i in range(val_type.GetNumberOfFields()): - if name == val_type.GetFieldAtIndex(i).GetName(): - return PluginValue(val.GetChildAtIndex(i)) - return None + def GetChildMemberWithName(val, name): + val_type = val.GetType() + if val_type.IsPointerType() == True: + val_type = val_type.GetPointeeType() + for i in range(val_type.GetNumberOfFields()): + if name == val_type.GetFieldAtIndex(i).GetName(): + return PluginValue(val.GetChildAtIndex(i)) + return None + +class Armv8_RegisterSet(object): + """ register info set for armv8 64 bit architecture""" + register_info = { 'sets' : ['GPR'], + 'registers': [ + {'name': 'x0' , 'bitsize':64, 'offset': 0, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 0, 'dwarf': 0}, + {'name': 'x1' , 'bitsize':64, 'offset': 8, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 1, 'dwarf': 1}, + {'name': 'x2' , 'bitsize':64, 'offset': 16, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 2, 'dwarf': 2}, + {'name': 'x3' , 'bitsize':64, 'offset': 24, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 3, 'dwarf': 3}, + {'name': 'x4' , 'bitsize':64, 'offset': 32, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 4, 'dwarf': 4}, + {'name': 'x5' , 'bitsize':64, 'offset': 40, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 5, 'dwarf': 5}, + {'name': 'x6' , 'bitsize':64, 'offset': 48, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 6, 'dwarf': 6}, + {'name': 'x7' , 'bitsize':64, 'offset': 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 7, 'dwarf': 7}, + {'name': 'x8' , 'bitsize':64, 'offset': 64, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 8, 'dwarf': 8}, + {'name': 'x9' , 'bitsize':64, 'offset': 72, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 9, 'dwarf': 9}, + {'name': 'x10' , 'bitsize':64, 'offset': 80, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':10, 'dwarf':10}, + {'name': 'x11' , 'bitsize':64, 'offset': 88, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':11, 'dwarf':11}, + {'name': 'x12' , 'bitsize':64, 'offset': 96, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':12, 'dwarf':12}, + {'name': 'x13' , 'bitsize':64, 'offset':104, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':13, 'dwarf':13}, + {'name': 'x14' , 'bitsize':64, 'offset':112, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':14, 'dwarf':14}, + {'name': 'x15' , 'bitsize':64, 'offset':120, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':15, 'dwarf':15}, + {'name': 'x16' , 'bitsize':64, 'offset':128, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':16, 'dwarf':16}, + {'name': 'x17' , 'bitsize':64, 'offset':136, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':17, 'dwarf':17}, + {'name': 'x18' , 'bitsize':64, 'offset':144, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':18, 'dwarf':18}, + {'name': 'x19' 
, 'bitsize':64, 'offset':152, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':19, 'dwarf':19}, + {'name': 'x20' , 'bitsize':64, 'offset':160, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':20, 'dwarf':20}, + {'name': 'x21' , 'bitsize':64, 'offset':168, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':21, 'dwarf':21}, + {'name': 'x22' , 'bitsize':64, 'offset':176, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':22, 'dwarf':22}, + {'name': 'x23' , 'bitsize':64, 'offset':184, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':23, 'dwarf':23}, + {'name': 'x24' , 'bitsize':64, 'offset':192, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':24, 'dwarf':24}, + {'name': 'x25' , 'bitsize':64, 'offset':200, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':25, 'dwarf':25}, + {'name': 'x26' , 'bitsize':64, 'offset':208, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':26, 'dwarf':26}, + {'name': 'x27' , 'bitsize':64, 'offset':216, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':27, 'dwarf':27}, + {'name': 'x28' , 'bitsize':64, 'offset':224, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':28, 'dwarf':28}, + {'name': 'fp' , 'bitsize':64, 'offset':232, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':29, 'dwarf':29, 'alt-name': 'fp', 'generic':'fp'}, + {'name': 'lr' , 'bitsize':64, 'offset':240, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':30, 'dwarf':30, 'alt-name': 'lr', 'generic':'lr'}, + {'name': 'sp' , 'bitsize':64, 'offset':248, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':31, 'dwarf':31, 'alt-name': 'sp', 'generic':'sp'}, + {'name': 'pc' , 'bitsize':64, 'offset':256, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':32, 'dwarf':32, 'alt-name': 'pc', 'generic':'pc'}, + {'name': 'far' , 'bitsize':64, 'offset':264, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':34, 'dwarf':34, 'alt-name': 'far', 'generic':'far'}, + {'name': 'cpsr', 'bitsize':32, 'offset':272, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':33, 'dwarf':33, 'alt-name': 'cpsr', 'generic':'cpsr'}, + {'name': 'esr' , 'bitsize':32, 'offset':276, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':35, 'dwarf':35, 'alt-name': 'esr', 'generic':'esr'}, + ] + } + + def __init__(self): + self.switch_context_address = osplugin_target_obj.FindSymbols('Switch_context')[0].GetSymbol().GetStartAddress().GetLoadAddress(osplugin_target_obj) + self.ResetRegisterValues() + def ResetRegisterValues(self): + self.x0 = 0 + self.x1 = 0 + self.x2 = 0 + self.x3 = 0 + self.x4 = 0 + self.x5 = 0 + self.x6 = 0 + self.x7 = 0 + self.x8 = 0 + self.x9 = 0 + self.x10 = 0 + self.x11 = 0 + self.x12 = 0 + self.x13 = 0 + self.x14 = 0 + self.x15 = 0 + self.x16 = 0 + self.x17 = 0 + self.x18 = 0 + self.x19 = 0 + self.x20 = 0 + self.x21 = 0 + self.x22 = 0 + self.x23 = 0 + self.x24 = 0 + self.x25 = 0 + self.x26 = 0 + self.x27 = 0 + self.x28 = 0 + self.fp = 0 + self.lr = 0 + self.sp = 0 + self.pc = 0 + self.far = 0 + self.cpsr = 0 + self.esr = 0 + + def __str__(self): + return """ pc = """ + + def GetPackedRegisterState(self): + return struct.pack('34QII', self.x0, self.x1, self.x2, self.x3, self.x4, self.x5, + self.x6, self.x7, self.x8, self.x9, self.x10, self.x11, self.x12, self.x13, + self.x14, self.x15, self.x16, self.x17, self.x18, self.x19, self.x20, self.x21, + self.x22, self.x23, self.x24, self.x25, self.x26, self.x27, self.x28, self.fp, + self.lr, self.sp, self.pc, self.far, self.cpsr, self.esr) + + def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): + """ Setup register values from KDP saved information. 
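+ params: kdp_state - value holding the raw saved-state pointer handed over by KDP.
+ kernel_version - object whose CreateValueFromExpression() is used to cast that pointer to arm_saved_state64_t *.
+ returns: self, with x0-x28, fp, lr, sp, pc, far, cpsr and esr populated.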
+ """ + saved_state = kernel_version.CreateValueFromExpression(None, '(arm_saved_state64_t *) ' + str(kdp_state.GetValueAsUnsigned())) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.x0 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(0).GetValueAsUnsigned() + self.x1 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(1).GetValueAsUnsigned() + self.x2 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(2).GetValueAsUnsigned() + self.x3 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(3).GetValueAsUnsigned() + self.x4 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(4).GetValueAsUnsigned() + self.x5 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(5).GetValueAsUnsigned() + self.x6 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(6).GetValueAsUnsigned() + self.x7 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(7).GetValueAsUnsigned() + self.x8 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(8).GetValueAsUnsigned() + self.x9 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(9).GetValueAsUnsigned() + self.x10 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(10).GetValueAsUnsigned() + self.x11 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(11).GetValueAsUnsigned() + self.x12 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(12).GetValueAsUnsigned() + self.x13 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(13).GetValueAsUnsigned() + self.x14 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(14).GetValueAsUnsigned() + self.x15 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(15).GetValueAsUnsigned() + self.x16 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(16).GetValueAsUnsigned() + self.x17 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(17).GetValueAsUnsigned() + self.x18 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(18).GetValueAsUnsigned() + self.x19 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(19).GetValueAsUnsigned() + self.x20 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(20).GetValueAsUnsigned() + self.x21 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(21).GetValueAsUnsigned() + self.x22 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(22).GetValueAsUnsigned() + self.x23 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(23).GetValueAsUnsigned() + self.x24 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(24).GetValueAsUnsigned() + self.x25 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(25).GetValueAsUnsigned() + self.x26 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(26).GetValueAsUnsigned() + self.x27 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(27).GetValueAsUnsigned() + self.x28 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(28).GetValueAsUnsigned() + self.fp = saved_state.GetChildMemberWithName('fp').GetValueAsUnsigned() + self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned() + self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned() + self.pc = saved_state.GetChildMemberWithName('pc').GetValueAsUnsigned() + self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned() + self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned() + self.esr = saved_state.GetChildMemberWithName('esr').GetValueAsUnsigned() + return self + + def ReadRegisterDataFromKernelStack(self, 
kstack_saved_state_addr, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state64 *) '+ str(kstack_saved_state_addr)) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.x0 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(0).GetValueAsUnsigned() + self.x1 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(1).GetValueAsUnsigned() + self.x2 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(2).GetValueAsUnsigned() + self.x3 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(3).GetValueAsUnsigned() + self.x4 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(4).GetValueAsUnsigned() + self.x5 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(5).GetValueAsUnsigned() + self.x6 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(6).GetValueAsUnsigned() + self.x7 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(7).GetValueAsUnsigned() + self.x8 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(8).GetValueAsUnsigned() + self.x9 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(9).GetValueAsUnsigned() + self.x10 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(10).GetValueAsUnsigned() + self.x11 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(11).GetValueAsUnsigned() + self.x12 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(12).GetValueAsUnsigned() + self.x13 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(13).GetValueAsUnsigned() + self.x14 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(14).GetValueAsUnsigned() + self.x15 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(15).GetValueAsUnsigned() + self.x16 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(16).GetValueAsUnsigned() + self.x17 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(17).GetValueAsUnsigned() + self.x18 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(18).GetValueAsUnsigned() + self.x19 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(19).GetValueAsUnsigned() + self.x20 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(20).GetValueAsUnsigned() + self.x21 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(21).GetValueAsUnsigned() + self.x22 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(22).GetValueAsUnsigned() + self.x23 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(23).GetValueAsUnsigned() + self.x24 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(24).GetValueAsUnsigned() + self.x25 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(25).GetValueAsUnsigned() + self.x26 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(26).GetValueAsUnsigned() + self.x27 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(27).GetValueAsUnsigned() + self.x28 = saved_state.GetChildMemberWithName('x').GetChildAtIndex(28).GetValueAsUnsigned() + self.fp = saved_state.GetChildMemberWithName('fp').GetValueAsUnsigned() + self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned() + self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned() + # pc for a blocked thread is treated to be the next instruction it would run after thread switch. 
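+ # switch_context_address was resolved in __init__ from the 'Switch_context'
+ # symbol, so backtraces of blocked threads begin in the context-switch path.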
+ self.pc = self.switch_context_address + self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned() + self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned() + self.esr = saved_state.GetChildMemberWithName('esr').GetValueAsUnsigned() + return self + + def ReadRegisterDataFromContinuation(self, continuation_ptr): + self.ResetRegisterValues() + self.pc = continuation_ptr + return self + + @classmethod + def GetRegisterInfo(cls, regnum): + if regnum < 0 or regnum > len(cls.register_info['registers']): + return '' + + reginfo = cls.register_info['registers'][regnum] + retval = '' + for i in reginfo.keys(): + v_str = str(reginfo[i]) + if i == 'set': + v_str = 'General Purpose Registers' + retval += "%s:%s;" % (str(i), v_str) + return retval + class Armv7_RegisterSet(object): - """ register info set for armv7 32 bit architecture """ - def __init__(self): - self.register_info = {} - self.register_info['sets'] = ['GPR'] - self.register_info['registers'] = [ - { 'name':'r0' , 'bitsize' : 32, 'offset' : 0, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 0, 'dwarf' : 0}, - { 'name':'r1' , 'bitsize' : 32, 'offset' : 4, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 1, 'dwarf' : 1}, - { 'name':'r2' , 'bitsize' : 32, 'offset' : 8, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 2, 'dwarf' : 2}, - { 'name':'r3' , 'bitsize' : 32, 'offset' : 12, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 3, 'dwarf' : 3}, - { 'name':'r4' , 'bitsize' : 32, 'offset' : 16, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 4, 'dwarf' : 4}, - { 'name':'r5' , 'bitsize' : 32, 'offset' : 20, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 5, 'dwarf' : 5}, - { 'name':'r6' , 'bitsize' : 32, 'offset' : 24, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 6, 'dwarf' : 6}, - { 'name':'r7' , 'bitsize' : 32, 'offset' : 28, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 7, 'dwarf' : 7}, - { 'name':'r8' , 'bitsize' : 32, 'offset' : 32, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 8, 'dwarf' : 8}, - { 'name':'r9' , 'bitsize' : 32, 'offset' : 36, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 9, 'dwarf' : 9}, - { 'name':'r10' , 'bitsize' : 32, 'offset' : 40, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':10, 'dwarf' :10}, - { 'name':'r11' , 'bitsize' : 32, 'offset' : 44, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':11, 'dwarf' :11, 'alt-name': 'fp', 'generic': 'fp'}, - { 'name':'r12' , 'bitsize' : 32, 'offset' : 48, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':12, 'dwarf' :12}, - { 'name':'sp' , 'bitsize' : 32, 'offset' : 52, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':13, 'dwarf' :13, 'alt-name': 'sp', 'generic': 'sp'}, - { 'name':'lr' , 'bitsize' : 32, 'offset' : 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':14, 'dwarf' :14, 'alt-name': 'lr', 'generic': 'lr'}, - { 'name':'pc' , 'bitsize' : 32, 'offset' : 60, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':15, 'dwarf' :15, 'alt-name': 'pc', 'generic': 'pc'}, - { 'name':'cpsr' , 'bitsize' : 32, 'offset' : 64, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':25, 'dwarf' :16, 'alt-name':'cpsr','generic':'cpsr'}, - { 'name':'fsr' , 'bitsize' : 32, 'offset' : 68, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':17, 'dwarf' :17, 'alt-name':'fsr', 'generic': 'fsr'}, - { 'name':'far' , 'bitsize' : 32, 'offset' : 72, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':18, 'dwarf' :18, 'alt-name': 'far', 'generic': 'far'} - ] - self.switch_context_address = 
osplugin_target_obj.FindSymbols('load_reg')[0].GetSymbol().GetStartAddress().GetLoadAddress(osplugin_target_obj) + 8 - self.ResetRegisterValues() - def ResetRegisterValues(self): - self.r0 = 0 - self.r1 = 0 - self.r2 = 0 - self.r3 = 0 - self.r4 = 0 - self.r5 = 0 - self.r6 = 0 - self.r7 = 0 - self.r8 = 0 - self.r9 = 0 - self.r10 = 0 - self.r11 = 0 - self.r12 = 0 - self.sp = 0 - self.lr = 0 - self.pc = 0 - self.cpsr = 0 - self.fsr = 0 - self.far = 0 - - def __str__(self): - return """ - r0 = {o.r0: <#010x} - r1 = {o.r1: <#010x} - r2 = {o.r2: <#010x} - r3 = {o.r3: <#010x} - r4 = {o.r4: <#010x} - r5 = {o.r5: <#010x} - r6 = {o.r6: <#010x} - r7 = {o.r7: <#010x} - r8 = {o.r8: <#010x} - r9 = {o.r9: <#010x} - r10 = {o.r10: <#010x} - r11 = {o.r11: <#010x} - r12 = {o.r12: <#010x} - sp = {o.sp: <#010x} - lr = {o.lr: <#010x} - pc = {o.pc: <#010x} - cpsr = {o.cpsr: <#010x} - fsr = {o.fsr : <#010x} - far = {o.far : <#010x} - """.format(o=self) - - def GetPackedRegisterState(self): - return struct.pack('19I', self.r0, self.r1, self.r2, self.r3, - self.r4, self.r5, self.r6, self.r7, - self.r8, self.r9, self.r10, self.r11, - self.r12, self.sp, self.lr, self.pc, - self.cpsr, self.fsr, self.far) - - def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): - saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state *) ' + str(kdp_state.GetValueAsUnsigned())) - saved_state = saved_state.Dereference() - saved_state = PluginValue(saved_state) - self.ResetRegisterValues() - self.r0 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(0).GetValueAsUnsigned() - self.r1 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(1).GetValueAsUnsigned() - self.r2 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(2).GetValueAsUnsigned() - self.r3 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(3).GetValueAsUnsigned() - self.r4 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(4).GetValueAsUnsigned() - self.r5 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(5).GetValueAsUnsigned() - self.r6 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(6).GetValueAsUnsigned() - self.r7 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(7).GetValueAsUnsigned() - self.r8 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(8).GetValueAsUnsigned() - self.r9 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(9).GetValueAsUnsigned() - self.r10 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(10).GetValueAsUnsigned() - self.r11 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(11).GetValueAsUnsigned() - self.r12 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(12).GetValueAsUnsigned() - self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned() - self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned() - self.pc = saved_state.GetChildMemberWithName('pc').GetValueAsUnsigned() - self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned() - self.fsr = saved_state.GetChildMemberWithName('fsr').GetValueAsUnsigned() - self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned() - return self - - def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): - saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state *) '+ str(kstack_saved_state_addr)) - saved_state = saved_state.Dereference() - saved_state = PluginValue(saved_state) - self.ResetRegisterValues() - self.r0 = 
saved_state.GetChildMemberWithName('r').GetChildAtIndex(0).GetValueAsUnsigned() - self.r1 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(1).GetValueAsUnsigned() - self.r2 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(2).GetValueAsUnsigned() - self.r3 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(3).GetValueAsUnsigned() - self.r4 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(4).GetValueAsUnsigned() - self.r5 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(5).GetValueAsUnsigned() - self.r6 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(6).GetValueAsUnsigned() - self.r7 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(7).GetValueAsUnsigned() - self.r8 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(8).GetValueAsUnsigned() - self.r9 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(9).GetValueAsUnsigned() - self.r10 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(10).GetValueAsUnsigned() - self.r11 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(11).GetValueAsUnsigned() - self.r12 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(12).GetValueAsUnsigned() - self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned() - self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned() - # pc for a blocked thread is treated to be the next instruction it would run after thread switch. - self.pc = self.switch_context_address - self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned() - self.fsr = saved_state.GetChildMemberWithName('fsr').GetValueAsUnsigned() - self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned() - return self - - def ReadRegisterDataFromContinuation(self, continuation_ptr): - self.ResetRegisterValues() - self.pc = continuation_ptr - return self + """ register info set for armv7 32 bit architecture """ + register_info = { 'sets' : ['GPR'], + 'registers': [ + { 'name':'r0' , 'bitsize' : 32, 'offset' : 0, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 0, 'dwarf' : 0}, + { 'name':'r1' , 'bitsize' : 32, 'offset' : 4, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 1, 'dwarf' : 1}, + { 'name':'r2' , 'bitsize' : 32, 'offset' : 8, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 2, 'dwarf' : 2}, + { 'name':'r3' , 'bitsize' : 32, 'offset' : 12, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 3, 'dwarf' : 3}, + { 'name':'r4' , 'bitsize' : 32, 'offset' : 16, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 4, 'dwarf' : 4}, + { 'name':'r5' , 'bitsize' : 32, 'offset' : 20, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 5, 'dwarf' : 5}, + { 'name':'r6' , 'bitsize' : 32, 'offset' : 24, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 6, 'dwarf' : 6}, + { 'name':'r7' , 'bitsize' : 32, 'offset' : 28, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 7, 'dwarf' : 7}, + { 'name':'r8' , 'bitsize' : 32, 'offset' : 32, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 8, 'dwarf' : 8}, + { 'name':'r9' , 'bitsize' : 32, 'offset' : 36, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 9, 'dwarf' : 9}, + { 'name':'r10' , 'bitsize' : 32, 'offset' : 40, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':10, 'dwarf' :10}, + { 'name':'r11' , 'bitsize' : 32, 'offset' : 44, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':11, 'dwarf' :11, 'alt-name': 'fp', 'generic': 'fp'}, + { 'name':'r12' , 'bitsize' : 32, 'offset' : 48, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':12, 'dwarf' :12}, + { 'name':'sp' , 
'bitsize' : 32, 'offset' : 52, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':13, 'dwarf' :13, 'alt-name': 'sp', 'generic': 'sp'}, + { 'name':'lr' , 'bitsize' : 32, 'offset' : 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':14, 'dwarf' :14, 'alt-name': 'lr', 'generic': 'lr'}, + { 'name':'pc' , 'bitsize' : 32, 'offset' : 60, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':15, 'dwarf' :15, 'alt-name': 'pc', 'generic': 'pc'}, + { 'name':'cpsr' , 'bitsize' : 32, 'offset' : 64, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':25, 'dwarf' :16, 'alt-name':'cpsr','generic':'cpsr'}, + { 'name':'fsr' , 'bitsize' : 32, 'offset' : 68, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':17, 'dwarf' :17, 'alt-name':'fsr', 'generic': 'fsr'}, + { 'name':'far' , 'bitsize' : 32, 'offset' : 72, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':18, 'dwarf' :18, 'alt-name': 'far', 'generic': 'far'} + ] + } + + def __init__(self): + self.switch_context_address = osplugin_target_obj.FindSymbols('load_reg')[0].GetSymbol().GetStartAddress().GetLoadAddress(osplugin_target_obj) + 8 + self.ResetRegisterValues() + + @classmethod + def GetRegisterInfo(cls, regnum): + if regnum < 0 or regnum > len(cls.register_info['registers']): + return '' + + reginfo = cls.register_info['registers'][regnum] + retval = '' + for i in reginfo.keys(): + v_str = str(reginfo[i]) + if i == 'set': + v_str = 'General Purpose Registers' + retval += "%s:%s;" % (str(i), v_str) + return retval + + def ResetRegisterValues(self): + self.r0 = 0 + self.r1 = 0 + self.r2 = 0 + self.r3 = 0 + self.r4 = 0 + self.r5 = 0 + self.r6 = 0 + self.r7 = 0 + self.r8 = 0 + self.r9 = 0 + self.r10 = 0 + self.r11 = 0 + self.r12 = 0 + self.sp = 0 + self.lr = 0 + self.pc = 0 + self.cpsr = 0 + self.fsr = 0 + self.far = 0 + + def __str__(self): + return """ + r0 = {o.r0: <#010x} + r1 = {o.r1: <#010x} + r2 = {o.r2: <#010x} + r3 = {o.r3: <#010x} + r4 = {o.r4: <#010x} + r5 = {o.r5: <#010x} + r6 = {o.r6: <#010x} + r7 = {o.r7: <#010x} + r8 = {o.r8: <#010x} + r9 = {o.r9: <#010x} + r10 = {o.r10: <#010x} + r11 = {o.r11: <#010x} + r12 = {o.r12: <#010x} + sp = {o.sp: <#010x} + lr = {o.lr: <#010x} + pc = {o.pc: <#010x} + cpsr = {o.cpsr: <#010x} + fsr = {o.fsr : <#010x} + far = {o.far : <#010x} + """.format(o=self) + + def GetPackedRegisterState(self): + return struct.pack('19I', self.r0, self.r1, self.r2, self.r3, + self.r4, self.r5, self.r6, self.r7, + self.r8, self.r9, self.r10, self.r11, + self.r12, self.sp, self.lr, self.pc, + self.cpsr, self.fsr, self.far) + + def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state *) ' + str(kdp_state.GetValueAsUnsigned())) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.r0 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(0).GetValueAsUnsigned() + self.r1 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(1).GetValueAsUnsigned() + self.r2 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(2).GetValueAsUnsigned() + self.r3 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(3).GetValueAsUnsigned() + self.r4 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(4).GetValueAsUnsigned() + self.r5 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(5).GetValueAsUnsigned() + self.r6 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(6).GetValueAsUnsigned() + self.r7 = 
saved_state.GetChildMemberWithName('r').GetChildAtIndex(7).GetValueAsUnsigned() + self.r8 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(8).GetValueAsUnsigned() + self.r9 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(9).GetValueAsUnsigned() + self.r10 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(10).GetValueAsUnsigned() + self.r11 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(11).GetValueAsUnsigned() + self.r12 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(12).GetValueAsUnsigned() + self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned() + self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned() + self.pc = saved_state.GetChildMemberWithName('pc').GetValueAsUnsigned() + self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned() + self.fsr = saved_state.GetChildMemberWithName('fsr').GetValueAsUnsigned() + self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned() + return self + + def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state *) '+ str(kstack_saved_state_addr)) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.r0 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(0).GetValueAsUnsigned() + self.r1 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(1).GetValueAsUnsigned() + self.r2 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(2).GetValueAsUnsigned() + self.r3 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(3).GetValueAsUnsigned() + self.r4 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(4).GetValueAsUnsigned() + self.r5 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(5).GetValueAsUnsigned() + self.r6 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(6).GetValueAsUnsigned() + self.r7 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(7).GetValueAsUnsigned() + self.r8 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(8).GetValueAsUnsigned() + self.r9 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(9).GetValueAsUnsigned() + self.r10 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(10).GetValueAsUnsigned() + self.r11 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(11).GetValueAsUnsigned() + self.r12 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(12).GetValueAsUnsigned() + self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned() + self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned() + # pc for a blocked thread is treated to be the next instruction it would run after thread switch. 
+ self.pc = self.switch_context_address + self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned() + self.fsr = saved_state.GetChildMemberWithName('fsr').GetValueAsUnsigned() + self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned() + return self + + def ReadRegisterDataFromContinuation(self, continuation_ptr): + self.ResetRegisterValues() + self.pc = continuation_ptr + return self class I386_RegisterSet(object): - """ register info set for i386 architecture - """ - def __init__(self): - self.register_info = [] - self.register_info['sets'] = ['GPR'] - self.register_info['registers'] = [ - { 'name': 'eax' , 'bitsize': 32, 'offset' : 0, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 0, 'dwarf': 0}, - { 'name': 'ebx' , 'bitsize': 32, 'offset' : 4, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf': 1}, - { 'name': 'ecx' , 'bitsize': 32, 'offset' : 8, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf': 2}, - { 'name': 'edx' , 'bitsize': 32, 'offset' :12, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf': 3}, - { 'name': 'edi' , 'bitsize': 32, 'offset' :16, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf': 4}, - { 'name': 'esi' , 'bitsize': 32, 'offset' :20, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf': 5}, - { 'name': 'ebp' , 'bitsize': 32, 'offset' :24, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf': 6}, - { 'name': 'esp' , 'bitsize': 32, 'offset' :28, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf': 7}, - { 'name': 'ss' , 'bitsize': 32, 'offset' :32, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 8, 'dwarf': 8}, - { 'name': 'eflags', 'bitsize': 32, 'offset' :36, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf': 9}, - { 'name': 'eip' , 'bitsize': 32, 'offset' :40, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :10, 'dwarf':10}, - { 'name': 'cs' , 'bitsize': 32, 'offset' :44, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :11, 'dwarf':11}, - { 'name': 'ds' , 'bitsize': 32, 'offset' :48, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :12, 'dwarf':12}, - { 'name': 'es' , 'bitsize': 32, 'offset' :52, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :13, 'dwarf':13}, - { 'name': 'fs' , 'bitsize': 32, 'offset' :56, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :14, 'dwarf':14}, - { 'name': 'gs' , 'bitsize': 32, 'offset' :60, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :15, 'dwarf':15}, - ] - self.ResetRegisterValues() - def ResetRegisterValues(self): - """ set all registers to zero """ - self.eax = 0 - self.ebx = 0 - self.ecx = 0 - self.edx = 0 - self.edi = 0 - self.esi = 0 - self.ebp = 0 - self.esp = 0 - self.ss = 0 - self.eflags = 0 - self.eip = 0 - self.cs = 0 - self.ds = 0 - self.es = 0 - self.fs = 0 - self.gs = 0 - - def __str__(self): - return """ - eax = {o.eax: #010x} - ebx = {o.ebx: #010x} - ecx = {o.ecx: #010x} - edx = {o.edx: #010x} - edi = {o.edi: #010x} - esi = {o.esi: #010x} - ebp = {o.ebp: #010x} - esp = {o.esp: #010x} - ss = {o.ss: #010x} - eflags = {o.eflags: #010x} - eip = {o.eip: #010x} - cs = {o.cs: #010x} - ds = {o.ds: #010x} - es = {o.es: #010x} - fs = {o.fs: #010x} - gs = {o.gs: #010x} - """.format(o=self) - - def GetPackedRegisterState(self): - """ get a struct.pack register data """ - return struct.pack('16I', self.eax, self.ebx, self.ecx, - self.edx, self.edi, self.esi, - self.ebp, self.esp, self.ss, - self.eflags, self.eip, 
self.cs, - self.ds, self.es, self.fs, self.gs - ) - def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): - """ to be implemented""" - return None - def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): - """ to be implemented """ - return None - - def ReadRegisterDataFromContinuation(self, continuation_ptr): - self.ResetRegisterValues() - self.eip = continuation_ptr - return self - - + """ register info set for i386 architecture + """ + register_info = { 'sets' : ['GPR'], + 'registers': [ + { 'name': 'eax' , 'bitsize': 32, 'offset' : 0, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 0, 'dwarf': 0}, + { 'name': 'ebx' , 'bitsize': 32, 'offset' : 4, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf': 1}, + { 'name': 'ecx' , 'bitsize': 32, 'offset' : 8, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf': 2}, + { 'name': 'edx' , 'bitsize': 32, 'offset' :12, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf': 3}, + { 'name': 'edi' , 'bitsize': 32, 'offset' :16, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf': 4}, + { 'name': 'esi' , 'bitsize': 32, 'offset' :20, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf': 5}, + { 'name': 'ebp' , 'bitsize': 32, 'offset' :24, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf': 6}, + { 'name': 'esp' , 'bitsize': 32, 'offset' :28, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf': 7}, + { 'name': 'ss' , 'bitsize': 32, 'offset' :32, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 8, 'dwarf': 8}, + { 'name': 'eflags', 'bitsize': 32, 'offset' :36, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf': 9}, + { 'name': 'eip' , 'bitsize': 32, 'offset' :40, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :10, 'dwarf':10}, + { 'name': 'cs' , 'bitsize': 32, 'offset' :44, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :11, 'dwarf':11}, + { 'name': 'ds' , 'bitsize': 32, 'offset' :48, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :12, 'dwarf':12}, + { 'name': 'es' , 'bitsize': 32, 'offset' :52, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :13, 'dwarf':13}, + { 'name': 'fs' , 'bitsize': 32, 'offset' :56, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :14, 'dwarf':14}, + { 'name': 'gs' , 'bitsize': 32, 'offset' :60, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :15, 'dwarf':15}, + ] + } + + def __init__(self): + self.ResetRegisterValues() + + @classmethod + def GetRegisterInfo(cls, regnum): + """ serialize the info dictionary for register number regnum as 'key:value;' pairs """ + if regnum < 0 or regnum >= len(cls.register_info['registers']): + return '' + + reginfo = cls.register_info['registers'][regnum] + retval = '' + for i in reginfo.keys(): + v_str = str(reginfo[i]) + if i == 'set': + v_str = 'General Purpose Registers' + retval += "%s:%s;" % (str(i), v_str) + return retval + + def ResetRegisterValues(self): + """ set all registers to zero """ + self.eax = 0 + self.ebx = 0 + self.ecx = 0 + self.edx = 0 + self.edi = 0 + self.esi = 0 + self.ebp = 0 + self.esp = 0 + self.ss = 0 + self.eflags = 0 + self.eip = 0 + self.cs = 0 + self.ds = 0 + self.es = 0 + self.fs = 0 + self.gs = 0 + + def __str__(self): + return """ + eax = {o.eax: #010x} + ebx = {o.ebx: #010x} + ecx = {o.ecx: #010x} + edx = {o.edx: #010x} + edi = {o.edi: #010x} + esi = {o.esi: #010x} + ebp = {o.ebp: #010x} + esp = {o.esp: #010x} + ss = {o.ss: #010x} + eflags = {o.eflags: #010x} + eip = {o.eip: #010x} + cs = {o.cs: #010x} + ds = {o.ds: #010x} + es = {o.es:
#010x} + fs = {o.fs: #010x} + gs = {o.gs: #010x} + """.format(o=self) + + def GetPackedRegisterState(self): + """ get a struct.pack register data """ + return struct.pack('16I', self.eax, self.ebx, self.ecx, + self.edx, self.edi, self.esi, + self.ebp, self.esp, self.ss, + self.eflags, self.eip, self.cs, + self.ds, self.es, self.fs, self.gs + ) + + def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): + """ to be implemented""" + return None + + def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): + """ to be implemented """ + return None + + def ReadRegisterDataFromContinuation(self, continuation_ptr): + self.ResetRegisterValues() + self.eip = continuation_ptr + return self + + class X86_64RegisterSet(object): - """ register info set for x86_64 architecture """ - def __init__(self): - self.register_info = {} - self.register_info['sets'] = ['GPR', 'FPU', 'EXC'] - self.register_info['registers'] = [ - { 'name':'rax' , 'bitsize' : 64, 'offset' : 0, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 0, 'dwarf' : 0}, - { 'name':'rbx' , 'bitsize' : 64, 'offset' : 8, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf' : 3}, - { 'name':'rcx' , 'bitsize' : 64, 'offset' : 16, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf' : 2, 'generic':'arg4', 'alt-name':'arg4', }, - { 'name':'rdx' , 'bitsize' : 64, 'offset' : 24, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf' : 1, 'generic':'arg3', 'alt-name':'arg3', }, - { 'name':'rdi' , 'bitsize' : 64, 'offset' : 32, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf' : 5, 'generic':'arg1', 'alt-name':'arg1', }, - { 'name':'rsi' , 'bitsize' : 64, 'offset' : 40, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf' : 4, 'generic':'arg2', 'alt-name':'arg2', }, - { 'name':'rbp' , 'bitsize' : 64, 'offset' : 48, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf' : 6, 'generic':'fp' , 'alt-name':'fp', }, - { 'name':'rsp' , 'bitsize' : 64, 'offset' : 56, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf' : 7, 'generic':'sp' , 'alt-name':'sp', }, - { 'name':'r8' , 'bitsize' : 64, 'offset' : 64, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 8, 'dwarf' : 8, 'generic':'arg5', 'alt-name':'arg5', }, - { 'name':'r9' , 'bitsize' : 64, 'offset' : 72, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf' : 9, 'generic':'arg6', 'alt-name':'arg6', }, - { 'name':'r10' , 'bitsize' : 64, 'offset' : 80, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 10, 'dwarf' : 10}, - { 'name':'r11' , 'bitsize' : 64, 'offset' : 88, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 11, 'dwarf' : 11}, - { 'name':'r12' , 'bitsize' : 64, 'offset' : 96, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 12, 'dwarf' : 12}, - { 'name':'r13' , 'bitsize' : 64, 'offset' : 104, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 13, 'dwarf' : 13}, - { 'name':'r14' , 'bitsize' : 64, 'offset' : 112, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 14, 'dwarf' : 14}, - { 'name':'r15' , 'bitsize' : 64, 'offset' : 120, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 15, 'dwarf' : 15}, - { 'name':'rip' , 'bitsize' : 64, 'offset' : 128, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 16, 'dwarf' : 16, 'generic':'pc', 'alt-name':'pc' }, - { 'name':'rflags' , 'bitsize' : 64, 'offset' : 136, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'generic':'flags', 'alt-name':'flags' }, - { 'name':'cs' , 
'bitsize' : 64, 'offset' : 144, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, - { 'name':'fs' , 'bitsize' : 64, 'offset' : 152, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, - { 'name':'gs' , 'bitsize' : 64, 'offset' : 160, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, - ] - self.ResetRegisterValues() - - def ResetRegisterValues(self): - """ set all the registers to zero. """ - self.rax = 0 - self.rbx = 0 - self.rcx = 0 - self.rdx = 0 - self.rdi = 0 - self.rsi = 0 - self.rbp = 0 - self.rsp = 0 - self.r8 = 0 - self.r9 = 0 - self.r10 = 0 - self.r11 = 0 - self.r12 = 0 - self.r13 = 0 - self.r14 = 0 - self.r15 = 0 - self.rip = 0 - self.rflags = 0 - self.cs = 0 - self.fs = 0 - self.gs = 0 - def __str__(self): - return """ - rax = {o.rax: <#018x} - rbx = {o.rbx: <#018x} - rcx = {o.rcx: <#018x} - rdx = {o.rdx: <#018x} - rdi = {o.rdi: <#018x} - rsi = {o.rsi: <#018x} - rbp = {o.rbp: <#018x} - rsp = {o.rsp: <#018x} - r8 = {o.r8: <#018x} - r9 = {o.r9: <#018x} - r10 = {o.r10: <#018x} - r11 = {o.r11: <#018x} - r12 = {o.r12: <#018x} - r13 = {o.r13: <#018x} - r14 = {o.r14: <#018x} - r15 = {o.r15: <#018x} - rip = {o.rip: <#018x} - rflags = {o.rflags: <#018x} - cs = {o.cs: <#018x} - fs = {o.fs: <#018x} - gs = {o.gs: <#018x} - """.format(o=self) - - def GetPackedRegisterState(self): - """ get a struct.pack register data for passing to C constructs """ - return struct.pack('21Q', self.rax, self.rbx, self.rcx, self.rdx, self.rdi, - self.rsi, self.rbp, self.rsp, self.r8, self.r9, - self.r10, self.r11, self.r12, self.r13, self.r14, - self.r15, self.rip, self.rflags, self.cs, self.fs, self.gs) - - def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): - saved_state = kernel_version.CreateValueFromExpression(None, '(struct x86_saved_state64 *) '+ str(kdp_state.GetValueAsUnsigned())) - saved_state = saved_state.Dereference() - saved_state = PluginValue(saved_state) - self.ResetRegisterValues() - self.rdi = saved_state.GetChildMemberWithName('rdi').GetValueAsUnsigned() - self.rsi = saved_state.GetChildMemberWithName('rsi').GetValueAsUnsigned() - self.rdx = saved_state.GetChildMemberWithName('rdx').GetValueAsUnsigned() - self.r10 = saved_state.GetChildMemberWithName('r10').GetValueAsUnsigned() - self.r8 = saved_state.GetChildMemberWithName('r8').GetValueAsUnsigned() - self.r9 = saved_state.GetChildMemberWithName('r9').GetValueAsUnsigned() - self.r15 = saved_state.GetChildMemberWithName('r15').GetValueAsUnsigned() - self.r14 = saved_state.GetChildMemberWithName('r14').GetValueAsUnsigned() - self.r13 = saved_state.GetChildMemberWithName('r13').GetValueAsUnsigned() - self.r12 = saved_state.GetChildMemberWithName('r12').GetValueAsUnsigned() - self.r11 = saved_state.GetChildMemberWithName('r11').GetValueAsUnsigned() - self.rbp = saved_state.GetChildMemberWithName('rbp').GetValueAsUnsigned() - self.rbx = saved_state.GetChildMemberWithName('rbx').GetValueAsUnsigned() - self.rcx = saved_state.GetChildMemberWithName('rcx').GetValueAsUnsigned() - self.rax = saved_state.GetChildMemberWithName('rax').GetValueAsUnsigned() - self.rip = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rip').GetValueAsUnsigned() - self.rflags = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rflags').GetValueAsUnsigned() - self.rsp = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rsp').GetValueAsUnsigned() - return self - - def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): - saved_state = kernel_version.CreateValueFromExpression(None, 
'(struct x86_kernel_state *) '+ str(kstack_saved_state_addr)) - saved_state = saved_state.Dereference() - saved_state = PluginValue(saved_state) - self.ResetRegisterValues() - self.rbx = saved_state.GetChildMemberWithName('k_rbx').GetValueAsUnsigned() - self.rsp = saved_state.GetChildMemberWithName('k_rsp').GetValueAsUnsigned() - self.rbp = saved_state.GetChildMemberWithName('k_rbp').GetValueAsUnsigned() - self.r12 = saved_state.GetChildMemberWithName('k_r12').GetValueAsUnsigned() - self.r13 = saved_state.GetChildMemberWithName('k_r13').GetValueAsUnsigned() - self.r14 = saved_state.GetChildMemberWithName('k_r14').GetValueAsUnsigned() - self.r15 = saved_state.GetChildMemberWithName('k_r15').GetValueAsUnsigned() - self.rip = saved_state.GetChildMemberWithName('k_rip').GetValueAsUnsigned() - return self - - def ReadRegisterDataFromContinuation(self, continuation_ptr): - self.ResetRegisterValues() - self.rip = continuation_ptr - return self + """ register info set for x86_64 architecture """ + register_info = { 'sets' : ['GPR'], + 'registers': [ + { 'name':'rax' , 'bitsize' : 64, 'offset' : 0, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 0, 'dwarf' : 0}, + { 'name':'rbx' , 'bitsize' : 64, 'offset' : 8, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf' : 3}, + { 'name':'rcx' , 'bitsize' : 64, 'offset' : 16, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf' : 2, 'generic':'arg4', 'alt-name':'arg4', }, + { 'name':'rdx' , 'bitsize' : 64, 'offset' : 24, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf' : 1, 'generic':'arg3', 'alt-name':'arg3', }, + { 'name':'rdi' , 'bitsize' : 64, 'offset' : 32, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf' : 5, 'generic':'arg1', 'alt-name':'arg1', }, + { 'name':'rsi' , 'bitsize' : 64, 'offset' : 40, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf' : 4, 'generic':'arg2', 'alt-name':'arg2', }, + { 'name':'rbp' , 'bitsize' : 64, 'offset' : 48, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf' : 6, 'generic':'fp' , 'alt-name':'fp', }, + { 'name':'rsp' , 'bitsize' : 64, 'offset' : 56, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf' : 7, 'generic':'sp' , 'alt-name':'sp', }, + { 'name':'r8' , 'bitsize' : 64, 'offset' : 64, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 8, 'dwarf' : 8, 'generic':'arg5', 'alt-name':'arg5', }, + { 'name':'r9' , 'bitsize' : 64, 'offset' : 72, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf' : 9, 'generic':'arg6', 'alt-name':'arg6', }, + { 'name':'r10' , 'bitsize' : 64, 'offset' : 80, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 10, 'dwarf' : 10}, + { 'name':'r11' , 'bitsize' : 64, 'offset' : 88, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 11, 'dwarf' : 11}, + { 'name':'r12' , 'bitsize' : 64, 'offset' : 96, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 12, 'dwarf' : 12}, + { 'name':'r13' , 'bitsize' : 64, 'offset' : 104, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 13, 'dwarf' : 13}, + { 'name':'r14' , 'bitsize' : 64, 'offset' : 112, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 14, 'dwarf' : 14}, + { 'name':'r15' , 'bitsize' : 64, 'offset' : 120, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 15, 'dwarf' : 15}, + { 'name':'rip' , 'bitsize' : 64, 'offset' : 128, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 16, 'dwarf' : 16, 'generic':'pc', 'alt-name':'pc' }, + { 'name':'rflags' , 'bitsize' : 64, 'offset' : 136, 
'encoding':'uint' , 'format':'hex' , 'set': 0, 'generic':'flags', 'alt-name':'flags' }, + { 'name':'cs' , 'bitsize' : 64, 'offset' : 144, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, + { 'name':'fs' , 'bitsize' : 64, 'offset' : 152, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, + { 'name':'gs' , 'bitsize' : 64, 'offset' : 160, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, + ] + } + def __init__(self): + self.ResetRegisterValues() + + @classmethod + def GetRegisterInfo(cls, regnum): + """ serialize the info dictionary for register number regnum as 'key:value;' pairs """ + if regnum < 0 or regnum >= len(cls.register_info['registers']): + return '' + + reginfo = cls.register_info['registers'][regnum] + retval = '' + for i in reginfo.keys(): + v_str = str(reginfo[i]) + if i == 'set': + v_str = 'General Purpose Registers' + retval += "%s:%s;" % (str(i), v_str) + return retval + + + def ResetRegisterValues(self): + """ set all the registers to zero. """ + self.rax = 0 + self.rbx = 0 + self.rcx = 0 + self.rdx = 0 + self.rdi = 0 + self.rsi = 0 + self.rbp = 0 + self.rsp = 0 + self.r8 = 0 + self.r9 = 0 + self.r10 = 0 + self.r11 = 0 + self.r12 = 0 + self.r13 = 0 + self.r14 = 0 + self.r15 = 0 + self.rip = 0 + self.rflags = 0 + self.cs = 0 + self.fs = 0 + self.gs = 0 + + def __str__(self): + return """ + rax = {o.rax: <#018x} + rbx = {o.rbx: <#018x} + rcx = {o.rcx: <#018x} + rdx = {o.rdx: <#018x} + rdi = {o.rdi: <#018x} + rsi = {o.rsi: <#018x} + rbp = {o.rbp: <#018x} + rsp = {o.rsp: <#018x} + r8 = {o.r8: <#018x} + r9 = {o.r9: <#018x} + r10 = {o.r10: <#018x} + r11 = {o.r11: <#018x} + r12 = {o.r12: <#018x} + r13 = {o.r13: <#018x} + r14 = {o.r14: <#018x} + r15 = {o.r15: <#018x} + rip = {o.rip: <#018x} + rflags = {o.rflags: <#018x} + cs = {o.cs: <#018x} + fs = {o.fs: <#018x} + gs = {o.gs: <#018x} + """.format(o=self) + + def GetPackedRegisterState(self): + """ get a struct.pack register data for passing to C constructs """ + return struct.pack('21Q', self.rax, self.rbx, self.rcx, self.rdx, self.rdi, + self.rsi, self.rbp, self.rsp, self.r8, self.r9, + self.r10, self.r11, self.r12, self.r13, self.r14, + self.r15, self.rip, self.rflags, self.cs, self.fs, self.gs) + + def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct x86_saved_state64 *) '+ str(kdp_state.GetValueAsUnsigned())) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.rdi = saved_state.GetChildMemberWithName('rdi').GetValueAsUnsigned() + self.rsi = saved_state.GetChildMemberWithName('rsi').GetValueAsUnsigned() + self.rdx = saved_state.GetChildMemberWithName('rdx').GetValueAsUnsigned() + self.r10 = saved_state.GetChildMemberWithName('r10').GetValueAsUnsigned() + self.r8 = saved_state.GetChildMemberWithName('r8').GetValueAsUnsigned() + self.r9 = saved_state.GetChildMemberWithName('r9').GetValueAsUnsigned() + self.r15 = saved_state.GetChildMemberWithName('r15').GetValueAsUnsigned() + self.r14 = saved_state.GetChildMemberWithName('r14').GetValueAsUnsigned() + self.r13 = saved_state.GetChildMemberWithName('r13').GetValueAsUnsigned() + self.r12 = saved_state.GetChildMemberWithName('r12').GetValueAsUnsigned() + self.r11 = saved_state.GetChildMemberWithName('r11').GetValueAsUnsigned() + self.rbp = saved_state.GetChildMemberWithName('rbp').GetValueAsUnsigned() + self.rbx = saved_state.GetChildMemberWithName('rbx').GetValueAsUnsigned() + self.rcx = saved_state.GetChildMemberWithName('rcx').GetValueAsUnsigned() + self.rax =
saved_state.GetChildMemberWithName('rax').GetValueAsUnsigned() + self.rip = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rip').GetValueAsUnsigned() + self.rflags = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rflags').GetValueAsUnsigned() + self.rsp = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rsp').GetValueAsUnsigned() + return self + + def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct x86_kernel_state *) '+ str(kstack_saved_state_addr)) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.rbx = saved_state.GetChildMemberWithName('k_rbx').GetValueAsUnsigned() + self.rsp = saved_state.GetChildMemberWithName('k_rsp').GetValueAsUnsigned() + self.rbp = saved_state.GetChildMemberWithName('k_rbp').GetValueAsUnsigned() + self.r12 = saved_state.GetChildMemberWithName('k_r12').GetValueAsUnsigned() + self.r13 = saved_state.GetChildMemberWithName('k_r13').GetValueAsUnsigned() + self.r14 = saved_state.GetChildMemberWithName('k_r14').GetValueAsUnsigned() + self.r15 = saved_state.GetChildMemberWithName('k_r15').GetValueAsUnsigned() + self.rip = saved_state.GetChildMemberWithName('k_rip').GetValueAsUnsigned() + return self + + def ReadRegisterDataFromContinuation(self, continuation_ptr): + self.ResetRegisterValues() + self.rip = continuation_ptr + return self def IterateQueue(queue_head, element_ptr_type, element_field_name): - """ iterate over a queue in kernel of type queue_head_t. refer to osfmk/kern/queue.h + """ iterate over a queue in kernel of type queue_head_t. refer to osfmk/kern/queue.h params: queue_head - lldb.SBValue : Value object for queue_head. element_ptr_type - lldb.SBType : a pointer type of the element 'next' points to. Typically a struct like thread, task etc. element_field_name - str : name of the field in target struct. returns: A generator; it does not return and is used for iterating. - SBValue : an object thats of type (element_type) queue_head->next. Always a pointer object + SBValue : an object that is of type (element_ptr_type) queue_head->next. Always a pointer object """ queue_head_addr = 0x0 if queue_head.TypeIsPointerType(): @@ -390,7 +643,6 @@ def IterateQueue(queue_head, element_ptr_type, element_field_name): queue_head_addr = queue_head.GetAddress().GetLoadAddress(osplugin_target_obj) cur_elt = queue_head.GetChildMemberWithName('next') while True: - if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0 or cur_elt.GetValueAsUnsigned() == queue_head_addr: break elt = cur_elt.Cast(element_ptr_type) @@ -398,31 +650,31 @@ def IterateQueue(queue_head, element_ptr_type, element_field_name): cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next') def GetUniqueSessionID(process_obj): - """ Create a unique session identifier. - params: - process_obj: lldb.SBProcess object refering to connected process. - returns: - int - a unique number identified by processid and stopid. - """ - session_key_str = "" - if hasattr(process_obj, "GetUniqueID"): - session_key_str += str(process_obj.GetUniqueID()) + ":" - else: - session_key_str += "0:" + """ Create a unique session identifier. + params: + process_obj: lldb.SBProcess object referring to connected process. + returns: + int - a unique number derived from the process id and stop id.
+ """ + session_key_str = "" + if hasattr(process_obj, "GetUniqueID"): + session_key_str += str(process_obj.GetUniqueID()) + ":" + else: + session_key_str += "0:" - if hasattr(process_obj, "GetStopID"): - session_key_str += str(process_obj.GetStopID()) - else: - session_key_str +="1" + if hasattr(process_obj, "GetStopID"): + session_key_str += str(process_obj.GetStopID()) + else: + session_key_str +="1" - return hash(session_key_str) + return hash(session_key_str) -(archX86_64, archARMv7_family, archI386) = ("x86_64", ("armv7", "armv7s") , "i386") +(archX86_64, archARMv7_family, archI386, archARMv8) = ("x86_64", ("armv7", "armv7s", "armv7k") , "i386", "arm64") class OperatingSystemPlugIn(object): """Class that provides data for an instance of a LLDB 'OperatingSystemPython' plug-in class""" - + def __init__(self, process): '''Initialization needs a valid.SBProcess object''' self.process = None @@ -441,9 +693,14 @@ class OperatingSystemPlugIn(object): self.kernel_stack_size = self._target.FindGlobalVariables('kernel_stack_size', 0).GetValueAtIndex(0).GetValueAsUnsigned() self.kernel_context_size = 0 self.connected_over_kdp = False + # connected_to_debugserver signifies if we are connected to astris or other gdbserver instance + # that has the correct thread state for on core threads. For kdp and coredumps we rely on in memory + # state of threads. + self.connected_to_debugserver = True plugin_string = self.process.GetPluginName().lower() if plugin_string.find("kdp") >=0: self.connected_over_kdp = True + self.connected_to_debugserver = False #print "version", self.version, "kernel_stack_size", self.kernel_stack_size, "context_size", self.kernel_context_size self.threads = None # Will be an dictionary containing info for each thread triple = self.process.target.triple @@ -451,23 +708,38 @@ class OperatingSystemPlugIn(object): self.target_arch = "" self.kernel_context_size = 0 if arch == archX86_64 : - self.target_arch = archX86_64 - print "Target arch: x86_64" - self.register_set = X86_64RegisterSet() - self.kernel_context_size = self._target.FindFirstType('x86_kernel_state').GetByteSize() + self.target_arch = archX86_64 + print "Target arch: x86_64" + self.register_set = X86_64RegisterSet() + self.kernel_context_size = self._target.FindFirstType('x86_kernel_state').GetByteSize() elif arch in archARMv7_family : - self.target_arch = arch - print "Target arch: " + self.target_arch - self.register_set = Armv7_RegisterSet() + self.target_arch = arch + print "Target arch: " + self.target_arch + self.register_set = Armv7_RegisterSet() + elif arch == archARMv8: + self.target_arch = arch + print "Target arch: " + self.target_arch + self.register_set = Armv8_RegisterSet() + # connection intel arm + # kdp Memory Memory + # gdb Server Server + # coredump Memory Server + if not self.connected_over_kdp : + if plugin_string.find('core') >= 0 and self.target_arch == archX86_64: + self.connected_to_debugserver = False self.registers = self.register_set.register_info - + if self.connected_to_debugserver: + print "Connected to live debugserver or arm core. Will associate on-core threads to registers reported by server." + else: + print "Instantiating threads completely from saved state in memory." 
+ def create_thread(self, tid, context): th_ptr = context th = self.version.CreateValueFromExpression(str(th_ptr),'(struct thread *)' + str(th_ptr)) thread_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned() if tid != thread_id: - print "FATAL ERROR: Creating thread from memory 0x%x with tid in mem=%d when requested tid = %d " % (context, thread_id, tid) - return None + print "FATAL ERROR: Creating thread from memory 0x%x with tid in mem=%d when requested tid = %d " % (context, thread_id, tid) + return None thread_obj = { 'tid' : thread_id, 'ptr' : th.GetValueAsUnsigned(), 'name' : hex(th.GetValueAsUnsigned()).rstrip('L'), @@ -476,8 +748,8 @@ class OperatingSystemPlugIn(object): 'stop_reason' : 'none' } if self.current_session_id != GetUniqueSessionID(self.process): - self.thread_cache = {} - self.current_session_id = GetUniqueSessionID(self.process) + self.thread_cache = {} + self.current_session_id = GetUniqueSessionID(self.process) self.thread_cache[tid] = thread_obj return thread_obj @@ -491,110 +763,116 @@ class OperatingSystemPlugIn(object): kdp_state = kdp.GetChildMemberWithName('saved_state') kdp_thread = kdp.GetChildMemberWithName('kdp_thread') if kdp_thread and kdp_thread.GetValueAsUnsigned() != 0: - self.kdp_thread = kdp_thread - self.kdp_state = kdp_state - kdp_thid = kdp_thread.GetChildMemberWithName('thread_id').GetValueAsUnsigned() - self.create_thread(kdp_thid, kdp_thread.GetValueAsUnsigned()) - self.thread_cache[kdp_thid]['core']=0 - retval = [self.thread_cache[kdp_thid]] - return retval + self.kdp_thread = kdp_thread + self.kdp_state = kdp_state + kdp_thid = kdp_thread.GetChildMemberWithName('thread_id').GetValueAsUnsigned() + self.create_thread(kdp_thid, kdp_thread.GetValueAsUnsigned()) + self.thread_cache[kdp_thid]['core']=0 + retval = [self.thread_cache[kdp_thid]] + return retval else: - print "FATAL FAILURE: Unable to find kdp_thread state for this connection." - return [] + print "FATAL FAILURE: Unable to find kdp_thread state for this connection." + return [] num_threads = self._target.FindGlobalVariables('threads_count',1).GetValueAtIndex(0).GetValueAsUnsigned() #In case we are caught before threads are initialized. Fallback to threads known by astris/gdb server. 
if num_threads <=0 : return [] - + self.current_session_id = GetUniqueSessionID(self.process) self.threads = [] self.thread_cache = {} self.processors = [] try: - processor_list_val = PluginValue(self._target.FindGlobalVariables('processor_list',1).GetValueAtIndex(0)) - while processor_list_val.IsValid() and processor_list_val.GetValueAsUnsigned() !=0 : - th = processor_list_val.GetChildMemberWithName('active_thread') - th_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned() - cpu_id = processor_list_val.GetChildMemberWithName('cpu_id').GetValueAsUnsigned() - self.processors.append({'active_thread': th.GetValueAsUnsigned(), 'cpu_id': cpu_id}) - self.create_thread(th_id, th.GetValueAsUnsigned()) - self.thread_cache[th_id]['core'] = cpu_id - nth = self.thread_cache[th_id] - print "Found 0x%x on logical cpu %d" % ( nth['ptr'], nth['core']) - self.threads.append(nth) - self.thread_cache[nth['tid']] = nth - processor_list_val = processor_list_val.GetChildMemberWithName('processor_list') + processor_list_val = PluginValue(self._target.FindGlobalVariables('processor_list',1).GetValueAtIndex(0)) + while processor_list_val.IsValid() and processor_list_val.GetValueAsUnsigned() !=0 : + th = processor_list_val.GetChildMemberWithName('active_thread') + th_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned() + cpu_id = processor_list_val.GetChildMemberWithName('cpu_id').GetValueAsUnsigned() + self.processors.append({'active_thread': th.GetValueAsUnsigned(), 'cpu_id': cpu_id}) + self.create_thread(th_id, th.GetValueAsUnsigned()) + if self.connected_to_debugserver: + self.thread_cache[th_id]['core'] = cpu_id + self.thread_cache[th_id]['queue'] = "cpu-%d" % int(cpu_id) + nth = self.thread_cache[th_id] + self.threads.append(nth) + self.thread_cache[nth['tid']] = nth + processor_list_val = processor_list_val.GetChildMemberWithName('processor_list') except KeyboardInterrupt, ke: - print "OS Plugin Interrupted during thread loading process. \nWARNING:Thread registers and backtraces may not be accurate." - return self.threads - + print "OS Plugin Interrupted during thread loading process. \nWARNING:Thread registers and backtraces may not be accurate." 
+ return self.threads + if hasattr(self.process, 'CreateOSPluginThread'): - return self.threads + return self.threads - # FIXME remove legacy code + # FIXME remove legacy code try: - thread_q_head = self._target.FindGlobalVariables('threads', 0).GetValueAtIndex(0) - thread_type = self._target.FindFirstType('thread') - thread_ptr_type = thread_type.GetPointerType() - for th in IterateQueue(thread_q_head, thread_ptr_type, 'threads'): - th_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned() - self.create_thread(th_id, th.GetValueAsUnsigned()) - nth = self.thread_cache[th_id] - for cputhread in self.processors: - if cputhread['active_thread'] == nth['ptr']: - nth['core'] = cputhread['cpu_id'] - #print "Found 0x%x on logical cpu %d" % ( nth['ptr'], cputhread['cpu_id']) - self.threads.append( nth ) + thread_q_head = self._target.FindGlobalVariables('threads', 0).GetValueAtIndex(0) + thread_type = self._target.FindFirstType('thread') + thread_ptr_type = thread_type.GetPointerType() + for th in IterateQueue(thread_q_head, thread_ptr_type, 'threads'): + th_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned() + self.create_thread(th_id, th.GetValueAsUnsigned()) + nth = self.thread_cache[th_id] + for cputhread in self.processors: + if cputhread['active_thread'] == nth['ptr']: + nth['core'] = cputhread['cpu_id'] + self.threads.append( nth ) except KeyboardInterrupt, ke: - print "OS Plugin Interrupted during thread loading process. \nWARNING:Thread registers and backtraces may not be accurate." - return self.threads - # end legacy code + print "OS Plugin Interrupted during thread loading process. \nWARNING:Thread registers and backtraces may not be accurate." + return self.threads + # end legacy code return self.threads - + def get_register_info(self): if self.registers == None: - print "Register Information not found " + print "Register Information not found " return self.register_set.register_info - + def get_register_data(self, tid): #print "searching for tid", tid thobj = None try: - if self.current_session_id != GetUniqueSessionID(self.process): - self.thread_cache = {} - self.current_session_id = GetUniqueSessionID(self.process) - - if tid in self.thread_cache.keys(): - thobj = self.version.CreateValueFromExpression(self.thread_cache[tid]['name'], '(struct thread *)' + str(self.thread_cache[tid]['ptr'])) - regs = self.register_set - if thobj == None : - print "FATAL ERROR: Could not find thread with id %d" % tid - regs.ResetRegisterValues() - return regs.GetPackedRegisterState() - - if self.kdp_thread and self.kdp_thread.GetValueAsUnsigned() == thobj.GetValueAsUnsigned(): - regs.ReadRegisterDataFromKDPSavedState(self.kdp_state, self.version) - return regs.GetPackedRegisterState() - if int(PluginValue(thobj).GetChildMemberWithName('kernel_stack').GetValueAsUnsigned()) != 0 : - if self.target_arch == archX86_64 : - # we do have a stack so lets get register information - saved_state_addr = PluginValue(thobj).GetChildMemberWithName('kernel_stack').GetValueAsUnsigned() + self.kernel_stack_size - self.kernel_context_size - regs.ReadRegisterDataFromKernelStack(saved_state_addr, self.version) - return regs.GetPackedRegisterState() - elif self.target_arch in archARMv7_family and int(PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned()) != 0: - #we have stack on the machine.kstackptr. 
- saved_state_addr = PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned() - regs.ReadRegisterDataFromKernelStack(saved_state_addr, self.version) - return regs.GetPackedRegisterState() - elif self.target_arch == archX86_64 or self.target_arch in archARMv7_family: - regs.ReadRegisterDataFromContinuation( PluginValue(thobj).GetChildMemberWithName('continuation').GetValueAsUnsigned()) - return regs.GetPackedRegisterState() - #incase we failed very miserably + if self.current_session_id != GetUniqueSessionID(self.process): + self.thread_cache = {} + self.current_session_id = GetUniqueSessionID(self.process) + + if tid in self.thread_cache.keys(): + thobj = self.version.CreateValueFromExpression(self.thread_cache[tid]['name'], '(struct thread *)' + str(self.thread_cache[tid]['ptr'])) + regs = self.register_set + if thobj == None : + print "FATAL ERROR: Could not find thread with id %d" % tid + regs.ResetRegisterValues() + return regs.GetPackedRegisterState() + + if self.kdp_thread and self.kdp_thread.GetValueAsUnsigned() == thobj.GetValueAsUnsigned(): + regs.ReadRegisterDataFromKDPSavedState(self.kdp_state, self.version) + return regs.GetPackedRegisterState() + if int(PluginValue(thobj).GetChildMemberWithName('kernel_stack').GetValueAsUnsigned()) != 0 : + if self.target_arch == archX86_64 : + # we do have a stack so lets get register information + saved_state_addr = PluginValue(thobj).GetChildMemberWithName('kernel_stack').GetValueAsUnsigned() + self.kernel_stack_size - self.kernel_context_size + regs.ReadRegisterDataFromKernelStack(saved_state_addr, self.version) + return regs.GetPackedRegisterState() + elif self.target_arch in archARMv7_family and int(PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned()) != 0: + #we have stack on the machine.kstackptr. + saved_state_addr = PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned() + regs.ReadRegisterDataFromKernelStack(saved_state_addr, self.version) + return regs.GetPackedRegisterState() + elif self.target_arch == archARMv8 and int(PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned()) != 0: + saved_state_addr = PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned() + arm_ctx = PluginValue(self.version.CreateValueFromExpression(None, '(struct arm_context *) ' + str(saved_state_addr))) + ss_64_addr = arm_ctx.GetChildMemberWithName('ss').GetChildMemberWithName('uss').GetChildMemberWithName('ss_64').GetLoadAddress() + regs.ReadRegisterDataFromKernelStack(ss_64_addr, self.version) + return regs.GetPackedRegisterState() + elif self.target_arch == archX86_64 or self.target_arch in archARMv7_family or self.target_arch == archARMv8: + regs.ReadRegisterDataFromContinuation( PluginValue(thobj).GetChildMemberWithName('continuation').GetValueAsUnsigned()) + return regs.GetPackedRegisterState() + #incase we failed very miserably except KeyboardInterrupt, ke: - print "OS Plugin Interrupted during thread register load. \nWARNING:Thread registers and backtraces may not be accurate. for tid = %d" % tid + print "OS Plugin Interrupted during thread register load. \nWARNING:Thread registers and backtraces may not be accurate. 
for tid = %d" % tid regs.ResetRegisterValues() print "FATAL ERROR: Failed to get register state for thread id 0x%x " % tid print thobj return regs.GetPackedRegisterState() - + diff --git a/tools/lldbmacros/core/standard.py b/tools/lldbmacros/core/standard.py index 547c49c21..70039d2da 100644 --- a/tools/lldbmacros/core/standard.py +++ b/tools/lldbmacros/core/standard.py @@ -107,6 +107,7 @@ class CommandOutput(object): self.fname=os.path.normpath(os.path.expanduser(a.strip())) self.fhandle=open(self.fname,"w") print "saving results in file ",str(a) + self.fhandle.write("(lldb)%s \n" % " ".join(cmdargs)) elif o == "-s" and len(a) > 0: self.reg = re.compile(a.strip(),re.MULTILINE|re.DOTALL) self.FILTER=True diff --git a/tools/lldbmacros/core/syntax_checker.py b/tools/lldbmacros/core/syntax_checker.py index 223b1e988..f9a7142b5 100755 --- a/tools/lldbmacros/core/syntax_checker.py +++ b/tools/lldbmacros/core/syntax_checker.py @@ -17,12 +17,12 @@ tabs_search_rex = re.compile("^\s*\t+",re.MULTILINE|re.DOTALL) if __name__ == "__main__": if len(sys.argv) < 2: - print "Error: Unknown arguments" + print >>sys.stderr, "Error: Unknown arguments" print helpdoc sys.exit(1) for fname in sys.argv[1:]: if not os.path.exists(fname): - print "Error: Cannot recognize %s as a file" % fname + print >>sys.stderr, "Error: Cannot recognize %s as a file" % fname sys.exit(1) if fname.split('.')[-1] != 'py': print "Note: %s is not a valid python file. Skipping." % fname @@ -34,17 +34,17 @@ if __name__ == "__main__": for linedata in strdata: lineno += 1 if len(tabs_search_rex.findall(linedata)) > 0 : - print "Error: Found a TAB character at %s:%d" % (fname, lineno) + print >>sys.stderr, "Error: Found a TAB character at %s:%d" % (fname, lineno) tab_check_status = False if tab_check_status == False: - print "Error: Syntax check failed. Please fix the errors and try again." + print >>sys.stderr, "Error: Syntax check failed. Please fix the errors and try again." sys.exit(1) #now check for error in compilation try: compile_result = py_compile.compile(fname, cfile="/dev/null", doraise=True) except py_compile.PyCompileError as exc: print str(exc) - print "Error: Compilation failed. Please fix the errors and try again." + print >>sys.stderr, "Error: Compilation failed. Please fix the errors and try again." sys.exit(1) print "Success: Checked %s. No syntax errors found." % fname sys.exit(0) diff --git a/tools/lldbmacros/ioreg.py b/tools/lldbmacros/ioreg.py index 930dcc1ab..44f3aaf9c 100644 --- a/tools/lldbmacros/ioreg.py +++ b/tools/lldbmacros/ioreg.py @@ -771,3 +771,100 @@ def WriteIOPortInt(addr, numbytes, value, lcpu): else: print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) +@lldb_command('showinterruptcounts') +def showinterruptcounts(cmd_args=None): + """ Shows event source based interrupt counts by nub name and interrupt index. + Does not cover interrupts that are not event source based. Will report 0 + if interrupt accounting is disabled. + """ + + header_format = "{0: <20s} {1: >5s} {2: >20s}" + content_format = "{0: <20s} {1: >5d} {2: >20d}" + + print header_format.format("Name", "Index", "Count") + + for i in kern.interrupt_stats: + owner = Cast(i.owner, 'IOInterruptEventSource *') + nub = Cast(owner.provider, 'IORegistryEntry *') + name = None + + # To uniquely identify an interrupt, we need the nub name and the index. The index + # is stored with the stats object, but we need to retrieve the name. 
+ + registryTable = nub.fRegistryTable + propertyTable = nub.fPropertyTable + + name = LookupKeyInOSDict(registryTable, kern.globals.gIOServicePlane.nameKey) + if name is None: + name = LookupKeyInOSDict(registryTable, kern.globals.gIONameKey) + if name is None: + name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey) + + if name is None: + nub_name = "Unknown" + else: + nub_name = GetString(Cast(name, 'OSString *')) + + # We now have everything we need; spew the requested data. + + interrupt_index = i.interruptIndex + first_level_count = i.interruptStatistics[0] + + print content_format.format(nub_name, interrupt_index, first_level_count) + + return True + +@lldb_command('showinterruptstats') +def showinterruptstats(cmd_args=None): + """ Shows event source based interrupt statistics by nub name and interrupt index. + Does not cover interrupts that are not event source based. Will report 0 + if interrupt accounting is disabled, or if specific statistics are disabled. + Time is reported in ticks of mach_absolute_time. Statistics are: + + Interrupt Count: Number of times the interrupt context handler was run + Interrupt Time: Total time spent in the interrupt context handler (if any) + Workloop Count: Number of times the kernel context handler was run + Workloop CPU Time: Total CPU time spent running the kernel context handler + Workloop Time: Total time spent running the kernel context handler + """ + + header_format = "{0: <20s} {1: >5s} {2: >20s} {3: >20s} {4: >20s} {5: >20s} {6: >20s}" + content_format = "{0: <20s} {1: >5d} {2: >20d} {3: >20d} {4: >20d} {5: >20d} {6: >20d}" + + print header_format.format("Name", "Index", "Interrupt Count", "Interrupt Time", "Workloop Count", "Workloop CPU Time", "Workloop Time") + + for i in kern.interrupt_stats: + owner = Cast(i.owner, 'IOInterruptEventSource *') + nub = Cast(owner.provider, 'IORegistryEntry *') + name = None + + # To uniquely identify an interrupt, we need the nub name and the index. The index + # is stored with the stats object, but we need to retrieve the name. + + registryTable = nub.fRegistryTable + propertyTable = nub.fPropertyTable + + name = LookupKeyInOSDict(registryTable, kern.globals.gIOServicePlane.nameKey) + if name is None: + name = LookupKeyInOSDict(registryTable, kern.globals.gIONameKey) + if name is None: + name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey) + + if name is None: + nub_name = "Unknown" + else: + nub_name = GetString(Cast(name, 'OSString *')) + + # We now have everything we need; spew the requested data. 
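+ # (interruptStatistics indices, as consumed below: [0] interrupt count,
+ # [1] workloop count, [2] interrupt time, [3] workloop CPU time,
+ # [4] workloop system time; times are in mach_absolute_time ticks.)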
+ + interrupt_index = i.interruptIndex + first_level_count = i.interruptStatistics[0] + second_level_count = i.interruptStatistics[1] + first_level_time = i.interruptStatistics[2] + second_level_cpu_time = i.interruptStatistics[3] + second_level_system_time = i.interruptStatistics[4] + + print content_format.format(nub_name, interrupt_index, first_level_count, first_level_time, second_level_count, second_level_cpu_time, second_level_system_time) + + return True + diff --git a/tools/lldbmacros/ipc.py b/tools/lldbmacros/ipc.py index db09a4b46..ec783959f 100644 --- a/tools/lldbmacros/ipc.py +++ b/tools/lldbmacros/ipc.py @@ -5,6 +5,8 @@ from xnu import * import sys, shlex from utils import * from process import * +from atm import * +from bank import * import xnudefines @header("{0: <20s} {1: <6s} {2: <6s} {3: <10s} {4: <15s}".format("task", "pid", '#acts', "tablesize", "command")) @@ -78,13 +80,13 @@ def GetPortDestProc(portp): destprocp = Cast(tsk.bsd_info, 'struct proc *') out_str = "{0:s}({1: to find the error. \n\n" # EndMacro: showallrights @@ -564,7 +581,7 @@ def ShowMQueue(cmd_args=None): mqueue = kern.GetValueFromAddress(cmd_args[0], 'struct ipc_mqueue *') wq_type = mqueue.data.pset.set_queue.wqs_wait_queue.wq_type if int(wq_type) == 3: - psetoff = getfieldoffset('struct ipc_pset *', 'ips_messages') + psetoff = getfieldoffset('struct ipc_pset', 'ips_messages') pset = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(psetoff) print GetPortSetSummary.header + GetPortSetSummary(kern.GetValueFromAddress(pset, 'struct ipc_pset *')) if int(wq_type) == 2: @@ -573,6 +590,20 @@ def ShowMQueue(cmd_args=None): print GetPortSummary.header + GetPortSummary(kern.GetValueFromAddress(port, 'struct ipc_port *')) # EndMacro: showmqueue +# Macro: showkmsg: +@lldb_command('showkmsg') +def ShowKMSG(cmd_args=[]): + """ Show detail information about a structure + Usage: (lldb) showkmsg + """ + if not cmd_args: + raise ArgumentError('Invalid arguments') + kmsg = kern.GetValueFromAddress(cmd_args[0], 'ipc_kmsg_t') + print GetKMsgSummary.header + print GetKMsgSummary(kmsg) + +# EndMacro: showkmsg + # Macro: showpset @lldb_command('showpset') def ShowPSet(cmd_args=None): @@ -587,3 +618,443 @@ def ShowPSet(cmd_args=None): print GetPortSetSummary.header + GetPortSetSummary(kern.GetValueFromAddress(cmd_args[0], 'ipc_pset *')) # EndMacro: showpset +# IPC importance inheritance related macros. + +@lldb_command('showalliits') +def ShowAllIITs(cmd_args=[], cmd_options={}): + """ Development only macro. Show list of all iits allocated in the system. 
""" + try: + iit_queue = kern.globals.global_iit_alloc_queue + except ValueError: + print "This debug macro is only available in development or debug kernels" + return + + print GetIPCImportantTaskSummary.header + for iit in IterateQueue(iit_queue, 'struct ipc_importance_task *', 'iit_allocation'): + print GetIPCImportantTaskSummary(iit) + return + +@header("{: <18s} {: <3s} {: <18s} {: <20s} {: <18s} {: <8s}".format("ipc_imp_inherit", "don", "to_task", "proc_name", "from_elem", "depth")) +@lldb_type_summary(['ipc_importance_inherit *', 'ipc_importance_inherit_t']) +def GetIPCImportanceInheritSummary(iii): + """ describes iii object of type ipc_importance_inherit_t * """ + out_str = "" + fmt = "{o: <#018x} {don: <3s} {o.iii_to_task.iit_task: <#018x} {task_name: <20s} {o.iii_from_elem: <#018x} {o.iii_depth: <#08x}" + donating_str = "" + if unsigned(iii.iii_donating): + donating_str = "DON" + taskname = GetProcNameForTask(iii.iii_to_task.iit_task) + if hasattr(iii.iii_to_task, 'iit_bsd_pid'): + taskname = "({:d}) {:s}".format(iii.iii_to_task.iit_bsd_pid, iii.iii_to_task.iit_procname) + out_str += fmt.format(o=iii, task_name = taskname, don=donating_str) + return out_str + +@static_var('recursion_count', 0) +@header("{: <18s} {: <4s} {: <8s} {: <8s} {: <18s} {: <18s}".format("iie", "type", "refs", "made", "#kmsgs", "#inherits")) +@lldb_type_summary(['ipc_importance_elem *']) +def GetIPCImportanceElemSummary(iie): + """ describes an ipc_importance_elem * object """ + + if GetIPCImportanceElemSummary.recursion_count > 500: + GetIPCImportanceElemSummary.recursion_count = 0 + return "Recursion of 500 reached" + + out_str = '' + fmt = "{: <#018x} {: <4s} {: <8d} {: <8d} {: <#018x} {: <#018x}" + type_str = 'TASK' + if unsigned(iie.iie_bits) & 0x80000000: + type_str = "INH" + refs = unsigned(iie.iie_bits) & 0x7fffffff + made_refs = unsigned(iie.iie_made) + kmsg_count = sum(1 for i in IterateQueue(iie.iie_kmsgs, 'struct ipc_kmsg *', 'ikm_inheritance')) + inherit_count = sum(1 for i in IterateQueue(iie.iie_inherits, 'struct ipc_importance_inherit *', 'iii_inheritance')) + out_str += fmt.format(iie, type_str, refs, made_refs, kmsg_count, inherit_count) + if config['verbosity'] > vHUMAN: + if kmsg_count > 0: + out_str += "\n\t"+ GetKMsgSummary.header + for k in IterateQueue(iie.iie_kmsgs, 'struct ipc_kmsg *', 'ikm_inheritance'): + out_str += "\t" + "{: <#018x}".format(k.ikm_header.msgh_remote_port) + ' ' + GetKMsgSummary(k).lstrip() + out_str += "\n" + if inherit_count > 0: + out_str += "\n\t" + GetIPCImportanceInheritSummary.header + "\n" + for i in IterateQueue(iie.iie_inherits, 'struct ipc_importance_inherit *', 'iii_inheritance'): + out_str += "\t" + GetIPCImportanceInheritSummary(i) + "\n" + out_str += "\n" + if type_str == "INH": + iii = Cast(iie, 'struct ipc_importance_inherit *') + out_str += "Inherit from: " + GetIPCImportanceElemSummary(iii.iii_from_elem) + + return out_str + +@header("{: <18s} {: <18s} {: <20s}".format("iit", "task", "name")) +@lldb_type_summary(['ipc_importance_task *']) +def GetIPCImportantTaskSummary(iit): + """ iit is a ipc_importance_task value object. 
+ """ + fmt = "{: <#018x} {: <#018x} {: <20s}" + out_str='' + pname = GetProcNameForTask(iit.iit_task) + if hasattr(iit, 'iit_bsd_pid'): + pname = "({:d}) {:s}".format(iit.iit_bsd_pid, iit.iit_procname) + out_str += fmt.format(iit, iit.iit_task, pname) + return out_str + +@lldb_command('showallimportancetasks') +def ShowIPCImportanceTasks(cmd_args=[], cmd_options={}): + """ display a list of all tasks with ipc importance information. + Usage: (lldb) showallimportancetasks + Tip: add "-v" to see detailed information on each kmsg or inherit elems + """ + print ' ' + GetIPCImportantTaskSummary.header + ' ' + GetIPCImportanceElemSummary.header + for t in kern.tasks: + s = "" + if unsigned(t.task_imp_base): + s += ' ' + GetIPCImportantTaskSummary(t.task_imp_base) + s += ' ' + GetIPCImportanceElemSummary(addressof(t.task_imp_base.iit_elem)) + print s + +@lldb_command('showipcimportance', '') +def ShowIPCImportance(cmd_args=[], cmd_options={}): + """ Describe an importance from argument. + Usage: (lldb) showimportance + """ + if not cmd_args: + raise ArgumentError("Please provide valid argument") + + elem = kern.GetValueFromAddress(cmd_args[0], 'ipc_importance_elem_t') + print GetIPCImportanceElemSummary.header + print GetIPCImportanceElemSummary(elem) + +@header("{: <18s} {: <10s} {: <18s} {: <18s} {: <8s} {: <5s} {: <5s} {: <5s}".format("ivac", "refs", "port", "tbl", "tblsize", "index", "Grow", "freelist")) +@lldb_type_summary(['ipc_voucher_attr_control *', 'ipc_voucher_attr_control_t']) +def GetIPCVoucherAttrControlSummary(ivac): + """ describes a voucher attribute control settings """ + out_str = "" + fmt = "{c: <#018x} {c.ivac_refs: <10d} {c.ivac_port: <#018x} {c.ivac_table: <#018x} {c.ivac_table_size: <8d} {c.ivac_key_index: <5d} {growing: <5s} {c.ivac_freelist: <5d}" + growing_str = "" + + if unsigned(ivac) == 0: + return "{: <#018x}".format(ivac) + + if unsigned(ivac.ivac_is_growing): + growing_str = "Y" + out_str += fmt.format(c=ivac, growing = growing_str) + return out_str + +@lldb_command('showivac','') +def ShowIPCVoucherAttributeControl(cmd_args=[], cmd_options={}): + """ Show summary of voucher attribute contols. 
+ Usage: (lldb) showivac + """ + if not cmd_args: + raise ArgumentError("Please provide correct arguments.") + ivac = kern.GetValueFromAddress(cmd_args[0], 'ipc_voucher_attr_control_t') + print GetIPCVoucherAttrControlSummary.header + print GetIPCVoucherAttrControlSummary(ivac) + if config['verbosity'] > vHUMAN: + cur_entry_index = 0 + last_entry_index = unsigned(ivac.ivac_table_size) + print "index " + GetIPCVoucherAttributeEntrySummary.header + while cur_entry_index < last_entry_index: + print "{: <5d} ".format(cur_entry_index) + GetIPCVoucherAttributeEntrySummary(addressof(ivac.ivac_table[cur_entry_index])) + cur_entry_index += 1 + + + + +@header("{: <18s} {: <30s} {: <30s} {: <30s} {: <30s} {: <30s}".format("ivam", "get_value_fn", "extract_fn", "release_value_fn", "command_fn", "release_fn")) +@lldb_type_summary(['ipc_voucher_attr_manager *', 'ipc_voucher_attr_manager_t']) +def GetIPCVoucherAttrManagerSummary(ivam): + """ describes a voucher attribute manager settings """ + out_str = "" + fmt = "{: <#018x} {: <30s} {: <30s} {: <30s} {: <30s} {: <30s}" + + if unsigned(ivam) == 0 : + return "{: <#018x}".format(ivam) + + get_value_fn = kern.Symbolicate(unsigned(ivam.ivam_get_value)) + extract_fn = kern.Symbolicate(unsigned(ivam.ivam_extract_content)) + release_value_fn = kern.Symbolicate(unsigned(ivam.ivam_release_value)) + command_fn = kern.Symbolicate(unsigned(ivam.ivam_command)) + release_fn = kern.Symbolicate(unsigned(ivam.ivam_release)) + out_str += fmt.format(ivam, get_value_fn, extract_fn, release_value_fn, command_fn, release_fn) + return out_str + + + +@header("{: <18s} {: <10s} {:s} {:s}".format("ivgte", "key", GetIPCVoucherAttrControlSummary.header.strip(), GetIPCVoucherAttrManagerSummary.header.strip())) +@lldb_type_summary(['ipc_voucher_global_table_element *', 'ipc_voucher_global_table_element_t']) +def GetIPCVoucherGlobalTableElementSummary(ivgte): + """ describes a ipc_voucher_global_table_element object """ + out_str = "" + fmt = "{g: <#018x} {g.ivgte_key: <10d} {ctrl_s:s} {mgr_s:s}" + out_str += fmt.format(g=ivgte, ctrl_s=GetIPCVoucherAttrControlSummary(ivgte.ivgte_control), mgr_s=GetIPCVoucherAttrManagerSummary(ivgte.ivgte_manager)) + return out_str + +@lldb_command('showglobalvouchertable', '') +def ShowGlobalVoucherTable(cmd_args=[], cmd_options={}): + """ show detailed information of all voucher attribute managers registered with vouchers system + Usage: (lldb) showglobalvouchertable + """ + entry_size = sizeof(kern.globals.iv_global_table[0]) + elems = sizeof(kern.globals.iv_global_table) / entry_size + print GetIPCVoucherGlobalTableElementSummary.header + for i in range(elems): + elt = addressof(kern.globals.iv_global_table[i]) + print GetIPCVoucherGlobalTableElementSummary(elt) + +# Type summaries for Bag of Bits. + +@lldb_type_summary(['user_data_value_element', 'user_data_element_t']) +@header("{0: <20s} {1: <16s} {2: <20s} {3: <20s} {4: <16s} {5: <20s}".format("user_data_ve", "maderefs", "checksum", "hash value", "size", "data")) +def GetBagofBitsElementSummary(data_element): + """ Summarizes the Bag of Bits element + params: data_element = value of the object of type user_data_value_element_t + returns: String with summary of the type. 
+ """ + format_str = "{0: <#020x} {1: <16d} {2: <#020x} {3: <#020x} {4: <16d}" + out_string = format_str.format(data_element, unsigned(data_element.e_made), data_element.e_sum, data_element.e_hash, unsigned(data_element.e_size)) + out_string += " 0x" + + for i in range(0, (unsigned(data_element.e_size) - 1)): + out_string += "{:02x}".format(int(data_element.e_data[i])) + return out_string + +def GetIPCHandleSummary(handle_ptr): + """ converts a handle value inside a voucher attribute table to ipc element and returns appropriate summary. + params: handle_ptr - uint64 number stored in handle of voucher. + returns: str - string summary of the element held in internal structure + """ + elem = kern.GetValueFromAddress(handle_ptr, 'ipc_importance_elem_t') + if elem.iie_bits & 0x80000000 : + iie = Cast(elem, 'struct ipc_importance_inherit *') + return GetIPCImportanceInheritSummary(iie) + else: + iit = Cast(elem, 'struct ipc_importance_task *') + return GetIPCImportantTaskSummary(iit) + +def GetATMHandleSummary(handle_ptr): + """ Convert a handle value to atm value and returns corresponding summary of its fields. + params: handle_ptr - uint64 number stored in handle of voucher + returns: str - summary of atm value + """ + elem = kern.GetValueFromAddress(handle_ptr, 'atm_value *') + return GetATMValueSummary(elem) + +def GetBankHandleSummary(handle_ptr): + """ converts a handle value inside a voucher attribute table to bank element and returns appropriate summary. + params: handle_ptr - uint64 number stored in handle of voucher. + returns: str - summary of bank element + """ + elem = kern.GetValueFromAddress(handle_ptr, 'bank_element_t') + if elem.be_type & 1 : + ba = Cast(elem, 'struct bank_account *') + return GetBankAccountSummary(ba) + else: + bt = Cast(elem, 'struct bank_task *') + return GetBankTaskSummary(bt) + +def GetBagofBitsHandleSummary(handle_ptr): + """ Convert a handle value to bag of bits value and returns corresponding summary of its fields. + params: handle_ptr - uint64 number stored in handle of voucher + returns: str - summary of bag of bits element + """ + elem = kern.GetValueFromAddress(handle_ptr, 'user_data_element_t') + return GetBagofBitsElementSummary(elem) + +@static_var('attr_managers',{1: GetATMHandleSummary, 2: GetIPCHandleSummary, 3: GetBankHandleSummary, 7: GetBagofBitsHandleSummary}) +def GetHandleSummaryForKey(handle_ptr, key_num): + """ Get a summary of handle pointer from the voucher attribute manager. + For example key 1 -> ATM and it puts atm_value_t in the handle. So summary of it would be atm value and refs etc. + key 2 -> ipc and it puts either ipc_importance_inherit_t or ipc_important_task_t. + key 3 -> Bank and it puts either bank_task_t or bank_account_t. + key 7 -> Bag of Bits and it puts user_data_element_t in handle. So summary of it would be Bag of Bits content and refs etc. + """ + key_num = int(key_num) + if key_num not in GetHandleSummaryForKey.attr_managers: + return "Unknown key %d" % key_num + return GetHandleSummaryForKey.attr_managers[key_num](handle_ptr) + + +@header("{: <18s} {: <18s} {: <10s} {: <4s} {: <18s} {: <18s}".format("ivace", "value_handle", "#refs", "rel?", "maderefs", "next_layer")) +@lldb_type_summary(['ivac_entry *', 'ivac_entry_t']) +def GetIPCVoucherAttributeEntrySummary(ivace, manager_key_num = 0): + """ Get summary for voucher attribute entry. 
+ """ + out_str = "" + fmt = "{e: <#018x} {e.ivace_value: <#018x} {e.ivace_refs: <10d} {release: <4s} {made_refs: <18s} {next_layer: <18s}" + release_str = "" + free_str = "" + made_refs = "" + next_layer = "" + + if unsigned(ivace.ivace_releasing): + release_str = "Y" + if unsigned(ivace.ivace_free): + free_str = 'F' + if unsigned(ivace.ivace_layered): + next_layer = "{: <#018x}".format(ivace.ivace_u.ivaceu_layer) + else: + made_refs = "{: <18d}".format(ivace.ivace_u.ivaceu_made) + + out_str += fmt.format(e=ivace, release=release_str, made_refs=made_refs, next_layer=next_layer) + if config['verbosity'] > vHUMAN and manager_key_num > 0: + out_str += " " + GetHandleSummaryForKey(unsigned(ivace.ivace_value), manager_key_num) + if config['verbosity'] > vHUMAN : + out_str += ' {: <2s} {: <4d} {: <4d}'.format(free_str, ivace.ivace_next, ivace.ivace_index) + return out_str + +@lldb_command('showivacfreelist','') +def ShowIVACFreeList(cmd_args=[], cmd_options={}): + """ Walk the free list and print every entry in the list. + usage: (lldb) showivacfreelist + """ + if not cmd_args: + raise ArgumentError('Please provide ') + ivac = kern.GetValueFromAddress(cmd_args[0], 'ipc_voucher_attr_control_t') + print GetIPCVoucherAttrControlSummary.header + print GetIPCVoucherAttrControlSummary(ivac) + if unsigned(ivac.ivac_freelist) == 0: + print "ivac table is full" + return + print "index " + GetIPCVoucherAttributeEntrySummary.header + next_free = unsigned(ivac.ivac_freelist) + while next_free != 0: + print "{: <5d} ".format(next_free) + GetIPCVoucherAttributeEntrySummary(addressof(ivac.ivac_table[next_free])) + next_free = unsigned(ivac.ivac_table[next_free].ivace_next) + + + +@header('{: <18s} {: <8s} {: <18s} {: <18s} {: <18s} {: <18s} {: <18s}'.format("ipc_voucher", "refs", "checksum", "hash", "tbl_size", "table", "voucher_port")) +@lldb_type_summary(['ipc_voucher *', 'ipc_voucher_t']) +def GetIPCVoucherSummary(voucher, show_entries=False): + """ describe a voucher from its ipc_voucher * object """ + out_str = "" + fmt = "{v: <#018x} {v.iv_refs: <8d} {v.iv_sum: <#018x} {v.iv_hash: <#018x} {v.iv_table_size: <#018x} {v.iv_table: <#018x} {v.iv_port: <#018x}" + out_str += fmt.format(v = voucher) + entries_str = '' + if show_entries or config['verbosity'] > vHUMAN: + elems = unsigned(voucher.iv_table_size) + entries_header_str = "\n\t" + "{: <5s} {: <3s} {: <16s} {: <30s}".format("index", "key", "value_index", "manager") + " " + GetIPCVoucherAttributeEntrySummary.header + fmt = "{: <5d} {: <3d} {: <16d} {: <30s}" + for i in range(elems): + voucher_entry_index = unsigned(voucher.iv_inline_table[i]) + if voucher_entry_index: + s = fmt.format(i, GetVoucherManagerKeyForIndex(i), voucher_entry_index, GetVoucherAttributeManagerNameForIndex(i)) + e = GetVoucherValueHandleFromVoucherForIndex(voucher, i) + if e is not None: + s += " " + GetIPCVoucherAttributeEntrySummary(addressof(e), GetVoucherManagerKeyForIndex(i) ) + if entries_header_str : + entries_str = entries_header_str + entries_header_str = '' + entries_str += "\n\t" + s + if not entries_header_str: + entries_str += "\n\t" + out_str += entries_str + return out_str + +def GetVoucherManagerKeyForIndex(idx): + """ Returns key number for index based on global table. 
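showivacfreelist above follows an index-chained free list: ivac_freelist holds the first free slot, each free entry's ivace_next holds the index of the next free entry, and index 0 terminates the chain. The traversal in miniature, with a plain list standing in for ivac_table (indices hypothetical):

next_free = [0, 3, 0, 4, 0]   # entry i -> index of the next free entry
idx = 1                       # head of the chain, i.e. ivac_freelist
while idx != 0:
    print "free slot", idx
    idx = next_free[idx]      # mirrors ivac_table[idx].ivace_next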
Will raise index error if value is incorrect + """ + return unsigned(kern.globals.iv_global_table[idx].ivgte_key) + +def GetVoucherAttributeManagerForKey(k): + """ Walks through the iv_global_table and finds the attribute manager name + params: k - int key number of the manager + return: cvalue - the attribute manager object. + None - if not found + """ + retval = None + entry_size = sizeof(kern.globals.iv_global_table[0]) + elems = sizeof(kern.globals.iv_global_table) / entry_size + for i in range(elems): + elt = addressof(kern.globals.iv_global_table[i]) + if k == unsigned(elt.ivgte_key): + retval = elt.ivgte_manager + break + return retval + +def GetVoucherAttributeControllerForKey(k): + """ Walks through the iv_global_table and finds the attribute controller + params: k - int key number of the manager + return: cvalue - the attribute controller object. + None - if not found + """ + retval = None + entry_size = sizeof(kern.globals.iv_global_table[0]) + elems = sizeof(kern.globals.iv_global_table) / entry_size + for i in range(elems): + elt = addressof(kern.globals.iv_global_table[i]) + if k == unsigned(elt.ivgte_key): + retval = elt.ivgte_control + break + return retval + + +def GetVoucherAttributeManagerName(ivam): + """ find the name of the ivam object + param: ivam - cvalue object of type ipc_voucher_attr_manager_t + returns: str - name of the manager + """ + return kern.Symbolicate(unsigned(ivam)) + +def GetVoucherAttributeManagerNameForIndex(idx): + """ get voucher attribute manager name for index + return: str - name of the attribute manager object + """ + return GetVoucherAttributeManagerName(GetVoucherAttributeManagerForKey(GetVoucherManagerKeyForIndex(idx))) + +def GetVoucherValueHandleFromVoucherForIndex(voucher, idx): + """ traverse the voucher attrs and get value_handle in the voucher attr controls table + params: + voucher - cvalue object of type ipc_voucher_t + idx - int index in the entries for which you wish to get actual handle for + returns: cvalue object of type ivac_entry_t + None if no handle found. + """ + manager_key = GetVoucherManagerKeyForIndex(idx) + voucher_num_elems = unsigned(voucher.iv_table_size) + if idx >= voucher_num_elems: + debuglog("idx %d is out of range max: %d" % (idx, voucher_num_elems)) + return None + voucher_entry_value = unsigned(voucher.iv_inline_table[idx]) + debuglog("manager_key %d" % manager_key) + ivac = GetVoucherAttributeControllerForKey(manager_key) + if ivac is None or unsigned(ivac) == 0: + debuglog("No voucher attribute controller for idx %d" % idx) + return None + + ivac = kern.GetValueFromAddress(unsigned(ivac), 'ipc_voucher_attr_control_t') # ??? 
No idea why lldb does not addressof directly + ivace_table = ivac.ivac_table + if voucher_entry_value >= unsigned(ivac.ivac_table_size): + print "Failed to get ivace for value %d in table of size %d" % (voucher_entry_value, unsigned(ivac.ivac_table_size)) + return None + return ivace_table[voucher_entry_value] + + + +@lldb_command('showallvouchers') +def ShowAllVouchers(cmd_args=[], cmd_options={}): + """ Display a list of all vouchers in the global voucher hash table + Usage: (lldb) showallvouchers + """ + iv_hash_table = kern.globals.ivht_bucket + num_buckets = sizeof(kern.globals.ivht_bucket) / sizeof(kern.globals.ivht_bucket[0]) + print GetIPCVoucherSummary.header + for i in range(num_buckets): + for v in IterateQueue(iv_hash_table[i], 'ipc_voucher_t', 'iv_hash_link'): + print GetIPCVoucherSummary(v) + +@lldb_command('showvoucher', '') +def ShowVoucher(cmd_args=[], cmd_options={}): + """ Describe a voucher from argument. + Usage: (lldb) showvoucher + """ + if not cmd_args: + raise ArgumentError("Please provide valid argument") + + voucher = kern.GetValueFromAddress(cmd_args[0], 'ipc_voucher_t') + print GetIPCVoucherSummary.header + print GetIPCVoucherSummary(voucher, show_entries=True) + + diff --git a/tools/lldbmacros/ipcimportancedetail.py b/tools/lldbmacros/ipcimportancedetail.py new file mode 100644 index 000000000..ca81e2eac --- /dev/null +++ b/tools/lldbmacros/ipcimportancedetail.py @@ -0,0 +1,135 @@ +from xnu import * + +""" +Recursive ipc importance chain viewing macro. This file incorporates complex python datastructures +interspersed with cvalue based objects from lldb interface. +""" + +class TaskNode(object): + def __init__(self, task_kobj): + self.task = task_kobj + self.importance_refs = [] + + @staticmethod + def GetHeaderString(): + return GetTaskSummary.header + " " + GetProcSummary.header + " {: <18s}".format("task_imp_base") + + def __str__(self): + out_arr = [] + if unsigned(self.task) != 0: + out_arr.append(GetTaskSummary(self.task) + " " + GetProcSummary(Cast(self.task.bsd_info, 'proc *')) + " {: <#018x}".format(self.task.task_imp_base) ) + else: + out_arr.append("Unknown task.") + #out_arr.append("TASK: {: <#018x} {: + """ + print ' ' + GetIPCImportantTaskSummary.header + ' ' + GetIPCImportanceElemSummary.header + for task in kern.tasks: + if unsigned(task.task_imp_base): + print " " + GetIPCImportantTaskSummary(task.task_imp_base) + ' ' + GetIPCImportanceElemSummary(addressof(task.task_imp_base.iit_elem)) + base_node = IIINode(Cast(task.task_imp_base, 'ipc_importance_inherit *'), None) + GetIIIListFromIIE(task.task_imp_base.iit_elem, base_node) + print base_node.GetChildSummaries(prefix="\t\t") + + print "\n\n ======================== TASK REVERSE CHAIN OF IMPORTANCES =========================" + print TaskNode.GetHeaderString() + for k in AllTasksCollection.keys(): + t = AllTasksCollection[k] + print "\n" + str(t) + diff --git a/tools/lldbmacros/mbufs.py b/tools/lldbmacros/mbufs.py index e379fda7d..f6ab3002a 100644 --- a/tools/lldbmacros/mbufs.py +++ b/tools/lldbmacros/mbufs.py @@ -1,6 +1,6 @@ """ Please make sure you read the README COMPLETELY BEFORE reading anything below. - It is very critical that you read coding guidelines in Section E in README file. + It is very critical that you read coding guidelines in Section E in README file. """ from xnu import * @@ -12,7 +12,7 @@ import xnudefines # Macro: mbuf_stat @lldb_command('mbuf_stat') def MBufStat(cmd_args=None): - """ Print extended mbuf allocator statistics. 
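The new ipcimportancedetail.py above collects, for every task, the tasks holding importance references on it, then prints those reverse chains. Reduced to the bare pattern (the classes and edges below are illustrative, not the patch's TaskNode/IIINode):

class Node(object):
    def __init__(self, name):
        self.name = name
        self.importance_refs = []   # who boosts this task

tasks = {}
def note_ref(owner, target):
    tasks.setdefault(target, Node(target)).importance_refs.append(owner)

note_ref('launchd', 'backboardd')   # hypothetical donation edges
note_ref('kernel', 'backboardd')
for name, node in tasks.items():
    print name, '<-', ', '.join(node.importance_refs)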
+ """ Print extended mbuf allocator statistics. """ hdr_format = "{0: <16s} {1: >8s} {2: >8s} {3: ^16s} {4: >8s} {5: >12s} {6: >8s} {7: >8s} {8: >8s}" print hdr_format.format('class', 'total', 'cached', 'uncached', 'inuse', 'failed', 'waiter', 'notified', 'purge') @@ -22,7 +22,7 @@ def MBufStat(cmd_args=None): num_items = sizeof(kern.globals.mbuf_table) / sizeof(kern.globals.mbuf_table[0]) ncpus = int(kern.globals.ncpu) for i in range(num_items): - mbuf = kern.globals.mbuf_table[i] + mbuf = kern.globals.mbuf_table[i] mcs = Cast(mbuf.mtbl_stats, 'mb_class_stat_t *') mc = mbuf.mtbl_cache total = 0 @@ -41,15 +41,41 @@ def MBufStat(cmd_args=None): mcs.mbcl_notified, mcs.mbcl_purge_cnt ) # EndMacro: mbuf_stat - + +# Macro: mbuf_walk_mleak_traces +@lldb_command('mbuf_walk_mleak_traces') +def MbufWalkMleakTraces(cmd_args=None): + """ Print mleak_traces + """ + i = 0 + while (i<256): + trace = kern.globals.mleak_traces[i] + out_string = "" + if (trace.allocs != 0): + print "Index: " + str(i) + out_string += ":" + str(trace.allocs) + " outstanding allocs\n" + out_string += str(trace.hitcount) + " hitcount\n" + out_string += str(trace.collisions) + " collisions\n" + out_string += "Backtrace saved " + str(trace.depth) + " deep\n" + if (trace.depth != 0): + cnt = 0 + while (cnt < trace.depth): + out_string += str(cnt + 1) + ": " + out_string += GetPc(trace.addr[cnt]) + out_string += "\n" + cnt += 1 + print out_string + i +=1 +# EndMacro: mbuf_walk_mleak_traces + # Macro: mbuf_walkpkt @lldb_command('mbuf_walkpkt') def MbufWalkPacket(cmd_args=None): """ Walk the mbuf packet chain (m_nextpkt) """ - if (cmd_args == None or len(cmd_args) == 0): - print "Missing argument 0 in user function." - return + if not cmd_args: + raise ArgumentError("Missing argument 0 in user function.") + mp = kern.GetValueFromAddress(cmd_args[0], 'mbuf *') cnt = 1 tot = 0 @@ -92,9 +118,9 @@ def MbufWalk(cmd_args=None): def MbufBuf2Slab(cmd_args=None): """ Given an mbuf object, find its corresponding slab address """ - if (cmd_args == None or len(cmd_args) == 0): - print "Missing argument 0 in user function." 
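mbuf_walk_mleak_traces above scans all 256 slots of mleak_traces and reports only the slots with outstanding allocations, each with its saved backtrace. The shape of that scan, with a plain list standing in for the kernel table (contents hypothetical):

class Trace(object):
    def __init__(self, allocs, depth, addr):
        self.allocs, self.depth, self.addr = allocs, depth, addr

# 255 idle slots plus one hypothetical leaker in the last slot
mleak_traces = [Trace(0, 0, [])] * 255 + [Trace(2, 1, [0xffffff8000123456])]
for i in range(len(mleak_traces)):
    trace = mleak_traces[i]
    if trace.allocs != 0:
        print "Index:", i, "-", trace.allocs, "outstanding allocs"
        for cnt in range(trace.depth):
            print " %d: 0x%x" % (cnt + 1, trace.addr[cnt])   # GetPc would symbolicate this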
- return + if not cmd_args: + raise ArgumentError("Missing argument 0 in user function.") + m = kern.GetValueFromAddress(cmd_args[0], 'mbuf *') gix = (m - Cast(kern.globals.mbutl, 'char *')) >> MBSHIFT slabstbl = kern.globals.slabstbl @@ -123,7 +149,11 @@ def MbufBuf2Mca(cmd_args=None): def MbufSlabs(cmd_args=None): """ Print all slabs in the group """ + out_string = "" + if not cmd_args: + raise ArgumentError("Invalid arguments passed.") + slg = kern.GetValueFromAddress(cmd_args[0], 'mcl_slabg_t *') x = 0 @@ -146,9 +176,10 @@ def MbufSlabs(cmd_args=None): if (kern.globals.mclaudit != 0): ix = (obj - Cast(kern.globals.mbutl, 'char *')) >> 12 clbase = mbutl + (sizeof(dereference(mbutl)) * ix) - mclidx = (obj - clbase) >> 8 + mclidx = (obj - clbase) >> 8 mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)] - ts = mca.mca_tstamp + trn = (mca.mca_next_trn + kern.globals.mca_trn_max - 1) % kern.globals.mca_trn_max + ts = mca.mca_trns[trn].mca_tstamp out_string += slabs_string_format.format((x + 1), sl, sl.sl_next, obj, hex(mca), int(ts), int(sl.sl_class), int(sl.sl_refcnt), int(sl.sl_chunks), int(sl.sl_len), hex(sl.sl_flags)) @@ -175,9 +206,10 @@ def MbufSlabs(cmd_args=None): if (kern.globals.mclaudit != 0): ix = (obj - Cast(kern.globals.mbutl, 'char *')) >> 12 clbase = mbutl + (sizeof(dereference(mbutl)) * ix) - mclidx = (obj - clbase) >> 8 + mclidx = (obj - clbase) >> 8 mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)] - ts = mca.mca_tstamp + trn = (mca.mca_next_trn + kern.globals.mca_trn_max - 1) % kern.globals.mca_trn_max + ts = mca.mca_trns[trn].mca_tstamp if (kern.ptrsize == 8): out_string += " " + hex(obj) + " " + hex(mca) + " " + str(unsigned(ts)) + "\n" @@ -228,7 +260,7 @@ def GetMbufBuf2Mca(m): #mbutl = Cast(kern.globals.mbutl, 'union mbigcluster *') mbutl = cast(kern.globals.mbutl, 'union mbigcluster *') clbase = mbutl + (sizeof(dereference(mbutl)) * ix) - mclidx = (m - clbase) >> 8 + mclidx = (m - clbase) >> 8 mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)] return str(mca) @@ -236,7 +268,7 @@ def GetMbufWalkAllSlabs(show_a, show_f, show_tr): out_string = "" kern.globals.slabstbl[0] - + x = 0 total = 0 total_a = 0 @@ -266,13 +298,13 @@ def GetMbufWalkAllSlabs(show_a, show_f, show_tr): mbutl = cast(kern.globals.mbutl, 'union mbigcluster *') ix = (base - mbutl) >> 12 clbase = mbutl + (sizeof(dereference(mbutl)) * ix) - mclidx = (base - clbase) >> 8 + mclidx = (base - clbase) >> 8 mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)] first = 1 while ((Cast(mca, 'int') != 0) and (unsigned(mca.mca_addr) != 0)): printmca = 0 - if (mca.mca_uflags & (MB_INUSE|MB_COMP_INUSE)): + if (mca.mca_uflags & (MB_INUSE | MB_COMP_INUSE)): total_a = total_a + 1 printmca = show_a else: @@ -302,7 +334,7 @@ def GetMbufWalkAllSlabs(show_a, show_f, show_tr): out_string += GetMbufMcaCtype(mca, 0) - if (mca.mca_uflags & (MB_INUSE|MB_COMP_INUSE)): + if (mca.mca_uflags & (MB_INUSE | MB_COMP_INUSE)): out_string += "active " else: out_string += " freed " @@ -312,14 +344,18 @@ def GetMbufWalkAllSlabs(show_a, show_f, show_tr): total = total + 1 if (show_tr != 0): - out_string += "Recent transaction for this buffer (thread: 0x" + hex(mca.mca_thread) + "):\n" + idx = int(show_tr) + trn = (mca.mca_next_trn + idx - 1) % unsigned(kern.globals.mca_trn_max) + out_string += "Transaction " + str(int(trn)) + " at " + str(int(mca.mca_trns[int(trn)].mca_tstamp)) + " by thread: 0x" + str(hex(mca.mca_trns[int(trn)].mca_thread)) + ":\n" cnt = 0 - while (cnt < mca.mca_depth): - kgm_pc = 
mca.mca_stack[int(cnt)] + while (cnt < mca.mca_trns[int(trn)].mca_depth): + kgm_pc = mca.mca_trns[int(trn)].mca_stack[int(cnt)] out_string += str(int(cnt) + 1) + " " - out_string += GetPc(kgm_pc) + out_string += GetPc(kgm_pc) cnt += 1 + print out_string + out_string = "" mca = mca.mca_next y += 1 @@ -337,7 +373,7 @@ def GetMbufWalkAllSlabs(show_a, show_f, show_tr): def GetMbufMcaCtype(mca, vopt): cp = mca.mca_cache mca_class = unsigned(cp.mc_private) - csize = kern.globals.mbuf_table[mca_class].mtbl_stats.mbcl_size + csize = unsigned(kern.globals.mbuf_table[mca_class].mtbl_stats.mbcl_size) done = 0 out_string = " " if (csize == MSIZE): @@ -394,7 +430,7 @@ def GetMbufMcaCtype(mca, vopt): out_string += "(paired mbuf, 4K cluster) " else: out_string += "M-BCL " - if vopt: + if vopt: out_string += "(unpaired mbuf, 4K cluster) " else: if (mca.mca_uptr): @@ -430,7 +466,7 @@ def GetMbufMcaCtype(mca, vopt): out_string += "unknown: " + cp.mc_name return out_string - + kgm_pkmod = 0 kgm_pkmodst = 0 kgm_pkmoden = 0 @@ -473,8 +509,7 @@ def GetKmodAddrIntAsString(kgm_pc): def GetPc(kgm_pc): out_string = "" mh_execute_addr = int(lldb_run_command('p/x (uintptr_t *)&_mh_execute_header').split('=')[-1].strip(), 16) - if (unsigned(kgm_pc) < unsigned(mh_execute_addr) or - unsigned(kgm_pc) >= unsigned(kern.globals.vm_kernel_top)): + if (unsigned(kgm_pc) < unsigned(mh_execute_addr) or unsigned(kgm_pc) >= unsigned(kern.globals.vm_kernel_top)): out_string += GetKmodAddrIntAsString(kgm_pc) else: out_string += GetSourceInformationForAddress(int(kgm_pc)) @@ -486,7 +521,7 @@ def GetPc(kgm_pc): def MbufShowActive(cmd_args=None): """ Print all active/in-use mbuf objects """ - if cmd_args != None and len(cmd_args) > 0 : + if cmd_args: print GetMbufWalkAllSlabs(1, 0, cmd_args[0]) else: print GetMbufWalkAllSlabs(1, 0, 0) @@ -508,7 +543,7 @@ def MbufShowMca(cmd_args=None): """ Print the contents of an mbuf mcache audit structure """ out_string = "" - if cmd_args != None and len(cmd_args) > 0 : + if cmd_args: mca = kern.GetValueFromAddress(cmd_args[0], 'mcache_audit_t *') cp = mca.mca_cache out_string += "object type:\t" @@ -530,30 +565,22 @@ def MbufShowMca(cmd_args=None): if (mca.mca_uptr != 0): peer_mca = cast(mca.mca_uptr, 'mcache_audit_t *') out_string += "paired mbuf obj :\t" + hex(peer_mca.mca_addr) + " (mca " + hex(peer_mca) + ")\n" - - out_string += "Recent transaction (tstamp " + str(unsigned(mca.mca_tstamp)) + ", thread " + hex(mca.mca_thread) + ") :\n" - cnt = 0 - while (cnt < mca.mca_depth): - kgm_pc = mca.mca_stack[cnt] - out_string += " " + str(cnt + 1) + ". " - out_string += GetPc(kgm_pc) - cnt += 1 - - if (mca.mca_pdepth > 0): - out_string += "previous transaction (tstamp " + str(unsigned(mca.mca_ptstamp)) + ", thread " + hex(mca.mca_pthread) + "):\n" - cnt = 0 - - while (cnt < mca.mca_pdepth): - kgm_pc = mca.mca_pstack[cnt] - out_string += " " + str(cnt + 1) + ". " - out_string += GetPc(kgm_pc) - cnt += 1 + for idx in range(kern.globals.mca_trn_max, 0, -1): + trn = (mca.mca_next_trn + idx - 1) % unsigned(kern.globals.mca_trn_max) + out_string += "transaction {:d} (tstamp {:d}, thread 0x{:x}):\n".format(trn, mca.mca_trns[trn].mca_tstamp, mca.mca_trns[trn].mca_thread) + cnt = 0 + while (cnt < mca.mca_trns[trn].mca_depth): + kgm_pc = mca.mca_trns[trn].mca_stack[cnt] + out_string += " " + str(cnt + 1) + ". 
" + out_string += GetPc(kgm_pc) + cnt += 1 + + msc = cast(mca.mca_contents, 'mcl_saved_contents_t *') + msa = addressof(msc.sc_scratch) if (mca.mca_uflags & MB_SCVALID): - msc = cast(mca.mca_contents, 'mcl_saved_contents_t *') - msa = addressof(msc.sc_scratch) if (msa.msa_depth > 0): - out_string += "Recent scratch transaction (tstamp " + str(unsigned(msa.msa_tstamp)) + ", thread " + hex(msa.msa_thread) + ") :\n" + out_string += "Recent scratch transaction (tstamp {:d}, thread 0x{:x}):\n".format(msa.msa_tstamp, msa.msa_thread) cnt = 0 while (cnt < msa.msa_depth): kgm_pc = msa.msa_stack[cnt] @@ -562,14 +589,15 @@ def MbufShowMca(cmd_args=None): cnt += 1 if (msa.msa_pdepth > 0): - out_string += "previous scratch transaction (tstamp " + msa.msa_ptstamp + ", thread " + msa.msa_pthread + "):\n" - cnt = 0 - while (cnt < msa.msa_pdepth): - kgm_pc = msa.msa_pstack[cnt] - out_string += " " + str(cnt + 1) + ". " - out_string += GetPc(kgm_pc) - cnt += 1 - else : + out_string += "previous scratch transaction (tstamp {:d}, thread 0x{:x}):\n".format(msa.msa_ptstamp, msa.msa_pthread) + if (msa): + cnt = 0 + while (cnt < msa.msa_pdepth): + kgm_pc = msa.msa_pstack[cnt] + out_string += " " + str(cnt + 1) + ". " + out_string += GetPc(kgm_pc) + cnt += 1 + else: out_string += "Missing argument 0 in user function." print out_string @@ -581,7 +609,7 @@ def MbufShowMca(cmd_args=None): def MbufShowAll(cmd_args=None): """ Print all mbuf objects """ - print GetMbufWalkAllSlabs(1, 1, 0) + print GetMbufWalkAllSlabs(1, 1, 0) # EndMacro: mbuf_showall # Macro: mbuf_countchain @@ -589,14 +617,14 @@ def MbufShowAll(cmd_args=None): def MbufCountChain(cmd_args=None): """ Count the length of an mbuf chain """ - if (cmd_args == None or len(cmd_args) == 0): - print "Missing argument 0 in user function." - return + if not cmd_args: + raise ArgumentError("Missing argument 0 in user function.") + mp = kern.GetValueFromAddress(cmd_args[0], 'mbuf *') pkt = 0 nxt = 0 - + while (mp): pkt = pkt + 1 mn = mp.m_hdr.mh_next @@ -653,9 +681,9 @@ def MbufTraceLeak(cmd_args=None): stored information with that trace syntax: (lldb) mbuf_traceleak """ - if (cmd_args == None or len(cmd_args) == 0): - print "Missing argument 0 in user function." - return + if not cmd_args: + raise ArgumentError("Missing argument 0 in user function.") + trace = kern.GetValueFromAddress(cmd_args[0], 'mtrace *') print GetMbufTraceLeak(trace) # EndMacro: mbuf_traceleak @@ -666,9 +694,9 @@ def MbufTraceLeak(cmd_args=None): def McacheWalkObject(cmd_args=None): """ Given a mcache object address, walk its obj_next pointer """ - if (cmd_args == None or len(cmd_args) == 0): - print "Missing argument 0 in user function." 
- return + if not cmd_args: + raise ArgumentError("Missing argument 0 in user function.") + out_string = "" p = kern.GetValueFromAddress(cmd_args[0], 'mcache_obj_t *') cnt = 1 @@ -693,15 +721,15 @@ def McacheStat(cmd_args=None): mcache_stat_format_string = "{0:<24s} {1:>8s} {2:>20s} {3:>5s} {4:>5s} {5:>20s} {6:>30s} {7:>18s}" else: mcache_stat_format_string = "{0:<24s} {1:>8s} {2:>12s} {3:>5s} {4:>5s} {5:>12s} {6:>30s} {7:>18s}" - + if (kern.ptrsize == 8): mcache_stat_data_format_string = "{0:<24s} {1:>12s} {2:>20s} {3:>5s} {4:>5s} {5:>22s} {6:>12d} {7:>8d} {8:>8d} {9:>18d}" else: mcache_stat_data_format_string = "{0:<24s} {1:>12s} {2:>12s} {3:>5s} {4:>5s} {5:>14s} {6:>12d} {7:>8d} {8:>8d} {9:>18d}" - - out_string += mcache_stat_format_string.format("cache name", "cache state" , "cache addr", "buf size", "buf align", "backing zone", "wait nowait failed", "bufs incache") + + out_string += mcache_stat_format_string.format("cache name", "cache state", "cache addr", "buf size", "buf align", "backing zone", "wait nowait failed", "bufs incache") out_string += "\n" - + ncpu = int(kern.globals.ncpu) while mc != 0: bktsize = mc.mc_cpu[0].cc_bktsize @@ -720,7 +748,7 @@ def McacheStat(cmd_args=None): backing_zone = " custom" else: backing_zone = " custom" - + total = 0 total += mc.mc_full.bl_total * bktsize n = 0 @@ -772,7 +800,7 @@ def McacheShowCache(cmd_args=None): out_string += " " + str(total) + "\n\n" total += cp.mc_full.bl_total * bktsize - out_string += "Total # of full buckets (" + str(int(bktsize)) + " objs/bkt):\t" + str(int(cp.mc_full.bl_total)) +"\n" + out_string += "Total # of full buckets (" + str(int(bktsize)) + " objs/bkt):\t" + str(int(cp.mc_full.bl_total)) + "\n" out_string += "Total # of objects cached:\t\t" + str(total) + "\n" print out_string # EndMacro: mcache_showcache diff --git a/tools/lldbmacros/memory.py b/tools/lldbmacros/memory.py index 284472709..9f9c30729 100644 --- a/tools/lldbmacros/memory.py +++ b/tools/lldbmacros/memory.py @@ -1,9 +1,10 @@ """ Please make sure you read the README file COMPLETELY BEFORE reading anything below. - It is very critical that you read coding guidelines in Section E in README file. + It is very critical that you read coding guidelines in Section E in README file. 
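The mca_trns changes in mbufs.py above replace the old recent/previous transaction pair with a ring of mca_trn_max transactions, where the most recent entry sits at (mca_next_trn - 1) mod mca_trn_max. The newest-first walk used by mbuf_showmca, in isolation (ring contents hypothetical):

transactions = ['free (most recent)', 'alloc (older)']   # the ring, mca_trn_max == 2
mca_next_trn = 1                                         # next slot to be overwritten
mca_trn_max = len(transactions)
for idx in range(mca_trn_max, 0, -1):                    # newest first
    trn = (mca_next_trn + idx - 1) % mca_trn_max
    print "transaction", trn, ":", transactions[trn]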
""" from xnu import * -import sys, shlex +import sys +import shlex from utils import * import xnudefines from process import * @@ -79,7 +80,7 @@ def GetMemoryStatusNode(proc_val): task_ledgerp = task_val.ledger task_physmem_footprint_ledger_entry = task_ledgerp.l_entries[kern.globals.task_ledgers.phys_mem] - task_iokit_footprint_ledger_entry = task_ledgerp.l_entries[kern.globals.task_ledgers.iokit_mem] + task_iokit_footprint_ledger_entry = task_ledgerp.l_entries[kern.globals.task_ledgers.iokit_mapped] task_phys_footprint_ledger_entry = task_ledgerp.l_entries[kern.globals.task_ledgers.phys_footprint] page_size = kern.globals.page_size @@ -236,10 +237,14 @@ def ShowZfreeListHeader(zone): returns: None """ + + scaled_factor = (unsigned(kern.globals.zp_factor) + + (unsigned(zone.elem_size) >> unsigned(kern.globals.zp_scale))) + out_str = "" out_str += "{0: <9s} {1: <12s} {2: <18s} {3: <18s} {4: <6s}\n".format('ELEM_SIZE', 'COUNT', 'NCOOKIE', 'PCOOKIE', 'FACTOR') out_str += "{0: <9d} {1: <12d} 0x{2:0>16x} 0x{3:0>16x} {4: <2d}/{5: <2d}\n\n".format( - zone.elem_size, zone.count, kern.globals.zp_nopoison_cookie, kern.globals.zp_poisoned_cookie, zone.zp_count, kern.globals.zp_factor) + zone.elem_size, zone.count, kern.globals.zp_nopoison_cookie, kern.globals.zp_poisoned_cookie, zone.zp_count, scaled_factor) out_str += "{0: <7s} {1: <18s} {2: <18s} {3: <18s} {4: <18s} {5: <18s} {6: <14s}\n".format( 'NUM', 'ELEM', 'NEXT', 'BACKUP', '^ NCOOKIE', '^ PCOOKIE', 'POISON (PREV)') print out_str @@ -473,10 +478,11 @@ def FindElem(cmd_args=None): # Macro: btlog_find -@lldb_command('btlog_find', "A") +@lldb_command('btlog_find', "AS") def BtlogFind(cmd_args=None, cmd_options={}): """ Search the btlog_t for entries corresponding to the given element. Use -A flag to print all entries. + Use -S flag to summarize the count of records Usage: btlog_find Usage: btlog_find -A Note: Backtraces will be in chronological order, with oldest entries aged out in FIFO order as needed. @@ -485,7 +491,9 @@ def BtlogFind(cmd_args=None, cmd_options={}): raise ArgumentError("Need a btlog_t parameter") btlog = kern.GetValueFromAddress(cmd_args[0], 'btlog_t *') printall = False - target_elem = 0xffffff + summarize = False + summary_cache = {} + target_elem = 0xffffffff if "-A" in cmd_options: printall = True @@ -494,18 +502,37 @@ def BtlogFind(cmd_args=None, cmd_options={}): raise ArgumentError(" is missing in args. Need a search pointer.") target_elem = unsigned(kern.GetValueFromAddress(cmd_args[1], 'void *')) + if "-S" in cmd_options: + summarize = True + index = unsigned(btlog.head) progress = 0 record_size = unsigned(btlog.btrecord_size) - while index != 0xffffff: - record_offset = index * record_size - record = kern.GetValueFromAddress(unsigned(btlog.btrecords) + record_offset, 'btlog_record_t *') - if unsigned(record.element) == target_elem or printall: - print '{0: (ex. 
showtaskvme 0x00ataskptr00 ) """ + show_pager_info = False + show_all_shadows = False + if "-P" in cmd_options: + show_pager_info = True + if "-S" in cmd_options: + show_all_shadows = True task = kern.GetValueFromAddress(cmd_args[0], 'task *') - ShowTaskVMEntries(task) + ShowTaskVMEntries(task, show_pager_info, show_all_shadows) -@lldb_command('showallvme') -def ShowAllVME(cmd_args=None): +@lldb_command('showallvme', "PS") +def ShowAllVME(cmd_args=None, cmd_options={}): """ Routine to print a summary listing of all the vm map entries - Go Through each task in system and show the vm info - """ + Go Through each task in system and show the vm memory regions + Use -S flag to show VM object shadow chains + Use -P flag to show pager info (mapped file, compressed pages, ...) + """ + show_pager_info = False + show_all_shadows = False + if "-P" in cmd_options: + show_pager_info = True + if "-S" in cmd_options: + show_all_shadows = True for task in kern.tasks: - ShowTaskVMEntries(task) + ShowTaskVMEntries(task, show_pager_info, show_all_shadows) @lldb_command('showallvm') def ShowAllVM(cmd_args=None): @@ -913,7 +954,7 @@ def ShowAllVMStats(cmd_args=None): print entry_format.format(p=proc, m=vmmap, vsize=(unsigned(vmmap.size) >> 12), t=task, s=vmstats) -def ShowTaskVMEntries(task): +def ShowTaskVMEntries(task, show_pager_info, show_all_shadows): """ Routine to print out a summary listing of all the entries in a vm_map params: task - core.value : a object of type 'task *' @@ -932,7 +973,7 @@ def ShowTaskVMEntries(task): vme_ptr_type = GetType('vm_map_entry *') print GetVMEntrySummary.header for vme in IterateQueue(vme_list_head, vme_ptr_type, "links"): - print GetVMEntrySummary(vme) + print GetVMEntrySummary(vme, show_pager_info, show_all_shadows) return None @lldb_command("showmap") @@ -1151,7 +1192,7 @@ def ShowKmodAddr(cmd_args=[]): return True return False -@lldb_command('addkext','F:N:') +@lldb_command('addkext','AF:N:') def AddKextSyms(cmd_args=[], cmd_options={}): """ Add kext symbols into lldb. This command finds symbols for a uuid and load the required executable @@ -1159,6 +1200,7 @@ def AddKextSyms(cmd_args=[], cmd_options={}): addkext : Load one kext based on uuid. eg. (lldb)addkext 4DD2344C0-4A81-3EAB-BDCF-FEAFED9EB73E addkext -F : Load kext executable at specified load address addkext -N : Load one kext that matches the name provided. eg. (lldb) addkext -N corecrypto + addkext -N -A: Load all kext that matches the name provided. eg. 
to load all kext with Apple in name do (lldb) addkext -N Apple -A addkext all : Will load all the kext symbols - SLOW """ @@ -1209,24 +1251,25 @@ def AddKextSyms(cmd_args=[], cmd_options={}): if "-N" in cmd_options: kext_name = cmd_options["-N"] kext_name_matches = GetLongestMatchOption(kext_name, [str(x[2]) for x in all_kexts_info], True) - if len(kext_name_matches) != 1: + if len(kext_name_matches) != 1 and "-A" not in cmd_options: print "Ambiguous match for name: {:s}".format(kext_name) if len(kext_name_matches) > 0: print "Options are:\n\t" + "\n\t".join(kext_name_matches) return debuglog("matched the kext to name %s and uuid %s" % (kext_name_matches[0], kext_name)) - for x in all_kexts_info: - if kext_name_matches[0] == x[2]: - cur_uuid = x[0].lower() - print "Fetching dSYM for {:s}".format(cur_uuid) - info = dsymForUUID(cur_uuid) - if info and 'DBGSymbolRichExecutable' in info: - print "Adding dSYM ({0:s}) for {1:s}".format(cur_uuid, info['DBGSymbolRichExecutable']) - addDSYM(cur_uuid, info) - loadDSYM(cur_uuid, int(x[1],16)) - else: - print "Failed to get symbol info for {:s}".format(cur_uuid) - break + for cur_knm in kext_name_matches: + for x in all_kexts_info: + if cur_knm == x[2]: + cur_uuid = x[0].lower() + print "Fetching dSYM for {:s}".format(cur_uuid) + info = dsymForUUID(cur_uuid) + if info and 'DBGSymbolRichExecutable' in info: + print "Adding dSYM ({0:s}) for {1:s}".format(cur_uuid, info['DBGSymbolRichExecutable']) + addDSYM(cur_uuid, info) + loadDSYM(cur_uuid, int(x[1],16)) + else: + print "Failed to get symbol info for {:s}".format(cur_uuid) + break kern.symbolicator = None return @@ -1264,13 +1307,13 @@ lldb_alias('showkext', 'showkmodaddr') lldb_alias('showkextaddr', 'showkmodaddr') @lldb_type_summary(['mount *']) -@header("{0: <20s} {1: <20s} {2: <20s} {3: <12s} {4: <12s} {5: <12s} {6: >6s} {7: <30s} {8: <35s}".format('volume(mp)', 'mnt_data', 'mnt_devvp', 'flag', 'kern_flag', 'lflag', 'type', 'mnton', 'mntfrom')) +@header("{0: <20s} {1: <20s} {2: <20s} {3: <12s} {4: <12s} {5: <12s} {6: >6s} {7: <30s} {8: <35s} {9: <30s}".format('volume(mp)', 'mnt_data', 'mnt_devvp', 'flag', 'kern_flag', 'lflag', 'type', 'mnton', 'mntfrom', 'iosched supported')) def GetMountSummary(mount): """ Display a summary of mount on the system """ out_string = ("{mnt: <#020x} {mnt.mnt_data: <#020x} {mnt.mnt_devvp: <#020x} {mnt.mnt_flag: <#012x} " + "{mnt.mnt_kern_flag: <#012x} {mnt.mnt_lflag: <#012x} {vfs.f_fstypename: >6s} " + - "{vfs.f_mntonname: <30s} {vfs.f_mntfromname: <35s}").format(mnt=mount, vfs=mount.mnt_vfsstat) + "{vfs.f_mntonname: <30s} {vfs.f_mntfromname: <35s} {iomode: <30s}").format(mnt=mount, vfs=mount.mnt_vfsstat, iomode=('Yes' if (mount.mnt_ioflags & 0x4) else 'No')) return out_string @lldb_command('showallmounts') @@ -1303,7 +1346,7 @@ def ShowSystemLog(cmd_args=None): err.Clear() cbyte = msg_bufc_data.GetUnsignedInt8(err, i) if not err.Success() : - raise ValueError("Failed to read character at offset " + i + ": " + err.GetCString()) + raise ValueError("Failed to read character at offset " + str(i) + ": " + err.GetCString()) c = chr(cbyte) if c == '\0' : continue @@ -1577,12 +1620,12 @@ def ShowProcLocks(cmd_args=None): # EndMacro: showproclocks @lldb_type_summary(['vnode_t', 'vnode *']) -@header("{0: <20s} {1: >8s} {2: >8s} {3: <20s} {4: <6s} {5: <20s} {6: <6s} {7: <35s}".format('vnode', 'usecount', 'iocount', 'v_data', 'vtype', 'parent', 'mapped', 'name')) +@header("{0: <20s} {1: >8s} {2: >8s} {3: <20s} {4: <6s} {5: <20s} {6: <6s} {7: <6s} {8: <35s}".format('vnode', 
'usecount', 'iocount', 'v_data', 'vtype', 'parent', 'mapped', 'cs_version', 'name')) def GetVnodeSummary(vnode): """ Get a summary of important information out of vnode """ out_str = '' - format_string = "{0: <#020x} {1: >8d} {2: >8d} {3: <#020x} {4: <6s} {5: <#020x} {6: <6s} {7: <35s}" + format_string = "{0: <#020x} {1: >8d} {2: >8d} {3: <#020x} {4: <6s} {5: <#020x} {6: <6s} {7: <6s} {8: <35s}" usecount = int(vnode.v_usecount) iocount = int(vnode.v_iocount) v_data_ptr = int(hex(vnode.v_data), 16) @@ -1600,13 +1643,15 @@ def GetVnodeSummary(vnode): cnode = Cast(vnode.v_data, 'cnode *') name = "hfs: %s" % str( Cast(cnode.c_desc.cd_nameptr, 'char *')) mapped = '-' + csblob_version = '-' if (vtype == 1) and (vnode.v_un.vu_ubcinfo != 0): + csblob_version = '{: <6d}'.format(vnode.v_un.vu_ubcinfo.cs_add_gen) # Check to see if vnode is mapped/unmapped if (vnode.v_un.vu_ubcinfo.ui_flags & 0x8) != 0: mapped = '1' else: mapped = '0' - out_str += format_string.format(vnode, usecount, iocount, v_data_ptr, vtype_str, parent_ptr, mapped, name) + out_str += format_string.format(vnode, usecount, iocount, v_data_ptr, vtype_str, parent_ptr, mapped, csblob_version, name) return out_str @lldb_command('showallvnodes') @@ -2020,3 +2065,379 @@ def ShowBooterMemoryMap(cmd_args=None): print out_string #EndMacro: showbootermemorymap +@lldb_command('show_all_purgeable_objects') +def ShowAllPurgeableVmObjects(cmd_args=None): + """ Routine to print a summary listing of all the purgeable vm objects + """ + print "\n-------------------- VOLATILE OBJECTS --------------------\n" + ShowAllPurgeableVolatileVmObjects() + print "\n-------------------- NON-VOLATILE OBJECTS --------------------\n" + ShowAllPurgeableNonVolatileVmObjects() + +@lldb_command('show_all_purgeable_nonvolatile_objects') +def ShowAllPurgeableNonVolatileVmObjects(cmd_args=None): + """ Routine to print a summary listing of all the vm objects in + the purgeable_nonvolatile_queue + """ + + nonvolatile_total = lambda:None + nonvolatile_total.objects = 0 + nonvolatile_total.vsize = 0 + nonvolatile_total.rsize = 0 + nonvolatile_total.wsize = 0 + nonvolatile_total.csize = 0 + nonvolatile_total.disowned_objects = 0 + nonvolatile_total.disowned_vsize = 0 + nonvolatile_total.disowned_rsize = 0 + nonvolatile_total.disowned_wsize = 0 + nonvolatile_total.disowned_csize = 0 + + queue_len = kern.globals.purgeable_nonvolatile_count + queue_head = kern.globals.purgeable_nonvolatile_queue + + print 'purgeable_nonvolatile_queue:{:#018x} purgeable_volatile_count:{:d}\n'.format(kern.GetLoadAddressForSymbol('purgeable_nonvolatile_queue'),queue_len) + print 'N:non-volatile V:volatile E:empty D:deny\n' + + print '{:>6s} {:<6s} {:18s} {:1s} {:>6s} {:>16s} {:>10s} {:>10s} {:>10s} {:18s} {:>6s} {:<20s}\n'.format("#","#","object","P","refcnt","size (pages)","resid","wired","compressed","owner","pid","process") + idx = 0 + for object in IterateQueue(queue_head, 'struct vm_object *', 'objq'): + idx += 1 + ShowPurgeableNonVolatileVmObject(object, idx, queue_len, nonvolatile_total) + print "disowned objects:{:<10d} [ virtual:{:<10d} resident:{:<10d} wired:{:<10d} compressed:{:<10d} ]\n".format(nonvolatile_total.disowned_objects, nonvolatile_total.disowned_vsize, nonvolatile_total.disowned_rsize, nonvolatile_total.disowned_wsize, nonvolatile_total.disowned_csize) + print " all objects:{:<10d} [ virtual:{:<10d} resident:{:<10d} wired:{:<10d} compressed:{:<10d} ]\n".format(nonvolatile_total.objects, nonvolatile_total.vsize, nonvolatile_total.rsize, nonvolatile_total.wsize, 
nonvolatile_total.csize) + + +def ShowPurgeableNonVolatileVmObject(object, idx, queue_len, nonvolatile_total): + """ Routine to print out a summary a VM object in purgeable_nonvolatile_queue + params: + object - core.value : a object of type 'struct vm_object *' + returns: + None + """ + if object.purgable == 0: + purgable = "N" + elif object.purgable == 1: + purgable = "V" + elif object.purgable == 2: + purgable = "E" + elif object.purgable == 3: + purgable = "D" + else: + purgable = "?" + if object.pager == 0: + compressed_count = 0 + else: + compressor_pager = Cast(object.pager, 'compressor_pager *') + compressed_count = compressor_pager.cpgr_num_slots_occupied + + print "{:>6d}/{:<6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d} {:#018x} {:>6d} {:<20s}\n".format(idx,queue_len,object,purgable,object.ref_count,object.vo_un1.vou_size/kern.globals.page_size,object.resident_page_count,object.wired_page_count,compressed_count, object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner)) + + nonvolatile_total.objects += 1 + nonvolatile_total.vsize += object.vo_un1.vou_size/kern.globals.page_size + nonvolatile_total.rsize += object.resident_page_count + nonvolatile_total.wsize += object.wired_page_count + nonvolatile_total.csize += compressed_count + if object.vo_un2.vou_purgeable_owner == 0: + nonvolatile_total.disowned_objects += 1 + nonvolatile_total.disowned_vsize += object.vo_un1.vou_size/kern.globals.page_size + nonvolatile_total.disowned_rsize += object.resident_page_count + nonvolatile_total.disowned_wsize += object.wired_page_count + nonvolatile_total.disowned_csize += compressed_count + + +@lldb_command('show_all_purgeable_volatile_objects') +def ShowAllPurgeableVolatileVmObjects(cmd_args=None): + """ Routine to print a summary listing of all the vm objects in + the purgeable queues + """ + volatile_total = lambda:None + volatile_total.objects = 0 + volatile_total.vsize = 0 + volatile_total.rsize = 0 + volatile_total.wsize = 0 + volatile_total.csize = 0 + volatile_total.disowned_objects = 0 + volatile_total.disowned_vsize = 0 + volatile_total.disowned_rsize = 0 + volatile_total.disowned_wsize = 0 + volatile_total.disowned_csize = 0 + + purgeable_queues = kern.globals.purgeable_queues + print "---------- OBSOLETE\n" + ShowPurgeableQueue(purgeable_queues[0], volatile_total) + print "\n\n---------- FIFO\n" + ShowPurgeableQueue(purgeable_queues[1], volatile_total) + print "\n\n---------- LIFO\n" + ShowPurgeableQueue(purgeable_queues[2], volatile_total) + + print "disowned objects:{:<10d} [ virtual:{:<10d} resident:{:<10d} wired:{:<10d} compressed:{:<10d} ]\n".format(volatile_total.disowned_objects, volatile_total.disowned_vsize, volatile_total.disowned_rsize, volatile_total.disowned_wsize, volatile_total.disowned_csize) + print " all objects:{:<10d} [ virtual:{:<10d} resident:{:<10d} wired:{:<10d} compressed:{:<10d} ]\n".format(volatile_total.objects, volatile_total.vsize, volatile_total.rsize, volatile_total.wsize, volatile_total.csize) + purgeable_count = kern.globals.vm_page_purgeable_count + purgeable_wired_count = kern.globals.vm_page_purgeable_wired_count + if purgeable_count != volatile_total.rsize or purgeable_wired_count != volatile_total.wsize: + mismatch = "<--------- MISMATCH\n" + else: + mismatch = "" + print "vm_page_purgeable_count: resident:{:<10d} wired:{:<10d} {:s}\n".format(purgeable_count, purgeable_wired_count, mismatch) + + +def ShowPurgeableQueue(qhead, volatile_total): + 
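Both purgeable walkers above use lambda:None as a throwaway mutable namespace for their running totals, since a bare integer could not be updated in place by the helper each object is fed through. The idiom in isolation:

total = lambda: None   # an empty object to hang attributes on
total.objects = 0
total.vsize = 0

def account(t, pages):   # stands in for the per-object bookkeeping above
    t.objects += 1
    t.vsize += pages

account(total, 16)       # hypothetical 16-page and 4-page objects
account(total, 4)
print total.objects, "objects,", total.vsize, "pages"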
print "----- GROUP 0\n" + ShowPurgeableGroup(qhead.objq[0], volatile_total) + print "----- GROUP 1\n" + ShowPurgeableGroup(qhead.objq[1], volatile_total) + print "----- GROUP 2\n" + ShowPurgeableGroup(qhead.objq[2], volatile_total) + print "----- GROUP 3\n" + ShowPurgeableGroup(qhead.objq[3], volatile_total) + print "----- GROUP 4\n" + ShowPurgeableGroup(qhead.objq[4], volatile_total) + print "----- GROUP 5\n" + ShowPurgeableGroup(qhead.objq[5], volatile_total) + print "----- GROUP 6\n" + ShowPurgeableGroup(qhead.objq[6], volatile_total) + print "----- GROUP 7\n" + ShowPurgeableGroup(qhead.objq[7], volatile_total) + +def ShowPurgeableGroup(qhead, volatile_total): + idx = 0 + for object in IterateQueue(qhead, 'struct vm_object *', 'objq'): + if idx == 0: +# print "{:>6s} {:18s} {:1s} {:>6s} {:>16s} {:>10s} {:>10s} {:>10s} {:18s} {:>6s} {:<20s} {:18s} {:>6s} {:<20s} {:s}\n".format("#","object","P","refcnt","size (pages)","resid","wired","compressed","owner","pid","process","volatilizer","pid","process","") + print "{:>6s} {:18s} {:1s} {:>6s} {:>16s} {:>10s} {:>10s} {:>10s} {:18s} {:>6s} {:<20s}\n".format("#","object","P","refcnt","size (pages)","resid","wired","compressed","owner","pid","process") + idx += 1 + ShowPurgeableVolatileVmObject(object, idx, volatile_total) + +def ShowPurgeableVolatileVmObject(object, idx, volatile_total): + """ Routine to print out a summary a VM object in a purgeable queue + params: + object - core.value : a object of type 'struct vm_object *' + returns: + None + """ +# if int(object.vo_un2.vou_purgeable_owner) != int(object.vo_purgeable_volatilizer): +# diff=" !=" +# else: +# diff=" " + if object.purgable == 0: + purgable = "N" + elif object.purgable == 1: + purgable = "V" + elif object.purgable == 2: + purgable = "E" + elif object.purgable == 3: + purgable = "D" + else: + purgable = "?" 
+ if object.pager == 0: + compressed_count = 0 + else: + compressor_pager = Cast(object.pager, 'compressor_pager *') + compressed_count = compressor_pager.cpgr_num_slots_occupied +# print "{:>6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d} {:#018x} {:>6d} {:<20s} {:#018x} {:>6d} {:<20s} {:s}\n".format(idx,object,purgable,object.ref_count,object.vo_un1.vou_size/kern.globals.page_size,object.resident_page_count,object.wired_page_count,compressed_count,object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner),object.vo_purgeable_volatilizer,GetProcPIDForTask(object.vo_purgeable_volatilizer),GetProcNameForTask(object.vo_purgeable_volatilizer),diff) + print "{:>6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d} {:#018x} {:>6d} {:<20s}\n".format(idx,object,purgable,object.ref_count,object.vo_un1.vou_size/kern.globals.page_size,object.resident_page_count,object.wired_page_count,compressed_count, object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner)) + volatile_total.objects += 1 + volatile_total.vsize += object.vo_un1.vou_size/kern.globals.page_size + volatile_total.rsize += object.resident_page_count + volatile_total.wsize += object.wired_page_count + volatile_total.csize += compressed_count + if object.vo_un2.vou_purgeable_owner == 0: + volatile_total.disowned_objects += 1 + volatile_total.disowned_vsize += object.vo_un1.vou_size/kern.globals.page_size + volatile_total.disowned_rsize += object.resident_page_count + volatile_total.disowned_wsize += object.wired_page_count + volatile_total.disowned_csize += compressed_count + + +def GetCompressedPagesForObject(obj): + """Stuff + """ + pager = Cast(obj.pager, 'compressor_pager_t') + return pager.cpgr_num_slots_occupied +# if pager.cpgr_num_slots > 128: +# slots_arr = pager.cpgr_slots.cpgr_islots +# num_indirect_slot_ptr = (pager.cpgr_num_slots + 127) / 128 +# index = 0 +# compressor_slot = 0 +# compressed_pages = 0 +# while index < num_indirect_slot_ptr: +# compressor_slot = 0 +# if slots_arr[index]: +# while compressor_slot < 128: +# if slots_arr[index][compressor_slot]: +# compressed_pages += 1 +# compressor_slot += 1 +# index += 1 +# else: +# slots_arr = pager.cpgr_slots.cpgr_dslots +# compressor_slot = 0 +# compressed_pages = 0 +# while compressor_slot < pager.cpgr_num_slots: +# if slots_arr[compressor_slot]: +# compressed_pages += 1 +# compressor_slot += 1 +# return compressed_pages + +@lldb_command('showallvme', "-PS") +def ShowAllVME(cmd_args=None, cmd_options={}): + """ Routine to print a summary listing of all the vm map entries + Go Through each task in system and show the vm info + """ + show_pager_info = False + show_all_shadows = False + if "-P" in cmd_options: + show_pager_info = True + if "-S" in cmd_options: + show_all_shadows = True + for task in kern.tasks: + ShowTaskVMEntries(task, show_pager_info, show_all_shadows) + +def ShowTaskVMEntries(task, show_pager_info, show_all_shadows): + """ Routine to print out a summary listing of all the entries in a vm_map + params: + task - core.value : a object of type 'task *' + returns: + None + """ + print "vm_map entries for task " + hex(task) + print GetTaskSummary.header + print GetTaskSummary(task) + if not task.map: + print "Task {0: <#020x} has map = 0x0" + return None + showmapvme(task.map, show_pager_info, show_all_shadows) + +@lldb_command("showmapvme", "PS") +def ShowMapVME(cmd_args=None, 
cmd_options={}): + """Routine to print out info about the specified vm_map and its vm entries + usage: showmapvme + """ + if cmd_args == None or len(cmd_args) < 1: + print "Invalid argument.", ShowMap.__doc__ + return + show_pager_info = False + show_all_shadows = False + if "-P" in cmd_options: + show_pager_info = True + if "-S" in cmd_options: + show_all_shadows = True + map = kern.GetValueFromAddress(cmd_args[0], 'vm_map_t') + showmapvme(map, show_pager_info, show_all_shadows) + +def showmapvme(map, show_pager_info, show_all_shadows): + vnode_pager_ops = kern.globals.vnode_pager_ops + vnode_pager_ops_addr = unsigned(addressof(vnode_pager_ops)) + rsize = 0 + if map.pmap != 0: + rsize = int(map.pmap.stats.resident_count) + print "{:<18s} {:<18s} {:<18s} {:>10s} {:>10s} {:>18s}:{:<18s}".format("vm_map","pmap","size","#ents","rsize","start","end") + print "{:#018x} {:#018x} {:#018x} {:>10d} {:>10d} {:#018x}:{:#018x}".format(map,map.pmap,(map.size/4096),map.hdr.nentries,rsize,map.hdr.links.start,map.hdr.links.end) + vme_list_head = map.hdr.links + vme_ptr_type = GetType('vm_map_entry *') + print "{:<18s} {:>18s}:{:<18s} {:>10s} {:>3s} {:<10s} {:<18s} {:<18s}".format("entry","start","end","#pgs","tag","prot&flags","object","offset") + last_end = map.hdr.links.start + for vme in IterateQueue(vme_list_head, vme_ptr_type, "links"): + if vme.links.start != last_end: + print "{:18s} {:#018x}:{:#018x} {:>10d}".format("------------------",last_end,vme.links.start,(vme.links.start-last_end)/4096) + last_end = vme.links.end + vme_flags = "" + if vme.is_sub_map: + vme_flags += "s" + print "{:#018x} {:#018x}:{:#018x} {:>10d} {:>3d} {:1d}{:1d}{:<8s} {:#018x} {:<#18x}".format(vme,vme.links.start,vme.links.end,(vme.links.end-vme.links.start)/4096,vme.alias,vme.protection,vme.max_protection,vme_flags,vme.object.vm_object,vme.offset) + if show_pager_info and vme.is_sub_map == 0 and vme.object.vm_object != 0: + object = vme.object.vm_object + else: + object = 0 + depth = 0 + offset = unsigned(vme.offset) + size = vme.links.end - vme.links.start + while object != 0: + depth += 1 + if show_all_shadows == False and depth != 1 and object.shadow != 0: + offset += unsigned(object.vo_un2.vou_shadow_offset) + object = object.shadow + continue + if object.copy_strategy == 0: + copy_strategy="N" + elif object.copy_strategy == 2: + copy_strategy="D" + elif object.copy_strategy == 4: + copy_strategy="S" + else: + copy_strategy=str(object.copy_strategy) + if object.internal: + internal = "internal" + else: + internal = "external" + pager_string = "" + if show_pager_info and object.pager != 0: + if object.internal: + pager_string = "-> compressed:{:d}".format(GetCompressedPagesForObject(object)) + else: + vnode_pager = Cast(object.pager,'vnode_pager *') + if unsigned(vnode_pager.pager_ops) == vnode_pager_ops_addr: + pager_string = "-> " + GetVnodePath(vnode_pager.vnode_handle) + print "{:>18d} {:#018x}:{:#018x} {:#018x} ref:{:<6d} ts:{:1d} strat:{:1s} {:s} ({:d} {:d} {:d}) {:s}".format(depth,offset,offset+size,object,object.ref_count,object.true_share,copy_strategy,internal,unsigned(object.vo_un1.vou_size)/4096,object.resident_page_count,object.wired_page_count,pager_string) +# print " #{:<5d} obj {:#018x} ref:{:<6d} ts:{:1d} strat:{:1s} {:s} size:{:<10d} wired:{:<10d} resident:{:<10d} reusable:{:<10d}".format(depth,object,object.ref_count,object.true_share,copy_strategy,internal,object.vo_un1.vou_size/4096,object.wired_page_count,object.resident_page_count,object.reusable_page_count) + offset += 
unsigned(object.vo_un2.vou_shadow_offset) + object = object.shadow + return None + +def FindVMEntriesForVnode(task, vn): + """ returns an array of vme that have the vnode set to defined vnode + each entry in array is of format (vme, start_addr, end_address, protection) + """ + retval = [] + vmmap = task.map + pmap = vmmap.pmap + pager_ops_addr = unsigned(addressof(kern.globals.vnode_pager_ops)) + debuglog("pager_ops_addr %s" % hex(pager_ops_addr)) + + if unsigned(pmap) == 0: + return retval + vme_list_head = vmmap.hdr.links + vme_ptr_type = gettype('vm_map_entry *') + for vme in IterateQueue(vme_list_head, vme_ptr_type, 'links'): + #print vme + if unsigned(vme.is_sub_map) == 0 and unsigned(vme.object.vm_object) != 0: + obj = vme.object.vm_object + else: + continue + + while obj != 0: + if obj.pager != 0: + if obj.internal: + pass + else: + vn_pager = Cast(obj.pager, 'vnode_pager *') + if unsigned(vn_pager.pager_ops) == pager_ops_addr and unsigned(vn_pager.vnode_handle) == unsigned(vn): + retval.append((vme, unsigned(vme.links.start), unsigned(vme.links.end), unsigned(vme.protection))) + obj = obj.shadow + return retval + +@lldb_command('showtaskloadinfo') +def ShowTaskLoadInfo(cmd_args=None, cmd_options={}): + """ Print the load address and uuid for the process + Usage: (lldb)showtaskloadinfo + """ + if not cmd_args: + raise ArgumentError("Insufficient arguments") + t = kern.GetValueFromAddress(cmd_args[0], 'struct task *') + print_format = "0x{0:x} - 0x{1:x} {2: <50s} (??? - ???) <{3: <36s}> {4: <50s}" + p = Cast(t.bsd_info, 'struct proc *') + uuid = p.p_uuid + uuid_out_string = "{a[0]:02X}{a[1]:02X}{a[2]:02X}{a[3]:02X}-{a[4]:02X}{a[5]:02X}-{a[6]:02X}{a[7]:02X}-{a[8]:02X}{a[9]:02X}-{a[10]:02X}{a[11]:02X}{a[12]:02X}{a[13]:02X}{a[14]:02X}{a[15]:02X}".format(a=uuid) + filepath = GetVnodePath(p.p_textvp) + libname = filepath.split('/')[-1] + #print "uuid: %s file: %s" % (uuid_out_string, filepath) + mappings = FindVMEntriesForVnode(t, p.p_textvp) + load_addr = 0 + end_addr = 0 + for m in mappings: + if m[3] == 5: + load_addr = m[1] + end_addr = m[2] + #print "Load address: %s" % hex(m[1]) + print print_format.format(load_addr, end_addr, libname, uuid_out_string, filepath) + return None + diff --git a/tools/lldbmacros/net.py b/tools/lldbmacros/net.py index 86c5303a8..7e55c8d7c 100644 --- a/tools/lldbmacros/net.py +++ b/tools/lldbmacros/net.py @@ -1559,8 +1559,8 @@ def GetInPcb(pcb, proto): out_string += "timewait " if (pcb.inp_flags2 & INP2_IN_FCTREE): out_string += "in_fctree " - if (pcb.inp_flags2 & INP2_WANT_FLOW_DIVERT): - out_string += "want_flow_divert " + if (pcb.inp_flags2 & INP2_WANT_APP_POLICY): + out_string += "want_app_policy " so = pcb.inp_socket if (so != 0): diff --git a/tools/lldbmacros/netdefines.py b/tools/lldbmacros/netdefines.py index 8fff027b4..c35a01e2d 100644 --- a/tools/lldbmacros/netdefines.py +++ b/tools/lldbmacros/netdefines.py @@ -120,6 +120,6 @@ INPCB_STATE_DEAD = 0x3 INP2_TIMEWAIT = 0x00000001 INP2_IN_FCTREE = 0x00000002 -INP2_WANT_FLOW_DIVERT = 0x00000004 +INP2_WANT_APP_POLICY = 0x00000004 N_TIME_WAIT_SLOTS = 128 diff --git a/tools/lldbmacros/plugins/iosspeedtracer.py b/tools/lldbmacros/plugins/iosspeedtracer.py new file mode 100644 index 000000000..0bf21068c --- /dev/null +++ b/tools/lldbmacros/plugins/iosspeedtracer.py @@ -0,0 +1,33 @@ +# Feed user stacks to ios/speedtracer + +def plugin_init(kernel_target, config, lldb_obj, isConnected): + """ initialize the common data as required by plugin """ + return None + +def plugin_execute(command_name, result_output): 
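showmapvme above descends each mapping's shadow chain, adding vou_shadow_offset at every hop so the printed offsets stay relative to the backing object. The descent in miniature (a hand-built three-object chain; all numbers hypothetical):

class Obj(object):
    def __init__(self, shadow=None, shadow_offset=0):
        self.shadow = shadow
        self.vou_shadow_offset = shadow_offset

base = Obj()
middle = Obj(shadow=base, shadow_offset=0x2000)
top = Obj(shadow=middle, shadow_offset=0x1000)

offset, obj, depth = 0x400, top, 0
while obj is not None:
    depth += 1
    print "depth %d offset 0x%x" % (depth, offset)
    offset += obj.vou_shadow_offset   # as showmapvme does before following .shadow
    obj = obj.shadow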
+ """ The xnu framework will call this function with output of a command. + The options for returning are as follows + returns: (status, outstr, further_cmds) + status: Boolean - specifying whether plugin execution succeeded(True) or failed. If failed then xnu will stop doing any further work with this command. + outstr: str - string output for user to be printed at the prompt + further_cmds: [] of str - this holds set of commands to execute at the lldb prompt. Empty array if nothing is required. + """ + import subprocess,os + status = True + outstr = '' + further_cmds = [] + + if command_name != 'showtaskuserstacks' : + status = False + else: + ios_process = subprocess.Popen([os.path.join(os.path.dirname(os.path.abspath(__file__)), "iosspeedtracer.sh")], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + + outstr += ios_process.communicate(input=result_output)[0] + + return (status, outstr, further_cmds) + +def plugin_cleanup(): + """ A cleanup call from xnu which is a signal to wrap up any open file descriptors etc. """ + return None + + diff --git a/tools/lldbmacros/plugins/iosspeedtracer.sh b/tools/lldbmacros/plugins/iosspeedtracer.sh new file mode 100755 index 000000000..9f57eaa72 --- /dev/null +++ b/tools/lldbmacros/plugins/iosspeedtracer.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +TMPFILE=$(mktemp /var/tmp/iosspeedtracer-$$-XXXXXX) +TMPFILE2=$(mktemp /var/tmp/iosspeedtracer2-$$-XXXXXX) +trap 'rm -f $TMPFILE $TMPFILE2' 0 + +# save stack log for curl +cat > $TMPFILE + +if security find-internet-password -s ios.apple.com > $TMPFILE2; then + USER=$(sed -n -E -e 's/"acct"="([^"]*)"/\1/p' < $TMPFILE2) + PW=$(security find-internet-password -a $USER -s ios.apple.com -w) + if [[ $? = 0 ]]; then + curl -u $USER:$PW -X POST -H "Content-Type: text/plain" -H "Accept: text/plain" --data-binary @${TMPFILE} https://ios.apple.com/speedtracer/services/logs + else + echo "security failed -- try security unlock-keychain" + fi +else + echo "no internet password keychain item for ios.apple.com?!" +fi diff --git a/tools/lldbmacros/pmap.py b/tools/lldbmacros/pmap.py index fc3a00529..191c2783b 100644 --- a/tools/lldbmacros/pmap.py +++ b/tools/lldbmacros/pmap.py @@ -4,7 +4,7 @@ from kdp import * from utils import * def ReadPhysInt(phys_addr, bitsize = 64, cpuval = None): - """ Read a physical memory data based on address. + """ Read a physical memory data based on address. params: phys_addr : int - Physical address to read bitsize : int - defines how many bytes to read. defaults to 64 bit @@ -30,12 +30,12 @@ def ReadPhysInt(phys_addr, bitsize = 64, cpuval = None): @lldb_command('readphys') def ReadPhys(cmd_args = None): - """ Reads the specified untranslated address + """ Reads the specified untranslated address The argument is interpreted as a physical address, and the 64-bit word addressed is displayed. usage: readphys
<nbits> <address>
nbits: 8,16,32,64 - address: 1234 or 0x1234 + address: 1234 or 0x1234 """ if cmd_args == None or len(cmd_args) < 2: print "Insufficient arguments.", ReadPhys.__doc__ @@ -57,7 +57,7 @@ def KDPReadPhysMEM(address, bits): address : int - address where to read the data from bits : int - number of bits in the intval (8/16/32/64) returns: - int: read value from memory. + int: read value from memory. 0xBAD10AD: if failed to read data. """ retval = 0xBAD10AD @@ -77,15 +77,15 @@ def KDPReadPhysMEM(address, bits): data_addr = int(addressof(kern.globals.manual_pkt)) pkt = kern.GetValueFromAddress(data_addr, 'kdp_readphysmem64_req_t *') - + header_value =GetKDPPacketHeaderInt(request=GetEnumValue('kdp_req_t::KDP_READPHYSMEM64'), length=kdp_pkt_size) - + if ( WriteInt64ToMemoryAddress((header_value), int(addressof(pkt.hdr))) and WriteInt64ToMemoryAddress(address, int(addressof(pkt.address))) and WriteInt32ToMemoryAddress((bits/8), int(addressof(pkt.nbytes))) and WriteInt16ToMemoryAddress(xnudefines.lcpu_self, int(addressof(pkt.lcpu))) ): - + if WriteInt32ToMemoryAddress(1, input_address): # now read data from the kdp packet data_address = unsigned(addressof(kern.GetValueFromAddress(int(addressof(kern.globals.manual_pkt.data)), 'kdp_readphysmem64_reply_t *').data)) @@ -124,15 +124,15 @@ def KDPWritePhysMEM(address, intval, bits): data_addr = int(addressof(kern.globals.manual_pkt)) pkt = kern.GetValueFromAddress(data_addr, 'kdp_writephysmem64_req_t *') - + header_value =GetKDPPacketHeaderInt(request=GetEnumValue('kdp_req_t::KDP_WRITEPHYSMEM64'), length=kdp_pkt_size) - + if ( WriteInt64ToMemoryAddress((header_value), int(addressof(pkt.hdr))) and WriteInt64ToMemoryAddress(address, int(addressof(pkt.address))) and WriteInt32ToMemoryAddress((bits/8), int(addressof(pkt.nbytes))) and WriteInt16ToMemoryAddress(xnudefines.lcpu_self, int(addressof(pkt.lcpu))) ): - + if bits == 8: if not WriteInt8ToMemoryAddress(intval, int(addressof(pkt.data))): return False @@ -151,7 +151,7 @@ def KDPWritePhysMEM(address, intval, bits): def WritePhysInt(phys_addr, int_val, bitsize = 64): - """ Write and integer value in a physical memory data based on address. + """ Write and integer value in a physical memory data based on address. params: phys_addr : int - Physical address to read int_val : int - int value to write in memory @@ -170,14 +170,14 @@ def WritePhysInt(phys_addr, int_val, bitsize = 64): @lldb_command('writephys') def WritePhys(cmd_args=None): - """ writes to the specified untranslated address + """ writes to the specified untranslated address The argument is interpreted as a physical address, and the 64-bit word addressed is displayed. usage: writephys
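readphys and writephys accept widths of 8, 16, 32 or 64 bits, and KDPWritePhysMEM above branches on that width before poking the value into the KDP packet. The width dispatch alone, sketched with struct (the real macro writes through the WriteInt*ToMemoryAddress helpers, not struct):

import struct
FORMATS = {8: 'B', 16: 'H', 32: 'I', 64: 'Q'}   # width in bits -> little-endian pack code

def pack_value(intval, bits):
    return struct.pack('<' + FORMATS[bits], intval)

print repr(pack_value(0x25, 16))   # the writephys example value, as two bytes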
nbits: 8,16,32,64 address: 1234 or 0x1234 value: int value to be written - ex. (lldb)writephys 16 0x12345abcd 0x25 + ex. (lldb)writephys 16 0x12345abcd 0x25 """ if cmd_args == None or len(cmd_args) < 3: print "Invalid arguments.", WritePhys.__doc__ @@ -193,6 +193,7 @@ lldb_alias('writephys16', 'writephys 16 ') lldb_alias('writephys32', 'writephys 32 ') lldb_alias('writephys64', 'writephys 64 ') + def _PT_Step(paddr, index, verbose_level = vSCRIPT): """ Step to lower-level page table and print attributes @@ -245,44 +246,44 @@ def _PT_Step(paddr, index, verbose_level = vSCRIPT): out_string += " writable" else: out_string += " read-only" - + if entry & (0x1 << 2): out_string += " user" else: out_string += " supervisor" - + if entry & (0x1 << 3): out_string += " PWT" - + if entry & (0x1 << 4): out_string += " PCD" - + if entry & (0x1 << 5): out_string += " accessed" - + if entry & (0x1 << 6): out_string += " dirty" - + if entry & (0x1 << 7): out_string += " large" pt_large = True else: pt_large = False - + if entry & (0x1 << 8): out_string += " global" - + if entry & (0x3 << 9): out_string += " avail:{0:x}".format((entry >> 9) & 0x3) - + if entry & (0x1 << 63): out_string += " noexec" print out_string return (pt_paddr, pt_valid, pt_large) - - - - + + + + def _PmapL4Walk(pmap_addr_val,vaddr, verbose_level = vSCRIPT): """ Walk the l4 pmap entry. params: pmap_addr_val - core.value representing kernel data of type pmap_addr_t @@ -326,7 +327,7 @@ def _PmapL4Walk(pmap_addr_val,vaddr, verbose_level = vSCRIPT): if pt_valid: paddr = pt_paddr + pframe_offset paddr_isvalid = True - + if verbose_level > vHUMAN: if paddr_isvalid: pvalue = ReadPhysInt(paddr, 32, xnudefines.lcpu_self) @@ -334,8 +335,8 @@ def _PmapL4Walk(pmap_addr_val,vaddr, verbose_level = vSCRIPT): else: print "no translation" - return - + return paddr + def _PmapWalkARMLevel1Section(tte, vaddr, verbose_level = vSCRIPT): paddr = 0 out_string = "" @@ -344,7 +345,7 @@ def _PmapWalkARMLevel1Section(tte, vaddr, verbose_level = vSCRIPT): paddr = ( (tte & 0xFF000000) | (vaddr & 0x00FFFFFF) ) else: paddr = ( (tte & 0xFFF00000) | (vaddr & 0x000FFFFF) ) - + if verbose_level >= vSCRIPT: out_string += "{0: <#020x}\n\t{1: <#020x}\n\t".format(addressof(tte), tte) #bit [1:0] evaluated in PmapWalkARM @@ -353,7 +354,7 @@ def _PmapWalkARMLevel1Section(tte, vaddr, verbose_level = vSCRIPT): # C bit 3 c_bit = (tte & 0x8) >> 3 #XN bit 4 - if (tte & 0x10) : + if (tte & 0x10) : out_string += "no-execute" else: out_string += "execute" @@ -365,7 +366,7 @@ def _PmapWalkARMLevel1Section(tte, vaddr, verbose_level = vSCRIPT): # AP bit 15 and [11:10] merged to a single 3 bit value access = ( (tte & 0xc00) >> 10 ) | ((tte & 0x8000) >> 13) out_string += xnudefines.arm_level2_access_strings[access] - + #TEX bit [14:12] tex_bits = ((tte & 0x7000) >> 12) #Print TEX, C , B all together @@ -396,15 +397,15 @@ def _PmapWalkARMLevel1Section(tte, vaddr, verbose_level = vSCRIPT): out_string += " no-secure" else: out_string += " secure" - + print out_string return paddr - - - + + + def _PmapWalkARMLevel2(tte, vaddr, verbose_level = vSCRIPT): - """ Pmap walk the level 2 tte. - params: + """ Pmap walk the level 2 tte. 
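# --- Editorial sketch: the x86 PTE attribute bits that _PT_Step above decodes
# one if-statement at a time, condensed into a table-driven helper. The bit
# positions match the checks in the hunk (and the Intel SDM); the helper name
# is invented here.
_X86_PTE_FLAGS = (
    (0x1 << 1,  'writable'),    # RW: else read-only
    (0x1 << 2,  'user'),        # U/S: else supervisor
    (0x1 << 3,  'PWT'),
    (0x1 << 4,  'PCD'),
    (0x1 << 5,  'accessed'),
    (0x1 << 6,  'dirty'),
    (0x1 << 7,  'large'),       # PS: entry maps a large page
    (0x1 << 8,  'global'),
    (0x1 << 63, 'noexec'),      # XD
)

def DescribeX86PTE(entry):
    names = [name for bit, name in _X86_PTE_FLAGS if entry & bit]
    return ' '.join(names) if names else '(no attribute bits set)'
# --- end editorial sketch ---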
+ params: tte - value object vaddr - int returns: str - description of the tte + additional informaiton based on verbose_level @@ -446,9 +447,9 @@ def _PmapWalkARMLevel2(tte, vaddr, verbose_level = vSCRIPT): if (pte & 0x3) == 0x1: out_string += " large" # XN bit 15 - if pte & 0x8000 == 0x8000: - out_string+= " no-execute" - else: + if pte & 0x8000 == 0x8000: + out_string+= " no-execute" + else: out_string += " execute" else: out_string += " small" @@ -463,12 +464,12 @@ def _PmapWalkARMLevel2(tte, vaddr, verbose_level = vSCRIPT): # AP bit 9 and [5:4], merged to a single 3-bit value access = (pte & 0x30) >> 4 | (pte & 0x200) >> 7 out_string += xnudefines.arm_level2_access_strings[access] - + #TEX bit [14:12] for large, [8:6] for small tex_bits = ((pte & 0x1c0) >> 6) if (pte & 0x3) == 0x1: - tex_bits = ((pte & 0x7000) >> 12) - + tex_bits = ((pte & 0x7000) >> 12) + # Print TEX, C , B alltogether out_string += " TEX:C:B({:d}{:d}{:d}:{:d}:{:d})".format( 1 if (tex_bits & 0x4) else 0, @@ -482,7 +483,7 @@ def _PmapWalkARMLevel2(tte, vaddr, verbose_level = vSCRIPT): out_string += " shareable" else: out_string += " not-shareable" - + # nG bit 11 if pte & 0x800: out_string += " not-global" @@ -532,16 +533,177 @@ def PmapWalkX86_64(pmapval, vaddr): params: pmapval - core.value representing pmap_t in kernel vaddr: int - int representing virtual address to walk """ - _PmapL4Walk(pmapval.pm_cr3, vaddr, config['verbosity']) + return _PmapL4Walk(pmapval.pm_cr3, vaddr, config['verbosity']) def assert_64bit(val): assert(val < 2**64) +ARM64_TTE_SIZE = 8 +ARM64_VMADDR_BITS = 48 + +def PmapBlockOffsetMaskARM64(level): + assert level >= 1 and level <= 3 + page_size = kern.globals.page_size + ttentries = (page_size / ARM64_TTE_SIZE) + return page_size * (ttentries ** (3 - level)) - 1 + +def PmapBlockBaseMaskARM64(level): + assert level >= 1 and level <= 3 + page_size = kern.globals.page_size + return ((1 << ARM64_VMADDR_BITS) - 1) & ~PmapBlockOffsetMaskARM64(level) + +def PmapIndexMaskARM64(level): + assert level >= 1 and level <= 3 + page_size = kern.globals.page_size + ttentries = (page_size / ARM64_TTE_SIZE) + return page_size * (ttentries ** (3 - level) * (ttentries - 1)) + +def PmapIndexDivideARM64(level): + assert level >= 1 and level <= 3 + page_size = kern.globals.page_size + ttentries = (page_size / ARM64_TTE_SIZE) + return page_size * (ttentries ** (3 - level)) + +def PmapTTnIndexARM64(vaddr, level): + assert(type(vaddr) in (long, int)) + assert_64bit(vaddr) + + return (vaddr & PmapIndexMaskARM64(level)) // PmapIndexDivideARM64(level) + +def PmapDecodeTTEARM64(tte, level): + assert(type(tte) == long) + assert(type(level) == int) + assert_64bit(tte) + + if tte & 0x1 == 0x1: + if (tte & 0x2 == 0x2) and (level != 0x3): + print "Type = Table pointer." + print "Table addr = {:#x}.".format(tte & 0xfffffffff000) + print "PXN = {:#x}.".format((tte >> 59) & 0x1) + print "XN = {:#x}.".format((tte >> 60) & 0x1) + print "AP = {:#x}.".format((tte >> 61) & 0x3) + print "NS = {:#x}".format(tte >> 63) + else: + print "Type = Block." 
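# --- Editorial sketch: the translation-table geometry computed by the mask
# helpers above, evaluated standalone for 4KB pages so the values can be
# sanity-checked without a live target (kern.globals.page_size is assumed to
# be 4096 here; ARM64_TTE_SIZE is 8 as defined above).
PAGE_SIZE = 4096
TT_ENTRIES = PAGE_SIZE / 8          # 512 TTEs per 4KB table

def block_offset_mask(level):
    return PAGE_SIZE * (TT_ENTRIES ** (3 - level)) - 1

def tt_index(vaddr, level):
    index_mask = PAGE_SIZE * (TT_ENTRIES ** (3 - level) * (TT_ENTRIES - 1))
    divide = PAGE_SIZE * (TT_ENTRIES ** (3 - level))
    return (vaddr & index_mask) // divide

assert block_offset_mask(3) == 0xfff         # offset within a 4KB page
assert block_offset_mask(2) == 0x1fffff      # offset within a 2MB L2 block
assert block_offset_mask(1) == 0x3fffffff    # offset within a 1GB L1 block
# L3 index is vaddr bits [20:12], as expected for a 3-level 4KB walk:
assert tt_index(0x123456789000, 3) == (0x123456789000 >> 12) & 0x1ff
# --- end editorial sketch ---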
+ print "AttrIdx = {:#x}.".format((tte >> 2) & 0x7) + print "NS = {:#x}.".format((tte >> 5) & 0x1) + print "AP = {:#x}.".format((tte >> 6) & 0x3) + print "SH = {:#x}.".format((tte >> 8) & 0x3) + print "AF = {:#x}.".format((tte >> 10) & 0x1) + print "nG = {:#x}.".format((tte >> 11) & 0x1) + print "HINT = {:#x}.".format((tte >> 52) & 0x1) + print "PXN = {:#x}.".format((tte >> 53) & 0x1) + print "XN = {:#x}.".format((tte >> 54) & 0x1) + print "SW Use = {:#x}.".format((tte >> 55) & 0xf) + else: + print "Invalid." + + return + +def PmapWalkARM64(pmap, vaddr, verbose_level = vHUMAN): + assert(type(pmap) == core.cvalue.value) + assert(type(vaddr) in (long, int)) + page_size = kern.globals.page_size + page_offset_mask = (page_size - 1) + page_base_mask = ((1 << ARM64_VMADDR_BITS) - 1) & (~page_offset_mask) + + assert_64bit(vaddr) + paddr = -1 + + tt1_index = PmapTTnIndexARM64(vaddr, 1) + tt2_index = PmapTTnIndexARM64(vaddr, 2) + tt3_index = PmapTTnIndexARM64(vaddr, 3) + + # L1 + tte = long(unsigned(pmap.tte[tt1_index])) + assert(type(tte) == long) + assert_64bit(tte) + + if verbose_level >= vSCRIPT: + print "L1 entry: {:#x}".format(tte) + if verbose_level >= vDETAIL: + PmapDecodeTTEARM64(tte, 1) + + if tte & 0x1 == 0x1: + # Check for L1 block entry + if tte & 0x2 == 0x0: + # Handle L1 block entry + paddr = tte & PmapBlockBaseMaskARM64(1) + paddr = paddr | (vaddr & PmapBlockOffsetMaskARM64(1)) + print "phys: {:#x}".format(paddr) + else: + # Handle L1 table entry + l2_phys = (tte & page_base_mask) + (ARM64_TTE_SIZE * tt2_index) + assert(type(l2_phys) == long) + + l2_virt = kern.PhysToKernelVirt(l2_phys) + assert(type(l2_virt) == long) + + if verbose_level >= vDETAIL: + print "L2 physical address: {:#x}. L2 virtual address: {:#x}".format(l2_phys, l2_virt) + + # L2 + ttep = kern.GetValueFromAddress(l2_virt, "tt_entry_t*") + tte = long(unsigned(dereference(ttep))) + assert(type(tte) == long) + + if verbose_level >= vSCRIPT: + print "L2 entry: {:#0x}".format(tte) + if verbose_level >= vDETAIL: + PmapDecodeTTEARM64(tte, 2) + + if tte & 0x1 == 0x1: + # Check for L2 block entry + if tte & 0x2 == 0x0: + # Handle L2 block entry + paddr = tte & PmapBlockBaseMaskARM64(2) + paddr = paddr | (vaddr & PmapBlockOffsetMaskARM64(2)) + else: + # Handle L2 table entry + l3_phys = (tte & page_base_mask) + (ARM64_TTE_SIZE * tt3_index) + assert(type(l3_phys) == long) + + l3_virt = kern.PhysToKernelVirt(l3_phys) + assert(type(l3_virt) == long) + + if verbose_level >= vDETAIL: + print "L3 physical address: {:#x}. 
L3 virtual address: {:#x}".format(l3_phys, l3_virt) + + # L3 + ttep = kern.GetValueFromAddress(l3_virt, "tt_entry_t*") + tte = long(unsigned(dereference(ttep))) + assert(type(tte) == long) + + if verbose_level >= vSCRIPT: + print "L3 entry: {:#0x}".format(tte) + if verbose_level >= vDETAIL: + PmapDecodeTTEARM64(tte, 3) + + if tte & 0x3 == 0x3: + paddr = tte & page_base_mask + paddr = paddr | (vaddr & page_offset_mask) + elif verbose_level >= vHUMAN: + print "L3 entry invalid: {:#x}\n".format(tte) + elif verbose_level >= vHUMAN: # tte & 0x1 == 0x1 + print "L2 entry invalid: {:#x}\n".format(tte) + elif verbose_level >= vHUMAN: + print "L1 entry invalid: {:#x}\n".format(tte) + + if verbose_level >= vHUMAN: + if paddr: + print "Translation of {:#x} is {:#x}.".format(vaddr, paddr) + else: + print "(no translation)" + + return paddr + def PmapWalk(pmap, vaddr, verbose_level = vHUMAN): if kern.arch == 'x86_64': return PmapWalkX86_64(pmap, vaddr) elif kern.arch == 'arm': return PmapWalkARM(pmap, vaddr, verbose_level) + elif kern.arch == 'arm64': + return PmapWalkARM64(pmap, vaddr, verbose_level) else: raise NotImplementedError("PmapWalk does not support {0}".format(kern.arch)) diff --git a/tools/lldbmacros/process.py b/tools/lldbmacros/process.py index 11f9ef452..965049dd6 100644 --- a/tools/lldbmacros/process.py +++ b/tools/lldbmacros/process.py @@ -9,6 +9,34 @@ from utils import * from core.lazytarget import * import xnudefines +def GetProcNameForTask(task): + """ returns a string name of the process. if proc is not valid "unknown" is returned + params: + task: value object represeting a task in the kernel. + returns: + str : A string name of the process linked to the task + """ + if not task or not unsigned(task.bsd_info): + return "unknown" + p = Cast(task.bsd_info, 'proc *') + return str(p.p_comm) + +def GetProcPIDForTask(task): + """ returns a int pid of the process. if the proc is not valid, val[5] from audit_token is returned. + params: + task: value object representing a task in the kernel + returns: + int : pid of the process or -1 if not found + """ + if task and unsigned(task.bsd_info): + p = Cast(task.bsd_info, 'proc *') + return unsigned(p.p_pid) + + if task : + return unsigned(task.audit_token.val[5]) + + return -1 + def GetProcInfo(proc): """ returns a string name, pid, parent and task for a proc_t. Decodes cred, flag and p_stat fields. 
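# --- Editorial sketch: how the two helpers above combine at the lldb prompt.
# kern.tasks is the same iterator used by showalltasks later in this file;
# the formatting is illustrative only.
for t in kern.tasks:
    print "{0: <#020x} {1: >8d} {2: <s}".format(
        unsigned(t), GetProcPIDForTask(t), GetProcNameForTask(t))
# --- end editorial sketch ---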
params: @@ -92,19 +120,77 @@ def ZombProc(cmd_args=None): params: cmd_args - [] : array of strings passed from lldb command prompt """ - for proc in kern.zombprocs: - print GetProcInfo(proc) + if len(kern.zombprocs) != 0: + print "\nZombie Processes:" + for proc in kern.zombprocs: + print GetProcInfo(proc) + "\n\n" + +@lldb_command('zombtasks') +def ZombTasks(cmd_args=None): + """ Routine to print out all tasks in the zombie list + params: None + """ + out_str = "" + if len(kern.zombprocs) != 0: + header = "\nZombie Tasks:\n" + header += GetTaskSummary.header + " " + GetProcSummary.header + for proc in kern.zombprocs: + if proc.p_stat != 5: + t = Cast(proc.task, 'task *') + out_str += GetTaskSummary(t) +" "+ GetProcSummary(proc) + "\n" + if out_str != "": + print header + print out_str @lldb_command('zombstacks') def ZombStacks(cmd_args=None): """ Routine to print out all stacks of tasks that are exiting """ + header_flag = 0 for proc in kern.zombprocs: if proc.p_stat != 5: + if header_flag == 0: + print "\nZombie Stacks:" + header_flag = 1 t = Cast(proc.task, 'task *') ShowTaskStacks(t) #End of Zombstacks +def GetASTSummary(ast): + """ Summarizes an AST field + Flags: + P - AST_PREEMPT + Q - AST_QUANTUM + U - AST_URGENT + H - AST_HANDOFF + Y - AST_YIELD + A - AST_APC + L - AST_LEDGER + B - AST_BSD + K - AST_KPERF + M - AST_MACF + C - AST_CHUD + C - AST_CHUD_URGENT + G - AST_GUARD + T - AST_TELEMETRY_USER + T - AST_TELEMETRY_KERNEL + T - AST_TELEMETRY_WINDOWED + S - AST_SFI + """ + out_string = "" + state = int(ast) + thread_state_chars = {0x0:'', 0x1:'P', 0x2:'Q', 0x4:'U', 0x8:'H', 0x10:'Y', 0x20:'A', + 0x40:'L', 0x80:'B', 0x100:'K', 0x200:'M', 0x400:'C', 0x800:'C', + 0x1000:'G', 0x2000:'T', 0x4000:'T', 0x8000:'T', 0x10000:'S'} + state_str = '' + mask = 0x1 + while mask <= 0x10000 : + state_str += thread_state_chars[int(state & mask)] + mask = mask << 1 + + return state_str + + @lldb_type_summary(['task', 'task_t']) @header("{0: <20s} {1: <20s} {2: <20s} {3: >5s} {4: <5s}".format("task","vm_map", "ipc_space", "#acts", "flags")) def GetTaskSummary(task): @@ -120,31 +206,65 @@ def GetTaskSummary(task): task_flags += 'P' if hasattr(task, "suspend_count") and int(task.suspend_count) > 0: task_flags += 'S' - if hasattr(task, "imp_receiver") and int(task.imp_receiver) == 1: - task_flags += 'R' - if hasattr(task, "imp_donor") and int(task.imp_donor) == 1: - task_flags += 'D' - if hasattr(task, "task_imp_assertcnt") and int(task.task_imp_assertcnt) > 0: - task_flags += 'B' + if hasattr(task, 'task_imp_base') and unsigned(task.task_imp_base): + tib = task.task_imp_base + if int(tib.iit_receiver) == 1: + task_flags += 'R' + if int(tib.iit_donor) == 1: + task_flags += 'D' + if int(tib.iit_assertcnt) > 0: + task_flags += 'B' out_string += format_string.format(task, task.map, task.itk_space, thread_count, task_flags) return out_string @lldb_type_summary(['thread *', 'thread_t']) -@header("{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <10s} {5: <5s} {6: <20s} {7: <45s} {8: <20s} {9: <20s}".format('thread', 'thread_id', 'processor', 'pri', 'io_policy', 'state', 'wait_queue', 'wait_event', 'wmesg', 'thread_name')) +@header("{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <6s} {5: <15s} {6: <15s} {7: <8s} {8: <12s} {9: <32s} {10: <20s} {11: <20s} {12: <20s}".format('thread', 'thread_id', 'processor', 'base', 'pri', 'sched_mode', 'io_policy', 'state', 'ast', 'wait_queue', 'wait_event', 'wmesg', 'thread_name')) def GetThreadSummary(thread): """ Summarize the thread structure. 
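# --- Editorial sketch: the doubling-mask idiom GetASTSummary above relies on
# (and that the thread-state decode below uses as well), shown standalone.
# The real macros index a dict containing a 0x0:'' entry; this illustration
# uses .get() so unknown bits are skipped instead of raising KeyError.
def flags_to_chars(value, char_map, max_bit):
    out = ''
    mask = 0x1
    while mask <= max_bit:
        out += char_map.get(value & mask, '')
        mask = mask << 1
    return out

# 0x5 == AST_PREEMPT | AST_URGENT per the table above:
assert flags_to_chars(0x5, {0x1: 'P', 0x2: 'Q', 0x4: 'U'}, 0x4) == 'PU'
# --- end editorial sketch ---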
It decodes the wait state and waitevents from the data in the struct. params: thread: value - value objecte representing a thread in kernel returns: str - summary of a thread + + State flags: + W - WAIT + S - SUSP + R - RUN + U - Uninterruptible + H - Terminated + A - Terminated and on termination queue + I - Idle thread + + policy flags: + B - darwinbg + L - lowpri cpu + T - IO throttle + P - IO passive + D - Terminated """ out_string = "" - format_string = "{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <10s} {5: <5s} {6: <20s} {7: <45s} {8: <20s} {9: <20s}" + format_string = "{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <6s} {5: <15s} {6: <15s} {7: <8s} {8: <12s} {9: <32s} {10: <20s} {11: <20s} {12: <20s}" thread_ptr_str = str("{0: <#020x}".format(thread)) if int(thread.static_param) : thread_ptr_str+="[WQ]" thread_id = hex(thread.thread_id) thread_name = '' processor = hex(thread.last_processor) + base_priority = str(int(thread.priority)) sched_priority = str(int(thread.sched_pri)) + sched_mode = '' + mode = str(thread.sched_mode) + if "TIMESHARE" in mode: + sched_mode+="timeshare" + elif "FIXED" in mode: + sched_mode+="fixed" + elif "REALTIME" in mode: + sched_mode+="realtime" + + if (unsigned(thread.bound_processor) != 0): + sched_mode+=" bound" + + # TH_SFLAG_THROTTLED + if (unsigned(thread.sched_flags) & 0x0004): + sched_mode+=" BG" io_policy_str = "" if int(thread.uthread) != 0: @@ -176,16 +296,15 @@ def GetThreadSummary(thread): io_policy_str += "D" state = int(thread.state) - thread_state_chars = {0:'', 1:'W', 2:'S', 4:'R', 8:'U', 16:'H', 32:'A', 64:'P', 128:'I'} + thread_state_chars = {0x0:'', 0x1:'W', 0x2:'S', 0x4:'R', 0x8:'U', 0x10:'H', 0x20:'A', 0x40:'P', 0x80:'I'} state_str = '' - state_str += thread_state_chars[int(state & 0x1)] - state_str += thread_state_chars[int(state & 0x2)] - state_str += thread_state_chars[int(state & 0x4)] - state_str += thread_state_chars[int(state & 0x8)] - state_str += thread_state_chars[int(state & 0x10)] - state_str += thread_state_chars[int(state & 0x20)] - state_str += thread_state_chars[int(state & 0x40)] - state_str += thread_state_chars[int(state & 0x80)] + mask = 0x1 + while mask <= 0x80 : + state_str += thread_state_chars[int(state & mask)] + mask = mask << 1 + + ast = int(thread.ast) | int(thread.reason) + ast_str = GetASTSummary(ast) #wait queue information wait_queue_str = '' @@ -203,12 +322,24 @@ def GetThreadSummary(thread): if int(uthread.uu_wmesg) != 0: wait_message = str(Cast(uthread.uu_wmesg, 'char *')) - out_string += format_string.format(thread_ptr_str, thread_id, processor, sched_priority, io_policy_str, state_str, wait_queue_str, wait_event_str, wait_message, thread_name ) + out_string += format_string.format(thread_ptr_str, thread_id, processor, base_priority, sched_priority, sched_mode, io_policy_str, state_str, ast_str, wait_queue_str, wait_event_str, wait_message, thread_name) return out_string +@lldb_type_summary(['coalition_t', 'coalition']) +@header("type coalition summary (header tbw)") +def GetCoalitionSummary(coal): + out_string = "" + format_string = '{0: <#020x} {1: 6s} {1: ^20s} {2: >14s} {3: ^10s} {4: <20s}".format("pid", "process", "io_policy", "wq_state", "command")) def GetProcSummary(proc): """ Summarize the process data. 
@@ -274,6 +405,51 @@ def GetProcSummary(proc): out_string += format_string.format(pid, proc_addr, " ".join([proc_rage_str, io_policy_str]), wq_num_threads, wq_idle_threads, wq_req_threads, process_name) return out_string +@lldb_type_summary(['tty_dev_t', 'tty_dev_t *']) +@header("{0: <20s} {1: <10s} {2: <10s} {3: <15s} {4: <15s} {5: <15s} {6: <15s}".format("tty_dev","master", "slave", "open", "free", "name", "revoke")) +def GetTTYDevSummary(tty_dev): + """ Summarizes the important fields in tty_dev_t structure. + params: tty_dev: value - value object representing a tty_dev_t in kernel + returns: str - summary of the tty_dev + """ + out_string = "" + format_string = "{0: <#020x} {1: <#010x} {2: <#010x} {3: <15s} {4: <15s} {5: <15s} {6: <15s}" + open_fn = kern.Symbolicate(int(hex(tty_dev.open), 16)) + free_fn = kern.Symbolicate(int(hex(tty_dev.free), 16)) + name_fn = kern.Symbolicate(int(hex(tty_dev.name), 16)) + revoke_fn = kern.Symbolicate(int(hex(tty_dev.revoke), 16)) + out_string += format_string.format(tty_dev, tty_dev.master, tty_dev.slave, open_fn, free_fn, name_fn, revoke_fn) + return out_string + +@lldb_type_summary(['kqueue *']) +@header("{: <20s} {: <20s} {: <6s} {: <20s} {: <10s}".format('kqueue', 'process', '#events', 'wqs', 'state')) +def GetKQueueSummary(kq): + """ summarizes kqueue information + returns: str - summary of kqueue + """ + out_string = "" + format_string = "{o: <#020x} {o.kq_p: <#020x} {o.kq_count: <6d} {o.kq_wqs: <#020x} {st_str: <10s}" + state = int(kq.kq_state) + state_str = '' + mask = 0x1 + while mask <= 0x80 : + if int(state & mask): + state_str += ' ' + xnudefines.kq_state_strings[int(state & mask)] + mask = mask << 1 + out_string += format_string.format(o=kq, st_str=state_str) + return out_string + +@lldb_type_summary(['knote *']) +@header("{0: <20s}".format('knote')) +def GetKnoteSummary(kn): + """ Summarizes a knote and related information + returns: str - summary of knote + """ + out_string = "" + format_string = "{o: <#020x}" + out_string += format_string.format(o=kn) + return out_string + # Macro: showtask @lldb_command('showtask', 'F:') @@ -418,6 +594,21 @@ def ShowProcFiles(cmd_args=None): #EndMacro: showprocfiles +#Macro: showkqueue +@lldb_command('showkqueue' ,'') +def ShowKQueue(cmd_args=[], cmd_options={}): + """ Given a struct kqueue pointer, display the summary of the kqueue + Usage: (lldb) showkqueue + """ + if not cmd_args: + raise ArgumentError('Invalid arguments') + + kq = kern.GetValueFromAddress(cmd_args[0], 'struct kqueue *') + print GetKQueueSummary.header + print GetKQueueSummary(kq) + +#EndMacro: showkqueue + #Macro: showtty @lldb_command('showtty') @@ -488,6 +679,24 @@ def ShowTTY(cmd_args=None): #EndMacro: showtty +#Macro showallttydevs + +@lldb_command('showallttydevs') +def ShowAllTTYDevs(cmd_args=[], cmd_options={}): + """ Show a list of ttydevs registered in the system. 
+ Usage: + (lldb)showallttydevs + """ + tty_dev_head = kern.globals.tty_dev_head + tty_dev = tty_dev_head + print GetTTYDevSummary.header + while unsigned(tty_dev) != 0: + print GetTTYDevSummary(tty_dev) + tty_dev = tty_dev.next + return "" + +#EndMacro: showallttydevs + #Macro: dumpcallqueue @lldb_command('dumpcallqueue') @@ -510,6 +719,15 @@ def DumpCallQueue(cmd_args=None): #EndMacro: dumpcallqueue +@lldb_command('showallcoalitions') +def ShowAllCoalitions(cmd_args=None): + """ Routine to print a summary listing of all the coalitions + """ + global kern + print GetCoalitionSummary.header + for c in kern.coalitions: + print GetCoalitionSummary(c) + @lldb_command('showalltasks') def ShowAllTasks(cmd_args=None): """ Routine to print a summary listing of all the tasks @@ -525,6 +743,7 @@ def ShowAllTasks(cmd_args=None): for t in kern.tasks: pval = Cast(t.bsd_info, 'proc *') print GetTaskSummary(t) +" "+ GetProcSummary(pval) + ZombTasks() @lldb_command('showterminatedtasks') def ShowTerminatedTasks(cmd_args=None): @@ -611,6 +830,12 @@ def ShowAllThreads(cmd_args = None): for t in kern.tasks: ShowTaskThreads([str(int(t))]) print " \n" + + for t in kern.terminated_tasks: + print "Terminated: \n" + ShowTaskThreads([str(int(t))]) + print " \n" + return @lldb_command('showtaskthreads', "F:") @@ -693,7 +918,8 @@ def ShowAllStacks(cmd_args=None): """ for t in kern.tasks: ShowTaskStacks(t) - print " \n" + print " \n" + ZombStacks() return # EndMacro: showallstacks @@ -706,7 +932,7 @@ def ShowCurrentStacks(cmd_args=None): processor_list = kern.GetGlobalVariable('processor_list') current_processor = processor_list while unsigned(current_processor) > 0: - print "\nProcessor {: <#020x} State {: #04x})".format(current_processor, int(current_processor.state), int(current_processor.cpu_id)) + print "\n" + GetProcessorSummary(current_processor) active_thread = current_processor.active_thread if unsigned(active_thread) != 0 : task_val = active_thread.task @@ -727,7 +953,7 @@ def ShowCurrentThreads(cmd_args=None): processor_list = kern.GetGlobalVariable('processor_list') current_processor = processor_list while unsigned(current_processor) > 0: - print "Processor {: <#020x} State {: #04x})".format(current_processor, int(current_processor.state), int(current_processor.cpu_id)) + print GetProcessorSummary(current_processor) active_thread = current_processor.active_thread if unsigned(active_thread) != 0 : task_val = active_thread.task @@ -770,13 +996,37 @@ def GetFullBackTrace(frame_addr, verbosity = vHUMAN, prefix = ""): def FullBackTrace(cmd_args=[]): """ Show full backtrace across the interrupt boundary. Syntax: fullbt - Example: kfullbt `$rbp` + Example: fullbt `$rbp` """ if len(cmd_args) < 1: print FullBackTrace.__doc__ return False print GetFullBackTrace(ArgumentStringToInt(cmd_args[0]), prefix="\t") +@lldb_command('fullbtall') +def FullBackTraceAll(cmd_args=[]): + """ Show full backtrace across the interrupt boundary for threads running on all processors. 
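# --- Editorial sketch: the hand-rolled next-pointer walk that showallttydevs
# above performs inline, as a reusable generator. The helper name is invented;
# macros in this file use IterateQueue/IterateLinkedList from the core module
# when a queue_chain_t linkage is available.
def walk_next_chain(head):
    node = head
    while unsigned(node) != 0:
        yield node
        node = node.next

# e.g.:
#   for tty_dev in walk_next_chain(kern.globals.tty_dev_head):
#       print GetTTYDevSummary(tty_dev)
# --- end editorial sketch ---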
+ Syntax: fullbtall + Example: fullbtall + """ + for processor in IterateLinkedList(kern.globals.processor_list, 'processor_list') : + print "\n" + GetProcessorSummary(processor) + active_thread = processor.active_thread + if unsigned(active_thread) != 0 : + task_val = active_thread.task + proc_val = Cast(task_val.bsd_info, 'proc *') + print GetTaskSummary.header + " " + GetProcSummary.header + print GetTaskSummary(task_val) + " " + GetProcSummary(proc_val) + print "\t" + GetThreadSummary.header + print "\t" + GetThreadSummary(active_thread) + print "\tBacktrace:" + + ThreadVal = GetLLDBThreadForKernelThread(active_thread) + + FramePtr = ThreadVal.frames[0].GetFP() + + print GetFullBackTrace(unsigned(FramePtr), prefix="\t") + @lldb_command('symbolicate') def SymbolicateAddress(cmd_args=[]): @@ -866,42 +1116,93 @@ def GetProcessorSummary(processor): params: processor - value representing struct processor * return: str - representing the details of given processor """ - out_str = "Processor {: <#012x} ".format(processor) - out_str += "State {:d} (cpu_id {:#x})\n".format(processor.state, processor.cpu_id) + + processor_state_str = "INVALID" + processor_state = int(processor.state) + + processor_states = { + 0: 'OFF_LINE', + 1: 'SHUTDOWN', + 2: 'START', + # 3 (formerly INACTIVE) + 4: 'IDLE', + 5: 'DISPATCHING', + 6: 'RUNNING' + } + + if processor_state in processor_states: + processor_state_str = "{0: <11s} ".format(processor_states[processor_state]) + + out_str = "Processor {: <#018x} cpu_id {:>#4x} State {: + """ + out_str = '' + runq = kern.GetValueFromAddress(cmd_args[0], 'struct run_queue *') + out_str += GetRunQSummary(runq) + print out_str + def GetRunQSummary(runq): """ Internal function to print summary of run_queue params: runq - value representing struct run_queue * return: str - representing the details of given run_queue """ - out_str = " Priority Run Queue Info: Count {: <10d}\n".format(runq.count) + out_str = " runq: count {: <10d} highq: {: <10d} urgency {: <10d}\n".format(runq.count, runq.highq, runq.urgency) + runq_queue_i = 0 runq_queue_count = sizeof(runq.queues)/sizeof(runq.queues[0]) - while runq.count and (runq_queue_i < runq_queue_count): + + for runq_queue_i in range(runq_queue_count) : runq_queue_head = addressof(runq.queues[runq_queue_i]) runq_queue_p = runq_queue_head.next + if unsigned(runq_queue_p) != unsigned(runq_queue_head): runq_queue_this_count = 0 - while runq_queue_p != runq_queue_head: - runq_queue_this_count = runq_queue_this_count + 1 - runq_queue_p_thread = Cast(runq_queue_p, 'thread_t') - # Get the task information - out_str += GetTaskSummary.header + " " + GetProcSummary.header - pval = Cast(runq_queue_p_thread.task.bsd_info, 'proc *') - out_str += GetTaskSummary(runq_queue_p_thread.task) +" "+ GetProcSummary(pval) - # Get the thread information with related stack traces - out_str += GetThreadSummary.header + GetThreadSummary(runq_queue_p_thread) - out_str += GetThreadBackTrace(LazyTarget.GetProcess().GetThreadByID(int(runq_queue_p_thread.thread_id)), - prefix="\t") - runq_queue_p = runq_queue_p.next - - out_str += " Queue Priority {: <3d} [{: <#012x}] Count {:d}\n".format(runq_queue_i, - runq_queue_head, runq_queue_this_count) - - runq_queue_i = runq_queue_i + 1 + + for thread in IterateQueue(runq_queue_head, "thread_t", "links"): + runq_queue_this_count += 1 + + out_str += " Queue [{: <#012x}] Priority {: <3d} count {:d}\n".format(runq_queue_head, runq_queue_i, runq_queue_this_count) + out_str += "\t" + GetThreadSummary.header + "\n" + for thread in 
IterateQueue(runq_queue_head, "thread_t", "links"): + out_str += "\t" + GetThreadSummary(thread) + "\n" + if config['verbosity'] > vHUMAN : + out_str += "\t" + GetThreadBackTrace(thread, prefix="\t\t") + "\n" return out_str + def GetGrrrSummary(grrr_runq): """ Internal function to print summary of grrr_run_queue params: grrr_runq - value representing struct grrr_run_queue * @@ -911,26 +1212,17 @@ def GetGrrrSummary(grrr_runq): grrr_runq.weight, grrr_runq.current_group) grrr_group_i = 0 grrr_group_count = sizeof(grrr_runq.groups)/sizeof(grrr_runq.groups[0]) - while grrr_runq.count and (grrr_group_i < grrr_group_count): + for grrr_group_i in range(grrr_group_count) : grrr_group = addressof(grrr_runq.groups[grrr_group_i]) - runq_queue_p = runq_queue_head.next if grrr_group.count > 0: out_str += " Group {: <3d} [{: <#012x}] ".format(grrr_group.index, grrr_group) out_str += "Count {:d} Weight {:d}\n".format(grrr_group.count, grrr_group.weight) grrr_group_client_head = addressof(grrr_group.clients) - grrr_group_client = grrr_group_client_head.next - while grrr_group_client != grrr_group_client_head: - grrr_group_client_thread = Cast(grrr_group_client, 'thread_t') - # Get the task information - out_str += GetTaskSummary.header + " " + GetProcSummary.header - pval = Cast(grrr_group_client_thread.task.bsd_info, 'proc *') - out_str += GetTaskSummary(grrr_group_client_thread.task) +" "+ GetProcSummary(pval) - # Get the thread information with related stack traces - out_str += GetThreadSummary.header + GetThreadSummary(grrr_group_client_thread) - out_str += GetThreadBackTrace(LazyTarget.GetProcess().GetThreadByID(int(grrr_group_client_thread.thread_id)), - prefix="\t") - grrr_group_client = grrr_group_client.next - grrr_group_i = grrr_group_i + 1 + out_str += GetThreadSummary.header + for thread in IterateQueue(grrr_group_client_head, "thread_t", "links"): + out_str += "\t" + GetThreadSummary(thread) + "\n" + if config['verbosity'] > vHUMAN : + out_str += "\t" + GetThreadBackTrace(thread, prefix="\t\t") + "\n" return out_str @lldb_command('showallprocessors') @@ -942,6 +1234,7 @@ def ShowAllProcessors(cmd_args=None): show_grrr = 0 show_priority_runq = 0 show_priority_pset_runq = 0 + show_group_pset_runq = 0 show_fairshare_grrr = 0 show_fairshare_list = 0 sched_enum_val = kern.globals._sched_enum @@ -957,81 +1250,86 @@ def ShowAllProcessors(cmd_args=None): show_fairshare_grrr = 1 elif sched_enum_val == 5: show_priority_runq = 1 + show_group_pset_runq = 1 show_fairshare_list = 1 elif sched_enum_val == 6: - show_priority_pset_runq = 1 + show_priority_pset_runq = 1 + show_priority_runq = 1 show_fairshare_list = 1 out_str = '' - while pset: + + out_str += "Scheduler: {:s} ({:s}, {:d})\n".format(kern.globals.sched_string, + kern.Symbolicate(unsigned(kern.globals.sched_current_dispatch)), + sched_enum_val) + + out_str += "Runnable threads: {:d} Timeshare threads: {:d} Background threads {:d}\n".format( + kern.globals.sched_run_count, kern.globals.sched_share_count, kern.globals.sched_background_count) + + if show_group_pset_runq: + # Create a group->task mapping + task_map = {} + for task in kern.tasks: + task_map[unsigned(task.sched_group)] = task + for task in kern.terminated_tasks: + task_map[unsigned(task.sched_group)] = task + + while unsigned(pset) != 0: out_str += "Processor Set {: <#012x} Count {:d} (cpu_id {:<#x}-{:<#x})\n".format(pset, pset.cpu_set_count, pset.cpu_set_low, pset.cpu_set_hi) + + if show_priority_pset_runq: + runq = pset.pset_runq + out_str += GetRunQSummary(runq) + + if 
show_group_pset_runq: + out_str += "Main Runq:\n" + runq = pset.pset_runq + out_str += GetGroupSetSummary(runq, task_map) + out_str += "All Groups:\n" + # TODO: Possibly output task header for each group + for group in IterateQueue(kern.globals.sched_groups, "sched_group_t", "sched_groups"): + if (group.runq.count != 0) : + task = task_map.get(unsigned(group), "Unknown task!") + out_str += "Group {: <#012x} Task {: <#012x}\n".format(unsigned(group), unsigned(task)) + out_str += GetRunQSummary(group.runq) + out_str += " Active Processors:\n" - active_queue_head = addressof(pset.active_queue) - active_elt = active_queue_head.next - while active_elt != active_queue_head: - processor = Cast(active_elt, 'processor *') + for processor in IterateQueue(pset.active_queue, "processor_t", "processor_queue"): out_str += " " out_str += GetProcessorSummary(processor) if show_priority_runq: - runq = addressof(processor.runq) + runq = processor.runq out_str += GetRunQSummary(runq) if show_grrr: - grrr_runq = addressof(processor.grrr_runq) + grrr_runq = processor.grrr_runq out_str += GetGrrrSummary(grrr_runq) - - if processor.processor_meta and (processor.processor_meta.primary == - processor): - processor_meta_idle_head = addressof(processor.processor_meta.idle_queue) - processor_meta_idle = processor_meta_idle_head.next - while processor_meta_idle != processor_meta_idle_head: - out_str += " Idle Meta Processor: " - out_str += GetProcessorSummary(processor_meta_idle) - processor_meta_idle = processor_meta_idle.next - active_elt = active_elt.next out_str += " Idle Processors:\n" - idle_queue_head = addressof(pset.idle_queue) - idle_elt = idle_queue_head.next - while idle_elt != idle_queue_head: - processor = Cast(idle_elt, 'processor *') - out_str += " " - out_str += GetProcessorSummary(processor) - - if processor.processor_meta and (processor.processor_meta.primary == - processor): - processor_meta_idle_head = addressof(processor.processor_meta.idle_queue) - processor_meta_idle = processor_meta_idle_head.next - while processor_meta_idle != processor_meta_idle_head: - out_str += " Idle Meta Processor: " - out_str += GetProcessorSummary(processor_meta_idle) - processor_meta_idle = processor_meta_idle.next - idle_elt = idle_elt.next - - if show_priority_pset_runq: - runq = addressof(pset.pset_runq) - out_str += "\n" + GetRunQSummary(runq) + for processor in IterateQueue(pset.idle_queue, "processor_t", "processor_queue"): + out_str += " " + GetProcessorSummary(processor) + if show_priority_runq: + out_str += GetRunQSummary(processor.runq) + + out_str += " Idle Secondary Processors:\n" + for processor in IterateQueue(pset.idle_secondary_queue, "processor_t", "processor_queue"): + out_str += " " + GetProcessorSummary(processor) + if show_priority_runq: + out_str += GetRunQSummary(processor.runq) + pset = pset.pset_list out_str += "\nRealtime Queue Count {:d}\n".format(kern.globals.rt_runq.count) - rt_runq_head = addressof(kern.globals.rt_runq.queue) - rt_runq_local = rt_runq_head.next - while rt_runq_local != rt_runq_head: - rt_runq_thread = Cast(rt_runq_local, 'thread *') + for rt_runq_thread in IterateQueue(kern.globals.rt_runq.queue, "thread_t", "links"): out_str += ShowTask([unsigned(rt_runq_thread.task)]) out_str += ShowAct([unsigned(rt_runq_thread)]) - rt_runq_local = rt_runq_local.next out_str += "\n" if show_fairshare_list: out_str += "Fair Share Queue Count {:d}\n".format(kern.globals.fs_runq.count) - fs_runq_head = addressof(kern.globals.fs_runq.queue) - fs_runq_local = fs_runq_head.next - while 
fs_runq_local != fs_runq_head: - fs_runq_thread = Cast(fs_runq, 'thread *') + for fs_runq_thread in IterateQueue(kern.globals.fs_runq.queue, "thread_t", "links"): out_str += ShowTask([unsigned(fs_runq_thread.task)]) out_str += ShowAct([unsigned(rt_runq_thread)]) - fs_runq_local = fs_runq_local.next if show_fairshare_grrr: out_str += "Fair Share Queue Count {:d}\n".format(kern.globals.fs_grrr_runq.count) fs_grrr = addressof(kern.globals.fs_grrr_runq) @@ -1051,18 +1349,18 @@ def GetLedgerEntrySummary(ledger_template, ledger, i): lf_tracking_max = 0x4000 out_str = '' - now = kern.globals.sched_tick / 20 + now = unsigned(kern.globals.sched_tick) / 20 lim_pct = 0 - out_str += "{: >25s} {: unsigned(ledger.le_limit)): + if ((unsigned(ledger.le_credit) - unsigned(ledger.le_debit)) > unsigned(ledger.le_limit)): out_str += " X " else: out_str += " " @@ -1106,7 +1404,7 @@ def GetThreadLedgerSummary(thread_val): i = i + 1 return out_str -@header("{0: <15s} {1: >9s} {2: <2s} {3: >12s} {4: >9s} {5: >6s} {6: >8s} {7: <10s} {8: <9s} \ +@header("{0: <15s} {1: >16s} {2: <2s} {3: >15s} {4: >9s} {5: >6s} {6: >8s} {7: <10s} {8: <9s} \ {9: <12s} {10: <7s} {11: <15s} {12: <8s} {13: <9s} {14: <6s} {15: >6s}".format( "task [thread]", "entry", "#", "balance", "peakA", "(age)", "peakB", "(age)", "credit", "debit", "limit", "refill period", "lim pct", "warn pct", "over?", "flags")) @@ -1237,7 +1535,6 @@ def ShowAllTaskPolicy(cmd_args=None): ["t_int_gpu_deny", "gpudeny-int"], ["t_ext_gpu_deny", "gpudeny-ext"], ["t_role", "role"], - ["t_visibility", "vis"], ["t_tal_enabled", "tal-enabled"], ["t_base_latency_qos", "latency-base"], ["t_over_latency_qos", "latency-override"], @@ -1258,7 +1555,8 @@ def ShowAllTaskPolicy(cmd_args=None): ["t_sup_timer", "timer-throttling"], ["t_sup_disk", "disk-throttling"], ["t_sup_cpu_limit", "cpu-limits"], - ["t_sup_suspend", "suspend"] + ["t_sup_suspend", "suspend"], + ["t_sup_bg_sockets", "bg-sockets"] ] suppression="" @@ -1284,8 +1582,7 @@ def ShowAllTaskPolicy(cmd_args=None): ["t_latency_qos", "latency-qos"], ["t_through_qos", "throughput-qos"], ["t_sup_active", "suppression-active"], - ["t_role", "role"], - ["t_visibility", "vis"] + ["t_role", "role"] ] effective="" @@ -1316,5 +1613,51 @@ def ShowAllTaskPolicy(cmd_args=None): print "pended: " + pended +@lldb_type_summary(['wait_queue', 'wait_queue_t']) +@header("{: <20s} {: <20s} {: <15s} {:<5s} {:<5s} {: <20s}".format("waitq", "interlock", "policy", "members", "threads", "eventmask")) +def GetWaitQSummary(waitq): + """ Summarizes the important fields in task structure. + params: task: value - value object representing a task in kernel + returns: str - summary of the task + """ + out_string = "" + format_string = '{: <#020x} {: <#020x} {: <15s} {: <5d} {: <5d} {: <#020x}' + + wqtype = "" + + if (waitq.wq_fifo == 1) : + wqtype += "FIFO" + else : + wqtype += "PRIO" + + if (waitq.wq_prepost == 1) : + wqtype += "Prepost" + + if (waitq.wq_type == 0x3) : + wqtype += "Set" + elif (waitq.wq_type == 0x2) : + wqtype += "Queue" + else : + wqtype += "INVALID" + + out_string += format_string.format(waitq, unsigned(waitq.wq_interlock.lock_data), policy, 0, 0, unsigned(waitq.wq_eventmask)) + + out_string += "\n" + GetThreadSummary.header + + for thread in IterateQueue(waitq.wq_queue, "thread_t", "links"): + out_string += "\n" + GetThreadSummary(thread) + + return out_string + + +@lldb_command('showallsuspendedtasks', '') +def ShowSuspendedTasks(cmd_args=[], options={}): + """ Show a list of suspended tasks with their process name summary. 
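# --- Editorial note: as written, GetWaitQSummary above formats a name
# `policy` that is never assigned in the function; the string it builds is
# `wqtype`, so the call was presumably intended to read:
#
#   out_string += format_string.format(waitq,
#       unsigned(waitq.wq_interlock.lock_data), wqtype, 0, 0,
#       unsigned(waitq.wq_eventmask))
# --- end editorial note ---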
+ """ + print GetTaskSummary.header + ' ' + GetProcSummary.header + for t in kern.tasks: + if t.suspend_count > 0: + print GetTaskSummary(t) + ' ' + GetProcSummary(Cast(t.bsd_info, 'proc *')) + return True diff --git a/tools/lldbmacros/scheduler.py b/tools/lldbmacros/scheduler.py index cf6baa3a6..453708083 100644 --- a/tools/lldbmacros/scheduler.py +++ b/tools/lldbmacros/scheduler.py @@ -20,6 +20,12 @@ def ShowInterrupts(cmd_args=None): print "CPU 1 IRQ: {:d}\n".format(cpu_data_entry.cpu_stat.irq_ex_cnt) print "CPU 1 IPI: {:d}\n".format(cpu_data_entry.cpu_stat.ipi_cnt) print "CPU 1 TMR: {:d}\n".format(cpu_data_entry.cpu_stat.timer_cnt) + elif kern.arch == 'arm64': + cdentries = kern.GetValueFromAddress(kern.GetLoadAddressForSymbol('CpuDataEntries') + 24, 'uintptr_t *') + cpu_data_entry = Cast(dereference(cdentries), 'cpu_data_t *') + print "CPU 1 IRQ: {:d}\n".format(cpu_data_entry.cpu_stat.irq_ex_cnt) + print "CPU 1 IPI: {:d}\n".format(cpu_data_entry.cpu_stat.ipi_cnt) + print "CPU 1 TMR: {:d}\n".format(cpu_data_entry.cpu_stat.timer_cnt) # EndMacro: showinterrupts diff --git a/tools/lldbmacros/structanalyze.py b/tools/lldbmacros/structanalyze.py new file mode 100644 index 000000000..f4c21553d --- /dev/null +++ b/tools/lldbmacros/structanalyze.py @@ -0,0 +1,73 @@ +import lldb +from xnu import * + +def _showStructPacking(symbol, prefix, begin_offset=0): + """ + recursively parse the field members of structure. + params : symbol (lldb.SBType) reference to symbol in binary + prefix (string) string to be prefixed for each line of output. Useful for recursive struct parsing. + returns: string containing lines of output. + """ + ctype = "unknown type" + if symbol.GetTypeClass() == lldb.eTypeClassUnion : + ctype = "union" + if symbol.GetTypeClass() == lldb.eTypeClassStruct : + ctype = "struct" + outstr = "[%4d] (%s) %s { " % (symbol.GetByteSize(), ctype, symbol.GetName()) + "\n" + numFields = symbol.GetNumberOfFields() + _has_memory_hole = False + _compact_size = 0 # asuming the struct is perfectly packed + _compact_offset = begin_offset + _previous_bit_offset = 0 + for i in range(numFields): + member = symbol.GetFieldAtIndex(i) + m_offset = member.GetOffsetInBytes() + begin_offset + m_offset_bits = member.GetOffsetInBits() + m_type = member.GetType() + m_name = member.GetName() + m_size = m_type.GetByteSize() + warningstr = "" + debugstr = "" # + str((m_size, m_offset , m_offset_bits, _previous_bit_offset, _compact_offset, begin_offset)) + if _compact_offset != m_offset and (m_offset_bits - _previous_bit_offset) > m_size*8 : + _has_memory_hole = True + warningstr = " *** Possible memory hole ***" + _compact_offset = m_offset + _compact_offset += m_size + if m_type.GetTypeClass() == lldb.eTypeClassStruct or m_type.GetTypeClass() == lldb.eTypeClassUnion : + outstr += prefix + ("*%4d," % m_offset) + _showStructPacking(m_type, prefix+" ", m_offset) + warningstr + debugstr + "\n" + else: + outstr += prefix + ("+%4d,[%4d] (%s) %s" % (m_offset, m_size, m_type.GetName(), m_name)) + warningstr + debugstr + "\n" + if i > 0 : + _previous_bit_offset = m_offset_bits + outstr += prefix + "}" + if _has_memory_hole == True : + outstr += " *** Warning: Struct layout leaves memory hole *** " + return outstr + +@lldb_command('showstructpacking') +def showStructInfo(cmd_args=None): + """Show how a structure is packed in the binary. 
The format is + +, [] () + For example: + (lldb) script lldbmacros.showStructInfo("pollfd") + [ 8] (struct) pollfd { + + 0,[ 4] (int) fd + + 4,[ 2] (short) events + + 6,[ 2] (short) revents + } + syntax: showstructpacking task + """ + if not cmd_args: + raise ArgumentError("Please provide a type name.") + + sym = gettype(cmd_args[0]) + if sym == None: + print "No such struct found" + if sym.GetTypeClass() == lldb.eTypeClassTypedef: + sym = sym.GetCanonicalType() + if sym.GetTypeClass() != lldb.eTypeClassStruct: + print "%s is not a structure" % cmd_args[0] + else: + print _showStructPacking(sym,"", 0) + +# EndMacro: showstructinto diff --git a/tools/lldbmacros/userspace.py b/tools/lldbmacros/userspace.py index afd24f662..21eeb328a 100644 --- a/tools/lldbmacros/userspace.py +++ b/tools/lldbmacros/userspace.py @@ -2,26 +2,12 @@ from xnu import * from utils import * from process import * from pmap import * - -def _GetIntegerDataFromTask(u_ptr, task_abi): - """ - params: - u_ptr : int - pointer in user memory - task_abi : int - what kind of user program is running - returns: - int - value stored at specified u_ptr. - """ - if kern.arch != "x86_64": - raise ValueError("This function does not work for non x86_64 arch") - if task_abi == 0xf : - return unsigned(dereference(kern.GetValueFromAddress(u_ptr, 'uint64_t *'))) - else: - return unsigned(dereference(kern.GetValueFromAddress(u_ptr, 'uint32_t *'))) +import struct def GetBinaryNameForPC(pc_val, user_lib_info = None): """ find the binary in user_lib_info that the passed pc_val falls in range of. params: - pc_val : int - integer form of the pc address + pc_val : int - integer form of the pc address user_lib_info: [] of [] which hold start, end, binary name returns: str - Name of binary or "unknown" if not found. @@ -55,12 +41,14 @@ def ShowX86UserStack(thread, user_lib_info = None): user_ip = iss.uss.ss_64.isf.rip user_frame = iss.uss.ss_64.rbp user_abi_ret_offset = 8 + user_abi_type = "uint64_t" else: debuglog("user process is 32 bit") user_ip = iss.uss.ss_32.eip user_frame = iss.uss.ss_32.ebp user_abi_ret_offset = 4 - + user_abi_type = "uint32_t" + if user_ip == 0: print "This activation does not appear to have a valid user context." 
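# --- Editorial sketch: the kind of padding showstructpacking reports,
# reproduced host-side with ctypes instead of lldb.SBType. Illustration only;
# the macro itself works from the target's debug info. Offsets assume an
# LP64 host (8-byte pointers).
import ctypes

class pollfd_like(ctypes.Structure):
    _fields_ = [('fd', ctypes.c_int),        # +0, [4]
                ('events', ctypes.c_short),  # +4, [2]
                ('revents', ctypes.c_short)] # +6, [2]

class holey(ctypes.Structure):
    _fields_ = [('c', ctypes.c_char),        # +0, [1]; 7 padding bytes follow
                ('p', ctypes.c_void_p)]      # +8, [8]

assert ctypes.sizeof(pollfd_like) == 8  # perfectly packed, no hole
assert holey.p.offset == 8              # the memory hole the macro would flag
# --- end editorial sketch ---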
return False @@ -68,11 +56,6 @@ def ShowX86UserStack(thread, user_lib_info = None): cur_ip = user_ip cur_frame = user_frame debuglog("ip= 0x%x , fr = 0x%x " % (cur_ip, cur_frame)) - kdp_pmap_addr = unsigned(addressof(kern.globals.kdp_pmap)) - if not WriteInt64ToMemoryAddress(unsigned(thread.task.map.pmap), kdp_pmap_addr): - print "Failed to write in kdp_pmap = 0x{0:0>16x} value.".format(thread.task.map.pmap) - return False - debuglog("newpmap = 0x{:x}".format(kern.globals.kdp_pmap)) frameformat = "{0:d} FP: 0x{1:x} PC: 0x{2:x}" if user_lib_info is not None: @@ -80,19 +63,17 @@ def ShowX86UserStack(thread, user_lib_info = None): print frameformat.format(0, cur_frame, cur_ip, GetBinaryNameForPC(cur_ip, user_lib_info)) print kern.Symbolicate(cur_ip) - tmp_frame = unsigned(cur_frame) - prev_frame = _GetIntegerDataFromTask(tmp_frame, abi) - prev_ip = _GetIntegerDataFromTask(tmp_frame + user_abi_ret_offset, abi) - frameno = 1 - while prev_frame and prev_frame != 0x0000000800000008: - print frameformat.format(frameno, prev_frame, prev_ip, GetBinaryNameForPC(prev_ip, user_lib_info)) - print kern.Symbolicate(prev_ip) - prev_ip = _GetIntegerDataFromTask(prev_frame + user_abi_ret_offset, abi) - prev_frame = _GetIntegerDataFromTask(prev_frame, abi) - frameno +=1 - if not WriteInt64ToMemoryAddress(0, kdp_pmap_addr): - print "Failed to write in kdp_pmap = 0" - return False + + frameno = 0 + while True: + frameno = frameno + 1 + frame = GetUserDataAsString(thread.task, unsigned(cur_frame), user_abi_ret_offset*2) + cur_ip = _ExtractDataFromString(frame, user_abi_ret_offset, user_abi_type) + cur_frame = _ExtractDataFromString(frame, 0, user_abi_type) + if not cur_frame or cur_frame == 0x0000000800000008: + break + print frameformat.format(frameno, cur_frame, cur_ip, GetBinaryNameForPC(cur_ip, user_lib_info)) + print kern.Symbolicate(cur_ip) return def _PrintARMUserStack(task, cur_pc, cur_fp, framesize, frametype, frameformat, user_lib_info=None): @@ -120,6 +101,31 @@ def ShowARMUserStack(thread, user_lib_info = None): frametype = "uint32_t" _PrintARMUserStack(thread.task, cur_pc, cur_fp, framesize, frametype, frameformat, user_lib_info=user_lib_info) +def ShowARM64UserStack(thread, user_lib_info = None): + SAVED_STATE_FLAVOR_ARM=20 + SAVED_STATE_FLAVOR_ARM64=21 + upcb = thread.machine.upcb + flavor = upcb.ash.flavor + frameformat = "{0:>2d} FP: 0x{1:x} PC: 0x{2:x}" + if flavor == SAVED_STATE_FLAVOR_ARM64: + cur_pc = unsigned(upcb.uss.ss_64.pc) + cur_fp = unsigned(upcb.uss.ss_64.fp) + if user_lib_info is not None: + frameformat = "{0:>2d} {3: <30s} 0x{2:x}" + framesize = 16 + frametype = "uint64_t" + elif flavor == SAVED_STATE_FLAVOR_ARM: + cur_pc = unsigned(upcb.uss.ss_32.pc) + cur_fp = unsigned(upcb.uss.ss_32.r[7]) + if user_lib_info is not None: + frameformat = "{0:>2d}: {3: <30s} 0x{2:x}" + framesize = 8 + frametype = "uint32_t" + else: + raise RuntimeError("Thread {0} has an invalid flavor {1}".format(unsigned(thread), flavor)) + + _PrintARMUserStack(thread.task, cur_pc, cur_fp, framesize, frametype, frameformat, user_lib_info=user_lib_info) + @lldb_command('showthreaduserstack') def ShowThreadUserStack(cmd_args=None): @@ -134,6 +140,45 @@ def ShowThreadUserStack(cmd_args=None): ShowX86UserStack(thread) elif kern.arch == "arm": ShowARMUserStack(thread) + elif kern.arch == "arm64": + ShowARM64UserStack(thread) + return True + +@lldb_command('printuserdata','X') +def PrintUserspaceData(cmd_args=None, cmd_options={}): + """ Read userspace data for given task and print based on format provided. 
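# --- Editorial sketch: one iteration of the rewritten frame-pointer walk
# above, spelled out for x86_64 where each frame stores (saved FP, return
# address) starting at FP. The helper name is invented; GetUserDataAsString
# and _ExtractDataFromString are the routines defined in this file.
def read_one_frame(task, fp):
    raw = GetUserDataAsString(task, unsigned(fp), 16)   # 8B saved FP + 8B RA
    saved_fp = _ExtractDataFromString(raw, 0, "uint64_t")
    ret_addr = _ExtractDataFromString(raw, 8, "uint64_t")
    return saved_fp, ret_addr
# The 0x0000000800000008 value tested in the loop appears to act as an
# end-of-stack sentinel rather than a real saved frame pointer.
# --- end editorial sketch ---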
+ Syntax: (lldb) printuserdata + params: + : pointer to task + : address to user space memory + : String representation for processing the data and printing it. + e.g Q -> unsigned long long, q-> long long, I-> unsigned int, i->int + options: + -X : print all values in hex. + """ + + if not cmd_args or len(cmd_args) < 3: + raise ArgumentError("Insufficient arguments") + task = kern.GetValueFromAddress(cmd_args[0], 'task *') + uspace_addr = ArgumentStringToInt(cmd_args[1]) + format_specifier_str = cmd_args[2] + user_data_len = 0 + try: + user_data_len = struct.calcsize(format_specifier_str) + except Exception, e: + raise ArgumentError("Invalid format specifier provided.") + + user_data_string = GetUserDataAsString(task, uspace_addr, user_data_len) + if not user_data_string: + print "Could not read any data from userspace address." + return False + upacked_data = struct.unpack(format_specifier_str, user_data_string) + for i in range(len(upacked_data)): + if "-X" in cmd_options: + print "%d: " % i + hex(upacked_data[i]) + else: + print "%d: " % i + str(upacked_data[i]) + return True @lldb_command('showtaskuserstacks') @@ -142,7 +187,7 @@ def ShowTaskUserStacks(cmd_args=None): Syntax: (lldb) showtaskuserstacks The format is compatible with CrashTracer. You can also use the speedtracer plugin as follows (lldb) showtaskuserstacks -p speedtracer - + Note: the address ranges are approximations. Also the list may not be completely accurate. This command expects memory read failures and hence will skip a library if unable to read information. Please use your good judgement and not take the output as accurate """ @@ -156,7 +201,7 @@ def ShowTaskUserStacks(cmd_args=None): crash_report_format_string = """\ Process: {pid: <10d} Path: {path: <50s} -Identifier: {pname: <30s} +Identifier: {pname: <30s} Version: ??? (???) Code Type: {parch: <20s} Parent Process: {ppname: >20s}[{ppid:d}] @@ -180,7 +225,7 @@ Synthetic crash log generated from Kernel userstacks is_64 = False if pval.p_flag & 0x4 : is_64 = True - + parch_s = "" if kern.arch == "x86_64" or kern.arch == "i386": osversion = "Mac OS X 10.8" @@ -215,7 +260,9 @@ Synthetic crash log generated from Kernel userstacks printthread_user_stack_ptr = ShowX86UserStack if kern.arch == "arm": printthread_user_stack_ptr = ShowARMUserStack - + elif kern.arch =="arm64": + printthread_user_stack_ptr = ShowARM64UserStack + counter = 0 for thval in IterateQueue(task.threads, 'thread *', 'task_threads'): print "\nThread {0:d} name:0x{1:x}\nThread {0:d}:".format(counter, thval) @@ -253,7 +300,7 @@ def GetUserDataAsString(task, addr, size): if not WriteInt64ToMemoryAddress(0, kdp_pmap_addr): debuglog("Failed to reset in kdp_pmap from GetUserDataAsString.") return "" - elif kern.arch in ['arm'] and long(size) < (2 * kern.globals.page_size): + elif kern.arch in ['arm', 'arm64', 'x86_64'] and long(size) < (2 * kern.globals.page_size): # Without the benefit of a KDP stub on the target, try to # find the user task's physical mapping and memcpy the data. # If it straddles a page boundary, copy in two passes @@ -284,7 +331,7 @@ def GetUserDataAsString(task, addr, size): debuglog("Not mapped task 0x{:x} address 0x{:x}".format(task, addr)) return "" range2_in_kva = kern.PhysToKernelVirt(paddr_range2) - content += LazyTarget.GetProcess().ReadMemory(range1_in_kva, range1_size, err) + content += LazyTarget.GetProcess().ReadMemory(range2_in_kva, range2_size, err) if not err.Success(): raise RuntimeError("Failed to read process memory. 
Error: " + err.description) else: @@ -321,7 +368,7 @@ def _ExtractDataFromString(strdata, offset, data_type, length=0): return struct.unpack(unpack_str, strdata[offset:(offset + length)])[0] def GetPathForImage(task, path_address): - """ Maps 32 bytes at a time and packs as string + """ Maps 32 bytes at a time and packs as string params: task: obj - referencing task to read data from path_address: int - address where the image path is stored @@ -394,7 +441,7 @@ def GetImageInfo(task, mh_image_address, mh_path_address, approx_end_address=Non retval = None while lc_idx < mh_ncmds: # 24 bytes is the size of uuid_command - lcmd_data = GetUserDataAsString(task, lc_address, 24) + lcmd_data = GetUserDataAsString(task, lc_address, 24) lc_cmd = _ExtractDataFromString(lcmd_data, 4 * 0, "uint32_t") lc_cmd_size = _ExtractDataFromString(lcmd_data, 4 * 1, "uint32_t") lc_data = _ExtractDataFromString(lcmd_data, 4*2, "string", 16) @@ -406,13 +453,14 @@ def GetImageInfo(task, mh_image_address, mh_path_address, approx_end_address=Non # need to print the uuid now. uuid_data = [ord(x) for x in lc_data] found_uuid_data = True - uuid_out_string = "{a[0]:02X}{a[1]:02X}{a[2]:02X}{a[3]:02X}-{a[4]:02X}{a[5]:02X}-{a[6]:02X}{a[7]:02X}-{a[8]:02X}{a[9]:02X}-{a[10]:02X}{a[11]:02X}{a[12]:02X}{a[13]:02X}{a[14]:02X}{a[15]:02X}".format(a=uuid_data) + uuid_out_string = "{a[0]:02X}{a[1]:02X}{a[2]:02X}{a[3]:02X}-{a[4]:02X}{a[5]:02X}-{a[6]:02X}{a[7]:02X}-{a[8]:02X}{a[9]:02X}-{a[10]:02X}{a[11]:02X}{a[12]:02X}{a[13]:02X}{a[14]:02X}{a[15]:02X}".format(a=uuid_data) #also print image path path_out_string = GetPathForImage(task, mh_path_address) path_base_name = path_out_string.split("/")[-1] retval = print_format.format(mh_image_address, image_end_load_address, path_base_name, uuid_out_string, path_out_string) elif lc_cmd == 0xe: ShowTaskUserLibraries.exec_load_path = lc_address + _ExtractDataFromString(lcmd_data, 4*2, "uint32_t") + debuglog("Found load command to be 0xe for address %s" % hex(ShowTaskUserLibraries.exec_load_path)) lc_address = lc_address + lc_cmd_size lc_idx += 1 @@ -443,16 +491,22 @@ def ShowTaskUserLibraries(cmd_args=None): task = kern.GetValueFromAddress(cmd_args[0], 'task_t') is_task_64 = int(task.t_flags) & 0x1 dyld_all_image_infos_address = unsigned(task.all_image_info_addr) + debuglog("dyld_all_image_infos_address = %s" % hex(dyld_all_image_infos_address)) + cur_data_offset = 0 if dyld_all_image_infos_address == 0: print "No dyld shared library information available for task" return False + + debuglog("Extracting version information.") vers_info_data = GetUserDataAsString(task, dyld_all_image_infos_address, 112) version = _ExtractDataFromString(vers_info_data, cur_data_offset, "uint32_t") cur_data_offset += 4 - if version > 12: + if version > 14: print "Unknown dyld all_image_infos version number %d" % version image_info_count = _ExtractDataFromString(vers_info_data, cur_data_offset, "uint32_t") + debuglog("version = %d count = %d is_task_64 = %s" % (version, image_info_count, repr(is_task_64))) + ShowTaskUserLibraries.exec_load_path = 0 if is_task_64: image_info_size = 24 @@ -471,6 +525,7 @@ def ShowTaskUserLibraries(cmd_args=None): image_info_list = [] while i < image_info_count: image_info_address = image_info_array_address + i * image_info_size + debuglog("i = %d, image_info_address = %s, image_info_size = %d" % (i, hex(image_info_address), image_info_size)) n_im_info_addr = None img_data = "" try: @@ -478,20 +533,22 @@ def ShowTaskUserLibraries(cmd_args=None): except Exception, e: debuglog("Failed to 
read user data for task 0x{:x} addr 0x{:x}, exception {:s}".format(task, image_info_address, str(e))) pass + if is_task_64: image_info_addr = _ExtractDataFromString(img_data, 0, "uint64_t") image_info_path = _ExtractDataFromString(img_data, 8, "uint64_t") else: image_info_addr = _ExtractDataFromString(img_data, 0, "uint32_t") image_info_path = _ExtractDataFromString(img_data, 4, "uint32_t") - + if image_info_addr : + debuglog("Found image: image_info_addr = %s, image_info_path= %s" % (hex(image_info_addr), hex(image_info_path))) image_info_list.append((image_info_addr, image_info_path)) i += 1 - + image_info_list.sort() - num_images_found = len(image_info_list) - + num_images_found = len(image_info_list) + for ii in range(num_images_found): n_im_info_addr = dyld_load_address if ii + 1 < num_images_found: @@ -509,9 +566,10 @@ def ShowTaskUserLibraries(cmd_args=None): except Exception,e: if config['debug']: raise e - - # load_path might get set when the main executable is processed. - if ShowTaskUserLibraries.exec_load_path != 0: + + # load_path might get set when the main executable is processed. + if ShowTaskUserLibraries.exec_load_path != 0: + debuglog("main executable load_path is set.") image_print_s = GetImageInfo(task, dyld_load_address, ShowTaskUserLibraries.exec_load_path) if len(image_print_s) > 0: print image_print_s @@ -519,7 +577,197 @@ def ShowTaskUserLibraries(cmd_args=None): ShowTaskUserLibraries.exec_load_path, image_print_s)) else: debuglog("Failed to print image for main executable for task 0x{:x} dyld_load_addr 0x{:x}".format(task, dyld_load_address)) + else: + debuglog("Falling back to vm entry method for finding executable load address") + print "# NOTE: Failed to find executable using all_image_infos. Using fuzzy match to find best possible load address for executable." + ShowTaskLoadInfo([cmd_args[0]]) return +@lldb_command("showtaskuserdyldinfo") +def ShowTaskUserDyldInfo(cmd_args=None): + """ Inspect the dyld global info for the given user task & print out all fields including error messages + Syntax: (lldb)showtaskuserdyldinfo + """ + if cmd_args == None or len(cmd_args) < 1: + print "No arguments passed" + print ShowTaskUserDyldInfo.__doc__.strip() + return + + out_str = "" + task = kern.GetValueFromAddress(cmd_args[0], 'task_t') + is_task_64 = int(task.t_flags) & 0x1 + dyld_all_image_infos_address = unsigned(task.all_image_info_addr) + if dyld_all_image_infos_address == 0: + print "No dyld shared library information available for task" + return False + vers_info_data = GetUserDataAsString(task, dyld_all_image_infos_address, 112) + dyld_all_image_infos_version = _ExtractDataFromString(vers_info_data, 0, "uint32_t") + if dyld_all_image_infos_version > 14: + out_str += "Unknown dyld all_image_infos version number %d" % dyld_all_image_infos_version + + # Find fields by byte offset. 
We assume at least version 9 is supported + if is_task_64: + dyld_all_image_infos_infoArrayCount = _ExtractDataFromString(vers_info_data, 4, "uint32_t") + dyld_all_image_infos_infoArray = _ExtractDataFromString(vers_info_data, 8, "uint64_t") + dyld_all_image_infos_notification = _ExtractDataFromString(vers_info_data, 16, "uint64_t") + dyld_all_image_infos_processDetachedFromSharedRegion = _ExtractDataFromString(vers_info_data, 24, "string") + dyld_all_image_infos_libSystemInitialized = _ExtractDataFromString(vers_info_data, 25, "string") + dyld_all_image_infos_dyldImageLoadAddress = _ExtractDataFromString(vers_info_data, 32, "uint64_t") + dyld_all_image_infos_jitInfo = _ExtractDataFromString(vers_info_data, 40, "uint64_t") + dyld_all_image_infos_dyldVersion = _ExtractDataFromString(vers_info_data, 48, "uint64_t") + dyld_all_image_infos_errorMessage = _ExtractDataFromString(vers_info_data, 56, "uint64_t") + dyld_all_image_infos_terminationFlags = _ExtractDataFromString(vers_info_data, 64, "uint64_t") + dyld_all_image_infos_coreSymbolicationShmPage = _ExtractDataFromString(vers_info_data, 72, "uint64_t") + dyld_all_image_infos_systemOrderFlag = _ExtractDataFromString(vers_info_data, 80, "uint64_t") + dyld_all_image_infos_uuidArrayCount = _ExtractDataFromString(vers_info_data, 88, "uint64_t") + dyld_all_image_infos_uuidArray = _ExtractDataFromString(vers_info_data, 96, "uint64_t") + dyld_all_image_infos_dyldAllImageInfosAddress = _ExtractDataFromString(vers_info_data, 104, "uint64_t") + else: + dyld_all_image_infos_infoArrayCount = _ExtractDataFromString(vers_info_data, 4, "uint32_t") + dyld_all_image_infos_infoArray = _ExtractDataFromString(vers_info_data, 8, "uint32_t") + dyld_all_image_infos_notification = _ExtractDataFromString(vers_info_data, 12, "uint32_t") + dyld_all_image_infos_processDetachedFromSharedRegion = _ExtractDataFromString(vers_info_data, 16, "string") + dyld_all_image_infos_libSystemInitialized = _ExtractDataFromString(vers_info_data, 17, "string") + dyld_all_image_infos_dyldImageLoadAddress = _ExtractDataFromString(vers_info_data, 20, "uint32_t") + dyld_all_image_infos_jitInfo = _ExtractDataFromString(vers_info_data, 24, "uint32_t") + dyld_all_image_infos_dyldVersion = _ExtractDataFromString(vers_info_data, 28, "uint32_t") + dyld_all_image_infos_errorMessage = _ExtractDataFromString(vers_info_data, 32, "uint32_t") + dyld_all_image_infos_terminationFlags = _ExtractDataFromString(vers_info_data, 36, "uint32_t") + dyld_all_image_infos_coreSymbolicationShmPage = _ExtractDataFromString(vers_info_data, 40, "uint32_t") + dyld_all_image_infos_systemOrderFlag = _ExtractDataFromString(vers_info_data, 44, "uint32_t") + dyld_all_image_infos_uuidArrayCount = _ExtractDataFromString(vers_info_data, 48, "uint32_t") + dyld_all_image_infos_uuidArray = _ExtractDataFromString(vers_info_data, 52, "uint32_t") + dyld_all_image_infos_dyldAllImageInfosAddress = _ExtractDataFromString(vers_info_data, 56, "uint32_t") + + dyld_all_image_infos_slide = (dyld_all_image_infos_address - dyld_all_image_infos_dyldAllImageInfosAddress) + dyld_all_image_infos_dyldVersion_postslide = (dyld_all_image_infos_dyldVersion + dyld_all_image_infos_slide) + + path_out = GetPathForImage(task, dyld_all_image_infos_dyldVersion_postslide) + out_str += "[dyld-{:s}]\n".format(path_out) + out_str += "version \t\t\t\t: {:d}\n".format(dyld_all_image_infos_version) + out_str += "infoArrayCount \t\t\t\t: {:d}\n".format(dyld_all_image_infos_infoArrayCount) + out_str += "infoArray \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_infoArray) + out_str += "notification \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_notification) + + out_str += "processDetachedFromSharedRegion \t: " + if dyld_all_image_infos_processDetachedFromSharedRegion != "": + out_str += "TRUE\n".format(dyld_all_image_infos_processDetachedFromSharedRegion) + else: + out_str += "FALSE\n" + + out_str += "libSystemInitialized \t\t\t: " + if dyld_all_image_infos_libSystemInitialized != "": + out_str += "TRUE\n".format(dyld_all_image_infos_libSystemInitialized) + else: + out_str += "FALSE\n" + + out_str += "dyldImageLoadAddress \t\t\t: {:#x}\n".format(dyld_all_image_infos_dyldImageLoadAddress) + out_str += "jitInfo \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_jitInfo) + out_str += "\ndyldVersion \t\t\t\t: {:#x}".format(dyld_all_image_infos_dyldVersion) + if (dyld_all_image_infos_slide != 0): + out_str += " (currently {:#x})\n".format(dyld_all_image_infos_dyldVersion_postslide) + else: + out_str += "\n" + + out_str += "errorMessage \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_errorMessage) + if dyld_all_image_infos_errorMessage != 0: + out_str += GetPathForImage(task, dyld_all_image_infos_errorMessage) + + out_str += "terminationFlags \t\t\t: {:#x}\n".format(dyld_all_image_infos_terminationFlags) + out_str += "coreSymbolicationShmPage \t\t: {:#x}\n".format(dyld_all_image_infos_coreSymbolicationShmPage) + out_str += "systemOrderFlag \t\t\t: {:#x}\n".format(dyld_all_image_infos_systemOrderFlag) + out_str += "uuidArrayCount \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_uuidArrayCount) + out_str += "uuidArray \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_uuidArray) + out_str += "dyldAllImageInfosAddress \t\t: {:#x}".format(dyld_all_image_infos_dyldAllImageInfosAddress) + if (dyld_all_image_infos_slide != 0): + out_str += " (currently {:#x})\n".format(dyld_all_image_infos_address) + else: + out_str += "\n" + + if is_task_64: + dyld_all_image_infos_address = dyld_all_image_infos_address + 112 + dyld_all_image_infos_v10 = GetUserDataAsString(task, dyld_all_image_infos_address, 64) + dyld_all_image_infos_initialImageCount = _ExtractDataFromString(dyld_all_image_infos_v10, 112-112, "uint64_t") + dyld_all_image_infos_errorKind = _ExtractDataFromString(dyld_all_image_infos_v10, 120-112, "uint64_t") + dyld_all_image_infos_errorClientOfDylibPath = _ExtractDataFromString(dyld_all_image_infos_v10, 128-112, "uint64_t") + dyld_all_image_infos_errorTargetDylibPath = _ExtractDataFromString(dyld_all_image_infos_v10, 136-112, "uint64_t") + dyld_all_image_infos_errorSymbol = _ExtractDataFromString(dyld_all_image_infos_v10, 144-112, "uint64_t") + dyld_all_image_infos_sharedCacheSlide = _ExtractDataFromString(dyld_all_image_infos_v10, 152-112, "uint64_t") + dyld_all_image_infos_sharedCacheUUID = _ExtractDataFromString(dyld_all_image_infos_v10, 160-112, "string") + else: + dyld_all_image_infos_address = dyld_all_image_infos_address + 60 + dyld_all_image_infos_v10 = GetUserDataAsString(task, dyld_all_image_infos_address, 40) + dyld_all_image_infos_initialImageCount = _ExtractDataFromString(dyld_all_image_infos_v10, 60-60, "uint32_t") + dyld_all_image_infos_errorKind = _ExtractDataFromString(dyld_all_image_infos_v10, 64-60, "uint32_t") + dyld_all_image_infos_errorClientOfDylibPath = _ExtractDataFromString(dyld_all_image_infos_v10, 68-60, "uint32_t") + dyld_all_image_infos_errorTargetDylibPath = _ExtractDataFromString(dyld_all_image_infos_v10, 72-60, "uint32_t") + dyld_all_image_infos_errorSymbol =
_ExtractDataFromString(dyld_all_image_infos_v10, 76-60, "uint32_t") + dyld_all_image_infos_sharedCacheSlide = _ExtractDataFromString(dyld_all_image_infos_v10, 80-60, "uint32_t") + dyld_all_image_infos_sharedCacheUUID = _ExtractDataFromString(dyld_all_image_infos_v10, 84-60, "string") + + if dyld_all_image_infos_version >= 10: + out_str += "\ninitialImageCount \t\t\t: {:#x}\n".format(dyld_all_image_infos_initialImageCount) + + if dyld_all_image_infos_version >= 11: + out_str += "errorKind \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_errorKind) + out_str += "errorClientOfDylibPath \t\t\t: {:#x}\n".format(dyld_all_image_infos_errorClientOfDylibPath) + if dyld_all_image_infos_errorClientOfDylibPath != 0: + out_str += "\t\t\t\t" + out_str += GetPathForImage(task, dyld_all_image_infos_errorClientOfDylibPath) + out_str += "\n" + out_str += "errorTargetDylibPath \t\t\t: {:#x}\n".format(dyld_all_image_infos_errorTargetDylibPath) + if dyld_all_image_infos_errorTargetDylibPath != 0: + out_str += "\t\t\t\t" + out_str += GetPathForImage(task, dyld_all_image_infos_errorTargetDylibPath) + out_str += "\n" + out_str += "errorSymbol \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_errorSymbol) + if dyld_all_image_infos_errorSymbol != 0: + out_str += "\t\t\t\t" + out_str += GetPathForImage(task, dyld_all_image_infos_errorSymbol) + out_str += "\n" + + if dyld_all_image_infos_version >= 12: + out_str += "sharedCacheSlide \t\t\t: {:#x}\n".format(dyld_all_image_infos_sharedCacheSlide) + if dyld_all_image_infos_version >= 13 and dyld_all_image_infos_sharedCacheUUID != "": + out_str += "sharedCacheUUID \t\t\t: {:s}\n".format(dyld_all_image_infos_sharedCacheUUID) + else: + out_str += "No dyld information available for task\n" + print out_str + +# Macro: showosmalloc +@lldb_type_summary(['OSMallocTag']) +@header("{0: <20s} {1: >5s} {2: ^16s} {3: <5s} {4: <40s}".format("TAG", "COUNT", "STATE", "ATTR", "NAME")) +def GetOSMallocTagSummary(malloc_tag): + """ Summarize the given OSMalloc tag. + params: + malloc_tag : value - value representing a _OSMallocTag_ * in kernel + returns: + out_str - string summary of the OSMalloc tag. 
+ """ + if not malloc_tag: + return "Invalid malloc tag value: 0x0" + + out_str = "{: <#20x} {: >5d} {: ^#16x} {: <5d} {: <40s}\n".format(malloc_tag, + malloc_tag.OSMT_refcnt, malloc_tag.OSMT_state, malloc_tag.OSMT_attr, malloc_tag.OSMT_name) + return out_str + +@lldb_command('showosmalloc') +def ShowOSMalloc(cmd_args=None): + """ Print the outstanding allocation count of OSMalloc tags + Usage: showosmalloc + """ + summary_str = "" + tag_headp = Cast(addressof(kern.globals.OSMalloc_tag_list), 'struct _OSMallocTag_ *') + tagp = Cast(tag_headp.OSMT_link.next, 'struct _OSMallocTag_ *') + summary_str += GetOSMallocTagSummary.header + "\n" + while tagp != tag_headp: + summary_str += GetOSMallocTagSummary(tagp) + tagp = Cast(tagp.OSMT_link.next, 'struct _OSMallocTag_ *') + + print summary_str + +# EndMacro: showosmalloc + + diff --git a/tools/lldbmacros/utils.py b/tools/lldbmacros/utils.py index 104a528cc..5c7ff72c6 100644 --- a/tools/lldbmacros/utils.py +++ b/tools/lldbmacros/utils.py @@ -27,6 +27,8 @@ def lldb_run_command(cmdstring): lldb_run_command_state['active'] = False if res.Succeeded(): retval = res.GetOutput() + else: + retval = "ERROR:" + res.GetError() return retval def EnableLLDBAPILogging(): @@ -44,9 +46,9 @@ def EnableLLDBAPILogging(): cmd_str = enable_log_base_cmd + ' kdp-remote packets' print cmd_str print lldb_run_command(cmd_str) - print lldb_run_command("verison") + print lldb_run_command("version") print "Please collect the logs from %s for filing a radar. If you had encountered an exception in a lldbmacro command please re-run it." % logfile_name - print "Please make sure to provide the output of 'verison', 'image list' and output of command that failed." + print "Please make sure to provide the output of 'version', 'image list' and output of command that failed." return def GetConnectionProtocol(): @@ -111,6 +113,8 @@ def GetLongestMatchOption(searchstr, options=[], ignore_case=True): so = o if ignore_case: so = o.lower() + if so == searchstr: + return [o] if so.find(searchstr) >=0 : found_options.append(o) return found_options diff --git a/tools/lldbmacros/xnu.py b/tools/lldbmacros/xnu.py index d72c3aeef..a3c5b1b5a 100644 --- a/tools/lldbmacros/xnu.py +++ b/tools/lldbmacros/xnu.py @@ -99,7 +99,7 @@ def lldb_command(cmd_name, option_string = ''): try: stream.setOptions(command_args, option_string) if stream.verbose_level != 0: - config['verbosity'] = stream.verbose_level + config['verbosity'] += stream.verbose_level with RedirectStdStreams(stdout=stream) : if option_string: obj(cmd_args=stream.target_cmd_args, cmd_options=stream.target_cmd_options) @@ -285,8 +285,8 @@ def GetLLDBThreadForKernelThread(thread_obj): sbthread = lldb_process.GetThreadByID(tid) if not sbthread.IsValid(): - raise RuntimeError("Unable to find lldb thread for tid={0:d} thread = {1:#018x}".format(tid, thread_obj)) - + raise RuntimeError("Unable to find lldb thread for tid={0:d} thread = {1:#018x} (#16049947: have you put 'settings set target.load-script-from-symbol-file true' in your .lldbinit?)".format(tid, thread_obj)) + return sbthread def GetThreadBackTrace(thread_obj, verbosity = vHUMAN, prefix = ""): @@ -321,18 +321,32 @@ def GetThreadBackTrace(thread_obj, verbosity = vHUMAN, prefix = ""): if iteration == 0 and not is_continuation: out_string += prefix +"stacktop = {:#018x}\n".format(frame_p) - + if not function: # No debug info for 'function'. 
- symbol = frame.GetSymbol() - file_addr = addr.GetFileAddress() - start_addr = symbol.GetStartAddress().GetFileAddress() - symbol_name = symbol.GetName() - symbol_offset = file_addr - start_addr out_string += prefix if not is_continuation: out_string += "{fp:#018x} ".format(fp = frame_p) - out_string += "{addr:#018x} {mod}`{symbol} + {offset} \n".format(addr=load_addr, mod=mod_name, symbol=symbol_name, offset=symbol_offset) + + symbol = frame.GetSymbol() + if not symbol: + symbol_name = "None" + symbol_offset = load_addr + kmod_val = kern.globals.kmod + for kval in IterateLinkedList(kmod_val, 'next'): + if load_addr >= unsigned(kval.address) and \ + load_addr <= (unsigned(kval.address) + unsigned(kval.size)): + symbol_name = kval.name + symbol_offset = load_addr - unsigned(kval.address) + break + out_string += "{:#018x} {:s} + {:#x} \n".format(load_addr, symbol_name, symbol_offset) + else: + file_addr = addr.GetFileAddress() + start_addr = symbol.GetStartAddress().GetFileAddress() + symbol_name = symbol.GetName() + symbol_offset = file_addr - start_addr + out_string += "{addr:#018x} {mod}`{symbol} + {offset:#x} \n".format(addr=load_addr, + mod=mod_name, symbol=symbol_name, offset=symbol_offset) else: # Debug info is available for 'function'. func_name = frame.GetFunctionName() @@ -521,29 +535,30 @@ def ShowVersion(cmd_args=None): @lldb_command('paniclog') def ShowPanicLog(cmd_args=None): """ Display the paniclog information + usage: (lldb) paniclog + options: + -v : increase verbosity """ - panic_buf = kern.globals.debug_buf - panic_buf_start = addressof(panic_buf) + panic_buf = kern.globals.debug_buf_addr + panic_buf_start = unsigned(panic_buf) panic_buf_end = unsigned(kern.globals.debug_buf_ptr) num_bytes = panic_buf_end - panic_buf_start if num_bytes == 0 : return - panic_data = panic_buf.GetSBValue().GetData() - err = lldb.SBError() - line = '' - for i in range(0, num_bytes): - c = panic_data.GetUnsignedInt8(err, i) - if chr(c) == '\n': - if line =='': - line = " " - print line - line = '' - else: - line += chr(c) - - if len(line) > 0: - print line - + warn_str = "" + if num_bytes > 4096 and config['verbosity'] == vHUMAN: + num_bytes = 4096 + warn_str = "LLDBMacro Warning: The paniclog is too large. Trimming to 4096 bytes." + warn_str += " If you wish to see entire log please use '-v' argument." + out_str = "" + for i in range(num_bytes): + p_char = str(panic_buf[i]) + out_str += p_char + if p_char == '\n': + print out_str + out_str = "" + if warn_str: + print warn_str return @lldb_command('showbootargs') @@ -611,6 +626,74 @@ def ShowLLDBTypeSummaries(cmd_args=[]): lldb_run_command("type category "+ action +" kernel") print "Successfully "+action+"d the kernel type summaries. %s" % trailer_msg +@lldb_command('walkqueue_head', 'S') +def WalkQueueHead(cmd_args=[], cmd_options={}): + """ walk a queue_head_t and list all members in it. Note this is for queue_head_t. refer to osfmk/kern/queue.h + Option: -S - suppress summary output. 
+ Usage: (lldb) walkqueue_head <address of queue_entry> <element type> <field name> + ex: (lldb) walkqueue_head 0x7fffff80 "thread *" "task_threads" + + """ + global lldb_summary_definitions + if not cmd_args: + raise ArgumentError("invalid arguments") + if len(cmd_args) != 3: + raise ArgumentError("insufficient arguments") + queue_head = kern.GetValueFromAddress(cmd_args[0], 'struct queue_entry *') + el_type = cmd_args[1] + field_name = cmd_args[2] + showsummary = False + if el_type in lldb_summary_definitions: + showsummary = True + if '-S' in cmd_options: + showsummary = False + + for i in IterateQueue(queue_head, el_type, field_name): + if showsummary: + print lldb_summary_definitions[el_type](i) + else: + print "{0: <#020x}".format(i) + + + +@lldb_command('walklist_entry', 'S') +def WalkList(cmd_args=[], cmd_options={}): + """ iterate over a list as defined with LIST_ENTRY in bsd/sys/queue.h + params: + object addr - value : address of object + element_type - str : Type of the next element + field_name - str : Name of the field in next element's structure + + Option: -S - suppress summary output. + Usage: (lldb) walklist_entry <object address> <element type> <field name> + ex: (lldb) walklist_entry 0x7fffff80 "struct proc *" "p_sibling" + + """ + global lldb_summary_definitions + if not cmd_args: + raise ArgumentError("invalid arguments") + if len(cmd_args) != 3: + raise ArgumentError("insufficient arguments") + el_type = cmd_args[1] + queue_head = kern.GetValueFromAddress(cmd_args[0], el_type) + field_name = cmd_args[2] + + showsummary = False + if el_type in lldb_summary_definitions: + showsummary = True + if '-S' in cmd_options: + showsummary = False + elt = queue_head + while unsigned(elt) != 0: + i = elt + elt = elt.__getattr__(field_name).le_next + if showsummary: + print lldb_summary_definitions[el_type](i) + else: + print "{0: <#020x}".format(i) + + + from memory import * from process import * from ipc import * @@ -624,3 +707,8 @@ from pci import * from misc import * from apic import * from scheduler import * +from atm import * +from structanalyze import * +from ipcimportancedetail import * +from bank import * + diff --git a/tools/lldbmacros/xnudefines.py b/tools/lldbmacros/xnudefines.py index 9762ba3ec..3d694ac4e 100644 --- a/tools/lldbmacros/xnudefines.py +++ b/tools/lldbmacros/xnudefines.py @@ -21,6 +21,8 @@ arm_level2_access_strings = [ " noaccess", " supervisor(readonly) user(readonly)", " " ] +kq_state_strings = {0:"", 1:"SEL", 2:"SLEEP", 4:"PROCWAIT", 8:"KEV32", 16:"KEV64"} + proc_state_strings = [ "", "Idle", "Run", "Sleep", "Stop", "Zombie", "Reaping" ] proc_flag_explain_strings = ["!0x00000004 - process is 32 bit", #only exception that does not follow bit settings "0x00000001 - may hold advisory locks", @@ -60,7 +62,7 @@ proc_flag_explain_strings = ["!0x00000004 - process is 32 bit", #only exception # string representations for Kobject types kobject_types = ['', 'THREAD', 'TASK', 'HOST', 'HOST_PRIV', 'PROCESSOR', 'PSET', 'PSET_NAME', 'TIMER', 'PAGER_REQ', 'DEVICE', 'XMM_OBJECT', 'XMM_PAGER', 'XMM_KERNEL', 'XMM_REPLY', 'NOTDEF 15', 'NOTDEF 16', 'HOST_SEC', 'LEDGER', 'MASTER_DEV', 'ACTIVATION', 'SUBSYTEM', 'IO_DONE_QUE', 'SEMAPHORE', 'LOCK_SET', 'CLOCK', 'CLOCK_CTRL' , 'IOKIT_SPARE', - 'NAMED_MEM', 'IOKIT_CON', 'IOKIT_OBJ', 'UPL', 'MEM_OBJ_CONTROL', 'AU_SESSIONPORT', 'FILEPORT', 'LABELH'] + 'NAMED_MEM', 'IOKIT_CON', 'IOKIT_OBJ', 'UPL', 'MEM_OBJ_CONTROL', 'AU_SESSIONPORT', 'FILEPORT', 'LABELH', 'TASK_RESUME', 'VOUCHER', 'VOUCHER_ATTR_CONTROL'] def populate_kobject_types(xnu_dir_path): """ Function to read data from header file xnu/osfmk/kern/ipc_kobject.h @@ -76,4 
+78,4 @@ def populate_kobject_types(xnu_dir_path): if __name__ == "__main__": populate_kobject_types("../../") - \ No newline at end of file + diff --git a/tools/remote_build.sh b/tools/remote_build.sh index e3fcd5b2d..43e3ec44e 100755 --- a/tools/remote_build.sh +++ b/tools/remote_build.sh @@ -136,6 +136,16 @@ else else RSYNC_ARGS="" fi + if [ ! -e "${SYMROOT}/" ]; then + RSYNC_DELETE_SYMROOT=1 + else + RSYNC_DELETE_SYMROOT=0 + fi + if [ ! -e "${DSTROOT}/" ]; then + RSYNC_DELETE_DSTROOT=1 + else + RSYNC_DELETE_DSTROOT=0 + fi TARBUILDDIRS=0 fi @@ -175,6 +185,8 @@ chmod a+x "${BUILDSCRIPTDIR}/${BUILDSCRIPTNAME}" #echo "Build script is:" #cat "${BUILDSCRIPTDIR}/${BUILDSCRIPTNAME}" +mkdir -p "${BUILDTOOLSDIR}/empty" + if [ "$REMOTEBUILD" = "$SPECIALREMOTEBUILD" ]; then : else @@ -198,7 +210,7 @@ else REMOTEBUILDPATH="${REMOTEBUILDPATH}/$st_ino/${SRCNAME}/" echo "Remote path is ${REMOTEBUILD}:${REMOTEBUILDPATH}" 1>&2 - ssh $REMOTEBUILD "mkdir -p \"${REMOTEBUILDPATH}/BUILD/obj\"" || die "Could not make remote build directory" + ssh $REMOTEBUILD "mkdir -p \"${REMOTEBUILDPATH}/BUILD/\"{obj,sym,dst}" || die "Could not make remote build directory" # Copy source only rsync -azv --delete --exclude=\*~ --exclude=.svn --exclude=.git --exclude=/BUILD . $REMOTEBUILD:"${REMOTEBUILDPATH}" || die "Could not rsync source tree" @@ -206,9 +218,19 @@ else # Copy partial OBJROOT (just build tools and build script), and optionally delete everything else rsync -azv --delete $RSYNC_ARGS --include=/build.sh --include=/BuildTools --include=/BuildTools/\*\* --exclude=\* "${OBJROOT}/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/obj/" || die "Could not rsync build tree" + # Delete remote SYMROOT if it has been deleted locally + if [ "$RSYNC_DELETE_SYMROOT" -eq 1 ]; then + rsync -azv --delete "${BUILDTOOLSDIR}/empty/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/sym/" || die "Could not rsync delete SYMROOT" + fi + + # Delete remote DSTROOT if it has been deleted locally + if [ "$RSYNC_DELETE_DSTROOT" -eq 1 ]; then + rsync -azv --delete "${BUILDTOOLSDIR}/empty/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/dst/" || die "Could not rsync delete DSTROOT" + fi + # Start the build - echo ssh $REMOTEBUILD "cd \"${REMOTEBUILDPATH}\" && ${REMOTE_BUILDSCRIPTREL}/${BUILDSCRIPTNAME}" 1>&2 - ssh $REMOTEBUILD "cd \"${REMOTEBUILDPATH}\" && ${REMOTE_BUILDSCRIPTREL}/${BUILDSCRIPTNAME}" || die "Could not complete remote build" + echo ssh $REMOTEBUILD "/bin/bash -c 'cd \"${REMOTEBUILDPATH}\" && ${REMOTE_BUILDSCRIPTREL}/${BUILDSCRIPTNAME}'" 1>&2 + ssh $REMOTEBUILD "/bin/bash -c 'cd \"${REMOTEBUILDPATH}\" && ${REMOTE_BUILDSCRIPTREL}/${BUILDSCRIPTNAME}'" || die "Could not complete remote build" # Copy back build results except for object files (which might be several GB) echo "Copying results back..." 
diff --git a/tools/tests/MPMMTest/KQMPMMtest.c b/tools/tests/MPMMTest/KQMPMMtest.c index 4ee81c427..635726b6d 100644 --- a/tools/tests/MPMMTest/KQMPMMtest.c +++ b/tools/tests/MPMMTest/KQMPMMtest.c @@ -1,7 +1,5 @@ #include -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER -#include -#endif +#include #include #include @@ -313,7 +311,6 @@ void setup_client_ports(struct port_args *ports) static void thread_setup(int tag) { -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER kern_return_t ret; thread_extended_policy_data_t epolicy; thread_affinity_policy_data_t policy; @@ -337,7 +334,6 @@ thread_setup(int tag) { if (ret != KERN_SUCCESS) printf("thread_policy_set(THREAD_AFFINITY_POLICY) returned %d\n", ret); } -#endif } void * diff --git a/tools/tests/MPMMTest/MPMMtest.c b/tools/tests/MPMMTest/MPMMtest.c index 590ac04b1..69dd25bd7 100644 --- a/tools/tests/MPMMTest/MPMMtest.c +++ b/tools/tests/MPMMTest/MPMMtest.c @@ -1,7 +1,5 @@ #include -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER -#include -#endif +#include #include #include @@ -328,7 +326,6 @@ void setup_client_ports(struct port_args *ports) static void thread_setup(int tag) { -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER kern_return_t ret; thread_extended_policy_data_t epolicy; thread_affinity_policy_data_t policy; @@ -352,7 +349,6 @@ thread_setup(int tag) { if (ret != KERN_SUCCESS) printf("thread_policy_set(THREAD_AFFINITY_POLICY) returned %d\n", ret); } -#endif } void * diff --git a/tools/tests/MPMMTest/Makefile b/tools/tests/MPMMTest/Makefile index 914f680a0..8bfd13440 100644 --- a/tools/tests/MPMMTest/Makefile +++ b/tools/tests/MPMMTest/Makefile @@ -5,17 +5,17 @@ else Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) endif -CC:=xcrun -sdk "$(SDKROOT)" cc +CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc) SYMROOT?=$(shell /bin/pwd) -CFLAGS := -g -O2 +CFLAGS := -g -O2 -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders ifdef RC_ARCHS ARCHS:=$(RC_ARCHS) else ifeq "$(Embedded)" "YES" - ARCHS:=armv7 armv7s + ARCHS:=armv7 armv7s arm64 else ARCHS:=x86_64 i386 endif diff --git a/tools/tests/Makefile b/tools/tests/Makefile index e69f54bcd..37bd9e536 100644 --- a/tools/tests/Makefile +++ b/tools/tests/Makefile @@ -6,6 +6,18 @@ endif OBJROOT?=$(shell /bin/pwd) +SDKROOT ?= macosx.internal + +# SDKROOT may be passed as a shorthand like "iphoneos.internal". We +# must resolve these to a full path and override SDKROOT. 
+ +SDKROOT_RESOLVED := $(shell xcrun -sdk $(SDKROOT) -show-sdk-path) +ifeq ($(strip $(SDKROOT)_$(SDKROOT_RESOLVED)),/_) +SDKROOT_RESOLVED := / +endif +override SDKROOT = $(SDKROOT_RESOLVED) + + ifeq "$(RC_TARGET_CONFIG)" "iPhone" Embedded?=YES else @@ -15,6 +27,7 @@ endif COMMON_TARGETS = xnu_quick_test \ MPMMTest \ affinity \ + execperf \ kqueue_tests \ superpages \ zero-to-n \ @@ -37,4 +50,4 @@ all: $(TARGETS) $(DSTSUBPATH)/%: mkdir -p $@ mkdir -p $(OBJROOT)/$(notdir $@) - $(MAKE) -C $(SRCROOT)/$(notdir $@) SRCROOT=$(SRCROOT)/$(notdir $@) DSTROOT=$@ OBJROOT=$(OBJROOT)/$(notdir $@) + $(MAKE) -C $(SRCROOT)/$(notdir $@) SRCROOT=$(SRCROOT)/$(notdir $@) DSTROOT=$@ OBJROOT=$(OBJROOT)/$(notdir $@) SDKROOT=$(SDKROOT) diff --git a/tools/tests/affinity/Makefile b/tools/tests/affinity/Makefile index e66c7fe16..41e3848be 100644 --- a/tools/tests/affinity/Makefile +++ b/tools/tests/affinity/Makefile @@ -5,7 +5,7 @@ else Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) endif -CC:=xcrun -sdk "$(SDKROOT)" cc +CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc) SYMROOT?=$(shell /bin/pwd) @@ -13,7 +13,7 @@ ifdef RC_ARCHS ARCHS:=$(RC_ARCHS) else ifeq "$(Embedded)" "YES" - ARCHS:=armv7 armv7s + ARCHS:=armv7 armv7s arm64 else ARCHS:=x86_64 i386 endif @@ -24,7 +24,7 @@ ARCH_32_FLAGS := $(patsubst %, -arch %, $(ARCH_32)) ARCH_64 := $(filter %64, $(ARCHS)) ARCH_64_FLAGS := $(patsubst %, -arch %, $(ARCH_64)) -CFLAGS :=-g +CFLAGS :=-g -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders DSTROOT?=$(shell /bin/pwd) SRCROOT?=$(shell /bin/pwd) diff --git a/tools/tests/affinity/pool.c b/tools/tests/affinity/pool.c index 97626c5f8..449ff23af 100644 --- a/tools/tests/affinity/pool.c +++ b/tools/tests/affinity/pool.c @@ -1,7 +1,5 @@ #include -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER -#include -#endif +#include #include #include #include @@ -120,12 +118,8 @@ static void usage() { fprintf(stderr, -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER "usage: pool [-a] Turn affinity on (off)\n" " [-b B] Number of buffers per producer (2)\n" -#else - "usage: pool [-b B] Number of buffers per producer (2)\n" -#endif " [-i I] Number of buffers to produce (10000)\n" " [-s S] Number of stages (2)\n" " [-p P] Number of pages per buffer (256=1MB)]\n" @@ -173,7 +167,6 @@ reader_writer_fn(int *data, int isize) void affinity_set(int tag) { -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER kern_return_t ret; thread_affinity_policy_data_t policy; if (affinity) { @@ -185,7 +178,6 @@ affinity_set(int tag) if (ret != KERN_SUCCESS) printf("thread_policy_set(THREAD_AFFINITY_POLICY) returned %d\n", ret); } -#endif } /* @@ -201,7 +193,6 @@ manager_fn(void *arg) long iteration = 0; int current_tag = 0; -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER kern_return_t ret; thread_extended_policy_data_t epolicy; epolicy.timeshare = FALSE; @@ -212,7 +203,6 @@ manager_fn(void *arg) if (ret != KERN_SUCCESS) printf("thread_policy_set(THREAD_EXTENDED_POLICY) returned %d\n", ret); -#endif /* * If we're using affinity sets and we're a producer * set our tag to by our thread set number. 
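Editor's note on the pool.c hunks above: with the AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER guards removed, the Mach thread-policy calls are compiled unconditionally. For readers unfamiliar with the interface these tests exercise, here is a minimal, self-contained sketch of the THREAD_AFFINITY_POLICY call pattern that affinity_set() and manager_fn() rely on. It is illustrative only, not part of the patch, and the tag value is arbitrary.

/*
 * Sketch (not part of the patch): tag the calling thread with a Mach
 * affinity set, as pool.c's affinity_set() does.  Threads sharing a
 * non-zero tag are hinted to the scheduler as wanting to share a cache.
 */
#include <stdio.h>
#include <pthread.h>
#include <mach/mach.h>
#include <mach/thread_policy.h>

static void
tag_current_thread(integer_t tag)
{
	thread_affinity_policy_data_t policy = { tag };
	kern_return_t ret;

	ret = thread_policy_set(pthread_mach_thread_np(pthread_self()),
	                        THREAD_AFFINITY_POLICY,
	                        (thread_policy_t)&policy,
	                        THREAD_AFFINITY_POLICY_COUNT);
	if (ret != KERN_SUCCESS)
		printf("thread_policy_set(THREAD_AFFINITY_POLICY) returned %d\n", ret);
}

int
main(void)
{
	tag_current_thread(1);	/* arbitrary non-zero affinity tag */
	return 0;
}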
@@ -337,12 +327,8 @@ main(int argc, char *argv[]) while ((c = getopt (argc, argv, "ab:i:p:s:twv:")) != -1) { switch (c) { case 'a': -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER affinity = !affinity; break; -#else - usage(); -#endif case 'b': buffers = atoi(optarg); break; diff --git a/tools/tests/affinity/sets.c b/tools/tests/affinity/sets.c index cbaedcdef..4631d09c1 100644 --- a/tools/tests/affinity/sets.c +++ b/tools/tests/affinity/sets.c @@ -1,7 +1,5 @@ #include -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER -#include -#endif +#include #include #include #include @@ -121,12 +119,8 @@ static void usage() { fprintf(stderr, -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER "usage: sets [-a] Turn affinity on (off)\n" " [-b B] Number of buffers per set/line (2)\n" -#else - "usage: sets [-b B] Number of buffers per set/line (2)\n" -#endif " [-c] Configure for max cache performance\n" " [-h] Print this\n" " [-i I] Number of items/buffers to process (1000)\n" @@ -190,7 +184,6 @@ manager_fn(void *arg) * If we're using affinity sets (we are by default) * set our tag to by our thread set number. */ -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER thread_extended_policy_data_t epolicy; thread_affinity_policy_data_t policy; @@ -211,7 +204,6 @@ manager_fn(void *arg) if (ret != KERN_SUCCESS) printf("thread_policy_set(THREAD_AFFINITY_POLICY) returned %d\n", ret); } -#endif DBG("Starting %s set: %d stage: %d\n", sp->name, lp->setnum, sp->stagenum); @@ -356,12 +348,8 @@ main(int argc, char *argv[]) while ((c = getopt (argc, argv, "ab:chi:p:s:twv:")) != -1) { switch (c) { case 'a': -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER affinity = !affinity; break; -#else - usage(); -#endif case 'b': buffers = atoi(optarg); break; diff --git a/tools/tests/affinity/tags.c b/tools/tests/affinity/tags.c index e0872087b..f03ef46c5 100644 --- a/tools/tests/affinity/tags.c +++ b/tools/tests/affinity/tags.c @@ -1,7 +1,5 @@ #include -#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER -#include -#endif +#include #include #include #include diff --git a/tools/tests/execperf/Makefile b/tools/tests/execperf/Makefile index 00d03037c..d67091354 100644 --- a/tools/tests/execperf/Makefile +++ b/tools/tests/execperf/Makefile @@ -1,79 +1,109 @@ SDKROOT ?= / -ARCHS = x86_64 -CC = xcrun -sdk $(SDKROOT) cc -CODESIGN = xcrun -sdk $(SDKROOT) codesign +ifeq "$(RC_TARGET_CONFIG)" "iPhone" +Embedded?=YES +else +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +endif + +ifdef RC_ARCHS + ARCHS:=$(RC_ARCHS) + else + ifeq "$(Embedded)" "YES" + ARCHS:=armv7 armv7s arm64 + else + ARCHS:=x86_64 i386 + endif +endif + +CC = $(shell xcrun -sdk "$(SDKROOT)" -find cc) +STRIP = $(shell xcrun -sdk "$(SDKROOT)" -find strip) +CODESIGN = $(shell xcrun -sdk "$(SDKROOT)" -find codesign) +export CODESIGN_ALLOCATE := $(shell xcrun -sdk "$(SDKROOT)" -find codesign_allocate) +DSYMUTIL = $(shell xcrun -sdk "$(SDKROOT)" -find dsymutil) CFLAGS = -O0 -g -isysroot $(SDKROOT) $(patsubst %, -arch %,$(ARCHS)) -LDFLAGS = -Wl,-new_linker -dead_strip \ +LDFLAGS = -dead_strip \ -isysroot $(SDKROOT) $(patsubst %, -arch %,$(ARCHS)) -NOPIE_OPTION = -Wl,-no_pie + +OBJROOT?=$(shell /bin/pwd)/BUILD/obj +SYMROOT?=$(shell /bin/pwd)/BUILD/sym +DSTROOT?=$(shell /bin/pwd)/BUILD/dst EXECUTABLES = exit.nodyld \ exit.nopie.dyld-but-no-Libsystem exit.pie.dyld-but-no-Libsystem \ exit.nopie.dyld-and-Libsystem exit.pie.dyld-and-Libsystem \ exit.nopie exit.pie \ - printexecinfo -OBJECTS = exit-asm.o exit.o printexecinfo.o + printexecinfo run test +OBJECTS 
= exit-asm.o exit.o printexecinfo.o run.o -default: $(EXECUTABLES) run +default: $(addprefix $(DSTROOT)/,$(EXECUTABLES)) clean: - rm -f run $(EXECUTABLES) - rm -f run.o $(OBJECTS) + rm -f $(addprefix $(OBJROOT)/,$(OBJECTS)) + rm -f $(addprefix $(SYMROOT)/,$(EXECUTABLES)) + rm -rf $(addsuffix .dSYM,$(addprefix $(SYMROOT)/,$(EXECUTABLES))) + rm -f $(addprefix $(DSTROOT)/,$(EXECUTABLES)) -run.o: run.c - $(CC) -c -o $@ $< $(CFLAGS) +# DEPENDENCIES +$(addprefix $(DSTROOT)/,$(EXECUTABLES)): DSTROOT SYMROOT -run: run.o - $(CC) -o $@ $< $(LDFLAGS) - $(CODESIGN) -s - $@ +$(addprefix $(OBJROOT)/,$(OBJECTS)): OBJROOT + +DSTROOT SYMROOT OBJROOT: + mkdir -p $($@) # OBJECTS -exit-asm.o: exit-asm.S +$(OBJROOT)/exit-asm.o: exit-asm.S OBJROOT $(CC) -c -o $@ $< $(CFLAGS) -exit.o: exit.c +$(OBJROOT)/exit.o: exit.c OBJROOT $(CC) -c -o $@ $< $(CFLAGS) -printexecinfo.o: printexecinfo.c +$(OBJROOT)/printexecinfo.o: printexecinfo.c OBJROOT + $(CC) -c -o $@ $< $(CFLAGS) + +$(OBJROOT)/run.o: run.c OBJROOT $(CC) -c -o $@ $< $(CFLAGS) # EXECUTABLES -exit.nodyld: exit-asm.o - $(CC) -o $@ $< $(LDFLAGS) -e mystart -nostartfiles -nodefaultlibs -static +$(DSTROOT)/exit.nodyld: $(OBJROOT)/exit-asm.o + $(CC) -o $@ $< $(LDFLAGS) -e _main -nostartfiles -nodefaultlibs -static -Wl,-segalign,0x4000 $(CODESIGN) -s - $@ - -exit.nopie.dyld-but-no-Libsystem: exit-asm.o - $(CC) -o $@ $< $(LDFLAGS) -e mystart $(NOPIE_OPTION) -nostartfiles -nodefaultlibs +$(DSTROOT)/exit.nopie.dyld-but-no-Libsystem: $(OBJROOT)/exit-asm.o + $(CC) -o $@ $< $(LDFLAGS) -e _main -Wl,-no_pie -nostartfiles -nodefaultlibs -Wl,-no_new_main $(CODESIGN) -s - $@ -exit.pie.dyld-but-no-Libsystem: exit-asm.o - $(CC) -o $@ $< $(LDFLAGS) -e mystart -Wl,-pie -nostartfiles -nodefaultlibs +$(DSTROOT)/exit.pie.dyld-but-no-Libsystem: $(OBJROOT)/exit-asm.o + $(CC) -o $@ $< $(LDFLAGS) -e _main -Wl,-pie -nostartfiles -nodefaultlibs -Wl,-no_new_main $(CODESIGN) -s - $@ -exit.nopie.dyld-and-Libsystem: exit-asm.o - $(CC) -o $@ $< $(LDFLAGS) -e mystart $(NOPIE_OPTION) -nostartfiles -nodefaultlibs -lSystem +$(DSTROOT)/exit.nopie.dyld-and-Libsystem: $(OBJROOT)/exit-asm.o + $(CC) -o $@ $< $(LDFLAGS) -Wl,-no_pie -nostartfiles -nodefaultlibs -lSystem $(CODESIGN) -s - $@ -exit.pie.dyld-and-Libsystem: exit-asm.o - $(CC) -o $@ $< $(LDFLAGS) -e mystart -Wl,-pie -nostartfiles -nodefaultlibs -lSystem +$(DSTROOT)/exit.pie.dyld-and-Libsystem: $(OBJROOT)/exit-asm.o + $(CC) -o $@ $< $(LDFLAGS) -Wl,-pie -nostartfiles -nodefaultlibs -lSystem $(CODESIGN) -s - $@ -exit.nopie: exit.o - $(CC) -o $@ $< $(LDFLAGS) -e mystart $(NOPIE_OPTION) +$(DSTROOT)/exit.nopie: $(OBJROOT)/exit.o + $(CC) -o $@ $< $(LDFLAGS) -Wl,-no_pie $(CODESIGN) -s - $@ -exit.pie: exit.o - $(CC) -o $@ $< $(LDFLAGS) -e mystart -Wl,-pie +$(DSTROOT)/exit.pie: $(OBJROOT)/exit.o + $(CC) -o $@ $< $(LDFLAGS) -Wl,-pie $(CODESIGN) -s - $@ -printexecinfo: printexecinfo.o +$(DSTROOT)/printexecinfo: $(OBJROOT)/printexecinfo.o $(CC) -o $@ $< $(LDFLAGS) $(CODESIGN) -s - $@ -# ACTIONS +$(DSTROOT)/run: $(OBJROOT)/run.o + $(CC) -o $(SYMROOT)/run $< $(LDFLAGS) + $(DSYMUTIL) $(SYMROOT)/run + $(STRIP) -S -o $@ $(SYMROOT)/run + $(CODESIGN) -s - $@ -quick-test: $(EXECUTABLES) run - ./test.sh +$(DSTROOT)/test: test.sh + install -m 755 $< $@ diff --git a/tools/tests/execperf/exit-asm.S b/tools/tests/execperf/exit-asm.S index ba63101e4..2b65a52c9 100644 --- a/tools/tests/execperf/exit-asm.S +++ b/tools/tests/execperf/exit-asm.S @@ -1,6 +1,7 @@ .text - .globl mystart -mystart: + .align 2 + .globl _main +_main: #if defined(__x86_64__) pushq $0 mov %rsp, %rbp 
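For context on the exit-asm.S hunk above: the entry symbol is renamed from mystart to the conventional _main, and the no-Libsystem variants are linked with -e _main -nostartfiles -nodefaultlibs, so there is no exit() to call and no startup code to return to; the entry point must trap into the kernel itself. The following C rendering of the x86_64 path is a sketch for illustration only; it assumes 0x2000001 is xnu's BSD-class exit(2) syscall number on x86_64 (class 2 << 24 plus SYS_exit 1), which should be verified against osfmk/mach/i386/syscall_sw.h.

/*
 * Sketch (not part of the patch): terminate a process with no dyld and
 * no Libsystem, as exit.nodyld does.  With no libc there is no exit(),
 * so issue the raw system call and never return.
 */
int
main(void)
{
#if defined(__x86_64__)
	__asm__ volatile("movl $0x2000001, %%eax\n\t"	/* assumed: BSD class | SYS_exit */
	                 "xorl %%edi, %%edi\n\t"	/* exit status 0 */
	                 "syscall"
	                 : : : "rax", "rdi");
#endif
	__builtin_unreachable();
}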
diff --git a/tools/tests/execperf/exit.c b/tools/tests/execperf/exit.c index 1f6e025ad..301679fcb 100644 --- a/tools/tests/execperf/exit.c +++ b/tools/tests/execperf/exit.c @@ -1,8 +1,6 @@ #include -void mystart(void) __asm__("mystart"); - -void mystart(void) { +int main(int argc, char *argv[]) { #if defined(__x86_64__) asm volatile ("andq $0xfffffffffffffff0, %rsp\n"); #elif defined(__i386__) diff --git a/tools/tests/execperf/printexecinfo.c b/tools/tests/execperf/printexecinfo.c index 5dfcd6bb7..1baefb922 100644 --- a/tools/tests/execperf/printexecinfo.c +++ b/tools/tests/execperf/printexecinfo.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -31,8 +32,14 @@ void printexecinfo(void) int ret; uint64_t stackaddr; size_t len = sizeof(stackaddr); + const NXArchInfo *arch = NXGetArchInfoFromCpuType(_mh_execute_header.cputype, _mh_execute_header.cpusubtype & ~CPU_SUBTYPE_MASK); printf("executable load address = 0x%016llx\n", (uint64_t)(uintptr_t)&_mh_execute_header); + printf("executable cputype 0x%08x cpusubtype 0x%08x (%s:%s)\n", + _mh_execute_header.cputype, + _mh_execute_header.cpusubtype, + arch ? arch->name : "unknown", + arch ? arch->description : "unknown"); ret = sysctlbyname("kern.usrstack64", &stackaddr, &len, NULL, 0); if (ret == -1) diff --git a/tools/tests/execperf/test.sh b/tools/tests/execperf/test.sh index 72917a719..7a8f31650 100755 --- a/tools/tests/execperf/test.sh +++ b/tools/tests/execperf/test.sh @@ -18,9 +18,10 @@ case "$PRODUCT" in ;; esac -for j in 1 2 3; do - for i in ${EXECUTABLES}; do - echo "Running $i" +for i in ${EXECUTABLES}; do + echo "Running $i" + for j in `jot $(sysctl -n hw.ncpu) 1`; do + printf "\t%dx\t" $j /usr/bin/time ./${RUN} $j $((${COUNT}/$j)) ./$i if [ $? -ne 0 ]; then echo "Failed $i, exit status $?"
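The test.sh hunk above replaces the fixed three passes with a sweep of 1 through hw.ncpu concurrent runners: `jot N 1` prints the integers 1..N, and `sysctl -n hw.ncpu` supplies N. For reference, the same CPU-count query is available from C via sysctlbyname, shown here as a small sketch (the error handling is illustrative):

/*
 * Sketch (not part of the patch): the C equivalent of the
 * `sysctl -n hw.ncpu` query that test.sh uses to size its jot loop.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	int ncpu = 0;
	size_t len = sizeof(ncpu);

	if (sysctlbyname("hw.ncpu", &ncpu, &len, NULL, 0) != 0) {
		perror("sysctlbyname(hw.ncpu)");
		return 1;
	}
	printf("%d\n", ncpu);
	return 0;
}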
diff --git a/tools/tests/jitter/Makefile b/tools/tests/jitter/Makefile index 4b679bbea..6500ebb8e 100644 --- a/tools/tests/jitter/Makefile +++ b/tools/tests/jitter/Makefile @@ -7,13 +7,13 @@ endif OBJROOT?=$(shell /bin/pwd) -CC:=xcrun -sdk "$(SDKROOT)" cc +CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc) ifdef RC_ARCHS ARCHS:=$(RC_ARCHS) else ifeq "$(Embedded)" "YES" - ARCHS:=armv7 armv7s + ARCHS:=armv7 armv7s arm64 else ARCHS:=x86_64 i386 endif diff --git a/tools/tests/kqueue_tests/Makefile b/tools/tests/kqueue_tests/Makefile index 7bd1baacd..8d10021ed 100755 --- a/tools/tests/kqueue_tests/Makefile +++ b/tools/tests/kqueue_tests/Makefile @@ -5,13 +5,13 @@ else Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) endif -CC:=xcrun -sdk "$(SDKROOT)" cc +CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc) ifdef RC_ARCHS ARCHS:=$(RC_ARCHS) else ifeq "$(Embedded)" "YES" - ARCHS:=armv7 armv7s + ARCHS:=armv7 armv7s arm64 else ARCHS:=x86_64 i386 endif diff --git a/tools/tests/libMicro/Makefile.Darwin b/tools/tests/libMicro/Makefile.Darwin index 9d4e00608..5bb7da2dd 100644 --- a/tools/tests/libMicro/Makefile.Darwin +++ b/tools/tests/libMicro/Makefile.Darwin @@ -39,7 +39,7 @@ SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version Path) CFLAGS += -isysroot $(SDKPATH) endif -CC = xcrun -sdk $(SDKROOT) gcc +CC = $(shell xcrun -sdk "$(SDKROOT)" -find gcc) #NOPIC= -mdynamic-no-pic ARCH= i386 diff --git a/tools/tests/libMicro/apple/Makefile.Darwin b/tools/tests/libMicro/apple/Makefile.Darwin index 9ef0e27cf..c1677177f 100644 --- a/tools/tests/libMicro/apple/Makefile.Darwin +++ b/tools/tests/libMicro/apple/Makefile.Darwin @@ -35,12 +35,12 @@ Product=$(shell tconf --product) Embedded=$(shell tconf --test TARGET_OS_EMBEDDED) ifeq "$(Embedded)" "YES" -SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version Path) +SDKPATH = $(shell xcodebuild -sdk "$(SDKROOT)" -version Path) CFLAGS += -isysroot $(SDKPATH) EmbeddedOS=yes endif -CC = xcrun -sdk $(SDKROOT) gcc +CC = $(shell xcrun -sdk "$(SDKROOT)" -find gcc) #NOPIC= -mdynamic-no-pic ARCH= i386 diff --git a/tools/tests/memorystatus/Makefile b/tools/tests/memorystatus/Makefile index e158b0274..6a97b658f 100644 --- a/tools/tests/memorystatus/Makefile +++ b/tools/tests/memorystatus/Makefile @@ -1,14 +1,14 @@ #!/usr/bin/make DSTROOT?=$(shell /bin/pwd) -CC:=clang -CFLAGS:=-I. +CC:=$(shell xcrun -sdk "$(SDKROOT)" -find clang) +CFLAGS:=-I. -g ifdef RC_ARCHS ARCH:=$(RC_ARCHS) else ifeq "$(Embedded)" "YES" - ARCH:=armv7 armv7s + ARCH:=armv7 armv7s arm64 else ARCH:=x86_64 i386 endif @@ -28,11 +28,20 @@ endif MY_ARCH := $(patsubst %, -arch %, $(ARCH)) # allows building multiple archs. 
-all: $(DSTROOT)/memorystatus +all: $(DSTROOT)/memorystatus \ + $(DSTROOT)/memorystatus_groups + +$(DSTROOT)/memorystatus_groups: memorystatus_groups.c + $(CC) $(MY_ARCH) -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -o $(SYMROOT)/$(notdir $@) memorystatus_groups.c $(CFLAGS) + ditto $(SYMROOT)/$(notdir $@) $@ + codesign -f -s - $@ + $(DSTROOT)/memorystatus: memorystatus.c - xcrun -sdk $(SDKROOT) $(CC) $(MY_ARCH) -framework CoreFoundation -framework ServiceManagement -F $(SDKROOT)/System/Library/PrivateFrameworks/ -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -o $@ memorystatus.c $(CFLAGS) + $(CC) $(MY_ARCH) -framework CoreFoundation -framework ServiceManagement -F $(SDKROOT)/System/Library/PrivateFrameworks/ -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -o $(SYMROOT)/$(notdir $@) memorystatus.c $(CFLAGS) + ditto $(SYMROOT)/$(notdir $@) $@ codesign -f -s - $@ clean: rm -f $(DSTROOT)/memorystatus + rm -f $(DSTROOT)/memorystatus_groups diff --git a/tools/tests/memorystatus/memorystatus.c b/tools/tests/memorystatus/memorystatus.c index 337fc3404..89ac9713d 100644 --- a/tools/tests/memorystatus/memorystatus.c +++ b/tools/tests/memorystatus/memorystatus.c @@ -17,6 +17,7 @@ #include #include #include +#include /* Needed for vm_region info */ #include #include @@ -50,6 +51,18 @@ #define VM_PAGE_SIZE 4096 #endif +#define TASK_LIMIT_MB 75 +#define HWM_LIMIT_MB 8 + +/* + * Blob of data that is not easily compressed. + * Guaranteed during setup to be at least + * RANDOM_DATA_SIZE in length. + */ + +#define RANDOM_DATA_SIZE 4096 +char random_data[] = "ffd8ffe000104a46494600010101002400240000ffe100744578696600004d4d002a000000080004011a0005000000010000003e011b0005000000010000004601280003000000010002000087690004000000010000004e00000000000000240000000100000024000000010002a002000400000001000003c0a003000400000001000001ff00000000ffdb00430002020202020102020202020202030306040303030307050504060807080808070808090a0d0b09090c0a08080b0f0b0c0d0e0e0e0e090b10110f0e110d0e0e0effdb004301020202030303060404060e0908090e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0effc000110801ff03c003012200021101031101ffc4001f0000010501010101010100000000000000000102030405060708090a0bffc400b5100002010303020403050504040000017d01020300041105122131410613516107227114328191a1082342b1c11552d1f02433627282090a161718191a25262728292a3435363738393a434445464748494a535455565758595a636465666768696a737475767778797a838485868788898a92939495969798999aa2a3a4a5a6a7a8a9aab2b3b4b5b6b7b8b9bac2c3c4c5c6c7c8c9cad2d3d4d5d6d7d8d9dae1e2e3e4e5e6e7e8e9eaf1f2f3f4f5f6f7f8f9faffc4001f0100030101010101010101010000000000000102030405060708090a0bffc400b51100020102040403040705040400010277000102031104052131061241510761711322328108144291a1b1c109233352f0156272d10a162434e125f11718191a262728292a35363738393a434445464748494a535455565758595a636465666768696a737475767778797a82838485868788898a92939495969798999aa2a3a4a5a6a7a8a9aab2b3b4b5b6b7b8b9bac2c3c4c5c6c7c8c9cad2d3d4d5d6d7d8d9dae2e3e4e5e6e7e8e9eaf2f3f4f5f6f7f8f9faffda000c03010002110311003f00f9e74fbd37baa2db99e6506391f28371f9519ba67fd9fcabd46cbc1315de8d6776752d7419e049084b152a37283c1dfc8e6bc02db4af18d9df79c9e1bd59a40ae9b65b1761f32953c63ae09c7a1c57656fe24f8896da7c16c9e0bb3748a358d5a4d04b31006324f73c75a00935f7fec9f165ee98b7372e2ddc05795763f2a0f20138ebeb590bac3e70d2b6e1fed1ac6d4ecbc65aa6b973a85c7867528a6998168edec1a38c1c01c2f61c550fec1f16ff00d0bdade4f5ff00447ff0a00eaffb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe14
7f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f"; + /* * TODO: import header (currently vm_pageout.h) without pulling in extraneous definitions; * see . @@ -66,6 +79,7 @@ typedef enum jetsam_test { kSimpleJetsamTest = 1, + kCustomTaskLimitTest, kPressureJetsamTestFG, kPressureJetsamTestBG, kHighwaterJetsamTest, @@ -85,16 +99,30 @@ typedef struct shared_mem_t { pthread_cond_t cv; boolean_t completed; boolean_t pressure_event_fired; + boolean_t child_failed; } shared_mem_t; shared_mem_t *g_shared = NULL; unsigned long g_physmem = 0; +int g_compressor_mode=0; int g_ledger_count = -1, g_footprint_index = -1; int64_t g_per_process_limit = -1; -#if TARGET_OS_EMBEDDED -static boolean_t set_priority(pid_t pid, int32_t priority, uint64_t user_data); -#endif +/* + * g_exit_status: + * Holds the PASS/FAIL status of the memorystatus + * test run as a whole. + * e.g: If one subtest reports failure, the entire + * test run reports failure. + * + * PASS: returns 0 (default) + * FAIL: returns -1 + * + * The only time the g_exit_status changes state + * is when printTestResult() reports a FAIL status. 
+ */ +int g_exit_status = 0; + extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3); static boolean_t check_properties(pid_t pid, int32_t requested_priority, int32_t requested_limit_mb, uint64_t requested_user_data, const char *test); @@ -114,6 +142,7 @@ printTestHeader(pid_t testPid, const char *testName, ...) printf("[PID] %d\n", testPid); printf("========================================\n"); printf("[BEGIN]\n"); + fflush(stdout); } static void @@ -131,7 +160,54 @@ printTestResult(const char *testName, boolean_t didPass, const char *msg, ...) printf("[PASS]\t%s\n\n", testName); } else { printf("[FAIL]\t%s\n\n", testName); + + /* Any single failure, fails full test run */ + g_exit_status = -1; } + fflush(stdout); +} + +static int +_get_munch_interval(int given_interval) +{ + int res; + int new_interval=0; + char *slow_device; + char model_name_buf[1025]; + size_t mnb_size = 1024; + res = sysctlbyname("hw.model", model_name_buf, &mnb_size, NULL, 0); + + if (res) { + perror("\t\tsysctlbyname(hw.model...)"); + } + else { + /* see if we're a slow device (N90, K66, J33) */ + slow_device = strstr(model_name_buf, "N90"); + if (slow_device == NULL) { + slow_device = strstr(model_name_buf, "K66"); + } + if (slow_device == NULL) { + slow_device = strstr(model_name_buf, "J33"); + } + + if (slow_device != NULL) { + printf("\t\tRunning on a slow device...\n"); + } + + if (given_interval == 0) { + if (slow_device != NULL) { + new_interval = 500 * 1000; /* want sleep time in microseconds */ + } + else { + new_interval = 100 * 1000;/* want sleep time in microseconds */ + } + } + else { + new_interval = given_interval * USEC_PER_SEC; + } + } + + return new_interval; } static CFDictionaryRef create_dictionary_from_plist(const char *path) { @@ -163,8 +239,9 @@ static CFDictionaryRef create_dictionary_from_plist(const char *path) { goto exit; } - options = (CFDictionaryRef) CFPropertyListCreateFromXMLData(kCFAllocatorDefault, data, kCFPropertyListImmutable, NULL); + options = (CFDictionaryRef) CFPropertyListCreateWithData(kCFAllocatorDefault, data, kCFPropertyListImmutable, NULL, NULL); if (options == NULL) { + goto exit; } exit: @@ -181,104 +258,34 @@ exit: return options; } -#if TARGET_OS_EMBEDDED - -static void disable_crashreporter(void) { - if (!SMJobRemove(kSMDomainSystemLaunchd, CFSTR(CR_JOB), NULL, true, NULL)) { - printf ("\t\tCould not unload %s\n", CR_JOB); - } -} - -static void enable_crashreporter(void) { - CFDictionaryRef job_dict; - - job_dict = create_dictionary_from_plist(CR_JOB_PLIST_PATH); - if (!job_dict) { - printf("\t\tCould not create dictionary from %s\n", CR_JOB_PLIST_PATH); - } - - if (!SMJobSubmit(kSMDomainSystemLaunchd, job_dict, NULL, NULL)) { - printf ("\t\tCould not submit %s\n", CR_JOB); - } - - CFRelease(job_dict); -} - -static boolean_t verify_snapshot(pid_t pid, int32_t priority, uint32_t kill_cause, uint64_t user_data, bool expecting_snapshot) { - int size; - memorystatus_jetsam_snapshot_t *snapshot = NULL; - int i; - boolean_t res = false; - - if (kill_cause == CAUSE_HIWAT_OR_PERPROC) { - kill_cause = kMemorystatusKilledHiwat|kMemorystatusKilledVMPageShortage; - } - - size = memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT, 0, 0, NULL, 0); - if (size <= 0) { - if (expecting_snapshot) { - printf("\t\tCan't get snapshot size: %d!\n", size); - } - goto exit; - } - - snapshot = (memorystatus_jetsam_snapshot_t*)malloc(size); - if (!snapshot) { - printf("\t\tCan't allocate snapshot!\n"); - goto exit; - } - - size = 
memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT, 0, 0, snapshot, size); - if (size <= 0) { - printf("\t\tCan't retrieve snapshot (%d)!\n", size); - goto exit; - } - - if (((size - sizeof(memorystatus_jetsam_snapshot_t)) / sizeof(memorystatus_jetsam_snapshot_entry_t)) != snapshot->entry_count) { - printf("\t\tMalformed snapshot: %d! Expected %ld + %zd x %ld = %ld\n", size, - sizeof(memorystatus_jetsam_snapshot_t), snapshot->entry_count, sizeof(memorystatus_jetsam_snapshot_entry_t), - sizeof(memorystatus_jetsam_snapshot_t) + (snapshot->entry_count * sizeof(memorystatus_jetsam_snapshot_entry_t))); - goto exit; - } - - if (pid == -1) { - /* Just flushing the buffer */ - res = true; - goto exit; - } - - /* Locate */ - for (i = 0; i < snapshot->entry_count; i++) { - if (snapshot->entries[i].pid == pid) { - res = 0; - if ((priority == snapshot->entries[i].priority) && ((kill_cause | snapshot->entries[i].killed) == kill_cause) && (user_data == snapshot->entries[i].user_data)) { - res = true; - } else { - printf("\t\tMismatched snapshot properties for pid %d (expected/actual): priority %d/%d : kill cause 0x%x/0x%x : user data 0x%llx/0x%llx\n", - pid, priority, snapshot->entries[i].priority, kill_cause, snapshot->entries[i].killed, user_data, snapshot->entries[i].user_data); - } - goto exit; - } - } - -exit: - free(snapshot); - - return res; -} - -#endif /* TARGET_OS_EMBEDDED */ +/* + * cleanup_and_exit(): + * The parent process can call this routine to exit or abort + * the test run at any time. + * + * The child process, on the other hand, should not call this routine. + * Be mindful about how re-enabling the crashreporter can affect tests + * further down the line. + */ static void cleanup_and_exit(int status) { -#if TARGET_OS_EMBEDDED - /* Cleanup */ - enable_crashreporter(); -#endif /* Exit. Pretty literal. */ exit(status); } +/* + * child_ready(): + * After a child process takes care of its initial setup, it + * synchronizes back to the parent using this call. + * + * If the child process experiences a failure during its + * initial setup, it should abort using a standard exit + * routine, leaving crashreporter cleanup to the parent. + * + * The child should never call cleanup_and_exit(). + * That's for the parent only. + */ static void child_ready() { pthread_mutex_lock(&g_shared->mutex); pthread_cond_signal(&g_shared->cv); @@ -287,7 +294,7 @@ static void child_ready() { static pid_t init_and_fork() { int pid; - + g_shared->completed = 0; g_shared->pressure_event_fired = 0; @@ -297,7 +304,7 @@ static pid_t init_and_fork() { if (pid == 0) { return 0; } else if (pid == -1) { - printTestResult(__func__, false, "Fork error!\n"); + printTestResult(__func__, false, "Fork error!"); cleanup_and_exit(-1); } @@ -336,44 +343,6 @@ exit: /* Tests */ -#if TARGET_OS_EMBEDDED - -/* Spawn tests */ - -static void spawn_test() { - int page_delta = 32768; /* 128MB */ - char *mem; - unsigned long total = 0; - - /* Spin */ - while (1) { - /* Priority will be shifted during this time... 
*/ - sleep(1); - - /* ...then process will be backgrounded and hopefully killed by the memory limit */ - while(1) { - int i; - mem = malloc(page_delta * VM_PAGE_SIZE); - if (!mem) { - fprintf(stderr, "Failed to allocate memory!\n"); - while (1) { - sleep(1); - } - } - - total += page_delta; - memset(mem, 0xFF, page_delta * VM_PAGE_SIZE); - - set_priority(getpid(), JETSAM_PRIORITY_BACKGROUND, 0); - - while(1) { - sleep(1); - } - } - } -} - -#endif static boolean_t get_ledger_info(pid_t pid, int64_t *balance_mb, int64_t *limit_mb) { struct ledger_entry_info *lei; @@ -439,7 +408,8 @@ static boolean_t get_priority_props(pid_t pid, int32_t *priority, int32_t *limit printf("\t\tCan't find pid: %d!\n", pid); exit: - free(entries); + if (entries) + free(entries); return res; } @@ -458,7 +428,7 @@ static boolean_t check_properties(pid_t pid, int32_t requested_priority, int32_t /* -1 really means the default per-process limit, which varies per device */ if (requested_limit_mb <= 0) { - requested_limit_mb = g_per_process_limit; + requested_limit_mb = (int32_t)g_per_process_limit; } if (actual_priority != requested_priority || actual_hiwat != requested_limit_mb || actual_user_data != requested_user_data) { @@ -474,914 +444,6 @@ static boolean_t check_properties(pid_t pid, int32_t requested_priority, int32_t return true; } -#if TARGET_OS_EMBEDDED - -static void spin() { - child_ready(); - - /* Spin */ - while (1) { - sleep(10); - } -} - -/* Priority tests */ - -static boolean_t set_priority(pid_t pid, int32_t priority, uint64_t user_data) { - int ret; - memorystatus_priority_properties_t props; - - props.priority = priority; - props.user_data = (uint32_t)user_data; - - return memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, pid, 0, &props, sizeof(props)); -} - -static boolean_t set_memlimit(pid_t pid, int32_t limit_mb) { - return memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK, pid, limit_mb, NULL, 0); -} - -static boolean_t set_priority_properties(pid_t pid, int32_t priority, int32_t limit_mb, uint64_t user_data, const char *stage, boolean_t show_error) { - int ret; - - ret = set_priority(pid, priority, user_data); - if (ret == 0) { - ret = set_memlimit(pid, limit_mb); - } - - if (ret) { - if (show_error) { - printf("\t\t%s stage: failed to set properties!\n", stage); - } - - return false; - } - - return true; -} - -static void start_priority_test() { - const char *DEFAULT_TEST_STR = "Default"; - const char *INVALID_NEGATIVE_TEST_STR = "Invalid (Negative)"; - const char *INVALID_POSITIVE_TEST_STR = "Invalid (Positive)"; - const char *IDLE_ALIAS_TEST_STR = "Idle Alias"; - const char *DEFERRED_TEST_STR = "Deferred"; - const char *SUSPENDED_TEST_STR = "Suspended"; - const char *FOREGROUND_TEST_STR = "Foreground"; - const char *HIGHPRI_TEST_STR = "Highpri"; - - pid_t pid; - int status; - int success = false; - - pid = init_and_fork(); - if (pid == 0) { - spin(); - } else { - printTestHeader(pid, "Priority test"); - } - - /* Check the default properties */ - if (!check_properties(pid, JETSAM_PRIORITY_DEFAULT, -1, 0, DEFAULT_TEST_STR)) { - goto exit; - } - - /* Check that setting a negative value (other than -1) leaves properties unchanged */ - if (set_priority_properties(pid, -100, 0xABABABAB, 0, INVALID_NEGATIVE_TEST_STR, false) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT, -1, 0, INVALID_NEGATIVE_TEST_STR)) { - goto exit; - } - - /* Check that setting an out-of-range positive value leaves properties unchanged */ - if (set_priority_properties(pid, 100, 0xCBCBCBCB, 0, 
INVALID_POSITIVE_TEST_STR, false) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT, -1, 0, INVALID_POSITIVE_TEST_STR)) { - goto exit; - } - - /* Idle-deferred - this should be adjusted down to idle */ - if (!set_priority_properties(pid, JETSAM_PRIORITY_IDLE_DEFERRED, 0, 0xBEEF, DEFERRED_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_IDLE, 0, 0xBEEF, DEFERRED_TEST_STR)) { - goto exit; - } - - /* Suspended */ - if (!set_priority_properties(pid, JETSAM_PRIORITY_IDLE, 0, 0xCAFE, SUSPENDED_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_IDLE, 0, 0xCAFE, SUSPENDED_TEST_STR)) { - goto exit; - } - - /* Foreground */ - if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR)) { - goto exit; - } - - /* Hipri */ - if (!set_priority_properties(pid, JETSAM_PRIORITY_DEFAULT - 1, 0, 0x01234567, HIGHPRI_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT - 1, 0, 0x01234567, HIGHPRI_TEST_STR)) { - goto exit; - } - - /* Foreground again (to test that the limit is restored) */ - if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR)) { - goto exit; - } - - /* Set foreground priority again; this would have caught */ - if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xFEEDF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xFEEDF00D, FOREGROUND_TEST_STR)) { - goto exit; - } - - /* Set foreground priority again but pass a large memory limit; this would have caught */ - if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 4096, 0xBEEFF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 4096, 0xBEEFF00D, FOREGROUND_TEST_STR)) { - goto exit; - } - - /* Check that -1 aliases to JETSAM_PRIORITY_DEFAULT */ - if (!set_priority_properties(pid, -1, 0, 0xFADEF00D, IDLE_ALIAS_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT, 0, 0xFADEF00D, IDLE_ALIAS_TEST_STR)) { - goto exit; - } - - success = true; - -exit: - - /* Done here... */ - kill(pid, SIGKILL); - - /* Wait for exit */ - waitpid(pid, &status, 0); - - printTestResult("Priority test", success, NULL); -} - -/* Reordering */ - -static boolean_t check_reorder_priorities(pid_t pid1, pid_t pid2, int priority) { - int size; - memorystatus_priority_entry_t *entries = NULL; - int i; - boolean_t res = false; - - entries = get_priority_list(&size); - if (!entries) { - goto exit; - } - - /* Check relative priorities */ - for (i = 0; i < size/sizeof(memorystatus_priority_entry_t); i++ ){ - if (entries[i].pid == pid1) { - /* First process. The priority should match... */ - if (entries[i].priority != priority) { - goto exit; - } - - /* There should be one more daemon to follow... */ - if ((i + 1) >= size) { - goto exit; - } - - /* The next process should be pid2 */ - if (entries[i + 1].pid != pid2) { - goto exit; - } - - /* The priority should also match... 
*/ - if (entries[i + 1].priority != priority) { - goto exit; - } - - break; - } - } - - res = true; - -exit: - - return res; -} - -static void start_fs_priority_test() { - const char *REORDER_TEST_STR = "Reorder"; - const int test_priority = JETSAM_PRIORITY_FOREGROUND_SUPPORT; - - pid_t pid1, pid2; - int status; - int success = false; - - pid1 = init_and_fork(); - if (pid1 == 0) { - spin(); - } - - pid2 = init_and_fork(); - if (pid2 == 0) { - spin(); - } - - printTestHeader(pid1, "Reorder test"); - - /* pid2 should follow pid1 in the bucket */ - if (!set_priority_properties(pid1, test_priority, 0, 0, REORDER_TEST_STR, true) || !set_priority_properties(pid2, test_priority, 0, 0, REORDER_TEST_STR, true)) { - printf("Cannot set priorities - #1!\n"); - goto exit; - } - - /* Check relative priorities */ - if (!check_reorder_priorities(pid1, pid2, test_priority)) { - printf("Bad pid1 -> pid2 priorities - #2!\n"); - goto exit; - } - - /* pid 1 should move to the back... */ - if (!set_priority_properties(pid1, test_priority, 0, 0, REORDER_TEST_STR, true)) { - printf("Cannot set priorities - #3!\n"); - goto exit; - } - - /* ...so validate */ - if (!check_reorder_priorities(pid2, pid1, test_priority)) { - printf("Bad pid2 -> pid1 priorities - #4!\n"); - goto exit; - } - - /* Again, pid 2 should move to the back... */ - if (!set_priority_properties(pid2, test_priority, 0, 0, REORDER_TEST_STR, true)) { - printf("Cannot set priorities - #5!\n"); - goto exit; - } - - /* ...so validate for the last time */ - if (!check_reorder_priorities(pid1, pid2, test_priority)) { - printf("Bad pid1 -> pid2 priorities - #6!\n"); - goto exit; - } - - success = true; - -exit: - - /* Done here... */ - kill(pid1, SIGKILL); - kill(pid2, SIGKILL); - - /* Wait for exit */ - waitpid(pid1, &status, 0); - waitpid(pid2, &status, 0); - - printTestResult("Reorder test", success, NULL); -} - -/* Jetsam tests */ - -/* - - ASL message format: - - Message is ReadUID 0 - Message is ReadGID 80 - Message is ASLMessageID 703 - Message is Level 7 - Message is Time 1333155901 - Message is Sender kernel - Message is Facility kern - - */ - -static void vnode_test(int page_delta, int interval, int verbose, int32_t priority, uint64_t user_data) { - memorystatus_priority_properties_t props; - - props.priority = priority; - props.user_data = user_data; - - if (memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, getpid(), 0, &props, sizeof(props))) { - /*printf("\t\tFailed to set jetsam priority!\n");*/ - printTestResult(__func__, false, "Failed to set jetsam priority!"); - cleanup_and_exit(-1); - } - - /* Initialized... 
*/ - child_ready(); - - /* ...so start stealing vnodes */ - while(1) { - sleep(1); - } -} - -static void *wait_for_pressure_event(void *s) { - int kq; - int res; - struct kevent event, mevent; - char errMsg[ERR_BUF_LEN + 1]; - - kq = kqueue(); - - EV_SET(&mevent, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, 0); - - res = kevent(kq, &mevent, 1, NULL, 0, NULL); - if (res != 0) { - /*printf("\t\tKevent registration failed - returning: %d!\n", res);*/ - snprintf(errMsg, ERR_BUF_LEN, "Kevent registration failed - returning: %d!",res); - printTestResult(__func__, false, errMsg); - cleanup_and_exit(-1); - } - - while (1) { - memset(&event, 0, sizeof(struct kevent)); - res = kevent(kq, NULL, 0, &event, 1, NULL); - g_shared->pressure_event_fired = 1; - } -} - -static void wait_for_exit_event(int pid, uint32_t kill_cause) { - int kq; - int res; - uint32_t expected_flag, received_flag; - struct kevent event, mevent; - char errMsg[ERR_BUF_LEN + 1]; - - switch (kill_cause) { - case kMemorystatusKilledVnodes: expected_flag = NOTE_EXIT_MEMORY_VNODE; break; - case kMemorystatusKilledVMPageShortage: expected_flag = NOTE_EXIT_MEMORY_VMPAGESHORTAGE; break; - case kMemorystatusKilledVMThrashing: expected_flag = NOTE_EXIT_MEMORY_VMTHRASHING; break; - case kMemorystatusKilledHiwat: expected_flag = NOTE_EXIT_MEMORY_HIWAT; break; - case kMemorystatusKilledPerProcessLimit: expected_flag = NOTE_EXIT_MEMORY_PID; break; - case kMemorystatusKilledIdleExit: expected_flag = NOTE_EXIT_MEMORY_IDLE; break; - case CAUSE_HIWAT_OR_PERPROC: expected_flag = NOTE_EXIT_MEMORY_HIWAT|NOTE_EXIT_MEMORY_PID; break; - } - - kq = kqueue(); - - EV_SET(&mevent, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT | NOTE_EXIT_DETAIL, 0, 0); - - res = kevent(kq, &mevent, 1, NULL, 0, NULL); - if (res != 0) { - snprintf(errMsg,ERR_BUF_LEN,"Exit kevent registration failed - returning: %d!",res); - printTestResult(__func__, false, errMsg); - cleanup_and_exit(-1); - } - - res = kevent(kq, NULL, 0, &event, 1, NULL); - - /* Check if appropriate flags are set */ - if (!event.fflags & NOTE_EXIT_MEMORY) { - printTestResult(__func__, false, "Exit event fflags do not contain NOTE_EXIT_MEMORY\n"); - cleanup_and_exit(-1); - } - - received_flag = event.data & NOTE_EXIT_MEMORY_DETAIL_MASK; - if ((received_flag | expected_flag) != expected_flag) { - printTestResult(__func__, false, "Exit event data does not contain the expected jetsam flag for cause %x.\n" - "\t\t(expected %x, got %x)", kill_cause, expected_flag, received_flag); - cleanup_and_exit(-1); - } -} - -static void munch_test(int page_delta, int interval, int verbose, int32_t priority, int32_t highwater, uint64_t user_data) { - const char *MUNCH_TEST_STR = "Munch"; - char *mem; - unsigned long total = 0; - pthread_t pe_thread; - int res; - - /* Start thread to watch for pressure messages */ - res = pthread_create(&pe_thread, NULL, wait_for_pressure_event, (void*)g_shared); - if (res) { - printTestResult(__func__, false, "Error creating pressure event thread!\n"); - cleanup_and_exit(-1); - } - - if (set_priority_properties(getpid(), priority, highwater, user_data, MUNCH_TEST_STR, false) == false) { - printTestResult(__func__, false, "Failed to set jetsam priority!"); - cleanup_and_exit(-1); - } - - if (!page_delta) { - page_delta = 4096; - } - - sleep(1); - - /* Initialized... 
*/ - child_ready(); - - /* ...so start munch */ - while(1) { - int i; - mem = malloc(page_delta * VM_PAGE_SIZE); - if (!mem) { - fprintf(stderr, "Failed to allocate memory!\n"); - while (1) { - sleep(1); - } - } - - total += page_delta; - memset(mem, 0xFF, page_delta * VM_PAGE_SIZE); - - if (verbose) { - printf("\t\t%lu pages dirtied...\n", total); - } - - sleep(interval); - } -} - -static bool is_pressure_test(test) { - return ((test == kPressureJetsamTestFG) || (test == kPressureJetsamTestBG)); -} - -static bool verify_exit(pid_t pid, uint32_t kill_cause, time_t start_time, uint32_t test_pri, uint64_t test_user_data, jetsam_test_t test, bool expecting_snapshot) { - const char *msg_key = "Message"; - const char *time_key = "Time"; - aslmsg query; - aslresponse response; - aslmsg message; - char pid_buffer[16]; - const char *val; - int got_jetsam = 0; - bool got_snapshot = 0; - bool success; - - /* Wait for exit */ - wait_for_exit_event(pid, kill_cause); - - /* Let the messages filter through to the log - arbitrary */ - sleep(3); - - query = asl_new(ASL_TYPE_QUERY); - asl_set_query(query, ASL_KEY_SENDER, "kernel", ASL_QUERY_OP_EQUAL); - asl_set_query(query, ASL_KEY_MSG, "memorystatus", ASL_QUERY_OP_EQUAL|ASL_QUERY_OP_SUBSTRING); - snprintf(pid_buffer, sizeof(pid_buffer) - 1, "%d", pid); - asl_set_query(query, ASL_KEY_MSG, pid_buffer, ASL_QUERY_OP_EQUAL|ASL_QUERY_OP_SUBSTRING); - response = asl_search(NULL, query); - asl_free(query); - - while (NULL != (message = aslresponse_next(response))) - { - val = asl_get(message, time_key); - if (val) { - uint32_t msg_time = atoi(val); - if (msg_time > start_time) { - val = asl_get(message, msg_key); - if (val) { - printf("\t\tFound: %s\n", val); - got_jetsam = 1; - } - } - } - } - - if (got_jetsam) { - got_snapshot = verify_snapshot(pid, test_pri, kill_cause, test_user_data, expecting_snapshot); - } else { - printf("\t\tCouldn't find jetsam message in log!\n"); - } - - aslresponse_free(response); - - success = got_jetsam && (expecting_snapshot == got_snapshot) && (!(is_pressure_test(test)) || (is_pressure_test(test) && g_shared->pressure_event_fired)); - printTestResult("munch_test", success, "(test: %d, got_jetsam: %d, got_snapshot: %d, fired: %d)", test, got_jetsam, got_snapshot, g_shared->pressure_event_fired); - - return success; -} - -static void start_jetsam_test(jetsam_test_t test, const char *description) { - const char *msg_key = "Message"; - const char *time_key = "Time"; - const char *val; - aslmsg query; - aslresponse response; - aslmsg message; - time_t start_time; - pid_t pid; - char pid_buffer[16]; - int status; - int got_jetsam = 0; - int got_snapshot = 0; - uint32_t test_pri = 0; - uint64_t test_user_data = 0; - uint32_t kill_cause; - int success; - boolean_t expecting_snapshot = TRUE; - boolean_t big_mem = (g_physmem > 512 * 1024 * 1024); - - if (big_mem) { - /* - * On big memory machines (1GB+), there is a per-task memory limit. - * A munch test could fail because of this, if they manage to cross it; - * *or* because the high watermark was crossed, and the system was under - * enough mem pressure to go looking for a high watermark victim to kill. - */ - kill_cause = CAUSE_HIWAT_OR_PERPROC; - } else if (test == kHighwaterJetsamTest) { - /* - * On systems without the per-task memory limit, we shouldn't see any - * such kills; so that leaves high watermark kills as the only legitimate - * reason to kill a munch test that has a high watermark set. 
- */ - kill_cause = kMemorystatusKilledHiwat; - } else { - /* - * If this is a standard munch test and we're on a machine without the - * per-task memory limit, the only reason for kill should be that we need - * memory. - */ - kill_cause = kMemorystatusKilledVMPageShortage; - } - - start_time = time(NULL); - - switch (test) { - case kPressureJetsamTestFG: - test_pri = JETSAM_PRIORITY_FOREGROUND; /* Test that FG processes get pressure events */ - test_user_data = 0xDEADBEEF; - break; - case kPressureJetsamTestBG: - test_pri = JETSAM_PRIORITY_UI_SUPPORT; /* Test that BG processes get pressure events */ - test_user_data = 0xFADEBEEF; - break; - case kSimpleJetsamTest: - /* - * On 1GB devices, we should see a snapshot as the per-process limit is hit. - * On 512MB devices, we should see a normal jetsam, and no snapshot. - */ - expecting_snapshot = big_mem ? TRUE : FALSE; - test_pri = JETSAM_PRIORITY_IDLE; /* Suspended */ - test_user_data = 0xFACEF00D; - break; - default: - test_pri = JETSAM_PRIORITY_IDLE; /* Suspended */ - test_user_data = 0xCAFEF00D; - break; - } - - pid = init_and_fork(); - - if (pid == 0) { - switch (test) { - case kVnodeJetsamTest: - vnode_test(0, 0, 0, test_pri, test_user_data); - break; - case kHighwaterJetsamTest: - munch_test(0, 0, 0, test_pri, 8, test_user_data); - break; - default: - munch_test(0, 0, 0, test_pri, -1, test_user_data); - break; - } - } - else { - printTestHeader(pid, "%s test", description); - } - - verify_exit(pid, kill_cause, start_time, test_pri, test_user_data, test, expecting_snapshot); -} - -static void start_jetsam_test_background(const char *path) { - const char *argv[] = { - path, - "-s", - NULL - }; - - const uint32_t memlimit = 100; /* 100 MB */ - - time_t start_time; - pid_t pid = 1; - int status; - uint32_t test_pri = 0; - posix_spawnattr_t spattr; - int32_t pf_balance; - bool success; - - start_time = time(NULL); - - pid = 1; - status = 1; - - posix_spawnattr_init(&spattr); - posix_spawnattr_setjetsam(&spattr, (POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY | POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND), JETSAM_PRIORITY_UI_SUPPORT, 100); - - if (posix_spawn(&pid, path, NULL, &spattr, (char *const *)argv, NULL) < 0) { - printf("posix_spawn() failed!\n"); - goto exit; - } - - printTestHeader(pid, "Background memory limit test"); - - /* Starts in background */ - if (!check_properties(pid, JETSAM_PRIORITY_UI_SUPPORT, memlimit, 0x0, "jetsam_test_background - #1 BG")) { - goto exit; - } - - /* Set to foreground - priority and memlimit should change */ - set_priority(pid, JETSAM_PRIORITY_FOREGROUND, 0); - if (!check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 0, 0x0, "jetsam_test_background - #2 FG")) { - goto exit; - } - - /* ...and back */ - set_priority(pid, JETSAM_PRIORITY_BACKGROUND, 0); - if (!check_properties(pid, JETSAM_PRIORITY_BACKGROUND, memlimit, 0x0, "jetsam_test_background - #3 BG")) { - goto exit; - } - - /* ...and again */ - set_priority(pid, JETSAM_PRIORITY_FOREGROUND, 0); - if (!check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 0, 0x0, "jetsam_test_background - #4 FG")) { - goto exit; - } - -#if 1 - /* - * For now, this is all we can do. Limitations of the ledger mean that this process is credited with - * the dirty pages, *not* the child. At least the memory limit is reported to have shifted dynamically - * by this point. Kill the child and continue. 
- */ - kill(pid, SIGKILL); -#else - /* Let the process dirty 128MB of memory, then background itself */ - verify_exit(pid, kMemorystatusKilledPerProcessLimit, start_time, test_pri, 0, kBackgroundJetsamTest); -#endif - - success = true; - -exit: - if (pid != -1) { - kill(pid, SIGKILL); - } - - /* Wait for exit */ - waitpid(pid, &status, 0); - - printTestResult("Background test", success, NULL); -} - -/* Freeze tests */ - -/* Cribbed from 'top'... */ -static int -in_shared_region(mach_vm_address_t addr, cpu_type_t type) { - mach_vm_address_t base = 0, size = 0; - - switch(type) { - case CPU_TYPE_ARM: - base = SHARED_REGION_BASE_ARM; - size = SHARED_REGION_SIZE_ARM; - break; - - case CPU_TYPE_X86_64: - base = SHARED_REGION_BASE_X86_64; - size = SHARED_REGION_SIZE_X86_64; - break; - - case CPU_TYPE_I386: - base = SHARED_REGION_BASE_I386; - size = SHARED_REGION_SIZE_I386; - break; - - case CPU_TYPE_POWERPC: - base = SHARED_REGION_BASE_PPC; - size = SHARED_REGION_SIZE_PPC; - break; - - case CPU_TYPE_POWERPC64: - base = SHARED_REGION_BASE_PPC64; - size = SHARED_REGION_SIZE_PPC64; - break; - - default: { - int t = type; - - fprintf(stderr, "unknown CPU type: 0x%x\n", t); - abort(); - } - break; - } - - return(addr >= base && addr < (base + size)); -} - -static unsigned long long get_rprvt(mach_port_t task, pid_t pid) { - kern_return_t kr; - - mach_vm_size_t rprvt = 0; - mach_vm_size_t empty = 0; - mach_vm_size_t fw_private = 0; - mach_vm_size_t pagesize = VM_PAGE_SIZE; - mach_vm_size_t regs = 0; - - mach_vm_address_t addr; - mach_vm_size_t size; - - int split = 0; - - for (addr = 0; ; addr += size) { - vm_region_top_info_data_t info; - mach_msg_type_number_t count = VM_REGION_TOP_INFO_COUNT; - mach_port_t object_name; - - kr = mach_vm_region(task, &addr, &size, VM_REGION_TOP_INFO, (vm_region_info_t)&info, &count, &object_name); - if (kr != KERN_SUCCESS) break; - - if (in_shared_region(addr, CPU_TYPE_ARM)) { - // Private Shared - fw_private += info.private_pages_resident * pagesize; - - /* - * Check if this process has the globally shared - * text and data regions mapped in. If so, set - * split to TRUE and avoid checking - * again. - */ - if (split == FALSE && info.share_mode == SM_EMPTY) { - vm_region_basic_info_data_64_t b_info; - mach_vm_address_t b_addr = addr; - mach_vm_size_t b_size = size; - count = VM_REGION_BASIC_INFO_COUNT_64; - - kr = mach_vm_region(task, &b_addr, &b_size, VM_REGION_BASIC_INFO, (vm_region_info_t)&b_info, &count, &object_name); - if (kr != KERN_SUCCESS) break; - - if (b_info.reserved) { - split = TRUE; - } - } - - /* - * Short circuit the loop if this isn't a shared - * private region, since that's the only region - * type we care about within the current address - * range. - */ - if (info.share_mode != SM_PRIVATE) { - continue; - } - } - - regs++; - - /* - * Update counters according to the region type. - */ - - if (info.share_mode == SM_COW && info.ref_count == 1) { - // Treat single reference SM_COW as SM_PRIVATE - info.share_mode = SM_PRIVATE; - } - - switch (info.share_mode) { - case SM_LARGE_PAGE: - // Treat SM_LARGE_PAGE the same as SM_PRIVATE - // since they are not shareable and are wired. 
- case SM_PRIVATE: - rprvt += info.private_pages_resident * pagesize; - rprvt += info.shared_pages_resident * pagesize; - break; - - case SM_EMPTY: - empty += size; - break; - - case SM_COW: - case SM_SHARED: - if (pid == 0) { - // Treat kernel_task specially - if (info.share_mode == SM_COW) { - rprvt += info.private_pages_resident * pagesize; - } - break; - } - - if (info.share_mode == SM_COW) { - rprvt += info.private_pages_resident * pagesize; - } - break; - - default: - assert(0); - break; - } - } - - return rprvt; -} - -static void freeze_test() { - const unsigned long DIRTY_ALLOC = 16 * 1024 * 1024; - unsigned long *ptr; - task_port_t task = mach_task_self(); - - child_ready(); - - /* Needs to be vm_allocate() here; otherwise the compiler will optimize memset away */ - vm_allocate(task, (vm_address_t *)&ptr, DIRTY_ALLOC, TRUE); - if (ptr) { - int i; - int pid = getpid(); - unsigned long long baseline_rprvt, half_rprvt, rprvt; - - /* Get baseline */ - baseline_rprvt = get_rprvt(task, pid); - - /* Dirty half */ - memset(ptr, 0xAB, DIRTY_ALLOC / 2); - - /* Check RPRVT */ - half_rprvt = get_rprvt(task, pid); - printf("\t\trprvt is %llu\n", half_rprvt); - - if (half_rprvt != (baseline_rprvt + (DIRTY_ALLOC / 2))) - { - printTestResult(__func__, false, "Failed to dirty memory"); - cleanup_and_exit(-1); - } - - /* Freeze */ - sysctlbyname("kern.memorystatus_freeze", NULL, 0, &pid, sizeof(pid)); - - sleep(2); - - /* Check RPRVT */ - rprvt = get_rprvt(task, pid); - printf("\t\trprvt is %llu\n", rprvt); - - if ((rprvt > (half_rprvt - (DIRTY_ALLOC / 2))) || (rprvt > (64 * 1024)) /* Sanity */) - { - printTestResult(__func__, false, "Failed to freeze memory"); - cleanup_and_exit(-1); - } - - /* Thaw */ - sysctlbyname("kern.memorystatus_thaw", NULL, 0, &pid, sizeof(pid)); - - sleep(2); - - /* Check RPRVT */ - rprvt = get_rprvt(task, pid); - printf("\t\trprvt is %llu\n", rprvt); - - if (rprvt < (baseline_rprvt + (DIRTY_ALLOC / 2))) - { - printTestResult(__func__, false, "Failed to thaw memory"); - cleanup_and_exit(-1); - } - - /* Dirty the rest */ - memset(ptr + (DIRTY_ALLOC / (2 * sizeof(unsigned long))), 0xBC, DIRTY_ALLOC / 2); - - /* Check RPRVT */ - rprvt = get_rprvt(task, pid); - printf("\t\trprvt is %llu\n", rprvt); - - if (rprvt < (baseline_rprvt + DIRTY_ALLOC)) - { - printTestResult(__func__, false, "Failed to dirty memory"); - cleanup_and_exit(-1); - } - - g_shared->completed = 1; - cleanup_and_exit(0); - } - - printTestResult(__func__, false, "Something bad happened..."); - cleanup_and_exit(-1); -} - -static void start_freeze_test() { - pid_t pid; - int status; - int mode; - size_t size; - - /* Check to see if the test is applicable */ - size = sizeof(mode); - if (sysctlbyname("vm.compressor_mode", &mode, &size, NULL, 0) != 0) { - printTestHeader(getpid(), "Freeze test"); - printTestResult(__func__, false, "Failed to retrieve compressor config"); - cleanup_and_exit(-1); - } - - if (mode != VM_PAGER_FREEZER_DEFAULT) { - printTestHeader(getpid(), "Freeze test"); - printTestResult(__func__, true, "Freeze disabled; skipping test"); - return; - } - - /* Reset */ - memset(g_shared, 0, sizeof(shared_mem_t)); - - pid = init_and_fork(); - if (pid == 0) { - freeze_test(); - } else { - printTestHeader(pid, "Freeze test"); - } - - /* Wait for exit */ - waitpid(pid, &status, 0); - - printTestResult("Freeze test", g_shared->completed, NULL); -} - -#endif static void start_list_validation_test() { int size; @@ -1431,7 +493,8 @@ static void start_list_validation_test() { valid = true; exit: - 
free(entries); + if (entries) + free(entries); printTestResult("List validation test", valid, NULL); } @@ -1439,31 +502,19 @@ exit: /* Random individual tests */ static void start_general_sanity_test() { int ret, size; - memorystatus_priority_entry_t *entries = NULL; int i; boolean_t valid = false; + + /* + * The sanity test checks for permission failures + * against P_MEMSTAT_INTERNAL processes. + * Currently only launchd (pid==1) qualifies. + */ printTestHeader(getpid(), "Sanity test"); - /* Should not be able to set the priority of launchd... */ - ret = set_priority(1, JETSAM_PRIORITY_FOREGROUND, 0); - if (ret != -1 || errno != EPERM) { - printf("\t\tAble to set priority of launchd (%d/%d)!\n", ret, errno); - goto exit; - } else { - printf("\t\tlaunchd priority test OK!\n"); - } - - /* ...nor the memory limit... */ - ret = set_memlimit(1, 100); - if (ret != -1 || errno != EPERM) { - printf("\t\tNo EPERM setting launchd memlimit (%d/%d)!\n", ret, errno); - goto exit; - } else { - printf("\t\tlaunchd memlimit test OK!\n"); - } - - /* ...nor tinker with transactions */ + + /* Ensure that launchd's transaction state is fixed */ ret = proc_track_dirty(1, PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER); if (ret != EPERM) { printf("\t\tNo EPERM tracking launchd (%d/%d)!\n", ret, errno); @@ -1480,12 +531,11 @@ static void start_general_sanity_test() { printf("\t\tlaunchd dirty test OK!\n"); } + valid = true; exit: - free(entries); - - printTestResult("Idle exit test", valid, NULL); + printTestResult("Sanity test", valid, NULL); } static void idle_exit_deferral_test(idle_exit_test_t test) { @@ -1575,7 +625,7 @@ static void idle_exit_deferral_test(idle_exit_test_t test) { goto exit; } - proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT); + proc_clear_dirty(getpid(), PROC_DIRTY_DEFER); if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#4 - post deferral cancellation")) { goto exit; @@ -1593,7 +643,7 @@ static void idle_exit_deferral_test(idle_exit_test_t test) { goto exit; } - proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT); + proc_clear_dirty(getpid(), PROC_DIRTY_DEFER); if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#4 - post deferral cancellation")) { goto exit; @@ -1615,11 +665,11 @@ static void idle_exit_deferral_test(idle_exit_test_t test) { } g_shared->completed = 1; - cleanup_and_exit(0); + exit(0); exit: printTestResult(__func__, false, "Something bad happened..."); - cleanup_and_exit(-1); + exit(-1); } static void start_idle_exit_defer_test(idle_exit_test_t test) { @@ -1634,7 +684,7 @@ static void start_idle_exit_defer_test(idle_exit_test_t test) { idle_exit_deferral_test(test); } else { - printTestHeader(pid, "Idle exit deferral test"); + printTestHeader(pid, "Idle exit deferral test: %d", test); } /* Wait for exit */ @@ -1680,18 +730,7 @@ static void ledger_init(void) { static void run_tests(const char *path) { /* Embedded-only */ -#if TARGET_OS_EMBEDDED - start_jetsam_test(kSimpleJetsamTest, "Simple munch"); - start_jetsam_test(kHighwaterJetsamTest, "Highwater munch"); - start_jetsam_test(kPressureJetsamTestBG, "Background pressure munch"); - start_jetsam_test(kPressureJetsamTestFG, "Foreground Pressure munch"); - start_jetsam_test_background(path); - start_freeze_test(); - start_priority_test(); - start_fs_priority_test(); -#else #pragma unused(path) -#endif /* Generic */ start_general_sanity_test(); @@ -1702,31 +741,12 @@ static void run_tests(const char *path) { 
start_idle_exit_defer_test(kCancelTimeoutDirtyTest); } -#if TARGET_OS_EMBEDDED - -static void -sigterm(int sig) -{ - /* Reload crash reporter job */ - enable_crashreporter(); - - /* Reset signal handlers and re-raise signal */ - signal(SIGTERM, SIG_DFL); - signal(SIGINT, SIG_DFL); - - kill(getpid(), sig); -} - -#endif int main(int argc, char **argv) { pthread_mutexattr_t attr; pthread_condattr_t cattr; size_t size; -#if TARGET_OS_EMBEDDED - struct sigaction sa; -#endif /* Must be run as root for priority retrieval */ if (getuid() != 0) { @@ -1734,26 +754,6 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } -#if TARGET_OS_EMBEDDED - /* Spawn test */ - if ((argc == 2) && !strcmp(argv[1], "-s")) { - spawn_test(); - } - - sa.sa_flags = 0; - sa.sa_handler = sigterm; - sigemptyset(&sa.sa_mask); - - /* Ensure we can reinstate CrashReporter on exit */ - sigaction(SIGTERM, &sa, NULL); - sigaction(SIGINT, &sa, NULL); - - /* Unload */ - disable_crashreporter(); - - /* Flush the jetsam snapshot */ - verify_snapshot(-1, 0, 0, 0, FALSE); -#endif /* Memory */ size = sizeof(g_physmem); @@ -1762,6 +762,13 @@ int main(int argc, char **argv) cleanup_and_exit(-1); } + /* VM Compressor Mode */ + size = sizeof(g_compressor_mode); + if (sysctlbyname("vm.compressor_mode", &g_compressor_mode, &size, NULL, 0) != 0) { + printTestResult(__func__, false, "Failed to retrieve compressor config"); + cleanup_and_exit(-1); + } + /* Ledger; default limit applies to this process, so grab it here */ ledger_init(); if ((-1 == g_ledger_count) || (-1 == g_footprint_index) || (false == get_ledger_info(getpid(), NULL, &g_per_process_limit))) { @@ -1769,8 +776,12 @@ int main(int argc, char **argv) cleanup_and_exit(-1); } - /* Rescale to MB */ - g_per_process_limit /= (1024 * 1024); + if (g_per_process_limit == LEDGER_LIMIT_INFINITY) { + g_per_process_limit = 0; + } else { + /* Rescale to MB */ + g_per_process_limit /= (1024 * 1024); + } /* Shared memory */ g_shared = mmap(NULL, sizeof(shared_mem_t), PROT_WRITE|PROT_READ, MAP_ANON|MAP_SHARED, 0, 0); @@ -1779,6 +790,13 @@ int main(int argc, char **argv) cleanup_and_exit(-1); } + /* Guarantee size of random_data buffer */ + if (sizeof(random_data) < RANDOM_DATA_SIZE) { + printTestResult(__func__, false, "Failed to guarantee random_data buffer size [expected %d, actual %d]", + RANDOM_DATA_SIZE, sizeof(random_data)); + cleanup_and_exit(-1); + } + pthread_mutexattr_init(&attr); pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED ); @@ -1786,7 +804,7 @@ int main(int argc, char **argv) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED); if (pthread_mutex_init(&g_shared->mutex, &attr) || pthread_cond_init(&g_shared->cv, &cattr)) { - printTestResult("setup", false, "Unable to init condition variable!\n"); + printTestResult("setup", false, "Unable to init condition variable!"); cleanup_and_exit(-1); } @@ -1799,10 +817,6 @@ int main(int argc, char **argv) pthread_mutexattr_destroy(&attr); pthread_condattr_destroy(&cattr); -#if TARGET_OS_EMBEDDED - /* Reload crash reporter */ - enable_crashreporter(); -#endif - return 0; + return (g_exit_status); /* exit status 0 on success, -1 on failure */ } diff --git a/tools/tests/memorystatus/memorystatus_groups.c b/tools/tests/memorystatus/memorystatus_groups.c new file mode 100644 index 000000000..93ae8ade0 --- /dev/null +++ b/tools/tests/memorystatus/memorystatus_groups.c @@ -0,0 +1,653 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define 
MAXTESTPIDS 15
+#define MAXPRIORITY (JETSAM_PRIORITY_MAX - 1)
+
+/*
+ * memorystatus_control support for
+ * reprioritizing multiple processes
+ *
+ * This test/tool operates in one of two modes:
+ * list mode or generate mode.
+ *
+ * In generate mode (the default)
+ * Setup:
+ *	Spin off some number of child processes. (Enforce a max)
+ *	Generate a random jetsam priority band for each process.
+ *	Kill at least one of the processes (this tests the kernel's
+ *	ability to ignore a non-existent pid).
+ *	Sprinkle the processes into their randomly assigned bands.
+ * Test:
+ *	Query the kernel for a snapshot of the jetsam priority list,
+ *	(saving the priority and the index into the overall
+ *	priority list for each pid)
+ *
+ *	Exercise the MEMORYSTATUS_CMD_GRP_SET_PROPERTIES control call.
+ *
+ *	Properties supported in this exercise:
+ *	[1] priority
+ *
+ *	Query the kernel again for a second snapshot.
+ *
+ * Verify:
+ *	If everything works as expected, all the pids have moved
+ *	to the new priority band and the relative order before the
+ *	move is the same as the order after the move.
+ *
+ * In list mode, the user passes in a list of pids from the command line.
+ * We skip the Setup phase, but follow through with the Test and Verify
+ * steps.
+ *
+ * When using generate mode, you can add a delay that takes place just
+ * before the control call and then again just after the control call.
+ * e.g. This allows time to manually introspect the state of
+ *	the device before and after the new property assignments.
+ */
+
+/* Globals */
+int g_exit_status = 0;
+boolean_t generate_flag = FALSE;
+boolean_t list_flag = FALSE;
+boolean_t verbose_flag = FALSE;
+boolean_t do_error_flag = FALSE;
+uint64_t delay_seconds = 0;
+uint32_t kill_pid_indx = 0;
+uint32_t g_new_priority = JETSAM_PRIORITY_IDLE;
+
+typedef struct pidinfo {
+	pid_t pid;
+	int32_t pri_random;	/* random priority for generate path */
+	int32_t pri_before;	/* priority before idle move */
+	int32_t indx_before;	/* jetsam bucket index before idle move */
+	int32_t pri_after;	/* priority found after idle move test */
+	int32_t exp_after;	/* expected priority; zero if moved to idle band */
+	int32_t indx_after;	/* order it landed in the idle band */
+} pidinfo_t;
+
+static boolean_t do_get_priority_list(boolean_t before, memorystatus_priority_entry_t *mypids, size_t pid_count, pidinfo_t *pidinfo);
+static void do_generate_test(memorystatus_priority_entry_t *list, uint32_t pid_count);
+static void do_child_labor(void);
+static int priority_cmp(const void *x, const void *y);
+static void do_pidlist_test(memorystatus_priority_entry_t *list, uint32_t pid_count);
+static void do_control_list_test(memorystatus_priority_entry_t *list, uint32_t pid_count);
+static void dump_info_table(pidinfo_t *info, uint32_t count);
+static void print_usage(void);
+
+static char *g_testname = "GrpSetProperties";
+
+static void
+printTestHeader(pid_t testPid, const char *testName, ...)
+{
+	va_list va;
+	printf("=============================================\n");
+	printf("[TEST] GrpSetProperty ");
+	va_start(va, testName);
+	vprintf(testName, va);
+	va_end(va);
+	printf("\n");
+	printf("[PID] %d\n", testPid);
+	printf("=============================================\n");
+	printf("[BEGIN]\n");
+}
+
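For orientation before the helpers that follow: stripped of bookkeeping, the operation this tool exercises is a single memorystatus_control() call over an array of (pid, priority) pairs. A minimal editorial sketch, assuming the same declarations this file itself relies on (the header location, the helper name move_pids_to_band, and MAX_BATCH are illustrative, not part of the patch):

#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/kern_memorystatus.h>	/* assumed home of memorystatus_control() and the MEMORYSTATUS_*/JETSAM_* constants */

#define MAX_BATCH 15	/* illustrative cap, mirroring MAXTESTPIDS */

static int
move_pids_to_band(const pid_t *pids, uint32_t count, int32_t band)
{
	memorystatus_priority_entry_t entries[MAX_BATCH];
	uint32_t i;

	if (count == 0 || count > MAX_BATCH)
		return EINVAL;

	memset(entries, 0, sizeof(entries));
	for (i = 0; i < count; i++) {
		entries[i].pid = pids[i];
		entries[i].priority = band;
	}

	/*
	 * One call repositions the whole batch. The behavior this tool
	 * verifies: a pid that has exited is simply skipped, and the
	 * surviving pids keep their relative order in the new band.
	 */
	if (memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0,
	    entries, count * sizeof(entries[0])) == -1)
		return errno;

	return 0;
}

The error paths probed by do_error_test() below (NULL buffer, zero size, misaligned size, out-of-range priority) are all expected to surface here as -1 with errno set to EINVAL.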
+static void
+printTestResult(const char *testName, boolean_t didPass, const char *msg, ...)
+{
+	if (msg != NULL) {
+		va_list va;
+		printf("\t\t");
+		va_start(va, msg);
+		vprintf(msg, va);
+		va_end(va);
+		printf("\n");
+	}
+	if (didPass) {
+		printf("[PASS] GrpSetProperty\t%s\n\n", testName);
+	} else {
+		printf("[FAIL] GrpSetProperty\t%s\n\n", testName);
+
+		/* Any single failure fails the full test run */
+		g_exit_status = -1;
+	}
+}
+
+static void
+do_error_test(void)
+{
+	boolean_t passflag = TRUE;
+	int error;
+	size_t listsize = 0;
+	memorystatus_priority_entry_t list[MAXTESTPIDS];
+
+	listsize = (sizeof(memorystatus_priority_entry_t) * MAXTESTPIDS);
+	memset(list, 0, listsize);
+
+	list[0].pid = getpid();
+	list[0].priority = JETSAM_PRIORITY_MAX+10;	/* out of range priority */
+
+	printTestHeader(getpid(), "NULL pointer test");
+	errno = 0;
+	error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, NULL, listsize);
+	printf("\t Expect: error (-1), errno (%d)\n", EINVAL);
+	printf("\t Actual: error (%d), errno (%d)\n", error, errno);
+	if (error == -1 && errno == EINVAL)
+		passflag = TRUE;
+	else
+		passflag = FALSE;
+	printTestResult("NULL pointer test", passflag, NULL);
+
+
+	printTestHeader(getpid(), "zero size test");
+	errno = 0;
+	error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, &list, 0);
+	printf("\t Expect: error (-1), errno (%d)\n", EINVAL);
+	printf("\t Actual: error (%d), errno (%d)\n", error, errno);
+	if (error == -1 && errno == EINVAL)
+		passflag = TRUE;
+	else
+		passflag = FALSE;
+	printTestResult("zero size test", passflag, NULL);
+
+
+	printTestHeader(getpid(), "bad size test");
+	errno = 0;
+	error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, &list, (listsize-1));
+	printf("\t Expect: error (-1), errno (%d)\n", EINVAL);
+	printf("\t Actual: error (%d), errno (%d)\n", error, errno);
+	if (error == -1 && errno == EINVAL)
+		passflag = TRUE;
+	else
+		passflag = FALSE;
+	printTestResult("bad size test", passflag, NULL);
+
+	printTestHeader(getpid(), "bad priority test");
+	errno = 0;
+	error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, &list, (listsize));
+	printf("\t Expect: error (-1), errno (%d)\n", EINVAL);
+	printf("\t Actual: error (%d), errno (%d)\n", error, errno);
+	if (error == -1 && errno == EINVAL)
+		passflag = TRUE;
+	else
+		passflag = FALSE;
+	printTestResult("bad priority test", passflag, NULL);
+}
+
+int
+main(int argc, char *argv[])
+{
+	kern_return_t error;
+
+	memorystatus_priority_entry_t list[MAXTESTPIDS];
+	uint32_t pid_count = MAXTESTPIDS;	/* default */
+	size_t listsize = 0;
+	int c;
+	int i = 0;
+
+	if (geteuid() != 0) {
+		printf("\tMust be run as root\n");
+		exit(1);
+	}
+
+	listsize = sizeof(memorystatus_priority_entry_t) * MAXTESTPIDS;
+	memset(list, 0, listsize);
+
+	while ((c = getopt(argc, argv, "p:ed:hvg:l")) != -1) {
+		switch (c) {
+		case 'p':
+			g_new_priority = strtol(optarg, NULL, 10);
+			break;
+		case 'e':
+			do_error_flag = TRUE;
+			break;
+		case 'v':
+			verbose_flag = TRUE;
+			break;
+		case 'd':
+			delay_seconds = strtol(optarg, NULL, 10);
+			break;
+		case 'l':
+			/* means a list of pids follows */
+			list_flag = TRUE;
+			break;
+		case 'g':
+			/* dynamically generate 'n' processes */
+			generate_flag = TRUE;
+			pid_count = strtol(optarg, NULL, 10);
+			break;
+		case 'h':
+			print_usage();
+			exit(0);
+		case '?':
+		default:
+			print_usage();
+			exit(-1);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+	errno = 0;
+
+	/*
+	 * The core part of this test has two modes only:
+	 * the default is to dynamically generate a list of pids to work on.
+ * Else use the -l flag and pass in a list of pids. + */ + if (generate_flag && list_flag) { + printTestResult(g_testname, FALSE, "Can't use both -g and -l options\n"); + exit(g_exit_status); + } + + if (generate_flag) { + if (pid_count <= 0 || pid_count > MAXTESTPIDS) { + printTestResult(g_testname, FALSE, + "Pid count out of range (actual: %d), (max: %d)\n", pid_count, MAXTESTPIDS); + exit(g_exit_status); + } + } else if (list_flag) { + pid_count=0; + for (; *argv; ++argv) { + if (pid_count < MAXTESTPIDS){ + list[pid_count].pid = strtol(*argv, NULL, 10); + list[pid_count].priority = g_new_priority; + pid_count++; + argc--; + optind++; + } else { + printTestResult(g_testname, FALSE, + "Too many pids (actual: %d), (max: %d)\n", pid_count, MAXTESTPIDS); + exit(g_exit_status); + break; + } + } + if (pid_count <= 0 ) { + printTestResult(g_testname, FALSE, + "Provide at least one pid (actual: %d),(max: %d)\n", pid_count, MAXTESTPIDS); + exit(g_exit_status); + } + } else { + /* set defaults */ + do_error_flag = TRUE; + generate_flag = TRUE; + pid_count = MAXTESTPIDS; + } + + if (do_error_flag) { + do_error_test(); + } + + if (generate_flag) { + do_generate_test(list, pid_count); + } + + if (list_flag) { + do_pidlist_test (list, pid_count); + } + + return(g_exit_status); + +} + + +static void +do_pidlist_test(memorystatus_priority_entry_t *list, uint32_t pid_count) +{ + + do_control_list_test(list, pid_count); +} + +static void +do_control_list_test(memorystatus_priority_entry_t *list, uint32_t pid_count) +{ + int error = 0; + int i; + boolean_t passflag; + pidinfo_t info[MAXTESTPIDS]; + + printTestHeader (getpid(), "new priority test"); + memset (info, 0, MAXTESTPIDS * sizeof(pidinfo_t)); + printf ("\tInput: pid_count = %d\n", pid_count); + printf ("\tInput: new_priority = %d\n", g_new_priority); + + if (generate_flag) + printf("\tIntentionally killed pid [%d]\n", list[kill_pid_indx].pid); + + /* random value initialization */ + srandom((u_long)time(NULL)); + + /* In generate path, we sprinkle pids into random priority buckets */ + + /* initialize info structures and properties */ + for (i = 0; i < pid_count; i++) { + info[i].pid = list[i].pid; + info[i].pri_random = random() % MAXPRIORITY; /* generate path only */ + info[i].pri_before = -1; + info[i].indx_before = -1; + info[i].pri_after = -1; + info[i].exp_after = g_new_priority; + info[i].indx_after = -1; + + if (generate_flag) { + /* Initialize properties for generated pids */ + memorystatus_priority_properties_t mp; + mp.priority = info[i].pri_random; + mp.user_data = 0; + if(memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, list[i].pid, 0, &mp, sizeof(mp)) == -1) { + /* + * If we cannot set the properties on a given + * pid (for whatever reason), we'll ignore it. + * But set expectations for verification phase. + */ + printf("\tWarning: set properties failed on pid [%d] (%s)\n", list[i].pid, strerror(errno)); + info[i].exp_after = -1; + errno = 0; + } + } + } + + /* Get the system's current jetsam priority list, init pass */ + if (do_get_priority_list(TRUE, list, pid_count, info) == FALSE) { + error = 1; + goto out; + } + + if (delay_seconds > 0) { + printf("\tDelay [%llu] seconds... 
(before move to new band)\n", delay_seconds);
+		sleep(delay_seconds);
+		errno = 0;
+	}
+
+	error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0,
+	    list, (pid_count * sizeof(memorystatus_priority_entry_t)));
+	if (error) {
+		printf("\tMEMORYSTATUS_CMD_GRP_SET_PROPERTIES failed (%s)\n", strerror(errno));
+		goto out;
+	}
+
+	/* Get the system's jetsam priority list, after move to new band */
+	if (do_get_priority_list(FALSE, list, pid_count, info) == FALSE) {
+		error = 1;
+		goto out;
+	}
+
+	if (delay_seconds > 0) {
+		printf("\tDelay [%llu] seconds... (after move to new band)\n", delay_seconds);
+		sleep(delay_seconds);
+		errno = 0;
+	}
+
+	qsort((void *)info, pid_count, sizeof(pidinfo_t), priority_cmp);
+
+	/*
+	 * Verify that the list of pids has been placed in the new priority
+	 * band and that they are in the same relative priority order.
+	 * The relative bucket placement before moving to the new priority
+	 * band should be the same as that after moving to the new
+	 * priority band.
+	 */
+	error = 0;
+	for (i = 0; i < pid_count; i++) {
+		if (info[i].pri_before == -1) {
+			/* skip... this pid did not exist */
+			continue;
+		}
+
+		/* The new priority band must meet expectations */
+		if (info[i].pri_after != info[i].exp_after) {
+			error++;
+		}
+
+		if (i+1 == pid_count)
+			break;	/* Done traversing list */
+
+		if (info[i].pid == info[i+1].pid) {
+			/* skip duplicate pids */
+			continue;
+		}
+
+		if (info[i].indx_before < info[i+1].indx_before &&
+		    info[i].indx_after < info[i+1].indx_after &&
+		    info[i].pri_before <= info[i+1].pri_before &&
+		    info[i].pri_after <= info[i+1].pri_after) {
+			/* relative order held up */
+		}
+		else {
+			error++;
+		}
+	}
+
+	printf("\tFound [%d] verification errors.\n", error);
+
+	if (error || errno || verbose_flag == TRUE) {
+		dump_info_table(info, pid_count);
+	}
+
+out:
+	printf("\n\tExpect: error (0), errno (0)\n");
+	printf("\tActual: error (%d), errno (%d)\n", error, errno);
+	if (error != 0 || errno != 0)
+		passflag = FALSE;
+	else
+		passflag = TRUE;
+	printTestResult(g_testname, passflag, NULL);
+}
+
+/*
+ * The concept of jetsam priority order can actually be viewed as
+ * the relative index of an item in a bucket, from lowest
+ * priority bucket to highest priority bucket and then from
+ * head bucket entry to tail bucket entry.
+ * In reality, we have a linear, ordered list at any point
+ * in time.
+ */
+
+
+static int
+priority_cmp(const void *x, const void *y)
+{
+	pidinfo_t entry_x = *((pidinfo_t *)x);
+	pidinfo_t entry_y = *((pidinfo_t *)y);
+
+	if (entry_x.pri_before < entry_y.pri_before)
+		return -1;
+	if (entry_x.pri_before == entry_y.pri_before) {
+		/*
+		 * Second level ordering.
+ */ + if (entry_x.indx_before < entry_y.indx_before) + return -1; + if (entry_x.indx_before == entry_y.indx_before) + return 0; /* never */ + return 1; + } + return 1; +} + + +static boolean_t +do_get_priority_list (boolean_t before, memorystatus_priority_entry_t *mypids, size_t pid_count, pidinfo_t *pidinfo) +{ +#pragma unused (mypids) + + size_t size = 0; + memorystatus_priority_entry_t *list; + size_t list_count = 0; + int found = 0; + int i, j; + + size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0); + if (size <= 0 ) { + printf("\tCan't get jetsam priority list size: %s\n", strerror(errno)); + return(FALSE); + } + + list = (memorystatus_priority_entry_t *)malloc(size); + + size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, list, size); + if (size <= 0) { + printf("\tCould not get jetsam priority list: %s\n", strerror(errno)); + free(list); + return(FALSE); + } + + /* recompute number of entries in the list and find the pid's priority*/ + list_count = size / sizeof(memorystatus_priority_entry_t); + + printf("\tFound [%d] jetsam bucket entries (%s move to new band).\n", + (int)list_count, before? "before" : " after"); + + for (i=0; i < pid_count; i++) { + for (j=0; j < list_count; j++) { + if (list[j].pid == pidinfo[i].pid) { + if (before) { + /* + * Save process's priority and relative index + * before moving to new priority + */ + pidinfo[i].pri_before = list[j].priority; + pidinfo[i].indx_before = j; + }else { + /* + * Save process's priority and relative index + * after moving to new priority + */ + pidinfo[i].pri_after = list[j].priority; + pidinfo[i].indx_after = j; + } + break; + } + } + } + + if (list) + free(list); + + return(TRUE); +} + + + +static +void do_generate_test (memorystatus_priority_entry_t *list, uint32_t pid_count) +{ + int launch_errors = 0; + int i; + memorystatus_priority_properties_t mp; + + /* Generate mode Setup phase */ + + if (pid_count <= 0) + return; + + for (i=0; i < pid_count; i++) { + list[i].pid = fork(); + list[i].priority = g_new_priority; /*XXX introduce multiple + new priorities??? */ + switch (list[i].pid) { + case 0: /* child */ + do_child_labor(); + exit(0); + break; + case -1: + launch_errors++; + break; + default: + continue; + } + } + + /* + * Parent will set the priority of the + * child processes + */ + + if (verbose_flag && launch_errors > 0) + printf("\tParent launch errors = %d\n", launch_errors); + + /* Introduce a case where pid is not found */ + kill_pid_indx = pid_count/2 ; + kill(list[kill_pid_indx].pid, SIGKILL); + sleep(5); + + do_control_list_test (list, pid_count); + + for (i=0; i < pid_count; i++) { + if (i != kill_pid_indx) { + kill(list[i].pid, SIGKILL ); + } + } +} + + +static void +do_child_labor() +{ + /* + * Ideally, the process should be suspended, + * but letting it spin doing random + * stuff should be harmless for this test. + */ + if (verbose_flag) + printf("\tLaunched child pid [%d]\n", getpid()); + while (TRUE) { + random(); + sleep(5); + } +} + + +static void +dump_info_table(pidinfo_t *info, uint32_t count) +{ + int i; + + /* + * The random priority value is only of interest in the + * generate_flag path, and even then, it's not really + * that interesting! So, not dumped here. + * But it is evident in the Jetsam Priority 'before' column. 
+ */ + + printf("\n%10s \t%s \t\t%20s\n", "Pid", "Jetsam Priority", "Relative Bucket Index"); + printf("%10s \t%s %20s\n", "", "(before | after | expected)", "(before | after)"); + + for (i=0; i < count; i++) { + printf("%10d", info[i].pid); + printf("\t(%4d |", info[i].pri_before); + printf("%4d |", info[i].pri_after); + printf("%4d)", info[i].exp_after); + printf("\t\t(%5d |", info[i].indx_before); + printf("%5d)\n", info[i].indx_after); + } +} + +static void +print_usage() { + + printf("\nUsage:\n"); + printf("[-e] [-p] [-v] [-d ][ -g | -l ]\n\n"); + printf("Exercise the MEMORYSTATUS_CMD_GRP_SET_PROPERTIES command.\n"); + printf("Operates on at most %d pids.\n", MAXTESTPIDS); + printf("Pass in a list of pids or allow the test to generate the pids dynamically.\n\n"); + + printf("\t -e : exercise error tests\n"); + printf("\t -p : Override default priority band.\n"); + printf("\t -v : extra verbosity\n"); + printf("\t -d : delay before and after idle move (default = 0)\n"); + printf("\t -g : dynamically generate processes.\n"); + printf("\t -l : operate on the given list of pids\n\n"); + printf("\t default : generate %d pids, no delay, priority %d eg: -g %d -p %d\n\n", + MAXTESTPIDS, g_new_priority, MAXTESTPIDS, g_new_priority); +} diff --git a/tools/tests/perf_index/Makefile b/tools/tests/perf_index/Makefile index ae8f3a683..44c522b14 100644 --- a/tools/tests/perf_index/Makefile +++ b/tools/tests/perf_index/Makefile @@ -1,42 +1,87 @@ SDKROOT ?= / Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) -CC:=xcrun -sdk "$(SDKROOT)" cc -CFLAGS:=-c -Wall -pedantic -OPTIMIZATION:=-Os -LDFLAGS= +CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc) +CFLAGS:=-c -Wall -pedantic -Os +LDFLAGS:= SRCROOT?=$(shell /bin/pwd) -DSTROOT?=$(shell /bin/pwd) -OBJROOT?=$(shell /bin/pwd) - -SOURCES:=main.c stress_cpu.c stress_memory.c stress_syscall.c stress_fault.c md5.c stress_file_create.c stress_file_write.c stress_file_read.c stress_file_local.c stress_file_ram.c iperf.c compile.c stress_general.c -SOURCE_PATHS:=$(addprefix $(SRCROOT)/,$(SOURCES)) -OBJECTS:=$(addprefix $(OBJROOT)/,$(SOURCES:.c=.o)) -EXECUTABLE=perf_index +DSTROOT?=$(shell /bin/pwd)/BUILD/dst +OBJROOT?=$(shell /bin/pwd)/BUILD/obj +SYMROOT?=$(shell /bin/pwd)/BUILD/sym ifdef RC_ARCHS -ARCHS:=$(RC_ARCHS) + ARCHS:=$(RC_ARCHS) else -ifeq ($(ARCHS),) -ifeq "$(Embedded)" "YES" -ARCHS:=armv7 armv7s + ifeq ($(ARCHS),) + ifeq "$(Embedded)" "YES" + ARCHS:=armv7 armv7s arm64 else -ARCHS:=x86_64 i386 + ARCHS:=x86_64 i386 endif endif endif + +ifeq "$(Embedded)" "YES" + TARGET_NAME:=PerfIndex.bundle-ios + XCODEBUILD=xcodebuild -sdk iphoneos.internal +else + TARGET_NAME:=PerfIndex.bundle-osx + XCODEBUILD=xcodebuild +endif + CFLAGS += $(patsubst %, -arch %, $(ARCHS)) LDFLAGS += $(patsubst %, -arch %, $(ARCHS)) -all: $(SOURCE_PATHS) $(EXECUTABLE) +all: $(DSTROOT) $(OBJROOT) $(SYMROOT) \ + $(DSTROOT)/perf_index \ + $(DSTROOT)/perfindex-cpu.dylib \ + $(DSTROOT)/perfindex-memory.dylib \ + $(DSTROOT)/perfindex-syscall.dylib \ + $(DSTROOT)/perfindex-fault.dylib \ + $(DSTROOT)/perfindex-zfod.dylib \ + $(DSTROOT)/perfindex-file_create.dylib \ + $(DSTROOT)/perfindex-file_read.dylib \ + $(DSTROOT)/perfindex-file_write.dylib \ + $(DSTROOT)/perfindex-ram_file_create.dylib \ + $(DSTROOT)/perfindex-ram_file_read.dylib \ + $(DSTROOT)/perfindex-ram_file_write.dylib \ + $(DSTROOT)/perfindex-iperf.dylib \ + $(DSTROOT)/perfindex-compile.dylib \ + $(DSTROOT)/PerfIndex.bundle -$(EXECUTABLE): $(OBJECTS) - $(CC) $(LDFLAGS) $(OBJECTS) -o $(DSTROOT)/$@ 
+$(DSTROOT)/perfindex-cpu.dylib: $(OBJROOT)/md5.o +$(DSTROOT)/perfindex-fault.dylib: $(OBJROOT)/test_fault_helper.o +$(DSTROOT)/perfindex-zfod.dylib: $(OBJROOT)/test_fault_helper.o +$(DSTROOT)/perfindex-file_create.dylib: $(OBJROOT)/test_file_helper.o +$(DSTROOT)/perfindex-file_read.dylib: $(OBJROOT)/test_file_helper.o +$(DSTROOT)/perfindex-file_write.dylib: $(OBJROOT)/test_file_helper.o +$(DSTROOT)/perfindex-ram_file_create.dylib: $(OBJROOT)/test_file_helper.o $(OBJROOT)/ramdisk.o +$(DSTROOT)/perfindex-ram_file_read.dylib: $(OBJROOT)/test_file_helper.o $(OBJROOT)/ramdisk.o +$(DSTROOT)/perfindex-ram_file_write.dylib: $(OBJROOT)/test_file_helper.o $(OBJROOT)/ramdisk.o + +$(DSTROOT)/perf_index: $(OBJROOT)/perf_index.o + $(CC) $(LDFLAGS) $? -o $@ + +$(DSTROOT)/PerfIndex.bundle: $(SRCROOT)/PerfIndex_COPS_Module/PerfIndex.xcodeproj + $(XCODEBUILD) -target $(TARGET_NAME) OBJROOT=$(OBJROOT) SYMROOT=$(SYMROOT) TARGET_TEMP_DIR=$(OBJROOT) TARGET_BUILD_DIR=$(DSTROOT) -project $? CLANG_ENABLE_MODULES=NO + +$(DSTROOT)/%.dylib: $(OBJROOT)/%.o + $(CC) $(LDFLAGS) -dynamiclib $? -o $@ $(OBJROOT)/%.o: $(SRCROOT)/%.c - $(CC) $(CFLAGS) $(OPTIMIZATION) $< -o $@ + $(CC) $(CFLAGS) $? -o $@ + +$(DSTROOT): + mkdir -p $(DSTROOT); + +$(OBJROOT): + mkdir -p $(OBJROOT); + +$(SYMROOT): + mkdir -p $(SYMROOT); clean: - rm -f *.o - rm -f $(EXECUTABLE) + rm -rf $(OBJROOT) + rm -rf $(DSTROOT) + rm -rf $(SYMROOT) diff --git a/tools/tests/perf_index/PerfIndex_COPS_Module/Info.plist b/tools/tests/perf_index/PerfIndex_COPS_Module/Info.plist new file mode 100644 index 000000000..7d23609f3 --- /dev/null +++ b/tools/tests/perf_index/PerfIndex_COPS_Module/Info.plist @@ -0,0 +1,59 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + PerfIndex + CFBundleIconFile + + CFBundleIdentifier + com.apple.coreostest.PerfIndex + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + PerfIndex + CFBundlePackageType + BNDL + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1 + NSHumanReadableCopyright + Copyright © 2013 Apple, Inc. All rights reserved. + NSPrincipalClass + PITest + Tests + + perfindex-cpu + PITest + perfindex-memory + PITest + perfindex-syscall + PITest + perfindex-fault + PITest + perfindex-zfod + PITest + perfindex-filecreate + PITest + perfindex-fileread + PITest + perfindex-filewrite + PITest + perfindex-ramfilecreate + PITest + perfindex-ramfileread + PITest + perfindex-ramfilewrite + PITest + perfindex-iperf + PITest + perfindex-compile + PITest + + + diff --git a/tools/tests/perf_index/PerfIndex_COPS_Module/PITest.h b/tools/tests/perf_index/PerfIndex_COPS_Module/PITest.h new file mode 100644 index 000000000..6449307c1 --- /dev/null +++ b/tools/tests/perf_index/PerfIndex_COPS_Module/PITest.h @@ -0,0 +1,36 @@ +// +// PITest.h +// PerfIndex +// +// Created by Mark Hamilton on 8/21/13. 
+// +// + +#import +#import "PerfIndex.h" + +@interface PITest : NSObject +{ + int (*setup_func)(int, long long, int, void**); + int (*execute_func)(int, int, long long, int, void**); + void (*cleanup_func)(int, long long); + + long long length; + int numThreads; + int readyThreadCount; + int testArgc; + void** testArgv; + pthread_mutex_t readyThreadCountLock; + pthread_cond_t threadsReadyCvar; + pthread_cond_t startCvar; + pthread_t* threads; +} + +@property NSString* testName; + +- (BOOL)setup; +- (BOOL)execute; +- (void)cleanup; + + +@end \ No newline at end of file diff --git a/tools/tests/perf_index/PerfIndex_COPS_Module/PITest.m b/tools/tests/perf_index/PerfIndex_COPS_Module/PITest.m new file mode 100644 index 000000000..19c19a904 --- /dev/null +++ b/tools/tests/perf_index/PerfIndex_COPS_Module/PITest.m @@ -0,0 +1,164 @@ +// +// PITest.m +// PerfIndex +// +// Created by Mark Hamilton on 8/21/13. +// +// + +#import "PITest.h" +#include +#include + +@implementation PITest + ++ (id)testWithOptions:(NSDictionary *)options +{ + PITest *instance = nil; + if(instance == nil) + instance = [[PITest alloc] init]; + [instance setTestName:[options objectForKey:@"name"]]; + return instance; +} + +- (BOOL)loadPITestAtPath:(NSString*) path +{ + void* handle; + void* f; + + handle = dlopen([path UTF8String], RTLD_NOW | RTLD_LOCAL); + if(!handle) { + return NO; + } + + + f = dlsym(handle, "setup"); + self->setup_func = (int (*)(int, long long, int, void **))f; + + f = dlsym(handle, "execute"); + self->execute_func = (int (*)(int, int, long long, int, void **))f; + if(!self->execute_func) + return NO; + + f = dlsym(handle, "cleanup"); + self->cleanup_func = (void (*)(int, long long))f; + return YES; +} + +- (long long)lengthForTest:(NSString*) testName +{ + NSNumber* number; + long long myLength; + NSDictionary* lengths = [NSDictionary dictionaryWithObjectsAndKeys: + @"cpu", [NSNumber numberWithLongLong:2000], + @"syscall", [NSNumber numberWithLongLong:2500], + @"memory", [NSNumber numberWithLongLong:1000000], + @"fault", [NSNumber numberWithLongLong:500], + @"zfod", [NSNumber numberWithLongLong:500], + @"file_create", [NSNumber numberWithLongLong:10], + @"file_read", [NSNumber numberWithLongLong:1000000], + @"file_write", [NSNumber numberWithLongLong:1000000], + nil]; + + number = (NSNumber*)[lengths objectForKey:testName]; + if(!number) { + myLength = 10; + } else { + myLength = [number longLongValue]; + } + + return myLength; +} + +- (BOOL)setup +{ + BOOL success = NO; + int retval; + + NSString* testPath = [NSString stringWithFormat:@"/AppleInternal/CoreOS/perf_index/%@.dylib", [self testName]]; + success = [self loadPITestAtPath:testPath]; + if(!success) { + NSLog(@"Failed to load test %@", [self testName]); + return NO; + } + + self->length = [self lengthForTest:[self testName]]; + self->numThreads = 1; + self->testArgc = 0; + self->testArgv = NULL; + + pthread_cond_init(&self->threadsReadyCvar, NULL); + pthread_cond_init(&self->startCvar, NULL); + pthread_mutex_init(&self->readyThreadCountLock, NULL); + self->readyThreadCount = 0; + + if(self->setup_func) { + retval = self->setup_func(1, self->length, 0, NULL); + if(retval != 0) { + NSLog(@"setup_func failed"); + return NO; + } + } + + self->threads = (pthread_t*)malloc(sizeof(pthread_t)*self->numThreads); + + for(int thread_index = 0; thread_index < self->numThreads; thread_index++) { + NSNumber* my_thread_index = [NSNumber numberWithInt:thread_index]; + NSArray *arg = [NSArray arrayWithObjects:my_thread_index, self, nil]; + retval = 
pthread_create(&threads[thread_index], NULL, thread_setup, (__bridge void*)arg); + if(retval != 0) { + NSLog(@"pthread_create failed"); + free(self->threads); + return NO; + } + } + + pthread_mutex_lock(&self->readyThreadCountLock); + if(self->readyThreadCount != self->numThreads) { + pthread_cond_wait(&self->threadsReadyCvar, &self->readyThreadCountLock); + } + pthread_mutex_unlock(&self->readyThreadCountLock); + return YES; +} + +- (BOOL)execute +{ + pthread_cond_broadcast(&self->startCvar); + for(int thread_index = 0; thread_index < self->numThreads; thread_index++) { + pthread_join(self->threads[thread_index], NULL); + } + return YES; +} + +- (void)cleanup +{ + free(self->threads); + if(self->cleanup_func) + self->cleanup_func(0, self->length); +} + +void* thread_setup(void* arg) +{ + int my_index = (int)[(NSNumber*)[(__bridge NSArray*)arg objectAtIndex:0] integerValue]; + PITest* test = (PITest*)[(__bridge NSArray*)arg objectAtIndex:1]; + + long long work_size = test->length / test->numThreads; + int work_remainder = test->length % test->numThreads; + + if(work_remainder > my_index) { + work_size++; + } + + pthread_mutex_lock(&test->readyThreadCountLock); + test->readyThreadCount++; + + if(test->readyThreadCount == test->numThreads) + pthread_cond_signal(&test->threadsReadyCvar); + pthread_cond_wait(&test->startCvar, &test->readyThreadCountLock); + pthread_mutex_unlock(&test->readyThreadCountLock); + test->execute_func(my_index, test->numThreads, work_size, test->testArgc, test->testArgv); + + return NULL; +} + +@end diff --git a/tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.h b/tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.h new file mode 100644 index 000000000..982ce600a --- /dev/null +++ b/tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.h @@ -0,0 +1,13 @@ +// +// PerfIndex.h +// PerfIndex +// +// Created by Mark Hamilton on 8/20/13. +// +// + +@protocol HGTest + +- (BOOL)execute; + +@end diff --git a/tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.xcodeproj/project.pbxproj b/tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.xcodeproj/project.pbxproj new file mode 100644 index 000000000..7c0cd67b1 --- /dev/null +++ b/tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.xcodeproj/project.pbxproj @@ -0,0 +1,402 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXBuildFile section */ + 4D95B51817C560DE00637699 /* PITest.m in Sources */ = {isa = PBXBuildFile; fileRef = 4D95B51717C560DE00637699 /* PITest.m */; }; + 4D95B51917C560DE00637699 /* PITest.m in Sources */ = {isa = PBXBuildFile; fileRef = 4D95B51717C560DE00637699 /* PITest.m */; }; + 4DB6141B17C454030014BDC5 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4DB6141A17C454030014BDC5 /* Foundation.framework */; }; + 4DF4EAE717C45A850031AFFB /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4DB6141A17C454030014BDC5 /* Foundation.framework */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 4D95B51617C560DE00637699 /* PITest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PITest.h; sourceTree = ""; }; + 4D95B51717C560DE00637699 /* PITest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = PITest.m; sourceTree = ""; }; + 4DB6140617C453F30014BDC5 /* PerfIndex.bundle */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = PerfIndex.bundle; sourceTree = BUILT_PRODUCTS_DIR; }; + 4DB6141917C454030014BDC5 /* PerfIndex.bundle */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = PerfIndex.bundle; sourceTree = BUILT_PRODUCTS_DIR; }; + 4DB6141A17C454030014BDC5 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; + 4DB6144917C455EB0014BDC5 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 4DB6144C17C4560D0014BDC5 /* Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Prefix.pch; sourceTree = ""; }; + 4DB6145017C4572A0014BDC5 /* PerfIndex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PerfIndex.h; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 4DB6140317C453F30014BDC5 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 4DF4EAE717C45A850031AFFB /* Foundation.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4DB6141617C454030014BDC5 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 4DB6141B17C454030014BDC5 /* Foundation.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 4DB613FB17C453C70014BDC5 = { + isa = PBXGroup; + children = ( + 4DB6144617C455980014BDC5 /* PerfIndex.bundle */, + 4DB6140817C453F30014BDC5 /* Frameworks */, + 4DB6140717C453F30014BDC5 /* Products */, + ); + sourceTree = ""; + }; + 4DB6140717C453F30014BDC5 /* Products */ = { + isa = PBXGroup; + children = ( + 4DB6140617C453F30014BDC5 /* PerfIndex.bundle */, + 4DB6141917C454030014BDC5 /* PerfIndex.bundle */, + ); + name = Products; + sourceTree = ""; + }; + 4DB6140817C453F30014BDC5 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 4DB6141A17C454030014BDC5 /* Foundation.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; + 4DB6144617C455980014BDC5 /* PerfIndex.bundle */ = { + isa 
= PBXGroup; + children = ( + 4DB6144917C455EB0014BDC5 /* Info.plist */, + 4DB6144C17C4560D0014BDC5 /* Prefix.pch */, + 4DB6145017C4572A0014BDC5 /* PerfIndex.h */, + 4D95B51617C560DE00637699 /* PITest.h */, + 4D95B51717C560DE00637699 /* PITest.m */, + ); + name = PerfIndex.bundle; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 4DB6140517C453F30014BDC5 /* PerfIndex.bundle-osx */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4DB6141217C453F30014BDC5 /* Build configuration list for PBXNativeTarget "PerfIndex.bundle-osx" */; + buildPhases = ( + 4DB6140217C453F30014BDC5 /* Sources */, + 4DB6140317C453F30014BDC5 /* Frameworks */, + 4DB6140417C453F30014BDC5 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "PerfIndex.bundle-osx"; + productName = "PerfIndex.bundle-osx"; + productReference = 4DB6140617C453F30014BDC5 /* PerfIndex.bundle */; + productType = "com.apple.product-type.bundle"; + }; + 4DB6141817C454030014BDC5 /* PerfIndex.bundle-ios */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4DB6142417C454030014BDC5 /* Build configuration list for PBXNativeTarget "PerfIndex.bundle-ios" */; + buildPhases = ( + 4DB6141517C454030014BDC5 /* Sources */, + 4DB6141617C454030014BDC5 /* Frameworks */, + 4DB6141717C454030014BDC5 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "PerfIndex.bundle-ios"; + productName = "PerfIndex.bundle-ios"; + productReference = 4DB6141917C454030014BDC5 /* PerfIndex.bundle */; + productType = "com.apple.product-type.bundle"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 4DB613FC17C453C70014BDC5 /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0500; + }; + buildConfigurationList = 4DB613FF17C453C70014BDC5 /* Build configuration list for PBXProject "PerfIndex" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = 4DB613FB17C453C70014BDC5; + productRefGroup = 4DB6140717C453F30014BDC5 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 4DB6140517C453F30014BDC5 /* PerfIndex.bundle-osx */, + 4DB6141817C454030014BDC5 /* PerfIndex.bundle-ios */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 4DB6140417C453F30014BDC5 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4DB6141717C454030014BDC5 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 4DB6140217C453F30014BDC5 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4D95B51817C560DE00637699 /* PITest.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4DB6141517C454030014BDC5 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4D95B51917C560DE00637699 /* PITest.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 4DB6140017C453C70014BDC5 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + }; + name = Debug; + }; + 4DB6140117C453C70014BDC5 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { 
+ }; + name = Release; + }; + 4DB6141317C453F30014BDC5 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COMBINE_HIDPI_IMAGES = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_OPTIMIZATION_LEVEL = s; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = Prefix.pch; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_SYMBOLS_PRIVATE_EXTERN = NO; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Bundles"; + MACOSX_DEPLOYMENT_TARGET = 10.9; + ONLY_ACTIVE_ARCH = YES; + PRODUCT_NAME = PerfIndex; + STRIP_INSTALLED_PRODUCT = NO; + STRIP_STYLE = debugging; + SUPPORTED_PLATFORMS = macosx; + VALID_ARCHS = x86_64; + WRAPPER_EXTENSION = bundle; + }; + name = Debug; + }; + 4DB6141417C453F30014BDC5 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COMBINE_HIDPI_IMAGES = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = Prefix.pch; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Bundles"; + MACOSX_DEPLOYMENT_TARGET = 10.9; + PRODUCT_NAME = PerfIndex; + STRIP_INSTALLED_PRODUCT = NO; + STRIP_STYLE = debugging; + SUPPORTED_PLATFORMS = macosx; + VALID_ARCHS = x86_64; + WRAPPER_EXTENSION = bundle; + }; + name = Release; + }; + 4DB6142517C454030014BDC5 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + 
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_OPTIMIZATION_LEVEL = s; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = Prefix.pch; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_SYMBOLS_PRIVATE_EXTERN = NO; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Bundles"; + IPHONEOS_DEPLOYMENT_TARGET = 7.0; + ONLY_ACTIVE_ARCH = YES; + PRODUCT_NAME = PerfIndex; + SDKROOT = iphoneos.internal; + TARGETED_DEVICE_FAMILY = "1,2"; + WRAPPER_EXTENSION = bundle; + }; + name = Debug; + }; + 4DB6142617C454030014BDC5 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + ENABLE_NS_ASSERTIONS = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = Prefix.pch; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Bundles"; + IPHONEOS_DEPLOYMENT_TARGET = 7.0; + PRODUCT_NAME = PerfIndex; + SDKROOT = iphoneos.internal; + TARGETED_DEVICE_FAMILY = "1,2"; + VALIDATE_PRODUCT = YES; + WRAPPER_EXTENSION = bundle; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 4DB613FF17C453C70014BDC5 /* Build configuration list for PBXProject "PerfIndex" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4DB6140017C453C70014BDC5 /* Debug */, + 4DB6140117C453C70014BDC5 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4DB6141217C453F30014BDC5 /* Build configuration list for PBXNativeTarget "PerfIndex.bundle-osx" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4DB6141317C453F30014BDC5 /* Debug */, + 4DB6141417C453F30014BDC5 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4DB6142417C454030014BDC5 /* Build configuration list for PBXNativeTarget "PerfIndex.bundle-ios" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4DB6142517C454030014BDC5 /* Debug */, + 4DB6142617C454030014BDC5 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 4DB613FC17C453C70014BDC5 /* Project object */; +} diff --git a/tools/tests/perf_index/PerfIndex_COPS_Module/Prefix.pch b/tools/tests/perf_index/PerfIndex_COPS_Module/Prefix.pch new file mode 100644 index 000000000..eb2007ecd --- /dev/null +++ b/tools/tests/perf_index/PerfIndex_COPS_Module/Prefix.pch @@ -0,0 +1,9 @@ +// +// Prefix header +// +// The contents of this file are implicitly included at the beginning of 
every source file.
+//
+
+#ifdef __OBJC__
+ #import <Foundation/Foundation.h>
+#endif
diff --git a/tools/tests/perf_index/README b/tools/tests/perf_index/README new file mode 100644 index 000000000..618696a16 --- /dev/null +++ b/tools/tests/perf_index/README @@ -0,0 +1,81 @@
+perf_index - This is a tool for gathering performance data. perf_index can run in
+two modes. In regular (offline) mode the usage is:
+perf_index type threads size [args]
+where type is one of the test types explained below, threads is the number of
+userland threads that should perform the task, size is the size of the task, and
+args are arguments to pass to the test. Currently only the iperf test requires
+these arguments. For example, if run with the following arguments:
+./perf_index cpu 2 100000000
+
+perf_index will run the cpu workload on two threads with a total workload size of
+100000000. Since the workload is distributed over 2 threads, on a perfectly
+parallel system this would take half the time it would if 1 were specified
+for the threads parameter. When finished running, perf_index will write the
+number of seconds the run took to standard out as a decimal number. Some of the test
+types have initialization and teardown steps, and these steps are not counted
+towards the time. The workload, and the time it takes to perform, differ
+quite drastically between test types, so you may need to play around with the
+size argument to find a value that will complete in a reasonable amount of time.
+
+In online mode, perf_index is invoked like so:
+perf_index remote server
+where remote is exactly the string "remote" and server is the control host to
+connect to. This tells the program to connect to the specified server and wait
+for instructions. The server is run by running the test_controller.py python
+script with the following arguments:
+test_controller.py num_clients type threads size
+The server will wait for num_clients clients to connect. It will then pass type, threads,
+and size to each of those clients, which will run the initialization code and
+report back to the server. Once the initialization is run by every client, the
+server will give the OK to every client to run the workload and begin timing.
+When done, each client reports back to the server. Once the server hears back
+from every client, it will stop timing and output the elapsed time.
+
+
+Test Types:
+Note: these implementations are subject to change; for an authoritative source,
+see the source code.
+cpu - calculates n md5 sums
+memory - initializes by allocating memory equal to half the RAM on the machine,
+then writes a byte to every page to ensure it is paged in. Then copies n bytes
+from the first half of memory to the second. If the allocated space is less than
+n/2, it keeps repeating the copies until n bytes are copied.
+syscall - calls the getppid(2) system call n times
+fault - performs n page faults by mmaping a large chunk of memory, toggling the
+write protection bit, and writing to each page
+zfod - performs n zero fill on demands, by mmaping a large chunk of memory and
+writing to each page
+file_create - creates n files (in the same directory) with the open(2) system
+call
+file_write - writes n bytes to files on disk. There is one file per thread.
+file_read - initializes by creating one large file on disk per thread.
+Then reads n bytes total from all the files. If there are fewer than n bytes in
+the files, it repeats reading from the beginning.
+ram_file_create - same as file_create but on a ram disk
+ram_file_read - same as file_read but on a ram disk
+ram_file_write - same as file_write but on a ram disk
+iperf - uses iperf to send n bytes over the network to the designated host
+specified as args
+compile - compiles xnu using make. This currently does a single compile and
+ignores the size argument.
+
+Building:
+perf_index is built automatically by BNI for both Mac (10.9 and later) and iOS
+(7 and later) trains, and is delivered on AppleInternal builds in
+/AppleInternal/CoreOS/perf_index. It is built as part of the xnu_quick_test
+build alias, so you can also find a copy on ~rc at:
+~rc/Software/$RELEASE/Updates/$RELEASEVERSION/Roots/xnu_quick_test/AppleInternal/CoreOS/perf_index.
+
+Alternatively, you can build it yourself using make, like so:
+SDKROOT=/path/to/sdk make
+
+For example:
+# build for Mac, current OS
+SDKROOT=/ make
+# build for iOS
+SDKROOT=`xcodebuild -sdk iphoneos.internal -version Path` make
+
+By default perf_index builds all-way fat, but you can restrict this by explicitly
+specifying architectures like so:
+# build for only armv7 and armv7s
+SDKROOT=`xcodebuild -sdk iphoneos.internal -version Path` make ARCH="armv7 armv7s"
diff --git a/tools/tests/perf_index/compile.c b/tools/tests/perf_index/compile.c deleted file mode 100644 index a43159895..000000000 --- a/tools/tests/perf_index/compile.c +++ /dev/null @@ -1,34 +0,0 @@
-#include -#include "perf_index.h" -#include
-
-static const char *src_dst = "/tmp/perf_index_compile_code";
-static const char *src_root = "/Network/Servers/xs1/release/Software/Zin/Projects/xnu/xnu-2050.7.9";
-
-const stress_test_t compile_test = {"compile", &compile_init, &compile, &compile_cleanup, &no_validate};
-
-DECL_INIT(compile_init) {
- char *cmd;
- const char *src = src_root;
- if(test_argc >= 1)
- src = test_argv[0];
- assert(asprintf(&cmd, "ditto \"%s\" \"%s\"", src, src_dst) >= 0);
- assert(system(cmd) == 0);
- free(cmd);
-}
-
-DECL_CLEANUP(compile_cleanup) {
- char *cmd;
- assert(asprintf(&cmd, "rm -rf \"%s\"", src_dst) >= 0);
- assert(system(cmd) == 0);
- free(cmd);
-}
-
-DECL_TEST(compile) {
- char *cmd;
- if(thread_id == 0) {
- assert(asprintf(&cmd, "make -C \"%s\" MAKEJOBS=-j%d", src_dst, num_threads) >= 0);
- assert(system(cmd) == 0);
- free(cmd);
- }
-}
diff --git a/tools/tests/perf_index/fail.h b/tools/tests/perf_index/fail.h new file mode 100644 index 000000000..dbe2a68ed --- /dev/null +++ b/tools/tests/perf_index/fail.h @@ -0,0 +1,21 @@
+#ifndef __FAIL_H_
+#define __FAIL_H_
+
+#define TOSTRING_HELPER(x) #x
+#define TOSTRING(x) TOSTRING_HELPER(x)
+
+#define PERFINDEX_FAILURE -1
+#define PERFINDEX_SUCCESS 0
+
+extern char* error_str;
+
+#define FAIL(message) do {\
+ error_str = message " at " __FILE__ ": " TOSTRING(__LINE__);\
+ return PERFINDEX_FAILURE;\
+} while(0)
+
+#define VERIFY(condition, fail_message) do {\
+ if(!(condition)) FAIL(fail_message);\
+} while(0)
+
+#endif
diff --git a/tools/tests/perf_index/iperf.c b/tools/tests/perf_index/iperf.c deleted file mode 100644 index a8d0f3a21..000000000 --- a/tools/tests/perf_index/iperf.c +++ /dev/null @@ -1,16 +0,0 @@
-#include -#include "perf_index.h" -#include
-
-const stress_test_t iperf_test = {"iperf", &stress_general_init, &iperf, &stress_general_cleanup, &validate_iperf};
-
-DECL_VALIDATE(validate_iperf) {
- return (test_argc >= 1);
-}
-
-DECL_TEST(iperf) {
- char *cmd;
- assert(asprintf(&cmd, "iperf -c \"%s\" -n %lld > /dev/null", test_argv[0], length) >= 0);
- assert(system(cmd) == 0);
- free(cmd);
-}
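[Editor's note: the fail.h header added above defines the error-reporting convention shared by the new perfindex test bundles in this patch: a test returns PERFINDEX_SUCCESS or PERFINDEX_FAILURE, and on failure VERIFY/FAIL leave a message tagged with file and line in error_str, which the harness later reads back through dlsym. A minimal sketch of a hypothetical plugin body using these macros; the body itself is illustrative and not part of the patch:

    #include "perf_index.h"   /* provides DECL_TEST and defines error_str  */
    #include "fail.h"         /* provides VERIFY/FAIL and the status codes */
    #include <stdlib.h>

    DECL_TEST {
        /* Allocate a scratch buffer sized by the harness-supplied length. */
        char *buf = malloc((size_t)length);
        VERIFY(buf != NULL, "malloc failed"); /* sets error_str, returns PERFINDEX_FAILURE */
        free(buf);
        return PERFINDEX_SUCCESS;             /* only this execute phase is timed */
    }
]
diff --git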
a/tools/tests/perf_index/main.c b/tools/tests/perf_index/main.c deleted file mode 100644 index fed6c205f..000000000 --- a/tools/tests/perf_index/main.c +++ /dev/null @@ -1,259 +0,0 @@ -#include -#include -#include -#include -#include "perf_index.h" -#include -#include -#include - -#define CONTROL_PORT 17694 - -static const stress_test_t *stress_tests[] = -{&cpu_test, &memory_test, &syscall_test, &fault_test, &zfod_test, - &file_local_create_test, &file_local_write_test, &file_local_read_test, - &file_ram_create_test, &file_ram_read_test, &file_ram_write_test, &iperf_test, - &compile_test -}; - -static int num_threads; -static long long all_len; -static int test_type; -static const char *control_host = NULL; -static const char **test_argv; -static int test_argc; -struct in_addr control_host_addr; -int control_sock; -const char remote_str[] = "remote"; -const char ready_msg[] = "Ready"; -const char done_msg[] = "Done"; - -static pthread_cond_t threads_running_cvar; -static pthread_cond_t start_cvar; -static int thread_count; -static pthread_mutex_t count_lock; - -static void usage() { - int i; - fprintf(stderr, "usage: perf_index remote server\n" - "or\n" - "usage: pref_index type threads size [args]\n\n" - "where type is one of:\n"); - for(i=0; iname); - } - fprintf(stderr, "\n"); - exit(1); -} - -static int validate_args(int argc, const char **argv) { - int i; - int ret; - int found = 0; - - if(argc < 3) { - return -1; - } - if(argc==3 && strcmp(argv[1], remote_str) == 0) - return 0; - - - if(argc < 4) - return -1; - - ret = -1; - for(i=0; iname) == 0) { - ret = i; - found = 1; - break; - } - } - - if(!found) - return -1; - - if(stress_tests[i]->validate(argc-4, argv+4)) - return ret; - else - return -1; -} - -int host_to_addr(const char *hostname, struct in_addr *addr) { - struct addrinfo *info; - int err; - if((err = getaddrinfo(hostname, NULL, NULL, &info)) != 0) { - return -1; - } - *addr = ((struct sockaddr_in*)info->ai_addr)->sin_addr; - freeaddrinfo(info); - return 0; -} - -static void parse_args(int argc, const char **argv); - -static void read_params_from_server(void) { - struct sockaddr_in addr; - char readbuff[1024]; - int zerocount = 0; - ssize_t offset = 0; - ssize_t recv_count; - ssize_t i; - const char **newargv = malloc(sizeof(char*) * 4); - assert(newargv != NULL); - - if(host_to_addr(control_host, &control_host_addr)<0) { - fprintf(stderr, "Could not resolve: %s\n", control_host); - exit(2); - } - - control_sock = socket(PF_INET, SOCK_STREAM, 0); - assert(control_sock != -1); - addr.sin_family = AF_INET; - addr.sin_port = htons(CONTROL_PORT); - addr.sin_addr = control_host_addr; - bzero(addr.sin_zero, sizeof addr.sin_zero); - if(connect(control_sock, (struct sockaddr *)&addr, sizeof(struct sockaddr)) == -1) { - fprintf(stderr, "Failed to connect to host: %s\n", control_host); - exit(3); - } - - while(offset=2 && readbuff[offset-1] == '\0' && readbuff[offset-2] == '\0') - break; - } - if(zerocount < 3) { - fprintf(stderr, "Received invalid parameters"); - exit(4); - } - - parse_args(zerocount+1, newargv); -} - -static void parse_args(int argc, const char **argv) { - test_type = validate_args(argc, argv); - if(test_type < 0) - usage(); - if(strcmp(argv[1], remote_str) == 0) { - control_host = argv[2]; - read_params_from_server(); - } - else { - num_threads = strtoimax(argv[2], NULL, 10); - all_len = strtoll(argv[3], NULL, 10); - test_argc = argc - 4; - test_argv = argv + 4; - } -} - -static void *stress_loop(void *data) { - int my_index = (int)data; - long long work_size = 
all_len / num_threads; - int work_remainder = all_len % num_threads; - - if(work_remainder > my_index) { - work_size++; - } - - pthread_mutex_lock(&count_lock); - thread_count++; - if(thread_count == num_threads) - pthread_cond_signal(&threads_running_cvar); - pthread_cond_wait(&start_cvar, &count_lock); - pthread_mutex_unlock(&count_lock); - stress_tests[test_type]->stress(my_index, num_threads, work_size, test_argc, test_argv); - return NULL; -} - -void start_timer(struct timeval *tp) { - gettimeofday(tp, NULL); -} - -void end_timer(struct timeval *tp) { - struct timeval tend; - gettimeofday(&tend, NULL); - if(tend.tv_usec >= tp->tv_usec) { - tp->tv_sec = tend.tv_sec - tp->tv_sec; - tp->tv_usec = tend.tv_usec - tp->tv_usec; - } - else { - tp->tv_sec = tend.tv_sec - tp->tv_sec - 1; - tp->tv_usec = tend.tv_usec - tp->tv_usec + 1000000; - } -} - -void print_timer(struct timeval *tp) { - printf("%ld.%06d", tp->tv_sec, tp->tv_usec); -} - -void wait_start(void) { - char readbuff[1024]; - if(control_host != NULL) { - send(control_sock, ready_msg, strlen(ready_msg), 0); - while(recv(control_sock, readbuff, sizeof(readbuff), 0)>0); - } -} - -void done(void) { - send(control_sock, done_msg, strlen(done_msg), 0); -} - -int main(int argc, const char **argv) { - int thread_index; - pthread_t *threads; - parse_args(argc, argv); - struct timeval timer; - - stress_tests[test_type]->init(num_threads, all_len, test_argc, test_argv); - pthread_cond_init(&threads_running_cvar, NULL); - pthread_cond_init(&start_cvar, NULL); - pthread_mutex_init(&count_lock, NULL); - thread_count = 0; - - threads = (pthread_t*)malloc(sizeof(pthread_t)*num_threads); - for(thread_index = 0; thread_index < num_threads; thread_index++) { - assert(pthread_create(&threads[thread_index], NULL, stress_loop, (void*)thread_index) == 0); - } - - pthread_mutex_lock(&count_lock); - if(thread_count != num_threads) - pthread_cond_wait(&threads_running_cvar, &count_lock); - pthread_mutex_unlock(&count_lock); - - wait_start(); - - start_timer(&timer); - pthread_cond_broadcast(&start_cvar); - for(thread_index = 0; thread_index < num_threads; thread_index++) { - pthread_join(threads[thread_index], NULL); - } - end_timer(&timer); - done(); - - pthread_mutex_destroy(&count_lock); - pthread_cond_destroy(&start_cvar); - pthread_cond_destroy(&threads_running_cvar); - - stress_tests[test_type]->cleanup(num_threads, all_len); - - print_timer(&timer); - printf("\n"); - - return 0; -} diff --git a/tools/tests/perf_index/md5.c b/tools/tests/perf_index/md5.c index cbc9bd74d..8b2775ad4 100644 --- a/tools/tests/perf_index/md5.c +++ b/tools/tests/perf_index/md5.c @@ -1,12 +1,14 @@ /* - * ccmd5_ltc.c - * corecrypto + * md5.c + * Adapted for perf_index from ccmd5_ltc.c in corecrypto * * Created by Fabrice Gautier on 12/3/10. * Copyright 2010,2011 Apple Inc. All rights reserved. 
* */ +#include "md5.h" + #include #include diff --git a/tools/tests/perf_index/md5.h b/tools/tests/perf_index/md5.h new file mode 100644 index 000000000..bcd3d3949 --- /dev/null +++ b/tools/tests/perf_index/md5.h @@ -0,0 +1,8 @@ +#ifndef __MD5_H_ +#define __MD5_H_ + +#include + +void md5_hash(uint8_t *message, uint64_t len, uint32_t *hash); + +#endif diff --git a/tools/tests/perf_index/perf_index.c b/tools/tests/perf_index/perf_index.c new file mode 100644 index 000000000..195328836 --- /dev/null +++ b/tools/tests/perf_index/perf_index.c @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fail.h" + +typedef struct parsed_args_struct { + char* my_name; + char* test_name; + int num_threads; + long long length; + int test_argc; + void** test_argv; +} parsed_args_t; + +typedef struct test_struct { + int (*setup)(int, long long, int, void**); + int (*execute)(int, int, long long, int, void**); + int (*cleanup)(int, long long); + char** error_str_ptr; +} test_t; + +parsed_args_t args; +test_t test; +int ready_thread_count; +pthread_mutex_t ready_thread_count_lock; +pthread_cond_t start_cvar; +pthread_cond_t threads_ready_cvar; + +int parse_args(int argc, char** argv, parsed_args_t* parsed_args) { + if(argc != 4) { + return -1; + } + + parsed_args->my_name = argv[0]; + parsed_args->test_name = argv[1]; + parsed_args->num_threads = atoi(argv[2]); + parsed_args->length = strtoll(argv[3], NULL, 10); + parsed_args->test_argc = 0; + parsed_args->test_argv = NULL; + return 0; +} + +void print_usage(char** argv) { + printf("Usage: %s test_name threads length\n", argv[0]); +} + +int find_test(char* test_name, char* test_path) { + char binpath[MAXPATHLEN]; + char* dirpath; + uint32_t size = sizeof(binpath); + int retval; + + retval = _NSGetExecutablePath(binpath, &size); + assert(retval == 0); + dirpath = dirname(binpath); + + snprintf(test_path, MAXPATHLEN, "%s/perfindex-%s.dylib", dirpath, test_name); + if(access(test_path, F_OK) == 0) + return 0; + else + return -1; +} + +int load_test(char* path, test_t* test) { + void* handle; + void* p; + + handle = dlopen(path, RTLD_NOW | RTLD_LOCAL); + if(!handle) { + return -1; + } + + + p = dlsym(handle, "setup"); + test->setup = (int (*)(int, long long, int, void **))p; + + p = dlsym(handle, "execute"); + test->execute = (int (*)(int, int, long long, int, void **))p; + if(p == NULL) + return -1; + + p = dlsym(handle, "cleanup"); + test->cleanup = (int (*)(int, long long))p; + + p = dlsym(handle, "error_str"); + test->error_str_ptr = (char**)p; + + return 0; +} + +void start_timer(struct timeval *tp) { + gettimeofday(tp, NULL); +} + +void end_timer(struct timeval *tp) { + struct timeval tend; + gettimeofday(&tend, NULL); + if(tend.tv_usec >= tp->tv_usec) { + tp->tv_sec = tend.tv_sec - tp->tv_sec; + tp->tv_usec = tend.tv_usec - tp->tv_usec; + } + else { + tp->tv_sec = tend.tv_sec - tp->tv_sec - 1; + tp->tv_usec = tend.tv_usec - tp->tv_usec + 1000000; + } +} + +void print_timer(struct timeval *tp) { + printf("%ld.%06d\n", tp->tv_sec, tp->tv_usec); +} + +static void* thread_setup(void *arg) { + int my_index = (int)arg; + long long work_size = args.length / args.num_threads; + int work_remainder = args.length % args.num_threads; + + if(work_remainder > my_index) { + work_size++; + } + + pthread_mutex_lock(&ready_thread_count_lock); + ready_thread_count++; + if(ready_thread_count == args.num_threads) + pthread_cond_signal(&threads_ready_cvar); + pthread_cond_wait(&start_cvar, 
&ready_thread_count_lock);
+ pthread_mutex_unlock(&ready_thread_count_lock);
+ test.execute(my_index, args.num_threads, work_size, args.test_argc, args.test_argv);
+ return NULL;
+}
+
+int main(int argc, char** argv) {
+ int retval;
+ int thread_index;
+ struct timeval timer;
+ pthread_t* threads;
+ int thread_retval;
+ void* thread_retval_ptr = &thread_retval;
+ char test_path[MAXPATHLEN];
+
+ retval = parse_args(argc, argv, &args);
+ if(retval) {
+ print_usage(argv);
+ return -1;
+ }
+
+ retval = find_test(args.test_name, test_path);
+ if(retval) {
+ printf("Unable to find test %s\n", args.test_name);
+ return -1;
+ }
+
+ retval = load_test(test_path, &test);
+ if(retval) {
+ printf("Unable to load test %s\n", args.test_name);
+ return -1;
+ }
+
+ pthread_cond_init(&threads_ready_cvar, NULL);
+ pthread_cond_init(&start_cvar, NULL);
+ pthread_mutex_init(&ready_thread_count_lock, NULL);
+ ready_thread_count = 0;
+
+ if(test.setup) {
+ retval = test.setup(args.num_threads, args.length, 0, NULL);
+ if(retval == PERFINDEX_FAILURE) {
+ fprintf(stderr, "Test setup failed: %s\n", *test.error_str_ptr);
+ return -1;
+ }
+ }
+
+ threads = (pthread_t*)malloc(sizeof(pthread_t)*args.num_threads);
+ for(thread_index = 0; thread_index < args.num_threads; thread_index++) {
+ retval = pthread_create(&threads[thread_index], NULL, thread_setup, (void*)(long)thread_index);
+ assert(retval == 0);
+ }
+
+ pthread_mutex_lock(&ready_thread_count_lock);
+ if(ready_thread_count != args.num_threads) {
+ pthread_cond_wait(&threads_ready_cvar, &ready_thread_count_lock);
+ }
+ pthread_mutex_unlock(&ready_thread_count_lock);
+
+ start_timer(&timer);
+ pthread_cond_broadcast(&start_cvar);
+ for(thread_index = 0; thread_index < args.num_threads; thread_index++) {
+ pthread_join(threads[thread_index], &thread_retval_ptr);
+ if(**test.error_str_ptr) {
+ printf("Test failed: %s\n", *test.error_str_ptr);
+ }
+ }
+ end_timer(&timer);
+
+ if(test.cleanup) {
+ retval = test.cleanup(args.num_threads, args.length);
+ if(retval == PERFINDEX_FAILURE) {
+ fprintf(stderr, "Test cleanup failed: %s\n", *test.error_str_ptr);
+ free(threads);
+ return -1;
+ }
+ }
+
+ print_timer(&timer);
+
+ free(threads);
+
+ return 0;
+}
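[Editor's note: on the work division in thread_setup above: each thread starts from length / num_threads units, and the first length % num_threads threads take one extra unit, so the per-thread shares differ by at most one and sum exactly to length. A small self-contained illustration of the same arithmetic (example values chosen here, not taken from the patch):

    /* length = 10, num_threads = 4:
     * base = 10 / 4 = 2 and remainder = 10 % 4 = 2, so threads 0 and 1
     * get 3 units while threads 2 and 3 get 2, and 3 + 3 + 2 + 2 = 10. */
    long long length = 10, num_threads = 4, my_index = 0;
    long long base = length / num_threads;
    long long work_size = base + (my_index < length % num_threads ? 1 : 0);
]
diff --git a/tools/tests/perf_index/perf_index.h b/tools/tests/perf_index/perf_index.h index 7925853a5..5d5e326c6 100644 --- a/tools/tests/perf_index/perf_index.h +++ b/tools/tests/perf_index/perf_index.h @@ -1,96 +1,10 @@ #ifndef __PERF_INDEX_H_ #define __PERF_INDEX_H_
-#include -#include -#include -#include -#include -#include -#include
+#define DECL_SETUP int setup(int num_threads, long long length, int test_argc, const void** test_argv)
+#define DECL_TEST int execute(int thread_id, int num_threads, long long length, int test_argc, const void** test_argv)
+#define DECL_CLEANUP int cleanup(int num_threads, long long length)
-#define DECL_VALIDATE(validatetest) int validatetest(int test_argc, const char **test_argv)
-#define DECL_INIT(inittest) void inittest(int num_threads, long long length, int test_argc, const char **test_argv)
-#define DECL_TEST(test) void test(int thread_id, int num_threads, long long length, int test_argc, const char **test_argv)
-#define DECL_CLEANUP(cleanuptest) void cleanuptest(int num_threads, long long length)
-
-#define MAXPATHLEN 1024
-
-typedef DECL_INIT((*init_func));
-typedef DECL_TEST((*stress_func));
-typedef DECL_CLEANUP((*cleanup_func));
-typedef DECL_VALIDATE((*validate_func));
-
-typedef struct {
- char *name;
- init_func init;
- stress_func stress;
- cleanup_func cleanup;
- validate_func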
validate; -} stress_test_t; - -extern const stress_test_t cpu_test; -extern const stress_test_t memory_test; -extern const stress_test_t syscall_test; -extern const stress_test_t fault_test; -extern const stress_test_t zfod_test; -extern const stress_test_t file_local_create_test; -extern const stress_test_t file_local_write_test; -extern const stress_test_t file_local_read_test; -extern const stress_test_t file_ram_create_test; -extern const stress_test_t file_ram_write_test; -extern const stress_test_t file_ram_read_test; -extern const stress_test_t iperf_test; -extern const stress_test_t compile_test; - -DECL_VALIDATE(no_validate); -DECL_VALIDATE(validate_iperf); - -DECL_INIT(stress_memory_init); -DECL_INIT(stress_syscall_init); -DECL_INIT(stress_fault_init); -DECL_INIT(stress_file_local_create_init); -DECL_INIT(stress_file_local_read_init); -DECL_INIT(stress_file_local_write_init); -DECL_INIT(stress_file_ram_create_init); -DECL_INIT(stress_file_ram_read_init); -DECL_INIT(stress_file_ram_write_init); -DECL_INIT(compile_init); -DECL_INIT(stress_general_init); - -DECL_TEST(stress_memory); -DECL_TEST(stress_cpu); -DECL_TEST(stress_syscall); -DECL_TEST(stress_fault); -DECL_TEST(stress_zfod); -DECL_TEST(stress_file_local_create); -DECL_TEST(stress_file_local_read); -DECL_TEST(stress_file_local_write); -DECL_TEST(stress_file_ram_create); -DECL_TEST(stress_file_ram_read); -DECL_TEST(stress_file_ram_write); -DECL_TEST(iperf); -DECL_TEST(compile); -DECL_TEST(stress_general); - -DECL_CLEANUP(stress_general_cleanup); -DECL_CLEANUP(stress_file_local_create_cleanup); -DECL_CLEANUP(stress_file_local_read_cleanup); -DECL_CLEANUP(stress_file_local_write_cleanup); -DECL_CLEANUP(stress_file_ram_create_cleanup); -DECL_CLEANUP(stress_file_ram_read_cleanup); -DECL_CLEANUP(stress_file_ram_write_cleanup); -DECL_CLEANUP(compile_cleanup); - -void stress_file_create(const char *fs_path, int thread_id, int num_threads, long long length); - -void stress_file_write_init(const char *fs_path, int num_threads, long long length); -void stress_file_write(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size); - -void stress_file_read_init(const char *fs_path, int num_threads, long long length, long long max_file_size); -void stress_file_read(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size); -void stress_file_read_cleanup(const char *fs_path, int num_threads, long long length); - -void md5_hash(uint8_t *message, uint64_t len, uint32_t *hash); +char* error_str = ""; #endif diff --git a/tools/tests/perf_index/perfindex-compile.c b/tools/tests/perf_index/perfindex-compile.c new file mode 100644 index 000000000..b7743f8eb --- /dev/null +++ b/tools/tests/perf_index/perfindex-compile.c @@ -0,0 +1,54 @@ +#include "perf_index.h" +#include "fail.h" +#include +#include + +static const char *src_dst = "/tmp/perf_index_compile_code"; +static const char *src_root = "/Network/Servers/xs1/release/Software/Zin/Projects/xnu/xnu-2050.7.9"; + +DECL_SETUP { + char* cmd; + int retval; + const char *src = src_root; + if(test_argc >= 1) + src = (char*)test_argv[0]; + + retval = asprintf(&cmd, "ditto \"%s\" \"%s\"", src, src_dst); + VERIFY(retval > 0, "asprintf failed"); + + retval = system(cmd); + VERIFY(retval == 0, "ditto command failed"); + + free(cmd); + + return PERFINDEX_SUCCESS; +} + +DECL_TEST { + char* cmd; + int retval; + + if(thread_id != 0) + return 0; + + retval = asprintf(&cmd, "make -C \"%s\" MAKEJOBS=-j%d", src_dst, num_threads); + 
VERIFY(retval > 0, "asprintf failed");
+
+ retval = system(cmd);
+ VERIFY(retval == 0, "make command failed");
+
+ return PERFINDEX_SUCCESS;
+}
+
+DECL_CLEANUP {
+ char* cmd;
+ int retval;
+
+ retval = asprintf(&cmd, "rm -rf \"%s\"", src_dst);
+ VERIFY(retval > 0, "asprintf failed");
+
+ retval = system(cmd);
+ VERIFY(retval == 0, "rm command failed");
+
+ return PERFINDEX_SUCCESS;
+}
diff --git a/tools/tests/perf_index/perfindex-cpu.c b/tools/tests/perf_index/perfindex-cpu.c new file mode 100644 index 000000000..e95e4640d --- /dev/null +++ b/tools/tests/perf_index/perfindex-cpu.c @@ -0,0 +1,14 @@
+#include "perf_index.h"
+#include "fail.h"
+#include "md5.h"
+#include +#include
+
+DECL_TEST {
+ long long i;
+ uint32_t digest[4];
+ for(i=0; i +#include +#include
+
+char tempdir[MAXPATHLEN];
+
+DECL_SETUP {
+ char* retval;
+
+ retval = setup_tempdir(tempdir);
+
+ VERIFY(retval, "tempdir setup failed");
+
+ printf("tempdir: %s\n", tempdir);
+
+ return PERFINDEX_SUCCESS;
+}
+
+DECL_TEST {
+ return test_file_create(tempdir, thread_id, num_threads, length);
+}
+
+DECL_CLEANUP {
+ int retval;
+
+ retval = cleanup_tempdir(tempdir);
+ VERIFY(retval == 0, "cleanup_tempdir failed");
+
+ return PERFINDEX_SUCCESS;
+}
diff --git a/tools/tests/perf_index/perfindex-file_read.c b/tools/tests/perf_index/perfindex-file_read.c new file mode 100644 index 000000000..6eeaaaf0f --- /dev/null +++ b/tools/tests/perf_index/perfindex-file_read.c @@ -0,0 +1,36 @@
+#include "perf_index.h"
+#include "fail.h"
+#include "test_file_helper.h"
+#include +#include
+
+char tempdir[MAXPATHLEN];
+
+DECL_SETUP {
+ char* retval;
+
+ retval = setup_tempdir(tempdir);
+
+ VERIFY(retval, "tempdir setup failed");
+
+ printf("tempdir: %s\n", tempdir);
+
+ return test_file_read_setup(tempdir, num_threads, length, 0L);
+}
+
+DECL_TEST {
+ return test_file_read(tempdir, thread_id, num_threads, length, 0L);
+}
+
+DECL_CLEANUP {
+ int retval;
+
+ retval = test_file_read_cleanup(tempdir, num_threads, length);
+ VERIFY(retval == PERFINDEX_SUCCESS, "test_file_read_cleanup failed");
+
+ retval = cleanup_tempdir(tempdir);
+ VERIFY(retval == 0, "cleanup_tempdir failed");
+
+ return PERFINDEX_SUCCESS;
+
+}
diff --git a/tools/tests/perf_index/perfindex-file_write.c b/tools/tests/perf_index/perfindex-file_write.c new file mode 100644 index 000000000..76134a27a --- /dev/null +++ b/tools/tests/perf_index/perfindex-file_write.c @@ -0,0 +1,37 @@
+#include "perf_index.h"
+#include "fail.h"
+#include "test_file_helper.h"
+#include +#include +#include
+
+char tempdir[MAXPATHLEN];
+
+DECL_SETUP {
+ char* retval;
+
+ retval = setup_tempdir(tempdir);
+
+ VERIFY(retval, "tempdir setup failed");
+
+ printf("tempdir: %s\n", tempdir);
+
+ return test_file_write_setup(tempdir, num_threads, length);
+
+}
+
+DECL_TEST {
+ return test_file_write(tempdir, thread_id, num_threads, length, 0L);
+}
+
+DECL_CLEANUP {
+ int retval;
+
+ retval = test_file_write_cleanup(tempdir, num_threads, length);
+ VERIFY(retval == PERFINDEX_SUCCESS, "test_file_write_cleanup failed");
+
+ retval = cleanup_tempdir(tempdir);
+ VERIFY(retval == 0, "cleanup_tempdir failed");
+
+ return PERFINDEX_SUCCESS;
+}
diff --git a/tools/tests/perf_index/perfindex-iperf.c b/tools/tests/perf_index/perfindex-iperf.c new file mode 100644 index 000000000..abb9b28b4 --- /dev/null +++ b/tools/tests/perf_index/perfindex-iperf.c @@ -0,0 +1,23 @@
+#include "perf_index.h"
+#include "fail.h"
+#include +#include
+
+DECL_SETUP {
+ VERIFY(test_argc > 0, "missing argument");
+
+ return PERFINDEX_SUCCESS;
+}
+
+DECL_TEST
{ + char* cmd; + int retval; + + retval = asprintf(&cmd, "iperf -c \"%s\" -n %lld > /dev/null", test_argv[0], length); + VERIFY(retval > 0, "asprintf failed"); + + retval = system(cmd); + VERIFY(retval == 0, "iperf command failed"); + + return PERFINDEX_SUCCESS; +} diff --git a/tools/tests/perf_index/stress_memory.c b/tools/tests/perf_index/perfindex-memory.c similarity index 74% rename from tools/tests/perf_index/stress_memory.c rename to tools/tests/perf_index/perfindex-memory.c index 0d6c4cad9..759468ed9 100644 --- a/tools/tests/perf_index/stress_memory.c +++ b/tools/tests/perf_index/perfindex-memory.c @@ -1,24 +1,32 @@ #include "perf_index.h" +#include "fail.h" +#include +#include +#include #include -#include static char *memblock; static size_t memsize; -const stress_test_t memory_test = {"memory", &stress_memory_init, &stress_memory, &stress_general_cleanup, &no_validate}; - size_t hw_memsize(void) { int mib[2]; size_t len; size_t my_memsize; + int retval; + mib[0] = CTL_HW; mib[1] = HW_MEMSIZE; len = sizeof(my_memsize); - sysctl(mib, 2, &my_memsize, &len, NULL, 0); + + retval = sysctl(mib, 2, &my_memsize, &len, NULL, 0); + + if(retval != 0) + return 0; + return my_memsize; } -DECL_INIT(stress_memory_init) { +DECL_SETUP { char *memblockfiller; long long i; int pgsz = getpagesize(); @@ -28,19 +36,26 @@ DECL_INIT(stress_memory_init) { * metric, like amount of free memory, so that the memory allocated is always * consistent for a given device. */ - memsize = hw_memsize()/2; + memsize = hw_memsize(); + VERIFY(memsize > 0, "hw_memsize failed"); + memsize = memsize/2; + memblock = (char*)malloc(memsize); + VERIFY(memblock != NULL, "malloc failed"); + memblockfiller = memblock; /* Do this manually, to make sure everything is paged in */ for(i=0; i +#include + +const char ramdisk_name[] = "StressRAMDisk"; +char ramdisk_path[MAXPATHLEN]; + +DECL_SETUP { + int retval; + + retval = setup_ram_volume(ramdisk_name, ramdisk_path); + VERIFY(retval == PERFINDEX_SUCCESS, "setup_ram_volume failed"); + + printf("ramdisk: %s\n", ramdisk_path); + + return PERFINDEX_SUCCESS; +} + +DECL_TEST { + return test_file_create(ramdisk_path, thread_id, num_threads, length); +} + +DECL_CLEANUP { + int retval; + + retval = cleanup_ram_volume(ramdisk_path); + VERIFY(retval == 0, "cleanup_ram_volume failed"); + + return PERFINDEX_SUCCESS; +} diff --git a/tools/tests/perf_index/perfindex-ram_file_read.c b/tools/tests/perf_index/perfindex-ram_file_read.c new file mode 100644 index 000000000..e547cec42 --- /dev/null +++ b/tools/tests/perf_index/perfindex-ram_file_read.c @@ -0,0 +1,36 @@ +#include "perf_index.h" +#include "fail.h" +#include "test_file_helper.h" +#include "ramdisk.h" +#include +#include + +const char ramdisk_name[] = "StressRAMDisk"; +char ramdisk_path[MAXPATHLEN]; + +DECL_SETUP { + int retval; + + retval = setup_ram_volume(ramdisk_name, ramdisk_path); + VERIFY(retval == PERFINDEX_SUCCESS, "setup_ram_volume failed"); + + printf("ramdisk: %s\n", ramdisk_path); + + return test_file_read_setup(ramdisk_path, num_threads, length, 0L); +} + +DECL_TEST { + return test_file_read(ramdisk_path, thread_id, num_threads, length, 0L); +} + +DECL_CLEANUP { + int retval; + + retval = test_file_read_cleanup(ramdisk_path, num_threads, length); + VERIFY(retval == PERFINDEX_SUCCESS, "test_file_read_cleanup failed"); + + retval = cleanup_ram_volume(ramdisk_path); + VERIFY(retval == 0, "cleanup_ram_volume failed"); + + return PERFINDEX_SUCCESS; +} diff --git a/tools/tests/perf_index/perfindex-ram_file_write.c 
b/tools/tests/perf_index/perfindex-ram_file_write.c new file mode 100644 index 000000000..e8c596bad --- /dev/null +++ b/tools/tests/perf_index/perfindex-ram_file_write.c @@ -0,0 +1,36 @@
+#include "perf_index.h"
+#include "fail.h"
+#include "test_file_helper.h"
+#include "ramdisk.h"
+#include +#include
+
+const char ramdisk_name[] = "StressRAMDisk";
+char ramdisk_path[MAXPATHLEN];
+
+DECL_SETUP {
+ int retval;
+
+ retval = setup_ram_volume(ramdisk_name, ramdisk_path);
+ VERIFY(retval == PERFINDEX_SUCCESS, "setup_ram_volume failed");
+
+ printf("ramdisk: %s\n", ramdisk_path);
+
+ return test_file_write_setup(ramdisk_path, num_threads, length);
+}
+
+DECL_TEST {
+ return test_file_write(ramdisk_path, thread_id, num_threads, length, 0L);
+}
+
+DECL_CLEANUP {
+ int retval;
+
+ retval = test_file_write_cleanup(ramdisk_path, num_threads, length);
+ VERIFY(retval == PERFINDEX_SUCCESS, "test_file_write_cleanup failed");
+
+ retval = cleanup_ram_volume(ramdisk_path);
+ VERIFY(retval == 0, "cleanup_ram_volume failed");
+
+ return PERFINDEX_SUCCESS;
+}
diff --git a/tools/tests/perf_index/perfindex-syscall.c b/tools/tests/perf_index/perfindex-syscall.c new file mode 100644 index 000000000..757c5c8ad --- /dev/null +++ b/tools/tests/perf_index/perfindex-syscall.c @@ -0,0 +1,11 @@
+#include "perf_index.h"
+#include "fail.h"
+#include
+
+DECL_TEST {
+ long long i;
+ for(i=0; i +#include +#include +#include
+
+int setup_ram_volume(const char* name, char* path) {
+ char *cmd;
+ int retval;
+
+ retval = asprintf(&cmd, "diskutil erasevolume HFS+ '%s' `hdiutil attach -nomount ram://1500000` >/dev/null", name);
+ VERIFY(retval > 0, "asprintf failed");
+
+ retval = system(cmd);
+ VERIFY(retval == 0, "diskutil command failed");
+
+ snprintf(path, MAXPATHLEN, "/Volumes/%s", name);
+
+ free(cmd);
+
+ return PERFINDEX_SUCCESS;
+}
+
+int cleanup_ram_volume(char* path) {
+ char *cmd;
+ int retval;
+
+ retval = asprintf(&cmd, "umount -f '%s' >/dev/null", path);
+ VERIFY(retval > 0, "asprintf failed");
+
+ retval = system(cmd);
+ VERIFY(retval == 0, "umount command failed");
+
+ free(cmd);
+
+ return PERFINDEX_SUCCESS;
+}
diff --git a/tools/tests/perf_index/ramdisk.h b/tools/tests/perf_index/ramdisk.h new file mode 100644 index 000000000..9cf45c2df --- /dev/null +++ b/tools/tests/perf_index/ramdisk.h @@ -0,0 +1,7 @@
+#ifndef __RAMDISK_H_
+#define __RAMDISK_H_
+
+int setup_ram_volume(const char* name, char* path);
+int cleanup_ram_volume(char* path);
+
+#endif
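[Editor's note: the ram://1500000 URL passed to hdiutil in setup_ram_volume above is sized in 512-byte sectors, so 1500000 sectors is 768,000,000 bytes, roughly 732 MiB. A hypothetical helper for deriving that sector count from a target capacity (the 512-byte sector size is an assumption about hdiutil's ram:// scheme, and ram_url_sectors is not a function from the patch):

    #include <stdio.h>

    /* Convert a desired ram disk capacity in bytes to the sector count
     * expected by `hdiutil attach -nomount ram://<sectors>`. */
    static unsigned long long ram_url_sectors(unsigned long long bytes) {
        return (bytes + 511) / 512; /* round up to whole 512-byte sectors */
    }

    int main(void) {
        /* 1500000 sectors * 512 = 768000000 bytes, matching ramdisk.c */
        printf("ram://%llu\n", ram_url_sectors(768000000ULL));
        return 0;
    }
]
diff --git a/tools/tests/perf_index/stress_cpu.c b/tools/tests/perf_index/stress_cpu.c deleted file mode 100644 index 1e0c4b2e0..000000000 --- a/tools/tests/perf_index/stress_cpu.c +++ /dev/null @@ -1,11 +0,0 @@
-#include "perf_index.h"
-
-const stress_test_t cpu_test = {"cpu", &stress_general_init, &stress_cpu, &stress_general_cleanup, &no_validate};
-
-DECL_TEST(stress_cpu) {
- long long i;
- uint32_t digest[4];
- for(i=0; i -#include
-
-#if TARGET_OS_EMBEDDED
-#define MEMSIZE (1L<<28)
-#else
-#define MEMSIZE (1L<<30)
-#endif
-
-typedef enum {
- TESTZFOD,
- TESTFAULT
-} testtype_t;
-
-const stress_test_t fault_test = {"fault", &stress_fault_init, &stress_fault, &stress_general_cleanup, &no_validate};
-const stress_test_t zfod_test = {"zfod", &stress_fault_init, &stress_zfod, &stress_general_cleanup, &no_validate};
-
-static char *memblock;
-
-DECL_INIT(stress_fault_init) {
- int pgsz = getpagesize();
- memblock = (char *)mmap(NULL, MEMSIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
- char *ptr;
- /* make sure memory is paged */
- for(ptr =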
memblock; ptr -#include "perf_index.h" -#include - -void stress_file_create(const char *fs_path, int thread_id, int num_threads, long long length) { - long long i; - int fd; - char filepath[MAXPATHLEN]; - for(i=0; i=0); - close(fd); - } - for(i=0; i=0); - } -} diff --git a/tools/tests/perf_index/stress_file_local.c b/tools/tests/perf_index/stress_file_local.c deleted file mode 100644 index d948caf1a..000000000 --- a/tools/tests/perf_index/stress_file_local.c +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include "perf_index.h" -#include - - -const stress_test_t file_local_create_test = {"file_create", &stress_file_local_create_init, &stress_file_local_create, &stress_file_local_create_cleanup, &no_validate}; -const stress_test_t file_local_write_test = {"file_write", &stress_file_local_write_init, &stress_file_local_write, &stress_file_local_write_cleanup, &no_validate}; -const stress_test_t file_local_read_test = {"file_read", &stress_file_local_read_init, &stress_file_local_read, &stress_file_local_read_cleanup, &no_validate}; - -static char fs_path[MAXPATHLEN]; - -static void setup_local_volume(void) { - snprintf(fs_path, MAXPATHLEN, "%s", "/tmp"); -} - -DECL_INIT(stress_file_local_read_init) { - setup_local_volume(); - stress_file_read_init(fs_path, num_threads, length, 0L); -} - -DECL_TEST(stress_file_local_read) { - stress_file_read(fs_path, thread_id, num_threads, length, 0L); -} - -DECL_CLEANUP(stress_file_local_read_cleanup) { - stress_file_read_cleanup(fs_path, num_threads, length); -} - -DECL_INIT(stress_file_local_write_init) { - setup_local_volume(); - stress_file_write_init(fs_path, num_threads, length); -} - -DECL_TEST(stress_file_local_write) { - stress_file_write(fs_path, thread_id, num_threads, length, 0L); -} - -DECL_CLEANUP(stress_file_local_write_cleanup) { -} - -DECL_INIT(stress_file_local_create_init) { - setup_local_volume(); -} - -DECL_TEST(stress_file_local_create) { - stress_file_create(fs_path, thread_id, num_threads, length); -} - -DECL_CLEANUP(stress_file_local_create_cleanup) { -} diff --git a/tools/tests/perf_index/stress_file_ram.c b/tools/tests/perf_index/stress_file_ram.c deleted file mode 100644 index 6f203bae4..000000000 --- a/tools/tests/perf_index/stress_file_ram.c +++ /dev/null @@ -1,65 +0,0 @@ -#include -#include "perf_index.h" -#include - -#define MAX_FILE_SIZE 536870912L - -const stress_test_t file_ram_create_test = {"ram_file_create", &stress_file_ram_create_init, &stress_file_ram_create, &stress_file_ram_create_cleanup, &no_validate}; -const stress_test_t file_ram_write_test = {"ram_file_write", &stress_file_ram_write_init, &stress_file_ram_write, &stress_file_ram_write_cleanup, &no_validate}; -const stress_test_t file_ram_read_test = {"ram_file_read", &stress_file_ram_read_init, &stress_file_ram_read, &stress_file_ram_read_cleanup, &no_validate}; - -static const char ramdiskname[] = "StressRamDisk"; - -static const char fs_path[MAXPATHLEN] = "/Volumes/StressRamDisk"; - -static void setup_ram_volume(void) { - char *cmd; - assert(asprintf(&cmd, "diskutil erasevolume HFS+ \"%s\" `hdiutil attach -nomount ram://1500000` >/dev/null", ramdiskname) >= 0); - assert(system(cmd) == 0); - free(cmd); -} - -static void cleanup_ram_volume(void) { - char *cmd; - assert(asprintf(&cmd, "umount -f %s >/dev/null", fs_path) >= 0); - assert(system(cmd) == 0); - free(cmd); -} - -DECL_INIT(stress_file_ram_read_init) { - setup_ram_volume(); - stress_file_read_init(fs_path, num_threads, length, MAX_FILE_SIZE); -} - -DECL_TEST(stress_file_ram_read) { - 
stress_file_read(fs_path, thread_id, num_threads, length, MAX_FILE_SIZE); -} - -DECL_CLEANUP(stress_file_ram_read_cleanup) { - cleanup_ram_volume(); -} - -DECL_INIT(stress_file_ram_write_init) { - setup_ram_volume(); - stress_file_write_init(fs_path, num_threads, length); -} - -DECL_TEST(stress_file_ram_write) { - stress_file_write(fs_path, thread_id, num_threads, length, MAX_FILE_SIZE); -} - -DECL_CLEANUP(stress_file_ram_write_cleanup) { - cleanup_ram_volume(); -} - -DECL_INIT(stress_file_ram_create_init) { - setup_ram_volume(); -} - -DECL_TEST(stress_file_ram_create) { - stress_file_create(fs_path, thread_id, num_threads, length); -} - -DECL_CLEANUP(stress_file_ram_create_cleanup) { - cleanup_ram_volume(); -} diff --git a/tools/tests/perf_index/stress_file_read.c b/tools/tests/perf_index/stress_file_read.c deleted file mode 100644 index 29096db40..000000000 --- a/tools/tests/perf_index/stress_file_read.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include "perf_index.h" -#include - -#define MAXFILESIZE 8589934592L -#define MIN(a,b) ((a)<(b) ? (a) : (b)) - -static char readbuff[4096]; - -void stress_file_read_init(const char *fs_path, int num_threads, long long length, long long max_file_size) { - int fd; - char filepath[MAXPATHLEN]; - long long left; - size_t writelen; - - if(max_file_size == 0) - max_file_size = MAXFILESIZE; - - left = MIN(length, max_file_size/num_threads); - - snprintf(filepath, sizeof(filepath), "%s/file_read", fs_path); - fd = open(filepath, O_CREAT | O_EXCL | O_WRONLY, 0644); - assert(fd > 0); - bzero(readbuff, sizeof(readbuff)); - - while(left > 0) { - writelen = sizeof(readbuff) < left ? sizeof(readbuff) : left; - assert(write(fd, readbuff, writelen) == writelen); - left -= writelen; - } -} - -void stress_file_read(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size) { - long long left; - size_t file_offset = 0; - int readlen; - int fd; - char filepath[MAXPATHLEN]; - long long filesize; - - - if(max_file_size == 0) - max_file_size = MAXFILESIZE; - filesize = MIN(length, max_file_size/num_threads); - - snprintf(filepath, sizeof(filepath), "%s/file_read", fs_path); - fd = open(filepath, O_RDONLY); - assert(fd > 0); - for(left=length; left>0;) { - readlen = sizeof(readbuff) < left ? 
sizeof(readbuff) : left; - if(file_offset+readlen > filesize) { - lseek(fd, 0, SEEK_SET); - file_offset = 0; - continue; - } - assert(read(fd, readbuff, readlen) == readlen); - left -= readlen; - file_offset += readlen; - } -} - -void stress_file_read_cleanup(const char *fs_path, int num_threads, long long length) { - char filepath[MAXPATHLEN]; - snprintf(filepath, sizeof(filepath), "%s/file_read", fs_path); - assert(unlink(filepath)>=0); -} diff --git a/tools/tests/perf_index/stress_file_write.c b/tools/tests/perf_index/stress_file_write.c deleted file mode 100644 index fc87fda10..000000000 --- a/tools/tests/perf_index/stress_file_write.c +++ /dev/null @@ -1,46 +0,0 @@ -#include -#include "perf_index.h" -#include - -#define MAXFILESIZE 8589934592L - -static int *fds = NULL; -static char writebuff[4096]; - -void stress_file_write_init(const char *fs_path, int num_threads, long long length) { - int i; - char filepath[MAXPATHLEN]; - - if(fds == NULL) - fds = (int*)malloc(sizeof(int)*num_threads); - for(i=0; i 0); - } - bzero(writebuff, sizeof(writebuff)); -} - -void stress_file_write(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size) { - long long left; - size_t file_offset = 0; - int writelen; - char filepath[MAXPATHLEN]; - int fd = fds[thread_id]; - - if(max_file_size == 0) - max_file_size = MAXFILESIZE; - - for(left=length; left>0;) { - writelen = sizeof(writebuff) < left ? sizeof(writebuff) : left; - assert(write(fd, writebuff, writelen) == writelen); - left -= writelen; - file_offset += writelen; - if(file_offset>max_file_size/num_threads) { - lseek(fd, 0, SEEK_SET); - file_offset = 0; - } - } - snprintf(filepath, sizeof(filepath), "%s/file_write-%d", fs_path, thread_id); - assert(unlink(filepath)>=0); -} diff --git a/tools/tests/perf_index/stress_general.c b/tools/tests/perf_index/stress_general.c deleted file mode 100644 index 90e5e39ce..000000000 --- a/tools/tests/perf_index/stress_general.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "perf_index.h" -DECL_VALIDATE(no_validate) { - return 1; -} -DECL_INIT(stress_general_init) { -} -DECL_TEST(stress_general_test) { -} -DECL_CLEANUP(stress_general_cleanup) { -} diff --git a/tools/tests/perf_index/stress_syscall.c b/tools/tests/perf_index/stress_syscall.c deleted file mode 100644 index c53b8afe7..000000000 --- a/tools/tests/perf_index/stress_syscall.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "perf_index.h" - -const stress_test_t syscall_test = {"syscall", &stress_syscall_init, &stress_syscall, &stress_general_cleanup, &no_validate}; - -DECL_INIT(stress_syscall_init) { -} - -DECL_TEST(stress_syscall) { - long long i; - for(i=0; i +#include +#include +#include +#include + +#define MEMSIZE (1L<<30) + +static char* memblock; + +int test_fault_setup() { + char *ptr; + int pgsz = getpagesize(); + int retval; + + memblock = (char *)mmap(NULL, MEMSIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + VERIFY(memblock != MAP_FAILED, "mmap failed"); + + /* make sure memory is paged */ + for(ptr = memblock; ptr +#include +#include +#include +#include +#include +#include +#include + +static char readbuff[4096]; +static char writebuff[4096]; +static int* fds = NULL; + +char* setup_tempdir(char* buf) { + strcpy(buf, "/tmp/perfindex.XXXXXX"); + return mkdtemp(buf); +} + +int cleanup_tempdir(char* path) { + return rmdir(path); +} + +int test_file_create(char* path, int thread_id, int num_threads, long long length) { + long long i; + int fd; + int retval; + char filepath[MAXPATHLEN]; + + for(i=0; i= 0, 
"open failed"); + + close(fd); + } + + for(i=0; i= 0, "open failed"); + + bzero(readbuff, sizeof(readbuff)); + + while(left > 0) { + writelen = sizeof(readbuff) < left ? sizeof(readbuff) : left; + retval = write(fd, readbuff, writelen); + VERIFY(retval == writelen, "write failed"); + left -= writelen; + } + + return PERFINDEX_SUCCESS; +} + +int test_file_read(char* path, int thread_id, int num_threads, long long length, long long max_file_size) { + long long left; + size_t file_offset = 0; + int readlen; + int fd; + int retval; + char filepath[MAXPATHLEN]; + long long filesize; + + + if(max_file_size == 0) + max_file_size = MAXFILESIZE; + filesize = MIN(length, max_file_size/num_threads); + + snprintf(filepath, sizeof(filepath), "%s/file_read", path); + fd = open(filepath, O_RDONLY); + VERIFY(fd >= 0, "open failed"); + + for(left=length; left>0;) { + readlen = sizeof(readbuff) < left ? sizeof(readbuff) : left; + if(file_offset+readlen > filesize) { + retval = lseek(fd, 0, SEEK_SET); + + + VERIFY(retval >= 0, "lseek failed"); + + file_offset = 0; + continue; + } + retval = read(fd, readbuff, readlen); + VERIFY(retval == readlen, "read failed"); + left -= readlen; + file_offset += readlen; + } + return PERFINDEX_SUCCESS; +} + +int test_file_read_cleanup(char* path, int num_threads, long long length) { + char filepath[MAXPATHLEN]; + int retval; + + snprintf(filepath, sizeof(filepath), "%s/file_read", path); + retval = unlink(filepath); + VERIFY(retval == 0, "unlink failed"); + + return PERFINDEX_SUCCESS; +} + +int test_file_write_setup(char* path, int num_threads, long long length) { + int i; + char filepath[MAXPATHLEN]; + + if(fds == NULL) { + fds = (int*)malloc(sizeof(int)*num_threads); + VERIFY(fds, "malloc failed"); + } + + for(i=0; i0;) { + writelen = sizeof(writebuff) < left ? sizeof(writebuff) : left; + retval = write(fd, writebuff, writelen); + VERIFY(retval == writelen, "write failed"); + + left -= writelen; + file_offset += writelen; + if(file_offset>max_file_size/num_threads) { + retval = lseek(fd, 0, SEEK_SET); + VERIFY(retval >= 0, "leeks failed"); + file_offset = 0; + } + } + + return PERFINDEX_SUCCESS; +} + + +int test_file_write_cleanup(char* path, int num_threads, long long length) { + int i; + char filepath[MAXPATHLEN]; + int retval; + + for(i=0; itlock); IOSimpleLockUnlock(self->tlock); +} -#if 1 - IOSimpleLockLock(self->tlock); -#else - IOSimpleLockUnlock(self->tlock); -#endif +static void thread_call_test_func2(thread_call_param_t param0, + thread_call_param_t param1) +{ + testthreadcall *self = (testthreadcall *)param0; + + IOLog("thread_call_test_func2 %p %p\n", param0, param1); + + IOLockWakeup(self->tlock2, NULL, false); } diff --git a/tools/tests/testkext/testthreadcall.h b/tools/tests/testkext/testthreadcall.h index 2b8973825..c2a03b806 100644 --- a/tools/tests/testkext/testthreadcall.h +++ b/tools/tests/testkext/testthreadcall.h @@ -14,5 +14,7 @@ class testthreadcall : public IOService { public: thread_call_t tcall; + thread_call_t tcall2; IOSimpleLock *tlock; -}; \ No newline at end of file + IOLock *tlock2; +}; diff --git a/tools/tests/unit_tests/Makefile b/tools/tests/unit_tests/Makefile deleted file mode 100644 index 08e4fd429..000000000 --- a/tools/tests/unit_tests/Makefile +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/make -# This file lists all individual tests added over time to test various functionality. -# The Raft TestBot framework runs the tests based on the targets listed in this file. 
-# Please review the following guidelines to ensure successful execution of your test case -# -# == Steps followed by Raft testbot == -# * find target name from this Makefile. A target is identified by the string <targetname>: related files -# * build the target with the command "make <targetname>". The current dir is the same as that of this Makefile -# * The test is executed with following commands "cd BUILD/dst/; ./<targetname>" -# * The exit value of <targetname> is logged. (0 = PASS and <non-zero> = FAIL) -# * remove the BUILD directory -# -# == Note about SDKROOT == -# The environment variable SDKROOT must be passed to select the appropriate SDK. -# x86/OSX is the default, so to build for iphone, you must: -# -# 64-bit: $make SDKROOT=iphoneos.internal -# 32-bit: $make SDKROOT=iphoneos.internal ARCH_CONFIGS="armv7" -# -# == How to add a new test == -# * Create a test directory based on radar #. (for example test_<radar#>) -# * Put test specific files in the directory. -# * Add an entry in this Makefile (reserved targetnames are {run_tests.sh, xnu_target_executables.list, build_*.log}) -# targetname: testdir/programname.c -# -# -# * Check if your target name is listed in the right configurations. -# $make list_targets -# optionally you can pass SDKROOT=iphoneos|iphoneos.internal|macosx|macosx.internal and verify -# the built binary is of right arch and config. -# -# * verify that your test setup works by running the following commands -# $make -# $cd BUILD/dst/ -# $./targetname -# -# == Easy Option == -# look at some example targets in this file and replicate that :) -# - -ifneq ($(SRCROOT),) -SRCDIR=$(SRCROOT) -else -SRCDIR?=$(shell /bin/pwd) -endif - -ifneq ($(DSTROOT),) -BUILDDIR?=$(DSTROOT) -else -BUILDDIR?=$(SRCDIR)/BUILD/dst -endif - -# make sure we have a build directory -$(shell [ -d "$(BUILDDIR)" ] || mkdir -p $(BUILDDIR)) - -SDKROOT ?= / -TARGETSDK:=$(SDKROOT) - - -# setup the TARGETSDK and SDKROOT variables -ifeq (/,$(SDKROOT)) -SDKROOTPATH=/ -else -SDKROOTPATH:=$(shell /usr/bin/xcodebuild -sdk $(TARGETSDK) -version Path) -endif - -ifeq ($(SDKROOTPATH),) -$(error "Unable to find any SDKROOT on host. Exiting") -endif - -PRIVATE_INCLUDES = $(SDKROOTPATH)/System/Library/Frameworks/System.framework/PrivateHeaders - -#arch configs if not provided -ifdef RC_ARCHS -ARCH_CONFIGS:=$(RC_ARCHS) -endif -ifeq ($(ARCH_CONFIGS),) -ARCH_CONFIGS:= -ifeq (iPhone,$(findstring iPhone,$(SDKROOTPATH))) -ARCH_CONFIGS:=-arch armv7 -endif - -else -TMP_ARCHCONF:=$(foreach argarch,$(ARCH_CONFIGS),-arch $(argarch) ) -override ARCH_CONFIGS:=$(TMP_ARCHCONF) -endif - - -#setup the compiler flags. -ifeq (iPhone,$(findstring iPhone,$(SDKROOTPATH))) -CFLAGS=-I$(BUILDDIR) -I. -isysroot $(SDKROOTPATH) $(ARCH_CONFIGS) -CC=xcrun -sdk $(TARGETSDK) clang -MIG=xcrun -sdk $(TARGETSDK) mig -XCODEBUILD=xcodebuild -sdk iphoneos.internal $(ARCH_CONFIGS) -CODESIGN=$(shell xcrun -sdk $(TARGETSDK) -find codesign) -CODESIGN_ALLOCATE=$(shell xcrun -sdk $(TARGETSDK) -find codesign_allocate) -TARGET_NAME=ios -else -#Compiler flags for macosx -CFLAGS=-I$(BUILDDIR) -I. 
$(ARCH_CONFIGS) -CC=clang -MIG=xcrun mig -XCODEBUILD=xcodebuild -CODESIGN=codesign -CODESIGN_ALLOCATE=$(shell xcrun -find codesign_allocate) -TARGET_NAME=osx -endif - -#Flags that define the environment -TARGETOSVERS:=$(shell /usr/bin/xcodebuild -sdk $(TARGETSDK) -version ProductVersion) -TARGETOSBUILDVERS:=$(shell /usr/bin/xcodebuild -sdk $(TARGETSDK) -version ProductBuildVersion) -SDKTARGET_STR:=$(subst .,_,$(TARGETSDK)) -MORECFLAGS=-D TARGET_SDK_$(SDKTARGET_STR)=1 -D TARGET_OS_VERS=\"$(TARGETOSVERS)\" -D TARGET_OS_BUILD_VERS=\"$(TARGETOSBUILDVERS)\" - -#special recipe for special targets: list_targets and clean -define _sed_target_extract_script -/^$$/ { n -/^[^ ]*:/p -} -endef -export sed_target_extract_script=$(_sed_target_extract_script) -all: - @ for TARGET in `make list_targets`; do \ - if [ $$TARGET != all ]; then \ - make $$TARGET DSTROOT="$(BUILDDIR)/$$TARGET"; \ - fi \ - done -list_targets: - @ make -rpn | sed -n -e "$$sed_target_extract_script" | cut -d':' -f1 | grep -v '^clean' | grep -v '^list_targets' - -clean: - rm -fr ./BUILD/ -# == List of targets for test cases == -#Note: target name should be same as the executable in $(BUILDDIR) -#And: target name has to be separate from source directory name. Using "_src" suffix is a good idea. -sampletest: sampletest.c - $(CC) -o $(BUILDDIR)/$@ $^ $(CFLAGS) $(MORECFLAGS) - -pipe_test_10807398: pipe_test_10807398_src/parent.c pipe_test_10807398_src/child.c - $(CC) -o $(BUILDDIR)/$@ pipe_test_10807398_src/parent.c $(CFLAGS) - $(CC) -o $(BUILDDIR)/child pipe_test_10807398_src/child.c $(CFLAGS) - -pipes_fill_procinfo_11179336: pipes_fill_procinfo_11179336.c - $(CC) -o $(BUILDDIR)/$@ pipes_fill_procinfo_11179336.c $(CFLAGS) - -test_wq_exit_race_panic_10970548: test_wq_exit_race_panic_10970548.c - $(CC) -o $(BUILDDIR)/$@ test_wq_exit_race_panic_10970548.c $(CFLAGS) - -ptrace_tests_10767133: ptrace_tests_10767133_src/ptrace_tests_10767133.c - $(CC) -O0 -o $(BUILDDIR)/ptrace_tests_10767133 ptrace_tests_10767133_src/ptrace_tests_10767133.c $(CFLAGS) -Wall - -ptrace_test_12507045: ptrace_test_12507045_src/ptrace_test.c - $(CC) -O0 -o $(BUILDDIR)/ptrace_test_12507045 $< $(CFLAGS) - -clock_types_6368156: clock_types_6368156.c - $(CC) -o $(BUILDDIR)/$@ $^ $(CFLAGS) - -semctl_test_8534495: semctl_test_8534495_src/semctl_test_8534495.c - $(CC) -o $(BUILDDIR)/semctl_test_8534495 semctl_test_8534495_src/semctl_test_8534495.c $(CFLAGS) - -ptcwd_test_11269991: ptcwd_test_11269991_src/ptcwd_test_11269991.c - $(CC) -o $(BUILDDIR)/ptcwd_test_11269991 ptcwd_test_11269991_src/ptcwd_test_11269991.c $(CFLAGS) - -sprace_test_11891562: sprace_test_11891562_src/sprace_test_11891562.c - $(CC) -o $(BUILDDIR)/sprace_test_11891562 sprace_test_11891562_src/sprace_test_11891562.c $(CFLAGS) - -guarded_fd_tests_11746236: guarded_fd_tests_11746236_src/mach_exc.defs guarded_fd_tests_11746236_src/guarded_test_framework.c guarded_fd_tests_11746236_src/guarded_test.c - $(MIG) $(CFLAGS) \ - -user $(BUILDDIR)/mach_excUser.c \ - -server $(BUILDDIR)/mach_excServer.c \ - -header $(BUILDDIR)/mach_exc.h \ - guarded_fd_tests_11746236_src/mach_exc.defs - $(CC) -o $(BUILDDIR)/guarded_fd_tests_11746236 \ - guarded_fd_tests_11746236_src/guarded_test_framework.c \ - $(BUILDDIR)/mach_excServer.c $(CFLAGS) -I$(PRIVATE_INCLUDES) -I$(BUILDDIR) - $(CC) -o $(BUILDDIR)/guarded_test \ - guarded_fd_tests_11746236_src/guarded_test.c \ - -I$(PRIVATE_INCLUDES) $(CFLAGS) - -thread_get_state_11918811: thread_get_state_11918811_src/thread_get_state.c - $(MIG) $(CFLAGS) \ - -sheader 
$(BUILDDIR)/excserver.h \ - -server $(BUILDDIR)/excserver.c \ - -header /dev/null -user /dev/null \ - thread_get_state_11918811_src/excserver.defs - $(CC) -o $(BUILDDIR)/thread_get_state_11918811 \ - thread_get_state_11918811_src/thread_get_state.c \ - $(BUILDDIR)/excserver.c \ - $(CFLAGS) - -fcntlrangecheck_tests_11202484: fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c - $(CC) -o $(BUILDDIR)/fcntlrangecheck_tests_11202484 fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c $(CFLAGS) - -test_waitqlocktry_12053360: test_waitqlocktry_12053360.c - $(CC) -o $(BUILDDIR)/test_waitqlocktry_12053360 test_waitqlocktry_12053360.c $(CFLAGS) - -guarded_mach_port_tests_11178535: guarded_mach_port_tests_11178535_src/mach_exc.defs guarded_mach_port_tests_11178535_src/guarded_test_framework.c guarded_mach_port_tests_11178535_src/guarded_test.c - $(MIG) $(CFLAGS) \ - -user $(BUILDDIR)/mach_excUser.c \ - -server $(BUILDDIR)/mach_excServer.c \ - -header $(BUILDDIR)/mach_exc.h \ - guarded_mach_port_tests_11178535_src/mach_exc.defs - $(CC) -o $(BUILDDIR)/guarded_mach_port_tests_11178535 \ - guarded_mach_port_tests_11178535_src/guarded_test_framework.c \ - $(BUILDDIR)/mach_excServer.c $(CFLAGS) -I$(PRIVATE_INCLUDES) -I$(BUILDDIR) - $(CC) -o $(BUILDDIR)/guarded_mp_test \ - guarded_mach_port_tests_11178535_src/guarded_test.c \ - -I$(PRIVATE_INCLUDES) $(CFLAGS) - -cpu_monitor_tests_11646922: cpu_monitor_tests_11646922_src/cpumon_test_framework.c - $(MIG) $(CFLAGS) \ - -sheader $(BUILDDIR)/excserver.h \ - -server $(BUILDDIR)/excserver.c \ - -header /dev/null -user /dev/null \ - cpu_monitor_tests_11646922_src/mach_exc.defs - $(CC) -o $(BUILDDIR)/cpu_monitor_tests_11646922 \ - cpu_monitor_tests_11646922_src/cpumon_test_framework.c \ - $(BUILDDIR)/excserver.c \ - $(CFLAGS) $(MORECFLAGS) -I$(PRIVATE_INCLUDES) - $(XCODEBUILD) -project cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj TARGET_BUILD_DIR=$(BUILDDIR) - $(CC) -o $(BUILDDIR)/mem_hog \ - cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c \ - $(CFLAGS) $(MORECFLAGS) -I$(PRIVATE_INCLUDES) - -monitor_stress_12901965: monitor_stress_12901965_src/monitor_stress/monitor_stress.m - echo '#!/bin/sh\n./monitor_stress -e 20\n./monitor_stress -w 3 -e 20' > $(BUILDDIR)/monitor_stress_12901965 - chmod +x $(BUILDDIR)/monitor_stress_12901965 - $(XCODEBUILD) -target $(TARGET_NAME) -project monitor_stress_12901965_src/monitor_stress.xcodeproj TARGET_BUILD_DIR=$(BUILDDIR) - -codesigntests: codesigntests.c codesigntests-entitlements.plist - $(CC) -o $(BUILDDIR)/codesigntests codesigntests.c $(CFLAGS) - env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) \ - $(CODESIGN) -s - --entitlements codesigntests-entitlements.plist $(BUILDDIR)/codesigntests - -libproc_privilege_test_13203438: libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c - $(CC) -o $(BUILDDIR)/libproc_privilege_test_13203438 libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c $(CFLAGS) - diff --git a/tools/tests/unit_tests/build_tests.sh b/tools/tests/unit_tests/build_tests.sh deleted file mode 100755 index 4de662147..000000000 --- a/tools/tests/unit_tests/build_tests.sh +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env bash - -function run_test() { - local testname="$1" - local out_status=1 - local out_str=" " - - echo "" - echo "[TEST] ${testname} " - if [ -x "./${testname}" ] - then - echo "[BEGIN] Executing test ${testname}" - out_str=$(./"${testname}" 2>&1) - out_status="$?" 
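# ($? captured above is the exit status of the test binary; by the framework's convention, 0 = PASS and any non-zero value = FAIL)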
- else - echo "[FAIL] Failed to execute test with name ${testname}" - out_status=1 - fi - - if [ "${out_status}" == "0" ] - then - echo "[PASS] Successfully finished ${testname}" - else - echo $out_str - echo "[FAIL] Test failed ${testname} exit value $out_status" - echo " *** FAILURE of test ${testname} *** " - echo "" - fi - return $out_status -} - -function build_test(){ - local testtarget="$1" - local out_str=" " - local out_status=1 - - echo "[MAKE] Building test ${testtarget}" - out_str=$(make ${MAKE_ARGS} ${testtarget} 2>&1) - out_status=$? - echo ${out_str} >> ${BUILD_LOG_FILENAME} - - if [ "${out_status}" == "0" ] - then - echo "[PASS][BUILD] Successfully built ${testtarget}" - else - echo ${out_str} - echo "[FAIL][BUILD] Failed to build ${testtarget}" - fi - return ${out_status} -} - -CMD=build -TARGET_MODE=$1 -TIMESTAMP=`date +%s` -PROGNAME=$0 -TARGET_LIST_FILE="xnu_target_executables.list" -BUILD_DIR="${PWD}/BUILD/" -BUILD_LOG_FILENAME="${BUILD_DIR}/build_${TIMESTAMP}.log" - -# load the list of targets to build/run -if [ -f "$TARGET_LIST_FILE" ] -then - TARGET_NAMES=`cat $TARGET_LIST_FILE` -else - TARGET_NAMES=`make ${MAKE_ARGS} list_targets` -fi - -if [ "$CMD" == "build" ] -then - - # setup make arguments based on target requirements - if [ "${TARGET_MODE}" == "embedded" ] - then - T_ios=`/usr/bin/xcodebuild -sdk iphoneos.internal -version Path` - T_ios_name=iphoneos.internal - if [ "$T_ios" == "" ] - then - T_ios=`/usr/bin/xcodebuild -sdk iphoneos -version Path` - T_ios_name=iphoneos - fi - - if [ "$T_ios" == "" ] - then - echo "No iOS SDK found. Exiting." - exit 1 - fi - - MAKE_ARGS="SDKROOT=${T_ios_name}" - elif [ "${TARGET_MODE}" == "desktop" ] - then - MAKE_ARGS="" - else - echo "Usage: ${PROGNAME} <embedded|desktop>" - exit 1 - fi - - if [ ! -d "${BUILD_DIR}" ] - then - mkdir -p ${BUILD_DIR} - fi - - echo " " - echo "=========== Building XNU Unit Tests =========" - echo " " - - for testname_target in ${TARGET_NAMES} - do - build_test ${testname_target} - echo "" - done - - echo "Finished building tests. Saving list of targets in ${BUILD_DIR}/dst/${TARGET_LIST_FILE}" - echo "${TARGET_NAMES}" > ${BUILD_DIR}/dst/${TARGET_LIST_FILE} - cat "${PROGNAME}" | sed s/^CMD=build/CMD=run/g > ${BUILD_DIR}/dst/run_tests.sh - chmod +x ${BUILD_DIR}/dst/run_tests.sh - echo "Generated ${BUILD_DIR}/dst/run_tests.sh for running the tests." - exit 0 - -fi -# End of Build action - -# -if [ "$CMD" == "run" ] -then - echo " " - echo "=========== Running XNU Unit Tests =========" - echo " " - for testname_target in ${TARGET_NAMES} - do - run_test ${testname_target} - done - exit 0 -fi -# End of Run action diff --git a/tools/tests/unit_tests/clock_types_6368156.c b/tools/tests/unit_tests/clock_types_6368156.c deleted file mode 100644 index bb7eb4e15..000000000 --- a/tools/tests/unit_tests/clock_types_6368156.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include - -int main(void) -{ - long long good = 5 * 1000000000LL; - long long bad = 5 * NSEC_PER_SEC; - - printf("%lld\n%lld\n", good, bad); - if (good == bad ){ - printf("[PASS] successfully verified that (5 * 1000000000LL) == (5 * NSEC_PER_SEC). 
\n"); - return 0; - }else { - printf("[FAIL] NSEC_PER_SEC is not long long.\n"); - return -1; - } - /* by default return as error */ - - return 1; -} diff --git a/tools/tests/unit_tests/codesigntests-entitlements.plist b/tools/tests/unit_tests/codesigntests-entitlements.plist deleted file mode 100644 index 7caa664ab..000000000 --- a/tools/tests/unit_tests/codesigntests-entitlements.plist +++ /dev/null @@ -1,8 +0,0 @@ - - - - - com.apple.security.some-entitlements - some-value - - diff --git a/tools/tests/unit_tests/codesigntests.c b/tools/tests/unit_tests/codesigntests.c deleted file mode 100644 index 0ec624ee2..000000000 --- a/tools/tests/unit_tests/codesigntests.c +++ /dev/null @@ -1,130 +0,0 @@ -#import -#import -#import -#import - -#import -#import -#import - -int -get_blob(pid_t pid, int op) -{ - uint8_t header[8]; - unsigned int cnt; - int rcent; - - for (cnt = 0; cnt < sizeof(header); cnt++) { - rcent = csops(pid, op, header, 1); - if (rcent != -1 && errno != ERANGE) - err(1, "errno != ERANGE for short header"); - } - - rcent = csops(pid, op, header, sizeof(header)); - if (rcent == -1 && errno == ERANGE) { - uint32_t len, bufferlen, bufferlen2; - - memcpy(&len, &header[4], 4); - bufferlen = ntohl(len); - if (bufferlen > 1024 * 1024) - errx(1, "invalid length on blob from kernel"); - else if (bufferlen == 0) - errx(1, "bufferlen == 0"); - else if (bufferlen < 8) - errx(1, "bufferlen <8 0"); - - uint8_t buffer[bufferlen + 1]; - - rcent = csops(pid, op, buffer, bufferlen - 1); - if (rcent != -1 && errno != ERANGE) - errx(1, "csops with full buffer - 1 failed"); - - rcent = csops(pid, op, buffer, bufferlen); - if (rcent != 0) - errx(1, "csops with full buffer failed"); - - memcpy(&len, &buffer[4], 4); - bufferlen2 = ntohl(len); - - if (op == CS_OPS_BLOB) { - if (bufferlen2 > bufferlen) - errx(1, "buffer larger on second try"); - if (bufferlen2 != bufferlen) - warnx("buffer shrunk since codesign can't tell the right size to codesign_allocate"); - } else { - if (bufferlen2 != bufferlen) - errx(1, "buffer sizes different"); - } - - rcent = csops(pid, op, buffer, bufferlen + 1); - if (rcent != 0) - errx(1, "csops with full buffer + 1 didn't pass"); - - return 0; - - } else if (rcent == 0) { - return 0; - } else { - return 1; - } -} - -int -main(int argc, const char * argv[]) -{ - uint32_t status; - int rcent; - pid_t pid; - - pid = getpid(); - - if (get_blob(pid, CS_OPS_ENTITLEMENTS_BLOB)) - errx(1, "failed to get entitlements"); - - if (get_blob(0, CS_OPS_ENTITLEMENTS_BLOB)) - errx(1, "failed to get entitlements"); - - if (get_blob(pid, CS_OPS_BLOB)) - errx(1, "failed to get blob"); - - if (get_blob(0, CS_OPS_BLOB)) - errx(1, "failed to get blob"); - - if (get_blob(pid, CS_OPS_IDENTITY)) - errx(1, "failed to get identity"); - - if (get_blob(0, CS_OPS_IDENTITY)) - errx(1, "failed to get identity"); - - rcent = csops(pid, CS_OPS_SET_STATUS, &status, sizeof(status) - 1); - if (rcent == 0) - err(1, "passed when passed in too short status buffer"); - - status = htonl(CS_RESTRICT); - rcent = csops(pid, CS_OPS_SET_STATUS, &status, sizeof(status)); - if (rcent != 0) - errx(1, "failed to mark proc RESTRICTED"); - - rcent = csops(pid, CS_OPS_MARKINVALID, NULL, 0); - if (rcent != 0) - errx(1, "failed to mark proc invalid"); - - status = htonl(CS_VALID); - rcent = csops(pid, CS_OPS_SET_STATUS, &status, sizeof(status)); - if (rcent == 0) - errx(1, "managed set flags on an INVALID proc"); - - if (!get_blob(pid, CS_OPS_ENTITLEMENTS_BLOB)) - errx(1, "got entitlements while invalid"); - - if 
(!get_blob(pid, CS_OPS_IDENTITY)) - errx(1, "got identity"); - - if (!get_blob(0, CS_OPS_IDENTITY)) - errx(1, "got identity"); - - if (!get_blob(pid, CS_OPS_BLOB)) - errx(1, "got blob"); - - return 0; -} diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m deleted file mode 100644 index 3cba388d9..000000000 --- a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m +++ /dev/null @@ -1,467 +0,0 @@ -#define TARGET_OS_EMBEDDED 1 -#include -#include -#include -#include -#include -#include -#include - -#import - -#include -#include - -#define MAX_THREADS 1000 - -char *pname; - -volatile int spin = 0; -pthread_mutex_t lock; -pthread_cond_t cv; -volatile int ready = 0; - -int exit_after = 600; - -int limit = 0; // Worker thread should apply per-thread limit to self? -int limit_period = 5000; - -boolean_t reset_to_defaults = FALSE; - -boolean_t stress_test = FALSE; - -void usage(void) { - printf("usage: cpu_hog [-l percentage] [-s msecs] [-n nthreads] [-p percentage] [-i secs] [-e secs] [-t num]\n"); - printf("\t-l: worker thread should apply per-thread limit to self (default: no limit)\n"); - printf("\t-s: worker thread's per-thread limit refill period (msecs) (default: 5000)\n"); - printf("\t-n: create nthreads and choose 1 to be worker. (default: 2)\n"); - printf("\t-p: worker thread should consume this percent CPU over -i seconds (default: 1)\n"); - printf("\t-i: interval for CPU consumption given with -p (DEFAULT: 1 second)\n"); - printf("\t-x: disable CPU usage monitor after this many seconds (0 == upon launch)\n"); - printf("\t-r: reset CPU usage monitor to default params after this many seconds (0 == upon launch)\n"); - printf("\t-c: change this process's CPU monitor percentage to this value upon launch\n"); - printf("\t-C: change this process's CPU monitor interval to this value upon launch (requires -c)\n"); - printf("\t-d: change this process's CPU monitor percentage to this value (with -D interval) -- after -w seconds\n"); - printf("\t-D: change this process's CPU monitor interval to this value (with -d percentage) -- after -w seconds\n"); - printf("\t-w: wait this number of seconds until changing CPU monitor percentage to -d percent\n"); - printf("\t-e: exit after this many seconds (default: 10 mins)\n"); - printf("\t-P: confirm that this process's CPU monitor parameters match this percentage (requires -I)\n"); - printf("\t-I: interval to match (with -P)\n"); - printf("\t-t: spin up additional CPU burner threads (each will consume 100%% CPU)\n"); -} - -void set_my_limit(int percent, int refill_period) -{ - int err; - int cpupercent = percent | (refill_period << 8); - - if ((err = sysctlbyname("kern.setthread_cpupercent", 0, 0, - &cpupercent, sizeof (int))) != 0) { - printf("sysctl: error %d\n", err); - } -} - -static void print_cpumon_params(void) { - int new_percentage = -1, new_interval = -1; - - proc_get_cpumon_params(getpid(), &new_percentage, &new_interval); - - printf("CPU monitor params: percentage = %d interval = %d\n", new_percentage, new_interval); -} - -void *burner_thread(void *arg) -{ - int x = 1, y = 2; - - while (1) { - x = rand(); - y = x * rand(); - } -} - -void *spinner_thread(void *arg) -{ - int am_i_the_one = (arg != NULL) ? 1 : 0; - int j = 0; - int err; - - if (am_i_the_one) { - if ((err = pthread_mutex_lock(&lock)) != 0) { - printf("spinner: pthread_mutex_lock: %d", err); - exit(1); - } - - /* - * Apply per-thread limit to self? 
- */ - if (limit != 0) { - set_my_limit(limit, limit_period); - } - - /* - * Tell the main thread we're ready to get to work. - */ - ready = 1; - pthread_mutex_unlock(&lock); - pthread_cond_signal(&cv); - - while (1) { - /* - * Go to sleep until the main thread wakes us. - */ - pthread_cond_wait(&cv, &lock); - - /* - * Do useless work until the main thread tells us to - * stop. - */ - while (spin) { - j += rand(); - if (reset_to_defaults) { - reset_to_defaults = FALSE; - printf("%s: resetting CPU usage monitor to default params.\n", pname); - proc_set_cpumon_defaults(getpid()); - print_cpumon_params(); - } - - if (stress_test) { -// printf("%s: resetting CPU usage monitor to default params.\n", pname); - proc_set_cpumon_defaults(getpid()); -// print_cpumon_params(); -// printf("%s: disabling CPU usage monitor\n", pname); - proc_disable_cpumon(getpid()); -// print_cpumon_params(); - } - - } - } - } - - while(1) { - sleep(6000); - } -} - -void *disable_thread(void *arg) -{ - sleep((int)arg); - - printf("%s: disabling CPU usage monitor.\n", pname); - proc_disable_cpumon(getpid()); - print_cpumon_params(); - - return (NULL); -} - -void *reset_thread(void *arg) -{ - sleep((int)arg); - - reset_to_defaults = TRUE; - - return (NULL); -} - -void *exit_thread(void *arg) -{ - sleep(exit_after); - printf("...exiting.\n"); - exit(0); - - return (NULL); -} - -int delayed_cpumon_percentage = -1; -int delayed_cpumon_interval = -1; -int delayed_cpumon_percentage_wait = -1; - -void *change_cpumon_thread(void *arg) -{ - sleep(delayed_cpumon_percentage_wait); - printf("changing CPU monitor params to %d %% over %d seconds\n", delayed_cpumon_percentage, delayed_cpumon_interval); - proc_set_cpumon_params(getpid(), delayed_cpumon_percentage, delayed_cpumon_interval); - - print_cpumon_params(); - - return (NULL); -} - -int main(int argc, char *argv[]) -{ - int ch; - int i = 0; - int nthreads = 1; - int chosen_thr; - pthread_t chosen_thr_id; - int percent = 100; - - int interval = 2 * 1000000; // Default period for cycle is 2 seconds. Units are usecs. 
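/* Duty-cycle arithmetic used further below: on_time = (percent * interval) / 100 and off_time = interval - on_time, so e.g. -p 25 with the default 2-second interval gives 0.5s of spinning and 1.5s of sleeping per period. */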
- int on_time, off_time; - - int new_cpumon_percentage = -1; - int new_cpumon_interval = -1; - - int disable_delay = -1; - int reset_params_delay = -1; - - int confirm_cpumon_percentage = -1; - int confirm_cpumon_interval = -1; - - int num_burner_threads = 0; - - pthread_t thr_id; - - printf("In CPU hogging test program...\n"); - - pname = argv[0]; - - while ((ch = getopt(argc, argv, "r:x:l:s:n:p:i:c:C:d:D:w:e:P:I:St:")) != -1) { - switch (ch) { - case 'l': - limit = atoi(optarg); - break; - case 's': - limit_period = atoi(optarg); - break; - case 'n': - nthreads = atoi(optarg); - break; - case 'p': - percent = atoi(optarg); - break; - case 'i': - interval = atoi(optarg) * 1000000; // using usleep - break; - case 'x': - disable_delay = atoi(optarg); - break; - case 'r': - reset_params_delay = atoi(optarg); - break; - case 'c': - new_cpumon_percentage = atoi(optarg); - break; - case 'C': - new_cpumon_interval = atoi(optarg); - break; - case 'd': - delayed_cpumon_percentage = atoi(optarg); - break; - case 'D': - delayed_cpumon_interval = atoi(optarg); - break; - case 'w': - delayed_cpumon_percentage_wait = atoi(optarg); - break; - case 'e': - exit_after = atoi(optarg); - break; - case 'P': - confirm_cpumon_percentage = atoi(optarg); - break; - case 'I': - confirm_cpumon_interval = atoi(optarg); - break; - case 'S': - stress_test = TRUE; - break; - case 't': - num_burner_threads = atoi(optarg); - break; - default: - usage(); - exit(1); - } - } - argc -= optind; - argv += optind; - - if (argc != 0) { - usage(); - exit(1); - } - - if (((delayed_cpumon_percentage != -1) && (delayed_cpumon_percentage_wait == -1)) || - ((delayed_cpumon_percentage == -1) && (delayed_cpumon_percentage_wait != -1))) { - printf("must specify -d and -w together\n"); - usage(); - exit(1); - } - - if ((nthreads <= 0) || (nthreads > MAX_THREADS)) { - printf("%s: %d threads too many (max is %d)\n", argv[0], - nthreads ,MAX_THREADS); - exit(1); - } - - if ((percent <= 0) || (percent > 100)) { - printf("%s: invalid percentage %d\n", argv[0], percent); - exit(1); - } - - if (interval <= 0) { - printf("%s: invalid interval %d\n", argv[0], interval); - exit(1); - } - - if ((new_cpumon_interval != -1) && (new_cpumon_percentage == -1)) { - printf("%s: -C requires that you also specify -c\n", argv[0]); - exit(1); - } - - print_cpumon_params(); - - if (confirm_cpumon_percentage != -1) { - int my_percentage, my_interval; - proc_get_cpumon_params(getpid(), &my_percentage, &my_interval); - if ((my_percentage != confirm_cpumon_percentage) || - (my_interval != confirm_cpumon_interval)) { - printf("parameters don't match values given with -P and -I\n"); - exit(1); - } - - printf("parameters match values given with -P and -I.\n"); - exit(0); - } - - on_time = (percent * interval) / 100; - off_time = interval - on_time; - - /* - * Randomly choose a thread to be the naughty one. 
- */ - srand(MAX_THREADS); // Want this to be repeatable, for now - chosen_thr = rand() % nthreads; - - if (pthread_mutex_init(&lock, NULL) != 0) { - perror("pthread_mutex_init"); - exit(1); - } - - if (pthread_cond_init(&cv, NULL) != 0) { - perror("pthread_cond_init"); - exit(1); - } - - if (pthread_mutex_lock(&lock) != 0) { - perror("pthread_mutex_lock"); - exit(1); - } - - if (pthread_create(&thr_id, NULL, exit_thread, NULL) != 0) { - perror("pthread_create"); - exit(1); - } - - if (delayed_cpumon_percentage != -1) { - if (pthread_create(&thr_id, NULL, change_cpumon_thread, NULL) != 0) { - perror("pthread_create"); - exit(1); - } - } - - printf("Creating %d threads. Thread %d will try to consume " - "%d%% of a CPU over %d seconds.\n", nthreads, chosen_thr, - percent, interval / 1000000); - if (limit != 0) { - printf("Worker thread %d will first self-apply a per-thread" - " CPU limit of %d percent over %d seconds\n", - chosen_thr, limit, limit_period); - } - - for (i = 0; i < nthreads; i++) { - if (pthread_create(&thr_id, NULL, spinner_thread, - (void *)((i == chosen_thr) ? (void *)1 : NULL)) != 0) { - perror("pthread_create"); - exit(1); - } - if (i == chosen_thr) { - chosen_thr_id = thr_id; - } - } - - /* - * Try to adjust the CPU usage monitor limit. - */ - if (new_cpumon_percentage != -1) { - proc_set_cpumon_params(getpid(), new_cpumon_percentage, new_cpumon_interval); - print_cpumon_params(); - } - - if (disable_delay != -1) { - if (pthread_create(&thr_id, NULL, disable_thread, (void *)disable_delay) != 0) { - perror("pthread_create"); - exit(1); - } - } - - if (reset_params_delay != -1) { - if (pthread_create(&thr_id, NULL, reset_thread, (void *)reset_params_delay) != 0) { - perror("pthread_create"); - exit(1); - } - } - - if (num_burner_threads > 0) { - for (i = 0; i < num_burner_threads; i++) { - if (pthread_create(&thr_id, NULL, burner_thread, NULL) != 0) { - perror("pthread_create"); - exit(1); - } - } - } - - // Wait for the worker thread to come alive and get ready to work. - while (ready == 0) { - pthread_cond_wait(&cv, &lock); - } - - if (pthread_mutex_unlock(&lock) != 0) { - perror("spinner: pthread_mutex_unlock"); - exit(1); - } - - /* - * Control the worker thread's CPU consumption. - */ - while (1) { - /* - * Worker thread is waiting for us to awaken him, with the - * lock dropped. - */ - if (pthread_mutex_lock(&lock) != 0) { - perror("pthread_mutex_lock"); - exit(1); - } - - /* - * Go to sleep until we are ready to awaken the worker. - */ - usleep(off_time); - - /* - * Tell the worker to get to work. - */ - spin = 1; - - if (pthread_mutex_unlock(&lock) != 0) { - perror("spinner: pthread_mutex_unlock"); - exit(1); - } - - pthread_cond_signal(&cv); - - /* - * Go to sleep until we're ready to stop the worker. - */ - usleep(on_time); - - /* - * Stop the worker. He will drop the lock and wait - * for us to wake him again. - */ - spin = 0; - } - - return (1); -} diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj deleted file mode 100644 index 2f6a41f39..000000000 --- a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj +++ /dev/null @@ -1,356 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 46; - objects = { - -/* Begin PBXAggregateTarget section */ - 15A9B5C1157E853C00B44B4F /* default */ = { - isa = PBXAggregateTarget; - buildConfigurationList = 15A9B5C2157E853D00B44B4F /* Build configuration list for PBXAggregateTarget "default" */; - buildPhases = ( - ); - dependencies = ( - 15A9B5C6157E856F00B44B4F /* PBXTargetDependency */, - 15A9B5C8157E857000B44B4F /* PBXTargetDependency */, - ); - name = default; - productName = "cpu_hog-default"; - }; -/* End PBXAggregateTarget section */ - -/* Begin PBXBuildFile section */ - 155F2812157E81B100D7B917 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 159F7E4E1537850F00588242 /* Foundation.framework */; }; - 159F7E4F1537850F00588242 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 159F7E4E1537850F00588242 /* Foundation.framework */; }; - 15A9B5B4157E83C100B44B4F /* cpu_hog.m in Sources */ = {isa = PBXBuildFile; fileRef = 15A9B5B3157E83C100B44B4F /* cpu_hog.m */; }; - 15A9B5B5157E83C100B44B4F /* cpu_hog.m in Sources */ = {isa = PBXBuildFile; fileRef = 15A9B5B3157E83C100B44B4F /* cpu_hog.m */; }; -/* End PBXBuildFile section */ - -/* Begin PBXContainerItemProxy section */ - 15A9B5C5157E856F00B44B4F /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = 159F7E411537850F00588242 /* Project object */; - proxyType = 1; - remoteGlobalIDString = 159F7E491537850F00588242; - remoteInfo = cpu_hog; - }; - 15A9B5C7157E857000B44B4F /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = 159F7E411537850F00588242 /* Project object */; - proxyType = 1; - remoteGlobalIDString = 155F280E157E81B100D7B917; - remoteInfo = "cpu_hog-unentitled"; - }; -/* End PBXContainerItemProxy section */ - -/* Begin PBXCopyFilesBuildPhase section */ - 155F2813157E81B100D7B917 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; - 159F7E481537850F00588242 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; -/* End PBXCopyFilesBuildPhase section */ - -/* Begin PBXFileReference section */ - 155F2818157E81B100D7B917 /* cpu_hog-unentitled */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "cpu_hog-unentitled"; sourceTree = BUILT_PRODUCTS_DIR; }; - 159F7E4A1537850F00588242 /* cpu_hog */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = cpu_hog; sourceTree = BUILT_PRODUCTS_DIR; }; - 159F7E4E1537850F00588242 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; - 15A9B5B3157E83C100B44B4F /* cpu_hog.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = cpu_hog.m; sourceTree = ""; }; -/* End PBXFileReference section */ - -/* Begin PBXFrameworksBuildPhase section */ - 155F2811157E81B100D7B917 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 155F2812157E81B100D7B917 /* Foundation.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 
159F7E471537850F00588242 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 159F7E4F1537850F00588242 /* Foundation.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXFrameworksBuildPhase section */ - -/* Begin PBXGroup section */ - 159F7E3F1537850F00588242 = { - isa = PBXGroup; - children = ( - 15A9B5B3157E83C100B44B4F /* cpu_hog.m */, - 159F7E4D1537850F00588242 /* Frameworks */, - 159F7E4B1537850F00588242 /* Products */, - ); - sourceTree = ""; - }; - 159F7E4B1537850F00588242 /* Products */ = { - isa = PBXGroup; - children = ( - 159F7E4A1537850F00588242 /* cpu_hog */, - 155F2818157E81B100D7B917 /* cpu_hog-unentitled */, - ); - name = Products; - sourceTree = ""; - }; - 159F7E4D1537850F00588242 /* Frameworks */ = { - isa = PBXGroup; - children = ( - 159F7E4E1537850F00588242 /* Foundation.framework */, - ); - name = Frameworks; - sourceTree = ""; - }; -/* End PBXGroup section */ - -/* Begin PBXNativeTarget section */ - 155F280E157E81B100D7B917 /* cpu_hog-unentitled */ = { - isa = PBXNativeTarget; - buildConfigurationList = 155F2815157E81B100D7B917 /* Build configuration list for PBXNativeTarget "cpu_hog-unentitled" */; - buildPhases = ( - 155F280F157E81B100D7B917 /* Sources */, - 155F2811157E81B100D7B917 /* Frameworks */, - 155F2813157E81B100D7B917 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "cpu_hog-unentitled"; - productName = cpumon_test; - productReference = 155F2818157E81B100D7B917 /* cpu_hog-unentitled */; - productType = "com.apple.product-type.tool"; - }; - 159F7E491537850F00588242 /* cpu_hog */ = { - isa = PBXNativeTarget; - buildConfigurationList = 159F7E5A1537850F00588242 /* Build configuration list for PBXNativeTarget "cpu_hog" */; - buildPhases = ( - 159F7E461537850F00588242 /* Sources */, - 159F7E471537850F00588242 /* Frameworks */, - 159F7E481537850F00588242 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = cpu_hog; - productName = cpumon_test; - productReference = 159F7E4A1537850F00588242 /* cpu_hog */; - productType = "com.apple.product-type.tool"; - }; -/* End PBXNativeTarget section */ - -/* Begin PBXProject section */ - 159F7E411537850F00588242 /* Project object */ = { - isa = PBXProject; - attributes = { - LastUpgradeCheck = 0450; - ORGANIZATIONNAME = Apple; - }; - buildConfigurationList = 159F7E441537850F00588242 /* Build configuration list for PBXProject "cpu_hog" */; - compatibilityVersion = "Xcode 3.2"; - developmentRegion = English; - hasScannedForEncodings = 0; - knownRegions = ( - en, - ); - mainGroup = 159F7E3F1537850F00588242; - productRefGroup = 159F7E4B1537850F00588242 /* Products */; - projectDirPath = ""; - projectRoot = ""; - targets = ( - 15A9B5C1157E853C00B44B4F /* default */, - 159F7E491537850F00588242 /* cpu_hog */, - 155F280E157E81B100D7B917 /* cpu_hog-unentitled */, - ); - }; -/* End PBXProject section */ - -/* Begin PBXSourcesBuildPhase section */ - 155F280F157E81B100D7B917 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 15A9B5B5157E83C100B44B4F /* cpu_hog.m in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 159F7E461537850F00588242 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 15A9B5B4157E83C100B44B4F /* cpu_hog.m in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXSourcesBuildPhase section */ - -/* Begin PBXTargetDependency section */ - 15A9B5C6157E856F00B44B4F /* 
PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = 159F7E491537850F00588242 /* cpu_hog */; - targetProxy = 15A9B5C5157E856F00B44B4F /* PBXContainerItemProxy */; - }; - 15A9B5C8157E857000B44B4F /* PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = 155F280E157E81B100D7B917 /* cpu_hog-unentitled */; - targetProxy = 15A9B5C7157E857000B44B4F /* PBXContainerItemProxy */; - }; -/* End PBXTargetDependency section */ - -/* Begin XCBuildConfiguration section */ - 155F2816157E81B100D7B917 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = ""; - CODE_SIGN_IDENTITY = "-"; - PRODUCT_NAME = "cpu_hog-unentitled"; - PROVISIONING_PROFILE = ""; - }; - name = Debug; - }; - 155F2817157E81B100D7B917 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = ""; - CODE_SIGN_IDENTITY = "-"; - PRODUCT_NAME = "cpu_hog-unentitled"; - PROVISIONING_PROFILE = ""; - }; - name = Release; - }; - 159F7E581537850F00588242 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = NO; - GCC_C_LANGUAGE_STANDARD = gnu99; - GCC_DYNAMIC_NO_PIC = NO; - GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); - GCC_SYMBOLS_PRIVATE_EXTERN = NO; - GCC_VERSION = com.apple.compilers.llvm.clang.1_0; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 6.0; - }; - name = Debug; - }; - 159F7E591537850F00588242 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = NO; - GCC_C_LANGUAGE_STANDARD = gnu99; - GCC_VERSION = com.apple.compilers.llvm.clang.1_0; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 6.0; - VALIDATE_PRODUCT = YES; - }; - name = Release; - }; - 159F7E5B1537850F00588242 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = "cpu_hog-Entitlements.plist"; - CODE_SIGN_IDENTITY = "-"; - PRODUCT_NAME = cpu_hog; - PROVISIONING_PROFILE = ""; - }; - name = Debug; - }; - 159F7E5C1537850F00588242 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = "cpu_hog-Entitlements.plist"; - CODE_SIGN_IDENTITY = "-"; - PRODUCT_NAME = cpu_hog; - PROVISIONING_PROFILE = ""; - }; - name = Release; - }; - 15A9B5C3157E853D00B44B4F /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Debug; - }; - 15A9B5C4157E853D00B44B4F /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Release; - }; -/* End XCBuildConfiguration section */ - -/* Begin XCConfigurationList section */ - 155F2815157E81B100D7B917 /* Build configuration list for PBXNativeTarget "cpu_hog-unentitled" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 155F2816157E81B100D7B917 /* Debug */, - 155F2817157E81B100D7B917 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 159F7E441537850F00588242 /* Build configuration list for PBXProject "cpu_hog" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 
159F7E581537850F00588242 /* Debug */, - 159F7E591537850F00588242 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 159F7E5A1537850F00588242 /* Build configuration list for PBXNativeTarget "cpu_hog" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 159F7E5B1537850F00588242 /* Debug */, - 159F7E5C1537850F00588242 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 15A9B5C2157E853D00B44B4F /* Build configuration list for PBXAggregateTarget "default" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 15A9B5C3157E853D00B44B4F /* Debug */, - 15A9B5C4157E853D00B44B4F /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; -/* End XCConfigurationList section */ - }; - rootObject = 159F7E411537850F00588242 /* Project object */; -} diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata deleted file mode 100644 index 8b5db5879..000000000 --- a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c deleted file mode 100644 index e49dd7750..000000000 --- a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c +++ /dev/null @@ -1,529 +0,0 @@ -/* - * Testing Framework for CPU Usage Monitor - * - * The framework tests for correctness of the CPU Usage Monitor. - * It creates a new exception port and an associated handling thread. - * For each test case, the framework sets its own exception port to the - * newly allocated port, execs a new child (which inherits the new - * exception port) and restores the parent's exception port to the - * original handler. The child process is invoked with a different - * parameters based on the scenario being tested. - * - * Usage: ./cpu_monitor_tests_11646922 [test case ID] - * If no test case ID is supplied, the framework runs all test cases. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_TEST_ID_LEN 16 -#define MAX_ARGV 8 - -#define GENERATE_TEST_EXC_CODE(type, flavor) \ - ((0) | ((type & 0x7ULL) << 61) | ((flavor & 0x7ULL) << 58)) - -/* - * To add a new test case to this framework: - * - Increment the NUMTESTS value - * - Add exec args for cpu_hog/cpu_hog unentitled to test the - * scenario. Also add a case to the main loop child_args assignment. - * - Add timeout for exception. If no timeout, specify 0. - * - Add expected duration for exception. 0 if no exception expected. 
- * - Add (Exception Type | flavor) to "test_exception_code" if the - * test case generates an exception; 0 otherwise - */ - -#define NUMTESTS 7 - -const char *test_description[] = { - "Basic test for EXC_RESOURCE.", - "Test Program stays under limit.", - "Test Program disables monitor.", - "Unentitled Test Program attempts to disable monitor.", - "Test Program resets monitor to default.", - "Set high watermark, munch past it, and confirm EXC_RESOURCE received for FLAVOR_HIGH_WATERMARK.", - "Set high watermark but don't munch past it. Confirm no EXC_RESOURCE received.", -}; - -/* - * Exec arguments for cpu hogging programs - * (NULL indicates test should not be run) - */ -char *test_argv_0[] = { "./cpu_hog-unentitled", "-c", "30", "-C", "10", "-p", "100", "-i", "1", NULL }; -char *test_argv_1[] = { "./cpu_hog-unentitled", "-c", "50", "-C", "15", "-p", "25", "-i", "1", NULL }; -#ifdef TARGET_SDK_iphoneos_internal -char *test_argv_2[] = { "./cpu_hog", "-c", "20", "-C", "15", "-x", "0", "-p", "100", "-i", "1", NULL }; -char *test_argv_3[] = { "./cpu_hog-unentitled", "-c", "20", "-C", "15", "-x", "1", "-p", "100", "-i", "1", NULL }; -#else -char *test_argv_2[] = { "./cpu_hog-unentitled", "-c", "20", "-C", "15", "-x", "0", "-p", "100", "-i", "1", NULL }; -char **test_argv_3 = NULL; -#endif -char *test_argv_4[] = { "./cpu_hog-unentitled", "-c", "20", "-C", "15", "-r", "1", "-p", "100", "-i", "1", NULL }; -#ifdef TARGET_SDK_iphoneos_internal -char *test_argv_5[] = { "./mem_hog", "-e", "-w", "50", "-m", "150", "10", "200", NULL }; -char *test_argv_6[] = { "./mem_hog", "-e", "-w", "190", "-m", "160", "10", "200", NULL }; -#else -char **test_argv_5 = NULL; -char **test_argv_6 = NULL; -#endif - -/* - * Timeout in seconds for test scenario to complete - * (0 indicates no timeout enabled) - */ -int timeout_secs[] = { - 15, - 20, - 20, - 110, - 110, - 20, - 20, -}; - -/* - * Exception should be generated within the specified duration - * (0 indicates no exception/time constraints for the exception - * to occur) - */ -int exc_expected_at[] = { - 0, - 0, - 0, - 90, - 90, - 10, - 0, -}; - -/* - * EXC_RESOURCE exception codes expected (0 indicates no - * exception expected) - */ -uint64_t test_exception_code[] = { - GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_CPU, FLAVOR_CPU_MONITOR), - 0, - 0, - GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_CPU, FLAVOR_CPU_MONITOR), - GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_CPU, FLAVOR_CPU_MONITOR), - GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_MEMORY, FLAVOR_HIGH_WATERMARK), - 0, -}; - -#define DEFAULT_PERCENTAGE "50" -#define DEFAULT_INTERVAL "180" - -/* Global Variables used by parent/child */ -mach_port_t exc_port; /* Exception port for child process */ -uint64_t exception_code; /* Exception code for the exception generated */ -int time_for_exc; /* Time (in secs.) for the exception to be generated */ -extern char **environ; /* Environment variables for the child process */ -int test_status; /* Test Suite Status */ -int indiv_results[NUMTESTS]; /* Results of individual tests (-1=didn't run; 0=pass; 1=fail) */ - -/* Cond Var and Mutex to indicate timeout for child process */ -pthread_cond_t cv; -pthread_mutex_t lock; - -/* Timer Routines to calculate elapsed time and run timer thread */ -time_t start_time; /* Test case start time (in secs.) */ - -int elapsed(void) -{ - return (time(NULL) - start_time); -} - -void *timeout_thread(void *arg) -{ - int err; - int timeout = (int)arg; - - sleep(timeout); - fprintf(stderr, "Test Program timed out... 
Terminating!\n"); - - if ((err = pthread_cond_broadcast(&cv)) != 0) { - fprintf(stderr, "pthread_cond_broadcast: %s\n", strerror(err)); - exit(1); - } - - return (NULL); -} - -/* Routine to wait for child to complete */ -void *wait4_child_thread(void *arg) -{ - int err; - int child_stat; - - wait4(-1, &child_stat, 0, NULL); - - if ((err = pthread_cond_broadcast(&cv)) != 0) { - fprintf(stderr, "pthread_cond_broadcast: %s\n", strerror(err)); - exit(1); - } - - return (NULL); -} - -/* Mach Server Routines */ -boolean_t mach_exc_server( - mach_msg_header_t *InHeadP, - mach_msg_header_t *OutHeadP); - -kern_return_t catch_mach_exception_raise -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt - ) -{ - if (exception == EXC_RESOURCE) { - /* Set global variable to indicate exception received */ - exception_code = *((uint64_t *)code); - time_for_exc = elapsed(); - } else { - /* Terminate test on all other unexpected exceptions */ - fprintf(stderr, "received unexpected exception type %#x\n", exception); - exit(1); - } - - return (KERN_SUCCESS); -} - -kern_return_t catch_mach_exception_raise_state -( - mach_port_t exception_port, - exception_type_t exception, - const mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - const thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt - ) -{ - fprintf(stderr, "Unexpected exception handler called\n"); - exit(1); - return (KERN_FAILURE); -} - - -kern_return_t catch_mach_exception_raise_state_identity -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt - ) -{ - fprintf(stderr, "Unexpected exception handler called\n"); - exit(1); - return (KERN_FAILURE); -} - -void *server_thread(void *arg) -{ - kern_return_t kr; - - while(1) { - /* Handle exceptions on exc_port */ - if ((kr = mach_msg_server_once(mach_exc_server, 4096, exc_port, 0)) != KERN_SUCCESS) { - fprintf(stderr, "mach_msg_server_once: error %#x\n", kr); - exit(1); - } - } - return (NULL); -} - -int main(int argc, char *argv[]) -{ - posix_spawnattr_t attrs; - uint64_t percent, interval; - int i, err, ret = 0; - - kern_return_t kr; - mach_port_t task = mach_task_self(); - mach_port_t child_task; - char **child_args; - - pthread_t exception_thread; - pthread_t timer_thread; - pthread_t wait_thread; - - mach_msg_type_number_t maskCount = 1; - exception_mask_t mask; - exception_handler_t handler; - exception_behavior_t behavior; - thread_state_flavor_t flavor; - - pid_t child_pid; - int test_case_id = -1; - - if (argc > 1) - test_case_id = atoi(argv[1]); - - /* Initialize mutex and condition variable */ - if ((err = pthread_mutex_init(&lock, NULL)) != 0) { - fprintf(stderr,"pthread_mutex_init: %s\n", strerror(err)); - exit(1); - } - - if ((err = pthread_cond_init(&cv, NULL)) != 0) { - fprintf(stderr, "pthread_cond_init: %s\n", strerror(err)); - exit(1); - } - - /* Allocate and initialize new exception port */ - if ((kr = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port)) != KERN_SUCCESS) { - fprintf(stderr, "mach_port_allocate: %s\n", mach_error_string(kr)); - exit(1); - } - - if ((kr = mach_port_insert_right(task, 
exc_port, - exc_port, MACH_MSG_TYPE_MAKE_SEND)) != KERN_SUCCESS) { - fprintf(stderr, "mach_port_allocate: %s\n", mach_error_string(kr)); - exit(1); - } - - /* Get Current exception ports */ - if ((kr = task_get_exception_ports(task, EXC_MASK_RESOURCE, &mask, - &maskCount, &handler, &behavior, &flavor)) != KERN_SUCCESS) { - fprintf(stderr,"task_get_exception_ports: %s\n", mach_error_string(kr)); - exit(1); - } - - /* Create exception serving thread */ - if ((err = pthread_create(&exception_thread, NULL, server_thread, 0)) != 0) { - fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); - exit(1); - } - - fprintf(stderr, "---------------System Configuration------------------------------------------\n"); - fprintf(stderr, "System Kernel Version: "); - system("uname -a"); - fprintf(stderr, "System SDK Version: "); - system("sw_vers"); - - for (i = 0; i < NUMTESTS; i++) { - indiv_results[i] = -1; - } - - /* Run Tests */ - for(i=0; i 0) - fprintf(stderr, "EXC_RESOURCE Received after %d secs\n", time_for_exc); - - if (!!exception_code != !!test_exception_code[i]) { - test_status = 1; - test_case_status = 1; - indiv_results[i] = 1; - } - - if (exception_code) { - /* Validate test success by checking code and expected time */ - if ((exception_code & test_exception_code[i]) != test_exception_code[i]) { - fprintf(stderr, "Test Failure Reason: EXC_RESOURCE code did not match expected exception code!\n"); - fprintf(stderr, "Expected: 0x%llx Found: 0x%llx\n", test_exception_code[i], exception_code); - test_status = 1; - test_case_status = 1; - indiv_results[i] = 1; - } - if(exc_expected_at[i] && - (time_for_exc < (exc_expected_at[i] - 10) || - time_for_exc > (exc_expected_at[i] + 10))) { - fprintf(stderr, "Test Failure Reason: Test case did not receive EXC_RESOURCE within expected time!\n"); - test_status = 1; - test_case_status = 1; - indiv_results[i] = 1; - } - } - - if(test_case_status) - fprintf(stderr, "[FAILED]\n"); - else - fprintf(stderr, "[PASSED]\n"); - fprintf(stderr, "-------------------------------------------------------------------------------\n"); - - } - - if (test_case_id == -1) { - fprintf(stderr, "--------------- Results Summary -----------------------------------------------\n"); - - for (i = 0; i < NUMTESTS; i++) { - fprintf(stderr, "%2d: %s\n", i, (indiv_results[i] < 0) ? "N/A" : - (indiv_results[i] == 0) ? "PASSED" : "FAILED"); - } - } - -cleanup: - kill(child_pid, SIGKILL); - exit(test_status); -} - - diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs deleted file mode 100644 index 4b6cc647b..000000000 --- a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -#include diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c deleted file mode 100644 index 4579161d4..000000000 --- a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c +++ /dev/null @@ -1,221 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define PAGESIZE 4096 - -/* Trigger forced jetsam */ -#define MEMORYSTATUS_CMD_TEST_JETSAM 1000 - -static void -dirty_chunk(void *chunk, int chunk_size) -{ - int i; - char *p; - - // Dirty every word in the chunk. 
- for (p = chunk; p < (char *)chunk + (chunk_size * 1024 * 1024); p += 4) { - *p = 'Z'; - } -} - -char *pname; - -void usage(void) { - printf("usage: %s [-re] [-l MB] [-w MB] [-m MB] [-o num] [-k pid] \n", pname); - printf("\t-r: after reaching max, re-dirty it all when the user prompts to do so.\n"); - printf("\t-l: program the task's physical footprint limit to this value (in MB).\n"); - printf("\t-w: program the task's jetsam high watermark to this value (in MB).\n"); - printf("\t-m: dirty no more than this amount (in MB).\n"); - printf("\t-e: exit after reaching -m max dirty.\n"); - printf("\t-o: oscillate at the max this number of times and then continue on up.\n"); - printf("\t-k: trigger explicit jetsam kill of this pid (and then exit).\n"); -} - -int main(int argc, char *argv[]) -{ - int ch; - void **chunks; - int nchunks; - int max_chunks; - int oscillations = -1; - int tot_mb = 0; - int chunk_size; - int interval; - int max = -1; - int limit = -2; - int high_watermark = -1; - int victim = -1; - int old_limit; - boolean_t redirty = FALSE; - boolean_t exit_after_max = FALSE; - - int oscillation_cnt = 0; - - pname = argv[0]; - - printf("pid: %d\n", getpid()); - - while ((ch = getopt(argc, argv, "rem:l:w:k:o:")) != -1) { - switch (ch) { - case 'm': - max = atoi(optarg); - break; - case 'l': - limit = atoi(optarg); - break; - case 'w': - high_watermark = atoi(optarg); - break; - case 'o': - oscillations = atoi(optarg); - break; - case 'r': - redirty = TRUE; - break; - case 'e': - exit_after_max = TRUE; - break; - case 'k': - victim = atoi(optarg); - break; - case 'h': - default: - usage(); - exit(1); - } - } - - argc -= optind; - argv += optind; - - if (victim != -1) { - int r; - /* - * int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, user_addr_t buffer, size_t buffersize); - */ - if ((r = memorystatus_control(MEMORYSTATUS_CMD_TEST_JETSAM, victim, 0, 0, 0)) != 0) { - perror("memorystatus_control"); - exit(1); - } - printf("killed process %d\n", victim); - - } - - if (argc != 2) { - usage(); - exit(1); - } - - chunk_size = atoi(argv[0]); - interval = atoi(argv[1]); - - if (limit != -2) { - kern_return_t kr; - if ((kr = task_set_phys_footprint_limit(mach_task_self(), limit, &old_limit)) != KERN_SUCCESS) { - fprintf(stderr, "task_set_phys_footprint_limit() failed: %s\n", mach_error_string(kr)); - exit(1); - } - printf("phys footprint limit set to %d MB (was: %d MB)\n", limit, old_limit); - } - - if (high_watermark != -1) { - int r; - /* - * int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, user_addr_t buffer, size_t buffersize); - */ - if ((r = memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK, getpid(), high_watermark, 0, 0)) != 0) { - perror("memorystatus_control"); - exit(1); - } - printf("high watermark set to %d MB\n", high_watermark); - } - - printf("consuming memory in chunks of %d MB every %d milliseconds.\n", chunk_size, interval); - - printf("total consumed: "); - fflush(stdout); - - /* - * Estimate max number of chunks possible, using 4GB as absolute max amount of memory - * we could ever use. 
- */ - max_chunks = 4000 / chunk_size; - if ((chunks = calloc(max_chunks, sizeof (*chunks))) == NULL) { - perror("malloc"); - exit(1); - } - nchunks = 0; - - while (1) { - if ((chunks[nchunks] = malloc(chunk_size * 1024 * 1024)) == NULL) { - perror("malloc"); - exit(1); - } - - tot_mb += chunk_size; - - dirty_chunk(chunks[nchunks], chunk_size); - - nchunks++; - - putchar(0x8); putchar(0x8); putchar(0x8); putchar(0x8); - printf("%4d", tot_mb); - fflush(stdout); - - if ((max != -1) && (tot_mb > max)) { - printf("\nMax reached.\n"); - - if (exit_after_max) { - exit(0); - } - - if ((oscillations == -1) || (oscillation_cnt < oscillations)) { - if (redirty) { - while (1) { - int i, ch; - - printf("Press any key to re-dirty ('q' to quit)..."); - fflush(stdout); - if ((ch = getchar()) == 'q') { - exit(0); - } - - for (i = 0; i < nchunks; i++) { - dirty_chunk(chunks[i], chunk_size); - } - } - } - - /* - * We've broken the limit of what we should be consuming; free the - * most recent three chunks and go round again. - */ - nchunks--; - free(chunks[nchunks]); - chunks[nchunks] = NULL; - tot_mb -= chunk_size; - - if (nchunks > 1) { - nchunks--; - free(chunks[nchunks]); - chunks[nchunks] = NULL; - tot_mb -= chunk_size; - nchunks--; - free(chunks[nchunks]); - chunks[nchunks] = NULL; - tot_mb -= chunk_size; - } - - oscillation_cnt++; - } - } - - usleep(interval * 1000); - } - - return (1); -} diff --git a/tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c b/tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c deleted file mode 100644 index be921a3d5..000000000 --- a/tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c +++ /dev/null @@ -1,210 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define SUCCESS 0 -#define FAILURE -1 - -int do_fcntl_lock(int fd, int cmd, short lock_type, off_t start, short when, off_t len, int ret){ - struct flock fl; - bzero(&fl, sizeof(fl)); - fl.l_start = start; - fl.l_len = len; - fl.l_type = lock_type; - fl.l_whence = when; - errno = 0; - int retval = fcntl(fd, cmd, &fl); - printf ("fcntl with flock(%lld,%lld,%d,%d) returned %d and errno %d \n", start, len, lock_type, when, retval, errno); - if ( retval < 0) - perror("fcntl"); - - if (retval != ret) { - printf("[FAILED] fcntl test failed\n"); - exit(-1); - } - return retval; -} - -#define read_lock(fd, offset, whence, len, ret) \ - do_fcntl_lock(fd, F_SETLK, F_RDLCK, offset, whence, len, ret) -#define readw_lock(fd, offset, whence, len, ret) \ - do_fcntl_lock(fd, F_SETLKW, F_RDLCK, offset, whence, len, ret) -#define write_lock(fd, offset, whence, len, ret) \ - do_fcntl_lock(fd, F_SETLK, F_WRLCK, offset, whence, len, ret) -#define writew_lock(fd, offset, whence, len, ret) \ - do_fcntl_lock(fd, F_SETLKW, F_WRLCK, offset, whence, len, ret) -#define un_lock(fd, offset, whence, len, ret) \ - do_fcntl_lock(fd, F_SETLK, F_UNLCK, offset, whence, len, ret) -#define is_read_lock(fd, offset, whence, len, ret) \ - do_fcntl_lock(fd, F_GETLK, F_RDLCK, offset, whence, len, ret) -#define is_write_lock(fd, offset, whence, len, ret) \ - do_fcntl_lock(fd, F_GETLK, F_WRLCK, offset, whence, len, ret) - - -int main(){ - int fd = 0; - char *tmpfile ="/tmp/fcntltry.txt"; - - unlink(tmpfile); - fd = creat(tmpfile, S_IRWXU); - if (fd < 0) { - perror("creat"); - goto failed; - } - - /* fcntl with seek position set to 1 */ - if (lseek(fd, (off_t)1, SEEK_SET) != 1){ - perror("lseek"); - goto failed; - } - 
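/*
 * The cases below probe the kernel's flock range validation. The effective
 * start is l_start plus the whence base (0 for SEEK_SET, the current offset
 * for SEEK_CUR); a positive l_len locks [start, start + len - 1], l_len == 0
 * locks from start to EOF, and a negative l_len locks [start + len, start - 1].
 * Any range whose bounds overflow off_t or fall below offset 0 must be
 * rejected, which is what the FAILURE expectations assert.
 */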
off_t lock_start = 0, lock_len = 0; - - printf("Testing with SEEK_SET\n"); - - /* testing F_GETLK for SEEK_SET with lock_start = constant and len changes */ - lock_start = 0; - is_read_lock(fd, lock_start, SEEK_SET, 0, SUCCESS); - is_read_lock(fd, lock_start, SEEK_SET, LLONG_MAX, SUCCESS); - is_read_lock(fd, lock_start, SEEK_SET, LLONG_MIN, FAILURE); - - /* testing F_GETLK for SEEK_SET with len fixed 0 and lock_start changing */ - lock_len = 0; - is_read_lock(fd, 0, SEEK_SET, lock_len, SUCCESS); - is_read_lock(fd, LLONG_MAX, SEEK_SET, lock_len, SUCCESS); - is_read_lock(fd, LLONG_MIN, SEEK_SET, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_SET with len fixed max and lock_start changing */ - lock_len = LLONG_MAX; - is_read_lock(fd, 0, SEEK_SET, lock_len, SUCCESS); - is_read_lock(fd, 1, SEEK_SET, lock_len, SUCCESS); - is_read_lock(fd, 2, SEEK_SET, lock_len, FAILURE); - is_read_lock(fd, LLONG_MAX, SEEK_SET, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN, SEEK_SET, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_SET with len fixed min and lock_start changing */ - lock_len = LLONG_MIN; - is_read_lock(fd, 0, SEEK_SET, lock_len, FAILURE); - is_read_lock(fd, LLONG_MAX, SEEK_SET, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN, SEEK_SET, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_SET with len fixed positive (20) and lock_start changing */ - lock_len = 20; - is_read_lock(fd, 0, SEEK_SET, lock_len, SUCCESS); - is_read_lock(fd, 100, SEEK_SET, lock_len, SUCCESS); - is_read_lock(fd, -100, SEEK_SET, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_SET with len fixed negative (-20) and lock_start changing */ - lock_len = -20; - is_read_lock(fd, 0, SEEK_SET, lock_len, FAILURE); - is_read_lock(fd, 100, SEEK_SET, lock_len, SUCCESS); - is_read_lock(fd, -100, SEEK_SET, lock_len, FAILURE); - - printf("Testing with SEEK_CUR with offset 1 \n"); - - /* testing F_GETLK for SEEK_CUR with lock_start = constant and len changes */ - lock_start = 0; - is_read_lock(fd, lock_start, SEEK_CUR, 0, SUCCESS); - is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MAX, SUCCESS); - is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MIN, FAILURE); - - /* testing F_GETLK for SEEK_CUR with len fixed 0 and lock_start changing */ - lock_len = 0; - is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MAX - 1, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_CUR with len fixed max and lock_start changing */ - lock_len = LLONG_MAX; - is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, 1, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, 2, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_CUR with len fixed min and lock_start changing */ - lock_len = LLONG_MIN; - is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_CUR with len fixed positive (20) and lock_start changing */ - lock_len = 20; - is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, 100, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, -100, SEEK_CUR, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_CUR with len fixed negative (-20) and lock_start changing */ - lock_len = -20; - is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE); - 
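/*
 * With the file offset at 1, (0, SEEK_CUR, -20) above resolves to the range
 * [1 - 20, 0] = [-19, 0], which dips below offset 0 and must be rejected;
 * (100, SEEK_CUR, -20) below resolves to [81, 100] and is accepted, while
 * (-100, SEEK_CUR, -20) again yields a negative start.
 */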
is_read_lock(fd, 100, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, -100, SEEK_CUR, lock_len, FAILURE); - - close(fd); - - unlink(tmpfile); - fd = creat(tmpfile, S_IRWXU); - if (fd < 0) { - perror("creat"); - goto failed; - } - - /* fcntl with seek position set to LLONG_MAX - 1 */ - if (lseek(fd, (off_t)LLONG_MAX - 1, SEEK_SET) != (LLONG_MAX - 1)){ - perror("lseek"); - goto failed; - } - - - printf("Testing with SEEK_CUR with offset LLONG_MAX - 1\n"); - - /* testing F_GETLK for SEEK_CUR with lock_start = constant and len changes */ - lock_start = 0; - is_read_lock(fd, lock_start, SEEK_CUR, 0, SUCCESS); - is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MAX, FAILURE); - is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MIN, FAILURE); - is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MIN + 2, SUCCESS); - - /* testing F_GETLK for SEEK_CUR with len fixed 0 and lock_start changing */ - lock_len = 0; - is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN + 2, SEEK_CUR, lock_len, SUCCESS); - - /* testing F_GETLK for SEEK_CUR with len fixed max and lock_start changing */ - lock_len = LLONG_MAX; - is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN + 2, SEEK_CUR, lock_len, SUCCESS); - - /* testing F_GETLK for SEEK_CUR with len fixed min and lock_start changing */ - lock_len = LLONG_MIN; - is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE); - - /* testing F_GETLK for SEEK_CUR with len fixed positive (20) and lock_start changing */ - lock_len = 20; - is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, -100, SEEK_CUR, lock_len, SUCCESS); - - /* testing F_GETLK for SEEK_CUR with len fixed negative (-20) and lock_start changing */ - lock_len = -20; - is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS); - is_read_lock(fd, 100, SEEK_CUR, lock_len, FAILURE); - is_read_lock(fd, -100, SEEK_CUR, lock_len, SUCCESS); - - - printf("[PASSED] fcntl test passed \n"); - return 0; -failed: - printf("[FAILED] fcntl test failed\n"); - return -1; - -} diff --git a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c deleted file mode 100644 index ae587fdff..000000000 --- a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c +++ /dev/null @@ -1,532 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "guarded_test_common.h" - -#include - -#if !defined(SYS_guarded_kqueue_np) -#define guarded_kqueue_np(gp, gf) syscall(443, gp, gf) -#endif - -#if !defined(SYS_change_fdguard_np) -#define change_fdguard_np(fd, gp, gf, ngp, nfg, flp) \ - syscall(444, fd, gp, gf, ngp, nfg, flp) -#endif - -#define SERVER_NAME "/tmp/fdserver" - -typedef union { - struct cmsghdr cmsghdr; - u_char msg_control[0]; -} cmsghdr_msg_control_t; - -/* Test case for closing a guarded fd */ -void close_guarded_fd(int); -/* Test case for duping a guarded fd */ -void dup_guarded_fd(int); -/* Test case for removing flag from guarded fd */ -void remove_flag_guarded_fd(int); -/* Test case for closing guarded fd with bad guard */ -void badguard_close_guarded_fd(int, guardid_t); -/* Test case for 
guarded closing an unguarded fd */ -void guard_close_unguarded_fd(guardid_t); -/* Test case for guarded closing a guarded fd correctly */ -void guard_close_guarded_fd(int, guardid_t); -/* Test case for creating a file port from a guarded fd */ -void fileport_makeport_guarded_fd(int); -/* Test case for sending guarded fd over socket */ -void sendmsg_guarded_fd(int); -/* Test case for removing the guard from a guarded fd */ -void remove_guard(int, guardid_t, u_int, int); -/* Test case for adding a guard to a tcp socket */ -void add_guard_to_socket(guardid_t); -/* Test case for a guarded kqueue */ -void create_and_close_guarded_kqueue(guardid_t); - -/* Helper routines */ -void *client_recv_fd(void *); -int receive_fd_using_sockfd(int *, int); -int send_fd_using_sockfd(int, int); -int setup_server(const char *); - -const guardid_t guard = 0x123456789abcdefull; -char *pname; - -static void usage(void) -{ - printf("usage: %s [test number]\n", pname); - printf("test 0: Test case for closing a guarded fd\n"); - printf("test 1: Test case for duping a guarded fd\n"); - printf("test 2: Test case for removing FD_CLOEXEC flag from a guarded fd\n"); - printf("test 3: Test case for closing a guarded fd with a bad guard\n"); - printf("test 4: Test case for closing an unguarded fd using a guarded close\n"); - printf("test 5: Test case for closing a guarded fd with the correct guard\n"); - printf("test 6: Test case for creating a file port from a guarded fd\n"); - printf("test 7: Test case for sending a guarded fd over a socket\n"); - printf("test 8: Test case for removing the guard from a guarded fd\n"); - printf("test 9: Test case for adding a guard to a tcp socket\n"); - printf("test 10: Test case for a guarded kqueue\n"); -} - -int main(int argc, char *argv[]) -{ - int option, fd; - - pname = argv[0]; - if (argc != 2) { - usage(); - exit(1); - } - printf("Test Program invoked with option [%s]\n", argv[1]); - option = atoi(argv[1]); - - close(TEST_FD); - fd = guarded_open_np( - "/tmp/try.txt", - &guard, - GUARD_CLOSE | GUARD_DUP | GUARD_SOCKET_IPC | GUARD_FILEPORT, - O_CREAT | O_CLOEXEC | O_RDWR, - 0666); - - if (-1 == fd) { - perror("guarded_open_np"); - exit(1); - } - - switch(option) { - - case 0: - close_guarded_fd(fd); - break; - case 1: - dup_guarded_fd(fd); - break; - case 2: - remove_flag_guarded_fd(fd); - break; - case 3: - badguard_close_guarded_fd(fd, guard); - break; - case 4: - guard_close_unguarded_fd(guard); - break; - case 5: - guard_close_guarded_fd(fd, guard); - break; - case 6: - fileport_makeport_guarded_fd(fd); - break; - case 7: - sendmsg_guarded_fd(fd); - break; - case 8: - remove_guard(fd, guard, GUARD_CLOSE | GUARD_DUP | - GUARD_SOCKET_IPC | GUARD_FILEPORT, FD_CLOEXEC); - break; - case 9: - add_guard_to_socket(guard); - break; - case 10: - create_and_close_guarded_kqueue(guard); - break; - default: - usage(); - exit(1); - } - - return 0; -} - -void close_guarded_fd(int fd) -{ - int ret_val; - printf("Performing close on a guarded fd...\n"); - - /* Brute force way of ensuring that the child process - * uses the TEST_FD which is checked by the parent - */ - while(fd != TEST_FD && fd <= TEST_FD) { - fd = guarded_open_np( - "/tmp/try.txt", - &guard, - GUARD_CLOSE | GUARD_DUP | GUARD_SOCKET_IPC | GUARD_FILEPORT, - O_CREAT | O_CLOEXEC | O_RDWR, - 0666); - - if (-1 == fd) { - perror("guarded_open_np"); - exit(1); - } - } - - ret_val = close(TEST_FD); - fprintf(stderr, "close() returned (%d) on a guarded fd?!\n", ret_val); - exit(1); -} - -void dup_guarded_fd(int fd) -{ - int ret_val; - 
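/*
 * dup(2) on a descriptor guarded with GUARD_DUP is not expected to return:
 * the kernel raises a fatal EXC_GUARD (kGUARD_EXC_DUP) that the parent test
 * framework catches, so reaching the fprintf/exit(1) below means the guard
 * did not fire and the test failed.
 */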
printf("Performing dup on a guarded fd...\n"); - ret_val = dup(fd); - fprintf(stderr, "dup() returned (%d) on a guarded fd?!\n", ret_val); - exit(1); -} - -void remove_flag_guarded_fd(int fd) -{ - int ret_val, value; - printf("Removing FD_CLOEXEC from a guarded fd...\n"); - value = fcntl(fd, F_GETFD); - if (-1 == value) { - fprintf(stderr, "fcntl:F_GETFD failed with %s!\n", strerror(errno)); - exit(1); - } - ret_val = fcntl(fd, F_SETFD, value & ~FD_CLOEXEC); - fprintf(stderr, "fcntl:F_SETFD returned (%d) on a guarded fd?!\n", ret_val); - exit(1); -} - -void badguard_close_guarded_fd(int fd, guardid_t guard) -{ - int ret_val; - printf("Closing guarded fd with a bad guard...\n"); - guardid_t badguard = guard << 1; - ret_val = guarded_close_np(fd, &badguard); - if (-1 == ret_val) { - switch (errno) { - case EPERM: - /* Expected */ - perror("guarded_close_np"); - exit(0); - default: - perror("guarded_close_np"); - break; - } - } - fprintf(stderr, - "Close with bad guard returned (%d) on a guarded fd?!\n", ret_val); - exit(1); -} - -void guard_close_unguarded_fd(guardid_t guard) -{ - printf("Closing Unguarded fd with guarded_close_np...\n"); - int newfd, ret_val; - - if ((newfd = dup(fileno(stderr))) == -1) { - fprintf(stderr, "Failed to dup stderr!\n"); - exit(1); - } - - ret_val = guarded_close_np(newfd, &guard); - if (-1 == ret_val) { - /* Expected */ - perror("guarded_close_np"); - exit(0); - } - else { - fprintf(stderr, "Closing unguarded fd with guarded_fd succeeded with return value (%d)?!\n", ret_val); - exit(1); - } -} - -void guard_close_guarded_fd(int fd, guardid_t guard) -{ - printf("Closing a guarded fd with correct guard...\n"); - if (-1 == guarded_close_np(fd, &guard)) { - fprintf(stderr, "Closing guarded fd with correct guard failed?!\n"); - exit(1); - } - /* Expected */ - exit(0); -} - -void fileport_makeport_guarded_fd(int fd) -{ - mach_port_name_t fdname = MACH_PORT_NULL; - int ret_val; - printf("Creating a file port from a guarded fd...\n"); - ret_val = fileport_makeport(fd, &fdname); - fprintf(stderr, "Creating a file port from guarded fd returned (%d)?!\n", ret_val); - exit(1); -} - -void sendmsg_guarded_fd(int fd) -{ - int sockfd, err; - int csockfd; - socklen_t len; - struct sockaddr_un client_unix_addr; - pthread_t client_thread; - int ret_val; - - /* Setup fd server */ - if ((sockfd = setup_server(SERVER_NAME)) < 0) { - exit(1); - } - - if(-1 == listen(sockfd, 5)) { - perror("listen"); - exit(1); - } - - /* Create client thread */ - if ((err = pthread_create(&client_thread, NULL, client_recv_fd, 0)) != 0) { - fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); - exit(1); - } - - pthread_detach(client_thread); - - for (;;) { - len = sizeof (client_unix_addr); - csockfd = accept(sockfd, - (struct sockaddr *)&client_unix_addr, &len); - if (csockfd < 0) { - perror("accept"); - exit(1); - } - - printf("Sending guarded fd on a socket...\n"); - ret_val = send_fd_using_sockfd(fd, csockfd); - if(ret_val < 0) { - /* Expected */ - fprintf(stderr, "sendmsg failed with return value (%d)!\n", ret_val); - } - else { - fprintf(stderr, "Sending guarded fd on socket succeeded with return value (%d)?!\n", ret_val); - } - } - - exit(0); -} - -void -remove_guard(int fd, guardid_t guard, u_int guardflags, int fdflags) -{ - printf("Remove the guard from a guarded fd, then dup(2) it ...\n"); - - int ret_val = change_fdguard_np(fd, &guard, guardflags, NULL, 0, &fdflags); - - if (ret_val == -1) { - perror("change_fdguard_np"); - exit(1); - } - - printf("Dup-ing the unguarded 
fd ...\n"); - - /* - * Now that the GUARD_DUP has been removed, we should be able - * to dup the descriptor with no exception generation. - */ - int newfd = dup(fd); - - if (-1 == newfd) { - perror("dup"); - exit(1); - } - exit(0); -} - -void -add_guard_to_socket(guardid_t guard) -{ - printf("Add a close guard to an unguarded socket fd, then close it ...\n"); - - int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - - if (-1 == s) { - perror("socket"); - exit(1); - } - - int ret_val = change_fdguard_np(s, NULL, 0, &guard, GUARD_CLOSE | GUARD_DUP, NULL); - - if (-1 == ret_val) { - perror("change_fdguard_np"); - exit(1); - } - - /* - * Now we've added a GUARD_CLOSE successfully, let's try and do a close - */ - if (-1 == close(s)) - perror("close"); - /* - * This is an error, because we should've received a fatal EXC_GUARD - */ - exit(1); -} - -void -create_and_close_guarded_kqueue(guardid_t guard) -{ - printf("Create a guarded kqueue, then guarded_close_np() it ...\n"); - - int kq = guarded_kqueue_np(&guard, GUARD_CLOSE | GUARD_DUP); - - int ret_val = guarded_close_np(kq, &guard); - if (-1 == ret_val) { - perror("guarded_close_np"); - exit(1); - } - - printf("Create a guarded kqueue, then close() it ...\n"); - - kq = guarded_kqueue_np(&guard, GUARD_CLOSE | GUARD_DUP); - if (-1 == close(kq)) - perror("close"); - /* - * This is always an error, because we should've received a fatal EXC_GUARD - */ - exit(1); -} - -/* - * Helper Routines - */ - -int setup_server(const char *name) -{ - int sockfd, len; - struct sockaddr_un server_unix_addr; - - if ((sockfd = socket(AF_LOCAL, SOCK_STREAM, 0)) < 0) { - perror("socket"); - return (sockfd); - } - - (void) unlink(name); - bzero(&server_unix_addr, sizeof (server_unix_addr)); - server_unix_addr.sun_family = AF_LOCAL; - (void) strcpy(server_unix_addr.sun_path, name); - len = strlen(name) + 1; - len += sizeof (server_unix_addr.sun_family); - - if (bind(sockfd, (struct sockaddr *)&server_unix_addr, len) < 0) { - (void) close(sockfd); - return (-1); - } - return (sockfd); -} - -int send_fd_using_sockfd(int fd, int sockfd) -{ - ssize_t ret; - struct iovec iovec[1]; - struct msghdr msg; - struct cmsghdr *cmsghdrp; - cmsghdr_msg_control_t *cmsghdr_msg_control; - - cmsghdr_msg_control = malloc(CMSG_SPACE(sizeof (int))); - - iovec[0].iov_base = ""; - iovec[0].iov_len = 1; - - msg.msg_name = 0; - msg.msg_namelen = 0; - msg.msg_iov = iovec; - msg.msg_iovlen = 1; - msg.msg_control = cmsghdr_msg_control->msg_control; - msg.msg_controllen = CMSG_SPACE(sizeof (int)); - msg.msg_flags = 0; - - cmsghdrp = CMSG_FIRSTHDR(&msg); - cmsghdrp->cmsg_len = CMSG_LEN(sizeof (int)); - cmsghdrp->cmsg_level = SOL_SOCKET; - cmsghdrp->cmsg_type = SCM_RIGHTS; - - *((int *)CMSG_DATA(cmsghdrp)) = fd; - - if ((ret = sendmsg(sockfd, &msg, 0)) < 0) { - perror("sendmsg"); - return ret; - } - - return 0; -} - -int receive_fd_using_sockfd(int *fd, int sockfd) -{ - ssize_t ret; - u_char c; - int errcount = 0; - struct iovec iovec[1]; - struct msghdr msg; - struct cmsghdr *cmsghdrp; - cmsghdr_msg_control_t *cmsghdr_msg_control; - - cmsghdr_msg_control = malloc(CMSG_SPACE(sizeof (int))); - - iovec[0].iov_base = &c; - iovec[0].iov_len = 1; - - msg.msg_name = 0; - msg.msg_namelen = 0; - msg.msg_iov = iovec; - msg.msg_iovlen = 1; - msg.msg_control = cmsghdr_msg_control->msg_control; - msg.msg_controllen = CMSG_SPACE(sizeof (int)); - msg.msg_flags = 0; - - if ((ret = recvmsg(sockfd, &msg, 0)) < 0) { - perror("recvmsg"); - return ret; - } - - cmsghdrp = CMSG_FIRSTHDR(&msg); - if (cmsghdrp == NULL) { - 
*fd = -1; - return ret; - } - - if (cmsghdrp->cmsg_len != CMSG_LEN(sizeof (int))) - errcount++; - if (cmsghdrp->cmsg_level != SOL_SOCKET) - errcount++; - if (cmsghdrp->cmsg_type != SCM_RIGHTS) - errcount++; - if (errcount) { - *fd = -1; - } else - *fd = *((int *)CMSG_DATA(cmsghdrp)); - return ret; -} - -void *client_recv_fd(void *arg) -{ - char buf[512]; - int fd = -1, sockfd, len, ret; - struct sockaddr_un server_unix_addr; - - bzero(&server_unix_addr, sizeof (server_unix_addr)); - strcpy(server_unix_addr.sun_path, SERVER_NAME); - server_unix_addr.sun_family = AF_LOCAL; - len = strlen(SERVER_NAME) + 1; - len += sizeof (server_unix_addr.sun_family); - - if ((sockfd = socket(AF_LOCAL, SOCK_STREAM, 0)) < 0) { - perror("socket"); - exit(1); - } - - if (connect(sockfd, (struct sockaddr *)&server_unix_addr, len) < 0) { - perror("connect"); - exit(1); - } - - ret = receive_fd_using_sockfd(&fd, sockfd); - return (NULL); -} diff --git a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h deleted file mode 100644 index ce3bcf7c8..000000000 --- a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * Common file for Guarded fd Unit Tests - */ - -#ifndef _GUARDED_TEST_COMMON_H_ -#define _GUARDED_TEST_COMMON_H_ - -/* Exception causing fd for test program */ -#define TEST_FD 25 - -#endif /* _GUARDED_TEST_COMMON_H_ */ diff --git a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c deleted file mode 100644 index 9fdc2d770..000000000 --- a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Testing Framework for EXC_GUARD exceptions - * - * The framework tests for exception conditions for guarded fds. - * It creates a new exception port and an associated handling thread. - * For each test case, the framework sets its own exception port to the - * newly allocated port, execs a new child (which inherits the new - * exception port) and restores the parent's exception port to the - * original handler. The child process is invoked with a different - * test case identifier and invokes the corresponding test case. 
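 * A test passes only if the child's exit status and the received EXC_GUARD
 * code both match expectations. Per the defines below, the 64-bit code packs
 * the guard type into bits 63:61 (EXC_GUARD_TYPE_SHIFT), the flavor from bit
 * 32 up (EXC_GUARD_FLAVOR_SHIFT), and the offending descriptor into the low
 * 32 bits (EXC_GUARD_FD_MASK); CHK_TEST_FD entries additionally insist that
 * the descriptor field equals TEST_FD.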
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "guarded_test_common.h" -#include - -#define MAX_TEST_ID_LEN 16 -#define MAX_ARGV 8 -#define EXC_GUARD_FLAVOR_SHIFT 32 -#define EXC_GUARD_TYPE_SHIFT 61 -#define EXC_GUARD_FD_MASK 0xFFFFFFFF - -/* - * To add a new test case to this framework: - * - Increment the NUMTESTS value - * - Add (Guard Type | flavor) to "test_exception_code" if the - * test case generates an exception; 0 otherwise - * - Add CHK_TEST_FD/IGN_TEST_FD depending on whether - * framework should look for TEST_FD in the exception message - * - Add a new case and routine in guarded_test.c to - * test the scenario - */ - -#define NUMTESTS 11 - -uint64_t test_exception_code[] = { - (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_CLOSE) << EXC_GUARD_FLAVOR_SHIFT), - (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_DUP) << EXC_GUARD_FLAVOR_SHIFT), - (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_NOCLOEXEC) << EXC_GUARD_FLAVOR_SHIFT), - 0, - 0, - 0, - (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_FILEPORT) << EXC_GUARD_FLAVOR_SHIFT), - (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_SOCKET_IPC) << EXC_GUARD_FLAVOR_SHIFT), - 0, - (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_CLOSE) << EXC_GUARD_FLAVOR_SHIFT), - (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_CLOSE) << EXC_GUARD_FLAVOR_SHIFT) -}; - -#define CHK_TEST_FD 1 -#define IGN_TEST_FD 0 - -uint64_t test_fd[] = { - CHK_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD, - IGN_TEST_FD -}; - -mach_port_t exc_port; -uint64_t exception_code; -extern char **environ; - -boolean_t mach_exc_server( - mach_msg_header_t *InHeadP, - mach_msg_header_t *OutHeadP); - -kern_return_t catch_mach_exception_raise -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt - ) -{ - if (exception == EXC_GUARD) { - /* Set global variable to indicate exception received */ - exception_code = *((uint64_t *)code); - } else { - /* Terminate test on all other unexpected exceptions */ - fprintf(stderr, "received unexpected exception type %#x\n", exception); - exit(1); - } - - return (KERN_SUCCESS); -} - -kern_return_t catch_mach_exception_raise_state -( - mach_port_t exception_port, - exception_type_t exception, - const mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - const thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt - ) -{ - fprintf(stderr, "Unexpected exception handler called\n"); - exit(1); - return (KERN_FAILURE); -} - - -kern_return_t catch_mach_exception_raise_state_identity -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt - ) -{ - fprintf(stderr, "Unexpected exception handler called\n"); - exit(1); - return (KERN_FAILURE); -} - - -void *server_thread(void *arg) -{ - kern_return_t kr; - - 
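	/*
	 * mach_msg_server_once() receives a single request (up to 4096 bytes)
	 * on exc_port and dispatches it through the MIG-generated
	 * mach_exc_server(), which invokes the catch_mach_exception_raise()
	 * handler above; looping lets this thread serve one exception per
	 * spawned test case.
	 */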
while(1) { - /* Handle exceptions on exc_port */ - if ((kr = mach_msg_server_once(mach_exc_server, 4096, exc_port, 0)) != KERN_SUCCESS) { - fprintf(stderr, "mach_msg_server_once: error %#x\n", kr); - exit(1); - } - } - return (NULL); -} - -int main(int argc, char *argv[]) -{ - posix_spawnattr_t attrs; - kern_return_t kr; - mach_port_t task = mach_task_self(); - - mach_msg_type_number_t maskCount = 1; - exception_mask_t mask; - exception_handler_t handler; - exception_behavior_t behavior; - thread_state_flavor_t flavor; - pthread_t exception_thread; - uint64_t exc_id; - unsigned int exc_fd; - - char *test_prog_name = "./guarded_test"; - char *child_args[MAX_ARGV]; - char test_id[MAX_TEST_ID_LEN]; - int i, err; - int child_status; - int test_status = 0; - - /* Allocate and initialize new exception port */ - if ((kr = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port)) != KERN_SUCCESS) { - fprintf(stderr, "mach_port_allocate: %#x\n", kr); - exit(1); - } - - if ((kr = mach_port_insert_right(task, exc_port, - exc_port, MACH_MSG_TYPE_MAKE_SEND)) != KERN_SUCCESS) { - fprintf(stderr, "mach_port_allocate: %#x\n", kr); - exit(1); - } - - /* Get Current exception ports */ - if ((kr = task_get_exception_ports(task, EXC_MASK_GUARD, &mask, - &maskCount, &handler, &behavior, &flavor)) != KERN_SUCCESS) { - fprintf(stderr,"task_get_exception_ports: %#x\n", kr); - exit(1); - } - - /* Create exception serving thread */ - if ((err = pthread_create(&exception_thread, NULL, server_thread, 0)) != 0) { - fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); - exit(1); - } - - pthread_detach(exception_thread); - - /* Initialize posix_spawn attributes */ - posix_spawnattr_init(&attrs); - - if ((err = posix_spawnattr_setflags(&attrs, POSIX_SPAWN_SETEXEC)) != 0) { - fprintf(stderr, "posix_spawnattr_setflags: %s\n", strerror(err)); - exit(1); - } - - /* Run Tests */ - for(i=0; i diff --git a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c deleted file mode 100644 index 12e522aa4..000000000 --- a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c +++ /dev/null @@ -1,536 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CONTEXT_VALUE1 0x12345678 -#define CONTEXT_VALUE2 0x11111111 - -char *pname; - -static void usage(void) -{ - printf("usage: %s [test number]\n", pname); - printf("Test 0: Test case for constructing a mach port with options\n"); - printf("Test 1: Test case for destructing guarded mach port\n"); - printf("Test 2: Test case for destroying guarded mach port\n"); - printf("Test 3: Test case for mod_ref() guarded mach port\n"); - printf("Test 4: Test case for guarding mach port\n"); - printf("Test 5: Test case for unguarding mach port\n"); - printf("Test 6: Test case for unguarding a non-guarded port\n"); - printf("Test 7: Test case for guarding a mach port with context\n"); - printf("Test 8: Test case for mach_port_get_context()\n"); - printf("Test 9: Test case for mach_port_set_context()\n"); -} - -/* Test case for constructing a mach port with options */ -void construct_mach_port(); -/* Test case for destructing guarded mach port */ -void destruct_guarded_mach_port(); -/* Test case for destroying guarded mach port */ -void destroy_guarded_mach_port(); -/* Test case for mod_ref() guarded mach port */ -void mod_ref_guarded_mach_port(); -/* Test case for guarding mach port */ -void guard_mach_port(); 
-/* Test case for unguarding mach port */ -void unguard_mach_port(); -/* Test case for unguarding a non-guarded port */ -void unguard_nonguarded_mach_port(); -/* Test case for guarding a mach port with context */ -void guard_port_with_context(); -/* Test case for mach_port_get_context() */ -void get_context_mach_port(); -/* Test case for mach_port_set_context() */ -void set_context_mach_port(); - -int main(int argc, char *argv[]) -{ - int option, fd; - - pname = argv[0]; - if (argc != 2) { - usage(); - exit(1); - } - printf("Test Program invoked with option [%s]\n", argv[1]); - option = atoi(argv[1]); - - - switch(option) { - - case 0: - construct_mach_port(); - break; - case 1: - destruct_guarded_mach_port(); - break; - case 2: - destroy_guarded_mach_port(); - break; - case 3: - mod_ref_guarded_mach_port(); - break; - case 4: - guard_mach_port(); - break; - case 5: - unguard_mach_port(); - break; - case 6: - unguard_nonguarded_mach_port(); - break; - case 7: - guard_port_with_context(); - break; - case 8: - get_context_mach_port(); - break; - case 9: - set_context_mach_port(); - break; - default: - usage(); - exit(1); - } - - return 0; -} - -void construct_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - mach_port_context_t g; - int kret; - - printf("Testing All mach_port_construct() options...\n"); - - printf("No options specified: "); - options.flags = 0; - kret = mach_port_construct(mach_task_self(), &options, 0, &port); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Options MPO_GUARD: "); - options.flags = MPO_CONTEXT_AS_GUARD; - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - - printf("Options MPO_GUARD|MPO_STRICT: "); - options.flags = MPO_CONTEXT_AS_GUARD|MPO_STRICT; - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret == KERN_SUCCESS) { - kret = mach_port_get_context(mach_task_self(), port, &g); - if (kret != KERN_SUCCESS || g != 0) - goto failed; - else - printf("[PASSED]\n"); - } - else - goto failed; - - printf("Options MPO_QLIMIT: "); - options.flags = MPO_QLIMIT; - mach_port_limits_t limits = { MACH_PORT_QLIMIT_SMALL }; - options.mpl = limits; - kret = mach_port_construct(mach_task_self(), &options, 0, &port); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Options MPO_TEMPOWNER: "); - options.flags = MPO_TEMPOWNER; - kret = mach_port_construct(mach_task_self(), &options, 0, &port); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Options MPO_IMPORTANCE_RECEIVER: "); - options.flags = MPO_IMPORTANCE_RECEIVER; - kret = mach_port_construct(mach_task_self(), &options, 0, &port); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Options MPO_INSERT_SEND_RIGHT: "); - options.flags = MPO_INSERT_SEND_RIGHT; - kret = mach_port_construct(mach_task_self(), &options, 0, &port); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("No options specified (Construct Port-Set): "); - options.flags = 0; - kret = mach_port_construct(mach_task_self(), &options, 0, &port); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("...Complete\n"); - return; - -failed: - printf("[FAILED %d]\n", kret); - exit(1); -} - -void destruct_guarded_mach_port() -{ - mach_port_t port; - mach_port_options_t options; 
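	/*
	 * With MPO_CONTEXT_AS_GUARD, the context value handed to
	 * mach_port_construct() doubles as the port's guard:
	 * mach_port_destruct() takes a send-right delta plus a guard argument,
	 * and the guard must match (here, gval) for destruction to proceed; a
	 * mismatch is expected to raise a fatal EXC_GUARD instead of returning.
	 */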
- mach_port_context_t gval = CONTEXT_VALUE1; - int kret; - - printf("Destructing guarded mach port with correct guard: "); - options.flags = (MPO_CONTEXT_AS_GUARD); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_destruct(mach_task_self(), port, 0, gval); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Destructing guarded mach ports with incorrect send right count: "); - options.flags = (MPO_CONTEXT_AS_GUARD); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_destruct(mach_task_self(), port, -1, gval); - if (kret != KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Destructing guarded mach ports with correct send right and correct guard: "); - options.flags = (MPO_CONTEXT_AS_GUARD|MPO_INSERT_SEND_RIGHT); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_destruct(mach_task_self(), port, -1, gval); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Destructing guarded mach port with incorrect guard (Expecting exception)...\n"); - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_destruct(mach_task_self(), port, 0, 0); - if (kret == KERN_SUCCESS) - goto failed; - return; - -failed: - printf("[FAILED]\n"); - exit(1); - -} - -void destroy_guarded_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - int kret; - - printf("Destroying guarded mach port (Expecting exception)...\n"); - options.flags = (MPO_CONTEXT_AS_GUARD); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_destroy(mach_task_self(), port); - if (kret == KERN_SUCCESS) { - printf("[FAILED]\n"); - exit(1); - } - - return; -} - -void mod_ref_guarded_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - int kret; - - printf("mach_port_mod_refs() guarded mach port (Expecting exception)...\n"); - options.flags = (MPO_CONTEXT_AS_GUARD); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_mod_refs(mach_task_self(), port, MACH_PORT_RIGHT_RECEIVE, -1); - if (kret == KERN_SUCCESS) { - printf("[FAILED]\n"); - exit(1); - } - - return; -} - -void guard_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - int kret; - - printf("Testing guarding a non-guarded mach port: "); - kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_guard(mach_task_self(), port, gval, 0); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Testing guarding a guarded mach port: "); - kret = mach_port_guard(mach_task_self(), port, CONTEXT_VALUE2, 0); - if (kret != KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - return; - -failed: - printf("[FAILED]\n"); - exit(1); - -} - -void unguard_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - int kret; - - printf("Testing unguard with correct guard: \n"); - - options.flags = 
(MPO_CONTEXT_AS_GUARD); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_unguard(mach_task_self(), port, gval); - if (kret == KERN_SUCCESS) - printf("[PASSED]\n"); - else - goto failed; - - printf("Testing unguard with incorrect guard (Expecting Exception)... \n"); - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_unguard(mach_task_self(), port, CONTEXT_VALUE2); - if (kret == KERN_SUCCESS) - goto failed; - - return; - -failed: - printf("[FAILED]\n"); - exit(1); - -} - -void unguard_nonguarded_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - int kret; - - printf("Testing mach_port_unguard() for non-guarded port (Expecting exception)...\n"); - - options.flags = 0; - - kret = mach_port_construct(mach_task_self(), &options, 0, &port); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_unguard(mach_task_self(), port, gval); - if (kret == KERN_SUCCESS) { - printf("[FAILED]\n"); - exit(1); - } - - return; -} - -void guard_port_with_context() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - int kret; - - printf("Testing mach_port_guard() for a port with context: "); - kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_set_context(mach_task_self(), port, gval); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_guard(mach_task_self(), port, gval, 0); - if (kret != KERN_SUCCESS) - printf("[PASSED]\n"); - else { - printf("[FAILED]\n"); - exit(1); - } - return; -} - -void get_context_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - mach_port_context_t g; - int kret; - - options.flags = (MPO_CONTEXT_AS_GUARD); - - printf("Testing get_context() for non-strict guarded port: "); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_get_context(mach_task_self(), port, &g); - if (kret != KERN_SUCCESS || g != gval) - goto failed; - else - printf("[PASSED]\n"); - - printf("Testing get_context() for strict guarded port: "); - options.flags = (MPO_CONTEXT_AS_GUARD|MPO_STRICT); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_get_context(mach_task_self(), port, &g); - if (kret != KERN_SUCCESS || g != 0) - goto failed; - else - printf("[PASSED]\n"); - - printf("Testing get_context() for strict guard port (guarded using mach_port_guard): "); - kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_guard(mach_task_self(), port, gval, 1); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_get_context(mach_task_self(), port, &g); - if (kret != KERN_SUCCESS || g != 0) - goto failed; - else - printf("[PASSED]\n"); - - printf("Testing get_context() for non-guarded port with context: "); - kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_set_context(mach_task_self(), port, gval); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_get_context(mach_task_self(), port, &g); - if (kret != KERN_SUCCESS || g != gval) - goto failed; - else - 
printf("[PASSED]\n"); - - return; - - -failed: - printf("[FAILED]\n"); - exit(1); -} - -void set_context_mach_port() -{ - mach_port_t port; - mach_port_options_t options; - mach_port_context_t gval = CONTEXT_VALUE1; - mach_port_context_t g; - int kret; - - printf("Testing set_context() with non-guarded port: "); - kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); - if (kret != KERN_SUCCESS) - exit(1); - kret = mach_port_set_context(mach_task_self(), port, gval); - if (kret != KERN_SUCCESS) - goto failed; - else - printf("[PASSED]\n"); - - printf("Testing setting context on non-guarded port with pre-existing context: "); - kret = mach_port_set_context(mach_task_self(), port, CONTEXT_VALUE2); - if (kret != KERN_SUCCESS) - goto failed; - else - printf("[PASSED]\n"); - - printf("Testing setting context on strict guarded port (Expecting Exception)...\n"); - - options.flags = (MPO_CONTEXT_AS_GUARD|MPO_STRICT); - - kret = mach_port_construct(mach_task_self(), &options, gval, &port); - if (kret != KERN_SUCCESS) - exit(1); - - kret = mach_port_set_context(mach_task_self(), port, CONTEXT_VALUE2); - if (kret == KERN_SUCCESS) - goto failed; - - return; - -failed: - printf("[FAILED]\n"); - exit(1); -} - - - - diff --git a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c deleted file mode 100644 index 15a7d6702..000000000 --- a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Testing Framework for EXC_GUARD exceptions - * - * The framework tests for exception conditions for guarded mach ports. - * It creates a new exception port and an associated handling thread. - * For each test case, the framework sets its own exception port to the - * newly allocated port, execs a new child (which inherits the new - * exception port) and restores the parent's exception port to the - * original handler. The child process is invoked with a different - * test case identifier and invokes the corresponding test case. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_TEST_ID_LEN 16 -#define MAX_ARGV 8 -#define EXC_CODE_SHIFT 32 -#define EXC_GUARD_TYPE_SHIFT 29 - -/* - * To add a new test case to this framework: - * - Increment the NUMTESTS value - * - Add (Guard Type | flavor) to "test_exception_code" if the - * test case generates an exception; 0 otherwise - * - Add a new case and routine in guarded_test.c to - * test the scenario - */ - -#define NUMTESTS 10 - -uint64_t test_exception_code[] = { - 0, - (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_DESTROY, - (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_DESTROY, - (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_MOD_REFS, - 0, - (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_INCORRECT_GUARD, - (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_UNGUARDED, - 0, - 0, - (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_SET_CONTEXT -}; - -mach_port_t exc_port; -uint64_t exception_code; -extern char **environ; - -boolean_t mach_exc_server( - mach_msg_header_t *InHeadP, - mach_msg_header_t *OutHeadP); - -kern_return_t catch_mach_exception_raise -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt - ) -{ - if (exception == EXC_GUARD) { - /* Set global variable to indicate exception received */ - exception_code = *((uint64_t *)code); - } else { - /* Terminate test on all other unexpected exceptions */ - fprintf(stderr, "received unexpected exception type %#x\n", exception); - exit(1); - } - - return (KERN_SUCCESS); -} - -kern_return_t catch_mach_exception_raise_state -( - mach_port_t exception_port, - exception_type_t exception, - const mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - const thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt - ) -{ - fprintf(stderr, "Unexpected exception handler called\n"); - exit(1); - return (KERN_FAILURE); -} - - -kern_return_t catch_mach_exception_raise_state_identity -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt - ) -{ - fprintf(stderr, "Unexpected exception handler called\n"); - exit(1); - return (KERN_FAILURE); -} - - -void *server_thread(void *arg) -{ - kern_return_t kr; - - while(1) { - /* Handle exceptions on exc_port */ - if ((kr = mach_msg_server_once(mach_exc_server, 4096, exc_port, 0)) != KERN_SUCCESS) { - fprintf(stderr, "mach_msg_server_once: error %#x\n", kr); - exit(1); - } - } - return (NULL); -} - -int main(int argc, char *argv[]) -{ - posix_spawnattr_t attrs; - kern_return_t kr; - mach_port_t task = mach_task_self(); - - mach_msg_type_number_t maskCount = 1; - exception_mask_t mask; - exception_handler_t handler; - exception_behavior_t behavior; - thread_state_flavor_t flavor; - pthread_t exception_thread; - uint64_t exc_id; - unsigned int exc_fd; - - char *test_prog_name = "./guarded_mp_test"; - char *child_args[MAX_ARGV]; - char test_id[MAX_TEST_ID_LEN]; - int i, err; - int child_status; - int test_status = 0; - - /* Allocate and 
initialize new exception port */ - if ((kr = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port)) != KERN_SUCCESS) { - fprintf(stderr, "mach_port_allocate: %#x\n", kr); - exit(1); - } - - if ((kr = mach_port_insert_right(task, exc_port, - exc_port, MACH_MSG_TYPE_MAKE_SEND)) != KERN_SUCCESS) { - fprintf(stderr, "mach_port_allocate: %#x\n", kr); - exit(1); - } - - /* Get Current exception ports */ - if ((kr = task_get_exception_ports(task, EXC_MASK_GUARD, &mask, - &maskCount, &handler, &behavior, &flavor)) != KERN_SUCCESS) { - fprintf(stderr,"task_get_exception_ports: %#x\n", kr); - exit(1); - } - - /* Create exception serving thread */ - if ((err = pthread_create(&exception_thread, NULL, server_thread, 0)) != 0) { - fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); - exit(1); - } - - pthread_detach(exception_thread); - - /* Initialize posix_spawn attributes */ - posix_spawnattr_init(&attrs); - - if ((err = posix_spawnattr_setflags(&attrs, POSIX_SPAWN_SETEXEC)) != 0) { - fprintf(stderr, "posix_spawnattr_setflags: %s\n", strerror(err)); - exit(1); - } - - /* Run Tests */ - for(i=0; i> EXC_CODE_SHIFT); - printf("EXC_GUARD Received: "); - (exc_id != 0)?printf("Yes (Code 0x%llx)\n", exception_code):printf("No\n"); - printf("Expected Exception Code: 0x%llx\n", test_exception_code[i]); - printf("Test Result: "); - if((WIFEXITED(child_status) && WEXITSTATUS(child_status)) || - (exc_id != test_exception_code[i])) { - test_status = 1; - printf("FAILED\n"); - } - else { - printf("PASSED\n"); - } - printf("-------------------\n"); - - } - - exit(test_status); -} - - diff --git a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs deleted file mode 100644 index 4b6cc647b..000000000 --- a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University - * All Rights Reserved. 
- * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -#include diff --git a/tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c b/tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c deleted file mode 100644 index 65f876b70..000000000 --- a/tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Unit test to verify that PROC_PIDUNIQIDENTIFIERINFO is an unprivileged operation. - * - * Test calls PROC_PIDTBSDINFO, PROC_PIDTASKINFO, PROC_PIDT_SHORTBSDINFO, PROC_PIDUNIQIDENTIFIERINFO on the process - * as well as on launchd to verify that PROC_PIDT_SHORTBSDINFO and PROC_PIDUNIQIDENTIFIERINFO are unprivileged - * operations while PROC_PIDTBSDINFO and PROC_PIDTASKINFO are privileged ones. - */ - -#include -#include -#include -#include -#include -#include - - -#define TEST_PASS 1 -#define TEST_FAIL 0 - -int -bsdinfo_test(int pid, int result) -{ - struct proc_bsdinfo bsdinfo; - int error; - - - error = proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &bsdinfo, sizeof(bsdinfo)); - if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == TEST_FAIL)) { - printf("[PASS]: Privilege test on pid = %d for PROC_PIDTBSDINFO passed\n", pid); - return 0; - } else { - printf("[FAIL]: Privilege test on pid = %d for PROC_PIDTBSDINFO failed\n", pid); - return 1; - } - -} - -int -taskinfo_test(int pid, int result) -{ - struct proc_taskinfo taskinfo; - int error; - - - error = proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &taskinfo, sizeof(taskinfo)); - if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == TEST_FAIL)) { - printf("[PASS]: Privilege test on pid = %d for PROC_PIDTASKINFO passed\n", pid); - return 0; - } else { - printf("[FAIL]: Privilege test on pid = %d for PROC_PIDTASKINFO failed\n", pid); - return 1; - } -} - -int -bsdshortinfo_test(int pid, int result) -{ - struct proc_bsdshortinfo bsdshortinfo; - int error; - - - error = proc_pidinfo(pid, PROC_PIDT_SHORTBSDINFO, 0, &bsdshortinfo, sizeof(bsdshortinfo)); - if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == TEST_FAIL)) { - printf("[PASS]: Privilege test on pid = %d for PROC_PIDT_SHORTBSDINFO passed\n", pid); - return 0; - } else { - printf("[FAIL]: Privilege test on pid = %d for PROC_PIDT_SHORTBSDINFO failed\n", pid); - return 1; - } -} - - -int -piduniqid_test(int pid, int result) -{ - struct proc_uniqidentifierinfo uniqidinfo; - int error; - - - error = proc_pidinfo(pid, PROC_PIDUNIQIDENTIFIERINFO, 0, &uniqidinfo, sizeof(uniqidinfo)); - if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == 
TEST_FAIL)) { - printf("[PASS]: Privilege test on pid = %d for PROC_PIDUNIQIDENTIFIERINFO passed\n", pid); - return 0; - } else { - printf("[FAIL]: Privilege test on pid = %d for PROC_PIDUNIQIDENTIFIERINFO failed\n", pid); - return 1; - } - -} - - -int main() -{ - int selfpid, launchdpid; - - selfpid = getpid(); - launchdpid = 1; - - if (bsdinfo_test(selfpid, TEST_PASS)) - goto fail; - if (bsdinfo_test(launchdpid, TEST_FAIL)) - goto fail; - - if (taskinfo_test(selfpid, TEST_PASS)) - goto fail; - if (taskinfo_test(launchdpid, TEST_FAIL)) - goto fail; - - if (bsdshortinfo_test(selfpid, TEST_PASS)) - goto fail; - if (bsdshortinfo_test(launchdpid, TEST_PASS)) - goto fail; - - if (piduniqid_test(selfpid, TEST_PASS)) - goto fail; - if (piduniqid_test(launchdpid, TEST_PASS)) - goto fail; - - - printf("Privilege test for libproc passed [PASS] \n"); - return 0; - -fail: - printf("Privilege test for libproc failed [FAIL] \n"); - return 1; -} - diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist deleted file mode 100644 index a5398e575..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist +++ /dev/null @@ -1,8 +0,0 @@ - - - - - com.apple.private.kernel.override-cpumon - - - diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj deleted file mode 100644 index 0aa6e053d..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj +++ /dev/null @@ -1,324 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 46; - objects = { - -/* Begin PBXBuildFile section */ - 15181D991683B73E0002FB18 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 15181D981683B73E0002FB18 /* Foundation.framework */; }; - 15181D9E1683B73E0002FB18 /* monitor_stress.m in Sources */ = {isa = PBXBuildFile; fileRef = 15181D9D1683B73E0002FB18 /* monitor_stress.m */; }; - 15181DA91683B7550002FB18 /* monitor_stress.m in Sources */ = {isa = PBXBuildFile; fileRef = 15181D9D1683B73E0002FB18 /* monitor_stress.m */; }; - 15181DAB1683B7550002FB18 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 15181D981683B73E0002FB18 /* Foundation.framework */; }; -/* End PBXBuildFile section */ - -/* Begin PBXCopyFilesBuildPhase section */ - 15181D931683B73E0002FB18 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; - 15181DAC1683B7550002FB18 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; -/* End PBXCopyFilesBuildPhase section */ - -/* Begin PBXFileReference section */ - 15181D951683B73E0002FB18 /* monitor_stress */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = monitor_stress; sourceTree = BUILT_PRODUCTS_DIR; }; - 15181D981683B73E0002FB18 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; - 
15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = AspenFamily.xcconfig; path = AppleInternal/XcodeConfig/AspenFamily.xcconfig; sourceTree = DEVELOPER_DIR; }; - 15181D9D1683B73E0002FB18 /* monitor_stress.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = monitor_stress.m; sourceTree = ""; }; - 15181DB11683B7550002FB18 /* monitor_stress */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = monitor_stress; sourceTree = BUILT_PRODUCTS_DIR; }; -/* End PBXFileReference section */ - -/* Begin PBXFrameworksBuildPhase section */ - 15181D921683B73E0002FB18 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 15181D991683B73E0002FB18 /* Foundation.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 15181DAA1683B7550002FB18 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 15181DAB1683B7550002FB18 /* Foundation.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXFrameworksBuildPhase section */ - -/* Begin PBXGroup section */ - 15181D8C1683B73E0002FB18 = { - isa = PBXGroup; - children = ( - 15181D9A1683B73E0002FB18 /* monitor_stress */, - 15181D971683B73E0002FB18 /* Frameworks */, - 15181D961683B73E0002FB18 /* Products */, - ); - sourceTree = ""; - }; - 15181D961683B73E0002FB18 /* Products */ = { - isa = PBXGroup; - children = ( - 15181D951683B73E0002FB18 /* monitor_stress */, - 15181DB11683B7550002FB18 /* monitor_stress */, - ); - name = Products; - sourceTree = ""; - }; - 15181D971683B73E0002FB18 /* Frameworks */ = { - isa = PBXGroup; - children = ( - 15181D981683B73E0002FB18 /* Foundation.framework */, - ); - name = Frameworks; - sourceTree = ""; - }; - 15181D9A1683B73E0002FB18 /* monitor_stress */ = { - isa = PBXGroup; - children = ( - 15181D9D1683B73E0002FB18 /* monitor_stress.m */, - 15181D9B1683B73E0002FB18 /* Supporting Files */, - ); - path = monitor_stress; - sourceTree = ""; - }; - 15181D9B1683B73E0002FB18 /* Supporting Files */ = { - isa = PBXGroup; - children = ( - 15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */, - ); - name = "Supporting Files"; - sourceTree = ""; - }; -/* End PBXGroup section */ - -/* Begin PBXNativeTarget section */ - 15181D941683B73E0002FB18 /* ios */ = { - isa = PBXNativeTarget; - buildConfigurationList = 15181DA41683B73E0002FB18 /* Build configuration list for PBXNativeTarget "ios" */; - buildPhases = ( - 15181D911683B73E0002FB18 /* Sources */, - 15181D921683B73E0002FB18 /* Frameworks */, - 15181D931683B73E0002FB18 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = ios; - productName = monitor_stress; - productReference = 15181D951683B73E0002FB18 /* monitor_stress */; - productType = "com.apple.product-type.tool"; - }; - 15181DA71683B7550002FB18 /* osx */ = { - isa = PBXNativeTarget; - buildConfigurationList = 15181DAE1683B7550002FB18 /* Build configuration list for PBXNativeTarget "osx" */; - buildPhases = ( - 15181DA81683B7550002FB18 /* Sources */, - 15181DAA1683B7550002FB18 /* Frameworks */, - 15181DAC1683B7550002FB18 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = osx; - productName = monitor_stress; - productReference = 15181DB11683B7550002FB18 /* monitor_stress */; - productType = "com.apple.product-type.tool"; - }; -/* End PBXNativeTarget section */ - -/* Begin PBXProject section */ - 
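/* For orientation in the project object that follows: it wires up the two
   PBXNativeTargets listed above, "ios" (SDKROOT iphoneos.internal) and "osx"
   (SDKROOT macosx.internal), each building the same monitor_stress.m source
   as a command-line tool. */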
15181D8D1683B73E0002FB18 /* Project object */ = { - isa = PBXProject; - attributes = { - LastUpgradeCheck = 0460; - ORGANIZATIONNAME = apple; - }; - buildConfigurationList = 15181D901683B73E0002FB18 /* Build configuration list for PBXProject "monitor_stress" */; - compatibilityVersion = "Xcode 3.2"; - developmentRegion = English; - hasScannedForEncodings = 0; - knownRegions = ( - en, - ); - mainGroup = 15181D8C1683B73E0002FB18; - productRefGroup = 15181D961683B73E0002FB18 /* Products */; - projectDirPath = ""; - projectRoot = ""; - targets = ( - 15181DA71683B7550002FB18 /* osx */, - 15181D941683B73E0002FB18 /* ios */, - ); - }; -/* End PBXProject section */ - -/* Begin PBXSourcesBuildPhase section */ - 15181D911683B73E0002FB18 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 15181D9E1683B73E0002FB18 /* monitor_stress.m in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 15181DA81683B7550002FB18 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 15181DA91683B7550002FB18 /* monitor_stress.m in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXSourcesBuildPhase section */ - -/* Begin XCBuildConfiguration section */ - 15181DA21683B73E0002FB18 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = 15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; - CLANG_CXX_LIBRARY = "libc++"; - CLANG_ENABLE_OBJC_ARC = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = NO; - GCC_C_LANGUAGE_STANDARD = gnu99; - GCC_DYNAMIC_NO_PIC = NO; - GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); - GCC_STRICT_ALIASING = YES; - GCC_SYMBOLS_PRIVATE_EXTERN = NO; - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 7.0; - ONLY_ACTIVE_ARCH = YES; - SDKROOT = iphoneos.internal; - }; - name = Debug; - }; - 15181DA31683B73E0002FB18 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = 15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; - CLANG_CXX_LIBRARY = "libc++"; - CLANG_ENABLE_OBJC_ARC = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = NO; - GCC_C_LANGUAGE_STANDARD = gnu99; - GCC_STRICT_ALIASING = YES; - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 7.0; - SDKROOT = iphoneos.internal; - VALIDATE_PRODUCT = YES; - }; - name = Release; - }; - 15181DA51683B73E0002FB18 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = "monitor_stress-Entitlements.plist"; - CODE_SIGN_IDENTITY = "-"; - PRODUCT_NAME = "$(PROJECT_NAME)"; - PROVISIONING_PROFILE = ""; - }; - name = Debug; - }; - 15181DA61683B73E0002FB18 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - 
CODE_SIGN_ENTITLEMENTS = "monitor_stress-Entitlements.plist"; - CODE_SIGN_IDENTITY = "-"; - PRODUCT_NAME = "$(PROJECT_NAME)"; - PROVISIONING_PROFILE = ""; - }; - name = Release; - }; - 15181DAF1683B7550002FB18 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CLANG_ENABLE_OBJC_ARC = NO; - PRODUCT_NAME = "$(PROJECT_NAME)"; - SDKROOT = macosx.internal; - SUPPORTED_PLATFORMS = macosx; - }; - name = Debug; - }; - 15181DB01683B7550002FB18 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CLANG_ENABLE_OBJC_ARC = NO; - PRODUCT_NAME = "$(PROJECT_NAME)"; - SDKROOT = macosx.internal; - SUPPORTED_PLATFORMS = macosx; - }; - name = Release; - }; -/* End XCBuildConfiguration section */ - -/* Begin XCConfigurationList section */ - 15181D901683B73E0002FB18 /* Build configuration list for PBXProject "monitor_stress" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 15181DA21683B73E0002FB18 /* Debug */, - 15181DA31683B73E0002FB18 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 15181DA41683B73E0002FB18 /* Build configuration list for PBXNativeTarget "ios" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 15181DA51683B73E0002FB18 /* Debug */, - 15181DA61683B73E0002FB18 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 15181DAE1683B7550002FB18 /* Build configuration list for PBXNativeTarget "osx" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 15181DAF1683B7550002FB18 /* Debug */, - 15181DB01683B7550002FB18 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; -/* End XCConfigurationList section */ - }; - rootObject = 15181D8D1683B73E0002FB18 /* Project object */; -} diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata deleted file mode 100644 index 99f7ee1f3..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata +++ /dev/null @@ -1,7 +0,0 @@ - - - - - diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/xcuserdata/rab.xcuserdatad/UserInterfaceState.xcuserstate b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/xcuserdata/rab.xcuserdatad/UserInterfaceState.xcuserstate deleted file mode 100644 index ea729bdfb8023c559d96edc98223a3f8b3dc5e48..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 87338 zcmdqK1$Z0B^9FpocRD`Zg|UspX~PVq7-Yva*0RH?>ex!+#<5*Pnlx#9Q)Xril$n|7 zSB8|CnVH*f=cJP*S&}8&{eR!{wS5{znt6A3c6XNFS=m%y*U~y=%1a#KD93XG=i$U& z{d)CZJZgQgxuveLVezQOmHP*4T3eQks&1}XQ^&rp9M#&`)SDwmZMnXea1u3{gq3h{Jx|tqI52HuYW9aepBzh`6gPu*#qZiUGbUVF_UP-T^ z*VCKmt@I9hH@%NONFSm9qfgRj==1bN`U-uWzD3`qAJC8KXY@<@4gH?}M1Q4!(7$<( z=XsG=c%3)--h4lPAU}lPnIF!N;>Ys4@)P+f{B(X6KbPN~-;2-Tef$Exh%e>$;Vbz? 
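[ Format note: a "GIT binary patch" hunk stores the file contents zlib-deflated
and base85-encoded, one length-prefixed character per line; "literal 87338"
above is the size in bytes of the deleted file after decompression. The file
itself, UserInterfaceState.xcuserstate, is per-user Xcode window/UI state and
has no bearing on the tests being removed. ]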
[ 87,338-byte base85 payload of UserInterfaceState.xcuserstate omitted ]
W`=1~Q=Ca>B_PcX`dlGQp{?W*6{}kl0egAAL4(a8P{tuPJo*lABhnmuycNxlX)N<$x%=(a79y-cTsOOM+4y)&I z9-MX9?jEkno7BZQhn;iSIfoaqg0E1|Vf7qV&k>m%k;##Cyo`R1)I_dF^l_vg6PS){ zj;!KaHgS>b{1w8{RHWxAic*Tw=$DY>uwPc}EYUmZN7wIQArI zNXxU7p&a#TK`Z*>tYagP?=fc_`-TIY;1mx-_$e7Fc#g7^rzIWf%qS)@nQeGqev5P+Ra?;+tF*^lPxW9qYf<+pJ9+vElJE@Xc3NJio6(LAe85Lc;!D=BjzgT{ObBO^Vs2;L zeWp0&coAou=|*?teP$N3(es%fk^7me+~Rf!XS4D&`RPJ0`r!L#egCZQ|7=Hoeu_-A zB_6%~thS%k_Osf4k;5-)`9%)Dw4@`Q@%b;#`ej=P=j41&&gbNO?hP8zgz0?3r~Jk> zu7~hz5xld%zQDT-WjNc}&E62sr$t@oGt&?|dp?f2*!%N#_Pl=2>-YTa5Pq}gzm-8= zzr96&2C$mVYz^T;1gX&Xg(}#=3w4;rJnX`SUvcgQwO%Yl3DkP=ea0{j^SWqW7tQN; z8UJ1k_y2A_zxU-GcCwEHAzYHrrHshuQVqO^m-KYWE?k<)$DHMNE{AYA4>G#!+{^at zazFHQc>~|!?8^^AxMEkXn9r3rk@uBmtVOR^WO-GFS7mrrhF2RSzpHIohFq@t?jKR8 z>yIq-MrMBuVi$)w8p5?_C{0;>cFi7N8;zQ;srj0k|9qM1)I>jjE?`jz*JXY^6*9lx z6?a_k%lGW(U6@gXyq#ZB~i<538I)uTCf@UKt#f-gh3nG8MLjG+Y`=)@1m z{-#;|U5RSQ{_n5Z$mS4k>HAi0@}j0&db>3Qb=^A7$q@c2%=47OXa9_5ET{O5iy_?h z`Rx~}$T+4loeTWQjS%ispbD?!^E-3-gq!H`jvnv&{BCXPu$Yys3gMo2?_O52^B%() z$r;>#?@|c&YtfL#tiii^-@ExB1D<=36Ys@?{tRF*$I$0Pxj!`9hvn(cJ9sZ2u3#M- zxEI2sCrL_4D)16L@!d!2dbE{9c8BoTy^r1d*u9T^=dtfR_MOMu`S!DagE#D4G~XPqb~JX$Z}SOh$K&t znn<3bEamA)58RpLQ@-F!j&qLlAtGrSo%5`tUIeSi~VtafSyWB3V+B z5KWK9LNwx zFp9B^XFlsgM5@}jGgTbz@O&znrRqj^%r}*1Qq5o<@=vvpC9Gf->Psb;R2$G+>XhW6 z2qh^^IVw;I`KESn>V`C-IjvBCYPF}1$K9#rpL!-g@HY>693s*rB^{Z`Mo!$9rVvGW zmJ+x-jXTr0GmZPwxF=0p+>^#VY21^h7qUvz4|7fM& zxhR11qh7|YL{-K7qiXRc_8_V??u&9?l>4IG7p1Q#dmPo98EnE_qxBP=m8a1|bScW< z{OH$lZnXJCJ1g25(awl=Mzq?a2k}03En4l-&W)bKXW>8p@Lv7rA2FH#`{)1sb42vQ F{{!E!ey{)l diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme deleted file mode 100644 index e978aa76a..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme +++ /dev/null @@ -1,59 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme deleted file mode 100644 index 8018bd6a5..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme +++ /dev/null @@ -1,86 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme deleted file mode 100644 index 15afb2f64..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme +++ /dev/null @@ -1,86 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist 
b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist deleted file mode 100644 index fb0224c31..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist +++ /dev/null @@ -1,42 +0,0 @@ - - - - - SchemeUserState - - monitor_stress 2.xcscheme - - orderHint - 2 - - monitor_stress copy.xcscheme - - orderHint - 1 - - monitor_stress.xcscheme - - orderHint - 0 - - - SuppressBuildableAutocreation - - 15181D941683B73E0002FB18 - - primary - - - 15181DA71683B7550002FB18 - - primary - - - 15181DB21683B8700002FB18 - - primary - - - - - diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m deleted file mode 100644 index 267bb86c5..000000000 --- a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m +++ /dev/null @@ -1,178 +0,0 @@ -#define TARGET_OS_EMBEDDED 1 -#include -#include -#include -#include -#include -#include -#include -#include - -#import - -#include -#include - -#define MAX_THREADS 100 - -char *pname; - -int pid; - -int exit_after = -1; - -int percentage = 95, interval = 600; - -int wakemon_rate = 150; - -int limit = 0; // Worker thread should apply per-thread limit to self? -int limit_period = 5000; - -void usage(void) { - printf("usage: monitor_stress [ -c nthreads ] [ -w nthreads ] \n"); - printf("\t-c: number of CPU usage monitor stress threads to use (default: 2\n"); - printf("\t-w: number of wakeups monitor stress threads to use (default: 0\n"); - printf("\t-e: exit after this many seconds (default: run forever)\n"); - printf("\t-p: act on this pid (default: self)\n"); -} - -void *perthr_limit_thread(void *arg) -{ - int percent = 90, refill_period = 30; // time unit is milliseconds - int err; - int cpupercent; - -top: - cpupercent = percent | (refill_period << 8); - - if ((err = sysctlbyname("kern.setthread_cpupercent", 0, 0, - &cpupercent, sizeof (int))) != 0) { - printf("kern.setthread_cpupercent: error %d\n", err); - exit(1); - } - goto top; -} - -void *cpumon_stress_thread(void *arg) -{ -top: - if (proc_set_cpumon_params(pid, percentage, interval) != 0) { - perror("proc_set_cpumon_params"); - exit(1); - } - if (proc_disable_cpumon(pid) != 0) { - perror("proc_disable_cpumon"); - exit(1); - } - goto top; -} - -void *wakemon_stress_thread(void *arg) -{ -top: - if (proc_set_wakemon_params(pid, wakemon_rate, 0) != 0) { - perror("proc_set_wakemon_params"); - exit(1); - } - if (proc_disable_wakemon(pid) != 0) { - perror("proc_disable_wakemon"); - exit(1); - } - goto top; -} - -void *exit_thread(void *arg) -{ - sleep(exit_after); - printf("...exiting.\n"); - exit(0); - - return (NULL); -} - -int main(int argc, char *argv[]) -{ - int ch; - int i = 0; - int cpumon_threads = 2; - int wakemon_threads = 0; - - pthread_t thr_id; - - pname = basename(argv[0]); - pid = getpid(); - - while ((ch = getopt(argc, argv, "c:w:e:p:h?")) != -1) { - switch (ch) { - case 'c': - cpumon_threads = atoi(optarg); - break; - case 'w': - wakemon_threads = atoi(optarg); - break; - case 'e': - exit_after = atoi(optarg); - break; - case 'p': - pid = atoi(optarg); - break; - case 'h': - default: - usage(); - exit(1); - - } - } - argc -= optind; - argv += optind; - - if (argc != 0) { - usage(); - exit(1); - } - - if ((cpumon_threads <= 0) || (cpumon_threads > MAX_THREADS) || - 
(wakemon_threads < 0) || (wakemon_threads > MAX_THREADS)) { - printf("%s: %d/%d threads too many (max is %d)\n", pname, - cpumon_threads, wakemon_threads, MAX_THREADS); - exit(1); - } - - printf("%s: creating %d CPU usage monitor stress threads (1 will be main thread), ", pname, cpumon_threads); - if (wakemon_threads > 0) { - printf( "%d wakeups monitor stress threads, ", wakemon_threads); - } - printf("and 1 per-thread CPU limit stress thread.\n"); - - if (pthread_create(&thr_id, NULL, perthr_limit_thread, NULL) != 0) { - perror("pthread_create"); - exit(1); - } - - for (i = 0; i < wakemon_threads; i++) { - if (pthread_create(&thr_id, NULL, wakemon_stress_thread, NULL) != 0) { - perror("pthread_create"); - exit(1); - } - } - - // main thread will be used as stress thread too, so start count at 1 - for (i = 1; i < cpumon_threads; i++) { - if (pthread_create(&thr_id, NULL, cpumon_stress_thread, NULL) != 0) { - perror("pthread_create"); - exit(1); - } - } - - if (exit_after >= 0) { - printf("%s: will exit after %d seconds\n", pname, exit_after); - if (pthread_create(&thr_id, NULL, exit_thread, NULL) != 0) { - perror("pthread_create"); - exit(1); - } - } - - cpumon_stress_thread(NULL); - - return (0); -} diff --git a/tools/tests/unit_tests/pipe_test_10807398_src/child.c b/tools/tests/unit_tests/pipe_test_10807398_src/child.c deleted file mode 100644 index fbd51922c..000000000 --- a/tools/tests/unit_tests/pipe_test_10807398_src/child.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -int main(int argc, char **argv) -{ - int fd, r; - char buf[32]; - - if (argc != 2) { - fprintf(stderr, "Usage: %s fd\n", argv[0]); - return 1; - } - fd = atoi(argv[1]); - - printf("child read(%d)...\n", fd); - r = read(fd, buf, sizeof buf - 1); - if (r < 0) - perror("read"); - else { - buf[r] = 0; - printf("child read(%d) = \"%s\"\n", fd, buf); - } - close(fd); - printf("child done\n"); - return 0; -} diff --git a/tools/tests/unit_tests/pipe_test_10807398_src/parent.c b/tools/tests/unit_tests/pipe_test_10807398_src/parent.c deleted file mode 100644 index bafaf0277..000000000 --- a/tools/tests/unit_tests/pipe_test_10807398_src/parent.c +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include -#include -#include -#include - -int main(int argc, char **argv) -{ - int fd[2], pid, r; - char *args[3], buf[32]; - struct pollfd pfd; - extern char **environ; - - if (pipe(fd) < 0) { - perror("pipe"); - return 1; - } - - snprintf(buf, sizeof buf, "%d", fd[0]); - - args[0] = "./child"; - args[1] = buf; - args[2] = 0; - - switch (pid = fork()) { - case -1: - perror("fork"); - return 1; - case 0: /* child */ - close(fd[1]); - execve(args[0], args, environ); - perror(args[0]); - _exit(1); - default: /* parent */ - close(fd[0]); - pfd.fd = fd[1]; - pfd.events = POLLOUT; - pfd.revents = 0; - printf("parent poll(%d)...\n", pfd.fd); - errno = 0; - r = poll(&pfd, 1, -1); - printf("parent poll(%d) returned %d errno %d[%s]\n", - pfd.fd, r, errno, strerror(errno)); - write(fd[1], "howdy", 5); - close(fd[1]); - printf("parent done\n"); - } - - return 0; -} diff --git a/tools/tests/unit_tests/pipes_fill_procinfo_11179336.c b/tools/tests/unit_tests/pipes_fill_procinfo_11179336.c deleted file mode 100644 index 18bce49ed..000000000 --- a/tools/tests/unit_tests/pipes_fill_procinfo_11179336.c +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include -#include -#include -#include - -int main(){ - int pipe_fds[2]; - if (pipe(&pipe_fds[0]) < 0) { - perror("pipe"); - goto fail; - } - struct pipe_fdinfo pdinfo; - /* from the headers - int 
proc_pidfdinfo(int pid, int fd, int flavor, void * buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
-	 */
-	int mypid = getpid();
-	int flavor = PROC_PIDFDPIPEINFO;
-	int nv = proc_pidfdinfo(mypid, pipe_fds[0], flavor, (void *) &pdinfo, sizeof(pdinfo));
-	if (nv < 0) {
-		perror("proc_pidinfo");
-		goto fail;
-	}
-	printf("handle value = %p \n", (void *)pdinfo.pipeinfo.pipe_handle);
-	struct stat mystat;
-	fstat(pipe_fds[0], &mystat);
-	printf("ino value = %p \n", (void *)mystat.st_ino);
-
-	if ( (uintptr_t)mystat.st_ino == (uintptr_t)pdinfo.pipeinfo.pipe_handle)
-		goto success;
- fail:
-	printf("[FAILED] fill_pipeinfo returned wrong values. (i.e. pipeinfo->pipe_handle != fstat->st_ino ) \n");
-	return -1;
- success:
-	printf("[PASSED] fill_pipeinfo returned correct values. (i.e. pipeinfo->pipe_handle == fstat->st_ino ) \n");
-	return 0;
-}
-
diff --git a/tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c b/tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c
deleted file mode 100644
index 2f5866590..000000000
--- a/tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Test program for checking that the per-thread current working directories
- * are happy.
- */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#ifndef SYS___pthread_chdir
-#define SYS___pthread_chdir 348
-#endif
-
-#ifndef SYS___pthread_fchdir
-#define SYS___pthread_fchdir 349
-#endif
-
-/*
- * /tmp is a symlink, so use full path for strict compare
- */
-#define WORKDIR "/private/tmp/ptwork"
-#define WORKDIR1 WORKDIR "/one"
-#define WORKDIR2 WORKDIR "/two"
-
-
-int
-pthread_chdir_np(char *path)
-{
-	return syscall(SYS___pthread_chdir, path);
-}
-
-int
-pthread_fchdir_np(int fd)
-{
-	return syscall(SYS___pthread_fchdir, fd);
-}
-
-
-/*
- * This is a slow routine, just like getcwd(); people should remember that
- * they set something, instead of asking us what they told us.
- */
-char *
-pthread_getcwd_np(char *buf, size_t size)
-{
-	int fd_cwd;
-
-	/*
-	 * XXX disable compatibility hack, since we have no compatibility
-	 * XXX to protect.
-	 */
-	if (buf == NULL)
-		return (NULL);
-
-	/*
-	 * Open the "current working directory"; if we are running on a per
-	 * thread working directory, that's the one we will get.
-	 */
-	if ((fd_cwd = open(".", O_RDONLY)) == -1)
-		return (NULL);
-
-	/*
-	 * Switch off the per thread current working directory, in case we
-	 * were on one; this fails if we aren't running with one.
-	 */
-	if (pthread_fchdir_np( -1) == -1) {
-		/* We aren't running with one... all done. */
-		close (fd_cwd);
-		return (NULL);
-	}
-
-	/*
-	 * If we successfully switched off, then we switch back...
-	 * this may fail catastrophically, if we no longer have rights;
-	 * this should never happen, but threads may clobber our fd out
-	 * from under us, etc..
-	 */
-	if (pthread_fchdir_np(fd_cwd) == -1) {
-		close(fd_cwd);
-		errno = EBADF;	/* sigil for catastrophic failure */
-		return (NULL);
-	}
-
-	/* Close our directory handle */
-	close(fd_cwd);
-
-	/*
-	 * And call the regular getcwd(), which will return the per thread
-	 * current working directory instead of the process one.
-	 */
-	return getcwd(buf, size);
-}
-
-
-int
-main(int ac, char *av[])
-{
-	char buf[MAXPATHLEN];
-	char *p;
-
-	/*
-	 * First, verify that we are NOT using a per thread current working
-	 * directory...
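-	 * (Background on the wrappers defined above: pthread_chdir_np() and
-	 * pthread_fchdir_np() invoke the private syscalls 348/349 to set a
-	 * working directory for the calling thread only; pthread_fchdir_np(-1)
-	 * drops back to the process cwd and returns -1 if no per-thread cwd
-	 * was active, which is exactly the clean starting state checked below.)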
-	 */
-	if (pthread_fchdir_np( -1) != -1) {
-		fprintf(stderr, "FAIL: Started out on PT CWD\n");
-		exit(1);
-	}
-
-	/* Blow the umask to avoid shooting our foot */
-	umask(0);	/* "always successful" */
-
-	/* Now set us up the test directories... */
-
-	if (mkdir(WORKDIR, 0777) == -1 && errno != EEXIST) {
-		perror("FAIL: mkdir: " WORKDIR);
-		exit(2);
-	}
-
-	printf("workdir \"" WORKDIR "\" created\n");
-
-	if (mkdir(WORKDIR1, 0777) == -1 && errno != EEXIST) {
-		perror("FAIL: mkdir: " WORKDIR1);
-		exit(2);
-	}
-
-	printf("workdir \"" WORKDIR1 "\" created\n");
-
-	if (mkdir(WORKDIR2, 0777) == -1 && errno != EEXIST) {
-		perror("FAIL: mkdir: " WORKDIR2);
-		exit(2);
-	}
-
-	printf("workdir \"" WORKDIR2 "\" created\n");
-
-	/* Change the process current working directory to WORKDIR1 */
-
-	if (chdir(WORKDIR1) == -1) {
-		perror("FAIL: chdir: \"" WORKDIR1 "\" failed\n");
-		exit(3);
-	}
-
-	printf("process current working directory changed to \"" WORKDIR1 "\"...\n");
-
-	printf("verifying; getcwd says: \"%s\"\n", getcwd(buf, MAXPATHLEN));
-	if (strcmp(WORKDIR1, buf)) {
-		fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR1, buf);
-		exit(3);
-	}
-	printf("verified.\n");
-
-	/* Verify that we don't get an answer for pthread_getcwd_np() */
-
-	if ((p = pthread_getcwd_np(buf, MAXPATHLEN)) != NULL) {
-		fprintf(stderr, "FAIL: pthread_getcwd_np should fail, got \"%s\" instead\n", p);
-		exit(4);
-	}
-
-	printf("Good so far: pthread_getcwd_np() got no answer (correct)\n");
-
-	if (pthread_chdir_np(WORKDIR2) == -1) {
-		perror("FAIL: pthread_chdir_np: " WORKDIR2);
-		exit(5);
-	}
-
-	printf("Set per thread current working directory to \"" WORKDIR2"\"\n");
-	printf("verifying; getcwd says: \"%s\"\n", getcwd(buf, MAXPATHLEN));
-	if (strcmp(WORKDIR2, buf)) {
-		fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR2, buf);
-		exit(3);
-	}
-	printf("verified.\n");
-
-	/* Now verify we get an answer for pthread_getcwd_np() */
-	if ((p = pthread_getcwd_np(buf, MAXPATHLEN)) == NULL) {
-		perror("FAIL: pthread_getcwd_np");
-		exit(6);
-	}
-
-	printf("verifying... pthread_getcwd_np says \"%s\"\n", p);
-	if (strcmp(WORKDIR2, buf)) {
-		fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR2, buf);
-		exit(7);
-	}
-	printf("verified.\n");
-
-	printf("verifying our old cwd still exists by going off PT CWD...\n");
-	if (pthread_fchdir_np(-1) != 0) {
-		perror("FAIL: pthread_fchdir_np");
-		exit(8);
-	}
-	printf("off...
but are we really off?\n"); - - printf("Check by verifying that pthread_getcwd_np now fails\n"); - if ((p = pthread_getcwd_np(buf, MAXPATHLEN)) != NULL) { - fprintf(stderr, "FAIL: pthread_getcwd_np should fail, got \"%s\" instead\n", p); - exit(9); - } - - printf("verified.\n"); - - printf("One last check: see that getcwd says \"" WORKDIR1 "\" again\n"); - printf("verifying; getcwd says: \"%s\"\n", getcwd(buf, MAXPATHLEN)); - if (strcmp(WORKDIR1, buf)) { - fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR1, buf); - exit(10); - } - printf("verified.\n"); - - - printf("\nPASS: testing was successful\n"); - - exit(0); -} diff --git a/tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c b/tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c deleted file mode 100644 index 55728c60c..000000000 --- a/tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c +++ /dev/null @@ -1,749 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * We create a process hierarchy of: - * - * grandparent -> parent -> child - * \ - * \--> debugger - * - * When the debugger calls ptrace(2) on child, it - * is temporarily reparented. - * - * We may also create a hierarchy of: - * - * grandparent -> parent/debugger -> child - * - */ - -typedef enum { - eParentExitAfterWaitpid = 0, - eParentExitAfterWaitpidAndSIGCHLD, - eParentExitBeforeWaitpid, - eParentExitAfterDebuggerAttach, - eParentExitBeforeDebuggerAttach, - eParentIsDebugger -} parent_exit_t; - -typedef enum { - eDebuggerExitAfterKillAndWaitpid = 0, - eDebuggerExitAfterKillWithoutWaitpid, - eDebuggerExitAfterDetach, - eDebuggerExitWithoutDetach -} debugger_exit_t; - -void do_grandparent(pid_t parent, pid_t child, pid_t debugger, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); -void do_parent(pid_t child, pid_t debugger, parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); -void do_child(void) __attribute__((noreturn)); -void do_debugger(pid_t child, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); - -bool iszombie(pid_t p); - -char *str_kev_filter(int filter); -char *str_kev_flags(int filter, uint16_t flags); -char *str_kev_fflags(int filter, uint32_t fflags); -char *str_kev_data(int filter, uint32_t fflags, int64_t data, uint64_t udata); -char *print_exit(pid_t p, int stat_loc); - -void logline(const char *format, ...); - -void usage(void); -int test_all_permutations(void); -void test(parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); - -int main(int argc, char *argv[]) { - int ch; - - int parent_exit_time = -1; - int debugger_exit_time = -1; - - while ((ch = getopt(argc, argv, "p:w:")) != -1) { - switch (ch) { - case 'p': - parent_exit_time = atoi(optarg); - break; - case 'w': - debugger_exit_time = atoi(optarg); - break; - case '?': - default: - usage(); - } - } - - /* no explicit options, loop through them all */ - if (parent_exit_time == -1 && - debugger_exit_time == -1) { - return test_all_permutations(); - } - - if (parent_exit_time == -1 || - debugger_exit_time == -1) { - usage(); - } - - test((parent_exit_t)parent_exit_time, - (debugger_exit_t)debugger_exit_time); - - return 0; /* never reached */ -} - -void test(parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) -{ - pid_t parent, child, debugger; - int ret; - int fds[2]; - - /* pipe for parent to send child pid to grandparent */ - ret = 
pipe(fds); - if (-1 == ret) { - err(1, "failed to create pipe"); - } - - parent = fork(); - if (parent == 0) { - /* parent sub-branch */ - - ret = close(fds[0]); - if (ret == -1) { - err(1, "close read end of pipe"); - } - - child = fork(); - if (child == 0) { - /* child */ - ret = close(fds[1]); - if (ret == -1) { - err(1, "close write end of pipe"); - } - - do_child(); - } else if (child == -1) { - err(1, "parent failed to fork child"); - } else { - /* parent */ - if (-1 == write(fds[1], &child, sizeof(child))) { - err(1, "writing child pid to grandparent"); - } - - if (parent_exit_time == eParentIsDebugger) { - debugger = -1; - - if (-1 == write(fds[1], &debugger, sizeof(debugger))) { - err(1, "writing debugger pid to grandparent"); - } - ret = close(fds[1]); - if (ret == -1) { - err(1, "close write end of pipe"); - } - - do_debugger(child, debugger_exit_time); - } else { - debugger = fork(); - if (debugger == 0) { - /* debugger */ - ret = close(fds[1]); - if (ret == -1) { - err(1, "close write end of pipe"); - } - - do_debugger(child, debugger_exit_time); - } else if (debugger == -1) { - err(1, "parent failed to fork debugger"); - } else { - /* still parent */ - if (-1 == write(fds[1], &debugger, sizeof(debugger))) { - err(1, "writing debugger pid to grandparent"); - } - ret = close(fds[1]); - if (ret == -1) { - err(1, "close write end of pipe"); - } - - do_parent(child, debugger, parent_exit_time, debugger_exit_time); - } - } - } - } else if (parent == -1) { - err(1, "grandparent failed to fork parent"); - } else { - ret = close(fds[1]); - if (ret == -1) { - err(1, "close write end of pipe"); - } - - if (-1 == read(fds[0], &child, sizeof(child))) { - err(1, "could not read child pid"); - } - - if (-1 == read(fds[0], &debugger, sizeof(debugger))) { - err(1, "could not read debugger pid"); - } - - ret = close(fds[0]); - if (ret == -1) { - err(1, "close read end of pipe"); - } - - do_grandparent(parent, child, debugger, debugger_exit_time); - } -} - -void usage(void) -{ - errx(1, "Usage: %s [-p -w ]", getprogname()); -} - -int test_all_permutations(void) -{ - int p, w; - bool has_failure = false; - - for (p = 0; p <= 5; p++) { - for (w = 0; w <= 3; w++) { - int testpid; - int ret; - - testpid = fork(); - if (testpid == 0) { - logline("-------------------------------------------------------"); - logline("*** Executing self-test: %s -p %d -w %d", - getprogname(), p, w); - test((parent_exit_t)p, - (debugger_exit_t)w); - _exit(1); /* never reached */ - } else if (testpid == -1) { - err(1, "failed to fork test pid"); - } else { - int stat_loc; - - ret = waitpid(testpid, &stat_loc, 0); - if (ret == -1) - err(1, "waitpid(%d) by test harness failed", testpid); - - logline("test process: %s", print_exit(testpid, stat_loc)); - if (!WIFEXITED(stat_loc) || (0 != WEXITSTATUS(stat_loc))) { - logline("FAILED TEST"); - has_failure = true; - } - } - } - } - - if (has_failure) { - logline("test failures found"); - return 1; - } - - return 0; -} - -void do_grandparent(pid_t parent, pid_t child, pid_t debugger, debugger_exit_t debugger_exit_time) -{ - pid_t result; - int stat_loc; - int exit_code = 0; - int kq; - int ret; - struct kevent64_s kev; - int neededdeathcount = (debugger != -1) ? 3 : 2; - - setprogname("GRANDPARENT"); - - logline("grandparent pid %d has parent pid %d and child pid %d. 
waiting for parent process exit...", getpid(), parent, child); - - /* make sure we can at least observe real child's exit */ - kq = kqueue(); - if (kq < 0) - err(1, "kqueue"); - - EV_SET64(&kev, child, EVFILT_PROC, EV_ADD|EV_ENABLE, - NOTE_EXIT, 0, child, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_PROC"); - - EV_SET64(&kev, parent, EVFILT_PROC, EV_ADD|EV_ENABLE, - NOTE_EXIT, 0, parent, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_PROC"); - - if (debugger != -1) { - EV_SET64(&kev, debugger, EVFILT_PROC, EV_ADD|EV_ENABLE, - NOTE_EXIT, 0, debugger, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_PROC"); - } - - EV_SET64(&kev, 5, EVFILT_TIMER, EV_ADD|EV_ENABLE|EV_ONESHOT, - NOTE_SECONDS, 5, 0, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_TIMER"); - - while(1) { - - ret = kevent64(kq, NULL, 0, &kev, 1, 0, NULL); - if (ret == -1) { - if (errno == EINTR) - continue; - err(1, "kevent64"); - } else if (ret == 0) { - break; - } - - logline("kevent64 returned ident %llu filter %s fflags %s data %s", - kev.ident, str_kev_filter(kev.filter), - str_kev_fflags(kev.filter, kev.fflags), - str_kev_data(kev.filter, kev.fflags, kev.data, kev.udata)); - if (kev.filter == EVFILT_PROC) { - if (child == kev.udata) { - neededdeathcount--; - } else if (parent == kev.udata) { - neededdeathcount--; - } else if ((debugger != -1) && (debugger == kev.udata)) { - neededdeathcount--; - } - } else if (kev.filter == EVFILT_TIMER) { - logline("timed out waiting for NOTE_EXIT"); - exit_code = 1; - break; - } - - if (neededdeathcount == 0) { - break; - } - } - - result = waitpid(parent, &stat_loc, 0); - if (result == -1) - err(1, "waitpid(%d) by grandparent failed", parent); - - - logline("parent process: %s", print_exit(parent, stat_loc)); - if (!WIFEXITED(stat_loc) || (0 != WEXITSTATUS(stat_loc))) { - exit_code = 1; - } - - if (iszombie(parent)) { - logline("parent %d is now a zombie", parent); - exit_code = 1; - } - - if (iszombie(child)) { - logline("child %d is now a zombie", child); - exit_code = 1; - } - - if ((debugger != -1) && iszombie(debugger)) { - logline("debugger %d is now a zombie", debugger); - exit_code = 1; - } - - exit(exit_code); -} - -/* - * debugger will register kevents, wait for quorum on events, then exit - */ -void do_parent(pid_t child, pid_t debugger, parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) -{ - int kq; - int ret; - struct kevent64_s kev; - int deathcount = 0; - int childsignalcount = 0; - int stat_loc; - - setprogname("PARENT"); - - logline("parent pid %d has child pid %d and debugger pid %d. 
waiting for processes to exit...", getpid(), child, debugger); - - kq = kqueue(); - if (kq < 0) - err(1, "kqueue"); - - EV_SET64(&kev, child, EVFILT_PROC, EV_ADD|EV_ENABLE, - NOTE_EXIT|NOTE_EXITSTATUS|NOTE_EXIT_DETAIL|NOTE_FORK|NOTE_EXEC|NOTE_SIGNAL, - 0, child, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_PROC"); - - EV_SET64(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, - 0, 0, child, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_SIGNAL"); - - EV_SET64(&kev, 7, EVFILT_TIMER, EV_ADD|EV_ENABLE|EV_ONESHOT, - NOTE_SECONDS, 7, 0, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_TIMER"); - - while(1) { - ret = kevent64(kq, NULL, 0, &kev, 1, 0, NULL); - if (ret == -1) { - if (errno == EINTR) - continue; - err(1, "kevent64"); - } else if (ret == 0) { - break; - } - - logline("kevent64 returned ident %llu filter %s fflags %s data %s", - kev.ident, str_kev_filter(kev.filter), - str_kev_fflags(kev.filter, kev.fflags), - str_kev_data(kev.filter, kev.fflags, kev.data, kev.udata)); - if (kev.filter == EVFILT_SIGNAL) { - /* must be SIGCHLD */ - deathcount++; - } else if (kev.filter == EVFILT_PROC) { - if (child == kev.udata) { - if ((kev.fflags & (NOTE_EXIT|NOTE_EXITSTATUS)) == (NOTE_EXIT|NOTE_EXITSTATUS)) { - deathcount++; - } else if (kev.fflags & NOTE_SIGNAL) { - childsignalcount++; - if ((parent_exit_time == eParentExitAfterDebuggerAttach) && (childsignalcount >= 2)) { - /* second signal is attach */ - logline("exiting because of eParentExitAfterDebuggerAttach"); - exit(0); - } - } else if (kev.fflags & NOTE_FORK) { - if (parent_exit_time == eParentExitBeforeDebuggerAttach) { - logline("exiting because of eParentExitBeforeDebuggerAttach"); - exit(0); - } - } - } - } else if (kev.filter == EVFILT_TIMER) { - errx(1, "timed out waiting for NOTE_EXIT"); - } - - if (deathcount >= (parent_exit_time == eParentExitAfterWaitpidAndSIGCHLD ? 2 : 1)) { - break; - } - } - - if (parent_exit_time == eParentExitBeforeWaitpid) { - logline("exiting because of eParentExitBeforeWaitpid"); - exit(0); - } - - ret = waitpid(child, &stat_loc, 0); - if (ret == -1) - err(1, "waitpid(%d) by parent failed", child); - - logline("child process: %s", print_exit(child, stat_loc)); - if (!WIFSIGNALED(stat_loc) || (SIGKILL != WTERMSIG(stat_loc))) - errx(1, "child did not exit as expected"); - - ret = waitpid(debugger, &stat_loc, 0); - if (ret == -1) - err(1, "waitpid(%d) by parent failed", debugger); - - logline("debugger process: %s", print_exit(debugger, stat_loc)); - if (!WIFEXITED(stat_loc) || (0 != WEXITSTATUS(stat_loc))) - errx(1, "debugger did not exit as expected"); - - /* Received both SIGCHLD and NOTE_EXIT, as needed */ - logline("exiting because of eParentExitAfterWaitpid/eParentExitAfterWaitpidAndSIGCHLD"); - exit(0); -} - -/* child will spin waiting to be killed by debugger or parent or someone */ -void do_child(void) -{ - pid_t doublechild; - int ret; - setprogname("CHILD"); - - logline("child pid %d. waiting for external termination...", getpid()); - - usleep(500000); - - doublechild = fork(); - if (doublechild == 0) { - exit(0); - } else if (doublechild == -1) { - err(1, "doublechild"); - } else { - ret = waitpid(doublechild, NULL, 0); - if (ret == -1) - err(1, "waitpid(%d) by parent failed", doublechild); - } - - while (1) { - sleep(60); - } -} - -/* - * debugger will register kevents, attach+kill child, wait for quorum on events, - * then exit. 
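 */

/*
 * Editor's sketch (not part of the original test): the bare kevent64
 * exit-watch that do_grandparent, do_parent and do_debugger all elaborate
 * on. Register EVFILT_PROC for a pid, then poll until NOTE_EXIT arrives;
 * kev.data carries a wait(2)-style status and kev.udata echoes back whatever
 * was stashed at registration time. Error handling is elided.
 */
#if 0 /* illustrative only */
static int
wait_for_note_exit(pid_t pid)
{
	struct kevent64_s kev;
	int kq = kqueue();

	EV_SET64(&kev, pid, EVFILT_PROC, EV_ADD|EV_ENABLE,
	    NOTE_EXIT|NOTE_EXITSTATUS, 0, pid, 0, 0);
	(void) kevent64(kq, &kev, 1, NULL, 0, 0, NULL);

	for (;;) {
		if (kevent64(kq, NULL, 0, &kev, 1, 0, NULL) == 1 &&
		    kev.filter == EVFILT_PROC && (kev.fflags & NOTE_EXIT))
			return (int)kev.data; /* wait(2)-style status */
	}
}
#endif

/*
 * Note the quorum idea used throughout: one exit produces both a SIGCHLD
 * event and a NOTE_EXIT event, so the observers count deaths and only stop
 * once the expected total has been seen.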
- */ -void do_debugger(pid_t child, debugger_exit_t debugger_exit_time) -{ - int kq; - int ret; - struct kevent64_s kev; - int deathcount = 0; - int stat_loc; - - setprogname("DEBUGGER"); - - logline("debugger pid %d has child pid %d. waiting for process exit...", getpid(), child); - - sleep(1); - fprintf(stderr, "\n"); - ret = ptrace(PT_ATTACH, child, 0, 0); - if (ret == -1) - err(1, "ptrace(PT_ATTACH)"); - - ret = waitpid(child, &stat_loc, WUNTRACED); - if (ret == -1) - err(1, "waitpid(child, WUNTRACED)"); - - logline("child process stopped: %s", print_exit(child, stat_loc)); - - if (debugger_exit_time == eDebuggerExitWithoutDetach) { - logline("exiting because of eDebuggerExitWithoutDetach"); - exit(0); - } else if (debugger_exit_time == eDebuggerExitAfterDetach) { - ret = ptrace(PT_DETACH, child, 0, 0); - if (ret == -1) - err(1, "ptrace(PT_DETACH)"); - - ret = kill(child, SIGKILL); - if (ret == -1) - err(1, "kill(SIGKILL)"); - - logline("exiting because of eDebuggerExitAfterDetach"); - exit(0); - } - - kq = kqueue(); - if (kq < 0) - err(1, "kqueue"); - - EV_SET64(&kev, child, EVFILT_PROC, EV_ADD|EV_ENABLE, - NOTE_EXIT|NOTE_EXITSTATUS|NOTE_EXIT_DETAIL|NOTE_FORK|NOTE_EXEC|NOTE_SIGNAL, - 0, child, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_PROC"); - - EV_SET64(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, - 0, 0, child, 0, 0); - ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); - if (ret == -1) - err(1, "kevent64 EVFILT_SIGNAL"); - - sleep(1); - fprintf(stderr, "\n"); - ret = ptrace(PT_KILL, child, 0, 0); - if (ret == -1) - err(1, "ptrace(PT_KILL)"); - - while(1) { - ret = kevent64(kq, NULL, 0, &kev, 1, 0, NULL); - if (ret == -1) { - if (errno == EINTR) - continue; - err(1, "kevent64"); - } else if (ret == 0) { - continue; - } - - logline("kevent64 returned ident %llu filter %s fflags %s data %s", - kev.ident, str_kev_filter(kev.filter), - str_kev_fflags(kev.filter, kev.fflags), - str_kev_data(kev.filter, kev.fflags, kev.data, kev.udata)); - if (kev.filter == EVFILT_SIGNAL) { - /* must be SIGCHLD */ - deathcount++; - } else if (kev.filter == EVFILT_PROC) { - if ((kev.fflags & (NOTE_EXIT|NOTE_EXITSTATUS)) == (NOTE_EXIT|NOTE_EXITSTATUS)) { - deathcount++; - } - } - - if (deathcount >= 2) { - break; - } - } - - if (debugger_exit_time == eDebuggerExitAfterKillWithoutWaitpid) { - logline("exiting because of eDebuggerExitAfterKillWithoutWaitpid"); - exit(0); - } - - sleep(1); - fprintf(stderr, "\n"); - ret = waitpid(child, &stat_loc, 0); - if (ret == -1) - err(1, "waitpid(%d) by debugger failed", child); - - logline("child process: %s", print_exit(child, stat_loc)); - - /* Received both SIGCHLD and NOTE_EXIT */ - exit(0); -} - -void logline(const char *format, ...) 
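/*
 * Prefixes each message with the current getprogname() so output from the
 * grandparent, parent, child and debugger processes can be told apart, and
 * emits the whole line with a single write(2) so lines do not interleave.
 */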
-{ - char *line = NULL; - char newformat[1024]; - - snprintf(newformat, sizeof(newformat), "%s: %s\n", getprogname(), format); - - va_list va; - - va_start(va, format); - vasprintf(&line, newformat, va); - va_end(va); - - if (line) { - write(STDOUT_FILENO, line, strlen(line)); - free(line); - } else { - write(STDOUT_FILENO, "error\n", 6); - } -} - - -char *str_kev_filter(int filter) -{ - static char filter_string[32]; - if (filter == EVFILT_PROC) - strlcpy(filter_string, "EVFILT_PROC", sizeof(filter_string)); - else if (filter == EVFILT_SIGNAL) - strlcpy(filter_string, "EVFILT_SIGNAL", sizeof(filter_string)); - else if (filter == EVFILT_TIMER) - strlcpy(filter_string, "EVFILT_TIMER", sizeof(filter_string)); - else - strlcpy(filter_string, "EVFILT_UNKNOWN", sizeof(filter_string)); - - return filter_string; -} - -char *str_kev_flags(int filter, uint16_t flags) -{ - static char flags_string[128]; - - flags_string[0] = '\0'; - if (flags & EV_ADD) strlcat(flags_string, "|EV_ADD", sizeof(flags_string)); - if (flags & EV_DELETE) strlcat(flags_string, "|EV_DELETE", sizeof(flags_string)); - if (flags & EV_ENABLE) strlcat(flags_string, "|EV_ENABLE", sizeof(flags_string)); - if (flags & EV_DISABLE) strlcat(flags_string, "|EV_DISABLE", sizeof(flags_string)); - if (flags & EV_RECEIPT) strlcat(flags_string, "|EV_RECEIPT", sizeof(flags_string)); - if (flags & EV_ONESHOT) strlcat(flags_string, "|EV_ONESHOT", sizeof(flags_string)); - if (flags & EV_CLEAR) strlcat(flags_string, "|EV_CLEAR", sizeof(flags_string)); - if (flags & EV_DISPATCH) strlcat(flags_string, "|EV_DISPATCH", sizeof(flags_string)); - if (flags & EV_EOF) strlcat(flags_string, "|EV_EOF", sizeof(flags_string)); - if (flags & EV_ERROR) strlcat(flags_string, "|EV_ERROR", sizeof(flags_string)); - - if (flags_string[0] == '|') - return &flags_string[1]; - else - return flags_string; -} - -char *str_kev_fflags(int filter, uint32_t fflags) -{ - static char fflags_string[128]; - - fflags_string[0] = '\0'; - - if (filter == EVFILT_SIGNAL) { - if (fflags & NOTE_SIGNAL) strlcat(fflags_string, "|NOTE_SIGNAL", sizeof(fflags_string)); - } else if (filter == EVFILT_PROC) { - if (fflags & NOTE_EXIT) strlcat(fflags_string, "|NOTE_EXIT", sizeof(fflags_string)); - if (fflags & NOTE_FORK) strlcat(fflags_string, "|NOTE_FORK", sizeof(fflags_string)); - if (fflags & NOTE_EXEC) strlcat(fflags_string, "|NOTE_EXEC", sizeof(fflags_string)); - if (fflags & NOTE_SIGNAL) strlcat(fflags_string, "|NOTE_SIGNAL", sizeof(fflags_string)); - if (fflags & NOTE_EXITSTATUS) strlcat(fflags_string, "|NOTE_EXITSTATUS", sizeof(fflags_string)); - if (fflags & NOTE_EXIT_DETAIL) strlcat(fflags_string, "|NOTE_EXIT_DETAIL", sizeof(fflags_string)); - if (fflags & NOTE_EXIT_DECRYPTFAIL) strlcat(fflags_string, "|NOTE_EXIT_DECRYPTFAIL", sizeof(fflags_string)); - if (fflags & NOTE_EXIT_MEMORY) strlcat(fflags_string, "|NOTE_EXIT_MEMORY", sizeof(fflags_string)); -#ifdef NOTE_EXIT_CSERROR - if (fflags & NOTE_EXIT_CSERROR) strlcat(fflags_string, "|NOTE_EXIT_CSERROR", sizeof(fflags_string)); -#endif - } else if (filter == EVFILT_TIMER) { - if (fflags & NOTE_SECONDS) strlcat(fflags_string, "|NOTE_SECONDS", sizeof(fflags_string)); - } else { - strlcat(fflags_string, "UNKNOWN", sizeof(fflags_string)); - } - - if (fflags_string[0] == '|') - return &fflags_string[1]; - else - return fflags_string; -} - -char *str_kev_data(int filter, uint32_t fflags, int64_t data, uint64_t udata) -{ - static char data_string[128]; - - if (filter == EVFILT_PROC) { - if ((fflags & (NOTE_EXIT|NOTE_EXITSTATUS)) == 
(NOTE_EXIT|NOTE_EXITSTATUS)) { - if (WIFEXITED(data)) { - snprintf(data_string, sizeof(data_string), "pid %llu exited with status %d", udata, WEXITSTATUS(data)); - } else if (WIFSIGNALED(data)) { - snprintf(data_string, sizeof(data_string), "pid %llu received signal %d%s", udata, WTERMSIG(data), WCOREDUMP(data) ? " (core dumped)" : ""); - } else if (WIFSTOPPED(data)) { - snprintf(data_string, sizeof(data_string), "pid %llu stopped with signal %d", udata, WSTOPSIG(data)); - } else { - snprintf(data_string, sizeof(data_string), "pid %llu unknown exit status 0x%08llx", udata, data); - } - } else if (fflags & NOTE_EXIT) { - snprintf(data_string, sizeof(data_string), "pid %llu exited", udata); - } else { - data_string[0] = '\0'; - } - } else if (filter == EVFILT_TIMER) { - snprintf(data_string, sizeof(data_string), "timer fired %lld time(s)", data); - } else { - data_string[0] = '\0'; - } - - return data_string; -} - -char *print_exit(pid_t p, int stat_loc) -{ - return str_kev_data(EVFILT_PROC, NOTE_EXIT|NOTE_EXITSTATUS, stat_loc, p); -} - -bool iszombie(pid_t p) -{ - int ret; - struct proc_bsdshortinfo bsdinfo; - - ret = proc_pidinfo(p, PROC_PIDT_SHORTBSDINFO, 1, &bsdinfo, sizeof(bsdinfo)); - if (ret != sizeof(bsdinfo)) { - return false; - } - - if (bsdinfo.pbsi_status == SZOMB) { - return true; - } else { - return false; - } -} diff --git a/tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c b/tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c deleted file mode 100644 index c863c03d1..000000000 --- a/tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * File: ptrace_tests_10767133.c - * Test Description: Testing different functions of the ptrace call. - * Radar: - * compile command: cc -o ../BUILD/ptrace_tests_10767133 ptrace_tests_10767133.c - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define assert_condition(condition, exit_status, cause) \ - if (!(condition)) { \ - printf("[FAILED] %s:%s at %d error: %s \n", "test_10767133", __func__ , __LINE__, cause ); \ - if (errno) \ - perror(cause); \ - exit(exit_status); \ - } \ - -#define log_message(msg) \ - printf("%s:%d -> %s \n", __func__, __LINE__, msg); - - -typedef int * pipe_t; - -ssize_t pipe_read_data(pipe_t p, void *dest_buf, int size) -{ - int fd = p[0]; - int retval = read(fd, dest_buf, size); - if (retval == -1) { - printf("Error reading from buffer. "); - perror("pipe_read"); - } - return retval; -} - -ssize_t pipe_write_data(pipe_t p, void *src_buf, int size) -{ - int fd = p[1]; - int retval = write(fd, src_buf, size); - if (retval == -1) { - printf("Error writing to buffer. "); - perror("pipe_write"); - } - return retval; -} - - - -void test_ptrace_deny_tace_sigexc(); -void test_ptrace_attach_detach(); -void test_ptrace_step_kill(); - -int main(){ - int retval =0; - log_message(" Testing for PT_FORCEQUOTA. it should return EPERM for non root program. "); - errno=0; - retval = ptrace(PT_FORCEQUOTA, getpid(), NULL, 0); - assert_condition( (retval == -1 && errno == EPERM), -1, "PT_FORCEQUOTA"); - - log_message(" Testing to PT_DENY_ATTACH. should return successfully as nobody is tracing me.") - retval = ptrace(PT_DENY_ATTACH, getpid(), NULL, 0); - assert_condition (retval == 0 , -2, "PR_DENY_ATTACH"); - test_ptrace_deny_tace_sigexc(); - test_ptrace_attach_detach(); - test_ptrace_step_kill(); - success: - printf("[PASSED] Test test_10767133 passed. 
\n"); - return 0; - fail: - printf("[FAILED] Test test_10767133 failed. \n"); - return -1; -} - -void test_ptrace_step_kill(){ - int retval = 0, status=1; - int parentpipe[2], childpipe[2], data; - enum data_state { begin, finished_child_loop, finished_parent_detach }; - retval = pipe(childpipe); - assert_condition(retval == 0, -1, "Pipe create"); - retval = pipe(parentpipe); - assert_condition(retval == 0, -1, "Pipe create"); - int childpid = fork(); - assert_condition(childpid >=0, -1, "fork failed"); - - if (childpid == 0){ /* child */ - pipe_read_data(parentpipe, &data, sizeof(data)); - assert_condition(data == begin, -1, "child: parent not setting begin"); - pipe_write_data(childpipe, &data, sizeof(data)); - log_message("child: running the sleep loop"); - int i = 5; - log_message("child: sleep loop"); - while (i-- > 0){ - sleep(1); - printf(".z.\n"); - } - data = finished_child_loop; - log_message("child: finished sleep loop"); - pipe_write_data(childpipe, &data, sizeof(data)); - pipe_read_data(parentpipe, &data, sizeof(data)); - assert_condition(data == finished_parent_detach, -1, "child: parent not done with detach"); - i = 5; - log_message("child: sleep loop 2"); - while (i-- > 0){ - sleep(1); - printf(".Z.\n"); - } - exit(57); - }else{ /* parent */ - data = begin; - pipe_write_data(parentpipe, &data, sizeof(data)); - data = getpid(); - pipe_read_data(childpipe, &data, sizeof(data)); - assert_condition(data == begin, -1, "child is not ready with TRACE_ME setup"); - printf("parent: attaching to child with pid %d \n", childpid); - retval = ptrace(PT_ATTACH, childpid, NULL, 0); - assert_condition(retval == 0, -1, "parent: failed to attach to child"); - sleep(2); - log_message("parent: attached to child. Now PT_STEP through it"); - retval = ptrace(PT_STEP, childpid, (caddr_t)1, 0); - assert_condition(retval == 0, -1, "parent: failed to continue the child"); - sleep(2); - retval = ptrace(PT_STEP, childpid, (caddr_t)1, 0); - assert_condition(retval == 0, -1, "parent: failed to continue the child"); - log_message("parent: issuing PT_KILL to child "); - sleep(2); - retval = ptrace(PT_KILL, childpid, NULL, 0); - assert_condition(retval == 0, -1, "parent: failed to PT_KILL the child"); - data = finished_parent_detach; - pipe_write_data(parentpipe, &data, sizeof(data)); - waitpid(childpid,&status,0); - assert_condition(status != 57, -1, "child has exited successfully. It should have died with signal 9"); - assert_condition(status == 9, -1, "child has exited unexpectedly. 
Should have died with signal 9"); - } - -} - -void test_ptrace_attach_detach(){ - int retval = 0, status=1; - int parentpipe[2], childpipe[2], data; - enum data_state { begin, finished_child_loop, finished_parent_detach }; - retval = pipe(childpipe); - assert_condition(retval == 0, -1, "Pipe create"); - retval = pipe(parentpipe); - assert_condition(retval == 0, -1, "Pipe create"); - int childpid = fork(); - assert_condition(childpid >=0, -1, "fork failed"); - - if (childpid == 0){ /* child */ - //retval = ptrace(PT_TRACE_ME, getpid(), NULL, 0); - //assert_condition(retval == 0, -1, "PT_TRACE_ME failed"); - pipe_read_data(parentpipe, &data, sizeof(data)); - assert_condition(data == begin, -1, "child: parent not setting begin"); - pipe_write_data(childpipe, &data, sizeof(data)); - log_message("child: running the sleep loop"); - int i = 5; - log_message("child: sleep looping"); - while (i-- > 0){ - sleep(1); - printf(".z.\n"); - } - data = finished_child_loop; - log_message("child: finished sleep loop"); - pipe_write_data(childpipe, &data, sizeof(data)); - pipe_read_data(parentpipe, &data, sizeof(data)); - assert_condition(data == finished_parent_detach, -1, "child: parent not done with detach"); - i = 5; - log_message("child sleep looping too"); - while (i-- > 0){ - sleep(1); - printf(".Z.\n"); - } - exit(0); - }else{ /* parent */ - data = begin; - pipe_write_data(parentpipe, &data, sizeof(data)); - data = getpid(); - pipe_read_data(childpipe, &data, sizeof(data)); - assert_condition(data == begin, -1, "child is not ready with TRACE_ME setup"); - printf("parent: attaching to child with pid %d \n", childpid); - retval = ptrace(PT_ATTACH, childpid, NULL, 0); - assert_condition(retval == 0, -1, "parent: failed to attach to child"); - sleep(2); - log_message("parent: attached to child. Now continuing it"); - retval = ptrace(PT_CONTINUE, childpid, (caddr_t)1, 0); - assert_condition(retval == 0, -1, "parent: failed to continue the child"); - - pipe_read_data(childpipe, &data, sizeof(data)); - assert_condition(data == finished_child_loop, -1, "parent: child has not finished while loop"); - - retval = kill(childpid, SIGSTOP); - assert_condition(retval == 0, -1, "parent: failed to SIGSTOP child"); - sleep(2); - - log_message("parent: child has finished loop. 
Now detaching the child"); - retval = ptrace(PT_DETACH, childpid, NULL, 0); - assert_condition(retval == 0, -1, "parent: failed to detach"); - - data = finished_parent_detach; - pipe_write_data(parentpipe, &data, sizeof(data)); - waitpid(childpid,&status,0); - assert_condition(status == 0, -1, "child has exited unexpectedly"); - } -} - - -void test_ptrace_deny_tace_sigexc(){ - enum ptrace_state { begin,denied_attach, sigexc_tested,trace_me_set, attached, stepped, continued, killed }; - int retval =0; - int childpipe[2],parentpipe[2], data[2]; - retval = pipe(childpipe); - assert_condition( retval == 0, -3, "Pipe create"); - retval = pipe(parentpipe); - assert_condition( retval == 0, -3, "Pipe create"); - - data[0] = begin; // parent - data[1] = begin; //child - - int childpid = fork(); - int status = 0; - assert_condition(childpid >=0, -4, "fork failed"); - - if (childpid == 0){ - /* child */ - retval = ptrace(PT_DENY_ATTACH, getpid(), NULL,0); - data[1] = denied_attach; - pipe_write_data(childpipe, &data[1], sizeof(int)); - log_message("child: waiting for parent to write something"); - pipe_read_data(parentpipe, &data[0], sizeof(int)); - assert_condition(data[0] == begin , -5, "child: parent didn't begin with right state"); - - /* waiting for parent to verify that PT_SIGEXC fails since child is not yet traced. */ - - pipe_read_data(parentpipe, &data[0], sizeof(int)); - assert_condition(data[0] == sigexc_tested, -5, " child: parent didn't test for sigexc failure"); - log_message("child: setting myself to be traced"); - retval = ptrace(PT_TRACE_ME, getpid(), NULL ,0); - assert_condition(retval == 0, -6, "child: failed to set myself for tracing"); - data[1]=trace_me_set; - pipe_write_data(childpipe, &data[1], sizeof(int)); - log_message("child: setting signals to be exceptions. PT_SIGEXC"); - retval = ptrace(PT_SIGEXC, getpid(), NULL, 0); - assert_condition(retval == 0, -7, "child: failed to set PT_SIGEXC"); - - exit(0); - - }else { - /* parent */ - // get status of child - pipe_read_data(childpipe, &data[1], sizeof(int)); - assert_condition(data[1] == denied_attach, -5, "parent: deny_attach_check"); - pipe_write_data(parentpipe, &data[0], sizeof(int)); - - log_message("parent: testing for failure of PT_SIGEXC "); - retval = ptrace(PT_SIGEXC, childpid, NULL, 0); - assert_condition(retval < 0 , -5, "PT_SIGEXC did not fail for untraced child"); - data[0] = sigexc_tested; - pipe_write_data(parentpipe, &data[0], sizeof(int)); - - pipe_read_data(childpipe, &data[1], sizeof(int)); - assert_condition(data[1] == trace_me_set , -7, "parent: child has not set PT_TRACE_ME"); - - waitpid(childpid, &status, 0); - if ( status != 0){ - log_message("Child exited with non-zero status"); - } - } - - close(childpipe[0]); - close(childpipe[1]); - - close(parentpipe[0]); - close(parentpipe[1]); - -} diff --git a/tools/tests/unit_tests/sampletest.c b/tools/tests/unit_tests/sampletest.c deleted file mode 100644 index aee888903..000000000 --- a/tools/tests/unit_tests/sampletest.c +++ /dev/null @@ -1,28 +0,0 @@ -#include -/* -Sample test file. Do not remove this. -*/ -int main(int argc, char *argv[]){ - char os_version[20] = TARGET_OS_VERS; - char os_build[20] = TARGET_OS_BUILD_VERS; - printf("Sample test for xnu unit tests. 
This file is just an example for future unit tests.\n"); - printf("This test was build with OS version %s and build %s\n", os_version, os_build); - /* an example of how SDKTARGET is used for different builds */ -#ifdef TARGET_SDK_macosx - printf("The SDKTARGET for building this test is macosx\n"); -#endif - -#ifdef TARGET_SDK_macosx_internal - printf("The SDKTARGET for building this test is macosx.internal\n"); -#endif - -#ifdef TARGET_SDK_iphoneos - printf("The SDKTARGET for building this test is iphoneos\n"); -#endif - -#ifdef TARGET_SDK_iphoneos_internal - printf("The SDKTARGET for building this test is iphoneos.internal\n"); -#endif - - return 0; -} diff --git a/tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c b/tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c deleted file mode 100644 index 5c25f5581..000000000 --- a/tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include -#include -#include -#include - -int main(void) { - key_t key; - - if ((key = ftok(".", 1)) == (key_t)-1) { - perror("ftok"); - exit(EXIT_FAILURE); - } - - int semid; - if ((semid = semget(key, 1, IPC_CREAT | S_IRUSR | S_IWUSR)) == -1) { - perror("semget"); - exit(EXIT_FAILURE); - } - - union semun arg; - - /* Test for sem value > SEMVMX */ - arg.val = 32768; - if (semctl(semid, 0, SETVAL, arg) == 0) { - printf("semctl should have failed for SETVAL 32768\n"); - exit(EXIT_FAILURE); - } - - /* Test for sem value < 0 */ - arg.val = -1; - if (semctl(semid, 0, SETVAL, arg) == 0) { - printf("semctl should have failed for SETVAL -1\n"); - exit(EXIT_FAILURE); - } - - return 0; -} diff --git a/tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c b/tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c deleted file mode 100644 index cf37ef7cd..000000000 --- a/tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c +++ /dev/null @@ -1,265 +0,0 @@ - -/* - * File: sprace_test_11891562.c - * Test Description: The test ensures that there are no race conditions when multiple threads - * attempt to send messages to a mach port with a subset of threads waiting for a send possible - * notification. - * Radar: - */ -#include -#include -#include -#include -#include -#include -#include - -#include - -#define VERBOSE 1 -#define COUNT 3000000 - -semaphore_t sender_sema = SEMAPHORE_NULL; -mach_port_t msg_port = MACH_PORT_NULL; -boolean_t msg_port_modref = FALSE; - -void * -sender(void *arg) -{ - mach_msg_empty_send_t smsg; - mach_port_t notify, old_notify; - kern_return_t kr; - boolean_t msg_inited; - boolean_t use_sp = *(boolean_t *)arg; - int send_possible_count = 0; - - fprintf(stderr, "starting a thread %susing send-possible notifications.\n", - (!use_sp) ? 
"not " : ""); - - if (use_sp) { - kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, ¬ify); - if (KERN_SUCCESS != kr) { - mach_error("mach_port_allocate(notify)", kr); - exit(1); - } - - request: - kr = mach_port_request_notification(mach_task_self(), msg_port, - MACH_NOTIFY_SEND_POSSIBLE, 0 /* delayed */, - notify, MACH_MSG_TYPE_MAKE_SEND_ONCE, - &old_notify); - if (KERN_INVALID_ARGUMENT == kr && msg_port_modref) - goto done; - - if (KERN_SUCCESS != kr) { - mach_error("mach_port_request_notification(MACH_NOTIFY_SEND_POSSIBLE)", kr); - exit(1); - } - if (MACH_PORT_NULL != old_notify) { - fprintf(stderr, "unexecpted old notify port (0x%x)\n", old_notify); - exit(1); - } - } - - msg_inited = FALSE; - - for (;;) { - mach_send_possible_notification_t nmsg; - mach_msg_option_t options; - mach_msg_return_t mret; - - if (!msg_inited) { - mach_msg_option_t options; - - smsg.header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); - smsg.header.msgh_remote_port = msg_port; - smsg.header.msgh_local_port = MACH_PORT_NULL; - smsg.header.msgh_size = sizeof(smsg); - smsg.header.msgh_id = 0; - msg_inited = TRUE; - } - - options = MACH_SEND_MSG | MACH_SEND_TIMEOUT; - if (use_sp) - options |= MACH_SEND_NOTIFY; - - mret = mach_msg(&smsg.header, options, - sizeof(smsg), 0, - MACH_PORT_NULL, - MACH_MSG_TIMEOUT_NONE /* immediate timeout */, - MACH_PORT_NULL); - - if (MACH_MSG_SUCCESS == mret) { - msg_inited = FALSE; - continue; - } - - if (MACH_SEND_INVALID_DEST == mret) - break; - - if (MACH_SEND_TIMED_OUT != mret) { - mach_error("mach_msg(send)", mret); - exit(1); - } - - if (use_sp) { - - /* Wait for the send-possible notification */ - mret = mach_msg(&nmsg.not_header, MACH_RCV_MSG | MACH_RCV_TIMEOUT, - 0, sizeof(nmsg), - notify, - 10000 /* 10 second timeout */, - MACH_PORT_NULL); - - if (msg_port_modref) - goto done; - - if (MACH_RCV_TIMED_OUT == mret) { - fprintf(stderr, "FAILED! 
Didn't receive send-possible notification\n"); - exit(1); - } - - if (MACH_MSG_SUCCESS != mret) { - mach_error("mach_msg_receive(notify)\n", mret); - exit(1); - } - - switch (nmsg.not_header.msgh_id) { - - case MACH_NOTIFY_SEND_POSSIBLE: - if (nmsg.not_port != msg_port) { - fprintf(stderr, "send possible notification about wrong port (0x%x != 0x%x)\n", nmsg.not_port, msg_port); - exit(1); - } - send_possible_count++; - - semaphore_signal_all(sender_sema); - goto request; - - case MACH_NOTIFY_DEAD_NAME: - if (nmsg.not_port != msg_port) { - fprintf(stderr, "dead name notification about wrong port (0x%x != 0x%x)\n", nmsg.not_port, msg_port); - exit(1); - } - goto done; - default: - fprintf(stderr, "unexpected notify id (%d)\n", nmsg.not_header.msgh_id); - exit(1); - } - } else { - semaphore_wait(sender_sema); - } - } - - done: - if (use_sp) { - mach_port_destroy(mach_task_self(), notify); - fprintf(stderr, "received %d send-possible notifications\n", send_possible_count); - } - return(NULL); -} - -int -main(int argc, char **argv) { - mach_msg_return_t mret; - mach_port_limits_t limits; - pthread_t thread1, thread2, thread3; - boolean_t thread1_arg, thread2_arg, thread3_arg; - kern_return_t kr; - int i, res; - - /* allocate receive and send right for the message port */ - kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &msg_port); - if (KERN_SUCCESS != kr) { - mach_error("mach_port_allocate(msg_port)", kr); - exit(1); - } - kr = mach_port_insert_right(mach_task_self(), msg_port, msg_port, MACH_MSG_TYPE_MAKE_SEND); - if (KERN_SUCCESS != kr) { - mach_error("mach_port_insert_right(msg_port)", kr); - exit(1); - } - - /* bump its qlimit up enough to allow races to develop between threads */ - limits.mpl_qlimit = 100; - kr = mach_port_set_attributes(mach_task_self(), msg_port, - MACH_PORT_LIMITS_INFO, (mach_port_info_t)&limits, sizeof(limits)/sizeof(int)); - if (KERN_SUCCESS != kr) { - mach_error("mach_port_set_attributes(msg_port)", kr); - exit(1); - } - - kr = semaphore_create(mach_task_self(), &sender_sema, SYNC_POLICY_FIFO, 0 /* initial value */); - if (KERN_SUCCESS != kr) { - mach_error("semaphore_create(sender_sema)\n", kr); - exit(1); - } - - thread1_arg = FALSE; /* don't use send-possible notifications */ - res = pthread_create(&thread1, (pthread_attr_t *)NULL, sender, &thread1_arg); - if (res) { - perror("pthread_create(non-send-possible_thread-1)"); - exit(1); - } - - thread2_arg = FALSE; /* don't use send-possible notifications */ - res = pthread_create(&thread2, (pthread_attr_t *)NULL, sender, &thread2_arg); - if (res) { - perror("pthread_create(non-send-possible_thread-2)"); - exit(1); - } - - thread3_arg = TRUE; /* use send-possible notifications */ - res = pthread_create(&thread3, (pthread_attr_t *)NULL, sender, &thread3_arg); - if (res) { - perror("pthread_create(send-possible-thread-3)"); - exit(1); - } - - for (i=0; i < COUNT; i++) { - mach_msg_empty_rcv_t rmsg; - - mret = mach_msg(&rmsg.header, MACH_RCV_MSG, - 0, sizeof(rmsg), - msg_port, - MACH_MSG_TIMEOUT_NONE, - MACH_PORT_NULL); - if (MACH_MSG_SUCCESS != mret) { - mach_error("mach_msg_receive(msg_port)\n", mret); - exit(1); - } - } - - msg_port_modref = TRUE; - kr = mach_port_mod_refs(mach_task_self(), msg_port, MACH_PORT_RIGHT_RECEIVE, -1); - if (KERN_SUCCESS != kr) { - mach_error("mach_port_mod_refs(msg_port)", kr); - exit(1); - } - - kr = semaphore_destroy(mach_task_self(), sender_sema); - if (KERN_SUCCESS != kr) { - mach_error("semaphore_destroy(sender_sema)", kr); - exit(1); - } - - res = pthread_join(thread1, 
NULL); - if (res) { - perror("pthread_join(thread1)"); - exit(1); - } - res = pthread_join(thread2, NULL); - if (res) { - perror("pthread_join(thread2)"); - exit(1); - } - res = pthread_join(thread3, NULL); - if (res) { - perror("pthread_join(thread3)"); - exit(1); - } - - printf("[PASSED] Test sprace_test_11891562 passed. \n"); - exit(0); -} - diff --git a/tools/tests/unit_tests/test_waitqlocktry_12053360.c b/tools/tests/unit_tests/test_waitqlocktry_12053360.c deleted file mode 100644 index 6ce335f2a..000000000 --- a/tools/tests/unit_tests/test_waitqlocktry_12053360.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * File: test_waitqlocktry_12053360.c - * Test Description: This is a load test for wait queues in the kernel. It is designed to exercise the locking of threads and - * wait queues in the face of timer expirations. The overall runtime is limited to 90 secs. - * In case of an inconsistency, the system has been found to panic within the first 15 secs. - * Radar: - */ - -#include -#include -#include -#include -#include -#include - -#define MAX_TEST_RUN_TIME 90 -uint32_t test_usleep_max; - -void* -test_thread(void *arg __unused) -{ - while (1) { - usleep(random() % test_usleep_max); - } - - return NULL; -} - - -int -main(int argc, const char **argv) -{ - pthread_t *threads; - uint32_t nthreads, i; - int tmp, result; - - if (argc != 3) { - printf("Usage: %s \n", argv[0]); - printf("Currently defaulting to 100us and 100 threads\n"); - test_usleep_max = 100; - nthreads = 100; - }else { - - tmp = atoi(argv[1]); - if (tmp <= 0) { - printf("Sleep time must be > 0.\n"); - exit(1); - } - - test_usleep_max = (uint32_t)tmp; - - tmp = atoi(argv[2]); - if (tmp <= 0) { - printf("Num threads must be > 0.\n"); - exit(1); - } - nthreads = (uint32_t)tmp; - } - threads = (pthread_t*)malloc(nthreads * sizeof(pthread_t)); - if (threads == NULL) { - printf("Failed to allocate thread array.\n"); - exit(1); - } - - printf("Creating %u threads with a max sleep time of %uusec.\n", nthreads, test_usleep_max); - srand(time(NULL)); - for (i = 0; i < nthreads; i++) { - result = pthread_create(&threads[i], NULL, test_thread, NULL); - if (result != 0) { - printf("Failed to allocate thread.\n"); - exit(1); - } - } - - printf("Main thread sleeping for %d secs\n", MAX_TEST_RUN_TIME); - sleep(MAX_TEST_RUN_TIME); - printf("Success. 
Exiting..\n"); - return 0; -} diff --git a/tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c b/tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c deleted file mode 100644 index d71ab1bcc..000000000 --- a/tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include -#include - -int main(int argc, char *argv[]) -{ - char **envp = { NULL }; - char *mycount = "1"; - char *nargvp[] = { argv[0], mycount , NULL}; - char *progpath = argv[0]; - char buf[50]; - char oldcount[30]; - int envcount=0; - if (argc >= 2){ - envcount = atoi(argv[1]); - printf("count = %d \n", envcount); - sprintf(buf, "%d", envcount+1); - nargvp[1] = buf; - } - char **nargvpp = nargvp; - if (envcount < 8 ) - fork(); - if (envcount > 320) - exit(0); - dispatch_apply(32, - dispatch_get_global_queue(0,0), - ^(size_t i __attribute__((unused))) { - execve(progpath,nargvpp,envp); - }); - - return 0; -} diff --git a/tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs b/tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs deleted file mode 100644 index e528df455..000000000 --- a/tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c b/tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c deleted file mode 100644 index e5ab85d45..000000000 --- a/tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c +++ /dev/null @@ -1,190 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "excserver.h" - -/* - * Test program that sets up a Mach exception handler, - * then performs 10000 (COUNT) invalid memory accesses and makes - * sure all thread_get_state variants can be executed - * from inside the exception handler. 
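 *
 * Minimal sketch of the flavor-enumeration idea (editor's illustration, not
 * from the original file): ask which flavors the thread supports, then fetch
 * each one. Error handling is elided.
 */
#if 0 /* illustrative only */
static void
get_all_thread_states(thread_act_t th)
{
	thread_state_flavor_t flavors[128];
	thread_state_data_t state;
	mach_msg_type_number_t count = sizeof(flavors) / sizeof(natural_t);

	/* THREAD_STATE_FLAVOR_LIST fills flavors[] with the supported ids */
	if (thread_get_state(th, THREAD_STATE_FLAVOR_LIST,
	    (thread_state_t)flavors, &count) == KERN_SUCCESS) {
		mach_msg_type_number_t i, n;
		for (i = 0; i < count; i++) {
			n = THREAD_STATE_MAX;
			(void) thread_get_state(th, flavors[i],
			    (thread_state_t)state, &n);
		}
	}
}
#endif

/*
 * Flavor counts are expressed in natural_t units, which is why the handler
 * below divides sizeof(flavors) by sizeof(natural_t) before each list query.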
- */ -void *handler(void *); -void *spin(void *); -dispatch_semaphore_t start_sema; -volatile int iteration; - -#define COUNT 10000 - -int main(int argc, char *argv[]) { - int ret; - pthread_t handle_thread; - char *buffer = valloc(4096); - int i; - int ncpu; - size_t ncpucount = sizeof(ncpu); - - start_sema = dispatch_semaphore_create(0); - - ret = sysctlbyname("hw.ncpu", &ncpu, &ncpucount, NULL, 0); - if (ret) - err(1, "sysctlbyname"); - - for (i=0; i < ncpu; i++) { - pthread_t spin_thread; - - ret = pthread_create(&spin_thread, NULL, spin, NULL); - if (ret) - err(1, "pthread_create"); - } - - sleep(1); - ret = pthread_create(&handle_thread, NULL, handler, NULL); - if (ret) - err(1, "pthread_create"); - - dispatch_semaphore_wait(start_sema, DISPATCH_TIME_FOREVER); - - for (iteration = 0; iteration < COUNT; iteration++) { - ret = mprotect(buffer, 4096, PROT_NONE); - if (ret != 0) - err(1, "mprotect"); - - usleep(1000); - - volatile float a = ((float)iteration)/2.4f; - *buffer = '!'; - } - - return 0; -} - -void *handler(void *arg __unused) { - kern_return_t kret; - mach_port_t exception_port; - - kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, - &exception_port); - if (kret != KERN_SUCCESS) - errx(1, "mach_port_allocate: %s (%d)", mach_error_string(kret), kret); - - kret = mach_port_insert_right(mach_task_self(), exception_port, exception_port, MACH_MSG_TYPE_MAKE_SEND); - if (kret != KERN_SUCCESS) - errx(1, "mach_port_insert_right: %s (%d)", mach_error_string(kret), kret); - - kret = task_set_exception_ports(mach_task_self(), - EXC_MASK_BAD_ACCESS, - exception_port, - EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, - 0); - if (kret != KERN_SUCCESS) - errx(1, "task_set_exception_ports: %s (%d)", mach_error_string(kret), kret); - - dispatch_semaphore_signal(start_sema); - - kret = mach_msg_server(mach_exc_server, MACH_MSG_SIZE_RELIABLE, exception_port, 0); - if (kret != KERN_SUCCESS) - errx(1, "mach_msg_server: %s (%d)", mach_error_string(kret), kret); - - return NULL; -} - -kern_return_t catch_mach_exception_raise -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt -) -{ - int ret; - kern_return_t kret; - thread_state_flavor_t flavors[128]; - thread_state_data_t state; - mach_msg_type_number_t count; - int i, flcount; - -// printf("Successfully caught EXC_BAD_ACCESS %s(%d) at 0x%016llx\n", mach_error_string((int)code[0]), (int)code[0], code[1]); - - count = sizeof(flavors)/sizeof(natural_t); - kret = thread_get_state(thread, THREAD_STATE_FLAVOR_LIST_NEW, (thread_state_t)flavors, &count); - if (kret == KERN_INVALID_ARGUMENT) { - /* try older query */ - count = sizeof(flavors)/sizeof(natural_t); - kret = thread_get_state(thread, THREAD_STATE_FLAVOR_LIST, (thread_state_t)flavors, &count); - if (kret != KERN_SUCCESS) - errx(1, "thread_get_state(THREAD_STATE_FLAVOR_LIST): %s (%d)", mach_error_string(kret), kret); - } else if (kret != KERN_SUCCESS) - errx(1, "thread_get_state(THREAD_STATE_FLAVOR_LIST_NEW): %s (%d)", mach_error_string(kret), kret); - - flcount = count; - for (i=0; i < flcount; i++) { - thread_state_flavor_t flavor; - - flavor = flavors[(i + iteration) % flcount]; - count = THREAD_STATE_MAX; - kret = thread_get_state(thread, flavor, (thread_state_t)state, &count); - if (kret != KERN_SUCCESS) - errx(1, "thread_get_state(%d): %s (%d)", flavor, mach_error_string(kret), kret); - } - - ret = mprotect((void *)code[1], 4096, PROT_WRITE); - if (ret != 0) - 
err(1, "mprotect"); - - return KERN_SUCCESS; -} - -kern_return_t catch_mach_exception_raise_state -( - mach_port_t exception_port, - exception_type_t exception, - const mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - const thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt -) -{ - errx(1, "Unsupported catch_mach_exception_raise_state"); - return KERN_NOT_SUPPORTED; -} - -kern_return_t catch_mach_exception_raise_state_identity -( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt, - int *flavor, - thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt -) -{ - errx(1, "Unsupported catch_mach_exception_raise_state_identity"); - return KERN_NOT_SUPPORTED; -} - -void *spin(void *arg __unused) { - volatile unsigned int a; - - while (1) { - a++; - } - - return NULL; -} diff --git a/tools/tests/unit_tests/xnu_raft_tests.py b/tools/tests/unit_tests/xnu_raft_tests.py deleted file mode 100755 index bf273c79a..000000000 --- a/tools/tests/unit_tests/xnu_raft_tests.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# -# Python Imports -import os -import sys -import re - -""" -xnu_raft_tests -Automate testing of unit tests for xnu. - -2012/02/23 -""" - -# this needs to be first thing for raft to load its environment correctly -if __name__ == '__main__': - # The following code allows this test to be invoked outside the harness and should be left unchanged - args = [os.path.realpath(os.path.expanduser("/usr/local/bin/raft")), "-f"] + sys.argv - os.execv(args[0], args) - - -# Library Imports -from raftlibs.coreos import crashReporterStop, crashReporterStart, doPrivileged, runFunctionWithTestReRun -from raftlibs.coreos import runUniversalLogProcess, spotlightStopSubtest, spotlightStartSubtest, svnCheckoutTestTool, svnCheckoutToPath, runSimpleProcess - -from raft.core.logging import log_note - -# Raft Imports -from __test__ import __path__ - -# This is a Raft test. 
For more information see http://raft.apple.com -testDescription = "Runs all tests defined as targets in Makefile" # Add a brief description of test functionality -testVersion = "0.1" # Used to differentiate between results for different versions of the test -testState = DevelopmentState # Possible values: DevelopmentState, ProductionState - - -# class definitions -class xnuTest: - """ A container to hold test and its result """ - def __init__(self,testName): - self.name = str(testName) - self.buildStatus = False - self.executeStatus = False - self.exitValue = None - self.comments = '' - - def getName(self): - return self.name - - @staticmethod - def getSummaryHeader(): - return "| {0: ^40s} |{1: >6s} |{2: >5s} |{3: >10s} |{4}".format("Test Name", "Build", "Run", "ExitVal", "Comments") - - def getSummary(self): - formatString ="| {0: <40s} |{1: >6s} |{2: >5s} |{3: >10s} |{4}" - nameVal = str(self.name) - buildVal = str(self.buildStatus) - execVal = str(self.executeStatus) - exitVal = str(self.exitValue) - commentsVal = str(self.comments) - return formatString.format(nameVal, buildVal, execVal, exitVal, commentsVal) - -# global functions -def getTestsFromMakeFile(makeFilePath): - makeTargets=[] - targetRegex = re.compile("^\s*([a-zA-Z0-9_.]+)\s*:\s*([a-zA-Z0-9_.]*).*",re.IGNORECASE|re.DOTALL) - fh = open(makeFilePath,"r"); - for line in fh: - tmp_res = targetRegex.findall(line) - if len(tmp_res) == 1: - makeTargets.append(xnuTest(tmp_res[0][0])) - fh.close() - return makeTargets - - -def buildTest(test, path): - os.chdir(path) - result = doCommand("/usr/bin/make",test) - if result['status'] != 0: - print "Failed to Build %s" % test - print "**STDOUT**\n%s" % result['stdout'] - print "**STDERR**\n%s" % result['stderr'] - raise StandardError - log_note("Built %s successfully" % test) - -def executeTest(testObject,path): - os.chdir(path) - test = testObject.getName() - executable_path = os.path.join(path, test) - print "[TEST] %s" % test - print "[BEGIN] %s" % test - try: - result = runSimpleProcess(executable_path,testName()+"_"+test, wait_time=120) - testObject.exitValue = result['status'] - if result['status'] == 0: - print "[PASS] %s returned %d" % (test,result['status']) - except: - print "[FAIL] %s returned %d" % (test, result['status']) - testObject.comments = "Failed due to timeout or file not found error" - log_note("Completed running test %s" % test) - -def removeTestExecutable(test,path): - os.chdir(path) - doCommand("/bin/rm",test) - -def runTest(params): - # Change to /tmp, because make doesn't support directory paths with spaces - os.chdir("/private/tmp") - output= {'status': 1 } - try: - output = svnCheckoutTestTool("unit_tests") - except: - pass - if output['status'] != 0 : - # since we are not fully published yet. 
lets get data from a branch - print "Fetching unit_test roots from Branch instead of trunk" - baseURL = "http://src.apple.com/svn/xnu/branches/PR-10938974/tools/tests/unit_tests/" - output = svnCheckoutToPath(baseURL) - if output['status'] != 0 : - logFail("[FAIL] error in checkout from branch") - sys.exit(1) - - local_path = os.path.join(os.getcwd(), "unit_tests") - makefile_path = os.path.join(local_path, "Makefile") - build_path = os.path.join(local_path, "BUILD") - - - tests_to_run = getTestsFromMakeFile(makefile_path) - log_note("Starting raft tests for XNU") - stats = {"total":len(tests_to_run) , "pass":0, "fail":0} - for testObject in tests_to_run: - test = testObject.getName() - if test == "clean": - stats["pass"]+=1 - testObject.buildStatus = True - testObject.executeStatus = True - testObject.exitValue = 0 - continue - - log_note("Running test :%s" % test) - try: - buildTest(test,local_path) - testObject.buildStatus = True - res = executeTest(testObject,build_path) - testObject.executeStatus = True - if testObject.exitValue == 0 : - stats["pass"]+=1 - else: - stats["fail"]+=1 - removeTestExecutable(test,build_path) - logPass(test) - except: - logFail("[FAIL] %s failed." % test) - print "Finished running tests. Cleaning up" - doCommand("/usr/bin/make","clean") - #Now to print the Summary and statistics - print "\n\n Test Summary \n" - print xnuTest.getSummaryHeader() - for testObject in tests_to_run: - print testObject.getSummary() - print "\n===============================\n" - print "[SUMMARY]" - print "Total tests: %d" % stats["total"] - print "Passed : %d" % stats["pass"] - print "Failed : %d" % stats["fail"] - print "================================\n\n" - - logPass() # This line is implicit and can be removed diff --git a/tools/tests/xnu_quick_test/32bit_inode_tests.c b/tools/tests/xnu_quick_test/32bit_inode_tests.c index c6b1e6f48..209c64030 100644 --- a/tools/tests/xnu_quick_test/32bit_inode_tests.c +++ b/tools/tests/xnu_quick_test/32bit_inode_tests.c @@ -229,7 +229,7 @@ int statfs_32bit_inode_tests( void * the_argp ) } /* open kernel to use as test file for fstatfs */ - my_fd = open( "/mach_kernel", O_RDONLY, 0 ); + my_fd = open( "/System/Library/Kernels/kernel", O_RDONLY, 0 ); if ( my_fd == -1 ) { printf( "open call failed. got errno %d - %s. \n", errno, strerror( errno ) ); goto test_failed_exit; @@ -257,7 +257,7 @@ int statfs_32bit_inode_tests( void * the_argp ) } /* try again with statfs */ - my_err = statfs( "/mach_kernel", my_statfsp ); + my_err = statfs( "/System/Library/Kernels/kernel", my_statfsp ); if ( my_err == -1 ) { printf( "statfs call failed. got errno %d - %s. \n", errno, strerror( errno ) ); goto test_failed_exit; diff --git a/tools/tests/xnu_quick_test/README b/tools/tests/xnu_quick_test/README index 587999af2..861df134d 100644 --- a/tools/tests/xnu_quick_test/README +++ b/tools/tests/xnu_quick_test/README @@ -33,10 +33,28 @@ for "todo" in the source files for this project to locate which tests have known failures. And please tag any new exceptions you find with "todo" in the comment and the radar number of the bug. -To build a fat binary, export ARCH="i386 x86_64". This will work -for any architectures that Apple gcc recognizes. - -Added four defines which you can use at the compile line to build variants. +Building: +xnu_quick_test is built automatically by BNI for both Mac (10.9 and later), and +iOS (7 and later) trains, and is delivered on AppleInternal builds in +/AppleInternal/CoreOS/xnu_quick_test. 
It is built as part of the xnu_quick_test +build alias, so you can also find a copy on ~rc at: +~rc/Software/$RELEASE/Updates/$RELEASEVERSION/Roots/xnu_quick_test/AppleInternal/CoreOS/xnu_quick_test. + +Alternatively you can build it yourself using make like so: +SDKROOT=/path/to/sdk make + +For example: +# build for Mac, current OS +SDKROOT=/ make +# build for iOS +SDKROOT=`xcodebuild -sdk iphoneos.internal -version Path` make + +By default xnu builds all-way fat, but you can restrict this by explicitly +specifying architectures like so: +# build for only armv7 and armv7s +SDKROOT=`xcodebuild -sdk iphoneos.internal -version Path` make ARCH="armv7 armv7s" + +There are four defines which you can use at the compile line to build variants. DEBUG turn on additional printfs CONFORMANCE_TESTS_IN_XNU diff --git a/tools/tests/xnu_quick_test/commpage_tests.c b/tools/tests/xnu_quick_test/commpage_tests.c index 37e1ae621..5c3ac4c89 100644 --- a/tools/tests/xnu_quick_test/commpage_tests.c +++ b/tools/tests/xnu_quick_test/commpage_tests.c @@ -190,10 +190,8 @@ int commpage_data_tests( void * the_argp ) } /* We shouldn't be supporting userspace processor_start/processor_exit on embedded */ -#if !TARGET_OS_EMBEDDED ret = active_cpu_test(); if (ret) goto fail; -#endif /* !TARGET_OS_EMBEDDED */ #endif /* _COMM_PAGE_ACTIVE_CPUS */ #ifdef _COMM_PAGE_PHYSICAL_CPUS diff --git a/tools/tests/xnu_quick_test/kqueue_tests.c b/tools/tests/xnu_quick_test/kqueue_tests.c index 40069048c..663f1bb98 100644 --- a/tools/tests/xnu_quick_test/kqueue_tests.c +++ b/tools/tests/xnu_quick_test/kqueue_tests.c @@ -39,11 +39,12 @@ kmsg_send(mach_port_t remote_port, int index) VM_MAKE_TAG(VM_MEMORY_MACH_MSG) | TRUE ); if (my_kr != KERN_SUCCESS) return my_kr; - my_kmsg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); + my_kmsg->msgh_bits = + MACH_MSGH_BITS_SET(MACH_MSG_TYPE_COPY_SEND, 0, 0, 0); my_kmsg->msgh_size = size; my_kmsg->msgh_remote_port = remote_port; my_kmsg->msgh_local_port = MACH_PORT_NULL; - my_kmsg->msgh_reserved = 0; + my_kmsg->msgh_voucher_port = MACH_PORT_NULL; my_kmsg->msgh_id = msgh_id; my_kr = mach_msg( my_kmsg, MACH_SEND_MSG | MACH_MSG_OPTION_NONE, @@ -85,7 +86,6 @@ kmsg_recv(mach_port_t portset, mach_port_t port, int * msgh_id_return) static void * kmsg_consumer_thread(void * arg) { -#if !TARGET_OS_EMBEDDED int my_kqueue = *(int *)arg; int my_err; kern_return_t my_kr; @@ -130,10 +130,6 @@ kmsg_consumer_thread(void * arg) } } return (void *)0; -#else - printf( "\t--> Not supported on EMBEDDED TARGET\n" ); - return (void *)0; -#endif } /* ************************************************************************************************************** @@ -152,9 +148,7 @@ int kqueue_tests( void * the_argp ) size_t my_count, my_index; int my_sockets[ 2 ] = {-1, -1}; struct kevent my_keventv[3]; -#if !TARGET_OS_EMBEDDED struct kevent64_s my_kevent64; -#endif struct timespec my_timeout; char my_buffer[ 16 ]; kern_return_t kr; @@ -270,7 +264,6 @@ int kqueue_tests( void * the_argp ) goto test_failed_exit; } -#if !TARGET_OS_EMBEDDED /* use kevent64 to test EVFILT_PROC */ EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); my_err = kevent64( my_kqueue, &my_kevent64, 1, NULL, 0, 0, 0); @@ -286,7 +279,6 @@ int kqueue_tests( void * the_argp ) printf( "kevent64 call to get proc exit failed with error %d - \"%s\" \n", errno, strerror( errno) ); goto test_failed_exit; } -#endif /* tell child to get to work */ my_count = write( my_sockets[0], "g", 1 ); @@ -341,7 +333,6 @@ int kqueue_tests( void * 
the_argp ) goto test_failed_exit; } -#if !TARGET_OS_EMBEDDED /* look for child exit notification on the kevent64 kqueue */ EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_CLEAR, NOTE_EXIT, 0, 0, 0, 0 ); my_err = kevent64( my_kqueue64, NULL, 0, &my_kevent64, 1, 0, 0); @@ -513,7 +504,6 @@ int kqueue_tests( void * the_argp ) printf( "data %ld \n", (long int) my_keventv[0].data ); goto test_failed_exit; } -#endif my_err = 0; goto test_passed_exit; diff --git a/tools/tests/xnu_quick_test/main.c b/tools/tests/xnu_quick_test/main.c index dda938411..468249080 100644 --- a/tools/tests/xnu_quick_test/main.c +++ b/tools/tests/xnu_quick_test/main.c @@ -72,9 +72,7 @@ struct test_entry g_tests[] = {1, &access_chmod_fchmod_test, NULL, "access, chmod, fchmod"}, {1, &chown_fchown_lchown_lstat_symlink_test, NULL, "chown, fchown, lchown, lstat, readlink, symlink"}, {1, &fs_stat_tests, NULL, "fstatfs, getfsstat, statfs, fstatfs64, getfsstat64, statfs64"}, -#if !TARGET_OS_EMBEDDED {1, &statfs_32bit_inode_tests, NULL, "32-bit inode versions: fstatfs, getfsstat, statfs"}, -#endif {1, &getpid_getppid_pipe_test, NULL, "getpid, getppid, pipe"}, {1, &uid_tests, NULL, "getauid, gettid, getuid, geteuid, issetugid, setaudit_addr, seteuid, settid, settid_with_pid, setuid"}, {1, &mkdir_rmdir_umask_test, NULL, "mkdir, rmdir, umask"}, @@ -102,10 +100,8 @@ struct test_entry g_tests[] = {1, &quotactl_test, NULL, "quotactl"}, {1, &limit_tests, NULL, "getrlimit, setrlimit"}, {1, &directory_tests, NULL, "getattrlist, getdirentriesattr, setattrlist"}, -#if !TARGET_OS_EMBEDDED {1, &getdirentries_test, NULL, "getdirentries"}, {1, &exchangedata_test, NULL, "exchangedata"}, -#endif {1, &searchfs_test, NULL, "searchfs"}, {1, &sema2_tests, NULL, "sem_close, sem_open, sem_post, sem_trywait, sem_unlink, sem_wait"}, {1, &sema_tests, NULL, "semctl, semget, semop"}, @@ -122,11 +118,16 @@ struct test_entry g_tests[] = {1, &atomic_fifo_queue_test, NULL, "OSAtomicFifoEnqueue, OSAtomicFifoDequeue"}, #endif {1, &sched_tests, NULL, "Scheduler tests"}, -#if TARGET_OS_EMBEDDED - {1, &content_protection_test, NULL, "Content protection tests"}, -#endif {1, &pipes_test, NULL, "Pipes tests"}, {1, &kaslr_test, NULL, "KASLR tests"}, + {1, &getattrlistbulk_test, NULL, "getattrlistbulk"}, + {1, &openat_close_test, NULL, "openat, fpathconf, fstatat, close"}, + {1, &linkat_fstatat_unlinkat_test, NULL, "linkat, statat, unlinkat"}, + {1, &faccessat_fchmodat_fchmod_test, NULL, "faccessat, fchmodat, fchmod"}, + {1, &fchownat_fchown_symlinkat_test, NULL, "fchownat, symlinkat, readlinkat"}, + {1, &mkdirat_unlinkat_umask_test, NULL, "mkdirat, unlinkat, umask"}, + {1, &renameat_test, NULL, "renameat, fstatat"}, + {1, &set_exception_ports_test, NULL, "thread_set_exception_ports, task_set_exception_ports, host_set_exception_ports"}, {0, NULL, NULL, "last one"} }; @@ -135,9 +136,7 @@ static void list_all_tests( void ); static void mark_tests_to_run( long my_start, long my_end ); static int parse_tests_to_run( int argc, const char * argv[], int * indexp ); static void usage( void ); -#if !TARGET_OS_EMBEDDED static int setgroups_if_single_user(void); -#endif static const char *current_arch( void ); /* globals */ @@ -295,12 +294,10 @@ g_testbots_active = 1; printf("[TEST] xnu_quick_test \n"); /* Declare the beginning of test suite */ } -#if !TARGET_OS_EMBEDDED /* Populate groups list if we're in single user mode */ if (setgroups_if_single_user()) { return 1; } -#endif if ( list_the_tests != 0 ) { list_all_tests( ); return 0; } @@ -555,7 +552,6 @@ static void usage( void ) } /* 
usage */ -#if !TARGET_OS_EMBEDDED /* This is a private API between Libinfo, Libc, and the DirectoryService daemon. * Since we are trying to determine if an external provider will back group * lookups, we can use this, without relying on additional APIs or tools @@ -612,7 +608,6 @@ setgroups_if_single_user(void) return retval; } -#endif static const char *current_arch( void ) { diff --git a/tools/tests/xnu_quick_test/makefile b/tools/tests/xnu_quick_test/makefile index 9717d4c26..8f397832d 100644 --- a/tools/tests/xnu_quick_test/makefile +++ b/tools/tests/xnu_quick_test/makefile @@ -1,37 +1,23 @@ SDKROOT ?= / -Product ?= $(shell xcodebuild -sdk $(SDKROOT) -version PlatformPath | head -1 | sed 's,^.*/\([^/]*\)\.platform$$,\1,') - -# This should not be a long term solution to ; this -# makefile needs to be changed to identify its targets appropriately in the -# absence of tconf, but we'll go with the quick change for now. - -ifeq "$(RC_TARGET_CONFIG)" "iPhone" -Embedded?=YES -else -Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) -endif - -ifeq "$(Embedded)" "YES" -Product?=iPhone -else -Product?=MacOSX -endif +PLATFORMPATH := $(shell xcrun -sdk $(SDKROOT) -show-sdk-platform-path) +PLATFORM := $(shell echo $(PLATFORMPATH) | sed 's,^.*/\([^/]*\)\.platform$$,\1,') SDKVERSION:=$(shell xcodebuild -sdk $(SDKROOT) -version SDKVersion | head -1) - -ifeq "$(Product)" "iPhoneOS" SDKPATH := $(shell xcodebuild -sdk $(SDKROOT) -version Path) + +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) CFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION) LIBFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION) else CFLAGS += -mmacosx-version-min=$(SDKVERSION) LIBFLAGS += -mmacosx-version-min=$(SDKVERSION) -Product ?= MacOSX endif -CC := xcrun -sdk $(SDKROOT) cc +CC := $(shell xcrun -sdk "$(SDKROOT)" -find cc) HOSTCC := cc +CODESIGN := $(shell xcrun -sdk "$(SDKROOT)" -find codesign) + SRCROOT?=$(shell /bin/pwd) OBJROOT?=$(SRCROOT)/BUILD/obj DSTROOT?=$(SRCROOT)/BUILD/dst @@ -43,12 +29,10 @@ OBJSUBPATH := $(OBJROOT) ifdef RC_ARCHS ARCH:=$(RC_ARCHS) else - ifeq "$(Product)" "MacOSX" + ifeq ($(PLATFORM),MacOSX) ARCH:=i386 x86_64 - else ifeq "$(Product)" "iPhoneOS" - ARCH:=armv7s armv7 - else ifeq "$(Product)" "AppleTV" - ARCH:=i386 + else ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) + ARCH:=arm64 armv7s armv7 endif endif @@ -63,7 +47,7 @@ CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B LIBFLAGS += -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders -F/AppleInternal/Library/Frameworks/ # The current implementation of the content protection test requires IOKit. 
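# (The hunks below switch the iOS check from the old Product/Embedded
# variables to PLATFORM, derived from xcrun's -show-sdk-platform-path;
# $(filter iPhoneOS iPhoneOSNano,$(PLATFORM)) accepts either iOS platform
# name in a single test.)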
-ifeq "$(Product)" "iPhoneOS" +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) LIBFLAGS += -framework IOKit endif @@ -75,9 +59,11 @@ MY_OBJECTS := $(OBJSUBPATH)/main.o $(OBJSUBPATH)/memory_tests.o $(OBJSUBPATH)/mi $(OBJSUBPATH)/atomic_fifo_queue_test.o $(OBJSUBPATH)/sched_tests.o \ $(OBJSUBPATH)/pipes_tests.o -ifneq "$(Product)" "iPhoneOS" +ifeq ($(PLATFORM),MacOSX) MY_OBJECTS += $(OBJSUBPATH)/32bit_inode_tests.o -else +endif + +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) MY_OBJECTS += $(OBJSUBPATH)/content_protection_test.o endif @@ -88,6 +74,9 @@ ifndef RC_ProjectName endif $(CC) -g $(MY_ARCH) $(LIBFLAGS) -o $(SYMROOT)/xnu_quick_test $(MY_OBJECTS) /usr/bin/dsymutil $(SYMROOT)/xnu_quick_test +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) + $(CODESIGN) -f -s - --entitlements $(SRCSUBPATH)/xnu_quick_test.entitlements $(SYMROOT)/xnu_quick_test +endif /usr/bin/ditto $(SYMROOT)/xnu_quick_test $(DSTROOT)/xnu_quick_test # This target is defined for testbots. @@ -103,13 +92,13 @@ testbots: xnu_quick_test # helper processes for the 64-bit version of xnu_quick_test to test the conversion # from a 32-bit process to a 64-bit process. helpers : $(SRCSUBPATH)/helpers/sleep.c $(SRCSUBPATH)/helpers/launch.c $(SRCSUBPATH)/helpers/arch.c $(SRCSUBPATH)/helpers/data_exec.c helperdir $(OBJSUBPATH)/misc.o -ifneq "$(Product)" "iPhoneOS" +ifeq ($(PLATFORM),MacOSX) ifneq "$(ARCH_32)" "" $(CC) -g $(ARCH_32_FLAGS) $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-i386 /usr/bin/ditto $(SYMROOT)/sleep-i386 $(DSTROOT)/helpers/ endif endif -ifeq "$(Product)" "MacOSX" +ifeq ($(PLATFORM),MacOSX) ifneq "$(ARCH_32)" "" $(CC) -g $(LIBFLAGS) $(ARCH_32_FLAGS) $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-i386 $(CC) -g $(ARCH_32_FLAGS) -DNXDATA32TESTNONX $(SRCSUBPATH)/helpers/data_exec.c -o $(SYMROOT)/data_exec32nonxspawn @@ -124,22 +113,26 @@ endif $(CC) -g $(MY_ARCH) $(SRCSUBPATH)/helpers/data_exec.c -o $(SYMROOT)/data_exec /usr/bin/ditto $(SYMROOT)/data_exec $(DSTROOT)/helpers/ endif -ifeq "$(Product)" "iPhoneOS" +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) ifneq "$(ARCH_32)" "" $(CC) $(CFLAGS) $(ARCH_32_FLAGS) $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-arm $(CC) $(LIBFLAGS) $(CFLAGS) $(ARCH_32_FLAGS) $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-arm /usr/bin/ditto $(SYMROOT)/sleep-arm $(SYMROOT)/launch-arm $(DSTROOT)/helpers/ endif +ifneq "$(ARCH_64)" "" + $(CC) $(CFLAGS) $(ARCH_64_FLAGS) $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-arm64 + /usr/bin/ditto $(SYMROOT)/sleep-arm64 $(DSTROOT)/helpers/ +endif endif $(CC) -g $(MY_ARCH) $(CFLAGS) $(SRCSUBPATH)/helpers/arch.c -o $(SYMROOT)/arch /usr/bin/ditto $(SYMROOT)/arch $(DSTROOT)/helpers/ - + helperdir : mkdir -p $(DSTROOT)/helpers $(OBJSUBPATH) : mkdir -p $(OBJSUBPATH); - + $(DSTROOT) : mkdir -p $(DSTROOT); @@ -148,21 +141,21 @@ $(SYMROOT) : $(OBJSUBPATH)/main.o : $(SRCSUBPATH)/main.c $(SRCSUBPATH)/tests.h $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/main.c -o $@ - + $(OBJSUBPATH)/memory_tests.o : $(SRCSUBPATH)/memory_tests.c $(SRCSUBPATH)/tests.h $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/memory_tests.c -o $@ # misc.o has to be built 3-way for the helpers to link $(OBJSUBPATH)/misc.o : $(SRCSUBPATH)/misc.c $(SRCSUBPATH)/tests.h -ifeq "$(Product)" "iPhoneOS" +ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),) $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/misc.c -o $@ else $(CC) -arch i386 -arch x86_64 $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/misc.c -o $@ endif - + 
$(OBJSUBPATH)/sema_tests.o : $(SRCSUBPATH)/sema_tests.c $(SRCSUBPATH)/tests.h $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/sema_tests.c -o $@ - + $(OBJSUBPATH)/shared_memory_tests.o : $(SRCSUBPATH)/shared_memory_tests.c $(SRCSUBPATH)/tests.h $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/shared_memory_tests.c -o $@ @@ -189,7 +182,7 @@ $(OBJSUBPATH)/32bit_inode_tests.o : $(SRCSUBPATH)/32bit_inode_tests.c $(SRCSUBPA $(OBJSUBPATH)/commpage_tests.o : $(SRCSUBPATH)/commpage_tests.c $(SRCSUBPATH)/tests.h $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/commpage_tests.c -o $@ - + $(OBJSUBPATH)/atomic_fifo_queue_test.o : $(SRCSUBPATH)/atomic_fifo_queue_test.c $(SRCSUBPATH)/tests.h $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/atomic_fifo_queue_test.c -o $@ diff --git a/tools/tests/xnu_quick_test/misc.c b/tools/tests/xnu_quick_test/misc.c index 72393c60e..74c454a36 100644 --- a/tools/tests/xnu_quick_test/misc.c +++ b/tools/tests/xnu_quick_test/misc.c @@ -179,6 +179,7 @@ int do_execve_test(char * path, char * argv[], void * envp, int killwait) printf("CWD= %s\n", getwd(NULL)); fflush(stdout); #endif + /* vfork then execve sleep system command (which we will kill from the parent process) */ my_pid = vfork(); if (my_pid == -1) { @@ -331,6 +332,9 @@ int get_architecture() rval = INTEL; break; case CPU_TYPE_ARM: +#ifdef CPU_TYPE_ARM64 + case CPU_TYPE_ARM64: +#endif rval = ARM; break; } diff --git a/tools/tests/xnu_quick_test/pipes_tests.c b/tools/tests/xnu_quick_test/pipes_tests.c index 594a6aa62..b6e8384c9 100644 --- a/tools/tests/xnu_quick_test/pipes_tests.c +++ b/tools/tests/xnu_quick_test/pipes_tests.c @@ -45,6 +45,7 @@ #include #include #include +#include /**************************/ /**************************/ @@ -498,7 +499,7 @@ void clear_data(int *ptr, int len); #define BUFMAX 20000 #define BUFMAXLEN (BUFMAX * sizeof(int)) -const unsigned int pipesize_blocks[] = {128,256,1024,2048,PAGE_SIZE,PAGE_SIZE*2,PAGE_SIZE*4}; +const unsigned int pipesize_blocks[] = {128,256,1024,2048,4096,8192,16384}; static const int bufsizes[] = { 128, 512, 1024, 2048, 4096, 16384 }; int data[BUFMAX],readbuf[BUFMAX]; diff --git a/tools/tests/xnu_quick_test/sema_tests.c b/tools/tests/xnu_quick_test/sema_tests.c index bbb84439d..6c5bc805d 100644 --- a/tools/tests/xnu_quick_test/sema_tests.c +++ b/tools/tests/xnu_quick_test/sema_tests.c @@ -17,7 +17,6 @@ */ int sema_tests( void * the_argp ) { -#if !TARGET_OS_EMBEDDED int my_err, i; int my_sem_id = -1; union semun my_sem_union; @@ -94,10 +93,6 @@ test_passed_exit: semctl( my_sem_id, 0, IPC_RMID, my_sem_union ); } return( my_err ); -#else - printf( "\t--> Not supported on EMBEDDED TARGET\n" ); - return 0; -#endif } diff --git a/tools/tests/xnu_quick_test/shared_memory_tests.c b/tools/tests/xnu_quick_test/shared_memory_tests.c index e22ce034e..876e7c6b1 100644 --- a/tools/tests/xnu_quick_test/shared_memory_tests.c +++ b/tools/tests/xnu_quick_test/shared_memory_tests.c @@ -21,7 +21,6 @@ extern char g_target_path[ PATH_MAX ]; */ int shm_tests( void * the_argp ) { -#if !TARGET_OS_EMBEDDED int my_err; int my_shm_id; void * my_shm_addr = NULL; @@ -93,10 +92,6 @@ test_passed_exit: shmctl( my_shm_id, IPC_RMID, NULL); } return( my_err ); -#else - printf( "\t--> Not supported on EMBEDDED TARGET\n" ); - return 0; -#endif } diff --git a/tools/tests/xnu_quick_test/socket_tests.c b/tools/tests/xnu_quick_test/socket_tests.c index e9a34380f..306592c83 100644 --- a/tools/tests/xnu_quick_test/socket_tests.c +++ b/tools/tests/xnu_quick_test/socket_tests.c @@ -210,7 +210,6 @@ int socket_tests( void * 
the_argp ) } #endif -#if !TARGET_OS_EMBEDDED /* sendfile test. Open libsystem, set up some headers, and send it */ struct sf_hdtr my_sf_hdtr; int my_libsys_fd; @@ -255,7 +254,6 @@ int socket_tests( void * the_argp ) close ( my_child_fd ); exit ( -1 ); } -#endif /* tell parent we're done */ my_result = write( my_child_fd, "all done", 8 ); @@ -328,7 +326,6 @@ int socket_tests( void * the_argp ) } #endif -#if !TARGET_OS_EMBEDDED size_t neededBytes = 11; /* Check for sendfile output */ @@ -354,7 +351,6 @@ int socket_tests( void * the_argp ) goto test_failed_exit; } -#endif /* see if child is done. bzero so that string is NUL terminated */ bzero( (void *)&my_parent_buffer[0], sizeof(my_parent_buffer) ); diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c index 11aec361a..ad892f2ae 100644 --- a/tools/tests/xnu_quick_test/tests.c +++ b/tools/tests/xnu_quick_test/tests.c @@ -972,7 +972,6 @@ int access_chmod_fchmod_test( void * the_argp ) /* another test for the access system call -- refer to radar# 6725311 */ -#if !TARGET_OS_EMBEDDED /* * This test makes sure that the access system call does not give the current user extra @@ -1040,7 +1039,6 @@ int access_chmod_fchmod_test( void * the_argp ) goto test_failed_exit; } -#endif /* end of test*/ @@ -1088,7 +1086,6 @@ test_passed_exit: return( my_err ); } -#if !TARGET_OS_EMBEDDED static bool _prime_groups(void) { /* @@ -1106,7 +1103,6 @@ static bool _prime_groups(void) return true; } -#endif /* ************************************************************************************************************** * Test chown, fchown, lchown, lstat, readlink, symlink system calls. @@ -1114,7 +1110,6 @@ static bool _prime_groups(void) */ int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) { -#if !TARGET_OS_EMBEDDED int my_err, my_group_count, i; int my_fd = -1; char * my_pathp = NULL; @@ -1302,10 +1297,6 @@ test_passed_exit: vm_deallocate(mach_task_self(), (vm_address_t)my_link_pathp, PATH_MAX); } return( my_err ); -#else - printf( "\t--> Test not designed for EMBEDDED TARGET\n" ); - return 0; -#endif } /* ************************************************************************************************************** @@ -1338,7 +1329,6 @@ int fs_stat_tests( void * the_argp ) struct statfs * my_statfsp; kern_return_t my_kr; -#if !TARGET_OS_EMBEDDED void * my_buffer64p = NULL; struct statfs64 * my_statfs64p; @@ -1350,7 +1340,6 @@ int fs_stat_tests( void * the_argp ) goto test_failed_exit; } -#endif my_buffer_size = (sizeof(struct statfs) * 10); my_kr = vm_allocate((vm_map_t) mach_task_self(),(vm_address_t*) &my_bufferp, my_buffer_size, VM_FLAGS_ANYWHERE); @@ -1391,7 +1380,6 @@ int fs_stat_tests( void * the_argp ) goto test_failed_exit; } -#if !TARGET_OS_EMBEDDED /* now try statfs64 */ my_statfs64p = (struct statfs64 *) my_buffer64p; my_err = statfs64( STATFS_TEST_PATH, my_statfs64p ); @@ -1426,7 +1414,6 @@ int fs_stat_tests( void * the_argp ) printf( "getfsstat64 call failed. could not find valid f_fstypename! \n" ); goto test_failed_exit; } -#endif /* set up to validate results via multiple sources. we use getattrlist to get volume * related attributes to verify against results from fstatfs and statfs - but only if @@ -1450,7 +1437,6 @@ int fs_stat_tests( void * the_argp ) goto test_failed_exit; } -#if !TARGET_OS_EMBEDDED /* testing fstatfs64 */ my_statfs64p = (struct statfs64 *) my_buffer64p; my_err = fstatfs64( my_fd, my_statfs64p ); @@ -1465,7 +1451,6 @@ int fs_stat_tests( void * the_argp ) printf( "fstatfs64 call failed.
could not find valid f_fstypename! \n" ); goto test_failed_exit; } -#endif /* testing fstatfs */ my_statfsp = (struct statfs *) my_bufferp; @@ -1518,11 +1503,9 @@ test_passed_exit: if ( my_bufferp != NULL ) { vm_deallocate(mach_task_self(), (vm_address_t)my_bufferp, my_buffer_size); } -#if !TARGET_OS_EMBEDDED if ( my_buffer64p != NULL ) { vm_deallocate(mach_task_self(), (vm_address_t)my_buffer64p, my_buffer64_size); } -#endif return( my_err ); } @@ -1758,7 +1741,6 @@ int uid_tests( void * the_argp ) exit( -1 ); } -#if !TARGET_OS_EMBEDDED /* * test to make sure setaudit_addr doesn't cause audit info to get lost from * the credential. @@ -1785,7 +1767,6 @@ int uid_tests( void * the_argp ) printf("test failed - wrong audit ID was set - %d \n", my_aia.ai_auid); exit( -1 ); } -#endif /* change real uid and effective uid to current euid */ my_err = setuid( my_euid ); @@ -2124,7 +2105,23 @@ int execve_kill_vfork_test( void * the_argp ) } } else if(get_architecture() == ARM) { - errmsg = "execve failed: from arm forking and exec()ing arm process.\n"; +#ifdef CPU_TYPE_ARM64 + if (bits == 64) { + /* Running on arm64 hardware. */ + errmsg = "execve failed: from arm64 forking and exec()ing 64-bit arm process.\n"; + argvs[0] = "sleep-arm"; + if (do_execve_test("helpers/sleep-arm64", argvs, NULL, 1)) + goto test_failed_exit; + + /* Test posix_spawn for arm64 (should succeed) */ + errmsg = NULL; + if (do_spawn_test(CPU_TYPE_ARM64, 0)) + goto test_failed_exit; + } +#endif + + /* Exec arm test on both arm and arm64 */ + errmsg = "execve failed: from arm forking and exec()ing 32-bit arm process.\n"; argvs[0] = "sleep-arm"; if (do_execve_test("helpers/sleep-arm", argvs, NULL, 1)) goto test_failed_exit; @@ -2156,7 +2153,6 @@ test_failed_exit: */ int groups_test( void * the_argp ) { -#if !TARGET_OS_EMBEDDED int my_err, i; int my_group_count, my_orig_group_count; gid_t my_real_gid; @@ -2290,10 +2286,6 @@ test_failed_exit: test_passed_exit: return( my_err ); -#else - printf( "\t--> Test not designed for EMBEDDED TARGET\n" ); - return 0; -#endif } @@ -3441,7 +3433,6 @@ int fcntl_test( void * the_argp ) close( my_newfd ); my_newfd = -1; -#if !TARGET_OS_EMBEDDED /* This section of the test is specific for the desktop platform, refer */ /* While we're here, dup it via an open of /dev/fd/ .. 
*/ { @@ -3470,7 +3461,6 @@ int fcntl_test( void * the_argp ) } close ( my_newfd ); my_newfd = -1; -#endif my_err = 0; goto test_passed_exit; @@ -4041,7 +4031,6 @@ test_passed_exit: */ int quotactl_test( void * the_argp ) { -#if !TARGET_OS_EMBEDDED int my_err; int is_quotas_on = 0; struct dqblk my_quota_blk; @@ -4053,7 +4042,7 @@ int quotactl_test( void * the_argp ) } /* start off by checking the status of quotas on the boot volume */ - my_err = quotactl( "/mach_kernel", QCMD(Q_QUOTASTAT, USRQUOTA), 0, (caddr_t)&is_quotas_on ); + my_err = quotactl( "/System/Library/Kernels/kernel", QCMD(Q_QUOTASTAT, USRQUOTA), 0, (caddr_t)&is_quotas_on ); if ( my_err == -1 ) { printf( "quotactl - Q_QUOTASTAT - failed with errno %d - %s \n", errno, strerror( errno ) ); goto test_failed_exit; @@ -4065,7 +4054,7 @@ int quotactl_test( void * the_argp ) goto test_passed_exit; } - my_err = quotactl( "/mach_kernel", QCMD(Q_GETQUOTA, USRQUOTA), getuid(), (caddr_t)&my_quota_blk ); + my_err = quotactl( "/System/Library/Kernels/kernel", QCMD(Q_GETQUOTA, USRQUOTA), getuid(), (caddr_t)&my_quota_blk ); if ( my_err == -1 ) { printf( "quotactl - Q_GETQUOTA - failed with errno %d - %s \n", errno, strerror( errno ) ); goto test_failed_exit; @@ -4079,10 +4068,6 @@ test_failed_exit: test_passed_exit: return( my_err ); -#else - printf( "\t--> Not supported on EMBEDDED TARGET\n" ); - return 0; -#endif } /* ************************************************************************************************************** @@ -4499,11 +4484,10 @@ typedef struct packed_result packed_result; typedef struct packed_result * packed_result_p; #define MAX_MATCHES 10 -#define MAX_EBUSY_RETRIES 5 +#define MAX_EBUSY_RETRIES 20 int searchfs_test( void * the_argp ) { -#if !TARGET_OS_EMBEDDED int my_err, my_items_found = 0, my_ebusy_count; char * my_pathp = NULL; unsigned long my_matches; @@ -4698,10 +4682,6 @@ test_passed_exit: vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); -#else - printf( "\t--> Not supported on EMBEDDED TARGET\n" ); - return 0; -#endif } @@ -4986,7 +4966,6 @@ test_passed_exit: */ int message_queue_tests( void * the_argp ) { -#if !TARGET_OS_EMBEDDED int my_err; int my_msg_queue_id = -1; ssize_t my_result; @@ -5082,10 +5061,6 @@ test_passed_exit: msgctl( my_msg_queue_id, IPC_RMID, NULL ); } return( my_err ); -#else - printf( "\t--> Not supported on EMBEDDED TARGET \n" ); - return 0; -#endif } @@ -5232,6 +5207,1650 @@ test_failed_exit: return -1; } +typedef struct attrs { + uint32_t attrs_length; + attribute_set_t attrs_returned; + uint32_t attr_error; + attrreference_t attr_name; + fsobj_type_t attr_obj_type; + + union { + struct { + uint32_t entry_count; + } directory; + struct { + off_t size; + } file; + } attr_obj; + +} attrs_t; + +int getattrlistbulk_test( void * the_argp ) +{ + + int error; + struct attrlist attr_list; + attrs_t *attrsptr; + char *entry_start; + int retcount = 0, totalcount = 0; + int index; + char *nameptr; + int attr_buf_size; + char *attr_buf; + int dirfd = -1; + char* target = "/System/Library/CoreServices"; + + memset(&attr_list, 0, sizeof(attr_list)); + attr_list.bitmapcount = ATTR_BIT_MAP_COUNT; + attr_list.commonattr = ATTR_CMN_RETURNED_ATTRS | + ATTR_CMN_NAME | + ATTR_CMN_OBJTYPE | + ATTR_CMN_ERROR | + ATTR_FILE_TOTALSIZE| + ATTR_DIR_ENTRYCOUNT; + + error = 0; + /*allocate a buffer for 10 items*/ + attr_buf_size = 10 * (sizeof(attrs_t) + FILENAME_MAX ); + if (vm_allocate((vm_map_t) mach_task_self(), + (vm_address_t*)&attr_buf, + attr_buf_size, 
VM_FLAGS_ANYWHERE) != KERN_SUCCESS) { + printf( "vm_allocate failed with error %d - \"%s\" \n", + errno, strerror( errno) ); + attr_buf = NULL; + error = -1; + goto last_exit; + } + + dirfd = openat (AT_FDCWD, target, O_RDONLY, 0); + if (dirfd == -1) { + printf("openat \"%s\" failed with error %d - \"%s\" \n", + target, errno, strerror( errno)); + error = -1; + goto last_exit; + } + + do { + retcount = getattrlistbulk(dirfd, + &attr_list, &attr_buf[0], + attr_buf_size, FSOPT_PACK_INVAL_ATTRS); + if (retcount == -1) { + printf("getattrlistbulk on %s returned %d items\n", + target, totalcount); + printf("getattrlistbulk failed with error %d - \"%s\" \n", + errno, strerror( errno)); + error = -1; + break; + } else if (retcount == 0) { + /* No more entries in directory */ + printf("getattrlistbulk succeeded: found %d entries in %s\n", totalcount, target); + error = 0; + break; + } else { + totalcount += retcount; + entry_start = &attr_buf[0]; + for (index = 0; index < retcount; index++) { + /*set attrsptr to item record buffer*/ + attrsptr = (attrs_t *)entry_start; + + /* + *calculate starting point for next item in bulk + *list + */ + entry_start += attrsptr->attrs_length; + + if ((attrsptr->attrs_returned.commonattr & ATTR_CMN_ERROR) && + attrsptr->attr_error) { + nameptr = (char*)(&(attrsptr->attr_name)) + attrsptr->attr_name.attr_dataoffset; + printf("getattrlistbulk item \"%s\" ATTR_CMN_ERROR %d \"%s\"\n", + nameptr, attrsptr->attr_error, + strerror(attrsptr->attr_error)); + } + } + } + } while (1); + +last_exit: + if (dirfd != -1) { + (void)close(dirfd); + } + + if (attr_buf != NULL) { + vm_deallocate( + mach_task_self(), (vm_address_t)attr_buf, attr_buf_size); + } + + return error; +} + +#define INVALID_FD -173 +static int create_random_name_at(int the_dirfd, char *the_dirpathp, + char *the_namep, size_t the_namep_len, + char *the_pathp, size_t the_pathp_len, + int do_create ); +/* + * create_random_name_at - creates a file with a random / unique name in the given directory. + * when do_create is non-zero we create a file; otherwise we only generate a name that does + * not already exist in the given directory (nothing is created when do_create is 0). + * A name is generated relative to the directory fd. If both a directory path + * and a buffer to hold the full pathname are provided, an absolute pathname for the + * generated name is also returned in the_pathp. + * WARNING - caller provides enough space in the_namep buffer for longest possible name (NAME_MAX). + * WARNING - caller provides enough space in the_pathp buffer for longest possible path (PATH_MAX). + * RAND_MAX is currently 2147483647 (ten characters plus one for a slash) + */ +int create_random_name_at(int the_dirfd, char *the_dirpathp, + char *the_namep, size_t the_namep_len, + char *the_pathp, size_t the_pathp_len, + int do_create ) +{ + int i, my_err; + int my_fd = -1; + + for ( i = 0; i < 1; i++ ) { + int my_rand; + char *myp; + char my_name[32]; + + my_rand = rand( ); + sprintf( &my_name[0], "%d", my_rand ); + if ( (strlen( &my_name[0] ) + strlen( the_dirpathp ) + 2) > PATH_MAX ) { + printf( "%s - path to test file greater than PATH_MAX \n", __FUNCTION__ ); + return( -1 ); + } + + // generate name and absolute path + myp = the_namep; + *(myp) = (char)0x00; + strlcat(the_namep, &my_name[0], the_namep_len); + + /* + *If the caller has passed in a path pointer and directory path + *it means an absolute path is to be returned as well.
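+ *(Illustrative example, not from the original source: with the_dirpathp of
+ *"/tmp/testdir" and a generated name of "123456", the_pathp would come back
+ *as "/tmp/testdir/123456".)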
+ */ + if (the_pathp && the_dirpathp) { + *the_pathp = (char)0x00; + strlcat(the_pathp, the_dirpathp, the_pathp_len); + strlcat(the_pathp, "/", the_pathp_len); + strlcat(the_pathp, the_namep, the_pathp_len); + } + + if (do_create) { + /* create a file with this name */ + my_fd = openat( the_dirfd, the_namep, (O_RDWR | O_CREAT | O_EXCL), + (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) ); + if ( my_fd == -1 ) { + if ( errno != EEXIST ) { + printf( "%s - open failed with errno %d - %s \n", + __FUNCTION__, errno, strerror( errno ) ); + return( -1 ); + } + // name already exists, try another + i--; + continue; + } + } + + else { + /* make sure the name is unique */ + struct stat my_sb; + my_err = fstatat( the_dirfd, the_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + if ( errno == ENOENT ) { + break; + } + else { + printf( "%s - open failed with errno %d - %s \n", + __FUNCTION__, errno, strerror( errno ) ); + return( -1 ); + } + } + /* name already exists, try another */ + i--; + continue; + } + } + + if ( my_fd != -1 ) + close( my_fd ); + + return( 0 ); + +} /* create_random_name_at */ + +/* ************************************************************************************************************** + * Test close, fpathconf, fstat, open, pathconf system calls. + * ************************************************************************************************************** + */ +int openat_close_test( void * the_argp ) +{ + int my_err; + int my_dirfd = -1; + int my_fd = -1; + int error_fd = -1; + char * my_dirpathp = NULL; + char * my_namep = NULL; + char * my_pathp = NULL; + ssize_t my_result; + long my_pconf_result; + struct stat my_sb; + char my_buffer[32]; + kern_return_t my_kr; + + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_dirpathp = 0x00; + strlcat( my_dirpathp, &g_target_path[0], PATH_MAX ); + + my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_namep = 0x00; + if (my_pathp) { + *my_pathp = 0x00; + } + + /* If dirpath is absolute, we can ask for an absolute path name to file back from create_random_name_at */ + if (*my_dirpathp == '/') { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + } + + /* + * Some basic openat validation. If pathname is absolute, invalid fd should + * not matter. 
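+ * (Per POSIX openat() semantics: with an absolute pathname the fd argument is
+ * ignored entirely, while a relative pathname resolved against a bad fd fails,
+ * typically with EBADF.)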
+ */ + + if (*my_dirpathp == '/') { + my_dirfd = openat( INVALID_FD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + printf( "\t Was Absolute pathname, invalid fd, %d, provided as input \n", INVALID_FD); + goto test_failed_exit; + } + close(my_dirfd); + + } + + my_dirfd = openat( AT_FDCWD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + goto test_failed_exit; + } + + /* create a test file */ + my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1 ); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + /* + * If pathname is not absolute, an openat relative to an invalid directory fd + * should not work. + */ + if (my_pathp) { + /* test O_WRONLY case */ + my_fd = openat( INVALID_FD, my_namep, O_WRONLY, 0 ); + if ( my_fd != -1 ) { + printf( "openat call relative to invalid dir fd worked\n"); + printf( "\t file we attempted to open -> \"%s\" relative to fd -173\n", my_pathp ); + goto test_failed_exit; + } + } + + /* test O_WRONLY case */ + my_fd = openat( my_dirfd, my_namep, O_WRONLY, 0 ); + if ( my_fd == -1 ) { + printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t file we attempted to open -> \"%s\" \n", my_pathp ); + goto test_failed_exit; + } + + /* + * try to open relative to non-directory fd. + * It should fail with ENOTDIR. + */ + if ((error_fd = openat(my_fd, my_namep, O_WRONLY, 0)) != -1) { + printf( "openat call succeeded with fd being a non-directory fd\n"); + printf( "\t file we attempted to open (relative to itself)-> \"%s\" \n", my_pathp ); + close(error_fd); + goto test_failed_exit; + } else if (errno != ENOTDIR) { + printf( "openat call should have failed with errno 20 (ENOTDIR). actually failed with %d - \"%s\" \n", errno, strerror( errno ) ); + } + + my_pconf_result = fpathconf( my_fd, _PC_NAME_MAX ); + if ( my_pconf_result == -1 ) { + printf( "fpathconf - _PC_NAME_MAX - failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + // printf( "_PC_NAME_MAX %ld \n", my_pconf_result ); + /* results look OK? */ + if ( my_pconf_result < 6 ) { + printf( "fpathconf - _PC_NAME_MAX - looks like wrong results \n" ); + goto test_failed_exit; + } + + /* write some data then try to read it */ + my_result = write( my_fd, "kat", 3 ); + my_err = errno; + if ( my_result != 3 ) { + if ( sizeof( ssize_t ) > sizeof( int ) ) { + printf( "write failed. should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); + } + else { + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); + } + goto test_failed_exit; + } + + /* Try to read - this should fail since we opened file with O_WRONLY */ + my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) ); + my_err = errno; + if ( my_result != -1 ) { + printf( "read call should have failed with errno 9 (EBADF) \n" ); + goto test_failed_exit; + } + else if ( my_err != EBADF ) { + printf( "read call should have failed with errno 9 (EBADF).
actually failed with %d - \"%s\" \n", my_err, strerror( my_err) ); + goto test_failed_exit; + } + + close( my_fd ); + + /* test O_TRUNC and O_APPEND case */ + my_fd = openat( my_dirfd, my_namep, (O_RDWR | O_TRUNC | O_APPEND), 0 ); + if ( my_fd == -1 ) { + printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t file we attempted to open -> \"%s\" \n", my_pathp ); + goto test_failed_exit; + } + + my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) ); + if ( my_result == -1 ) { + printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_result != 0 ) { + printf( "read failed - should have read 0 bytes. \n" ); + goto test_failed_exit; + } + + my_result = write( my_fd, "kat", 3 ); + my_err = errno; + if ( my_result != 3 ) { + if ( sizeof( ssize_t ) > sizeof( int ) ) { + printf( "write failed. should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); + } + else { + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); + } + goto test_failed_exit; + } + + /* add some more data to the test file - this should be appended */ + lseek( my_fd, 0, SEEK_SET ); + my_result = write( my_fd, "zzz", 3 ); + my_err = errno; + if ( my_result != 3 ) { + if ( sizeof( ssize_t ) > sizeof( int ) ) { + printf( "write failed. should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); + } + else { + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); + } + goto test_failed_exit; + } + + /* now verify the writes */ + bzero( (void *)&my_buffer[0], sizeof(my_buffer) ); + lseek( my_fd, 0, SEEK_SET ); + my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) ); + if ( my_result == -1 ) { + printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_buffer[0] != 'k' || my_buffer[5] != 'z' ) { + printf( "read failed to get correct data \n" ); + goto test_failed_exit; + } + + /* + * try to stat relative to non-directory fd. + * It should fail with ENOTDIR. + */ + if ((fstatat( my_fd, my_namep, &my_sb, 0 )) != -1) { + printf( "fstatat call succeeded with fd being a non-directory fd\n"); + printf( "\t file we attempted to stat (relative to itself)-> \"%s\" \n", my_pathp ); + goto test_failed_exit; + } else if (errno != ENOTDIR) { + printf( "fstatat call should have failed with errno 20 (ENOTDIR).
actually failed with %d - \"%s\" \n", errno, strerror( errno ) ); + } + + /* test fstatat */ + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err == -1 ) { + printf( "fstatat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_sb.st_size != 6 ) { + printf( "fstatat call failed - st_size is wrong \n" ); + goto test_failed_exit; + } + if ( !S_ISREG( my_sb.st_mode ) ) { + printf( "fstatat call failed - st_mode does not indicate regular file \n" ); + goto test_failed_exit; + } + + my_err = 0; + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + if ( my_fd != -1 ) + close( my_fd ); + + if ( my_pathp != NULL ) { + remove(my_pathp); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); + } + + if ( my_namep ) { + unlinkat( my_dirfd, my_namep, 0 ); + vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX); + } + + if ( my_dirfd != -1) + close(my_dirfd); + + if ( my_dirpathp != NULL ) { + vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX); + } + + return( my_err ); +} + +/* ************************************************************************************************************** + * Test linkat, fstatat and unlinkat system calls. + * ************************************************************************************************************** + */ +int linkat_fstatat_unlinkat_test( void * the_argp ) +{ + int my_err; + int my_dirfd = -1; + int my_fd = -1; + char * my_dirpathp = NULL; + char * my_namep = NULL; + char * my_pathp = NULL; + char * my_name2p = NULL; + nlink_t my_link_count; + ssize_t my_result; + struct stat my_sb; + kern_return_t my_kr; + + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_dirpathp = 0x00; + strlcat( my_dirpathp, &g_target_path[0], PATH_MAX ); + + my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_namep = 0x00; + if (my_pathp) { + *my_pathp = 0x00; + } + + /* If dirpath is absolute, we can ask for an absolute path name to file back from create_random_name_at */ + if (*my_dirpathp == '/') { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + } + + /* create a test file */ + my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1 ); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_name2p, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_name2p = 0x00; + + /* now create a name for the link file */ + strlcat(
my_name2p, my_namep, NAME_MAX ); + strlcat( my_name2p, "link", NAME_MAX ); + + /* get the current link count */ + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + my_link_count = my_sb.st_nlink; + + /* Double check with absolute path name */ + if (my_pathp) { + my_err = fstatat(INVALID_FD, my_pathp, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "fstatat with INVALID_FD and absolute pathname failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + if (my_link_count != my_sb.st_nlink) { + printf( "fstatat call did not return correct number of links" ); + goto test_failed_exit; + } + } + + /* check file size (should be 0) */ + if ( my_sb.st_size != 0 ) { + printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp ); + printf( "st_size is not 0 \n" ); + goto test_failed_exit; + } + + /* change file size */ + my_fd = openat(my_dirfd, my_namep, O_RDWR, 0 ); + if ( my_fd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t file we attempted to open -> \"%s\" \n", my_pathp ); + goto test_failed_exit; + } + + my_result = write( my_fd, "kat", 3 ); + my_err = errno; + if ( my_result != 3 ) { + if ( sizeof( ssize_t ) > sizeof( int ) ) { + printf( "write failed. should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); + } + else { + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); + } + goto test_failed_exit; + } + close( my_fd ); + my_fd = -1; + + /* now link another file to our test file and recheck link count */ + /* N.B. - HFS only supports AT_SYMLINK_FOLLOW */ + my_err = linkat( my_dirfd, my_namep, my_dirfd, my_name2p, AT_SYMLINK_FOLLOW ); + if ( my_err != 0 ) { + printf( "linkat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "fstatat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + if ( (my_link_count + 1) != my_sb.st_nlink ) { + printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp ); + printf( "incorrect st_nlink \n" ); + goto test_failed_exit; + } + + /* check file size (should be 3) */ + if ( my_sb.st_size != 3 ) { + printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp ); + printf( "st_size is not 3 \n" ); + goto test_failed_exit; + } + + /* now make sure unlink works OK */ + my_err = unlinkat( my_dirfd, my_name2p, 0 ); + if ( my_err != 0 ) { + printf( "unlinkat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat call failed. got errno %d - %s.
\n", errno, strerror( errno ) ); + goto test_failed_exit; + } + if ( my_link_count != my_sb.st_nlink ) { + printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp ); + printf( "incorrect st_nlink \n" ); + goto test_failed_exit; + } + + my_err = 0; + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + if ( my_fd != -1 ) + close( my_fd ); + + if ( my_name2p != NULL ) { + (void)unlinkat( my_dirfd, my_name2p, 0 ); + vm_deallocate(mach_task_self(), (vm_address_t)my_name2p, NAME_MAX); + } + + if ( my_namep != NULL ) { + (void)unlinkat( my_dirfd, my_namep, 0 ); + vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX); + } + + if ( my_pathp != NULL ) { + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); + } + + if ( my_dirpathp != NULL ) { + vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX); + } + + if ( my_dirfd != -1 ) + close( my_dirfd ); + + return( my_err ); +} + +/* ************************************************************************************************************** + * Test faccessat, fchmodat and fchmod system calls. + * ************************************************************************************************************** + */ +int faccessat_fchmodat_fchmod_test( void * the_argp ) +{ + int error_occurred; + int is_absolute_path = 0; + int my_err; + int my_dirfd = -1; + int my_fd = -1; + + char * my_dirpathp = NULL; + char * my_namep = NULL; + char * my_pathp = NULL; + + uid_t euid,ruid; + struct stat my_sb; + + FILE * file_handle; + + kern_return_t my_kr; + + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_dirpathp = 0x00; + strlcat( my_dirpathp, &g_target_path[0], PATH_MAX ); + + /* + * Some basic openat validation. If pathname is absolute, an invalid fd should + not matter.
+ */ + + if (*my_dirpathp == '/') { + is_absolute_path = 1; + my_dirfd = openat(INVALID_FD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + printf( "\t Was Absolute pathname, invalid fd, %d, provided as input \n", INVALID_FD); + goto test_failed_exit; + } + close( my_dirfd ); + } + + my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_namep = 0x00; + + if (is_absolute_path) { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_pathp = 0x00; + } + + /* create a test file */ + my_err = create_random_name_at(my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + /* test chmod */ + my_err = fchmodat(my_dirfd, my_namep, S_IRWXU, 0); + if ( my_err == -1 ) { + printf( "chmod call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + my_err = fchmodat( my_dirfd, my_namep, (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP), 0 ); + if ( my_err == -1 ) { + printf( "chmod call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* test access - this should fail */ + my_err = faccessat( my_dirfd, my_namep, (X_OK), 0 ); + if ( my_err == 0 ) { + printf( "access call should have failed, but did not. \n" ); + goto test_failed_exit; + } + else if ( my_err == -1 ) { + int tmp = 0; + tmp = getuid( ); + + /* special case when running as root - we get back EPERM when running as root */ + my_err = errno; + if ( ( tmp == 0 && my_err != EPERM) || (tmp != 0 && my_err != EACCES) ) { + printf( "access failed with errno %d - %s. \n", my_err, strerror( my_err ) ); + goto test_failed_exit; + } + } + + /* verify correct modes are set */ + /* First check that Absolute path works even with an invalid FD */ + if (is_absolute_path) { + my_err = fstatat( INVALID_FD, my_pathp, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "fstatat call failed with an absolute pathname. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + } + + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + if ( (my_sb.st_mode & (S_IRWXO | S_IXGRP)) != 0 || + (my_sb.st_mode & (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) == 0 ) { + printf( "chmod call appears to have failed. stat shows incorrect values in st_mode! \n" ); + goto test_failed_exit; + } + + + /* another test for the access system call -- refer to radar# 6725311 */ + + + /* + * This test makes sure that the access system call does not give the current user extra + permissions on files the current user does not own.
From radar #6725311, this could + happen when the current user calls access() on a file owned by the current user in + the same directory as the other files not owned by the current user. + * + * Note: This test expects that the effective uid (euid) is set to root. + * + */ + + /* Create a file that root owns */ + file_handle = fopen(FILE_NOTME, "w"); + fclose(file_handle); + + /* Currently running as root (through setreuid manipulation), switch to running as the current user. */ + euid = geteuid(); + ruid = getuid(); + setreuid(ruid, ruid); + + /* Create a file that the current user owns */ + file_handle = fopen(FILE_ME, "w"); + fclose(file_handle); + + error_occurred = 0; + + /* Try to remove the file owned by root (this should fail). */ + my_err = unlinkat( AT_FDCWD, FILE_NOTME, 0 ); + + if (my_err < 0) { + my_err = errno; + } + + if (my_err == 0) { + printf("Unresolved: First attempt deleted '" FILE_NOTME "'! \n"); + error_occurred = 1; + } else { + printf("Passed: First attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err )); + + /* Set _DELETE_OK on a file that the current user owns */ + faccessat(AT_FDCWD, FILE_ME, _DELETE_OK, 0 ); + + /* Try to remove the file owned by root again (should give us: EACCES [13]) */ + my_err = unlinkat(AT_FDCWD, FILE_NOTME, 0); + + if (my_err < 0) { + my_err = errno; + } + + if (my_err == 0) { + printf("Failed: Second attempt deleted '" FILE_NOTME "'!\n"); + error_occurred = 1; + } else if (my_err == 13) { + printf("Passed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err )); + } else { + printf("Failed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err )); + error_occurred = 1; + } + } + + /* Reset to running as root */ + setreuid(ruid, euid); + + if(error_occurred == 1) { + goto test_failed_exit; + } + + + /* end of test*/ + + + /* test fchmod */ + my_fd = openat( my_dirfd, my_namep, O_RDONLY, 0); + if ( my_fd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t we attempted to open -> \"%s\" \n", &g_target_path[0] ); + goto test_failed_exit; + } + + my_err = fchmod( my_fd, S_IRWXU ); + if ( my_err == -1 ) { + printf( "fchmod call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + my_err = fstatat( INVALID_FD, my_pathp, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* verify correct modes are set */ + if ( (my_sb.st_mode & (S_IRWXG | S_IRWXO)) != 0 || + (my_sb.st_mode & (S_IRWXU)) == 0 ) { + printf( "fchmod call appears to have failed. stat shows incorrect values in st_mode!
\n" ); + goto test_failed_exit; + } + + my_err = 0; + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + if ( my_fd != -1 ) + close( my_fd ); + if ( my_pathp != NULL ) { + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); + } + if ( my_namep != NULL ) { + unlinkat(my_dirfd, my_namep, 0); + vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX); + + } + + if ( my_dirfd != -1) + close( my_dirfd); + + if ( my_dirpathp != NULL ) { + vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX); + } + + return( my_err ); +} + +/* ************************************************************************************************************** + * Test fchownat, fchown, readlinkat, symlinkat system calls. + * ************************************************************************************************************** + */ +int fchownat_fchown_symlinkat_test( void * the_argp ) +{ + int my_err, my_group_count, i; + int my_fd = -1; + int my_dirfd = -1; + char * my_dirpathp = NULL; + char * my_namep = NULL; + char * my_link_namep = NULL; + char * my_pathp = NULL; + char * my_link_pathp = NULL; + int is_absolute_path = 0; + uid_t my_orig_uid; + gid_t my_orig_gid, my_new_gid1 = 0, my_new_gid2 = 0; + ssize_t my_result; + struct stat my_sb; + gid_t my_groups[ NGROUPS_MAX ]; + char my_buffer[ 64 ]; + kern_return_t my_kr; + + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_dirpathp = 0x00; + strlcat( my_dirpathp, &g_target_path[0], PATH_MAX ); + + /* + * Some basic openat validation. If pathname is absolute, an invalid fd should + not matter.
+ */ + if (*my_dirpathp == '/') { + is_absolute_path = 1; + my_dirfd = openat(INVALID_FD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + printf( "\t Was Absolute pathname, invalid fd, %d, provided as input \n", INVALID_FD); + goto test_failed_exit; + } + close( my_dirfd ); + } + + my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_namep = 0x00; + + if (is_absolute_path) { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_pathp = 0x00; + } + + /* create a test file */ + my_err = create_random_name_at(my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_link_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_link_namep = 0x00; + + if (is_absolute_path) { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_link_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_link_pathp = 0x00; + } + + /* get a name for the link (to create the symlink later) */ + my_err = create_random_name_at(my_dirfd, my_dirpathp, my_link_namep, NAME_MAX, my_link_pathp, PATH_MAX, 0 ); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + if ( !_prime_groups() ) { + goto test_failed_exit; + } + + /* set up by getting a list of groups */ + my_group_count = getgroups( NGROUPS_MAX, &my_groups[0] ); + + if ( my_group_count == -1 || my_group_count < 1 ) { + printf( "getgroups call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* now change group owner to something other than current value */ + my_orig_gid = my_sb.st_gid; + my_orig_uid = my_sb.st_uid; + + for ( i = 0; i < my_group_count; i++ ) { + if ( my_orig_gid != my_groups[ i ] ) { + if ( my_new_gid1 == 0 ) { + my_new_gid1 = my_groups[ i ]; + } + else if( my_new_gid1 != my_groups[ i ] ) { + my_new_gid2 = my_groups[ i ]; + break; + } + } + } + if ( i >= my_group_count ) { + printf( "not enough groups to choose from. st_gid is the same as current groups! \n" ); + goto test_failed_exit; + } + + my_err = fchownat( my_dirfd, my_namep, my_orig_uid, my_new_gid1, 0 ); + if ( my_err != 0 ) { + printf( "chown call failed. got errno %d - %s. 
\n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* make sure the group owner was changed */ + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + if ( my_sb.st_gid == my_orig_gid ) { + printf( "chown call failed. st_gid is not correct! \n" ); + goto test_failed_exit; + } + + /* change group owner back using fchown */ + if (is_absolute_path) { + my_fd = openat( INVALID_FD, my_pathp, O_RDWR, 0 ); + } else { + my_fd = openat( my_dirfd, my_namep, O_RDWR, 0 ); + } + + if ( my_fd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t we attempted to open -> \"%s\" \n", &g_target_path[0] ); + goto test_failed_exit; + } + + my_err = fchown( my_fd, my_orig_uid, my_new_gid2 ); + if ( my_err != 0 ) { + printf( "fchown call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* make sure the group owner was changed back to the original value */ + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "fstatat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + if ( my_sb.st_gid == my_new_gid1 ) { + printf( "fchown call failed. st_gid is not correct! \n" ); + goto test_failed_exit; + } + + /* create a link file and test fstatat(..., AT_SYMLINK_NOFOLLOW) */ + my_err = symlinkat( my_namep, my_dirfd, my_link_namep ); + if ( my_err != 0 ) { + printf( "symlinkat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + my_err = fstatat( my_dirfd, my_link_namep, &my_sb, AT_SYMLINK_NOFOLLOW ); + if ( my_err != 0 ) { + printf( "fstatat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* now change group owner to something other than current value */ + my_orig_gid = my_sb.st_gid; + my_orig_uid = my_sb.st_uid; + my_err = fchownat( my_dirfd, my_link_namep, my_orig_uid, my_new_gid1, AT_SYMLINK_NOFOLLOW ); + if ( my_err != 0 ) { + printf( "fchownat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* make sure the group owner was changed to new value */ + my_err = fstatat( my_dirfd, my_link_namep, &my_sb, AT_SYMLINK_NOFOLLOW ); + if ( my_err != 0 ) { + printf( "fstatat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + if ( my_sb.st_gid == my_new_gid2 ) { + printf( "fchownat call failed. st_gid is not correct! \n" ); + goto test_failed_exit; + } + + /* make sure we can read the symlink file */ + my_result = readlinkat( my_dirfd, my_link_namep, &my_buffer[0], sizeof(my_buffer) ); + if ( my_result == -1 ) { + printf( "readlinkat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + /* make sure we read some data */ + if ( my_result < 1 ) { + printf( "readlinkat failed to read any data. 
\n" ); + goto test_failed_exit; + } + + my_err = 0; + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + if ( my_fd != -1 ) + close( my_fd ); + if ( my_namep ) { + unlinkat( my_dirfd, my_namep, 0); + vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX); + } + if ( my_pathp != NULL ) { + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); + } + if ( my_link_namep ) { + unlinkat( my_dirfd, my_link_namep, 0); + vm_deallocate(mach_task_self(), (vm_address_t)my_link_namep, NAME_MAX); + } + if ( my_link_pathp != NULL ) { + unlink( my_link_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_link_pathp, PATH_MAX); + } + if ( my_dirfd != -1 ) + close(my_dirfd); + + if ( my_dirpathp != NULL ) { + vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX); + } + + + return( my_err ); +} + +/* ************************************************************************************************************** + * Test mkdirat, unlinkat, umask system calls. + * ************************************************************************************************************** + */ +int mkdirat_unlinkat_umask_test( void * the_argp ) +{ + int my_err; + int my_dirfd = -1; + int my_fd = -1; + int did_umask = 0; + char * my_dirpathp = NULL; + char * my_namep = NULL; + char * my_pathp = NULL; + mode_t my_orig_mask; + struct stat my_sb; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_dirpathp = 0x00; + strlcat( my_dirpathp, &g_target_path[0], PATH_MAX ); + + my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + goto test_failed_exit; + } + + /* If dirpath is absolute, we can ask for an absolute path name to file back from create_random_name_at */ + if (*my_dirpathp == '/') { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_namep = 0x00; + if (my_pathp) { + *my_pathp = 0x00; + } + + /* get a random name to use with mkdirat (don't create) */ + my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 0 ); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + /* set umask to clear WX for other and group and clear X for user */ + my_orig_mask = umask( (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH) ); + did_umask = 1; + + /* create a directory with RWX for user, group, other (which should be limited by umask) */ + my_err = mkdirat( my_dirfd, my_namep, (S_IRWXU | S_IRWXG | S_IRWXO) ); + if ( my_err == -1 ) { + printf( "mkdirat failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + /* verify results - (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH) should be clear*/ + 
my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "fstatat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( (my_sb.st_mode & (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH)) != 0 ) { + printf( "umask did not limit modes as it should have \n" ); + goto test_failed_exit; + } + + /* get rid of our test directory */ + my_err = unlinkat( my_dirfd, my_namep, AT_REMOVEDIR ); + if ( my_err == -1 ) { + printf( "unlinkat(..., AT_REMOVEDIR) failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + my_err = 0; + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + if ( my_fd != -1 ) + close( my_fd ); + + if ( my_namep ) { + unlinkat( my_dirfd, my_namep, AT_REMOVEDIR ); + vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX); + } + + if ( my_pathp != NULL ) { + rmdir( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); + } + + if ( my_dirfd != -1 ) + close(my_dirfd); + + if ( my_dirpathp != NULL ) { + vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX); + } + + if ( did_umask != 0 ) { + umask( my_orig_mask ); + } + + return( my_err ); +} + +/* ************************************************************************************************************** + * Test renameat, fstatat system calls. + * ************************************************************************************************************** + */ +int renameat_test( void * the_argp ) +{ + int my_err; + int my_dirfd = -1; + char * my_dirpathp = NULL; + char * my_namep = NULL; + char * my_pathp = NULL; + char * my_new_namep = NULL; + char * my_new_pathp = NULL; + ino_t my_file_id; + struct stat my_sb; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_dirpathp = 0x00; + strlcat( my_dirpathp, &g_target_path[0], PATH_MAX ); + + my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 ); + if ( my_dirfd == -1 ) { + printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp ); + goto test_failed_exit; + } + + /* If dirpath is absolute, we can ask for an absolute path name to file back from create_random_name_at */ + if (*my_dirpathp == '/') { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_namep = 0x00; + if (my_pathp) { + *my_pathp = 0x00; + } + + /* create random file */ + my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1 ); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + + /* If dirpath is absolute, we can ask for an absolute path name to file back from create_random_name_at */ + if (*my_dirpathp == '/') { + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_pathp, PATH_MAX,
VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_namep, NAME_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_new_namep = 0x00; + if (my_new_pathp) { + *my_new_pathp = 0x00; + } + + /* create random file */ + my_err = create_random_name_at( my_dirfd, my_dirpathp, my_new_namep, NAME_MAX, my_new_pathp, PATH_MAX, 0 ); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + /* save file ID for later use */ + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "fstatat - failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + my_file_id = my_sb.st_ino; + + /* test rename */ + my_err = renameat( my_dirfd, my_namep, my_dirfd, my_new_namep ); + if ( my_err == -1 ) { + printf( "rename - failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + /* make sure old name is no longer there */ + my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 ); + if ( my_err == 0 ) { + printf( "renameat call failed - found old name \n" ); + goto test_failed_exit; + } + + /* make sure new name is there and is correct file id */ + my_err = fstatat( my_dirfd, my_new_namep, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_file_id != my_sb.st_ino ) { + printf( "rename failed - wrong file id \n" ); + goto test_failed_exit; + } + + /* cross check with absolute path and invalid fd */ + if (my_new_pathp) { + my_err = fstatat( INVALID_FD, my_new_pathp, &my_sb, 0 ); + if ( my_err != 0 ) { + printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_file_id != my_sb.st_ino ) { + printf( "rename failed - wrong file id \n" ); + goto test_failed_exit; + } + } + + my_err = 0; + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + if ( my_pathp != NULL ) { + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); + } + if ( my_new_pathp != NULL ) { + remove( my_new_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_new_pathp, PATH_MAX); + } + return( my_err ); +} + +/* ************************************************************************************************************** + * Test task_set_exception_ports, host_set_exception_ports + * ************************************************************************************************************** + */ +static int __get_except_port(int which, mach_port_t *portp, + exception_behavior_t *behaviorp, + thread_state_flavor_t *flavorp) +{ + exception_mask_t masks[EXC_TYPES_COUNT]; + mach_msg_type_number_t nmasks = 0; + exception_port_t ports[EXC_TYPES_COUNT]; + exception_behavior_t behaviors[EXC_TYPES_COUNT]; + thread_state_flavor_t flavors[EXC_TYPES_COUNT]; + + *portp = MACH_PORT_NULL; + *behaviorp = 0; + *flavorp = 0; + + kern_return_t kr = KERN_FAILURE; + if (which == 0) { /* host port */ + kr = host_get_exception_ports(mach_host_self(), EXC_MASK_BAD_ACCESS, + masks, &nmasks, ports, behaviors, flavors); + } else if (which == 1) { /* task port */ + kr = task_get_exception_ports(mach_task_self(), EXC_MASK_BAD_ACCESS, + masks, &nmasks, ports, 
+					      masks, &nmasks, ports, behaviors, flavors);
+	} else if (which == 2) { /* thread port */
+		kr = thread_get_exception_ports(mach_thread_self(), EXC_MASK_BAD_ACCESS,
+						masks, &nmasks, ports, behaviors, flavors);
+	} else {
+		printf("ERROR: invalid 'which' in %s\n", __func__);
+		return -1;
+	}
+	if (kr != KERN_SUCCESS) {
+		printf("ERROR getting %s exception port!\n",
+		       which == 0 ? "host" : (which == 1 ? "task" : "thread"));
+		return -1;
+	}
+	*portp = ports[0];
+	*behaviorp = behaviors[0];
+	*flavorp = flavors[0];
+
+	return 0;
+}
+
+int set_exception_ports_test( void * the_argp )
+{
+	int testFlavor = -900000;
+	kern_return_t ret;
+	mach_port_t exception_port;
+
+	mach_port_t old_except_port = MACH_PORT_NULL;
+	exception_behavior_t old_behavior;
+	thread_state_flavor_t old_flavor;
+
+	ret = mach_port_allocate( mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &exception_port );
+	if (ret != KERN_SUCCESS) {
+		printf("ERROR allocating new exception port?!\n");
+		return -1;
+	}
+	ret = mach_port_insert_right( mach_task_self(), exception_port, exception_port, MACH_MSG_TYPE_MAKE_SEND );
+	if (ret != KERN_SUCCESS) {
+		printf("ERROR inserting send right into new exception port?!\n");
+		goto test_failed_exit;
+	}
+
+	if (__get_except_port(2, &old_except_port, &old_behavior, &old_flavor) < 0)
+		goto test_failed_exit;
+
+	ret = thread_set_exception_ports( mach_thread_self(),
+					  EXC_MASK_BAD_ACCESS,
+					  exception_port,
+					  EXCEPTION_STATE_IDENTITY,
+					  testFlavor );
+	/*
+	 * this test _fails_ if we successfully set the exception port
+	 * with an invalid thread flavor
+	 */
+	if (ret == KERN_SUCCESS) {
+		thread_set_exception_ports( mach_thread_self(),
+					    EXC_MASK_BAD_ACCESS,
+					    old_except_port, old_behavior, old_flavor );
+		printf("thread_set_exception_ports failed: expected !KERN_SUCCESS for flavor %d\n", testFlavor);
+		goto test_failed_exit;
+	}
+
+	/*
+	 * so far, so good: the thread_set_exception_ports call failed,
+	 * so we don't need to reset anything, but we do need to
+	 * drop our reference to the old exception port we grabbed.
+	 */
+	mach_port_deallocate( mach_task_self(), old_except_port );
+
+	if (__get_except_port(1, &old_except_port, &old_behavior, &old_flavor) < 0)
+		goto test_failed_exit;
+
+	ret = task_set_exception_ports( mach_task_self(),
+					EXC_MASK_BAD_ACCESS,
+					exception_port,
+					EXCEPTION_STATE_IDENTITY,
+					testFlavor );
+	/*
+	 * this test _fails_ if we successfully set the exception port
+	 * with an invalid thread flavor
+	 */
+	if (ret == KERN_SUCCESS) {
+		task_set_exception_ports( mach_task_self(),
+					  EXC_MASK_BAD_ACCESS,
+					  old_except_port, old_behavior, old_flavor );
+		printf("task_set_exception_ports failed: expected !KERN_SUCCESS for flavor %d\n", testFlavor);
+		goto test_failed_exit;
+	}
+
+	/*
+	 * so far, so good: the task_set_exception_ports call failed,
+	 * so we don't need to reset anything, but we do need to
+	 * drop our reference to the old exception port we grabbed.
+	 */
+	mach_port_deallocate( mach_task_self(), old_except_port );
+
+	/*
+	 * Now try the host exception port
+	 */
+	if (__get_except_port(0, &old_except_port, &old_behavior, &old_flavor) < 0)
+		goto test_failed_exit;
+
+	ret = host_set_exception_ports( mach_host_self(),
+					EXC_MASK_BAD_ACCESS,
+					exception_port,
+					EXCEPTION_STATE_IDENTITY,
+					testFlavor );
+	/*
+	 * this test _fails_ if we successfully set the exception port
+	 * with an invalid thread flavor
+	 */
+	if (ret == KERN_SUCCESS) {
+		host_set_exception_ports( mach_host_self(),
+					  EXC_MASK_BAD_ACCESS,
+					  old_except_port, old_behavior, old_flavor );
+		printf("host_set_exception_ports failed: expected !KERN_SUCCESS for flavor %d\n", testFlavor);
+		goto test_failed_exit;
+	}
+
+	mach_port_deallocate( mach_task_self(), exception_port );
+	mach_port_deallocate( mach_task_self(), old_except_port );
+	return 0;
+
+test_failed_exit:
+	mach_port_deallocate( mach_task_self(), exception_port );
+	if (old_except_port != MACH_PORT_NULL)
+		mach_port_deallocate( mach_task_self(), old_except_port );
+	return -1;
+}
+
 
 #if TEST_SYSTEM_CALLS
 
 /* **************************************************************************************************************
diff --git a/tools/tests/xnu_quick_test/tests.h b/tools/tests/xnu_quick_test/tests.h
index 6edbaa9b6..992d21730 100644
--- a/tools/tests/xnu_quick_test/tests.h
+++ b/tools/tests/xnu_quick_test/tests.h
@@ -123,6 +123,14 @@ int sched_tests( void * the_argp );
 int content_protection_test( void * the_argp );
 int pipes_test( void * the_argp );
 int kaslr_test( void * the_argp );
+int getattrlistbulk_test( void * the_argp );
+int openat_close_test( void * the_argp );
+int linkat_fstatat_unlinkat_test( void * the_argp );
+int faccessat_fchmodat_fchmod_test( void * the_argp );
+int fchownat_fchown_symlinkat_test( void * the_argp );
+int mkdirat_unlinkat_umask_test( void * the_argp );
+int renameat_test( void * the_argp );
+int set_exception_ports_test( void * the_argp );
 
 struct test_entry
diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist b/tools/tests/xnu_quick_test/xnu_quick_test.entitlements
similarity index 77%
rename from tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist
rename to tools/tests/xnu_quick_test/xnu_quick_test.entitlements
index a5398e575..1f58459c9 100644
--- a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist
+++ b/tools/tests/xnu_quick_test/xnu_quick_test.entitlements
@@ -2,7 +2,7 @@
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
-	<key>com.apple.private.kernel.override-cpumon</key>
+	<key>com.apple.private.security.disk-device-access</key>
 	<true/>
 </dict>
 </plist>
diff --git a/tools/tests/zero-to-n/Makefile b/tools/tests/zero-to-n/Makefile
index 4c3b62c8e..e84843601 100644
--- a/tools/tests/zero-to-n/Makefile
+++ b/tools/tests/zero-to-n/Makefile
@@ -5,19 +5,19 @@ else
 	Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
 endif
 
-CC:=xcrun -sdk "$(SDKROOT)" cc
+CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
 
 ifdef RC_ARCHS
 	ARCHS:=$(RC_ARCHS)
 else
 	ifeq "$(Embedded)" "YES"
-		ARCHS:=armv7 armv7s
+		ARCHS:=armv7 armv7s arm64
 	else
 		ARCHS:=x86_64 i386
 	endif
 endif
 
-CFLAGS := -g $(patsubst %, -arch %, $(ARCHS))
+CFLAGS := -g $(patsubst %, -arch %, $(ARCHS)) -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
 
 DSTROOT?=$(shell /bin/pwd)
 SYMROOT?=$(shell /bin/pwd)
diff --git a/tools/tests/zero-to-n/zero-to-n.c b/tools/tests/zero-to-n/zero-to-n.c
index 2df39a02e..97910a06f 100644
--- a/tools/tests/zero-to-n/zero-to-n.c
+++ b/tools/tests/zero-to-n/zero-to-n.c
@@ -29,6 +29,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <math.h>
+#include <err.h>
 #include <stdlib.h>
 #include <pthread.h>
 #include <errno.h>
@@ -37,8 +38,14 @@
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
+#include <sys/resource.h>
 #include <mach/mach_time.h>
+#include <spawn.h>
+#include <spawn_private.h>
+#include <sys/spawn_internal.h>
+#include <mach-o/dyld.h>
+
 
 #include <mach/mach.h>
 #include <mach/semaphore.h>
@@ -67,6 +74,7 @@ void print_usage();
 int thread_setup(int my_id);
 my_policy_type_t parse_thread_policy(const char *str);
 int thread_finish_iteration();
+void selfexec_with_apptype(int argc, char *argv[]);
 
 /* Global variables (general) */
 int g_numthreads;
@@ -152,7 +160,8 @@ thread_setup(int my_id)
 	switch (g_policy) {
 		case MY_POLICY_TIMESHARE:
 		{
-			return 0;
+			res = KERN_SUCCESS;
+			break;
 		}
 		case MY_POLICY_REALTIME:
 		{
@@ -270,9 +279,7 @@ child_thread_func(void *arg)
 	/* Tell main thread when everyone has set up */
 	new = OSAtomicIncrement32(&g_done_threads);
 
-	if (new == g_numthreads) {
-		semaphore_signal(g_main_sem);
-	}
+	semaphore_signal(g_main_sem);
 
 	/* For each iteration */
 	for (i = 0; i < g_iterations; i++) {
@@ -413,10 +420,11 @@ main(int argc, char **argv)
 	uint64_t max, min;
 	uint64_t traceworthy_latency_ns = TRACEWORTHY_NANOS;
 	float avg, stddev;
+	boolean_t seen_apptype = FALSE;
 
 	srand(time(NULL));
 
-	if (argc < 5 || argc > 9) {
+	if (argc < 5 || argc > 10) {
 		print_usage();
 		goto fail;
 	}
@@ -444,12 +452,18 @@ main(int argc, char **argv)
 			traceworthy_latency_ns = strtoull(argv[++i], NULL, 10);
 		} else if (strcmp(argv[i], "-affinity") == 0) {
 			g_do_affinity = TRUE;
+		} else if (strcmp(argv[i], "-switched_apptype") == 0) {
+			seen_apptype = TRUE;
 		} else {
 			print_usage();
 			goto fail;
 		}
 	}
 
+	if (!seen_apptype) {
+		selfexec_with_apptype(argc, argv);
+	}
+
 	mach_timebase_info(&g_mti);
@@ -496,9 +510,32 @@ main(int argc, char **argv)
 		assert(res == 0, fail);
 	}
 
+	res = setpriority(PRIO_DARWIN_ROLE, 0, PRIO_DARWIN_ROLE_UI_FOCAL);
+	assert(res == 0, fail);
+
 	thread_setup(0);
+
+	/* Switching to fixed pri may have stripped our main thread QoS and priority, so re-instate */
+	if (g_policy == MY_POLICY_FIXEDPRI) {
+		thread_precedence_policy_data_t prec;
+		mach_msg_type_number_t count;
+		boolean_t get_default = FALSE;
+
+		count = THREAD_PRECEDENCE_POLICY_COUNT;
+		res = thread_policy_get(mach_thread_self(), THREAD_PRECEDENCE_POLICY, (thread_policy_t) &prec, &count, &get_default);
+		assert(res == 0, fail);
+
+		prec.importance += 16; /* 47 - 31 */
+		res = thread_policy_set(mach_thread_self(), THREAD_PRECEDENCE_POLICY, (thread_policy_t) &prec, THREAD_PRECEDENCE_POLICY_COUNT);
+		assert(res == 0, fail);
+	}
 
 	/* Let everyone get settled */
-	semaphore_wait(g_main_sem);
-	sleep(1);
+	for (i = 0; i < g_numthreads; i++) {
+		res = semaphore_wait(g_main_sem);
+		assert(res == 0, fail);
+	}
+
+	/* Let worker threads get back to sleep... */
+	usleep(g_numthreads * 10);
 
 	/* Go! */
 	for (i = 0; i < g_iterations; i++) {
@@ -591,3 +628,42 @@ main(int argc, char **argv)
 fail:
 	return 1;
 }
+
+/*
+ * WARNING: This is SPI specifically intended for use by launchd to start UI
+ * apps. We use it here for a test tool only to opt into QoS using the same
+ * policies. Do not use this outside xnu or libxpc/launchd.
+ */
+void
+selfexec_with_apptype(int argc, char *argv[])
+{
+	int ret;
+	posix_spawnattr_t attr;
+	extern char **environ;
+	char *new_argv[argc + 1 + 1 /* NULL */];
+	int i;
+	char prog[PATH_MAX];
+	uint32_t prog_size = PATH_MAX;
+
+	ret = _NSGetExecutablePath(prog, &prog_size);
+	if (ret != 0) err(1, "_NSGetExecutablePath");
+
+	for (i = 0; i < argc; i++) {
+		new_argv[i] = argv[i];
+	}
+
+	new_argv[i] = "-switched_apptype";
+	new_argv[i+1] = NULL;
+
+	ret = posix_spawnattr_init(&attr);
+	if (ret != 0) errc(1, ret, "posix_spawnattr_init");
+
+	ret = posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETEXEC);
+	if (ret != 0) errc(1, ret, "posix_spawnattr_setflags");
+
+	ret = posix_spawnattr_setprocesstype_np(&attr, POSIX_SPAWN_PROC_TYPE_APP_DEFAULT);
+	if (ret != 0) errc(1, ret, "posix_spawnattr_setprocesstype_np");
+
+	ret = posix_spawn(NULL, prog, NULL, &attr, new_argv, environ);
+	if (ret != 0) errc(1, ret, "posix_spawn");
+}
-- 
2.45.2
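Editor's note (not part of the patch): the selfexec_with_apptype() routine above relies on the POSIX_SPAWN_SETEXEC re-exec pattern, where posix_spawn() replaces the current process image like exec() instead of creating a child. The following is a minimal, self-contained sketch of just that pattern using only public API; the private posix_spawnattr_setprocesstype_np() call from the patch is deliberately omitted, and the "-reexeced" marker flag and the printf are hypothetical stand-ins for the patch's "-switched_apptype" handling.

/* selfexec_sketch.c - illustrative only; compile on OS X with: cc selfexec_sketch.c */
#include <err.h>
#include <limits.h>
#include <mach-o/dyld.h>
#include <spawn.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

extern char **environ;

int
main(int argc, char *argv[])
{
	/* If the marker flag is already present, we have re-exec'd: just run. */
	if (argc > 1 && strcmp(argv[argc - 1], "-reexeced") == 0) {
		printf("running after re-exec\n");
		return 0;
	}

	char prog[PATH_MAX];
	uint32_t prog_size = sizeof(prog);
	if (_NSGetExecutablePath(prog, &prog_size) != 0)
		err(1, "_NSGetExecutablePath");

	/* Rebuild argv with the marker appended so the next image can detect it. */
	char *new_argv[argc + 2];
	for (int i = 0; i < argc; i++)
		new_argv[i] = argv[i];
	new_argv[argc] = "-reexeced";
	new_argv[argc + 1] = NULL;

	posix_spawnattr_t attr;
	int ret = posix_spawnattr_init(&attr);
	if (ret != 0) errc(1, ret, "posix_spawnattr_init");

	/* SETEXEC: posix_spawn replaces this process instead of forking a child. */
	ret = posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETEXEC);
	if (ret != 0) errc(1, ret, "posix_spawnattr_setflags");

	ret = posix_spawn(NULL, prog, NULL, &attr, new_argv, environ);
	/* Only reached if the spawn itself failed. */
	errc(1, ret, "posix_spawn");
}

The marker-flag check is what prevents an infinite re-exec loop: the first invocation appends the flag and execs itself; the second invocation sees it and proceeds, exactly as the patch's seen_apptype / -switched_apptype logic does in main().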